forked from expo/troggle
reformatted using black
This commit is contained in:
parent
0f8fe0e290
commit
ba2ae6cd82
341
databaseReset.py
341
databaseReset.py
@ -20,8 +20,8 @@ troggle application.
|
||||
"""
|
||||
print(" - settings on loading databaseReset.py", flush=True)
|
||||
|
||||
os.environ['PYTHONPATH'] = str(settings.PYTHON_PATH)
|
||||
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'settings')
|
||||
os.environ["PYTHONPATH"] = str(settings.PYTHON_PATH)
|
||||
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "settings")
|
||||
|
||||
print(" - settings on loading databaseReset.py")
|
||||
|
||||
@ -31,14 +31,15 @@ print(f" - Memory footprint before loading Django: {resource.getrusage(resource.
|
||||
try:
|
||||
django.setup()
|
||||
except:
|
||||
print(" ! Cyclic reference failure. Can occur when the initial db is empty. Fixed now (in UploadFileForm) but easy to reintroduce..")
|
||||
print(
|
||||
" ! Cyclic reference failure. Can occur when the initial db is empty. Fixed now (in UploadFileForm) but easy to reintroduce.."
|
||||
)
|
||||
raise
|
||||
print(f" - Memory footprint after loading Django: {resource.getrusage(resource.RUSAGE_SELF)[2] / 1024.0:.3f} MB")
|
||||
|
||||
from django.contrib.auth.models import User
|
||||
from django.core import management
|
||||
from django.db import (close_old_connections, connection, connections,
|
||||
transaction)
|
||||
from django.db import close_old_connections, connection, connections, transaction
|
||||
from django.http import HttpResponse
|
||||
from django.urls import reverse
|
||||
|
||||
@ -46,24 +47,32 @@ import troggle.core.models.survex
|
||||
from troggle.core.models.caves import Cave, Entrance
|
||||
from troggle.core.models.troggle import DataIssue
|
||||
from troggle.core.utils import get_process_memory
|
||||
from troggle.parsers.imports import (import_caves, import_drawingsfiles,
|
||||
import_ents, import_loadpos,
|
||||
import_logbook, import_logbooks,
|
||||
import_people, import_QMs, import_survex,
|
||||
import_surveyscans)
|
||||
from troggle.parsers.imports import (
|
||||
import_caves,
|
||||
import_drawingsfiles,
|
||||
import_ents,
|
||||
import_loadpos,
|
||||
import_logbook,
|
||||
import_logbooks,
|
||||
import_people,
|
||||
import_QMs,
|
||||
import_survex,
|
||||
import_surveyscans,
|
||||
)
|
||||
|
||||
if os.geteuid() == 0:
|
||||
# This protects the server from having the wrong file permissions written on logs and caches
|
||||
print("This script should be run as expo not root - quitting")
|
||||
exit()
|
||||
|
||||
expouser=settings.EXPOUSER
|
||||
expouserpass=settings.EXPOUSERPASS
|
||||
expouseremail=settings.EXPOUSER_EMAIL
|
||||
expouser = settings.EXPOUSER
|
||||
expouserpass = settings.EXPOUSERPASS
|
||||
expouseremail = settings.EXPOUSER_EMAIL
|
||||
|
||||
expoadminuser = settings.EXPOADMINUSER
|
||||
expoadminuserpass = settings.EXPOADMINUSERPASS
|
||||
expoadminuseremail = settings.EXPOADMINUSER_EMAIL
|
||||
|
||||
expoadminuser=settings.EXPOADMINUSER
|
||||
expoadminuserpass=settings.EXPOADMINUSERPASS
|
||||
expoadminuseremail=settings.EXPOADMINUSER_EMAIL
|
||||
|
||||
def reinit_db():
|
||||
"""Rebuild database from scratch. Deletes the file first if sqlite is used,
|
||||
@ -72,22 +81,26 @@ def reinit_db():
|
||||
in memory (django python models, not the database), so there is already a full load
|
||||
of stuff known. Deleting the db file does not clear memory.
|
||||
"""
|
||||
print("Reinitialising db ",end="")
|
||||
print(django.db.connections.databases['default']['NAME'])
|
||||
currentdbname = settings.DATABASES['default']['NAME']
|
||||
if currentdbname == ':memory:':
|
||||
print("Reinitialising db ", end="")
|
||||
print(django.db.connections.databases["default"]["NAME"])
|
||||
currentdbname = settings.DATABASES["default"]["NAME"]
|
||||
if currentdbname == ":memory:":
|
||||
# closing connections should wipe the in-memory database
|
||||
django.db.close_old_connections()
|
||||
for conn in django.db.connections.all():
|
||||
print(" ! Closing another connection to db...")
|
||||
conn.close()
|
||||
elif django.db.connections.databases['default']['ENGINE'] == 'django.db.backends.sqlite3':
|
||||
elif django.db.connections.databases["default"]["ENGINE"] == "django.db.backends.sqlite3":
|
||||
if os.path.isfile(currentdbname):
|
||||
try:
|
||||
print(" - deleting " + currentdbname)
|
||||
os.remove(currentdbname)
|
||||
except OSError:
|
||||
print(" ! OSError on removing: " + currentdbname + "\n ! Is the file open in another app? Is the server running?\n")
|
||||
print(
|
||||
" ! OSError on removing: "
|
||||
+ currentdbname
|
||||
+ "\n ! Is the file open in another app? Is the server running?\n"
|
||||
)
|
||||
raise
|
||||
else:
|
||||
print(" - No database file found: " + currentdbname + " ..continuing, will create it.\n")
|
||||
@ -102,102 +115,110 @@ def reinit_db():
|
||||
cursor.execute(f"USE {currentdbname}")
|
||||
print(f" - Nuked : {currentdbname}\n")
|
||||
|
||||
print(" - Migrating: " + django.db.connections.databases['default']['NAME'])
|
||||
print(" - Migrating: " + django.db.connections.databases["default"]["NAME"])
|
||||
|
||||
if django.db.connections.databases['default']['ENGINE'] == 'django.db.backends.sqlite3':
|
||||
#with transaction.atomic():
|
||||
management.call_command('makemigrations','core', interactive=False)
|
||||
management.call_command('migrate', interactive=False)
|
||||
management.call_command('migrate','core', interactive=False)
|
||||
if django.db.connections.databases["default"]["ENGINE"] == "django.db.backends.sqlite3":
|
||||
# with transaction.atomic():
|
||||
management.call_command("makemigrations", "core", interactive=False)
|
||||
management.call_command("migrate", interactive=False)
|
||||
management.call_command("migrate", "core", interactive=False)
|
||||
else:
|
||||
management.call_command('makemigrations','core', interactive=False)
|
||||
management.call_command('migrate', interactive=False)
|
||||
management.call_command('migrate','core', interactive=False)
|
||||
management.call_command("makemigrations", "core", interactive=False)
|
||||
management.call_command("migrate", interactive=False)
|
||||
management.call_command("migrate", "core", interactive=False)
|
||||
|
||||
|
||||
print(" - done migration on: " + settings.DATABASES['default']['NAME'])
|
||||
print("users in db already: ",len(User.objects.all()))
|
||||
print(" - done migration on: " + settings.DATABASES["default"]["NAME"])
|
||||
print("users in db already: ", len(User.objects.all()))
|
||||
with transaction.atomic():
|
||||
try:
|
||||
print(" - Setting up expo user on: " + django.db.connections.databases['default']['NAME'])
|
||||
print(" - Setting up expo user on: " + django.db.connections.databases["default"]["NAME"])
|
||||
print(f" - user: {expouser} ({expouserpass:.5}...) <{expouseremail}> ")
|
||||
user = User.objects.create_user(expouser, expouseremail, expouserpass)
|
||||
user.is_staff = False
|
||||
user.is_superuser = False
|
||||
user.save()
|
||||
except:
|
||||
print(" ! INTEGRITY ERROR user on: " + settings.DATABASES['default']['NAME'])
|
||||
print(django.db.connections.databases['default']['NAME'])
|
||||
print(" ! INTEGRITY ERROR user on: " + settings.DATABASES["default"]["NAME"])
|
||||
print(django.db.connections.databases["default"]["NAME"])
|
||||
print(" ! You probably have not got a clean db when you thought you had.\n")
|
||||
print(" ! Also you are probably NOT running an in-memory db now.\n")
|
||||
print("users in db: ",len(User.objects.all()))
|
||||
print("tables in db: ",len(connection.introspection.table_names()))
|
||||
memdumpsql(fn='integrityfail.sql')
|
||||
django.db.connections.databases['default']['NAME'] = ':memory:'
|
||||
#raise
|
||||
print("users in db: ", len(User.objects.all()))
|
||||
print("tables in db: ", len(connection.introspection.table_names()))
|
||||
memdumpsql(fn="integrityfail.sql")
|
||||
django.db.connections.databases["default"]["NAME"] = ":memory:"
|
||||
# raise
|
||||
|
||||
with transaction.atomic():
|
||||
try:
|
||||
print(" - Setting up expoadmin user on: " + django.db.connections.databases['default']['NAME'])
|
||||
print(" - Setting up expoadmin user on: " + django.db.connections.databases["default"]["NAME"])
|
||||
print(f" - user: {expoadminuser} ({expoadminuserpass:.5}...) <{expoadminuseremail}> ")
|
||||
user = User.objects.create_user(expoadminuser, expoadminuseremail, expoadminuserpass)
|
||||
user.is_staff = True
|
||||
user.is_superuser = True
|
||||
user.save()
|
||||
except:
|
||||
print(" ! INTEGRITY ERROR user on: " + settings.DATABASES['default']['NAME'])
|
||||
print(django.db.connections.databases['default']['NAME'])
|
||||
print(" ! INTEGRITY ERROR user on: " + settings.DATABASES["default"]["NAME"])
|
||||
print(django.db.connections.databases["default"]["NAME"])
|
||||
print(" ! You probably have not got a clean db when you thought you had.\n")
|
||||
print(" ! Also you are probably NOT running an in-memory db now.\n")
|
||||
print("users in db: ",len(User.objects.all()))
|
||||
print("tables in db: ",len(connection.introspection.table_names()))
|
||||
memdumpsql(fn='integrityfail.sql')
|
||||
django.db.connections.databases['default']['NAME'] = ':memory:'
|
||||
#raise
|
||||
print("users in db: ", len(User.objects.all()))
|
||||
print("tables in db: ", len(connection.introspection.table_names()))
|
||||
memdumpsql(fn="integrityfail.sql")
|
||||
django.db.connections.databases["default"]["NAME"] = ":memory:"
|
||||
# raise
|
||||
|
||||
|
||||
def memdumpsql(fn):
|
||||
'''Unused option to dump SQL. Aborted attempt to create a cache for loading data
|
||||
'''
|
||||
"""Unused option to dump SQL. Aborted attempt to create a cache for loading data"""
|
||||
djconn = django.db.connection
|
||||
from dump import _iterdump
|
||||
with open(fn, 'w') as f:
|
||||
|
||||
with open(fn, "w") as f:
|
||||
for line in _iterdump(djconn):
|
||||
f.write(f"{line.encode('utf8')}\n")
|
||||
return True
|
||||
|
||||
|
||||
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
||||
|
||||
class JobQueue():
|
||||
|
||||
class JobQueue:
|
||||
"""A list of import operations to run. Always reports profile times
|
||||
of the import operations in the same order.
|
||||
"""
|
||||
|
||||
def __init__(self,run):
|
||||
'''Initialises the job queue object with a fixed order for reporting
|
||||
def __init__(self, run):
|
||||
"""Initialises the job queue object with a fixed order for reporting
|
||||
options during a run. Imports the timings from previous runs.
|
||||
'''
|
||||
"""
|
||||
self.runlabel = run
|
||||
self.queue = [] # tuples of (jobname, jobfunction)
|
||||
self.results = {}
|
||||
self.results_order=[
|
||||
"date","runlabel","reinit", "caves", "people",
|
||||
"logbooks", "QMs", "scans", "survex",
|
||||
"drawings", "test" ]
|
||||
self.results_order = [
|
||||
"date",
|
||||
"runlabel",
|
||||
"reinit",
|
||||
"caves",
|
||||
"people",
|
||||
"logbooks",
|
||||
"QMs",
|
||||
"scans",
|
||||
"survex",
|
||||
"drawings",
|
||||
"test",
|
||||
]
|
||||
for k in self.results_order:
|
||||
self.results[k]=[]
|
||||
self.results[k] = []
|
||||
self.tfile = "import_profile.json"
|
||||
self.htmlfile = "profile.html" # for HTML results table. Not yet done.
|
||||
|
||||
|
||||
def enq(self,label,func):
|
||||
'''Enqueue: Adding elements to queue
|
||||
'''
|
||||
self.queue.append((label,func))
|
||||
def enq(self, label, func):
|
||||
"""Enqueue: Adding elements to queue"""
|
||||
self.queue.append((label, func))
|
||||
return True
|
||||
|
||||
def loadprofiles(self):
|
||||
"""Load timings for previous imports for each data import type
|
||||
"""
|
||||
"""Load timings for previous imports for each data import type"""
|
||||
if os.path.isfile(self.tfile):
|
||||
try:
|
||||
f = open(self.tfile, "r")
|
||||
@ -213,31 +234,27 @@ class JobQueue():
|
||||
return True
|
||||
|
||||
def dellastprofile(self):
|
||||
"""trim one set of data from the results
|
||||
"""
|
||||
"""trim one set of data from the results"""
|
||||
for j in self.results_order:
|
||||
self.results[j].pop() # delete last item
|
||||
return True
|
||||
|
||||
def delfirstprofile(self):
|
||||
"""trim one set of data from the results
|
||||
"""
|
||||
"""trim one set of data from the results"""
|
||||
for j in self.results_order:
|
||||
self.results[j].pop(0) # delete zeroth item
|
||||
return True
|
||||
|
||||
def saveprofiles(self):
|
||||
"""Save timings for the set of imports just completed
|
||||
"""
|
||||
with open(self.tfile, 'w') as f:
|
||||
"""Save timings for the set of imports just completed"""
|
||||
with open(self.tfile, "w") as f:
|
||||
json.dump(self.results, f)
|
||||
return True
|
||||
|
||||
def runqonce(self):
|
||||
"""Run all the jobs in the queue provided - once
|
||||
"""
|
||||
print("** Running job ", self.runlabel,end=" to ")
|
||||
print(django.db.connections.databases['default']['NAME'])
|
||||
"""Run all the jobs in the queue provided - once"""
|
||||
print("** Running job ", self.runlabel, end=" to ")
|
||||
print(django.db.connections.databases["default"]["NAME"])
|
||||
jobstart = time.time()
|
||||
print(f"-- Initial memory in use {get_process_memory():.3f} MB")
|
||||
self.results["date"].pop()
|
||||
@ -249,57 +266,59 @@ class JobQueue():
|
||||
start = time.time()
|
||||
memstart = get_process_memory()
|
||||
jobname, jobparser = runfunction
|
||||
#--------------------
|
||||
# --------------------
|
||||
jobparser() # invokes function passed in the second item in the tuple
|
||||
#--------------------
|
||||
# --------------------
|
||||
memend = get_process_memory()
|
||||
duration = time.time()-start
|
||||
#print(" - MEMORY start:{:.3f} MB end:{:.3f} MB change={:.3f} MB".format(memstart,memend, ))
|
||||
print("\n*- Ended \"", jobname, f"\" {duration:.1f} seconds + {memend - memstart:.3f} MB ({memend:.3f} MB)")
|
||||
duration = time.time() - start
|
||||
# print(" - MEMORY start:{:.3f} MB end:{:.3f} MB change={:.3f} MB".format(memstart,memend, ))
|
||||
print(
|
||||
'\n*- Ended "',
|
||||
jobname,
|
||||
f'" {duration:.1f} seconds + {memend - memstart:.3f} MB ({memend:.3f} MB)',
|
||||
)
|
||||
self.results[jobname].pop() # the null item
|
||||
self.results[jobname].append(duration)
|
||||
|
||||
|
||||
jobend = time.time()
|
||||
jobduration = jobend-jobstart
|
||||
jobduration = jobend - jobstart
|
||||
print(f"** Ended job {self.runlabel} - {jobduration:.1f} seconds total.")
|
||||
return True
|
||||
|
||||
|
||||
def append_placeholders(self):
|
||||
'''Ads a dummy timing for each option, to fix off by one error
|
||||
'''
|
||||
"""Ads a dummy timing for each option, to fix off by one error"""
|
||||
for j in self.results_order:
|
||||
self.results[j].append(None) # append a placeholder
|
||||
|
||||
def run_now_django_tests(self,n):
|
||||
"""Runs the standard django test harness system which is in troggle/core/TESTS/tests.py
|
||||
"""
|
||||
management.call_command('test', verbosity=n)
|
||||
def run_now_django_tests(self, n):
|
||||
"""Runs the standard django test harness system which is in troggle/core/TESTS/tests.py"""
|
||||
management.call_command("test", verbosity=n)
|
||||
django.db.close_old_connections()
|
||||
|
||||
def run(self):
|
||||
"""Initialises profile timings record, initiates relational database, runs the job queue saving the imported data as an SQL image and saves the timing profile data.
|
||||
"""
|
||||
"""Initialises profile timings record, initiates relational database, runs the job queue saving the imported data as an SQL image and saves the timing profile data."""
|
||||
self.loadprofiles()
|
||||
print("-- start ", django.db.connections.databases['default']['ENGINE'], django.db.connections.databases['default']['NAME'])
|
||||
print(
|
||||
"-- start ",
|
||||
django.db.connections.databases["default"]["ENGINE"],
|
||||
django.db.connections.databases["default"]["NAME"],
|
||||
)
|
||||
self.runqonce()
|
||||
if settings.DATABASES['default']['NAME'] ==":memory:":
|
||||
memdumpsql('memdump.sql') # saved contents of in-memory db, could be imported later..
|
||||
if settings.DATABASES["default"]["NAME"] == ":memory:":
|
||||
memdumpsql("memdump.sql") # saved contents of in-memory db, could be imported later..
|
||||
self.saveprofiles()
|
||||
return True
|
||||
|
||||
def showprofile(self):
|
||||
"""Prints out the time it took to run the jobqueue
|
||||
"""
|
||||
"""Prints out the time it took to run the jobqueue"""
|
||||
for k in self.results_order:
|
||||
if k =="test":
|
||||
if k == "test":
|
||||
break
|
||||
elif k =="date":
|
||||
print(" days ago ", end=' ')
|
||||
elif k == "date":
|
||||
print(" days ago ", end=" ")
|
||||
else:
|
||||
print('%10s (s)' % k, end=' ')
|
||||
percen=0
|
||||
print("%10s (s)" % k, end=" ")
|
||||
percen = 0
|
||||
r = self.results[k]
|
||||
|
||||
for i in range(len(r)):
|
||||
@ -308,39 +327,39 @@ class JobQueue():
|
||||
rp = r[i]
|
||||
else:
|
||||
rp = " - "
|
||||
print('%8s' % rp, end=' ')
|
||||
elif k =="date":
|
||||
print("%8s" % rp, end=" ")
|
||||
elif k == "date":
|
||||
# Calculate dates as days before present
|
||||
if r[i]:
|
||||
if i == len(r)-1:
|
||||
print(" this", end=' ')
|
||||
if i == len(r) - 1:
|
||||
print(" this", end=" ")
|
||||
else:
|
||||
# prints one place to the left of where you expect
|
||||
if r[len(r)-1]:
|
||||
s = r[i]-r[len(r)-1]
|
||||
elif r[len(r)-2]:
|
||||
s = r[i]-r[len(r)-2]
|
||||
if r[len(r) - 1]:
|
||||
s = r[i] - r[len(r) - 1]
|
||||
elif r[len(r) - 2]:
|
||||
s = r[i] - r[len(r) - 2]
|
||||
else:
|
||||
s = 0
|
||||
days = (s)/(24*60*60)
|
||||
print(f'{days:8.2f}', end=' ')
|
||||
days = (s) / (24 * 60 * 60)
|
||||
print(f"{days:8.2f}", end=" ")
|
||||
elif r[i]:
|
||||
print(f'{r[i]:8.1f}', end=' ')
|
||||
if i == len(r)-1 and r[i-1]:
|
||||
percen = 100* (r[i] - r[i-1])/r[i-1]
|
||||
if abs(percen) >0.1:
|
||||
print(f'{percen:8.1f}%', end=' ')
|
||||
print(f"{r[i]:8.1f}", end=" ")
|
||||
if i == len(r) - 1 and r[i - 1]:
|
||||
percen = 100 * (r[i] - r[i - 1]) / r[i - 1]
|
||||
if abs(percen) > 0.1:
|
||||
print(f"{percen:8.1f}%", end=" ")
|
||||
else:
|
||||
print(" - ", end=' ')
|
||||
print(" - ", end=" ")
|
||||
print("")
|
||||
print("\n")
|
||||
return True
|
||||
|
||||
|
||||
def usage():
|
||||
'''Prints command line options, can print history of previous runs with timings
|
||||
'''
|
||||
print("""Usage is 'python databaseReset.py <command> [runlabel]'
|
||||
"""Prints command line options, can print history of previous runs with timings"""
|
||||
print(
|
||||
"""Usage is 'python databaseReset.py <command> [runlabel]'
|
||||
where command is:
|
||||
test - testing... imports people and prints profile. Deletes nothing.
|
||||
profile - print the profile from previous runs. Import nothing.
|
||||
@ -370,7 +389,9 @@ def usage():
|
||||
|
||||
Note that running the subfunctions will not produce a consistent website
|
||||
- only the full 'reset' does that.
|
||||
""")
|
||||
"""
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@ -381,70 +402,70 @@ if __name__ == "__main__":
|
||||
|
||||
if sys.getfilesystemencoding() != "utf-8":
|
||||
print("UTF-8 is NOT the default file encoding. You must fix this.")
|
||||
print(f'- {sys.getdefaultencoding()=}')
|
||||
print(f'- {sys.getfilesystemencoding()=}')
|
||||
print(f'- {locale.getdefaultlocale()=}')
|
||||
print(f'- {locale.getpreferredencoding()=}')
|
||||
print(f"- {sys.getdefaultencoding()=}")
|
||||
print(f"- {sys.getfilesystemencoding()=}")
|
||||
print(f"- {locale.getdefaultlocale()=}")
|
||||
print(f"- {locale.getpreferredencoding()=}")
|
||||
print("Aborting run.")
|
||||
exit()
|
||||
|
||||
if len(sys.argv)>2:
|
||||
runlabel = sys.argv[len(sys.argv)-1]
|
||||
if len(sys.argv) > 2:
|
||||
runlabel = sys.argv[len(sys.argv) - 1]
|
||||
else:
|
||||
runlabel=None
|
||||
runlabel = None
|
||||
|
||||
jq = JobQueue(runlabel)
|
||||
|
||||
if len(sys.argv)==1:
|
||||
if len(sys.argv) == 1:
|
||||
usage()
|
||||
exit()
|
||||
elif "init" in sys.argv:
|
||||
jq.enq("reinit",reinit_db)
|
||||
jq.enq("reinit", reinit_db)
|
||||
elif "ents" in sys.argv:
|
||||
jq.enq("survex",import_ents)
|
||||
jq.enq("survex", import_ents)
|
||||
elif "test2" in sys.argv:
|
||||
jq.enq("QMs",import_QMs)
|
||||
jq.enq("drawings",import_drawingsfiles)
|
||||
jq.enq("survex",import_survex)
|
||||
jq.enq("QMs", import_QMs)
|
||||
jq.enq("drawings", import_drawingsfiles)
|
||||
jq.enq("survex", import_survex)
|
||||
elif "caves" in sys.argv:
|
||||
jq.enq("caves",import_caves)
|
||||
jq.enq("caves", import_caves)
|
||||
elif "logbooks" in sys.argv:
|
||||
jq.enq("logbooks",import_logbooks)
|
||||
jq.enq("logbooks", import_logbooks)
|
||||
elif "logbook" in sys.argv:
|
||||
jq.enq("logbooks",import_logbook) # default year set in imports.py
|
||||
jq.enq("logbooks", import_logbook) # default year set in imports.py
|
||||
elif "people" in sys.argv:
|
||||
jq.enq("people",import_people)
|
||||
jq.enq("people", import_people)
|
||||
elif "QMs" in sys.argv:
|
||||
jq.enq("QMs",import_QMs)
|
||||
jq.enq("QMs", import_QMs)
|
||||
elif "reset" in sys.argv:
|
||||
jq.enq("reinit",reinit_db)
|
||||
jq.enq("caves",import_caves)
|
||||
jq.enq("people",import_people)
|
||||
jq.enq("scans",import_surveyscans)
|
||||
jq.enq("logbooks",import_logbooks)
|
||||
jq.enq("QMs",import_QMs)
|
||||
jq.enq("drawings",import_drawingsfiles)
|
||||
jq.enq("survex",import_survex)
|
||||
jq.enq("reinit", reinit_db)
|
||||
jq.enq("caves", import_caves)
|
||||
jq.enq("people", import_people)
|
||||
jq.enq("scans", import_surveyscans)
|
||||
jq.enq("logbooks", import_logbooks)
|
||||
jq.enq("QMs", import_QMs)
|
||||
jq.enq("drawings", import_drawingsfiles)
|
||||
jq.enq("survex", import_survex)
|
||||
elif "scans" in sys.argv:
|
||||
jq.enq("scans",import_surveyscans)
|
||||
jq.enq("scans", import_surveyscans)
|
||||
elif "survex" in sys.argv:
|
||||
jq.enq("survex",import_survex)
|
||||
jq.enq("survex", import_survex)
|
||||
elif "loadpos" in sys.argv:
|
||||
jq.enq("survex",import_loadpos)
|
||||
jq.enq("survex", import_loadpos)
|
||||
elif "drawings" in sys.argv:
|
||||
jq.enq("drawings",import_drawingsfiles)
|
||||
jq.enq("drawings", import_drawingsfiles)
|
||||
elif "dumplogbooks" in sys.argv: # untested in 2020
|
||||
dumplogbooks()
|
||||
# elif "writecaves" in sys.argv: # untested in 2020 - will overwrite input files!!
|
||||
# writeCaves()
|
||||
# elif "writecaves" in sys.argv: # untested in 2020 - will overwrite input files!!
|
||||
# writeCaves()
|
||||
elif "profile" in sys.argv:
|
||||
if runlabel == 'del' :
|
||||
if runlabel == "del":
|
||||
jq.loadprofiles()
|
||||
jq.dellastprofile()
|
||||
jq.dellastprofile() # twice because loadprofiles adds a dummy
|
||||
jq.showprofile()
|
||||
jq.saveprofiles()
|
||||
if runlabel == 'delfirst' :
|
||||
if runlabel == "delfirst":
|
||||
jq.loadprofiles()
|
||||
jq.dellastprofile() # remove the dummy
|
||||
jq.delfirstprofile()
|
||||
|
138
parsers/QMs.py
138
parsers/QMs.py
@ -9,13 +9,14 @@ from troggle.core.models.caves import QM, Cave, LogbookEntry
|
||||
from troggle.core.models.troggle import DataIssue
|
||||
from troggle.core.utils import save_carefully
|
||||
|
||||
'''Reads the CSV files containg QMs for a select few caves
|
||||
"""Reads the CSV files containg QMs for a select few caves
|
||||
See parsers/survex.py for the parser which extracts QMs from the survex files
|
||||
'''
|
||||
"""
|
||||
|
||||
|
||||
def deleteQMs():
|
||||
QM.objects.all().delete()
|
||||
DataIssue.objects.filter(parser='QMs').delete()
|
||||
DataIssue.objects.filter(parser="QMs").delete()
|
||||
|
||||
|
||||
def parseCaveQMs(cave, inputFile, ticked=False):
|
||||
@ -35,130 +36,137 @@ def parseCaveQMs(cave, inputFile, ticked=False):
|
||||
All QMs are created afresh and this is all only run once on import on a fresh database.
|
||||
"""
|
||||
|
||||
if cave=='204-steinBH':
|
||||
if cave == "204-steinBH":
|
||||
try:
|
||||
steinBr=Cave.objects.get(official_name="Steinbrückenhöhle")
|
||||
steinBr = Cave.objects.get(official_name="Steinbrückenhöhle")
|
||||
caveid = steinBr
|
||||
except Cave.DoesNotExist:
|
||||
message = f' ! - {qmPath} Steinbruckenhoehle is not in the database. Please run cave parser'
|
||||
message = f" ! - {qmPath} Steinbruckenhoehle is not in the database. Please run cave parser"
|
||||
print(message)
|
||||
DataIssue.objects.create(parser='QMs', message=message)
|
||||
DataIssue.objects.create(parser="QMs", message=message)
|
||||
return
|
||||
elif cave=='234-Hauch':
|
||||
elif cave == "234-Hauch":
|
||||
try:
|
||||
hauchHl=Cave.objects.get(official_name="Hauchhöhle")
|
||||
hauchHl = Cave.objects.get(official_name="Hauchhöhle")
|
||||
caveid = hauchHl
|
||||
except Cave.DoesNotExist:
|
||||
message = f' ! - {qmPath} Hauchhoehle is not in the database. Please run cave parser'
|
||||
message = f" ! - {qmPath} Hauchhoehle is not in the database. Please run cave parser"
|
||||
print(message)
|
||||
DataIssue.objects.create(parser='QMs', message=message)
|
||||
DataIssue.objects.create(parser="QMs", message=message)
|
||||
return
|
||||
elif cave =='161-KH':
|
||||
elif cave == "161-KH":
|
||||
try:
|
||||
kh=Cave.objects.get(official_name="Kaninchenhöhle")
|
||||
kh = Cave.objects.get(official_name="Kaninchenhöhle")
|
||||
caveid = kh
|
||||
except Cave.DoesNotExist:
|
||||
message = f' ! - {qmPath} KH is not in the database. Please run cave parser'
|
||||
message = f" ! - {qmPath} KH is not in the database. Please run cave parser"
|
||||
print(message)
|
||||
DataIssue.objects.create(parser='QMs', message=message)
|
||||
DataIssue.objects.create(parser="QMs", message=message)
|
||||
nqms = parse_KH_QMs(kh, inputFile=inputFile, ticked=ticked)
|
||||
return nqms
|
||||
|
||||
#qmPath = settings.EXPOWEB+inputFile
|
||||
# qmPath = settings.EXPOWEB+inputFile
|
||||
qmPath = os.path.join(settings.EXPOWEB, inputFile) # why not use the pathlib stuff ?
|
||||
|
||||
qmCSVContents = open(qmPath,'r')
|
||||
dialect=csv.Sniffer().sniff(qmCSVContents.read())
|
||||
qmCSVContents.seek(0,0)
|
||||
qmReader = csv.reader(qmCSVContents,dialect=dialect)
|
||||
qmCSVContents = open(qmPath, "r")
|
||||
dialect = csv.Sniffer().sniff(qmCSVContents.read())
|
||||
qmCSVContents.seek(0, 0)
|
||||
qmReader = csv.reader(qmCSVContents, dialect=dialect)
|
||||
next(qmReader) # Skip header row
|
||||
n = 0
|
||||
nqms = 0
|
||||
for line in qmReader:
|
||||
try:
|
||||
n += 1
|
||||
year=int(line[0][1:5])
|
||||
logslug = f'PH_{int(year)}_{int(n):02d}'
|
||||
QMnum=re.match(r".*?-\d*?-X?(?P<numb>\d*)",line[0]).group("numb")
|
||||
year = int(line[0][1:5])
|
||||
logslug = f"PH_{int(year)}_{int(n):02d}"
|
||||
QMnum = re.match(r".*?-\d*?-X?(?P<numb>\d*)", line[0]).group("numb")
|
||||
newQM = QM()
|
||||
# newQM.found_by=placeholder
|
||||
newQM.number=QMnum
|
||||
newQM.number = QMnum
|
||||
newQM.cave = caveid
|
||||
newQM.blockname = ""
|
||||
if line[1]=="Dig":
|
||||
newQM.grade="D"
|
||||
if line[1] == "Dig":
|
||||
newQM.grade = "D"
|
||||
else:
|
||||
newQM.grade=line[1]
|
||||
newQM.area=line[2]
|
||||
newQM.location_description=line[3]
|
||||
newQM.grade = line[1]
|
||||
newQM.area = line[2]
|
||||
newQM.location_description = line[3]
|
||||
|
||||
# In the table, completion is indicated by the presence of a completion discription.
|
||||
newQM.completion_description=line[4]
|
||||
newQM.nearest_station_description=line[5]
|
||||
newQM.completion_description = line[4]
|
||||
newQM.nearest_station_description = line[5]
|
||||
if newQM.completion_description:
|
||||
newQM.ticked = True
|
||||
else:
|
||||
newQM.ticked = False
|
||||
|
||||
newQM.comment=line[6]
|
||||
newQM.comment = line[6]
|
||||
try:
|
||||
# year and number are unique for a cave in CSV imports
|
||||
preexistingQM=QM.objects.get(number=QMnum, found_by__date__year=year) #if we don't have this one in the DB, save it
|
||||
if preexistingQM.new_since_parsing==False: #if the pre-existing QM has not been modified, overwrite it - VERY OLD THING
|
||||
preexistingQM = QM.objects.get(
|
||||
number=QMnum, found_by__date__year=year
|
||||
) # if we don't have this one in the DB, save it
|
||||
if (
|
||||
preexistingQM.new_since_parsing == False
|
||||
): # if the pre-existing QM has not been modified, overwrite it - VERY OLD THING
|
||||
preexistingQM.delete()
|
||||
newQM.expoyear = year
|
||||
newQM.save()
|
||||
else: # otherwise, print that it was ignored
|
||||
print((" - preserving " + str(preexistingQM) + ", which was edited in admin \r"))
|
||||
|
||||
except QM.DoesNotExist: #if there is no pre-existing QM, save the new one
|
||||
except QM.DoesNotExist: # if there is no pre-existing QM, save the new one
|
||||
newQM.expoyear = year
|
||||
newQM.save()
|
||||
nqms += 1
|
||||
except KeyError: #check on this one
|
||||
message = f' ! - {qmPath} KeyError {str(line)} '
|
||||
except KeyError: # check on this one
|
||||
message = f" ! - {qmPath} KeyError {str(line)} "
|
||||
print(message)
|
||||
DataIssue.objects.create(parser='QMs', message=message)
|
||||
DataIssue.objects.create(parser="QMs", message=message)
|
||||
continue
|
||||
except IndexError:
|
||||
message = f' ! - {qmPath} IndexError {str(line)} '
|
||||
message = f" ! - {qmPath} IndexError {str(line)} "
|
||||
print(message)
|
||||
DataIssue.objects.create(parser='QMs', message=message)
|
||||
DataIssue.objects.create(parser="QMs", message=message)
|
||||
continue
|
||||
return nqms
|
||||
|
||||
|
||||
def parse_KH_QMs(kh, inputFile, ticked):
|
||||
"""import QMs from the 1623-161 (Kaninchenhohle) html pages, different format
|
||||
"""
|
||||
khQMs=open(os.path.join(settings.EXPOWEB, inputFile),'r')
|
||||
khQMs=khQMs.readlines()
|
||||
"""import QMs from the 1623-161 (Kaninchenhohle) html pages, different format"""
|
||||
khQMs = open(os.path.join(settings.EXPOWEB, inputFile), "r")
|
||||
khQMs = khQMs.readlines()
|
||||
nqms = 0
|
||||
for line in khQMs:
|
||||
res=re.search(r'name=\"[CB](?P<year>\d*)-(?P<cave>\d*)-(?P<number>\d*).*</a> (?P<grade>[ABDCV])<dd>(?P<description>.*)\[(?P<nearest_station>.*)\]',line)
|
||||
res = re.search(
|
||||
r"name=\"[CB](?P<year>\d*)-(?P<cave>\d*)-(?P<number>\d*).*</a> (?P<grade>[ABDCV])<dd>(?P<description>.*)\[(?P<nearest_station>.*)\]",
|
||||
line,
|
||||
)
|
||||
if res:
|
||||
res=res.groupdict()
|
||||
year=int(res['year'])
|
||||
res = res.groupdict()
|
||||
year = int(res["year"])
|
||||
# logbook placeholder code was previously here. No longer needed.
|
||||
#check if placeholder exists for given year, create it if not
|
||||
# check if placeholder exists for given year, create it if not
|
||||
# message = " ! - "+ str(year) + " logbook: placeholder entry for '161 KH' created. DUMMY EXPEDITION ID. Should be re-attached to the actual trip."
|
||||
# placeholder, hadToCreate = LogbookEntry.objects.get_or_create(date__year=year, place="161", title="placeholder for QMs in 161", text=message, entry_type="DUMMY", expedition_id=1, defaults={"date": date((year), 1, 1),"cave_slug":str(kh)})
|
||||
# # if hadToCreate:
|
||||
# print(message)
|
||||
# DataIssue.objects.create(parser='QMs', message=message)
|
||||
lookupArgs={
|
||||
lookupArgs = {
|
||||
#'found_by':placeholder,
|
||||
'blockname': "",
|
||||
'expoyear':year,
|
||||
'number':res['number'],
|
||||
'cave': kh,
|
||||
'grade':res['grade']
|
||||
"blockname": "",
|
||||
"expoyear": year,
|
||||
"number": res["number"],
|
||||
"cave": kh,
|
||||
"grade": res["grade"],
|
||||
}
|
||||
nonLookupArgs={
|
||||
'ticked': ticked,
|
||||
'nearest_station_name':res['nearest_station'],
|
||||
'location_description':res['description']
|
||||
nonLookupArgs = {
|
||||
"ticked": ticked,
|
||||
"nearest_station_name": res["nearest_station"],
|
||||
"location_description": res["description"],
|
||||
}
|
||||
instance, created = save_carefully(QM,lookupArgs,nonLookupArgs)
|
||||
instance, created = save_carefully(QM, lookupArgs, nonLookupArgs)
|
||||
# if created:
|
||||
# message = f" ! - {instance.code()} QM entry for '161 KH' created. ticked: {ticked}"
|
||||
# print(message)
|
||||
@ -169,11 +177,11 @@ def parse_KH_QMs(kh, inputFile, ticked):
|
||||
|
||||
def Load_QMs():
    """Re-import the QM lists held in the legacy per-cave files.

    Calls deleteQMs() first, then runs parseCaveQMs() once for each input file:
    the qm.csv files for caves 204 and 234, and the 'todo' (ticked=False) and
    'done' (ticked=True) HTML lists for cave 161. The value returned by each
    call is reported as a QM count in a one-line summary printed at the end.
    """
    deleteQMs()

    # The call order is significant only in that it matches the original import
    # sequence: 204, 234, 161 not-done, 161 done.
    n204 = parseCaveQMs(inputFile=r"1623/204/qm.csv", cave="204-steinBH")
    n234 = parseCaveQMs(inputFile=r"1623/234/qm.csv", cave="234-Hauch")
    n161 = parseCaveQMs(inputFile="1623/161/qmtodo.htm", cave="161-KH", ticked=False)
    t161 = parseCaveQMs(inputFile="1623/161/qmdone.htm", cave="161-KH", ticked=True)
    # Disabled import, kept for reference:
    # parseCaveQMs(cave='balkonhoehle',inputFile=r"1623/264/qm.csv")

    summary = f" - Imported: {n204} QMs for 204, {n234} QMs for 234, {t161} QMs for 161 done, {n161} QMs for 161 not done."
    print(summary)

    # Trailing blank line separates this report from whatever is printed next.
    print()
|
||||
|
492
parsers/caves.py
492
parsers/caves.py
@ -6,49 +6,48 @@ from pathlib import Path
|
||||
from django.conf import settings
|
||||
from django.db import transaction
|
||||
|
||||
from troggle.core.models.caves import (Area, Cave, CaveAndEntrance, CaveSlug,
|
||||
Entrance, EntranceSlug, GetCaveLookup)
|
||||
from troggle.core.models.caves import Area, Cave, CaveAndEntrance, CaveSlug, Entrance, EntranceSlug, GetCaveLookup
|
||||
from troggle.core.models.troggle import DataIssue
|
||||
from troggle.settings import (CAVEDESCRIPTIONS, ENTRANCEDESCRIPTIONS, EXPOWEB,
|
||||
SURVEX_DATA)
|
||||
from troggle.settings import CAVEDESCRIPTIONS, ENTRANCEDESCRIPTIONS, EXPOWEB, SURVEX_DATA
|
||||
|
||||
'''Reads all the cave description data by parsing the xml files (stored as e.g. :EXPOWEB:/cave_data/1623-161.html )
|
||||
"""Reads all the cave description data by parsing the xml files (stored as e.g. :EXPOWEB:/cave_data/1623-161.html )
|
||||
and creating the various Cave, Entrance and necessary Area objects.
|
||||
|
||||
This is the first import that happens after the database is reinitialised.
|
||||
So is the first thing that creates tables.
|
||||
|
||||
'''
|
||||
"""
|
||||
|
||||
todo='''
|
||||
todo = """
|
||||
- Cannot use Edit This Page for pendingcaves.txt_edit as Edit This Page is expecting an html file.
|
||||
So we will need a separate file-editing capability just for this configuration file ?!
|
||||
|
||||
- crashes on MariaDB in databasereset.py on server when deleting Caves and complains Area needs a non null parent, But this is not true.
|
||||
The only solution we have found is to let it crash, then stop and restart MariaDB (requires a logon able to sudo)
|
||||
and then restart the databasereset.py again. (status as of July 2022)
|
||||
'''
|
||||
"""
|
||||
entrances_xslug = {}
|
||||
caves_xslug = {}
|
||||
areas_xslug = {}
|
||||
|
||||
|
||||
def dummy_entrance(k, slug, msg="DUMMY"):
|
||||
'''Returns an empty entrance object for either a PENDING cave or a DUMMY entrance if
|
||||
"""Returns an empty entrance object for either a PENDING cave or a DUMMY entrance if
|
||||
user forgot to provide one when creating the cave
|
||||
'''
|
||||
"""
|
||||
ent = Entrance(
|
||||
name = k,
|
||||
entrance_description = "Dummy entrance: auto-created when registering a new cave " +
|
||||
"and you forgot to create an entrance for it. Click on 'Edit' to enter the correct data, then 'Submit'.",
|
||||
marking = '?')
|
||||
name=k,
|
||||
entrance_description="Dummy entrance: auto-created when registering a new cave "
|
||||
+ "and you forgot to create an entrance for it. Click on 'Edit' to enter the correct data, then 'Submit'.",
|
||||
marking="?",
|
||||
)
|
||||
if ent:
|
||||
ent.save() # must save to have id before foreign keys work.
|
||||
try: # Now create a entrance slug ID
|
||||
es = EntranceSlug(entrance = ent,
|
||||
slug = slug, primary = False)
|
||||
es = EntranceSlug(entrance=ent, slug=slug, primary=False)
|
||||
except:
|
||||
message = f" ! {k:11s} {msg}-{slug} entrance create failure"
|
||||
DataIssue.objects.create(parser='caves', message=message, url=f'{slug}')
|
||||
DataIssue.objects.create(parser="caves", message=message, url=f"{slug}")
|
||||
print(message)
|
||||
|
||||
ent.cached_primary_slug = slug
|
||||
@ -57,41 +56,43 @@ def dummy_entrance(k, slug, msg="DUMMY"):
|
||||
return ent
|
||||
else:
|
||||
message = f" ! {k:11s} {msg} cave SLUG '{slug}' create failure"
|
||||
DataIssue.objects.create(parser='caves', message=message, url=f'{slug}')
|
||||
DataIssue.objects.create(parser="caves", message=message, url=f"{slug}")
|
||||
print(message)
|
||||
raise
|
||||
|
||||
|
||||
def set_dummy_entrance(id, slug, cave, msg="DUMMY"):
    """Attach an auto-created placeholder entrance to a cave.

    Used when the entrance field is either missing or holds a null string
    instead of a filename in a cave_data file.

    Args:
        id: identifier handed to dummy_entrance() as the entrance name and
            quoted in the log messages. (Shadows the builtin, but the name is
            part of the existing signature.)
        slug: slug for the new entrance; also the key under which the entrance
            is stored in the module-level entrances_xslug cache.
        cave: the cave object the entrance is linked to; its ``url`` is recorded
            on the DataIssue rows.
        msg: label forwarded to dummy_entrance() for its own messages.

    Returns:
        None. Failures are not propagated: they are recorded as a DataIssue and
        printed so that the import can carry on with the next cave.
    """
    try:
        # Previously "DUMMY" was hard-coded here and the msg argument was ignored.
        entrance = dummy_entrance(id, slug, msg=msg)
        # entrances_xslug is only mutated, never rebound, so no `global` is needed.
        entrances_xslug[slug] = entrance
        CaveAndEntrance.objects.update_or_create(cave=cave, entrance_letter="", entrance=entrance)
        message = f" ! Warning: Dummy Entrance successfully set for entrance {id} on cave {cave}"

        DataIssue.objects.create(parser="caves", message=message, url=f"{cave.url}")
        print(message)
    except Exception:
        # Best-effort by design: log and continue. `Exception` rather than a bare
        # except, so Ctrl-C and SystemExit still stop the import.
        message = f' ! Entrance Dummy setting failure, slug:"{slug}" cave id :"{id}" '
        DataIssue.objects.create(parser="caves", message=message, url=f"{cave.url}")
        print(message)
|
||||
|
||||
|
||||
def do_pending_cave(k, url, area):
|
||||
'''
|
||||
"""
|
||||
default for a PENDING cave, should be overwritten in the db later if a real cave of the same name exists
|
||||
in expoweb/cave_data/1623-"k".html
|
||||
|
||||
Note that at this point in importing the data we have not yet seen the survex files, so we can't
|
||||
look inside the relevant survex file to find the year and so we con't provide helpful links.
|
||||
'''
|
||||
"""
|
||||
|
||||
def get_survex_file(k):
|
||||
'''Guesses at and finds a survex file for this pending cave.
|
||||
"""Guesses at and finds a survex file for this pending cave.
|
||||
Convoluted. Needs rewriting
|
||||
'''
|
||||
"""
|
||||
if k[0:3] == "162":
|
||||
id = Path(k[5:])
|
||||
else:
|
||||
@ -113,7 +114,7 @@ def do_pending_cave(k, url, area):
|
||||
for f in dir:
|
||||
if f.suffix == ".svx":
|
||||
survex_file = f.relative_to(settings.SURVEX_DATA)
|
||||
chk = min(5, len(f.name)-1)
|
||||
chk = min(5, len(f.name) - 1)
|
||||
if str(f.name)[:chk].lower() == str(id.name)[:chk].lower(): # bodge which mostly works
|
||||
prime_suspect = survex_file
|
||||
if prime_suspect:
|
||||
@ -129,23 +130,29 @@ def do_pending_cave(k, url, area):
|
||||
with transaction.atomic():
|
||||
if slug in g:
|
||||
message = f" ! {k:18} cave listed in pendingcaves.txt already exists."
|
||||
DataIssue.objects.create(parser='caves', message=message, url=url)
|
||||
DataIssue.objects.create(parser="caves", message=message, url=url)
|
||||
print(message)
|
||||
return
|
||||
|
||||
|
||||
|
||||
default_note = f"_Survex file found in loser repo but no description in expoweb <br><br><br>\n"
|
||||
default_note += f"INSTRUCTIONS: First open 'This survex file' (link above the CaveView panel) to find the date and info. Then "
|
||||
default_note += f"<br><br>\n\n - (0) look in the <a href=\"/noinfo/cave-number-index\">cave number index</a> for notes on this cave, "
|
||||
default_note += f'<br><br>\n\n - (0) look in the <a href="/noinfo/cave-number-index">cave number index</a> for notes on this cave, '
|
||||
default_note += f"<br><br>\n\n - (1) search in the survex file for the *ref to find a "
|
||||
default_note += f"relevant wallet, e.g.<a href='/survey_scans/2009%252311/'>2009#11</a> and read the notes image files <br>\n - "
|
||||
default_note += f"<br><br>\n\n - (2) search in the Expo for that year e.g. <a href='/expedition/2009'>2009</a> to find a "
|
||||
default_note += (
|
||||
f"<br><br>\n\n - (2) search in the Expo for that year e.g. <a href='/expedition/2009'>2009</a> to find a "
|
||||
)
|
||||
default_note += f"relevant logbook entry, remember that the date may have been recorded incorrectly, "
|
||||
default_note += f"so check for trips i.e. logbook entries involving the same people as were listed in the survex file, "
|
||||
default_note += f"and you should also check the scanned copy of the logbook (linked from each logbook entry page) "
|
||||
default_note += (
|
||||
f"so check for trips i.e. logbook entries involving the same people as were listed in the survex file, "
|
||||
)
|
||||
default_note += (
|
||||
f"and you should also check the scanned copy of the logbook (linked from each logbook entry page) "
|
||||
)
|
||||
default_note += f"just in case a vital trip was not transcribed, then <br>\n - "
|
||||
default_note += f"click on 'Edit this cave' and copy the information you find in the survex file and the logbook"
|
||||
default_note += (
|
||||
f"click on 'Edit this cave' and copy the information you find in the survex file and the logbook"
|
||||
)
|
||||
default_note += f"and delete all the text in the 'Notes' section - which is the text you are reading now."
|
||||
default_note += f"<br><br>\n\n - Only two fields on this form are essential. "
|
||||
default_note += f"Documentation of all the fields on 'Edit this cave' form is in <a href='/handbook/survey/caveentryfields.html'>handbook/survey/caveentryfields</a>"
|
||||
@ -153,42 +160,46 @@ def do_pending_cave(k, url, area):
|
||||
default_note += f"You will also need to create a new entrance from the 'Edit this cave' page. Ignore the existing dummy one, it will evaporate on the next full import."
|
||||
default_note += f"<br><br>\n\n - "
|
||||
default_note += f"When you Submit it will create a new file in expoweb/cave_data/ "
|
||||
default_note += f"<br><br>\n\n - Now you can edit the entrance info: click on Edit below for the dummy entrance. "
|
||||
default_note += (
|
||||
f"<br><br>\n\n - Now you can edit the entrance info: click on Edit below for the dummy entrance. "
|
||||
)
|
||||
default_note += f"and then Submit to save it (if you forget to do this, a dummy entrance will be created for your new cave description)."
|
||||
default_note += f"<br><br>\n\n - Finally, you need to find a nerd to edit the file '<var>expoweb/cave_data/pending.txt</var>' "
|
||||
default_note += f"to remove the line <br><var>{slug}</var><br> as it is no longer 'pending' but 'done. Well Done."
|
||||
default_note += (
|
||||
f"to remove the line <br><var>{slug}</var><br> as it is no longer 'pending' but 'done. Well Done."
|
||||
)
|
||||
|
||||
survex_file = get_survex_file(k)
|
||||
|
||||
cave = Cave(
|
||||
unofficial_number = k,
|
||||
underground_description = "Pending cave write-up - creating as empty object. No XML file available yet.",
|
||||
survex_file = survex_file,
|
||||
url = url,
|
||||
notes = default_note)
|
||||
unofficial_number=k,
|
||||
underground_description="Pending cave write-up - creating as empty object. No XML file available yet.",
|
||||
survex_file=survex_file,
|
||||
url=url,
|
||||
notes=default_note,
|
||||
)
|
||||
if cave:
|
||||
cave.save() # must save to have id before foreign keys work. This is also a ManyToMany key.
|
||||
cave.area.add(area)
|
||||
cave.save()
|
||||
message = f" ! {k:18} {cave.underground_description} url: {url}"
|
||||
DataIssue.objects.create(parser='caves', message=message, url=url)
|
||||
DataIssue.objects.create(parser="caves", message=message, url=url)
|
||||
print(message)
|
||||
|
||||
try: # Now create a cave slug ID
|
||||
cs = CaveSlug.objects.update_or_create(cave = cave,
|
||||
slug = slug, primary = False)
|
||||
cs = CaveSlug.objects.update_or_create(cave=cave, slug=slug, primary=False)
|
||||
except:
|
||||
message = f" ! {k:11s} PENDING cave SLUG create failure"
|
||||
DataIssue.objects.create(parser='caves', message=message)
|
||||
DataIssue.objects.create(parser="caves", message=message)
|
||||
print(message)
|
||||
else:
|
||||
message = f' ! {k:11s} PENDING cave create failure'
|
||||
DataIssue.objects.create(parser='caves', message=message)
|
||||
message = f" ! {k:11s} PENDING cave create failure"
|
||||
DataIssue.objects.create(parser="caves", message=message)
|
||||
print(message)
|
||||
|
||||
try:
|
||||
ent = dummy_entrance(k, slug, msg="PENDING")
|
||||
ceinsts = CaveAndEntrance.objects.update_or_create(cave = cave, entrance_letter = "", entrance = ent)
|
||||
ceinsts = CaveAndEntrance.objects.update_or_create(cave=cave, entrance_letter="", entrance=ent)
|
||||
for ceinst in ceinsts:
|
||||
if str(ceinst) == str(cave): # magic runes... why is the next value a Bool?
|
||||
ceinst.cave = cave
|
||||
@ -196,15 +207,14 @@ def do_pending_cave(k, url, area):
|
||||
break
|
||||
except:
|
||||
message = f" ! {k:11s} PENDING entrance + cave UNION create failure '{cave}' [{ent}]"
|
||||
DataIssue.objects.create(parser='caves', message=message)
|
||||
DataIssue.objects.create(parser="caves", message=message)
|
||||
print(message)
|
||||
|
||||
|
||||
|
||||
def readentrance(filename):
|
||||
'''Reads an enrance description from the .html file
|
||||
"""Reads an enrance description from the .html file
|
||||
Convoluted. Sorry.This is as I inherited it and I haven't fiddled with it. Needs rewriting
|
||||
'''
|
||||
"""
|
||||
global entrances_xslug
|
||||
global caves_xslug
|
||||
global areas_xslug
|
||||
@ -213,85 +223,88 @@ def readentrance(filename):
|
||||
with open(os.path.join(ENTRANCEDESCRIPTIONS, filename)) as f:
|
||||
contents = f.read()
|
||||
context = filename
|
||||
#print("Reading file ENTRANCE {} / {}".format(ENTRANCEDESCRIPTIONS, filename))
|
||||
entrancecontentslist = getXML(contents, "entrance", maxItems = 1, context = context)
|
||||
# print("Reading file ENTRANCE {} / {}".format(ENTRANCEDESCRIPTIONS, filename))
|
||||
entrancecontentslist = getXML(contents, "entrance", maxItems=1, context=context)
|
||||
if len(entrancecontentslist) != 1:
|
||||
message = f'! BAD ENTRANCE at "{filename}"'
|
||||
DataIssue.objects.create(parser='caves', message=message)
|
||||
DataIssue.objects.create(parser="caves", message=message)
|
||||
print(message)
|
||||
else:
|
||||
entrancecontents = entrancecontentslist[0]
|
||||
non_public = getXML(entrancecontents, "non_public", maxItems = 1, context = context)
|
||||
name = getXML(entrancecontents, "name", maxItems = 1, context = context)
|
||||
slugs = getXML(entrancecontents, "slug", context = context)
|
||||
entrance_description = getXML(entrancecontents, "entrance_description", maxItems = 1, context = context)
|
||||
explorers = getXML(entrancecontents, "explorers", maxItems = 1, context = context)
|
||||
map_description = getXML(entrancecontents, "map_description", maxItems = 1, context = context)
|
||||
location_description = getXML(entrancecontents, "location_description", maxItems = 1, context = context)
|
||||
lastvisit = getXML(entrancecontents, "last visit date", maxItems = 1, minItems = 0, context = context)
|
||||
approach = getXML(entrancecontents, "approach", maxItems = 1, context = context)
|
||||
underground_description = getXML(entrancecontents, "underground_description", maxItems = 1, context = context)
|
||||
photo = getXML(entrancecontents, "photo", maxItems = 1, context = context)
|
||||
marking = getXML(entrancecontents, "marking", maxItems = 1, context = context)
|
||||
marking_comment = getXML(entrancecontents, "marking_comment", maxItems = 1, context = context)
|
||||
findability = getXML(entrancecontents, "findability", maxItems = 1, context = context)
|
||||
findability_description = getXML(entrancecontents, "findability_description", maxItems = 1, context = context)
|
||||
alt = getXML(entrancecontents, "alt", maxItems = 1, context = context)
|
||||
northing = getXML(entrancecontents, "northing", maxItems = 1, context = context)
|
||||
easting = getXML(entrancecontents, "easting", maxItems = 1, context = context)
|
||||
tag_station = getXML(entrancecontents, "tag_station", maxItems = 1, context = context)
|
||||
exact_station = getXML(entrancecontents, "exact_station", maxItems = 1, context = context)
|
||||
other_station = getXML(entrancecontents, "other_station", maxItems = 1, context = context)
|
||||
other_description = getXML(entrancecontents, "other_description", maxItems = 1, context = context)
|
||||
bearings = getXML(entrancecontents, "bearings", maxItems = 1, context = context)
|
||||
url = getXML(entrancecontents, "url", maxItems = 1, context = context)
|
||||
#if len(non_public) == 1 and len(slugs) >= 1 and len(name) >= 1 and len(entrance_description) == 1 and len(explorers) == 1 and len(map_description) == 1 and len(location_description) == 1 and len(lastvisit) == 1 and len(approach) == 1 and len(underground_description) == 1 and len(marking) == 1 and len(marking_comment) == 1 and len(findability) == 1 and len(findability_description) == 1 and len(alt) == 1 and len(northing) == 1 and len(easting) == 1 and len(tag_station) == 1 and len(exact_station) == 1 and len(other_station) == 1 and len(other_description) == 1 and len(bearings) == 1 and len(url) == 1:
|
||||
e, state = Entrance.objects.update_or_create(name = name[0],
|
||||
non_public = {"True": True, "False": False, "true": True, "false": False,}[non_public[0]],
|
||||
entrance_description = entrance_description[0],
|
||||
explorers = explorers[0],
|
||||
map_description = map_description[0],
|
||||
location_description = location_description[0],
|
||||
lastvisit = lastvisit[0],
|
||||
approach = approach[0],
|
||||
underground_description = underground_description[0],
|
||||
photo = photo[0],
|
||||
marking = marking[0],
|
||||
marking_comment = marking_comment[0],
|
||||
findability = findability[0],
|
||||
findability_description = findability_description[0],
|
||||
alt = alt[0],
|
||||
northing = northing[0],
|
||||
easting = easting[0],
|
||||
tag_station = tag_station[0],
|
||||
exact_station = exact_station[0],
|
||||
other_station = other_station[0],
|
||||
other_description = other_description[0],
|
||||
bearings = bearings[0],
|
||||
url = url[0],
|
||||
filename = filename,
|
||||
cached_primary_slug = slugs[0])
|
||||
non_public = getXML(entrancecontents, "non_public", maxItems=1, context=context)
|
||||
name = getXML(entrancecontents, "name", maxItems=1, context=context)
|
||||
slugs = getXML(entrancecontents, "slug", context=context)
|
||||
entrance_description = getXML(entrancecontents, "entrance_description", maxItems=1, context=context)
|
||||
explorers = getXML(entrancecontents, "explorers", maxItems=1, context=context)
|
||||
map_description = getXML(entrancecontents, "map_description", maxItems=1, context=context)
|
||||
location_description = getXML(entrancecontents, "location_description", maxItems=1, context=context)
|
||||
lastvisit = getXML(entrancecontents, "last visit date", maxItems=1, minItems=0, context=context)
|
||||
approach = getXML(entrancecontents, "approach", maxItems=1, context=context)
|
||||
underground_description = getXML(entrancecontents, "underground_description", maxItems=1, context=context)
|
||||
photo = getXML(entrancecontents, "photo", maxItems=1, context=context)
|
||||
marking = getXML(entrancecontents, "marking", maxItems=1, context=context)
|
||||
marking_comment = getXML(entrancecontents, "marking_comment", maxItems=1, context=context)
|
||||
findability = getXML(entrancecontents, "findability", maxItems=1, context=context)
|
||||
findability_description = getXML(entrancecontents, "findability_description", maxItems=1, context=context)
|
||||
alt = getXML(entrancecontents, "alt", maxItems=1, context=context)
|
||||
northing = getXML(entrancecontents, "northing", maxItems=1, context=context)
|
||||
easting = getXML(entrancecontents, "easting", maxItems=1, context=context)
|
||||
tag_station = getXML(entrancecontents, "tag_station", maxItems=1, context=context)
|
||||
exact_station = getXML(entrancecontents, "exact_station", maxItems=1, context=context)
|
||||
other_station = getXML(entrancecontents, "other_station", maxItems=1, context=context)
|
||||
other_description = getXML(entrancecontents, "other_description", maxItems=1, context=context)
|
||||
bearings = getXML(entrancecontents, "bearings", maxItems=1, context=context)
|
||||
url = getXML(entrancecontents, "url", maxItems=1, context=context)
|
||||
# if len(non_public) == 1 and len(slugs) >= 1 and len(name) >= 1 and len(entrance_description) == 1 and len(explorers) == 1 and len(map_description) == 1 and len(location_description) == 1 and len(lastvisit) == 1 and len(approach) == 1 and len(underground_description) == 1 and len(marking) == 1 and len(marking_comment) == 1 and len(findability) == 1 and len(findability_description) == 1 and len(alt) == 1 and len(northing) == 1 and len(easting) == 1 and len(tag_station) == 1 and len(exact_station) == 1 and len(other_station) == 1 and len(other_description) == 1 and len(bearings) == 1 and len(url) == 1:
|
||||
e, state = Entrance.objects.update_or_create(
|
||||
name=name[0],
|
||||
non_public={
|
||||
"True": True,
|
||||
"False": False,
|
||||
"true": True,
|
||||
"false": False,
|
||||
}[non_public[0]],
|
||||
entrance_description=entrance_description[0],
|
||||
explorers=explorers[0],
|
||||
map_description=map_description[0],
|
||||
location_description=location_description[0],
|
||||
lastvisit=lastvisit[0],
|
||||
approach=approach[0],
|
||||
underground_description=underground_description[0],
|
||||
photo=photo[0],
|
||||
marking=marking[0],
|
||||
marking_comment=marking_comment[0],
|
||||
findability=findability[0],
|
||||
findability_description=findability_description[0],
|
||||
alt=alt[0],
|
||||
northing=northing[0],
|
||||
easting=easting[0],
|
||||
tag_station=tag_station[0],
|
||||
exact_station=exact_station[0],
|
||||
other_station=other_station[0],
|
||||
other_description=other_description[0],
|
||||
bearings=bearings[0],
|
||||
url=url[0],
|
||||
filename=filename,
|
||||
cached_primary_slug=slugs[0],
|
||||
)
|
||||
primary = True
|
||||
for slug in slugs:
|
||||
#print("entrance slug:{} filename:{}".format(slug, filename))
|
||||
# print("entrance slug:{} filename:{}".format(slug, filename))
|
||||
try:
|
||||
cs = EntranceSlug.objects.update_or_create(entrance = e,
|
||||
slug = slug,
|
||||
primary = primary)
|
||||
cs = EntranceSlug.objects.update_or_create(entrance=e, slug=slug, primary=primary)
|
||||
except:
|
||||
# need to cope with duplicates
|
||||
message = f" ! FAILED to get precisely one ENTRANCE when updating using: cave_entrance/{filename}"
|
||||
DataIssue.objects.create(parser='caves', message=message, url=f'/cave/{slug}/edit/')
|
||||
kents = EntranceSlug.objects.all().filter(entrance = e,
|
||||
slug = slug,
|
||||
primary = primary)
|
||||
DataIssue.objects.create(parser="caves", message=message, url=f"/cave/{slug}/edit/")
|
||||
kents = EntranceSlug.objects.all().filter(entrance=e, slug=slug, primary=primary)
|
||||
for k in kents:
|
||||
message = " ! - DUPLICATE in db. entrance:"+ str(k.entrance) + ", slug:" + str(k.slug())
|
||||
DataIssue.objects.create(parser='caves', message=message, url=f'/cave/{slug}/edit/')
|
||||
message = " ! - DUPLICATE in db. entrance:" + str(k.entrance) + ", slug:" + str(k.slug())
|
||||
DataIssue.objects.create(parser="caves", message=message, url=f"/cave/{slug}/edit/")
|
||||
print(message)
|
||||
for k in kents:
|
||||
if k.slug() != None:
|
||||
print(" ! - OVERWRITING this one: slug:"+ str(k.slug()))
|
||||
print(" ! - OVERWRITING this one: slug:" + str(k.slug()))
|
||||
k.notes = "DUPLICATE entrance found on import. Please fix\n" + k.notes
|
||||
c = k
|
||||
primary = False
|
||||
@ -301,11 +314,12 @@ def readentrance(filename):
|
||||
# DataIssue.objects.create(parser='caves', message=message, url=f'/cave/{slug}/edit/')
|
||||
# print(message)
|
||||
|
||||
|
||||
def readcave(filename):
|
||||
'''Reads an enrance description from the .html file
|
||||
"""Reads an enrance description from the .html file
|
||||
Convoluted. Sorry.This is as I inherited it and I haven't fiddled with it. Needs rewriting
|
||||
Assumes any area it hasn't seen before is a subarea of 1623
|
||||
'''
|
||||
"""
|
||||
global entrances_xslug
|
||||
global caves_xslug
|
||||
global areas_xslug
|
||||
@ -314,68 +328,97 @@ def readcave(filename):
|
||||
with open(os.path.join(CAVEDESCRIPTIONS, filename)) as f:
|
||||
contents = f.read()
|
||||
context = filename
|
||||
cavecontentslist = getXML(contents, "cave", maxItems = 1, context = context)
|
||||
cavecontentslist = getXML(contents, "cave", maxItems=1, context=context)
|
||||
if len(cavecontentslist) != 1:
|
||||
message = f'! BAD CAVE at "{filename}"'
|
||||
DataIssue.objects.create(parser='caves', message=message)
|
||||
DataIssue.objects.create(parser="caves", message=message)
|
||||
print(message)
|
||||
else:
|
||||
cavecontents = cavecontentslist[0]
|
||||
non_public = getXML(cavecontents, "non_public", maxItems = 1, context = context)
|
||||
slugs = getXML(cavecontents, "caveslug", maxItems = 1, context = context)
|
||||
official_name = getXML(cavecontents, "official_name", maxItems = 1, context = context)
|
||||
areas = getXML(cavecontents, "area", context = context)
|
||||
kataster_code = getXML(cavecontents, "kataster_code", maxItems = 1, context = context)
|
||||
kataster_number = getXML(cavecontents, "kataster_number", maxItems = 1, context = context)
|
||||
unofficial_number = getXML(cavecontents, "unofficial_number", maxItems = 1, context = context)
|
||||
explorers = getXML(cavecontents, "explorers", maxItems = 1, context = context)
|
||||
underground_description = getXML(cavecontents, "underground_description", maxItems = 1, context = context)
|
||||
equipment = getXML(cavecontents, "equipment", maxItems = 1, context = context)
|
||||
references = getXML(cavecontents, "references", maxItems = 1, context = context)
|
||||
survey = getXML(cavecontents, "survey", maxItems = 1, context = context)
|
||||
kataster_status = getXML(cavecontents, "kataster_status", maxItems = 1, context = context)
|
||||
underground_centre_line = getXML(cavecontents, "underground_centre_line", maxItems = 1, context = context)
|
||||
notes = getXML(cavecontents, "notes", maxItems = 1, context = context)
|
||||
length = getXML(cavecontents, "length", maxItems = 1, context = context)
|
||||
depth = getXML(cavecontents, "depth", maxItems = 1, context = context)
|
||||
extent = getXML(cavecontents, "extent", maxItems = 1, context = context)
|
||||
survex_file = getXML(cavecontents, "survex_file", maxItems = 1, context = context)
|
||||
description_file = getXML(cavecontents, "description_file", maxItems = 1, context = context)
|
||||
url = getXML(cavecontents, "url", maxItems = 1, context = context)
|
||||
entrances = getXML(cavecontents, "entrance", context = context)
|
||||
non_public = getXML(cavecontents, "non_public", maxItems=1, context=context)
|
||||
slugs = getXML(cavecontents, "caveslug", maxItems=1, context=context)
|
||||
official_name = getXML(cavecontents, "official_name", maxItems=1, context=context)
|
||||
areas = getXML(cavecontents, "area", context=context)
|
||||
kataster_code = getXML(cavecontents, "kataster_code", maxItems=1, context=context)
|
||||
kataster_number = getXML(cavecontents, "kataster_number", maxItems=1, context=context)
|
||||
unofficial_number = getXML(cavecontents, "unofficial_number", maxItems=1, context=context)
|
||||
explorers = getXML(cavecontents, "explorers", maxItems=1, context=context)
|
||||
underground_description = getXML(cavecontents, "underground_description", maxItems=1, context=context)
|
||||
equipment = getXML(cavecontents, "equipment", maxItems=1, context=context)
|
||||
references = getXML(cavecontents, "references", maxItems=1, context=context)
|
||||
survey = getXML(cavecontents, "survey", maxItems=1, context=context)
|
||||
kataster_status = getXML(cavecontents, "kataster_status", maxItems=1, context=context)
|
||||
underground_centre_line = getXML(cavecontents, "underground_centre_line", maxItems=1, context=context)
|
||||
notes = getXML(cavecontents, "notes", maxItems=1, context=context)
|
||||
length = getXML(cavecontents, "length", maxItems=1, context=context)
|
||||
depth = getXML(cavecontents, "depth", maxItems=1, context=context)
|
||||
extent = getXML(cavecontents, "extent", maxItems=1, context=context)
|
||||
survex_file = getXML(cavecontents, "survex_file", maxItems=1, context=context)
|
||||
description_file = getXML(cavecontents, "description_file", maxItems=1, context=context)
|
||||
url = getXML(cavecontents, "url", maxItems=1, context=context)
|
||||
entrances = getXML(cavecontents, "entrance", context=context)
|
||||
|
||||
if len(non_public) == 1 and len(slugs) >= 1 and len(official_name) == 1 and len(areas) >= 1 and len(kataster_code) == 1 and len(kataster_number) == 1 and len(unofficial_number) == 1 and len(explorers) == 1 and len(underground_description) == 1 and len(equipment) == 1 and len(references) == 1 and len(survey) == 1 and len(kataster_status) == 1 and len(underground_centre_line) == 1 and len(notes) == 1 and len(length) == 1 and len(depth) == 1 and len(extent) == 1 and len(survex_file) == 1 and len(description_file ) == 1 and len(url) == 1:
|
||||
if (
|
||||
len(non_public) == 1
|
||||
and len(slugs) >= 1
|
||||
and len(official_name) == 1
|
||||
and len(areas) >= 1
|
||||
and len(kataster_code) == 1
|
||||
and len(kataster_number) == 1
|
||||
and len(unofficial_number) == 1
|
||||
and len(explorers) == 1
|
||||
and len(underground_description) == 1
|
||||
and len(equipment) == 1
|
||||
and len(references) == 1
|
||||
and len(survey) == 1
|
||||
and len(kataster_status) == 1
|
||||
and len(underground_centre_line) == 1
|
||||
and len(notes) == 1
|
||||
and len(length) == 1
|
||||
and len(depth) == 1
|
||||
and len(extent) == 1
|
||||
and len(survex_file) == 1
|
||||
and len(description_file) == 1
|
||||
and len(url) == 1
|
||||
):
|
||||
try:
|
||||
c, state = Cave.objects.update_or_create(non_public = {"True": True, "False": False, "true": True, "false": False,}[non_public[0]],
|
||||
official_name = official_name[0],
|
||||
kataster_code = kataster_code[0],
|
||||
kataster_number = kataster_number[0],
|
||||
unofficial_number = unofficial_number[0],
|
||||
explorers = explorers[0],
|
||||
underground_description = underground_description[0],
|
||||
equipment = equipment[0],
|
||||
references = references[0],
|
||||
survey = survey[0],
|
||||
kataster_status = kataster_status[0],
|
||||
underground_centre_line = underground_centre_line[0],
|
||||
notes = notes[0],
|
||||
length = length[0],
|
||||
depth = depth[0],
|
||||
extent = extent[0],
|
||||
survex_file = survex_file[0],
|
||||
description_file = description_file[0],
|
||||
url = url[0],
|
||||
filename = filename)
|
||||
c, state = Cave.objects.update_or_create(
|
||||
non_public={
|
||||
"True": True,
|
||||
"False": False,
|
||||
"true": True,
|
||||
"false": False,
|
||||
}[non_public[0]],
|
||||
official_name=official_name[0],
|
||||
kataster_code=kataster_code[0],
|
||||
kataster_number=kataster_number[0],
|
||||
unofficial_number=unofficial_number[0],
|
||||
explorers=explorers[0],
|
||||
underground_description=underground_description[0],
|
||||
equipment=equipment[0],
|
||||
references=references[0],
|
||||
survey=survey[0],
|
||||
kataster_status=kataster_status[0],
|
||||
underground_centre_line=underground_centre_line[0],
|
||||
notes=notes[0],
|
||||
length=length[0],
|
||||
depth=depth[0],
|
||||
extent=extent[0],
|
||||
survex_file=survex_file[0],
|
||||
description_file=description_file[0],
|
||||
url=url[0],
|
||||
filename=filename,
|
||||
)
|
||||
except:
|
||||
print(" ! FAILED to get only one CAVE when updating using: "+filename)
|
||||
print(" ! FAILED to get only one CAVE when updating using: " + filename)
|
||||
kaves = Cave.objects.all().filter(kataster_number=kataster_number[0])
|
||||
for k in kaves:
|
||||
message = " ! - DUPLICATES in db. kataster:"+ str(k.kataster_number) + ", slug:" + str(k.slug())
|
||||
DataIssue.objects.create(parser='caves', message=message)
|
||||
message = " ! - DUPLICATES in db. kataster:" + str(k.kataster_number) + ", slug:" + str(k.slug())
|
||||
DataIssue.objects.create(parser="caves", message=message)
|
||||
print(message)
|
||||
for k in kaves:
|
||||
if k.slug() != None:
|
||||
print(" ! - OVERWRITING this one: slug:"+ str(k.slug()))
|
||||
print(" ! - OVERWRITING this one: slug:" + str(k.slug()))
|
||||
k.notes = "DUPLICATE kataster number found on import. Please fix\n" + k.notes
|
||||
c = k
|
||||
|
||||
@ -383,11 +426,11 @@ def readcave(filename):
|
||||
if area_slug in areas_xslug:
|
||||
newArea = areas_xslug[area_slug]
|
||||
else:
|
||||
area = Area.objects.filter(short_name = area_slug)
|
||||
area = Area.objects.filter(short_name=area_slug)
|
||||
if area:
|
||||
newArea = area[0]
|
||||
else:
|
||||
newArea = Area(short_name = area_slug, super = Area.objects.get(short_name = "1623"))
|
||||
newArea = Area(short_name=area_slug, super=Area.objects.get(short_name="1623"))
|
||||
newArea.save()
|
||||
areas_xslug[area_slug] = newArea
|
||||
c.area.add(newArea)
|
||||
@ -397,14 +440,12 @@ def readcave(filename):
|
||||
cs = caves_xslug[slug]
|
||||
else:
|
||||
try: # we want to overwrite a PENDING cave if we are now importing the 1623-xxx.html file for it
|
||||
cs = CaveSlug.objects.update_or_create(cave = c,
|
||||
slug = slug,
|
||||
primary = primary)
|
||||
cs = CaveSlug.objects.update_or_create(cave=c, slug=slug, primary=primary)
|
||||
caves_xslug[slug] = cs
|
||||
except Exception as ex:
|
||||
# This fails to do an update! It just crashes.. to be fixed
|
||||
message = f" ! Cave update/create failure : {slug}, skipping file cave_data/{context} with exception\nException: {ex.__class__}"
|
||||
DataIssue.objects.create(parser='caves', message=message)
|
||||
DataIssue.objects.create(parser="caves", message=message)
|
||||
print(message)
|
||||
|
||||
primary = False
|
||||
@ -414,8 +455,8 @@ def readcave(filename):
|
||||
set_dummy_entrance(slug[5:], slug, c, msg="DUMMY")
|
||||
else:
|
||||
for entrance in entrances:
|
||||
eslug = getXML(entrance, "entranceslug", maxItems = 1, context = context)[0]
|
||||
letter = getXML(entrance, "letter", maxItems = 1, context = context)[0]
|
||||
eslug = getXML(entrance, "entranceslug", maxItems=1, context=context)[0]
|
||||
letter = getXML(entrance, "letter", maxItems=1, context=context)[0]
|
||||
if len(entrances) == 1 and not eslug: # may be empty: <entranceslug></entranceslug>
|
||||
set_dummy_entrance(slug[5:], slug, c, msg="DUMMY")
|
||||
else:
|
||||
@ -423,62 +464,69 @@ def readcave(filename):
|
||||
if eslug in entrances_xslug:
|
||||
entrance = entrances_xslug[eslug]
|
||||
else:
|
||||
entrance = Entrance.objects.get(entranceslug__slug = eslug)
|
||||
entrance = Entrance.objects.get(entranceslug__slug=eslug)
|
||||
entrances_xslug[eslug] = entrance
|
||||
ce = CaveAndEntrance.objects.update_or_create(cave = c, entrance_letter = letter, entrance = entrance)
|
||||
ce = CaveAndEntrance.objects.update_or_create(
|
||||
cave=c, entrance_letter=letter, entrance=entrance
|
||||
)
|
||||
except:
|
||||
message = f' ! Entrance setting failure, slug:"{slug}" #entrances:{len(entrances)} {entrance} letter:"{letter}" cave:"{c}" filename:"cave_data/{filename}"'
|
||||
DataIssue.objects.create(parser='caves', message=message, url=f'{c.url}_edit/')
|
||||
DataIssue.objects.create(parser="caves", message=message, url=f"{c.url}_edit/")
|
||||
print(message)
|
||||
|
||||
if survex_file[0]:
|
||||
if not (Path(SURVEX_DATA) / survex_file[0]).is_file():
|
||||
message = f' ! {slug:12} survex filename does not exist :LOSER:"{survex_file[0]}" in "{filename}"'
|
||||
DataIssue.objects.create(parser='caves', message=message, url=f'/{slug[0:4]}/{slug}_cave_edit/')
|
||||
DataIssue.objects.create(parser="caves", message=message, url=f"/{slug[0:4]}/{slug}_cave_edit/")
|
||||
print(message)
|
||||
|
||||
|
||||
if description_file[0]: # if not an empty string
|
||||
message = f' - {slug:12} Note (not an error): complex description filename "{description_file[0]}" inside "{CAVEDESCRIPTIONS}/{filename}"'
|
||||
DataIssue.objects.create(parser='caves ok', message=message, url=f'/{slug}_cave_edit/')
|
||||
DataIssue.objects.create(parser="caves ok", message=message, url=f"/{slug}_cave_edit/")
|
||||
print(message)
|
||||
|
||||
if not (Path(EXPOWEB) / description_file[0]).is_file():
|
||||
message = f' ! {slug:12} description filename "{EXPOWEB}/{description_file[0]}" does not refer to a real file'
|
||||
DataIssue.objects.create(parser='caves', message=message, url=f'/{slug}_cave_edit/')
|
||||
DataIssue.objects.create(parser="caves", message=message, url=f"/{slug}_cave_edit/")
|
||||
print(message)
|
||||
#c.description_file="" # done only once, to clear out cruft.
|
||||
#c.save()
|
||||
# c.description_file="" # done only once, to clear out cruft.
|
||||
# c.save()
|
||||
else: # more than one item in long list
|
||||
message = f' ! ABORT loading this cave. in "{filename}"'
|
||||
DataIssue.objects.create(parser='caves', message=message, url=f'/{slug}_cave_edit/')
|
||||
DataIssue.objects.create(parser="caves", message=message, url=f"/{slug}_cave_edit/")
|
||||
print(message)
|
||||
|
||||
def getXML(text, itemname, minItems = 1, maxItems = None, printwarnings = True, context = ""):
|
||||
"""Reads a single XML tag
|
||||
"""
|
||||
|
||||
def getXML(text, itemname, minItems=1, maxItems=None, printwarnings=True, context=""):
|
||||
"""Reads a single XML tag"""
|
||||
items = re.findall("<%(itemname)s>(.*?)</%(itemname)s>" % {"itemname": itemname}, text, re.S)
|
||||
if len(items) < minItems and printwarnings:
|
||||
message = " ! %(count)i x %(itemname)s found, at least %(min)i expected. Load ABORT. " % {"count": len(items),
|
||||
"itemname": itemname,
|
||||
"min": minItems} + " in file " + context
|
||||
DataIssue.objects.create(parser='caves', message=message, url=""+context)
|
||||
message = (
|
||||
" ! %(count)i x %(itemname)s found, at least %(min)i expected. Load ABORT. "
|
||||
% {"count": len(items), "itemname": itemname, "min": minItems}
|
||||
+ " in file "
|
||||
+ context
|
||||
)
|
||||
DataIssue.objects.create(parser="caves", message=message, url="" + context)
|
||||
print(message)
|
||||
|
||||
if maxItems is not None and len(items) > maxItems and printwarnings:
|
||||
message = " ! %(count)i x %(itemname)s found, no more than %(max)i expected in this XML unit. Load ABORT. " % {"count": len(items),
|
||||
"itemname": itemname,
|
||||
"max": maxItems} + " in file " + context
|
||||
DataIssue.objects.create(parser='caves', message=message)
|
||||
message = (
|
||||
" ! %(count)i x %(itemname)s found, no more than %(max)i expected in this XML unit. Load ABORT. "
|
||||
% {"count": len(items), "itemname": itemname, "max": maxItems}
|
||||
+ " in file "
|
||||
+ context
|
||||
)
|
||||
DataIssue.objects.create(parser="caves", message=message)
|
||||
print(message)
|
||||
if minItems == 0:
|
||||
if not items:
|
||||
items = [ "" ]
|
||||
items = [""]
|
||||
return items
|
||||
|
||||
|
||||
def readcaves():
|
||||
'''Reads the xml-format HTML files in the EXPOWEB repo, not from the loser repo.
|
||||
'''
|
||||
"""Reads the xml-format HTML files in the EXPOWEB repo, not from the loser repo."""
|
||||
# For those caves which do not have cave_data/1623-xxx.html XML files even though they exist and have surveys
|
||||
# should put this in a simple list
|
||||
pending = set()
|
||||
@ -487,7 +535,7 @@ def readcaves():
|
||||
with open(fpending, "r") as fo:
|
||||
cids = fo.readlines()
|
||||
for cid in cids:
|
||||
pending.add(cid.strip().rstrip('\n').upper())
|
||||
pending.add(cid.strip().rstrip("\n").upper())
|
||||
|
||||
with transaction.atomic():
|
||||
print(" - Deleting Caves and Entrances")
|
||||
@ -505,27 +553,26 @@ def readcaves():
|
||||
except:
|
||||
pass
|
||||
# Clear the cave data issues and the caves as we are reloading
|
||||
DataIssue.objects.filter(parser='areas').delete()
|
||||
DataIssue.objects.filter(parser='caves').delete()
|
||||
DataIssue.objects.filter(parser='caves ok').delete()
|
||||
DataIssue.objects.filter(parser='entrances').delete()
|
||||
DataIssue.objects.filter(parser="areas").delete()
|
||||
DataIssue.objects.filter(parser="caves").delete()
|
||||
DataIssue.objects.filter(parser="caves ok").delete()
|
||||
DataIssue.objects.filter(parser="entrances").delete()
|
||||
|
||||
print(" - Creating Areas 1623, 1624, 1627 and 1626")
|
||||
# This crashes on the server with MariaDB even though a null parent is explicitly allowed.
|
||||
area_1623= Area.objects.create(short_name = "1623", super=None)
|
||||
area_1623 = Area.objects.create(short_name="1623", super=None)
|
||||
area_1623.save()
|
||||
area_1624= Area.objects.create(short_name = "1624", super=None)
|
||||
area_1624 = Area.objects.create(short_name="1624", super=None)
|
||||
area_1624.save()
|
||||
area_1626= Area.objects.create(short_name = "1626", super=None)
|
||||
area_1626 = Area.objects.create(short_name="1626", super=None)
|
||||
area_1626.save()
|
||||
area_1627= Area.objects.create(short_name = "1627", super=None)
|
||||
area_1627 = Area.objects.create(short_name="1627", super=None)
|
||||
area_1627.save()
|
||||
|
||||
|
||||
with transaction.atomic():
|
||||
print(" - settings.CAVEDESCRIPTIONS: ", CAVEDESCRIPTIONS)
|
||||
print(" - Reading Entrances from entrance descriptions xml files")
|
||||
for filename in next(os.walk(ENTRANCEDESCRIPTIONS))[2]: #Should be a better way of getting a list of files
|
||||
for filename in next(os.walk(ENTRANCEDESCRIPTIONS))[2]: # Should be a better way of getting a list of files
|
||||
# if filename.endswith('.html'):
|
||||
# if Path(filename).stem[5:] in pending:
|
||||
# print(f'Skipping pending entrance dummy file <{filename}>')
|
||||
@ -534,14 +581,14 @@ def readcaves():
|
||||
readentrance(filename)
|
||||
|
||||
print(" - Reading Caves from cave descriptions xml files")
|
||||
for filename in next(os.walk(CAVEDESCRIPTIONS))[2]: #Should be a better way of getting a list of files
|
||||
if filename.endswith('.html'):
|
||||
for filename in next(os.walk(CAVEDESCRIPTIONS))[2]: # Should be a better way of getting a list of files
|
||||
if filename.endswith(".html"):
|
||||
readcave(filename)
|
||||
|
||||
print (" - Setting up all the variously useful alias names")
|
||||
print(" - Setting up all the variously useful alias names")
|
||||
mycavelookup = GetCaveLookup()
|
||||
|
||||
print (" - Setting pending caves")
|
||||
print(" - Setting pending caves")
|
||||
# Do this last, so we can detect if they are created and no longer 'pending'
|
||||
|
||||
with transaction.atomic():
|
||||
@ -549,11 +596,10 @@ def readcaves():
|
||||
|
||||
if k[0:3] == "162":
|
||||
areanum = k[0:4]
|
||||
url = f'{areanum}/{k[5:]}' # Note we are not appending the .htm as we are modern folks now.
|
||||
url = f"{areanum}/{k[5:]}" # Note we are not appending the .htm as we are modern folks now.
|
||||
else:
|
||||
areanum = "1623"
|
||||
url = f'1623/{k}'
|
||||
|
||||
url = f"1623/{k}"
|
||||
|
||||
area = area_1623
|
||||
if areanum == "1623":
|
||||
@ -568,8 +614,6 @@ def readcaves():
|
||||
do_pending_cave(k, url, area)
|
||||
except:
|
||||
message = f" ! Error. Cannot create pending cave and entrance, pending-id:{k} in area {areanum}"
|
||||
DataIssue.objects.create(parser='caves', message=message)
|
||||
DataIssue.objects.create(parser="caves", message=message)
|
||||
print(message)
|
||||
raise
|
||||
|
||||
|
||||
|
@ -13,11 +13,11 @@ from troggle.core.models.survex import DrawingFile, SingleScan, Wallet
|
||||
from troggle.core.models.troggle import DataIssue
|
||||
from troggle.core.utils import save_carefully
|
||||
|
||||
'''Searches through all the :drawings: repository looking
|
||||
"""Searches through all the :drawings: repository looking
|
||||
for tunnel and therion files
|
||||
'''
|
||||
"""
|
||||
|
||||
todo='''- Rename functions more consistently between tunnel and therion variants
|
||||
todo = """- Rename functions more consistently between tunnel and therion variants
|
||||
|
||||
- Recode to use pathlib instead of whacky resetting of loop variable inside loop
|
||||
to scan sub-folders.
|
||||
@ -25,20 +25,23 @@ to scan sub-folders.
|
||||
- Recode rx_valid_ext to use pathlib suffix() function
|
||||
|
||||
- Recode load_drawings_files() to use a list of suffices not huge if-else monstrosity
|
||||
'''
|
||||
"""
|
||||
|
||||
rx_valid_ext = re.compile(r"(?i)\.(?:png|jpg|pdf|jpeg|gif|txt)$")
|
||||
|
||||
rx_valid_ext = re.compile(r'(?i)\.(?:png|jpg|pdf|jpeg|gif|txt)$')
|
||||
|
||||
def find_dwg_file(dwgfile, path):
|
||||
'''Is given a line of text 'path' which may or may not contain a recognisable name of a scanned file
|
||||
"""Is given a line of text 'path' which may or may not contain a recognisable name of a scanned file
|
||||
which we have already seen when we imported all the files we could find in the surveyscans direstories.
|
||||
|
||||
The purpose is to find cross-references between Tunnel drawing files. But this is not reported anywhere yet ?
|
||||
|
||||
What is all this really for ?! Is this data used anywhere ??
|
||||
'''
|
||||
"""
|
||||
wallet, scansfile = None, None
|
||||
mscansdir = re.search(r"(\d\d\d\d#X?\d+\w?|1995-96kh|92-94Surveybookkh|1991surveybook|smkhs)/(.*?(?:png|jpg|pdf|jpeg|gif|txt))$", path)
|
||||
mscansdir = re.search(
|
||||
r"(\d\d\d\d#X?\d+\w?|1995-96kh|92-94Surveybookkh|1991surveybook|smkhs)/(.*?(?:png|jpg|pdf|jpeg|gif|txt))$", path
|
||||
)
|
||||
if mscansdir:
|
||||
scanswalletl = Wallet.objects.filter(walletname=mscansdir.group(1))
|
||||
# This should be changed to properly detect if a list of folders is returned and do something sensible, not just pick the first.
|
||||
@ -47,18 +50,18 @@ def find_dwg_file(dwgfile, path):
|
||||
if len(scanswalletl) > 1:
|
||||
message = f"! More than one scan FOLDER matches filter query. [{scansfilel[0]}]: {mscansdir.group(1)} {mscansdir.group(2)} {dwgfile.dwgpath} {path}"
|
||||
print(message)
|
||||
DataIssue.objects.create(parser='Tunnel', message=message)
|
||||
DataIssue.objects.create(parser="Tunnel", message=message)
|
||||
|
||||
if wallet:
|
||||
scansfilel = wallet.singlescan_set.filter(name=mscansdir.group(2))
|
||||
if len(scansfilel):
|
||||
if len(scansfilel) > 1:
|
||||
plist =[]
|
||||
plist = []
|
||||
for sf in scansfilel:
|
||||
plist.append(sf.ffile)
|
||||
message = f"! More than one image FILENAME matches filter query. [{scansfilel[0]}]: {mscansdir.group(1)} {mscansdir.group(2)} {dwgfile.dwgpath} {path} {plist}"
|
||||
print(message)
|
||||
DataIssue.objects.create(parser='Tunnel', message=message)
|
||||
DataIssue.objects.create(parser="Tunnel", message=message)
|
||||
scansfile = scansfilel[0]
|
||||
|
||||
if wallet:
|
||||
@ -66,25 +69,27 @@ def find_dwg_file(dwgfile, path):
|
||||
if scansfile:
|
||||
dwgfile.scans.add(scansfile)
|
||||
|
||||
elif path and not rx_valid_ext.search(path): # ie not recognised as a path where wallets live and not an image file type
|
||||
elif path and not rx_valid_ext.search(
|
||||
path
|
||||
): # ie not recognised as a path where wallets live and not an image file type
|
||||
name = os.path.split(path)[1]
|
||||
rdwgfilel = DrawingFile.objects.filter(dwgname=name) # Check if it is another drawing file we have already seen
|
||||
if len(rdwgfilel):
|
||||
if len(rdwgfilel) > 1:
|
||||
plist =[]
|
||||
plist = []
|
||||
for df in rdwgfilel:
|
||||
plist.append(df.dwgpath)
|
||||
message = f"- Warning {len(rdwgfilel)} files named '{name}' {plist}" # should not be a problem?
|
||||
print(message)
|
||||
DataIssue.objects.create(parser='Tunnel', message=message, url=f'/dwgdataraw/{path}')
|
||||
DataIssue.objects.create(parser="Tunnel", message=message, url=f"/dwgdataraw/{path}")
|
||||
rdwgfile = rdwgfilel[0]
|
||||
dwgfile.dwgcontains.add(rdwgfile)
|
||||
|
||||
dwgfile.save()
|
||||
|
||||
|
||||
def findwalletimage(therionfile, foundpath):
|
||||
'''Tries to link the drawing file (Therion format) to the referenced image (scan) file
|
||||
'''
|
||||
"""Tries to link the drawing file (Therion format) to the referenced image (scan) file"""
|
||||
foundpath = foundpath.strip("{}")
|
||||
mscansdir = re.search(r"(\d\d\d\d#\d+\w?|1995-96kh|92-94Surveybookkh|1991surveybook|smkhs)", foundpath)
|
||||
if mscansdir:
|
||||
@ -93,9 +98,11 @@ def findwalletimage(therionfile, foundpath):
|
||||
if len(scanswalletl):
|
||||
wallet = scanswalletl[0]
|
||||
if len(scanswalletl) > 1:
|
||||
message = "! More than one scan FOLDER matches filter query. [{}]: {} {} {}".format(therionfile, mscansdir.group(1), foundpath)
|
||||
message = "! More than one scan FOLDER matches filter query. [{}]: {} {} {}".format(
|
||||
therionfile, mscansdir.group(1), foundpath
|
||||
)
|
||||
print(message)
|
||||
DataIssue.objects.create(parser='Therion', message=message)
|
||||
DataIssue.objects.create(parser="Therion", message=message)
|
||||
if wallet:
|
||||
therionfile.dwgwallets.add(wallet)
|
||||
|
||||
@ -105,33 +112,33 @@ def findwalletimage(therionfile, foundpath):
|
||||
# message = f'! {len(scansfilel)} {scansfilel} = {scanfilename} found in the wallet specified {wallet.walletname}'
|
||||
# print(message)
|
||||
if len(scansfilel) > 1:
|
||||
plist =[]
|
||||
plist = []
|
||||
for sf in scansfilel:
|
||||
plist.append(sf.ffile)
|
||||
message = f"! More than one image FILENAME matches filter query. [{scansfilel[0]}]: {mscansdir.group(1)} {mscansdir.group(2)} {dwgfile.dwgpath} {path} {plist}"
|
||||
print(message)
|
||||
DataIssue.objects.create(parser='Therion', message=message)
|
||||
DataIssue.objects.create(parser="Therion", message=message)
|
||||
scansfile = scansfilel[0]
|
||||
therionfile.scans.add(scansfile)
|
||||
else:
|
||||
message = f'! Scanned file {scanfilename} mentioned in "{therionfile.dwgpath}" is not actually found in {wallet.walletname}'
|
||||
wurl = f'/survey_scans/{wallet.walletname}/'.replace("#",":")
|
||||
wurl = f"/survey_scans/{wallet.walletname}/".replace("#", ":")
|
||||
# print(message)
|
||||
DataIssue.objects.create(parser='Therion', message=message, url = wurl)
|
||||
DataIssue.objects.create(parser="Therion", message=message, url=wurl)
|
||||
|
||||
|
||||
def findimportinsert(therionfile, imp):
|
||||
'''Tries to link the scrap (Therion format) to the referenced therion scrap
|
||||
'''
|
||||
"""Tries to link the scrap (Therion format) to the referenced therion scrap"""
|
||||
pass
|
||||
|
||||
rx_xth_me = re.compile(r'xth_me_image_insert.*{.*}$', re.MULTILINE)
|
||||
rx_scrap = re.compile(r'^survey (\w*).*$', re.MULTILINE)
|
||||
rx_input = re.compile(r'^input (\w*).*$', re.MULTILINE)
|
||||
|
||||
rx_xth_me = re.compile(r"xth_me_image_insert.*{.*}$", re.MULTILINE)
|
||||
rx_scrap = re.compile(r"^survey (\w*).*$", re.MULTILINE)
|
||||
rx_input = re.compile(r"^input (\w*).*$", re.MULTILINE)
|
||||
|
||||
|
||||
def settherionfileinfo(filetuple):
|
||||
'''Read in the drawing file contents and sets values on the dwgfile object
|
||||
'''
|
||||
"""Read in the drawing file contents and sets values on the dwgfile object"""
|
||||
thtype, therionfile = filetuple
|
||||
|
||||
ff = os.path.join(settings.DRAWINGS_DATA, therionfile.dwgpath)
|
||||
@ -139,17 +146,17 @@ def settherionfileinfo(filetuple):
|
||||
if therionfile.filesize <= 0:
|
||||
message = f"! Zero length therion file {ff}"
|
||||
print(message)
|
||||
DataIssue.objects.create(parser='Therion', message=message, url=f'/dwgdataraw/{therionfile.dwgpath}')
|
||||
DataIssue.objects.create(parser="Therion", message=message, url=f"/dwgdataraw/{therionfile.dwgpath}")
|
||||
return
|
||||
fin = open(ff,'r')
|
||||
fin = open(ff, "r")
|
||||
ttext = fin.read()
|
||||
fin.close()
|
||||
|
||||
# The equivalent for a tunnel 'path' would be a .th2 'line wall' or 'scrap'
|
||||
# print(len(re.findall(r"line", ttext)))
|
||||
if thtype=='th':
|
||||
if thtype == "th":
|
||||
therionfile.npaths = len(re.findall(r"^input ", ttext, re.MULTILINE))
|
||||
elif thtype=='th2':
|
||||
elif thtype == "th2":
|
||||
therionfile.npaths = len(re.findall(r"^line ", ttext, re.MULTILINE))
|
||||
therionfile.save()
|
||||
|
||||
@ -162,42 +169,44 @@ def settherionfileinfo(filetuple):
|
||||
|
||||
for xth_me in rx_xth_me.findall(ttext):
|
||||
# WORK IN PROGRESS. Do not clutter up the DataIssues list with this
|
||||
message = f'! Un-parsed image filename: {therionfile.dwgname} : {xth_me.split()[-3]} - {therionfile.dwgpath}'
|
||||
message = f"! Un-parsed image filename: {therionfile.dwgname} : {xth_me.split()[-3]} - {therionfile.dwgpath}"
|
||||
# print(message)
|
||||
# DataIssue.objects.create(parser='xTherion', message=message, url=f'/dwgdataraw/{therionfile.dwgpath}')
|
||||
# ! Un-parsed image filename: 107coldest : ../../../expofiles/surveyscans/2015/2015#20/notes.jpg - therion/plan/107coldest.th2
|
||||
|
||||
with open('therionrefs.log', 'a') as lg:
|
||||
lg.write(message + '\n')
|
||||
with open("therionrefs.log", "a") as lg:
|
||||
lg.write(message + "\n")
|
||||
|
||||
findwalletimage(therionfile, xth_me.split()[-3])
|
||||
|
||||
for inp in rx_input.findall(ttext):
|
||||
# if this 'input' is a .th2 file we have already seen, then we can assign this as a sub-file
|
||||
# but we would need to disentangle to get the current path properly
|
||||
message = f'! Un-set (?) Therion .th2 input: - {therionfile.dwgname} : {inp} - {therionfile.dwgpath}'
|
||||
#print(message)
|
||||
DataIssue.objects.create(parser='xTherion', message=message, url=f'/dwgdataraw/{therionfile.dwgpath}')
|
||||
message = f"! Un-set (?) Therion .th2 input: - {therionfile.dwgname} : {inp} - {therionfile.dwgpath}"
|
||||
# print(message)
|
||||
DataIssue.objects.create(parser="xTherion", message=message, url=f"/dwgdataraw/{therionfile.dwgpath}")
|
||||
findimportinsert(therionfile, inp)
|
||||
|
||||
therionfile.save()
|
||||
|
||||
rx_skpath = re.compile(rb'<skpath')
|
||||
|
||||
rx_skpath = re.compile(rb"<skpath")
|
||||
rx_pcpath = re.compile(rb'<pcarea area_signal="frame".*?sfsketch="([^"]*)" sfstyle="([^"]*)"')
|
||||
|
||||
|
||||
def settnlfileinfo(dwgfile):
|
||||
'''Read in the drawing file contents and sets values on the dwgfile object
|
||||
"""Read in the drawing file contents and sets values on the dwgfile object
|
||||
Should try to read the date too e.g. tunneldate="2010-08-16 22:51:57
|
||||
then we could display on the master calendar per expo.
|
||||
'''
|
||||
"""
|
||||
ff = os.path.join(settings.DRAWINGS_DATA, dwgfile.dwgpath)
|
||||
dwgfile.filesize = os.stat(ff)[stat.ST_SIZE]
|
||||
if dwgfile.filesize <= 0:
|
||||
message = f"! Zero length tunnel file {ff}"
|
||||
print(message)
|
||||
DataIssue.objects.create(parser='Tunnel', message=message, url=f'/dwgdataraw/{dwgfile.dwgpath}')
|
||||
DataIssue.objects.create(parser="Tunnel", message=message, url=f"/dwgdataraw/{dwgfile.dwgpath}")
|
||||
return
|
||||
fin = open(ff,'rb')
|
||||
fin = open(ff, "rb")
|
||||
ttext = fin.read()
|
||||
fin.close()
|
||||
|
||||
@ -216,22 +225,24 @@ def settnlfileinfo(dwgfile):
|
||||
|
||||
dwgfile.save()
|
||||
|
||||
|
||||
def setdrwfileinfo(dwgfile):
|
||||
'''Read in the drawing file contents and sets values on the dwgfile object,
|
||||
"""Read in the drawing file contents and sets values on the dwgfile object,
|
||||
but these are SVGs, PDFs or .txt files, so there is no useful format to search for
|
||||
This function is a placeholder in case we thnk of a way to do something
|
||||
to recognise generic survex filenames.
|
||||
'''
|
||||
"""
|
||||
ff = Path(settings.DRAWINGS_DATA) / dwgfile.dwgpath
|
||||
dwgfile.filesize = ff.stat().st_size
|
||||
if dwgfile.filesize <= 0:
|
||||
message = f"! Zero length drawing file {ff}"
|
||||
print(message)
|
||||
DataIssue.objects.create(parser='drawings', message=message, url=f'/dwgdataraw/{dwgfile.dwgpath}')
|
||||
DataIssue.objects.create(parser="drawings", message=message, url=f"/dwgdataraw/{dwgfile.dwgpath}")
|
||||
return
|
||||
|
||||
|
||||
def load_drawings_files():
|
||||
'''Breadth first search of drawings directory looking for sub-directories and *.xml filesize
|
||||
"""Breadth first search of drawings directory looking for sub-directories and *.xml filesize
|
||||
This is brain-damaged very early code. Should be replaced with proper use of pathlib.
|
||||
|
||||
Why do we have all this detection of file types/! Why not use get_mime_types ?
|
||||
@ -239,19 +250,18 @@ def load_drawings_files():
|
||||
|
||||
We import JPG, PNG and SVG files; which have already been put on the server,
|
||||
but the upload form intentionally refuses to upload PNG and JPG (though it does allow SVG)
|
||||
'''
|
||||
"""
|
||||
all_xml = []
|
||||
drawdatadir = settings.DRAWINGS_DATA
|
||||
DrawingFile.objects.all().delete()
|
||||
DataIssue.objects.filter(parser='drawings').delete()
|
||||
DataIssue.objects.filter(parser='Therion').delete()
|
||||
DataIssue.objects.filter(parser='xTherion').delete()
|
||||
DataIssue.objects.filter(parser='Tunnel').delete()
|
||||
if(os.path.isfile('therionrefs.log')):
|
||||
os.remove('therionrefs.log')
|
||||
DataIssue.objects.filter(parser="drawings").delete()
|
||||
DataIssue.objects.filter(parser="Therion").delete()
|
||||
DataIssue.objects.filter(parser="xTherion").delete()
|
||||
DataIssue.objects.filter(parser="Tunnel").delete()
|
||||
if os.path.isfile("therionrefs.log"):
|
||||
os.remove("therionrefs.log")
|
||||
|
||||
|
||||
drawingsdirs = [ "" ]
|
||||
drawingsdirs = [""]
|
||||
while drawingsdirs:
|
||||
drawdir = drawingsdirs.pop()
|
||||
for f in os.listdir(os.path.join(drawdatadir, drawdir)):
|
||||
@ -260,64 +270,66 @@ def load_drawings_files():
|
||||
lf = os.path.join(drawdir, f)
|
||||
ff = os.path.join(drawdatadir, lf)
|
||||
if os.path.isdir(ff):
|
||||
drawingsdirs.append(lf) # lunatic! adding to list in middle of list while loop! Replace with pathlib functions.
|
||||
drawingsdirs.append(
|
||||
lf
|
||||
) # lunatic! adding to list in middle of list while loop! Replace with pathlib functions.
|
||||
elif Path(f).suffix.lower() == ".txt":
|
||||
# Always creates new
|
||||
dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1])
|
||||
dwgfile.save()
|
||||
all_xml.append(('txt',dwgfile))
|
||||
all_xml.append(("txt", dwgfile))
|
||||
elif Path(f).suffix.lower() == ".xml":
|
||||
# Always creates new
|
||||
dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1])
|
||||
dwgfile.save()
|
||||
all_xml.append(('xml',dwgfile))
|
||||
all_xml.append(("xml", dwgfile))
|
||||
elif Path(f).suffix.lower() == ".th":
|
||||
# Always creates new
|
||||
dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1])
|
||||
dwgfile.save()
|
||||
all_xml.append(('th',dwgfile))
|
||||
all_xml.append(("th", dwgfile))
|
||||
elif Path(f).suffix.lower() == ".th2":
|
||||
# Always creates new
|
||||
dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1])
|
||||
dwgfile.save()
|
||||
all_xml.append(('th2',dwgfile))
|
||||
all_xml.append(("th2", dwgfile))
|
||||
elif Path(f).suffix.lower() == ".pdf":
|
||||
# Always creates new
|
||||
dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1])
|
||||
dwgfile.save()
|
||||
all_xml.append(('pdf',dwgfile))
|
||||
all_xml.append(("pdf", dwgfile))
|
||||
elif Path(f).suffix.lower() == ".png":
|
||||
# Always creates new
|
||||
dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1])
|
||||
dwgfile.save()
|
||||
all_xml.append(('png',dwgfile))
|
||||
all_xml.append(("png", dwgfile))
|
||||
elif Path(f).suffix.lower() == ".svg":
|
||||
# Always creates new
|
||||
dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1])
|
||||
dwgfile.save()
|
||||
all_xml.append(('svg',dwgfile))
|
||||
all_xml.append(("svg", dwgfile))
|
||||
elif Path(f).suffix.lower() == ".jpg":
|
||||
# Always creates new
|
||||
dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1])
|
||||
dwgfile.save()
|
||||
all_xml.append(('jpg',dwgfile))
|
||||
elif Path(f).suffix == '':
|
||||
all_xml.append(("jpg", dwgfile))
|
||||
elif Path(f).suffix == "":
|
||||
# therion file
|
||||
dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f)[1])
|
||||
dwgfile.save()
|
||||
all_xml.append(('',dwgfile))
|
||||
all_xml.append(("", dwgfile))
|
||||
|
||||
print(f' - {len(all_xml)} Drawings files found')
|
||||
print(f" - {len(all_xml)} Drawings files found")
|
||||
|
||||
for d in all_xml:
|
||||
if d[0] in ['pdf', 'txt', 'svg', 'jpg', 'png', '']:
|
||||
if d[0] in ["pdf", "txt", "svg", "jpg", "png", ""]:
|
||||
setdrwfileinfo(d[1])
|
||||
if d[0] == 'xml':
|
||||
if d[0] == "xml":
|
||||
settnlfileinfo(d[1])
|
||||
# important to import .th2 files before .th so that we can assign them when found in .th files
|
||||
if d[0] == 'th2':
|
||||
if d[0] == "th2":
|
||||
settherionfileinfo(d)
|
||||
if d[0] == 'th':
|
||||
if d[0] == "th":
|
||||
settherionfileinfo(d)
|
||||
|
||||
# for drawfile in DrawingFile.objects.all():
|
||||
|
@ -4,8 +4,7 @@ import sys
|
||||
import django
|
||||
from django.contrib.auth.models import User
|
||||
from django.core import management
|
||||
from django.db import (close_old_connections, connection, connections,
|
||||
transaction)
|
||||
from django.db import close_old_connections, connection, connections, transaction
|
||||
from django.http import HttpResponse
|
||||
|
||||
import troggle.parsers.caves
|
||||
@ -16,41 +15,48 @@ import troggle.parsers.QMs
|
||||
import troggle.parsers.scans
|
||||
import troggle.settings
|
||||
|
||||
'''Master data import.
|
||||
"""Master data import.
|
||||
Used only by databaseReset.py and online controlpanel.
|
||||
'''
|
||||
"""
|
||||
|
||||
|
||||
def import_caves():
|
||||
print("-- Importing Caves to ",end="")
|
||||
print(django.db.connections.databases['default']['NAME'])
|
||||
print("-- Importing Caves to ", end="")
|
||||
print(django.db.connections.databases["default"]["NAME"])
|
||||
troggle.parsers.caves.readcaves()
|
||||
|
||||
|
||||
def import_people():
|
||||
print("-- Importing People (folk.csv) to ",end="")
|
||||
print(django.db.connections.databases['default']['NAME'])
|
||||
print("-- Importing People (folk.csv) to ", end="")
|
||||
print(django.db.connections.databases["default"]["NAME"])
|
||||
with transaction.atomic():
|
||||
troggle.parsers.people.load_people_expos()
|
||||
|
||||
|
||||
def import_surveyscans():
|
||||
print("-- Importing Survey Scans")
|
||||
with transaction.atomic():
|
||||
troggle.parsers.scans.load_all_scans()
|
||||
|
||||
|
||||
def import_logbooks():
|
||||
print("-- Importing Logbooks")
|
||||
with transaction.atomic():
|
||||
troggle.parsers.logbooks.LoadLogbooks()
|
||||
|
||||
|
||||
def import_logbook(year=2022):
|
||||
print(f"-- Importing Logbook {year}")
|
||||
with transaction.atomic():
|
||||
troggle.parsers.logbooks.LoadLogbook(year)
|
||||
|
||||
|
||||
def import_QMs():
|
||||
print("-- Importing old QMs for 161, 204, 234 from CSV files")
|
||||
with transaction.atomic():
|
||||
troggle.parsers.QMs.Load_QMs()
|
||||
|
||||
|
||||
def import_survex():
|
||||
# when this import is moved to the top with the rest it all crashes horribly
|
||||
print("-- Importing Survex and Entrance Positions")
|
||||
@ -63,23 +69,26 @@ def import_survex():
|
||||
with transaction.atomic():
|
||||
troggle.parsers.survex.LoadPositions()
|
||||
|
||||
|
||||
def import_ents():
|
||||
# when this import is moved to the top with the rest it all crashes horribly
|
||||
print(" - Survex entrances x/y/z Positions")
|
||||
with transaction.atomic():
|
||||
import troggle.parsers.survex
|
||||
|
||||
troggle.parsers.survex.LoadPositions()
|
||||
|
||||
|
||||
def import_loadpos():
|
||||
# when this import is moved to the top with the rest it all crashes horribly
|
||||
import troggle.parsers.survex
|
||||
|
||||
print(" - Survex entrances x/y/z Positions")
|
||||
with transaction.atomic():
|
||||
troggle.parsers.survex.LoadPositions()
|
||||
|
||||
|
||||
def import_drawingsfiles():
|
||||
print("-- Importing Drawings files")
|
||||
with transaction.atomic():
|
||||
troggle.parsers.drawings.load_drawings_files()
|
||||
|
||||
|
||||
|
@ -11,17 +11,16 @@ from django.template.defaultfilters import slugify
|
||||
from django.utils.timezone import get_current_timezone, make_aware
|
||||
|
||||
from parsers.people import GetPersonExpeditionNameLookup
|
||||
from troggle.core.models.caves import (Cave, GetCaveLookup, LogbookEntry,
|
||||
PersonTrip)
|
||||
from troggle.core.models.caves import Cave, GetCaveLookup, LogbookEntry, PersonTrip
|
||||
from troggle.core.models.troggle import DataIssue, Expedition
|
||||
from troggle.core.utils import TROG, save_carefully
|
||||
|
||||
'''
|
||||
"""
|
||||
Parses and imports logbooks in all their wonderful confusion
|
||||
See detailed explanation of the complete process:
|
||||
https://expo.survex.com/handbook/computing/logbooks-parsing.html
|
||||
'''
|
||||
todo='''
|
||||
"""
|
||||
todo = """
|
||||
- refactor everything with some urgency, esp. LoadLogbookForExpedition()
|
||||
|
||||
- remove the TROG things since we need the database for multiuser access? Or not?
|
||||
@ -47,14 +46,14 @@ todo='''
|
||||
- use Fixtures https://docs.djangoproject.com/en/4.1/ref/django-admin/#django-admin-loaddata to cache
|
||||
data for old logbooks? Not worth it..
|
||||
|
||||
'''
|
||||
"""
|
||||
MAX_LOGBOOK_ENTRY_TITLE_LENGTH = 200

# Blog files to parse, apparently keyed by year string with a
# (filename, parser function name) tuple as value, judging by the
# commented-out entries. Currently empty: every former entry has been folded
# into the main logbook files.
BLOG_PARSER_SETTINGS = {
    # "2022": ("ukcavingblog.html", "parser_blog"), # now folded in to logbooks.html
    # "2019": ("ukcavingblog.html", "parser_blog"), # now folded in to logbooks.html
    # "2018": ("ukcavingblog.html", "parser_blog"), # now folded in to logbooks.html
    # "2017": ("ukcavingblog.html", "parser_blog"), # now folded in to logbooks.html
}

# Logbook filename and parser function name used as the defaults.
DEFAULT_LOGBOOK_FILE = "logbook.html"
DEFAULT_LOGBOOK_PARSER = "parser_html"
|
||||
# All years since 2002 use the default value for Logbook parser
|
||||
@ -80,34 +79,68 @@ LOGBOOK_PARSER_SETTINGS = {
|
||||
"1984": ("logbook.html", "parser_html"),
|
||||
"1983": ("logbook.html", "parser_html"),
|
||||
"1982": ("logbook.html", "parser_html"),
|
||||
}
|
||||
}
|
||||
|
||||
# Expected number of logbook entries for each expedition year, keyed by the
# year as a string. LoadLogbookForExpedition reads this as entries[year].
entries = {
    "2022": 89,
    "2019": 55,
    "2018": 95,
    "2017": 74,
    "2016": 86,
    "2015": 80,
    "2014": 65,
    "2013": 52,
    "2012": 75,
    "2011": 71,
    "2010": 22,
    "2009": 53,
    "2008": 49,
    "2007": 113,
    "2006": 60,
    "2005": 55,
    "2004": 76,
    "2003": 42,
    "2002": 31,
    "2001": 49,
    "2000": 54,
    "1999": 79,
    "1998": 43,
    "1997": 53,
    "1996": 95,
    "1995": 42,
    "1994": 32,
    "1993": 41,
    "1992": 62,
    "1991": 39,
    "1990": 87,
    "1989": 63,
    "1988": 61,
    "1987": 34,
    "1985": 24,
    "1984": 32,
    "1983": 52,
    "1982": 42,
}
|
||||
|
||||
logentries = []  # the entire logbook for one year is a single object: a list of entries

# Place names that are not caves; EnterLogIntoDbase skips the cave lookup for these.
# NOTE(review): EnterLogIntoDbase tests place.lower() against this list, so the
# capitalised entries ("Journey", "Loser Plateau", "UNKNOWN") can never match
# there - confirm whether they should be stored in lower case.
noncaveplaces = ["Journey", "Loser Plateau", "UNKNOWN", "plateau", "base camp", "basecamp", "top camp", "topcamp"]

# Shared dict of parser problems: the parsers store the message text under a
# trip id or a fixed label such as "title" / "tripdate".
logdataissues = TROG["issues"]["logdataissues"]
trips = {}
|
||||
|
||||
#
|
||||
# the logbook loading section
|
||||
#
|
||||
def set_trip_id(year, seq):
    """Build the identifier of a logbook entry from its year and sequence number.

    Args:
        year: expedition year, interpolated as-is.
        seq: integer position of the entry within the year's logbook; it is
            zero-padded to at least two digits.

    Returns:
        A string of the form "<year>_s<seq>", e.g. "2022_s03".
    """
    return f"{year}_s{seq:02d}"
|
||||
|
||||
# Matches a name wrapped in <u>...</u> (case-insensitive) and captures the name.
# GetTripPersons treats a match as marking that person as the entry's author.
rx_tripperson = re.compile(r"(?i)<u>(.*?)</u>$")
# Matches a (...) or [...] span; GetTripPersons strips these from a name
# before looking the person up.
rx_round_bracket = re.compile(r"[\(\[].*?[\)\]]")
|
||||
|
||||
|
||||
def GetTripPersons(trippeople, expedition, logtime_underground, tid=None):
|
||||
res = [ ]
|
||||
res = []
|
||||
author = None
|
||||
# print(f'# {tid}')
|
||||
# print(f" - {tid} '{trippeople}' ")
|
||||
@ -118,35 +151,33 @@ def GetTripPersons(trippeople, expedition, logtime_underground, tid=None):
|
||||
mul = rx_tripperson.match(tripperson)
|
||||
if mul:
|
||||
tripperson = mul.group(1).strip()
|
||||
if tripperson and tripperson[0] != '*':
|
||||
if tripperson and tripperson[0] != "*":
|
||||
tripperson = re.sub(rx_round_bracket, "", tripperson).strip()
|
||||
|
||||
# these aliases should be moved to people.py GetPersonExpeditionNameLookup(expedition)
|
||||
if tripperson =="Wiggy":
|
||||
if tripperson == "Wiggy":
|
||||
tripperson = "Phil Wigglesworth"
|
||||
if tripperson =="Animal":
|
||||
if tripperson == "Animal":
|
||||
tripperson = "Mike Richardson"
|
||||
if tripperson =="MikeTA":
|
||||
if tripperson == "MikeTA":
|
||||
tripperson = "Mike Richardson"
|
||||
if tripperson =="CavingPig":
|
||||
if tripperson == "CavingPig":
|
||||
tripperson = "Elaine Oliver"
|
||||
if tripperson =="nobrotson":
|
||||
if tripperson == "nobrotson":
|
||||
tripperson = "Rob Watson"
|
||||
if tripperson =="Tinywoman":
|
||||
if tripperson == "Tinywoman":
|
||||
tripperson = "Nadia"
|
||||
if tripperson =="tcacrossley":
|
||||
if tripperson == "tcacrossley":
|
||||
tripperson = "Tom Crossley"
|
||||
if tripperson =="Samouse1":
|
||||
if tripperson == "Samouse1":
|
||||
tripperson = "Todd Rye"
|
||||
|
||||
|
||||
|
||||
personyear = GetPersonExpeditionNameLookup(expedition).get(tripperson.lower())
|
||||
if not personyear:
|
||||
message = f" ! - {expedition.year} No name match for: '{tripperson}' in entry {tid=} for this expedition year."
|
||||
print(message)
|
||||
DataIssue.objects.create(parser='logbooks', message=message)
|
||||
logdataissues[tid]=message
|
||||
DataIssue.objects.create(parser="logbooks", message=message)
|
||||
logdataissues[tid] = message
|
||||
res.append((personyear, logtime_underground))
|
||||
if mul:
|
||||
author = personyear
|
||||
@ -155,11 +186,12 @@ def GetTripPersons(trippeople, expedition, logtime_underground, tid=None):
|
||||
return "", 0
|
||||
author = res[-1][0] # the previous valid person and a time of 0 hours
|
||||
|
||||
#print(f" - {tid} [{author.person}] '{res[0][0].person}'...")
|
||||
# print(f" - {tid} [{author.person}] '{res[0][0].person}'...")
|
||||
return res, author
|
||||
|
||||
|
||||
def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_underground, tid=None):
|
||||
""" saves a logbook entry and related persontrips
|
||||
"""saves a logbook entry and related persontrips
|
||||
Does NOT save the expeditionday_id - all NULLs. why? Because we are deprecating expeditionday !
|
||||
|
||||
troggle.log shows that we are creating lots of duplicates, which is no problem with SQL as they just overwrite, but we are saving the same thing too many times.
|
||||
@ -188,68 +220,75 @@ def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_
|
||||
# print(f" - {author} - {logtime_underground}")
|
||||
except:
|
||||
message = f" ! - {expedition.year} Skipping logentry: {title} - GetTripPersons FAIL"
|
||||
DataIssue.objects.create(parser='logbooks', message=message)
|
||||
logdataissues["title"]=message
|
||||
DataIssue.objects.create(parser="logbooks", message=message)
|
||||
logdataissues["title"] = message
|
||||
print(message)
|
||||
raise
|
||||
return
|
||||
|
||||
if not author:
|
||||
message = f" ! - {expedition.year} Warning: logentry: {title} - no expo member author for entry '{tid}'"
|
||||
DataIssue.objects.create(parser='logbooks', message=message)
|
||||
logdataissues["title"]=message
|
||||
DataIssue.objects.create(parser="logbooks", message=message)
|
||||
logdataissues["title"] = message
|
||||
print(message)
|
||||
#return
|
||||
# return
|
||||
|
||||
# This needs attention. The slug field is derived from 'title'
|
||||
# both GetCaveLookup() and GetTripCave() need to work together better. None of this data is *used* though?
|
||||
#tripCave = GetTripCave(place):
|
||||
# tripCave = GetTripCave(place):
|
||||
|
||||
lplace = place.lower()
|
||||
cave=None
|
||||
cave = None
|
||||
if lplace not in noncaveplaces:
|
||||
cave = GetCaveLookup().get(lplace)
|
||||
|
||||
y = str(date)[:4]
|
||||
|
||||
text = text.replace(' src="', f' src="/years/{y}/' )
|
||||
text = text.replace(" src='", f" src='/years/{y}/" )
|
||||
text = text.replace(' src="', f' src="/years/{y}/')
|
||||
text = text.replace(" src='", f" src='/years/{y}/")
|
||||
|
||||
text = text.replace(f' src="/years/{y}//years/{y}/', f' src="/years/{y}/' )
|
||||
text = text.replace(f" src='/years/{y}//years/{y}/", f" src='/years/{y}/" )
|
||||
text = text.replace(f' src="/years/{y}//years/{y}/', f' src="/years/{y}/')
|
||||
text = text.replace(f" src='/years/{y}//years/{y}/", f" src='/years/{y}/")
|
||||
|
||||
text = text.replace('\t', '' )
|
||||
text = text.replace('\n\n\n', '\n\n' )
|
||||
text = text.replace("\t", "")
|
||||
text = text.replace("\n\n\n", "\n\n")
|
||||
|
||||
#Check for an existing copy of the current entry, and save
|
||||
# Check for an existing copy of the current entry, and save
|
||||
expeditionday = expedition.get_expedition_day(date)
|
||||
lookupAttribs={'date':date, 'title':title}
|
||||
lookupAttribs = {"date": date, "title": title}
|
||||
# 'cave' is converted to a string doing this, which renders as the cave slug.
|
||||
# but it is a db query which we should try to avoid - rewrite this
|
||||
|
||||
#NEW slug for a logbook entry here! Unique id + slugified title fragment
|
||||
# NEW slug for a logbook entry here! Unique id + slugified title fragment
|
||||
|
||||
if tid is not None:
|
||||
slug = tid
|
||||
# slug = tid + "_" + slugify(title)[:10].replace('-','_')
|
||||
else:
|
||||
slug = str(randint(1000,9999)) + "_" + slugify(title)[:10].replace('-','_')
|
||||
nonLookupAttribs={'place':place, 'text':text, 'expedition':expedition,
|
||||
'time_underground':logtime_underground, 'cave_slug':str(cave), 'slug': slug}
|
||||
slug = str(randint(1000, 9999)) + "_" + slugify(title)[:10].replace("-", "_")
|
||||
nonLookupAttribs = {
|
||||
"place": place,
|
||||
"text": text,
|
||||
"expedition": expedition,
|
||||
"time_underground": logtime_underground,
|
||||
"cave_slug": str(cave),
|
||||
"slug": slug,
|
||||
}
|
||||
|
||||
# This creates the lbo instance of LogbookEntry
|
||||
lbo, created=save_carefully(LogbookEntry, lookupAttribs, nonLookupAttribs)
|
||||
lbo, created = save_carefully(LogbookEntry, lookupAttribs, nonLookupAttribs)
|
||||
|
||||
# for PersonTrip time_underground is float (decimal hours)
|
||||
for tripperson, time_underground in trippersons:
|
||||
# print(f" - {tid} '{tripperson}' author:{tripperson == author}")
|
||||
lookupAttribs={'personexpedition':tripperson, 'logbook_entry':lbo}
|
||||
nonLookupAttribs={'time_underground':time_underground, 'is_logbook_entry_author':(tripperson == author)}
|
||||
lookupAttribs = {"personexpedition": tripperson, "logbook_entry": lbo}
|
||||
nonLookupAttribs = {"time_underground": time_underground, "is_logbook_entry_author": (tripperson == author)}
|
||||
# this creates the PersonTrip instance.
|
||||
save_carefully(PersonTrip, lookupAttribs, nonLookupAttribs)
|
||||
|
||||
|
||||
def ParseDate(tripdate, year):
|
||||
""" Interprets dates in the expo logbooks and returns a correct datetime.date object """
|
||||
"""Interprets dates in the expo logbooks and returns a correct datetime.date object"""
|
||||
dummydate = date(1970, 1, 1)
|
||||
month = 1
|
||||
day = 1
|
||||
@ -261,16 +300,16 @@ def ParseDate(tripdate, year):
|
||||
if mdatestandard:
|
||||
if not (mdatestandard.group(1) == year):
|
||||
message = f" ! - Bad date (year) in logbook: {tripdate} - {year}"
|
||||
DataIssue.objects.create(parser='logbooks', message=message)
|
||||
logdataissues["tripdate"]=message
|
||||
DataIssue.objects.create(parser="logbooks", message=message)
|
||||
logdataissues["tripdate"] = message
|
||||
return dummydate
|
||||
else:
|
||||
year, month, day = int(mdatestandard.group(1)), int(mdatestandard.group(2)), int(mdatestandard.group(3))
|
||||
elif mdategoof:
|
||||
if not (not mdategoof.group(3) or mdategoof.group(3) == year[:2]):
|
||||
message = " ! - Bad date mdategoof.group(3) in logbook: " + tripdate + " - " + mdategoof.group(3)
|
||||
DataIssue.objects.create(parser='logbooks', message=message)
|
||||
logdataissues["tripdate"]=message
|
||||
DataIssue.objects.create(parser="logbooks", message=message)
|
||||
logdataissues["tripdate"] = message
|
||||
return dummydate
|
||||
else:
|
||||
yadd = int(year[:2]) * 100
|
||||
@ -278,25 +317,26 @@ def ParseDate(tripdate, year):
|
||||
else:
|
||||
year = 1970
|
||||
message = f" ! - Bad date in logbook: {tripdate} - {year}"
|
||||
DataIssue.objects.create(parser='logbooks', message=message)
|
||||
logdataissues["tripdate"]=message
|
||||
DataIssue.objects.create(parser="logbooks", message=message)
|
||||
logdataissues["tripdate"] = message
|
||||
|
||||
return date(year, month, day)
|
||||
except:
|
||||
message = f" ! - Failed to parse date in logbook: {tripdate} - {year}"
|
||||
DataIssue.objects.create(parser='logbooks', message=message)
|
||||
logdataissues["tripdate"]=message
|
||||
DataIssue.objects.create(parser="logbooks", message=message)
|
||||
logdataissues["tripdate"] = message
|
||||
return datetime.date(1970, 1, 1)
|
||||
|
||||
|
||||
# 2002 - now
|
||||
def parser_html(year, expedition, txt, seq=""):
|
||||
'''This uses some of the more obscure capabilities of regular expressions,
|
||||
"""This uses some of the more obscure capabilities of regular expressions,
|
||||
see https://docs.python.org/3/library/re.html
|
||||
|
||||
You can't see it here, but a round-trip export-then-import will move
|
||||
the endmatter up to the frontmatter. This makes sense when moving
|
||||
from parser_html_01 format logfiles, believe me.
|
||||
'''
|
||||
"""
|
||||
global logentries
|
||||
global logdataissues
|
||||
|
||||
@ -305,29 +345,30 @@ def parser_html(year, expedition, txt, seq=""):
|
||||
headpara = headmatch.groups()[0].strip()
|
||||
|
||||
# print(f" - headpara:\n'{headpara}'")
|
||||
if(len(headpara)>0):
|
||||
if len(headpara) > 0:
|
||||
frontpath = Path(settings.EXPOWEB, "years", year, "frontmatter.html")
|
||||
with open(frontpath,"w") as front:
|
||||
front.write(headpara+"\n")
|
||||
with open(frontpath, "w") as front:
|
||||
front.write(headpara + "\n")
|
||||
|
||||
# extract END material and stash for later use when rebuilding from list of entries
|
||||
endmatch = re.match(r"(?i)(?s).*<hr\s*/>([\s\S]*?)(?=</body)", txt)
|
||||
endpara = endmatch.groups()[0].strip()
|
||||
|
||||
# print(f" - endpara:\n'{endpara}'")
|
||||
if(len(endpara)>0):
|
||||
if len(endpara) > 0:
|
||||
endpath = Path(settings.EXPOWEB, "years", year, "endmatter.html")
|
||||
with open(endpath,"w") as end:
|
||||
end.write(endpara+"\n")
|
||||
with open(endpath, "w") as end:
|
||||
end.write(endpara + "\n")
|
||||
|
||||
tripparas = re.findall(r"<hr\s*/>([\s\S]*?)(?=<hr)", txt)
|
||||
logbook_entry_count = 0
|
||||
for trippara in tripparas:
|
||||
logbook_entry_count += 1
|
||||
tid = set_trip_id(year,logbook_entry_count)
|
||||
tid = set_trip_id(year, logbook_entry_count)
|
||||
# print(f' - new tid:{tid} lbe count: {logbook_entry_count}')
|
||||
|
||||
s = re.match(r'''(?x)(?:\s*<div\sclass="tripdate"\sid=".*?">.*?</div>\s*<p>)? # second date
|
||||
s = re.match(
|
||||
r"""(?x)(?:\s*<div\sclass="tripdate"\sid=".*?">.*?</div>\s*<p>)? # second date
|
||||
\s*(?:<a\s+id="(.*?)"\s*/>\s*</a>)?
|
||||
\s*<div\s+class="tripdate"\s*(?:id="(.*?)")?>(.*?)</div>(?:<p>)?
|
||||
\s*<div\s+class="trippeople">\s*(.*?)</div>
|
||||
@ -335,16 +376,19 @@ def parser_html(year, expedition, txt, seq=""):
|
||||
([\s\S]*?)
|
||||
\s*(?:<div\s+class="timeug">\s*(.*?)</div>)?
|
||||
\s*$
|
||||
''', trippara)
|
||||
""",
|
||||
trippara,
|
||||
)
|
||||
if s:
|
||||
tripid, tripid1, tripdate, trippeople, triptitle, triptext, tu = s.groups()
|
||||
else: # allow title and people to be swapped in order
|
||||
msg = f" !- {year} Can't parse:{logbook_entry_count} '{trippara[:50]}'..."
|
||||
print(msg)
|
||||
DataIssue.objects.create(parser='logbooks', message=msg)
|
||||
logdataissues[tid]=msg
|
||||
DataIssue.objects.create(parser="logbooks", message=msg)
|
||||
logdataissues[tid] = msg
|
||||
|
||||
s2 = re.match(r'''(?x)(?:\s*<div\sclass="tripdate"\sid=".*?">.*?</div>\s*<p>)? # second date
|
||||
s2 = re.match(
|
||||
r"""(?x)(?:\s*<div\sclass="tripdate"\sid=".*?">.*?</div>\s*<p>)? # second date
|
||||
\s*(?:<a\s+id="(.*?)"\s*/>\s*</a>)?
|
||||
\s*<div\s+class="tripdate"\s*(?:id="(.*?)")?>(.*?)</div>(?:<p>)?
|
||||
\s*<div\s+class="triptitle">\s*(.*?)</div>
|
||||
@ -352,15 +396,17 @@ def parser_html(year, expedition, txt, seq=""):
|
||||
([\s\S]*?)
|
||||
\s*(?:<div\s+class="timeug">\s*(.*?)</div>)?
|
||||
\s*$
|
||||
''', trippara)
|
||||
""",
|
||||
trippara,
|
||||
)
|
||||
if s2:
|
||||
tripid, tripid1, tripdate, triptitle, trippeople, triptext, tu = s2.groups()
|
||||
else:
|
||||
# if not re.search(r"Rigging Guide", trippara):
|
||||
msg = f" !- Logbook. Can't parse entry on 2nd pass:{logbook_entry_count} '{trippara[:50]}'..."
|
||||
print(msg)
|
||||
DataIssue.objects.create(parser='logbooks', message=msg)
|
||||
logdataissues[tid]=msg
|
||||
DataIssue.objects.create(parser="logbooks", message=msg)
|
||||
logdataissues[tid] = msg
|
||||
continue
|
||||
|
||||
ldate = ParseDate(tripdate.strip(), year)
|
||||
@ -370,14 +416,14 @@ def parser_html(year, expedition, txt, seq=""):
|
||||
else:
|
||||
tripcave = "UNKNOWN"
|
||||
ltriptext = re.sub(r"</p>", "", triptext)
|
||||
#ltriptext = re.sub(r"\s*?\n\s*", " ", ltriptext)
|
||||
# ltriptext = re.sub(r"\s*?\n\s*", " ", ltriptext)
|
||||
ltriptext = re.sub(r"<p>", "<br /><br />", ltriptext).strip()
|
||||
|
||||
triptitle = triptitle.strip()
|
||||
entrytuple = (ldate, tripcave, triptitle, ltriptext,
|
||||
trippeople, expedition, tu, tripid1)
|
||||
entrytuple = (ldate, tripcave, triptitle, ltriptext, trippeople, expedition, tu, tripid1)
|
||||
logentries.append(entrytuple)
|
||||
|
||||
|
||||
# main parser for 1991 - 2001. simpler because the data has been hacked so much to fit it
|
||||
def parser_html_01(year, expedition, txt, seq=""):
|
||||
global logentries
|
||||
@ -389,10 +435,10 @@ def parser_html_01(year, expedition, txt, seq=""):
|
||||
headpara = headmatch.groups()[0].strip()
|
||||
|
||||
# print(f" - headpara:\n'{headpara}'")
|
||||
if(len(headpara)>0):
|
||||
if len(headpara) > 0:
|
||||
frontpath = Path(settings.EXPOWEB, "years", year, "frontmatter.html")
|
||||
with open(frontpath,"w") as front:
|
||||
front.write(headpara+"\n")
|
||||
with open(frontpath, "w") as front:
|
||||
front.write(headpara + "\n")
|
||||
|
||||
# extract END material and stash for later use when rebuilding from list of entries
|
||||
endmatch = re.match(r"(?i)(?s).*<hr\s*/>([\s\S]*?)(?=</body)", txt)
|
||||
@ -403,35 +449,34 @@ def parser_html_01(year, expedition, txt, seq=""):
|
||||
endpara = ""
|
||||
|
||||
# print(f" - endpara:\n'{endpara}'")
|
||||
if(len(endpara)>0):
|
||||
if len(endpara) > 0:
|
||||
endpath = Path(settings.EXPOWEB, "years", year, "endmatter.html")
|
||||
with open(endpath,"w") as end:
|
||||
end.write(endpara+"\n")
|
||||
with open(endpath, "w") as end:
|
||||
end.write(endpara + "\n")
|
||||
|
||||
tripparas = re.findall(r"<hr[\s/]*>([\s\S]*?)(?=<hr)", txt)
|
||||
logbook_entry_count = 0
|
||||
for trippara in tripparas:
|
||||
logbook_entry_count += 1
|
||||
tid = set_trip_id(year,logbook_entry_count)
|
||||
tid = set_trip_id(year, logbook_entry_count)
|
||||
# print(f" #0 - tid: {tid}")
|
||||
try:
|
||||
#print(f" #1 - tid: {tid}")
|
||||
# print(f" #1 - tid: {tid}")
|
||||
s = re.match(r"(?i)(?s)\s*(?:<p>)?(.*?)</?p>(.*)$", trippara)
|
||||
if not s:
|
||||
message = " ! - Skipping logentry {year} failure to parse header: " + tid + trippara[:300] + "..."
|
||||
DataIssue.objects.create(parser='logbooks', message=message)
|
||||
logdataissues[tid]=message
|
||||
DataIssue.objects.create(parser="logbooks", message=message)
|
||||
logdataissues[tid] = message
|
||||
print(message)
|
||||
break
|
||||
try:
|
||||
tripheader, triptext = s.group(1), s.group(2)
|
||||
except:
|
||||
message = f" ! - Fail to set tripheader, triptext. trip:<{tid}> s:'{s}'"
|
||||
DataIssue.objects.create(parser='logbooks', message=message)
|
||||
logdataissues[tid]=message
|
||||
DataIssue.objects.create(parser="logbooks", message=message)
|
||||
logdataissues[tid] = message
|
||||
print(message)
|
||||
|
||||
|
||||
# mtripid = re.search(r'<a id="(.*?)"', tripheader)
|
||||
# if not mtripid:
|
||||
# message = f" ! - A tag id not found. Never mind. Not needed. trip:<{tid}> header:'{tripheader}'"
|
||||
@ -442,32 +487,32 @@ def parser_html_01(year, expedition, txt, seq=""):
|
||||
# tripid = mtripid and mtripid.group(1) or ""
|
||||
# print(f" # - mtripid: {mtripid}")
|
||||
tripheader = re.sub(r"</?(?:[ab]|span)[^>]*>", "", tripheader)
|
||||
#print(f" #2 - tid: {tid}")
|
||||
# print(f" #2 - tid: {tid}")
|
||||
try:
|
||||
tripdate, triptitle, trippeople = tripheader.split("|")
|
||||
except:
|
||||
message = f" ! - Fail 3 to split out date|title|people. trip:<{tid}> '{tripheader.split('|')}'"
|
||||
DataIssue.objects.create(parser='logbooks', message=message)
|
||||
logdataissues[tid]=message
|
||||
DataIssue.objects.create(parser="logbooks", message=message)
|
||||
logdataissues[tid] = message
|
||||
print(message)
|
||||
try:
|
||||
tripdate, triptitle = tripheader.split("|")
|
||||
trippeople = "GUESS ANON"
|
||||
except:
|
||||
message = f" ! - Skipping logentry {year} Fail 2 to split out date|title (anon). trip:<{tid}> '{tripheader.split('|')}' CRASHES MySQL !"
|
||||
DataIssue.objects.create(parser='logbooks', message=message)
|
||||
logdataissues[tid]=message
|
||||
DataIssue.objects.create(parser="logbooks", message=message)
|
||||
logdataissues[tid] = message
|
||||
print(message)
|
||||
break
|
||||
#print(f" #3 - tid: {tid}")
|
||||
# print(f" #3 - tid: {tid}")
|
||||
ldate = ParseDate(tripdate.strip(), year)
|
||||
#print(f" # - tid: {tid} <{tripdate}> <{triptitle}> <{trippeople}>")
|
||||
#print(f" #4 - tid: {tid}")
|
||||
# print(f" # - tid: {tid} <{tripdate}> <{triptitle}> <{trippeople}>")
|
||||
# print(f" #4 - tid: {tid}")
|
||||
|
||||
mtu = re.search(r'<p[^>]*>(T/?U.*)', triptext)
|
||||
mtu = re.search(r"<p[^>]*>(T/?U.*)", triptext)
|
||||
if mtu:
|
||||
tu = mtu.group(1)
|
||||
triptext = triptext[:mtu.start(0)] + triptext[mtu.end():]
|
||||
triptext = triptext[: mtu.start(0)] + triptext[mtu.end() :]
|
||||
else:
|
||||
tu = ""
|
||||
|
||||
@ -478,7 +523,7 @@ def parser_html_01(year, expedition, txt, seq=""):
|
||||
|
||||
mtail = re.search(r'(?:<a href="[^"]*">[^<]*</a>|\s|/|-|&|</?p>|\((?:same day|\d+)\))*$', ltriptext)
|
||||
if mtail:
|
||||
ltriptext = ltriptext[:mtail.start(0)]
|
||||
ltriptext = ltriptext[: mtail.start(0)]
|
||||
ltriptext = re.sub(r"</p>", "", ltriptext)
|
||||
ltriptext = re.sub(r"\s*?\n\s*", " ", ltriptext)
|
||||
ltriptext = re.sub(r"</?u>", "_", ltriptext)
|
||||
@ -488,31 +533,30 @@ def parser_html_01(year, expedition, txt, seq=""):
|
||||
|
||||
if ltriptext == "":
|
||||
message = " ! - Zero content for logbook entry!: " + tid
|
||||
DataIssue.objects.create(parser='logbooks', message=message)
|
||||
logdataissues[tid]=message
|
||||
DataIssue.objects.create(parser="logbooks", message=message)
|
||||
logdataissues[tid] = message
|
||||
print(message)
|
||||
|
||||
|
||||
entrytuple = (ldate, tripcave, triptitle, ltriptext,
|
||||
trippeople, expedition, tu, tid)
|
||||
entrytuple = (ldate, tripcave, triptitle, ltriptext, trippeople, expedition, tu, tid)
|
||||
logentries.append(entrytuple)
|
||||
|
||||
except:
|
||||
message = f" ! - Skipping logentry {year} due to exception in: {tid}"
|
||||
DataIssue.objects.create(parser='logbooks', message=message)
|
||||
logdataissues[tid]=message
|
||||
DataIssue.objects.create(parser="logbooks", message=message)
|
||||
logdataissues[tid] = message
|
||||
print(message)
|
||||
errorcount += 1
|
||||
raise
|
||||
if errorcount >5 :
|
||||
if errorcount > 5:
|
||||
message = f" !!- TOO MANY ERRORS - aborting at '{tid}' logbook: {year}"
|
||||
DataIssue.objects.create(parser='logbooks', message=message)
|
||||
logdataissues[tid]=message
|
||||
DataIssue.objects.create(parser="logbooks", message=message)
|
||||
logdataissues[tid] = message
|
||||
print(message)
|
||||
return
|
||||
|
||||
|
||||
def parser_blog(year, expedition, txt, sq=""):
|
||||
'''Parses the format of web pages collected as 'Save As HTML" from the UK Caving blog website.
|
||||
"""Parses the format of web pages collected as 'Save As HTML" from the UK Caving blog website.
|
||||
Note that the entries have dates and authors, but no titles.
|
||||
See detailed explanation of the complete process:
|
||||
https://expo.survex.com/handbook/computing/logbooks-parsing.html
|
||||
@ -527,23 +571,27 @@ def parser_blog(year, expedition, txt, sq=""):
|
||||
</article>
|
||||
</article>
|
||||
So the content is nested inside the header. Attachments (images) come after the content.
|
||||
'''
|
||||
"""
|
||||
global logentries
|
||||
global logdataissues
|
||||
errorcount = 0
|
||||
|
||||
tripheads = re.findall(r"<article class=\"message message--post js-post js-inlineModContainer\s*\"\s*([\s\S]*?)(?=</article)", txt)
|
||||
if not ( tripheads ) :
|
||||
tripheads = re.findall(
|
||||
r"<article class=\"message message--post js-post js-inlineModContainer\s*\"\s*([\s\S]*?)(?=</article)", txt
|
||||
)
|
||||
if not (tripheads):
|
||||
message = f" ! - Skipping on failure to parse article header: {txt[:500]}"
|
||||
print(message)
|
||||
|
||||
# (?= is a non-consuming match, see https://docs.python.org/3/library/re.html
|
||||
tripparas = re.findall(r"<article class=\"message-body js-selectToQuote\"\>\s*([\s\S]*?)(</article[^>]*>)([\s\S]*?)(?=</article)", txt)
|
||||
if not ( tripparas ) :
|
||||
tripparas = re.findall(
|
||||
r"<article class=\"message-body js-selectToQuote\"\>\s*([\s\S]*?)(</article[^>]*>)([\s\S]*?)(?=</article)", txt
|
||||
)
|
||||
if not (tripparas):
|
||||
message = f" ! - Skipping on failure to parse article content: {txt[:500]}"
|
||||
print(message)
|
||||
|
||||
if (len(tripheads) !=len(tripparas)):
|
||||
if len(tripheads) != len(tripparas):
|
||||
print(f"{len(tripheads)} != {len(tripparas)}")
|
||||
print(f"{len(tripheads)} - {len(tripparas)}")
|
||||
|
||||
@ -554,31 +602,31 @@ def parser_blog(year, expedition, txt, sq=""):
|
||||
tripstuff = tripparas[i]
|
||||
attach = tripstuff[2]
|
||||
# note use of the non-greedy *? regex idiom here
|
||||
attach = re.sub(r"<div class=\"file-content\">[\s\S]*?(?=</li>)","",attach)
|
||||
attach = re.sub(r"<footer[\s\S]*(</footer>)","",attach)
|
||||
attach = re.sub(r"<div class=\"file-content\">[\s\S]*?(?=</li>)", "", attach)
|
||||
attach = re.sub(r"<footer[\s\S]*(</footer>)", "", attach)
|
||||
tripcontent = tripstuff[0] + attach
|
||||
#print(f"{i} - {len(tripstuff)} - {tripstuff[1]}")
|
||||
# print(f"{i} - {len(tripstuff)} - {tripstuff[1]}")
|
||||
triphead = tripheads[i]
|
||||
logbook_entry_count += 1
|
||||
tid = set_trip_id(year,logbook_entry_count) +"_blog" + sq
|
||||
tid = set_trip_id(year, logbook_entry_count) + "_blog" + sq
|
||||
# print(f" - tid: {tid}")
|
||||
|
||||
# data-author="tcacrossley"
|
||||
match_author = re.search(r".*data-author=\"([^\"]*)\" data-content=.*", triphead)
|
||||
if not ( match_author ) :
|
||||
if not (match_author):
|
||||
message = f" ! - Skipping logentry {year}:{logbook_entry_count} on failure to parse data-author {tid} {triphead[:400]}..."
|
||||
DataIssue.objects.create(parser='logbooks', message=message)
|
||||
logdataissues[tid]=message
|
||||
DataIssue.objects.create(parser="logbooks", message=message)
|
||||
logdataissues[tid] = message
|
||||
print(message)
|
||||
break
|
||||
trippeople = match_author.group(1)
|
||||
# print(f" - tid: {tid} {trippeople}")
|
||||
# datetime="2019-07-11T13:16:18+0100"
|
||||
match_datetime = re.search(r".*datetime=\"([^\"]*)\" data-time=.*", triphead)
|
||||
if not ( match_datetime ) :
|
||||
if not (match_datetime):
|
||||
message = f" ! - Skipping logentry {year}:{logbook_entry_count} on failure to parse datetime {tid} {triphead[:400]}..."
|
||||
DataIssue.objects.create(parser='logbooks', message=message)
|
||||
logdataissues[tid]=message
|
||||
DataIssue.objects.create(parser="logbooks", message=message)
|
||||
logdataissues[tid] = message
|
||||
print(message)
|
||||
break
|
||||
datestamp = match_datetime.group(1)
|
||||
@ -587,8 +635,8 @@ def parser_blog(year, expedition, txt, sq=""):
|
||||
tripdate = datetime.fromisoformat(datestamp)
|
||||
except:
|
||||
message = f" ! - FROMISOFORMAT fail logentry {year}:{logbook_entry_count} {tid} '{datestamp}'"
|
||||
DataIssue.objects.create(parser='logbooks', message=message)
|
||||
logdataissues[tid]=message
|
||||
DataIssue.objects.create(parser="logbooks", message=message)
|
||||
logdataissues[tid] = message
|
||||
print(message)
|
||||
# fallback, ignore the timestamp bits:
|
||||
tripdate = datetime.fromisoformat(datestamp[0:10])
|
||||
@ -597,20 +645,19 @@ def parser_blog(year, expedition, txt, sq=""):
|
||||
# tripname must have the location then a hyphen at the beginning as it is ignored by export function
|
||||
location = "Unknown"
|
||||
tripname = f"Expo - UK Caving Blog{sq} post {logbook_entry_count}" # must be unique for a given date
|
||||
tripcontent = re.sub(r"(width=\"\d+\")","",tripcontent)
|
||||
tripcontent = re.sub(r"height=\"\d+\"","",tripcontent)
|
||||
tripcontent = re.sub(r"width: \d+px","",tripcontent)
|
||||
tripcontent = re.sub(r"\n\n+","\n\n",tripcontent)
|
||||
tripcontent = re.sub(r"<hr\s*>","",tripcontent)
|
||||
tripcontent = re.sub(r"(width=\"\d+\")", "", tripcontent)
|
||||
tripcontent = re.sub(r"height=\"\d+\"", "", tripcontent)
|
||||
tripcontent = re.sub(r"width: \d+px", "", tripcontent)
|
||||
tripcontent = re.sub(r"\n\n+", "\n\n", tripcontent)
|
||||
tripcontent = re.sub(r"<hr\s*>", "", tripcontent)
|
||||
tripcontent = f"\n\n<!-- Content parsed from UK Caving Blog -->\nBlog Author: {trippeople}" + tripcontent
|
||||
|
||||
entrytuple = (tripdate, location, tripname, tripcontent,
|
||||
trippeople, expedition, tu, tid)
|
||||
entrytuple = (tripdate, location, tripname, tripcontent, trippeople, expedition, tu, tid)
|
||||
logentries.append(entrytuple)
|
||||
|
||||
|
||||
def LoadLogbookForExpedition(expedition, clean=True):
|
||||
""" Parses all logbook entries for one expedition
|
||||
"""Parses all logbook entries for one expedition
|
||||
if clean==True then it deletes all entries for this year first.
|
||||
"""
|
||||
global logentries
|
||||
@ -621,33 +668,32 @@ def LoadLogbookForExpedition(expedition, clean=True):
|
||||
logbook_parseable = False
|
||||
yearlinks = LOGBOOK_PARSER_SETTINGS
|
||||
expologbase = os.path.join(settings.EXPOWEB, "years")
|
||||
logentries=[]
|
||||
logentries = []
|
||||
|
||||
year = expedition.year
|
||||
expect = entries[year]
|
||||
# print(" - Logbook for: " + year)
|
||||
|
||||
|
||||
|
||||
def cleanerrors(year):
|
||||
global logdataissues
|
||||
dataissues = DataIssue.objects.filter(parser='logbooks')
|
||||
dataissues = DataIssue.objects.filter(parser="logbooks")
|
||||
for di in dataissues:
|
||||
ph = year
|
||||
if re.search(ph, di.message) is not None:
|
||||
#print(f' - CLEANING dataissue {di.message}')
|
||||
# print(f' - CLEANING dataissue {di.message}')
|
||||
di.delete()
|
||||
|
||||
#print(f' - CLEAN {year} {len(logdataissues)} {type(logdataissues)} data issues for this year')
|
||||
# print(f' - CLEAN {year} {len(logdataissues)} {type(logdataissues)} data issues for this year')
|
||||
dellist = []
|
||||
for key, value in logdataissues.items():
|
||||
#print(f' - CLEANING logdataissues [{key}]: {value}')
|
||||
# print(f' - CLEANING logdataissues [{key}]: {value}')
|
||||
if key.startswith(year):
|
||||
#print(f' - CLEANING logdataissues [{key:12}]: {value} ')
|
||||
# print(f' - CLEANING logdataissues [{key:12}]: {value} ')
|
||||
dellist.append(key)
|
||||
for i in dellist:
|
||||
del logdataissues[i]
|
||||
if (clean):
|
||||
|
||||
if clean:
|
||||
cleanerrors(year)
|
||||
|
||||
if year in yearlinks:
|
||||
@ -665,7 +711,7 @@ def LoadLogbookForExpedition(expedition, clean=True):
|
||||
expedition.save()
|
||||
|
||||
lbes = LogbookEntry.objects.filter(expedition=expedition)
|
||||
if (clean):
|
||||
if clean:
|
||||
for lbe in lbes:
|
||||
lbe.delete()
|
||||
|
||||
@ -675,7 +721,7 @@ def LoadLogbookForExpedition(expedition, clean=True):
|
||||
# print(f" ! End of blog. Next blog file in sequence not there:{lb}")
|
||||
break
|
||||
try:
|
||||
with open(lb,'rb') as file_in:
|
||||
with open(lb, "rb") as file_in:
|
||||
txt = file_in.read().decode("utf-8")
|
||||
logbook_parseable = True
|
||||
except (IOError):
|
||||
@ -689,7 +735,7 @@ def LoadLogbookForExpedition(expedition, clean=True):
|
||||
|
||||
# --------------------
|
||||
parser = globals()[parsefunc]
|
||||
print(f' - {year} parsing with {parsefunc} - {lb}')
|
||||
print(f" - {year} parsing with {parsefunc} - {lb}")
|
||||
parser(year, expedition, txt, sq) # this launches the right parser for this year
|
||||
# --------------------
|
||||
dupl = {}
|
||||
@ -699,11 +745,10 @@ def LoadLogbookForExpedition(expedition, clean=True):
|
||||
if check in dupl:
|
||||
dupl[check] += 1
|
||||
triptitle = f"{triptitle} #{dupl[check]}"
|
||||
print(f' - {triptitle} -- {date}')
|
||||
print(f" - {triptitle} -- {date}")
|
||||
else:
|
||||
dupl[check] = 1
|
||||
EnterLogIntoDbase(date, tripcave, triptitle, text, trippeople, expedition, logtime_underground,
|
||||
tripid1)
|
||||
EnterLogIntoDbase(date, tripcave, triptitle, text, trippeople, expedition, logtime_underground, tripid1)
|
||||
|
||||
if len(logentries) == expect:
|
||||
# print(f"OK {year} {len(logentries):5d} is {expect}\n")
|
||||
@ -713,13 +758,13 @@ def LoadLogbookForExpedition(expedition, clean=True):
|
||||
|
||||
return len(logentries)
|
||||
|
||||
|
||||
def LoadLogbook(year):
|
||||
'''One off logbook for testing purposes
|
||||
'''
|
||||
"""One off logbook for testing purposes"""
|
||||
global LOGBOOK_PARSER_SETTINGS
|
||||
|
||||
nlbe={}
|
||||
TROG['pagecache']['expedition'][year] = None # clear cache
|
||||
nlbe = {}
|
||||
TROG["pagecache"]["expedition"][year] = None # clear cache
|
||||
|
||||
expo = Expedition.objects.get(year=year)
|
||||
year = expo.year # some type funny
|
||||
@ -729,10 +774,13 @@ def LoadLogbook(year):
|
||||
LOGBOOK_PARSER_SETTINGS[year] = BLOG_PARSER_SETTINGS[year]
|
||||
nlbe[expo] = LoadLogbookForExpedition(expo, clean=False) # this loads the blog logbook for one expo
|
||||
else:
|
||||
print(f"Not a year with extant blog entries to import: '{year}' not in BLOG_PARSER_SETTINGS {BLOG_PARSER_SETTINGS}")
|
||||
print(
|
||||
f"Not a year with extant blog entries to import: '{year}' not in BLOG_PARSER_SETTINGS {BLOG_PARSER_SETTINGS}"
|
||||
)
|
||||
|
||||
|
||||
def LoadLogbooks():
|
||||
""" This is the master function for parsing all logbooks into the Troggle database.
|
||||
"""This is the master function for parsing all logbooks into the Troggle database.
|
||||
This should be rewritten to use coroutines to load all logbooks from disc in parallel,
|
||||
but must be serialised to write to database as sqlite is single-user.
|
||||
"""
|
||||
@ -740,33 +788,37 @@ def LoadLogbooks():
|
||||
global entries
|
||||
|
||||
logdataissues = {}
|
||||
DataIssue.objects.filter(parser='logbooks').delete()
|
||||
DataIssue.objects.filter(parser="logbooks").delete()
|
||||
expos = Expedition.objects.all()
|
||||
if len(expos) <= 1:
|
||||
message = f" ! - No expeditions found. Load 'people' first"
|
||||
DataIssue.objects.create(parser='logbooks', message=message)
|
||||
logdataissues[f"sqlfail 0000"]=message
|
||||
DataIssue.objects.create(parser="logbooks", message=message)
|
||||
logdataissues[f"sqlfail 0000"] = message
|
||||
print(message)
|
||||
return
|
||||
|
||||
noexpo = ["1986", "2020", "2021",] #no expo
|
||||
noexpo = [
|
||||
"1986",
|
||||
"2020",
|
||||
"2021",
|
||||
] # no expo
|
||||
lostlogbook = ["1976", "1977", "1978", "1979", "1980", "1981"]
|
||||
sqlfail = [""] # breaks mysql with db constraint fail - all now fixed.]
|
||||
nologbook = noexpo + lostlogbook + sqlfail
|
||||
|
||||
nlbe={}
|
||||
expd ={}
|
||||
nlbe = {}
|
||||
expd = {}
|
||||
loglist = []
|
||||
bloglist = []
|
||||
|
||||
for expo in expos: # pointless as we explicitly know the years in this code.
|
||||
year = expo.year
|
||||
TROG['pagecache']['expedition'][year] = None # clear cache
|
||||
TROG["pagecache"]["expedition"][year] = None # clear cache
|
||||
if year in sqlfail:
|
||||
print(" - Logbook for: " + year + " NO parsing attempted - known sql failures")
|
||||
message = f" ! - Not even attempting to parse logbook for {year} until code fixed"
|
||||
DataIssue.objects.create(parser='logbooks', message=message)
|
||||
logdataissues[f"sqlfail {year}"]=message
|
||||
DataIssue.objects.create(parser="logbooks", message=message)
|
||||
logdataissues[f"sqlfail {year}"] = message
|
||||
print(message)
|
||||
|
||||
if year not in nologbook:
|
||||
@ -778,7 +830,6 @@ def LoadLogbooks():
|
||||
if year in BLOG_PARSER_SETTINGS:
|
||||
bloglist.append(expo)
|
||||
|
||||
|
||||
for ex in loglist:
|
||||
nlbe[ex] = LoadLogbookForExpedition(ex) # this loads the logbook for one expo
|
||||
|
||||
@ -803,7 +854,6 @@ def LoadLogbooks():
|
||||
print(f"total {yt:,} log entries parsed in all expeditions")
|
||||
|
||||
|
||||
|
||||
# dateRegex = re.compile(r'<span\s+class="date">(\d\d\d\d)-(\d\d)-(\d\d)</span>', re.S)
|
||||
# expeditionYearRegex = re.compile(r'<span\s+class="expeditionyear">(.*?)</span>', re.S)
|
||||
# titleRegex = re.compile(r'<H1>(.*?)</H1>', re.S)
|
||||
@ -813,4 +863,3 @@ def LoadLogbooks():
|
||||
# TURegex = re.compile(r'<span\s+class="TU">([0-9]*\.?[0-9]+)</span>', re.S)
|
||||
# locationRegex = re.compile(r'<span\s+class="location">(.*?)</span>', re.S)
|
||||
# caveRegex = re.compile(r'<span\s+class="cave">(.*?)</span>', re.S)
|
||||
|
||||
|
@ -9,15 +9,15 @@ from pathlib import Path
|
||||
from django.conf import settings
|
||||
from unidecode import unidecode
|
||||
|
||||
from troggle.core.models.troggle import (DataIssue, Expedition, Person,
|
||||
PersonExpedition)
|
||||
from troggle.core.models.troggle import DataIssue, Expedition, Person, PersonExpedition
|
||||
from troggle.core.utils import TROG, save_carefully
|
||||
|
||||
'''These functions do not match how the stand-alone folk script works. So the script produces an HTML file which has
|
||||
"""These functions do not match how the stand-alone folk script works. So the script produces an HTML file which has
|
||||
href links to pages in troggle which troggle does not think are right.
|
||||
The standalone script needs to be rendered defunct, and all the parsing needs to be in troggle. Either that,
|
||||
or they should use the same code by importing a module.
|
||||
'''
|
||||
"""
|
||||
|
||||
|
||||
def parse_blurb(personline, header, person):
|
||||
"""create mugshot Photo instance"""
|
||||
@ -28,47 +28,48 @@ def parse_blurb(personline, header, person):
|
||||
if not ms_path.is_file():
|
||||
message = f"! INVALID mug_shot field '{ms_filename}' for {person.fullname}"
|
||||
print(message)
|
||||
DataIssue.objects.create(parser='people', message=message, url=f"/person/{person.fullname}")
|
||||
DataIssue.objects.create(parser="people", message=message, url=f"/person/{person.fullname}")
|
||||
return
|
||||
|
||||
if ms_filename.startswith('i/'):
|
||||
#if person just has an image, add it. It has format 'i/adama2018.jpg'
|
||||
if ms_filename.startswith("i/"):
|
||||
# if person just has an image, add it. It has format 'i/adama2018.jpg'
|
||||
person.mug_shot = str(Path("/folk", ms_filename))
|
||||
person.blurb = None
|
||||
|
||||
elif ms_filename.startswith('l/'):
|
||||
elif ms_filename.startswith("l/"):
|
||||
# it has the format 'l/ollybetts.htm' the file may contain <img src="../i/mymug.jpg"> images
|
||||
with open(ms_path,'r') as blurbfile:
|
||||
with open(ms_path, "r") as blurbfile:
|
||||
blrb = blurbfile.read()
|
||||
pblurb=re.search(r'<body>.*<hr',blrb,re.DOTALL)
|
||||
pblurb = re.search(r"<body>.*<hr", blrb, re.DOTALL)
|
||||
if pblurb:
|
||||
person.mug_shot = None
|
||||
fragment= re.search('<body>(.*)<hr',blrb,re.DOTALL).group(1)
|
||||
fragment = re.search("<body>(.*)<hr", blrb, re.DOTALL).group(1)
|
||||
fragment = fragment.replace('src="../i/', 'src="/folk/i/')
|
||||
fragment = fragment.replace("src='../i/", "src='/folk/i/")
|
||||
fragment = re.sub(r'<h.*>[^<]*</h.>', '', fragment)
|
||||
fragment = re.sub(r"<h.*>[^<]*</h.>", "", fragment)
|
||||
# replace src="../i/ with src="/folk/i
|
||||
person.blurb = fragment
|
||||
else:
|
||||
message = f"! Blurb parse error in {ms_filename}"
|
||||
print(message)
|
||||
DataIssue.objects.create(parser='people', message=message, url="/folk/")
|
||||
DataIssue.objects.create(parser="people", message=message, url="/folk/")
|
||||
|
||||
elif ms_filename == '':
|
||||
elif ms_filename == "":
|
||||
pass
|
||||
else:
|
||||
message = f"! Unrecognised type of file at mug_shot field '{ms_filename}' for {person.fullname}"
|
||||
print(message)
|
||||
DataIssue.objects.create(parser='people', message=message, url="/folk/")
|
||||
DataIssue.objects.create(parser="people", message=message, url="/folk/")
|
||||
|
||||
person.save()
|
||||
|
||||
|
||||
def load_people_expos():
|
||||
'''This is where the folk.csv file is parsed to read people's names.
|
||||
"""This is where the folk.csv file is parsed to read people's names.
|
||||
Which it gets wrong for people like Lydia-Clare Leather and various 'von' and 'de' middle 'names'
|
||||
and McLean and Mclean and McAdam - interaction with the url parser in urls.py too
|
||||
'''
|
||||
DataIssue.objects.filter(parser='people').delete()
|
||||
"""
|
||||
DataIssue.objects.filter(parser="people").delete()
|
||||
|
||||
persontab = open(os.path.join(settings.EXPOWEB, "folk", "folk.csv")) # should really be EXPOFOLK I guess
|
||||
personreader = csv.reader(persontab) # this is an iterator
|
||||
@ -80,8 +81,8 @@ def load_people_expos():
|
||||
years = headers[5:]
|
||||
|
||||
for year in years:
|
||||
lookupAttribs = {'year':year}
|
||||
nonLookupAttribs = {'name':f"CUCC expo {year}"}
|
||||
lookupAttribs = {"year": year}
|
||||
nonLookupAttribs = {"name": f"CUCC expo {year}"}
|
||||
|
||||
save_carefully(Expedition, lookupAttribs, nonLookupAttribs)
|
||||
|
||||
@ -105,18 +106,18 @@ def load_people_expos():
|
||||
nickname = splitnick.group(2) or ""
|
||||
|
||||
fullname = fullname.strip()
|
||||
names = fullname.split(' ')
|
||||
names = fullname.split(" ")
|
||||
firstname = names[0]
|
||||
if len(names) == 1:
|
||||
lastname = ""
|
||||
|
||||
if personline[header["VfHO member"]] =='':
|
||||
if personline[header["VfHO member"]] == "":
|
||||
vfho = False
|
||||
else:
|
||||
vfho = True
|
||||
|
||||
lookupAttribs={'first_name':firstname, 'last_name':(lastname or "")}
|
||||
nonLookupAttribs={'is_vfho':vfho, 'fullname':fullname, 'nickname':nickname}
|
||||
lookupAttribs = {"first_name": firstname, "last_name": (lastname or "")}
|
||||
nonLookupAttribs = {"is_vfho": vfho, "fullname": fullname, "nickname": nickname}
|
||||
person, created = save_carefully(Person, lookupAttribs, nonLookupAttribs)
|
||||
|
||||
parse_blurb(personline=personline, header=header, person=person)
|
||||
@ -125,12 +126,13 @@ def load_people_expos():
|
||||
for year, attended in list(zip(headers, personline))[5:]:
|
||||
expedition = Expedition.objects.get(year=year)
|
||||
if attended == "1" or attended == "-1":
|
||||
lookupAttribs = {'person':person, 'expedition':expedition}
|
||||
nonLookupAttribs = {'nickname':nickname, 'is_guest':(personline[header["Guest"]] == "1")}
|
||||
lookupAttribs = {"person": person, "expedition": expedition}
|
||||
nonLookupAttribs = {"nickname": nickname, "is_guest": (personline[header["Guest"]] == "1")}
|
||||
save_carefully(PersonExpedition, lookupAttribs, nonLookupAttribs)
|
||||
print("", flush=True)
|
||||
|
||||
def who_is_this(year,possibleid):
|
||||
|
||||
def who_is_this(year, possibleid):
|
||||
expo = Expedition.objects.filter(year=year)
|
||||
personexpedition = GetPersonExpeditionNameLookup(expo)[possibleid.lower()]
|
||||
if personexpedition:
|
||||
@ -138,16 +140,33 @@ def who_is_this(year,possibleid):
|
||||
else:
|
||||
return None
|
||||
|
||||
|
||||
global foreign_friends
|
||||
foreign_friends = ["P. Jeutter", "K. Jäger", "S. Steinberger", "R. Seebacher",
|
||||
"Dominik Jauch", "Fritz Mammel", "Marcus Scheuerman",
|
||||
"Uli Schütz", "Wieland Scheuerle", "Arndt Karger",
|
||||
"Kai Schwekend", "Regina Kaiser", "Thilo Müller","Wieland Scheuerle",
|
||||
"Florian Gruner", "Helmut Stopka-Ebeler", "Aiko", "Mark Morgan", "Arndt Karger"]
|
||||
foreign_friends = [
|
||||
"P. Jeutter",
|
||||
"K. Jäger",
|
||||
"S. Steinberger",
|
||||
"R. Seebacher",
|
||||
"Dominik Jauch",
|
||||
"Fritz Mammel",
|
||||
"Marcus Scheuerman",
|
||||
"Uli Schütz",
|
||||
"Wieland Scheuerle",
|
||||
"Arndt Karger",
|
||||
"Kai Schwekend",
|
||||
"Regina Kaiser",
|
||||
"Thilo Müller",
|
||||
"Wieland Scheuerle",
|
||||
"Florian Gruner",
|
||||
"Helmut Stopka-Ebeler",
|
||||
"Aiko",
|
||||
"Mark Morgan",
|
||||
"Arndt Karger",
|
||||
]
|
||||
|
||||
|
||||
def known_foreigner(id):
|
||||
'''Is this someone from ARGE or a known Austrian? Name has to be exact, no soft matching
|
||||
'''
|
||||
"""Is this someone from ARGE or a known Austrian? Name has to be exact, no soft matching"""
|
||||
global foreign_friends
|
||||
|
||||
if id in foreign_friends:
|
||||
@ -159,13 +178,14 @@ def known_foreigner(id):
|
||||
# Refactor. The dict GetPersonExpeditionNameLookup(expo) indexes by name and has values of personexpedition
|
||||
# This is convoluted, the whole personexpedition concept is unnecessary?
|
||||
|
||||
Gpersonexpeditionnamelookup = { }
|
||||
Gpersonexpeditionnamelookup = {}
|
||||
|
||||
|
||||
def GetPersonExpeditionNameLookup(expedition):
|
||||
global Gpersonexpeditionnamelookup
|
||||
|
||||
def apply_variations(f, l):
|
||||
'''Be generous in guessing possible matches. Any duplicates will be ruled as invalid.
|
||||
'''
|
||||
"""Be generous in guessing possible matches. Any duplicates will be ruled as invalid."""
|
||||
f = f.lower()
|
||||
l = l.lower()
|
||||
variations = []
|
||||
@ -175,7 +195,7 @@ def GetPersonExpeditionNameLookup(expedition):
|
||||
variations.append(f + " " + l)
|
||||
variations.append(f + " " + l[0])
|
||||
variations.append(f + l[0])
|
||||
variations.append(f + " " +l[0] + '.')
|
||||
variations.append(f + " " + l[0] + ".")
|
||||
variations.append(f[0] + " " + l)
|
||||
variations.append(f[0] + ". " + l)
|
||||
variations.append(f[0] + l)
|
||||
@ -187,15 +207,15 @@ def GetPersonExpeditionNameLookup(expedition):
|
||||
if res:
|
||||
return res
|
||||
|
||||
res = { }
|
||||
res = {}
|
||||
duplicates = set()
|
||||
|
||||
#print("Calculating GetPersonExpeditionNameLookup for " + expedition.year)
|
||||
# print("Calculating GetPersonExpeditionNameLookup for " + expedition.year)
|
||||
personexpeditions = PersonExpedition.objects.filter(expedition=expedition)
|
||||
short = {}
|
||||
dellist = []
|
||||
for personexpedition in personexpeditions:
|
||||
possnames = [ ]
|
||||
possnames = []
|
||||
f = unidecode(unescape(personexpedition.person.first_name.lower()))
|
||||
l = unidecode(unescape(personexpedition.person.last_name.lower()))
|
||||
full = unidecode(unescape(personexpedition.person.fullname.lower()))
|
||||
@ -206,7 +226,7 @@ def GetPersonExpeditionNameLookup(expedition):
|
||||
possnames.append(n)
|
||||
|
||||
if l:
|
||||
possnames += apply_variations(f,l)
|
||||
possnames += apply_variations(f, l)
|
||||
|
||||
if n:
|
||||
possnames += apply_variations(n, l)
|
||||
@ -246,38 +266,38 @@ def GetPersonExpeditionNameLookup(expedition):
|
||||
if f == "Becka".lower():
|
||||
possnames += apply_variations("Rebecca", l)
|
||||
|
||||
if f'{f} {l}' == "Andy Waddington".lower():
|
||||
if f"{f} {l}" == "Andy Waddington".lower():
|
||||
possnames += apply_variations("aer", "waddington")
|
||||
if f'{f} {l}' == "Phil Underwood".lower():
|
||||
if f"{f} {l}" == "Phil Underwood".lower():
|
||||
possnames += apply_variations("phil", "underpants")
|
||||
if f'{f} {l}' == "Naomi Griffiths".lower():
|
||||
if f"{f} {l}" == "Naomi Griffiths".lower():
|
||||
possnames += apply_variations("naomi", "makins")
|
||||
if f'{f} {l}' == "Tina White".lower():
|
||||
if f"{f} {l}" == "Tina White".lower():
|
||||
possnames += apply_variations("tina", "richardson")
|
||||
if f'{f} {l}' == "Cat Hulse".lower():
|
||||
if f"{f} {l}" == "Cat Hulse".lower():
|
||||
possnames += apply_variations("catherine", "hulse")
|
||||
possnames += apply_variations("cat", "henry")
|
||||
if f'{f} {l}' == "Jess Stirrups".lower():
|
||||
if f"{f} {l}" == "Jess Stirrups".lower():
|
||||
possnames += apply_variations("jessica", "stirrups")
|
||||
if f'{f} {l}' == "Nat Dalton".lower():
|
||||
if f"{f} {l}" == "Nat Dalton".lower():
|
||||
possnames += apply_variations("nathanael", "dalton") # correct. He has a weird spelling.
|
||||
if f'{f} {l}' == "Mike Richardson".lower():
|
||||
if f"{f} {l}" == "Mike Richardson".lower():
|
||||
possnames.append("mta")
|
||||
possnames.append("miketa")
|
||||
possnames.append("mike the animal")
|
||||
possnames.append("animal")
|
||||
if f'{f} {l}' == "Eric Landgraf".lower():
|
||||
if f"{f} {l}" == "Eric Landgraf".lower():
|
||||
possnames.append("eric c.landgraf")
|
||||
possnames.append("eric c. landgraf")
|
||||
possnames.append("eric c landgraf")
|
||||
if f'{f} {l}' == "Nadia Raeburn".lower():
|
||||
if f"{f} {l}" == "Nadia Raeburn".lower():
|
||||
possnames.append("nadia rc")
|
||||
possnames.append("nadia raeburn-cherradi")
|
||||
|
||||
for i in [3, 4, 5, 6]:
|
||||
lim = min(i, len(f)+1) # short form, e.g. Dan for Daniel.
|
||||
lim = min(i, len(f) + 1) # short form, e.g. Dan for Daniel.
|
||||
if f[:lim] not in short:
|
||||
short[f[:lim]]= personexpedition
|
||||
short[f[:lim]] = personexpedition
|
||||
else:
|
||||
dellist.append(f[:lim])
|
||||
|
||||
@ -292,12 +312,10 @@ def GetPersonExpeditionNameLookup(expedition):
|
||||
del res[possname]
|
||||
|
||||
for possname in dellist:
|
||||
if possname in short: #always true ?
|
||||
if possname in short: # always true ?
|
||||
del short[possname]
|
||||
for shortname in short:
|
||||
res[shortname] = short[shortname]
|
||||
|
||||
|
||||
Gpersonexpeditionnamelookup[expedition.name] = res
|
||||
return res
|
||||
|
||||
|
@ -17,8 +17,8 @@ from troggle.core.models.troggle import DataIssue
|
||||
from troggle.core.utils import save_carefully
|
||||
from troggle.core.views.scans import datewallet
|
||||
|
||||
'''Searches through all the survey scans directories (wallets) in expofiles, looking for images to be referenced.
|
||||
'''
|
||||
"""Searches through all the survey scans directories (wallets) in expofiles, looking for images to be referenced.
|
||||
"""
|
||||
|
||||
contentsjson = "contents.json"
|
||||
|
||||
@ -26,41 +26,67 @@ git = settings.GIT
|
||||
|
||||
# to do: Actually read all the JSON files and set the survex file field appropriately!
|
||||
|
||||
|
||||
def setwalletyear(wallet):
|
||||
_ = wallet.year() # don't need return value. Just calling this saves it as w.walletyear
|
||||
|
||||
|
||||
def load_all_scans():
|
||||
'''This iterates through the scans directories (either here or on the remote server)
|
||||
"""This iterates through the scans directories (either here or on the remote server)
|
||||
and builds up the models we can access later.
|
||||
|
||||
It does NOT read or validate anything in the JSON data attached to each wallet. Those checks
|
||||
are done at runtime, when a wallet is accessed, not at import time.
|
||||
|
||||
'''
|
||||
print(' - Loading Survey Scans')
|
||||
"""
|
||||
print(" - Loading Survey Scans")
|
||||
|
||||
SingleScan.objects.all().delete()
|
||||
Wallet.objects.all().delete()
|
||||
print(' - deleting all Wallet and SingleScan objects')
|
||||
DataIssue.objects.filter(parser='scans').delete()
|
||||
print(" - deleting all Wallet and SingleScan objects")
|
||||
DataIssue.objects.filter(parser="scans").delete()
|
||||
|
||||
# These are valid old file types to be visible, they are not necessarily allowed to be uploaded to a new wallet.
|
||||
valids = [".top",".txt",".tif",".png",".jpg",".jpeg",".pdf",".svg",".gif",".xvi",
|
||||
".json",".autosave",".sxd",".svx",".th",".th2",".tdr",".sql",".zip",".dxf",".3d",
|
||||
".ods",".csv",".xcf",".xml"]
|
||||
validnames = ["thconfig","manifest"]
|
||||
valids = [
|
||||
".top",
|
||||
".txt",
|
||||
".tif",
|
||||
".png",
|
||||
".jpg",
|
||||
".jpeg",
|
||||
".pdf",
|
||||
".svg",
|
||||
".gif",
|
||||
".xvi",
|
||||
".json",
|
||||
".autosave",
|
||||
".sxd",
|
||||
".svx",
|
||||
".th",
|
||||
".th2",
|
||||
".tdr",
|
||||
".sql",
|
||||
".zip",
|
||||
".dxf",
|
||||
".3d",
|
||||
".ods",
|
||||
".csv",
|
||||
".xcf",
|
||||
".xml",
|
||||
]
|
||||
validnames = ["thconfig", "manifest"]
|
||||
|
||||
# iterate into the surveyscans directory
|
||||
# Not all folders with files in them are wallets.
|
||||
# they are if they are /2010/2010#33
|
||||
# or /1996-1999NotKHbook/
|
||||
# but not if they are /2010/2010#33/therion/ : the wallet is /2010#33/ not /therion/
|
||||
print(' ', end='')
|
||||
print(" ", end="")
|
||||
scans_path = Path(settings.SCANS_ROOT)
|
||||
seen = []
|
||||
c=0
|
||||
c = 0
|
||||
wallets = {}
|
||||
for p in scans_path.rglob('*'):
|
||||
for p in scans_path.rglob("*"):
|
||||
if p.is_file():
|
||||
if p.suffix.lower() not in valids and p.name.lower() not in validnames:
|
||||
# print(f"'{p}'", end='\n')
|
||||
@ -69,11 +95,11 @@ def load_all_scans():
|
||||
pass
|
||||
else:
|
||||
|
||||
c+=1
|
||||
if c % 15 == 0 :
|
||||
print(".", end='')
|
||||
if c % 750 == 0 :
|
||||
print("\n ", end='')
|
||||
c += 1
|
||||
if c % 15 == 0:
|
||||
print(".", end="")
|
||||
if c % 750 == 0:
|
||||
print("\n ", end="")
|
||||
|
||||
if p.parent.parent.parent.parent == scans_path:
|
||||
# print(f"too deep {p}", end='\n')
|
||||
@ -86,7 +112,7 @@ def load_all_scans():
|
||||
if walletname in wallets:
|
||||
wallet = wallets[walletname]
|
||||
else:
|
||||
print("", flush=True, end='')
|
||||
print("", flush=True, end="")
|
||||
# Create the wallet object. But we don't have a date for it yet.
|
||||
wallet = Wallet(fpath=fpath, walletname=walletname)
|
||||
setwalletyear(wallet)
|
||||
@ -96,26 +122,24 @@ def load_all_scans():
|
||||
singlescan = SingleScan(ffile=fpath, name=p.name, wallet=wallet)
|
||||
singlescan.save()
|
||||
|
||||
|
||||
# only printing progress:
|
||||
tag = p.parent
|
||||
if len(walletname)>4:
|
||||
if len(walletname) > 4:
|
||||
if walletname[4] == "#":
|
||||
tag = p.parent.parent
|
||||
|
||||
if tag not in seen:
|
||||
print(f" {tag.name} ", end='')
|
||||
print(f" {tag.name} ", end="")
|
||||
if len(str(tag.name)) > 17:
|
||||
print('\n ', end='')
|
||||
print("\n ", end="")
|
||||
seen.append(tag)
|
||||
|
||||
|
||||
print(f'\n - found and loaded {c:,} acceptable scan files in {len(wallets):,} wallets')
|
||||
print(f"\n - found and loaded {c:,} acceptable scan files in {len(wallets):,} wallets")
|
||||
|
||||
# but we also need to check if JSON exists, even if there are no uploaded scan files.
|
||||
# Here we know there is a rigid folder structure, so no need to look for sub folders
|
||||
print(f"\n - Checking for wallets where JSON exists, but there may be no uploaded scan files:")
|
||||
print(' ', end='')
|
||||
print(" ", end="")
|
||||
wjson = 0
|
||||
contents_path = Path(settings.DRAWINGS_DATA, "walletjson")
|
||||
for yeardir in contents_path.iterdir():
|
||||
@ -126,10 +150,10 @@ def load_all_scans():
|
||||
|
||||
if walletname not in wallets:
|
||||
wjson += 1
|
||||
if wjson % 10 == 0 :
|
||||
print("\n ", end='')
|
||||
if wjson % 10 == 0:
|
||||
print("\n ", end="")
|
||||
|
||||
print(f"{walletname} ", end='')
|
||||
print(f"{walletname} ", end="")
|
||||
fpath = Path(settings.SCANS_ROOT, str(yeardir.stem), walletname)
|
||||
# The wallets found from JSON should all have dates already
|
||||
wallet, created = Wallet.objects.update_or_create(walletname=walletname, fpath=fpath)
|
||||
@ -140,9 +164,11 @@ def load_all_scans():
|
||||
# But we *do* set the walletyear:
|
||||
setwalletyear(wallet)
|
||||
if not created:
|
||||
print(f"\n - {walletname} was not created, but was not in directory walk of /surveyscans/. Who created it?")
|
||||
print(
|
||||
f"\n - {walletname} was not created, but was not in directory walk of /surveyscans/. Who created it?"
|
||||
)
|
||||
wallet.save()
|
||||
print(f'\n - found another {wjson:,} JSON files, making a total of {len(wallets):,} wallets')
|
||||
print(f"\n - found another {wjson:,} JSON files, making a total of {len(wallets):,} wallets")
|
||||
wallets = Wallet.objects.filter(walletyear=None)
|
||||
for w in wallets:
|
||||
w.walletyear = datetime.date(1999, 1, 1)
|
||||
|
1063
parsers/survex.py
1063
parsers/survex.py
File diff suppressed because it is too large
Load Diff
5
pyproject.toml
Normal file
5
pyproject.toml
Normal file
@ -0,0 +1,5 @@
|
||||
[tool.black]
|
||||
line-length = 120
|
||||
|
||||
[tool.isort]
|
||||
profile = 'black'
|
92
settings.py
92
settings.py
@ -7,12 +7,12 @@ https://docs.djangoproject.com/en/dev/topics/settings/
|
||||
For the full list of settings and their values, see
|
||||
https://docs.djangoproject.com/en/dev/ref/settings/
|
||||
"""
|
||||
#Imports should be grouped in the following order:
|
||||
# Imports should be grouped in the following order:
|
||||
|
||||
#1.Standard library imports.
|
||||
#2.Related third party imports.
|
||||
#3.Local application/library specific imports.
|
||||
#4.You should put a blank line between each group of imports.
|
||||
# 1.Standard library imports.
|
||||
# 2.Related third party imports.
|
||||
# 3.Local application/library specific imports.
|
||||
# 4.You should put a blank line between each group of imports.
|
||||
|
||||
import os
|
||||
import urllib.parse
|
||||
@ -24,7 +24,7 @@ print("* importing troggle/settings.py")
|
||||
# default value, then gets overwritten by real secrets
|
||||
SECRET_KEY = "not-the-real-secret-key-a#vaeozn0---^fj!355qki*vj2"
|
||||
|
||||
GIT = 'git' # command for running git
|
||||
GIT = "git" # command for running git
|
||||
|
||||
# Note that this builds upon the django system installed
|
||||
# global settings in
|
||||
@ -32,18 +32,18 @@ GIT = 'git' # command for running git
|
||||
# read https://docs.djangoproject.com/en/3.0/topics/settings/
|
||||
|
||||
# Build paths inside the project like this: os.path.join(BASE_DIR, ...)
|
||||
#BASE_DIR = os.path.dirname(os.path.dirname(__file__))
|
||||
# BASE_DIR = os.path.dirname(os.path.dirname(__file__))
|
||||
|
||||
# Django settings for troggle project.
|
||||
|
||||
ALLOWED_HOSTS = ['*', 'expo.survex.com', '.survex.com', 'localhost', '127.0.0.1', '192.168.0.5' ]
|
||||
ALLOWED_HOSTS = ["*", "expo.survex.com", ".survex.com", "localhost", "127.0.0.1", "192.168.0.5"]
|
||||
|
||||
ADMINS = (
|
||||
# ('Your Name', 'your_email@domain.com'),
|
||||
)
|
||||
MANAGERS = ADMINS
|
||||
|
||||
#LOGIN_URL = '/accounts/login/' # this is the default value so does not need to be set
|
||||
# LOGIN_URL = '/accounts/login/' # this is the default value so does not need to be set
|
||||
|
||||
# Local time zone for this installation. Choices can be found here:
|
||||
# http://en.wikipedia.org/wiki/List_of_tz_zones_by_name
|
||||
@ -51,11 +51,11 @@ MANAGERS = ADMINS
|
||||
# If running in a Windows environment this must be set to the same as your
|
||||
# system time zone.
|
||||
USE_TZ = True
|
||||
TIME_ZONE = 'Europe/London'
|
||||
TIME_ZONE = "Europe/London"
|
||||
|
||||
# Language code for this installation. All choices can be found here:
|
||||
# http://www.i18nguy.com/unicode/language-identifiers.html
|
||||
LANGUAGE_CODE = 'en-uk'
|
||||
LANGUAGE_CODE = "en-uk"
|
||||
|
||||
SITE_ID = 1
|
||||
|
||||
@ -73,37 +73,39 @@ SURVEX_TOPNAME = "1623-and-1626-no-schoenberg-hs"
|
||||
# Caves for which survex files exist, but are not otherwise registered
|
||||
# replaced (?) by expoweb/cave_data/pendingcaves.txt
|
||||
# PENDING = ["1626-361", "2007-06", "2009-02",
|
||||
# "2012-ns-01", "2012-ns-02", "2010-04", "2012-ns-05", "2012-ns-06",
|
||||
# "2012-ns-07", "2012-ns-08", "2012-ns-12", "2012-ns-14", "2012-ns-15", "2014-bl888",
|
||||
# "2018-pf-01", "2018-pf-02"]
|
||||
# "2012-ns-01", "2012-ns-02", "2010-04", "2012-ns-05", "2012-ns-06",
|
||||
# "2012-ns-07", "2012-ns-08", "2012-ns-12", "2012-ns-14", "2012-ns-15", "2014-bl888",
|
||||
# "2018-pf-01", "2018-pf-02"]
|
||||
|
||||
APPEND_SLASH = False # never relevant because we have urls that match unknown files and produce an 'edit this page' response
|
||||
SMART_APPEND_SLASH = True #not working as middleware different after Dj2.0
|
||||
APPEND_SLASH = (
|
||||
False # never relevant because we have urls that match unknown files and produce an 'edit this page' response
|
||||
)
|
||||
SMART_APPEND_SLASH = True # not working as middleware different after Dj2.0
|
||||
|
||||
|
||||
LOGIN_REDIRECT_URL = '/' # does not seem to have any effect
|
||||
LOGIN_REDIRECT_URL = "/" # does not seem to have any effect
|
||||
|
||||
SECURE_CONTENT_TYPE_NOSNIFF = True
|
||||
SECURE_BROWSER_XSS_FILTER = True
|
||||
# SESSION_COOKIE_SECURE = True # if enabled, cannot login to Django control panel, bug elsewhere?
|
||||
# CSRF_COOKIE_SECURE = True # if enabled only sends cookies over SSL
|
||||
X_FRAME_OPTIONS = 'DENY' # changed to "DENY" after I eliminated all the iframes e.g. /xmlvalid.html
|
||||
X_FRAME_OPTIONS = "DENY" # changed to "DENY" after I eliminated all the iframes e.g. /xmlvalid.html
|
||||
|
||||
DEFAULT_AUTO_FIELD = 'django.db.models.BigAutoField' # from Django 3.2
|
||||
DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField" # from Django 3.2
|
||||
|
||||
INSTALLED_APPS = (
|
||||
'django.contrib.admin',
|
||||
'django.contrib.auth', # includes the url redirections for login, logout
|
||||
'django.contrib.contenttypes',
|
||||
'django.contrib.sessions',
|
||||
'django.contrib.messages',
|
||||
'django.contrib.admindocs',
|
||||
'django.forms', #Required to customise widget templates
|
||||
# 'django.contrib.staticfiles', # We put our CSS etc explicitly in the right place so do not need this
|
||||
'troggle.core',
|
||||
"django.contrib.admin",
|
||||
"django.contrib.auth", # includes the url redirections for login, logout
|
||||
"django.contrib.contenttypes",
|
||||
"django.contrib.sessions",
|
||||
"django.contrib.messages",
|
||||
"django.contrib.admindocs",
|
||||
"django.forms", # Required to customise widget templates
|
||||
# 'django.contrib.staticfiles', # We put our CSS etc explicitly in the right place so do not need this
|
||||
"troggle.core",
|
||||
)
|
||||
|
||||
FORM_RENDERER = 'django.forms.renderers.TemplatesSetting' #Required to customise widget templates
|
||||
FORM_RENDERER = "django.forms.renderers.TemplatesSetting" # Required to customise widget templates
|
||||
|
||||
# See the recommended order of these in https://docs.djangoproject.com/en/2.2/ref/middleware/
|
||||
# Note that this is a radically different onion architecture from earlier versions though it looks the same,
|
||||
@ -111,38 +113,38 @@ FORM_RENDERER = 'django.forms.renderers.TemplatesSetting' #Required to customise
|
||||
# Seriously, read this: https://www.webforefront.com/django/middlewaredjango.html which is MUCH BETTER than the docs
|
||||
MIDDLEWARE = [
|
||||
#'django.middleware.security.SecurityMiddleware', # SECURE_SSL_REDIRECT and SECURE_SSL_HOST # we don't use this
|
||||
'django.middleware.gzip.GZipMiddleware', # not needed when expofiles and photos served by apache
|
||||
'django.contrib.sessions.middleware.SessionMiddleware', # Manages sessions, if CSRF_USE_SESSIONS then it needs to be early
|
||||
'django.middleware.common.CommonMiddleware', # DISALLOWED_USER_AGENTS, APPEND_SLASH and PREPEND_WWW
|
||||
'django.middleware.csrf.CsrfViewMiddleware', # Cross Site Request Forgeries by adding hidden form fields to POST
|
||||
'django.contrib.auth.middleware.AuthenticationMiddleware', # Adds the user attribute, representing the currently-logged-in user
|
||||
'django.contrib.admindocs.middleware.XViewMiddleware', # this and docutils needed by admindocs
|
||||
'django.contrib.messages.middleware.MessageMiddleware', # Cookie-based and session-based message support. Needed by admin system
|
||||
'django.middleware.clickjacking.XFrameOptionsMiddleware', # clickjacking protection via the X-Frame-Options header
|
||||
"django.middleware.gzip.GZipMiddleware", # not needed when expofiles and photos served by apache
|
||||
"django.contrib.sessions.middleware.SessionMiddleware", # Manages sessions, if CSRF_USE_SESSIONS then it needs to be early
|
||||
"django.middleware.common.CommonMiddleware", # DISALLOWED_USER_AGENTS, APPEND_SLASH and PREPEND_WWW
|
||||
"django.middleware.csrf.CsrfViewMiddleware", # Cross Site Request Forgeries by adding hidden form fields to POST
|
||||
"django.contrib.auth.middleware.AuthenticationMiddleware", # Adds the user attribute, representing the currently-logged-in user
|
||||
"django.contrib.admindocs.middleware.XViewMiddleware", # this and docutils needed by admindocs
|
||||
"django.contrib.messages.middleware.MessageMiddleware", # Cookie-based and session-based message support. Needed by admin system
|
||||
"django.middleware.clickjacking.XFrameOptionsMiddleware", # clickjacking protection via the X-Frame-Options header
|
||||
#'django.middleware.security.SecurityMiddleware', # SECURE_HSTS_SECONDS, SECURE_CONTENT_TYPE_NOSNIFF, SECURE_BROWSER_XSS_FILTER, SECURE_REFERRER_POLICY, and SECURE_SSL_REDIRECT
|
||||
#'troggle.core.middleware.SmartAppendSlashMiddleware' # needs adapting after Dj2.0
|
||||
]
|
||||
|
||||
ROOT_URLCONF = 'troggle.urls'
|
||||
ROOT_URLCONF = "troggle.urls"
|
||||
|
||||
WSGI_APPLICATION = 'troggle.wsgi.application' # change to asgi as soon as we upgrade to Django 3.0
|
||||
WSGI_APPLICATION = "troggle.wsgi.application" # change to asgi as soon as we upgrade to Django 3.0
|
||||
|
||||
ACCOUNT_ACTIVATION_DAYS=3
|
||||
ACCOUNT_ACTIVATION_DAYS = 3
|
||||
|
||||
# AUTH_PROFILE_MODULE = 'core.person' # used by removed profiles app ?
|
||||
|
||||
QM_PATTERN="\[\[\s*[Qq][Mm]:([ABC]?)(\d{4})-(\d*)-(\d*)\]\]"
|
||||
# Regex for wiki-style QM references of the form [[QM:A2019-12-3]] ("QM" in any case).
# Captures: an optional A/B/C letter, a four-digit number, then two (possibly empty) digit runs.
# Written as a raw string so the backslashes reach the regex engine unchanged instead of
# relying on Python passing through invalid string escapes such as "\[" and "\d".
QM_PATTERN = r"\[\[\s*[Qq][Mm]:([ABC]?)(\d{4})-(\d*)-(\d*)\]\]"
|
||||
|
||||
# Re-enable TinyMCE when Dj upgraded to v3. Also templates/editexpopage.html
|
||||
# TINYMCE_DEFAULT_CONFIG = {
|
||||
# 'plugins': "table,spellchecker,paste,searchreplace",
|
||||
# 'theme': "advanced",
|
||||
# 'plugins': "table,spellchecker,paste,searchreplace",
|
||||
# 'theme': "advanced",
|
||||
# }
|
||||
# TINYMCE_SPELLCHECKER = False
|
||||
# TINYMCE_COMPRESSOR = True
|
||||
|
||||
TEST_RUNNER = 'django.test.runner.DiscoverRunner'
|
||||
TEST_RUNNER = "django.test.runner.DiscoverRunner"
|
||||
|
||||
from localsettings import *
|
||||
|
||||
#localsettings needs to take precedence. Call it to override any existing vars.
|
||||
# localsettings needs to take precedence: it is imported last so that it overrides any variables already set above.
|
||||
|
Loading…
Reference in New Issue
Block a user