diff --git a/databaseReset.py b/databaseReset.py index 8ae0f15..0d00bc2 100644 --- a/databaseReset.py +++ b/databaseReset.py @@ -20,8 +20,8 @@ troggle application. """ print(" - settings on loading databaseReset.py", flush=True) -os.environ['PYTHONPATH'] = str(settings.PYTHON_PATH) -os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'settings') +os.environ["PYTHONPATH"] = str(settings.PYTHON_PATH) +os.environ.setdefault("DJANGO_SETTINGS_MODULE", "settings") print(" - settings on loading databaseReset.py") @@ -31,14 +31,15 @@ print(f" - Memory footprint before loading Django: {resource.getrusage(resource. try: django.setup() except: - print(" ! Cyclic reference failure. Can occur when the initial db is empty. Fixed now (in UploadFileForm) but easy to reintroduce..") + print( + " ! Cyclic reference failure. Can occur when the initial db is empty. Fixed now (in UploadFileForm) but easy to reintroduce.." + ) raise print(f" - Memory footprint after loading Django: {resource.getrusage(resource.RUSAGE_SELF)[2] / 1024.0:.3f} MB") from django.contrib.auth.models import User from django.core import management -from django.db import (close_old_connections, connection, connections, - transaction) +from django.db import close_old_connections, connection, connections, transaction from django.http import HttpResponse from django.urls import reverse @@ -46,24 +47,32 @@ import troggle.core.models.survex from troggle.core.models.caves import Cave, Entrance from troggle.core.models.troggle import DataIssue from troggle.core.utils import get_process_memory -from troggle.parsers.imports import (import_caves, import_drawingsfiles, - import_ents, import_loadpos, - import_logbook, import_logbooks, - import_people, import_QMs, import_survex, - import_surveyscans) +from troggle.parsers.imports import ( + import_caves, + import_drawingsfiles, + import_ents, + import_loadpos, + import_logbook, + import_logbooks, + import_people, + import_QMs, + import_survex, + import_surveyscans, +) if os.geteuid() == 0: # This protects the server from having the wrong file permissions written on logs and caches print("This script should be run as expo not root - quitting") exit() -expouser=settings.EXPOUSER -expouserpass=settings.EXPOUSERPASS -expouseremail=settings.EXPOUSER_EMAIL +expouser = settings.EXPOUSER +expouserpass = settings.EXPOUSERPASS +expouseremail = settings.EXPOUSER_EMAIL + +expoadminuser = settings.EXPOADMINUSER +expoadminuserpass = settings.EXPOADMINUSERPASS +expoadminuseremail = settings.EXPOADMINUSER_EMAIL -expoadminuser=settings.EXPOADMINUSER -expoadminuserpass=settings.EXPOADMINUSERPASS -expoadminuseremail=settings.EXPOADMINUSER_EMAIL def reinit_db(): """Rebuild database from scratch. Deletes the file first if sqlite is used, @@ -72,22 +81,26 @@ def reinit_db(): in memory (django python models, not the database), so there is already a full load of stuff known. Deleting the db file does not clear memory. """ - print("Reinitialising db ",end="") - print(django.db.connections.databases['default']['NAME']) - currentdbname = settings.DATABASES['default']['NAME'] - if currentdbname == ':memory:': + print("Reinitialising db ", end="") + print(django.db.connections.databases["default"]["NAME"]) + currentdbname = settings.DATABASES["default"]["NAME"] + if currentdbname == ":memory:": # closing connections should wipe the in-memory database django.db.close_old_connections() for conn in django.db.connections.all(): print(" ! 
Closing another connection to db...") conn.close() - elif django.db.connections.databases['default']['ENGINE'] == 'django.db.backends.sqlite3': + elif django.db.connections.databases["default"]["ENGINE"] == "django.db.backends.sqlite3": if os.path.isfile(currentdbname): try: print(" - deleting " + currentdbname) os.remove(currentdbname) except OSError: - print(" ! OSError on removing: " + currentdbname + "\n ! Is the file open in another app? Is the server running?\n") + print( + " ! OSError on removing: " + + currentdbname + + "\n ! Is the file open in another app? Is the server running?\n" + ) raise else: print(" - No database file found: " + currentdbname + " ..continuing, will create it.\n") @@ -102,102 +115,110 @@ def reinit_db(): cursor.execute(f"USE {currentdbname}") print(f" - Nuked : {currentdbname}\n") - print(" - Migrating: " + django.db.connections.databases['default']['NAME']) + print(" - Migrating: " + django.db.connections.databases["default"]["NAME"]) - if django.db.connections.databases['default']['ENGINE'] == 'django.db.backends.sqlite3': - #with transaction.atomic(): - management.call_command('makemigrations','core', interactive=False) - management.call_command('migrate', interactive=False) - management.call_command('migrate','core', interactive=False) + if django.db.connections.databases["default"]["ENGINE"] == "django.db.backends.sqlite3": + # with transaction.atomic(): + management.call_command("makemigrations", "core", interactive=False) + management.call_command("migrate", interactive=False) + management.call_command("migrate", "core", interactive=False) else: - management.call_command('makemigrations','core', interactive=False) - management.call_command('migrate', interactive=False) - management.call_command('migrate','core', interactive=False) + management.call_command("makemigrations", "core", interactive=False) + management.call_command("migrate", interactive=False) + management.call_command("migrate", "core", interactive=False) - - print(" - done migration on: " + settings.DATABASES['default']['NAME']) - print("users in db already: ",len(User.objects.all())) + print(" - done migration on: " + settings.DATABASES["default"]["NAME"]) + print("users in db already: ", len(User.objects.all())) with transaction.atomic(): try: - print(" - Setting up expo user on: " + django.db.connections.databases['default']['NAME']) + print(" - Setting up expo user on: " + django.db.connections.databases["default"]["NAME"]) print(f" - user: {expouser} ({expouserpass:.5}...) <{expouseremail}> ") user = User.objects.create_user(expouser, expouseremail, expouserpass) user.is_staff = False user.is_superuser = False user.save() except: - print(" ! INTEGRITY ERROR user on: " + settings.DATABASES['default']['NAME']) - print(django.db.connections.databases['default']['NAME']) + print(" ! INTEGRITY ERROR user on: " + settings.DATABASES["default"]["NAME"]) + print(django.db.connections.databases["default"]["NAME"]) print(" ! You probably have not got a clean db when you thought you had.\n") print(" ! 
Also you are probably NOT running an in-memory db now.\n") - print("users in db: ",len(User.objects.all())) - print("tables in db: ",len(connection.introspection.table_names())) - memdumpsql(fn='integrityfail.sql') - django.db.connections.databases['default']['NAME'] = ':memory:' - #raise - + print("users in db: ", len(User.objects.all())) + print("tables in db: ", len(connection.introspection.table_names())) + memdumpsql(fn="integrityfail.sql") + django.db.connections.databases["default"]["NAME"] = ":memory:" + # raise + with transaction.atomic(): try: - print(" - Setting up expoadmin user on: " + django.db.connections.databases['default']['NAME']) + print(" - Setting up expoadmin user on: " + django.db.connections.databases["default"]["NAME"]) print(f" - user: {expoadminuser} ({expoadminuserpass:.5}...) <{expoadminuseremail}> ") user = User.objects.create_user(expoadminuser, expoadminuseremail, expoadminuserpass) user.is_staff = True user.is_superuser = True user.save() except: - print(" ! INTEGRITY ERROR user on: " + settings.DATABASES['default']['NAME']) - print(django.db.connections.databases['default']['NAME']) + print(" ! INTEGRITY ERROR user on: " + settings.DATABASES["default"]["NAME"]) + print(django.db.connections.databases["default"]["NAME"]) print(" ! You probably have not got a clean db when you thought you had.\n") print(" ! Also you are probably NOT running an in-memory db now.\n") - print("users in db: ",len(User.objects.all())) - print("tables in db: ",len(connection.introspection.table_names())) - memdumpsql(fn='integrityfail.sql') - django.db.connections.databases['default']['NAME'] = ':memory:' - #raise + print("users in db: ", len(User.objects.all())) + print("tables in db: ", len(connection.introspection.table_names())) + memdumpsql(fn="integrityfail.sql") + django.db.connections.databases["default"]["NAME"] = ":memory:" + # raise + def memdumpsql(fn): - '''Unused option to dump SQL. Aborted attempt to create a cache for loading data - ''' + """Unused option to dump SQL. Aborted attempt to create a cache for loading data""" djconn = django.db.connection from dump import _iterdump - with open(fn, 'w') as f: + + with open(fn, "w") as f: for line in _iterdump(djconn): f.write(f"{line.encode('utf8')}\n") return True -# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -class JobQueue(): +# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + +class JobQueue: """A list of import operations to run. Always reports profile times - of the import operations in the same order. + of the import operations in the same order. """ - - def __init__(self,run): - '''Initialises the job queue object with a fixed order for reporting - options during a run. Imports the timings from previous runs. - ''' - self.runlabel = run - self.queue = [] # tuples of (jobname, jobfunction) - self.results = {} - self.results_order=[ - "date","runlabel","reinit", "caves", "people", - "logbooks", "QMs", "scans", "survex", - "drawings", "test" ] - for k in self.results_order: - self.results[k]=[] - self.tfile = "import_profile.json" - self.htmlfile = "profile.html" # for HTML results table. Not yet done. - - def enq(self,label,func): - '''Enqueue: Adding elements to queue - ''' - self.queue.append((label,func)) + def __init__(self, run): + """Initialises the job queue object with a fixed order for reporting + options during a run. Imports the timings from previous runs. 
+ """ + self.runlabel = run + self.queue = [] # tuples of (jobname, jobfunction) + self.results = {} + self.results_order = [ + "date", + "runlabel", + "reinit", + "caves", + "people", + "logbooks", + "QMs", + "scans", + "survex", + "drawings", + "test", + ] + for k in self.results_order: + self.results[k] = [] + self.tfile = "import_profile.json" + self.htmlfile = "profile.html" # for HTML results table. Not yet done. + + def enq(self, label, func): + """Enqueue: Adding elements to queue""" + self.queue.append((label, func)) return True def loadprofiles(self): - """Load timings for previous imports for each data import type - """ + """Load timings for previous imports for each data import type""" if os.path.isfile(self.tfile): try: f = open(self.tfile, "r") @@ -209,35 +230,31 @@ class JobQueue(): # Python bug: https://github.com/ShinNoNoir/twitterwebsearch/issues/12 f.close() for j in self.results_order: - self.results[j].append(None) # append a placeholder + self.results[j].append(None) # append a placeholder return True - + def dellastprofile(self): - """trim one set of data from the results - """ + """trim one set of data from the results""" for j in self.results_order: - self.results[j].pop() # delete last item + self.results[j].pop() # delete last item return True - + def delfirstprofile(self): - """trim one set of data from the results - """ + """trim one set of data from the results""" for j in self.results_order: - self.results[j].pop(0) # delete zeroth item + self.results[j].pop(0) # delete zeroth item return True - + def saveprofiles(self): - """Save timings for the set of imports just completed - """ - with open(self.tfile, 'w') as f: - json.dump(self.results, f) + """Save timings for the set of imports just completed""" + with open(self.tfile, "w") as f: + json.dump(self.results, f) return True def runqonce(self): - """Run all the jobs in the queue provided - once - """ - print("** Running job ", self.runlabel,end=" to ") - print(django.db.connections.databases['default']['NAME']) + """Run all the jobs in the queue provided - once""" + print("** Running job ", self.runlabel, end=" to ") + print(django.db.connections.databases["default"]["NAME"]) jobstart = time.time() print(f"-- Initial memory in use {get_process_memory():.3f} MB") self.results["date"].pop() @@ -249,98 +266,100 @@ class JobQueue(): start = time.time() memstart = get_process_memory() jobname, jobparser = runfunction - #-------------------- - jobparser() # invokes function passed in the second item in the tuple - #-------------------- + # -------------------- + jobparser() # invokes function passed in the second item in the tuple + # -------------------- memend = get_process_memory() - duration = time.time()-start - #print(" - MEMORY start:{:.3f} MB end:{:.3f} MB change={:.3f} MB".format(memstart,memend, )) - print("\n*- Ended \"", jobname, f"\" {duration:.1f} seconds + {memend - memstart:.3f} MB ({memend:.3f} MB)") + duration = time.time() - start + # print(" - MEMORY start:{:.3f} MB end:{:.3f} MB change={:.3f} MB".format(memstart,memend, )) + print( + '\n*- Ended "', + jobname, + f'" {duration:.1f} seconds + {memend - memstart:.3f} MB ({memend:.3f} MB)', + ) self.results[jobname].pop() # the null item self.results[jobname].append(duration) - jobend = time.time() - jobduration = jobend-jobstart + jobduration = jobend - jobstart print(f"** Ended job {self.runlabel} - {jobduration:.1f} seconds total.") return True - def append_placeholders(self): - '''Ads a dummy timing for each option, to fix off by one error - 
''' + """Ads a dummy timing for each option, to fix off by one error""" for j in self.results_order: - self.results[j].append(None) # append a placeholder + self.results[j].append(None) # append a placeholder - def run_now_django_tests(self,n): - """Runs the standard django test harness system which is in troggle/core/TESTS/tests.py - """ - management.call_command('test', verbosity=n) - django.db.close_old_connections() + def run_now_django_tests(self, n): + """Runs the standard django test harness system which is in troggle/core/TESTS/tests.py""" + management.call_command("test", verbosity=n) + django.db.close_old_connections() def run(self): - """Initialises profile timings record, initiates relational database, runs the job queue saving the imported data as an SQL image and saves the timing profile data. - """ + """Initialises profile timings record, initiates relational database, runs the job queue saving the imported data as an SQL image and saves the timing profile data.""" self.loadprofiles() - print("-- start ", django.db.connections.databases['default']['ENGINE'], django.db.connections.databases['default']['NAME']) + print( + "-- start ", + django.db.connections.databases["default"]["ENGINE"], + django.db.connections.databases["default"]["NAME"], + ) self.runqonce() - if settings.DATABASES['default']['NAME'] ==":memory:": - memdumpsql('memdump.sql') # saved contents of in-memory db, could be imported later.. + if settings.DATABASES["default"]["NAME"] == ":memory:": + memdumpsql("memdump.sql") # saved contents of in-memory db, could be imported later.. self.saveprofiles() return True def showprofile(self): - """Prints out the time it took to run the jobqueue - """ + """Prints out the time it took to run the jobqueue""" for k in self.results_order: - if k =="test": + if k == "test": break - elif k =="date": - print(" days ago ", end=' ') + elif k == "date": + print(" days ago ", end=" ") else: - print('%10s (s)' % k, end=' ') - percen=0 - r = self.results[k] - + print("%10s (s)" % k, end=" ") + percen = 0 + r = self.results[k] + for i in range(len(r)): - if k == "runlabel": + if k == "runlabel": if r[i]: - rp = r[i] + rp = r[i] else: rp = " - " - print('%8s' % rp, end=' ') - elif k =="date": + print("%8s" % rp, end=" ") + elif k == "date": # Calculate dates as days before present if r[i]: - if i == len(r)-1: - print(" this", end=' ') + if i == len(r) - 1: + print(" this", end=" ") else: # prints one place to the left of where you expect - if r[len(r)-1]: - s = r[i]-r[len(r)-1] - elif r[len(r)-2]: - s = r[i]-r[len(r)-2] + if r[len(r) - 1]: + s = r[i] - r[len(r) - 1] + elif r[len(r) - 2]: + s = r[i] - r[len(r) - 2] else: s = 0 - days = (s)/(24*60*60) - print(f'{days:8.2f}', end=' ') - elif r[i]: - print(f'{r[i]:8.1f}', end=' ') - if i == len(r)-1 and r[i-1]: - percen = 100* (r[i] - r[i-1])/r[i-1] - if abs(percen) >0.1: - print(f'{percen:8.1f}%', end=' ') + days = (s) / (24 * 60 * 60) + print(f"{days:8.2f}", end=" ") + elif r[i]: + print(f"{r[i]:8.1f}", end=" ") + if i == len(r) - 1 and r[i - 1]: + percen = 100 * (r[i] - r[i - 1]) / r[i - 1] + if abs(percen) > 0.1: + print(f"{percen:8.1f}%", end=" ") else: - print(" - ", end=' ') + print(" - ", end=" ") print("") print("\n") return True def usage(): - '''Prints command line options, can print history of previous runs with timings - ''' - print("""Usage is 'python databaseReset.py [runlabel]' + """Prints command line options, can print history of previous runs with timings""" + print( + """Usage is 'python databaseReset.py [runlabel]' 
where command is: test - testing... imports people and prints profile. Deletes nothing. profile - print the profile from previous runs. Import nothing. @@ -370,7 +389,9 @@ def usage(): Note that running the subfunctions will not produce a consistent website - only the full 'reset' does that. - """) + """ + ) + if __name__ == "__main__": @@ -381,73 +402,73 @@ if __name__ == "__main__": if sys.getfilesystemencoding() != "utf-8": print("UTF-8 is NOT the default file encoding. You must fix this.") - print(f'- {sys.getdefaultencoding()=}') - print(f'- {sys.getfilesystemencoding()=}') - print(f'- {locale.getdefaultlocale()=}') - print(f'- {locale.getpreferredencoding()=}') + print(f"- {sys.getdefaultencoding()=}") + print(f"- {sys.getfilesystemencoding()=}") + print(f"- {locale.getdefaultlocale()=}") + print(f"- {locale.getpreferredencoding()=}") print("Aborting run.") exit() - if len(sys.argv)>2: - runlabel = sys.argv[len(sys.argv)-1] - else: - runlabel=None - + if len(sys.argv) > 2: + runlabel = sys.argv[len(sys.argv) - 1] + else: + runlabel = None + jq = JobQueue(runlabel) - - if len(sys.argv)==1: + + if len(sys.argv) == 1: usage() exit() elif "init" in sys.argv: - jq.enq("reinit",reinit_db) + jq.enq("reinit", reinit_db) elif "ents" in sys.argv: - jq.enq("survex",import_ents) + jq.enq("survex", import_ents) elif "test2" in sys.argv: - jq.enq("QMs",import_QMs) - jq.enq("drawings",import_drawingsfiles) - jq.enq("survex",import_survex) + jq.enq("QMs", import_QMs) + jq.enq("drawings", import_drawingsfiles) + jq.enq("survex", import_survex) elif "caves" in sys.argv: - jq.enq("caves",import_caves) + jq.enq("caves", import_caves) elif "logbooks" in sys.argv: - jq.enq("logbooks",import_logbooks) + jq.enq("logbooks", import_logbooks) elif "logbook" in sys.argv: - jq.enq("logbooks",import_logbook) # default year set in imports.py + jq.enq("logbooks", import_logbook) # default year set in imports.py elif "people" in sys.argv: - jq.enq("people",import_people) + jq.enq("people", import_people) elif "QMs" in sys.argv: - jq.enq("QMs",import_QMs) + jq.enq("QMs", import_QMs) elif "reset" in sys.argv: - jq.enq("reinit",reinit_db) - jq.enq("caves",import_caves) - jq.enq("people",import_people) - jq.enq("scans",import_surveyscans) - jq.enq("logbooks",import_logbooks) - jq.enq("QMs",import_QMs) - jq.enq("drawings",import_drawingsfiles) - jq.enq("survex",import_survex) + jq.enq("reinit", reinit_db) + jq.enq("caves", import_caves) + jq.enq("people", import_people) + jq.enq("scans", import_surveyscans) + jq.enq("logbooks", import_logbooks) + jq.enq("QMs", import_QMs) + jq.enq("drawings", import_drawingsfiles) + jq.enq("survex", import_survex) elif "scans" in sys.argv: - jq.enq("scans",import_surveyscans) + jq.enq("scans", import_surveyscans) elif "survex" in sys.argv: - jq.enq("survex",import_survex) + jq.enq("survex", import_survex) elif "loadpos" in sys.argv: - jq.enq("survex",import_loadpos) + jq.enq("survex", import_loadpos) elif "drawings" in sys.argv: - jq.enq("drawings",import_drawingsfiles) - elif "dumplogbooks" in sys.argv: # untested in 2020 + jq.enq("drawings", import_drawingsfiles) + elif "dumplogbooks" in sys.argv: # untested in 2020 dumplogbooks() -# elif "writecaves" in sys.argv: # untested in 2020 - will overwrite input files!! -# writeCaves() - elif "profile" in sys.argv: - if runlabel == 'del' : + # elif "writecaves" in sys.argv: # untested in 2020 - will overwrite input files!! 
+ # writeCaves() + elif "profile" in sys.argv: + if runlabel == "del": jq.loadprofiles() jq.dellastprofile() - jq.dellastprofile() # twice because loadprofiles adds a dummy + jq.dellastprofile() # twice because loadprofiles adds a dummy jq.showprofile() jq.saveprofiles() - if runlabel == 'delfirst' : + if runlabel == "delfirst": jq.loadprofiles() - jq.dellastprofile() # remove the dummy - jq.delfirstprofile() + jq.dellastprofile() # remove the dummy + jq.delfirstprofile() jq.showprofile() jq.saveprofiles() else: diff --git a/parsers/QMs.py b/parsers/QMs.py index efeeb19..79f9f0f 100644 --- a/parsers/QMs.py +++ b/parsers/QMs.py @@ -9,20 +9,21 @@ from troggle.core.models.caves import QM, Cave, LogbookEntry from troggle.core.models.troggle import DataIssue from troggle.core.utils import save_carefully -'''Reads the CSV files containg QMs for a select few caves +"""Reads the CSV files containg QMs for a select few caves See parsers/survex.py for the parser which extracts QMs from the survex files -''' +""" + def deleteQMs(): QM.objects.all().delete() - DataIssue.objects.filter(parser='QMs').delete() + DataIssue.objects.filter(parser="QMs").delete() def parseCaveQMs(cave, inputFile, ticked=False): - """Runs through the CSV file at inputFile (which is a relative path from expoweb) and + """Runs through the CSV file at inputFile (which is a relative path from expoweb) and saves each QM as a QM instance. This is creating and linking a Placeholder logbookentry dated 1st Jan. in the relevant - year. This is pointless but it is needed because found_by is a ForeignKey in the db + year. This is pointless but it is needed because found_by is a ForeignKey in the db and we can't be arsed to fudge this properly with a null.(July 2020) Linking to a passage in a SVX file might be more interesting as the QM does sometimes @@ -30,150 +31,157 @@ def parseCaveQMs(cave, inputFile, ticked=False): C2000-204-39 B Tree Pitch in Cave Tree treeumphant.28 Gosser Streamway The CSV file does not have the exact date for the QM, only the year, so links to survex files might be ambiguous. But potentially useful? - + Much of this code assumes that QMs are edited using troggle. This is not done so this code can be deleted. All QMs are created afresh and this is all only run once on import on a fresh database. """ - if cave=='204-steinBH': + if cave == "204-steinBH": try: - steinBr=Cave.objects.get(official_name="Steinbrückenhöhle") + steinBr = Cave.objects.get(official_name="Steinbrückenhöhle") caveid = steinBr except Cave.DoesNotExist: - message = f' ! - {qmPath} Steinbruckenhoehle is not in the database. Please run cave parser' + message = f" ! - {qmPath} Steinbruckenhoehle is not in the database. Please run cave parser" print(message) - DataIssue.objects.create(parser='QMs', message=message) + DataIssue.objects.create(parser="QMs", message=message) return - elif cave=='234-Hauch': + elif cave == "234-Hauch": try: - hauchHl=Cave.objects.get(official_name="Hauchhöhle") + hauchHl = Cave.objects.get(official_name="Hauchhöhle") caveid = hauchHl except Cave.DoesNotExist: - message = f' ! - {qmPath} Hauchhoehle is not in the database. Please run cave parser' + message = f" ! - {qmPath} Hauchhoehle is not in the database. 
Please run cave parser" print(message) - DataIssue.objects.create(parser='QMs', message=message) + DataIssue.objects.create(parser="QMs", message=message) return - elif cave =='161-KH': + elif cave == "161-KH": try: - kh=Cave.objects.get(official_name="Kaninchenhöhle") + kh = Cave.objects.get(official_name="Kaninchenhöhle") caveid = kh except Cave.DoesNotExist: - message = f' ! - {qmPath} KH is not in the database. Please run cave parser' + message = f" ! - {qmPath} KH is not in the database. Please run cave parser" print(message) - DataIssue.objects.create(parser='QMs', message=message) - nqms = parse_KH_QMs(kh, inputFile=inputFile, ticked=ticked) + DataIssue.objects.create(parser="QMs", message=message) + nqms = parse_KH_QMs(kh, inputFile=inputFile, ticked=ticked) return nqms - #qmPath = settings.EXPOWEB+inputFile - qmPath = os.path.join(settings.EXPOWEB, inputFile) # why not use the pathlib stuff ? + # qmPath = settings.EXPOWEB+inputFile + qmPath = os.path.join(settings.EXPOWEB, inputFile) # why not use the pathlib stuff ? - qmCSVContents = open(qmPath,'r') - dialect=csv.Sniffer().sniff(qmCSVContents.read()) - qmCSVContents.seek(0,0) - qmReader = csv.reader(qmCSVContents,dialect=dialect) - next(qmReader) # Skip header row + qmCSVContents = open(qmPath, "r") + dialect = csv.Sniffer().sniff(qmCSVContents.read()) + qmCSVContents.seek(0, 0) + qmReader = csv.reader(qmCSVContents, dialect=dialect) + next(qmReader) # Skip header row n = 0 nqms = 0 for line in qmReader: try: n += 1 - year=int(line[0][1:5]) - logslug = f'PH_{int(year)}_{int(n):02d}' - QMnum=re.match(r".*?-\d*?-X?(?P\d*)",line[0]).group("numb") + year = int(line[0][1:5]) + logslug = f"PH_{int(year)}_{int(n):02d}" + QMnum = re.match(r".*?-\d*?-X?(?P\d*)", line[0]).group("numb") newQM = QM() # newQM.found_by=placeholder - newQM.number=QMnum + newQM.number = QMnum newQM.cave = caveid newQM.blockname = "" - if line[1]=="Dig": - newQM.grade="D" + if line[1] == "Dig": + newQM.grade = "D" else: - newQM.grade=line[1] - newQM.area=line[2] - newQM.location_description=line[3] - - # In the table, completion is indicated by the presence of a completion discription. - newQM.completion_description=line[4] - newQM.nearest_station_description=line[5] - if newQM.completion_description: + newQM.grade = line[1] + newQM.area = line[2] + newQM.location_description = line[3] + + # In the table, completion is indicated by the presence of a completion discription. 
+ newQM.completion_description = line[4] + newQM.nearest_station_description = line[5] + if newQM.completion_description: newQM.ticked = True else: newQM.ticked = False - newQM.comment=line[6] + newQM.comment = line[6] try: # year and number are unique for a cave in CSV imports - preexistingQM=QM.objects.get(number=QMnum, found_by__date__year=year) #if we don't have this one in the DB, save it - if preexistingQM.new_since_parsing==False: #if the pre-existing QM has not been modified, overwrite it - VERY OLD THING + preexistingQM = QM.objects.get( + number=QMnum, found_by__date__year=year + ) # if we don't have this one in the DB, save it + if ( + preexistingQM.new_since_parsing == False + ): # if the pre-existing QM has not been modified, overwrite it - VERY OLD THING preexistingQM.delete() newQM.expoyear = year newQM.save() else: # otherwise, print that it was ignored print((" - preserving " + str(preexistingQM) + ", which was edited in admin \r")) - - except QM.DoesNotExist: #if there is no pre-existing QM, save the new one + + except QM.DoesNotExist: # if there is no pre-existing QM, save the new one newQM.expoyear = year - newQM.save() - nqms += 1 - except KeyError: #check on this one - message = f' ! - {qmPath} KeyError {str(line)} ' + newQM.save() + nqms += 1 + except KeyError: # check on this one + message = f" ! - {qmPath} KeyError {str(line)} " print(message) - DataIssue.objects.create(parser='QMs', message=message) + DataIssue.objects.create(parser="QMs", message=message) continue except IndexError: - message = f' ! - {qmPath} IndexError {str(line)} ' + message = f" ! - {qmPath} IndexError {str(line)} " print(message) - DataIssue.objects.create(parser='QMs', message=message) + DataIssue.objects.create(parser="QMs", message=message) continue return nqms + def parse_KH_QMs(kh, inputFile, ticked): - """import QMs from the 1623-161 (Kaninchenhohle) html pages, different format - """ - khQMs=open(os.path.join(settings.EXPOWEB, inputFile),'r') - khQMs=khQMs.readlines() + """import QMs from the 1623-161 (Kaninchenhohle) html pages, different format""" + khQMs = open(os.path.join(settings.EXPOWEB, inputFile), "r") + khQMs = khQMs.readlines() nqms = 0 for line in khQMs: - res=re.search(r'name=\"[CB](?P\d*)-(?P\d*)-(?P\d*).* (?P[ABDCV])
(?P.*)\[(?P.*)\]',line) + res = re.search( + r"name=\"[CB](?P\d*)-(?P\d*)-(?P\d*).* (?P[ABDCV])
(?P.*)\[(?P.*)\]", + line, + ) if res: - res=res.groupdict() - year=int(res['year']) + res = res.groupdict() + year = int(res["year"]) # logbook placeholder code was previously here. No longer needed. - #check if placeholder exists for given year, create it if not + # check if placeholder exists for given year, create it if not # message = " ! - "+ str(year) + " logbook: placeholder entry for '161 KH' created. DUMMY EXPEDITION ID. Should be re-attached to the actual trip." # placeholder, hadToCreate = LogbookEntry.objects.get_or_create(date__year=year, place="161", title="placeholder for QMs in 161", text=message, entry_type="DUMMY", expedition_id=1, defaults={"date": date((year), 1, 1),"cave_slug":str(kh)}) # # if hadToCreate: - # print(message) - # DataIssue.objects.create(parser='QMs', message=message) - lookupArgs={ + # print(message) + # DataIssue.objects.create(parser='QMs', message=message) + lookupArgs = { #'found_by':placeholder, - 'blockname': "", - 'expoyear':year, - 'number':res['number'], - 'cave': kh, - 'grade':res['grade'] - } - nonLookupArgs={ - 'ticked': ticked, - 'nearest_station_name':res['nearest_station'], - 'location_description':res['description'] - } - instance, created = save_carefully(QM,lookupArgs,nonLookupArgs) + "blockname": "", + "expoyear": year, + "number": res["number"], + "cave": kh, + "grade": res["grade"], + } + nonLookupArgs = { + "ticked": ticked, + "nearest_station_name": res["nearest_station"], + "location_description": res["description"], + } + instance, created = save_carefully(QM, lookupArgs, nonLookupArgs) # if created: - # message = f" ! - {instance.code()} QM entry for '161 KH' created. ticked: {ticked}" - # print(message) - # DataIssue.objects.create(parser='QMs', message=message) + # message = f" ! - {instance.code()} QM entry for '161 KH' created. 
ticked: {ticked}" + # print(message) + # DataIssue.objects.create(parser='QMs', message=message) nqms += 1 return nqms - - + + def Load_QMs(): deleteQMs() - n204 = parseCaveQMs(cave='204-steinBH',inputFile=r"1623/204/qm.csv") - n234 = parseCaveQMs(cave='234-Hauch',inputFile=r"1623/234/qm.csv") - n161 = parseCaveQMs(cave='161-KH', inputFile="1623/161/qmtodo.htm", ticked=False) - t161 = parseCaveQMs(cave='161-KH', inputFile="1623/161/qmdone.htm", ticked=True) - #parseCaveQMs(cave='balkonhoehle',inputFile=r"1623/264/qm.csv") + n204 = parseCaveQMs(cave="204-steinBH", inputFile=r"1623/204/qm.csv") + n234 = parseCaveQMs(cave="234-Hauch", inputFile=r"1623/234/qm.csv") + n161 = parseCaveQMs(cave="161-KH", inputFile="1623/161/qmtodo.htm", ticked=False) + t161 = parseCaveQMs(cave="161-KH", inputFile="1623/161/qmdone.htm", ticked=True) + # parseCaveQMs(cave='balkonhoehle',inputFile=r"1623/264/qm.csv") print(f" - Imported: {n204} QMs for 204, {n234} QMs for 234, {t161} QMs for 161 done, {n161} QMs for 161 not done.") - print () + print() diff --git a/parsers/caves.py b/parsers/caves.py index 8e1993a..fa2aea3 100644 --- a/parsers/caves.py +++ b/parsers/caves.py @@ -6,49 +6,48 @@ from pathlib import Path from django.conf import settings from django.db import transaction -from troggle.core.models.caves import (Area, Cave, CaveAndEntrance, CaveSlug, - Entrance, EntranceSlug, GetCaveLookup) +from troggle.core.models.caves import Area, Cave, CaveAndEntrance, CaveSlug, Entrance, EntranceSlug, GetCaveLookup from troggle.core.models.troggle import DataIssue -from troggle.settings import (CAVEDESCRIPTIONS, ENTRANCEDESCRIPTIONS, EXPOWEB, - SURVEX_DATA) +from troggle.settings import CAVEDESCRIPTIONS, ENTRANCEDESCRIPTIONS, EXPOWEB, SURVEX_DATA -'''Reads all the cave description data by parsing the xml files (stored as e.g. :EXPOWEB:/cave_data/1623-161.html ) +"""Reads all the cave description data by parsing the xml files (stored as e.g. :EXPOWEB:/cave_data/1623-161.html ) and creating the various Cave, Entrance and necessary Area objects. This is the first import that happens after the database is reinitialised. So is the first thing that creates tables. -''' +""" -todo=''' +todo = """ - Cannot use Edit This Page for pendingcaves.txt_edit as Edit This Page is expecting an html file. So we will need a separate file-editing capability just for this configuration file ?! - crashes on MariaDB in databasereset.py on server when deleting Caves and complains Area needs a non null parent, But this is not true. The only solution we have found is to let it crash, then stop and restart MariaDB (requires a logon able to sudo) and then restart the databasereset.py again. (status as of July 2022) -''' +""" entrances_xslug = {} caves_xslug = {} areas_xslug = {} + def dummy_entrance(k, slug, msg="DUMMY"): - '''Returns an empty entrance object for either a PENDING cave or a DUMMY entrance if + """Returns an empty entrance object for either a PENDING cave or a DUMMY entrance if user forgot to provide one when creating the cave - ''' + """ ent = Entrance( - name = k, - entrance_description = "Dummy entrance: auto-created when registering a new cave " + - "and you forgot to create an entrance for it. Click on 'Edit' to enter the correct data, then 'Submit'.", - marking = '?') + name=k, + entrance_description="Dummy entrance: auto-created when registering a new cave " + + "and you forgot to create an entrance for it. 
Click on 'Edit' to enter the correct data, then 'Submit'.", + marking="?", + ) if ent: - ent.save() # must save to have id before foreign keys work. - try: # Now create a entrance slug ID - es = EntranceSlug(entrance = ent, - slug = slug, primary = False) + ent.save() # must save to have id before foreign keys work. + try: # Now create a entrance slug ID + es = EntranceSlug(entrance=ent, slug=slug, primary=False) except: message = f" ! {k:11s} {msg}-{slug} entrance create failure" - DataIssue.objects.create(parser='caves', message=message, url=f'{slug}') + DataIssue.objects.create(parser="caves", message=message, url=f"{slug}") print(message) ent.cached_primary_slug = slug @@ -57,46 +56,48 @@ def dummy_entrance(k, slug, msg="DUMMY"): return ent else: message = f" ! {k:11s} {msg} cave SLUG '{slug}' create failure" - DataIssue.objects.create(parser='caves', message=message, url=f'{slug}') + DataIssue.objects.create(parser="caves", message=message, url=f"{slug}") print(message) - raise + raise + def set_dummy_entrance(id, slug, cave, msg="DUMMY"): - '''Entrance field either missing or holds a null string instead of a filename in a cave_data file. - ''' + """Entrance field either missing or holds a null string instead of a filename in a cave_data file.""" global entrances_xslug try: entrance = dummy_entrance(id, slug, msg="DUMMY") letter = "" entrances_xslug[slug] = entrance - ce = CaveAndEntrance.objects.update_or_create(cave = cave, entrance_letter = "", entrance = entrance) - message = f' ! Warning: Dummy Entrance successfully set for entrance {id} on cave {cave}' - - DataIssue.objects.create(parser='caves', message=message, url=f'{cave.url}') + ce = CaveAndEntrance.objects.update_or_create(cave=cave, entrance_letter="", entrance=entrance) + message = f" ! Warning: Dummy Entrance successfully set for entrance {id} on cave {cave}" + + DataIssue.objects.create(parser="caves", message=message, url=f"{cave.url}") print(message) except: - #raise + # raise message = f' ! Entrance Dummy setting failure, slug:"{slug}" cave id :"{id}" ' - DataIssue.objects.create(parser='caves', message=message, url=f'{cave.url}') + DataIssue.objects.create(parser="caves", message=message, url=f"{cave.url}") print(message) - + + def do_pending_cave(k, url, area): - ''' - default for a PENDING cave, should be overwritten in the db later if a real cave of the same name exists - in expoweb/cave_data/1623-"k".html - + """ + default for a PENDING cave, should be overwritten in the db later if a real cave of the same name exists + in expoweb/cave_data/1623-"k".html + Note that at this point in importing the data we have not yet seen the survex files, so we can't look inside the relevant survex file to find the year and so we con't provide helpful links. - ''' + """ + def get_survex_file(k): - '''Guesses at and finds a survex file for this pending cave. + """Guesses at and finds a survex file for this pending cave. Convoluted. 
Needs rewriting - ''' + """ if k[0:3] == "162": id = Path(k[5:]) else: id = Path(k) - + survex_file = f"caves-{area.short_name}/{id}/{id}.svx" if Path(settings.SURVEX_DATA, survex_file).is_file(): return survex_file @@ -104,7 +105,7 @@ def do_pending_cave(k, url, area): survex_file = f"caves-{area.short_name}/{id}.svx" if Path(settings.SURVEX_DATA, survex_file).is_file(): return survex_file - + survex_file = "" d = Path(settings.SURVEX_DATA, f"caves-{area.short_name}/{id}") if d.is_dir(): @@ -113,8 +114,8 @@ def do_pending_cave(k, url, area): for f in dir: if f.suffix == ".svx": survex_file = f.relative_to(settings.SURVEX_DATA) - chk = min(5, len(f.name)-1) - if str(f.name)[:chk].lower() == str(id.name)[:chk].lower(): # bodge which mostly works + chk = min(5, len(f.name) - 1) + if str(f.name)[:chk].lower() == str(id.name)[:chk].lower(): # bodge which mostly works prime_suspect = survex_file if prime_suspect: survex_file = prime_suspect @@ -124,71 +125,81 @@ def do_pending_cave(k, url, area): return survex_file slug = k - + g = GetCaveLookup() with transaction.atomic(): if slug in g: message = f" ! {k:18} cave listed in pendingcaves.txt already exists." - DataIssue.objects.create(parser='caves', message=message, url=url) + DataIssue.objects.create(parser="caves", message=message, url=url) print(message) return - - - default_note = f"_Survex file found in loser repo but no description in expoweb
\n" - default_note += f"INSTRUCTIONS: First open 'This survex file' (link above the CaveView panel) to find the date and info. Then " - default_note += f"
\n\n - (0) look in the cave number index for notes on this cave, " - default_note += f"
\n\n - (1) search in the survex file for the *ref to find a " - default_note += f"relevant wallet, e.g.2009#11 and read the notes image files
\n - " - default_note += f"
\n\n - (2) search in the Expo for that year e.g. 2009 to find a " - default_note += f"relevant logbook entry, remember that the date may have been recorded incorrectly, " - default_note += f"so check for trips i.e. logbook entries involving the same people as were listed in the survex file, " - default_note += f"and you should also check the scanned copy of the logbook (linked from each logbook entry page) " - default_note += f"just in case a vital trip was not transcribed, then
\n - " - default_note += f"click on 'Edit this cave' and copy the information you find in the survex file and the logbook" - default_note += f"and delete all the text in the 'Notes' section - which is the text you are reading now." - default_note += f"
\n\n - Only two fields on this form are essential. " + default_note = f"_Survex file found in loser repo but no description in expoweb
\n" + default_note += f"INSTRUCTIONS: First open 'This survex file' (link above the CaveView panel) to find the date and info. Then " + default_note += f'
\n\n - (0) look in the cave number index for notes on this cave, ' + default_note += f"
\n\n - (1) search in the survex file for the *ref to find a " + default_note += f"relevant wallet, e.g.2009#11 and read the notes image files
\n - " + default_note += ( + f"
\n\n - (2) search in the Expo for that year e.g. 2009 to find a " + ) + default_note += f"relevant logbook entry, remember that the date may have been recorded incorrectly, " + default_note += ( + f"so check for trips i.e. logbook entries involving the same people as were listed in the survex file, " + ) + default_note += ( + f"and you should also check the scanned copy of the logbook (linked from each logbook entry page) " + ) + default_note += f"just in case a vital trip was not transcribed, then
\n - " + default_note += ( + f"click on 'Edit this cave' and copy the information you find in the survex file and the logbook" + ) + default_note += f"and delete all the text in the 'Notes' section - which is the text you are reading now." + default_note += f"
\n\n - Only two fields on this form are essential. " default_note += f"Documentation of all the fields on 'Edit this cave' form is in handbook/survey/caveentryfields" - default_note += f"
\n\n - " - default_note += f"You will also need to create a new entrance from the 'Edit this cave' page. Ignore the existing dummy one, it will evaporate on the next full import." - default_note += f"
\n\n - " - default_note += f"When you Submit it will create a new file in expoweb/cave_data/ " - default_note += f"
\n\n - Now you can edit the entrance info: click on Edit below for the dummy entrance. " + default_note += f"
\n\n - " + default_note += f"You will also need to create a new entrance from the 'Edit this cave' page. Ignore the existing dummy one, it will evaporate on the next full import." + default_note += f"
\n\n - " + default_note += f"When you Submit it will create a new file in expoweb/cave_data/ " + default_note += ( + f"
\n\n - Now you can edit the entrance info: click on Edit below for the dummy entrance. " + ) default_note += f"and then Submit to save it (if you forget to do this, a dummy entrance will be created for your new cave description)." - default_note += f"
\n\n - Finally, you need to find a nerd to edit the file 'expoweb/cave_data/pending.txt' " - default_note += f"to remove the line
{slug}
as it is no longer 'pending' but 'done. Well Done." + default_note += f"
\n\n - Finally, you need to find a nerd to edit the file 'expoweb/cave_data/pending.txt' " + default_note += ( + f"to remove the line
{slug}
as it is no longer 'pending' but 'done. Well Done." + ) survex_file = get_survex_file(k) - + cave = Cave( - unofficial_number = k, - underground_description = "Pending cave write-up - creating as empty object. No XML file available yet.", - survex_file = survex_file, - url = url, - notes = default_note) + unofficial_number=k, + underground_description="Pending cave write-up - creating as empty object. No XML file available yet.", + survex_file=survex_file, + url=url, + notes=default_note, + ) if cave: - cave.save() # must save to have id before foreign keys work. This is also a ManyToMany key. + cave.save() # must save to have id before foreign keys work. This is also a ManyToMany key. cave.area.add(area) cave.save() message = f" ! {k:18} {cave.underground_description} url: {url}" - DataIssue.objects.create(parser='caves', message=message, url=url) + DataIssue.objects.create(parser="caves", message=message, url=url) print(message) - - try: # Now create a cave slug ID - cs = CaveSlug.objects.update_or_create(cave = cave, - slug = slug, primary = False) + + try: # Now create a cave slug ID + cs = CaveSlug.objects.update_or_create(cave=cave, slug=slug, primary=False) except: message = f" ! {k:11s} PENDING cave SLUG create failure" - DataIssue.objects.create(parser='caves', message=message) + DataIssue.objects.create(parser="caves", message=message) print(message) else: - message = f' ! {k:11s} PENDING cave create failure' - DataIssue.objects.create(parser='caves', message=message) + message = f" ! {k:11s} PENDING cave create failure" + DataIssue.objects.create(parser="caves", message=message) print(message) try: ent = dummy_entrance(k, slug, msg="PENDING") - ceinsts = CaveAndEntrance.objects.update_or_create(cave = cave, entrance_letter = "", entrance = ent) + ceinsts = CaveAndEntrance.objects.update_or_create(cave=cave, entrance_letter="", entrance=ent) for ceinst in ceinsts: if str(ceinst) == str(cave): # magic runes... why is the next value a Bool? ceinst.cave = cave @@ -196,15 +207,14 @@ def do_pending_cave(k, url, area): break except: message = f" ! {k:11s} PENDING entrance + cave UNION create failure '{cave}' [{ent}]" - DataIssue.objects.create(parser='caves', message=message) + DataIssue.objects.create(parser="caves", message=message) print(message) - def readentrance(filename): - '''Reads an enrance description from the .html file + """Reads an enrance description from the .html file Convoluted. Sorry.This is as I inherited it and I haven't fiddled with it. Needs rewriting - ''' + """ global entrances_xslug global caves_xslug global areas_xslug @@ -213,181 +223,214 @@ def readentrance(filename): with open(os.path.join(ENTRANCEDESCRIPTIONS, filename)) as f: contents = f.read() context = filename - #print("Reading file ENTRANCE {} / {}".format(ENTRANCEDESCRIPTIONS, filename)) - entrancecontentslist = getXML(contents, "entrance", maxItems = 1, context = context) + # print("Reading file ENTRANCE {} / {}".format(ENTRANCEDESCRIPTIONS, filename)) + entrancecontentslist = getXML(contents, "entrance", maxItems=1, context=context) if len(entrancecontentslist) != 1: message = f'! 
BAD ENTRANCE at "{filename}"' - DataIssue.objects.create(parser='caves', message=message) + DataIssue.objects.create(parser="caves", message=message) print(message) else: entrancecontents = entrancecontentslist[0] - non_public = getXML(entrancecontents, "non_public", maxItems = 1, context = context) - name = getXML(entrancecontents, "name", maxItems = 1, context = context) - slugs = getXML(entrancecontents, "slug", context = context) - entrance_description = getXML(entrancecontents, "entrance_description", maxItems = 1, context = context) - explorers = getXML(entrancecontents, "explorers", maxItems = 1, context = context) - map_description = getXML(entrancecontents, "map_description", maxItems = 1, context = context) - location_description = getXML(entrancecontents, "location_description", maxItems = 1, context = context) - lastvisit = getXML(entrancecontents, "last visit date", maxItems = 1, minItems = 0, context = context) - approach = getXML(entrancecontents, "approach", maxItems = 1, context = context) - underground_description = getXML(entrancecontents, "underground_description", maxItems = 1, context = context) - photo = getXML(entrancecontents, "photo", maxItems = 1, context = context) - marking = getXML(entrancecontents, "marking", maxItems = 1, context = context) - marking_comment = getXML(entrancecontents, "marking_comment", maxItems = 1, context = context) - findability = getXML(entrancecontents, "findability", maxItems = 1, context = context) - findability_description = getXML(entrancecontents, "findability_description", maxItems = 1, context = context) - alt = getXML(entrancecontents, "alt", maxItems = 1, context = context) - northing = getXML(entrancecontents, "northing", maxItems = 1, context = context) - easting = getXML(entrancecontents, "easting", maxItems = 1, context = context) - tag_station = getXML(entrancecontents, "tag_station", maxItems = 1, context = context) - exact_station = getXML(entrancecontents, "exact_station", maxItems = 1, context = context) - other_station = getXML(entrancecontents, "other_station", maxItems = 1, context = context) - other_description = getXML(entrancecontents, "other_description", maxItems = 1, context = context) - bearings = getXML(entrancecontents, "bearings", maxItems = 1, context = context) - url = getXML(entrancecontents, "url", maxItems = 1, context = context) - #if len(non_public) == 1 and len(slugs) >= 1 and len(name) >= 1 and len(entrance_description) == 1 and len(explorers) == 1 and len(map_description) == 1 and len(location_description) == 1 and len(lastvisit) == 1 and len(approach) == 1 and len(underground_description) == 1 and len(marking) == 1 and len(marking_comment) == 1 and len(findability) == 1 and len(findability_description) == 1 and len(alt) == 1 and len(northing) == 1 and len(easting) == 1 and len(tag_station) == 1 and len(exact_station) == 1 and len(other_station) == 1 and len(other_description) == 1 and len(bearings) == 1 and len(url) == 1: - e, state = Entrance.objects.update_or_create(name = name[0], - non_public = {"True": True, "False": False, "true": True, "false": False,}[non_public[0]], - entrance_description = entrance_description[0], - explorers = explorers[0], - map_description = map_description[0], - location_description = location_description[0], - lastvisit = lastvisit[0], - approach = approach[0], - underground_description = underground_description[0], - photo = photo[0], - marking = marking[0], - marking_comment = marking_comment[0], - findability = findability[0], - findability_description = 
findability_description[0], - alt = alt[0], - northing = northing[0], - easting = easting[0], - tag_station = tag_station[0], - exact_station = exact_station[0], - other_station = other_station[0], - other_description = other_description[0], - bearings = bearings[0], - url = url[0], - filename = filename, - cached_primary_slug = slugs[0]) + non_public = getXML(entrancecontents, "non_public", maxItems=1, context=context) + name = getXML(entrancecontents, "name", maxItems=1, context=context) + slugs = getXML(entrancecontents, "slug", context=context) + entrance_description = getXML(entrancecontents, "entrance_description", maxItems=1, context=context) + explorers = getXML(entrancecontents, "explorers", maxItems=1, context=context) + map_description = getXML(entrancecontents, "map_description", maxItems=1, context=context) + location_description = getXML(entrancecontents, "location_description", maxItems=1, context=context) + lastvisit = getXML(entrancecontents, "last visit date", maxItems=1, minItems=0, context=context) + approach = getXML(entrancecontents, "approach", maxItems=1, context=context) + underground_description = getXML(entrancecontents, "underground_description", maxItems=1, context=context) + photo = getXML(entrancecontents, "photo", maxItems=1, context=context) + marking = getXML(entrancecontents, "marking", maxItems=1, context=context) + marking_comment = getXML(entrancecontents, "marking_comment", maxItems=1, context=context) + findability = getXML(entrancecontents, "findability", maxItems=1, context=context) + findability_description = getXML(entrancecontents, "findability_description", maxItems=1, context=context) + alt = getXML(entrancecontents, "alt", maxItems=1, context=context) + northing = getXML(entrancecontents, "northing", maxItems=1, context=context) + easting = getXML(entrancecontents, "easting", maxItems=1, context=context) + tag_station = getXML(entrancecontents, "tag_station", maxItems=1, context=context) + exact_station = getXML(entrancecontents, "exact_station", maxItems=1, context=context) + other_station = getXML(entrancecontents, "other_station", maxItems=1, context=context) + other_description = getXML(entrancecontents, "other_description", maxItems=1, context=context) + bearings = getXML(entrancecontents, "bearings", maxItems=1, context=context) + url = getXML(entrancecontents, "url", maxItems=1, context=context) + # if len(non_public) == 1 and len(slugs) >= 1 and len(name) >= 1 and len(entrance_description) == 1 and len(explorers) == 1 and len(map_description) == 1 and len(location_description) == 1 and len(lastvisit) == 1 and len(approach) == 1 and len(underground_description) == 1 and len(marking) == 1 and len(marking_comment) == 1 and len(findability) == 1 and len(findability_description) == 1 and len(alt) == 1 and len(northing) == 1 and len(easting) == 1 and len(tag_station) == 1 and len(exact_station) == 1 and len(other_station) == 1 and len(other_description) == 1 and len(bearings) == 1 and len(url) == 1: + e, state = Entrance.objects.update_or_create( + name=name[0], + non_public={ + "True": True, + "False": False, + "true": True, + "false": False, + }[non_public[0]], + entrance_description=entrance_description[0], + explorers=explorers[0], + map_description=map_description[0], + location_description=location_description[0], + lastvisit=lastvisit[0], + approach=approach[0], + underground_description=underground_description[0], + photo=photo[0], + marking=marking[0], + marking_comment=marking_comment[0], + findability=findability[0], + 
findability_description=findability_description[0], + alt=alt[0], + northing=northing[0], + easting=easting[0], + tag_station=tag_station[0], + exact_station=exact_station[0], + other_station=other_station[0], + other_description=other_description[0], + bearings=bearings[0], + url=url[0], + filename=filename, + cached_primary_slug=slugs[0], + ) primary = True for slug in slugs: - #print("entrance slug:{} filename:{}".format(slug, filename)) + # print("entrance slug:{} filename:{}".format(slug, filename)) try: - cs = EntranceSlug.objects.update_or_create(entrance = e, - slug = slug, - primary = primary) + cs = EntranceSlug.objects.update_or_create(entrance=e, slug=slug, primary=primary) except: # need to cope with duplicates message = f" ! FAILED to get precisely one ENTRANCE when updating using: cave_entrance/{filename}" - DataIssue.objects.create(parser='caves', message=message, url=f'/cave/{slug}/edit/') - kents = EntranceSlug.objects.all().filter(entrance = e, - slug = slug, - primary = primary) + DataIssue.objects.create(parser="caves", message=message, url=f"/cave/{slug}/edit/") + kents = EntranceSlug.objects.all().filter(entrance=e, slug=slug, primary=primary) for k in kents: - message = " ! - DUPLICATE in db. entrance:"+ str(k.entrance) + ", slug:" + str(k.slug()) - DataIssue.objects.create(parser='caves', message=message, url=f'/cave/{slug}/edit/') + message = " ! - DUPLICATE in db. entrance:" + str(k.entrance) + ", slug:" + str(k.slug()) + DataIssue.objects.create(parser="caves", message=message, url=f"/cave/{slug}/edit/") print(message) for k in kents: if k.slug() != None: - print(" ! - OVERWRITING this one: slug:"+ str(k.slug())) + print(" ! - OVERWRITING this one: slug:" + str(k.slug())) k.notes = "DUPLICATE entrance found on import. Please fix\n" + k.notes c = k primary = False # else: # more than one item in long list. But this is not an error, and the max and min have been checked by getXML - # slug = Path(filename).stem - # message = f' ! ABORT loading this entrance. in "{filename}"' - # DataIssue.objects.create(parser='caves', message=message, url=f'/cave/{slug}/edit/') - # print(message) + # slug = Path(filename).stem + # message = f' ! ABORT loading this entrance. in "{filename}"' + # DataIssue.objects.create(parser='caves', message=message, url=f'/cave/{slug}/edit/') + # print(message) + def readcave(filename): - '''Reads an enrance description from the .html file + """Reads an enrance description from the .html file Convoluted. Sorry.This is as I inherited it and I haven't fiddled with it. Needs rewriting Assumes any area it hasn't seen before is a subarea of 1623 - ''' + """ global entrances_xslug global caves_xslug global areas_xslug - + # Note: these are HTML files in the EXPOWEB repo, not from the loser repo. with open(os.path.join(CAVEDESCRIPTIONS, filename)) as f: contents = f.read() context = filename - cavecontentslist = getXML(contents, "cave", maxItems = 1, context = context) + cavecontentslist = getXML(contents, "cave", maxItems=1, context=context) if len(cavecontentslist) != 1: message = f'! 
BAD CAVE at "{filename}"' - DataIssue.objects.create(parser='caves', message=message) + DataIssue.objects.create(parser="caves", message=message) print(message) else: cavecontents = cavecontentslist[0] - non_public = getXML(cavecontents, "non_public", maxItems = 1, context = context) - slugs = getXML(cavecontents, "caveslug", maxItems = 1, context = context) - official_name = getXML(cavecontents, "official_name", maxItems = 1, context = context) - areas = getXML(cavecontents, "area", context = context) - kataster_code = getXML(cavecontents, "kataster_code", maxItems = 1, context = context) - kataster_number = getXML(cavecontents, "kataster_number", maxItems = 1, context = context) - unofficial_number = getXML(cavecontents, "unofficial_number", maxItems = 1, context = context) - explorers = getXML(cavecontents, "explorers", maxItems = 1, context = context) - underground_description = getXML(cavecontents, "underground_description", maxItems = 1, context = context) - equipment = getXML(cavecontents, "equipment", maxItems = 1, context = context) - references = getXML(cavecontents, "references", maxItems = 1, context = context) - survey = getXML(cavecontents, "survey", maxItems = 1, context = context) - kataster_status = getXML(cavecontents, "kataster_status", maxItems = 1, context = context) - underground_centre_line = getXML(cavecontents, "underground_centre_line", maxItems = 1, context = context) - notes = getXML(cavecontents, "notes", maxItems = 1, context = context) - length = getXML(cavecontents, "length", maxItems = 1, context = context) - depth = getXML(cavecontents, "depth", maxItems = 1, context = context) - extent = getXML(cavecontents, "extent", maxItems = 1, context = context) - survex_file = getXML(cavecontents, "survex_file", maxItems = 1, context = context) - description_file = getXML(cavecontents, "description_file", maxItems = 1, context = context) - url = getXML(cavecontents, "url", maxItems = 1, context = context) - entrances = getXML(cavecontents, "entrance", context = context) - - if len(non_public) == 1 and len(slugs) >= 1 and len(official_name) == 1 and len(areas) >= 1 and len(kataster_code) == 1 and len(kataster_number) == 1 and len(unofficial_number) == 1 and len(explorers) == 1 and len(underground_description) == 1 and len(equipment) == 1 and len(references) == 1 and len(survey) == 1 and len(kataster_status) == 1 and len(underground_centre_line) == 1 and len(notes) == 1 and len(length) == 1 and len(depth) == 1 and len(extent) == 1 and len(survex_file) == 1 and len(description_file ) == 1 and len(url) == 1: + non_public = getXML(cavecontents, "non_public", maxItems=1, context=context) + slugs = getXML(cavecontents, "caveslug", maxItems=1, context=context) + official_name = getXML(cavecontents, "official_name", maxItems=1, context=context) + areas = getXML(cavecontents, "area", context=context) + kataster_code = getXML(cavecontents, "kataster_code", maxItems=1, context=context) + kataster_number = getXML(cavecontents, "kataster_number", maxItems=1, context=context) + unofficial_number = getXML(cavecontents, "unofficial_number", maxItems=1, context=context) + explorers = getXML(cavecontents, "explorers", maxItems=1, context=context) + underground_description = getXML(cavecontents, "underground_description", maxItems=1, context=context) + equipment = getXML(cavecontents, "equipment", maxItems=1, context=context) + references = getXML(cavecontents, "references", maxItems=1, context=context) + survey = getXML(cavecontents, "survey", maxItems=1, context=context) + 
kataster_status = getXML(cavecontents, "kataster_status", maxItems=1, context=context) + underground_centre_line = getXML(cavecontents, "underground_centre_line", maxItems=1, context=context) + notes = getXML(cavecontents, "notes", maxItems=1, context=context) + length = getXML(cavecontents, "length", maxItems=1, context=context) + depth = getXML(cavecontents, "depth", maxItems=1, context=context) + extent = getXML(cavecontents, "extent", maxItems=1, context=context) + survex_file = getXML(cavecontents, "survex_file", maxItems=1, context=context) + description_file = getXML(cavecontents, "description_file", maxItems=1, context=context) + url = getXML(cavecontents, "url", maxItems=1, context=context) + entrances = getXML(cavecontents, "entrance", context=context) + + if ( + len(non_public) == 1 + and len(slugs) >= 1 + and len(official_name) == 1 + and len(areas) >= 1 + and len(kataster_code) == 1 + and len(kataster_number) == 1 + and len(unofficial_number) == 1 + and len(explorers) == 1 + and len(underground_description) == 1 + and len(equipment) == 1 + and len(references) == 1 + and len(survey) == 1 + and len(kataster_status) == 1 + and len(underground_centre_line) == 1 + and len(notes) == 1 + and len(length) == 1 + and len(depth) == 1 + and len(extent) == 1 + and len(survex_file) == 1 + and len(description_file) == 1 + and len(url) == 1 + ): try: - c, state = Cave.objects.update_or_create(non_public = {"True": True, "False": False, "true": True, "false": False,}[non_public[0]], - official_name = official_name[0], - kataster_code = kataster_code[0], - kataster_number = kataster_number[0], - unofficial_number = unofficial_number[0], - explorers = explorers[0], - underground_description = underground_description[0], - equipment = equipment[0], - references = references[0], - survey = survey[0], - kataster_status = kataster_status[0], - underground_centre_line = underground_centre_line[0], - notes = notes[0], - length = length[0], - depth = depth[0], - extent = extent[0], - survex_file = survex_file[0], - description_file = description_file[0], - url = url[0], - filename = filename) + c, state = Cave.objects.update_or_create( + non_public={ + "True": True, + "False": False, + "true": True, + "false": False, + }[non_public[0]], + official_name=official_name[0], + kataster_code=kataster_code[0], + kataster_number=kataster_number[0], + unofficial_number=unofficial_number[0], + explorers=explorers[0], + underground_description=underground_description[0], + equipment=equipment[0], + references=references[0], + survey=survey[0], + kataster_status=kataster_status[0], + underground_centre_line=underground_centre_line[0], + notes=notes[0], + length=length[0], + depth=depth[0], + extent=extent[0], + survex_file=survex_file[0], + description_file=description_file[0], + url=url[0], + filename=filename, + ) except: - print(" ! FAILED to get only one CAVE when updating using: "+filename) + print(" ! FAILED to get only one CAVE when updating using: " + filename) kaves = Cave.objects.all().filter(kataster_number=kataster_number[0]) for k in kaves: - message = " ! - DUPLICATES in db. kataster:"+ str(k.kataster_number) + ", slug:" + str(k.slug()) - DataIssue.objects.create(parser='caves', message=message) + message = " ! - DUPLICATES in db. kataster:" + str(k.kataster_number) + ", slug:" + str(k.slug()) + DataIssue.objects.create(parser="caves", message=message) print(message) for k in kaves: if k.slug() != None: - print(" ! - OVERWRITING this one: slug:"+ str(k.slug())) + print(" ! 
- OVERWRITING this one: slug:" + str(k.slug())) k.notes = "DUPLICATE kataster number found on import. Please fix\n" + k.notes c = k - + for area_slug in areas: if area_slug in areas_xslug: newArea = areas_xslug[area_slug] else: - area = Area.objects.filter(short_name = area_slug) + area = Area.objects.filter(short_name=area_slug) if area: newArea = area[0] else: - newArea = Area(short_name = area_slug, super = Area.objects.get(short_name = "1623")) + newArea = Area(short_name=area_slug, super=Area.objects.get(short_name="1623")) newArea.save() areas_xslug[area_slug] = newArea c.area.add(newArea) @@ -396,17 +439,15 @@ def readcave(filename): if slug in caves_xslug: cs = caves_xslug[slug] else: - try: # we want to overwrite a PENDING cave if we are now importing the 1623-xxx.html file for it - cs = CaveSlug.objects.update_or_create(cave = c, - slug = slug, - primary = primary) + try: # we want to overwrite a PENDING cave if we are now importing the 1623-xxx.html file for it + cs = CaveSlug.objects.update_or_create(cave=c, slug=slug, primary=primary) caves_xslug[slug] = cs except Exception as ex: # This fails to do an update! It just crashes.. to be fixed message = f" ! Cave update/create failure : {slug}, skipping file cave_data/{context} with exception\nException: {ex.__class__}" - DataIssue.objects.create(parser='caves', message=message) + DataIssue.objects.create(parser="caves", message=message) print(message) - + primary = False if not entrances or len(entrances) < 1: @@ -414,80 +455,87 @@ def readcave(filename): set_dummy_entrance(slug[5:], slug, c, msg="DUMMY") else: for entrance in entrances: - eslug = getXML(entrance, "entranceslug", maxItems = 1, context = context)[0] - letter = getXML(entrance, "letter", maxItems = 1, context = context)[0] - if len(entrances) == 1 and not eslug: # may be empty: + eslug = getXML(entrance, "entranceslug", maxItems=1, context=context)[0] + letter = getXML(entrance, "letter", maxItems=1, context=context)[0] + if len(entrances) == 1 and not eslug: # may be empty: set_dummy_entrance(slug[5:], slug, c, msg="DUMMY") else: try: if eslug in entrances_xslug: entrance = entrances_xslug[eslug] else: - entrance = Entrance.objects.get(entranceslug__slug = eslug) + entrance = Entrance.objects.get(entranceslug__slug=eslug) entrances_xslug[eslug] = entrance - ce = CaveAndEntrance.objects.update_or_create(cave = c, entrance_letter = letter, entrance = entrance) + ce = CaveAndEntrance.objects.update_or_create( + cave=c, entrance_letter=letter, entrance=entrance + ) except: message = f' ! Entrance setting failure, slug:"{slug}" #entrances:{len(entrances)} {entrance} letter:"{letter}" cave:"{c}" filename:"cave_data/{filename}"' - DataIssue.objects.create(parser='caves', message=message, url=f'{c.url}_edit/') + DataIssue.objects.create(parser="caves", message=message, url=f"{c.url}_edit/") print(message) - + if survex_file[0]: if not (Path(SURVEX_DATA) / survex_file[0]).is_file(): message = f' ! 
{slug:12} survex filename does not exist :LOSER:"{survex_file[0]}" in "{filename}"' - DataIssue.objects.create(parser='caves', message=message, url=f'/{slug[0:4]}/{slug}_cave_edit/') + DataIssue.objects.create(parser="caves", message=message, url=f"/{slug[0:4]}/{slug}_cave_edit/") print(message) - - if description_file[0]: # if not an empty string + if description_file[0]: # if not an empty string message = f' - {slug:12} Note (not an error): complex description filename "{description_file[0]}" inside "{CAVEDESCRIPTIONS}/{filename}"' - DataIssue.objects.create(parser='caves ok', message=message, url=f'/{slug}_cave_edit/') + DataIssue.objects.create(parser="caves ok", message=message, url=f"/{slug}_cave_edit/") print(message) if not (Path(EXPOWEB) / description_file[0]).is_file(): message = f' ! {slug:12} description filename "{EXPOWEB}/{description_file[0]}" does not refer to a real file' - DataIssue.objects.create(parser='caves', message=message, url=f'/{slug}_cave_edit/') + DataIssue.objects.create(parser="caves", message=message, url=f"/{slug}_cave_edit/") print(message) - #c.description_file="" # done only once, to clear out cruft. - #c.save() - else: # more than one item in long list + # c.description_file="" # done only once, to clear out cruft. + # c.save() + else: # more than one item in long list message = f' ! ABORT loading this cave. in "{filename}"' - DataIssue.objects.create(parser='caves', message=message, url=f'/{slug}_cave_edit/') + DataIssue.objects.create(parser="caves", message=message, url=f"/{slug}_cave_edit/") print(message) -def getXML(text, itemname, minItems = 1, maxItems = None, printwarnings = True, context = ""): - """Reads a single XML tag - """ + +def getXML(text, itemname, minItems=1, maxItems=None, printwarnings=True, context=""): + """Reads a single XML tag""" items = re.findall("<%(itemname)s>(.*?)" % {"itemname": itemname}, text, re.S) if len(items) < minItems and printwarnings: - message = " ! %(count)i x %(itemname)s found, at least %(min)i expected. Load ABORT. " % {"count": len(items), - "itemname": itemname, - "min": minItems} + " in file " + context - DataIssue.objects.create(parser='caves', message=message, url=""+context) + message = ( + " ! %(count)i x %(itemname)s found, at least %(min)i expected. Load ABORT. " + % {"count": len(items), "itemname": itemname, "min": minItems} + + " in file " + + context + ) + DataIssue.objects.create(parser="caves", message=message, url="" + context) print(message) - + if maxItems is not None and len(items) > maxItems and printwarnings: - message = " ! %(count)i x %(itemname)s found, no more than %(max)i expected in this XML unit. Load ABORT. " % {"count": len(items), - "itemname": itemname, - "max": maxItems} + " in file " + context - DataIssue.objects.create(parser='caves', message=message) + message = ( + " ! %(count)i x %(itemname)s found, no more than %(max)i expected in this XML unit. Load ABORT. " + % {"count": len(items), "itemname": itemname, "max": maxItems} + + " in file " + + context + ) + DataIssue.objects.create(parser="caves", message=message) print(message) if minItems == 0: if not items: - items = [ "" ] + items = [""] return items - + + def readcaves(): - '''Reads the xml-format HTML files in the EXPOWEB repo, not from the loser repo. 
- ''' + """Reads the xml-format HTML files in the EXPOWEB repo, not from the loser repo.""" # For those caves which do not have cave_data/1623-xxx.html XML files even though they exist and have surveys - # should put this in a simple list + # should put this in a simple list pending = set() fpending = Path(CAVEDESCRIPTIONS, "pendingcaves.txt") if fpending.is_file(): with open(fpending, "r") as fo: cids = fo.readlines() for cid in cids: - pending.add(cid.strip().rstrip('\n').upper()) + pending.add(cid.strip().rstrip("\n").upper()) with transaction.atomic(): print(" - Deleting Caves and Entrances") @@ -505,55 +553,53 @@ def readcaves(): except: pass # Clear the cave data issues and the caves as we are reloading - DataIssue.objects.filter(parser='areas').delete() - DataIssue.objects.filter(parser='caves').delete() - DataIssue.objects.filter(parser='caves ok').delete() - DataIssue.objects.filter(parser='entrances').delete() - + DataIssue.objects.filter(parser="areas").delete() + DataIssue.objects.filter(parser="caves").delete() + DataIssue.objects.filter(parser="caves ok").delete() + DataIssue.objects.filter(parser="entrances").delete() + print(" - Creating Areas 1623, 1624, 1627 and 1626") # This crashes on the server with MariaDB even though a null parent is explicitly allowed. - area_1623= Area.objects.create(short_name = "1623", super=None) + area_1623 = Area.objects.create(short_name="1623", super=None) area_1623.save() - area_1624= Area.objects.create(short_name = "1624", super=None) - area_1624.save() - area_1626= Area.objects.create(short_name = "1626", super=None) + area_1624 = Area.objects.create(short_name="1624", super=None) + area_1624.save() + area_1626 = Area.objects.create(short_name="1626", super=None) area_1626.save() - area_1627= Area.objects.create(short_name = "1627", super=None) + area_1627 = Area.objects.create(short_name="1627", super=None) area_1627.save() - with transaction.atomic(): print(" - settings.CAVEDESCRIPTIONS: ", CAVEDESCRIPTIONS) print(" - Reading Entrances from entrance descriptions xml files") - for filename in next(os.walk(ENTRANCEDESCRIPTIONS))[2]: #Should be a better way of getting a list of files + for filename in next(os.walk(ENTRANCEDESCRIPTIONS))[2]: # Should be a better way of getting a list of files # if filename.endswith('.html'): - # if Path(filename).stem[5:] in pending: - # print(f'Skipping pending entrance dummy file <{filename}>') - # else: - # readentrance(filename) + # if Path(filename).stem[5:] in pending: + # print(f'Skipping pending entrance dummy file <{filename}>') + # else: + # readentrance(filename) readentrance(filename) print(" - Reading Caves from cave descriptions xml files") - for filename in next(os.walk(CAVEDESCRIPTIONS))[2]: #Should be a better way of getting a list of files - if filename.endswith('.html'): + for filename in next(os.walk(CAVEDESCRIPTIONS))[2]: # Should be a better way of getting a list of files + if filename.endswith(".html"): readcave(filename) - print (" - Setting up all the variously useful alias names") + print(" - Setting up all the variously useful alias names") mycavelookup = GetCaveLookup() - - print (" - Setting pending caves") + + print(" - Setting pending caves") # Do this last, so we can detect if they are created and no longer 'pending' with transaction.atomic(): for k in pending: - + if k[0:3] == "162": areanum = k[0:4] - url = f'{areanum}/{k[5:]}' # Note we are not appending the .htm as we are modern folks now. 
+ url = f"{areanum}/{k[5:]}" # Note we are not appending the .htm as we are modern folks now. else: areanum = "1623" - url = f'1623/{k}' - + url = f"1623/{k}" area = area_1623 if areanum == "1623": @@ -564,12 +610,10 @@ def readcaves(): area = area_1626 if areanum == "1627": area = area_1627 - try: + try: do_pending_cave(k, url, area) except: message = f" ! Error. Cannot create pending cave and entrance, pending-id:{k} in area {areanum}" - DataIssue.objects.create(parser='caves', message=message) + DataIssue.objects.create(parser="caves", message=message) print(message) raise - - diff --git a/parsers/drawings.py b/parsers/drawings.py index e3630b0..d3e8491 100644 --- a/parsers/drawings.py +++ b/parsers/drawings.py @@ -13,11 +13,11 @@ from troggle.core.models.survex import DrawingFile, SingleScan, Wallet from troggle.core.models.troggle import DataIssue from troggle.core.utils import save_carefully -'''Searches through all the :drawings: repository looking +"""Searches through all the :drawings: repository looking for tunnel and therion files -''' +""" -todo='''- Rename functions more consistently between tunnel and therion variants +todo = """- Rename functions more consistently between tunnel and therion variants - Recode to use pathlib instead of whacky resetting of loop variable inside loop to scan sub-folders. @@ -25,20 +25,23 @@ to scan sub-folders. - Recode rx_valid_ext to use pathlib suffix() function - Recode load_drawings_files() to use a list of suffices not huge if-else monstrosity -''' +""" + +rx_valid_ext = re.compile(r"(?i)\.(?:png|jpg|pdf|jpeg|gif|txt)$") -rx_valid_ext = re.compile(r'(?i)\.(?:png|jpg|pdf|jpeg|gif|txt)$') def find_dwg_file(dwgfile, path): - '''Is given a line of text 'path' which may or may not contain a recognisable name of a scanned file + """Is given a line of text 'path' which may or may not contain a recognisable name of a scanned file which we have already seen when we imported all the files we could find in the surveyscans direstories. - + The purpose is to find cross-references between Tunnel drawing files. But this is not reported anywhere yet ? - + What is all this really for ?! Is this data used anywhere ?? - ''' + """ wallet, scansfile = None, None - mscansdir = re.search(r"(\d\d\d\d#X?\d+\w?|1995-96kh|92-94Surveybookkh|1991surveybook|smkhs)/(.*?(?:png|jpg|pdf|jpeg|gif|txt))$", path) + mscansdir = re.search( + r"(\d\d\d\d#X?\d+\w?|1995-96kh|92-94Surveybookkh|1991surveybook|smkhs)/(.*?(?:png|jpg|pdf|jpeg|gif|txt))$", path + ) if mscansdir: scanswalletl = Wallet.objects.filter(walletname=mscansdir.group(1)) # This should be changed to properly detect if a list of folders is returned and do something sensible, not just pick the first. @@ -47,44 +50,46 @@ def find_dwg_file(dwgfile, path): if len(scanswalletl) > 1: message = f"! More than one scan FOLDER matches filter query. [{scansfilel[0]}]: {mscansdir.group(1)} {mscansdir.group(2)} {dwgfile.dwgpath} {path}" print(message) - DataIssue.objects.create(parser='Tunnel', message=message) - + DataIssue.objects.create(parser="Tunnel", message=message) + if wallet: scansfilel = wallet.singlescan_set.filter(name=mscansdir.group(2)) if len(scansfilel): if len(scansfilel) > 1: - plist =[] + plist = [] for sf in scansfilel: plist.append(sf.ffile) message = f"! More than one image FILENAME matches filter query. 
[{scansfilel[0]}]: {mscansdir.group(1)} {mscansdir.group(2)} {dwgfile.dwgpath} {path} {plist}" print(message) - DataIssue.objects.create(parser='Tunnel', message=message) + DataIssue.objects.create(parser="Tunnel", message=message) scansfile = scansfilel[0] if wallet: dwgfile.dwgwallets.add(wallet) if scansfile: dwgfile.scans.add(scansfile) - - elif path and not rx_valid_ext.search(path): # ie not recognised as a path where wallets live and not an image file type + + elif path and not rx_valid_ext.search( + path + ): # ie not recognised as a path where wallets live and not an image file type name = os.path.split(path)[1] - rdwgfilel = DrawingFile.objects.filter(dwgname=name) # Check if it is another drawing file we have already seen + rdwgfilel = DrawingFile.objects.filter(dwgname=name) # Check if it is another drawing file we have already seen if len(rdwgfilel): if len(rdwgfilel) > 1: - plist =[] + plist = [] for df in rdwgfilel: plist.append(df.dwgpath) - message = f"- Warning {len(rdwgfilel)} files named '{name}' {plist}" # should not be a problem? + message = f"- Warning {len(rdwgfilel)} files named '{name}' {plist}" # should not be a problem? print(message) - DataIssue.objects.create(parser='Tunnel', message=message, url=f'/dwgdataraw/{path}') + DataIssue.objects.create(parser="Tunnel", message=message, url=f"/dwgdataraw/{path}") rdwgfile = rdwgfilel[0] dwgfile.dwgcontains.add(rdwgfile) dwgfile.save() + def findwalletimage(therionfile, foundpath): - '''Tries to link the drawing file (Therion format) to the referenced image (scan) file - ''' + """Tries to link the drawing file (Therion format) to the referenced image (scan) file""" foundpath = foundpath.strip("{}") mscansdir = re.search(r"(\d\d\d\d#\d+\w?|1995-96kh|92-94Surveybookkh|1991surveybook|smkhs)", foundpath) if mscansdir: @@ -93,165 +98,170 @@ def findwalletimage(therionfile, foundpath): if len(scanswalletl): wallet = scanswalletl[0] if len(scanswalletl) > 1: - message = "! More than one scan FOLDER matches filter query. [{}]: {} {} {}".format(therionfile, mscansdir.group(1), foundpath) + message = "! More than one scan FOLDER matches filter query. [{}]: {} {} {}".format( + therionfile, mscansdir.group(1), foundpath + ) print(message) - DataIssue.objects.create(parser='Therion', message=message) + DataIssue.objects.create(parser="Therion", message=message) if wallet: therionfile.dwgwallets.add(wallet) - + scanfilename = Path(foundpath).name scansfilel = wallet.singlescan_set.filter(name=scanfilename, wallet=wallet) if len(scansfilel): # message = f'! {len(scansfilel)} {scansfilel} = {scanfilename} found in the wallet specified {wallet.walletname}' # print(message) if len(scansfilel) > 1: - plist =[] + plist = [] for sf in scansfilel: plist.append(sf.ffile) message = f"! More than one image FILENAME matches filter query. [{scansfilel[0]}]: {mscansdir.group(1)} {mscansdir.group(2)} {dwgfile.dwgpath} {path} {plist}" print(message) - DataIssue.objects.create(parser='Therion', message=message) + DataIssue.objects.create(parser="Therion", message=message) scansfile = scansfilel[0] therionfile.scans.add(scansfile) else: message = f'! 
Scanned file {scanfilename} mentioned in "{therionfile.dwgpath}" is not actually found in {wallet.walletname}' - wurl = f'/survey_scans/{wallet.walletname}/'.replace("#",":") + wurl = f"/survey_scans/{wallet.walletname}/".replace("#", ":") # print(message) - DataIssue.objects.create(parser='Therion', message=message, url = wurl) + DataIssue.objects.create(parser="Therion", message=message, url=wurl) def findimportinsert(therionfile, imp): - '''Tries to link the scrap (Therion format) to the referenced therion scrap - ''' + """Tries to link the scrap (Therion format) to the referenced therion scrap""" pass -rx_xth_me = re.compile(r'xth_me_image_insert.*{.*}$', re.MULTILINE) -rx_scrap = re.compile(r'^survey (\w*).*$', re.MULTILINE) -rx_input = re.compile(r'^input (\w*).*$', re.MULTILINE) + +rx_xth_me = re.compile(r"xth_me_image_insert.*{.*}$", re.MULTILINE) +rx_scrap = re.compile(r"^survey (\w*).*$", re.MULTILINE) +rx_input = re.compile(r"^input (\w*).*$", re.MULTILINE) + def settherionfileinfo(filetuple): - '''Read in the drawing file contents and sets values on the dwgfile object - ''' + """Read in the drawing file contents and sets values on the dwgfile object""" thtype, therionfile = filetuple - + ff = os.path.join(settings.DRAWINGS_DATA, therionfile.dwgpath) therionfile.filesize = os.stat(ff)[stat.ST_SIZE] if therionfile.filesize <= 0: message = f"! Zero length therion file {ff}" print(message) - DataIssue.objects.create(parser='Therion', message=message, url=f'/dwgdataraw/{therionfile.dwgpath}') + DataIssue.objects.create(parser="Therion", message=message, url=f"/dwgdataraw/{therionfile.dwgpath}") return - fin = open(ff,'r') + fin = open(ff, "r") ttext = fin.read() fin.close() - + # The equivalent for a tunnel 'path' would be a .th2 'line wall' or 'scrap' # print(len(re.findall(r"line", ttext))) - if thtype=='th': + if thtype == "th": therionfile.npaths = len(re.findall(r"^input ", ttext, re.MULTILINE)) - elif thtype=='th2': + elif thtype == "th2": therionfile.npaths = len(re.findall(r"^line ", ttext, re.MULTILINE)) therionfile.save() - + # scan and look for survex blocks that might have been included, and image scans (as for tunnel drawings) # which would populate dwgfile.survexfile - + # in .th2 files: # ##XTHERION## xth_me_image_insert {500 1 1.0} {1700 {}} ../../../expofiles/surveyscans/2014/01popped_elev1.jpeg 0 {} # scrap blownout -projection plan -scale [-81.0 -42.0 216.0 -42.0 0.0 0.0 7.5438 0.0 m] - + for xth_me in rx_xth_me.findall(ttext): # WORK IN PROGRESS. Do not clutter up the DataIssues list with this - message = f'! Un-parsed image filename: {therionfile.dwgname} : {xth_me.split()[-3]} - {therionfile.dwgpath}' + message = f"! Un-parsed image filename: {therionfile.dwgname} : {xth_me.split()[-3]} - {therionfile.dwgpath}" # print(message) # DataIssue.objects.create(parser='xTherion', message=message, url=f'/dwgdataraw/{therionfile.dwgpath}') # ! Un-parsed image filename: 107coldest : ../../../expofiles/surveyscans/2015/2015#20/notes.jpg - therion/plan/107coldest.th2 - with open('therionrefs.log', 'a') as lg: - lg.write(message + '\n') + with open("therionrefs.log", "a") as lg: + lg.write(message + "\n") findwalletimage(therionfile, xth_me.split()[-3]) - + for inp in rx_input.findall(ttext): # if this 'input' is a .th2 file we have already seen, then we can assign this as a sub-file # but we would need to disentangle to get the current path properly - message = f'! Un-set (?) 
Therion .th2 input: - {therionfile.dwgname} : {inp} - {therionfile.dwgpath}' - #print(message) - DataIssue.objects.create(parser='xTherion', message=message, url=f'/dwgdataraw/{therionfile.dwgpath}') + message = f"! Un-set (?) Therion .th2 input: - {therionfile.dwgname} : {inp} - {therionfile.dwgpath}" + # print(message) + DataIssue.objects.create(parser="xTherion", message=message, url=f"/dwgdataraw/{therionfile.dwgpath}") findimportinsert(therionfile, inp) - + therionfile.save() - -rx_skpath = re.compile(rb' # - + for path, style in rx_pcpath.findall(ttext): find_dwg_file(dwgfile, path.decode()) - + # should also scan and look for survex blocks that might have been included, and image scans # which would populate dwgfile.survexfile dwgfile.save() + def setdrwfileinfo(dwgfile): - '''Read in the drawing file contents and sets values on the dwgfile object, + """Read in the drawing file contents and sets values on the dwgfile object, but these are SVGs, PDFs or .txt files, so there is no useful format to search for This function is a placeholder in case we thnk of a way to do something to recognise generic survex filenames. - ''' + """ ff = Path(settings.DRAWINGS_DATA) / dwgfile.dwgpath dwgfile.filesize = ff.stat().st_size if dwgfile.filesize <= 0: message = f"! Zero length drawing file {ff}" print(message) - DataIssue.objects.create(parser='drawings', message=message, url=f'/dwgdataraw/{dwgfile.dwgpath}') + DataIssue.objects.create(parser="drawings", message=message, url=f"/dwgdataraw/{dwgfile.dwgpath}") return + def load_drawings_files(): - '''Breadth first search of drawings directory looking for sub-directories and *.xml filesize + """Breadth first search of drawings directory looking for sub-directories and *.xml filesize This is brain-damaged very early code. Should be replaced with proper use of pathlib. - - Why do we have all this detection of file types/! Why not use get_mime_types ? + + Why do we have all this detection of file types/! Why not use get_mime_types ? What is it all for ?? - + We import JPG, PNG and SVG files; which have already been put on the server, but the upload form intentionally refuses to upload PNG and JPG (though it does allow SVG) - ''' + """ all_xml = [] drawdatadir = settings.DRAWINGS_DATA DrawingFile.objects.all().delete() - DataIssue.objects.filter(parser='drawings').delete() - DataIssue.objects.filter(parser='Therion').delete() - DataIssue.objects.filter(parser='xTherion').delete() - DataIssue.objects.filter(parser='Tunnel').delete() - if(os.path.isfile('therionrefs.log')): - os.remove('therionrefs.log') - + DataIssue.objects.filter(parser="drawings").delete() + DataIssue.objects.filter(parser="Therion").delete() + DataIssue.objects.filter(parser="xTherion").delete() + DataIssue.objects.filter(parser="Tunnel").delete() + if os.path.isfile("therionrefs.log"): + os.remove("therionrefs.log") - drawingsdirs = [ "" ] + drawingsdirs = [""] while drawingsdirs: drawdir = drawingsdirs.pop() for f in os.listdir(os.path.join(drawdatadir, drawdir)): @@ -260,65 +270,67 @@ def load_drawings_files(): lf = os.path.join(drawdir, f) ff = os.path.join(drawdatadir, lf) if os.path.isdir(ff): - drawingsdirs.append(lf) # lunatic! adding to list in middle of list while loop! Replace with pathlib functions. + drawingsdirs.append( + lf + ) # lunatic! adding to list in middle of list while loop! Replace with pathlib functions. 
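                    # A possible pathlib-based rewrite (a sketch only, following the todo note at the
                    # top of this module; 'drawdata', 'lf' and 'suffix' are illustrative names, and
                    # settings.DRAWINGS_DATA is assumed to be available exactly as it is used above):
                    #
                    #     from pathlib import Path
                    #     drawdata = Path(settings.DRAWINGS_DATA)
                    #     for ff in drawdata.rglob("*"):            # recursive walk, no manual queue of sub-dirs
                    #         if ff.is_file() and not ff.name.startswith("."):
                    #             lf = ff.relative_to(drawdata)     # same relative path as 'lf' in the loop above
                    #             suffix = ff.suffix.lower()        # feeds the suffix dispatch that follows
                    #
                    # This avoids appending to 'drawingsdirs' while that list is being consumed.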
elif Path(f).suffix.lower() == ".txt": # Always creates new dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1]) dwgfile.save() - all_xml.append(('txt',dwgfile)) + all_xml.append(("txt", dwgfile)) elif Path(f).suffix.lower() == ".xml": # Always creates new dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1]) dwgfile.save() - all_xml.append(('xml',dwgfile)) + all_xml.append(("xml", dwgfile)) elif Path(f).suffix.lower() == ".th": # Always creates new dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1]) dwgfile.save() - all_xml.append(('th',dwgfile)) + all_xml.append(("th", dwgfile)) elif Path(f).suffix.lower() == ".th2": # Always creates new dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1]) dwgfile.save() - all_xml.append(('th2',dwgfile)) + all_xml.append(("th2", dwgfile)) elif Path(f).suffix.lower() == ".pdf": # Always creates new dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1]) dwgfile.save() - all_xml.append(('pdf',dwgfile)) + all_xml.append(("pdf", dwgfile)) elif Path(f).suffix.lower() == ".png": # Always creates new dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1]) dwgfile.save() - all_xml.append(('png',dwgfile)) + all_xml.append(("png", dwgfile)) elif Path(f).suffix.lower() == ".svg": # Always creates new dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1]) dwgfile.save() - all_xml.append(('svg',dwgfile)) + all_xml.append(("svg", dwgfile)) elif Path(f).suffix.lower() == ".jpg": # Always creates new dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1]) dwgfile.save() - all_xml.append(('jpg',dwgfile)) - elif Path(f).suffix == '': + all_xml.append(("jpg", dwgfile)) + elif Path(f).suffix == "": # therion file dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f)[1]) dwgfile.save() - all_xml.append(('',dwgfile)) + all_xml.append(("", dwgfile)) - print(f' - {len(all_xml)} Drawings files found') + print(f" - {len(all_xml)} Drawings files found") for d in all_xml: - if d[0] in ['pdf', 'txt', 'svg', 'jpg', 'png', '']: + if d[0] in ["pdf", "txt", "svg", "jpg", "png", ""]: setdrwfileinfo(d[1]) - if d[0] == 'xml': + if d[0] == "xml": settnlfileinfo(d[1]) # important to import .th2 files before .th so that we can assign them when found in .th files - if d[0] == 'th2': + if d[0] == "th2": settherionfileinfo(d) - if d[0] == 'th': + if d[0] == "th": settherionfileinfo(d) - + # for drawfile in DrawingFile.objects.all(): - # SetTunnelfileInfo(drawfile) + # SetTunnelfileInfo(drawfile) diff --git a/parsers/imports.py b/parsers/imports.py index d8fa87d..d59c792 100644 --- a/parsers/imports.py +++ b/parsers/imports.py @@ -4,8 +4,7 @@ import sys import django from django.contrib.auth.models import User from django.core import management -from django.db import (close_old_connections, connection, connections, - transaction) +from django.db import close_old_connections, connection, connections, transaction from django.http import HttpResponse import troggle.parsers.caves @@ -16,46 +15,53 @@ import troggle.parsers.QMs import troggle.parsers.scans import troggle.settings -'''Master data import. +"""Master data import. Used only by databaseReset.py and online controlpanel. 
-''' +""" + def import_caves(): - print("-- Importing Caves to ",end="") - print(django.db.connections.databases['default']['NAME']) + print("-- Importing Caves to ", end="") + print(django.db.connections.databases["default"]["NAME"]) troggle.parsers.caves.readcaves() + def import_people(): - print("-- Importing People (folk.csv) to ",end="") - print(django.db.connections.databases['default']['NAME']) + print("-- Importing People (folk.csv) to ", end="") + print(django.db.connections.databases["default"]["NAME"]) with transaction.atomic(): troggle.parsers.people.load_people_expos() + def import_surveyscans(): print("-- Importing Survey Scans") with transaction.atomic(): troggle.parsers.scans.load_all_scans() + def import_logbooks(): print("-- Importing Logbooks") with transaction.atomic(): troggle.parsers.logbooks.LoadLogbooks() + def import_logbook(year=2022): print(f"-- Importing Logbook {year}") with transaction.atomic(): troggle.parsers.logbooks.LoadLogbook(year) + def import_QMs(): print("-- Importing old QMs for 161, 204, 234 from CSV files") with transaction.atomic(): troggle.parsers.QMs.Load_QMs() + def import_survex(): # when this import is moved to the top with the rest it all crashes horribly print("-- Importing Survex and Entrance Positions") with transaction.atomic(): - import troggle.parsers.survex + import troggle.parsers.survex print(" - Survex Blocks") with transaction.atomic(): troggle.parsers.survex.LoadSurvexBlocks() @@ -63,23 +69,26 @@ def import_survex(): with transaction.atomic(): troggle.parsers.survex.LoadPositions() + def import_ents(): # when this import is moved to the top with the rest it all crashes horribly print(" - Survex entrances x/y/z Positions") with transaction.atomic(): - import troggle.parsers.survex + import troggle.parsers.survex + troggle.parsers.survex.LoadPositions() + def import_loadpos(): # when this import is moved to the top with the rest it all crashes horribly - import troggle.parsers.survex + import troggle.parsers.survex + print(" - Survex entrances x/y/z Positions") with transaction.atomic(): troggle.parsers.survex.LoadPositions() + def import_drawingsfiles(): print("-- Importing Drawings files") with transaction.atomic(): troggle.parsers.drawings.load_drawings_files() - - diff --git a/parsers/logbooks.py b/parsers/logbooks.py index df5b9d0..61f17e3 100644 --- a/parsers/logbooks.py +++ b/parsers/logbooks.py @@ -11,17 +11,16 @@ from django.template.defaultfilters import slugify from django.utils.timezone import get_current_timezone, make_aware from parsers.people import GetPersonExpeditionNameLookup -from troggle.core.models.caves import (Cave, GetCaveLookup, LogbookEntry, - PersonTrip) +from troggle.core.models.caves import Cave, GetCaveLookup, LogbookEntry, PersonTrip from troggle.core.models.troggle import DataIssue, Expedition from troggle.core.utils import TROG, save_carefully -''' +""" Parses and imports logbooks in all their wonderful confusion See detailed explanation of the complete process: https://expo.survex.com/handbook/computing/logbooks-parsing.html -''' -todo=''' +""" +todo = """ - refactor everything with some urgency, esp. LoadLogbookForExpedition() - remove the TROG things since we need the database for multiuser access? Or not? @@ -47,67 +46,101 @@ todo=''' - use Fixtures https://docs.djangoproject.com/en/4.1/ref/django-admin/#django-admin-loaddata to cache data for old logbooks? Not worth it.. 
-''' +""" MAX_LOGBOOK_ENTRY_TITLE_LENGTH = 200 BLOG_PARSER_SETTINGS = { -# "2022": ("ukcavingblog.html", "parser_blog"), # now folded in to logbooks.html -# "2019": ("ukcavingblog.html", "parser_blog"), # now folded in to logbooks.html -# "2018": ("ukcavingblog.html", "parser_blog"), # now folded in to logbooks.html -# "2017": ("ukcavingblog.html", "parser_blog"), # now folded in to logbooks.html - } + # "2022": ("ukcavingblog.html", "parser_blog"), # now folded in to logbooks.html + # "2019": ("ukcavingblog.html", "parser_blog"), # now folded in to logbooks.html + # "2018": ("ukcavingblog.html", "parser_blog"), # now folded in to logbooks.html + # "2017": ("ukcavingblog.html", "parser_blog"), # now folded in to logbooks.html +} DEFAULT_LOGBOOK_FILE = "logbook.html" DEFAULT_LOGBOOK_PARSER = "parser_html" # All years since 2002 use the default value for Logbook parser # dont forget to update expoweb/pubs.htm to match. -LOGBOOK_PARSER_SETTINGS = { - "2002": ("logbook.html", "parser_html"), - "2001": ("log.htm", "parser_html_01"), - "2000": ("log.htm", "parser_html_01"), - "1999": ("log.htm", "parser_html_01"), - "1998": ("log.htm", "parser_html_01"), - "1997": ("log.htm", "parser_html_01"), - "1996": ("log.htm", "parser_html_01"), - "1995": ("log.htm", "parser_html_01"), - "1994": ("logbook.html", "parser_html"), - "1993": ("logbook.html", "parser_html"), - "1992": ("logbook.html", "parser_html"), - "1991": ("logbook.html", "parser_html"), - "1990": ("logbook.html", "parser_html"), - "1989": ("logbook.html", "parser_html"), - "1988": ("logbook.html", "parser_html"), - "1987": ("logbook.html", "parser_html"), - "1985": ("logbook.html", "parser_html"), - "1984": ("logbook.html", "parser_html"), - "1983": ("logbook.html", "parser_html"), - "1982": ("logbook.html", "parser_html"), - } +LOGBOOK_PARSER_SETTINGS = { + "2002": ("logbook.html", "parser_html"), + "2001": ("log.htm", "parser_html_01"), + "2000": ("log.htm", "parser_html_01"), + "1999": ("log.htm", "parser_html_01"), + "1998": ("log.htm", "parser_html_01"), + "1997": ("log.htm", "parser_html_01"), + "1996": ("log.htm", "parser_html_01"), + "1995": ("log.htm", "parser_html_01"), + "1994": ("logbook.html", "parser_html"), + "1993": ("logbook.html", "parser_html"), + "1992": ("logbook.html", "parser_html"), + "1991": ("logbook.html", "parser_html"), + "1990": ("logbook.html", "parser_html"), + "1989": ("logbook.html", "parser_html"), + "1988": ("logbook.html", "parser_html"), + "1987": ("logbook.html", "parser_html"), + "1985": ("logbook.html", "parser_html"), + "1984": ("logbook.html", "parser_html"), + "1983": ("logbook.html", "parser_html"), + "1982": ("logbook.html", "parser_html"), +} -entries = { "2022": 89, "2019": 55, "2018": 95, "2017": 74, "2016": 86, "2015": 80, - "2014": 65, "2013": 52, "2012": 75, "2011": 71, "2010": 22, "2009": 53, - "2008": 49, "2007": 113, "2006": 60, "2005": 55, "2004": 76, "2003": 42, "2002": 31, - "2001": 49, "2000": 54, "1999": 79, "1998": 43, "1997": 53, "1996": 95, "1995": 42, - "1994": 32, "1993": 41, "1992": 62, "1991": 39, "1990": 87, "1989": 63,"1988": 61,"1987": 34, - "1985": 24, "1984": 32, "1983": 52, "1982": 42,} +entries = { + "2022": 89, + "2019": 55, + "2018": 95, + "2017": 74, + "2016": 86, + "2015": 80, + "2014": 65, + "2013": 52, + "2012": 75, + "2011": 71, + "2010": 22, + "2009": 53, + "2008": 49, + "2007": 113, + "2006": 60, + "2005": 55, + "2004": 76, + "2003": 42, + "2002": 31, + "2001": 49, + "2000": 54, + "1999": 79, + "1998": 43, + "1997": 53, + "1996": 95, + "1995": 42, + "1994": 
32, + "1993": 41, + "1992": 62, + "1991": 39, + "1990": 87, + "1989": 63, + "1988": 61, + "1987": 34, + "1985": 24, + "1984": 32, + "1983": 52, + "1982": 42, +} -logentries = [] # the entire logbook for one year is a single object: a list of entries -noncaveplaces = [ "Journey", "Loser Plateau", "UNKNOWN", 'plateau', - 'base camp', 'basecamp', 'top camp', 'topcamp' ] -logdataissues = TROG['issues']['logdataissues'] -trips ={} +logentries = [] # the entire logbook for one year is a single object: a list of entries +noncaveplaces = ["Journey", "Loser Plateau", "UNKNOWN", "plateau", "base camp", "basecamp", "top camp", "topcamp"] +logdataissues = TROG["issues"]["logdataissues"] +trips = {} # # the logbook loading section # def set_trip_id(year, seq): - tid= f"{year}_s{seq:02d}" + tid = f"{year}_s{seq:02d}" return tid -rx_tripperson = re.compile(r'(?i)(.*?)$') + +rx_tripperson = re.compile(r"(?i)(.*?)$") rx_round_bracket = re.compile(r"[\(\[].*?[\)\]]") - + def GetTripPersons(trippeople, expedition, logtime_underground, tid=None): - res = [ ] + res = [] author = None # print(f'# {tid}') # print(f" - {tid} '{trippeople}' ") @@ -118,56 +151,55 @@ def GetTripPersons(trippeople, expedition, logtime_underground, tid=None): mul = rx_tripperson.match(tripperson) if mul: tripperson = mul.group(1).strip() - if tripperson and tripperson[0] != '*': + if tripperson and tripperson[0] != "*": tripperson = re.sub(rx_round_bracket, "", tripperson).strip() - - # these aliases should be moved to people.py GetPersonExpeditionNameLookup(expedition) - if tripperson =="Wiggy": - tripperson = "Phil Wigglesworth" - if tripperson =="Animal": - tripperson = "Mike Richardson" - if tripperson =="MikeTA": - tripperson = "Mike Richardson" - if tripperson =="CavingPig": - tripperson = "Elaine Oliver" - if tripperson =="nobrotson": - tripperson = "Rob Watson" - if tripperson =="Tinywoman": - tripperson = "Nadia" - if tripperson =="tcacrossley": - tripperson = "Tom Crossley" - if tripperson =="Samouse1": - tripperson = "Todd Rye" - - + # these aliases should be moved to people.py GetPersonExpeditionNameLookup(expedition) + if tripperson == "Wiggy": + tripperson = "Phil Wigglesworth" + if tripperson == "Animal": + tripperson = "Mike Richardson" + if tripperson == "MikeTA": + tripperson = "Mike Richardson" + if tripperson == "CavingPig": + tripperson = "Elaine Oliver" + if tripperson == "nobrotson": + tripperson = "Rob Watson" + if tripperson == "Tinywoman": + tripperson = "Nadia" + if tripperson == "tcacrossley": + tripperson = "Tom Crossley" + if tripperson == "Samouse1": + tripperson = "Todd Rye" + personyear = GetPersonExpeditionNameLookup(expedition).get(tripperson.lower()) if not personyear: - message = f" ! - {expedition.year} No name match for: '{tripperson}' in entry {tid=} for this expedition year." + message = f" ! - {expedition.year} No name match for: '{tripperson}' in entry {tid=} for this expedition year." 
print(message) - DataIssue.objects.create(parser='logbooks', message=message) - logdataissues[tid]=message + DataIssue.objects.create(parser="logbooks", message=message) + logdataissues[tid] = message res.append((personyear, logtime_underground)) if mul: author = personyear if not author: if not res: return "", 0 - author = res[-1][0] # the previous valid person and a time of 0 hours - - #print(f" - {tid} [{author.person}] '{res[0][0].person}'...") + author = res[-1][0] # the previous valid person and a time of 0 hours + + # print(f" - {tid} [{author.person}] '{res[0][0].person}'...") return res, author + def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_underground, tid=None): - """ saves a logbook entry and related persontrips + """saves a logbook entry and related persontrips Does NOT save the expeditionday_id - all NULLs. why? Because we are deprecating expeditionday ! - - troggle.log shows that we are creating lots of duplicates, which is no no problem with SQL as they just overwrite but we are saving the same thing too many times.. - + + troggle.log shows that we are creating lots of duplicates, which is no no problem with SQL as they just overwrite but we are saving the same thing too many times.. + Until 18 Dec.2022, this was overwriting logbook entries for the same date with the same title, because lookupAttribs={'date':date, 'title':title} """ - + # Nasty hack, must tidy this up.. if logtime_underground: try: @@ -188,68 +220,75 @@ def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_ # print(f" - {author} - {logtime_underground}") except: message = f" ! - {expedition.year} Skipping logentry: {title} - GetTripPersons FAIL" - DataIssue.objects.create(parser='logbooks', message=message) - logdataissues["title"]=message + DataIssue.objects.create(parser="logbooks", message=message) + logdataissues["title"] = message print(message) raise return - + if not author: message = f" ! - {expedition.year} Warning: logentry: {title} - no expo member author for entry '{tid}'" - DataIssue.objects.create(parser='logbooks', message=message) - logdataissues["title"]=message + DataIssue.objects.create(parser="logbooks", message=message) + logdataissues["title"] = message print(message) - #return + # return # This needs attention. The slug field is derived from 'title' # both GetCaveLookup() and GetTripCave() need to work together better. None of this data is *used* though? 
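    # The lookupAttribs / nonLookupAttribs split used a little further down behaves like
    # Django's update_or_create: the lookup fields identify the row, the rest are applied
    # as defaults. A minimal sketch (an illustration only, not troggle's actual
    # save_carefully helper; LogbookEntry is already imported at the top of this module):
    def _update_or_create_sketch(date, title, nonLookupAttribs):
        lbo, created = LogbookEntry.objects.update_or_create(
            date=date,                  # lookup: identifies an existing entry...
            title=title,                # ...which is why same-date, same-title trips used to overwrite each other
            defaults=nonLookupAttribs,  # non-lookup: updated in place on re-import
        )
        return lbo, created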
- #tripCave = GetTripCave(place): + # tripCave = GetTripCave(place): lplace = place.lower() - cave=None + cave = None if lplace not in noncaveplaces: cave = GetCaveLookup().get(lplace) - + y = str(date)[:4] - text = text.replace(' src="', f' src="/years/{y}/' ) - text = text.replace(" src='", f" src='/years/{y}/" ) - - text = text.replace(f' src="/years/{y}//years/{y}/', f' src="/years/{y}/' ) - text = text.replace(f" src='/years/{y}//years/{y}/", f" src='/years/{y}/" ) + text = text.replace(' src="', f' src="/years/{y}/') + text = text.replace(" src='", f" src='/years/{y}/") - text = text.replace('\t', '' ) - text = text.replace('\n\n\n', '\n\n' ) + text = text.replace(f' src="/years/{y}//years/{y}/', f' src="/years/{y}/') + text = text.replace(f" src='/years/{y}//years/{y}/", f" src='/years/{y}/") - #Check for an existing copy of the current entry, and save + text = text.replace("\t", "") + text = text.replace("\n\n\n", "\n\n") + + # Check for an existing copy of the current entry, and save expeditionday = expedition.get_expedition_day(date) - lookupAttribs={'date':date, 'title':title} + lookupAttribs = {"date": date, "title": title} # 'cave' is converted to a string doing this, which renders as the cave slug. # but it is a db query which we should try to avoid - rewrite this - - #NEW slug for a logbook entry here! Unique id + slugified title fragment - + + # NEW slug for a logbook entry here! Unique id + slugified title fragment + if tid is not None: slug = tid # slug = tid + "_" + slugify(title)[:10].replace('-','_') - else: - slug = str(randint(1000,9999)) + "_" + slugify(title)[:10].replace('-','_') - nonLookupAttribs={'place':place, 'text':text, 'expedition':expedition, - 'time_underground':logtime_underground, 'cave_slug':str(cave), 'slug': slug} - + else: + slug = str(randint(1000, 9999)) + "_" + slugify(title)[:10].replace("-", "_") + nonLookupAttribs = { + "place": place, + "text": text, + "expedition": expedition, + "time_underground": logtime_underground, + "cave_slug": str(cave), + "slug": slug, + } + # This creates the lbo instance of LogbookEntry - lbo, created=save_carefully(LogbookEntry, lookupAttribs, nonLookupAttribs) - + lbo, created = save_carefully(LogbookEntry, lookupAttribs, nonLookupAttribs) + # for PersonTrip time_underground is float (decimal hours) for tripperson, time_underground in trippersons: # print(f" - {tid} '{tripperson}' author:{tripperson == author}") - lookupAttribs={'personexpedition':tripperson, 'logbook_entry':lbo} - nonLookupAttribs={'time_underground':time_underground, 'is_logbook_entry_author':(tripperson == author)} - # this creates the PersonTrip instance. + lookupAttribs = {"personexpedition": tripperson, "logbook_entry": lbo} + nonLookupAttribs = {"time_underground": time_underground, "is_logbook_entry_author": (tripperson == author)} + # this creates the PersonTrip instance. save_carefully(PersonTrip, lookupAttribs, nonLookupAttribs) + def ParseDate(tripdate, year): - """ Interprets dates in the expo logbooks and returns a correct datetime.date object """ + """Interprets dates in the expo logbooks and returns a correct datetime.date object""" dummydate = date(1970, 1, 1) month = 1 day = 1 @@ -261,16 +300,16 @@ def ParseDate(tripdate, year): if mdatestandard: if not (mdatestandard.group(1) == year): message = f" ! 
- Bad date (year) in logbook: {tripdate} - {year}" - DataIssue.objects.create(parser='logbooks', message=message) - logdataissues["tripdate"]=message + DataIssue.objects.create(parser="logbooks", message=message) + logdataissues["tripdate"] = message return dummydate else: year, month, day = int(mdatestandard.group(1)), int(mdatestandard.group(2)), int(mdatestandard.group(3)) elif mdategoof: if not (not mdategoof.group(3) or mdategoof.group(3) == year[:2]): message = " ! - Bad date mdategoof.group(3) in logbook: " + tripdate + " - " + mdategoof.group(3) - DataIssue.objects.create(parser='logbooks', message=message) - logdataissues["tripdate"]=message + DataIssue.objects.create(parser="logbooks", message=message) + logdataissues["tripdate"] = message return dummydate else: yadd = int(year[:2]) * 100 @@ -278,56 +317,58 @@ def ParseDate(tripdate, year): else: year = 1970 message = f" ! - Bad date in logbook: {tripdate} - {year}" - DataIssue.objects.create(parser='logbooks', message=message) - logdataissues["tripdate"]=message + DataIssue.objects.create(parser="logbooks", message=message) + logdataissues["tripdate"] = message return date(year, month, day) except: message = f" ! - Failed to parse date in logbook: {tripdate} - {year}" - DataIssue.objects.create(parser='logbooks', message=message) - logdataissues["tripdate"]=message + DataIssue.objects.create(parser="logbooks", message=message) + logdataissues["tripdate"] = message return datetime.date(1970, 1, 1) - + + # 2002 - now def parser_html(year, expedition, txt, seq=""): - '''This uses some of the more obscure capabilities of regular expressions, + """This uses some of the more obscure capabilities of regular expressions, see https://docs.python.org/3/library/re.html - + You can't see it here, but a round-trip export-then-import will move the endmatter up to the frontmatter. This makes sense when moving from parser_html_01 format logfiles, believe me. - ''' + """ global logentries global logdataissues # extract front material and stash for later use when rebuilding from list of entries headmatch = re.match(r"(?i)(?s).*]*>(.*?)0): + if len(headpara) > 0: frontpath = Path(settings.EXPOWEB, "years", year, "frontmatter.html") - with open(frontpath,"w") as front: - front.write(headpara+"\n") - + with open(frontpath, "w") as front: + front.write(headpara + "\n") + # extract END material and stash for later use when rebuilding from list of entries endmatch = re.match(r"(?i)(?s).*([\s\S]*?)(?=0): + if len(endpara) > 0: endpath = Path(settings.EXPOWEB, "years", year, "endmatter.html") - with open(endpath,"w") as end: - end.write(endpara+"\n") - + with open(endpath, "w") as end: + end.write(endpara + "\n") + tripparas = re.findall(r"([\s\S]*?)(?=.*?\s*

)? # second date + + s = re.match( + r"""(?x)(?:\s*.*?\s*

)? # second date \s*(?:\s*)? \s*(.*?)(?:

)? \s*\s*(.*?) @@ -335,16 +376,19 @@ def parser_html(year, expedition, txt, seq=""): ([\s\S]*?) \s*(?:\s*(.*?))? \s*$ - ''', trippara) + """, + trippara, + ) if s: tripid, tripid1, tripdate, trippeople, triptitle, triptext, tu = s.groups() - else: # allow title and people to be swapped in order + else: # allow title and people to be swapped in order msg = f" !- {year} Can't parse:{logbook_entry_count} '{trippara[:50]}'..." print(msg) - DataIssue.objects.create(parser='logbooks', message=msg) - logdataissues[tid]=msg + DataIssue.objects.create(parser="logbooks", message=msg) + logdataissues[tid] = msg - s2 = re.match(r'''(?x)(?:\s*.*?\s*

)? # second date + s2 = re.match( + r"""(?x)(?:\s*.*?\s*

)? # second date \s*(?:\s*)? \s*(.*?)(?:

)? \s*\s*(.*?) @@ -352,17 +396,19 @@ def parser_html(year, expedition, txt, seq=""): ([\s\S]*?) \s*(?:\s*(.*?))? \s*$ - ''', trippara) + """, + trippara, + ) if s2: tripid, tripid1, tripdate, triptitle, trippeople, triptext, tu = s2.groups() else: # if not re.search(r"Rigging Guide", trippara): msg = f" !- Logbook. Can't parse entry on 2nd pass:{logbook_entry_count} '{trippara[:50]}'..." print(msg) - DataIssue.objects.create(parser='logbooks', message=msg) - logdataissues[tid]=msg + DataIssue.objects.create(parser="logbooks", message=msg) + logdataissues[tid] = msg continue - + ldate = ParseDate(tripdate.strip(), year) triptitles = triptitle.split(" - ") if len(triptitles) >= 2: @@ -370,29 +416,29 @@ def parser_html(year, expedition, txt, seq=""): else: tripcave = "UNKNOWN" ltriptext = re.sub(r"

", "", triptext) - #ltriptext = re.sub(r"\s*?\n\s*", " ", ltriptext) + # ltriptext = re.sub(r"\s*?\n\s*", " ", ltriptext) ltriptext = re.sub(r"

", "

", ltriptext).strip() - + triptitle = triptitle.strip() - entrytuple = (ldate, tripcave, triptitle, ltriptext, - trippeople, expedition, tu, tripid1) + entrytuple = (ldate, tripcave, triptitle, ltriptext, trippeople, expedition, tu, tripid1) logentries.append(entrytuple) + # main parser for 1991 - 2001. simpler because the data has been hacked so much to fit it def parser_html_01(year, expedition, txt, seq=""): global logentries global logdataissues errorcount = 0 - + # extract front material and stash for later use when rebuilding from list of entries headmatch = re.match(r"(?i)(?s).*]*>(.*?)0): + if len(headpara) > 0: frontpath = Path(settings.EXPOWEB, "years", year, "frontmatter.html") - with open(frontpath,"w") as front: - front.write(headpara+"\n") + with open(frontpath, "w") as front: + front.write(headpara + "\n") # extract END material and stash for later use when rebuilding from list of entries endmatch = re.match(r"(?i)(?s).*([\s\S]*?)(?=0): + if len(endpara) > 0: endpath = Path(settings.EXPOWEB, "years", year, "endmatter.html") - with open(endpath,"w") as end: - end.write(endpara+"\n") - + with open(endpath, "w") as end: + end.write(endpara + "\n") + tripparas = re.findall(r"([\s\S]*?)(?=)?(.*?)(.*)$", trippara) if not s: message = " ! - Skipping logentry {year} failure to parse header: " + tid + trippara[:300] + "..." - DataIssue.objects.create(parser='logbooks', message=message) - logdataissues[tid]=message + DataIssue.objects.create(parser="logbooks", message=message) + logdataissues[tid] = message print(message) break try: tripheader, triptext = s.group(1), s.group(2) except: - message = f" ! - Fail to set tripheader, triptext. trip:<{tid}> s:'{s}'" - DataIssue.objects.create(parser='logbooks', message=message) - logdataissues[tid]=message + message = f" ! - Fail to set tripheader, triptext. trip:<{tid}> s:'{s}'" + DataIssue.objects.create(parser="logbooks", message=message) + logdataissues[tid] = message print(message) - # mtripid = re.search(r' header:'{tripheader}'" - # DataIssue.objects.create(parser='logbooks', message=message) - # logdataissues[tid]=message - # print(message) - + # message = f" ! - A tag id not found. Never mind. Not needed. trip:<{tid}> header:'{tripheader}'" + # DataIssue.objects.create(parser='logbooks', message=message) + # logdataissues[tid]=message + # print(message) + # tripid = mtripid and mtripid.group(1) or "" # print(f" # - mtripid: {mtripid}") tripheader = re.sub(r"]*>", "", tripheader) - #print(f" #2 - tid: {tid}") + # print(f" #2 - tid: {tid}") try: tripdate, triptitle, trippeople = tripheader.split("|") except: - message = f" ! - Fail 3 to split out date|title|people. trip:<{tid}> '{tripheader.split('|')}'" - DataIssue.objects.create(parser='logbooks', message=message) - logdataissues[tid]=message + message = f" ! - Fail 3 to split out date|title|people. trip:<{tid}> '{tripheader.split('|')}'" + DataIssue.objects.create(parser="logbooks", message=message) + logdataissues[tid] = message print(message) try: tripdate, triptitle = tripheader.split("|") trippeople = "GUESS ANON" except: - message = f" ! - Skipping logentry {year} Fail 2 to split out date|title (anon). trip:<{tid}> '{tripheader.split('|')}' CRASHES MySQL !" - DataIssue.objects.create(parser='logbooks', message=message) - logdataissues[tid]=message + message = f" ! - Skipping logentry {year} Fail 2 to split out date|title (anon). trip:<{tid}> '{tripheader.split('|')}' CRASHES MySQL !" 
+ DataIssue.objects.create(parser="logbooks", message=message) + logdataissues[tid] = message print(message) break - #print(f" #3 - tid: {tid}") + # print(f" #3 - tid: {tid}") ldate = ParseDate(tripdate.strip(), year) - #print(f" # - tid: {tid} <{tripdate}> <{triptitle}> <{trippeople}>") - #print(f" #4 - tid: {tid}") + # print(f" # - tid: {tid} <{tripdate}> <{triptitle}> <{trippeople}>") + # print(f" #4 - tid: {tid}") - mtu = re.search(r']*>(T/?U.*)', triptext) + mtu = re.search(r"]*>(T/?U.*)", triptext) if mtu: tu = mtu.group(1) - triptext = triptext[:mtu.start(0)] + triptext[mtu.end():] + triptext = triptext[: mtu.start(0)] + triptext[mtu.end() :] else: tu = "" @@ -475,142 +520,144 @@ def parser_html_01(year, expedition, txt, seq=""): tripcave = triptitles[0].strip() ltriptext = triptext - + mtail = re.search(r'(?:[^<]*|\s|/|-|&||\((?:same day|\d+)\))*$', ltriptext) if mtail: - ltriptext = ltriptext[:mtail.start(0)] + ltriptext = ltriptext[: mtail.start(0)] ltriptext = re.sub(r"

", "", ltriptext) ltriptext = re.sub(r"\s*?\n\s*", " ", ltriptext) ltriptext = re.sub(r"", "_", ltriptext) ltriptext = re.sub(r"", "''", ltriptext) ltriptext = re.sub(r"", "'''", ltriptext) ltriptext = re.sub(r"

", "

", ltriptext).strip() - - if ltriptext == "": - message = " ! - Zero content for logbook entry!: " + tid - DataIssue.objects.create(parser='logbooks', message=message) - logdataissues[tid]=message - print(message) - - entrytuple = (ldate, tripcave, triptitle, ltriptext, - trippeople, expedition, tu, tid) + if ltriptext == "": + message = " ! - Zero content for logbook entry!: " + tid + DataIssue.objects.create(parser="logbooks", message=message) + logdataissues[tid] = message + print(message) + + entrytuple = (ldate, tripcave, triptitle, ltriptext, trippeople, expedition, tu, tid) logentries.append(entrytuple) - + except: message = f" ! - Skipping logentry {year} due to exception in: {tid}" - DataIssue.objects.create(parser='logbooks', message=message) - logdataissues[tid]=message + DataIssue.objects.create(parser="logbooks", message=message) + logdataissues[tid] = message print(message) errorcount += 1 raise - if errorcount >5 : + if errorcount > 5: message = f" !!- TOO MANY ERRORS - aborting at '{tid}' logbook: {year}" - DataIssue.objects.create(parser='logbooks', message=message) - logdataissues[tid]=message + DataIssue.objects.create(parser="logbooks", message=message) + logdataissues[tid] = message print(message) return + def parser_blog(year, expedition, txt, sq=""): - '''Parses the format of web pages collected as 'Save As HTML" from the UK Caving blog website. + """Parses the format of web pages collected as 'Save As HTML" from the UK Caving blog website. Note that the entries have dates and authors, but no titles. See detailed explanation of the complete process: https://expo.survex.com/handbook/computing/logbooks-parsing.html https://expo.survex.com/handbook/computing/log-blog-parsing.html - + This uses some of the more obscure capabilities of regular expressions, see https://docs.python.org/3/library/re.html - + BLOG entries have this structure:

-
+ So the content is nested inside the header. Attachments (images) come after the content. - ''' + """ global logentries global logdataissues errorcount = 0 - tripheads = re.findall(r"
\s*([\s\S]*?)(]*>)([\s\S]*?)(?=\s*([\s\S]*?)(]*>)([\s\S]*?)(?=[\s\S]*?(?=)","",attach) - attach = re.sub(r")","",attach) + attach = re.sub(r"
[\s\S]*?(?=)", "", attach) + attach = re.sub(r")", "", attach) tripcontent = tripstuff[0] + attach - #print(f"{i} - {len(tripstuff)} - {tripstuff[1]}") + # print(f"{i} - {len(tripstuff)} - {tripstuff[1]}") triphead = tripheads[i] logbook_entry_count += 1 - tid = set_trip_id(year,logbook_entry_count) +"_blog" + sq + tid = set_trip_id(year, logbook_entry_count) + "_blog" + sq # print(f" - tid: {tid}") - + # data-author="tcacrossley" match_author = re.search(r".*data-author=\"([^\"]*)\" data-content=.*", triphead) - if not ( match_author ) : + if not (match_author): message = f" ! - Skipping logentry {year}:{logbook_entry_count} on failure to parse data-author {tid} {triphead[:400]}..." - DataIssue.objects.create(parser='logbooks', message=message) - logdataissues[tid]=message + DataIssue.objects.create(parser="logbooks", message=message) + logdataissues[tid] = message print(message) break trippeople = match_author.group(1) # print(f" - tid: {tid} {trippeople}") # datetime="2019-07-11T13:16:18+0100" match_datetime = re.search(r".*datetime=\"([^\"]*)\" data-time=.*", triphead) - if not ( match_datetime ) : + if not (match_datetime): message = f" ! - Skipping logentry {year}:{logbook_entry_count} on failure to parse datetime {tid} {triphead[:400]}..." - DataIssue.objects.create(parser='logbooks', message=message) - logdataissues[tid]=message + DataIssue.objects.create(parser="logbooks", message=message) + logdataissues[tid] = message print(message) break datestamp = match_datetime.group(1) - + try: tripdate = datetime.fromisoformat(datestamp) except: message = f" ! - FROMISOFORMAT fail logentry {year}:{logbook_entry_count} {tid} '{datestamp}'" - DataIssue.objects.create(parser='logbooks', message=message) - logdataissues[tid]=message + DataIssue.objects.create(parser="logbooks", message=message) + logdataissues[tid] = message print(message) # fallback, ignore the timestamp bits: tripdate = datetime.fromisoformat(datestamp[0:10]) # print(f" - tid: {tid} '{trippeople}' '{tripdate}'") - + # tripname must have the location then a hyphen at the beginning as it is ignored by export function location = "Unknown" - tripname = f"Expo - UK Caving Blog{sq} post {logbook_entry_count}" # must be unique for a given date - tripcontent = re.sub(r"(width=\"\d+\")","",tripcontent) - tripcontent = re.sub(r"height=\"\d+\"","",tripcontent) - tripcontent = re.sub(r"width: \d+px","",tripcontent) - tripcontent = re.sub(r"\n\n+","\n\n",tripcontent) - tripcontent = re.sub(r"","",tripcontent) - tripcontent = f"\n\n\nBlog Author: {trippeople}" + tripcontent + tripname = f"Expo - UK Caving Blog{sq} post {logbook_entry_count}" # must be unique for a given date + tripcontent = re.sub(r"(width=\"\d+\")", "", tripcontent) + tripcontent = re.sub(r"height=\"\d+\"", "", tripcontent) + tripcontent = re.sub(r"width: \d+px", "", tripcontent) + tripcontent = re.sub(r"\n\n+", "\n\n", tripcontent) + tripcontent = re.sub(r"", "", tripcontent) + tripcontent = f"\n\n\nBlog Author: {trippeople}" + tripcontent - entrytuple = (tripdate, location, tripname, tripcontent, - trippeople, expedition, tu, tid) + entrytuple = (tripdate, location, tripname, tripcontent, trippeople, expedition, tu, tid) logentries.append(entrytuple) - + def LoadLogbookForExpedition(expedition, clean=True): - """ Parses all logbook entries for one expedition + """Parses all logbook entries for one expedition if clean==True then it deletes all entries for this year first. 
""" global logentries @@ -619,63 +666,62 @@ def LoadLogbookForExpedition(expedition, clean=True): global entries logbook_parseable = False - yearlinks = LOGBOOK_PARSER_SETTINGS + yearlinks = LOGBOOK_PARSER_SETTINGS expologbase = os.path.join(settings.EXPOWEB, "years") - logentries=[] - + logentries = [] + year = expedition.year expect = entries[year] # print(" - Logbook for: " + year) - - def cleanerrors(year): global logdataissues - dataissues = DataIssue.objects.filter(parser='logbooks') + dataissues = DataIssue.objects.filter(parser="logbooks") for di in dataissues: ph = year if re.search(ph, di.message) is not None: - #print(f' - CLEANING dataissue {di.message}') + # print(f' - CLEANING dataissue {di.message}') di.delete() - - #print(f' - CLEAN {year} {len(logdataissues)} {type(logdataissues)} data issues for this year') + + # print(f' - CLEAN {year} {len(logdataissues)} {type(logdataissues)} data issues for this year') dellist = [] for key, value in logdataissues.items(): - #print(f' - CLEANING logdataissues [{key}]: {value}') + # print(f' - CLEANING logdataissues [{key}]: {value}') if key.startswith(year): - #print(f' - CLEANING logdataissues [{key:12}]: {value} ') + # print(f' - CLEANING logdataissues [{key:12}]: {value} ') dellist.append(key) for i in dellist: del logdataissues[i] - if (clean): + + if clean: cleanerrors(year) if year in yearlinks: yearfile, yearparser = yearlinks[year] - logbookpath = Path(yearfile) - expedition.logbookfile = yearfile - parsefunc = yearparser + logbookpath = Path(yearfile) + expedition.logbookfile = yearfile + parsefunc = yearparser # print(f" - Logbook file {yearfile} using parser {yearparser}") else: logbookpath = Path(DEFAULT_LOGBOOK_FILE) expedition.logbookfile = DEFAULT_LOGBOOK_FILE - parsefunc = DEFAULT_LOGBOOK_PARSER + parsefunc = DEFAULT_LOGBOOK_PARSER expedition.save() - + lbes = LogbookEntry.objects.filter(expedition=expedition) - if (clean): + if clean: for lbe in lbes: lbe.delete() - for sq in ["", "2", "3", "4"]: # cope with blog saved as many separate files - lb = Path(expologbase, year, logbookpath.stem + sq + logbookpath.suffix) + for sq in ["", "2", "3", "4"]: # cope with blog saved as many separate files + lb = Path(expologbase, year, logbookpath.stem + sq + logbookpath.suffix) if not (lb.is_file()): # print(f" ! End of blog. Next blog file in sequence not there:{lb}") break try: - with open(lb,'rb') as file_in: + with open(lb, "rb") as file_in: txt = file_in.read().decode("utf-8") logbook_parseable = True except (IOError): @@ -686,11 +732,11 @@ def LoadLogbookForExpedition(expedition, clean=True): print(f" ! 
Very Bad Error opening {lb}") if logbook_parseable: - + # -------------------- parser = globals()[parsefunc] - print(f' - {year} parsing with {parsefunc} - {lb}') - parser(year, expedition, txt, sq) # this launches the right parser for this year + print(f" - {year} parsing with {parsefunc} - {lb}") + parser(year, expedition, txt, sq) # this launches the right parser for this year # -------------------- dupl = {} for entrytuple in logentries: @@ -699,12 +745,11 @@ def LoadLogbookForExpedition(expedition, clean=True): if check in dupl: dupl[check] += 1 triptitle = f"{triptitle} #{dupl[check]}" - print(f' - {triptitle} -- {date}') + print(f" - {triptitle} -- {date}") else: dupl[check] = 1 - EnterLogIntoDbase(date, tripcave, triptitle, text, trippeople, expedition, logtime_underground, - tripid1) - + EnterLogIntoDbase(date, tripcave, triptitle, text, trippeople, expedition, logtime_underground, tripid1) + if len(logentries) == expect: # print(f"OK {year} {len(logentries):5d} is {expect}\n") pass @@ -713,26 +758,29 @@ def LoadLogbookForExpedition(expedition, clean=True): return len(logentries) + def LoadLogbook(year): - '''One off logbook for testing purposes - ''' + """One off logbook for testing purposes""" global LOGBOOK_PARSER_SETTINGS - - nlbe={} - TROG['pagecache']['expedition'][year] = None # clear cache - - expo = Expedition.objects.get(year=year) - year = expo.year # some type funny + + nlbe = {} + TROG["pagecache"]["expedition"][year] = None # clear cache + + expo = Expedition.objects.get(year=year) + year = expo.year # some type funny nlbe[expo] = LoadLogbookForExpedition(expo) # this actually loads the logbook for one expo if year in BLOG_PARSER_SETTINGS: print("BLOG parsing") - LOGBOOK_PARSER_SETTINGS[year] = BLOG_PARSER_SETTINGS[year] + LOGBOOK_PARSER_SETTINGS[year] = BLOG_PARSER_SETTINGS[year] nlbe[expo] = LoadLogbookForExpedition(expo, clean=False) # this loads the blog logbook for one expo else: - print(f"Not a year with extant blog entries to import: '{year}' not in BLOG_PARSER_SETTINGS {BLOG_PARSER_SETTINGS}") + print( + f"Not a year with extant blog entries to import: '{year}' not in BLOG_PARSER_SETTINGS {BLOG_PARSER_SETTINGS}" + ) + def LoadLogbooks(): - """ This is the master function for parsing all logbooks into the Troggle database. + """This is the master function for parsing all logbooks into the Troggle database. This should be rewritten to use coroutines to load all logbooks from disc in parallel, but must be serialised to write to database as sqlite is single-user. """ @@ -740,45 +788,48 @@ def LoadLogbooks(): global entries logdataissues = {} - DataIssue.objects.filter(parser='logbooks').delete() + DataIssue.objects.filter(parser="logbooks").delete() expos = Expedition.objects.all() if len(expos) <= 1: - message = f" ! - No expeditions found. Load 'people' first" - DataIssue.objects.create(parser='logbooks', message=message) - logdataissues[f"sqlfail 0000"]=message + message = f" ! - No expeditions found. Load 'people' first" + DataIssue.objects.create(parser="logbooks", message=message) + logdataissues[f"sqlfail 0000"] = message print(message) return - noexpo = ["1986", "2020", "2021",] #no expo + noexpo = [ + "1986", + "2020", + "2021", + ] # no expo lostlogbook = ["1976", "1977", "1978", "1979", "1980", "1981"] - sqlfail = [""] # breaks mysql with db constraint fail - all now fixed.] + sqlfail = [""] # breaks mysql with db constraint fail - all now fixed.] 
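LoadLogbookForExpedition above resolves the parser for a given year by name, looking up a (filename, parser-name) pair and calling globals()[parsefunc]. A small sketch of that dispatch pattern under simplified assumptions (PARSER_TABLE and the stand-in parser functions below are hypothetical, shaped like LOGBOOK_PARSER_SETTINGS rather than copied from it):

```python
def parser_html(year, txt):
    # stand-in for the real per-year parser functions
    return f"parser_html for {year}: {len(txt)} chars"

def parser_blog(year, txt):
    return f"parser_blog for {year}: {len(txt)} chars"

DEFAULT_LOGBOOK = ("logbook.html", "parser_html")
PARSER_TABLE = {  # hypothetical table: year -> (filename, parser function name)
    "1996": ("log.htm", "parser_html"),
    "2022": ("ukcavingblog.html", "parser_blog"),
}

def load_one_year(year, txt):
    filename, parsername = PARSER_TABLE.get(year, DEFAULT_LOGBOOK)
    parser = globals()[parsername]  # resolve the parser function by name, as the diff does
    return parser(year, txt)

print(load_one_year("2022", "<html>saved blog page</html>"))
```

Keeping the table as plain strings means the year-to-parser mapping can live in settings without importing the parser module there.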
nologbook = noexpo + lostlogbook + sqlfail - nlbe={} - expd ={} + nlbe = {} + expd = {} loglist = [] bloglist = [] - - for expo in expos: # pointless as we explicitly know the years in this code. + + for expo in expos: # pointless as we explicitly know the years in this code. year = expo.year - TROG['pagecache']['expedition'][year] = None # clear cache + TROG["pagecache"]["expedition"][year] = None # clear cache if year in sqlfail: print(" - Logbook for: " + year + " NO parsing attempted - known sql failures") - message = f" ! - Not even attempting to parse logbook for {year} until code fixed" - DataIssue.objects.create(parser='logbooks', message=message) - logdataissues[f"sqlfail {year}"]=message + message = f" ! - Not even attempting to parse logbook for {year} until code fixed" + DataIssue.objects.create(parser="logbooks", message=message) + logdataissues[f"sqlfail {year}"] = message print(message) if year not in nologbook: if year in entries: loglist.append(expo) else: - print(" - No Logbook yet for: " + year) # catch case when preparing for next expo - + print(" - No Logbook yet for: " + year) # catch case when preparing for next expo + if year in BLOG_PARSER_SETTINGS: bloglist.append(expo) - for ex in loglist: nlbe[ex] = LoadLogbookForExpedition(ex) # this loads the logbook for one expo @@ -787,15 +838,15 @@ def LoadLogbooks(): orig = LOGBOOK_PARSER_SETTINGS[str(b)] else: orig = (DEFAULT_LOGBOOK_FILE, DEFAULT_LOGBOOK_PARSER) - LOGBOOK_PARSER_SETTINGS[str(b)] = BLOG_PARSER_SETTINGS[str(b)] - print(f" - BLOG: {b}") + LOGBOOK_PARSER_SETTINGS[str(b)] = BLOG_PARSER_SETTINGS[str(b)] + print(f" - BLOG: {b}") nlbe[b] = LoadLogbookForExpedition(b, clean=False) # this loads the blog logbook for one expo LOGBOOK_PARSER_SETTINGS[str(b)] = orig # tried to use map with concurrent threads - but sqlite database is not concurrent, so failed with database lock # yt = 0 - # for r in map(LoadLogbookForExpedition, loglist): - # yt = r + # for r in map(LoadLogbookForExpedition, loglist): + # yt = r yt = 0 for e in nlbe: @@ -803,7 +854,6 @@ def LoadLogbooks(): print(f"total {yt:,} log entries parsed in all expeditions") - # dateRegex = re.compile(r'(\d\d\d\d)-(\d\d)-(\d\d)', re.S) # expeditionYearRegex = re.compile(r'(.*?)', re.S) # titleRegex = re.compile(r'
(.*?)
', re.S) @@ -813,4 +863,3 @@ def LoadLogbooks(): # TURegex = re.compile(r'([0-9]*\.?[0-9]+)', re.S) # locationRegex = re.compile(r'(.*?)', re.S) # caveRegex = re.compile(r'(.*?)', re.S) - diff --git a/parsers/people.py b/parsers/people.py index bfacc2a..47bb328 100644 --- a/parsers/people.py +++ b/parsers/people.py @@ -9,80 +9,81 @@ from pathlib import Path from django.conf import settings from unidecode import unidecode -from troggle.core.models.troggle import (DataIssue, Expedition, Person, - PersonExpedition) +from troggle.core.models.troggle import DataIssue, Expedition, Person, PersonExpedition from troggle.core.utils import TROG, save_carefully -'''These functions do not match how the stand-alone folk script works. So the script produces an HTML file which has +"""These functions do not match how the stand-alone folk script works. So the script produces an HTML file which has href links to pages in troggle which troggle does not think are right. The standalone script needs to be renedred defucnt, and all the parsing needs to be in troggle. Either that, or they should use the same code by importing a module. -''' +""" + def parse_blurb(personline, header, person): """create mugshot Photo instance""" ms_filename = personline[header["Mugshot"]] ms_path = Path(settings.EXPOWEB, "folk", ms_filename) - + if ms_filename: if not ms_path.is_file(): message = f"! INVALID mug_shot field '{ms_filename}' for {person.fullname}" print(message) - DataIssue.objects.create(parser='people', message=message, url=f"/person/{person.fullname}") + DataIssue.objects.create(parser="people", message=message, url=f"/person/{person.fullname}") return - - if ms_filename.startswith('i/'): - #if person just has an image, add it. It has format 'i/adama2018.jpg' + + if ms_filename.startswith("i/"): + # if person just has an image, add it. It has format 'i/adama2018.jpg' person.mug_shot = str(Path("/folk", ms_filename)) person.blurb = None - elif ms_filename.startswith('l/'): + elif ms_filename.startswith("l/"): # it has the format 'l/ollybetts.htm' the file may contain images - with open(ms_path,'r') as blurbfile: + with open(ms_path, "r") as blurbfile: blrb = blurbfile.read() - pblurb=re.search(r'.*.*(.*)(.*)[^<]*', '', fragment) + fragment = re.sub(r"[^<]*", "", fragment) # replace src="../i/ with src="/folk/i person.blurb = fragment else: message = f"! Blurb parse error in {ms_filename}" print(message) - DataIssue.objects.create(parser='people', message=message, url="/folk/") + DataIssue.objects.create(parser="people", message=message, url="/folk/") - elif ms_filename == '': + elif ms_filename == "": pass else: message = f"! Unrecognised type of file at mug_shot field '{ms_filename}' for {person.fullname}" print(message) - DataIssue.objects.create(parser='people', message=message, url="/folk/") + DataIssue.objects.create(parser="people", message=message, url="/folk/") person.save() + def load_people_expos(): - '''This is where the folk.csv file is parsed to read people's names. + """This is where the folk.csv file is parsed to read people's names. 
Which it gets wrong for people like Lydia-Clare Leather and various 'von' and 'de' middle 'names' and McLean and Mclean and McAdam - interaction with the url parser in urls.py too - ''' - DataIssue.objects.filter(parser='people').delete() - - persontab = open(os.path.join(settings.EXPOWEB, "folk", "folk.csv")) # should really be EXPOFOLK I guess - personreader = csv.reader(persontab) # this is an iterator + """ + DataIssue.objects.filter(parser="people").delete() + + persontab = open(os.path.join(settings.EXPOWEB, "folk", "folk.csv")) # should really be EXPOFOLK I guess + personreader = csv.reader(persontab) # this is an iterator headers = next(personreader) header = dict(list(zip(headers, list(range(len(headers)))))) - + # make expeditions print(" - Loading expeditions") years = headers[5:] - + for year in years: - lookupAttribs = {'year':year} - nonLookupAttribs = {'name':f"CUCC expo {year}"} - + lookupAttribs = {"year": year} + nonLookupAttribs = {"name": f"CUCC expo {year}"} + save_carefully(Expedition, lookupAttribs, nonLookupAttribs) # make persons @@ -105,67 +106,86 @@ def load_people_expos(): nickname = splitnick.group(2) or "" fullname = fullname.strip() - names = fullname.split(' ') + names = fullname.split(" ") firstname = names[0] if len(names) == 1: lastname = "" - if personline[header["VfHO member"]] =='': + if personline[header["VfHO member"]] == "": vfho = False else: vfho = True - lookupAttribs={'first_name':firstname, 'last_name':(lastname or "")} - nonLookupAttribs={'is_vfho':vfho, 'fullname':fullname, 'nickname':nickname} + lookupAttribs = {"first_name": firstname, "last_name": (lastname or "")} + nonLookupAttribs = {"is_vfho": vfho, "fullname": fullname, "nickname": nickname} person, created = save_carefully(Person, lookupAttribs, nonLookupAttribs) parse_blurb(personline=personline, header=header, person=person) - + # make person expedition from table for year, attended in list(zip(headers, personline))[5:]: expedition = Expedition.objects.get(year=year) if attended == "1" or attended == "-1": - lookupAttribs = {'person':person, 'expedition':expedition} - nonLookupAttribs = {'nickname':nickname, 'is_guest':(personline[header["Guest"]] == "1")} + lookupAttribs = {"person": person, "expedition": expedition} + nonLookupAttribs = {"nickname": nickname, "is_guest": (personline[header["Guest"]] == "1")} save_carefully(PersonExpedition, lookupAttribs, nonLookupAttribs) print("", flush=True) -def who_is_this(year,possibleid): + +def who_is_this(year, possibleid): expo = Expedition.objects.filter(year=year) - personexpedition = GetPersonExpeditionNameLookup(expo)[possibleid.lower()] + personexpedition = GetPersonExpeditionNameLookup(expo)[possibleid.lower()] if personexpedition: return personexpedition.person else: return None - + + global foreign_friends -foreign_friends = ["P. Jeutter", "K. Jäger", "S. Steinberger", "R. Seebacher", - "Dominik Jauch", "Fritz Mammel", "Marcus Scheuerman", - "Uli Schütz", "Wieland Scheuerle", "Arndt Karger", - "Kai Schwekend", "Regina Kaiser", "Thilo Müller","Wieland Scheuerle", - "Florian Gruner", "Helmut Stopka-Ebeler", "Aiko", "Mark Morgan", "Arndt Karger"] - +foreign_friends = [ + "P. Jeutter", + "K. Jäger", + "S. Steinberger", + "R. 
Seebacher", + "Dominik Jauch", + "Fritz Mammel", + "Marcus Scheuerman", + "Uli Schütz", + "Wieland Scheuerle", + "Arndt Karger", + "Kai Schwekend", + "Regina Kaiser", + "Thilo Müller", + "Wieland Scheuerle", + "Florian Gruner", + "Helmut Stopka-Ebeler", + "Aiko", + "Mark Morgan", + "Arndt Karger", +] + + def known_foreigner(id): - '''If this someone from ARGE or a known Austrian? Name has to be exact, no soft matching - ''' - global foreign_friends + """If this someone from ARGE or a known Austrian? Name has to be exact, no soft matching""" + global foreign_friends if id in foreign_friends: return True else: return False - + # Refactor. The dict GetPersonExpeditionNameLookup(expo) indexes by name and has values of personexpedition # This is convoluted, the whole personexpedition concept is unnecessary? -Gpersonexpeditionnamelookup = { } +Gpersonexpeditionnamelookup = {} + + def GetPersonExpeditionNameLookup(expedition): global Gpersonexpeditionnamelookup - + def apply_variations(f, l): - '''Be generous in guessing possible matches. Any duplicates will be ruled as invalid. - ''' + """Be generous in guessing possible matches. Any duplicates will be ruled as invalid.""" f = f.lower() l = l.lower() variations = [] @@ -175,27 +195,27 @@ def GetPersonExpeditionNameLookup(expedition): variations.append(f + " " + l) variations.append(f + " " + l[0]) variations.append(f + l[0]) - variations.append(f + " " +l[0] + '.') + variations.append(f + " " + l[0] + ".") variations.append(f[0] + " " + l) variations.append(f[0] + ". " + l) variations.append(f[0] + l) - variations.append(f[0] + l[0]) # initials e.g. gb or bl + variations.append(f[0] + l[0]) # initials e.g. gb or bl return variations - + res = Gpersonexpeditionnamelookup.get(expedition.name) - + if res: return res - - res = { } + + res = {} duplicates = set() - - #print("Calculating GetPersonExpeditionNameLookup for " + expedition.year) + + # print("Calculating GetPersonExpeditionNameLookup for " + expedition.year) personexpeditions = PersonExpedition.objects.filter(expedition=expedition) short = {} dellist = [] for personexpedition in personexpeditions: - possnames = [ ] + possnames = [] f = unidecode(unescape(personexpedition.person.first_name.lower())) l = unidecode(unescape(personexpedition.person.last_name.lower())) full = unidecode(unescape(personexpedition.person.fullname.lower())) @@ -204,40 +224,40 @@ def GetPersonExpeditionNameLookup(expedition): possnames.append(full) if n not in possnames: possnames.append(n) - + if l: - possnames += apply_variations(f,l) + possnames += apply_variations(f, l) if n: possnames += apply_variations(n, l) - + if f == "Robert".lower(): possnames += apply_variations("Bob", l) if f == "Rob".lower(): possnames += apply_variations("Robert", l) - + if f == "Andrew".lower(): possnames += apply_variations("Andy", l) if f == "Andy".lower(): possnames += apply_variations("Andrew", l) if f == "Michael".lower(): possnames += apply_variations("Mike", l) - + if f == "David".lower(): possnames += apply_variations("Dave", l) if f == "Dave".lower(): possnames += apply_variations("David", l) - + if f == "Peter".lower(): possnames += apply_variations("Pete", l) if f == "Pete".lower(): possnames += apply_variations("Peter", l) - + if f == "Olly".lower(): possnames += apply_variations("Oliver", l) if f == "Oliver".lower(): possnames += apply_variations("Olly", l) - + if f == "Ollie".lower(): possnames += apply_variations("Oliver", l) if f == "Oliver".lower(): @@ -245,59 +265,57 @@ def GetPersonExpeditionNameLookup(expedition): 
if f == "Becka".lower(): possnames += apply_variations("Rebecca", l) - - if f'{f} {l}' == "Andy Waddington".lower(): + + if f"{f} {l}" == "Andy Waddington".lower(): possnames += apply_variations("aer", "waddington") - if f'{f} {l}' == "Phil Underwood".lower(): + if f"{f} {l}" == "Phil Underwood".lower(): possnames += apply_variations("phil", "underpants") - if f'{f} {l}' == "Naomi Griffiths".lower(): + if f"{f} {l}" == "Naomi Griffiths".lower(): possnames += apply_variations("naomi", "makins") - if f'{f} {l}' == "Tina White".lower(): + if f"{f} {l}" == "Tina White".lower(): possnames += apply_variations("tina", "richardson") - if f'{f} {l}' == "Cat Hulse".lower(): + if f"{f} {l}" == "Cat Hulse".lower(): possnames += apply_variations("catherine", "hulse") possnames += apply_variations("cat", "henry") - if f'{f} {l}' == "Jess Stirrups".lower(): + if f"{f} {l}" == "Jess Stirrups".lower(): possnames += apply_variations("jessica", "stirrups") - if f'{f} {l}' == "Nat Dalton".lower(): - possnames += apply_variations("nathanael", "dalton") # correct. He has a weird spelling. - if f'{f} {l}' == "Mike Richardson".lower(): + if f"{f} {l}" == "Nat Dalton".lower(): + possnames += apply_variations("nathanael", "dalton") # correct. He has a weird spelling. + if f"{f} {l}" == "Mike Richardson".lower(): possnames.append("mta") possnames.append("miketa") possnames.append("mike the animal") possnames.append("animal") - if f'{f} {l}' == "Eric Landgraf".lower(): + if f"{f} {l}" == "Eric Landgraf".lower(): possnames.append("eric c.landgraf") possnames.append("eric c. landgraf") possnames.append("eric c landgraf") - if f'{f} {l}' == "Nadia Raeburn".lower(): + if f"{f} {l}" == "Nadia Raeburn".lower(): possnames.append("nadia rc") possnames.append("nadia raeburn-cherradi") - + for i in [3, 4, 5, 6]: - lim = min(i, len(f)+1) # short form, e.g. Dan for Daniel. + lim = min(i, len(f) + 1) # short form, e.g. Dan for Daniel. if f[:lim] not in short: - short[f[:lim]]= personexpedition + short[f[:lim]] = personexpedition else: dellist.append(f[:lim]) - - possnames = set(possnames) # remove duplicates + + possnames = set(possnames) # remove duplicates for possname in possnames: if possname in res: duplicates.add(possname) else: res[possname] = personexpedition - + for possname in duplicates: del res[possname] - + for possname in dellist: - if possname in short: #always true ? + if possname in short: # always true ? del short[possname] for shortname in short: res[shortname] = short[shortname] - - + Gpersonexpeditionnamelookup[expedition.name] = res return res - diff --git a/parsers/scans.py b/parsers/scans.py index 1bebe18..cc54633 100644 --- a/parsers/scans.py +++ b/parsers/scans.py @@ -17,8 +17,8 @@ from troggle.core.models.troggle import DataIssue from troggle.core.utils import save_carefully from troggle.core.views.scans import datewallet -'''Searches through all the survey scans directories (wallets) in expofiles, looking for images to be referenced. -''' +"""Searches through all the survey scans directories (wallets) in expofiles, looking for images to be referenced. +""" contentsjson = "contents.json" @@ -26,111 +26,135 @@ git = settings.GIT # to do: Actually read all the JSON files and set the survex file field appropriately! + def setwalletyear(wallet): - _ = wallet.year() # don't need return value. Just calling this saves it as w.walletyear + _ = wallet.year() # don't need return value. 
Just calling this saves it as w.walletyear + def load_all_scans(): - '''This iterates through the scans directories (either here or on the remote server) + """This iterates through the scans directories (either here or on the remote server) and builds up the models we can access later. - + It does NOT read or validate anything in the JSON data attached to each wallet. Those checks are done at runtime, when a wallet is accessed, not at import time. - - ''' - print(' - Loading Survey Scans') + + """ + print(" - Loading Survey Scans") SingleScan.objects.all().delete() Wallet.objects.all().delete() - print(' - deleting all Wallet and SingleScan objects') - DataIssue.objects.filter(parser='scans').delete() - + print(" - deleting all Wallet and SingleScan objects") + DataIssue.objects.filter(parser="scans").delete() + # These are valid old file types to be visible, they are not necessarily allowed to be uploaded to a new wallet. - valids = [".top",".txt",".tif",".png",".jpg",".jpeg",".pdf",".svg",".gif",".xvi", - ".json",".autosave",".sxd",".svx",".th",".th2",".tdr",".sql",".zip",".dxf",".3d", - ".ods",".csv",".xcf",".xml"] - validnames = ["thconfig","manifest"] + valids = [ + ".top", + ".txt", + ".tif", + ".png", + ".jpg", + ".jpeg", + ".pdf", + ".svg", + ".gif", + ".xvi", + ".json", + ".autosave", + ".sxd", + ".svx", + ".th", + ".th2", + ".tdr", + ".sql", + ".zip", + ".dxf", + ".3d", + ".ods", + ".csv", + ".xcf", + ".xml", + ] + validnames = ["thconfig", "manifest"] # iterate into the surveyscans directory # Not all folders with files in them are wallets. - # they are if they are /2010/2010#33 + # they are if they are /2010/2010#33 # or /1996-1999NotKHbook/ # but not if they are /2010/2010#33/therion/ : the wallet is /2010#33/ not /therion/ - print(' ', end='') - scans_path = Path(settings.SCANS_ROOT) + print(" ", end="") + scans_path = Path(settings.SCANS_ROOT) seen = [] - c=0 + c = 0 wallets = {} - for p in scans_path.rglob('*'): + for p in scans_path.rglob("*"): if p.is_file(): if p.suffix.lower() not in valids and p.name.lower() not in validnames: # print(f"'{p}'", end='\n') pass - elif p.parent == scans_path: # skip files directly in /surveyscans/ + elif p.parent == scans_path: # skip files directly in /surveyscans/ pass else: - - c+=1 - if c % 15 == 0 : - print(".", end='') - if c % 750 == 0 : - print("\n ", end='') + + c += 1 + if c % 15 == 0: + print(".", end="") + if c % 750 == 0: + print("\n ", end="") if p.parent.parent.parent.parent == scans_path: # print(f"too deep {p}", end='\n') fpath = p.parent.parent - walletname = p.parent.parent.name # wallet is one level higher - else: + walletname = p.parent.parent.name # wallet is one level higher + else: fpath = p.parent walletname = p.parent.name - + if walletname in wallets: wallet = wallets[walletname] else: - print("", flush=True, end='') + print("", flush=True, end="") # Create the wallet object. But we don't have a date for it yet. 
wallet = Wallet(fpath=fpath, walletname=walletname) setwalletyear(wallet) wallet.save() wallets[walletname] = wallet - + singlescan = SingleScan(ffile=fpath, name=p.name, wallet=wallet) singlescan.save() - - + # only printing progress: tag = p.parent - if len(walletname)>4: + if len(walletname) > 4: if walletname[4] == "#": tag = p.parent.parent - + if tag not in seen: - print(f" {tag.name} ", end='') + print(f" {tag.name} ", end="") if len(str(tag.name)) > 17: - print('\n ', end='') + print("\n ", end="") seen.append(tag) - - - print(f'\n - found and loaded {c:,} acceptable scan files in {len(wallets):,} wallets') - + + print(f"\n - found and loaded {c:,} acceptable scan files in {len(wallets):,} wallets") + # but we also need to check if JSON exists, even if there are no uploaded scan files. # Here we know there is a rigid folder structure, so no need to look for sub folders print(f"\n - Checking for wallets where JSON exists, but there may be no uploaded scan files:") - print(' ', end='') + print(" ", end="") wjson = 0 - contents_path = Path(settings.DRAWINGS_DATA, "walletjson") - for yeardir in contents_path.iterdir(): + contents_path = Path(settings.DRAWINGS_DATA, "walletjson") + for yeardir in contents_path.iterdir(): if yeardir.is_dir(): - for walletpath in yeardir.iterdir(): + for walletpath in yeardir.iterdir(): if Path(walletpath, contentsjson).is_file(): walletname = walletpath.name - + if walletname not in wallets: wjson += 1 - if wjson % 10 == 0 : - print("\n ", end='') + if wjson % 10 == 0: + print("\n ", end="") - print(f"{walletname} ", end='') - fpath = Path(settings.SCANS_ROOT, str(yeardir.stem), walletname) + print(f"{walletname} ", end="") + fpath = Path(settings.SCANS_ROOT, str(yeardir.stem), walletname) # The wallets found from JSON should all have dates already wallet, created = Wallet.objects.update_or_create(walletname=walletname, fpath=fpath) wallets[walletname] = wallet @@ -140,9 +164,11 @@ def load_all_scans(): # But we *do* set the walletyear: setwalletyear(wallet) if not created: - print(f"\n - {walletname} was not created, but was not in directory walk of /surveyscans/. Who created it?") + print( + f"\n - {walletname} was not created, but was not in directory walk of /surveyscans/. Who created it?" 
+ ) wallet.save() - print(f'\n - found another {wjson:,} JSON files, making a total of {len(wallets):,} wallets') + print(f"\n - found another {wjson:,} JSON files, making a total of {len(wallets):,} wallets") wallets = Wallet.objects.filter(walletyear=None) for w in wallets: w.walletyear = datetime.date(1999, 1, 1) diff --git a/parsers/survex.py b/parsers/survex.py index c1c6c72..2545b7a 100644 --- a/parsers/survex.py +++ b/parsers/survex.py @@ -11,20 +11,17 @@ from django.utils.timezone import get_current_timezone, make_aware import troggle.settings as settings from troggle.core.models.caves import QM, Cave, Entrance, LogbookEntry -from troggle.core.models.survex import (SurvexBlock, SurvexDirectory, - SurvexFile, SurvexPersonRole, - SurvexStation, Wallet) +from troggle.core.models.survex import SurvexBlock, SurvexDirectory, SurvexFile, SurvexPersonRole, SurvexStation, Wallet from troggle.core.models.troggle import DataIssue, Expedition from troggle.core.utils import chaosmonkey, get_process_memory from troggle.parsers.logbooks import GetCaveLookup -from troggle.parsers.people import (GetPersonExpeditionNameLookup, - known_foreigner) +from troggle.parsers.people import GetPersonExpeditionNameLookup, known_foreigner -'''Imports the tree of survex files following from a defined root .svx file +"""Imports the tree of survex files following from a defined root .svx file It also scans the Loser repo for all the svx files, which it loads individually afterwards. -''' +""" -todo = ''' +todo = """ -#BUG, if *date comes after *team, the person's date is not set at all. It needs re-setting at the end of the block. @@ -38,7 +35,7 @@ ignore all the Units and offset stuff, that troggle will work with survex files repeated readings from distox etc.. Not actually useful for pre 2022 survey data, but good future-proofing. Also it will be a tiny bit more accurate as these leg lengths are after loop closure fixup. -''' +""" survexblockroot = None survexomitsroot = None ROOTBLOCK = "rootblock" @@ -46,27 +43,29 @@ OMITBLOCK = "omitblock" METRESINFEET = 3.28084 stop_dup_warning = False -debugprint = False # Turns on debug printout for just one *include file +debugprint = False # Turns on debug printout for just one *include file debugprinttrigger = "!" 
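The todo note above flags that *team lines can precede *date, so person records are first created without an expedition and must be backfilled once the date is known (see LoadSurvexDate further down). A much-simplified sketch of that deferred backfill, using stand-in classes and field names rather than troggle's Django models:

```python
class Block:
    """Stand-in for a survex block: people seen so far and the block's year, if known."""
    def __init__(self):
        self.people = []  # list of [name, year-or-None]
        self.year = None

def on_team(block, name):
    # *team may arrive before *date, so the year can be unknown at this point
    block.people.append([name, block.year])

def on_date(block, year):
    block.year = year
    # backfill anyone recorded before the *date line appeared
    for rec in block.people:
        if rec[1] is None:
            rec[1] = year

b = Block()
on_team(b, "Anthony Day")  # *team before *date: year unknown here
on_date(b, "1999")
on_team(b, "Olly Betts")   # *team after *date: year known immediately
print(b.people)            # [['Anthony Day', '1999'], ['Olly Betts', '1999']]
```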
# debugprinttrigger = "caves-1623/40/old/EisSVH" + class MapLocations(object): - """Class used only for identifying teh entrance locations - """ + """Class used only for identifying teh entrance locations""" + p = [ ("laser.0_7", "BNase", "Reference", "Bräuning Nase laser point"), ("226-96", "BZkn", "Reference", "Bräuning Zinken trig point"), - ("vd1","VD1","Reference", "VD1 survey point"), - ("laser.kt114_96","HSK","Reference", "Hinterer Schwarzmooskogel trig point"), - ("2000","Nipple","Reference", "Nipple (Weiße Warze)"), - ("3000","VSK","Reference", "Vorderer Schwarzmooskogel summit"), + ("vd1", "VD1", "Reference", "VD1 survey point"), + ("laser.kt114_96", "HSK", "Reference", "Hinterer Schwarzmooskogel trig point"), + ("2000", "Nipple", "Reference", "Nipple (Weiße Warze)"), + ("3000", "VSK", "Reference", "Vorderer Schwarzmooskogel summit"), ("topcamp", "OTC", "Reference", "Old Top Camp"), ("laser.0", "LSR0", "Reference", "Laser Point 0"), ("laser.0_1", "LSR1", "Reference", "Laser Point 0/1"), ("laser.0_3", "LSR3", "Reference", "Laser Point 0/3"), ("laser.0_5", "LSR5", "Reference", "Laser Point 0/5"), - ("225-96", "BAlm", "Reference", "Bräuning Alm trig point") + ("225-96", "BAlm", "Reference", "Bräuning Alm trig point"), ] + def points(self): for ent in Entrance.objects.all(): if ent.best_station(): @@ -75,14 +74,14 @@ class MapLocations(object): k = ent.caveandentrance_set.all()[0].cave except: message = f" ! Failed to get Cave linked to Entrance:{ent.name} from:{ent.filename} best:{ent.best_station()} {ent.caveandentrance_set.all()}" - DataIssue.objects.create(parser='entrances', message=message) + DataIssue.objects.create(parser="entrances", message=message) print(message) - continue # skip this entrance + continue # skip this entrance try: areaName = k.getArea().short_name except: message = f" ! Failed to get Area on cave '{k}' linked to Entrance:{ent.name} from:{ent.filename} best:{ent.best_station()}" - DataIssue.objects.create(parser='entrances', message=message) + DataIssue.objects.create(parser="entrances", message=message) print(message) raise self.p.append((ent.best_station(), f"{areaName}-{str(ent)[5:]}", ent.needs_surface_work(), str(ent))) @@ -92,85 +91,90 @@ class MapLocations(object): def __str__(self): return f"{len(self.p)} map locations" - + + def get_offending_filename(path): """Used to provide the URL for a line in the DataErrors page whcih reports problems on importing data into troggle """ return "/survexfile/" + path + ".svx" -class SurvexLeg(): - """No longer a models.Model subclass, so no longer a database table - """ - tape = 0.0 - compass = 0.0 - clino = 0.0 + +class SurvexLeg: + """No longer a models.Model subclass, so no longer a database table""" + + tape = 0.0 + compass = 0.0 + clino = 0.0 + def get_people_on_trip(survexblock): qpeople = SurvexPersonRole.objects.filter(survexblock=survexblock) people = [] for p in qpeople: - people.append(f'{p.personname}') + people.append(f"{p.personname}") return list(set(people)) - -class LoadingSurvex(): + +class LoadingSurvex: """A 'survex block' is a *begin...*end set of cave data. A survex file can contain many begin-end blocks, which can be nested, and which can *include other survex files. A 'scanswallet' is what we today call a "survey scans folder" or a "wallet". """ + # python regex flags (?i) means case-insentitive, (?s) means . 
matches newline too # see https://docs.python.org/3/library/re.html - rx_begin = re.compile(r'(?i)begin') - rx_end = re.compile(r'(?i)end$') - rx_title = re.compile(r'(?i)title$') - rx_ref = re.compile(r'(?i)ref$') - rx_data = re.compile(r'(?i)data$') - rx_flags = re.compile(r'(?i)flags$') - rx_alias = re.compile(r'(?i)alias$') - rx_entrance = re.compile(r'(?i)entrance$') - rx_date = re.compile(r'(?i)date$') - rx_units = re.compile(r'(?i)units$') - rx_team = re.compile(r'(?i)team$') - rx_set = re.compile(r'(?i)set$') + rx_begin = re.compile(r"(?i)begin") + rx_end = re.compile(r"(?i)end$") + rx_title = re.compile(r"(?i)title$") + rx_ref = re.compile(r"(?i)ref$") + rx_data = re.compile(r"(?i)data$") + rx_flags = re.compile(r"(?i)flags$") + rx_alias = re.compile(r"(?i)alias$") + rx_entrance = re.compile(r"(?i)entrance$") + rx_date = re.compile(r"(?i)date$") + rx_units = re.compile(r"(?i)units$") + rx_team = re.compile(r"(?i)team$") + rx_set = re.compile(r"(?i)set$") - rx_names = re.compile(r'(?i)names') - rx_flagsnot= re.compile(r"not\s") + rx_names = re.compile(r"(?i)names") + rx_flagsnot = re.compile(r"not\s") rx_linelen = re.compile(r"[\d\-+.]+$") instruments = "(bitch|bodger|bolt|bolter|bolting|book|clino|comp|compass|consultant|disto|distox|distox2|dog|dogsbody|drawing|drill|gps|helper|inst|instr|instrument|monkey|nagging|nail|nail_polish|nail_polish_bitch|nail_polish_monkey|nail_varnish|nail_varnish_bitch|note|paint|photo|pic|point|polish|powerdrill|rig|rigger|rigging|sketch|slacker|something|tape|topodroid|unknown|useless|varnish|waiting_patiently)" - rx_teammem = re.compile(r"(?i)"+instruments+"?(?:es|s)?\s+(.*)$") - rx_teamold = re.compile(r"(?i)(.*)\s+"+instruments+"?(?:es|s)?$") - rx_teamabs = re.compile(r"(?i)^\s*("+instruments+")?(?:es|s)?\s*$") - rx_person = re.compile(r"(?i) and |/| / |, | , |&| & | \+ |^both$|^none$") - rx_qm = re.compile(r'(?i)^\s*QM(\d+)\s+?([a-dA-DxX])\s+([\w\-\_]+)\.([\w\.\-]+)\s+(([\w\-]+)\.([\w\.\-]+)|\-)\s+(.+)$') + rx_teammem = re.compile(r"(?i)" + instruments + "?(?:es|s)?\s+(.*)$") + rx_teamold = re.compile(r"(?i)(.*)\s+" + instruments + "?(?:es|s)?$") + rx_teamabs = re.compile(r"(?i)^\s*(" + instruments + ")?(?:es|s)?\s*$") + rx_person = re.compile(r"(?i) and |/| / |, | , |&| & | \+ |^both$|^none$") + rx_qm = re.compile( + r"(?i)^\s*QM(\d+)\s+?([a-dA-DxX])\s+([\w\-\_]+)\.([\w\.\-]+)\s+(([\w\-]+)\.([\w\.\-]+)|\-)\s+(.+)$" + ) # does not recognise non numeric suffix survey point ids - rx_qm0 = re.compile(r'(?i)^\s*QM(\d+)\s+(.+)$') - rx_qm_tick = re.compile(r'(?i)^\s*QM(\d+)\s+TICK\s([\d\-]+)\s(.*)$') -# remember there is also QM_PATTERN used in views.other and set in settings.py - rx_tapelng = re.compile(r'(?i).*(tape|length).*$') + rx_qm0 = re.compile(r"(?i)^\s*QM(\d+)\s+(.+)$") + rx_qm_tick = re.compile(r"(?i)^\s*QM(\d+)\s+TICK\s([\d\-]+)\s(.*)$") + # remember there is also QM_PATTERN used in views.other and set in settings.py + rx_tapelng = re.compile(r"(?i).*(tape|length).*$") - rx_cave = re.compile(r'(?i)caves-(\d\d\d\d)/([-\d\w]+|\d\d\d\d-?\w+-\d+)') - rx_comment = re.compile(r'([^;]*?)\s*(?:;\s*(.*))?\n?$') - rx_comminc = re.compile(r'(?i)^\|\*include[\s]*([-\w/]*).*$') # inserted by linear collate ;*include - rx_commcni = re.compile(r'(?i)^\|\*edulcni[\s]*([-\w/]*).*$') # inserted by linear collate ;*edulcni - rx_include = re.compile(r'(?i)^\s*(\*include[\s].*)$') - rx_commref = re.compile(r'(?i)^\s*ref(?:erence)?[\s.:]*(\d+)\s*#\s*(X)?\s*(\d+)') - rx_ref_text= re.compile(r'(?i)^\s*\"[^"]*\"\s*$') - rx_star = 
re.compile(r'(?i)\s*\*[\s,]*(\w+)\s*(.*?)\s*(?:;.*)?$') - rx_starref = re.compile(r'(?i)^\s*\*ref[\s.:]*((?:19[6789]\d)|(?:20[0123]\d))\s*#?\s*(X)?\s*(.*?\d+.*?)$') - rx_argsref = re.compile(r'(?i)^[\s.:]*((?:19[6789]\d)|(?:20[012345]\d))\s*#?\s*(X)?\s*(.*?\d+.*?)$') - rx_badmerge= re.compile(r'(?i).*(\>\>\>\>\>)|(\=\=\=\=\=)|(\<\<\<\<\<).*$') - rx_ref2 = re.compile(r'(?i)\s*ref[.;]?') - rx_commteam = re.compile(r'(?i)\s*(Messteam|Zeichner)\s*[:]?(.*)') - + rx_cave = re.compile(r"(?i)caves-(\d\d\d\d)/([-\d\w]+|\d\d\d\d-?\w+-\d+)") + rx_comment = re.compile(r"([^;]*?)\s*(?:;\s*(.*))?\n?$") + rx_comminc = re.compile(r"(?i)^\|\*include[\s]*([-\w/]*).*$") # inserted by linear collate ;*include + rx_commcni = re.compile(r"(?i)^\|\*edulcni[\s]*([-\w/]*).*$") # inserted by linear collate ;*edulcni + rx_include = re.compile(r"(?i)^\s*(\*include[\s].*)$") + rx_commref = re.compile(r"(?i)^\s*ref(?:erence)?[\s.:]*(\d+)\s*#\s*(X)?\s*(\d+)") + rx_ref_text = re.compile(r'(?i)^\s*\"[^"]*\"\s*$') + rx_star = re.compile(r"(?i)\s*\*[\s,]*(\w+)\s*(.*?)\s*(?:;.*)?$") + rx_starref = re.compile(r"(?i)^\s*\*ref[\s.:]*((?:19[6789]\d)|(?:20[0123]\d))\s*#?\s*(X)?\s*(.*?\d+.*?)$") + rx_argsref = re.compile(r"(?i)^[\s.:]*((?:19[6789]\d)|(?:20[012345]\d))\s*#?\s*(X)?\s*(.*?\d+.*?)$") + rx_badmerge = re.compile(r"(?i).*(\>\>\>\>\>)|(\=\=\=\=\=)|(\<\<\<\<\<).*$") + rx_ref2 = re.compile(r"(?i)\s*ref[.;]?") + rx_commteam = re.compile(r"(?i)\s*(Messteam|Zeichner)\s*[:]?(.*)") # This interprets the survex "*data normal" command which sets out the order of the fields in the data, e.g. # *DATA normal from to length gradient bearing ignore ignore ignore ignore - datastardefault = {"type":"normal", "from":0, "to":1, "tape":2, "compass":3, "clino":4} - flagsdefault = {"duplicate":False, "surface":False, "splay":False, "skiplegs":False, "splayalias":False} + datastardefault = {"type": "normal", "from": 0, "to": 1, "tape": 2, "compass": 3, "clino": 4} + flagsdefault = {"duplicate": False, "surface": False, "splay": False, "skiplegs": False, "splayalias": False} - datastar ={} + datastar = {} flagsstar = {} units = "metres" unitsfactor = None @@ -182,23 +186,31 @@ class LoadingSurvex(): legsnumberstack = [] slengthstack = [] personexpedstack = [] - stackbegin =[] - flagsstack =[] - datastack =[] + stackbegin = [] + flagsstack = [] + datastack = [] includestack = [] stacksvxfiles = [] svxfileslist = [] - svxdirs = {} + svxdirs = {} uniquename = {} expos = {} - survexdict = {} # each key is a directory, and its value is a list of files + survexdict = {} # each key is a directory, and its value is a list of files lineno = 0 insp = "" callcount = 0 caverncount = 0 ignoreprefix = ["surface", "kataster", "fixedpts", "gpx"] - ignorenoncave = ["caves-1623", "caves-1623/2007-NEU","caves-1626", "caves-1624", "caves-1627", "fixedpts/gps/gps00raw", ""] - includedfilename ="" + ignorenoncave = [ + "caves-1623", + "caves-1623/2007-NEU", + "caves-1626", + "caves-1624", + "caves-1627", + "fixedpts/gps/gps00raw", + "", + ] + includedfilename = "" currentsurvexblock = None currentsurvexfile = None currentcave = None @@ -209,81 +221,94 @@ class LoadingSurvex(): def __init__(self): self.caveslist = GetCaveLookup() pass - + def LoadSurvexFallThrough(self, survexblock, line, cmd): if cmd == "require": - pass # should we check survex version available for processing? - elif cmd in ["equate", "fix", "calibrate", "cs", "export", "case", - "declination", "infer","instrument", "sd"]: - pass # we ignore all these, which is fine. 
+ pass # should we check survex version available for processing? + elif cmd in ["equate", "fix", "calibrate", "cs", "export", "case", "declination", "infer", "instrument", "sd"]: + pass # we ignore all these, which is fine. else: - if cmd in ["include", "data", "flags", "title", "entrance","set", "units", "alias", "ref"]: - message = f"! Warning. Unparsed [*{cmd}]: '{line}' {survexblock.survexfile.path} - not an error (probably)" - print(self.insp+message) - DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(survexblock.survexfile.path)) + if cmd in ["include", "data", "flags", "title", "entrance", "set", "units", "alias", "ref"]: + message = ( + f"! Warning. Unparsed [*{cmd}]: '{line}' {survexblock.survexfile.path} - not an error (probably)" + ) + print(self.insp + message) + DataIssue.objects.create( + parser="survex", message=message, url=get_offending_filename(survexblock.survexfile.path) + ) else: - message = f"! Bad unrecognised svx command: [*{cmd}] {line} ({survexblock}) {survexblock.survexfile.path}" - print(self.insp+message) - DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(survexblock.survexfile.path)) + message = ( + f"! Bad unrecognised svx command: [*{cmd}] {line} ({survexblock}) {survexblock.survexfile.path}" + ) + print(self.insp + message) + DataIssue.objects.create( + parser="survex", message=message, url=get_offending_filename(survexblock.survexfile.path) + ) def LoadSurvexTeam(self, survexblock, line): """Interpeting the *team fields means interpreting older style survex as well as current survex standard, *team Insts Anthony Day - this is how most of our files specify the team member *team "Anthony Day" notes pictures tape - this is how the survex documentation says it should be done - We have a huge variety of abbreviations and mispellings. The most laconic being + We have a huge variety of abbreviations and mispellings. The most laconic being *team gb, bl - + personrole is used to record that a person was on a survex trip, NOT the role they played. (NB PersonTrip is a logbook thing, not a survex thing. Yes they could be merged, maybe.) """ + def record_team_member(tm, survexblock): - tm = tm.strip('\"\'').strip() + tm = tm.strip("\"'").strip() # Refactor. The dict GetPersonExpeditionNameLookup(expo) indexes by name and has values of personexpedition # This is convoluted, the whole personexpedition concept is unnecessary. - + # we need the current expedition, but if there has been no date yet in the survex file, we don't know which one it is. # so we can't validate whether the person was on expo or not. - # we will have to attach them to the survexblock anyway, and then do a + # we will have to attach them to the survexblock anyway, and then do a # later check on whether they are valid when we get the date. - - - expo = survexblock.expedition # may be None if no *date yet + + expo = survexblock.expedition # may be None if no *date yet # this syntax was bizarre.. made more obvious if expo: - if not survexblock.expeditionday: # *date has been set + if not survexblock.expeditionday: # *date has been set # should not happen message = f"! 
*team {expo.year} expo ok, expedition day not in *team {survexblock.survexfile.path} ({survexblock}) " - print(self.insp+message) - DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(survexblock.survexfile.path)) - - personexpedition = GetPersonExpeditionNameLookup(expo).get(tm.lower()) - if personexpedition: - personrole, created = SurvexPersonRole.objects.update_or_create(survexblock=survexblock, personexpedition=personexpedition, personname=tm) - personrole.person=personexpedition.person - personrole.expeditionday = survexblock.expeditionday - self.currentpersonexped.append(personexpedition) # used in push/pop block code + print(self.insp + message) + DataIssue.objects.create( + parser="survex", message=message, url=get_offending_filename(survexblock.survexfile.path) + ) + + personexpedition = GetPersonExpeditionNameLookup(expo).get(tm.lower()) + if personexpedition: + personrole, created = SurvexPersonRole.objects.update_or_create( + survexblock=survexblock, personexpedition=personexpedition, personname=tm + ) + personrole.person = personexpedition.person + personrole.expeditionday = survexblock.expeditionday + self.currentpersonexped.append(personexpedition) # used in push/pop block code personrole.save() - elif known_foreigner(tm): # note, not using .lower() + elif known_foreigner(tm): # note, not using .lower() message = f"- *team {expo.year} '{tm}' known foreigner on *team {survexblock.survexfile.path} ({survexblock}) in '{line}'" - print(self.insp+message) + print(self.insp + message) # DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(survexblock.survexfile.path)) else: # we know the date and expo, but can't find the person message = f"! *team {expo.year} '{tm}' FAIL personexpedition lookup on *team {survexblock.survexfile.path} ({survexblock}) in '{line}'" - print(self.insp+message) - DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(survexblock.survexfile.path)) + print(self.insp + message) + DataIssue.objects.create( + parser="survex", message=message, url=get_offending_filename(survexblock.survexfile.path) + ) else: personexpedition = None - personrole, created = SurvexPersonRole.objects.update_or_create(survexblock=survexblock, personexpedition=personexpedition, personname=tm) - # don't know the date yet, so cannot query the table about validity. + personrole, created = SurvexPersonRole.objects.update_or_create( + survexblock=survexblock, personexpedition=personexpedition, personname=tm + ) + # don't know the date yet, so cannot query the table about validity. # assume the person is valid. It will get picked up with the *date appears personrole.save() - - - mteammember = self.rx_teammem.match(line) # matches the role at the beginning + mteammember = self.rx_teammem.match(line) # matches the role at the beginning if not mteammember: - moldstyle = self.rx_teamold.match(line) # matches the role at the the end of the string + moldstyle = self.rx_teamold.match(line) # matches the role at the the end of the string if moldstyle: for tm in self.rx_person.split(moldstyle.group(1)): if tm: @@ -293,97 +318,109 @@ class LoadingSurvex(): # print(msg, file=sys.stderr) else: message = f"! 
*team {survexblock.survexfile.path} ({survexblock}) Weird '{mteammember.group(1)}' oldstyle line: '{line}'" - print(self.insp+message) - DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(survexblock.survexfile.path)) + print(self.insp + message) + DataIssue.objects.create( + parser="survex", message=message, url=get_offending_filename(survexblock.survexfile.path) + ) else: - nullmember = self.rx_teamabs.match(line) # matches empty role line. Ignore these. + nullmember = self.rx_teamabs.match(line) # matches empty role line. Ignore these. if not nullmember: message = f"! *team {survexblock.survexfile.path} ({survexblock}) Bad line: '{line}'" - print(self.insp+message) - DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(survexblock.survexfile.path)) + print(self.insp + message) + DataIssue.objects.create( + parser="survex", message=message, url=get_offending_filename(survexblock.survexfile.path) + ) else: for tm in self.rx_person.split(mteammember.group(2)): if tm: record_team_member(tm, survexblock) else: - if not mteammember.group(2).lower() in ('none', 'both'): + if not mteammember.group(2).lower() in ("none", "both"): message = f"! Weird *team '{mteammember.group(2)}' newstyle line: '{line}' ({survexblock}) {survexblock.survexfile.path}" - print(self.insp+message) - DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(survexblock.survexfile.path)) + print(self.insp + message) + DataIssue.objects.create( + parser="survex", message=message, url=get_offending_filename(survexblock.survexfile.path) + ) def LoadSurvexEntrance(self, survexblock, line): # Not using this yet pass - + def LoadSurvexAlias(self, survexblock, line): # *alias station - .. - splayalias = re.match("(?i)station\s*\-\s*\.\.\s*$",line) + splayalias = re.match("(?i)station\s*\-\s*\.\.\s*$", line) if splayalias: self.flagsstar["splayalias"] = True else: message = f"! Bad *ALIAS: '{line}' ({survexblock}) {survexblock.survexfile.path}" - print(self.insp+message) - DataIssue.objects.create(parser='survex', message=message) + print(self.insp + message) + DataIssue.objects.create(parser="survex", message=message) def LoadSurvexUnits(self, survexblock, line): # all for 4 survex files with measurements in feet. bugger. # Won't need this once we move to using cavern or d3dump output for lengths - tapeunits = self.rx_tapelng.match(line) # tape|length + tapeunits = self.rx_tapelng.match(line) # tape|length if not tapeunits: return - convert = re.match("(?i)(\w*)\s*([\.\d]+)\s*(\w*)",line) + convert = re.match("(?i)(\w*)\s*([\.\d]+)\s*(\w*)", line) if convert: factor = convert.groups()[1] self.unitsfactor = float(factor) if debugprint: - message = f"! *UNITS NUMERICAL conversion [{factor}x] '{line}' ({survexblock}) {survexblock.survexfile.path}" - print(self.insp+message) - DataIssue.objects.create(parser='survexunits', message=message) + message = ( + f"! *UNITS NUMERICAL conversion [{factor}x] '{line}' ({survexblock}) {survexblock.survexfile.path}" + ) + print(self.insp + message) + DataIssue.objects.create(parser="survexunits", message=message) - feet = re.match("(?i).*feet$",line) - metres = re.match("(?i).*(METRIC|METRES|METERS)$",line) + feet = re.match("(?i).*feet$", line) + metres = re.match("(?i).*(METRIC|METRES|METERS)$", line) if feet: self.units = "feet" elif metres: self.units = "metres" else: message = f"! *UNITS in YARDS!? 
- not converted '{line}' ({survexblock}) {survexblock.survexfile.path}" - print(self.insp+message) - DataIssue.objects.create(parser='survexunits', message=message) - + print(self.insp + message) + DataIssue.objects.create(parser="survexunits", message=message) + def get_expo_from_year(self, year): - # cacheing to save DB query on every block + # cacheing to save DB query on every block if year in self.expos: expo = self.expos[year] else: expeditions = Expedition.objects.filter(year=year) - if len(expeditions) != 1 : - message = f"! More than one expedition in year {year} '{line}' ({survexblock}) {survexblock.survexfile.path}" - print(self.insp+message) - DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(survexblock.survexfile.path)) - - expo= expeditions[0] - self.expos[year]= expo - return expo - + if len(expeditions) != 1: + message = ( + f"! More than one expedition in year {year} '{line}' ({survexblock}) {survexblock.survexfile.path}" + ) + print(self.insp + message) + DataIssue.objects.create( + parser="survex", message=message, url=get_offending_filename(survexblock.survexfile.path) + ) + + expo = expeditions[0] + self.expos[year] = expo + return expo + def LoadSurvexDate(self, survexblock, line): # we should make this a date RANGE for everything? - + def setdate_on_survexblock(year): # We are assuming that deferred *team people are in the same block. Otherwise, ouch. expo = self.get_expo_from_year(year) survexblock.expedition = expo survexblock.expeditionday = expo.get_expedition_day(survexblock.date) survexblock.save() - + team = SurvexPersonRole.objects.filter(survexblock=survexblock) for pr in team: - if not pr.expeditionday: # *date and *team in 'wrong' order. All working now. - - pr.expeditionday = survexblock.expeditionday + if not pr.expeditionday: # *date and *team in 'wrong' order. All working now. + + pr.expeditionday = survexblock.expeditionday pr.save() - - if not pr.personexpedition: # again, we didn't know the date until now + + if not pr.personexpedition: # again, we didn't know the date until now pe = GetPersonExpeditionNameLookup(expo).get(pr.personname.lower()) if pe: # message = "! {} ({}) Fixing undated personexpedition '{}'".format(survexblock.survexfile.path, survexblock, p.personname) @@ -392,61 +429,73 @@ class LoadingSurvex(): pr.personexpedition = pe pr.person = pr.personexpedition.person pr.save() - self.currentpersonexped.append(pe) # used in push/pop block code - elif known_foreigner(pr.personname): # note, not using .lower() + self.currentpersonexped.append(pe) # used in push/pop block code + elif known_foreigner(pr.personname): # note, not using .lower() message = f"- *team {expo.year} '{pr.personname}' known foreigner on *date {survexblock.survexfile.path} ({survexblock}) in '{line}'" - print(self.insp+message) + print(self.insp + message) # DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(survexblock.survexfile.path)) else: message = f"! *team {year} '{pr.personname}' FAIL personexpedition lookup on *date {survexblock.survexfile.path} ({survexblock}) '{pr.personname}'" - print(self.insp+message) - DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(survexblock.survexfile.path)) - + print(self.insp + message) + DataIssue.objects.create( + parser="survex", + message=message, + url=get_offending_filename(survexblock.survexfile.path), + ) + oline = line - if len(line) > 10: + if len(line) > 10: # message = "! 
DATE Warning LONG DATE '{}' ({}) {}".format(oline, survexblock, survexblock.survexfile.path) # print(self.insp+message) # DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(survexblock.survexfile.path)) - if line[10] == "-": # ie a range, just look at first date + if line[10] == "-": # ie a range, just look at first date line = line[0:10] - if len(line) == 10: + if len(line) == 10: year = line[:4] # TO DO set to correct Austrian timezone Europe/Vienna ? # %m and %d need leading zeros. Source svx files require them. - survexblock.date = datetime.strptime(line.replace('.','-'), '%Y-%m-%d') + survexblock.date = datetime.strptime(line.replace(".", "-"), "%Y-%m-%d") setdate_on_survexblock(year) - elif len(line) == 7: + elif len(line) == 7: year = line[:4] - perps = get_people_on_trip(survexblock) # What, you don't know Judge Dredd slang ? + perps = get_people_on_trip(survexblock) # What, you don't know Judge Dredd slang ? message = f"! DATE Warning only accurate to the month, setting to 1st '{oline}' ({survexblock}) {survexblock.survexfile.path} {perps}" - print(self.insp+message) - DataIssue.objects.create(parser='svxdate', message=message, url=get_offending_filename(survexblock.survexfile.path)) - survexblock.date = datetime.strptime(line.replace('.','-'), '%Y-%m') # sets to first of month + print(self.insp + message) + DataIssue.objects.create( + parser="svxdate", message=message, url=get_offending_filename(survexblock.survexfile.path) + ) + survexblock.date = datetime.strptime(line.replace(".", "-"), "%Y-%m") # sets to first of month setdate_on_survexblock(year) - elif len(line) == 4: + elif len(line) == 4: year = line[:4] perps = get_people_on_trip(survexblock) message = f"! DATE WARNING only accurate to the YEAR, setting to 1st January '{oline}' ({survexblock}) {survexblock.survexfile.path} {perps}" - print(self.insp+message) - DataIssue.objects.create(parser='svxdate', message=message, url=get_offending_filename(survexblock.survexfile.path)) - survexblock.date = datetime.strptime(line, '%Y') # sets to January 1st + print(self.insp + message) + DataIssue.objects.create( + parser="svxdate", message=message, url=get_offending_filename(survexblock.survexfile.path) + ) + survexblock.date = datetime.strptime(line, "%Y") # sets to January 1st setdate_on_survexblock(year) else: # these errors are reporting the wrong survexblock, which is actually a SurvexFile (!) - message = f"! DATE Error unrecognised '{oline}-{survexblock}' ({type(survexblock)}) {survexblock.survexfile.path}" - print(self.insp+message) - DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(survexblock.survexfile.path)) - print(f" {type(survexblock)=}") # survexblock.parent fails as a SurvexFile has no .parent ...ugh. + message = ( + f"! DATE Error unrecognised '{oline}-{survexblock}' ({type(survexblock)}) {survexblock.survexfile.path}" + ) + print(self.insp + message) + DataIssue.objects.create( + parser="survex", message=message, url=get_offending_filename(survexblock.survexfile.path) + ) + print(f" {type(survexblock)=}") # survexblock.parent fails as a SurvexFile has no .parent ...ugh. print(f" {survexblock.survexpath=}") print(f" {survexblock.survexfile=}") - #raise + # raise def LoadSurvexLeg(self, survexblock, sline, comment, svxline): """This reads compass, clino and tape data but only keeps the tape lengths, the rest is discarded after error-checking. Now skipping the error checking - returns as soon as the leg is not one we count. 
- - REPLACE ALL THIS by reading the .log output of cavern for the file. + + REPLACE ALL THIS by reading the .log output of cavern for the file. But we need the lengths per Block, not by File. dump3d will do lengths per block. """ invalid_clino = 180.0 @@ -471,11 +520,13 @@ class LoadingSurvex(): if self.datastar["type"] == "cylpolar": return if debugprint: - print(f" !! LEG data lineno:{self.lineno}\n !! sline:'{sline}'\n !! datastar['tape']: {self.datastar['tape']}") - - if self.datastar["type"] != "normal": + print( + f" !! LEG data lineno:{self.lineno}\n !! sline:'{sline}'\n !! datastar['tape']: {self.datastar['tape']}" + ) + + if self.datastar["type"] != "normal": return - + ls = sline.lower().split() # NORMAL, so there should be 5 fields # from the content, this is clearly reading fixedpts/gps/gps00raw.svx, but not reporting it by that name @@ -484,12 +535,14 @@ class LoadingSurvex(): print(" datastar NORMAL:", self.datastar) print(f" Line (split): {ls}, comment: {comment}") print(f" Line: {sline}\nsvxline: {svxline}") - message = f' ! Not 5 fields in line \'{sline.lower()}\' {self.datastar=} {ls=} in\n{survexblock}\n{survexblock.survexfile}\n{survexblock.survexfile.path}' - DataIssue.objects.create(parser='survexleg', message=message, url=get_offending_filename(survexblock.survexfile.path)) + message = f" ! Not 5 fields in line '{sline.lower()}' {self.datastar=} {ls=} in\n{survexblock}\n{survexblock.survexfile}\n{survexblock.survexfile.path}" + DataIssue.objects.create( + parser="survexleg", message=message, url=get_offending_filename(survexblock.survexfile.path) + ) - datastar = self.datastar # shallow copy: alias but the things inside are the same things + datastar = self.datastar # shallow copy: alias but the things inside are the same things survexleg = SurvexLeg() - + # skip all splay legs try: if ls[datastar["from"]] == ".." or ls[datastar["from"]] == ".": @@ -510,17 +563,21 @@ class LoadingSurvex(): print("Aliased splay in ", survexblock.survexfile.path) return except: - message = f' ! datastar parsing from/to incorrect in line {ls} in {survexblock.survexfile.path}' - print(self.insp+message) - DataIssue.objects.create(parser='survexleg', message=message, url=get_offending_filename(survexblock.survexfile.path)) + message = f" ! datastar parsing from/to incorrect in line {ls} in {survexblock.survexfile.path}" + print(self.insp + message) + DataIssue.objects.create( + parser="survexleg", message=message, url=get_offending_filename(survexblock.survexfile.path) + ) return try: tape = ls[datastar["tape"]] except: - message = f' ! datastar parsing incorrect in line {ls} in {survexblock.survexfile.path}' - print(self.insp+message) - DataIssue.objects.create(parser='survexleg', message=message, url=get_offending_filename(survexblock.survexfile.path)) + message = f" ! datastar parsing incorrect in line {ls} in {survexblock.survexfile.path}" + print(self.insp + message) + DataIssue.objects.create( + parser="survexleg", message=message, url=get_offending_filename(survexblock.survexfile.path) + ) survexleg.tape = invalid_tape return # e.g. '29/09' or '(06.05)' in the tape measurement @@ -532,35 +589,47 @@ class LoadingSurvex(): tape = float(tape) * self.unitsfactor if debugprint: message = f" ! 
Units: Length scaled {tape}m '{ls}' in ({survexblock.survexfile.path}) units:{self.units} factor:{self.unitsfactor}x" - print(self.insp+message) - DataIssue.objects.create(parser='survexleg', message=message, url=get_offending_filename(survexblock.survexfile.path)) - if self.units =="feet": + print(self.insp + message) + DataIssue.objects.create( + parser="survexleg", message=message, url=get_offending_filename(survexblock.survexfile.path) + ) + if self.units == "feet": tape = float(tape) / METRESINFEET if debugprint: message = f" ! Units: converted to {tape:.3f}m from {self.units} '{ls}' in ({survexblock.survexfile.path})" - print(self.insp+message) - DataIssue.objects.create(parser='survexleg', message=message, url=get_offending_filename(survexblock.survexfile.path)) + print(self.insp + message) + DataIssue.objects.create( + parser="survexleg", message=message, url=get_offending_filename(survexblock.survexfile.path) + ) survexleg.tape = float(tape) self.legsnumber += 1 except ValueError: message = f" ! Value Error: Tape misread in line'{ls}' in {survexblock.survexfile.path} units:{self.units}" - print(self.insp+message) - DataIssue.objects.create(parser='survexleg', message=message, url=get_offending_filename(survexblock.survexfile.path)) + print(self.insp + message) + DataIssue.objects.create( + parser="survexleg", message=message, url=get_offending_filename(survexblock.survexfile.path) + ) survexleg.tape = invalid_tape try: survexblock.legslength += survexleg.tape - self.slength += survexleg.tape + self.slength += survexleg.tape except ValueError: - message = f" ! Value Error: Tape length not added '{ls}' in {survexblock.survexfile.path} units:{self.units}" - print(self.insp+message) - DataIssue.objects.create(parser='survexleg', message=message, url=get_offending_filename(survexblock.survexfile.path)) + message = ( + f" ! Value Error: Tape length not added '{ls}' in {survexblock.survexfile.path} units:{self.units}" + ) + print(self.insp + message) + DataIssue.objects.create( + parser="survexleg", message=message, url=get_offending_filename(survexblock.survexfile.path) + ) try: lcompass = ls[datastar["compass"]] except: - message = f' ! Value Error: Compass not found in line {ls} in {survexblock.survexfile.path}' - print(self.insp+message) - DataIssue.objects.create(parser='survexleg', message=message, url=get_offending_filename(survexblock.survexfile.path)) + message = f" ! Value Error: Compass not found in line {ls} in {survexblock.survexfile.path}" + print(self.insp + message) + DataIssue.objects.create( + parser="survexleg", message=message, url=get_offending_filename(survexblock.survexfile.path) + ) lcompass = invalid_compass try: @@ -569,8 +638,10 @@ class LoadingSurvex(): print(("! Clino misread in", survexblock.survexfile.path)) print((" datastar:", datastar)) print((" Line:", ls)) - message = f' ! Value Error: Clino misread in line \'{sline.lower()}\' {datastar=} {self.datastar=} {ls=} in\n{survexblock}\n{survexblock.survexfile}\n{survexblock.survexfile.path}' - DataIssue.objects.create(parser='survexleg', message=message, url=get_offending_filename(survexblock.survexfile.path)) + message = f" ! Value Error: Clino misread in line '{sline.lower()}' {datastar=} {self.datastar=} {ls=} in\n{survexblock}\n{survexblock.survexfile}\n{survexblock.survexfile.path}" + DataIssue.objects.create( + parser="survexleg", message=message, url=get_offending_filename(survexblock.survexfile.path) + ) lclino = invalid_clino if lclino == "up": @@ -588,31 +659,30 @@ class LoadingSurvex(): print(("! 
Compass misread in", survexblock.survexfile.path)) print((" datastar:", datastar)) print((" Line:", ls)) - message = " ! Value Error: lcompass:'{}' line {} in '{}'".format(lcompass, - ls, survexblock.survexfile.path) - DataIssue.objects.create(parser='survexleg', message=message, url=get_offending_filename(survexblock.survexfile.path)) + message = " ! Value Error: lcompass:'{}' line {} in '{}'".format(lcompass, ls, survexblock.survexfile.path) + DataIssue.objects.create( + parser="survexleg", message=message, url=get_offending_filename(survexblock.survexfile.path) + ) survexleg.compass = invalid_compass # delete the object to save memory survexleg = None - - + def LoadSurvexRef(self, survexblock, args): - """Interpret the *ref record, and all the many variants - """ - #print(self.insp+ "*REF ---- '"+ args +"'") - url= get_offending_filename(survexblock.survexfile.path) + """Interpret the *ref record, and all the many variants""" + # print(self.insp+ "*REF ---- '"+ args +"'") + url = get_offending_filename(survexblock.survexfile.path) # *REF but also ; Ref years from 1960 to 2039 refline = self.rx_ref_text.match(args) if refline: # a textual reference such as "1996-1999 Not-KH survey book pp 92-95" - print(f'{self.insp} *REF quoted text so ignored:{args} in {survexblock.survexfile.path}') + print(f"{self.insp} *REF quoted text so ignored:{args} in {survexblock.survexfile.path}") return - - if len(args)< 4: + + if len(args) < 4: message = f" ! Empty or BAD *REF statement '{args}' in '{survexblock.survexfile.path}'" - print(self.insp+message) - DataIssue.objects.create(parser='survex', message=message, url=url) + print(self.insp + message) + DataIssue.objects.create(parser="survex", message=message, url=url) return argsgps = self.rx_argsref.match(args) @@ -621,91 +691,95 @@ class LoadingSurvex(): else: perps = get_people_on_trip(survexblock) message = f" ! Wallet *REF bad in '{survexblock.survexfile.path}' malformed id '{args}' {perps}" - print(self.insp+message) - DataIssue.objects.create(parser='survex', message=message, url=url) + print(self.insp + message) + DataIssue.objects.create(parser="survex", message=message, url=url) return if not letterx: letterx = "" else: letterx = "X" - if len(wallet)<2: + if len(wallet) < 2: wallet = "0" + wallet - if not (int(yr)>1960 and int(yr)<2050): - message = " ! Wallet year out of bounds {yr} '{refscan}' {survexblock.survexfile.path}" - print(self.insp+message) - DataIssue.objects.create(parser='survex', message=message, url=url) - + if not (int(yr) > 1960 and int(yr) < 2050): + message = " ! Wallet year out of bounds {yr} '{refscan}' {survexblock.survexfile.path}" + print(self.insp + message) + DataIssue.objects.create(parser="survex", message=message, url=url) + refscan = f"{yr}#{letterx}{wallet}" try: - if int(wallet)>99: + if int(wallet) > 99: message = f" ! Wallet *REF {refscan} - very big (more than 99) so probably wrong in '{survexblock.survexfile.path}'" - print(self.insp+message) - DataIssue.objects.create(parser='survex', message=message, url=url) + print(self.insp + message) + DataIssue.objects.create(parser="survex", message=message, url=url) except: message = f" ! 
Wallet *REF {refscan} - not numeric in '{survexblock.survexfile.path}'" - print(self.insp+message) - DataIssue.objects.create(parser='survex', message=message, url=url) - - manywallets = Wallet.objects.filter(walletname=refscan) # assumes all wallets found in earlier pass of data import + print(self.insp + message) + DataIssue.objects.create(parser="survex", message=message, url=url) + + manywallets = Wallet.objects.filter( + walletname=refscan + ) # assumes all wallets found in earlier pass of data import if manywallets: if len(manywallets) > 1: message = f" ! Wallet *REF {refscan} - more than one found {len(manywallets)} wallets in db with same id {survexblock.survexfile.path}" - print(self.insp+message) - DataIssue.objects.create(parser='survex', message=message, url=url) - + print(self.insp + message) + DataIssue.objects.create(parser="survex", message=message, url=url) + if survexblock.scanswallet: if survexblock.scanswallet.walletname != refscan: message = f" ! Wallet *REF {refscan} in {survexblock.survexfile.path} - Already a DIFFERENT wallet is set for this block '{survexblock.scanswallet.walletname}'" - print(self.insp+message) - DataIssue.objects.create(parser='survex', message=message, url=url) + print(self.insp + message) + DataIssue.objects.create(parser="survex", message=message, url=url) else: - survexblock.scanswallet = manywallets[0] # this is a ForeignKey field + survexblock.scanswallet = manywallets[0] # this is a ForeignKey field survexblock.save() # This is where we should check that the wallet JSON contains a link to the survexfile # and that the JSON date and walletdate are set correctly to the survexblock date. else: perps = get_people_on_trip(survexblock) message = f" ! Wallet *REF bad in '{survexblock.survexfile.path}' '{refscan}' NOT in database i.e. wallet does not exist {perps}." - print(self.insp+message) - DataIssue.objects.create(parser='survex', message=message, url=url) + print(self.insp + message) + DataIssue.objects.create(parser="survex", message=message, url=url) def TickSurvexQM(self, survexblock, qmtick): - """Interpret the specially formatted comment which is a QM TICKED statement - """ + """Interpret the specially formatted comment which is a QM TICKED statement""" # Now we need to find the correct QM object. It will be in the same block and have the same number. - + try: qm = QM.objects.filter(block=survexblock, number=int(qmtick.group(1))) except: - #raise + # raise message = f' ! QM TICK find FAIL QM{qmtick.group(1)} date:"{qmtick.group(2)}" qmlist:"{qm}" in "{survexblock.survexfile.path}" + comment:"{qmtick.group(3)}" ' print(message) - DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(survexblock.survexfile.path)) - if len(qm)>1: + DataIssue.objects.create( + parser="survex", message=message, url=get_offending_filename(survexblock.survexfile.path) + ) + if len(qm) > 1: message = f' ! 
QM TICK MULTIPLE found FAIL QM{qmtick.group(1)} date:"{qmtick.group(2)}" in "{survexblock.survexfile.path}" + comment:"{qmtick.group(3)}" ' print(message) - DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(survexblock.survexfile.path)) + DataIssue.objects.create( + parser="survex", message=message, url=get_offending_filename(survexblock.survexfile.path) + ) qm[0].ticked = True qm[0].save() def LoadSurvexQM(self, survexblock, qmline): - """Interpret the specially formatted comment which is a QM definition - """ + """Interpret the specially formatted comment which is a QM definition""" insp = self.insp - qm_no = qmline.group(1) # this may not be unique across multiple survex files - + qm_no = qmline.group(1) # this may not be unique across multiple survex files + qm_grade = qmline.group(2) if qmline.group(3): # usual closest survey station - qm_nearest = qmline.group(3) + qm_nearest = qmline.group(3) if qmline.group(4): - qm_nearest = qm_nearest +"."+ qmline.group(4) - - if qmline.group(6) and qmline.group(6) != '-': + qm_nearest = qm_nearest + "." + qmline.group(4) + + if qmline.group(6) and qmline.group(6) != "-": qm_resolve_station = qmline.group(6) if qmline.group(7): - qm_resolve_station = qm_resolve_station +"."+ qmline.group(7) + qm_resolve_station = qm_resolve_station + "." + qmline.group(7) else: qm_resolve_station = "" qm_notes = qmline.group(8) @@ -716,12 +790,12 @@ class LoadingSurvex(): # NB none of the SurveyStations are in the DB now, so if we want to link to aSurvexStation # we would have to create one. But that is not obligatory and no QMs loaded from CSVs have one - - # Older troggle/CSV assumes a logbook entry 'found_by' for each QM, with a date. + + # Older troggle/CSV assumes a logbook entry 'found_by' for each QM, with a date. # We don't need this anymore so we don't need to create a placeholder logbook entry. qmyear = str(survexblock.date)[:4] blockname = survexblock.name[:6] + survexblock.name[-1:] - #logslug = f'D{int(qmyear)}_{blockname}_{int(qm_no):03d}' + # logslug = f'D{int(qmyear)}_{blockname}_{int(qm_no):03d}' if survexblock.survexfile.cave: caveslug = survexblock.survexfile.cave.slug() place = survexblock.survexfile.cave @@ -730,87 +804,99 @@ class LoadingSurvex(): place = None try: - qm = QM.objects.create(number=qm_no, - # nearest_station=a_survex_station_object, # can be null - nearest_station_description=qm_resolve_station, - nearest_station_name=qm_nearest, - grade=qm_grade.upper(), - location_description=qm_notes, - block = survexblock, # only set for survex-imported QMs - blockname = blockname, # only set for survex-imported QMs - expoyear = str(survexblock.date.year), - cave = survexblock.survexfile.cave) + qm = QM.objects.create( + number=qm_no, + # nearest_station=a_survex_station_object, # can be null + nearest_station_description=qm_resolve_station, + nearest_station_name=qm_nearest, + grade=qm_grade.upper(), + location_description=qm_notes, + block=survexblock, # only set for survex-imported QMs + blockname=blockname, # only set for survex-imported QMs + expoyear=str(survexblock.date.year), + cave=survexblock.survexfile.cave, + ) qm.save except: message = f" ! 
QM{qm_no} FAIL to create {qm_nearest} in'{survexblock.survexfile.path}'" - print(insp+message) - DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(survexblock.survexfile.path)) + print(insp + message) + DataIssue.objects.create( + parser="survex", message=message, url=get_offending_filename(survexblock.survexfile.path) + ) - def LoadSurvexDataNormal(self,survexblock,args): + def LoadSurvexDataNormal(self, survexblock, args): """Sets the order for data elements in this and following blocks, e.g. *data normal from to compass clino tape *data normal from to tape compass clino We are only collecting length data so we are disinterested in from, to, LRUD etc. """ # datastardefault = { # included here as reference to help understand the code - # "type":"normal", - # "t":"leg", - # "from":0, - # "to":1, - # "tape":2, - # "compass":3, - # "clino":4} + # "type":"normal", + # "t":"leg", + # "from":0, + # "to":1, + # "tape":2, + # "compass":3, + # "clino":4} datastar = copy.deepcopy(self.datastardefault) if args == "": # naked '*data' which is relevant only for passages. Ignore. Continue with previous settings. return - # DEFAULT | NORMAL | CARTESIAN| NOSURVEY |PASSAGE | TOPOFIL | CYLPOLAR | DIVING - ls = args.lower().split() + # DEFAULT | NORMAL | CARTESIAN| NOSURVEY |PASSAGE | TOPOFIL | CYLPOLAR | DIVING + ls = args.lower().split() if ls[0] == "default": self.datastar = copy.deepcopy(self.datastardefault) elif ls[0] == "normal" or ls[0] == "topofil": if not ("from" in datastar and "to" in datastar): - message = f" ! - Unrecognised *data normal statement '{args}' {survexblock.name}|{survexblock.survexpath}" + message = ( + f" ! - Unrecognised *data normal statement '{args}' {survexblock.name}|{survexblock.survexpath}" + ) print(message) - print(message,file=sys.stderr) - DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(survexblock.survexfile.path)) + print(message, file=sys.stderr) + DataIssue.objects.create( + parser="survex", message=message, url=get_offending_filename(survexblock.survexfile.path) + ) return else: datastar = self.datastardefault # ls = ["normal", "from", "to", "tape", "compass", "clino" ] - for i in range(1, len(ls)): # len[0] is "normal" - if ls[i].lower() =="newline": + for i in range(1, len(ls)): # len[0] is "normal" + if ls[i].lower() == "newline": message = f" ! - ABORT *data statement has NEWLINE in it in {survexblock.survexfile.path}. Not parsed by troggle. '{args}'" print(message) - print(message,file=sys.stderr) - DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(survexblock.survexfile.path)) + print(message, file=sys.stderr) + DataIssue.objects.create( + parser="survex", message=message, url=get_offending_filename(survexblock.survexfile.path) + ) return False - - if ls[i] in ["bearing","compass"]: - datastar["compass"] = i-1 - if ls[i] in ["clino","gradient"]: - datastar["clino"] = i-1 - if ls[i] in ["tape","length"]: - datastar["tape"] = i-1 + + if ls[i] in ["bearing", "compass"]: + datastar["compass"] = i - 1 + if ls[i] in ["clino", "gradient"]: + datastar["clino"] = i - 1 + if ls[i] in ["tape", "length"]: + datastar["tape"] = i - 1 self.datastar = copy.deepcopy(datastar) return elif ls[0] == "passage" or ls[0] == "nosurvey" or ls[0] == "diving" or ls[0] == "cylpolar": - #message = " ! - *data {} blocks ignored. {}|{}" '{}' .format(ls[0].upper(), survexblock.name, survexblock.survexpath, args) + # message = " ! - *data {} blocks ignored. 
{}|{}" '{}' .format(ls[0].upper(), survexblock.name, survexblock.survexpath, args) # print(message) - #print(message,file=sys.stderr) - #DataIssue.objects.create(parser='survex', message=message) + # print(message,file=sys.stderr) + # DataIssue.objects.create(parser='survex', message=message) self.datastar["type"] = ls[0] - elif ls[0] == "cartesian": # We should not ignore this ?! Default for Germans ? - #message = " ! - *data {} blocks ignored. {}|{}" '{}' .format(ls[0].upper(), survexblock.name, survexblock.survexpath, args) + elif ls[0] == "cartesian": # We should not ignore this ?! Default for Germans ? + # message = " ! - *data {} blocks ignored. {}|{}" '{}' .format(ls[0].upper(), survexblock.name, survexblock.survexpath, args) # print(message) - #print(message,file=sys.stderr) - #DataIssue.objects.create(parser='survex', message=message) + # print(message,file=sys.stderr) + # DataIssue.objects.create(parser='survex', message=message) self.datastar["type"] = ls[0] else: message = f" ! - Unrecognised *data statement '{args}' {survexblock.name}|{survexblock.survexpath}" print(message) - print(message,file=sys.stderr) - DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(survexblock.survexfile.path)) + print(message, file=sys.stderr) + DataIssue.objects.create( + parser="survex", message=message, url=get_offending_filename(survexblock.survexfile.path) + ) self.datastar["type"] = ls[0] def LoadSurvexFlags(self, args): @@ -818,14 +904,16 @@ class LoadingSurvex(): # Default values are NOT any of them self.flagsstar = copy.deepcopy(self.flagsdefault) flags = [] - - args = self.rx_flagsnot.sub("not",args) + + args = self.rx_flagsnot.sub("not", args) argslist = args.split() for s in argslist: flags.append(s) if debugprint: - print(f" ^ flagslist:{flags}",) - + print( + f" ^ flagslist:{flags}", + ) + if "duplicate" in flags: self.flagsstar["duplicate"] = True if "surface" in flags: @@ -845,8 +933,9 @@ class LoadingSurvex(): if self.flagsstar["surface"] == True or self.flagsstar["splay"] == True: self.flagsstar["skiplegs"] = True if debugprint: - print(f" $ flagslist:{flags}",) - + print( + f" $ flagslist:{flags}", + ) def IdentifyCave(self, cavepath): if cavepath.lower() in self.caveslist: @@ -854,13 +943,13 @@ class LoadingSurvex(): # TO DO - this predates the big revision to Gcavelookup so look at this again carefully path_match = self.rx_cave.search(cavepath) if path_match: - sluggy = f'{path_match.group(1)}-{path_match.group(2)}' + sluggy = f"{path_match.group(1)}-{path_match.group(2)}" guesses = [sluggy.lower(), path_match.group(2).lower()] for g in guesses: if g in self.caveslist: self.caveslist[cavepath] = self.caveslist[g] return self.caveslist[g] - print(f' ! Failed to find cave for {cavepath.lower()}') + print(f" ! Failed to find cave for {cavepath.lower()}") else: # not a cave, but that is fine. # print(f' ! 
No regex(standard identifier) cave match for {cavepath.lower()}') @@ -877,12 +966,11 @@ class LoadingSurvex(): if headpath.lower() not in self.svxdirs: self.svxdirs[headpath.lower()] = SurvexDirectory(path=headpath, primarysurvexfile=self.currentsurvexfile) self.svxdirs[headpath.lower()].save() - self.survexdict[self.svxdirs[headpath.lower()]] = [] # list of the files in the directory + self.survexdict[self.svxdirs[headpath.lower()]] = [] # list of the files in the directory return self.svxdirs[headpath.lower()] def ReportNonCaveIncludes(self, headpath, includelabel, depth): - """Ignore surface, kataser and gpx *include survex files - """ + """Ignore surface, kataser and gpx *include survex files""" if not self.pending: self.pending = set() fpending = Path(settings.CAVEDESCRIPTIONS, "pendingcaves.txt") @@ -890,7 +978,7 @@ class LoadingSurvex(): with open(fpending, "r") as fo: cids = fo.readlines() for cid in cids: - id = cid.strip().rstrip('\n').upper() + id = cid.strip().rstrip("\n").upper() if cid.startswith("162"): self.pending.add(id) else: @@ -898,34 +986,35 @@ class LoadingSurvex(): if headpath in self.ignorenoncave: message = f" - {headpath} is (while creating '{includelabel}' sfile & sdirectory)" - #print("\n"+message) - #print("\n"+message,file=sys.stderr) + # print("\n"+message) + # print("\n"+message,file=sys.stderr) return for i in self.ignoreprefix: if headpath.startswith(i): - message = f" - {headpath} starts with (while creating '{includelabel}' sfile & sdirectory)" + message = ( + f" - {headpath} starts with (while creating '{includelabel}' sfile & sdirectory)" + ) # print("\n"+message) # print("\n"+message,file=sys.stderr) return - caveid = f'{headpath[6:10]}-{headpath[11:]}'.upper() + caveid = f"{headpath[6:10]}-{headpath[11:]}".upper() if caveid in self.pending: - # Yes we didn't find this cave, but we know it is a pending one. So not an error. - # print(f'! ALREADY PENDING {caveid}',file=sys.stderr) - return + # Yes we didn't find this cave, but we know it is a pending one. So not an error. + # print(f'! ALREADY PENDING {caveid}',file=sys.stderr) + return id = caveid[5:] if id in self.pending: - print(f'! ALREADY PENDING {id}',file=sys.stderr) - return - + print(f"! ALREADY PENDING {id}", file=sys.stderr) + return + message = f" ! Warning: cave identifier '{caveid}'or {id} (guessed from file path) is not a known cave. Need to add to expoweb/cave_data/pending.txt ? In '{includelabel}.svx' at depth:[{len(depth)}]." 
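A minimal standalone sketch of the pending-cave guard above, assuming the same caves-<area>/<id> path layout and a pendingcaves.txt with one cave id per line; the helper names here are illustrative, not real troggle functions:

from pathlib import Path

def load_pending(fpending):
    # simplified reader: one cave id per line, normalised to upper case
    pending = set()
    if Path(fpending).is_file():
        for line in Path(fpending).read_text().splitlines():
            if line.strip():
                pending.add(line.strip().upper())
    return pending

def looks_pending(headpath, pending):
    # "caves-1623/2023-bl-01" -> "1623-2023-BL-01", using the same slicing as above
    caveid = f"{headpath[6:10]}-{headpath[11:]}".upper()
    return caveid in pending or caveid[5:] in pending

# usage sketch (paths are examples only)
# pending = load_pending("expoweb/cave_data/pendingcaves.txt")
# if not looks_pending("caves-1623/2023-bl-01", pending):
#     print(" ! not a known cave and not pending either")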
- print("\n"+message) - print("\n"+message,file=sys.stderr) - print(f"{self.pending}",end="", file=sys.stderr) - DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(includelabel)) + print("\n" + message) + print("\n" + message, file=sys.stderr) + print(f"{self.pending}", end="", file=sys.stderr) + DataIssue.objects.create(parser="survex", message=message, url=get_offending_filename(includelabel)) # print(f' # datastack in LoadSurvexFile:{includelabel}', file=sys.stderr) # for dict in self.datastack: - # print(f' type: <{dict["type"].upper()} >', file=sys.stderr) - + # print(f' type: <{dict["type"].upper()} >', file=sys.stderr) def LoadSurvexFile(self, svxid): """Creates SurvexFile in the database, and SurvexDirectory if needed @@ -940,46 +1029,45 @@ class LoadingSurvex(): print(f"'{dict['type'].upper()}' ", end="") print("") - depth = " " * self.depthbegin # print("{:2}{} - NEW survexfile:'{}'".format(self.depthbegin, depth, svxid)) headpath = os.path.dirname(svxid) newfile = SurvexFile(path=svxid) - newfile.save() # until we do this there is no internal id so no foreign key works - self.currentsurvexfile = newfile + newfile.save() # until we do this there is no internal id so no foreign key works + self.currentsurvexfile = newfile newdirectory = self.GetSurvexDirectory(headpath) - newdirectory.save() + newdirectory.save() newfile.survexdirectory = newdirectory self.survexdict[newdirectory].append(newfile) - cave = self.IdentifyCave(headpath) # cave already exists in db + cave = self.IdentifyCave(headpath) # cave already exists in db if not newdirectory: message = f" ! 'None' SurvexDirectory returned from GetSurvexDirectory({headpath})" print(message) - print(message,file=sys.stderr) - DataIssue.objects.create(parser='survex', message=message, url = f'/survexfile/{svxid}') + print(message, file=sys.stderr) + DataIssue.objects.create(parser="survex", message=message, url=f"/survexfile/{svxid}") if cave: newdirectory.cave = cave - newfile.cave = cave + newfile.cave = cave # print(f"\n - New directory '{newdirectory}' for cave '{cave}'",file=sys.stderr) - else: # probably a surface survey, or a cave in a new area e.g. 1624 not previously managed, and not in the pending list + else: # probably a surface survey, or a cave in a new area e.g. 1624 not previously managed, and not in the pending list self.ReportNonCaveIncludes(headpath, svxid, depth) - + if not newfile.survexdirectory: message = f" ! SurvexDirectory NOT SET in new SurvexFile {svxid} " print(message) - print(message,file=sys.stderr) - DataIssue.objects.create(parser='survex', message=message) - self.currentsurvexfile.save() # django insists on this although it is already saved !? + print(message, file=sys.stderr) + DataIssue.objects.create(parser="survex", message=message) + self.currentsurvexfile.save() # django insists on this although it is already saved !? 
try: newdirectory.save() except: print(newdirectory, file=sys.stderr) print(newdirectory.primarysurvexfile, file=sys.stderr) raise - + if debugprint: print(f" # datastack end LoadSurvexFile:{svxid} 'type':", end="") for dict in self.datastack: @@ -995,8 +1083,7 @@ class LoadingSurvex(): self.stacksvxfiles.append(self.currentsurvexfile) def ProcessEdulcniLine(self, edulcni): - """Saves the current survexfile in the db - """ + """Saves the current survexfile in the db""" global debugprint svxid = edulcni.groups()[0] if debugprint: @@ -1010,23 +1097,23 @@ class LoadingSurvex(): def LoadSurvexComment(self, survexblock, comment): # ignore all comments except ;ref, ; wallet and ;QM and ;*include (for collated survex file) # rx_ref2 = re.compile(r'(?i)\s*ref[.;]?') - + # This should also check that the QM survey point rxists in the block refline = self.rx_commref.match(comment) if refline: - #comment = re.sub('(?i)\s*ref[.;]?',"",comment.strip()) - comment = self.rx_ref2.sub("",comment.strip()) - print(f'rx_ref2 -- {comment=} in {survexblock.survexfile.path} :: {survexblock}') + # comment = re.sub('(?i)\s*ref[.;]?',"",comment.strip()) + comment = self.rx_ref2.sub("", comment.strip()) + print(f"rx_ref2 -- {comment=} in {survexblock.survexfile.path} :: {survexblock}") self.LoadSurvexRef(survexblock, comment) - + # handle # ; Messteam: Jörg Haussmann, Robert Eckardt, Thilo Müller # ; Zeichner: Thilo Müller # But none of these will be valid teammembers because they are not actually on our expo - + team = self.rx_commteam.match(comment) - if team: + if team: # print(f'rx_commteam -- {comment=} in {survexblock.survexfile.path} :: {survexblock}') pass @@ -1042,11 +1129,12 @@ class LoadingSurvex(): else: message = f' ! QM Unrecognised as valid in "{survexblock.survexfile.path}" QM{qml.group(1)} "{qml.group(2)}" : regex failure, typo?' print(message) - DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(survexblock.survexfile.path)) + DataIssue.objects.create( + parser="survex", message=message, url=get_offending_filename(survexblock.survexfile.path) + ) - included = self.rx_comminc.match(comment) - # ;*include means 'we have been included'; whereas *include means 'proceed to include' + # ;*include means 'we have been included'; whereas *include means 'proceed to include' # bug, If the original survex file contians the line ;*include then we pick it up ! So fix our special code to be ;|*include if included: self.ProcessIncludeLine(included) @@ -1056,22 +1144,25 @@ class LoadingSurvex(): if edulcni: self.ProcessEdulcniLine(edulcni) - def LoadSurvexSetup(self,survexblock, survexfile): + def LoadSurvexSetup(self, survexblock, survexfile): self.depthbegin = 0 self.datastar = self.datastardefault blocklegs = self.legsnumber - print(self.insp+f" - MEM:{get_process_memory():.3f} Reading. parent:{survexblock.survexfile.path} <> {survexfile.path} ") + print( + self.insp + + f" - MEM:{get_process_memory():.3f} Reading. 
parent:{survexblock.survexfile.path} <> {survexfile.path} " + ) self.lineno = 0 - sys.stderr.flush(); - self.callcount +=1 - if self.callcount % 10 ==0 : - print(".", file=sys.stderr,end='') - if self.callcount % 500 ==0 : - print("\n", file=sys.stderr,end='') + sys.stderr.flush() + self.callcount += 1 + if self.callcount % 10 == 0: + print(".", file=sys.stderr, end="") + if self.callcount % 500 == 0: + print("\n", file=sys.stderr, end="") # Try to find the cave in the DB if not use the string as before path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", survexblock.survexfile.path) if path_match: - pos_cave = f'{path_match.group(1)}-{path_match.group(2)}' + pos_cave = f"{path_match.group(1)}-{path_match.group(2)}" cave = getCaveByReference(pos_cave) if cave: survexfile.cave = cave @@ -1090,25 +1181,25 @@ class LoadingSurvex(): slengthtotal = 0.0 nlegstotal = 0 self.relativefilename = path - cave = self.IdentifyCave(path) # this will produce null for survex files which are geographic collections - + cave = self.IdentifyCave(path) # this will produce null for survex files which are geographic collections + self.currentsurvexfile = survexblock.survexfile - self.currentsurvexfile.save() # django insists on this although it is already saved !? + self.currentsurvexfile.save() # django insists on this although it is already saved !? self.datastar = copy.deepcopy(self.datastardefault) self.flagsstar = copy.deepcopy(self.flagsdefault) def tickle(): nonlocal blockcount - - blockcount +=1 - if blockcount % 20 ==0 : - print(".", file=sys.stderr,end='') - if blockcount % 800 ==0 : - print("\n", file=sys.stderr,end='') - mem=get_process_memory() - print(f" - MEM: {mem:7.2f} MB in use",file=sys.stderr) - print(" ", file=sys.stderr,end='') + + blockcount += 1 + if blockcount % 20 == 0: + print(".", file=sys.stderr, end="") + if blockcount % 800 == 0: + print("\n", file=sys.stderr, end="") + mem = get_process_memory() + print(f" - MEM: {mem:7.2f} MB in use", file=sys.stderr) + print(" ", file=sys.stderr, end="") sys.stderr.flush() def printbegin(): @@ -1130,8 +1221,11 @@ class LoadingSurvex(): depth = " " * self.depthbegin if debugprint: print(f"{self.depthbegin:2}{depth} - End from:'{args}'") - print("{:2}{} - LEGS: {} (n: {}, length:{} units:{})".format(self.depthbegin, - depth, self.slength, self.slength, self.legsnumber, self.units)) + print( + "{:2}{} - LEGS: {} (n: {}, length:{} units:{})".format( + self.depthbegin, depth, self.slength, self.slength, self.legsnumber, self.units + ) + ) def pushblock(): nonlocal blkid @@ -1150,7 +1244,7 @@ class LoadingSurvex(): print(f"'{dict['type'].upper()}' ", end="") print("") print(f"'{self.datastar['type'].upper()}' self.datastar ") - + # ------------ * FLAGS self.flagsstack.append(copy.deepcopy(self.flagsstar)) # ------------ * FLAGS @@ -1166,7 +1260,7 @@ class LoadingSurvex(): print("") print(f"'{self.datastar['type'].upper()}' self.datastar ") # ------------ * DATA - self.datastar = copy.deepcopy(self.datastack.pop()) + self.datastar = copy.deepcopy(self.datastack.pop()) # ------------ * DATA if debugprint: print(f" # datastack after *end '{blkid} 'type':", end="") @@ -1174,17 +1268,16 @@ class LoadingSurvex(): print(f"'{dict['type'].upper()}' ", end="") print("") print(f"'{self.datastar['type'].upper()}' self.datastar ") - + # ------------ * FLAGS - self.flagsstar = copy.deepcopy(self.flagsstack.pop()) + self.flagsstar = copy.deepcopy(self.flagsstack.pop()) # ------------ * FLAGS if debugprint: if oldflags["skiplegs"] != 
self.flagsstar["skiplegs"]: print(f" # POP 'any' flag now:'{self.flagsstar['skiplegs']}' was:{oldflags['skiplegs']} ") def starstatement(star): - """Interprets a survex comamnd where * is the first character on the line, e.g. *begin - """ + """Interprets a survex comamnd where * is the first character on the line, e.g. *begin""" nonlocal survexblock nonlocal blkid nonlocal pathlist @@ -1212,14 +1305,21 @@ class LoadingSurvex(): self.units = "metres" self.currentpersonexped = [] printbegin() - newsurvexblock = SurvexBlock(name=blkid, parent=survexblock, - survexpath=pathlist, - cave=self.currentcave, survexfile=self.currentsurvexfile, - legsall=0, legslength=0.0) + newsurvexblock = SurvexBlock( + name=blkid, + parent=survexblock, + survexpath=pathlist, + cave=self.currentcave, + survexfile=self.currentsurvexfile, + legsall=0, + legslength=0.0, + ) newsurvexblock.save() - newsurvexblock.title = "("+survexblock.title+")" # copy parent inititally, overwrite if it has its own + newsurvexblock.title = ( + "(" + survexblock.title + ")" + ) # copy parent inititally, overwrite if it has its own survexblock = newsurvexblock - survexblock.save() # django insists on this , but we want to save at the end ! + survexblock.save() # django insists on this , but we want to save at the end ! tickle() # ---------------------------END @@ -1231,16 +1331,16 @@ class LoadingSurvex(): nlegstotal += self.legsnumber try: - survexblock.parent.save() # django insists on this although it is already saved !? + survexblock.parent.save() # django insists on this although it is already saved !? except: print(survexblock.parent, file=sys.stderr) raise try: - survexblock.save() # save to db at end of block + survexblock.save() # save to db at end of block except: print(survexblock, file=sys.stderr) raise - # POP state ++++++++++++++ + # POP state ++++++++++++++ popblock() self.currentpersonexped = self.personexpedstack.pop() self.legsnumber = self.legsnumberstack.pop() @@ -1255,11 +1355,11 @@ class LoadingSurvex(): # ----------------------------- elif self.rx_title.match(cmd): - quotedtitle = re.match("(?i)^\"(.*)\"$",args) + quotedtitle = re.match('(?i)^"(.*)"$', args) if quotedtitle: survexblock.title = quotedtitle.groups()[0] else: - survexblock.title = args + survexblock.title = args elif self.rx_ref.match(cmd): self.LoadSurvexRef(survexblock, args) elif self.rx_flags.match(cmd): @@ -1267,7 +1367,7 @@ class LoadingSurvex(): self.LoadSurvexFlags(args) if debugprint: if oldflags["skiplegs"] != self.flagsstar["skiplegs"]: - print(f" # CHANGE 'any' flag now:'{self.flagsstar['skiplegs']}' was:{oldflags['skiplegs']} ") + print(f" # CHANGE 'any' flag now:'{self.flagsstar['skiplegs']}' was:{oldflags['skiplegs']} ") elif self.rx_data.match(cmd): if self.LoadSurvexDataNormal(survexblock, args): @@ -1290,13 +1390,15 @@ class LoadingSurvex(): elif self.rx_include.match(cmd): message = f" ! -ERROR *include command not expected here {path}. Re-run a full Survex import." print(message) - print(message,file=sys.stderr) - DataIssue.objects.create(parser='survex', message=message, ) + print(message, file=sys.stderr) + DataIssue.objects.create( + parser="survex", + message=message, + ) else: self.LoadSurvexFallThrough(survexblock, args, cmd) - - # this is a python generator idiom. + # this is a python generator idiom. 
# see https://realpython.com/introduction-to-python-generators/ # this is the first use of generators in troggle (Oct.2022) and save 21 MB of memory with open(collatefilename, "r") as fcollate: @@ -1305,32 +1407,34 @@ class LoadingSurvex(): sline, comment = self.rx_comment.match(svxline).groups() if comment: # this catches the ;*include NEWFILE and ;*edulcni ENDOFFILE lines too - self.LoadSurvexComment(survexblock, comment) + self.LoadSurvexComment(survexblock, comment) if not sline: - continue # skip blank lines + continue # skip blank lines # detect a merge failure inserted by version control mfail = self.rx_badmerge.match(sline) - if mfail: + if mfail: message = f"\n ! - ERROR version control merge failure\n - '{sline}'\n" - message = message + f" - line {self.lineno} in {blkid} in {survexblock}\n - NERD++ needed to fix it" + message = ( + message + f" - line {self.lineno} in {blkid} in {survexblock}\n - NERD++ needed to fix it" + ) print(message) - print(message,file=sys.stderr) - DataIssue.objects.create(parser='survex', message=message) - continue # skip this line + print(message, file=sys.stderr) + DataIssue.objects.create(parser="survex", message=message) + continue # skip this line # detect a star command star = self.rx_star.match(sline) - if star: + if star: # yes we are reading a *command starstatement(star) - else: # not a *cmd so we are reading data OR a ";" rx_comment failed. We hope. + else: # not a *cmd so we are reading data OR a ";" rx_comment failed. We hope. self.LoadSurvexLeg(survexblock, sline, comment, svxline) self.legsnumber = nlegstotal - self.slength = slengthtotal - + self.slength = slengthtotal + def PushdownStackScan(self, survexblock, path, finname, flinear, fcollate): """Follows the *include links in all the survex files from the root file (usually 1623.svx) and reads only the *include and *begin and *end statements. It produces a linearised @@ -1342,22 +1446,24 @@ class LoadingSurvex(): self.lineno += 1 # detect a merge failure inserted by version control mfail = self.rx_badmerge.match(svxline) - if mfail: + if mfail: message = f"\n!! - ERROR version control merge failure\n - '{svxline}'\n" message = message + f" - in '{path}' at line {thissvxline}\n" - message = message + f" - line {self.lineno} {survexblock}\n - Parsing aborted. NERD++ needed to fix it" + message = ( + message + f" - line {self.lineno} {survexblock}\n - Parsing aborted. 
NERD++ needed to fix it" + ) print(message) - print(message,file=sys.stderr) - DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(path)) - return # skip this survex file and all things *included in it + print(message, file=sys.stderr) + DataIssue.objects.create(parser="survex", message=message, url=get_offending_filename(path)) + return # skip this survex file and all things *included in it - includestmt =self.rx_include.match(svxline) + includestmt = self.rx_include.match(svxline) if not includestmt: fcollate.write(f"{svxline.strip()}\n") sline, comment = self.rx_comment.match(svxline.strip()).groups() star = self.rx_star.match(sline) - if star: # yes we are reading a *cmd + if star: # yes we are reading a *cmd cmd, args = star.groups() cmd = cmd.lower() if re.match("(?i)include$", cmd): @@ -1367,7 +1473,7 @@ class LoadingSurvex(): self.RunSurvexIfNeeded(os.path.join(settings.SURVEX_DATA, includepath), path) self.checkUniqueness(os.path.join(settings.SURVEX_DATA, includepath)) if os.path.isfile(fullpath): - #-------------------------------------------------------- + # -------------------------------------------------------- self.depthinclude += 1 # fininclude = open(fullpath,'r') finincludename = fullpath @@ -1375,26 +1481,26 @@ class LoadingSurvex(): flinear.write(f"{self.depthinclude:2} {indent} *include {includepath}\n") push = includepath.lower() self.includestack.append(push) - #----------------- + # ----------------- self.PushdownStackScan(survexblock, includepath, finincludename, flinear, fcollate) - #----------------- + # ----------------- pop = self.includestack.pop() if pop != push: message = "!! ERROR mismatch *include pop!=push {}".format(pop, push, self.includestack) print(message) - print(message,file=flinear) - print(message,file=sys.stderr) - DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(path)) + print(message, file=flinear) + print(message, file=sys.stderr) + DataIssue.objects.create(parser="survex", message=message, url=get_offending_filename(path)) flinear.write(f"{self.depthinclude:2} {indent} *edulcni {pop}\n") fcollate.write(f";|*edulcni {pop}\n") # fininclude.close() self.depthinclude -= 1 - #-------------------------------------------------------- + # -------------------------------------------------------- else: message = f" ! ERROR *include file '{includepath}' not found, listed in '{fin.name}'" print(message) - print(message,file=sys.stderr) - DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(path)) + print(message, file=sys.stderr) + DataIssue.objects.create(parser="survex", message=message, url=get_offending_filename(path)) elif re.match("(?i)begin$", cmd): self.depthbegin += 1 depth = " " * self.depthbegin @@ -1412,11 +1518,13 @@ class LoadingSurvex(): args = " " popargs = self.stackbegin.pop() if popargs != args.lower(): - message = f"!! ERROR mismatch in BEGIN/END labels pop!=push '{popargs}'!='{args}'\n{self.stackbegin}" + message = ( + f"!! 
ERROR mismatch in BEGIN/END labels pop!=push '{popargs}'!='{args}'\n{self.stackbegin}" + ) print(message) - print(message,file=flinear) - print(message,file=sys.stderr) - DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(path)) + print(message, file=flinear) + print(message, file=sys.stderr) + DataIssue.objects.create(parser="survex", message=message, url=get_offending_filename(path)) self.depthbegin -= 1 pass @@ -1426,32 +1534,31 @@ class LoadingSurvex(): pass indent = " " * self.depthinclude - sys.stderr.flush(); - self.callcount +=1 - - - if self.callcount % 10 ==0 : - print(".", file=sys.stderr,end='') - if self.callcount % 500 ==0 : - print("\n ", file=sys.stderr,end='') + sys.stderr.flush() + self.callcount += 1 + + if self.callcount % 10 == 0: + print(".", file=sys.stderr, end="") + if self.callcount % 500 == 0: + print("\n ", file=sys.stderr, end="") if path in self.svxfileslist: # We have already used os.normpath() so this is OK. "/../" and "//" have been simplified already. if stop_dup_warning: - #print("D",end="", file=sys.stderr) + # print("D",end="", file=sys.stderr) pass else: message = f" * Warning. Duplicate detected. We have already seen this *include '{path}' from another survex file. Detected at callcount:{self.callcount} depth:{self.depthinclude}" print(message) - print(message,file=flinear) - #print(message,file=sys.stderr) - DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(path)) + print(message, file=flinear) + # print(message,file=sys.stderr) + DataIssue.objects.create(parser="survex", message=message, url=get_offending_filename(path)) if self.svxfileslist.count(path) > 2: message = f" ! ERROR. Should have been caught before this. Survex file already *included 2x. Probably an infinite loop so fix your *include statements that include this. Aborting. {path}" print(message) - print(message,file=flinear) - #print(message,file=sys.stderr) - DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(path)) + print(message, file=flinear) + # print(message,file=sys.stderr) + DataIssue.objects.create(parser="survex", message=message, url=get_offending_filename(path)) return return try: @@ -1459,24 +1566,24 @@ class LoadingSurvex(): with open(finname, "r") as fin: for svxline in fin: process_line(svxline) - + self.svxfileslist.append(path) except UnicodeDecodeError: # some bugger put an umlaut in a non-UTF survex file ?! message = f" ! ERROR *include file '{path}' in '{survexblock}' has UnicodeDecodeError. Omitted." print(message) - print(message,file=sys.stderr) - DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(path)) - return # skip this survex file and all things *included in it - except : + print(message, file=sys.stderr) + DataIssue.objects.create(parser="survex", message=message, url=get_offending_filename(path)) + return # skip this survex file and all things *included in it + except: message = f" ! ERROR *include file '{path}' in '{survexblock}' has unexpected error. Omitted." 
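The push/recurse/pop pattern used by PushdownStackScan above, reduced to its essentials: each *include pushes the path before descending and checks that the same path comes back off the stack afterwards. A simplified sketch that ignores *begin/*end, comments and quoting, and assumes plain '*include <path>' lines:

import re
from pathlib import Path

RX_INCLUDE = re.compile(r"(?i)^\s*\*include\s+(\S+)")

def scan_includes(path, root_dir, stack=None, seen=None):
    stack = [] if stack is None else stack
    seen = [] if seen is None else seen
    fullpath = Path(root_dir, path + ".svx")
    if not fullpath.is_file():
        print(f" ! *include file '{path}' not found")
        return seen
    seen.append(path)
    for line in fullpath.read_text().splitlines():
        m = RX_INCLUDE.match(line)
        if not m:
            continue
        child = m.group(1).lower()
        stack.append(child)                          # push before descending
        scan_includes(child, root_dir, stack, seen)  # recurse into the included file
        popped = stack.pop()
        if popped != child:                          # sanity check, mirrors the pop!=push test above
            print(f"!! ERROR mismatch *include pop!=push {popped} != {child}")
    return seen

# usage sketch: seen = scan_includes("1623", "/path/to/loser")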
print(message) - print(message,file=sys.stderr) - DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(path)) - return # skip this survex file and all things *included in it - - def checkUniqueness(self,fullpath): + print(message, file=sys.stderr) + DataIssue.objects.create(parser="survex", message=message, url=get_offending_filename(path)) + return # skip this survex file and all things *included in it + + def checkUniqueness(self, fullpath): fn = Path(fullpath).name if fn not in self.uniquename: self.uniquename[fn] = [fullpath] @@ -1486,118 +1593,129 @@ class LoadingSurvex(): # message = f" ! NON-UNIQUE survex filename, '{fn}' - '{self.uniquename[fn]}' #{len(self.uniquename[fn])}" # print(message) # DataIssue.objects.create(parser='survex', message=message) - message = f" NOTE: non-unique survex filename, '{fn}' - '{self.uniquename[fn]}' #{len(self.uniquename[fn])}" + message = ( + f" NOTE: non-unique survex filename, '{fn}' - '{self.uniquename[fn]}' #{len(self.uniquename[fn])}" + ) print(message) - - def RunSurvexIfNeeded(self,fullpath, calledpath): + def RunSurvexIfNeeded(self, fullpath, calledpath): now = time.time() - cav_t = now - 365*24*3600 - log_t = now - 365*24*3600 - svx_t = now - 365*24*3600 + cav_t = now - 365 * 24 * 3600 + log_t = now - 365 * 24 * 3600 + svx_t = now - 365 * 24 * 3600 def runcavern(): - '''regenerates the .3d file from the .svx if it is older than the svx file, or older than the software, + """regenerates the .3d file from the .svx if it is older than the svx file, or older than the software, or randomly using chaosmonkey() just to keep things ticking over. - ''' - print(f" - Regenerating stale (or chaos-monkeyed) cavern .log and .3d for '{fullpath}'\n at '{logpath}'\n") - print(f"days svx old: {(svx_t - log_t)/(24*3600):.1f} cav:{(cav_t - log_t)/(24*3600):.1f} log old: { (now - log_t)/(24*3600):.1f}") - - outputdir = Path(str(f'{fullpath}.svx')).parent - sp = subprocess.run([settings.CAVERN, "--log", f'--output={outputdir}', f'{fullpath}.svx'], - capture_output=True, check=False, text=True) + """ + print( + f" - Regenerating stale (or chaos-monkeyed) cavern .log and .3d for '{fullpath}'\n at '{logpath}'\n" + ) + print( + f"days svx old: {(svx_t - log_t)/(24*3600):.1f} cav:{(cav_t - log_t)/(24*3600):.1f} log old: { (now - log_t)/(24*3600):.1f}" + ) + + outputdir = Path(str(f"{fullpath}.svx")).parent + sp = subprocess.run( + [settings.CAVERN, "--log", f"--output={outputdir}", f"{fullpath}.svx"], + capture_output=True, + check=False, + text=True, + ) if sp.returncode != 0: - message = f' ! Error running {settings.CAVERN}: {fullpath}' - url = f'/survexfile{fullpath}.svx'.replace(settings.SURVEX_DATA, "") - DataIssue.objects.create(parser='xEntrances', message=message, url=url) + message = f" ! 
Error running {settings.CAVERN}: {fullpath}" + url = f"/survexfile{fullpath}.svx".replace(settings.SURVEX_DATA, "") + DataIssue.objects.create(parser="xEntrances", message=message, url=url) print(message) - print(f'stderr:\n\n' + str(sp.stderr) + '\n\n' + str(sp.stdout) + '\n\nreturn code: ' + str(sp.returncode)) + print( + f"stderr:\n\n" + str(sp.stderr) + "\n\n" + str(sp.stdout) + "\n\nreturn code: " + str(sp.returncode) + ) self.caverncount += 1 - - # should also collect all the .err files too and create a DataIssue for each one which + + # should also collect all the .err files too and create a DataIssue for each one which # - is nonzero in size AND # - has Error greater than 5% anywhere, or some other more serious error - + errpath = Path(fullpath + ".err") if errpath.is_file(): if errpath.stat().st_size == 0: - errpath.unlink() # delete empty closure error file - + errpath.unlink() # delete empty closure error file svxpath = Path(fullpath + ".svx") logpath = Path(fullpath + ".log") outputdir = Path(svxpath).parent - if not svxpath.is_file(): - message = f' ! BAD survex file "{fullpath}" specified in *include in {calledpath} ' - DataIssue.objects.create(parser='entrances', message=message) + if not svxpath.is_file(): + message = f' ! BAD survex file "{fullpath}" specified in *include in {calledpath} ' + DataIssue.objects.create(parser="entrances", message=message) print(message) return - - if not logpath.is_file(): # always run if logfile not there + + if not logpath.is_file(): # always run if logfile not there runcavern() return - self.caverndate = now - 2*365*24*3600 + self.caverndate = now - 2 * 365 * 24 * 3600 if not self.caverndate: - sp = subprocess.run(["which", f"{settings.CAVERN}"], - capture_output=True, check=False, text=True) + sp = subprocess.run(["which", f"{settings.CAVERN}"], capture_output=True, check=False, text=True) if sp.returncode != 0: - message = f' ! Error running "which" on {settings.CAVERN}' - DataIssue.objects.create(parser='entrances', message=message) + message = f' ! 
Error running "which" on {settings.CAVERN}' + DataIssue.objects.create(parser="entrances", message=message) print(message) - print(f'stderr:\n\n' + str(sp.stderr) + '\n\n' + str(sp.stdout) + '\n\nreturn code: ' + str(sp.returncode)) + print( + f"stderr:\n\n" + str(sp.stderr) + "\n\n" + str(sp.stdout) + "\n\nreturn code: " + str(sp.returncode) + ) self.caverndate = os.path.getmtime(sp.stdout.strip()) else: - self.caverndate = now - 2*365*24*3600 + self.caverndate = now - 2 * 365 * 24 * 3600 cav_t = self.caverndate log_t = os.path.getmtime(logpath) svx_t = os.path.getmtime(svxpath) now = time.time() - if svx_t - log_t > 0: # stale, svx file is newer than log + if svx_t - log_t > 0: # stale, svx file is newer than log runcavern() return - if now - log_t > 60 *24*60*60: # >60 days, re-run anyway + if now - log_t > 60 * 24 * 60 * 60: # >60 days, re-run anyway runcavern() return - if cav_t - log_t > 0: # new version of cavern + if cav_t - log_t > 0: # new version of cavern runcavern() return - if chaosmonkey(350): # one in every 350 runs + if chaosmonkey(350): # one in every 350 runs runcavern() + def FindAndLoadSurvex(survexblockroot): - """Follows the *include links successively to find files in the whole include tree - """ + """Follows the *include links successively to find files in the whole include tree""" global stop_dup_warning - print(' - redirecting stdout to svxblks.log...') + print(" - redirecting stdout to svxblks.log...") stdout_orig = sys.stdout # Redirect sys.stdout to the file - sys.stdout = open('svxblks.log', 'w') + sys.stdout = open("svxblks.log", "w") - print(f' - Scanning Survex Blocks tree from {settings.SURVEX_TOPNAME}.svx ...',file=sys.stderr) - survexfileroot = survexblockroot.survexfile # i.e. SURVEX_TOPNAME only + print(f" - Scanning Survex Blocks tree from {settings.SURVEX_TOPNAME}.svx ...", file=sys.stderr) + survexfileroot = survexblockroot.survexfile # i.e. 
SURVEX_TOPNAME only collatefilename = "_" + survexfileroot.path + ".svx" svx_scan = LoadingSurvex() svx_scan.callcount = 0 svx_scan.depthinclude = 0 fullpathtotop = os.path.join(survexfileroot.survexdirectory.path, survexfileroot.path) - + print(f" - RunSurvexIfNeeded cavern on '{fullpathtotop}'", file=sys.stderr) svx_scan.RunSurvexIfNeeded(fullpathtotop, fullpathtotop) svx_scan.checkUniqueness(fullpathtotop) - - indent="" - fcollate = open(collatefilename, 'w') + + indent = "" + fcollate = open(collatefilename, "w") mem0 = get_process_memory() - print(f" - MEM:{mem0:7.2f} MB START",file=sys.stderr) - flinear = open('svxlinear.log', 'w') + print(f" - MEM:{mem0:7.2f} MB START", file=sys.stderr) + flinear = open("svxlinear.log", "w") flinear.write(f" - MEM:{mem0:7.2f} MB START {survexfileroot.path}\n") - print(" ", file=sys.stderr,end='') + print(" ", file=sys.stderr, end="") finrootname = Path(settings.SURVEX_DATA, survexfileroot.path + ".svx") fcollate.write(f";*include {survexfileroot.path}\n") @@ -1606,101 +1724,110 @@ def FindAndLoadSurvex(survexblockroot): import cProfile import pstats from pstats import SortKey + pr = cProfile.Profile() pr.enable() - #---------------------------------------------------------------- + # ---------------------------------------------------------------- svx_scan.PushdownStackScan(survexblockroot, survexfileroot.path, finrootname, flinear, fcollate) - #---------------------------------------------------------------- + # ---------------------------------------------------------------- pr.disable() - with open('PushdownStackScan.prof', 'w') as f: + with open("PushdownStackScan.prof", "w") as f: ps = pstats.Stats(pr, stream=f) ps.sort_stats(SortKey.CUMULATIVE) ps.print_stats() - + flinear.write(f"{svx_scan.depthinclude:2} {indent} *edulcni {survexfileroot.path}\n") fcollate.write(f";*edulcni {survexfileroot.path}\n") mem1 = get_process_memory() flinear.write(f"\n - MEM:{mem1:.2f} MB STOP {survexfileroot.path}\n") flinear.write(f" - MEM:{mem1 - mem0:.3f} MB ADDITIONALLY USED\n") flinear.write(f" - {len(svx_scan.svxfileslist):,} survex files in linear include list \n") - - print(f"\n - {svx_scan.caverncount:,} runs of survex 'cavern' refreshing .3d files",file=sys.stderr) - print(f" - {len(svx_scan.svxfileslist):,} survex files from tree in linear include list",file=sys.stderr) - + + print(f"\n - {svx_scan.caverncount:,} runs of survex 'cavern' refreshing .3d files", file=sys.stderr) + print(f" - {len(svx_scan.svxfileslist):,} survex files from tree in linear include list", file=sys.stderr) + mem1 = get_process_memory() - print(f" - MEM:{mem1:7.2f} MB END ",file=sys.stderr) - print(f" - MEM:{mem1 - mem0:7.3f} MB ADDITIONALLY USED",file=sys.stderr) + print(f" - MEM:{mem1:7.2f} MB END ", file=sys.stderr) + print(f" - MEM:{mem1 - mem0:7.3f} MB ADDITIONALLY USED", file=sys.stderr) # # Process all the omitted files in :loser: with some exceptions # unseens = set() - b=[] - - for p in Path(settings.SURVEX_DATA).rglob('*.svx'): + b = [] + + for p in Path(settings.SURVEX_DATA).rglob("*.svx"): if p.is_file(): po = p.relative_to(Path(settings.SURVEX_DATA)) - pox = po.with_suffix('') + pox = po.with_suffix("") if str(pox) not in svx_scan.svxfileslist: # print(f"[{pox}]", file=sys.stderr) unseens.add(pox) else: b.append(pox) - + if len(b) != len(svx_scan.svxfileslist): - print(f" ! Mismatch. {len(b)} survex files found which should be {len(svx_scan.svxfileslist)} in main tree)", file=sys.stderr) - + print( + f" ! Mismatch. 
{len(b)} survex files found which should be {len(svx_scan.svxfileslist)} in main tree)", + file=sys.stderr, + ) + excpts = ["surface/terrain", "kataster/kataster-boundaries", "template", "docs", "_unseens"] removals = [] for x in unseens: for o in excpts: - if str(x).strip().startswith(o): + if str(x).strip().startswith(o): removals.append(x) # special fix for file not actually in survex format unseens.remove(Path("fixedpts/gps/gps00raw")) - + for x in removals: unseens.remove(x) - print(f"\n - {len(unseens)} survex files found which were not included in main tree. ({len(svx_scan.svxfileslist)} in main tree)", file=sys.stderr) + print( + f"\n - {len(unseens)} survex files found which were not included in main tree. ({len(svx_scan.svxfileslist)} in main tree)", + file=sys.stderr, + ) print(f" -- Now loading the previously-omitted survex files.", file=sys.stderr) - - with open(Path(settings.SURVEX_DATA, '_unseens.svx'), 'w') as u: - u.write(f"; {len(unseens):,} survex files not *included by {settings.SURVEX_TOPNAME} (which are {len(svx_scan.svxfileslist):,} files)\n") + + with open(Path(settings.SURVEX_DATA, "_unseens.svx"), "w") as u: + u.write( + f"; {len(unseens):,} survex files not *included by {settings.SURVEX_TOPNAME} (which are {len(svx_scan.svxfileslist):,} files)\n" + ) u.write(f"; autogenerated by parser/survex.py from databasereset.py on '{datetime.now(timezone.utc)}'\n") u.write(f"; omitting any file beginning with {excpts}\n\n") u.write(f"*begin unseens\n") for x in sorted(unseens): u.write(f" *include {x}\n") u.write(f"*end unseens\n") - - survexfileroot = survexblockroot.survexfile # i.e. SURVEX_TOPNAME only + + survexfileroot = survexblockroot.survexfile # i.e. SURVEX_TOPNAME only omit_scan = LoadingSurvex() omit_scan.callcount = 0 omit_scan.depthinclude = 0 - fullpathtotop = os.path.join(survexfileroot.survexdirectory.path, '_unseens.svx') - + fullpathtotop = os.path.join(survexfileroot.survexdirectory.path, "_unseens.svx") + # copy the list to prime the next pass through the files omit_scan.svxfileslist = svx_scan.svxfileslist[:] - svx_scan.svxfileslist = [] # free memory - svx_scan = None # Hmm. Does this actually delete all the instance variables if they are lists, dicts etc.? - + svx_scan.svxfileslist = [] # free memory + svx_scan = None # Hmm. Does this actually delete all the instance variables if they are lists, dicts etc.? 
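The 'unseens' bookkeeping above is essentially a set difference between every .svx file on disk and every file reached through the *include tree, with a few prefixes excluded. A compact sketch of the same computation; directory names and prefixes are examples only:

from pathlib import Path

def find_unseens(survex_data, included, skip_prefixes):
    # included: relative paths (without the .svx suffix) already reached via *include
    unseens = set()
    for p in Path(survex_data).rglob("*.svx"):
        rel = p.relative_to(survex_data).with_suffix("")
        if str(rel) in included:
            continue
        if any(str(rel).startswith(pre) for pre in skip_prefixes):
            continue
        unseens.add(rel)
    return unseens

def write_unseens_svx(survex_data, unseens):
    # wrap the leftovers in one *begin/*end so cavern can process them in a single pass
    with open(Path(survex_data, "_unseens.svx"), "w") as u:
        u.write("*begin unseens\n")
        for rel in sorted(unseens):
            u.write(f" *include {rel}\n")
        u.write("*end unseens\n")

# usage sketch
# unseens = find_unseens("/path/to/loser", set(svxfileslist), ["surface/terrain", "template", "docs"])
# write_unseens_svx("/path/to/loser", unseens)

As for the question in the comment above: rebinding svx_scan to None only drops one reference; CPython frees the instance, and the lists and dicts inside it, once nothing else still refers to them.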
+ print(f" - RunSurvexIfNeeded cavern on '{fullpathtotop}'", file=sys.stderr) omit_scan.RunSurvexIfNeeded(fullpathtotop, fullpathtotop) omit_scan.checkUniqueness(fullpathtotop) - + mem0 = get_process_memory() - print(f" - MEM:{mem0:7.2f} MB START '_unseens'",file=sys.stderr) - #flinear = open('svxlinear.log', 'w') + print(f" - MEM:{mem0:7.2f} MB START '_unseens'", file=sys.stderr) + # flinear = open('svxlinear.log', 'w') flinear.write(f" - MEM:{mem0:7.2f} MB START '_unseens'\n") - print(" ", file=sys.stderr,end='') + print(" ", file=sys.stderr, end="") finrootname = fullpathtotop fcollate.write(";*include _unseens.svx\n") flinear.write(f"{omit_scan.depthinclude:2} {indent} *include _unseens\n") stop_dup_warning = True - #---------------------------------------------------------------- - omit_scan.PushdownStackScan(survexblockroot, '_unseens', finrootname, flinear, fcollate) - #---------------------------------------------------------------- + # ---------------------------------------------------------------- + omit_scan.PushdownStackScan(survexblockroot, "_unseens", finrootname, flinear, fcollate) + # ---------------------------------------------------------------- stop_dup_warning = False flinear.write(f"{omit_scan.depthinclude:2} {indent} *edulcni _unseens\n") @@ -1709,57 +1836,62 @@ def FindAndLoadSurvex(survexblockroot): flinear.write(f"\n - MEM:{mem1:.2f} MB STOP _unseens.svx OMIT\n") flinear.write(f" - MEM:{mem1 - mem0:.3f} MB ADDITIONALLY USED OMIT\n") flinear.write(f" - {len(omit_scan.svxfileslist):,} survex files in linear include list OMIT \n") - + flinear.close() fcollate.close() - - print(f"\n - {omit_scan.caverncount:,} runs of survex 'cavern' refreshing .3d files in the unseen list",file=sys.stderr) - - print(f" - {len(omit_scan.svxfileslist):,} survex files in linear include list including previously unseen ones \n",file=sys.stderr) - omit_scan = None # Hmm. Does this actually delete all the instance variables if they are lists, dicts etc.? - - mem1 = get_process_memory() - print(f" - MEM:{mem1:7.2f} MB END ",file=sys.stderr) - print(f" - MEM:{mem1 - mem0:7.3f} MB ADDITIONALLY USED",file=sys.stderr) - + print( + f"\n - {omit_scan.caverncount:,} runs of survex 'cavern' refreshing .3d files in the unseen list", + file=sys.stderr, + ) + + print( + f" - {len(omit_scan.svxfileslist):,} survex files in linear include list including previously unseen ones \n", + file=sys.stderr, + ) + omit_scan = None # Hmm. Does this actually delete all the instance variables if they are lists, dicts etc.? + + mem1 = get_process_memory() + print(f" - MEM:{mem1:7.2f} MB END ", file=sys.stderr) + print(f" - MEM:{mem1 - mem0:7.3f} MB ADDITIONALLY USED", file=sys.stderr) + # Before doing this, it would be good to identify the *equate and *entrance we need that are relevant to the # entrance locations currently loaded after this by LoadPos(), but could better be done before ? 
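The MEM:{...} MB figures logged throughout this function come from get_process_memory(), which is imported from troggle.core.utils and whose body is not part of this diff. Purely to make the units in those log lines concrete, a plausible stand-in built on the standard-library resource module might look like this (an assumption, not the project's actual implementation):

    import resource

    def get_process_memory_mb():
        # ru_maxrss is the peak resident set size: kilobytes on Linux, bytes on
        # macOS, so dividing by 1024 gives MB only on Linux. Illustrative only,
        # not the real troggle.core.utils.get_process_memory.
        return resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024.0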
# look in MapLocations() for how we find the entrances - - print('\n - Loading All Survex Blocks (LinearLoad)',file=sys.stderr) + + print("\n - Loading All Survex Blocks (LinearLoad)", file=sys.stderr) svx_load = LoadingSurvex() svx_load.survexdict[survexfileroot.survexdirectory] = [] svx_load.survexdict[survexfileroot.survexdirectory].append(survexfileroot) svx_load.svxdirs[""] = survexfileroot.survexdirectory - #pr2 = cProfile.Profile() - #pr2.enable() - print(" ", file=sys.stderr,end='') - #---------------------------------------------------------------- + # pr2 = cProfile.Profile() + # pr2.enable() + print(" ", file=sys.stderr, end="") + # ---------------------------------------------------------------- svx_load.LinearLoad(survexblockroot, survexfileroot.path, collatefilename) - #---------------------------------------------------------------- - #pr2.disable() + # ---------------------------------------------------------------- + # pr2.disable() # with open('LinearLoad.prof', 'w') as f: - # ps = pstats.Stats(pr2, stream=f) - # ps.sort_stats(SortKey.CUMULATIVE) - # ps.print_stats() + # ps = pstats.Stats(pr2, stream=f) + # ps.sort_stats(SortKey.CUMULATIVE) + # ps.print_stats() mem1 = get_process_memory() - print(f"\n - MEM:{mem1:7.2f} MB STOP",file=sys.stderr) - print(f" - MEM:{mem1 - mem0:7.3f} MB ADDITIONALLY USED",file=sys.stderr) + print(f"\n - MEM:{mem1:7.2f} MB STOP", file=sys.stderr) + print(f" - MEM:{mem1 - mem0:7.3f} MB ADDITIONALLY USED", file=sys.stderr) # Close the logging file, Restore sys.stdout to our old saved file handle sys.stdout.close() print("+", file=sys.stderr) - sys.stderr.flush(); + sys.stderr.flush() sys.stdout = stdout_orig legsnumber = svx_load.legsnumber mem1 = get_process_memory() print(f" - Number of SurvexDirectories: {len(svx_load.survexdict):,}") - tf=0 + tf = 0 for d in svx_load.survexdict: tf += len(svx_load.survexdict[d]) print(f" - Number of SurvexFiles: {tf:,}") @@ -1768,39 +1900,40 @@ def FindAndLoadSurvex(survexblockroot): return legsnumber + def MakeSurvexFileRoot(): - """Returns a file_object.path = SURVEX_TOPNAME associated with directory_object.path = SURVEX_DATA - """ + """Returns a file_object.path = SURVEX_TOPNAME associated with directory_object.path = SURVEX_DATA""" # find a cave, any cave.. caves = Cave.objects.all() - smk = caves.filter(kataster_number="000") # returns a list, a QuerySet - + smk = caves.filter(kataster_number="000") # returns a list, a QuerySet + fileroot = SurvexFile(path=settings.SURVEX_TOPNAME, cave=None) fileroot.save() directoryroot = SurvexDirectory(path=settings.SURVEX_DATA, cave=smk[0], primarysurvexfile=fileroot) # MariaDB doesn't like this hack. Complains about non-null cave_id EVEN THOUGH our model file says this is OK: # cave = models.ForeignKey('Cave', blank=True, null=True,on_delete=models.SET_NULL) directoryroot.save() - fileroot.survexdirectory = directoryroot # i.e. SURVEX_DATA/SURVEX_TOPNAME - fileroot.save() # mutually dependent objects need a double-save like this + fileroot.survexdirectory = directoryroot # i.e. 
SURVEX_DATA/SURVEX_TOPNAME + fileroot.save() # mutually dependent objects need a double-save like this return fileroot - + + def MakeOmitFileRoot(fn): - """Returns a file_object.path = _unseens.svx associated with directory_object.path = SURVEX_DATA - """ + """Returns a file_object.path = _unseens.svx associated with directory_object.path = SURVEX_DATA""" fileroot = SurvexFile(path=fn, cave=None) fileroot.survexdirectory = SurvexDirectory.objects.get(path=settings.SURVEX_DATA) - fileroot.save() + fileroot.save() return fileroot + def LoadSurvexBlocks(): mem1 = get_process_memory() - print(f" - MEM:{mem1:7.2f} MB now ",file=sys.stderr) + print(f" - MEM:{mem1:7.2f} MB now ", file=sys.stderr) - print(' - Flushing All Survex Blocks...') + print(" - Flushing All Survex Blocks...") # why does this increase memory use by 20 MB ?! - # We have foreign keys, Django needs to load the related objects - # in order to resolve how the relation should handle the deletion: + # We have foreign keys, Django needs to load the related objects + # in order to resolve how the relation should handle the deletion: # https://docs.djangoproject.com/en/3.2/ref/models/fields/#django.db.models.ForeignKey.on_delete SurvexBlock.objects.all().delete() SurvexFile.objects.all().delete() @@ -1808,116 +1941,135 @@ def LoadSurvexBlocks(): SurvexPersonRole.objects.all().delete() SurvexStation.objects.all().delete() mem1 = get_process_memory() - print(f" - MEM:{mem1:7.2f} MB now. Foreign key objects loaded on deletion. ",file=sys.stderr) - + print(f" - MEM:{mem1:7.2f} MB now. Foreign key objects loaded on deletion. ", file=sys.stderr) + print(" - Flushing survex Data Issues ") - DataIssue.objects.filter(parser='survex').delete() - DataIssue.objects.filter(parser='svxdate').delete() - DataIssue.objects.filter(parser='survexleg').delete() - DataIssue.objects.filter(parser='survexunits').delete() - DataIssue.objects.filter(parser='entrances').delete() - DataIssue.objects.filter(parser='xEntrances').delete() + DataIssue.objects.filter(parser="survex").delete() + DataIssue.objects.filter(parser="svxdate").delete() + DataIssue.objects.filter(parser="survexleg").delete() + DataIssue.objects.filter(parser="survexunits").delete() + DataIssue.objects.filter(parser="entrances").delete() + DataIssue.objects.filter(parser="xEntrances").delete() print(" - survex Data Issues flushed") mem1 = get_process_memory() - print(f" - MEM:{mem1:7.2f} MB now ",file=sys.stderr) - + print(f" - MEM:{mem1:7.2f} MB now ", file=sys.stderr) + survexfileroot = MakeSurvexFileRoot() # this next makes a block_object assciated with a file_object.path = SURVEX_TOPNAME - survexblockroot = SurvexBlock(name=ROOTBLOCK, survexpath="", cave=None, survexfile=survexfileroot, - legsall=0, legslength=0.0) + survexblockroot = SurvexBlock( + name=ROOTBLOCK, survexpath="", cave=None, survexfile=survexfileroot, legsall=0, legslength=0.0 + ) # crashes here sometimes on MariaDB complaining that cave_id should not be null. But it should be. 
- #django.db.utils.IntegrityError: (1048, "Column 'cave_id' cannot be null") + # django.db.utils.IntegrityError: (1048, "Column 'cave_id' cannot be null") # fix by restarting db on server # sudo service mariadb stop # sudo service mariadb start survexblockroot.save() - - omitsfileroot = MakeOmitFileRoot("_unseens.svx") - survexomitsroot = SurvexBlock(name=OMITBLOCK, survexpath="", cave=None, survexfile=omitsfileroot, - legsall=0, legslength=0.0) - survexomitsroot.save() - print(' - Loading Survex Blocks...') + omitsfileroot = MakeOmitFileRoot("_unseens.svx") + survexomitsroot = SurvexBlock( + name=OMITBLOCK, survexpath="", cave=None, survexfile=omitsfileroot, legsall=0, legslength=0.0 + ) + survexomitsroot.save() + + print(" - Loading Survex Blocks...") memstart = get_process_memory() - #---------------------------------------------------------------- + # ---------------------------------------------------------------- FindAndLoadSurvex(survexblockroot) - #---------------------------------------------------------------- + # ---------------------------------------------------------------- memend = get_process_memory() print(f" - MEMORY start:{memstart:.3f} MB end:{memend:.3f} MB increase={memend - memstart:.3f} MB") - + survexblockroot.save() - print(' - Loaded All Survex Blocks.') + print(" - Loaded All Survex Blocks.") + poslineregex = re.compile(r"^\(\s*([+-]?\d*\.\d*),\s*([+-]?\d*\.\d*),\s*([+-]?\d*\.\d*)\s*\)\s*([^\s]+)$") + def LoadPositions(): """First load the survex stations for entrances and fixed points (about 600) into the database. - Run cavern to produce a complete .3d file, then run 3dtopos to produce a table of - all survey point positions. Then lookup each position by name to see if we have it in the database + Run cavern to produce a complete .3d file, then run 3dtopos to produce a table of + all survey point positions. Then lookup each position by name to see if we have it in the database and if we do, then save the x/y/z coordinates. This gives us coordinates of the entrances. If we don't have it in the database, print an error message and discard it. """ svx_t = 0 d3d_t = 0 + def runcavern3d(): - outputdir = Path(str(f'{topdata}.svx')).parent + outputdir = Path(str(f"{topdata}.svx")).parent # print(" - Regenerating stale cavern .log and .3d for '{}'\n days old: {:.1f} {:.1f} {:.1f}". # format(topdata, (svx_t - d3d_t)/(24*3600), (cav_t - d3d_t)/(24*3600), (now - d3d_t)/(24*3600))) - file3d = Path(f'{topdata}.3d') + file3d = Path(f"{topdata}.3d") try: - sp = subprocess.run([settings.CAVERN, "--log", f"--output={outputdir}", f"{topdata}.svx"], - capture_output=True, check=False, text=True) #check=False means exception not raised + sp = subprocess.run( + [settings.CAVERN, "--log", f"--output={outputdir}", f"{topdata}.svx"], + capture_output=True, + check=False, + text=True, + ) # check=False means exception not raised if sp.returncode != 0: - message = f' ! Error: cavern: creating {file3d} in runcavern3()' - DataIssue.objects.create(parser='entrances', message=message) - print(message) - - # find the errors in the 1623.log file - sp = subprocess.run(["grep", "error:", f"{topdata}.log"], - capture_output=True, check=False, text=True) #check=False means exception not raised - message = f' ! Error: cavern: {sp.stdout} creating {file3d} ' - DataIssue.objects.create(parser='entrances', message=message) + message = f" ! 
Error: cavern: creating {file3d} in runcavern3()" + DataIssue.objects.create(parser="entrances", message=message) print(message) - except: + # find the errors in the 1623.log file + sp = subprocess.run( + ["grep", "error:", f"{topdata}.log"], capture_output=True, check=False, text=True + ) # check=False means exception not raised + message = f" ! Error: cavern: {sp.stdout} creating {file3d} " + DataIssue.objects.create(parser="entrances", message=message) + print(message) + + except: message = f" ! CalledProcessError 'cavern' in runcavern3() at {topdata}." - DataIssue.objects.create(parser='entrances', message=message) + DataIssue.objects.create(parser="entrances", message=message) print(message) - + if file3d.is_file(): message = f" ! CalledProcessError. File permissions {file3d.stat().st_mode} on {str(file3d)}" - DataIssue.objects.create(parser='entrances', message=message) + DataIssue.objects.create(parser="entrances", message=message) print(message) - - if file3d.is_file(): # might be an old one though + + if file3d.is_file(): # might be an old one though try: # print(" - Regenerating {} {}.3d in {}".format(settings.SURVEXPORT, topdata, settings.SURVEX_DATA)) - sp = subprocess.run([settings.SURVEXPORT, '--pos', f'{file3d}'], cwd = settings.SURVEX_DATA, - capture_output=True, check=False, text=True) + sp = subprocess.run( + [settings.SURVEXPORT, "--pos", f"{file3d}"], + cwd=settings.SURVEX_DATA, + capture_output=True, + check=False, + text=True, + ) if sp.returncode != 0: - print(f' ! Error: survexport creating {topdata}.pos in runcavern3().\n\n' + str(sp.stdout) + '\n\nreturn code: ' + str(sp.returncode)) - except: - message = f" ! CalledProcessError 'survexport' in runcavern3() at {file3d}." - DataIssue.objects.create(parser='entrances', message=message) + print( + f" ! Error: survexport creating {topdata}.pos in runcavern3().\n\n" + + str(sp.stdout) + + "\n\nreturn code: " + + str(sp.returncode) + ) + except: + message = f" ! CalledProcessError 'survexport' in runcavern3() at {file3d}." + DataIssue.objects.create(parser="entrances", message=message) print(message) else: message = f" ! 
Failed to find {file3d} so aborting generation of new .pos, using old one if present" - DataIssue.objects.create(parser='entrances', message=message) + DataIssue.objects.create(parser="entrances", message=message) print(message) topdata = os.fspath(Path(settings.SURVEX_DATA) / settings.SURVEX_TOPNAME) - print(f' - Generating a list of Pos from {topdata}.svx and then loading...') + print(f" - Generating a list of Pos from {topdata}.svx and then loading...") found = 0 skip = {} - print("\n") # extra line because cavern overwrites the text buffer somehow + print("\n") # extra line because cavern overwrites the text buffer somehow # cavern defaults to using same cwd as supplied input file - completed_process = subprocess.run(["which", f"{settings.CAVERN}"], - capture_output=True, check=True, text=True) + completed_process = subprocess.run(["which", f"{settings.CAVERN}"], capture_output=True, check=True, text=True) cav_t = os.path.getmtime(completed_process.stdout.strip()) svxpath = topdata + ".svx" @@ -1935,76 +2087,75 @@ def LoadPositions(): runcavern3d() if not os.path.isfile(d3dpath): runcavern3d() - elif d3d_t - svx_t > 0: # stale, 3d older than svx file + elif d3d_t - svx_t > 0: # stale, 3d older than svx file runcavern3d() - elif now - d3d_t> 60 *24*60*60: # >60 days old, re-run anyway + elif now - d3d_t > 60 * 24 * 60 * 60: # >60 days old, re-run anyway runcavern3d() - elif cav_t - d3d_t > 0: # new version of cavern + elif cav_t - d3d_t > 0: # new version of cavern runcavern3d() mappoints = {} for pt in MapLocations().points(): - svxid, number, point_type, label = pt - mappoints[svxid]=True + svxid, number, point_type, label = pt + mappoints[svxid] = True if not Path(pospath).is_file(): message = f" ! Failed to find {pospath} so aborting generation of entrance locations. " - DataIssue.objects.create(parser='entrances', message=message) + DataIssue.objects.create(parser="entrances", message=message) print(message) return posfile = open(pospath) - posfile.readline() #Drop header + posfile.readline() # Drop header try: survexblockroot = SurvexBlock.objects.get(name=ROOTBLOCK) except: try: survexblockroot = SurvexBlock.objects.get(id=1) except: - message = f' ! FAILED to find root SurvexBlock' + message = f" ! FAILED to find root SurvexBlock" print(message) - DataIssue.objects.create(parser='entrances', message=message) + DataIssue.objects.create(parser="entrances", message=message) raise for line in posfile.readlines(): r = poslineregex.match(line) if r: - x, y, z, id = r.groups() + x, y, z, id = r.groups() for sid in mappoints: if id.endswith(sid): - blockpath = "." + id[:-len(sid)].strip(".") + blockpath = "." + id[: -len(sid)].strip(".") # But why are we doing this? Why do we need the survexblock id for each of these ? # ..because mostly they don't actually appear in any SVX file. We should match them up # via the cave data, not by this half-arsed syntactic match which almost never works. PMS. if False: try: sbqs = SurvexBlock.objects.filter(survexpath=blockpath) - if len(sbqs)==1: + if len(sbqs) == 1: sb = sbqs[0] - if len(sbqs)>1: + if len(sbqs) > 1: message = f" ! MULTIPLE SurvexBlocks {len(sbqs):3} matching Entrance point {blockpath} {sid} '{id}'" print(message) - DataIssue.objects.create(parser='entrances', message=message) + DataIssue.objects.create(parser="entrances", message=message) sb = sbqs[0] - elif len(sbqs)<=0: + elif len(sbqs) <= 0: message = f" ! 
ZERO SurvexBlocks matching Entrance point {blockpath} {sid} '{id}'" print(message) - DataIssue.objects.create(parser='entrances', message=message) + DataIssue.objects.create(parser="entrances", message=message) sb = survexblockroot except: - message = f' ! FAIL in getting SurvexBlock matching Entrance point {blockpath} {sid}' + message = f" ! FAIL in getting SurvexBlock matching Entrance point {blockpath} {sid}" print(message) - DataIssue.objects.create(parser='entrances', message=message) + DataIssue.objects.create(parser="entrances", message=message) try: ss = SurvexStation(name=id, block=survexblockroot) ss.x = float(x) ss.y = float(y) - ss.z = float(z) + ss.z = float(z) ss.save() found += 1 except: - message = f' ! FAIL to create SurvexStation Entrance point {blockpath} {sid}' + message = f" ! FAIL to create SurvexStation Entrance point {blockpath} {sid}" print(message) - DataIssue.objects.create(parser='entrances', message=message) + DataIssue.objects.create(parser="entrances", message=message) raise print(f" - {found} SurvexStation entrances found.") - diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..2c382fb --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,5 @@ +[tool.black] +line-length = 120 + +[tool.isort] +profile = 'black' \ No newline at end of file diff --git a/settings.py b/settings.py index 438b0b8..f93d00b 100644 --- a/settings.py +++ b/settings.py @@ -7,12 +7,12 @@ https://docs.djangoproject.com/en/dev/topics/settings/ For the full list of settings and their values, see https://docs.djangoproject.com/en/dev/ref/settings/ """ -#Imports should be grouped in the following order: +# Imports should be grouped in the following order: -#1.Standard library imports. -#2.Related third party imports. -#3.Local application/library specific imports. -#4.You should put a blank line between each group of imports. +# 1.Standard library imports. +# 2.Related third party imports. +# 3.Local application/library specific imports. +# 4.You should put a blank line between each group of imports. import os import urllib.parse @@ -24,7 +24,7 @@ print("* importing troggle/settings.py") # default value, then gets overwritten by real secrets SECRET_KEY = "not-the-real-secret-key-a#vaeozn0---^fj!355qki*vj2" -GIT = 'git' # command for running git +GIT = "git" # command for running git # Note that this builds upon the django system installed # global settings in @@ -32,18 +32,18 @@ GIT = 'git' # command for running git # read https://docs.djangoproject.com/en/3.0/topics/settings/ # Build paths inside the project like this: os.path.join(BASE_DIR, ...) -#BASE_DIR = os.path.dirname(os.path.dirname(__file__)) +# BASE_DIR = os.path.dirname(os.path.dirname(__file__)) # Django settings for troggle project. -ALLOWED_HOSTS = ['*', 'expo.survex.com', '.survex.com', 'localhost', '127.0.0.1', '192.168.0.5' ] +ALLOWED_HOSTS = ["*", "expo.survex.com", ".survex.com", "localhost", "127.0.0.1", "192.168.0.5"] ADMINS = ( # ('Your Name', 'your_email@domain.com'), ) MANAGERS = ADMINS -#LOGIN_URL = '/accounts/login/' # this is the default value so does not need to be set +# LOGIN_URL = '/accounts/login/' # this is the default value so does not need to be set # Local time zone for this installation. Choices can be found here: # http://en.wikipedia.org/wiki/List_of_tz_zones_by_name @@ -51,11 +51,11 @@ MANAGERS = ADMINS # If running in a Windows environment this must be set to the same as your # system time zone. 
USE_TZ = True -TIME_ZONE = 'Europe/London' +TIME_ZONE = "Europe/London" # Language code for this installation. All choices can be found here: # http://www.i18nguy.com/unicode/language-identifiers.html -LANGUAGE_CODE = 'en-uk' +LANGUAGE_CODE = "en-uk" SITE_ID = 1 @@ -72,77 +72,79 @@ SURVEX_TOPNAME = "1623-and-1626-no-schoenberg-hs" # Caves for which survex files exist, but are not otherwise registered # replaced (?) by expoweb/cave_data/pendingcaves.txt -# PENDING = ["1626-361", "2007-06", "2009-02", - # "2012-ns-01", "2012-ns-02", "2010-04", "2012-ns-05", "2012-ns-06", - # "2012-ns-07", "2012-ns-08", "2012-ns-12", "2012-ns-14", "2012-ns-15", "2014-bl888", - # "2018-pf-01", "2018-pf-02"] +# PENDING = ["1626-361", "2007-06", "2009-02", +# "2012-ns-01", "2012-ns-02", "2010-04", "2012-ns-05", "2012-ns-06", +# "2012-ns-07", "2012-ns-08", "2012-ns-12", "2012-ns-14", "2012-ns-15", "2014-bl888", +# "2018-pf-01", "2018-pf-02"] -APPEND_SLASH = False # never relevant because we have urls that match unknown files and produce an 'edit this page' response -SMART_APPEND_SLASH = True #not eorking as middleware different after Dj2.0 +APPEND_SLASH = ( + False # never relevant because we have urls that match unknown files and produce an 'edit this page' response +) +SMART_APPEND_SLASH = True # not eorking as middleware different after Dj2.0 -LOGIN_REDIRECT_URL = '/' # does not seem to have any effect +LOGIN_REDIRECT_URL = "/" # does not seem to have any effect SECURE_CONTENT_TYPE_NOSNIFF = True SECURE_BROWSER_XSS_FILTER = True # SESSION_COOKIE_SECURE = True # if enabled, cannot login to Django control panel, bug elsewhere? # CSRF_COOKIE_SECURE = True # if enabled only sends cookies over SSL -X_FRAME_OPTIONS = 'DENY' # changed to "DENY" after I eliminated all the iframes e.g. /xmlvalid.html +X_FRAME_OPTIONS = "DENY" # changed to "DENY" after I eliminated all the iframes e.g. 
/xmlvalid.html -DEFAULT_AUTO_FIELD = 'django.db.models.BigAutoField' # from Django 3.2 +DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField" # from Django 3.2 INSTALLED_APPS = ( - 'django.contrib.admin', - 'django.contrib.auth', # includes the url redirections for login, logout - 'django.contrib.contenttypes', - 'django.contrib.sessions', - 'django.contrib.messages', - 'django.contrib.admindocs', - 'django.forms', #Required to customise widget templates -# 'django.contrib.staticfiles', # We put our CSS etc explicitly in the right place so do not need this - 'troggle.core', + "django.contrib.admin", + "django.contrib.auth", # includes the url redirections for login, logout + "django.contrib.contenttypes", + "django.contrib.sessions", + "django.contrib.messages", + "django.contrib.admindocs", + "django.forms", # Required to customise widget templates + # 'django.contrib.staticfiles', # We put our CSS etc explicitly in the right place so do not need this + "troggle.core", ) -FORM_RENDERER = 'django.forms.renderers.TemplatesSetting' #Required to customise widget templates +FORM_RENDERER = "django.forms.renderers.TemplatesSetting" # Required to customise widget templates # See the recommended order of these in https://docs.djangoproject.com/en/2.2/ref/middleware/ -# Note that this is a radically different onion architecture from earlier versions though it looks the same, -# see https://docs.djangoproject.com/en/2.0/topics/http/middleware/#upgrading-pre-django-1-10-style-middleware +# Note that this is a radically different onion architecture from earlier versions though it looks the same, +# see https://docs.djangoproject.com/en/2.0/topics/http/middleware/#upgrading-pre-django-1-10-style-middleware # Seriously, read this: https://www.webforefront.com/django/middlewaredjango.html which is MUCH BETTER than the docs MIDDLEWARE = [ #'django.middleware.security.SecurityMiddleware', # SECURE_SSL_REDIRECT and SECURE_SSL_HOST # we don't use this - 'django.middleware.gzip.GZipMiddleware', # not needed when expofiles and photos served by apache - 'django.contrib.sessions.middleware.SessionMiddleware', # Manages sessions, if CSRF_USE_SESSIONS then it needs to be early - 'django.middleware.common.CommonMiddleware', # DISALLOWED_USER_AGENTS, APPEND_SLASH and PREPEND_WWW - 'django.middleware.csrf.CsrfViewMiddleware', # Cross Site Request Forgeries by adding hidden form fields to POST - 'django.contrib.auth.middleware.AuthenticationMiddleware', # Adds the user attribute, representing the currently-logged-in user - 'django.contrib.admindocs.middleware.XViewMiddleware', # this and docutils needed by admindocs - 'django.contrib.messages.middleware.MessageMiddleware', # Cookie-based and session-based message support. 
Needed by admin system - 'django.middleware.clickjacking.XFrameOptionsMiddleware', # clickjacking protection via the X-Frame-Options header + "django.middleware.gzip.GZipMiddleware", # not needed when expofiles and photos served by apache + "django.contrib.sessions.middleware.SessionMiddleware", # Manages sessions, if CSRF_USE_SESSIONS then it needs to be early + "django.middleware.common.CommonMiddleware", # DISALLOWED_USER_AGENTS, APPEND_SLASH and PREPEND_WWW + "django.middleware.csrf.CsrfViewMiddleware", # Cross Site Request Forgeries by adding hidden form fields to POST + "django.contrib.auth.middleware.AuthenticationMiddleware", # Adds the user attribute, representing the currently-logged-in user + "django.contrib.admindocs.middleware.XViewMiddleware", # this and docutils needed by admindocs + "django.contrib.messages.middleware.MessageMiddleware", # Cookie-based and session-based message support. Needed by admin system + "django.middleware.clickjacking.XFrameOptionsMiddleware", # clickjacking protection via the X-Frame-Options header #'django.middleware.security.SecurityMiddleware', # SECURE_HSTS_SECONDS, SECURE_CONTENT_TYPE_NOSNIFF, SECURE_BROWSER_XSS_FILTER, SECURE_REFERRER_POLICY, and SECURE_SSL_REDIRECT #'troggle.core.middleware.SmartAppendSlashMiddleware' # needs adapting after Dj2.0 ] -ROOT_URLCONF = 'troggle.urls' +ROOT_URLCONF = "troggle.urls" -WSGI_APPLICATION = 'troggle.wsgi.application' # change to asgi as soon as we upgrade to Django 3.0 +WSGI_APPLICATION = "troggle.wsgi.application" # change to asgi as soon as we upgrade to Django 3.0 -ACCOUNT_ACTIVATION_DAYS=3 +ACCOUNT_ACTIVATION_DAYS = 3 # AUTH_PROFILE_MODULE = 'core.person' # used by removed profiles app ? -QM_PATTERN="\[\[\s*[Qq][Mm]:([ABC]?)(\d{4})-(\d*)-(\d*)\]\]" +QM_PATTERN = "\[\[\s*[Qq][Mm]:([ABC]?)(\d{4})-(\d*)-(\d*)\]\]" # Re-enable TinyMCE when Dj upgraded to v3. Also templates/editexpopage.html # TINYMCE_DEFAULT_CONFIG = { - # 'plugins': "table,spellchecker,paste,searchreplace", - # 'theme': "advanced", +# 'plugins': "table,spellchecker,paste,searchreplace", +# 'theme': "advanced", # } # TINYMCE_SPELLCHECKER = False # TINYMCE_COMPRESSOR = True -TEST_RUNNER = 'django.test.runner.DiscoverRunner' +TEST_RUNNER = "django.test.runner.DiscoverRunner" from localsettings import * -#localsettings needs to take precedence. Call it to override any existing vars. +# localsettings needs to take precedence. Call it to override any existing vars.
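One thing the reformatting pass leaves as it was: QM_PATTERN is still a plain (non-raw) string, so sequences like \[ and \s are invalid string escapes that newer CPython releases flag with a DeprecationWarning (a SyntaxWarning from 3.12). The backslashes are preserved either way, so the compiled regex is identical; a raw string merely silences the warning. A quick self-contained check, using a made-up QM reference for the match:

    import re

    # Same pattern as QM_PATTERN above, written as a raw string.
    qm_pattern = re.compile(r"\[\[\s*[Qq][Mm]:([ABC]?)(\d{4})-(\d*)-(\d*)\]\]")

    m = qm_pattern.search("see [[QM:A2019-05-12]] in the rift")  # invented reference
    print(m.groups())  # ('A', '2019', '05', '12')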