reformatted using black

This commit is contained in:
Philip Sargent 2023-01-19 21:18:42 +00:00
parent 0f8fe0e290
commit ba2ae6cd82
11 changed files with 2183 additions and 1838 deletions

View File

@ -20,8 +20,8 @@ troggle application.
""" """
print(" - settings on loading databaseReset.py", flush=True) print(" - settings on loading databaseReset.py", flush=True)
os.environ['PYTHONPATH'] = str(settings.PYTHON_PATH) os.environ["PYTHONPATH"] = str(settings.PYTHON_PATH)
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'settings') os.environ.setdefault("DJANGO_SETTINGS_MODULE", "settings")
print(" - settings on loading databaseReset.py") print(" - settings on loading databaseReset.py")
@ -31,14 +31,15 @@ print(f" - Memory footprint before loading Django: {resource.getrusage(resource.
try: try:
django.setup() django.setup()
except: except:
print(" ! Cyclic reference failure. Can occur when the initial db is empty. Fixed now (in UploadFileForm) but easy to reintroduce..") print(
" ! Cyclic reference failure. Can occur when the initial db is empty. Fixed now (in UploadFileForm) but easy to reintroduce.."
)
raise raise
print(f" - Memory footprint after loading Django: {resource.getrusage(resource.RUSAGE_SELF)[2] / 1024.0:.3f} MB") print(f" - Memory footprint after loading Django: {resource.getrusage(resource.RUSAGE_SELF)[2] / 1024.0:.3f} MB")
from django.contrib.auth.models import User from django.contrib.auth.models import User
from django.core import management from django.core import management
from django.db import (close_old_connections, connection, connections, from django.db import close_old_connections, connection, connections, transaction
transaction)
from django.http import HttpResponse from django.http import HttpResponse
from django.urls import reverse from django.urls import reverse
@ -46,11 +47,18 @@ import troggle.core.models.survex
from troggle.core.models.caves import Cave, Entrance from troggle.core.models.caves import Cave, Entrance
from troggle.core.models.troggle import DataIssue from troggle.core.models.troggle import DataIssue
from troggle.core.utils import get_process_memory from troggle.core.utils import get_process_memory
from troggle.parsers.imports import (import_caves, import_drawingsfiles, from troggle.parsers.imports import (
import_ents, import_loadpos, import_caves,
import_logbook, import_logbooks, import_drawingsfiles,
import_people, import_QMs, import_survex, import_ents,
import_surveyscans) import_loadpos,
import_logbook,
import_logbooks,
import_people,
import_QMs,
import_survex,
import_surveyscans,
)
if os.geteuid() == 0: if os.geteuid() == 0:
# This protects the server from having the wrong file permissions written on logs and caches # This protects the server from having the wrong file permissions written on logs and caches
@ -65,6 +73,7 @@ expoadminuser=settings.EXPOADMINUSER
expoadminuserpass = settings.EXPOADMINUSERPASS expoadminuserpass = settings.EXPOADMINUSERPASS
expoadminuseremail = settings.EXPOADMINUSER_EMAIL expoadminuseremail = settings.EXPOADMINUSER_EMAIL
def reinit_db(): def reinit_db():
"""Rebuild database from scratch. Deletes the file first if sqlite is used, """Rebuild database from scratch. Deletes the file first if sqlite is used,
otherwise it drops the database and creates it. otherwise it drops the database and creates it.
@ -73,21 +82,25 @@ def reinit_db():
of stuff known. Deleting the db file does not clear memory. of stuff known. Deleting the db file does not clear memory.
""" """
print("Reinitialising db ", end="") print("Reinitialising db ", end="")
print(django.db.connections.databases['default']['NAME']) print(django.db.connections.databases["default"]["NAME"])
currentdbname = settings.DATABASES['default']['NAME'] currentdbname = settings.DATABASES["default"]["NAME"]
if currentdbname == ':memory:': if currentdbname == ":memory:":
# closing connections should wipe the in-memory database # closing connections should wipe the in-memory database
django.db.close_old_connections() django.db.close_old_connections()
for conn in django.db.connections.all(): for conn in django.db.connections.all():
print(" ! Closing another connection to db...") print(" ! Closing another connection to db...")
conn.close() conn.close()
elif django.db.connections.databases['default']['ENGINE'] == 'django.db.backends.sqlite3': elif django.db.connections.databases["default"]["ENGINE"] == "django.db.backends.sqlite3":
if os.path.isfile(currentdbname): if os.path.isfile(currentdbname):
try: try:
print(" - deleting " + currentdbname) print(" - deleting " + currentdbname)
os.remove(currentdbname) os.remove(currentdbname)
except OSError: except OSError:
print(" ! OSError on removing: " + currentdbname + "\n ! Is the file open in another app? Is the server running?\n") print(
" ! OSError on removing: "
+ currentdbname
+ "\n ! Is the file open in another app? Is the server running?\n"
)
raise raise
else: else:
print(" - No database file found: " + currentdbname + " ..continuing, will create it.\n") print(" - No database file found: " + currentdbname + " ..continuing, will create it.\n")
@ -102,102 +115,110 @@ def reinit_db():
cursor.execute(f"USE {currentdbname}") cursor.execute(f"USE {currentdbname}")
print(f" - Nuked : {currentdbname}\n") print(f" - Nuked : {currentdbname}\n")
print(" - Migrating: " + django.db.connections.databases['default']['NAME']) print(" - Migrating: " + django.db.connections.databases["default"]["NAME"])
if django.db.connections.databases['default']['ENGINE'] == 'django.db.backends.sqlite3': if django.db.connections.databases["default"]["ENGINE"] == "django.db.backends.sqlite3":
# with transaction.atomic(): # with transaction.atomic():
management.call_command('makemigrations','core', interactive=False) management.call_command("makemigrations", "core", interactive=False)
management.call_command('migrate', interactive=False) management.call_command("migrate", interactive=False)
management.call_command('migrate','core', interactive=False) management.call_command("migrate", "core", interactive=False)
else: else:
management.call_command('makemigrations','core', interactive=False) management.call_command("makemigrations", "core", interactive=False)
management.call_command('migrate', interactive=False) management.call_command("migrate", interactive=False)
management.call_command('migrate','core', interactive=False) management.call_command("migrate", "core", interactive=False)
print(" - done migration on: " + settings.DATABASES["default"]["NAME"])
print(" - done migration on: " + settings.DATABASES['default']['NAME'])
print("users in db already: ", len(User.objects.all())) print("users in db already: ", len(User.objects.all()))
with transaction.atomic(): with transaction.atomic():
try: try:
print(" - Setting up expo user on: " + django.db.connections.databases['default']['NAME']) print(" - Setting up expo user on: " + django.db.connections.databases["default"]["NAME"])
print(f" - user: {expouser} ({expouserpass:.5}...) <{expouseremail}> ") print(f" - user: {expouser} ({expouserpass:.5}...) <{expouseremail}> ")
user = User.objects.create_user(expouser, expouseremail, expouserpass) user = User.objects.create_user(expouser, expouseremail, expouserpass)
user.is_staff = False user.is_staff = False
user.is_superuser = False user.is_superuser = False
user.save() user.save()
except: except:
print(" ! INTEGRITY ERROR user on: " + settings.DATABASES['default']['NAME']) print(" ! INTEGRITY ERROR user on: " + settings.DATABASES["default"]["NAME"])
print(django.db.connections.databases['default']['NAME']) print(django.db.connections.databases["default"]["NAME"])
print(" ! You probably have not got a clean db when you thought you had.\n") print(" ! You probably have not got a clean db when you thought you had.\n")
print(" ! Also you are probably NOT running an in-memory db now.\n") print(" ! Also you are probably NOT running an in-memory db now.\n")
print("users in db: ", len(User.objects.all())) print("users in db: ", len(User.objects.all()))
print("tables in db: ", len(connection.introspection.table_names())) print("tables in db: ", len(connection.introspection.table_names()))
memdumpsql(fn='integrityfail.sql') memdumpsql(fn="integrityfail.sql")
django.db.connections.databases['default']['NAME'] = ':memory:' django.db.connections.databases["default"]["NAME"] = ":memory:"
# raise # raise
with transaction.atomic(): with transaction.atomic():
try: try:
print(" - Setting up expoadmin user on: " + django.db.connections.databases['default']['NAME']) print(" - Setting up expoadmin user on: " + django.db.connections.databases["default"]["NAME"])
print(f" - user: {expoadminuser} ({expoadminuserpass:.5}...) <{expoadminuseremail}> ") print(f" - user: {expoadminuser} ({expoadminuserpass:.5}...) <{expoadminuseremail}> ")
user = User.objects.create_user(expoadminuser, expoadminuseremail, expoadminuserpass) user = User.objects.create_user(expoadminuser, expoadminuseremail, expoadminuserpass)
user.is_staff = True user.is_staff = True
user.is_superuser = True user.is_superuser = True
user.save() user.save()
except: except:
print(" ! INTEGRITY ERROR user on: " + settings.DATABASES['default']['NAME']) print(" ! INTEGRITY ERROR user on: " + settings.DATABASES["default"]["NAME"])
print(django.db.connections.databases['default']['NAME']) print(django.db.connections.databases["default"]["NAME"])
print(" ! You probably have not got a clean db when you thought you had.\n") print(" ! You probably have not got a clean db when you thought you had.\n")
print(" ! Also you are probably NOT running an in-memory db now.\n") print(" ! Also you are probably NOT running an in-memory db now.\n")
print("users in db: ", len(User.objects.all())) print("users in db: ", len(User.objects.all()))
print("tables in db: ", len(connection.introspection.table_names())) print("tables in db: ", len(connection.introspection.table_names()))
memdumpsql(fn='integrityfail.sql') memdumpsql(fn="integrityfail.sql")
django.db.connections.databases['default']['NAME'] = ':memory:' django.db.connections.databases["default"]["NAME"] = ":memory:"
# raise # raise
def memdumpsql(fn): def memdumpsql(fn):
'''Unused option to dump SQL. Aborted attempt to create a cache for loading data """Unused option to dump SQL. Aborted attempt to create a cache for loading data"""
'''
djconn = django.db.connection djconn = django.db.connection
from dump import _iterdump from dump import _iterdump
with open(fn, 'w') as f:
with open(fn, "w") as f:
for line in _iterdump(djconn): for line in _iterdump(djconn):
f.write(f"{line.encode('utf8')}\n") f.write(f"{line.encode('utf8')}\n")
return True return True
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
class JobQueue():
class JobQueue:
"""A list of import operations to run. Always reports profile times """A list of import operations to run. Always reports profile times
of the import operations in the same order. of the import operations in the same order.
""" """
def __init__(self, run): def __init__(self, run):
'''Initialises the job queue object with a fixed order for reporting """Initialises the job queue object with a fixed order for reporting
options during a run. Imports the timings from previous runs. options during a run. Imports the timings from previous runs.
''' """
self.runlabel = run self.runlabel = run
self.queue = [] # tuples of (jobname, jobfunction) self.queue = [] # tuples of (jobname, jobfunction)
self.results = {} self.results = {}
self.results_order = [ self.results_order = [
"date","runlabel","reinit", "caves", "people", "date",
"logbooks", "QMs", "scans", "survex", "runlabel",
"drawings", "test" ] "reinit",
"caves",
"people",
"logbooks",
"QMs",
"scans",
"survex",
"drawings",
"test",
]
for k in self.results_order: for k in self.results_order:
self.results[k] = [] self.results[k] = []
self.tfile = "import_profile.json" self.tfile = "import_profile.json"
self.htmlfile = "profile.html" # for HTML results table. Not yet done. self.htmlfile = "profile.html" # for HTML results table. Not yet done.
def enq(self, label, func): def enq(self, label, func):
'''Enqueue: Adding elements to queue """Enqueue: Adding elements to queue"""
'''
self.queue.append((label, func)) self.queue.append((label, func))
return True return True
def loadprofiles(self): def loadprofiles(self):
"""Load timings for previous imports for each data import type """Load timings for previous imports for each data import type"""
"""
if os.path.isfile(self.tfile): if os.path.isfile(self.tfile):
try: try:
f = open(self.tfile, "r") f = open(self.tfile, "r")
@ -213,31 +234,27 @@ class JobQueue():
return True return True
def dellastprofile(self): def dellastprofile(self):
"""trim one set of data from the results """trim one set of data from the results"""
"""
for j in self.results_order: for j in self.results_order:
self.results[j].pop() # delete last item self.results[j].pop() # delete last item
return True return True
def delfirstprofile(self): def delfirstprofile(self):
"""trim one set of data from the results """trim one set of data from the results"""
"""
for j in self.results_order: for j in self.results_order:
self.results[j].pop(0) # delete zeroth item self.results[j].pop(0) # delete zeroth item
return True return True
def saveprofiles(self): def saveprofiles(self):
"""Save timings for the set of imports just completed """Save timings for the set of imports just completed"""
""" with open(self.tfile, "w") as f:
with open(self.tfile, 'w') as f:
json.dump(self.results, f) json.dump(self.results, f)
return True return True
def runqonce(self): def runqonce(self):
"""Run all the jobs in the queue provided - once """Run all the jobs in the queue provided - once"""
"""
print("** Running job ", self.runlabel, end=" to ") print("** Running job ", self.runlabel, end=" to ")
print(django.db.connections.databases['default']['NAME']) print(django.db.connections.databases["default"]["NAME"])
jobstart = time.time() jobstart = time.time()
print(f"-- Initial memory in use {get_process_memory():.3f} MB") print(f"-- Initial memory in use {get_process_memory():.3f} MB")
self.results["date"].pop() self.results["date"].pop()
@ -255,50 +272,52 @@ class JobQueue():
memend = get_process_memory() memend = get_process_memory()
duration = time.time() - start duration = time.time() - start
# print(" - MEMORY start:{:.3f} MB end:{:.3f} MB change={:.3f} MB".format(memstart,memend, )) # print(" - MEMORY start:{:.3f} MB end:{:.3f} MB change={:.3f} MB".format(memstart,memend, ))
print("\n*- Ended \"", jobname, f"\" {duration:.1f} seconds + {memend - memstart:.3f} MB ({memend:.3f} MB)") print(
'\n*- Ended "',
jobname,
f'" {duration:.1f} seconds + {memend - memstart:.3f} MB ({memend:.3f} MB)',
)
self.results[jobname].pop() # the null item self.results[jobname].pop() # the null item
self.results[jobname].append(duration) self.results[jobname].append(duration)
jobend = time.time() jobend = time.time()
jobduration = jobend - jobstart jobduration = jobend - jobstart
print(f"** Ended job {self.runlabel} - {jobduration:.1f} seconds total.") print(f"** Ended job {self.runlabel} - {jobduration:.1f} seconds total.")
return True return True
def append_placeholders(self): def append_placeholders(self):
'''Ads a dummy timing for each option, to fix off by one error """Ads a dummy timing for each option, to fix off by one error"""
'''
for j in self.results_order: for j in self.results_order:
self.results[j].append(None) # append a placeholder self.results[j].append(None) # append a placeholder
def run_now_django_tests(self, n): def run_now_django_tests(self, n):
"""Runs the standard django test harness system which is in troggle/core/TESTS/tests.py """Runs the standard django test harness system which is in troggle/core/TESTS/tests.py"""
""" management.call_command("test", verbosity=n)
management.call_command('test', verbosity=n)
django.db.close_old_connections() django.db.close_old_connections()
def run(self): def run(self):
"""Initialises profile timings record, initiates relational database, runs the job queue saving the imported data as an SQL image and saves the timing profile data. """Initialises profile timings record, initiates relational database, runs the job queue saving the imported data as an SQL image and saves the timing profile data."""
"""
self.loadprofiles() self.loadprofiles()
print("-- start ", django.db.connections.databases['default']['ENGINE'], django.db.connections.databases['default']['NAME']) print(
"-- start ",
django.db.connections.databases["default"]["ENGINE"],
django.db.connections.databases["default"]["NAME"],
)
self.runqonce() self.runqonce()
if settings.DATABASES['default']['NAME'] ==":memory:": if settings.DATABASES["default"]["NAME"] == ":memory:":
memdumpsql('memdump.sql') # saved contents of in-memory db, could be imported later.. memdumpsql("memdump.sql") # saved contents of in-memory db, could be imported later..
self.saveprofiles() self.saveprofiles()
return True return True
def showprofile(self): def showprofile(self):
"""Prints out the time it took to run the jobqueue """Prints out the time it took to run the jobqueue"""
"""
for k in self.results_order: for k in self.results_order:
if k == "test": if k == "test":
break break
elif k == "date": elif k == "date":
print(" days ago ", end=' ') print(" days ago ", end=" ")
else: else:
print('%10s (s)' % k, end=' ') print("%10s (s)" % k, end=" ")
percen = 0 percen = 0
r = self.results[k] r = self.results[k]
@ -308,12 +327,12 @@ class JobQueue():
rp = r[i] rp = r[i]
else: else:
rp = " - " rp = " - "
print('%8s' % rp, end=' ') print("%8s" % rp, end=" ")
elif k == "date": elif k == "date":
# Calculate dates as days before present # Calculate dates as days before present
if r[i]: if r[i]:
if i == len(r) - 1: if i == len(r) - 1:
print(" this", end=' ') print(" this", end=" ")
else: else:
# prints one place to the left of where you expect # prints one place to the left of where you expect
if r[len(r) - 1]: if r[len(r) - 1]:
@ -323,24 +342,24 @@ class JobQueue():
else: else:
s = 0 s = 0
days = (s) / (24 * 60 * 60) days = (s) / (24 * 60 * 60)
print(f'{days:8.2f}', end=' ') print(f"{days:8.2f}", end=" ")
elif r[i]: elif r[i]:
print(f'{r[i]:8.1f}', end=' ') print(f"{r[i]:8.1f}", end=" ")
if i == len(r) - 1 and r[i - 1]: if i == len(r) - 1 and r[i - 1]:
percen = 100 * (r[i] - r[i - 1]) / r[i - 1] percen = 100 * (r[i] - r[i - 1]) / r[i - 1]
if abs(percen) > 0.1: if abs(percen) > 0.1:
print(f'{percen:8.1f}%', end=' ') print(f"{percen:8.1f}%", end=" ")
else: else:
print(" - ", end=' ') print(" - ", end=" ")
print("") print("")
print("\n") print("\n")
return True return True
def usage(): def usage():
'''Prints command line options, can print history of previous runs with timings """Prints command line options, can print history of previous runs with timings"""
''' print(
print("""Usage is 'python databaseReset.py <command> [runlabel]' """Usage is 'python databaseReset.py <command> [runlabel]'
where command is: where command is:
test - testing... imports people and prints profile. Deletes nothing. test - testing... imports people and prints profile. Deletes nothing.
profile - print the profile from previous runs. Import nothing. profile - print the profile from previous runs. Import nothing.
@ -370,7 +389,9 @@ def usage():
Note that running the subfunctions will not produce a consistent website Note that running the subfunctions will not produce a consistent website
- only the full 'reset' does that. - only the full 'reset' does that.
""") """
)
if __name__ == "__main__": if __name__ == "__main__":
@ -381,10 +402,10 @@ if __name__ == "__main__":
if sys.getfilesystemencoding() != "utf-8": if sys.getfilesystemencoding() != "utf-8":
print("UTF-8 is NOT the default file encoding. You must fix this.") print("UTF-8 is NOT the default file encoding. You must fix this.")
print(f'- {sys.getdefaultencoding()=}') print(f"- {sys.getdefaultencoding()=}")
print(f'- {sys.getfilesystemencoding()=}') print(f"- {sys.getfilesystemencoding()=}")
print(f'- {locale.getdefaultlocale()=}') print(f"- {locale.getdefaultlocale()=}")
print(f'- {locale.getpreferredencoding()=}') print(f"- {locale.getpreferredencoding()=}")
print("Aborting run.") print("Aborting run.")
exit() exit()
@ -438,13 +459,13 @@ if __name__ == "__main__":
# elif "writecaves" in sys.argv: # untested in 2020 - will overwrite input files!! # elif "writecaves" in sys.argv: # untested in 2020 - will overwrite input files!!
# writeCaves() # writeCaves()
elif "profile" in sys.argv: elif "profile" in sys.argv:
if runlabel == 'del' : if runlabel == "del":
jq.loadprofiles() jq.loadprofiles()
jq.dellastprofile() jq.dellastprofile()
jq.dellastprofile() # twice because loadprofiles adds a dummy jq.dellastprofile() # twice because loadprofiles adds a dummy
jq.showprofile() jq.showprofile()
jq.saveprofiles() jq.saveprofiles()
if runlabel == 'delfirst' : if runlabel == "delfirst":
jq.loadprofiles() jq.loadprofiles()
jq.dellastprofile() # remove the dummy jq.dellastprofile() # remove the dummy
jq.delfirstprofile() jq.delfirstprofile()

View File

@ -9,13 +9,14 @@ from troggle.core.models.caves import QM, Cave, LogbookEntry
from troggle.core.models.troggle import DataIssue from troggle.core.models.troggle import DataIssue
from troggle.core.utils import save_carefully from troggle.core.utils import save_carefully
'''Reads the CSV files containg QMs for a select few caves """Reads the CSV files containg QMs for a select few caves
See parsers/survex.py for the parser which extracts QMs from the survex files See parsers/survex.py for the parser which extracts QMs from the survex files
''' """
def deleteQMs(): def deleteQMs():
QM.objects.all().delete() QM.objects.all().delete()
DataIssue.objects.filter(parser='QMs').delete() DataIssue.objects.filter(parser="QMs").delete()
def parseCaveQMs(cave, inputFile, ticked=False): def parseCaveQMs(cave, inputFile, ticked=False):
@ -35,39 +36,39 @@ def parseCaveQMs(cave, inputFile, ticked=False):
All QMs are created afresh and this is all only run once on import on a fresh database. All QMs are created afresh and this is all only run once on import on a fresh database.
""" """
if cave=='204-steinBH': if cave == "204-steinBH":
try: try:
steinBr = Cave.objects.get(official_name="Steinbr&uuml;ckenh&ouml;hle") steinBr = Cave.objects.get(official_name="Steinbr&uuml;ckenh&ouml;hle")
caveid = steinBr caveid = steinBr
except Cave.DoesNotExist: except Cave.DoesNotExist:
message = f' ! - {qmPath} Steinbruckenhoehle is not in the database. Please run cave parser' message = f" ! - {qmPath} Steinbruckenhoehle is not in the database. Please run cave parser"
print(message) print(message)
DataIssue.objects.create(parser='QMs', message=message) DataIssue.objects.create(parser="QMs", message=message)
return return
elif cave=='234-Hauch': elif cave == "234-Hauch":
try: try:
hauchHl = Cave.objects.get(official_name="Hauchh&ouml;hle") hauchHl = Cave.objects.get(official_name="Hauchh&ouml;hle")
caveid = hauchHl caveid = hauchHl
except Cave.DoesNotExist: except Cave.DoesNotExist:
message = f' ! - {qmPath} Hauchhoehle is not in the database. Please run cave parser' message = f" ! - {qmPath} Hauchhoehle is not in the database. Please run cave parser"
print(message) print(message)
DataIssue.objects.create(parser='QMs', message=message) DataIssue.objects.create(parser="QMs", message=message)
return return
elif cave =='161-KH': elif cave == "161-KH":
try: try:
kh = Cave.objects.get(official_name="Kaninchenh&ouml;hle") kh = Cave.objects.get(official_name="Kaninchenh&ouml;hle")
caveid = kh caveid = kh
except Cave.DoesNotExist: except Cave.DoesNotExist:
message = f' ! - {qmPath} KH is not in the database. Please run cave parser' message = f" ! - {qmPath} KH is not in the database. Please run cave parser"
print(message) print(message)
DataIssue.objects.create(parser='QMs', message=message) DataIssue.objects.create(parser="QMs", message=message)
nqms = parse_KH_QMs(kh, inputFile=inputFile, ticked=ticked) nqms = parse_KH_QMs(kh, inputFile=inputFile, ticked=ticked)
return nqms return nqms
# qmPath = settings.EXPOWEB+inputFile # qmPath = settings.EXPOWEB+inputFile
qmPath = os.path.join(settings.EXPOWEB, inputFile) # why not use the pathlib stuff ? qmPath = os.path.join(settings.EXPOWEB, inputFile) # why not use the pathlib stuff ?
qmCSVContents = open(qmPath,'r') qmCSVContents = open(qmPath, "r")
dialect = csv.Sniffer().sniff(qmCSVContents.read()) dialect = csv.Sniffer().sniff(qmCSVContents.read())
qmCSVContents.seek(0, 0) qmCSVContents.seek(0, 0)
qmReader = csv.reader(qmCSVContents, dialect=dialect) qmReader = csv.reader(qmCSVContents, dialect=dialect)
@ -78,7 +79,7 @@ def parseCaveQMs(cave, inputFile, ticked=False):
try: try:
n += 1 n += 1
year = int(line[0][1:5]) year = int(line[0][1:5])
logslug = f'PH_{int(year)}_{int(n):02d}' logslug = f"PH_{int(year)}_{int(n):02d}"
QMnum = re.match(r".*?-\d*?-X?(?P<numb>\d*)", line[0]).group("numb") QMnum = re.match(r".*?-\d*?-X?(?P<numb>\d*)", line[0]).group("numb")
newQM = QM() newQM = QM()
# newQM.found_by=placeholder # newQM.found_by=placeholder
@ -103,8 +104,12 @@ def parseCaveQMs(cave, inputFile, ticked=False):
newQM.comment = line[6] newQM.comment = line[6]
try: try:
# year and number are unique for a cave in CSV imports # year and number are unique for a cave in CSV imports
preexistingQM=QM.objects.get(number=QMnum, found_by__date__year=year) #if we don't have this one in the DB, save it preexistingQM = QM.objects.get(
if preexistingQM.new_since_parsing==False: #if the pre-existing QM has not been modified, overwrite it - VERY OLD THING number=QMnum, found_by__date__year=year
) # if we don't have this one in the DB, save it
if (
preexistingQM.new_since_parsing == False
): # if the pre-existing QM has not been modified, overwrite it - VERY OLD THING
preexistingQM.delete() preexistingQM.delete()
newQM.expoyear = year newQM.expoyear = year
newQM.save() newQM.save()
@ -116,28 +121,31 @@ def parseCaveQMs(cave, inputFile, ticked=False):
newQM.save() newQM.save()
nqms += 1 nqms += 1
except KeyError: # check on this one except KeyError: # check on this one
message = f' ! - {qmPath} KeyError {str(line)} ' message = f" ! - {qmPath} KeyError {str(line)} "
print(message) print(message)
DataIssue.objects.create(parser='QMs', message=message) DataIssue.objects.create(parser="QMs", message=message)
continue continue
except IndexError: except IndexError:
message = f' ! - {qmPath} IndexError {str(line)} ' message = f" ! - {qmPath} IndexError {str(line)} "
print(message) print(message)
DataIssue.objects.create(parser='QMs', message=message) DataIssue.objects.create(parser="QMs", message=message)
continue continue
return nqms return nqms
def parse_KH_QMs(kh, inputFile, ticked): def parse_KH_QMs(kh, inputFile, ticked):
"""import QMs from the 1623-161 (Kaninchenhohle) html pages, different format """import QMs from the 1623-161 (Kaninchenhohle) html pages, different format"""
""" khQMs = open(os.path.join(settings.EXPOWEB, inputFile), "r")
khQMs=open(os.path.join(settings.EXPOWEB, inputFile),'r')
khQMs = khQMs.readlines() khQMs = khQMs.readlines()
nqms = 0 nqms = 0
for line in khQMs: for line in khQMs:
res=re.search(r'name=\"[CB](?P<year>\d*)-(?P<cave>\d*)-(?P<number>\d*).*</a> (?P<grade>[ABDCV])<dd>(?P<description>.*)\[(?P<nearest_station>.*)\]',line) res = re.search(
r"name=\"[CB](?P<year>\d*)-(?P<cave>\d*)-(?P<number>\d*).*</a> (?P<grade>[ABDCV])<dd>(?P<description>.*)\[(?P<nearest_station>.*)\]",
line,
)
if res: if res:
res = res.groupdict() res = res.groupdict()
year=int(res['year']) year = int(res["year"])
# logbook placeholder code was previously here. No longer needed. # logbook placeholder code was previously here. No longer needed.
# check if placeholder exists for given year, create it if not # check if placeholder exists for given year, create it if not
# message = " ! - "+ str(year) + " logbook: placeholder entry for '161 KH' created. DUMMY EXPEDITION ID. Should be re-attached to the actual trip." # message = " ! - "+ str(year) + " logbook: placeholder entry for '161 KH' created. DUMMY EXPEDITION ID. Should be re-attached to the actual trip."
@ -147,16 +155,16 @@ def parse_KH_QMs(kh, inputFile, ticked):
# DataIssue.objects.create(parser='QMs', message=message) # DataIssue.objects.create(parser='QMs', message=message)
lookupArgs = { lookupArgs = {
#'found_by':placeholder, #'found_by':placeholder,
'blockname': "", "blockname": "",
'expoyear':year, "expoyear": year,
'number':res['number'], "number": res["number"],
'cave': kh, "cave": kh,
'grade':res['grade'] "grade": res["grade"],
} }
nonLookupArgs = { nonLookupArgs = {
'ticked': ticked, "ticked": ticked,
'nearest_station_name':res['nearest_station'], "nearest_station_name": res["nearest_station"],
'location_description':res['description'] "location_description": res["description"],
} }
instance, created = save_carefully(QM, lookupArgs, nonLookupArgs) instance, created = save_carefully(QM, lookupArgs, nonLookupArgs)
# if created: # if created:
@ -169,10 +177,10 @@ def parse_KH_QMs(kh, inputFile, ticked):
def Load_QMs(): def Load_QMs():
deleteQMs() deleteQMs()
n204 = parseCaveQMs(cave='204-steinBH',inputFile=r"1623/204/qm.csv") n204 = parseCaveQMs(cave="204-steinBH", inputFile=r"1623/204/qm.csv")
n234 = parseCaveQMs(cave='234-Hauch',inputFile=r"1623/234/qm.csv") n234 = parseCaveQMs(cave="234-Hauch", inputFile=r"1623/234/qm.csv")
n161 = parseCaveQMs(cave='161-KH', inputFile="1623/161/qmtodo.htm", ticked=False) n161 = parseCaveQMs(cave="161-KH", inputFile="1623/161/qmtodo.htm", ticked=False)
t161 = parseCaveQMs(cave='161-KH', inputFile="1623/161/qmdone.htm", ticked=True) t161 = parseCaveQMs(cave="161-KH", inputFile="1623/161/qmdone.htm", ticked=True)
# parseCaveQMs(cave='balkonhoehle',inputFile=r"1623/264/qm.csv") # parseCaveQMs(cave='balkonhoehle',inputFile=r"1623/264/qm.csv")
print(f" - Imported: {n204} QMs for 204, {n234} QMs for 234, {t161} QMs for 161 done, {n161} QMs for 161 not done.") print(f" - Imported: {n204} QMs for 204, {n234} QMs for 234, {t161} QMs for 161 done, {n161} QMs for 161 not done.")

View File

@ -6,49 +6,48 @@ from pathlib import Path
from django.conf import settings from django.conf import settings
from django.db import transaction from django.db import transaction
from troggle.core.models.caves import (Area, Cave, CaveAndEntrance, CaveSlug, from troggle.core.models.caves import Area, Cave, CaveAndEntrance, CaveSlug, Entrance, EntranceSlug, GetCaveLookup
Entrance, EntranceSlug, GetCaveLookup)
from troggle.core.models.troggle import DataIssue from troggle.core.models.troggle import DataIssue
from troggle.settings import (CAVEDESCRIPTIONS, ENTRANCEDESCRIPTIONS, EXPOWEB, from troggle.settings import CAVEDESCRIPTIONS, ENTRANCEDESCRIPTIONS, EXPOWEB, SURVEX_DATA
SURVEX_DATA)
'''Reads all the cave description data by parsing the xml files (stored as e.g. :EXPOWEB:/cave_data/1623-161.html ) """Reads all the cave description data by parsing the xml files (stored as e.g. :EXPOWEB:/cave_data/1623-161.html )
and creating the various Cave, Entrance and necessary Area objects. and creating the various Cave, Entrance and necessary Area objects.
This is the first import that happens after the database is reinitialised. This is the first import that happens after the database is reinitialised.
So is the first thing that creates tables. So is the first thing that creates tables.
''' """
todo=''' todo = """
- Cannot use Edit This Page for pendingcaves.txt_edit as Edit This Page is expecting an html file. - Cannot use Edit This Page for pendingcaves.txt_edit as Edit This Page is expecting an html file.
So we will need a separate file-editing capability just for this configuration file ?! So we will need a separate file-editing capability just for this configuration file ?!
- crashes on MariaDB in databasereset.py on server when deleting Caves and complains Area needs a non null parent, But this is not true. - crashes on MariaDB in databasereset.py on server when deleting Caves and complains Area needs a non null parent, But this is not true.
The only solution we have found is to let it crash, then stop and restart MariaDB (requires a logon able to sudo) The only solution we have found is to let it crash, then stop and restart MariaDB (requires a logon able to sudo)
and then restart the databasereset.py again. (status as of July 2022) and then restart the databasereset.py again. (status as of July 2022)
''' """
entrances_xslug = {} entrances_xslug = {}
caves_xslug = {} caves_xslug = {}
areas_xslug = {} areas_xslug = {}
def dummy_entrance(k, slug, msg="DUMMY"): def dummy_entrance(k, slug, msg="DUMMY"):
'''Returns an empty entrance object for either a PENDING cave or a DUMMY entrance if """Returns an empty entrance object for either a PENDING cave or a DUMMY entrance if
user forgot to provide one when creating the cave user forgot to provide one when creating the cave
''' """
ent = Entrance( ent = Entrance(
name=k, name=k,
entrance_description = "Dummy entrance: auto-created when registering a new cave " + entrance_description="Dummy entrance: auto-created when registering a new cave "
"and you forgot to create an entrance for it. Click on 'Edit' to enter the correct data, then 'Submit'.", + "and you forgot to create an entrance for it. Click on 'Edit' to enter the correct data, then 'Submit'.",
marking = '?') marking="?",
)
if ent: if ent:
ent.save() # must save to have id before foreign keys work. ent.save() # must save to have id before foreign keys work.
try: # Now create a entrance slug ID try: # Now create a entrance slug ID
es = EntranceSlug(entrance = ent, es = EntranceSlug(entrance=ent, slug=slug, primary=False)
slug = slug, primary = False)
except: except:
message = f" ! {k:11s} {msg}-{slug} entrance create failure" message = f" ! {k:11s} {msg}-{slug} entrance create failure"
DataIssue.objects.create(parser='caves', message=message, url=f'{slug}') DataIssue.objects.create(parser="caves", message=message, url=f"{slug}")
print(message) print(message)
ent.cached_primary_slug = slug ent.cached_primary_slug = slug
@ -57,41 +56,43 @@ def dummy_entrance(k, slug, msg="DUMMY"):
return ent return ent
else: else:
message = f" ! {k:11s} {msg} cave SLUG '{slug}' create failure" message = f" ! {k:11s} {msg} cave SLUG '{slug}' create failure"
DataIssue.objects.create(parser='caves', message=message, url=f'{slug}') DataIssue.objects.create(parser="caves", message=message, url=f"{slug}")
print(message) print(message)
raise raise
def set_dummy_entrance(id, slug, cave, msg="DUMMY"):
    """Attach an auto-created placeholder entrance to a cave.

    Used when the entrance field in a cave_data file is either missing or
    holds a null string instead of a filename.

    Args:
        id: Identifier used as the dummy entrance's name and in the log
            messages. (Shadows the builtin ``id``; the name is kept so that
            existing keyword callers continue to work.)
        slug: Slug under which the new entrance is cached in the module-level
            ``entrances_xslug`` dict and which is passed to ``dummy_entrance``.
        cave: Cave object the entrance is linked to through CaveAndEntrance.
        msg: Label forwarded to ``dummy_entrance`` for its diagnostics.

    Returns:
        None. Both success and failure are recorded as a DataIssue and
        printed; failures are deliberately not re-raised so that the import
        run carries on with the next cave.
    """
    # entrances_xslug is only mutated, never rebound, so no `global` is needed.
    try:
        # Forward the caller's label instead of a hard-coded "DUMMY".
        entrance = dummy_entrance(id, slug, msg=msg)
        entrances_xslug[slug] = entrance
        # A dummy entrance never has an entrance letter.
        CaveAndEntrance.objects.update_or_create(cave=cave, entrance_letter="", entrance=entrance)
        message = f" ! Warning: Dummy Entrance successfully set for entrance {id} on cave {cave}"
        DataIssue.objects.create(parser="caves", message=message, url=f"{cave.url}")
        print(message)
    except Exception:
        # Best effort: log the failure, but let KeyboardInterrupt/SystemExit
        # propagate (a bare `except:` would have swallowed those too).
        message = f' ! Entrance Dummy setting failure, slug:"{slug}" cave id :"{id}" '
        DataIssue.objects.create(parser="caves", message=message, url=f"{cave.url}")
        print(message)
def do_pending_cave(k, url, area): def do_pending_cave(k, url, area):
''' """
default for a PENDING cave, should be overwritten in the db later if a real cave of the same name exists default for a PENDING cave, should be overwritten in the db later if a real cave of the same name exists
in expoweb/cave_data/1623-"k".html in expoweb/cave_data/1623-"k".html
Note that at this point in importing the data we have not yet seen the survex files, so we can't Note that at this point in importing the data we have not yet seen the survex files, so we can't
look inside the relevant survex file to find the year and so we con't provide helpful links. look inside the relevant survex file to find the year and so we con't provide helpful links.
''' """
def get_survex_file(k): def get_survex_file(k):
'''Guesses at and finds a survex file for this pending cave. """Guesses at and finds a survex file for this pending cave.
Convoluted. Needs rewriting Convoluted. Needs rewriting
''' """
if k[0:3] == "162": if k[0:3] == "162":
id = Path(k[5:]) id = Path(k[5:])
else: else:
@ -129,23 +130,29 @@ def do_pending_cave(k, url, area):
with transaction.atomic(): with transaction.atomic():
if slug in g: if slug in g:
message = f" ! {k:18} cave listed in pendingcaves.txt already exists." message = f" ! {k:18} cave listed in pendingcaves.txt already exists."
DataIssue.objects.create(parser='caves', message=message, url=url) DataIssue.objects.create(parser="caves", message=message, url=url)
print(message) print(message)
return return
default_note = f"_Survex file found in loser repo but no description in expoweb <br><br><br>\n" default_note = f"_Survex file found in loser repo but no description in expoweb <br><br><br>\n"
default_note += f"INSTRUCTIONS: First open 'This survex file' (link above the CaveView panel) to find the date and info. Then " default_note += f"INSTRUCTIONS: First open 'This survex file' (link above the CaveView panel) to find the date and info. Then "
default_note += f"<br><br>\n\n - (0) look in the <a href=\"/noinfo/cave-number-index\">cave number index</a> for notes on this cave, " default_note += f'<br><br>\n\n - (0) look in the <a href="/noinfo/cave-number-index">cave number index</a> for notes on this cave, '
default_note += f"<br><br>\n\n - (1) search in the survex file for the *ref to find a " default_note += f"<br><br>\n\n - (1) search in the survex file for the *ref to find a "
default_note += f"relevant wallet, e.g.<a href='/survey_scans/2009%252311/'>2009#11</a> and read the notes image files <br>\n - " default_note += f"relevant wallet, e.g.<a href='/survey_scans/2009%252311/'>2009#11</a> and read the notes image files <br>\n - "
default_note += f"<br><br>\n\n - (2) search in the Expo for that year e.g. <a href='/expedition/2009'>2009</a> to find a " default_note += (
f"<br><br>\n\n - (2) search in the Expo for that year e.g. <a href='/expedition/2009'>2009</a> to find a "
)
default_note += f"relevant logbook entry, remember that the date may have been recorded incorrectly, " default_note += f"relevant logbook entry, remember that the date may have been recorded incorrectly, "
default_note += f"so check for trips i.e. logbook entries involving the same people as were listed in the survex file, " default_note += (
default_note += f"and you should also check the scanned copy of the logbook (linked from each logbook entry page) " f"so check for trips i.e. logbook entries involving the same people as were listed in the survex file, "
)
default_note += (
f"and you should also check the scanned copy of the logbook (linked from each logbook entry page) "
)
default_note += f"just in case a vital trip was not transcribed, then <br>\n - " default_note += f"just in case a vital trip was not transcribed, then <br>\n - "
default_note += f"click on 'Edit this cave' and copy the information you find in the survex file and the logbook" default_note += (
f"click on 'Edit this cave' and copy the information you find in the survex file and the logbook"
)
default_note += f"and delete all the text in the 'Notes' section - which is the text you are reading now." default_note += f"and delete all the text in the 'Notes' section - which is the text you are reading now."
default_note += f"<br><br>\n\n - Only two fields on this form are essential. " default_note += f"<br><br>\n\n - Only two fields on this form are essential. "
default_note += f"Documentation of all the fields on 'Edit this cave' form is in <a href='/handbook/survey/caveentryfields.html'>handbook/survey/caveentryfields</a>" default_note += f"Documentation of all the fields on 'Edit this cave' form is in <a href='/handbook/survey/caveentryfields.html'>handbook/survey/caveentryfields</a>"
@ -153,10 +160,14 @@ def do_pending_cave(k, url, area):
default_note += f"You will also need to create a new entrance from the 'Edit this cave' page. Ignore the existing dummy one, it will evaporate on the next full import." default_note += f"You will also need to create a new entrance from the 'Edit this cave' page. Ignore the existing dummy one, it will evaporate on the next full import."
default_note += f"<br><br>\n\n - " default_note += f"<br><br>\n\n - "
default_note += f"When you Submit it will create a new file in expoweb/cave_data/ " default_note += f"When you Submit it will create a new file in expoweb/cave_data/ "
default_note += f"<br><br>\n\n - Now you can edit the entrance info: click on Edit below for the dummy entrance. " default_note += (
f"<br><br>\n\n - Now you can edit the entrance info: click on Edit below for the dummy entrance. "
)
default_note += f"and then Submit to save it (if you forget to do this, a dummy entrance will be created for your new cave description)." default_note += f"and then Submit to save it (if you forget to do this, a dummy entrance will be created for your new cave description)."
default_note += f"<br><br>\n\n - Finally, you need to find a nerd to edit the file '<var>expoweb/cave_data/pending.txt</var>' " default_note += f"<br><br>\n\n - Finally, you need to find a nerd to edit the file '<var>expoweb/cave_data/pending.txt</var>' "
default_note += f"to remove the line <br><var>{slug}</var><br> as it is no longer 'pending' but 'done. Well Done." default_note += (
f"to remove the line <br><var>{slug}</var><br> as it is no longer 'pending' but 'done. Well Done."
)
survex_file = get_survex_file(k) survex_file = get_survex_file(k)
@ -165,25 +176,25 @@ def do_pending_cave(k, url, area):
underground_description="Pending cave write-up - creating as empty object. No XML file available yet.", underground_description="Pending cave write-up - creating as empty object. No XML file available yet.",
survex_file=survex_file, survex_file=survex_file,
url=url, url=url,
notes = default_note) notes=default_note,
)
if cave: if cave:
cave.save() # must save to have id before foreign keys work. This is also a ManyToMany key. cave.save() # must save to have id before foreign keys work. This is also a ManyToMany key.
cave.area.add(area) cave.area.add(area)
cave.save() cave.save()
message = f" ! {k:18} {cave.underground_description} url: {url}" message = f" ! {k:18} {cave.underground_description} url: {url}"
DataIssue.objects.create(parser='caves', message=message, url=url) DataIssue.objects.create(parser="caves", message=message, url=url)
print(message) print(message)
try: # Now create a cave slug ID try: # Now create a cave slug ID
cs = CaveSlug.objects.update_or_create(cave = cave, cs = CaveSlug.objects.update_or_create(cave=cave, slug=slug, primary=False)
slug = slug, primary = False)
except: except:
message = f" ! {k:11s} PENDING cave SLUG create failure" message = f" ! {k:11s} PENDING cave SLUG create failure"
DataIssue.objects.create(parser='caves', message=message) DataIssue.objects.create(parser="caves", message=message)
print(message) print(message)
else: else:
message = f' ! {k:11s} PENDING cave create failure' message = f" ! {k:11s} PENDING cave create failure"
DataIssue.objects.create(parser='caves', message=message) DataIssue.objects.create(parser="caves", message=message)
print(message) print(message)
try: try:
@ -196,15 +207,14 @@ def do_pending_cave(k, url, area):
break break
except: except:
message = f" ! {k:11s} PENDING entrance + cave UNION create failure '{cave}' [{ent}]" message = f" ! {k:11s} PENDING entrance + cave UNION create failure '{cave}' [{ent}]"
DataIssue.objects.create(parser='caves', message=message) DataIssue.objects.create(parser="caves", message=message)
print(message) print(message)
def readentrance(filename): def readentrance(filename):
'''Reads an enrance description from the .html file """Reads an enrance description from the .html file
Convoluted. Sorry.This is as I inherited it and I haven't fiddled with it. Needs rewriting Convoluted. Sorry.This is as I inherited it and I haven't fiddled with it. Needs rewriting
''' """
global entrances_xslug global entrances_xslug
global caves_xslug global caves_xslug
global areas_xslug global areas_xslug
@ -217,7 +227,7 @@ def readentrance(filename):
entrancecontentslist = getXML(contents, "entrance", maxItems=1, context=context) entrancecontentslist = getXML(contents, "entrance", maxItems=1, context=context)
if len(entrancecontentslist) != 1: if len(entrancecontentslist) != 1:
message = f'! BAD ENTRANCE at "{filename}"' message = f'! BAD ENTRANCE at "{filename}"'
DataIssue.objects.create(parser='caves', message=message) DataIssue.objects.create(parser="caves", message=message)
print(message) print(message)
else: else:
entrancecontents = entrancecontentslist[0] entrancecontents = entrancecontentslist[0]
@ -246,8 +256,14 @@ def readentrance(filename):
bearings = getXML(entrancecontents, "bearings", maxItems=1, context=context) bearings = getXML(entrancecontents, "bearings", maxItems=1, context=context)
url = getXML(entrancecontents, "url", maxItems=1, context=context) url = getXML(entrancecontents, "url", maxItems=1, context=context)
# if len(non_public) == 1 and len(slugs) >= 1 and len(name) >= 1 and len(entrance_description) == 1 and len(explorers) == 1 and len(map_description) == 1 and len(location_description) == 1 and len(lastvisit) == 1 and len(approach) == 1 and len(underground_description) == 1 and len(marking) == 1 and len(marking_comment) == 1 and len(findability) == 1 and len(findability_description) == 1 and len(alt) == 1 and len(northing) == 1 and len(easting) == 1 and len(tag_station) == 1 and len(exact_station) == 1 and len(other_station) == 1 and len(other_description) == 1 and len(bearings) == 1 and len(url) == 1: # if len(non_public) == 1 and len(slugs) >= 1 and len(name) >= 1 and len(entrance_description) == 1 and len(explorers) == 1 and len(map_description) == 1 and len(location_description) == 1 and len(lastvisit) == 1 and len(approach) == 1 and len(underground_description) == 1 and len(marking) == 1 and len(marking_comment) == 1 and len(findability) == 1 and len(findability_description) == 1 and len(alt) == 1 and len(northing) == 1 and len(easting) == 1 and len(tag_station) == 1 and len(exact_station) == 1 and len(other_station) == 1 and len(other_description) == 1 and len(bearings) == 1 and len(url) == 1:
e, state = Entrance.objects.update_or_create(name = name[0], e, state = Entrance.objects.update_or_create(
non_public = {"True": True, "False": False, "true": True, "false": False,}[non_public[0]], name=name[0],
non_public={
"True": True,
"False": False,
"true": True,
"false": False,
}[non_public[0]],
entrance_description=entrance_description[0], entrance_description=entrance_description[0],
explorers=explorers[0], explorers=explorers[0],
map_description=map_description[0], map_description=map_description[0],
@ -270,24 +286,21 @@ def readentrance(filename):
bearings=bearings[0], bearings=bearings[0],
url=url[0], url=url[0],
filename=filename, filename=filename,
cached_primary_slug = slugs[0]) cached_primary_slug=slugs[0],
)
primary = True primary = True
for slug in slugs: for slug in slugs:
# print("entrance slug:{} filename:{}".format(slug, filename)) # print("entrance slug:{} filename:{}".format(slug, filename))
try: try:
cs = EntranceSlug.objects.update_or_create(entrance = e, cs = EntranceSlug.objects.update_or_create(entrance=e, slug=slug, primary=primary)
slug = slug,
primary = primary)
except: except:
# need to cope with duplicates # need to cope with duplicates
message = f" ! FAILED to get precisely one ENTRANCE when updating using: cave_entrance/{filename}" message = f" ! FAILED to get precisely one ENTRANCE when updating using: cave_entrance/{filename}"
DataIssue.objects.create(parser='caves', message=message, url=f'/cave/{slug}/edit/') DataIssue.objects.create(parser="caves", message=message, url=f"/cave/{slug}/edit/")
kents = EntranceSlug.objects.all().filter(entrance = e, kents = EntranceSlug.objects.all().filter(entrance=e, slug=slug, primary=primary)
slug = slug,
primary = primary)
for k in kents: for k in kents:
message = " ! - DUPLICATE in db. entrance:" + str(k.entrance) + ", slug:" + str(k.slug()) message = " ! - DUPLICATE in db. entrance:" + str(k.entrance) + ", slug:" + str(k.slug())
DataIssue.objects.create(parser='caves', message=message, url=f'/cave/{slug}/edit/') DataIssue.objects.create(parser="caves", message=message, url=f"/cave/{slug}/edit/")
print(message) print(message)
for k in kents: for k in kents:
if k.slug() != None: if k.slug() != None:
@ -301,11 +314,12 @@ def readentrance(filename):
# DataIssue.objects.create(parser='caves', message=message, url=f'/cave/{slug}/edit/') # DataIssue.objects.create(parser='caves', message=message, url=f'/cave/{slug}/edit/')
# print(message) # print(message)
def readcave(filename): def readcave(filename):
'''Reads an enrance description from the .html file """Reads an enrance description from the .html file
Convoluted. Sorry.This is as I inherited it and I haven't fiddled with it. Needs rewriting Convoluted. Sorry.This is as I inherited it and I haven't fiddled with it. Needs rewriting
Assumes any area it hasn't seen before is a subarea of 1623 Assumes any area it hasn't seen before is a subarea of 1623
''' """
global entrances_xslug global entrances_xslug
global caves_xslug global caves_xslug
global areas_xslug global areas_xslug
@ -317,7 +331,7 @@ def readcave(filename):
cavecontentslist = getXML(contents, "cave", maxItems=1, context=context) cavecontentslist = getXML(contents, "cave", maxItems=1, context=context)
if len(cavecontentslist) != 1: if len(cavecontentslist) != 1:
message = f'! BAD CAVE at "{filename}"' message = f'! BAD CAVE at "{filename}"'
DataIssue.objects.create(parser='caves', message=message) DataIssue.objects.create(parser="caves", message=message)
print(message) print(message)
else: else:
cavecontents = cavecontentslist[0] cavecontents = cavecontentslist[0]
@ -344,9 +358,37 @@ def readcave(filename):
url = getXML(cavecontents, "url", maxItems=1, context=context) url = getXML(cavecontents, "url", maxItems=1, context=context)
entrances = getXML(cavecontents, "entrance", context=context) entrances = getXML(cavecontents, "entrance", context=context)
if len(non_public) == 1 and len(slugs) >= 1 and len(official_name) == 1 and len(areas) >= 1 and len(kataster_code) == 1 and len(kataster_number) == 1 and len(unofficial_number) == 1 and len(explorers) == 1 and len(underground_description) == 1 and len(equipment) == 1 and len(references) == 1 and len(survey) == 1 and len(kataster_status) == 1 and len(underground_centre_line) == 1 and len(notes) == 1 and len(length) == 1 and len(depth) == 1 and len(extent) == 1 and len(survex_file) == 1 and len(description_file ) == 1 and len(url) == 1: if (
len(non_public) == 1
and len(slugs) >= 1
and len(official_name) == 1
and len(areas) >= 1
and len(kataster_code) == 1
and len(kataster_number) == 1
and len(unofficial_number) == 1
and len(explorers) == 1
and len(underground_description) == 1
and len(equipment) == 1
and len(references) == 1
and len(survey) == 1
and len(kataster_status) == 1
and len(underground_centre_line) == 1
and len(notes) == 1
and len(length) == 1
and len(depth) == 1
and len(extent) == 1
and len(survex_file) == 1
and len(description_file) == 1
and len(url) == 1
):
try: try:
c, state = Cave.objects.update_or_create(non_public = {"True": True, "False": False, "true": True, "false": False,}[non_public[0]], c, state = Cave.objects.update_or_create(
non_public={
"True": True,
"False": False,
"true": True,
"false": False,
}[non_public[0]],
official_name=official_name[0], official_name=official_name[0],
kataster_code=kataster_code[0], kataster_code=kataster_code[0],
kataster_number=kataster_number[0], kataster_number=kataster_number[0],
@ -365,13 +407,14 @@ def readcave(filename):
survex_file=survex_file[0], survex_file=survex_file[0],
description_file=description_file[0], description_file=description_file[0],
url=url[0], url=url[0],
filename = filename) filename=filename,
)
except: except:
print(" ! FAILED to get only one CAVE when updating using: " + filename) print(" ! FAILED to get only one CAVE when updating using: " + filename)
kaves = Cave.objects.all().filter(kataster_number=kataster_number[0]) kaves = Cave.objects.all().filter(kataster_number=kataster_number[0])
for k in kaves: for k in kaves:
message = " ! - DUPLICATES in db. kataster:" + str(k.kataster_number) + ", slug:" + str(k.slug()) message = " ! - DUPLICATES in db. kataster:" + str(k.kataster_number) + ", slug:" + str(k.slug())
DataIssue.objects.create(parser='caves', message=message) DataIssue.objects.create(parser="caves", message=message)
print(message) print(message)
for k in kaves: for k in kaves:
if k.slug() != None: if k.slug() != None:
@ -397,14 +440,12 @@ def readcave(filename):
cs = caves_xslug[slug] cs = caves_xslug[slug]
else: else:
try: # we want to overwrite a PENDING cave if we are now importing the 1623-xxx.html file for it try: # we want to overwrite a PENDING cave if we are now importing the 1623-xxx.html file for it
cs = CaveSlug.objects.update_or_create(cave = c, cs = CaveSlug.objects.update_or_create(cave=c, slug=slug, primary=primary)
slug = slug,
primary = primary)
caves_xslug[slug] = cs caves_xslug[slug] = cs
except Exception as ex: except Exception as ex:
# This fails to do an update! It just crashes.. to be fixed # This fails to do an update! It just crashes.. to be fixed
message = f" ! Cave update/create failure : {slug}, skipping file cave_data/{context} with exception\nException: {ex.__class__}" message = f" ! Cave update/create failure : {slug}, skipping file cave_data/{context} with exception\nException: {ex.__class__}"
DataIssue.objects.create(parser='caves', message=message) DataIssue.objects.create(parser="caves", message=message)
print(message) print(message)
primary = False primary = False
@ -425,60 +466,67 @@ def readcave(filename):
else: else:
entrance = Entrance.objects.get(entranceslug__slug=eslug) entrance = Entrance.objects.get(entranceslug__slug=eslug)
entrances_xslug[eslug] = entrance entrances_xslug[eslug] = entrance
ce = CaveAndEntrance.objects.update_or_create(cave = c, entrance_letter = letter, entrance = entrance) ce = CaveAndEntrance.objects.update_or_create(
cave=c, entrance_letter=letter, entrance=entrance
)
except: except:
message = f' ! Entrance setting failure, slug:"{slug}" #entrances:{len(entrances)} {entrance} letter:"{letter}" cave:"{c}" filename:"cave_data/{filename}"' message = f' ! Entrance setting failure, slug:"{slug}" #entrances:{len(entrances)} {entrance} letter:"{letter}" cave:"{c}" filename:"cave_data/{filename}"'
DataIssue.objects.create(parser='caves', message=message, url=f'{c.url}_edit/') DataIssue.objects.create(parser="caves", message=message, url=f"{c.url}_edit/")
print(message) print(message)
if survex_file[0]: if survex_file[0]:
if not (Path(SURVEX_DATA) / survex_file[0]).is_file(): if not (Path(SURVEX_DATA) / survex_file[0]).is_file():
message = f' ! {slug:12} survex filename does not exist :LOSER:"{survex_file[0]}" in "{filename}"' message = f' ! {slug:12} survex filename does not exist :LOSER:"{survex_file[0]}" in "{filename}"'
DataIssue.objects.create(parser='caves', message=message, url=f'/{slug[0:4]}/{slug}_cave_edit/') DataIssue.objects.create(parser="caves", message=message, url=f"/{slug[0:4]}/{slug}_cave_edit/")
print(message) print(message)
if description_file[0]: # if not an empty string if description_file[0]: # if not an empty string
message = f' - {slug:12} Note (not an error): complex description filename "{description_file[0]}" inside "{CAVEDESCRIPTIONS}/{filename}"' message = f' - {slug:12} Note (not an error): complex description filename "{description_file[0]}" inside "{CAVEDESCRIPTIONS}/{filename}"'
DataIssue.objects.create(parser='caves ok', message=message, url=f'/{slug}_cave_edit/') DataIssue.objects.create(parser="caves ok", message=message, url=f"/{slug}_cave_edit/")
print(message) print(message)
if not (Path(EXPOWEB) / description_file[0]).is_file(): if not (Path(EXPOWEB) / description_file[0]).is_file():
message = f' ! {slug:12} description filename "{EXPOWEB}/{description_file[0]}" does not refer to a real file' message = f' ! {slug:12} description filename "{EXPOWEB}/{description_file[0]}" does not refer to a real file'
DataIssue.objects.create(parser='caves', message=message, url=f'/{slug}_cave_edit/') DataIssue.objects.create(parser="caves", message=message, url=f"/{slug}_cave_edit/")
print(message) print(message)
# c.description_file="" # done only once, to clear out cruft. # c.description_file="" # done only once, to clear out cruft.
# c.save() # c.save()
else: # more than one item in long list else: # more than one item in long list
message = f' ! ABORT loading this cave. in "{filename}"' message = f' ! ABORT loading this cave. in "{filename}"'
DataIssue.objects.create(parser='caves', message=message, url=f'/{slug}_cave_edit/') DataIssue.objects.create(parser="caves", message=message, url=f"/{slug}_cave_edit/")
print(message) print(message)
def getXML(text, itemname, minItems=1, maxItems=None, printwarnings=True, context=""):
    """Return the contents of every ``<itemname>...</itemname>`` element in text.

    This is a regex scan, not a real XML parser: it only recognises tags
    written exactly as ``<itemname>`` (no attributes) and matches each opening
    tag with the nearest following closing tag.

    Args:
        text: The string to search.
        itemname: Tag name to look for. It is matched literally.
        minItems: Fewest matches expected; fewer triggers a warning.
        maxItems: Most matches expected, or None for no upper limit.
        printwarnings: When false, count violations are neither printed nor
            recorded as DataIssue objects.
        context: Name of the file being parsed; used in warnings and as the
            DataIssue url.

    Returns:
        A list of the matched inner strings, in document order. If minItems
        is 0 and nothing matched, ``[""]`` is returned so that callers can
        always index element 0 of an optional field.

    Note:
        Despite the "Load ABORT" wording in the warnings, this function never
        raises or aborts; callers must check the length of the result.
    """
    # Escape the tag name so regex metacharacters in it cannot match other tags.
    tag = re.escape(itemname)
    # re.S lets an element's content span several lines.
    items = re.findall(f"<{tag}>(.*?)</{tag}>", text, re.S)

    if printwarnings:
        found = len(items)
        if found < minItems:
            message = (
                f" ! {found} x {itemname} found, at least {minItems} expected. Load ABORT. "
                f" in file {context}"
            )
            DataIssue.objects.create(parser="caves", message=message, url=context)
            print(message)
        if maxItems is not None and found > maxItems:
            message = (
                f" ! {found} x {itemname} found, no more than {maxItems} expected in this XML unit. Load ABORT. "
                f" in file {context}"
            )
            # Record the url here too, consistent with the min-items warning.
            DataIssue.objects.create(parser="caves", message=message, url=context)
            print(message)

    if minItems == 0 and not items:
        items = [""]
    return items
def readcaves(): def readcaves():
'''Reads the xml-format HTML files in the EXPOWEB repo, not from the loser repo. """Reads the xml-format HTML files in the EXPOWEB repo, not from the loser repo."""
'''
# For those caves which do not have cave_data/1623-xxx.html XML files even though they exist and have surveys # For those caves which do not have cave_data/1623-xxx.html XML files even though they exist and have surveys
# should put this in a simple list # should put this in a simple list
pending = set() pending = set()
@ -487,7 +535,7 @@ def readcaves():
with open(fpending, "r") as fo: with open(fpending, "r") as fo:
cids = fo.readlines() cids = fo.readlines()
for cid in cids: for cid in cids:
pending.add(cid.strip().rstrip('\n').upper()) pending.add(cid.strip().rstrip("\n").upper())
with transaction.atomic(): with transaction.atomic():
print(" - Deleting Caves and Entrances") print(" - Deleting Caves and Entrances")
@ -505,10 +553,10 @@ def readcaves():
except: except:
pass pass
# Clear the cave data issues and the caves as we are reloading # Clear the cave data issues and the caves as we are reloading
DataIssue.objects.filter(parser='areas').delete() DataIssue.objects.filter(parser="areas").delete()
DataIssue.objects.filter(parser='caves').delete() DataIssue.objects.filter(parser="caves").delete()
DataIssue.objects.filter(parser='caves ok').delete() DataIssue.objects.filter(parser="caves ok").delete()
DataIssue.objects.filter(parser='entrances').delete() DataIssue.objects.filter(parser="entrances").delete()
print(" - Creating Areas 1623, 1624, 1627 and 1626") print(" - Creating Areas 1623, 1624, 1627 and 1626")
# This crashes on the server with MariaDB even though a null parent is explicitly allowed. # This crashes on the server with MariaDB even though a null parent is explicitly allowed.
@ -521,7 +569,6 @@ def readcaves():
area_1627 = Area.objects.create(short_name="1627", super=None) area_1627 = Area.objects.create(short_name="1627", super=None)
area_1627.save() area_1627.save()
with transaction.atomic(): with transaction.atomic():
print(" - settings.CAVEDESCRIPTIONS: ", CAVEDESCRIPTIONS) print(" - settings.CAVEDESCRIPTIONS: ", CAVEDESCRIPTIONS)
print(" - Reading Entrances from entrance descriptions xml files") print(" - Reading Entrances from entrance descriptions xml files")
@ -535,7 +582,7 @@ def readcaves():
print(" - Reading Caves from cave descriptions xml files") print(" - Reading Caves from cave descriptions xml files")
for filename in next(os.walk(CAVEDESCRIPTIONS))[2]: # Should be a better way of getting a list of files for filename in next(os.walk(CAVEDESCRIPTIONS))[2]: # Should be a better way of getting a list of files
if filename.endswith('.html'): if filename.endswith(".html"):
readcave(filename) readcave(filename)
print(" - Setting up all the variously useful alias names") print(" - Setting up all the variously useful alias names")
@ -549,11 +596,10 @@ def readcaves():
if k[0:3] == "162": if k[0:3] == "162":
areanum = k[0:4] areanum = k[0:4]
url = f'{areanum}/{k[5:]}' # Note we are not appending the .htm as we are modern folks now. url = f"{areanum}/{k[5:]}" # Note we are not appending the .htm as we are modern folks now.
else: else:
areanum = "1623" areanum = "1623"
url = f'1623/{k}' url = f"1623/{k}"
area = area_1623 area = area_1623
if areanum == "1623": if areanum == "1623":
@ -568,8 +614,6 @@ def readcaves():
do_pending_cave(k, url, area) do_pending_cave(k, url, area)
except: except:
message = f" ! Error. Cannot create pending cave and entrance, pending-id:{k} in area {areanum}" message = f" ! Error. Cannot create pending cave and entrance, pending-id:{k} in area {areanum}"
DataIssue.objects.create(parser='caves', message=message) DataIssue.objects.create(parser="caves", message=message)
print(message) print(message)
raise raise

View File

@ -13,11 +13,11 @@ from troggle.core.models.survex import DrawingFile, SingleScan, Wallet
from troggle.core.models.troggle import DataIssue from troggle.core.models.troggle import DataIssue
from troggle.core.utils import save_carefully from troggle.core.utils import save_carefully
'''Searches through all the :drawings: repository looking """Searches through all the :drawings: repository looking
for tunnel and therion files for tunnel and therion files
''' """
todo='''- Rename functions more consistently between tunnel and therion variants todo = """- Rename functions more consistently between tunnel and therion variants
- Recode to use pathlib instead of whacky resetting of loop variable inside loop - Recode to use pathlib instead of whacky resetting of loop variable inside loop
to scan sub-folders. to scan sub-folders.
@ -25,20 +25,23 @@ to scan sub-folders.
- Recode rx_valid_ext to use pathlib suffix() function - Recode rx_valid_ext to use pathlib suffix() function
- Recode load_drawings_files() to use a list of suffices not huge if-else monstrosity - Recode load_drawings_files() to use a list of suffices not huge if-else monstrosity
''' """
# Matches a path that ends in one of the image/scan/text extensions listed below.
# The leading (?i) makes the match case-insensitive; the trailing $ anchors it
# to the end of the string, so only the final extension counts.
rx_valid_ext = re.compile(r"(?i)\.(?:png|jpg|pdf|jpeg|gif|txt)$")
def find_dwg_file(dwgfile, path): def find_dwg_file(dwgfile, path):
'''Is given a line of text 'path' which may or may not contain a recognisable name of a scanned file """Is given a line of text 'path' which may or may not contain a recognisable name of a scanned file
which we have already seen when we imported all the files we could find in the surveyscans direstories. which we have already seen when we imported all the files we could find in the surveyscans direstories.
The purpose is to find cross-references between Tunnel drawing files. But this is not reported anywhere yet ? The purpose is to find cross-references between Tunnel drawing files. But this is not reported anywhere yet ?
What is all this really for ?! Is this data used anywhere ?? What is all this really for ?! Is this data used anywhere ??
''' """
wallet, scansfile = None, None wallet, scansfile = None, None
mscansdir = re.search(r"(\d\d\d\d#X?\d+\w?|1995-96kh|92-94Surveybookkh|1991surveybook|smkhs)/(.*?(?:png|jpg|pdf|jpeg|gif|txt))$", path) mscansdir = re.search(
r"(\d\d\d\d#X?\d+\w?|1995-96kh|92-94Surveybookkh|1991surveybook|smkhs)/(.*?(?:png|jpg|pdf|jpeg|gif|txt))$", path
)
if mscansdir: if mscansdir:
scanswalletl = Wallet.objects.filter(walletname=mscansdir.group(1)) scanswalletl = Wallet.objects.filter(walletname=mscansdir.group(1))
# This should be changed to properly detect if a list of folders is returned and do something sensible, not just pick the first. # This should be changed to properly detect if a list of folders is returned and do something sensible, not just pick the first.
@ -47,7 +50,7 @@ def find_dwg_file(dwgfile, path):
if len(scanswalletl) > 1: if len(scanswalletl) > 1:
message = f"! More than one scan FOLDER matches filter query. [{scansfilel[0]}]: {mscansdir.group(1)} {mscansdir.group(2)} {dwgfile.dwgpath} {path}" message = f"! More than one scan FOLDER matches filter query. [{scansfilel[0]}]: {mscansdir.group(1)} {mscansdir.group(2)} {dwgfile.dwgpath} {path}"
print(message) print(message)
DataIssue.objects.create(parser='Tunnel', message=message) DataIssue.objects.create(parser="Tunnel", message=message)
if wallet: if wallet:
scansfilel = wallet.singlescan_set.filter(name=mscansdir.group(2)) scansfilel = wallet.singlescan_set.filter(name=mscansdir.group(2))
@ -58,7 +61,7 @@ def find_dwg_file(dwgfile, path):
plist.append(sf.ffile) plist.append(sf.ffile)
message = f"! More than one image FILENAME matches filter query. [{scansfilel[0]}]: {mscansdir.group(1)} {mscansdir.group(2)} {dwgfile.dwgpath} {path} {plist}" message = f"! More than one image FILENAME matches filter query. [{scansfilel[0]}]: {mscansdir.group(1)} {mscansdir.group(2)} {dwgfile.dwgpath} {path} {plist}"
print(message) print(message)
DataIssue.objects.create(parser='Tunnel', message=message) DataIssue.objects.create(parser="Tunnel", message=message)
scansfile = scansfilel[0] scansfile = scansfilel[0]
if wallet: if wallet:
@ -66,7 +69,9 @@ def find_dwg_file(dwgfile, path):
if scansfile: if scansfile:
dwgfile.scans.add(scansfile) dwgfile.scans.add(scansfile)
elif path and not rx_valid_ext.search(path): # ie not recognised as a path where wallets live and not an image file type elif path and not rx_valid_ext.search(
path
): # ie not recognised as a path where wallets live and not an image file type
name = os.path.split(path)[1] name = os.path.split(path)[1]
rdwgfilel = DrawingFile.objects.filter(dwgname=name) # Check if it is another drawing file we have already seen rdwgfilel = DrawingFile.objects.filter(dwgname=name) # Check if it is another drawing file we have already seen
if len(rdwgfilel): if len(rdwgfilel):
@ -76,15 +81,15 @@ def find_dwg_file(dwgfile, path):
plist.append(df.dwgpath) plist.append(df.dwgpath)
message = f"- Warning {len(rdwgfilel)} files named '{name}' {plist}" # should not be a problem? message = f"- Warning {len(rdwgfilel)} files named '{name}' {plist}" # should not be a problem?
print(message) print(message)
DataIssue.objects.create(parser='Tunnel', message=message, url=f'/dwgdataraw/{path}') DataIssue.objects.create(parser="Tunnel", message=message, url=f"/dwgdataraw/{path}")
rdwgfile = rdwgfilel[0] rdwgfile = rdwgfilel[0]
dwgfile.dwgcontains.add(rdwgfile) dwgfile.dwgcontains.add(rdwgfile)
dwgfile.save() dwgfile.save()
def findwalletimage(therionfile, foundpath): def findwalletimage(therionfile, foundpath):
'''Tries to link the drawing file (Therion format) to the referenced image (scan) file """Tries to link the drawing file (Therion format) to the referenced image (scan) file"""
'''
foundpath = foundpath.strip("{}") foundpath = foundpath.strip("{}")
mscansdir = re.search(r"(\d\d\d\d#\d+\w?|1995-96kh|92-94Surveybookkh|1991surveybook|smkhs)", foundpath) mscansdir = re.search(r"(\d\d\d\d#\d+\w?|1995-96kh|92-94Surveybookkh|1991surveybook|smkhs)", foundpath)
if mscansdir: if mscansdir:
@ -93,9 +98,11 @@ def findwalletimage(therionfile, foundpath):
if len(scanswalletl): if len(scanswalletl):
wallet = scanswalletl[0] wallet = scanswalletl[0]
if len(scanswalletl) > 1: if len(scanswalletl) > 1:
message = "! More than one scan FOLDER matches filter query. [{}]: {} {} {}".format(therionfile, mscansdir.group(1), foundpath) message = "! More than one scan FOLDER matches filter query. [{}]: {} {} {}".format(
therionfile, mscansdir.group(1), foundpath
)
print(message) print(message)
DataIssue.objects.create(parser='Therion', message=message) DataIssue.objects.create(parser="Therion", message=message)
if wallet: if wallet:
therionfile.dwgwallets.add(wallet) therionfile.dwgwallets.add(wallet)
@ -110,28 +117,28 @@ def findwalletimage(therionfile, foundpath):
plist.append(sf.ffile) plist.append(sf.ffile)
message = f"! More than one image FILENAME matches filter query. [{scansfilel[0]}]: {mscansdir.group(1)} {mscansdir.group(2)} {dwgfile.dwgpath} {path} {plist}" message = f"! More than one image FILENAME matches filter query. [{scansfilel[0]}]: {mscansdir.group(1)} {mscansdir.group(2)} {dwgfile.dwgpath} {path} {plist}"
print(message) print(message)
DataIssue.objects.create(parser='Therion', message=message) DataIssue.objects.create(parser="Therion", message=message)
scansfile = scansfilel[0] scansfile = scansfilel[0]
therionfile.scans.add(scansfile) therionfile.scans.add(scansfile)
else: else:
message = f'! Scanned file {scanfilename} mentioned in "{therionfile.dwgpath}" is not actually found in {wallet.walletname}' message = f'! Scanned file {scanfilename} mentioned in "{therionfile.dwgpath}" is not actually found in {wallet.walletname}'
wurl = f'/survey_scans/{wallet.walletname}/'.replace("#",":") wurl = f"/survey_scans/{wallet.walletname}/".replace("#", ":")
# print(message) # print(message)
DataIssue.objects.create(parser='Therion', message=message, url = wurl) DataIssue.objects.create(parser="Therion", message=message, url=wurl)
def findimportinsert(therionfile, imp):
    """Tries to link the scrap (Therion format) to the referenced therion scrap.

    Not implemented yet: both arguments are accepted and ignored, nothing is
    read or written, and the function always returns None.
    """
    return None
# Patterns applied to the whole text of a Therion drawing file. All three use
# re.MULTILINE so that ^ and $ anchor at every line, not just the file ends.

# From "xth_me_image_insert" to the end of a line that finishes with a "}".
rx_xth_me = re.compile(r"xth_me_image_insert.*{.*}$", re.MULTILINE)
# A line starting "survey "; captures the word characters that follow.
rx_scrap = re.compile(r"^survey (\w*).*$", re.MULTILINE)
# A line starting "input "; captures the word characters that follow.
rx_input = re.compile(r"^input (\w*).*$", re.MULTILINE)
def settherionfileinfo(filetuple): def settherionfileinfo(filetuple):
'''Read in the drawing file contents and sets values on the dwgfile object """Read in the drawing file contents and sets values on the dwgfile object"""
'''
thtype, therionfile = filetuple thtype, therionfile = filetuple
ff = os.path.join(settings.DRAWINGS_DATA, therionfile.dwgpath) ff = os.path.join(settings.DRAWINGS_DATA, therionfile.dwgpath)
@ -139,17 +146,17 @@ def settherionfileinfo(filetuple):
if therionfile.filesize <= 0: if therionfile.filesize <= 0:
message = f"! Zero length therion file {ff}" message = f"! Zero length therion file {ff}"
print(message) print(message)
DataIssue.objects.create(parser='Therion', message=message, url=f'/dwgdataraw/{therionfile.dwgpath}') DataIssue.objects.create(parser="Therion", message=message, url=f"/dwgdataraw/{therionfile.dwgpath}")
return return
fin = open(ff,'r') fin = open(ff, "r")
ttext = fin.read() ttext = fin.read()
fin.close() fin.close()
# The equivalent for a tunnel 'path' would be a .th2 'line wall' or 'scrap' # The equivalent for a tunnel 'path' would be a .th2 'line wall' or 'scrap'
# print(len(re.findall(r"line", ttext))) # print(len(re.findall(r"line", ttext)))
if thtype=='th': if thtype == "th":
therionfile.npaths = len(re.findall(r"^input ", ttext, re.MULTILINE)) therionfile.npaths = len(re.findall(r"^input ", ttext, re.MULTILINE))
elif thtype=='th2': elif thtype == "th2":
therionfile.npaths = len(re.findall(r"^line ", ttext, re.MULTILINE)) therionfile.npaths = len(re.findall(r"^line ", ttext, re.MULTILINE))
therionfile.save() therionfile.save()
@ -162,42 +169,44 @@ def settherionfileinfo(filetuple):
for xth_me in rx_xth_me.findall(ttext): for xth_me in rx_xth_me.findall(ttext):
# WORK IN PROGRESS. Do not clutter up the DataIssues list with this # WORK IN PROGRESS. Do not clutter up the DataIssues list with this
message = f'! Un-parsed image filename: {therionfile.dwgname} : {xth_me.split()[-3]} - {therionfile.dwgpath}' message = f"! Un-parsed image filename: {therionfile.dwgname} : {xth_me.split()[-3]} - {therionfile.dwgpath}"
# print(message) # print(message)
# DataIssue.objects.create(parser='xTherion', message=message, url=f'/dwgdataraw/{therionfile.dwgpath}') # DataIssue.objects.create(parser='xTherion', message=message, url=f'/dwgdataraw/{therionfile.dwgpath}')
# ! Un-parsed image filename: 107coldest : ../../../expofiles/surveyscans/2015/2015#20/notes.jpg - therion/plan/107coldest.th2 # ! Un-parsed image filename: 107coldest : ../../../expofiles/surveyscans/2015/2015#20/notes.jpg - therion/plan/107coldest.th2
with open('therionrefs.log', 'a') as lg: with open("therionrefs.log", "a") as lg:
lg.write(message + '\n') lg.write(message + "\n")
findwalletimage(therionfile, xth_me.split()[-3]) findwalletimage(therionfile, xth_me.split()[-3])
for inp in rx_input.findall(ttext): for inp in rx_input.findall(ttext):
# if this 'input' is a .th2 file we have already seen, then we can assign this as a sub-file # if this 'input' is a .th2 file we have already seen, then we can assign this as a sub-file
# but we would need to disentangle to get the current path properly # but we would need to disentangle to get the current path properly
message = f'! Un-set (?) Therion .th2 input: - {therionfile.dwgname} : {inp} - {therionfile.dwgpath}' message = f"! Un-set (?) Therion .th2 input: - {therionfile.dwgname} : {inp} - {therionfile.dwgpath}"
# print(message) # print(message)
DataIssue.objects.create(parser='xTherion', message=message, url=f'/dwgdataraw/{therionfile.dwgpath}') DataIssue.objects.create(parser="xTherion", message=message, url=f"/dwgdataraw/{therionfile.dwgpath}")
findimportinsert(therionfile, inp) findimportinsert(therionfile, inp)
therionfile.save() therionfile.save()
# Byte patterns: they are applied to tunnel file contents read in binary mode.

# Every occurrence of an opening "<skpath" tag.
rx_skpath = re.compile(rb"<skpath")
# A <pcarea area_signal="frame" ...> element; captures its sfsketch and sfstyle
# attribute values (in that order) as bytes.
rx_pcpath = re.compile(rb'<pcarea area_signal="frame".*?sfsketch="([^"]*)" sfstyle="([^"]*)"')
def settnlfileinfo(dwgfile): def settnlfileinfo(dwgfile):
'''Read in the drawing file contents and sets values on the dwgfile object """Read in the drawing file contents and sets values on the dwgfile object
Should try to read the date too e.g. tunneldate="2010-08-16 22:51:57 Should try to read the date too e.g. tunneldate="2010-08-16 22:51:57
then we could display on the master calendar per expo. then we could display on the master calendar per expo.
''' """
ff = os.path.join(settings.DRAWINGS_DATA, dwgfile.dwgpath) ff = os.path.join(settings.DRAWINGS_DATA, dwgfile.dwgpath)
dwgfile.filesize = os.stat(ff)[stat.ST_SIZE] dwgfile.filesize = os.stat(ff)[stat.ST_SIZE]
if dwgfile.filesize <= 0: if dwgfile.filesize <= 0:
message = f"! Zero length tunnel file {ff}" message = f"! Zero length tunnel file {ff}"
print(message) print(message)
DataIssue.objects.create(parser='Tunnel', message=message, url=f'/dwgdataraw/{dwgfile.dwgpath}') DataIssue.objects.create(parser="Tunnel", message=message, url=f"/dwgdataraw/{dwgfile.dwgpath}")
return return
fin = open(ff,'rb') fin = open(ff, "rb")
ttext = fin.read() ttext = fin.read()
fin.close() fin.close()
@ -216,22 +225,24 @@ def settnlfileinfo(dwgfile):
dwgfile.save() dwgfile.save()
def setdrwfileinfo(dwgfile):
    """Record the on-disk size of a generic drawing file on the dwgfile object.

    These are SVGs, PDFs, images or .txt files, so there is no useful format to
    search inside them. Apart from the size check this function is a placeholder
    in case we think of a way to do something to recognise generic survex
    filenames.

    dwgfile: the drawing-file object to update; its dwgpath is taken relative
        to settings.DRAWINGS_DATA.

    A zero-length file is reported by printing a message and creating a
    DataIssue (parser "drawings"); the object is then left unsaved, as in the
    tunnel and therion variants. Otherwise the object is saved so that the
    filesize is persisted.

    Any exception from Path.stat() (for example when the file does not exist)
    propagates to the caller.
    """
    ff = Path(settings.DRAWINGS_DATA) / dwgfile.dwgpath
    dwgfile.filesize = ff.stat().st_size
    if dwgfile.filesize <= 0:
        message = f"! Zero length drawing file {ff}"
        print(message)
        DataIssue.objects.create(parser="drawings", message=message, url=f"/dwgdataraw/{dwgfile.dwgpath}")
        return

    # Fix: the size was previously set on the in-memory object only and never
    # written back, unlike settnlfileinfo() and settherionfileinfo(), which
    # both finish with a save().
    dwgfile.save()
def load_drawings_files(): def load_drawings_files():
'''Breadth first search of drawings directory looking for sub-directories and *.xml filesize """Breadth first search of drawings directory looking for sub-directories and *.xml filesize
This is brain-damaged very early code. Should be replaced with proper use of pathlib. This is brain-damaged very early code. Should be replaced with proper use of pathlib.
Why do we have all this detection of file types/! Why not use get_mime_types ? Why do we have all this detection of file types/! Why not use get_mime_types ?
@ -239,17 +250,16 @@ def load_drawings_files():
We import JPG, PNG and SVG files; which have already been put on the server, We import JPG, PNG and SVG files; which have already been put on the server,
but the upload form intentionally refuses to upload PNG and JPG (though it does allow SVG) but the upload form intentionally refuses to upload PNG and JPG (though it does allow SVG)
''' """
all_xml = [] all_xml = []
drawdatadir = settings.DRAWINGS_DATA drawdatadir = settings.DRAWINGS_DATA
DrawingFile.objects.all().delete() DrawingFile.objects.all().delete()
DataIssue.objects.filter(parser='drawings').delete() DataIssue.objects.filter(parser="drawings").delete()
DataIssue.objects.filter(parser='Therion').delete() DataIssue.objects.filter(parser="Therion").delete()
DataIssue.objects.filter(parser='xTherion').delete() DataIssue.objects.filter(parser="xTherion").delete()
DataIssue.objects.filter(parser='Tunnel').delete() DataIssue.objects.filter(parser="Tunnel").delete()
if(os.path.isfile('therionrefs.log')): if os.path.isfile("therionrefs.log"):
os.remove('therionrefs.log') os.remove("therionrefs.log")
drawingsdirs = [""] drawingsdirs = [""]
while drawingsdirs: while drawingsdirs:
@ -260,64 +270,66 @@ def load_drawings_files():
lf = os.path.join(drawdir, f) lf = os.path.join(drawdir, f)
ff = os.path.join(drawdatadir, lf) ff = os.path.join(drawdatadir, lf)
if os.path.isdir(ff): if os.path.isdir(ff):
drawingsdirs.append(lf) # lunatic! adding to list in middle of list while loop! Replace with pathlib functions. drawingsdirs.append(
lf
) # lunatic! adding to list in middle of list while loop! Replace with pathlib functions.
elif Path(f).suffix.lower() == ".txt": elif Path(f).suffix.lower() == ".txt":
# Always creates new # Always creates new
dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1]) dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1])
dwgfile.save() dwgfile.save()
all_xml.append(('txt',dwgfile)) all_xml.append(("txt", dwgfile))
elif Path(f).suffix.lower() == ".xml": elif Path(f).suffix.lower() == ".xml":
# Always creates new # Always creates new
dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1]) dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1])
dwgfile.save() dwgfile.save()
all_xml.append(('xml',dwgfile)) all_xml.append(("xml", dwgfile))
elif Path(f).suffix.lower() == ".th": elif Path(f).suffix.lower() == ".th":
# Always creates new # Always creates new
dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1]) dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1])
dwgfile.save() dwgfile.save()
all_xml.append(('th',dwgfile)) all_xml.append(("th", dwgfile))
elif Path(f).suffix.lower() == ".th2": elif Path(f).suffix.lower() == ".th2":
# Always creates new # Always creates new
dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1]) dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1])
dwgfile.save() dwgfile.save()
all_xml.append(('th2',dwgfile)) all_xml.append(("th2", dwgfile))
elif Path(f).suffix.lower() == ".pdf": elif Path(f).suffix.lower() == ".pdf":
# Always creates new # Always creates new
dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1]) dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1])
dwgfile.save() dwgfile.save()
all_xml.append(('pdf',dwgfile)) all_xml.append(("pdf", dwgfile))
elif Path(f).suffix.lower() == ".png": elif Path(f).suffix.lower() == ".png":
# Always creates new # Always creates new
dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1]) dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1])
dwgfile.save() dwgfile.save()
all_xml.append(('png',dwgfile)) all_xml.append(("png", dwgfile))
elif Path(f).suffix.lower() == ".svg": elif Path(f).suffix.lower() == ".svg":
# Always creates new # Always creates new
dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1]) dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1])
dwgfile.save() dwgfile.save()
all_xml.append(('svg',dwgfile)) all_xml.append(("svg", dwgfile))
elif Path(f).suffix.lower() == ".jpg": elif Path(f).suffix.lower() == ".jpg":
# Always creates new # Always creates new
dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1]) dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1])
dwgfile.save() dwgfile.save()
all_xml.append(('jpg',dwgfile)) all_xml.append(("jpg", dwgfile))
elif Path(f).suffix == '': elif Path(f).suffix == "":
# therion file # therion file
dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f)[1]) dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f)[1])
dwgfile.save() dwgfile.save()
all_xml.append(('',dwgfile)) all_xml.append(("", dwgfile))
print(f' - {len(all_xml)} Drawings files found') print(f" - {len(all_xml)} Drawings files found")
for d in all_xml: for d in all_xml:
if d[0] in ['pdf', 'txt', 'svg', 'jpg', 'png', '']: if d[0] in ["pdf", "txt", "svg", "jpg", "png", ""]:
setdrwfileinfo(d[1]) setdrwfileinfo(d[1])
if d[0] == 'xml': if d[0] == "xml":
settnlfileinfo(d[1]) settnlfileinfo(d[1])
# important to import .th2 files before .th so that we can assign them when found in .th files # important to import .th2 files before .th so that we can assign them when found in .th files
if d[0] == 'th2': if d[0] == "th2":
settherionfileinfo(d) settherionfileinfo(d)
if d[0] == 'th': if d[0] == "th":
settherionfileinfo(d) settherionfileinfo(d)
# for drawfile in DrawingFile.objects.all(): # for drawfile in DrawingFile.objects.all():

View File

@ -4,8 +4,7 @@ import sys
import django import django
from django.contrib.auth.models import User from django.contrib.auth.models import User
from django.core import management from django.core import management
from django.db import (close_old_connections, connection, connections, from django.db import close_old_connections, connection, connections, transaction
transaction)
from django.http import HttpResponse from django.http import HttpResponse
import troggle.parsers.caves import troggle.parsers.caves
@ -16,41 +15,48 @@ import troggle.parsers.QMs
import troggle.parsers.scans import troggle.parsers.scans
import troggle.settings import troggle.settings
'''Master data import. """Master data import.
Used only by databaseReset.py and online controlpanel. Used only by databaseReset.py and online controlpanel.
''' """
def import_caves():
    """Announce the target database, then run troggle.parsers.caves.readcaves().

    The name printed is the NAME setting of the "default" database connection.
    Unlike most of the other import_* wrappers, this call is not wrapped in
    transaction.atomic() here.
    """
    print("-- Importing Caves to ", end="")
    db_name = django.db.connections.databases["default"]["NAME"]
    print(db_name)
    troggle.parsers.caves.readcaves()
def import_people():
    """Announce the target database, then run the people parser atomically.

    The name printed is the NAME setting of the "default" database connection.
    troggle.parsers.people.load_people_expos() runs inside a single
    transaction.atomic() block.
    """
    print("-- Importing People (folk.csv) to ", end="")
    db_name = django.db.connections.databases["default"]["NAME"]
    print(db_name)
    with transaction.atomic():
        troggle.parsers.people.load_people_expos()
def import_surveyscans():
    """Run troggle.parsers.scans.load_all_scans() inside one atomic transaction."""
    print("-- Importing Survey Scans")
    with transaction.atomic():
        troggle.parsers.scans.load_all_scans()
def import_logbooks():
    """Run troggle.parsers.logbooks.LoadLogbooks() inside one atomic transaction."""
    print("-- Importing Logbooks")
    with transaction.atomic():
        troggle.parsers.logbooks.LoadLogbooks()
def import_logbook(year=2022):
    """Run troggle.parsers.logbooks.LoadLogbook(year) inside one atomic transaction.

    year: passed straight through to LoadLogbook; defaults to 2022.
    """
    print(f"-- Importing Logbook {year}")
    with transaction.atomic():
        troggle.parsers.logbooks.LoadLogbook(year)
def import_QMs():
    """Run troggle.parsers.QMs.Load_QMs() inside one atomic transaction."""
    print("-- Importing old QMs for 161, 204, 234 from CSV files")
    with transaction.atomic():
        troggle.parsers.QMs.Load_QMs()
def import_survex(): def import_survex():
# when this import is moved to the top with the rest it all crashes horribly # when this import is moved to the top with the rest it all crashes horribly
print("-- Importing Survex and Entrance Positions") print("-- Importing Survex and Entrance Positions")
@ -63,23 +69,26 @@ def import_survex():
with transaction.atomic(): with transaction.atomic():
troggle.parsers.survex.LoadPositions() troggle.parsers.survex.LoadPositions()
def import_ents():
    """Run troggle.parsers.survex.LoadPositions() inside one atomic transaction.

    The survex parser is imported inside the function, within the transaction
    block, rather than at module level.
    """
    print(" - Survex entrances x/y/z Positions")
    with transaction.atomic():
        # when this import is moved to the top with the rest it all crashes horribly
        import troggle.parsers.survex

        troggle.parsers.survex.LoadPositions()
def import_loadpos():
    """Run troggle.parsers.survex.LoadPositions() inside one atomic transaction.

    The survex parser is imported inside the function, before anything is
    printed, rather than at module level.
    """
    # when this import is moved to the top with the rest it all crashes horribly
    import troggle.parsers.survex

    print(" - Survex entrances x/y/z Positions")
    with transaction.atomic():
        troggle.parsers.survex.LoadPositions()
def import_drawingsfiles():
    """Run troggle.parsers.drawings.load_drawings_files() inside one atomic transaction."""
    print("-- Importing Drawings files")
    with transaction.atomic():
        troggle.parsers.drawings.load_drawings_files()

View File

@ -11,17 +11,16 @@ from django.template.defaultfilters import slugify
from django.utils.timezone import get_current_timezone, make_aware from django.utils.timezone import get_current_timezone, make_aware
from parsers.people import GetPersonExpeditionNameLookup from parsers.people import GetPersonExpeditionNameLookup
from troggle.core.models.caves import (Cave, GetCaveLookup, LogbookEntry, from troggle.core.models.caves import Cave, GetCaveLookup, LogbookEntry, PersonTrip
PersonTrip)
from troggle.core.models.troggle import DataIssue, Expedition from troggle.core.models.troggle import DataIssue, Expedition
from troggle.core.utils import TROG, save_carefully from troggle.core.utils import TROG, save_carefully
''' """
Parses and imports logbooks in all their wonderful confusion Parses and imports logbooks in all their wonderful confusion
See detailed explanation of the complete process: See detailed explanation of the complete process:
https://expo.survex.com/handbook/computing/logbooks-parsing.html https://expo.survex.com/handbook/computing/logbooks-parsing.html
''' """
todo=''' todo = """
- refactor everything with some urgency, esp. LoadLogbookForExpedition() - refactor everything with some urgency, esp. LoadLogbookForExpedition()
- remove the TROG things since we need the database for multiuser access? Or not? - remove the TROG things since we need the database for multiuser access? Or not?
@ -47,7 +46,7 @@ todo='''
- use Fixtures https://docs.djangoproject.com/en/4.1/ref/django-admin/#django-admin-loaddata to cache - use Fixtures https://docs.djangoproject.com/en/4.1/ref/django-admin/#django-admin-loaddata to cache
data for old logbooks? Not worth it.. data for old logbooks? Not worth it..
''' """
MAX_LOGBOOK_ENTRY_TITLE_LENGTH = 200 MAX_LOGBOOK_ENTRY_TITLE_LENGTH = 200
BLOG_PARSER_SETTINGS = { BLOG_PARSER_SETTINGS = {
# "2022": ("ukcavingblog.html", "parser_blog"), # now folded in to logbooks.html # "2022": ("ukcavingblog.html", "parser_blog"), # now folded in to logbooks.html
@ -82,17 +81,50 @@ LOGBOOK_PARSER_SETTINGS = {
"1982": ("logbook.html", "parser_html"), "1982": ("logbook.html", "parser_html"),
} }
# Integer count per expedition year; keys are the year as a string.
# NOTE(review): presumably the expected number of logbook entries for each
# year - confirm where `entries` is read.
# There are no keys for 2021, 2020 or 1986.
entries = {
    "2022": 89,
    "2019": 55,
    "2018": 95,
    "2017": 74,
    "2016": 86,
    "2015": 80,
    "2014": 65,
    "2013": 52,
    "2012": 75,
    "2011": 71,
    "2010": 22,
    "2009": 53,
    "2008": 49,
    "2007": 113,
    "2006": 60,
    "2005": 55,
    "2004": 76,
    "2003": 42,
    "2002": 31,
    "2001": 49,
    "2000": 54,
    "1999": 79,
    "1998": 43,
    "1997": 53,
    "1996": 95,
    "1995": 42,
    "1994": 32,
    "1993": 41,
    "1992": 62,
    "1991": 39,
    "1990": 87,
    "1989": 63,
    "1988": 61,
    "1987": 34,
    "1985": 24,
    "1984": 32,
    "1983": 52,
    "1982": 42,
}
# Module-level state shared by the logbook parsing functions.
logentries = []  # the entire logbook for one year is a single object: a list of entries
noncaveplaces = [
    "Journey",
    "Loser Plateau",
    "UNKNOWN",
    "plateau",
    "base camp",
    "basecamp",
    "top camp",
    "topcamp",
]
# Bound to the same object held in TROG, so writes through this name are
# visible there as well.
logdataissues = TROG["issues"]["logdataissues"]
trips = {}
# #
@ -102,7 +134,8 @@ def set_trip_id(year, seq):
tid = f"{year}_s{seq:02d}" tid = f"{year}_s{seq:02d}"
return tid return tid
rx_tripperson = re.compile(r'(?i)<u>(.*?)</u>$')
rx_tripperson = re.compile(r"(?i)<u>(.*?)</u>$")
rx_round_bracket = re.compile(r"[\(\[].*?[\)\]]") rx_round_bracket = re.compile(r"[\(\[].*?[\)\]]")
@ -118,7 +151,7 @@ def GetTripPersons(trippeople, expedition, logtime_underground, tid=None):
mul = rx_tripperson.match(tripperson) mul = rx_tripperson.match(tripperson)
if mul: if mul:
tripperson = mul.group(1).strip() tripperson = mul.group(1).strip()
if tripperson and tripperson[0] != '*': if tripperson and tripperson[0] != "*":
tripperson = re.sub(rx_round_bracket, "", tripperson).strip() tripperson = re.sub(rx_round_bracket, "", tripperson).strip()
# these aliases should be moved to people.py GetPersonExpeditionNameLookup(expedition) # these aliases should be moved to people.py GetPersonExpeditionNameLookup(expedition)
@ -139,13 +172,11 @@ def GetTripPersons(trippeople, expedition, logtime_underground, tid=None):
if tripperson == "Samouse1": if tripperson == "Samouse1":
tripperson = "Todd Rye" tripperson = "Todd Rye"
personyear = GetPersonExpeditionNameLookup(expedition).get(tripperson.lower()) personyear = GetPersonExpeditionNameLookup(expedition).get(tripperson.lower())
if not personyear: if not personyear:
message = f" ! - {expedition.year} No name match for: '{tripperson}' in entry {tid=} for this expedition year." message = f" ! - {expedition.year} No name match for: '{tripperson}' in entry {tid=} for this expedition year."
print(message) print(message)
DataIssue.objects.create(parser='logbooks', message=message) DataIssue.objects.create(parser="logbooks", message=message)
logdataissues[tid] = message logdataissues[tid] = message
res.append((personyear, logtime_underground)) res.append((personyear, logtime_underground))
if mul: if mul:
@ -158,6 +189,7 @@ def GetTripPersons(trippeople, expedition, logtime_underground, tid=None):
# print(f" - {tid} [{author.person}] '{res[0][0].person}'...") # print(f" - {tid} [{author.person}] '{res[0][0].person}'...")
return res, author return res, author
def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_underground, tid=None): def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_underground, tid=None):
"""saves a logbook entry and related persontrips """saves a logbook entry and related persontrips
Does NOT save the expeditionday_id - all NULLs. why? Because we are deprecating expeditionday ! Does NOT save the expeditionday_id - all NULLs. why? Because we are deprecating expeditionday !
@ -188,7 +220,7 @@ def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_
# print(f" - {author} - {logtime_underground}") # print(f" - {author} - {logtime_underground}")
except: except:
message = f" ! - {expedition.year} Skipping logentry: {title} - GetTripPersons FAIL" message = f" ! - {expedition.year} Skipping logentry: {title} - GetTripPersons FAIL"
DataIssue.objects.create(parser='logbooks', message=message) DataIssue.objects.create(parser="logbooks", message=message)
logdataissues["title"] = message logdataissues["title"] = message
print(message) print(message)
raise raise
@ -196,7 +228,7 @@ def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_
if not author: if not author:
message = f" ! - {expedition.year} Warning: logentry: {title} - no expo member author for entry '{tid}'" message = f" ! - {expedition.year} Warning: logentry: {title} - no expo member author for entry '{tid}'"
DataIssue.objects.create(parser='logbooks', message=message) DataIssue.objects.create(parser="logbooks", message=message)
logdataissues["title"] = message logdataissues["title"] = message
print(message) print(message)
# return # return
@ -218,12 +250,12 @@ def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_
text = text.replace(f' src="/years/{y}//years/{y}/', f' src="/years/{y}/') text = text.replace(f' src="/years/{y}//years/{y}/', f' src="/years/{y}/')
text = text.replace(f" src='/years/{y}//years/{y}/", f" src='/years/{y}/") text = text.replace(f" src='/years/{y}//years/{y}/", f" src='/years/{y}/")
text = text.replace('\t', '' ) text = text.replace("\t", "")
text = text.replace('\n\n\n', '\n\n' ) text = text.replace("\n\n\n", "\n\n")
# Check for an existing copy of the current entry, and save # Check for an existing copy of the current entry, and save
expeditionday = expedition.get_expedition_day(date) expeditionday = expedition.get_expedition_day(date)
lookupAttribs={'date':date, 'title':title} lookupAttribs = {"date": date, "title": title}
# 'cave' is converted to a string doing this, which renders as the cave slug. # 'cave' is converted to a string doing this, which renders as the cave slug.
# but it is a db query which we should try to avoid - rewrite this # but it is a db query which we should try to avoid - rewrite this
@ -233,9 +265,15 @@ def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_
slug = tid slug = tid
# slug = tid + "_" + slugify(title)[:10].replace('-','_') # slug = tid + "_" + slugify(title)[:10].replace('-','_')
else: else:
slug = str(randint(1000,9999)) + "_" + slugify(title)[:10].replace('-','_') slug = str(randint(1000, 9999)) + "_" + slugify(title)[:10].replace("-", "_")
nonLookupAttribs={'place':place, 'text':text, 'expedition':expedition, nonLookupAttribs = {
'time_underground':logtime_underground, 'cave_slug':str(cave), 'slug': slug} "place": place,
"text": text,
"expedition": expedition,
"time_underground": logtime_underground,
"cave_slug": str(cave),
"slug": slug,
}
# This creates the lbo instance of LogbookEntry # This creates the lbo instance of LogbookEntry
lbo, created = save_carefully(LogbookEntry, lookupAttribs, nonLookupAttribs) lbo, created = save_carefully(LogbookEntry, lookupAttribs, nonLookupAttribs)
@ -243,11 +281,12 @@ def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_
# for PersonTrip time_underground is float (decimal hours) # for PersonTrip time_underground is float (decimal hours)
for tripperson, time_underground in trippersons: for tripperson, time_underground in trippersons:
# print(f" - {tid} '{tripperson}' author:{tripperson == author}") # print(f" - {tid} '{tripperson}' author:{tripperson == author}")
lookupAttribs={'personexpedition':tripperson, 'logbook_entry':lbo} lookupAttribs = {"personexpedition": tripperson, "logbook_entry": lbo}
nonLookupAttribs={'time_underground':time_underground, 'is_logbook_entry_author':(tripperson == author)} nonLookupAttribs = {"time_underground": time_underground, "is_logbook_entry_author": (tripperson == author)}
# this creates the PersonTrip instance. # this creates the PersonTrip instance.
save_carefully(PersonTrip, lookupAttribs, nonLookupAttribs) save_carefully(PersonTrip, lookupAttribs, nonLookupAttribs)
def ParseDate(tripdate, year): def ParseDate(tripdate, year):
"""Interprets dates in the expo logbooks and returns a correct datetime.date object""" """Interprets dates in the expo logbooks and returns a correct datetime.date object"""
dummydate = date(1970, 1, 1) dummydate = date(1970, 1, 1)
@ -261,7 +300,7 @@ def ParseDate(tripdate, year):
if mdatestandard: if mdatestandard:
if not (mdatestandard.group(1) == year): if not (mdatestandard.group(1) == year):
message = f" ! - Bad date (year) in logbook: {tripdate} - {year}" message = f" ! - Bad date (year) in logbook: {tripdate} - {year}"
DataIssue.objects.create(parser='logbooks', message=message) DataIssue.objects.create(parser="logbooks", message=message)
logdataissues["tripdate"] = message logdataissues["tripdate"] = message
return dummydate return dummydate
else: else:
@ -269,7 +308,7 @@ def ParseDate(tripdate, year):
elif mdategoof: elif mdategoof:
if not (not mdategoof.group(3) or mdategoof.group(3) == year[:2]): if not (not mdategoof.group(3) or mdategoof.group(3) == year[:2]):
message = " ! - Bad date mdategoof.group(3) in logbook: " + tripdate + " - " + mdategoof.group(3) message = " ! - Bad date mdategoof.group(3) in logbook: " + tripdate + " - " + mdategoof.group(3)
DataIssue.objects.create(parser='logbooks', message=message) DataIssue.objects.create(parser="logbooks", message=message)
logdataissues["tripdate"] = message logdataissues["tripdate"] = message
return dummydate return dummydate
else: else:
@ -278,25 +317,26 @@ def ParseDate(tripdate, year):
else: else:
year = 1970 year = 1970
message = f" ! - Bad date in logbook: {tripdate} - {year}" message = f" ! - Bad date in logbook: {tripdate} - {year}"
DataIssue.objects.create(parser='logbooks', message=message) DataIssue.objects.create(parser="logbooks", message=message)
logdataissues["tripdate"] = message logdataissues["tripdate"] = message
return date(year, month, day) return date(year, month, day)
except: except:
message = f" ! - Failed to parse date in logbook: {tripdate} - {year}" message = f" ! - Failed to parse date in logbook: {tripdate} - {year}"
DataIssue.objects.create(parser='logbooks', message=message) DataIssue.objects.create(parser="logbooks", message=message)
logdataissues["tripdate"] = message logdataissues["tripdate"] = message
return datetime.date(1970, 1, 1) return datetime.date(1970, 1, 1)
# 2002 - now # 2002 - now
def parser_html(year, expedition, txt, seq=""): def parser_html(year, expedition, txt, seq=""):
'''This uses some of the more obscure capabilities of regular expressions, """This uses some of the more obscure capabilities of regular expressions,
see https://docs.python.org/3/library/re.html see https://docs.python.org/3/library/re.html
You can't see it here, but a round-trip export-then-import will move You can't see it here, but a round-trip export-then-import will move
the endmatter up to the frontmatter. This makes sense when moving the endmatter up to the frontmatter. This makes sense when moving
from parser_html_01 format logfiles, believe me. from parser_html_01 format logfiles, believe me.
''' """
global logentries global logentries
global logdataissues global logdataissues
@ -305,7 +345,7 @@ def parser_html(year, expedition, txt, seq=""):
headpara = headmatch.groups()[0].strip() headpara = headmatch.groups()[0].strip()
# print(f" - headpara:\n'{headpara}'") # print(f" - headpara:\n'{headpara}'")
if(len(headpara)>0): if len(headpara) > 0:
frontpath = Path(settings.EXPOWEB, "years", year, "frontmatter.html") frontpath = Path(settings.EXPOWEB, "years", year, "frontmatter.html")
with open(frontpath, "w") as front: with open(frontpath, "w") as front:
front.write(headpara + "\n") front.write(headpara + "\n")
@ -315,7 +355,7 @@ def parser_html(year, expedition, txt, seq=""):
endpara = endmatch.groups()[0].strip() endpara = endmatch.groups()[0].strip()
# print(f" - endpara:\n'{endpara}'") # print(f" - endpara:\n'{endpara}'")
if(len(endpara)>0): if len(endpara) > 0:
endpath = Path(settings.EXPOWEB, "years", year, "endmatter.html") endpath = Path(settings.EXPOWEB, "years", year, "endmatter.html")
with open(endpath, "w") as end: with open(endpath, "w") as end:
end.write(endpara + "\n") end.write(endpara + "\n")
@ -327,7 +367,8 @@ def parser_html(year, expedition, txt, seq=""):
tid = set_trip_id(year, logbook_entry_count) tid = set_trip_id(year, logbook_entry_count)
# print(f' - new tid:{tid} lbe count: {logbook_entry_count}') # print(f' - new tid:{tid} lbe count: {logbook_entry_count}')
s = re.match(r'''(?x)(?:\s*<div\sclass="tripdate"\sid=".*?">.*?</div>\s*<p>)? # second date s = re.match(
r"""(?x)(?:\s*<div\sclass="tripdate"\sid=".*?">.*?</div>\s*<p>)? # second date
\s*(?:<a\s+id="(.*?)"\s*/>\s*</a>)? \s*(?:<a\s+id="(.*?)"\s*/>\s*</a>)?
\s*<div\s+class="tripdate"\s*(?:id="(.*?)")?>(.*?)</div>(?:<p>)? \s*<div\s+class="tripdate"\s*(?:id="(.*?)")?>(.*?)</div>(?:<p>)?
\s*<div\s+class="trippeople">\s*(.*?)</div> \s*<div\s+class="trippeople">\s*(.*?)</div>
@ -335,16 +376,19 @@ def parser_html(year, expedition, txt, seq=""):
([\s\S]*?) ([\s\S]*?)
\s*(?:<div\s+class="timeug">\s*(.*?)</div>)? \s*(?:<div\s+class="timeug">\s*(.*?)</div>)?
\s*$ \s*$
''', trippara) """,
trippara,
)
if s: if s:
tripid, tripid1, tripdate, trippeople, triptitle, triptext, tu = s.groups() tripid, tripid1, tripdate, trippeople, triptitle, triptext, tu = s.groups()
else: # allow title and people to be swapped in order else: # allow title and people to be swapped in order
msg = f" !- {year} Can't parse:{logbook_entry_count} '{trippara[:50]}'..." msg = f" !- {year} Can't parse:{logbook_entry_count} '{trippara[:50]}'..."
print(msg) print(msg)
DataIssue.objects.create(parser='logbooks', message=msg) DataIssue.objects.create(parser="logbooks", message=msg)
logdataissues[tid] = msg logdataissues[tid] = msg
s2 = re.match(r'''(?x)(?:\s*<div\sclass="tripdate"\sid=".*?">.*?</div>\s*<p>)? # second date s2 = re.match(
r"""(?x)(?:\s*<div\sclass="tripdate"\sid=".*?">.*?</div>\s*<p>)? # second date
\s*(?:<a\s+id="(.*?)"\s*/>\s*</a>)? \s*(?:<a\s+id="(.*?)"\s*/>\s*</a>)?
\s*<div\s+class="tripdate"\s*(?:id="(.*?)")?>(.*?)</div>(?:<p>)? \s*<div\s+class="tripdate"\s*(?:id="(.*?)")?>(.*?)</div>(?:<p>)?
\s*<div\s+class="triptitle">\s*(.*?)</div> \s*<div\s+class="triptitle">\s*(.*?)</div>
@ -352,14 +396,16 @@ def parser_html(year, expedition, txt, seq=""):
([\s\S]*?) ([\s\S]*?)
\s*(?:<div\s+class="timeug">\s*(.*?)</div>)? \s*(?:<div\s+class="timeug">\s*(.*?)</div>)?
\s*$ \s*$
''', trippara) """,
trippara,
)
if s2: if s2:
tripid, tripid1, tripdate, triptitle, trippeople, triptext, tu = s2.groups() tripid, tripid1, tripdate, triptitle, trippeople, triptext, tu = s2.groups()
else: else:
# if not re.search(r"Rigging Guide", trippara): # if not re.search(r"Rigging Guide", trippara):
msg = f" !- Logbook. Can't parse entry on 2nd pass:{logbook_entry_count} '{trippara[:50]}'..." msg = f" !- Logbook. Can't parse entry on 2nd pass:{logbook_entry_count} '{trippara[:50]}'..."
print(msg) print(msg)
DataIssue.objects.create(parser='logbooks', message=msg) DataIssue.objects.create(parser="logbooks", message=msg)
logdataissues[tid] = msg logdataissues[tid] = msg
continue continue
@ -374,10 +420,10 @@ def parser_html(year, expedition, txt, seq=""):
ltriptext = re.sub(r"<p>", "<br /><br />", ltriptext).strip() ltriptext = re.sub(r"<p>", "<br /><br />", ltriptext).strip()
triptitle = triptitle.strip() triptitle = triptitle.strip()
entrytuple = (ldate, tripcave, triptitle, ltriptext, entrytuple = (ldate, tripcave, triptitle, ltriptext, trippeople, expedition, tu, tripid1)
trippeople, expedition, tu, tripid1)
logentries.append(entrytuple) logentries.append(entrytuple)
# main parser for 1991 - 2001. simpler because the data has been hacked so much to fit it # main parser for 1991 - 2001. simpler because the data has been hacked so much to fit it
def parser_html_01(year, expedition, txt, seq=""): def parser_html_01(year, expedition, txt, seq=""):
global logentries global logentries
@ -389,7 +435,7 @@ def parser_html_01(year, expedition, txt, seq=""):
headpara = headmatch.groups()[0].strip() headpara = headmatch.groups()[0].strip()
# print(f" - headpara:\n'{headpara}'") # print(f" - headpara:\n'{headpara}'")
if(len(headpara)>0): if len(headpara) > 0:
frontpath = Path(settings.EXPOWEB, "years", year, "frontmatter.html") frontpath = Path(settings.EXPOWEB, "years", year, "frontmatter.html")
with open(frontpath, "w") as front: with open(frontpath, "w") as front:
front.write(headpara + "\n") front.write(headpara + "\n")
@ -403,7 +449,7 @@ def parser_html_01(year, expedition, txt, seq=""):
endpara = "" endpara = ""
# print(f" - endpara:\n'{endpara}'") # print(f" - endpara:\n'{endpara}'")
if(len(endpara)>0): if len(endpara) > 0:
endpath = Path(settings.EXPOWEB, "years", year, "endmatter.html") endpath = Path(settings.EXPOWEB, "years", year, "endmatter.html")
with open(endpath, "w") as end: with open(endpath, "w") as end:
end.write(endpara + "\n") end.write(endpara + "\n")
@ -419,7 +465,7 @@ def parser_html_01(year, expedition, txt, seq=""):
s = re.match(r"(?i)(?s)\s*(?:<p>)?(.*?)</?p>(.*)$", trippara) s = re.match(r"(?i)(?s)\s*(?:<p>)?(.*?)</?p>(.*)$", trippara)
if not s: if not s:
message = " ! - Skipping logentry {year} failure to parse header: " + tid + trippara[:300] + "..." message = " ! - Skipping logentry {year} failure to parse header: " + tid + trippara[:300] + "..."
DataIssue.objects.create(parser='logbooks', message=message) DataIssue.objects.create(parser="logbooks", message=message)
logdataissues[tid] = message logdataissues[tid] = message
print(message) print(message)
break break
@ -427,11 +473,10 @@ def parser_html_01(year, expedition, txt, seq=""):
tripheader, triptext = s.group(1), s.group(2) tripheader, triptext = s.group(1), s.group(2)
except: except:
message = f" ! - Fail to set tripheader, triptext. trip:<{tid}> s:'{s}'" message = f" ! - Fail to set tripheader, triptext. trip:<{tid}> s:'{s}'"
DataIssue.objects.create(parser='logbooks', message=message) DataIssue.objects.create(parser="logbooks", message=message)
logdataissues[tid] = message logdataissues[tid] = message
print(message) print(message)
# mtripid = re.search(r'<a id="(.*?)"', tripheader) # mtripid = re.search(r'<a id="(.*?)"', tripheader)
# if not mtripid: # if not mtripid:
# message = f" ! - A tag id not found. Never mind. Not needed. trip:<{tid}> header:'{tripheader}'" # message = f" ! - A tag id not found. Never mind. Not needed. trip:<{tid}> header:'{tripheader}'"
@ -447,7 +492,7 @@ def parser_html_01(year, expedition, txt, seq=""):
tripdate, triptitle, trippeople = tripheader.split("|") tripdate, triptitle, trippeople = tripheader.split("|")
except: except:
message = f" ! - Fail 3 to split out date|title|people. trip:<{tid}> '{tripheader.split('|')}'" message = f" ! - Fail 3 to split out date|title|people. trip:<{tid}> '{tripheader.split('|')}'"
DataIssue.objects.create(parser='logbooks', message=message) DataIssue.objects.create(parser="logbooks", message=message)
logdataissues[tid] = message logdataissues[tid] = message
print(message) print(message)
try: try:
@ -455,7 +500,7 @@ def parser_html_01(year, expedition, txt, seq=""):
trippeople = "GUESS ANON" trippeople = "GUESS ANON"
except: except:
message = f" ! - Skipping logentry {year} Fail 2 to split out date|title (anon). trip:<{tid}> '{tripheader.split('|')}' CRASHES MySQL !" message = f" ! - Skipping logentry {year} Fail 2 to split out date|title (anon). trip:<{tid}> '{tripheader.split('|')}' CRASHES MySQL !"
DataIssue.objects.create(parser='logbooks', message=message) DataIssue.objects.create(parser="logbooks", message=message)
logdataissues[tid] = message logdataissues[tid] = message
print(message) print(message)
break break
@ -464,7 +509,7 @@ def parser_html_01(year, expedition, txt, seq=""):
# print(f" # - tid: {tid} <{tripdate}> <{triptitle}> <{trippeople}>") # print(f" # - tid: {tid} <{tripdate}> <{triptitle}> <{trippeople}>")
# print(f" #4 - tid: {tid}") # print(f" #4 - tid: {tid}")
mtu = re.search(r'<p[^>]*>(T/?U.*)', triptext) mtu = re.search(r"<p[^>]*>(T/?U.*)", triptext)
if mtu: if mtu:
tu = mtu.group(1) tu = mtu.group(1)
triptext = triptext[: mtu.start(0)] + triptext[mtu.end() :] triptext = triptext[: mtu.start(0)] + triptext[mtu.end() :]
@ -488,31 +533,30 @@ def parser_html_01(year, expedition, txt, seq=""):
if ltriptext == "": if ltriptext == "":
message = " ! - Zero content for logbook entry!: " + tid message = " ! - Zero content for logbook entry!: " + tid
DataIssue.objects.create(parser='logbooks', message=message) DataIssue.objects.create(parser="logbooks", message=message)
logdataissues[tid] = message logdataissues[tid] = message
print(message) print(message)
entrytuple = (ldate, tripcave, triptitle, ltriptext, trippeople, expedition, tu, tid)
entrytuple = (ldate, tripcave, triptitle, ltriptext,
trippeople, expedition, tu, tid)
logentries.append(entrytuple) logentries.append(entrytuple)
except: except:
message = f" ! - Skipping logentry {year} due to exception in: {tid}" message = f" ! - Skipping logentry {year} due to exception in: {tid}"
DataIssue.objects.create(parser='logbooks', message=message) DataIssue.objects.create(parser="logbooks", message=message)
logdataissues[tid] = message logdataissues[tid] = message
print(message) print(message)
errorcount += 1 errorcount += 1
raise raise
if errorcount > 5: if errorcount > 5:
message = f" !!- TOO MANY ERRORS - aborting at '{tid}' logbook: {year}" message = f" !!- TOO MANY ERRORS - aborting at '{tid}' logbook: {year}"
DataIssue.objects.create(parser='logbooks', message=message) DataIssue.objects.create(parser="logbooks", message=message)
logdataissues[tid] = message logdataissues[tid] = message
print(message) print(message)
return return
def parser_blog(year, expedition, txt, sq=""): def parser_blog(year, expedition, txt, sq=""):
'''Parses the format of web pages collected as 'Save As HTML" from the UK Caving blog website. """Parses the format of web pages collected as 'Save As HTML" from the UK Caving blog website.
Note that the entries have dates and authors, but no titles. Note that the entries have dates and authors, but no titles.
See detailed explanation of the complete process: See detailed explanation of the complete process:
https://expo.survex.com/handbook/computing/logbooks-parsing.html https://expo.survex.com/handbook/computing/logbooks-parsing.html
@ -527,23 +571,27 @@ def parser_blog(year, expedition, txt, sq=""):
</article> </article>
</article> </article>
So the content is nested inside the header. Attachments (images) come after the content. So the content is nested inside the header. Attachments (images) come after the content.
''' """
global logentries global logentries
global logdataissues global logdataissues
errorcount = 0 errorcount = 0
tripheads = re.findall(r"<article class=\"message message--post js-post js-inlineModContainer\s*\"\s*([\s\S]*?)(?=</article)", txt) tripheads = re.findall(
r"<article class=\"message message--post js-post js-inlineModContainer\s*\"\s*([\s\S]*?)(?=</article)", txt
)
if not (tripheads): if not (tripheads):
message = f" ! - Skipping on failure to parse article header: {txt[:500]}" message = f" ! - Skipping on failure to parse article header: {txt[:500]}"
print(message) print(message)
# (?= is a non-consuming match, see https://docs.python.org/3/library/re.html # (?= is a non-consuming match, see https://docs.python.org/3/library/re.html
tripparas = re.findall(r"<article class=\"message-body js-selectToQuote\"\>\s*([\s\S]*?)(</article[^>]*>)([\s\S]*?)(?=</article)", txt) tripparas = re.findall(
r"<article class=\"message-body js-selectToQuote\"\>\s*([\s\S]*?)(</article[^>]*>)([\s\S]*?)(?=</article)", txt
)
if not (tripparas): if not (tripparas):
message = f" ! - Skipping on failure to parse article content: {txt[:500]}" message = f" ! - Skipping on failure to parse article content: {txt[:500]}"
print(message) print(message)
if (len(tripheads) !=len(tripparas)): if len(tripheads) != len(tripparas):
print(f"{len(tripheads)} != {len(tripparas)}") print(f"{len(tripheads)} != {len(tripparas)}")
print(f"{len(tripheads)} - {len(tripparas)}") print(f"{len(tripheads)} - {len(tripparas)}")
@ -567,7 +615,7 @@ def parser_blog(year, expedition, txt, sq=""):
match_author = re.search(r".*data-author=\"([^\"]*)\" data-content=.*", triphead) match_author = re.search(r".*data-author=\"([^\"]*)\" data-content=.*", triphead)
if not (match_author): if not (match_author):
message = f" ! - Skipping logentry {year}:{logbook_entry_count} on failure to parse data-author {tid} {triphead[:400]}..." message = f" ! - Skipping logentry {year}:{logbook_entry_count} on failure to parse data-author {tid} {triphead[:400]}..."
DataIssue.objects.create(parser='logbooks', message=message) DataIssue.objects.create(parser="logbooks", message=message)
logdataissues[tid] = message logdataissues[tid] = message
print(message) print(message)
break break
@ -577,7 +625,7 @@ def parser_blog(year, expedition, txt, sq=""):
match_datetime = re.search(r".*datetime=\"([^\"]*)\" data-time=.*", triphead) match_datetime = re.search(r".*datetime=\"([^\"]*)\" data-time=.*", triphead)
if not (match_datetime): if not (match_datetime):
message = f" ! - Skipping logentry {year}:{logbook_entry_count} on failure to parse datetime {tid} {triphead[:400]}..." message = f" ! - Skipping logentry {year}:{logbook_entry_count} on failure to parse datetime {tid} {triphead[:400]}..."
DataIssue.objects.create(parser='logbooks', message=message) DataIssue.objects.create(parser="logbooks", message=message)
logdataissues[tid] = message logdataissues[tid] = message
print(message) print(message)
break break
@ -587,7 +635,7 @@ def parser_blog(year, expedition, txt, sq=""):
tripdate = datetime.fromisoformat(datestamp) tripdate = datetime.fromisoformat(datestamp)
except: except:
message = f" ! - FROMISOFORMAT fail logentry {year}:{logbook_entry_count} {tid} '{datestamp}'" message = f" ! - FROMISOFORMAT fail logentry {year}:{logbook_entry_count} {tid} '{datestamp}'"
DataIssue.objects.create(parser='logbooks', message=message) DataIssue.objects.create(parser="logbooks", message=message)
logdataissues[tid] = message logdataissues[tid] = message
print(message) print(message)
# fallback, ignore the timestamp bits: # fallback, ignore the timestamp bits:
@ -604,8 +652,7 @@ def parser_blog(year, expedition, txt, sq=""):
tripcontent = re.sub(r"<hr\s*>", "", tripcontent) tripcontent = re.sub(r"<hr\s*>", "", tripcontent)
tripcontent = f"\n\n<!-- Content parsed from UK Caving Blog -->\nBlog Author: {trippeople}" + tripcontent tripcontent = f"\n\n<!-- Content parsed from UK Caving Blog -->\nBlog Author: {trippeople}" + tripcontent
entrytuple = (tripdate, location, tripname, tripcontent, entrytuple = (tripdate, location, tripname, tripcontent, trippeople, expedition, tu, tid)
trippeople, expedition, tu, tid)
logentries.append(entrytuple) logentries.append(entrytuple)
@ -627,11 +674,9 @@ def LoadLogbookForExpedition(expedition, clean=True):
expect = entries[year] expect = entries[year]
# print(" - Logbook for: " + year) # print(" - Logbook for: " + year)
def cleanerrors(year): def cleanerrors(year):
global logdataissues global logdataissues
dataissues = DataIssue.objects.filter(parser='logbooks') dataissues = DataIssue.objects.filter(parser="logbooks")
for di in dataissues: for di in dataissues:
ph = year ph = year
if re.search(ph, di.message) is not None: if re.search(ph, di.message) is not None:
@ -647,7 +692,8 @@ def LoadLogbookForExpedition(expedition, clean=True):
dellist.append(key) dellist.append(key)
for i in dellist: for i in dellist:
del logdataissues[i] del logdataissues[i]
if (clean):
if clean:
cleanerrors(year) cleanerrors(year)
if year in yearlinks: if year in yearlinks:
@ -665,7 +711,7 @@ def LoadLogbookForExpedition(expedition, clean=True):
expedition.save() expedition.save()
lbes = LogbookEntry.objects.filter(expedition=expedition) lbes = LogbookEntry.objects.filter(expedition=expedition)
if (clean): if clean:
for lbe in lbes: for lbe in lbes:
lbe.delete() lbe.delete()
@ -675,7 +721,7 @@ def LoadLogbookForExpedition(expedition, clean=True):
# print(f" ! End of blog. Next blog file in sequence not there:{lb}") # print(f" ! End of blog. Next blog file in sequence not there:{lb}")
break break
try: try:
with open(lb,'rb') as file_in: with open(lb, "rb") as file_in:
txt = file_in.read().decode("utf-8") txt = file_in.read().decode("utf-8")
logbook_parseable = True logbook_parseable = True
except (IOError): except (IOError):
@ -689,7 +735,7 @@ def LoadLogbookForExpedition(expedition, clean=True):
# -------------------- # --------------------
parser = globals()[parsefunc] parser = globals()[parsefunc]
print(f' - {year} parsing with {parsefunc} - {lb}') print(f" - {year} parsing with {parsefunc} - {lb}")
parser(year, expedition, txt, sq) # this launches the right parser for this year parser(year, expedition, txt, sq) # this launches the right parser for this year
# -------------------- # --------------------
dupl = {} dupl = {}
@ -699,11 +745,10 @@ def LoadLogbookForExpedition(expedition, clean=True):
if check in dupl: if check in dupl:
dupl[check] += 1 dupl[check] += 1
triptitle = f"{triptitle} #{dupl[check]}" triptitle = f"{triptitle} #{dupl[check]}"
print(f' - {triptitle} -- {date}') print(f" - {triptitle} -- {date}")
else: else:
dupl[check] = 1 dupl[check] = 1
EnterLogIntoDbase(date, tripcave, triptitle, text, trippeople, expedition, logtime_underground, EnterLogIntoDbase(date, tripcave, triptitle, text, trippeople, expedition, logtime_underground, tripid1)
tripid1)
if len(logentries) == expect: if len(logentries) == expect:
# print(f"OK {year} {len(logentries):5d} is {expect}\n") # print(f"OK {year} {len(logentries):5d} is {expect}\n")
@ -713,13 +758,13 @@ def LoadLogbookForExpedition(expedition, clean=True):
return len(logentries) return len(logentries)
def LoadLogbook(year): def LoadLogbook(year):
'''One off logbook for testing purposes """One off logbook for testing purposes"""
'''
global LOGBOOK_PARSER_SETTINGS global LOGBOOK_PARSER_SETTINGS
nlbe = {} nlbe = {}
TROG['pagecache']['expedition'][year] = None # clear cache TROG["pagecache"]["expedition"][year] = None # clear cache
expo = Expedition.objects.get(year=year) expo = Expedition.objects.get(year=year)
year = expo.year # some type funny year = expo.year # some type funny
@ -729,7 +774,10 @@ def LoadLogbook(year):
LOGBOOK_PARSER_SETTINGS[year] = BLOG_PARSER_SETTINGS[year] LOGBOOK_PARSER_SETTINGS[year] = BLOG_PARSER_SETTINGS[year]
nlbe[expo] = LoadLogbookForExpedition(expo, clean=False) # this loads the blog logbook for one expo nlbe[expo] = LoadLogbookForExpedition(expo, clean=False) # this loads the blog logbook for one expo
else: else:
print(f"Not a year with extant blog entries to import: '{year}' not in BLOG_PARSER_SETTINGS {BLOG_PARSER_SETTINGS}") print(
f"Not a year with extant blog entries to import: '{year}' not in BLOG_PARSER_SETTINGS {BLOG_PARSER_SETTINGS}"
)
def LoadLogbooks(): def LoadLogbooks():
"""This is the master function for parsing all logbooks into the Troggle database. """This is the master function for parsing all logbooks into the Troggle database.
@ -740,16 +788,20 @@ def LoadLogbooks():
global entries global entries
logdataissues = {} logdataissues = {}
DataIssue.objects.filter(parser='logbooks').delete() DataIssue.objects.filter(parser="logbooks").delete()
expos = Expedition.objects.all() expos = Expedition.objects.all()
if len(expos) <= 1: if len(expos) <= 1:
message = f" ! - No expeditions found. Load 'people' first" message = f" ! - No expeditions found. Load 'people' first"
DataIssue.objects.create(parser='logbooks', message=message) DataIssue.objects.create(parser="logbooks", message=message)
logdataissues[f"sqlfail 0000"] = message logdataissues[f"sqlfail 0000"] = message
print(message) print(message)
return return
noexpo = ["1986", "2020", "2021",] #no expo noexpo = [
"1986",
"2020",
"2021",
] # no expo
lostlogbook = ["1976", "1977", "1978", "1979", "1980", "1981"] lostlogbook = ["1976", "1977", "1978", "1979", "1980", "1981"]
sqlfail = [""] # breaks mysql with db constraint fail - all now fixed.] sqlfail = [""] # breaks mysql with db constraint fail - all now fixed.]
nologbook = noexpo + lostlogbook + sqlfail nologbook = noexpo + lostlogbook + sqlfail
@ -761,11 +813,11 @@ def LoadLogbooks():
for expo in expos: # pointless as we explicitly know the years in this code. for expo in expos: # pointless as we explicitly know the years in this code.
year = expo.year year = expo.year
TROG['pagecache']['expedition'][year] = None # clear cache TROG["pagecache"]["expedition"][year] = None # clear cache
if year in sqlfail: if year in sqlfail:
print(" - Logbook for: " + year + " NO parsing attempted - known sql failures") print(" - Logbook for: " + year + " NO parsing attempted - known sql failures")
message = f" ! - Not even attempting to parse logbook for {year} until code fixed" message = f" ! - Not even attempting to parse logbook for {year} until code fixed"
DataIssue.objects.create(parser='logbooks', message=message) DataIssue.objects.create(parser="logbooks", message=message)
logdataissues[f"sqlfail {year}"] = message logdataissues[f"sqlfail {year}"] = message
print(message) print(message)
@ -778,7 +830,6 @@ def LoadLogbooks():
if year in BLOG_PARSER_SETTINGS: if year in BLOG_PARSER_SETTINGS:
bloglist.append(expo) bloglist.append(expo)
for ex in loglist: for ex in loglist:
nlbe[ex] = LoadLogbookForExpedition(ex) # this loads the logbook for one expo nlbe[ex] = LoadLogbookForExpedition(ex) # this loads the logbook for one expo
@ -803,7 +854,6 @@ def LoadLogbooks():
print(f"total {yt:,} log entries parsed in all expeditions") print(f"total {yt:,} log entries parsed in all expeditions")
# dateRegex = re.compile(r'<span\s+class="date">(\d\d\d\d)-(\d\d)-(\d\d)</span>', re.S) # dateRegex = re.compile(r'<span\s+class="date">(\d\d\d\d)-(\d\d)-(\d\d)</span>', re.S)
# expeditionYearRegex = re.compile(r'<span\s+class="expeditionyear">(.*?)</span>', re.S) # expeditionYearRegex = re.compile(r'<span\s+class="expeditionyear">(.*?)</span>', re.S)
# titleRegex = re.compile(r'<H1>(.*?)</H1>', re.S) # titleRegex = re.compile(r'<H1>(.*?)</H1>', re.S)
@ -813,4 +863,3 @@ def LoadLogbooks():
# TURegex = re.compile(r'<span\s+class="TU">([0-9]*\.?[0-9]+)</span>', re.S) # TURegex = re.compile(r'<span\s+class="TU">([0-9]*\.?[0-9]+)</span>', re.S)
# locationRegex = re.compile(r'<span\s+class="location">(.*?)</span>', re.S) # locationRegex = re.compile(r'<span\s+class="location">(.*?)</span>', re.S)
# caveRegex = re.compile(r'<span\s+class="cave">(.*?)</span>', re.S) # caveRegex = re.compile(r'<span\s+class="cave">(.*?)</span>', re.S)

View File

@ -9,15 +9,15 @@ from pathlib import Path
from django.conf import settings from django.conf import settings
from unidecode import unidecode from unidecode import unidecode
from troggle.core.models.troggle import (DataIssue, Expedition, Person, from troggle.core.models.troggle import DataIssue, Expedition, Person, PersonExpedition
PersonExpedition)
from troggle.core.utils import TROG, save_carefully from troggle.core.utils import TROG, save_carefully
'''These functions do not match how the stand-alone folk script works. So the script produces an HTML file which has """These functions do not match how the stand-alone folk script works. So the script produces an HTML file which has
href links to pages in troggle which troggle does not think are right. href links to pages in troggle which troggle does not think are right.
The standalone script needs to be rendered defunct, and all the parsing needs to be in troggle. Either that, The standalone script needs to be rendered defunct, and all the parsing needs to be in troggle. Either that,
or they should use the same code by importing a module. or they should use the same code by importing a module.
''' """
def parse_blurb(personline, header, person): def parse_blurb(personline, header, person):
"""create mugshot Photo instance""" """create mugshot Photo instance"""
@ -28,47 +28,48 @@ def parse_blurb(personline, header, person):
if not ms_path.is_file(): if not ms_path.is_file():
message = f"! INVALID mug_shot field '{ms_filename}' for {person.fullname}" message = f"! INVALID mug_shot field '{ms_filename}' for {person.fullname}"
print(message) print(message)
DataIssue.objects.create(parser='people', message=message, url=f"/person/{person.fullname}") DataIssue.objects.create(parser="people", message=message, url=f"/person/{person.fullname}")
return return
if ms_filename.startswith('i/'): if ms_filename.startswith("i/"):
# if person just has an image, add it. It has format 'i/adama2018.jpg' # if person just has an image, add it. It has format 'i/adama2018.jpg'
person.mug_shot = str(Path("/folk", ms_filename)) person.mug_shot = str(Path("/folk", ms_filename))
person.blurb = None person.blurb = None
elif ms_filename.startswith('l/'): elif ms_filename.startswith("l/"):
# it has the format 'l/ollybetts.htm' the file may contain <img src="../i/mymug.jpg"> images # it has the format 'l/ollybetts.htm' the file may contain <img src="../i/mymug.jpg"> images
with open(ms_path,'r') as blurbfile: with open(ms_path, "r") as blurbfile:
blrb = blurbfile.read() blrb = blurbfile.read()
pblurb=re.search(r'<body>.*<hr',blrb,re.DOTALL) pblurb = re.search(r"<body>.*<hr", blrb, re.DOTALL)
if pblurb: if pblurb:
person.mug_shot = None person.mug_shot = None
fragment= re.search('<body>(.*)<hr',blrb,re.DOTALL).group(1) fragment = re.search("<body>(.*)<hr", blrb, re.DOTALL).group(1)
fragment = fragment.replace('src="../i/', 'src="/folk/i/') fragment = fragment.replace('src="../i/', 'src="/folk/i/')
fragment = fragment.replace("src='../i/", "src='/folk/i/") fragment = fragment.replace("src='../i/", "src='/folk/i/")
fragment = re.sub(r'<h.*>[^<]*</h.>', '', fragment) fragment = re.sub(r"<h.*>[^<]*</h.>", "", fragment)
# replace src="../i/ with src="/folk/i # replace src="../i/ with src="/folk/i
person.blurb = fragment person.blurb = fragment
else: else:
message = f"! Blurb parse error in {ms_filename}" message = f"! Blurb parse error in {ms_filename}"
print(message) print(message)
DataIssue.objects.create(parser='people', message=message, url="/folk/") DataIssue.objects.create(parser="people", message=message, url="/folk/")
elif ms_filename == '': elif ms_filename == "":
pass pass
else: else:
message = f"! Unrecognised type of file at mug_shot field '{ms_filename}' for {person.fullname}" message = f"! Unrecognised type of file at mug_shot field '{ms_filename}' for {person.fullname}"
print(message) print(message)
DataIssue.objects.create(parser='people', message=message, url="/folk/") DataIssue.objects.create(parser="people", message=message, url="/folk/")
person.save() person.save()
def load_people_expos(): def load_people_expos():
'''This is where the folk.csv file is parsed to read people's names. """This is where the folk.csv file is parsed to read people's names.
Which it gets wrong for people like Lydia-Clare Leather and various 'von' and 'de' middle 'names' Which it gets wrong for people like Lydia-Clare Leather and various 'von' and 'de' middle 'names'
and McLean and Mclean and McAdam - interaction with the url parser in urls.py too and McLean and Mclean and McAdam - interaction with the url parser in urls.py too
''' """
DataIssue.objects.filter(parser='people').delete() DataIssue.objects.filter(parser="people").delete()
persontab = open(os.path.join(settings.EXPOWEB, "folk", "folk.csv")) # should really be EXPOFOLK I guess persontab = open(os.path.join(settings.EXPOWEB, "folk", "folk.csv")) # should really be EXPOFOLK I guess
personreader = csv.reader(persontab) # this is an iterator personreader = csv.reader(persontab) # this is an iterator
@ -80,8 +81,8 @@ def load_people_expos():
years = headers[5:] years = headers[5:]
for year in years: for year in years:
lookupAttribs = {'year':year} lookupAttribs = {"year": year}
nonLookupAttribs = {'name':f"CUCC expo {year}"} nonLookupAttribs = {"name": f"CUCC expo {year}"}
save_carefully(Expedition, lookupAttribs, nonLookupAttribs) save_carefully(Expedition, lookupAttribs, nonLookupAttribs)
@ -105,18 +106,18 @@ def load_people_expos():
nickname = splitnick.group(2) or "" nickname = splitnick.group(2) or ""
fullname = fullname.strip() fullname = fullname.strip()
names = fullname.split(' ') names = fullname.split(" ")
firstname = names[0] firstname = names[0]
if len(names) == 1: if len(names) == 1:
lastname = "" lastname = ""
if personline[header["VfHO member"]] =='': if personline[header["VfHO member"]] == "":
vfho = False vfho = False
else: else:
vfho = True vfho = True
lookupAttribs={'first_name':firstname, 'last_name':(lastname or "")} lookupAttribs = {"first_name": firstname, "last_name": (lastname or "")}
nonLookupAttribs={'is_vfho':vfho, 'fullname':fullname, 'nickname':nickname} nonLookupAttribs = {"is_vfho": vfho, "fullname": fullname, "nickname": nickname}
person, created = save_carefully(Person, lookupAttribs, nonLookupAttribs) person, created = save_carefully(Person, lookupAttribs, nonLookupAttribs)
parse_blurb(personline=personline, header=header, person=person) parse_blurb(personline=personline, header=header, person=person)
@ -125,11 +126,12 @@ def load_people_expos():
for year, attended in list(zip(headers, personline))[5:]: for year, attended in list(zip(headers, personline))[5:]:
expedition = Expedition.objects.get(year=year) expedition = Expedition.objects.get(year=year)
if attended == "1" or attended == "-1": if attended == "1" or attended == "-1":
lookupAttribs = {'person':person, 'expedition':expedition} lookupAttribs = {"person": person, "expedition": expedition}
nonLookupAttribs = {'nickname':nickname, 'is_guest':(personline[header["Guest"]] == "1")} nonLookupAttribs = {"nickname": nickname, "is_guest": (personline[header["Guest"]] == "1")}
save_carefully(PersonExpedition, lookupAttribs, nonLookupAttribs) save_carefully(PersonExpedition, lookupAttribs, nonLookupAttribs)
print("", flush=True) print("", flush=True)
def who_is_this(year, possibleid): def who_is_this(year, possibleid):
expo = Expedition.objects.filter(year=year) expo = Expedition.objects.filter(year=year)
personexpedition = GetPersonExpeditionNameLookup(expo)[possibleid.lower()] personexpedition = GetPersonExpeditionNameLookup(expo)[possibleid.lower()]
@ -138,16 +140,33 @@ def who_is_this(year,possibleid):
else: else:
return None return None
global foreign_friends global foreign_friends
foreign_friends = ["P. Jeutter", "K. Jäger", "S. Steinberger", "R. Seebacher", foreign_friends = [
"Dominik Jauch", "Fritz Mammel", "Marcus Scheuerman", "P. Jeutter",
"Uli Schütz", "Wieland Scheuerle", "Arndt Karger", "K. Jäger",
"Kai Schwekend", "Regina Kaiser", "Thilo Müller","Wieland Scheuerle", "S. Steinberger",
"Florian Gruner", "Helmut Stopka-Ebeler", "Aiko", "Mark Morgan", "Arndt Karger"] "R. Seebacher",
"Dominik Jauch",
"Fritz Mammel",
"Marcus Scheuerman",
"Uli Schütz",
"Wieland Scheuerle",
"Arndt Karger",
"Kai Schwekend",
"Regina Kaiser",
"Thilo Müller",
"Wieland Scheuerle",
"Florian Gruner",
"Helmut Stopka-Ebeler",
"Aiko",
"Mark Morgan",
"Arndt Karger",
]
def known_foreigner(id): def known_foreigner(id):
'''If this someone from ARGE or a known Austrian? Name has to be exact, no soft matching """If this someone from ARGE or a known Austrian? Name has to be exact, no soft matching"""
'''
global foreign_friends global foreign_friends
if id in foreign_friends: if id in foreign_friends:
@ -160,12 +179,13 @@ def known_foreigner(id):
# This is convoluted, the whole personexpedition concept is unnecessary? # This is convoluted, the whole personexpedition concept is unnecessary?
Gpersonexpeditionnamelookup = {} Gpersonexpeditionnamelookup = {}
def GetPersonExpeditionNameLookup(expedition): def GetPersonExpeditionNameLookup(expedition):
global Gpersonexpeditionnamelookup global Gpersonexpeditionnamelookup
def apply_variations(f, l): def apply_variations(f, l):
'''Be generous in guessing possible matches. Any duplicates will be ruled as invalid. """Be generous in guessing possible matches. Any duplicates will be ruled as invalid."""
'''
f = f.lower() f = f.lower()
l = l.lower() l = l.lower()
variations = [] variations = []
@ -175,7 +195,7 @@ def GetPersonExpeditionNameLookup(expedition):
variations.append(f + " " + l) variations.append(f + " " + l)
variations.append(f + " " + l[0]) variations.append(f + " " + l[0])
variations.append(f + l[0]) variations.append(f + l[0])
variations.append(f + " " +l[0] + '.') variations.append(f + " " + l[0] + ".")
variations.append(f[0] + " " + l) variations.append(f[0] + " " + l)
variations.append(f[0] + ". " + l) variations.append(f[0] + ". " + l)
variations.append(f[0] + l) variations.append(f[0] + l)
@ -246,31 +266,31 @@ def GetPersonExpeditionNameLookup(expedition):
if f == "Becka".lower(): if f == "Becka".lower():
possnames += apply_variations("Rebecca", l) possnames += apply_variations("Rebecca", l)
if f'{f} {l}' == "Andy Waddington".lower(): if f"{f} {l}" == "Andy Waddington".lower():
possnames += apply_variations("aer", "waddington") possnames += apply_variations("aer", "waddington")
if f'{f} {l}' == "Phil Underwood".lower(): if f"{f} {l}" == "Phil Underwood".lower():
possnames += apply_variations("phil", "underpants") possnames += apply_variations("phil", "underpants")
if f'{f} {l}' == "Naomi Griffiths".lower(): if f"{f} {l}" == "Naomi Griffiths".lower():
possnames += apply_variations("naomi", "makins") possnames += apply_variations("naomi", "makins")
if f'{f} {l}' == "Tina White".lower(): if f"{f} {l}" == "Tina White".lower():
possnames += apply_variations("tina", "richardson") possnames += apply_variations("tina", "richardson")
if f'{f} {l}' == "Cat Hulse".lower(): if f"{f} {l}" == "Cat Hulse".lower():
possnames += apply_variations("catherine", "hulse") possnames += apply_variations("catherine", "hulse")
possnames += apply_variations("cat", "henry") possnames += apply_variations("cat", "henry")
if f'{f} {l}' == "Jess Stirrups".lower(): if f"{f} {l}" == "Jess Stirrups".lower():
possnames += apply_variations("jessica", "stirrups") possnames += apply_variations("jessica", "stirrups")
if f'{f} {l}' == "Nat Dalton".lower(): if f"{f} {l}" == "Nat Dalton".lower():
possnames += apply_variations("nathanael", "dalton") # correct. He has a weird spelling. possnames += apply_variations("nathanael", "dalton") # correct. He has a weird spelling.
if f'{f} {l}' == "Mike Richardson".lower(): if f"{f} {l}" == "Mike Richardson".lower():
possnames.append("mta") possnames.append("mta")
possnames.append("miketa") possnames.append("miketa")
possnames.append("mike the animal") possnames.append("mike the animal")
possnames.append("animal") possnames.append("animal")
if f'{f} {l}' == "Eric Landgraf".lower(): if f"{f} {l}" == "Eric Landgraf".lower():
possnames.append("eric c.landgraf") possnames.append("eric c.landgraf")
possnames.append("eric c. landgraf") possnames.append("eric c. landgraf")
possnames.append("eric c landgraf") possnames.append("eric c landgraf")
if f'{f} {l}' == "Nadia Raeburn".lower(): if f"{f} {l}" == "Nadia Raeburn".lower():
possnames.append("nadia rc") possnames.append("nadia rc")
possnames.append("nadia raeburn-cherradi") possnames.append("nadia raeburn-cherradi")
@ -297,7 +317,5 @@ def GetPersonExpeditionNameLookup(expedition):
for shortname in short: for shortname in short:
res[shortname] = short[shortname] res[shortname] = short[shortname]
Gpersonexpeditionnamelookup[expedition.name] = res Gpersonexpeditionnamelookup[expedition.name] = res
return res return res

View File

@ -17,8 +17,8 @@ from troggle.core.models.troggle import DataIssue
from troggle.core.utils import save_carefully from troggle.core.utils import save_carefully
from troggle.core.views.scans import datewallet from troggle.core.views.scans import datewallet
'''Searches through all the survey scans directories (wallets) in expofiles, looking for images to be referenced. """Searches through all the survey scans directories (wallets) in expofiles, looking for images to be referenced.
''' """
contentsjson = "contents.json" contentsjson = "contents.json"
@ -26,28 +26,54 @@ git = settings.GIT
# to do: Actually read all the JSON files and set the survex file field appropriately! # to do: Actually read all the JSON files and set the survex file field appropriately!
def setwalletyear(wallet): def setwalletyear(wallet):
_ = wallet.year() # don't need return value. Just calling this saves it as w.walletyear _ = wallet.year() # don't need return value. Just calling this saves it as w.walletyear
def load_all_scans(): def load_all_scans():
'''This iterates through the scans directories (either here or on the remote server) """This iterates through the scans directories (either here or on the remote server)
and builds up the models we can access later. and builds up the models we can access later.
It does NOT read or validate anything in the JSON data attached to each wallet. Those checks It does NOT read or validate anything in the JSON data attached to each wallet. Those checks
are done at runtime, when a wallet is accessed, not at import time. are done at runtime, when a wallet is accessed, not at import time.
''' """
print(' - Loading Survey Scans') print(" - Loading Survey Scans")
SingleScan.objects.all().delete() SingleScan.objects.all().delete()
Wallet.objects.all().delete() Wallet.objects.all().delete()
print(' - deleting all Wallet and SingleScan objects') print(" - deleting all Wallet and SingleScan objects")
DataIssue.objects.filter(parser='scans').delete() DataIssue.objects.filter(parser="scans").delete()
# These are valid old file types to be visible, they are not necessarily allowed to be uploaded to a new wallet. # These are valid old file types to be visible, they are not necessarily allowed to be uploaded to a new wallet.
valids = [".top",".txt",".tif",".png",".jpg",".jpeg",".pdf",".svg",".gif",".xvi", valids = [
".json",".autosave",".sxd",".svx",".th",".th2",".tdr",".sql",".zip",".dxf",".3d", ".top",
".ods",".csv",".xcf",".xml"] ".txt",
".tif",
".png",
".jpg",
".jpeg",
".pdf",
".svg",
".gif",
".xvi",
".json",
".autosave",
".sxd",
".svx",
".th",
".th2",
".tdr",
".sql",
".zip",
".dxf",
".3d",
".ods",
".csv",
".xcf",
".xml",
]
validnames = ["thconfig", "manifest"] validnames = ["thconfig", "manifest"]
# iterate into the surveyscans directory # iterate into the surveyscans directory
@ -55,12 +81,12 @@ def load_all_scans():
# they are if they are /2010/2010#33 # they are if they are /2010/2010#33
# or /1996-1999NotKHbook/ # or /1996-1999NotKHbook/
# but not if they are /2010/2010#33/therion/ : the wallet is /2010#33/ not /therion/ # but not if they are /2010/2010#33/therion/ : the wallet is /2010#33/ not /therion/
print(' ', end='') print(" ", end="")
scans_path = Path(settings.SCANS_ROOT) scans_path = Path(settings.SCANS_ROOT)
seen = [] seen = []
c = 0 c = 0
wallets = {} wallets = {}
for p in scans_path.rglob('*'): for p in scans_path.rglob("*"):
if p.is_file(): if p.is_file():
if p.suffix.lower() not in valids and p.name.lower() not in validnames: if p.suffix.lower() not in valids and p.name.lower() not in validnames:
# print(f"'{p}'", end='\n') # print(f"'{p}'", end='\n')
@ -71,9 +97,9 @@ def load_all_scans():
c += 1 c += 1
if c % 15 == 0: if c % 15 == 0:
print(".", end='') print(".", end="")
if c % 750 == 0: if c % 750 == 0:
print("\n ", end='') print("\n ", end="")
if p.parent.parent.parent.parent == scans_path: if p.parent.parent.parent.parent == scans_path:
# print(f"too deep {p}", end='\n') # print(f"too deep {p}", end='\n')
@ -86,7 +112,7 @@ def load_all_scans():
if walletname in wallets: if walletname in wallets:
wallet = wallets[walletname] wallet = wallets[walletname]
else: else:
print("", flush=True, end='') print("", flush=True, end="")
# Create the wallet object. But we don't have a date for it yet. # Create the wallet object. But we don't have a date for it yet.
wallet = Wallet(fpath=fpath, walletname=walletname) wallet = Wallet(fpath=fpath, walletname=walletname)
setwalletyear(wallet) setwalletyear(wallet)
@ -96,7 +122,6 @@ def load_all_scans():
singlescan = SingleScan(ffile=fpath, name=p.name, wallet=wallet) singlescan = SingleScan(ffile=fpath, name=p.name, wallet=wallet)
singlescan.save() singlescan.save()
# only printing progress: # only printing progress:
tag = p.parent tag = p.parent
if len(walletname) > 4: if len(walletname) > 4:
@ -104,18 +129,17 @@ def load_all_scans():
tag = p.parent.parent tag = p.parent.parent
if tag not in seen: if tag not in seen:
print(f" {tag.name} ", end='') print(f" {tag.name} ", end="")
if len(str(tag.name)) > 17: if len(str(tag.name)) > 17:
print('\n ', end='') print("\n ", end="")
seen.append(tag) seen.append(tag)
print(f"\n - found and loaded {c:,} acceptable scan files in {len(wallets):,} wallets")
print(f'\n - found and loaded {c:,} acceptable scan files in {len(wallets):,} wallets')
# but we also need to check if JSON exists, even if there are no uploaded scan files. # but we also need to check if JSON exists, even if there are no uploaded scan files.
# Here we know there is a rigid folder structure, so no need to look for sub folders # Here we know there is a rigid folder structure, so no need to look for sub folders
print(f"\n - Checking for wallets where JSON exists, but there may be no uploaded scan files:") print(f"\n - Checking for wallets where JSON exists, but there may be no uploaded scan files:")
print(' ', end='') print(" ", end="")
wjson = 0 wjson = 0
contents_path = Path(settings.DRAWINGS_DATA, "walletjson") contents_path = Path(settings.DRAWINGS_DATA, "walletjson")
for yeardir in contents_path.iterdir(): for yeardir in contents_path.iterdir():
@ -127,9 +151,9 @@ def load_all_scans():
if walletname not in wallets: if walletname not in wallets:
wjson += 1 wjson += 1
if wjson % 10 == 0: if wjson % 10 == 0:
print("\n ", end='') print("\n ", end="")
print(f"{walletname} ", end='') print(f"{walletname} ", end="")
fpath = Path(settings.SCANS_ROOT, str(yeardir.stem), walletname) fpath = Path(settings.SCANS_ROOT, str(yeardir.stem), walletname)
# The wallets found from JSON should all have dates already # The wallets found from JSON should all have dates already
wallet, created = Wallet.objects.update_or_create(walletname=walletname, fpath=fpath) wallet, created = Wallet.objects.update_or_create(walletname=walletname, fpath=fpath)
@ -140,9 +164,11 @@ def load_all_scans():
# But we *do* set the walletyear: # But we *do* set the walletyear:
setwalletyear(wallet) setwalletyear(wallet)
if not created: if not created:
print(f"\n - {walletname} was not created, but was not in directory walk of /surveyscans/. Who created it?") print(
f"\n - {walletname} was not created, but was not in directory walk of /surveyscans/. Who created it?"
)
wallet.save() wallet.save()
print(f'\n - found another {wjson:,} JSON files, making a total of {len(wallets):,} wallets') print(f"\n - found another {wjson:,} JSON files, making a total of {len(wallets):,} wallets")
wallets = Wallet.objects.filter(walletyear=None) wallets = Wallet.objects.filter(walletyear=None)
for w in wallets: for w in wallets:
w.walletyear = datetime.date(1999, 1, 1) w.walletyear = datetime.date(1999, 1, 1)

File diff suppressed because it is too large Load Diff

5
pyproject.toml Normal file
View File

@ -0,0 +1,5 @@
[tool.black]
line-length = 120
[tool.isort]
profile = 'black'

View File

@ -24,7 +24,7 @@ print("* importing troggle/settings.py")
# default value, then gets overwritten by real secrets # default value, then gets overwritten by real secrets
SECRET_KEY = "not-the-real-secret-key-a#vaeozn0---^fj!355qki*vj2" SECRET_KEY = "not-the-real-secret-key-a#vaeozn0---^fj!355qki*vj2"
GIT = 'git' # command for running git GIT = "git" # command for running git
# Note that this builds upon the django system installed # Note that this builds upon the django system installed
# global settings in # global settings in
@ -36,7 +36,7 @@ GIT = 'git' # command for running git
# Django settings for troggle project. # Django settings for troggle project.
ALLOWED_HOSTS = ['*', 'expo.survex.com', '.survex.com', 'localhost', '127.0.0.1', '192.168.0.5' ] ALLOWED_HOSTS = ["*", "expo.survex.com", ".survex.com", "localhost", "127.0.0.1", "192.168.0.5"]
ADMINS = ( ADMINS = (
# ('Your Name', 'your_email@domain.com'), # ('Your Name', 'your_email@domain.com'),
@ -51,11 +51,11 @@ MANAGERS = ADMINS
# If running in a Windows environment this must be set to the same as your # If running in a Windows environment this must be set to the same as your
# system time zone. # system time zone.
USE_TZ = True USE_TZ = True
TIME_ZONE = 'Europe/London' TIME_ZONE = "Europe/London"
# Language code for this installation. All choices can be found here: # Language code for this installation. All choices can be found here:
# http://www.i18nguy.com/unicode/language-identifiers.html # http://www.i18nguy.com/unicode/language-identifiers.html
LANGUAGE_CODE = 'en-uk' LANGUAGE_CODE = "en-uk"
SITE_ID = 1 SITE_ID = 1
@ -77,33 +77,35 @@ SURVEX_TOPNAME = "1623-and-1626-no-schoenberg-hs"
# "2012-ns-07", "2012-ns-08", "2012-ns-12", "2012-ns-14", "2012-ns-15", "2014-bl888", # "2012-ns-07", "2012-ns-08", "2012-ns-12", "2012-ns-14", "2012-ns-15", "2014-bl888",
# "2018-pf-01", "2018-pf-02"] # "2018-pf-01", "2018-pf-02"]
APPEND_SLASH = False # never relevant because we have urls that match unknown files and produce an 'edit this page' response APPEND_SLASH = (
False # never relevant because we have urls that match unknown files and produce an 'edit this page' response
)
SMART_APPEND_SLASH = True # not eorking as middleware different after Dj2.0 SMART_APPEND_SLASH = True # not eorking as middleware different after Dj2.0
LOGIN_REDIRECT_URL = '/' # does not seem to have any effect LOGIN_REDIRECT_URL = "/" # does not seem to have any effect
SECURE_CONTENT_TYPE_NOSNIFF = True SECURE_CONTENT_TYPE_NOSNIFF = True
SECURE_BROWSER_XSS_FILTER = True SECURE_BROWSER_XSS_FILTER = True
# SESSION_COOKIE_SECURE = True # if enabled, cannot login to Django control panel, bug elsewhere? # SESSION_COOKIE_SECURE = True # if enabled, cannot login to Django control panel, bug elsewhere?
# CSRF_COOKIE_SECURE = True # if enabled only sends cookies over SSL # CSRF_COOKIE_SECURE = True # if enabled only sends cookies over SSL
X_FRAME_OPTIONS = 'DENY' # changed to "DENY" after I eliminated all the iframes e.g. /xmlvalid.html X_FRAME_OPTIONS = "DENY" # changed to "DENY" after I eliminated all the iframes e.g. /xmlvalid.html
DEFAULT_AUTO_FIELD = 'django.db.models.BigAutoField' # from Django 3.2 DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField" # from Django 3.2
INSTALLED_APPS = ( INSTALLED_APPS = (
'django.contrib.admin', "django.contrib.admin",
'django.contrib.auth', # includes the url redirections for login, logout "django.contrib.auth", # includes the url redirections for login, logout
'django.contrib.contenttypes', "django.contrib.contenttypes",
'django.contrib.sessions', "django.contrib.sessions",
'django.contrib.messages', "django.contrib.messages",
'django.contrib.admindocs', "django.contrib.admindocs",
'django.forms', #Required to customise widget templates "django.forms", # Required to customise widget templates
# 'django.contrib.staticfiles', # We put our CSS etc explicitly in the right place so do not need this # 'django.contrib.staticfiles', # We put our CSS etc explicitly in the right place so do not need this
'troggle.core', "troggle.core",
) )
FORM_RENDERER = 'django.forms.renderers.TemplatesSetting' #Required to customise widget templates FORM_RENDERER = "django.forms.renderers.TemplatesSetting" # Required to customise widget templates
# See the recommended order of these in https://docs.djangoproject.com/en/2.2/ref/middleware/ # See the recommended order of these in https://docs.djangoproject.com/en/2.2/ref/middleware/
# Note that this is a radically different onion architecture from earlier versions though it looks the same, # Note that this is a radically different onion architecture from earlier versions though it looks the same,
@ -111,21 +113,21 @@ FORM_RENDERER = 'django.forms.renderers.TemplatesSetting' #Required to customise
# Seriously, read this: https://www.webforefront.com/django/middlewaredjango.html which is MUCH BETTER than the docs # Seriously, read this: https://www.webforefront.com/django/middlewaredjango.html which is MUCH BETTER than the docs
MIDDLEWARE = [ MIDDLEWARE = [
#'django.middleware.security.SecurityMiddleware', # SECURE_SSL_REDIRECT and SECURE_SSL_HOST # we don't use this #'django.middleware.security.SecurityMiddleware', # SECURE_SSL_REDIRECT and SECURE_SSL_HOST # we don't use this
'django.middleware.gzip.GZipMiddleware', # not needed when expofiles and photos served by apache "django.middleware.gzip.GZipMiddleware", # not needed when expofiles and photos served by apache
'django.contrib.sessions.middleware.SessionMiddleware', # Manages sessions, if CSRF_USE_SESSIONS then it needs to be early "django.contrib.sessions.middleware.SessionMiddleware", # Manages sessions, if CSRF_USE_SESSIONS then it needs to be early
'django.middleware.common.CommonMiddleware', # DISALLOWED_USER_AGENTS, APPEND_SLASH and PREPEND_WWW "django.middleware.common.CommonMiddleware", # DISALLOWED_USER_AGENTS, APPEND_SLASH and PREPEND_WWW
'django.middleware.csrf.CsrfViewMiddleware', # Cross Site Request Forgeries by adding hidden form fields to POST "django.middleware.csrf.CsrfViewMiddleware", # Cross Site Request Forgeries by adding hidden form fields to POST
'django.contrib.auth.middleware.AuthenticationMiddleware', # Adds the user attribute, representing the currently-logged-in user "django.contrib.auth.middleware.AuthenticationMiddleware", # Adds the user attribute, representing the currently-logged-in user
'django.contrib.admindocs.middleware.XViewMiddleware', # this and docutils needed by admindocs "django.contrib.admindocs.middleware.XViewMiddleware", # this and docutils needed by admindocs
'django.contrib.messages.middleware.MessageMiddleware', # Cookie-based and session-based message support. Needed by admin system "django.contrib.messages.middleware.MessageMiddleware", # Cookie-based and session-based message support. Needed by admin system
'django.middleware.clickjacking.XFrameOptionsMiddleware', # clickjacking protection via the X-Frame-Options header "django.middleware.clickjacking.XFrameOptionsMiddleware", # clickjacking protection via the X-Frame-Options header
#'django.middleware.security.SecurityMiddleware', # SECURE_HSTS_SECONDS, SECURE_CONTENT_TYPE_NOSNIFF, SECURE_BROWSER_XSS_FILTER, SECURE_REFERRER_POLICY, and SECURE_SSL_REDIRECT #'django.middleware.security.SecurityMiddleware', # SECURE_HSTS_SECONDS, SECURE_CONTENT_TYPE_NOSNIFF, SECURE_BROWSER_XSS_FILTER, SECURE_REFERRER_POLICY, and SECURE_SSL_REDIRECT
#'troggle.core.middleware.SmartAppendSlashMiddleware' # needs adapting after Dj2.0 #'troggle.core.middleware.SmartAppendSlashMiddleware' # needs adapting after Dj2.0
] ]
ROOT_URLCONF = 'troggle.urls' ROOT_URLCONF = "troggle.urls"
WSGI_APPLICATION = 'troggle.wsgi.application' # change to asgi as soon as we upgrade to Django 3.0 WSGI_APPLICATION = "troggle.wsgi.application" # change to asgi as soon as we upgrade to Django 3.0
ACCOUNT_ACTIVATION_DAYS = 3 ACCOUNT_ACTIVATION_DAYS = 3
@ -141,7 +143,7 @@ QM_PATTERN="\[\[\s*[Qq][Mm]:([ABC]?)(\d{4})-(\d*)-(\d*)\]\]"
# TINYMCE_SPELLCHECKER = False # TINYMCE_SPELLCHECKER = False
# TINYMCE_COMPRESSOR = True # TINYMCE_COMPRESSOR = True
TEST_RUNNER = 'django.test.runner.DiscoverRunner' TEST_RUNNER = "django.test.runner.DiscoverRunner"
from localsettings import * from localsettings import *