forked from expo/troggle
reformatted using black
This commit is contained in:
parent
0f8fe0e290
commit
ba2ae6cd82
363
databaseReset.py
363
databaseReset.py
@ -20,8 +20,8 @@ troggle application.
|
|||||||
"""
|
"""
|
||||||
print(" - settings on loading databaseReset.py", flush=True)
|
print(" - settings on loading databaseReset.py", flush=True)
|
||||||
|
|
||||||
os.environ['PYTHONPATH'] = str(settings.PYTHON_PATH)
|
os.environ["PYTHONPATH"] = str(settings.PYTHON_PATH)
|
||||||
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'settings')
|
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "settings")
|
||||||
|
|
||||||
print(" - settings on loading databaseReset.py")
|
print(" - settings on loading databaseReset.py")
|
||||||
|
|
||||||
@ -31,14 +31,15 @@ print(f" - Memory footprint before loading Django: {resource.getrusage(resource.
|
|||||||
try:
|
try:
|
||||||
django.setup()
|
django.setup()
|
||||||
except:
|
except:
|
||||||
print(" ! Cyclic reference failure. Can occur when the initial db is empty. Fixed now (in UploadFileForm) but easy to reintroduce..")
|
print(
|
||||||
|
" ! Cyclic reference failure. Can occur when the initial db is empty. Fixed now (in UploadFileForm) but easy to reintroduce.."
|
||||||
|
)
|
||||||
raise
|
raise
|
||||||
print(f" - Memory footprint after loading Django: {resource.getrusage(resource.RUSAGE_SELF)[2] / 1024.0:.3f} MB")
|
print(f" - Memory footprint after loading Django: {resource.getrusage(resource.RUSAGE_SELF)[2] / 1024.0:.3f} MB")
|
||||||
|
|
||||||
from django.contrib.auth.models import User
|
from django.contrib.auth.models import User
|
||||||
from django.core import management
|
from django.core import management
|
||||||
from django.db import (close_old_connections, connection, connections,
|
from django.db import close_old_connections, connection, connections, transaction
|
||||||
transaction)
|
|
||||||
from django.http import HttpResponse
|
from django.http import HttpResponse
|
||||||
from django.urls import reverse
|
from django.urls import reverse
|
||||||
|
|
||||||
@ -46,24 +47,32 @@ import troggle.core.models.survex
|
|||||||
from troggle.core.models.caves import Cave, Entrance
|
from troggle.core.models.caves import Cave, Entrance
|
||||||
from troggle.core.models.troggle import DataIssue
|
from troggle.core.models.troggle import DataIssue
|
||||||
from troggle.core.utils import get_process_memory
|
from troggle.core.utils import get_process_memory
|
||||||
from troggle.parsers.imports import (import_caves, import_drawingsfiles,
|
from troggle.parsers.imports import (
|
||||||
import_ents, import_loadpos,
|
import_caves,
|
||||||
import_logbook, import_logbooks,
|
import_drawingsfiles,
|
||||||
import_people, import_QMs, import_survex,
|
import_ents,
|
||||||
import_surveyscans)
|
import_loadpos,
|
||||||
|
import_logbook,
|
||||||
|
import_logbooks,
|
||||||
|
import_people,
|
||||||
|
import_QMs,
|
||||||
|
import_survex,
|
||||||
|
import_surveyscans,
|
||||||
|
)
|
||||||
|
|
||||||
if os.geteuid() == 0:
|
if os.geteuid() == 0:
|
||||||
# This protects the server from having the wrong file permissions written on logs and caches
|
# This protects the server from having the wrong file permissions written on logs and caches
|
||||||
print("This script should be run as expo not root - quitting")
|
print("This script should be run as expo not root - quitting")
|
||||||
exit()
|
exit()
|
||||||
|
|
||||||
expouser=settings.EXPOUSER
|
expouser = settings.EXPOUSER
|
||||||
expouserpass=settings.EXPOUSERPASS
|
expouserpass = settings.EXPOUSERPASS
|
||||||
expouseremail=settings.EXPOUSER_EMAIL
|
expouseremail = settings.EXPOUSER_EMAIL
|
||||||
|
|
||||||
|
expoadminuser = settings.EXPOADMINUSER
|
||||||
|
expoadminuserpass = settings.EXPOADMINUSERPASS
|
||||||
|
expoadminuseremail = settings.EXPOADMINUSER_EMAIL
|
||||||
|
|
||||||
expoadminuser=settings.EXPOADMINUSER
|
|
||||||
expoadminuserpass=settings.EXPOADMINUSERPASS
|
|
||||||
expoadminuseremail=settings.EXPOADMINUSER_EMAIL
|
|
||||||
|
|
||||||
def reinit_db():
|
def reinit_db():
|
||||||
"""Rebuild database from scratch. Deletes the file first if sqlite is used,
|
"""Rebuild database from scratch. Deletes the file first if sqlite is used,
|
||||||
@ -72,22 +81,26 @@ def reinit_db():
|
|||||||
in memory (django python models, not the database), so there is already a full load
|
in memory (django python models, not the database), so there is already a full load
|
||||||
of stuff known. Deleting the db file does not clear memory.
|
of stuff known. Deleting the db file does not clear memory.
|
||||||
"""
|
"""
|
||||||
print("Reinitialising db ",end="")
|
print("Reinitialising db ", end="")
|
||||||
print(django.db.connections.databases['default']['NAME'])
|
print(django.db.connections.databases["default"]["NAME"])
|
||||||
currentdbname = settings.DATABASES['default']['NAME']
|
currentdbname = settings.DATABASES["default"]["NAME"]
|
||||||
if currentdbname == ':memory:':
|
if currentdbname == ":memory:":
|
||||||
# closing connections should wipe the in-memory database
|
# closing connections should wipe the in-memory database
|
||||||
django.db.close_old_connections()
|
django.db.close_old_connections()
|
||||||
for conn in django.db.connections.all():
|
for conn in django.db.connections.all():
|
||||||
print(" ! Closing another connection to db...")
|
print(" ! Closing another connection to db...")
|
||||||
conn.close()
|
conn.close()
|
||||||
elif django.db.connections.databases['default']['ENGINE'] == 'django.db.backends.sqlite3':
|
elif django.db.connections.databases["default"]["ENGINE"] == "django.db.backends.sqlite3":
|
||||||
if os.path.isfile(currentdbname):
|
if os.path.isfile(currentdbname):
|
||||||
try:
|
try:
|
||||||
print(" - deleting " + currentdbname)
|
print(" - deleting " + currentdbname)
|
||||||
os.remove(currentdbname)
|
os.remove(currentdbname)
|
||||||
except OSError:
|
except OSError:
|
||||||
print(" ! OSError on removing: " + currentdbname + "\n ! Is the file open in another app? Is the server running?\n")
|
print(
|
||||||
|
" ! OSError on removing: "
|
||||||
|
+ currentdbname
|
||||||
|
+ "\n ! Is the file open in another app? Is the server running?\n"
|
||||||
|
)
|
||||||
raise
|
raise
|
||||||
else:
|
else:
|
||||||
print(" - No database file found: " + currentdbname + " ..continuing, will create it.\n")
|
print(" - No database file found: " + currentdbname + " ..continuing, will create it.\n")
|
||||||
@ -102,102 +115,110 @@ def reinit_db():
|
|||||||
cursor.execute(f"USE {currentdbname}")
|
cursor.execute(f"USE {currentdbname}")
|
||||||
print(f" - Nuked : {currentdbname}\n")
|
print(f" - Nuked : {currentdbname}\n")
|
||||||
|
|
||||||
print(" - Migrating: " + django.db.connections.databases['default']['NAME'])
|
print(" - Migrating: " + django.db.connections.databases["default"]["NAME"])
|
||||||
|
|
||||||
if django.db.connections.databases['default']['ENGINE'] == 'django.db.backends.sqlite3':
|
if django.db.connections.databases["default"]["ENGINE"] == "django.db.backends.sqlite3":
|
||||||
#with transaction.atomic():
|
# with transaction.atomic():
|
||||||
management.call_command('makemigrations','core', interactive=False)
|
management.call_command("makemigrations", "core", interactive=False)
|
||||||
management.call_command('migrate', interactive=False)
|
management.call_command("migrate", interactive=False)
|
||||||
management.call_command('migrate','core', interactive=False)
|
management.call_command("migrate", "core", interactive=False)
|
||||||
else:
|
else:
|
||||||
management.call_command('makemigrations','core', interactive=False)
|
management.call_command("makemigrations", "core", interactive=False)
|
||||||
management.call_command('migrate', interactive=False)
|
management.call_command("migrate", interactive=False)
|
||||||
management.call_command('migrate','core', interactive=False)
|
management.call_command("migrate", "core", interactive=False)
|
||||||
|
|
||||||
|
print(" - done migration on: " + settings.DATABASES["default"]["NAME"])
|
||||||
print(" - done migration on: " + settings.DATABASES['default']['NAME'])
|
print("users in db already: ", len(User.objects.all()))
|
||||||
print("users in db already: ",len(User.objects.all()))
|
|
||||||
with transaction.atomic():
|
with transaction.atomic():
|
||||||
try:
|
try:
|
||||||
print(" - Setting up expo user on: " + django.db.connections.databases['default']['NAME'])
|
print(" - Setting up expo user on: " + django.db.connections.databases["default"]["NAME"])
|
||||||
print(f" - user: {expouser} ({expouserpass:.5}...) <{expouseremail}> ")
|
print(f" - user: {expouser} ({expouserpass:.5}...) <{expouseremail}> ")
|
||||||
user = User.objects.create_user(expouser, expouseremail, expouserpass)
|
user = User.objects.create_user(expouser, expouseremail, expouserpass)
|
||||||
user.is_staff = False
|
user.is_staff = False
|
||||||
user.is_superuser = False
|
user.is_superuser = False
|
||||||
user.save()
|
user.save()
|
||||||
except:
|
except:
|
||||||
print(" ! INTEGRITY ERROR user on: " + settings.DATABASES['default']['NAME'])
|
print(" ! INTEGRITY ERROR user on: " + settings.DATABASES["default"]["NAME"])
|
||||||
print(django.db.connections.databases['default']['NAME'])
|
print(django.db.connections.databases["default"]["NAME"])
|
||||||
print(" ! You probably have not got a clean db when you thought you had.\n")
|
print(" ! You probably have not got a clean db when you thought you had.\n")
|
||||||
print(" ! Also you are probably NOT running an in-memory db now.\n")
|
print(" ! Also you are probably NOT running an in-memory db now.\n")
|
||||||
print("users in db: ",len(User.objects.all()))
|
print("users in db: ", len(User.objects.all()))
|
||||||
print("tables in db: ",len(connection.introspection.table_names()))
|
print("tables in db: ", len(connection.introspection.table_names()))
|
||||||
memdumpsql(fn='integrityfail.sql')
|
memdumpsql(fn="integrityfail.sql")
|
||||||
django.db.connections.databases['default']['NAME'] = ':memory:'
|
django.db.connections.databases["default"]["NAME"] = ":memory:"
|
||||||
#raise
|
# raise
|
||||||
|
|
||||||
with transaction.atomic():
|
with transaction.atomic():
|
||||||
try:
|
try:
|
||||||
print(" - Setting up expoadmin user on: " + django.db.connections.databases['default']['NAME'])
|
print(" - Setting up expoadmin user on: " + django.db.connections.databases["default"]["NAME"])
|
||||||
print(f" - user: {expoadminuser} ({expoadminuserpass:.5}...) <{expoadminuseremail}> ")
|
print(f" - user: {expoadminuser} ({expoadminuserpass:.5}...) <{expoadminuseremail}> ")
|
||||||
user = User.objects.create_user(expoadminuser, expoadminuseremail, expoadminuserpass)
|
user = User.objects.create_user(expoadminuser, expoadminuseremail, expoadminuserpass)
|
||||||
user.is_staff = True
|
user.is_staff = True
|
||||||
user.is_superuser = True
|
user.is_superuser = True
|
||||||
user.save()
|
user.save()
|
||||||
except:
|
except:
|
||||||
print(" ! INTEGRITY ERROR user on: " + settings.DATABASES['default']['NAME'])
|
print(" ! INTEGRITY ERROR user on: " + settings.DATABASES["default"]["NAME"])
|
||||||
print(django.db.connections.databases['default']['NAME'])
|
print(django.db.connections.databases["default"]["NAME"])
|
||||||
print(" ! You probably have not got a clean db when you thought you had.\n")
|
print(" ! You probably have not got a clean db when you thought you had.\n")
|
||||||
print(" ! Also you are probably NOT running an in-memory db now.\n")
|
print(" ! Also you are probably NOT running an in-memory db now.\n")
|
||||||
print("users in db: ",len(User.objects.all()))
|
print("users in db: ", len(User.objects.all()))
|
||||||
print("tables in db: ",len(connection.introspection.table_names()))
|
print("tables in db: ", len(connection.introspection.table_names()))
|
||||||
memdumpsql(fn='integrityfail.sql')
|
memdumpsql(fn="integrityfail.sql")
|
||||||
django.db.connections.databases['default']['NAME'] = ':memory:'
|
django.db.connections.databases["default"]["NAME"] = ":memory:"
|
||||||
#raise
|
# raise
|
||||||
|
|
||||||
|
|
||||||
def memdumpsql(fn):
|
def memdumpsql(fn):
|
||||||
'''Unused option to dump SQL. Aborted attempt to create a cache for loading data
|
"""Unused option to dump SQL. Aborted attempt to create a cache for loading data"""
|
||||||
'''
|
|
||||||
djconn = django.db.connection
|
djconn = django.db.connection
|
||||||
from dump import _iterdump
|
from dump import _iterdump
|
||||||
with open(fn, 'w') as f:
|
|
||||||
|
with open(fn, "w") as f:
|
||||||
for line in _iterdump(djconn):
|
for line in _iterdump(djconn):
|
||||||
f.write(f"{line.encode('utf8')}\n")
|
f.write(f"{line.encode('utf8')}\n")
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
||||||
|
|
||||||
class JobQueue():
|
|
||||||
|
class JobQueue:
|
||||||
"""A list of import operations to run. Always reports profile times
|
"""A list of import operations to run. Always reports profile times
|
||||||
of the import operations in the same order.
|
of the import operations in the same order.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self,run):
|
def __init__(self, run):
|
||||||
'''Initialises the job queue object with a fixed order for reporting
|
"""Initialises the job queue object with a fixed order for reporting
|
||||||
options during a run. Imports the timings from previous runs.
|
options during a run. Imports the timings from previous runs.
|
||||||
'''
|
"""
|
||||||
self.runlabel = run
|
self.runlabel = run
|
||||||
self.queue = [] # tuples of (jobname, jobfunction)
|
self.queue = [] # tuples of (jobname, jobfunction)
|
||||||
self.results = {}
|
self.results = {}
|
||||||
self.results_order=[
|
self.results_order = [
|
||||||
"date","runlabel","reinit", "caves", "people",
|
"date",
|
||||||
"logbooks", "QMs", "scans", "survex",
|
"runlabel",
|
||||||
"drawings", "test" ]
|
"reinit",
|
||||||
|
"caves",
|
||||||
|
"people",
|
||||||
|
"logbooks",
|
||||||
|
"QMs",
|
||||||
|
"scans",
|
||||||
|
"survex",
|
||||||
|
"drawings",
|
||||||
|
"test",
|
||||||
|
]
|
||||||
for k in self.results_order:
|
for k in self.results_order:
|
||||||
self.results[k]=[]
|
self.results[k] = []
|
||||||
self.tfile = "import_profile.json"
|
self.tfile = "import_profile.json"
|
||||||
self.htmlfile = "profile.html" # for HTML results table. Not yet done.
|
self.htmlfile = "profile.html" # for HTML results table. Not yet done.
|
||||||
|
|
||||||
|
def enq(self, label, func):
|
||||||
def enq(self,label,func):
|
"""Enqueue: Adding elements to queue"""
|
||||||
'''Enqueue: Adding elements to queue
|
self.queue.append((label, func))
|
||||||
'''
|
|
||||||
self.queue.append((label,func))
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def loadprofiles(self):
|
def loadprofiles(self):
|
||||||
"""Load timings for previous imports for each data import type
|
"""Load timings for previous imports for each data import type"""
|
||||||
"""
|
|
||||||
if os.path.isfile(self.tfile):
|
if os.path.isfile(self.tfile):
|
||||||
try:
|
try:
|
||||||
f = open(self.tfile, "r")
|
f = open(self.tfile, "r")
|
||||||
@ -209,35 +230,31 @@ class JobQueue():
|
|||||||
# Python bug: https://github.com/ShinNoNoir/twitterwebsearch/issues/12
|
# Python bug: https://github.com/ShinNoNoir/twitterwebsearch/issues/12
|
||||||
f.close()
|
f.close()
|
||||||
for j in self.results_order:
|
for j in self.results_order:
|
||||||
self.results[j].append(None) # append a placeholder
|
self.results[j].append(None) # append a placeholder
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def dellastprofile(self):
|
def dellastprofile(self):
|
||||||
"""trim one set of data from the results
|
"""trim one set of data from the results"""
|
||||||
"""
|
|
||||||
for j in self.results_order:
|
for j in self.results_order:
|
||||||
self.results[j].pop() # delete last item
|
self.results[j].pop() # delete last item
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def delfirstprofile(self):
|
def delfirstprofile(self):
|
||||||
"""trim one set of data from the results
|
"""trim one set of data from the results"""
|
||||||
"""
|
|
||||||
for j in self.results_order:
|
for j in self.results_order:
|
||||||
self.results[j].pop(0) # delete zeroth item
|
self.results[j].pop(0) # delete zeroth item
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def saveprofiles(self):
|
def saveprofiles(self):
|
||||||
"""Save timings for the set of imports just completed
|
"""Save timings for the set of imports just completed"""
|
||||||
"""
|
with open(self.tfile, "w") as f:
|
||||||
with open(self.tfile, 'w') as f:
|
|
||||||
json.dump(self.results, f)
|
json.dump(self.results, f)
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def runqonce(self):
|
def runqonce(self):
|
||||||
"""Run all the jobs in the queue provided - once
|
"""Run all the jobs in the queue provided - once"""
|
||||||
"""
|
print("** Running job ", self.runlabel, end=" to ")
|
||||||
print("** Running job ", self.runlabel,end=" to ")
|
print(django.db.connections.databases["default"]["NAME"])
|
||||||
print(django.db.connections.databases['default']['NAME'])
|
|
||||||
jobstart = time.time()
|
jobstart = time.time()
|
||||||
print(f"-- Initial memory in use {get_process_memory():.3f} MB")
|
print(f"-- Initial memory in use {get_process_memory():.3f} MB")
|
||||||
self.results["date"].pop()
|
self.results["date"].pop()
|
||||||
@ -249,98 +266,100 @@ class JobQueue():
|
|||||||
start = time.time()
|
start = time.time()
|
||||||
memstart = get_process_memory()
|
memstart = get_process_memory()
|
||||||
jobname, jobparser = runfunction
|
jobname, jobparser = runfunction
|
||||||
#--------------------
|
# --------------------
|
||||||
jobparser() # invokes function passed in the second item in the tuple
|
jobparser() # invokes function passed in the second item in the tuple
|
||||||
#--------------------
|
# --------------------
|
||||||
memend = get_process_memory()
|
memend = get_process_memory()
|
||||||
duration = time.time()-start
|
duration = time.time() - start
|
||||||
#print(" - MEMORY start:{:.3f} MB end:{:.3f} MB change={:.3f} MB".format(memstart,memend, ))
|
# print(" - MEMORY start:{:.3f} MB end:{:.3f} MB change={:.3f} MB".format(memstart,memend, ))
|
||||||
print("\n*- Ended \"", jobname, f"\" {duration:.1f} seconds + {memend - memstart:.3f} MB ({memend:.3f} MB)")
|
print(
|
||||||
|
'\n*- Ended "',
|
||||||
|
jobname,
|
||||||
|
f'" {duration:.1f} seconds + {memend - memstart:.3f} MB ({memend:.3f} MB)',
|
||||||
|
)
|
||||||
self.results[jobname].pop() # the null item
|
self.results[jobname].pop() # the null item
|
||||||
self.results[jobname].append(duration)
|
self.results[jobname].append(duration)
|
||||||
|
|
||||||
|
|
||||||
jobend = time.time()
|
jobend = time.time()
|
||||||
jobduration = jobend-jobstart
|
jobduration = jobend - jobstart
|
||||||
print(f"** Ended job {self.runlabel} - {jobduration:.1f} seconds total.")
|
print(f"** Ended job {self.runlabel} - {jobduration:.1f} seconds total.")
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
def append_placeholders(self):
|
def append_placeholders(self):
|
||||||
'''Ads a dummy timing for each option, to fix off by one error
|
"""Ads a dummy timing for each option, to fix off by one error"""
|
||||||
'''
|
|
||||||
for j in self.results_order:
|
for j in self.results_order:
|
||||||
self.results[j].append(None) # append a placeholder
|
self.results[j].append(None) # append a placeholder
|
||||||
|
|
||||||
def run_now_django_tests(self,n):
|
def run_now_django_tests(self, n):
|
||||||
"""Runs the standard django test harness system which is in troggle/core/TESTS/tests.py
|
"""Runs the standard django test harness system which is in troggle/core/TESTS/tests.py"""
|
||||||
"""
|
management.call_command("test", verbosity=n)
|
||||||
management.call_command('test', verbosity=n)
|
|
||||||
django.db.close_old_connections()
|
django.db.close_old_connections()
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
"""Initialises profile timings record, initiates relational database, runs the job queue saving the imported data as an SQL image and saves the timing profile data.
|
"""Initialises profile timings record, initiates relational database, runs the job queue saving the imported data as an SQL image and saves the timing profile data."""
|
||||||
"""
|
|
||||||
self.loadprofiles()
|
self.loadprofiles()
|
||||||
print("-- start ", django.db.connections.databases['default']['ENGINE'], django.db.connections.databases['default']['NAME'])
|
print(
|
||||||
|
"-- start ",
|
||||||
|
django.db.connections.databases["default"]["ENGINE"],
|
||||||
|
django.db.connections.databases["default"]["NAME"],
|
||||||
|
)
|
||||||
self.runqonce()
|
self.runqonce()
|
||||||
if settings.DATABASES['default']['NAME'] ==":memory:":
|
if settings.DATABASES["default"]["NAME"] == ":memory:":
|
||||||
memdumpsql('memdump.sql') # saved contents of in-memory db, could be imported later..
|
memdumpsql("memdump.sql") # saved contents of in-memory db, could be imported later..
|
||||||
self.saveprofiles()
|
self.saveprofiles()
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def showprofile(self):
|
def showprofile(self):
|
||||||
"""Prints out the time it took to run the jobqueue
|
"""Prints out the time it took to run the jobqueue"""
|
||||||
"""
|
|
||||||
for k in self.results_order:
|
for k in self.results_order:
|
||||||
if k =="test":
|
if k == "test":
|
||||||
break
|
break
|
||||||
elif k =="date":
|
elif k == "date":
|
||||||
print(" days ago ", end=' ')
|
print(" days ago ", end=" ")
|
||||||
else:
|
else:
|
||||||
print('%10s (s)' % k, end=' ')
|
print("%10s (s)" % k, end=" ")
|
||||||
percen=0
|
percen = 0
|
||||||
r = self.results[k]
|
r = self.results[k]
|
||||||
|
|
||||||
for i in range(len(r)):
|
for i in range(len(r)):
|
||||||
if k == "runlabel":
|
if k == "runlabel":
|
||||||
if r[i]:
|
if r[i]:
|
||||||
rp = r[i]
|
rp = r[i]
|
||||||
else:
|
else:
|
||||||
rp = " - "
|
rp = " - "
|
||||||
print('%8s' % rp, end=' ')
|
print("%8s" % rp, end=" ")
|
||||||
elif k =="date":
|
elif k == "date":
|
||||||
# Calculate dates as days before present
|
# Calculate dates as days before present
|
||||||
if r[i]:
|
if r[i]:
|
||||||
if i == len(r)-1:
|
if i == len(r) - 1:
|
||||||
print(" this", end=' ')
|
print(" this", end=" ")
|
||||||
else:
|
else:
|
||||||
# prints one place to the left of where you expect
|
# prints one place to the left of where you expect
|
||||||
if r[len(r)-1]:
|
if r[len(r) - 1]:
|
||||||
s = r[i]-r[len(r)-1]
|
s = r[i] - r[len(r) - 1]
|
||||||
elif r[len(r)-2]:
|
elif r[len(r) - 2]:
|
||||||
s = r[i]-r[len(r)-2]
|
s = r[i] - r[len(r) - 2]
|
||||||
else:
|
else:
|
||||||
s = 0
|
s = 0
|
||||||
days = (s)/(24*60*60)
|
days = (s) / (24 * 60 * 60)
|
||||||
print(f'{days:8.2f}', end=' ')
|
print(f"{days:8.2f}", end=" ")
|
||||||
elif r[i]:
|
elif r[i]:
|
||||||
print(f'{r[i]:8.1f}', end=' ')
|
print(f"{r[i]:8.1f}", end=" ")
|
||||||
if i == len(r)-1 and r[i-1]:
|
if i == len(r) - 1 and r[i - 1]:
|
||||||
percen = 100* (r[i] - r[i-1])/r[i-1]
|
percen = 100 * (r[i] - r[i - 1]) / r[i - 1]
|
||||||
if abs(percen) >0.1:
|
if abs(percen) > 0.1:
|
||||||
print(f'{percen:8.1f}%', end=' ')
|
print(f"{percen:8.1f}%", end=" ")
|
||||||
else:
|
else:
|
||||||
print(" - ", end=' ')
|
print(" - ", end=" ")
|
||||||
print("")
|
print("")
|
||||||
print("\n")
|
print("\n")
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
def usage():
|
def usage():
|
||||||
'''Prints command line options, can print history of previous runs with timings
|
"""Prints command line options, can print history of previous runs with timings"""
|
||||||
'''
|
print(
|
||||||
print("""Usage is 'python databaseReset.py <command> [runlabel]'
|
"""Usage is 'python databaseReset.py <command> [runlabel]'
|
||||||
where command is:
|
where command is:
|
||||||
test - testing... imports people and prints profile. Deletes nothing.
|
test - testing... imports people and prints profile. Deletes nothing.
|
||||||
profile - print the profile from previous runs. Import nothing.
|
profile - print the profile from previous runs. Import nothing.
|
||||||
@ -370,7 +389,9 @@ def usage():
|
|||||||
|
|
||||||
Note that running the subfunctions will not produce a consistent website
|
Note that running the subfunctions will not produce a consistent website
|
||||||
- only the full 'reset' does that.
|
- only the full 'reset' does that.
|
||||||
""")
|
"""
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
||||||
@ -381,72 +402,72 @@ if __name__ == "__main__":
|
|||||||
|
|
||||||
if sys.getfilesystemencoding() != "utf-8":
|
if sys.getfilesystemencoding() != "utf-8":
|
||||||
print("UTF-8 is NOT the default file encoding. You must fix this.")
|
print("UTF-8 is NOT the default file encoding. You must fix this.")
|
||||||
print(f'- {sys.getdefaultencoding()=}')
|
print(f"- {sys.getdefaultencoding()=}")
|
||||||
print(f'- {sys.getfilesystemencoding()=}')
|
print(f"- {sys.getfilesystemencoding()=}")
|
||||||
print(f'- {locale.getdefaultlocale()=}')
|
print(f"- {locale.getdefaultlocale()=}")
|
||||||
print(f'- {locale.getpreferredencoding()=}')
|
print(f"- {locale.getpreferredencoding()=}")
|
||||||
print("Aborting run.")
|
print("Aborting run.")
|
||||||
exit()
|
exit()
|
||||||
|
|
||||||
if len(sys.argv)>2:
|
if len(sys.argv) > 2:
|
||||||
runlabel = sys.argv[len(sys.argv)-1]
|
runlabel = sys.argv[len(sys.argv) - 1]
|
||||||
else:
|
else:
|
||||||
runlabel=None
|
runlabel = None
|
||||||
|
|
||||||
jq = JobQueue(runlabel)
|
jq = JobQueue(runlabel)
|
||||||
|
|
||||||
if len(sys.argv)==1:
|
if len(sys.argv) == 1:
|
||||||
usage()
|
usage()
|
||||||
exit()
|
exit()
|
||||||
elif "init" in sys.argv:
|
elif "init" in sys.argv:
|
||||||
jq.enq("reinit",reinit_db)
|
jq.enq("reinit", reinit_db)
|
||||||
elif "ents" in sys.argv:
|
elif "ents" in sys.argv:
|
||||||
jq.enq("survex",import_ents)
|
jq.enq("survex", import_ents)
|
||||||
elif "test2" in sys.argv:
|
elif "test2" in sys.argv:
|
||||||
jq.enq("QMs",import_QMs)
|
jq.enq("QMs", import_QMs)
|
||||||
jq.enq("drawings",import_drawingsfiles)
|
jq.enq("drawings", import_drawingsfiles)
|
||||||
jq.enq("survex",import_survex)
|
jq.enq("survex", import_survex)
|
||||||
elif "caves" in sys.argv:
|
elif "caves" in sys.argv:
|
||||||
jq.enq("caves",import_caves)
|
jq.enq("caves", import_caves)
|
||||||
elif "logbooks" in sys.argv:
|
elif "logbooks" in sys.argv:
|
||||||
jq.enq("logbooks",import_logbooks)
|
jq.enq("logbooks", import_logbooks)
|
||||||
elif "logbook" in sys.argv:
|
elif "logbook" in sys.argv:
|
||||||
jq.enq("logbooks",import_logbook) # default year set in imports.py
|
jq.enq("logbooks", import_logbook) # default year set in imports.py
|
||||||
elif "people" in sys.argv:
|
elif "people" in sys.argv:
|
||||||
jq.enq("people",import_people)
|
jq.enq("people", import_people)
|
||||||
elif "QMs" in sys.argv:
|
elif "QMs" in sys.argv:
|
||||||
jq.enq("QMs",import_QMs)
|
jq.enq("QMs", import_QMs)
|
||||||
elif "reset" in sys.argv:
|
elif "reset" in sys.argv:
|
||||||
jq.enq("reinit",reinit_db)
|
jq.enq("reinit", reinit_db)
|
||||||
jq.enq("caves",import_caves)
|
jq.enq("caves", import_caves)
|
||||||
jq.enq("people",import_people)
|
jq.enq("people", import_people)
|
||||||
jq.enq("scans",import_surveyscans)
|
jq.enq("scans", import_surveyscans)
|
||||||
jq.enq("logbooks",import_logbooks)
|
jq.enq("logbooks", import_logbooks)
|
||||||
jq.enq("QMs",import_QMs)
|
jq.enq("QMs", import_QMs)
|
||||||
jq.enq("drawings",import_drawingsfiles)
|
jq.enq("drawings", import_drawingsfiles)
|
||||||
jq.enq("survex",import_survex)
|
jq.enq("survex", import_survex)
|
||||||
elif "scans" in sys.argv:
|
elif "scans" in sys.argv:
|
||||||
jq.enq("scans",import_surveyscans)
|
jq.enq("scans", import_surveyscans)
|
||||||
elif "survex" in sys.argv:
|
elif "survex" in sys.argv:
|
||||||
jq.enq("survex",import_survex)
|
jq.enq("survex", import_survex)
|
||||||
elif "loadpos" in sys.argv:
|
elif "loadpos" in sys.argv:
|
||||||
jq.enq("survex",import_loadpos)
|
jq.enq("survex", import_loadpos)
|
||||||
elif "drawings" in sys.argv:
|
elif "drawings" in sys.argv:
|
||||||
jq.enq("drawings",import_drawingsfiles)
|
jq.enq("drawings", import_drawingsfiles)
|
||||||
elif "dumplogbooks" in sys.argv: # untested in 2020
|
elif "dumplogbooks" in sys.argv: # untested in 2020
|
||||||
dumplogbooks()
|
dumplogbooks()
|
||||||
# elif "writecaves" in sys.argv: # untested in 2020 - will overwrite input files!!
|
# elif "writecaves" in sys.argv: # untested in 2020 - will overwrite input files!!
|
||||||
# writeCaves()
|
# writeCaves()
|
||||||
elif "profile" in sys.argv:
|
elif "profile" in sys.argv:
|
||||||
if runlabel == 'del' :
|
if runlabel == "del":
|
||||||
jq.loadprofiles()
|
jq.loadprofiles()
|
||||||
jq.dellastprofile()
|
jq.dellastprofile()
|
||||||
jq.dellastprofile() # twice because loadprofiles adds a dummy
|
jq.dellastprofile() # twice because loadprofiles adds a dummy
|
||||||
jq.showprofile()
|
jq.showprofile()
|
||||||
jq.saveprofiles()
|
jq.saveprofiles()
|
||||||
if runlabel == 'delfirst' :
|
if runlabel == "delfirst":
|
||||||
jq.loadprofiles()
|
jq.loadprofiles()
|
||||||
jq.dellastprofile() # remove the dummy
|
jq.dellastprofile() # remove the dummy
|
||||||
jq.delfirstprofile()
|
jq.delfirstprofile()
|
||||||
jq.showprofile()
|
jq.showprofile()
|
||||||
jq.saveprofiles()
|
jq.saveprofiles()
|
||||||
|
158
parsers/QMs.py
158
parsers/QMs.py
@ -9,13 +9,14 @@ from troggle.core.models.caves import QM, Cave, LogbookEntry
|
|||||||
from troggle.core.models.troggle import DataIssue
|
from troggle.core.models.troggle import DataIssue
|
||||||
from troggle.core.utils import save_carefully
|
from troggle.core.utils import save_carefully
|
||||||
|
|
||||||
'''Reads the CSV files containg QMs for a select few caves
|
"""Reads the CSV files containg QMs for a select few caves
|
||||||
See parsers/survex.py for the parser which extracts QMs from the survex files
|
See parsers/survex.py for the parser which extracts QMs from the survex files
|
||||||
'''
|
"""
|
||||||
|
|
||||||
|
|
||||||
def deleteQMs():
|
def deleteQMs():
|
||||||
QM.objects.all().delete()
|
QM.objects.all().delete()
|
||||||
DataIssue.objects.filter(parser='QMs').delete()
|
DataIssue.objects.filter(parser="QMs").delete()
|
||||||
|
|
||||||
|
|
||||||
def parseCaveQMs(cave, inputFile, ticked=False):
    """Import the QMs for one cave from its source file under settings.EXPOWEB.

    cave       one of the labels "204-steinBH", "234-Hauch" or "161-KH".
    inputFile  path of the QM file, relative to settings.EXPOWEB.
    ticked     only used for "161-KH", where it is handed on to parse_KH_QMs().
               For the CSV caves a QM is ticked when its completion description
               column is non-empty.

    CSV columns read, by index: 0 QM id (year in characters 1-4, QM number after
    the second hyphen), 1 grade ("Dig" is stored as "D"), 2 area, 3 location
    description, 4 completion description, 5 nearest station description,
    6 comment. The first row is treated as a header and skipped.

    Returns the number of rows turned into QMs (for "161-KH", whatever
    parse_KH_QMs() returns), or None when the cave label is not recognised or the
    cave is not in the database; a DataIssue is logged in both of those cases.
    Rows that cannot be parsed are logged as DataIssues and skipped.

    All QMs are created afresh and this is all only run once on import on a fresh database.
    """
    # Resolve the path first: the "not in the database" messages below quote it.
    qmPath = os.path.join(settings.EXPOWEB, inputFile)  # why not use the pathlib stuff ?

    # cave label -> (official_name used for the database lookup, name used in messages)
    known_caves = {
        "204-steinBH": ("Steinbrückenhöhle", "Steinbruckenhoehle"),
        "234-Hauch": ("Hauchhöhle", "Hauchhoehle"),
        "161-KH": ("Kaninchenhöhle", "KH"),
    }
    if cave not in known_caves:
        message = f" ! - {qmPath} cave '{cave}' is not a cave this QM parser knows about"
        print(message)
        DataIssue.objects.create(parser="QMs", message=message)
        return None

    official_name, shown_name = known_caves[cave]
    try:
        caveid = Cave.objects.get(official_name=official_name)
    except Cave.DoesNotExist:
        message = f" ! - {qmPath} {shown_name} is not in the database. Please run cave parser"
        print(message)
        DataIssue.objects.create(parser="QMs", message=message)
        return None

    if cave == "161-KH":
        # The 161 QMs live in html pages rather than a CSV file.
        return parse_KH_QMs(caveid, inputFile=inputFile, ticked=ticked)

    qm_number = re.compile(r".*?-\d*?-X?(?P<numb>\d*)")
    nqms = 0
    with open(qmPath, "r") as qmCSVContents:
        # Sniff the dialect from the whole file, then rewind to parse it.
        dialect = csv.Sniffer().sniff(qmCSVContents.read())
        qmCSVContents.seek(0, 0)
        qmReader = csv.reader(qmCSVContents, dialect=dialect)
        next(qmReader)  # Skip header row

        for line in qmReader:
            # Only the row parsing is guarded: a malformed row is logged and skipped,
            # while database errors are left to propagate.
            try:
                year = int(line[0][1:5])
                id_match = qm_number.match(line[0])
                if id_match is None:
                    raise ValueError(f"no QM number in {line[0]!r}")
                QMnum = id_match.group("numb")

                newQM = QM()
                newQM.number = QMnum
                newQM.cave = caveid
                newQM.blockname = ""
                newQM.grade = "D" if line[1] == "Dig" else line[1]
                newQM.area = line[2]
                newQM.location_description = line[3]
                # In the table, completion is indicated by the presence of a completion description.
                newQM.completion_description = line[4]
                newQM.nearest_station_description = line[5]
                newQM.ticked = bool(newQM.completion_description)
                newQM.comment = line[6]
                newQM.expoyear = year
            except (KeyError, IndexError, ValueError) as err:
                message = f" ! - {qmPath} {type(err).__name__} {str(line)} "
                print(message)
                DataIssue.objects.create(parser="QMs", message=message)
                continue

            try:
                # year and number are unique for a cave in CSV imports
                preexistingQM = QM.objects.get(number=QMnum, found_by__date__year=year)
            except QM.DoesNotExist:
                # if there is no pre-existing QM, save the new one
                newQM.save()
            else:
                # Explicit comparison kept on purpose: only a value equal to False
                # (not None) marks the old QM as unmodified and safe to overwrite.
                if preexistingQM.new_since_parsing == False:  # noqa: E712
                    preexistingQM.delete()
                    newQM.save()
                else:  # otherwise, print that it was ignored
                    print((" - preserving " + str(preexistingQM) + ", which was edited in admin \r"))
            nqms += 1
    return nqms
|
||||||
|
|
||||||
|
|
||||||
def parse_KH_QMs(kh, inputFile, ticked):
    """Import QMs from the 1623-161 (Kaninchenhohle) html pages, different format.

    kh         the Cave object every QM found here is attached to.
    inputFile  path of the html page, relative to settings.EXPOWEB.
    ticked     value stored in each QM's `ticked` field.

    Each line of the page is searched for a QM anchor of the form
    name="C<year>-<cave>-<number> ... </a> <grade><dd><description>[<nearest station>];
    lines that do not match are ignored. Returns the number of matching lines.
    """
    qm_anchor = re.compile(
        r"name=\"[CB](?P<year>\d*)-(?P<cave>\d*)-(?P<number>\d*).*</a> (?P<grade>[ABDCV])<dd>(?P<description>.*)\[(?P<nearest_station>.*)\]"
    )

    # Read everything up front so the file is closed before any database work.
    with open(os.path.join(settings.EXPOWEB, inputFile), "r") as khfile:
        khQMs = khfile.readlines()

    nqms = 0
    for line in khQMs:
        found = qm_anchor.search(line)
        if not found:
            continue
        res = found.groupdict()
        year = int(res["year"])
        # logbook placeholder code was previously here. No longer needed.
        lookupArgs = {
            "blockname": "",
            "expoyear": year,
            "number": res["number"],
            "cave": kh,
            "grade": res["grade"],
        }
        nonLookupArgs = {
            "ticked": ticked,
            "nearest_station_name": res["nearest_station"],
            "location_description": res["description"],
        }
        save_carefully(QM, lookupArgs, nonLookupArgs)
        nqms += 1
    return nqms
|
||||||
|
|
||||||
|
|
||||||
def Load_QMs():
    """Delete all existing QMs, re-import them for caves 204, 234 and 161, and print a summary line."""
    deleteQMs()

    count_204 = parseCaveQMs(cave="204-steinBH", inputFile=r"1623/204/qm.csv")
    count_234 = parseCaveQMs(cave="234-Hauch", inputFile=r"1623/234/qm.csv")
    # 161 is read twice: once for the open QMs, once for the completed ones.
    todo_161 = parseCaveQMs(cave="161-KH", inputFile="1623/161/qmtodo.htm", ticked=False)
    done_161 = parseCaveQMs(cave="161-KH", inputFile="1623/161/qmdone.htm", ticked=True)
    # The balkonhoehle import (1623/264/qm.csv) is disabled.

    summary = (
        f" - Imported: {count_204} QMs for 204, {count_234} QMs for 234, "
        f"{done_161} QMs for 161 done, {todo_161} QMs for 161 not done."
    )
    print(summary)
    print()
|
||||||
|
526
parsers/caves.py
526
parsers/caves.py
@ -6,49 +6,48 @@ from pathlib import Path
|
|||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
from django.db import transaction
|
from django.db import transaction
|
||||||
|
|
||||||
from troggle.core.models.caves import (Area, Cave, CaveAndEntrance, CaveSlug,
|
from troggle.core.models.caves import Area, Cave, CaveAndEntrance, CaveSlug, Entrance, EntranceSlug, GetCaveLookup
|
||||||
Entrance, EntranceSlug, GetCaveLookup)
|
|
||||||
from troggle.core.models.troggle import DataIssue
|
from troggle.core.models.troggle import DataIssue
|
||||||
from troggle.settings import (CAVEDESCRIPTIONS, ENTRANCEDESCRIPTIONS, EXPOWEB,
|
from troggle.settings import CAVEDESCRIPTIONS, ENTRANCEDESCRIPTIONS, EXPOWEB, SURVEX_DATA
|
||||||
SURVEX_DATA)
|
|
||||||
|
|
||||||
'''Reads all the cave description data by parsing the xml files (stored as e.g. :EXPOWEB:/cave_data/1623-161.html )
|
"""Reads all the cave description data by parsing the xml files (stored as e.g. :EXPOWEB:/cave_data/1623-161.html )
|
||||||
and creating the various Cave, Entrance and necessary Area objects.
|
and creating the various Cave, Entrance and necessary Area objects.
|
||||||
|
|
||||||
This is the first import that happens after the database is reinitialised.
|
This is the first import that happens after the database is reinitialised.
|
||||||
So is the first thing that creates tables.
|
So is the first thing that creates tables.
|
||||||
|
|
||||||
'''
|
"""
|
||||||
|
|
||||||
todo='''
|
todo = """
|
||||||
- Cannot use Edit This Page for pendingcaves.txt_edit as Edit This Page is expecting an html file.
|
- Cannot use Edit This Page for pendingcaves.txt_edit as Edit This Page is expecting an html file.
|
||||||
So we will need a separate file-editing capability just for this configuration file ?!
|
So we will need a separate file-editing capability just for this configuration file ?!
|
||||||
|
|
||||||
- crashes on MariaDB in databasereset.py on server when deleting Caves and complains Area needs a non null parent, But this is not true.
|
- crashes on MariaDB in databasereset.py on server when deleting Caves and complains Area needs a non null parent, But this is not true.
|
||||||
The only solution we have found is to let it crash, then stop and restart MariaDB (requires a logon able to sudo)
|
The only solution we have found is to let it crash, then stop and restart MariaDB (requires a logon able to sudo)
|
||||||
and then restart the databasereset.py again. (status as of July 2022)
|
and then restart the databasereset.py again. (status as of July 2022)
|
||||||
'''
|
"""
|
||||||
entrances_xslug = {}
|
entrances_xslug = {}
|
||||||
caves_xslug = {}
|
caves_xslug = {}
|
||||||
areas_xslug = {}
|
areas_xslug = {}
|
||||||
|
|
||||||
|
|
||||||
def dummy_entrance(k, slug, msg="DUMMY"):
|
def dummy_entrance(k, slug, msg="DUMMY"):
|
||||||
'''Returns an empty entrance object for either a PENDING cave or a DUMMY entrance if
|
"""Returns an empty entrance object for either a PENDING cave or a DUMMY entrance if
|
||||||
user forgot to provide one when creating the cave
|
user forgot to provide one when creating the cave
|
||||||
'''
|
"""
|
||||||
ent = Entrance(
|
ent = Entrance(
|
||||||
name = k,
|
name=k,
|
||||||
entrance_description = "Dummy entrance: auto-created when registering a new cave " +
|
entrance_description="Dummy entrance: auto-created when registering a new cave "
|
||||||
"and you forgot to create an entrance for it. Click on 'Edit' to enter the correct data, then 'Submit'.",
|
+ "and you forgot to create an entrance for it. Click on 'Edit' to enter the correct data, then 'Submit'.",
|
||||||
marking = '?')
|
marking="?",
|
||||||
|
)
|
||||||
if ent:
|
if ent:
|
||||||
ent.save() # must save to have id before foreign keys work.
|
ent.save() # must save to have id before foreign keys work.
|
||||||
try: # Now create a entrance slug ID
|
try: # Now create a entrance slug ID
|
||||||
es = EntranceSlug(entrance = ent,
|
es = EntranceSlug(entrance=ent, slug=slug, primary=False)
|
||||||
slug = slug, primary = False)
|
|
||||||
except:
|
except:
|
||||||
message = f" ! {k:11s} {msg}-{slug} entrance create failure"
|
message = f" ! {k:11s} {msg}-{slug} entrance create failure"
|
||||||
DataIssue.objects.create(parser='caves', message=message, url=f'{slug}')
|
DataIssue.objects.create(parser="caves", message=message, url=f"{slug}")
|
||||||
print(message)
|
print(message)
|
||||||
|
|
||||||
ent.cached_primary_slug = slug
|
ent.cached_primary_slug = slug
|
||||||
@ -57,41 +56,43 @@ def dummy_entrance(k, slug, msg="DUMMY"):
|
|||||||
return ent
|
return ent
|
||||||
else:
|
else:
|
||||||
message = f" ! {k:11s} {msg} cave SLUG '{slug}' create failure"
|
message = f" ! {k:11s} {msg} cave SLUG '{slug}' create failure"
|
||||||
DataIssue.objects.create(parser='caves', message=message, url=f'{slug}')
|
DataIssue.objects.create(parser="caves", message=message, url=f"{slug}")
|
||||||
print(message)
|
print(message)
|
||||||
raise
|
raise
|
||||||
|
|
||||||
|
|
||||||
def set_dummy_entrance(id, slug, cave, msg="DUMMY"):
    """Attach a dummy entrance to a cave whose cave_data file has no usable entrance.

    Used when the entrance field is either missing or holds a null string instead
    of a filename in a cave_data file.

    id    name given to the dummy entrance; also quoted in the logged messages.
    slug  slug for the new entrance; also the key it is stored under in entrances_xslug.
    cave  the Cave the entrance is linked to through CaveAndEntrance.
    msg   label passed on to dummy_entrance().

    Nothing is returned. Success and failure are both recorded as a DataIssue and
    printed; a failure is not re-raised.
    """
    try:
        entrance = dummy_entrance(id, slug, msg=msg)
        entrances_xslug[slug] = entrance
        CaveAndEntrance.objects.update_or_create(cave=cave, entrance_letter="", entrance=entrance)
        message = f" ! Warning: Dummy Entrance successfully set for entrance {id} on cave {cave}"

        DataIssue.objects.create(parser="caves", message=message, url=f"{cave.url}")
        print(message)
    except Exception:
        # Best effort: log and carry on with the import. KeyboardInterrupt and
        # SystemExit are deliberately not caught here.
        message = f' ! Entrance Dummy setting failure, slug:"{slug}" cave id :"{id}" '
        DataIssue.objects.create(parser="caves", message=message, url=f"{cave.url}")
        print(message)
|
||||||
|
|
||||||
|
|
||||||
def do_pending_cave(k, url, area):
|
def do_pending_cave(k, url, area):
|
||||||
'''
|
"""
|
||||||
default for a PENDING cave, should be overwritten in the db later if a real cave of the same name exists
|
default for a PENDING cave, should be overwritten in the db later if a real cave of the same name exists
|
||||||
in expoweb/cave_data/1623-"k".html
|
in expoweb/cave_data/1623-"k".html
|
||||||
|
|
||||||
Note that at this point in importing the data we have not yet seen the survex files, so we can't
|
Note that at this point in importing the data we have not yet seen the survex files, so we can't
|
||||||
look inside the relevant survex file to find the year and so we can't provide helpful links.
|
look inside the relevant survex file to find the year and so we can't provide helpful links.
|
||||||
'''
|
"""
|
||||||
|
|
||||||
def get_survex_file(k):
|
def get_survex_file(k):
|
||||||
'''Guesses at and finds a survex file for this pending cave.
|
"""Guesses at and finds a survex file for this pending cave.
|
||||||
Convoluted. Needs rewriting
|
Convoluted. Needs rewriting
|
||||||
'''
|
"""
|
||||||
if k[0:3] == "162":
|
if k[0:3] == "162":
|
||||||
id = Path(k[5:])
|
id = Path(k[5:])
|
||||||
else:
|
else:
|
||||||
@ -113,8 +114,8 @@ def do_pending_cave(k, url, area):
|
|||||||
for f in dir:
|
for f in dir:
|
||||||
if f.suffix == ".svx":
|
if f.suffix == ".svx":
|
||||||
survex_file = f.relative_to(settings.SURVEX_DATA)
|
survex_file = f.relative_to(settings.SURVEX_DATA)
|
||||||
chk = min(5, len(f.name)-1)
|
chk = min(5, len(f.name) - 1)
|
||||||
if str(f.name)[:chk].lower() == str(id.name)[:chk].lower(): # bodge which mostly works
|
if str(f.name)[:chk].lower() == str(id.name)[:chk].lower(): # bodge which mostly works
|
||||||
prime_suspect = survex_file
|
prime_suspect = survex_file
|
||||||
if prime_suspect:
|
if prime_suspect:
|
||||||
survex_file = prime_suspect
|
survex_file = prime_suspect
|
||||||
@ -129,23 +130,29 @@ def do_pending_cave(k, url, area):
|
|||||||
with transaction.atomic():
|
with transaction.atomic():
|
||||||
if slug in g:
|
if slug in g:
|
||||||
message = f" ! {k:18} cave listed in pendingcaves.txt already exists."
|
message = f" ! {k:18} cave listed in pendingcaves.txt already exists."
|
||||||
DataIssue.objects.create(parser='caves', message=message, url=url)
|
DataIssue.objects.create(parser="caves", message=message, url=url)
|
||||||
print(message)
|
print(message)
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
default_note = f"_Survex file found in loser repo but no description in expoweb <br><br><br>\n"
|
default_note = f"_Survex file found in loser repo but no description in expoweb <br><br><br>\n"
|
||||||
default_note += f"INSTRUCTIONS: First open 'This survex file' (link above the CaveView panel) to find the date and info. Then "
|
default_note += f"INSTRUCTIONS: First open 'This survex file' (link above the CaveView panel) to find the date and info. Then "
|
||||||
default_note += f"<br><br>\n\n - (0) look in the <a href=\"/noinfo/cave-number-index\">cave number index</a> for notes on this cave, "
|
default_note += f'<br><br>\n\n - (0) look in the <a href="/noinfo/cave-number-index">cave number index</a> for notes on this cave, '
|
||||||
default_note += f"<br><br>\n\n - (1) search in the survex file for the *ref to find a "
|
default_note += f"<br><br>\n\n - (1) search in the survex file for the *ref to find a "
|
||||||
default_note += f"relevant wallet, e.g.<a href='/survey_scans/2009%252311/'>2009#11</a> and read the notes image files <br>\n - "
|
default_note += f"relevant wallet, e.g.<a href='/survey_scans/2009%252311/'>2009#11</a> and read the notes image files <br>\n - "
|
||||||
default_note += f"<br><br>\n\n - (2) search in the Expo for that year e.g. <a href='/expedition/2009'>2009</a> to find a "
|
default_note += (
|
||||||
|
f"<br><br>\n\n - (2) search in the Expo for that year e.g. <a href='/expedition/2009'>2009</a> to find a "
|
||||||
|
)
|
||||||
default_note += f"relevant logbook entry, remember that the date may have been recorded incorrectly, "
|
default_note += f"relevant logbook entry, remember that the date may have been recorded incorrectly, "
|
||||||
default_note += f"so check for trips i.e. logbook entries involving the same people as were listed in the survex file, "
|
default_note += (
|
||||||
default_note += f"and you should also check the scanned copy of the logbook (linked from each logbook entry page) "
|
f"so check for trips i.e. logbook entries involving the same people as were listed in the survex file, "
|
||||||
|
)
|
||||||
|
default_note += (
|
||||||
|
f"and you should also check the scanned copy of the logbook (linked from each logbook entry page) "
|
||||||
|
)
|
||||||
default_note += f"just in case a vital trip was not transcribed, then <br>\n - "
|
default_note += f"just in case a vital trip was not transcribed, then <br>\n - "
|
||||||
default_note += f"click on 'Edit this cave' and copy the information you find in the survex file and the logbook"
|
default_note += (
|
||||||
|
f"click on 'Edit this cave' and copy the information you find in the survex file and the logbook"
|
||||||
|
)
|
||||||
default_note += f"and delete all the text in the 'Notes' section - which is the text you are reading now."
|
default_note += f"and delete all the text in the 'Notes' section - which is the text you are reading now."
|
||||||
default_note += f"<br><br>\n\n - Only two fields on this form are essential. "
|
default_note += f"<br><br>\n\n - Only two fields on this form are essential. "
|
||||||
default_note += f"Documentation of all the fields on 'Edit this cave' form is in <a href='/handbook/survey/caveentryfields.html'>handbook/survey/caveentryfields</a>"
|
default_note += f"Documentation of all the fields on 'Edit this cave' form is in <a href='/handbook/survey/caveentryfields.html'>handbook/survey/caveentryfields</a>"
|
||||||
@ -153,42 +160,46 @@ def do_pending_cave(k, url, area):
|
|||||||
default_note += f"You will also need to create a new entrance from the 'Edit this cave' page. Ignore the existing dummy one, it will evaporate on the next full import."
|
default_note += f"You will also need to create a new entrance from the 'Edit this cave' page. Ignore the existing dummy one, it will evaporate on the next full import."
|
||||||
default_note += f"<br><br>\n\n - "
|
default_note += f"<br><br>\n\n - "
|
||||||
default_note += f"When you Submit it will create a new file in expoweb/cave_data/ "
|
default_note += f"When you Submit it will create a new file in expoweb/cave_data/ "
|
||||||
default_note += f"<br><br>\n\n - Now you can edit the entrance info: click on Edit below for the dummy entrance. "
|
default_note += (
|
||||||
|
f"<br><br>\n\n - Now you can edit the entrance info: click on Edit below for the dummy entrance. "
|
||||||
|
)
|
||||||
default_note += f"and then Submit to save it (if you forget to do this, a dummy entrance will be created for your new cave description)."
|
default_note += f"and then Submit to save it (if you forget to do this, a dummy entrance will be created for your new cave description)."
|
||||||
default_note += f"<br><br>\n\n - Finally, you need to find a nerd to edit the file '<var>expoweb/cave_data/pending.txt</var>' "
|
default_note += f"<br><br>\n\n - Finally, you need to find a nerd to edit the file '<var>expoweb/cave_data/pending.txt</var>' "
|
||||||
default_note += f"to remove the line <br><var>{slug}</var><br> as it is no longer 'pending' but 'done. Well Done."
|
default_note += (
|
||||||
|
f"to remove the line <br><var>{slug}</var><br> as it is no longer 'pending' but 'done. Well Done."
|
||||||
|
)
|
||||||
|
|
||||||
survex_file = get_survex_file(k)
|
survex_file = get_survex_file(k)
|
||||||
|
|
||||||
cave = Cave(
|
cave = Cave(
|
||||||
unofficial_number = k,
|
unofficial_number=k,
|
||||||
underground_description = "Pending cave write-up - creating as empty object. No XML file available yet.",
|
underground_description="Pending cave write-up - creating as empty object. No XML file available yet.",
|
||||||
survex_file = survex_file,
|
survex_file=survex_file,
|
||||||
url = url,
|
url=url,
|
||||||
notes = default_note)
|
notes=default_note,
|
||||||
|
)
|
||||||
if cave:
|
if cave:
|
||||||
cave.save() # must save to have id before foreign keys work. This is also a ManyToMany key.
|
cave.save() # must save to have id before foreign keys work. This is also a ManyToMany key.
|
||||||
cave.area.add(area)
|
cave.area.add(area)
|
||||||
cave.save()
|
cave.save()
|
||||||
message = f" ! {k:18} {cave.underground_description} url: {url}"
|
message = f" ! {k:18} {cave.underground_description} url: {url}"
|
||||||
DataIssue.objects.create(parser='caves', message=message, url=url)
|
DataIssue.objects.create(parser="caves", message=message, url=url)
|
||||||
print(message)
|
print(message)
|
||||||
|
|
||||||
try: # Now create a cave slug ID
|
try: # Now create a cave slug ID
|
||||||
cs = CaveSlug.objects.update_or_create(cave = cave,
|
cs = CaveSlug.objects.update_or_create(cave=cave, slug=slug, primary=False)
|
||||||
slug = slug, primary = False)
|
|
||||||
except:
|
except:
|
||||||
message = f" ! {k:11s} PENDING cave SLUG create failure"
|
message = f" ! {k:11s} PENDING cave SLUG create failure"
|
||||||
DataIssue.objects.create(parser='caves', message=message)
|
DataIssue.objects.create(parser="caves", message=message)
|
||||||
print(message)
|
print(message)
|
||||||
else:
|
else:
|
||||||
message = f' ! {k:11s} PENDING cave create failure'
|
message = f" ! {k:11s} PENDING cave create failure"
|
||||||
DataIssue.objects.create(parser='caves', message=message)
|
DataIssue.objects.create(parser="caves", message=message)
|
||||||
print(message)
|
print(message)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
ent = dummy_entrance(k, slug, msg="PENDING")
|
ent = dummy_entrance(k, slug, msg="PENDING")
|
||||||
ceinsts = CaveAndEntrance.objects.update_or_create(cave = cave, entrance_letter = "", entrance = ent)
|
ceinsts = CaveAndEntrance.objects.update_or_create(cave=cave, entrance_letter="", entrance=ent)
|
||||||
for ceinst in ceinsts:
|
for ceinst in ceinsts:
|
||||||
if str(ceinst) == str(cave): # magic runes... why is the next value a Bool?
|
if str(ceinst) == str(cave): # magic runes... why is the next value a Bool?
|
||||||
ceinst.cave = cave
|
ceinst.cave = cave
|
||||||
@ -196,15 +207,14 @@ def do_pending_cave(k, url, area):
|
|||||||
break
|
break
|
||||||
except:
|
except:
|
||||||
message = f" ! {k:11s} PENDING entrance + cave UNION create failure '{cave}' [{ent}]"
|
message = f" ! {k:11s} PENDING entrance + cave UNION create failure '{cave}' [{ent}]"
|
||||||
DataIssue.objects.create(parser='caves', message=message)
|
DataIssue.objects.create(parser="caves", message=message)
|
||||||
print(message)
|
print(message)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def readentrance(filename):
|
def readentrance(filename):
|
||||||
'''Reads an entrance description from the .html file
|
"""Reads an entrance description from the .html file
|
||||||
Convoluted. Sorry.This is as I inherited it and I haven't fiddled with it. Needs rewriting
|
Convoluted. Sorry.This is as I inherited it and I haven't fiddled with it. Needs rewriting
|
||||||
'''
|
"""
|
||||||
global entrances_xslug
|
global entrances_xslug
|
||||||
global caves_xslug
|
global caves_xslug
|
||||||
global areas_xslug
|
global areas_xslug
|
||||||
@ -213,99 +223,103 @@ def readentrance(filename):
|
|||||||
with open(os.path.join(ENTRANCEDESCRIPTIONS, filename)) as f:
|
with open(os.path.join(ENTRANCEDESCRIPTIONS, filename)) as f:
|
||||||
contents = f.read()
|
contents = f.read()
|
||||||
context = filename
|
context = filename
|
||||||
#print("Reading file ENTRANCE {} / {}".format(ENTRANCEDESCRIPTIONS, filename))
|
# print("Reading file ENTRANCE {} / {}".format(ENTRANCEDESCRIPTIONS, filename))
|
||||||
entrancecontentslist = getXML(contents, "entrance", maxItems = 1, context = context)
|
entrancecontentslist = getXML(contents, "entrance", maxItems=1, context=context)
|
||||||
if len(entrancecontentslist) != 1:
|
if len(entrancecontentslist) != 1:
|
||||||
message = f'! BAD ENTRANCE at "{filename}"'
|
message = f'! BAD ENTRANCE at "{filename}"'
|
||||||
DataIssue.objects.create(parser='caves', message=message)
|
DataIssue.objects.create(parser="caves", message=message)
|
||||||
print(message)
|
print(message)
|
||||||
else:
|
else:
|
||||||
entrancecontents = entrancecontentslist[0]
|
entrancecontents = entrancecontentslist[0]
|
||||||
non_public = getXML(entrancecontents, "non_public", maxItems = 1, context = context)
|
non_public = getXML(entrancecontents, "non_public", maxItems=1, context=context)
|
||||||
name = getXML(entrancecontents, "name", maxItems = 1, context = context)
|
name = getXML(entrancecontents, "name", maxItems=1, context=context)
|
||||||
slugs = getXML(entrancecontents, "slug", context = context)
|
slugs = getXML(entrancecontents, "slug", context=context)
|
||||||
entrance_description = getXML(entrancecontents, "entrance_description", maxItems = 1, context = context)
|
entrance_description = getXML(entrancecontents, "entrance_description", maxItems=1, context=context)
|
||||||
explorers = getXML(entrancecontents, "explorers", maxItems = 1, context = context)
|
explorers = getXML(entrancecontents, "explorers", maxItems=1, context=context)
|
||||||
map_description = getXML(entrancecontents, "map_description", maxItems = 1, context = context)
|
map_description = getXML(entrancecontents, "map_description", maxItems=1, context=context)
|
||||||
location_description = getXML(entrancecontents, "location_description", maxItems = 1, context = context)
|
location_description = getXML(entrancecontents, "location_description", maxItems=1, context=context)
|
||||||
lastvisit = getXML(entrancecontents, "last visit date", maxItems = 1, minItems = 0, context = context)
|
lastvisit = getXML(entrancecontents, "last visit date", maxItems=1, minItems=0, context=context)
|
||||||
approach = getXML(entrancecontents, "approach", maxItems = 1, context = context)
|
approach = getXML(entrancecontents, "approach", maxItems=1, context=context)
|
||||||
underground_description = getXML(entrancecontents, "underground_description", maxItems = 1, context = context)
|
underground_description = getXML(entrancecontents, "underground_description", maxItems=1, context=context)
|
||||||
photo = getXML(entrancecontents, "photo", maxItems = 1, context = context)
|
photo = getXML(entrancecontents, "photo", maxItems=1, context=context)
|
||||||
marking = getXML(entrancecontents, "marking", maxItems = 1, context = context)
|
marking = getXML(entrancecontents, "marking", maxItems=1, context=context)
|
||||||
marking_comment = getXML(entrancecontents, "marking_comment", maxItems = 1, context = context)
|
marking_comment = getXML(entrancecontents, "marking_comment", maxItems=1, context=context)
|
||||||
findability = getXML(entrancecontents, "findability", maxItems = 1, context = context)
|
findability = getXML(entrancecontents, "findability", maxItems=1, context=context)
|
||||||
findability_description = getXML(entrancecontents, "findability_description", maxItems = 1, context = context)
|
findability_description = getXML(entrancecontents, "findability_description", maxItems=1, context=context)
|
||||||
alt = getXML(entrancecontents, "alt", maxItems = 1, context = context)
|
alt = getXML(entrancecontents, "alt", maxItems=1, context=context)
|
||||||
northing = getXML(entrancecontents, "northing", maxItems = 1, context = context)
|
northing = getXML(entrancecontents, "northing", maxItems=1, context=context)
|
||||||
easting = getXML(entrancecontents, "easting", maxItems = 1, context = context)
|
easting = getXML(entrancecontents, "easting", maxItems=1, context=context)
|
||||||
tag_station = getXML(entrancecontents, "tag_station", maxItems = 1, context = context)
|
tag_station = getXML(entrancecontents, "tag_station", maxItems=1, context=context)
|
||||||
exact_station = getXML(entrancecontents, "exact_station", maxItems = 1, context = context)
|
exact_station = getXML(entrancecontents, "exact_station", maxItems=1, context=context)
|
||||||
other_station = getXML(entrancecontents, "other_station", maxItems = 1, context = context)
|
other_station = getXML(entrancecontents, "other_station", maxItems=1, context=context)
|
||||||
other_description = getXML(entrancecontents, "other_description", maxItems = 1, context = context)
|
other_description = getXML(entrancecontents, "other_description", maxItems=1, context=context)
|
||||||
bearings = getXML(entrancecontents, "bearings", maxItems = 1, context = context)
|
bearings = getXML(entrancecontents, "bearings", maxItems=1, context=context)
|
||||||
url = getXML(entrancecontents, "url", maxItems = 1, context = context)
|
url = getXML(entrancecontents, "url", maxItems=1, context=context)
|
||||||
#if len(non_public) == 1 and len(slugs) >= 1 and len(name) >= 1 and len(entrance_description) == 1 and len(explorers) == 1 and len(map_description) == 1 and len(location_description) == 1 and len(lastvisit) == 1 and len(approach) == 1 and len(underground_description) == 1 and len(marking) == 1 and len(marking_comment) == 1 and len(findability) == 1 and len(findability_description) == 1 and len(alt) == 1 and len(northing) == 1 and len(easting) == 1 and len(tag_station) == 1 and len(exact_station) == 1 and len(other_station) == 1 and len(other_description) == 1 and len(bearings) == 1 and len(url) == 1:
|
# if len(non_public) == 1 and len(slugs) >= 1 and len(name) >= 1 and len(entrance_description) == 1 and len(explorers) == 1 and len(map_description) == 1 and len(location_description) == 1 and len(lastvisit) == 1 and len(approach) == 1 and len(underground_description) == 1 and len(marking) == 1 and len(marking_comment) == 1 and len(findability) == 1 and len(findability_description) == 1 and len(alt) == 1 and len(northing) == 1 and len(easting) == 1 and len(tag_station) == 1 and len(exact_station) == 1 and len(other_station) == 1 and len(other_description) == 1 and len(bearings) == 1 and len(url) == 1:
|
||||||
e, state = Entrance.objects.update_or_create(name = name[0],
|
e, state = Entrance.objects.update_or_create(
|
||||||
non_public = {"True": True, "False": False, "true": True, "false": False,}[non_public[0]],
|
name=name[0],
|
||||||
entrance_description = entrance_description[0],
|
non_public={
|
||||||
explorers = explorers[0],
|
"True": True,
|
||||||
map_description = map_description[0],
|
"False": False,
|
||||||
location_description = location_description[0],
|
"true": True,
|
||||||
lastvisit = lastvisit[0],
|
"false": False,
|
||||||
approach = approach[0],
|
}[non_public[0]],
|
||||||
underground_description = underground_description[0],
|
entrance_description=entrance_description[0],
|
||||||
photo = photo[0],
|
explorers=explorers[0],
|
||||||
marking = marking[0],
|
map_description=map_description[0],
|
||||||
marking_comment = marking_comment[0],
|
location_description=location_description[0],
|
||||||
findability = findability[0],
|
lastvisit=lastvisit[0],
|
||||||
findability_description = findability_description[0],
|
approach=approach[0],
|
||||||
alt = alt[0],
|
underground_description=underground_description[0],
|
||||||
northing = northing[0],
|
photo=photo[0],
|
||||||
easting = easting[0],
|
marking=marking[0],
|
||||||
tag_station = tag_station[0],
|
marking_comment=marking_comment[0],
|
||||||
exact_station = exact_station[0],
|
findability=findability[0],
|
||||||
other_station = other_station[0],
|
findability_description=findability_description[0],
|
||||||
other_description = other_description[0],
|
alt=alt[0],
|
||||||
bearings = bearings[0],
|
northing=northing[0],
|
||||||
url = url[0],
|
easting=easting[0],
|
||||||
filename = filename,
|
tag_station=tag_station[0],
|
||||||
cached_primary_slug = slugs[0])
|
exact_station=exact_station[0],
|
||||||
|
other_station=other_station[0],
|
||||||
|
other_description=other_description[0],
|
||||||
|
bearings=bearings[0],
|
||||||
|
url=url[0],
|
||||||
|
filename=filename,
|
||||||
|
cached_primary_slug=slugs[0],
|
||||||
|
)
|
||||||
primary = True
|
primary = True
|
||||||
for slug in slugs:
|
for slug in slugs:
|
||||||
#print("entrance slug:{} filename:{}".format(slug, filename))
|
# print("entrance slug:{} filename:{}".format(slug, filename))
|
||||||
try:
|
try:
|
||||||
cs = EntranceSlug.objects.update_or_create(entrance = e,
|
cs = EntranceSlug.objects.update_or_create(entrance=e, slug=slug, primary=primary)
|
||||||
slug = slug,
|
|
||||||
primary = primary)
|
|
||||||
except:
|
except:
|
||||||
# need to cope with duplicates
|
# need to cope with duplicates
|
||||||
message = f" ! FAILED to get precisely one ENTRANCE when updating using: cave_entrance/{filename}"
|
message = f" ! FAILED to get precisely one ENTRANCE when updating using: cave_entrance/{filename}"
|
||||||
DataIssue.objects.create(parser='caves', message=message, url=f'/cave/{slug}/edit/')
|
DataIssue.objects.create(parser="caves", message=message, url=f"/cave/{slug}/edit/")
|
||||||
kents = EntranceSlug.objects.all().filter(entrance = e,
|
kents = EntranceSlug.objects.all().filter(entrance=e, slug=slug, primary=primary)
|
||||||
slug = slug,
|
|
||||||
primary = primary)
|
|
||||||
for k in kents:
|
for k in kents:
|
||||||
message = " ! - DUPLICATE in db. entrance:"+ str(k.entrance) + ", slug:" + str(k.slug())
|
message = " ! - DUPLICATE in db. entrance:" + str(k.entrance) + ", slug:" + str(k.slug())
|
||||||
DataIssue.objects.create(parser='caves', message=message, url=f'/cave/{slug}/edit/')
|
DataIssue.objects.create(parser="caves", message=message, url=f"/cave/{slug}/edit/")
|
||||||
print(message)
|
print(message)
|
||||||
for k in kents:
|
for k in kents:
|
||||||
if k.slug() != None:
|
if k.slug() != None:
|
||||||
print(" ! - OVERWRITING this one: slug:"+ str(k.slug()))
|
print(" ! - OVERWRITING this one: slug:" + str(k.slug()))
|
||||||
k.notes = "DUPLICATE entrance found on import. Please fix\n" + k.notes
|
k.notes = "DUPLICATE entrance found on import. Please fix\n" + k.notes
|
||||||
c = k
|
c = k
|
||||||
primary = False
|
primary = False
|
||||||
# else: # more than one item in long list. But this is not an error, and the max and min have been checked by getXML
|
# else: # more than one item in long list. But this is not an error, and the max and min have been checked by getXML
|
||||||
# slug = Path(filename).stem
|
# slug = Path(filename).stem
|
||||||
# message = f' ! ABORT loading this entrance. in "{filename}"'
|
# message = f' ! ABORT loading this entrance. in "{filename}"'
|
||||||
# DataIssue.objects.create(parser='caves', message=message, url=f'/cave/{slug}/edit/')
|
# DataIssue.objects.create(parser='caves', message=message, url=f'/cave/{slug}/edit/')
|
||||||
# print(message)
|
# print(message)
|
||||||
|
|
||||||
|
|
||||||
def readcave(filename):
|
def readcave(filename):
|
||||||
'''Reads an enrance description from the .html file
|
"""Reads an enrance description from the .html file
|
||||||
Convoluted. Sorry.This is as I inherited it and I haven't fiddled with it. Needs rewriting
|
Convoluted. Sorry.This is as I inherited it and I haven't fiddled with it. Needs rewriting
|
||||||
Assumes any area it hasn't seen before is a subarea of 1623
|
Assumes any area it hasn't seen before is a subarea of 1623
|
||||||
'''
|
"""
|
||||||
global entrances_xslug
|
global entrances_xslug
|
||||||
global caves_xslug
|
global caves_xslug
|
||||||
global areas_xslug
|
global areas_xslug
|
||||||
@ -314,68 +328,97 @@ def readcave(filename):
|
|||||||
with open(os.path.join(CAVEDESCRIPTIONS, filename)) as f:
|
with open(os.path.join(CAVEDESCRIPTIONS, filename)) as f:
|
||||||
contents = f.read()
|
contents = f.read()
|
||||||
context = filename
|
context = filename
|
||||||
cavecontentslist = getXML(contents, "cave", maxItems = 1, context = context)
|
cavecontentslist = getXML(contents, "cave", maxItems=1, context=context)
|
||||||
if len(cavecontentslist) != 1:
|
if len(cavecontentslist) != 1:
|
||||||
message = f'! BAD CAVE at "{filename}"'
|
message = f'! BAD CAVE at "{filename}"'
|
||||||
DataIssue.objects.create(parser='caves', message=message)
|
DataIssue.objects.create(parser="caves", message=message)
|
||||||
print(message)
|
print(message)
|
||||||
else:
|
else:
|
||||||
cavecontents = cavecontentslist[0]
|
cavecontents = cavecontentslist[0]
|
||||||
non_public = getXML(cavecontents, "non_public", maxItems = 1, context = context)
|
non_public = getXML(cavecontents, "non_public", maxItems=1, context=context)
|
||||||
slugs = getXML(cavecontents, "caveslug", maxItems = 1, context = context)
|
slugs = getXML(cavecontents, "caveslug", maxItems=1, context=context)
|
||||||
official_name = getXML(cavecontents, "official_name", maxItems = 1, context = context)
|
official_name = getXML(cavecontents, "official_name", maxItems=1, context=context)
|
||||||
areas = getXML(cavecontents, "area", context = context)
|
areas = getXML(cavecontents, "area", context=context)
|
||||||
kataster_code = getXML(cavecontents, "kataster_code", maxItems = 1, context = context)
|
kataster_code = getXML(cavecontents, "kataster_code", maxItems=1, context=context)
|
||||||
kataster_number = getXML(cavecontents, "kataster_number", maxItems = 1, context = context)
|
kataster_number = getXML(cavecontents, "kataster_number", maxItems=1, context=context)
|
||||||
unofficial_number = getXML(cavecontents, "unofficial_number", maxItems = 1, context = context)
|
unofficial_number = getXML(cavecontents, "unofficial_number", maxItems=1, context=context)
|
||||||
explorers = getXML(cavecontents, "explorers", maxItems = 1, context = context)
|
explorers = getXML(cavecontents, "explorers", maxItems=1, context=context)
|
||||||
underground_description = getXML(cavecontents, "underground_description", maxItems = 1, context = context)
|
underground_description = getXML(cavecontents, "underground_description", maxItems=1, context=context)
|
||||||
equipment = getXML(cavecontents, "equipment", maxItems = 1, context = context)
|
equipment = getXML(cavecontents, "equipment", maxItems=1, context=context)
|
||||||
references = getXML(cavecontents, "references", maxItems = 1, context = context)
|
references = getXML(cavecontents, "references", maxItems=1, context=context)
|
||||||
survey = getXML(cavecontents, "survey", maxItems = 1, context = context)
|
survey = getXML(cavecontents, "survey", maxItems=1, context=context)
|
||||||
kataster_status = getXML(cavecontents, "kataster_status", maxItems = 1, context = context)
|
kataster_status = getXML(cavecontents, "kataster_status", maxItems=1, context=context)
|
||||||
underground_centre_line = getXML(cavecontents, "underground_centre_line", maxItems = 1, context = context)
|
underground_centre_line = getXML(cavecontents, "underground_centre_line", maxItems=1, context=context)
|
||||||
notes = getXML(cavecontents, "notes", maxItems = 1, context = context)
|
notes = getXML(cavecontents, "notes", maxItems=1, context=context)
|
||||||
length = getXML(cavecontents, "length", maxItems = 1, context = context)
|
length = getXML(cavecontents, "length", maxItems=1, context=context)
|
||||||
depth = getXML(cavecontents, "depth", maxItems = 1, context = context)
|
depth = getXML(cavecontents, "depth", maxItems=1, context=context)
|
||||||
extent = getXML(cavecontents, "extent", maxItems = 1, context = context)
|
extent = getXML(cavecontents, "extent", maxItems=1, context=context)
|
||||||
survex_file = getXML(cavecontents, "survex_file", maxItems = 1, context = context)
|
survex_file = getXML(cavecontents, "survex_file", maxItems=1, context=context)
|
||||||
description_file = getXML(cavecontents, "description_file", maxItems = 1, context = context)
|
description_file = getXML(cavecontents, "description_file", maxItems=1, context=context)
|
||||||
url = getXML(cavecontents, "url", maxItems = 1, context = context)
|
url = getXML(cavecontents, "url", maxItems=1, context=context)
|
||||||
entrances = getXML(cavecontents, "entrance", context = context)
|
entrances = getXML(cavecontents, "entrance", context=context)
|
||||||
|
|
||||||
if len(non_public) == 1 and len(slugs) >= 1 and len(official_name) == 1 and len(areas) >= 1 and len(kataster_code) == 1 and len(kataster_number) == 1 and len(unofficial_number) == 1 and len(explorers) == 1 and len(underground_description) == 1 and len(equipment) == 1 and len(references) == 1 and len(survey) == 1 and len(kataster_status) == 1 and len(underground_centre_line) == 1 and len(notes) == 1 and len(length) == 1 and len(depth) == 1 and len(extent) == 1 and len(survex_file) == 1 and len(description_file ) == 1 and len(url) == 1:
|
if (
|
||||||
|
len(non_public) == 1
|
||||||
|
and len(slugs) >= 1
|
||||||
|
and len(official_name) == 1
|
||||||
|
and len(areas) >= 1
|
||||||
|
and len(kataster_code) == 1
|
||||||
|
and len(kataster_number) == 1
|
||||||
|
and len(unofficial_number) == 1
|
||||||
|
and len(explorers) == 1
|
||||||
|
and len(underground_description) == 1
|
||||||
|
and len(equipment) == 1
|
||||||
|
and len(references) == 1
|
||||||
|
and len(survey) == 1
|
||||||
|
and len(kataster_status) == 1
|
||||||
|
and len(underground_centre_line) == 1
|
||||||
|
and len(notes) == 1
|
||||||
|
and len(length) == 1
|
||||||
|
and len(depth) == 1
|
||||||
|
and len(extent) == 1
|
||||||
|
and len(survex_file) == 1
|
||||||
|
and len(description_file) == 1
|
||||||
|
and len(url) == 1
|
||||||
|
):
|
||||||
try:
|
try:
|
||||||
c, state = Cave.objects.update_or_create(non_public = {"True": True, "False": False, "true": True, "false": False,}[non_public[0]],
|
c, state = Cave.objects.update_or_create(
|
||||||
official_name = official_name[0],
|
non_public={
|
||||||
kataster_code = kataster_code[0],
|
"True": True,
|
||||||
kataster_number = kataster_number[0],
|
"False": False,
|
||||||
unofficial_number = unofficial_number[0],
|
"true": True,
|
||||||
explorers = explorers[0],
|
"false": False,
|
||||||
underground_description = underground_description[0],
|
}[non_public[0]],
|
||||||
equipment = equipment[0],
|
official_name=official_name[0],
|
||||||
references = references[0],
|
kataster_code=kataster_code[0],
|
||||||
survey = survey[0],
|
kataster_number=kataster_number[0],
|
||||||
kataster_status = kataster_status[0],
|
unofficial_number=unofficial_number[0],
|
||||||
underground_centre_line = underground_centre_line[0],
|
explorers=explorers[0],
|
||||||
notes = notes[0],
|
underground_description=underground_description[0],
|
||||||
length = length[0],
|
equipment=equipment[0],
|
||||||
depth = depth[0],
|
references=references[0],
|
||||||
extent = extent[0],
|
survey=survey[0],
|
||||||
survex_file = survex_file[0],
|
kataster_status=kataster_status[0],
|
||||||
description_file = description_file[0],
|
underground_centre_line=underground_centre_line[0],
|
||||||
url = url[0],
|
notes=notes[0],
|
||||||
filename = filename)
|
length=length[0],
|
||||||
|
depth=depth[0],
|
||||||
|
extent=extent[0],
|
||||||
|
survex_file=survex_file[0],
|
||||||
|
description_file=description_file[0],
|
||||||
|
url=url[0],
|
||||||
|
filename=filename,
|
||||||
|
)
|
||||||
except:
|
except:
|
||||||
print(" ! FAILED to get only one CAVE when updating using: "+filename)
|
print(" ! FAILED to get only one CAVE when updating using: " + filename)
|
||||||
kaves = Cave.objects.all().filter(kataster_number=kataster_number[0])
|
kaves = Cave.objects.all().filter(kataster_number=kataster_number[0])
|
||||||
for k in kaves:
|
for k in kaves:
|
||||||
message = " ! - DUPLICATES in db. kataster:"+ str(k.kataster_number) + ", slug:" + str(k.slug())
|
message = " ! - DUPLICATES in db. kataster:" + str(k.kataster_number) + ", slug:" + str(k.slug())
|
||||||
DataIssue.objects.create(parser='caves', message=message)
|
DataIssue.objects.create(parser="caves", message=message)
|
||||||
print(message)
|
print(message)
|
||||||
for k in kaves:
|
for k in kaves:
|
||||||
if k.slug() != None:
|
if k.slug() != None:
|
||||||
print(" ! - OVERWRITING this one: slug:"+ str(k.slug()))
|
print(" ! - OVERWRITING this one: slug:" + str(k.slug()))
|
||||||
k.notes = "DUPLICATE kataster number found on import. Please fix\n" + k.notes
|
k.notes = "DUPLICATE kataster number found on import. Please fix\n" + k.notes
|
||||||
c = k
|
c = k
|
||||||
|
|
||||||
@ -383,11 +426,11 @@ def readcave(filename):
|
|||||||
if area_slug in areas_xslug:
|
if area_slug in areas_xslug:
|
||||||
newArea = areas_xslug[area_slug]
|
newArea = areas_xslug[area_slug]
|
||||||
else:
|
else:
|
||||||
area = Area.objects.filter(short_name = area_slug)
|
area = Area.objects.filter(short_name=area_slug)
|
||||||
if area:
|
if area:
|
||||||
newArea = area[0]
|
newArea = area[0]
|
||||||
else:
|
else:
|
||||||
newArea = Area(short_name = area_slug, super = Area.objects.get(short_name = "1623"))
|
newArea = Area(short_name=area_slug, super=Area.objects.get(short_name="1623"))
|
||||||
newArea.save()
|
newArea.save()
|
||||||
areas_xslug[area_slug] = newArea
|
areas_xslug[area_slug] = newArea
|
||||||
c.area.add(newArea)
|
c.area.add(newArea)
|
||||||
@ -396,15 +439,13 @@ def readcave(filename):
|
|||||||
if slug in caves_xslug:
|
if slug in caves_xslug:
|
||||||
cs = caves_xslug[slug]
|
cs = caves_xslug[slug]
|
||||||
else:
|
else:
|
||||||
try: # we want to overwrite a PENDING cave if we are now importing the 1623-xxx.html file for it
|
try: # we want to overwrite a PENDING cave if we are now importing the 1623-xxx.html file for it
|
||||||
cs = CaveSlug.objects.update_or_create(cave = c,
|
cs = CaveSlug.objects.update_or_create(cave=c, slug=slug, primary=primary)
|
||||||
slug = slug,
|
|
||||||
primary = primary)
|
|
||||||
caves_xslug[slug] = cs
|
caves_xslug[slug] = cs
|
||||||
except Exception as ex:
|
except Exception as ex:
|
||||||
# This fails to do an update! It just crashes.. to be fixed
|
# This fails to do an update! It just crashes.. to be fixed
|
||||||
message = f" ! Cave update/create failure : {slug}, skipping file cave_data/{context} with exception\nException: {ex.__class__}"
|
message = f" ! Cave update/create failure : {slug}, skipping file cave_data/{context} with exception\nException: {ex.__class__}"
|
||||||
DataIssue.objects.create(parser='caves', message=message)
|
DataIssue.objects.create(parser="caves", message=message)
|
||||||
print(message)
|
print(message)
|
||||||
|
|
||||||
primary = False
|
primary = False
|
||||||
@ -414,71 +455,78 @@ def readcave(filename):
|
|||||||
set_dummy_entrance(slug[5:], slug, c, msg="DUMMY")
|
set_dummy_entrance(slug[5:], slug, c, msg="DUMMY")
|
||||||
else:
|
else:
|
||||||
for entrance in entrances:
|
for entrance in entrances:
|
||||||
eslug = getXML(entrance, "entranceslug", maxItems = 1, context = context)[0]
|
eslug = getXML(entrance, "entranceslug", maxItems=1, context=context)[0]
|
||||||
letter = getXML(entrance, "letter", maxItems = 1, context = context)[0]
|
letter = getXML(entrance, "letter", maxItems=1, context=context)[0]
|
||||||
if len(entrances) == 1 and not eslug: # may be empty: <entranceslug></entranceslug>
|
if len(entrances) == 1 and not eslug: # may be empty: <entranceslug></entranceslug>
|
||||||
set_dummy_entrance(slug[5:], slug, c, msg="DUMMY")
|
set_dummy_entrance(slug[5:], slug, c, msg="DUMMY")
|
||||||
else:
|
else:
|
||||||
try:
|
try:
|
||||||
if eslug in entrances_xslug:
|
if eslug in entrances_xslug:
|
||||||
entrance = entrances_xslug[eslug]
|
entrance = entrances_xslug[eslug]
|
||||||
else:
|
else:
|
||||||
entrance = Entrance.objects.get(entranceslug__slug = eslug)
|
entrance = Entrance.objects.get(entranceslug__slug=eslug)
|
||||||
entrances_xslug[eslug] = entrance
|
entrances_xslug[eslug] = entrance
|
||||||
ce = CaveAndEntrance.objects.update_or_create(cave = c, entrance_letter = letter, entrance = entrance)
|
ce = CaveAndEntrance.objects.update_or_create(
|
||||||
|
cave=c, entrance_letter=letter, entrance=entrance
|
||||||
|
)
|
||||||
except:
|
except:
|
||||||
message = f' ! Entrance setting failure, slug:"{slug}" #entrances:{len(entrances)} {entrance} letter:"{letter}" cave:"{c}" filename:"cave_data/{filename}"'
|
message = f' ! Entrance setting failure, slug:"{slug}" #entrances:{len(entrances)} {entrance} letter:"{letter}" cave:"{c}" filename:"cave_data/{filename}"'
|
||||||
DataIssue.objects.create(parser='caves', message=message, url=f'{c.url}_edit/')
|
DataIssue.objects.create(parser="caves", message=message, url=f"{c.url}_edit/")
|
||||||
print(message)
|
print(message)
|
||||||
|
|
||||||
if survex_file[0]:
|
if survex_file[0]:
|
||||||
if not (Path(SURVEX_DATA) / survex_file[0]).is_file():
|
if not (Path(SURVEX_DATA) / survex_file[0]).is_file():
|
||||||
message = f' ! {slug:12} survex filename does not exist :LOSER:"{survex_file[0]}" in "{filename}"'
|
message = f' ! {slug:12} survex filename does not exist :LOSER:"{survex_file[0]}" in "{filename}"'
|
||||||
DataIssue.objects.create(parser='caves', message=message, url=f'/{slug[0:4]}/{slug}_cave_edit/')
|
DataIssue.objects.create(parser="caves", message=message, url=f"/{slug[0:4]}/{slug}_cave_edit/")
|
||||||
print(message)
|
print(message)
|
||||||
|
|
||||||
|
if description_file[0]: # if not an empty string
|
||||||
if description_file[0]: # if not an empty string
|
|
||||||
message = f' - {slug:12} Note (not an error): complex description filename "{description_file[0]}" inside "{CAVEDESCRIPTIONS}/{filename}"'
|
message = f' - {slug:12} Note (not an error): complex description filename "{description_file[0]}" inside "{CAVEDESCRIPTIONS}/{filename}"'
|
||||||
DataIssue.objects.create(parser='caves ok', message=message, url=f'/{slug}_cave_edit/')
|
DataIssue.objects.create(parser="caves ok", message=message, url=f"/{slug}_cave_edit/")
|
||||||
print(message)
|
print(message)
|
||||||
|
|
||||||
if not (Path(EXPOWEB) / description_file[0]).is_file():
|
if not (Path(EXPOWEB) / description_file[0]).is_file():
|
||||||
message = f' ! {slug:12} description filename "{EXPOWEB}/{description_file[0]}" does not refer to a real file'
|
message = f' ! {slug:12} description filename "{EXPOWEB}/{description_file[0]}" does not refer to a real file'
|
||||||
DataIssue.objects.create(parser='caves', message=message, url=f'/{slug}_cave_edit/')
|
DataIssue.objects.create(parser="caves", message=message, url=f"/{slug}_cave_edit/")
|
||||||
print(message)
|
print(message)
|
||||||
#c.description_file="" # done only once, to clear out cruft.
|
# c.description_file="" # done only once, to clear out cruft.
|
||||||
#c.save()
|
# c.save()
|
||||||
else: # more than one item in long list
|
else: # more than one item in long list
|
||||||
message = f' ! ABORT loading this cave. in "{filename}"'
|
message = f' ! ABORT loading this cave. in "{filename}"'
|
||||||
DataIssue.objects.create(parser='caves', message=message, url=f'/{slug}_cave_edit/')
|
DataIssue.objects.create(parser="caves", message=message, url=f"/{slug}_cave_edit/")
|
||||||
print(message)
|
print(message)
|
||||||
|
|
||||||
def getXML(text, itemname, minItems=1, maxItems=None, printwarnings=True, context=""):
    """Return the contents of every ``<itemname>...</itemname>`` element in text.

    This is a regular-expression scan, not an XML parser: matching is
    non-greedy, spans newlines, and takes ``itemname`` literally.

    Args:
        text: the markup to search.
        itemname: tag name without angle brackets; it may contain spaces
            (e.g. "last visit date").
        minItems: fewest matches expected. When it is 0 and nothing matched,
            a single empty string is returned so callers can still index [0].
        maxItems: most matches expected, or None for no upper limit.
        printwarnings: when true, a count outside the expected range is
            recorded as a DataIssue (parser="caves") and printed.
        context: name of the file being parsed; appended to warning messages
            and stored as the DataIssue url.

    Returns:
        List of the matched inner strings in document order. Despite the
        "Load ABORT" wording of the warnings, nothing is raised or truncated
        here; a count outside the range is only reported.
    """
    # Escape the tag name so regex metacharacters in it cannot widen the match.
    pattern = "<{0}>(.*?)</{0}>".format(re.escape(itemname))
    items = re.findall(pattern, text, re.S)
    count = len(items)

    if printwarnings and count < minItems:
        message = f" ! {count} x {itemname} found, at least {minItems} expected. Load ABORT.  in file {context}"
        DataIssue.objects.create(parser="caves", message=message, url=context)
        print(message)

    if printwarnings and maxItems is not None and count > maxItems:
        message = (
            f" ! {count} x {itemname} found, no more than {maxItems} expected in this XML unit."
            f" Load ABORT.  in file {context}"
        )
        # Carries the same url as the "too few" issue above.
        DataIssue.objects.create(parser="caves", message=message, url=context)
        print(message)

    if minItems == 0 and not items:
        items = [""]
    return items
|
||||||
|
|
||||||
|
|
||||||
def readcaves():
|
def readcaves():
|
||||||
'''Reads the xml-format HTML files in the EXPOWEB repo, not from the loser repo.
|
"""Reads the xml-format HTML files in the EXPOWEB repo, not from the loser repo."""
|
||||||
'''
|
|
||||||
# For those caves which do not have cave_data/1623-xxx.html XML files even though they exist and have surveys
|
# For those caves which do not have cave_data/1623-xxx.html XML files even though they exist and have surveys
|
||||||
# should put this in a simple list
|
# should put this in a simple list
|
||||||
pending = set()
|
pending = set()
|
||||||
@ -487,7 +535,7 @@ def readcaves():
|
|||||||
with open(fpending, "r") as fo:
|
with open(fpending, "r") as fo:
|
||||||
cids = fo.readlines()
|
cids = fo.readlines()
|
||||||
for cid in cids:
|
for cid in cids:
|
||||||
pending.add(cid.strip().rstrip('\n').upper())
|
pending.add(cid.strip().rstrip("\n").upper())
|
||||||
|
|
||||||
with transaction.atomic():
|
with transaction.atomic():
|
||||||
print(" - Deleting Caves and Entrances")
|
print(" - Deleting Caves and Entrances")
|
||||||
@ -505,43 +553,42 @@ def readcaves():
|
|||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
# Clear the cave data issues and the caves as we are reloading
|
# Clear the cave data issues and the caves as we are reloading
|
||||||
DataIssue.objects.filter(parser='areas').delete()
|
DataIssue.objects.filter(parser="areas").delete()
|
||||||
DataIssue.objects.filter(parser='caves').delete()
|
DataIssue.objects.filter(parser="caves").delete()
|
||||||
DataIssue.objects.filter(parser='caves ok').delete()
|
DataIssue.objects.filter(parser="caves ok").delete()
|
||||||
DataIssue.objects.filter(parser='entrances').delete()
|
DataIssue.objects.filter(parser="entrances").delete()
|
||||||
|
|
||||||
print(" - Creating Areas 1623, 1624, 1627 and 1626")
|
print(" - Creating Areas 1623, 1624, 1627 and 1626")
|
||||||
# This crashes on the server with MariaDB even though a null parent is explicitly allowed.
|
# This crashes on the server with MariaDB even though a null parent is explicitly allowed.
|
||||||
area_1623= Area.objects.create(short_name = "1623", super=None)
|
area_1623 = Area.objects.create(short_name="1623", super=None)
|
||||||
area_1623.save()
|
area_1623.save()
|
||||||
area_1624= Area.objects.create(short_name = "1624", super=None)
|
area_1624 = Area.objects.create(short_name="1624", super=None)
|
||||||
area_1624.save()
|
area_1624.save()
|
||||||
area_1626= Area.objects.create(short_name = "1626", super=None)
|
area_1626 = Area.objects.create(short_name="1626", super=None)
|
||||||
area_1626.save()
|
area_1626.save()
|
||||||
area_1627= Area.objects.create(short_name = "1627", super=None)
|
area_1627 = Area.objects.create(short_name="1627", super=None)
|
||||||
area_1627.save()
|
area_1627.save()
|
||||||
|
|
||||||
|
|
||||||
with transaction.atomic():
|
with transaction.atomic():
|
||||||
print(" - settings.CAVEDESCRIPTIONS: ", CAVEDESCRIPTIONS)
|
print(" - settings.CAVEDESCRIPTIONS: ", CAVEDESCRIPTIONS)
|
||||||
print(" - Reading Entrances from entrance descriptions xml files")
|
print(" - Reading Entrances from entrance descriptions xml files")
|
||||||
for filename in next(os.walk(ENTRANCEDESCRIPTIONS))[2]: #Should be a better way of getting a list of files
|
for filename in next(os.walk(ENTRANCEDESCRIPTIONS))[2]: # Should be a better way of getting a list of files
|
||||||
# if filename.endswith('.html'):
|
# if filename.endswith('.html'):
|
||||||
# if Path(filename).stem[5:] in pending:
|
# if Path(filename).stem[5:] in pending:
|
||||||
# print(f'Skipping pending entrance dummy file <{filename}>')
|
# print(f'Skipping pending entrance dummy file <{filename}>')
|
||||||
# else:
|
# else:
|
||||||
# readentrance(filename)
|
# readentrance(filename)
|
||||||
readentrance(filename)
|
readentrance(filename)
|
||||||
|
|
||||||
print(" - Reading Caves from cave descriptions xml files")
|
print(" - Reading Caves from cave descriptions xml files")
|
||||||
for filename in next(os.walk(CAVEDESCRIPTIONS))[2]: #Should be a better way of getting a list of files
|
for filename in next(os.walk(CAVEDESCRIPTIONS))[2]: # Should be a better way of getting a list of files
|
||||||
if filename.endswith('.html'):
|
if filename.endswith(".html"):
|
||||||
readcave(filename)
|
readcave(filename)
|
||||||
|
|
||||||
print (" - Setting up all the variously useful alias names")
|
print(" - Setting up all the variously useful alias names")
|
||||||
mycavelookup = GetCaveLookup()
|
mycavelookup = GetCaveLookup()
|
||||||
|
|
||||||
print (" - Setting pending caves")
|
print(" - Setting pending caves")
|
||||||
# Do this last, so we can detect if they are created and no longer 'pending'
|
# Do this last, so we can detect if they are created and no longer 'pending'
|
||||||
|
|
||||||
with transaction.atomic():
|
with transaction.atomic():
|
||||||
@ -549,11 +596,10 @@ def readcaves():
|
|||||||
|
|
||||||
if k[0:3] == "162":
|
if k[0:3] == "162":
|
||||||
areanum = k[0:4]
|
areanum = k[0:4]
|
||||||
url = f'{areanum}/{k[5:]}' # Note we are not appending the .htm as we are modern folks now.
|
url = f"{areanum}/{k[5:]}" # Note we are not appending the .htm as we are modern folks now.
|
||||||
else:
|
else:
|
||||||
areanum = "1623"
|
areanum = "1623"
|
||||||
url = f'1623/{k}'
|
url = f"1623/{k}"
|
||||||
|
|
||||||
|
|
||||||
area = area_1623
|
area = area_1623
|
||||||
if areanum == "1623":
|
if areanum == "1623":
|
||||||
@ -568,8 +614,6 @@ def readcaves():
|
|||||||
do_pending_cave(k, url, area)
|
do_pending_cave(k, url, area)
|
||||||
except:
|
except:
|
||||||
message = f" ! Error. Cannot create pending cave and entrance, pending-id:{k} in area {areanum}"
|
message = f" ! Error. Cannot create pending cave and entrance, pending-id:{k} in area {areanum}"
|
||||||
DataIssue.objects.create(parser='caves', message=message)
|
DataIssue.objects.create(parser="caves", message=message)
|
||||||
print(message)
|
print(message)
|
||||||
raise
|
raise
|
||||||
|
|
||||||
|
|
||||||
|
@ -13,11 +13,11 @@ from troggle.core.models.survex import DrawingFile, SingleScan, Wallet
|
|||||||
from troggle.core.models.troggle import DataIssue
|
from troggle.core.models.troggle import DataIssue
|
||||||
from troggle.core.utils import save_carefully
|
from troggle.core.utils import save_carefully
|
||||||
|
|
||||||
'''Searches through all the :drawings: repository looking
|
"""Searches through all the :drawings: repository looking
|
||||||
for tunnel and therion files
|
for tunnel and therion files
|
||||||
'''
|
"""
|
||||||
|
|
||||||
todo='''- Rename functions more consistently between tunnel and therion variants
|
todo = """- Rename functions more consistently between tunnel and therion variants
|
||||||
|
|
||||||
- Recode to use pathlib instead of whacky resetting of loop variable inside loop
|
- Recode to use pathlib instead of whacky resetting of loop variable inside loop
|
||||||
to scan sub-folders.
|
to scan sub-folders.
|
||||||
@ -25,20 +25,23 @@ to scan sub-folders.
|
|||||||
- Recode rx_valid_ext to use pathlib suffix() function
|
- Recode rx_valid_ext to use pathlib suffix() function
|
||||||
|
|
||||||
- Recode load_drawings_files() to use a list of suffices not huge if-else monstrosity
|
- Recode load_drawings_files() to use a list of suffices not huge if-else monstrosity
|
||||||
'''
|
"""
|
||||||
|
|
||||||
|
# Matches a path or filename ending (case-insensitively) in one of the scanned-image
# or text extensions: .png .jpg .pdf .jpeg .gif .txt
rx_valid_ext = re.compile(
    r"(?i)\.(?:png|jpg|pdf|jpeg|gif|txt)$",
)
|
|
||||||
|
|
||||||
def find_dwg_file(dwgfile, path):
|
def find_dwg_file(dwgfile, path):
|
||||||
'''Is given a line of text 'path' which may or may not contain a recognisable name of a scanned file
|
"""Is given a line of text 'path' which may or may not contain a recognisable name of a scanned file
|
||||||
which we have already seen when we imported all the files we could find in the surveyscans direstories.
|
which we have already seen when we imported all the files we could find in the surveyscans direstories.
|
||||||
|
|
||||||
The purpose is to find cross-references between Tunnel drawing files. But this is not reported anywhere yet ?
|
The purpose is to find cross-references between Tunnel drawing files. But this is not reported anywhere yet ?
|
||||||
|
|
||||||
What is all this really for ?! Is this data used anywhere ??
|
What is all this really for ?! Is this data used anywhere ??
|
||||||
'''
|
"""
|
||||||
wallet, scansfile = None, None
|
wallet, scansfile = None, None
|
||||||
mscansdir = re.search(r"(\d\d\d\d#X?\d+\w?|1995-96kh|92-94Surveybookkh|1991surveybook|smkhs)/(.*?(?:png|jpg|pdf|jpeg|gif|txt))$", path)
|
mscansdir = re.search(
|
||||||
|
r"(\d\d\d\d#X?\d+\w?|1995-96kh|92-94Surveybookkh|1991surveybook|smkhs)/(.*?(?:png|jpg|pdf|jpeg|gif|txt))$", path
|
||||||
|
)
|
||||||
if mscansdir:
|
if mscansdir:
|
||||||
scanswalletl = Wallet.objects.filter(walletname=mscansdir.group(1))
|
scanswalletl = Wallet.objects.filter(walletname=mscansdir.group(1))
|
||||||
# This should be changed to properly detect if a list of folders is returned and do something sensible, not just pick the first.
|
# This should be changed to properly detect if a list of folders is returned and do something sensible, not just pick the first.
|
||||||
@ -47,18 +50,18 @@ def find_dwg_file(dwgfile, path):
|
|||||||
if len(scanswalletl) > 1:
|
if len(scanswalletl) > 1:
|
||||||
message = f"! More than one scan FOLDER matches filter query. [{scansfilel[0]}]: {mscansdir.group(1)} {mscansdir.group(2)} {dwgfile.dwgpath} {path}"
|
message = f"! More than one scan FOLDER matches filter query. [{scansfilel[0]}]: {mscansdir.group(1)} {mscansdir.group(2)} {dwgfile.dwgpath} {path}"
|
||||||
print(message)
|
print(message)
|
||||||
DataIssue.objects.create(parser='Tunnel', message=message)
|
DataIssue.objects.create(parser="Tunnel", message=message)
|
||||||
|
|
||||||
if wallet:
|
if wallet:
|
||||||
scansfilel = wallet.singlescan_set.filter(name=mscansdir.group(2))
|
scansfilel = wallet.singlescan_set.filter(name=mscansdir.group(2))
|
||||||
if len(scansfilel):
|
if len(scansfilel):
|
||||||
if len(scansfilel) > 1:
|
if len(scansfilel) > 1:
|
||||||
plist =[]
|
plist = []
|
||||||
for sf in scansfilel:
|
for sf in scansfilel:
|
||||||
plist.append(sf.ffile)
|
plist.append(sf.ffile)
|
||||||
message = f"! More than one image FILENAME matches filter query. [{scansfilel[0]}]: {mscansdir.group(1)} {mscansdir.group(2)} {dwgfile.dwgpath} {path} {plist}"
|
message = f"! More than one image FILENAME matches filter query. [{scansfilel[0]}]: {mscansdir.group(1)} {mscansdir.group(2)} {dwgfile.dwgpath} {path} {plist}"
|
||||||
print(message)
|
print(message)
|
||||||
DataIssue.objects.create(parser='Tunnel', message=message)
|
DataIssue.objects.create(parser="Tunnel", message=message)
|
||||||
scansfile = scansfilel[0]
|
scansfile = scansfilel[0]
|
||||||
|
|
||||||
if wallet:
|
if wallet:
|
||||||
@ -66,25 +69,27 @@ def find_dwg_file(dwgfile, path):
|
|||||||
if scansfile:
|
if scansfile:
|
||||||
dwgfile.scans.add(scansfile)
|
dwgfile.scans.add(scansfile)
|
||||||
|
|
||||||
elif path and not rx_valid_ext.search(path): # ie not recognised as a path where wallets live and not an image file type
|
elif path and not rx_valid_ext.search(
|
||||||
|
path
|
||||||
|
): # ie not recognised as a path where wallets live and not an image file type
|
||||||
name = os.path.split(path)[1]
|
name = os.path.split(path)[1]
|
||||||
rdwgfilel = DrawingFile.objects.filter(dwgname=name) # Check if it is another drawing file we have already seen
|
rdwgfilel = DrawingFile.objects.filter(dwgname=name) # Check if it is another drawing file we have already seen
|
||||||
if len(rdwgfilel):
|
if len(rdwgfilel):
|
||||||
if len(rdwgfilel) > 1:
|
if len(rdwgfilel) > 1:
|
||||||
plist =[]
|
plist = []
|
||||||
for df in rdwgfilel:
|
for df in rdwgfilel:
|
||||||
plist.append(df.dwgpath)
|
plist.append(df.dwgpath)
|
||||||
message = f"- Warning {len(rdwgfilel)} files named '{name}' {plist}" # should not be a problem?
|
message = f"- Warning {len(rdwgfilel)} files named '{name}' {plist}" # should not be a problem?
|
||||||
print(message)
|
print(message)
|
||||||
DataIssue.objects.create(parser='Tunnel', message=message, url=f'/dwgdataraw/{path}')
|
DataIssue.objects.create(parser="Tunnel", message=message, url=f"/dwgdataraw/{path}")
|
||||||
rdwgfile = rdwgfilel[0]
|
rdwgfile = rdwgfilel[0]
|
||||||
dwgfile.dwgcontains.add(rdwgfile)
|
dwgfile.dwgcontains.add(rdwgfile)
|
||||||
|
|
||||||
dwgfile.save()
|
dwgfile.save()
|
||||||
|
|
||||||
|
|
||||||
def findwalletimage(therionfile, foundpath):
|
def findwalletimage(therionfile, foundpath):
|
||||||
'''Tries to link the drawing file (Therion format) to the referenced image (scan) file
|
"""Tries to link the drawing file (Therion format) to the referenced image (scan) file"""
|
||||||
'''
|
|
||||||
foundpath = foundpath.strip("{}")
|
foundpath = foundpath.strip("{}")
|
||||||
mscansdir = re.search(r"(\d\d\d\d#\d+\w?|1995-96kh|92-94Surveybookkh|1991surveybook|smkhs)", foundpath)
|
mscansdir = re.search(r"(\d\d\d\d#\d+\w?|1995-96kh|92-94Surveybookkh|1991surveybook|smkhs)", foundpath)
|
||||||
if mscansdir:
|
if mscansdir:
|
||||||
@ -93,9 +98,11 @@ def findwalletimage(therionfile, foundpath):
|
|||||||
if len(scanswalletl):
|
if len(scanswalletl):
|
||||||
wallet = scanswalletl[0]
|
wallet = scanswalletl[0]
|
||||||
if len(scanswalletl) > 1:
|
if len(scanswalletl) > 1:
|
||||||
message = "! More than one scan FOLDER matches filter query. [{}]: {} {} {}".format(therionfile, mscansdir.group(1), foundpath)
|
message = "! More than one scan FOLDER matches filter query. [{}]: {} {} {}".format(
|
||||||
|
therionfile, mscansdir.group(1), foundpath
|
||||||
|
)
|
||||||
print(message)
|
print(message)
|
||||||
DataIssue.objects.create(parser='Therion', message=message)
|
DataIssue.objects.create(parser="Therion", message=message)
|
||||||
if wallet:
|
if wallet:
|
||||||
therionfile.dwgwallets.add(wallet)
|
therionfile.dwgwallets.add(wallet)
|
||||||
|
|
||||||
@ -105,33 +112,33 @@ def findwalletimage(therionfile, foundpath):
|
|||||||
# message = f'! {len(scansfilel)} {scansfilel} = {scanfilename} found in the wallet specified {wallet.walletname}'
|
# message = f'! {len(scansfilel)} {scansfilel} = {scanfilename} found in the wallet specified {wallet.walletname}'
|
||||||
# print(message)
|
# print(message)
|
||||||
if len(scansfilel) > 1:
|
if len(scansfilel) > 1:
|
||||||
plist =[]
|
plist = []
|
||||||
for sf in scansfilel:
|
for sf in scansfilel:
|
||||||
plist.append(sf.ffile)
|
plist.append(sf.ffile)
|
||||||
message = f"! More than one image FILENAME matches filter query. [{scansfilel[0]}]: {mscansdir.group(1)} {mscansdir.group(2)} {dwgfile.dwgpath} {path} {plist}"
|
message = f"! More than one image FILENAME matches filter query. [{scansfilel[0]}]: {mscansdir.group(1)} {mscansdir.group(2)} {dwgfile.dwgpath} {path} {plist}"
|
||||||
print(message)
|
print(message)
|
||||||
DataIssue.objects.create(parser='Therion', message=message)
|
DataIssue.objects.create(parser="Therion", message=message)
|
||||||
scansfile = scansfilel[0]
|
scansfile = scansfilel[0]
|
||||||
therionfile.scans.add(scansfile)
|
therionfile.scans.add(scansfile)
|
||||||
else:
|
else:
|
||||||
message = f'! Scanned file {scanfilename} mentioned in "{therionfile.dwgpath}" is not actually found in {wallet.walletname}'
|
message = f'! Scanned file {scanfilename} mentioned in "{therionfile.dwgpath}" is not actually found in {wallet.walletname}'
|
||||||
wurl = f'/survey_scans/{wallet.walletname}/'.replace("#",":")
|
wurl = f"/survey_scans/{wallet.walletname}/".replace("#", ":")
|
||||||
# print(message)
|
# print(message)
|
||||||
DataIssue.objects.create(parser='Therion', message=message, url = wurl)
|
DataIssue.objects.create(parser="Therion", message=message, url=wurl)
|
||||||
|
|
||||||
|
|
||||||
def findimportinsert(therionfile, imp):
    """Placeholder for linking a Therion drawing file to a scrap it refers to.

    therionfile -- the drawing-file object being processed
    imp         -- the name captured from a line of the Therion file

    Nothing is implemented yet: both arguments are ignored and None is returned.
    """
    return None
|
||||||
|
|
||||||
# Patterns applied line-by-line (MULTILINE) to the text of Therion files.
# An 'xth_me_image_insert' entry whose line ends with a {...} group; no capture group,
# so findall() yields the matched text from 'xth_me_image_insert' to end of line.
rx_xth_me = re.compile(r"xth_me_image_insert.*{.*}$", flags=re.MULTILINE)
# A line starting 'survey <word>'; captures the word.
rx_scrap = re.compile(r"^survey (\w*).*$", flags=re.MULTILINE)
# A line starting 'input <word>'; captures the leading word characters only.
rx_input = re.compile(r"^input (\w*).*$", flags=re.MULTILINE)
|
||||||
|
|
||||||
|
|
||||||
def settherionfileinfo(filetuple):
|
def settherionfileinfo(filetuple):
|
||||||
'''Read in the drawing file contents and sets values on the dwgfile object
|
"""Read in the drawing file contents and sets values on the dwgfile object"""
|
||||||
'''
|
|
||||||
thtype, therionfile = filetuple
|
thtype, therionfile = filetuple
|
||||||
|
|
||||||
ff = os.path.join(settings.DRAWINGS_DATA, therionfile.dwgpath)
|
ff = os.path.join(settings.DRAWINGS_DATA, therionfile.dwgpath)
|
||||||
@ -139,17 +146,17 @@ def settherionfileinfo(filetuple):
|
|||||||
if therionfile.filesize <= 0:
|
if therionfile.filesize <= 0:
|
||||||
message = f"! Zero length therion file {ff}"
|
message = f"! Zero length therion file {ff}"
|
||||||
print(message)
|
print(message)
|
||||||
DataIssue.objects.create(parser='Therion', message=message, url=f'/dwgdataraw/{therionfile.dwgpath}')
|
DataIssue.objects.create(parser="Therion", message=message, url=f"/dwgdataraw/{therionfile.dwgpath}")
|
||||||
return
|
return
|
||||||
fin = open(ff,'r')
|
fin = open(ff, "r")
|
||||||
ttext = fin.read()
|
ttext = fin.read()
|
||||||
fin.close()
|
fin.close()
|
||||||
|
|
||||||
# The equivalent for a tunnel 'path' would be a .th2 'line wall' or 'scrap'
|
# The equivalent for a tunnel 'path' would be a .th2 'line wall' or 'scrap'
|
||||||
# print(len(re.findall(r"line", ttext)))
|
# print(len(re.findall(r"line", ttext)))
|
||||||
if thtype=='th':
|
if thtype == "th":
|
||||||
therionfile.npaths = len(re.findall(r"^input ", ttext, re.MULTILINE))
|
therionfile.npaths = len(re.findall(r"^input ", ttext, re.MULTILINE))
|
||||||
elif thtype=='th2':
|
elif thtype == "th2":
|
||||||
therionfile.npaths = len(re.findall(r"^line ", ttext, re.MULTILINE))
|
therionfile.npaths = len(re.findall(r"^line ", ttext, re.MULTILINE))
|
||||||
therionfile.save()
|
therionfile.save()
|
||||||
|
|
||||||
@ -162,42 +169,44 @@ def settherionfileinfo(filetuple):
|
|||||||
|
|
||||||
for xth_me in rx_xth_me.findall(ttext):
|
for xth_me in rx_xth_me.findall(ttext):
|
||||||
# WORK IN PROGRESS. Do not clutter up the DataIssues list with this
|
# WORK IN PROGRESS. Do not clutter up the DataIssues list with this
|
||||||
message = f'! Un-parsed image filename: {therionfile.dwgname} : {xth_me.split()[-3]} - {therionfile.dwgpath}'
|
message = f"! Un-parsed image filename: {therionfile.dwgname} : {xth_me.split()[-3]} - {therionfile.dwgpath}"
|
||||||
# print(message)
|
# print(message)
|
||||||
# DataIssue.objects.create(parser='xTherion', message=message, url=f'/dwgdataraw/{therionfile.dwgpath}')
|
# DataIssue.objects.create(parser='xTherion', message=message, url=f'/dwgdataraw/{therionfile.dwgpath}')
|
||||||
# ! Un-parsed image filename: 107coldest : ../../../expofiles/surveyscans/2015/2015#20/notes.jpg - therion/plan/107coldest.th2
|
# ! Un-parsed image filename: 107coldest : ../../../expofiles/surveyscans/2015/2015#20/notes.jpg - therion/plan/107coldest.th2
|
||||||
|
|
||||||
with open('therionrefs.log', 'a') as lg:
|
with open("therionrefs.log", "a") as lg:
|
||||||
lg.write(message + '\n')
|
lg.write(message + "\n")
|
||||||
|
|
||||||
findwalletimage(therionfile, xth_me.split()[-3])
|
findwalletimage(therionfile, xth_me.split()[-3])
|
||||||
|
|
||||||
for inp in rx_input.findall(ttext):
|
for inp in rx_input.findall(ttext):
|
||||||
# if this 'input' is a .th2 file we have already seen, then we can assign this as a sub-file
|
# if this 'input' is a .th2 file we have already seen, then we can assign this as a sub-file
|
||||||
# but we would need to disentangle to get the current path properly
|
# but we would need to disentangle to get the current path properly
|
||||||
message = f'! Un-set (?) Therion .th2 input: - {therionfile.dwgname} : {inp} - {therionfile.dwgpath}'
|
message = f"! Un-set (?) Therion .th2 input: - {therionfile.dwgname} : {inp} - {therionfile.dwgpath}"
|
||||||
#print(message)
|
# print(message)
|
||||||
DataIssue.objects.create(parser='xTherion', message=message, url=f'/dwgdataraw/{therionfile.dwgpath}')
|
DataIssue.objects.create(parser="xTherion", message=message, url=f"/dwgdataraw/{therionfile.dwgpath}")
|
||||||
findimportinsert(therionfile, inp)
|
findimportinsert(therionfile, inp)
|
||||||
|
|
||||||
therionfile.save()
|
therionfile.save()
|
||||||
|
|
||||||
# Byte patterns, for drawing files that are read in binary mode.
# Every occurrence of an opening '<skpath' tag.
rx_skpath = re.compile(
    rb"<skpath",
)
# A '<pcarea area_signal="frame" ...' element; captures its sfsketch and sfstyle attribute values.
rx_pcpath = re.compile(
    rb'<pcarea area_signal="frame".*?sfsketch="([^"]*)" sfstyle="([^"]*)"',
)
|
||||||
|
|
||||||
|
|
||||||
def settnlfileinfo(dwgfile):
|
def settnlfileinfo(dwgfile):
|
||||||
'''Read in the drawing file contents and sets values on the dwgfile object
|
"""Read in the drawing file contents and sets values on the dwgfile object
|
||||||
Should try to read the date too e.g. tunneldate="2010-08-16 22:51:57
|
Should try to read the date too e.g. tunneldate="2010-08-16 22:51:57
|
||||||
then we could display on the master calendar per expo.
|
then we could display on the master calendar per expo.
|
||||||
'''
|
"""
|
||||||
ff = os.path.join(settings.DRAWINGS_DATA, dwgfile.dwgpath)
|
ff = os.path.join(settings.DRAWINGS_DATA, dwgfile.dwgpath)
|
||||||
dwgfile.filesize = os.stat(ff)[stat.ST_SIZE]
|
dwgfile.filesize = os.stat(ff)[stat.ST_SIZE]
|
||||||
if dwgfile.filesize <= 0:
|
if dwgfile.filesize <= 0:
|
||||||
message = f"! Zero length tunnel file {ff}"
|
message = f"! Zero length tunnel file {ff}"
|
||||||
print(message)
|
print(message)
|
||||||
DataIssue.objects.create(parser='Tunnel', message=message, url=f'/dwgdataraw/{dwgfile.dwgpath}')
|
DataIssue.objects.create(parser="Tunnel", message=message, url=f"/dwgdataraw/{dwgfile.dwgpath}")
|
||||||
return
|
return
|
||||||
fin = open(ff,'rb')
|
fin = open(ff, "rb")
|
||||||
ttext = fin.read()
|
ttext = fin.read()
|
||||||
fin.close()
|
fin.close()
|
||||||
|
|
||||||
@ -216,22 +225,24 @@ def settnlfileinfo(dwgfile):
|
|||||||
|
|
||||||
dwgfile.save()
|
dwgfile.save()
|
||||||
|
|
||||||
|
|
||||||
def setdrwfileinfo(dwgfile):
    """Record the on-disk size of a generic drawing file on the dwgfile object.

    These are SVG, PDF, image or .txt files, so there is no useful internal format
    to search; only the file size is set. This function is a placeholder in case we
    think of a way to do something to recognise generic survex filenames.

    dwgfile -- object whose 'dwgpath' is relative to settings.DRAWINGS_DATA

    A zero-length file is reported (printed and stored as a 'drawings' DataIssue)
    and the function returns without saving. ff.stat() raises OSError if the file
    cannot be examined.
    """
    ff = Path(settings.DRAWINGS_DATA) / dwgfile.dwgpath
    dwgfile.filesize = ff.stat().st_size
    if dwgfile.filesize <= 0:
        message = f"! Zero length drawing file {ff}"
        print(message)
        DataIssue.objects.create(parser="drawings", message=message, url=f"/dwgdataraw/{dwgfile.dwgpath}")
        return

    # Persist the size just computed, as the therion and tunnel variants do;
    # previously it was set on the in-memory object only and then lost.
    dwgfile.save()
|
||||||
|
|
||||||
|
|
||||||
def load_drawings_files():
|
def load_drawings_files():
|
||||||
'''Breadth first search of drawings directory looking for sub-directories and *.xml filesize
|
"""Breadth first search of drawings directory looking for sub-directories and *.xml filesize
|
||||||
This is brain-damaged very early code. Should be replaced with proper use of pathlib.
|
This is brain-damaged very early code. Should be replaced with proper use of pathlib.
|
||||||
|
|
||||||
Why do we have all this detection of file types/! Why not use get_mime_types ?
|
Why do we have all this detection of file types/! Why not use get_mime_types ?
|
||||||
@ -239,19 +250,18 @@ def load_drawings_files():
|
|||||||
|
|
||||||
We import JPG, PNG and SVG files; which have already been put on the server,
|
We import JPG, PNG and SVG files; which have already been put on the server,
|
||||||
but the upload form intentionally refuses to upload PNG and JPG (though it does allow SVG)
|
but the upload form intentionally refuses to upload PNG and JPG (though it does allow SVG)
|
||||||
'''
|
"""
|
||||||
all_xml = []
|
all_xml = []
|
||||||
drawdatadir = settings.DRAWINGS_DATA
|
drawdatadir = settings.DRAWINGS_DATA
|
||||||
DrawingFile.objects.all().delete()
|
DrawingFile.objects.all().delete()
|
||||||
DataIssue.objects.filter(parser='drawings').delete()
|
DataIssue.objects.filter(parser="drawings").delete()
|
||||||
DataIssue.objects.filter(parser='Therion').delete()
|
DataIssue.objects.filter(parser="Therion").delete()
|
||||||
DataIssue.objects.filter(parser='xTherion').delete()
|
DataIssue.objects.filter(parser="xTherion").delete()
|
||||||
DataIssue.objects.filter(parser='Tunnel').delete()
|
DataIssue.objects.filter(parser="Tunnel").delete()
|
||||||
if(os.path.isfile('therionrefs.log')):
|
if os.path.isfile("therionrefs.log"):
|
||||||
os.remove('therionrefs.log')
|
os.remove("therionrefs.log")
|
||||||
|
|
||||||
|
drawingsdirs = [""]
|
||||||
drawingsdirs = [ "" ]
|
|
||||||
while drawingsdirs:
|
while drawingsdirs:
|
||||||
drawdir = drawingsdirs.pop()
|
drawdir = drawingsdirs.pop()
|
||||||
for f in os.listdir(os.path.join(drawdatadir, drawdir)):
|
for f in os.listdir(os.path.join(drawdatadir, drawdir)):
|
||||||
@ -260,65 +270,67 @@ def load_drawings_files():
|
|||||||
lf = os.path.join(drawdir, f)
|
lf = os.path.join(drawdir, f)
|
||||||
ff = os.path.join(drawdatadir, lf)
|
ff = os.path.join(drawdatadir, lf)
|
||||||
if os.path.isdir(ff):
|
if os.path.isdir(ff):
|
||||||
drawingsdirs.append(lf) # lunatic! adding to list in middle of list while loop! Replace with pathlib functions.
|
drawingsdirs.append(
|
||||||
|
lf
|
||||||
|
) # lunatic! adding to list in middle of list while loop! Replace with pathlib functions.
|
||||||
elif Path(f).suffix.lower() == ".txt":
|
elif Path(f).suffix.lower() == ".txt":
|
||||||
# Always creates new
|
# Always creates new
|
||||||
dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1])
|
dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1])
|
||||||
dwgfile.save()
|
dwgfile.save()
|
||||||
all_xml.append(('txt',dwgfile))
|
all_xml.append(("txt", dwgfile))
|
||||||
elif Path(f).suffix.lower() == ".xml":
|
elif Path(f).suffix.lower() == ".xml":
|
||||||
# Always creates new
|
# Always creates new
|
||||||
dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1])
|
dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1])
|
||||||
dwgfile.save()
|
dwgfile.save()
|
||||||
all_xml.append(('xml',dwgfile))
|
all_xml.append(("xml", dwgfile))
|
||||||
elif Path(f).suffix.lower() == ".th":
|
elif Path(f).suffix.lower() == ".th":
|
||||||
# Always creates new
|
# Always creates new
|
||||||
dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1])
|
dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1])
|
||||||
dwgfile.save()
|
dwgfile.save()
|
||||||
all_xml.append(('th',dwgfile))
|
all_xml.append(("th", dwgfile))
|
||||||
elif Path(f).suffix.lower() == ".th2":
|
elif Path(f).suffix.lower() == ".th2":
|
||||||
# Always creates new
|
# Always creates new
|
||||||
dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1])
|
dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1])
|
||||||
dwgfile.save()
|
dwgfile.save()
|
||||||
all_xml.append(('th2',dwgfile))
|
all_xml.append(("th2", dwgfile))
|
||||||
elif Path(f).suffix.lower() == ".pdf":
|
elif Path(f).suffix.lower() == ".pdf":
|
||||||
# Always creates new
|
# Always creates new
|
||||||
dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1])
|
dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1])
|
||||||
dwgfile.save()
|
dwgfile.save()
|
||||||
all_xml.append(('pdf',dwgfile))
|
all_xml.append(("pdf", dwgfile))
|
||||||
elif Path(f).suffix.lower() == ".png":
|
elif Path(f).suffix.lower() == ".png":
|
||||||
# Always creates new
|
# Always creates new
|
||||||
dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1])
|
dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1])
|
||||||
dwgfile.save()
|
dwgfile.save()
|
||||||
all_xml.append(('png',dwgfile))
|
all_xml.append(("png", dwgfile))
|
||||||
elif Path(f).suffix.lower() == ".svg":
|
elif Path(f).suffix.lower() == ".svg":
|
||||||
# Always creates new
|
# Always creates new
|
||||||
dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1])
|
dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1])
|
||||||
dwgfile.save()
|
dwgfile.save()
|
||||||
all_xml.append(('svg',dwgfile))
|
all_xml.append(("svg", dwgfile))
|
||||||
elif Path(f).suffix.lower() == ".jpg":
|
elif Path(f).suffix.lower() == ".jpg":
|
||||||
# Always creates new
|
# Always creates new
|
||||||
dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1])
|
dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1])
|
||||||
dwgfile.save()
|
dwgfile.save()
|
||||||
all_xml.append(('jpg',dwgfile))
|
all_xml.append(("jpg", dwgfile))
|
||||||
elif Path(f).suffix == '':
|
elif Path(f).suffix == "":
|
||||||
# therion file
|
# therion file
|
||||||
dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f)[1])
|
dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f)[1])
|
||||||
dwgfile.save()
|
dwgfile.save()
|
||||||
all_xml.append(('',dwgfile))
|
all_xml.append(("", dwgfile))
|
||||||
|
|
||||||
print(f' - {len(all_xml)} Drawings files found')
|
print(f" - {len(all_xml)} Drawings files found")
|
||||||
|
|
||||||
for d in all_xml:
|
for d in all_xml:
|
||||||
if d[0] in ['pdf', 'txt', 'svg', 'jpg', 'png', '']:
|
if d[0] in ["pdf", "txt", "svg", "jpg", "png", ""]:
|
||||||
setdrwfileinfo(d[1])
|
setdrwfileinfo(d[1])
|
||||||
if d[0] == 'xml':
|
if d[0] == "xml":
|
||||||
settnlfileinfo(d[1])
|
settnlfileinfo(d[1])
|
||||||
# important to import .th2 files before .th so that we can assign them when found in .th files
|
# important to import .th2 files before .th so that we can assign them when found in .th files
|
||||||
if d[0] == 'th2':
|
if d[0] == "th2":
|
||||||
settherionfileinfo(d)
|
settherionfileinfo(d)
|
||||||
if d[0] == 'th':
|
if d[0] == "th":
|
||||||
settherionfileinfo(d)
|
settherionfileinfo(d)
|
||||||
|
|
||||||
# for drawfile in DrawingFile.objects.all():
|
# for drawfile in DrawingFile.objects.all():
|
||||||
# SetTunnelfileInfo(drawfile)
|
# SetTunnelfileInfo(drawfile)
|
||||||
|
@ -4,8 +4,7 @@ import sys
|
|||||||
import django
|
import django
|
||||||
from django.contrib.auth.models import User
|
from django.contrib.auth.models import User
|
||||||
from django.core import management
|
from django.core import management
|
||||||
from django.db import (close_old_connections, connection, connections,
|
from django.db import close_old_connections, connection, connections, transaction
|
||||||
transaction)
|
|
||||||
from django.http import HttpResponse
|
from django.http import HttpResponse
|
||||||
|
|
||||||
import troggle.parsers.caves
|
import troggle.parsers.caves
|
||||||
@ -16,41 +15,48 @@ import troggle.parsers.QMs
|
|||||||
import troggle.parsers.scans
|
import troggle.parsers.scans
|
||||||
import troggle.settings
|
import troggle.settings
|
||||||
|
|
||||||
'''Master data import.
|
"""Master data import.
|
||||||
Used only by databaseReset.py and online controlpanel.
|
Used only by databaseReset.py and online controlpanel.
|
||||||
'''
|
"""
|
||||||
|
|
||||||
|
|
||||||
def import_caves():
    """Announce the target database, then run the caves parser (readcaves).

    No transaction is opened here, unlike the other import_* functions.
    """
    print("-- Importing Caves to ", end="")
    database_name = django.db.connections.databases["default"]["NAME"]
    print(database_name)
    troggle.parsers.caves.readcaves()
|
||||||
|
|
||||||
|
|
||||||
def import_people():
    """Announce the target database, then run load_people_expos() in one atomic transaction."""
    print("-- Importing People (folk.csv) to ", end="")
    database_name = django.db.connections.databases["default"]["NAME"]
    print(database_name)
    atomic_block = transaction.atomic()
    with atomic_block:
        troggle.parsers.people.load_people_expos()
|
||||||
|
|
||||||
|
|
||||||
def import_surveyscans():
|
def import_surveyscans():
|
||||||
print("-- Importing Survey Scans")
|
print("-- Importing Survey Scans")
|
||||||
with transaction.atomic():
|
with transaction.atomic():
|
||||||
troggle.parsers.scans.load_all_scans()
|
troggle.parsers.scans.load_all_scans()
|
||||||
|
|
||||||
|
|
||||||
def import_logbooks():
|
def import_logbooks():
|
||||||
print("-- Importing Logbooks")
|
print("-- Importing Logbooks")
|
||||||
with transaction.atomic():
|
with transaction.atomic():
|
||||||
troggle.parsers.logbooks.LoadLogbooks()
|
troggle.parsers.logbooks.LoadLogbooks()
|
||||||
|
|
||||||
|
|
||||||
def import_logbook(year=2022):
|
def import_logbook(year=2022):
|
||||||
print(f"-- Importing Logbook {year}")
|
print(f"-- Importing Logbook {year}")
|
||||||
with transaction.atomic():
|
with transaction.atomic():
|
||||||
troggle.parsers.logbooks.LoadLogbook(year)
|
troggle.parsers.logbooks.LoadLogbook(year)
|
||||||
|
|
||||||
|
|
||||||
def import_QMs():
|
def import_QMs():
|
||||||
print("-- Importing old QMs for 161, 204, 234 from CSV files")
|
print("-- Importing old QMs for 161, 204, 234 from CSV files")
|
||||||
with transaction.atomic():
|
with transaction.atomic():
|
||||||
troggle.parsers.QMs.Load_QMs()
|
troggle.parsers.QMs.Load_QMs()
|
||||||
|
|
||||||
|
|
||||||
def import_survex():
|
def import_survex():
|
||||||
# when this import is moved to the top with the rest it all crashes horribly
|
# when this import is moved to the top with the rest it all crashes horribly
|
||||||
print("-- Importing Survex and Entrance Positions")
|
print("-- Importing Survex and Entrance Positions")
|
||||||
@ -63,23 +69,26 @@ def import_survex():
|
|||||||
with transaction.atomic():
|
with transaction.atomic():
|
||||||
troggle.parsers.survex.LoadPositions()
|
troggle.parsers.survex.LoadPositions()
|
||||||
|
|
||||||
|
|
||||||
def import_ents():
|
def import_ents():
|
||||||
# when this import is moved to the top with the rest it all crashes horribly
|
# when this import is moved to the top with the rest it all crashes horribly
|
||||||
print(" - Survex entrances x/y/z Positions")
|
print(" - Survex entrances x/y/z Positions")
|
||||||
with transaction.atomic():
|
with transaction.atomic():
|
||||||
import troggle.parsers.survex
|
import troggle.parsers.survex
|
||||||
|
|
||||||
troggle.parsers.survex.LoadPositions()
|
troggle.parsers.survex.LoadPositions()
|
||||||
|
|
||||||
|
|
||||||
def import_loadpos():
|
def import_loadpos():
|
||||||
# when this import is moved to the top with the rest it all crashes horribly
|
# when this import is moved to the top with the rest it all crashes horribly
|
||||||
import troggle.parsers.survex
|
import troggle.parsers.survex
|
||||||
|
|
||||||
print(" - Survex entrances x/y/z Positions")
|
print(" - Survex entrances x/y/z Positions")
|
||||||
with transaction.atomic():
|
with transaction.atomic():
|
||||||
troggle.parsers.survex.LoadPositions()
|
troggle.parsers.survex.LoadPositions()
|
||||||
|
|
||||||
|
|
||||||
def import_drawingsfiles():
|
def import_drawingsfiles():
|
||||||
print("-- Importing Drawings files")
|
print("-- Importing Drawings files")
|
||||||
with transaction.atomic():
|
with transaction.atomic():
|
||||||
troggle.parsers.drawings.load_drawings_files()
|
troggle.parsers.drawings.load_drawings_files()
|
||||||
|
|
||||||
|
|
||||||
|
@ -11,17 +11,16 @@ from django.template.defaultfilters import slugify
|
|||||||
from django.utils.timezone import get_current_timezone, make_aware
|
from django.utils.timezone import get_current_timezone, make_aware
|
||||||
|
|
||||||
from parsers.people import GetPersonExpeditionNameLookup
|
from parsers.people import GetPersonExpeditionNameLookup
|
||||||
from troggle.core.models.caves import (Cave, GetCaveLookup, LogbookEntry,
|
from troggle.core.models.caves import Cave, GetCaveLookup, LogbookEntry, PersonTrip
|
||||||
PersonTrip)
|
|
||||||
from troggle.core.models.troggle import DataIssue, Expedition
|
from troggle.core.models.troggle import DataIssue, Expedition
|
||||||
from troggle.core.utils import TROG, save_carefully
|
from troggle.core.utils import TROG, save_carefully
|
||||||
|
|
||||||
'''
|
"""
|
||||||
Parses and imports logbooks in all their wonderful confusion
|
Parses and imports logbooks in all their wonderful confusion
|
||||||
See detailed explanation of the complete process:
|
See detailed explanation of the complete process:
|
||||||
https://expo.survex.com/handbook/computing/logbooks-parsing.html
|
https://expo.survex.com/handbook/computing/logbooks-parsing.html
|
||||||
'''
|
"""
|
||||||
todo='''
|
todo = """
|
||||||
- refactor everything with some urgency, esp. LoadLogbookForExpedition()
|
- refactor everything with some urgency, esp. LoadLogbookForExpedition()
|
||||||
|
|
||||||
- remove the TROG things since we need the database for multiuser access? Or not?
|
- remove the TROG things since we need the database for multiuser access? Or not?
|
||||||
@ -47,67 +46,101 @@ todo='''
|
|||||||
- use Fixtures https://docs.djangoproject.com/en/4.1/ref/django-admin/#django-admin-loaddata to cache
|
- use Fixtures https://docs.djangoproject.com/en/4.1/ref/django-admin/#django-admin-loaddata to cache
|
||||||
data for old logbooks? Not worth it..
|
data for old logbooks? Not worth it..
|
||||||
|
|
||||||
'''
|
"""
|
||||||
MAX_LOGBOOK_ENTRY_TITLE_LENGTH = 200
|
MAX_LOGBOOK_ENTRY_TITLE_LENGTH = 200
|
||||||
BLOG_PARSER_SETTINGS = {
|
BLOG_PARSER_SETTINGS = {
|
||||||
# "2022": ("ukcavingblog.html", "parser_blog"), # now folded in to logbooks.html
|
# "2022": ("ukcavingblog.html", "parser_blog"), # now folded in to logbooks.html
|
||||||
# "2019": ("ukcavingblog.html", "parser_blog"), # now folded in to logbooks.html
|
# "2019": ("ukcavingblog.html", "parser_blog"), # now folded in to logbooks.html
|
||||||
# "2018": ("ukcavingblog.html", "parser_blog"), # now folded in to logbooks.html
|
# "2018": ("ukcavingblog.html", "parser_blog"), # now folded in to logbooks.html
|
||||||
# "2017": ("ukcavingblog.html", "parser_blog"), # now folded in to logbooks.html
|
# "2017": ("ukcavingblog.html", "parser_blog"), # now folded in to logbooks.html
|
||||||
}
|
}
|
||||||
DEFAULT_LOGBOOK_FILE = "logbook.html"
|
DEFAULT_LOGBOOK_FILE = "logbook.html"
|
||||||
DEFAULT_LOGBOOK_PARSER = "parser_html"
|
DEFAULT_LOGBOOK_PARSER = "parser_html"
|
||||||
# All years since 2002 use the default value for Logbook parser
|
# All years since 2002 use the default value for Logbook parser
|
||||||
# dont forget to update expoweb/pubs.htm to match.
|
# dont forget to update expoweb/pubs.htm to match.
|
||||||
LOGBOOK_PARSER_SETTINGS = {
|
LOGBOOK_PARSER_SETTINGS = {
|
||||||
"2002": ("logbook.html", "parser_html"),
|
"2002": ("logbook.html", "parser_html"),
|
||||||
"2001": ("log.htm", "parser_html_01"),
|
"2001": ("log.htm", "parser_html_01"),
|
||||||
"2000": ("log.htm", "parser_html_01"),
|
"2000": ("log.htm", "parser_html_01"),
|
||||||
"1999": ("log.htm", "parser_html_01"),
|
"1999": ("log.htm", "parser_html_01"),
|
||||||
"1998": ("log.htm", "parser_html_01"),
|
"1998": ("log.htm", "parser_html_01"),
|
||||||
"1997": ("log.htm", "parser_html_01"),
|
"1997": ("log.htm", "parser_html_01"),
|
||||||
"1996": ("log.htm", "parser_html_01"),
|
"1996": ("log.htm", "parser_html_01"),
|
||||||
"1995": ("log.htm", "parser_html_01"),
|
"1995": ("log.htm", "parser_html_01"),
|
||||||
"1994": ("logbook.html", "parser_html"),
|
"1994": ("logbook.html", "parser_html"),
|
||||||
"1993": ("logbook.html", "parser_html"),
|
"1993": ("logbook.html", "parser_html"),
|
||||||
"1992": ("logbook.html", "parser_html"),
|
"1992": ("logbook.html", "parser_html"),
|
||||||
"1991": ("logbook.html", "parser_html"),
|
"1991": ("logbook.html", "parser_html"),
|
||||||
"1990": ("logbook.html", "parser_html"),
|
"1990": ("logbook.html", "parser_html"),
|
||||||
"1989": ("logbook.html", "parser_html"),
|
"1989": ("logbook.html", "parser_html"),
|
||||||
"1988": ("logbook.html", "parser_html"),
|
"1988": ("logbook.html", "parser_html"),
|
||||||
"1987": ("logbook.html", "parser_html"),
|
"1987": ("logbook.html", "parser_html"),
|
||||||
"1985": ("logbook.html", "parser_html"),
|
"1985": ("logbook.html", "parser_html"),
|
||||||
"1984": ("logbook.html", "parser_html"),
|
"1984": ("logbook.html", "parser_html"),
|
||||||
"1983": ("logbook.html", "parser_html"),
|
"1983": ("logbook.html", "parser_html"),
|
||||||
"1982": ("logbook.html", "parser_html"),
|
"1982": ("logbook.html", "parser_html"),
|
||||||
}
|
}
|
||||||
|
|
||||||
entries = { "2022": 89, "2019": 55, "2018": 95, "2017": 74, "2016": 86, "2015": 80,
|
entries = {
|
||||||
"2014": 65, "2013": 52, "2012": 75, "2011": 71, "2010": 22, "2009": 53,
|
"2022": 89,
|
||||||
"2008": 49, "2007": 113, "2006": 60, "2005": 55, "2004": 76, "2003": 42, "2002": 31,
|
"2019": 55,
|
||||||
"2001": 49, "2000": 54, "1999": 79, "1998": 43, "1997": 53, "1996": 95, "1995": 42,
|
"2018": 95,
|
||||||
"1994": 32, "1993": 41, "1992": 62, "1991": 39, "1990": 87, "1989": 63,"1988": 61,"1987": 34,
|
"2017": 74,
|
||||||
"1985": 24, "1984": 32, "1983": 52, "1982": 42,}
|
"2016": 86,
|
||||||
|
"2015": 80,
|
||||||
|
"2014": 65,
|
||||||
|
"2013": 52,
|
||||||
|
"2012": 75,
|
||||||
|
"2011": 71,
|
||||||
|
"2010": 22,
|
||||||
|
"2009": 53,
|
||||||
|
"2008": 49,
|
||||||
|
"2007": 113,
|
||||||
|
"2006": 60,
|
||||||
|
"2005": 55,
|
||||||
|
"2004": 76,
|
||||||
|
"2003": 42,
|
||||||
|
"2002": 31,
|
||||||
|
"2001": 49,
|
||||||
|
"2000": 54,
|
||||||
|
"1999": 79,
|
||||||
|
"1998": 43,
|
||||||
|
"1997": 53,
|
||||||
|
"1996": 95,
|
||||||
|
"1995": 42,
|
||||||
|
"1994": 32,
|
||||||
|
"1993": 41,
|
||||||
|
"1992": 62,
|
||||||
|
"1991": 39,
|
||||||
|
"1990": 87,
|
||||||
|
"1989": 63,
|
||||||
|
"1988": 61,
|
||||||
|
"1987": 34,
|
||||||
|
"1985": 24,
|
||||||
|
"1984": 32,
|
||||||
|
"1983": 52,
|
||||||
|
"1982": 42,
|
||||||
|
}
|
||||||
|
|
||||||
logentries = [] # the entire logbook for one year is a single object: a list of entries
|
logentries = [] # the entire logbook for one year is a single object: a list of entries
|
||||||
noncaveplaces = [ "Journey", "Loser Plateau", "UNKNOWN", 'plateau',
|
noncaveplaces = ["Journey", "Loser Plateau", "UNKNOWN", "plateau", "base camp", "basecamp", "top camp", "topcamp"]
|
||||||
'base camp', 'basecamp', 'top camp', 'topcamp' ]
|
logdataissues = TROG["issues"]["logdataissues"]
|
||||||
logdataissues = TROG['issues']['logdataissues']
|
trips = {}
|
||||||
trips ={}
|
|
||||||
|
|
||||||
#
|
#
|
||||||
# the logbook loading section
|
# the logbook loading section
|
||||||
#
|
#
|
||||||
def set_trip_id(year, seq):
|
def set_trip_id(year, seq):
|
||||||
tid= f"{year}_s{seq:02d}"
|
tid = f"{year}_s{seq:02d}"
|
||||||
return tid
|
return tid
|
||||||
|
|
||||||
rx_tripperson = re.compile(r'(?i)<u>(.*?)</u>$')
|
|
||||||
|
rx_tripperson = re.compile(r"(?i)<u>(.*?)</u>$")
|
||||||
rx_round_bracket = re.compile(r"[\(\[].*?[\)\]]")
|
rx_round_bracket = re.compile(r"[\(\[].*?[\)\]]")
|
||||||
|
|
||||||
|
|
||||||
def GetTripPersons(trippeople, expedition, logtime_underground, tid=None):
|
def GetTripPersons(trippeople, expedition, logtime_underground, tid=None):
|
||||||
res = [ ]
|
res = []
|
||||||
author = None
|
author = None
|
||||||
# print(f'# {tid}')
|
# print(f'# {tid}')
|
||||||
# print(f" - {tid} '{trippeople}' ")
|
# print(f" - {tid} '{trippeople}' ")
|
||||||
@ -118,48 +151,47 @@ def GetTripPersons(trippeople, expedition, logtime_underground, tid=None):
|
|||||||
mul = rx_tripperson.match(tripperson)
|
mul = rx_tripperson.match(tripperson)
|
||||||
if mul:
|
if mul:
|
||||||
tripperson = mul.group(1).strip()
|
tripperson = mul.group(1).strip()
|
||||||
if tripperson and tripperson[0] != '*':
|
if tripperson and tripperson[0] != "*":
|
||||||
tripperson = re.sub(rx_round_bracket, "", tripperson).strip()
|
tripperson = re.sub(rx_round_bracket, "", tripperson).strip()
|
||||||
|
|
||||||
# these aliases should be moved to people.py GetPersonExpeditionNameLookup(expedition)
|
# these aliases should be moved to people.py GetPersonExpeditionNameLookup(expedition)
|
||||||
if tripperson =="Wiggy":
|
if tripperson == "Wiggy":
|
||||||
tripperson = "Phil Wigglesworth"
|
tripperson = "Phil Wigglesworth"
|
||||||
if tripperson =="Animal":
|
if tripperson == "Animal":
|
||||||
tripperson = "Mike Richardson"
|
tripperson = "Mike Richardson"
|
||||||
if tripperson =="MikeTA":
|
if tripperson == "MikeTA":
|
||||||
tripperson = "Mike Richardson"
|
tripperson = "Mike Richardson"
|
||||||
if tripperson =="CavingPig":
|
if tripperson == "CavingPig":
|
||||||
tripperson = "Elaine Oliver"
|
tripperson = "Elaine Oliver"
|
||||||
if tripperson =="nobrotson":
|
if tripperson == "nobrotson":
|
||||||
tripperson = "Rob Watson"
|
tripperson = "Rob Watson"
|
||||||
if tripperson =="Tinywoman":
|
if tripperson == "Tinywoman":
|
||||||
tripperson = "Nadia"
|
tripperson = "Nadia"
|
||||||
if tripperson =="tcacrossley":
|
if tripperson == "tcacrossley":
|
||||||
tripperson = "Tom Crossley"
|
tripperson = "Tom Crossley"
|
||||||
if tripperson =="Samouse1":
|
if tripperson == "Samouse1":
|
||||||
tripperson = "Todd Rye"
|
tripperson = "Todd Rye"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
personyear = GetPersonExpeditionNameLookup(expedition).get(tripperson.lower())
|
personyear = GetPersonExpeditionNameLookup(expedition).get(tripperson.lower())
|
||||||
if not personyear:
|
if not personyear:
|
||||||
message = f" ! - {expedition.year} No name match for: '{tripperson}' in entry {tid=} for this expedition year."
|
message = f" ! - {expedition.year} No name match for: '{tripperson}' in entry {tid=} for this expedition year."
|
||||||
print(message)
|
print(message)
|
||||||
DataIssue.objects.create(parser='logbooks', message=message)
|
DataIssue.objects.create(parser="logbooks", message=message)
|
||||||
logdataissues[tid]=message
|
logdataissues[tid] = message
|
||||||
res.append((personyear, logtime_underground))
|
res.append((personyear, logtime_underground))
|
||||||
if mul:
|
if mul:
|
||||||
author = personyear
|
author = personyear
|
||||||
if not author:
|
if not author:
|
||||||
if not res:
|
if not res:
|
||||||
return "", 0
|
return "", 0
|
||||||
author = res[-1][0] # the previous valid person and a time of 0 hours
|
author = res[-1][0] # the previous valid person and a time of 0 hours
|
||||||
|
|
||||||
#print(f" - {tid} [{author.person}] '{res[0][0].person}'...")
|
# print(f" - {tid} [{author.person}] '{res[0][0].person}'...")
|
||||||
return res, author
|
return res, author
|
||||||
|
|
||||||
|
|
||||||
def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_underground, tid=None):
|
def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_underground, tid=None):
|
||||||
""" saves a logbook entry and related persontrips
|
"""saves a logbook entry and related persontrips
|
||||||
Does NOT save the expeditionday_id - all NULLs. why? Because we are deprecating expeditionday !
|
Does NOT save the expeditionday_id - all NULLs. why? Because we are deprecating expeditionday !
|
||||||
|
|
||||||
troggle.log shows that we are creating lots of duplicates, which is no no problem with SQL as they just overwrite but we are saving the same thing too many times..
|
troggle.log shows that we are creating lots of duplicates, which is no no problem with SQL as they just overwrite but we are saving the same thing too many times..
|
||||||
@ -188,68 +220,75 @@ def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_
|
|||||||
# print(f" - {author} - {logtime_underground}")
|
# print(f" - {author} - {logtime_underground}")
|
||||||
except:
|
except:
|
||||||
message = f" ! - {expedition.year} Skipping logentry: {title} - GetTripPersons FAIL"
|
message = f" ! - {expedition.year} Skipping logentry: {title} - GetTripPersons FAIL"
|
||||||
DataIssue.objects.create(parser='logbooks', message=message)
|
DataIssue.objects.create(parser="logbooks", message=message)
|
||||||
logdataissues["title"]=message
|
logdataissues["title"] = message
|
||||||
print(message)
|
print(message)
|
||||||
raise
|
raise
|
||||||
return
|
return
|
||||||
|
|
||||||
if not author:
|
if not author:
|
||||||
message = f" ! - {expedition.year} Warning: logentry: {title} - no expo member author for entry '{tid}'"
|
message = f" ! - {expedition.year} Warning: logentry: {title} - no expo member author for entry '{tid}'"
|
||||||
DataIssue.objects.create(parser='logbooks', message=message)
|
DataIssue.objects.create(parser="logbooks", message=message)
|
||||||
logdataissues["title"]=message
|
logdataissues["title"] = message
|
||||||
print(message)
|
print(message)
|
||||||
#return
|
# return
|
||||||
|
|
||||||
# This needs attention. The slug field is derived from 'title'
|
# This needs attention. The slug field is derived from 'title'
|
||||||
# both GetCaveLookup() and GetTripCave() need to work together better. None of this data is *used* though?
|
# both GetCaveLookup() and GetTripCave() need to work together better. None of this data is *used* though?
|
||||||
#tripCave = GetTripCave(place):
|
# tripCave = GetTripCave(place):
|
||||||
|
|
||||||
lplace = place.lower()
|
lplace = place.lower()
|
||||||
cave=None
|
cave = None
|
||||||
if lplace not in noncaveplaces:
|
if lplace not in noncaveplaces:
|
||||||
cave = GetCaveLookup().get(lplace)
|
cave = GetCaveLookup().get(lplace)
|
||||||
|
|
||||||
y = str(date)[:4]
|
y = str(date)[:4]
|
||||||
|
|
||||||
text = text.replace(' src="', f' src="/years/{y}/' )
|
text = text.replace(' src="', f' src="/years/{y}/')
|
||||||
text = text.replace(" src='", f" src='/years/{y}/" )
|
text = text.replace(" src='", f" src='/years/{y}/")
|
||||||
|
|
||||||
text = text.replace(f' src="/years/{y}//years/{y}/', f' src="/years/{y}/' )
|
text = text.replace(f' src="/years/{y}//years/{y}/', f' src="/years/{y}/')
|
||||||
text = text.replace(f" src='/years/{y}//years/{y}/", f" src='/years/{y}/" )
|
text = text.replace(f" src='/years/{y}//years/{y}/", f" src='/years/{y}/")
|
||||||
|
|
||||||
text = text.replace('\t', '' )
|
text = text.replace("\t", "")
|
||||||
text = text.replace('\n\n\n', '\n\n' )
|
text = text.replace("\n\n\n", "\n\n")
|
||||||
|
|
||||||
#Check for an existing copy of the current entry, and save
|
# Check for an existing copy of the current entry, and save
|
||||||
expeditionday = expedition.get_expedition_day(date)
|
expeditionday = expedition.get_expedition_day(date)
|
||||||
lookupAttribs={'date':date, 'title':title}
|
lookupAttribs = {"date": date, "title": title}
|
||||||
# 'cave' is converted to a string doing this, which renders as the cave slug.
|
# 'cave' is converted to a string doing this, which renders as the cave slug.
|
||||||
# but it is a db query which we should try to avoid - rewrite this
|
# but it is a db query which we should try to avoid - rewrite this
|
||||||
|
|
||||||
#NEW slug for a logbook entry here! Unique id + slugified title fragment
|
# NEW slug for a logbook entry here! Unique id + slugified title fragment
|
||||||
|
|
||||||
if tid is not None:
|
if tid is not None:
|
||||||
slug = tid
|
slug = tid
|
||||||
# slug = tid + "_" + slugify(title)[:10].replace('-','_')
|
# slug = tid + "_" + slugify(title)[:10].replace('-','_')
|
||||||
else:
|
else:
|
||||||
slug = str(randint(1000,9999)) + "_" + slugify(title)[:10].replace('-','_')
|
slug = str(randint(1000, 9999)) + "_" + slugify(title)[:10].replace("-", "_")
|
||||||
nonLookupAttribs={'place':place, 'text':text, 'expedition':expedition,
|
nonLookupAttribs = {
|
||||||
'time_underground':logtime_underground, 'cave_slug':str(cave), 'slug': slug}
|
"place": place,
|
||||||
|
"text": text,
|
||||||
|
"expedition": expedition,
|
||||||
|
"time_underground": logtime_underground,
|
||||||
|
"cave_slug": str(cave),
|
||||||
|
"slug": slug,
|
||||||
|
}
|
||||||
|
|
||||||
# This creates the lbo instance of LogbookEntry
|
# This creates the lbo instance of LogbookEntry
|
||||||
lbo, created=save_carefully(LogbookEntry, lookupAttribs, nonLookupAttribs)
|
lbo, created = save_carefully(LogbookEntry, lookupAttribs, nonLookupAttribs)
|
||||||
|
|
||||||
# for PersonTrip time_underground is float (decimal hours)
|
# for PersonTrip time_underground is float (decimal hours)
|
||||||
for tripperson, time_underground in trippersons:
|
for tripperson, time_underground in trippersons:
|
||||||
# print(f" - {tid} '{tripperson}' author:{tripperson == author}")
|
# print(f" - {tid} '{tripperson}' author:{tripperson == author}")
|
||||||
lookupAttribs={'personexpedition':tripperson, 'logbook_entry':lbo}
|
lookupAttribs = {"personexpedition": tripperson, "logbook_entry": lbo}
|
||||||
nonLookupAttribs={'time_underground':time_underground, 'is_logbook_entry_author':(tripperson == author)}
|
nonLookupAttribs = {"time_underground": time_underground, "is_logbook_entry_author": (tripperson == author)}
|
||||||
# this creates the PersonTrip instance.
|
# this creates the PersonTrip instance.
|
||||||
save_carefully(PersonTrip, lookupAttribs, nonLookupAttribs)
|
save_carefully(PersonTrip, lookupAttribs, nonLookupAttribs)
|
||||||
|
|
||||||
|
|
||||||
def ParseDate(tripdate, year):
|
def ParseDate(tripdate, year):
|
||||||
""" Interprets dates in the expo logbooks and returns a correct datetime.date object """
|
"""Interprets dates in the expo logbooks and returns a correct datetime.date object"""
|
||||||
dummydate = date(1970, 1, 1)
|
dummydate = date(1970, 1, 1)
|
||||||
month = 1
|
month = 1
|
||||||
day = 1
|
day = 1
|
||||||
@ -261,16 +300,16 @@ def ParseDate(tripdate, year):
|
|||||||
if mdatestandard:
|
if mdatestandard:
|
||||||
if not (mdatestandard.group(1) == year):
|
if not (mdatestandard.group(1) == year):
|
||||||
message = f" ! - Bad date (year) in logbook: {tripdate} - {year}"
|
message = f" ! - Bad date (year) in logbook: {tripdate} - {year}"
|
||||||
DataIssue.objects.create(parser='logbooks', message=message)
|
DataIssue.objects.create(parser="logbooks", message=message)
|
||||||
logdataissues["tripdate"]=message
|
logdataissues["tripdate"] = message
|
||||||
return dummydate
|
return dummydate
|
||||||
else:
|
else:
|
||||||
year, month, day = int(mdatestandard.group(1)), int(mdatestandard.group(2)), int(mdatestandard.group(3))
|
year, month, day = int(mdatestandard.group(1)), int(mdatestandard.group(2)), int(mdatestandard.group(3))
|
||||||
elif mdategoof:
|
elif mdategoof:
|
||||||
if not (not mdategoof.group(3) or mdategoof.group(3) == year[:2]):
|
if not (not mdategoof.group(3) or mdategoof.group(3) == year[:2]):
|
||||||
message = " ! - Bad date mdategoof.group(3) in logbook: " + tripdate + " - " + mdategoof.group(3)
|
message = " ! - Bad date mdategoof.group(3) in logbook: " + tripdate + " - " + mdategoof.group(3)
|
||||||
DataIssue.objects.create(parser='logbooks', message=message)
|
DataIssue.objects.create(parser="logbooks", message=message)
|
||||||
logdataissues["tripdate"]=message
|
logdataissues["tripdate"] = message
|
||||||
return dummydate
|
return dummydate
|
||||||
else:
|
else:
|
||||||
yadd = int(year[:2]) * 100
|
yadd = int(year[:2]) * 100
|
||||||
@ -278,25 +317,26 @@ def ParseDate(tripdate, year):
|
|||||||
else:
|
else:
|
||||||
year = 1970
|
year = 1970
|
||||||
message = f" ! - Bad date in logbook: {tripdate} - {year}"
|
message = f" ! - Bad date in logbook: {tripdate} - {year}"
|
||||||
DataIssue.objects.create(parser='logbooks', message=message)
|
DataIssue.objects.create(parser="logbooks", message=message)
|
||||||
logdataissues["tripdate"]=message
|
logdataissues["tripdate"] = message
|
||||||
|
|
||||||
return date(year, month, day)
|
return date(year, month, day)
|
||||||
except:
|
except:
|
||||||
message = f" ! - Failed to parse date in logbook: {tripdate} - {year}"
|
message = f" ! - Failed to parse date in logbook: {tripdate} - {year}"
|
||||||
DataIssue.objects.create(parser='logbooks', message=message)
|
DataIssue.objects.create(parser="logbooks", message=message)
|
||||||
logdataissues["tripdate"]=message
|
logdataissues["tripdate"] = message
|
||||||
return datetime.date(1970, 1, 1)
|
return datetime.date(1970, 1, 1)
|
||||||
|
|
||||||
|
|
||||||
# 2002 - now
|
# 2002 - now
|
||||||
def parser_html(year, expedition, txt, seq=""):
|
def parser_html(year, expedition, txt, seq=""):
|
||||||
'''This uses some of the more obscure capabilities of regular expressions,
|
"""This uses some of the more obscure capabilities of regular expressions,
|
||||||
see https://docs.python.org/3/library/re.html
|
see https://docs.python.org/3/library/re.html
|
||||||
|
|
||||||
You can't see it here, but a round-trip export-then-import will move
|
You can't see it here, but a round-trip export-then-import will move
|
||||||
the endmatter up to the frontmatter. This makes sense when moving
|
the endmatter up to the frontmatter. This makes sense when moving
|
||||||
from parser_html_01 format logfiles, believe me.
|
from parser_html_01 format logfiles, believe me.
|
||||||
'''
|
"""
|
||||||
global logentries
|
global logentries
|
||||||
global logdataissues
|
global logdataissues
|
||||||
|
|
||||||
@ -305,29 +345,30 @@ def parser_html(year, expedition, txt, seq=""):
|
|||||||
headpara = headmatch.groups()[0].strip()
|
headpara = headmatch.groups()[0].strip()
|
||||||
|
|
||||||
# print(f" - headpara:\n'{headpara}'")
|
# print(f" - headpara:\n'{headpara}'")
|
||||||
if(len(headpara)>0):
|
if len(headpara) > 0:
|
||||||
frontpath = Path(settings.EXPOWEB, "years", year, "frontmatter.html")
|
frontpath = Path(settings.EXPOWEB, "years", year, "frontmatter.html")
|
||||||
with open(frontpath,"w") as front:
|
with open(frontpath, "w") as front:
|
||||||
front.write(headpara+"\n")
|
front.write(headpara + "\n")
|
||||||
|
|
||||||
# extract END material and stash for later use when rebuilding from list of entries
|
# extract END material and stash for later use when rebuilding from list of entries
|
||||||
endmatch = re.match(r"(?i)(?s).*<hr\s*/>([\s\S]*?)(?=</body)", txt)
|
endmatch = re.match(r"(?i)(?s).*<hr\s*/>([\s\S]*?)(?=</body)", txt)
|
||||||
endpara = endmatch.groups()[0].strip()
|
endpara = endmatch.groups()[0].strip()
|
||||||
|
|
||||||
# print(f" - endpara:\n'{endpara}'")
|
# print(f" - endpara:\n'{endpara}'")
|
||||||
if(len(endpara)>0):
|
if len(endpara) > 0:
|
||||||
endpath = Path(settings.EXPOWEB, "years", year, "endmatter.html")
|
endpath = Path(settings.EXPOWEB, "years", year, "endmatter.html")
|
||||||
with open(endpath,"w") as end:
|
with open(endpath, "w") as end:
|
||||||
end.write(endpara+"\n")
|
end.write(endpara + "\n")
|
||||||
|
|
||||||
tripparas = re.findall(r"<hr\s*/>([\s\S]*?)(?=<hr)", txt)
|
tripparas = re.findall(r"<hr\s*/>([\s\S]*?)(?=<hr)", txt)
|
||||||
logbook_entry_count = 0
|
logbook_entry_count = 0
|
||||||
for trippara in tripparas:
|
for trippara in tripparas:
|
||||||
logbook_entry_count += 1
|
logbook_entry_count += 1
|
||||||
tid = set_trip_id(year,logbook_entry_count)
|
tid = set_trip_id(year, logbook_entry_count)
|
||||||
# print(f' - new tid:{tid} lbe count: {logbook_entry_count}')
|
# print(f' - new tid:{tid} lbe count: {logbook_entry_count}')
|
||||||
|
|
||||||
s = re.match(r'''(?x)(?:\s*<div\sclass="tripdate"\sid=".*?">.*?</div>\s*<p>)? # second date
|
s = re.match(
|
||||||
|
r"""(?x)(?:\s*<div\sclass="tripdate"\sid=".*?">.*?</div>\s*<p>)? # second date
|
||||||
\s*(?:<a\s+id="(.*?)"\s*/>\s*</a>)?
|
\s*(?:<a\s+id="(.*?)"\s*/>\s*</a>)?
|
||||||
\s*<div\s+class="tripdate"\s*(?:id="(.*?)")?>(.*?)</div>(?:<p>)?
|
\s*<div\s+class="tripdate"\s*(?:id="(.*?)")?>(.*?)</div>(?:<p>)?
|
||||||
\s*<div\s+class="trippeople">\s*(.*?)</div>
|
\s*<div\s+class="trippeople">\s*(.*?)</div>
|
||||||
@ -335,16 +376,19 @@ def parser_html(year, expedition, txt, seq=""):
|
|||||||
([\s\S]*?)
|
([\s\S]*?)
|
||||||
\s*(?:<div\s+class="timeug">\s*(.*?)</div>)?
|
\s*(?:<div\s+class="timeug">\s*(.*?)</div>)?
|
||||||
\s*$
|
\s*$
|
||||||
''', trippara)
|
""",
|
||||||
|
trippara,
|
||||||
|
)
|
||||||
if s:
|
if s:
|
||||||
tripid, tripid1, tripdate, trippeople, triptitle, triptext, tu = s.groups()
|
tripid, tripid1, tripdate, trippeople, triptitle, triptext, tu = s.groups()
|
||||||
else: # allow title and people to be swapped in order
|
else: # allow title and people to be swapped in order
|
||||||
msg = f" !- {year} Can't parse:{logbook_entry_count} '{trippara[:50]}'..."
|
msg = f" !- {year} Can't parse:{logbook_entry_count} '{trippara[:50]}'..."
|
||||||
print(msg)
|
print(msg)
|
||||||
DataIssue.objects.create(parser='logbooks', message=msg)
|
DataIssue.objects.create(parser="logbooks", message=msg)
|
||||||
logdataissues[tid]=msg
|
logdataissues[tid] = msg
|
||||||
|
|
||||||
s2 = re.match(r'''(?x)(?:\s*<div\sclass="tripdate"\sid=".*?">.*?</div>\s*<p>)? # second date
|
s2 = re.match(
|
||||||
|
r"""(?x)(?:\s*<div\sclass="tripdate"\sid=".*?">.*?</div>\s*<p>)? # second date
|
||||||
\s*(?:<a\s+id="(.*?)"\s*/>\s*</a>)?
|
\s*(?:<a\s+id="(.*?)"\s*/>\s*</a>)?
|
||||||
\s*<div\s+class="tripdate"\s*(?:id="(.*?)")?>(.*?)</div>(?:<p>)?
|
\s*<div\s+class="tripdate"\s*(?:id="(.*?)")?>(.*?)</div>(?:<p>)?
|
||||||
\s*<div\s+class="triptitle">\s*(.*?)</div>
|
\s*<div\s+class="triptitle">\s*(.*?)</div>
|
||||||
@ -352,15 +396,17 @@ def parser_html(year, expedition, txt, seq=""):
|
|||||||
([\s\S]*?)
|
([\s\S]*?)
|
||||||
\s*(?:<div\s+class="timeug">\s*(.*?)</div>)?
|
\s*(?:<div\s+class="timeug">\s*(.*?)</div>)?
|
||||||
\s*$
|
\s*$
|
||||||
''', trippara)
|
""",
|
||||||
|
trippara,
|
||||||
|
)
|
||||||
if s2:
|
if s2:
|
||||||
tripid, tripid1, tripdate, triptitle, trippeople, triptext, tu = s2.groups()
|
tripid, tripid1, tripdate, triptitle, trippeople, triptext, tu = s2.groups()
|
||||||
else:
|
else:
|
||||||
# if not re.search(r"Rigging Guide", trippara):
|
# if not re.search(r"Rigging Guide", trippara):
|
||||||
msg = f" !- Logbook. Can't parse entry on 2nd pass:{logbook_entry_count} '{trippara[:50]}'..."
|
msg = f" !- Logbook. Can't parse entry on 2nd pass:{logbook_entry_count} '{trippara[:50]}'..."
|
||||||
print(msg)
|
print(msg)
|
||||||
DataIssue.objects.create(parser='logbooks', message=msg)
|
DataIssue.objects.create(parser="logbooks", message=msg)
|
||||||
logdataissues[tid]=msg
|
logdataissues[tid] = msg
|
||||||
continue
|
continue
|
||||||
|
|
||||||
ldate = ParseDate(tripdate.strip(), year)
|
ldate = ParseDate(tripdate.strip(), year)
|
||||||
@ -370,14 +416,14 @@ def parser_html(year, expedition, txt, seq=""):
|
|||||||
else:
|
else:
|
||||||
tripcave = "UNKNOWN"
|
tripcave = "UNKNOWN"
|
||||||
ltriptext = re.sub(r"</p>", "", triptext)
|
ltriptext = re.sub(r"</p>", "", triptext)
|
||||||
#ltriptext = re.sub(r"\s*?\n\s*", " ", ltriptext)
|
# ltriptext = re.sub(r"\s*?\n\s*", " ", ltriptext)
|
||||||
ltriptext = re.sub(r"<p>", "<br /><br />", ltriptext).strip()
|
ltriptext = re.sub(r"<p>", "<br /><br />", ltriptext).strip()
|
||||||
|
|
||||||
triptitle = triptitle.strip()
|
triptitle = triptitle.strip()
|
||||||
entrytuple = (ldate, tripcave, triptitle, ltriptext,
|
entrytuple = (ldate, tripcave, triptitle, ltriptext, trippeople, expedition, tu, tripid1)
|
||||||
trippeople, expedition, tu, tripid1)
|
|
||||||
logentries.append(entrytuple)
|
logentries.append(entrytuple)
|
||||||
|
|
||||||
|
|
||||||
# main parser for 1991 - 2001. simpler because the data has been hacked so much to fit it
|
# main parser for 1991 - 2001. simpler because the data has been hacked so much to fit it
|
||||||
def parser_html_01(year, expedition, txt, seq=""):
|
def parser_html_01(year, expedition, txt, seq=""):
|
||||||
global logentries
|
global logentries
|
||||||
@ -389,10 +435,10 @@ def parser_html_01(year, expedition, txt, seq=""):
|
|||||||
headpara = headmatch.groups()[0].strip()
|
headpara = headmatch.groups()[0].strip()
|
||||||
|
|
||||||
# print(f" - headpara:\n'{headpara}'")
|
# print(f" - headpara:\n'{headpara}'")
|
||||||
if(len(headpara)>0):
|
if len(headpara) > 0:
|
||||||
frontpath = Path(settings.EXPOWEB, "years", year, "frontmatter.html")
|
frontpath = Path(settings.EXPOWEB, "years", year, "frontmatter.html")
|
||||||
with open(frontpath,"w") as front:
|
with open(frontpath, "w") as front:
|
||||||
front.write(headpara+"\n")
|
front.write(headpara + "\n")
|
||||||
|
|
||||||
# extract END material and stash for later use when rebuilding from list of entries
|
# extract END material and stash for later use when rebuilding from list of entries
|
||||||
endmatch = re.match(r"(?i)(?s).*<hr\s*/>([\s\S]*?)(?=</body)", txt)
|
endmatch = re.match(r"(?i)(?s).*<hr\s*/>([\s\S]*?)(?=</body)", txt)
|
||||||
@ -403,71 +449,70 @@ def parser_html_01(year, expedition, txt, seq=""):
|
|||||||
endpara = ""
|
endpara = ""
|
||||||
|
|
||||||
# print(f" - endpara:\n'{endpara}'")
|
# print(f" - endpara:\n'{endpara}'")
|
||||||
if(len(endpara)>0):
|
if len(endpara) > 0:
|
||||||
endpath = Path(settings.EXPOWEB, "years", year, "endmatter.html")
|
endpath = Path(settings.EXPOWEB, "years", year, "endmatter.html")
|
||||||
with open(endpath,"w") as end:
|
with open(endpath, "w") as end:
|
||||||
end.write(endpara+"\n")
|
end.write(endpara + "\n")
|
||||||
|
|
||||||
tripparas = re.findall(r"<hr[\s/]*>([\s\S]*?)(?=<hr)", txt)
|
tripparas = re.findall(r"<hr[\s/]*>([\s\S]*?)(?=<hr)", txt)
|
||||||
logbook_entry_count = 0
|
logbook_entry_count = 0
|
||||||
for trippara in tripparas:
|
for trippara in tripparas:
|
||||||
logbook_entry_count += 1
|
logbook_entry_count += 1
|
||||||
tid = set_trip_id(year,logbook_entry_count)
|
tid = set_trip_id(year, logbook_entry_count)
|
||||||
# print(f" #0 - tid: {tid}")
|
# print(f" #0 - tid: {tid}")
|
||||||
try:
|
try:
|
||||||
#print(f" #1 - tid: {tid}")
|
# print(f" #1 - tid: {tid}")
|
||||||
s = re.match(r"(?i)(?s)\s*(?:<p>)?(.*?)</?p>(.*)$", trippara)
|
s = re.match(r"(?i)(?s)\s*(?:<p>)?(.*?)</?p>(.*)$", trippara)
|
||||||
if not s:
|
if not s:
|
||||||
message = " ! - Skipping logentry {year} failure to parse header: " + tid + trippara[:300] + "..."
|
message = " ! - Skipping logentry {year} failure to parse header: " + tid + trippara[:300] + "..."
|
||||||
DataIssue.objects.create(parser='logbooks', message=message)
|
DataIssue.objects.create(parser="logbooks", message=message)
|
||||||
logdataissues[tid]=message
|
logdataissues[tid] = message
|
||||||
print(message)
|
print(message)
|
||||||
break
|
break
|
||||||
try:
|
try:
|
||||||
tripheader, triptext = s.group(1), s.group(2)
|
tripheader, triptext = s.group(1), s.group(2)
|
||||||
except:
|
except:
|
||||||
message = f" ! - Fail to set tripheader, triptext. trip:<{tid}> s:'{s}'"
|
message = f" ! - Fail to set tripheader, triptext. trip:<{tid}> s:'{s}'"
|
||||||
DataIssue.objects.create(parser='logbooks', message=message)
|
DataIssue.objects.create(parser="logbooks", message=message)
|
||||||
logdataissues[tid]=message
|
logdataissues[tid] = message
|
||||||
print(message)
|
print(message)
|
||||||
|
|
||||||
|
|
||||||
# mtripid = re.search(r'<a id="(.*?)"', tripheader)
|
# mtripid = re.search(r'<a id="(.*?)"', tripheader)
|
||||||
# if not mtripid:
|
# if not mtripid:
|
||||||
# message = f" ! - A tag id not found. Never mind. Not needed. trip:<{tid}> header:'{tripheader}'"
|
# message = f" ! - A tag id not found. Never mind. Not needed. trip:<{tid}> header:'{tripheader}'"
|
||||||
# DataIssue.objects.create(parser='logbooks', message=message)
|
# DataIssue.objects.create(parser='logbooks', message=message)
|
||||||
# logdataissues[tid]=message
|
# logdataissues[tid]=message
|
||||||
# print(message)
|
# print(message)
|
||||||
|
|
||||||
# tripid = mtripid and mtripid.group(1) or ""
|
# tripid = mtripid and mtripid.group(1) or ""
|
||||||
# print(f" # - mtripid: {mtripid}")
|
# print(f" # - mtripid: {mtripid}")
|
||||||
tripheader = re.sub(r"</?(?:[ab]|span)[^>]*>", "", tripheader)
|
tripheader = re.sub(r"</?(?:[ab]|span)[^>]*>", "", tripheader)
|
||||||
#print(f" #2 - tid: {tid}")
|
# print(f" #2 - tid: {tid}")
|
||||||
try:
|
try:
|
||||||
tripdate, triptitle, trippeople = tripheader.split("|")
|
tripdate, triptitle, trippeople = tripheader.split("|")
|
||||||
except:
|
except:
|
||||||
message = f" ! - Fail 3 to split out date|title|people. trip:<{tid}> '{tripheader.split('|')}'"
|
message = f" ! - Fail 3 to split out date|title|people. trip:<{tid}> '{tripheader.split('|')}'"
|
||||||
DataIssue.objects.create(parser='logbooks', message=message)
|
DataIssue.objects.create(parser="logbooks", message=message)
|
||||||
logdataissues[tid]=message
|
logdataissues[tid] = message
|
||||||
print(message)
|
print(message)
|
||||||
try:
|
try:
|
||||||
tripdate, triptitle = tripheader.split("|")
|
tripdate, triptitle = tripheader.split("|")
|
||||||
trippeople = "GUESS ANON"
|
trippeople = "GUESS ANON"
|
||||||
except:
|
except:
|
||||||
message = f" ! - Skipping logentry {year} Fail 2 to split out date|title (anon). trip:<{tid}> '{tripheader.split('|')}' CRASHES MySQL !"
|
message = f" ! - Skipping logentry {year} Fail 2 to split out date|title (anon). trip:<{tid}> '{tripheader.split('|')}' CRASHES MySQL !"
|
||||||
DataIssue.objects.create(parser='logbooks', message=message)
|
DataIssue.objects.create(parser="logbooks", message=message)
|
||||||
logdataissues[tid]=message
|
logdataissues[tid] = message
|
||||||
print(message)
|
print(message)
|
||||||
break
|
break
|
||||||
#print(f" #3 - tid: {tid}")
|
# print(f" #3 - tid: {tid}")
|
||||||
ldate = ParseDate(tripdate.strip(), year)
|
ldate = ParseDate(tripdate.strip(), year)
|
||||||
#print(f" # - tid: {tid} <{tripdate}> <{triptitle}> <{trippeople}>")
|
# print(f" # - tid: {tid} <{tripdate}> <{triptitle}> <{trippeople}>")
|
||||||
#print(f" #4 - tid: {tid}")
|
# print(f" #4 - tid: {tid}")
|
||||||
|
|
||||||
mtu = re.search(r'<p[^>]*>(T/?U.*)', triptext)
|
mtu = re.search(r"<p[^>]*>(T/?U.*)", triptext)
|
||||||
if mtu:
|
if mtu:
|
||||||
tu = mtu.group(1)
|
tu = mtu.group(1)
|
||||||
triptext = triptext[:mtu.start(0)] + triptext[mtu.end():]
|
triptext = triptext[: mtu.start(0)] + triptext[mtu.end() :]
|
||||||
else:
|
else:
|
||||||
tu = ""
|
tu = ""
|
||||||
|
|
||||||
@ -478,7 +523,7 @@ def parser_html_01(year, expedition, txt, seq=""):
|
|||||||
|
|
||||||
mtail = re.search(r'(?:<a href="[^"]*">[^<]*</a>|\s|/|-|&|</?p>|\((?:same day|\d+)\))*$', ltriptext)
|
mtail = re.search(r'(?:<a href="[^"]*">[^<]*</a>|\s|/|-|&|</?p>|\((?:same day|\d+)\))*$', ltriptext)
|
||||||
if mtail:
|
if mtail:
|
||||||
ltriptext = ltriptext[:mtail.start(0)]
|
ltriptext = ltriptext[: mtail.start(0)]
|
||||||
ltriptext = re.sub(r"</p>", "", ltriptext)
|
ltriptext = re.sub(r"</p>", "", ltriptext)
|
||||||
ltriptext = re.sub(r"\s*?\n\s*", " ", ltriptext)
|
ltriptext = re.sub(r"\s*?\n\s*", " ", ltriptext)
|
||||||
ltriptext = re.sub(r"</?u>", "_", ltriptext)
|
ltriptext = re.sub(r"</?u>", "_", ltriptext)
|
||||||
@ -488,31 +533,30 @@ def parser_html_01(year, expedition, txt, seq=""):
|
|||||||
|
|
||||||
if ltriptext == "":
|
if ltriptext == "":
|
||||||
message = " ! - Zero content for logbook entry!: " + tid
|
message = " ! - Zero content for logbook entry!: " + tid
|
||||||
DataIssue.objects.create(parser='logbooks', message=message)
|
DataIssue.objects.create(parser="logbooks", message=message)
|
||||||
logdataissues[tid]=message
|
logdataissues[tid] = message
|
||||||
print(message)
|
print(message)
|
||||||
|
|
||||||
|
entrytuple = (ldate, tripcave, triptitle, ltriptext, trippeople, expedition, tu, tid)
|
||||||
entrytuple = (ldate, tripcave, triptitle, ltriptext,
|
|
||||||
trippeople, expedition, tu, tid)
|
|
||||||
logentries.append(entrytuple)
|
logentries.append(entrytuple)
|
||||||
|
|
||||||
except:
|
except:
|
||||||
message = f" ! - Skipping logentry {year} due to exception in: {tid}"
|
message = f" ! - Skipping logentry {year} due to exception in: {tid}"
|
||||||
DataIssue.objects.create(parser='logbooks', message=message)
|
DataIssue.objects.create(parser="logbooks", message=message)
|
||||||
logdataissues[tid]=message
|
logdataissues[tid] = message
|
||||||
print(message)
|
print(message)
|
||||||
errorcount += 1
|
errorcount += 1
|
||||||
raise
|
raise
|
||||||
if errorcount >5 :
|
if errorcount > 5:
|
||||||
message = f" !!- TOO MANY ERRORS - aborting at '{tid}' logbook: {year}"
|
message = f" !!- TOO MANY ERRORS - aborting at '{tid}' logbook: {year}"
|
||||||
DataIssue.objects.create(parser='logbooks', message=message)
|
DataIssue.objects.create(parser="logbooks", message=message)
|
||||||
logdataissues[tid]=message
|
logdataissues[tid] = message
|
||||||
print(message)
|
print(message)
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
def parser_blog(year, expedition, txt, sq=""):
|
def parser_blog(year, expedition, txt, sq=""):
|
||||||
'''Parses the format of web pages collected as 'Save As HTML" from the UK Caving blog website.
|
"""Parses the format of web pages collected as 'Save As HTML" from the UK Caving blog website.
|
||||||
Note that the entries have dates and authors, but no titles.
|
Note that the entries have dates and authors, but no titles.
|
||||||
See detailed explanation of the complete process:
|
See detailed explanation of the complete process:
|
||||||
https://expo.survex.com/handbook/computing/logbooks-parsing.html
|
https://expo.survex.com/handbook/computing/logbooks-parsing.html
|
||||||
@ -527,58 +571,62 @@ def parser_blog(year, expedition, txt, sq=""):
|
|||||||
</article>
|
</article>
|
||||||
</article>
|
</article>
|
||||||
So the content is nested inside the header. Attachments (images) come after the content.
|
So the content is nested inside the header. Attachments (images) come after the content.
|
||||||
'''
|
"""
|
||||||
global logentries
|
global logentries
|
||||||
global logdataissues
|
global logdataissues
|
||||||
errorcount = 0
|
errorcount = 0
|
||||||
|
|
||||||
tripheads = re.findall(r"<article class=\"message message--post js-post js-inlineModContainer\s*\"\s*([\s\S]*?)(?=</article)", txt)
|
tripheads = re.findall(
|
||||||
if not ( tripheads ) :
|
r"<article class=\"message message--post js-post js-inlineModContainer\s*\"\s*([\s\S]*?)(?=</article)", txt
|
||||||
|
)
|
||||||
|
if not (tripheads):
|
||||||
message = f" ! - Skipping on failure to parse article header: {txt[:500]}"
|
message = f" ! - Skipping on failure to parse article header: {txt[:500]}"
|
||||||
print(message)
|
print(message)
|
||||||
|
|
||||||
# (?= is a non-consuming match, see https://docs.python.org/3/library/re.html
|
# (?= is a non-consuming match, see https://docs.python.org/3/library/re.html
|
||||||
tripparas = re.findall(r"<article class=\"message-body js-selectToQuote\"\>\s*([\s\S]*?)(</article[^>]*>)([\s\S]*?)(?=</article)", txt)
|
tripparas = re.findall(
|
||||||
if not ( tripparas ) :
|
r"<article class=\"message-body js-selectToQuote\"\>\s*([\s\S]*?)(</article[^>]*>)([\s\S]*?)(?=</article)", txt
|
||||||
|
)
|
||||||
|
if not (tripparas):
|
||||||
message = f" ! - Skipping on failure to parse article content: {txt[:500]}"
|
message = f" ! - Skipping on failure to parse article content: {txt[:500]}"
|
||||||
print(message)
|
print(message)
|
||||||
|
|
||||||
if (len(tripheads) !=len(tripparas)):
|
if len(tripheads) != len(tripparas):
|
||||||
print(f"{len(tripheads)} != {len(tripparas)}")
|
print(f"{len(tripheads)} != {len(tripparas)}")
|
||||||
print(f"{len(tripheads)} - {len(tripparas)}")
|
print(f"{len(tripheads)} - {len(tripparas)}")
|
||||||
|
|
||||||
location = "Plateau" # best guess, fix manually later
|
location = "Plateau" # best guess, fix manually later
|
||||||
tu = 0
|
tu = 0
|
||||||
logbook_entry_count = 0
|
logbook_entry_count = 0
|
||||||
for i in range(0, len(tripparas)):
|
for i in range(0, len(tripparas)):
|
||||||
tripstuff = tripparas[i]
|
tripstuff = tripparas[i]
|
||||||
attach = tripstuff[2]
|
attach = tripstuff[2]
|
||||||
# note use on non-greedy *? regex idiom here
|
# note use on non-greedy *? regex idiom here
|
||||||
attach = re.sub(r"<div class=\"file-content\">[\s\S]*?(?=</li>)","",attach)
|
attach = re.sub(r"<div class=\"file-content\">[\s\S]*?(?=</li>)", "", attach)
|
||||||
attach = re.sub(r"<footer[\s\S]*(</footer>)","",attach)
|
attach = re.sub(r"<footer[\s\S]*(</footer>)", "", attach)
|
||||||
tripcontent = tripstuff[0] + attach
|
tripcontent = tripstuff[0] + attach
|
||||||
#print(f"{i} - {len(tripstuff)} - {tripstuff[1]}")
|
# print(f"{i} - {len(tripstuff)} - {tripstuff[1]}")
|
||||||
triphead = tripheads[i]
|
triphead = tripheads[i]
|
||||||
logbook_entry_count += 1
|
logbook_entry_count += 1
|
||||||
tid = set_trip_id(year,logbook_entry_count) +"_blog" + sq
|
tid = set_trip_id(year, logbook_entry_count) + "_blog" + sq
|
||||||
# print(f" - tid: {tid}")
|
# print(f" - tid: {tid}")
|
||||||
|
|
||||||
# data-author="tcacrossley"
|
# data-author="tcacrossley"
|
||||||
match_author = re.search(r".*data-author=\"([^\"]*)\" data-content=.*", triphead)
|
match_author = re.search(r".*data-author=\"([^\"]*)\" data-content=.*", triphead)
|
||||||
if not ( match_author ) :
|
if not (match_author):
|
||||||
message = f" ! - Skipping logentry {year}:{logbook_entry_count} on failure to parse data-author {tid} {triphead[:400]}..."
|
message = f" ! - Skipping logentry {year}:{logbook_entry_count} on failure to parse data-author {tid} {triphead[:400]}..."
|
||||||
DataIssue.objects.create(parser='logbooks', message=message)
|
DataIssue.objects.create(parser="logbooks", message=message)
|
||||||
logdataissues[tid]=message
|
logdataissues[tid] = message
|
||||||
print(message)
|
print(message)
|
||||||
break
|
break
|
||||||
trippeople = match_author.group(1)
|
trippeople = match_author.group(1)
|
||||||
# print(f" - tid: {tid} {trippeople}")
|
# print(f" - tid: {tid} {trippeople}")
|
||||||
# datetime="2019-07-11T13:16:18+0100"
|
# datetime="2019-07-11T13:16:18+0100"
|
||||||
match_datetime = re.search(r".*datetime=\"([^\"]*)\" data-time=.*", triphead)
|
match_datetime = re.search(r".*datetime=\"([^\"]*)\" data-time=.*", triphead)
|
||||||
if not ( match_datetime ) :
|
if not (match_datetime):
|
||||||
message = f" ! - Skipping logentry {year}:{logbook_entry_count} on failure to parse datetime {tid} {triphead[:400]}..."
|
message = f" ! - Skipping logentry {year}:{logbook_entry_count} on failure to parse datetime {tid} {triphead[:400]}..."
|
||||||
DataIssue.objects.create(parser='logbooks', message=message)
|
DataIssue.objects.create(parser="logbooks", message=message)
|
||||||
logdataissues[tid]=message
|
logdataissues[tid] = message
|
||||||
print(message)
|
print(message)
|
||||||
break
|
break
|
||||||
datestamp = match_datetime.group(1)
|
datestamp = match_datetime.group(1)
|
||||||
@ -587,8 +635,8 @@ def parser_blog(year, expedition, txt, sq=""):
|
|||||||
tripdate = datetime.fromisoformat(datestamp)
|
tripdate = datetime.fromisoformat(datestamp)
|
||||||
except:
|
except:
|
||||||
message = f" ! - FROMISOFORMAT fail logentry {year}:{logbook_entry_count} {tid} '{datestamp}'"
|
message = f" ! - FROMISOFORMAT fail logentry {year}:{logbook_entry_count} {tid} '{datestamp}'"
|
||||||
DataIssue.objects.create(parser='logbooks', message=message)
|
DataIssue.objects.create(parser="logbooks", message=message)
|
||||||
logdataissues[tid]=message
|
logdataissues[tid] = message
|
||||||
print(message)
|
print(message)
|
||||||
# fallback, ignore the timestamp bits:
|
# fallback, ignore the timestamp bits:
|
||||||
tripdate = datetime.fromisoformat(datestamp[0:10])
|
tripdate = datetime.fromisoformat(datestamp[0:10])
|
||||||
@ -596,21 +644,20 @@ def parser_blog(year, expedition, txt, sq=""):
|
|||||||
|
|
||||||
# tripname must have the location then a hyphen at the beginning as it is ignored by export function
|
# tripname must have the location then a hyphen at the beginning as it is ignored by export function
|
||||||
location = "Unknown"
|
location = "Unknown"
|
||||||
tripname = f"Expo - UK Caving Blog{sq} post {logbook_entry_count}" # must be unique for a given date
|
tripname = f"Expo - UK Caving Blog{sq} post {logbook_entry_count}" # must be unique for a given date
|
||||||
tripcontent = re.sub(r"(width=\"\d+\")","",tripcontent)
|
tripcontent = re.sub(r"(width=\"\d+\")", "", tripcontent)
|
||||||
tripcontent = re.sub(r"height=\"\d+\"","",tripcontent)
|
tripcontent = re.sub(r"height=\"\d+\"", "", tripcontent)
|
||||||
tripcontent = re.sub(r"width: \d+px","",tripcontent)
|
tripcontent = re.sub(r"width: \d+px", "", tripcontent)
|
||||||
tripcontent = re.sub(r"\n\n+","\n\n",tripcontent)
|
tripcontent = re.sub(r"\n\n+", "\n\n", tripcontent)
|
||||||
tripcontent = re.sub(r"<hr\s*>","",tripcontent)
|
tripcontent = re.sub(r"<hr\s*>", "", tripcontent)
|
||||||
tripcontent = f"\n\n<!-- Content parsed from UK Caving Blog -->\nBlog Author: {trippeople}" + tripcontent
|
tripcontent = f"\n\n<!-- Content parsed from UK Caving Blog -->\nBlog Author: {trippeople}" + tripcontent
|
||||||
|
|
||||||
entrytuple = (tripdate, location, tripname, tripcontent,
|
entrytuple = (tripdate, location, tripname, tripcontent, trippeople, expedition, tu, tid)
|
||||||
trippeople, expedition, tu, tid)
|
|
||||||
logentries.append(entrytuple)
|
logentries.append(entrytuple)
|
||||||
|
|
||||||
|
|
||||||
def LoadLogbookForExpedition(expedition, clean=True):
|
def LoadLogbookForExpedition(expedition, clean=True):
|
||||||
""" Parses all logbook entries for one expedition
|
"""Parses all logbook entries for one expedition
|
||||||
if clean==True then it deletes all entries for this year first.
|
if clean==True then it deletes all entries for this year first.
|
||||||
"""
|
"""
|
||||||
global logentries
|
global logentries
|
||||||
@ -619,63 +666,62 @@ def LoadLogbookForExpedition(expedition, clean=True):
|
|||||||
global entries
|
global entries
|
||||||
|
|
||||||
logbook_parseable = False
|
logbook_parseable = False
|
||||||
yearlinks = LOGBOOK_PARSER_SETTINGS
|
yearlinks = LOGBOOK_PARSER_SETTINGS
|
||||||
expologbase = os.path.join(settings.EXPOWEB, "years")
|
expologbase = os.path.join(settings.EXPOWEB, "years")
|
||||||
logentries=[]
|
logentries = []
|
||||||
|
|
||||||
year = expedition.year
|
year = expedition.year
|
||||||
expect = entries[year]
|
expect = entries[year]
|
||||||
# print(" - Logbook for: " + year)
|
# print(" - Logbook for: " + year)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def cleanerrors(year):
|
def cleanerrors(year):
|
||||||
global logdataissues
|
global logdataissues
|
||||||
dataissues = DataIssue.objects.filter(parser='logbooks')
|
dataissues = DataIssue.objects.filter(parser="logbooks")
|
||||||
for di in dataissues:
|
for di in dataissues:
|
||||||
ph = year
|
ph = year
|
||||||
if re.search(ph, di.message) is not None:
|
if re.search(ph, di.message) is not None:
|
||||||
#print(f' - CLEANING dataissue {di.message}')
|
# print(f' - CLEANING dataissue {di.message}')
|
||||||
di.delete()
|
di.delete()
|
||||||
|
|
||||||
#print(f' - CLEAN {year} {len(logdataissues)} {type(logdataissues)} data issues for this year')
|
# print(f' - CLEAN {year} {len(logdataissues)} {type(logdataissues)} data issues for this year')
|
||||||
dellist = []
|
dellist = []
|
||||||
for key, value in logdataissues.items():
|
for key, value in logdataissues.items():
|
||||||
#print(f' - CLEANING logdataissues [{key}]: {value}')
|
# print(f' - CLEANING logdataissues [{key}]: {value}')
|
||||||
if key.startswith(year):
|
if key.startswith(year):
|
||||||
#print(f' - CLEANING logdataissues [{key:12}]: {value} ')
|
# print(f' - CLEANING logdataissues [{key:12}]: {value} ')
|
||||||
dellist.append(key)
|
dellist.append(key)
|
||||||
for i in dellist:
|
for i in dellist:
|
||||||
del logdataissues[i]
|
del logdataissues[i]
|
||||||
if (clean):
|
|
||||||
|
if clean:
|
||||||
cleanerrors(year)
|
cleanerrors(year)
|
||||||
|
|
||||||
if year in yearlinks:
|
if year in yearlinks:
|
||||||
yearfile, yearparser = yearlinks[year]
|
yearfile, yearparser = yearlinks[year]
|
||||||
logbookpath = Path(yearfile)
|
logbookpath = Path(yearfile)
|
||||||
expedition.logbookfile = yearfile
|
expedition.logbookfile = yearfile
|
||||||
parsefunc = yearparser
|
parsefunc = yearparser
|
||||||
# print(f" - Logbook file {yearfile} using parser {yearparser}")
|
# print(f" - Logbook file {yearfile} using parser {yearparser}")
|
||||||
|
|
||||||
else:
|
else:
|
||||||
logbookpath = Path(DEFAULT_LOGBOOK_FILE)
|
logbookpath = Path(DEFAULT_LOGBOOK_FILE)
|
||||||
expedition.logbookfile = DEFAULT_LOGBOOK_FILE
|
expedition.logbookfile = DEFAULT_LOGBOOK_FILE
|
||||||
parsefunc = DEFAULT_LOGBOOK_PARSER
|
parsefunc = DEFAULT_LOGBOOK_PARSER
|
||||||
|
|
||||||
expedition.save()
|
expedition.save()
|
||||||
|
|
||||||
lbes = LogbookEntry.objects.filter(expedition=expedition)
|
lbes = LogbookEntry.objects.filter(expedition=expedition)
|
||||||
if (clean):
|
if clean:
|
||||||
for lbe in lbes:
|
for lbe in lbes:
|
||||||
lbe.delete()
|
lbe.delete()
|
||||||
|
|
||||||
for sq in ["", "2", "3", "4"]: # cope with blog saved as many separate files
|
for sq in ["", "2", "3", "4"]: # cope with blog saved as many separate files
|
||||||
lb = Path(expologbase, year, logbookpath.stem + sq + logbookpath.suffix)
|
lb = Path(expologbase, year, logbookpath.stem + sq + logbookpath.suffix)
|
||||||
if not (lb.is_file()):
|
if not (lb.is_file()):
|
||||||
# print(f" ! End of blog. Next blog file in sequence not there:{lb}")
|
# print(f" ! End of blog. Next blog file in sequence not there:{lb}")
|
||||||
break
|
break
|
||||||
try:
|
try:
|
||||||
with open(lb,'rb') as file_in:
|
with open(lb, "rb") as file_in:
|
||||||
txt = file_in.read().decode("utf-8")
|
txt = file_in.read().decode("utf-8")
|
||||||
logbook_parseable = True
|
logbook_parseable = True
|
||||||
except (IOError):
|
except (IOError):
|
||||||
@ -689,8 +735,8 @@ def LoadLogbookForExpedition(expedition, clean=True):
|
|||||||
|
|
||||||
# --------------------
|
# --------------------
|
||||||
parser = globals()[parsefunc]
|
parser = globals()[parsefunc]
|
||||||
print(f' - {year} parsing with {parsefunc} - {lb}')
|
print(f" - {year} parsing with {parsefunc} - {lb}")
|
||||||
parser(year, expedition, txt, sq) # this launches the right parser for this year
|
parser(year, expedition, txt, sq) # this launches the right parser for this year
|
||||||
# --------------------
|
# --------------------
|
||||||
dupl = {}
|
dupl = {}
|
||||||
for entrytuple in logentries:
|
for entrytuple in logentries:
|
||||||
@ -699,11 +745,10 @@ def LoadLogbookForExpedition(expedition, clean=True):
|
|||||||
if check in dupl:
|
if check in dupl:
|
||||||
dupl[check] += 1
|
dupl[check] += 1
|
||||||
triptitle = f"{triptitle} #{dupl[check]}"
|
triptitle = f"{triptitle} #{dupl[check]}"
|
||||||
print(f' - {triptitle} -- {date}')
|
print(f" - {triptitle} -- {date}")
|
||||||
else:
|
else:
|
||||||
dupl[check] = 1
|
dupl[check] = 1
|
||||||
EnterLogIntoDbase(date, tripcave, triptitle, text, trippeople, expedition, logtime_underground,
|
EnterLogIntoDbase(date, tripcave, triptitle, text, trippeople, expedition, logtime_underground, tripid1)
|
||||||
tripid1)
|
|
||||||
|
|
||||||
if len(logentries) == expect:
|
if len(logentries) == expect:
|
||||||
# print(f"OK {year} {len(logentries):5d} is {expect}\n")
|
# print(f"OK {year} {len(logentries):5d} is {expect}\n")
|
||||||
@ -713,26 +758,29 @@ def LoadLogbookForExpedition(expedition, clean=True):
|
|||||||
|
|
||||||
return len(logentries)
|
return len(logentries)
|
||||||
|
|
||||||
|
|
||||||
def LoadLogbook(year):
|
def LoadLogbook(year):
|
||||||
'''One off logbook for testing purposes
|
"""One off logbook for testing purposes"""
|
||||||
'''
|
|
||||||
global LOGBOOK_PARSER_SETTINGS
|
global LOGBOOK_PARSER_SETTINGS
|
||||||
|
|
||||||
nlbe={}
|
nlbe = {}
|
||||||
TROG['pagecache']['expedition'][year] = None # clear cache
|
TROG["pagecache"]["expedition"][year] = None # clear cache
|
||||||
|
|
||||||
expo = Expedition.objects.get(year=year)
|
expo = Expedition.objects.get(year=year)
|
||||||
year = expo.year # some type funny
|
year = expo.year # some type funny
|
||||||
nlbe[expo] = LoadLogbookForExpedition(expo) # this actually loads the logbook for one expo
|
nlbe[expo] = LoadLogbookForExpedition(expo) # this actually loads the logbook for one expo
|
||||||
if year in BLOG_PARSER_SETTINGS:
|
if year in BLOG_PARSER_SETTINGS:
|
||||||
print("BLOG parsing")
|
print("BLOG parsing")
|
||||||
LOGBOOK_PARSER_SETTINGS[year] = BLOG_PARSER_SETTINGS[year]
|
LOGBOOK_PARSER_SETTINGS[year] = BLOG_PARSER_SETTINGS[year]
|
||||||
nlbe[expo] = LoadLogbookForExpedition(expo, clean=False) # this loads the blog logbook for one expo
|
nlbe[expo] = LoadLogbookForExpedition(expo, clean=False) # this loads the blog logbook for one expo
|
||||||
else:
|
else:
|
||||||
print(f"Not a year with extant blog entries to import: '{year}' not in BLOG_PARSER_SETTINGS {BLOG_PARSER_SETTINGS}")
|
print(
|
||||||
|
f"Not a year with extant blog entries to import: '{year}' not in BLOG_PARSER_SETTINGS {BLOG_PARSER_SETTINGS}"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def LoadLogbooks():
|
def LoadLogbooks():
|
||||||
""" This is the master function for parsing all logbooks into the Troggle database.
|
"""This is the master function for parsing all logbooks into the Troggle database.
|
||||||
This should be rewritten to use coroutines to load all logbooks from disc in parallel,
|
This should be rewritten to use coroutines to load all logbooks from disc in parallel,
|
||||||
but must be serialised to write to database as sqlite is single-user.
|
but must be serialised to write to database as sqlite is single-user.
|
||||||
"""
|
"""
|
||||||
@ -740,45 +788,48 @@ def LoadLogbooks():
|
|||||||
global entries
|
global entries
|
||||||
|
|
||||||
logdataissues = {}
|
logdataissues = {}
|
||||||
DataIssue.objects.filter(parser='logbooks').delete()
|
DataIssue.objects.filter(parser="logbooks").delete()
|
||||||
expos = Expedition.objects.all()
|
expos = Expedition.objects.all()
|
||||||
if len(expos) <= 1:
|
if len(expos) <= 1:
|
||||||
message = f" ! - No expeditions found. Load 'people' first"
|
message = f" ! - No expeditions found. Load 'people' first"
|
||||||
DataIssue.objects.create(parser='logbooks', message=message)
|
DataIssue.objects.create(parser="logbooks", message=message)
|
||||||
logdataissues[f"sqlfail 0000"]=message
|
logdataissues[f"sqlfail 0000"] = message
|
||||||
print(message)
|
print(message)
|
||||||
return
|
return
|
||||||
|
|
||||||
noexpo = ["1986", "2020", "2021",] #no expo
|
noexpo = [
|
||||||
|
"1986",
|
||||||
|
"2020",
|
||||||
|
"2021",
|
||||||
|
] # no expo
|
||||||
lostlogbook = ["1976", "1977", "1978", "1979", "1980", "1981"]
|
lostlogbook = ["1976", "1977", "1978", "1979", "1980", "1981"]
|
||||||
sqlfail = [""] # breaks mysql with db constraint fail - all now fixed.]
|
sqlfail = [""] # breaks mysql with db constraint fail - all now fixed.]
|
||||||
nologbook = noexpo + lostlogbook + sqlfail
|
nologbook = noexpo + lostlogbook + sqlfail
|
||||||
|
|
||||||
nlbe={}
|
nlbe = {}
|
||||||
expd ={}
|
expd = {}
|
||||||
loglist = []
|
loglist = []
|
||||||
bloglist = []
|
bloglist = []
|
||||||
|
|
||||||
for expo in expos: # pointless as we explicitly know the years in this code.
|
for expo in expos: # pointless as we explicitly know the years in this code.
|
||||||
year = expo.year
|
year = expo.year
|
||||||
TROG['pagecache']['expedition'][year] = None # clear cache
|
TROG["pagecache"]["expedition"][year] = None # clear cache
|
||||||
if year in sqlfail:
|
if year in sqlfail:
|
||||||
print(" - Logbook for: " + year + " NO parsing attempted - known sql failures")
|
print(" - Logbook for: " + year + " NO parsing attempted - known sql failures")
|
||||||
message = f" ! - Not even attempting to parse logbook for {year} until code fixed"
|
message = f" ! - Not even attempting to parse logbook for {year} until code fixed"
|
||||||
DataIssue.objects.create(parser='logbooks', message=message)
|
DataIssue.objects.create(parser="logbooks", message=message)
|
||||||
logdataissues[f"sqlfail {year}"]=message
|
logdataissues[f"sqlfail {year}"] = message
|
||||||
print(message)
|
print(message)
|
||||||
|
|
||||||
if year not in nologbook:
|
if year not in nologbook:
|
||||||
if year in entries:
|
if year in entries:
|
||||||
loglist.append(expo)
|
loglist.append(expo)
|
||||||
else:
|
else:
|
||||||
print(" - No Logbook yet for: " + year) # catch case when preparing for next expo
|
print(" - No Logbook yet for: " + year) # catch case when preparing for next expo
|
||||||
|
|
||||||
if year in BLOG_PARSER_SETTINGS:
|
if year in BLOG_PARSER_SETTINGS:
|
||||||
bloglist.append(expo)
|
bloglist.append(expo)
|
||||||
|
|
||||||
|
|
||||||
for ex in loglist:
|
for ex in loglist:
|
||||||
nlbe[ex] = LoadLogbookForExpedition(ex) # this loads the logbook for one expo
|
nlbe[ex] = LoadLogbookForExpedition(ex) # this loads the logbook for one expo
|
||||||
|
|
||||||
@ -795,7 +846,7 @@ def LoadLogbooks():
|
|||||||
# tried to use map with concurrent threads - but sqlite database is not concurrent, so failed with database lock
|
# tried to use map with concurrent threads - but sqlite database is not concurrent, so failed with database lock
|
||||||
# yt = 0
|
# yt = 0
|
||||||
# for r in map(LoadLogbookForExpedition, loglist):
|
# for r in map(LoadLogbookForExpedition, loglist):
|
||||||
# yt = r
|
# yt = r
|
||||||
|
|
||||||
yt = 0
|
yt = 0
|
||||||
for e in nlbe:
|
for e in nlbe:
|
||||||
@ -803,7 +854,6 @@ def LoadLogbooks():
|
|||||||
print(f"total {yt:,} log entries parsed in all expeditions")
|
print(f"total {yt:,} log entries parsed in all expeditions")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# dateRegex = re.compile(r'<span\s+class="date">(\d\d\d\d)-(\d\d)-(\d\d)</span>', re.S)
|
# dateRegex = re.compile(r'<span\s+class="date">(\d\d\d\d)-(\d\d)-(\d\d)</span>', re.S)
|
||||||
# expeditionYearRegex = re.compile(r'<span\s+class="expeditionyear">(.*?)</span>', re.S)
|
# expeditionYearRegex = re.compile(r'<span\s+class="expeditionyear">(.*?)</span>', re.S)
|
||||||
# titleRegex = re.compile(r'<H1>(.*?)</H1>', re.S)
|
# titleRegex = re.compile(r'<H1>(.*?)</H1>', re.S)
|
||||||
@ -813,4 +863,3 @@ def LoadLogbooks():
|
|||||||
# TURegex = re.compile(r'<span\s+class="TU">([0-9]*\.?[0-9]+)</span>', re.S)
|
# TURegex = re.compile(r'<span\s+class="TU">([0-9]*\.?[0-9]+)</span>', re.S)
|
||||||
# locationRegex = re.compile(r'<span\s+class="location">(.*?)</span>', re.S)
|
# locationRegex = re.compile(r'<span\s+class="location">(.*?)</span>', re.S)
|
||||||
# caveRegex = re.compile(r'<span\s+class="cave">(.*?)</span>', re.S)
|
# caveRegex = re.compile(r'<span\s+class="cave">(.*?)</span>', re.S)
|
||||||
|
|
||||||
|
@ -9,15 +9,15 @@ from pathlib import Path
|
|||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
from unidecode import unidecode
|
from unidecode import unidecode
|
||||||
|
|
||||||
from troggle.core.models.troggle import (DataIssue, Expedition, Person,
|
from troggle.core.models.troggle import DataIssue, Expedition, Person, PersonExpedition
|
||||||
PersonExpedition)
|
|
||||||
from troggle.core.utils import TROG, save_carefully
|
from troggle.core.utils import TROG, save_carefully
|
||||||
|
|
||||||
'''These functions do not match how the stand-alone folk script works. So the script produces an HTML file which has
|
"""These functions do not match how the stand-alone folk script works. So the script produces an HTML file which has
|
||||||
href links to pages in troggle which troggle does not think are right.
|
href links to pages in troggle which troggle does not think are right.
|
||||||
The standalone script needs to be rendered defunct, and all the parsing needs to be in troggle. Either that,
|
The standalone script needs to be rendered defunct, and all the parsing needs to be in troggle. Either that,
|
||||||
or they should use the same code by importing a module.
|
or they should use the same code by importing a module.
|
||||||
'''
|
"""
|
||||||
|
|
||||||
|
|
||||||
def parse_blurb(personline, header, person):
|
def parse_blurb(personline, header, person):
|
||||||
"""create mugshot Photo instance"""
|
"""create mugshot Photo instance"""
|
||||||
@ -28,50 +28,51 @@ def parse_blurb(personline, header, person):
|
|||||||
if not ms_path.is_file():
|
if not ms_path.is_file():
|
||||||
message = f"! INVALID mug_shot field '{ms_filename}' for {person.fullname}"
|
message = f"! INVALID mug_shot field '{ms_filename}' for {person.fullname}"
|
||||||
print(message)
|
print(message)
|
||||||
DataIssue.objects.create(parser='people', message=message, url=f"/person/{person.fullname}")
|
DataIssue.objects.create(parser="people", message=message, url=f"/person/{person.fullname}")
|
||||||
return
|
return
|
||||||
|
|
||||||
if ms_filename.startswith('i/'):
|
if ms_filename.startswith("i/"):
|
||||||
#if person just has an image, add it. It has format 'i/adama2018.jpg'
|
# if person just has an image, add it. It has format 'i/adama2018.jpg'
|
||||||
person.mug_shot = str(Path("/folk", ms_filename))
|
person.mug_shot = str(Path("/folk", ms_filename))
|
||||||
person.blurb = None
|
person.blurb = None
|
||||||
|
|
||||||
elif ms_filename.startswith('l/'):
|
elif ms_filename.startswith("l/"):
|
||||||
# it has the format 'l/ollybetts.htm' the file may contain <img src="../i/mymug.jpg"> images
|
# it has the format 'l/ollybetts.htm' the file may contain <img src="../i/mymug.jpg"> images
|
||||||
with open(ms_path,'r') as blurbfile:
|
with open(ms_path, "r") as blurbfile:
|
||||||
blrb = blurbfile.read()
|
blrb = blurbfile.read()
|
||||||
pblurb=re.search(r'<body>.*<hr',blrb,re.DOTALL)
|
pblurb = re.search(r"<body>.*<hr", blrb, re.DOTALL)
|
||||||
if pblurb:
|
if pblurb:
|
||||||
person.mug_shot = None
|
person.mug_shot = None
|
||||||
fragment= re.search('<body>(.*)<hr',blrb,re.DOTALL).group(1)
|
fragment = re.search("<body>(.*)<hr", blrb, re.DOTALL).group(1)
|
||||||
fragment = fragment.replace('src="../i/', 'src="/folk/i/')
|
fragment = fragment.replace('src="../i/', 'src="/folk/i/')
|
||||||
fragment = fragment.replace("src='../i/", "src='/folk/i/")
|
fragment = fragment.replace("src='../i/", "src='/folk/i/")
|
||||||
fragment = re.sub(r'<h.*>[^<]*</h.>', '', fragment)
|
fragment = re.sub(r"<h.*>[^<]*</h.>", "", fragment)
|
||||||
# replace src="../i/ with src="/folk/i
|
# replace src="../i/ with src="/folk/i
|
||||||
person.blurb = fragment
|
person.blurb = fragment
|
||||||
else:
|
else:
|
||||||
message = f"! Blurb parse error in {ms_filename}"
|
message = f"! Blurb parse error in {ms_filename}"
|
||||||
print(message)
|
print(message)
|
||||||
DataIssue.objects.create(parser='people', message=message, url="/folk/")
|
DataIssue.objects.create(parser="people", message=message, url="/folk/")
|
||||||
|
|
||||||
elif ms_filename == '':
|
elif ms_filename == "":
|
||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
message = f"! Unrecognised type of file at mug_shot field '{ms_filename}' for {person.fullname}"
|
message = f"! Unrecognised type of file at mug_shot field '{ms_filename}' for {person.fullname}"
|
||||||
print(message)
|
print(message)
|
||||||
DataIssue.objects.create(parser='people', message=message, url="/folk/")
|
DataIssue.objects.create(parser="people", message=message, url="/folk/")
|
||||||
|
|
||||||
person.save()
|
person.save()
|
||||||
|
|
||||||
|
|
||||||
def load_people_expos():
|
def load_people_expos():
|
||||||
'''This is where the folk.csv file is parsed to read people's names.
|
"""This is where the folk.csv file is parsed to read people's names.
|
||||||
Which it gets wrong for people like Lydia-Clare Leather and various 'von' and 'de' middle 'names'
|
Which it gets wrong for people like Lydia-Clare Leather and various 'von' and 'de' middle 'names'
|
||||||
and McLean and Mclean and McAdam - interaction with the url parser in urls.py too
|
and McLean and Mclean and McAdam - interaction with the url parser in urls.py too
|
||||||
'''
|
"""
|
||||||
DataIssue.objects.filter(parser='people').delete()
|
DataIssue.objects.filter(parser="people").delete()
|
||||||
|
|
||||||
persontab = open(os.path.join(settings.EXPOWEB, "folk", "folk.csv")) # should really be EXPOFOLK I guess
|
persontab = open(os.path.join(settings.EXPOWEB, "folk", "folk.csv")) # should really be EXPOFOLK I guess
|
||||||
personreader = csv.reader(persontab) # this is an iterator
|
personreader = csv.reader(persontab) # this is an iterator
|
||||||
headers = next(personreader)
|
headers = next(personreader)
|
||||||
header = dict(list(zip(headers, list(range(len(headers))))))
|
header = dict(list(zip(headers, list(range(len(headers))))))
|
||||||
|
|
||||||
@ -80,8 +81,8 @@ def load_people_expos():
|
|||||||
years = headers[5:]
|
years = headers[5:]
|
||||||
|
|
||||||
for year in years:
|
for year in years:
|
||||||
lookupAttribs = {'year':year}
|
lookupAttribs = {"year": year}
|
||||||
nonLookupAttribs = {'name':f"CUCC expo {year}"}
|
nonLookupAttribs = {"name": f"CUCC expo {year}"}
|
||||||
|
|
||||||
save_carefully(Expedition, lookupAttribs, nonLookupAttribs)
|
save_carefully(Expedition, lookupAttribs, nonLookupAttribs)
|
||||||
|
|
||||||
@ -105,18 +106,18 @@ def load_people_expos():
|
|||||||
nickname = splitnick.group(2) or ""
|
nickname = splitnick.group(2) or ""
|
||||||
|
|
||||||
fullname = fullname.strip()
|
fullname = fullname.strip()
|
||||||
names = fullname.split(' ')
|
names = fullname.split(" ")
|
||||||
firstname = names[0]
|
firstname = names[0]
|
||||||
if len(names) == 1:
|
if len(names) == 1:
|
||||||
lastname = ""
|
lastname = ""
|
||||||
|
|
||||||
if personline[header["VfHO member"]] =='':
|
if personline[header["VfHO member"]] == "":
|
||||||
vfho = False
|
vfho = False
|
||||||
else:
|
else:
|
||||||
vfho = True
|
vfho = True
|
||||||
|
|
||||||
lookupAttribs={'first_name':firstname, 'last_name':(lastname or "")}
|
lookupAttribs = {"first_name": firstname, "last_name": (lastname or "")}
|
||||||
nonLookupAttribs={'is_vfho':vfho, 'fullname':fullname, 'nickname':nickname}
|
nonLookupAttribs = {"is_vfho": vfho, "fullname": fullname, "nickname": nickname}
|
||||||
person, created = save_carefully(Person, lookupAttribs, nonLookupAttribs)
|
person, created = save_carefully(Person, lookupAttribs, nonLookupAttribs)
|
||||||
|
|
||||||
parse_blurb(personline=personline, header=header, person=person)
|
parse_blurb(personline=personline, header=header, person=person)
|
||||||
@ -125,29 +126,47 @@ def load_people_expos():
|
|||||||
for year, attended in list(zip(headers, personline))[5:]:
|
for year, attended in list(zip(headers, personline))[5:]:
|
||||||
expedition = Expedition.objects.get(year=year)
|
expedition = Expedition.objects.get(year=year)
|
||||||
if attended == "1" or attended == "-1":
|
if attended == "1" or attended == "-1":
|
||||||
lookupAttribs = {'person':person, 'expedition':expedition}
|
lookupAttribs = {"person": person, "expedition": expedition}
|
||||||
nonLookupAttribs = {'nickname':nickname, 'is_guest':(personline[header["Guest"]] == "1")}
|
nonLookupAttribs = {"nickname": nickname, "is_guest": (personline[header["Guest"]] == "1")}
|
||||||
save_carefully(PersonExpedition, lookupAttribs, nonLookupAttribs)
|
save_carefully(PersonExpedition, lookupAttribs, nonLookupAttribs)
|
||||||
print("", flush=True)
|
print("", flush=True)
|
||||||
|
|
||||||
def who_is_this(year, possibleid):
    """Return the Person known as `possibleid` on the expedition of `year`, or None.

    `possibleid` is lower-cased and looked up in the name-variation dict built by
    GetPersonExpeditionNameLookup() for that expedition. None is returned when there
    is no expedition for `year` or when the name is not recognised.
    """
    # filter() yields a QuerySet, but GetPersonExpeditionNameLookup() reads
    # expedition.name and filters PersonExpedition on the expedition itself, so it
    # must be handed a single Expedition instance.
    expo = Expedition.objects.filter(year=year).first()
    if expo is None:
        return None

    # .get() rather than [] so that an unrecognised name gives None, as the final
    # branch below intends, instead of raising KeyError.
    personexpedition = GetPersonExpeditionNameLookup(expo).get(possibleid.lower())
    if personexpedition:
        return personexpedition.person
    return None
|
||||||
|
|
||||||
|
|
||||||
# Names that known_foreigner() accepts by exact match (no soft matching).
# Each name is listed once; a module-level `global` statement is a no-op, so none is used.
foreign_friends = [
    "P. Jeutter",
    "K. Jäger",
    "S. Steinberger",
    "R. Seebacher",
    "Dominik Jauch",
    "Fritz Mammel",
    "Marcus Scheuerman",
    "Uli Schütz",
    "Wieland Scheuerle",
    "Arndt Karger",
    "Kai Schwekend",
    "Regina Kaiser",
    "Thilo Müller",
    "Florian Gruner",
    "Helmut Stopka-Ebeler",
    "Aiko",
    "Mark Morgan",
]
|
||||||
|
|
||||||
|
|
||||||
def known_foreigner(id):
|
def known_foreigner(id):
|
||||||
'''If this someone from ARGE or a known Austrian? Name has to be exact, no soft matching
|
"""If this someone from ARGE or a known Austrian? Name has to be exact, no soft matching"""
|
||||||
'''
|
|
||||||
global foreign_friends
|
global foreign_friends
|
||||||
|
|
||||||
if id in foreign_friends:
|
if id in foreign_friends:
|
||||||
@ -159,13 +178,14 @@ def known_foreigner(id):
|
|||||||
# Refactor. The dict GetPersonExpeditionNameLookup(expo) indexes by name and has values of personexpedition
|
# Refactor. The dict GetPersonExpeditionNameLookup(expo) indexes by name and has values of personexpedition
|
||||||
# This is convoluted, the whole personexpedition concept is unnecessary?
|
# This is convoluted, the whole personexpedition concept is unnecessary?
|
||||||
|
|
||||||
Gpersonexpeditionnamelookup = { }
|
Gpersonexpeditionnamelookup = {}
|
||||||
|
|
||||||
|
|
||||||
def GetPersonExpeditionNameLookup(expedition):
|
def GetPersonExpeditionNameLookup(expedition):
|
||||||
global Gpersonexpeditionnamelookup
|
global Gpersonexpeditionnamelookup
|
||||||
|
|
||||||
def apply_variations(f, l):
|
def apply_variations(f, l):
|
||||||
'''Be generous in guessing possible matches. Any duplicates will be ruled as invalid.
|
"""Be generous in guessing possible matches. Any duplicates will be ruled as invalid."""
|
||||||
'''
|
|
||||||
f = f.lower()
|
f = f.lower()
|
||||||
l = l.lower()
|
l = l.lower()
|
||||||
variations = []
|
variations = []
|
||||||
@ -175,11 +195,11 @@ def GetPersonExpeditionNameLookup(expedition):
|
|||||||
variations.append(f + " " + l)
|
variations.append(f + " " + l)
|
||||||
variations.append(f + " " + l[0])
|
variations.append(f + " " + l[0])
|
||||||
variations.append(f + l[0])
|
variations.append(f + l[0])
|
||||||
variations.append(f + " " +l[0] + '.')
|
variations.append(f + " " + l[0] + ".")
|
||||||
variations.append(f[0] + " " + l)
|
variations.append(f[0] + " " + l)
|
||||||
variations.append(f[0] + ". " + l)
|
variations.append(f[0] + ". " + l)
|
||||||
variations.append(f[0] + l)
|
variations.append(f[0] + l)
|
||||||
variations.append(f[0] + l[0]) # initials e.g. gb or bl
|
variations.append(f[0] + l[0]) # initials e.g. gb or bl
|
||||||
return variations
|
return variations
|
||||||
|
|
||||||
res = Gpersonexpeditionnamelookup.get(expedition.name)
|
res = Gpersonexpeditionnamelookup.get(expedition.name)
|
||||||
@ -187,15 +207,15 @@ def GetPersonExpeditionNameLookup(expedition):
|
|||||||
if res:
|
if res:
|
||||||
return res
|
return res
|
||||||
|
|
||||||
res = { }
|
res = {}
|
||||||
duplicates = set()
|
duplicates = set()
|
||||||
|
|
||||||
#print("Calculating GetPersonExpeditionNameLookup for " + expedition.year)
|
# print("Calculating GetPersonExpeditionNameLookup for " + expedition.year)
|
||||||
personexpeditions = PersonExpedition.objects.filter(expedition=expedition)
|
personexpeditions = PersonExpedition.objects.filter(expedition=expedition)
|
||||||
short = {}
|
short = {}
|
||||||
dellist = []
|
dellist = []
|
||||||
for personexpedition in personexpeditions:
|
for personexpedition in personexpeditions:
|
||||||
possnames = [ ]
|
possnames = []
|
||||||
f = unidecode(unescape(personexpedition.person.first_name.lower()))
|
f = unidecode(unescape(personexpedition.person.first_name.lower()))
|
||||||
l = unidecode(unescape(personexpedition.person.last_name.lower()))
|
l = unidecode(unescape(personexpedition.person.last_name.lower()))
|
||||||
full = unidecode(unescape(personexpedition.person.fullname.lower()))
|
full = unidecode(unescape(personexpedition.person.fullname.lower()))
|
||||||
@ -206,7 +226,7 @@ def GetPersonExpeditionNameLookup(expedition):
|
|||||||
possnames.append(n)
|
possnames.append(n)
|
||||||
|
|
||||||
if l:
|
if l:
|
||||||
possnames += apply_variations(f,l)
|
possnames += apply_variations(f, l)
|
||||||
|
|
||||||
if n:
|
if n:
|
||||||
possnames += apply_variations(n, l)
|
possnames += apply_variations(n, l)
|
||||||
@ -246,42 +266,42 @@ def GetPersonExpeditionNameLookup(expedition):
|
|||||||
if f == "Becka".lower():
|
if f == "Becka".lower():
|
||||||
possnames += apply_variations("Rebecca", l)
|
possnames += apply_variations("Rebecca", l)
|
||||||
|
|
||||||
if f'{f} {l}' == "Andy Waddington".lower():
|
if f"{f} {l}" == "Andy Waddington".lower():
|
||||||
possnames += apply_variations("aer", "waddington")
|
possnames += apply_variations("aer", "waddington")
|
||||||
if f'{f} {l}' == "Phil Underwood".lower():
|
if f"{f} {l}" == "Phil Underwood".lower():
|
||||||
possnames += apply_variations("phil", "underpants")
|
possnames += apply_variations("phil", "underpants")
|
||||||
if f'{f} {l}' == "Naomi Griffiths".lower():
|
if f"{f} {l}" == "Naomi Griffiths".lower():
|
||||||
possnames += apply_variations("naomi", "makins")
|
possnames += apply_variations("naomi", "makins")
|
||||||
if f'{f} {l}' == "Tina White".lower():
|
if f"{f} {l}" == "Tina White".lower():
|
||||||
possnames += apply_variations("tina", "richardson")
|
possnames += apply_variations("tina", "richardson")
|
||||||
if f'{f} {l}' == "Cat Hulse".lower():
|
if f"{f} {l}" == "Cat Hulse".lower():
|
||||||
possnames += apply_variations("catherine", "hulse")
|
possnames += apply_variations("catherine", "hulse")
|
||||||
possnames += apply_variations("cat", "henry")
|
possnames += apply_variations("cat", "henry")
|
||||||
if f'{f} {l}' == "Jess Stirrups".lower():
|
if f"{f} {l}" == "Jess Stirrups".lower():
|
||||||
possnames += apply_variations("jessica", "stirrups")
|
possnames += apply_variations("jessica", "stirrups")
|
||||||
if f'{f} {l}' == "Nat Dalton".lower():
|
if f"{f} {l}" == "Nat Dalton".lower():
|
||||||
possnames += apply_variations("nathanael", "dalton") # correct. He has a weird spelling.
|
possnames += apply_variations("nathanael", "dalton") # correct. He has a weird spelling.
|
||||||
if f'{f} {l}' == "Mike Richardson".lower():
|
if f"{f} {l}" == "Mike Richardson".lower():
|
||||||
possnames.append("mta")
|
possnames.append("mta")
|
||||||
possnames.append("miketa")
|
possnames.append("miketa")
|
||||||
possnames.append("mike the animal")
|
possnames.append("mike the animal")
|
||||||
possnames.append("animal")
|
possnames.append("animal")
|
||||||
if f'{f} {l}' == "Eric Landgraf".lower():
|
if f"{f} {l}" == "Eric Landgraf".lower():
|
||||||
possnames.append("eric c.landgraf")
|
possnames.append("eric c.landgraf")
|
||||||
possnames.append("eric c. landgraf")
|
possnames.append("eric c. landgraf")
|
||||||
possnames.append("eric c landgraf")
|
possnames.append("eric c landgraf")
|
||||||
if f'{f} {l}' == "Nadia Raeburn".lower():
|
if f"{f} {l}" == "Nadia Raeburn".lower():
|
||||||
possnames.append("nadia rc")
|
possnames.append("nadia rc")
|
||||||
possnames.append("nadia raeburn-cherradi")
|
possnames.append("nadia raeburn-cherradi")
|
||||||
|
|
||||||
for i in [3, 4, 5, 6]:
|
for i in [3, 4, 5, 6]:
|
||||||
lim = min(i, len(f)+1) # short form, e.g. Dan for Daniel.
|
lim = min(i, len(f) + 1) # short form, e.g. Dan for Daniel.
|
||||||
if f[:lim] not in short:
|
if f[:lim] not in short:
|
||||||
short[f[:lim]]= personexpedition
|
short[f[:lim]] = personexpedition
|
||||||
else:
|
else:
|
||||||
dellist.append(f[:lim])
|
dellist.append(f[:lim])
|
||||||
|
|
||||||
possnames = set(possnames) # remove duplicates
|
possnames = set(possnames) # remove duplicates
|
||||||
for possname in possnames:
|
for possname in possnames:
|
||||||
if possname in res:
|
if possname in res:
|
||||||
duplicates.add(possname)
|
duplicates.add(possname)
|
||||||
@ -292,12 +312,10 @@ def GetPersonExpeditionNameLookup(expedition):
|
|||||||
del res[possname]
|
del res[possname]
|
||||||
|
|
||||||
for possname in dellist:
|
for possname in dellist:
|
||||||
if possname in short: #always true ?
|
if possname in short: # always true ?
|
||||||
del short[possname]
|
del short[possname]
|
||||||
for shortname in short:
|
for shortname in short:
|
||||||
res[shortname] = short[shortname]
|
res[shortname] = short[shortname]
|
||||||
|
|
||||||
|
|
||||||
Gpersonexpeditionnamelookup[expedition.name] = res
|
Gpersonexpeditionnamelookup[expedition.name] = res
|
||||||
return res
|
return res
|
||||||
|
|
||||||
|
@ -17,8 +17,8 @@ from troggle.core.models.troggle import DataIssue
|
|||||||
from troggle.core.utils import save_carefully
|
from troggle.core.utils import save_carefully
|
||||||
from troggle.core.views.scans import datewallet
|
from troggle.core.views.scans import datewallet
|
||||||
|
|
||||||
'''Searches through all the survey scans directories (wallets) in expofiles, looking for images to be referenced.
|
"""Searches through all the survey scans directories (wallets) in expofiles, looking for images to be referenced.
|
||||||
'''
|
"""
|
||||||
|
|
||||||
contentsjson = "contents.json"
|
contentsjson = "contents.json"
|
||||||
|
|
||||||
@ -26,59 +26,85 @@ git = settings.GIT
|
|||||||
|
|
||||||
# to do: Actually read all the JSON files and set the survex file field appropriately!
|
# to do: Actually read all the JSON files and set the survex file field appropriately!
|
||||||
|
|
||||||
|
|
||||||
def setwalletyear(wallet):
    """Call wallet.year() purely for its side effect and discard the result.

    According to the original inline note, calling year() is what stores the value
    on the wallet as `walletyear`; that happens inside the Wallet model, not here.
    """
    wallet.year()
|
||||||
|
|
||||||
|
|
||||||
def load_all_scans():
|
def load_all_scans():
|
||||||
'''This iterates through the scans directories (either here or on the remote server)
|
"""This iterates through the scans directories (either here or on the remote server)
|
||||||
and builds up the models we can access later.
|
and builds up the models we can access later.
|
||||||
|
|
||||||
It does NOT read or validate anything in the JSON data attached to each wallet. Those checks
|
It does NOT read or validate anything in the JSON data attached to each wallet. Those checks
|
||||||
are done at runtime, when a wallet is accessed, not at import time.
|
are done at runtime, when a wallet is accessed, not at import time.
|
||||||
|
|
||||||
'''
|
"""
|
||||||
print(' - Loading Survey Scans')
|
print(" - Loading Survey Scans")
|
||||||
|
|
||||||
SingleScan.objects.all().delete()
|
SingleScan.objects.all().delete()
|
||||||
Wallet.objects.all().delete()
|
Wallet.objects.all().delete()
|
||||||
print(' - deleting all Wallet and SingleScan objects')
|
print(" - deleting all Wallet and SingleScan objects")
|
||||||
DataIssue.objects.filter(parser='scans').delete()
|
DataIssue.objects.filter(parser="scans").delete()
|
||||||
|
|
||||||
# These are valid old file types to be visible, they are not necessarily allowed to be uploaded to a new wallet.
|
# These are valid old file types to be visible, they are not necessarily allowed to be uploaded to a new wallet.
|
||||||
valids = [".top",".txt",".tif",".png",".jpg",".jpeg",".pdf",".svg",".gif",".xvi",
|
valids = [
|
||||||
".json",".autosave",".sxd",".svx",".th",".th2",".tdr",".sql",".zip",".dxf",".3d",
|
".top",
|
||||||
".ods",".csv",".xcf",".xml"]
|
".txt",
|
||||||
validnames = ["thconfig","manifest"]
|
".tif",
|
||||||
|
".png",
|
||||||
|
".jpg",
|
||||||
|
".jpeg",
|
||||||
|
".pdf",
|
||||||
|
".svg",
|
||||||
|
".gif",
|
||||||
|
".xvi",
|
||||||
|
".json",
|
||||||
|
".autosave",
|
||||||
|
".sxd",
|
||||||
|
".svx",
|
||||||
|
".th",
|
||||||
|
".th2",
|
||||||
|
".tdr",
|
||||||
|
".sql",
|
||||||
|
".zip",
|
||||||
|
".dxf",
|
||||||
|
".3d",
|
||||||
|
".ods",
|
||||||
|
".csv",
|
||||||
|
".xcf",
|
||||||
|
".xml",
|
||||||
|
]
|
||||||
|
validnames = ["thconfig", "manifest"]
|
||||||
|
|
||||||
# iterate into the surveyscans directory
|
# iterate into the surveyscans directory
|
||||||
# Not all folders with files in them are wallets.
|
# Not all folders with files in them are wallets.
|
||||||
# they are if they are /2010/2010#33
|
# they are if they are /2010/2010#33
|
||||||
# or /1996-1999NotKHbook/
|
# or /1996-1999NotKHbook/
|
||||||
# but not if they are /2010/2010#33/therion/ : the wallet is /2010#33/ not /therion/
|
# but not if they are /2010/2010#33/therion/ : the wallet is /2010#33/ not /therion/
|
||||||
print(' ', end='')
|
print(" ", end="")
|
||||||
scans_path = Path(settings.SCANS_ROOT)
|
scans_path = Path(settings.SCANS_ROOT)
|
||||||
seen = []
|
seen = []
|
||||||
c=0
|
c = 0
|
||||||
wallets = {}
|
wallets = {}
|
||||||
for p in scans_path.rglob('*'):
|
for p in scans_path.rglob("*"):
|
||||||
if p.is_file():
|
if p.is_file():
|
||||||
if p.suffix.lower() not in valids and p.name.lower() not in validnames:
|
if p.suffix.lower() not in valids and p.name.lower() not in validnames:
|
||||||
# print(f"'{p}'", end='\n')
|
# print(f"'{p}'", end='\n')
|
||||||
pass
|
pass
|
||||||
elif p.parent == scans_path: # skip files directly in /surveyscans/
|
elif p.parent == scans_path: # skip files directly in /surveyscans/
|
||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
|
|
||||||
c+=1
|
c += 1
|
||||||
if c % 15 == 0 :
|
if c % 15 == 0:
|
||||||
print(".", end='')
|
print(".", end="")
|
||||||
if c % 750 == 0 :
|
if c % 750 == 0:
|
||||||
print("\n ", end='')
|
print("\n ", end="")
|
||||||
|
|
||||||
if p.parent.parent.parent.parent == scans_path:
|
if p.parent.parent.parent.parent == scans_path:
|
||||||
# print(f"too deep {p}", end='\n')
|
# print(f"too deep {p}", end='\n')
|
||||||
fpath = p.parent.parent
|
fpath = p.parent.parent
|
||||||
walletname = p.parent.parent.name # wallet is one level higher
|
walletname = p.parent.parent.name # wallet is one level higher
|
||||||
else:
|
else:
|
||||||
fpath = p.parent
|
fpath = p.parent
|
||||||
walletname = p.parent.name
|
walletname = p.parent.name
|
||||||
@ -86,7 +112,7 @@ def load_all_scans():
|
|||||||
if walletname in wallets:
|
if walletname in wallets:
|
||||||
wallet = wallets[walletname]
|
wallet = wallets[walletname]
|
||||||
else:
|
else:
|
||||||
print("", flush=True, end='')
|
print("", flush=True, end="")
|
||||||
# Create the wallet object. But we don't have a date for it yet.
|
# Create the wallet object. But we don't have a date for it yet.
|
||||||
wallet = Wallet(fpath=fpath, walletname=walletname)
|
wallet = Wallet(fpath=fpath, walletname=walletname)
|
||||||
setwalletyear(wallet)
|
setwalletyear(wallet)
|
||||||
@ -96,26 +122,24 @@ def load_all_scans():
|
|||||||
singlescan = SingleScan(ffile=fpath, name=p.name, wallet=wallet)
|
singlescan = SingleScan(ffile=fpath, name=p.name, wallet=wallet)
|
||||||
singlescan.save()
|
singlescan.save()
|
||||||
|
|
||||||
|
|
||||||
# only printing progress:
|
# only printing progress:
|
||||||
tag = p.parent
|
tag = p.parent
|
||||||
if len(walletname)>4:
|
if len(walletname) > 4:
|
||||||
if walletname[4] == "#":
|
if walletname[4] == "#":
|
||||||
tag = p.parent.parent
|
tag = p.parent.parent
|
||||||
|
|
||||||
if tag not in seen:
|
if tag not in seen:
|
||||||
print(f" {tag.name} ", end='')
|
print(f" {tag.name} ", end="")
|
||||||
if len(str(tag.name)) > 17:
|
if len(str(tag.name)) > 17:
|
||||||
print('\n ', end='')
|
print("\n ", end="")
|
||||||
seen.append(tag)
|
seen.append(tag)
|
||||||
|
|
||||||
|
print(f"\n - found and loaded {c:,} acceptable scan files in {len(wallets):,} wallets")
|
||||||
print(f'\n - found and loaded {c:,} acceptable scan files in {len(wallets):,} wallets')
|
|
||||||
|
|
||||||
# but we also need to check if JSON exists, even if there are no uploaded scan files.
|
# but we also need to check if JSON exists, even if there are no uploaded scan files.
|
||||||
# Here we know there is a rigid folder structure, so no need to look for sub folders
|
# Here we know there is a rigid folder structure, so no need to look for sub folders
|
||||||
print(f"\n - Checking for wallets where JSON exists, but there may be no uploaded scan files:")
|
print(f"\n - Checking for wallets where JSON exists, but there may be no uploaded scan files:")
|
||||||
print(' ', end='')
|
print(" ", end="")
|
||||||
wjson = 0
|
wjson = 0
|
||||||
contents_path = Path(settings.DRAWINGS_DATA, "walletjson")
|
contents_path = Path(settings.DRAWINGS_DATA, "walletjson")
|
||||||
for yeardir in contents_path.iterdir():
|
for yeardir in contents_path.iterdir():
|
||||||
@ -126,10 +150,10 @@ def load_all_scans():
|
|||||||
|
|
||||||
if walletname not in wallets:
|
if walletname not in wallets:
|
||||||
wjson += 1
|
wjson += 1
|
||||||
if wjson % 10 == 0 :
|
if wjson % 10 == 0:
|
||||||
print("\n ", end='')
|
print("\n ", end="")
|
||||||
|
|
||||||
print(f"{walletname} ", end='')
|
print(f"{walletname} ", end="")
|
||||||
fpath = Path(settings.SCANS_ROOT, str(yeardir.stem), walletname)
|
fpath = Path(settings.SCANS_ROOT, str(yeardir.stem), walletname)
|
||||||
# The wallets found from JSON should all have dates already
|
# The wallets found from JSON should all have dates already
|
||||||
wallet, created = Wallet.objects.update_or_create(walletname=walletname, fpath=fpath)
|
wallet, created = Wallet.objects.update_or_create(walletname=walletname, fpath=fpath)
|
||||||
@ -140,9 +164,11 @@ def load_all_scans():
|
|||||||
# But we *do* set the walletyear:
|
# But we *do* set the walletyear:
|
||||||
setwalletyear(wallet)
|
setwalletyear(wallet)
|
||||||
if not created:
|
if not created:
|
||||||
print(f"\n - {walletname} was not created, but was not in directory walk of /surveyscans/. Who created it?")
|
print(
|
||||||
|
f"\n - {walletname} was not created, but was not in directory walk of /surveyscans/. Who created it?"
|
||||||
|
)
|
||||||
wallet.save()
|
wallet.save()
|
||||||
print(f'\n - found another {wjson:,} JSON files, making a total of {len(wallets):,} wallets')
|
print(f"\n - found another {wjson:,} JSON files, making a total of {len(wallets):,} wallets")
|
||||||
wallets = Wallet.objects.filter(walletyear=None)
|
wallets = Wallet.objects.filter(walletyear=None)
|
||||||
for w in wallets:
|
for w in wallets:
|
||||||
w.walletyear = datetime.date(1999, 1, 1)
|
w.walletyear = datetime.date(1999, 1, 1)
|
||||||
|
1245
parsers/survex.py
1245
parsers/survex.py
File diff suppressed because it is too large
Load Diff
5
pyproject.toml
Normal file
5
pyproject.toml
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
[tool.black]
|
||||||
|
line-length = 120
|
||||||
|
|
||||||
|
[tool.isort]
|
||||||
|
profile = 'black'
|
92
settings.py
92
settings.py
@ -7,12 +7,12 @@ https://docs.djangoproject.com/en/dev/topics/settings/
|
|||||||
For the full list of settings and their values, see
|
For the full list of settings and their values, see
|
||||||
https://docs.djangoproject.com/en/dev/ref/settings/
|
https://docs.djangoproject.com/en/dev/ref/settings/
|
||||||
"""
|
"""
|
||||||
#Imports should be grouped in the following order:
|
# Imports should be grouped in the following order:
|
||||||
|
|
||||||
#1.Standard library imports.
|
# 1.Standard library imports.
|
||||||
#2.Related third party imports.
|
# 2.Related third party imports.
|
||||||
#3.Local application/library specific imports.
|
# 3.Local application/library specific imports.
|
||||||
#4.You should put a blank line between each group of imports.
|
# 4.You should put a blank line between each group of imports.
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
@ -24,7 +24,7 @@ print("* importing troggle/settings.py")
|
|||||||
# default value, then gets overwritten by real secrets
|
# default value, then gets overwritten by real secrets
|
||||||
SECRET_KEY = "not-the-real-secret-key-a#vaeozn0---^fj!355qki*vj2"
|
SECRET_KEY = "not-the-real-secret-key-a#vaeozn0---^fj!355qki*vj2"
|
||||||
|
|
||||||
GIT = 'git' # command for running git
|
GIT = "git" # command for running git
|
||||||
|
|
||||||
# Note that this builds upon the django system installed
|
# Note that this builds upon the django system installed
|
||||||
# global settings in
|
# global settings in
|
||||||
@ -32,18 +32,18 @@ GIT = 'git' # command for running git
|
|||||||
# read https://docs.djangoproject.com/en/3.0/topics/settings/
|
# read https://docs.djangoproject.com/en/3.0/topics/settings/
|
||||||
|
|
||||||
# Build paths inside the project like this: os.path.join(BASE_DIR, ...)
|
# Build paths inside the project like this: os.path.join(BASE_DIR, ...)
|
||||||
#BASE_DIR = os.path.dirname(os.path.dirname(__file__))
|
# BASE_DIR = os.path.dirname(os.path.dirname(__file__))
|
||||||
|
|
||||||
# Django settings for troggle project.
|
# Django settings for troggle project.
|
||||||
|
|
||||||
ALLOWED_HOSTS = ['*', 'expo.survex.com', '.survex.com', 'localhost', '127.0.0.1', '192.168.0.5' ]
|
ALLOWED_HOSTS = ["*", "expo.survex.com", ".survex.com", "localhost", "127.0.0.1", "192.168.0.5"]
|
||||||
|
|
||||||
ADMINS = (
|
ADMINS = (
|
||||||
# ('Your Name', 'your_email@domain.com'),
|
# ('Your Name', 'your_email@domain.com'),
|
||||||
)
|
)
|
||||||
MANAGERS = ADMINS
|
MANAGERS = ADMINS
|
||||||
|
|
||||||
#LOGIN_URL = '/accounts/login/' # this is the default value so does not need to be set
|
# LOGIN_URL = '/accounts/login/' # this is the default value so does not need to be set
|
||||||
|
|
||||||
# Local time zone for this installation. Choices can be found here:
|
# Local time zone for this installation. Choices can be found here:
|
||||||
# http://en.wikipedia.org/wiki/List_of_tz_zones_by_name
|
# http://en.wikipedia.org/wiki/List_of_tz_zones_by_name
|
||||||
@ -51,11 +51,11 @@ MANAGERS = ADMINS
|
|||||||
# If running in a Windows environment this must be set to the same as your
|
# If running in a Windows environment this must be set to the same as your
|
||||||
# system time zone.
|
# system time zone.
|
||||||
USE_TZ = True
|
USE_TZ = True
|
||||||
TIME_ZONE = 'Europe/London'
|
TIME_ZONE = "Europe/London"
|
||||||
|
|
||||||
# Language code for this installation. All choices can be found here:
|
# Language code for this installation. All choices can be found here:
|
||||||
# http://www.i18nguy.com/unicode/language-identifiers.html
|
# http://www.i18nguy.com/unicode/language-identifiers.html
|
||||||
LANGUAGE_CODE = 'en-uk'
|
LANGUAGE_CODE = "en-uk"
|
||||||
|
|
||||||
SITE_ID = 1
|
SITE_ID = 1
|
||||||
|
|
||||||
@ -73,37 +73,39 @@ SURVEX_TOPNAME = "1623-and-1626-no-schoenberg-hs"
|
|||||||
# Caves for which survex files exist, but are not otherwise registered
|
# Caves for which survex files exist, but are not otherwise registered
|
||||||
# replaced (?) by expoweb/cave_data/pendingcaves.txt
|
# replaced (?) by expoweb/cave_data/pendingcaves.txt
|
||||||
# PENDING = ["1626-361", "2007-06", "2009-02",
|
# PENDING = ["1626-361", "2007-06", "2009-02",
|
||||||
# "2012-ns-01", "2012-ns-02", "2010-04", "2012-ns-05", "2012-ns-06",
|
# "2012-ns-01", "2012-ns-02", "2010-04", "2012-ns-05", "2012-ns-06",
|
||||||
# "2012-ns-07", "2012-ns-08", "2012-ns-12", "2012-ns-14", "2012-ns-15", "2014-bl888",
|
# "2012-ns-07", "2012-ns-08", "2012-ns-12", "2012-ns-14", "2012-ns-15", "2014-bl888",
|
||||||
# "2018-pf-01", "2018-pf-02"]
|
# "2018-pf-01", "2018-pf-02"]
|
||||||
|
|
||||||
APPEND_SLASH = False # never relevant because we have urls that match unknown files and produce an 'edit this page' response
|
APPEND_SLASH = (
|
||||||
SMART_APPEND_SLASH = True #not working as middleware different after Dj2.0
|
False # never relevant because we have urls that match unknown files and produce an 'edit this page' response
|
||||||
|
)
|
||||||
|
SMART_APPEND_SLASH = True # not working as middleware different after Dj2.0
|
||||||
|
|
||||||
|
|
||||||
LOGIN_REDIRECT_URL = '/' # does not seem to have any effect
|
LOGIN_REDIRECT_URL = "/" # does not seem to have any effect
|
||||||
|
|
||||||
SECURE_CONTENT_TYPE_NOSNIFF = True
|
SECURE_CONTENT_TYPE_NOSNIFF = True
|
||||||
SECURE_BROWSER_XSS_FILTER = True
|
SECURE_BROWSER_XSS_FILTER = True
|
||||||
# SESSION_COOKIE_SECURE = True # if enabled, cannot login to Django control panel, bug elsewhere?
|
# SESSION_COOKIE_SECURE = True # if enabled, cannot login to Django control panel, bug elsewhere?
|
||||||
# CSRF_COOKIE_SECURE = True # if enabled only sends cookies over SSL
|
# CSRF_COOKIE_SECURE = True # if enabled only sends cookies over SSL
|
||||||
X_FRAME_OPTIONS = 'DENY' # changed to "DENY" after I eliminated all the iframes e.g. /xmlvalid.html
|
X_FRAME_OPTIONS = "DENY" # changed to "DENY" after I eliminated all the iframes e.g. /xmlvalid.html
|
||||||
|
|
||||||
DEFAULT_AUTO_FIELD = 'django.db.models.BigAutoField' # from Django 3.2
|
DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField" # from Django 3.2
|
||||||
|
|
||||||
INSTALLED_APPS = (
|
INSTALLED_APPS = (
|
||||||
'django.contrib.admin',
|
"django.contrib.admin",
|
||||||
'django.contrib.auth', # includes the url redirections for login, logout
|
"django.contrib.auth", # includes the url redirections for login, logout
|
||||||
'django.contrib.contenttypes',
|
"django.contrib.contenttypes",
|
||||||
'django.contrib.sessions',
|
"django.contrib.sessions",
|
||||||
'django.contrib.messages',
|
"django.contrib.messages",
|
||||||
'django.contrib.admindocs',
|
"django.contrib.admindocs",
|
||||||
'django.forms', #Required to customise widget templates
|
"django.forms", # Required to customise widget templates
|
||||||
# 'django.contrib.staticfiles', # We put our CSS etc explicitly in the right place so do not need this
|
# 'django.contrib.staticfiles', # We put our CSS etc explicitly in the right place so do not need this
|
||||||
'troggle.core',
|
"troggle.core",
|
||||||
)
|
)
|
||||||
|
|
||||||
FORM_RENDERER = 'django.forms.renderers.TemplatesSetting' #Required to customise widget templates
|
FORM_RENDERER = "django.forms.renderers.TemplatesSetting" # Required to customise widget templates
|
||||||
|
|
||||||
# See the recommended order of these in https://docs.djangoproject.com/en/2.2/ref/middleware/
|
# See the recommended order of these in https://docs.djangoproject.com/en/2.2/ref/middleware/
|
||||||
# Note that this is a radically different onion architecture from earlier versions though it looks the same,
|
# Note that this is a radically different onion architecture from earlier versions though it looks the same,
|
||||||
@ -111,38 +113,38 @@ FORM_RENDERER = 'django.forms.renderers.TemplatesSetting' #Required to customise
|
|||||||
# Seriously, read this: https://www.webforefront.com/django/middlewaredjango.html which is MUCH BETTER than the docs
|
# Seriously, read this: https://www.webforefront.com/django/middlewaredjango.html which is MUCH BETTER than the docs
|
||||||
MIDDLEWARE = [
|
MIDDLEWARE = [
|
||||||
#'django.middleware.security.SecurityMiddleware', # SECURE_SSL_REDIRECT and SECURE_SSL_HOST # we don't use this
|
#'django.middleware.security.SecurityMiddleware', # SECURE_SSL_REDIRECT and SECURE_SSL_HOST # we don't use this
|
||||||
'django.middleware.gzip.GZipMiddleware', # not needed when expofiles and photos served by apache
|
"django.middleware.gzip.GZipMiddleware", # not needed when expofiles and photos served by apache
|
||||||
'django.contrib.sessions.middleware.SessionMiddleware', # Manages sessions, if CSRF_USE_SESSIONS then it needs to be early
|
"django.contrib.sessions.middleware.SessionMiddleware", # Manages sessions, if CSRF_USE_SESSIONS then it needs to be early
|
||||||
'django.middleware.common.CommonMiddleware', # DISALLOWED_USER_AGENTS, APPEND_SLASH and PREPEND_WWW
|
"django.middleware.common.CommonMiddleware", # DISALLOWED_USER_AGENTS, APPEND_SLASH and PREPEND_WWW
|
||||||
'django.middleware.csrf.CsrfViewMiddleware', # Cross Site Request Forgeries by adding hidden form fields to POST
|
"django.middleware.csrf.CsrfViewMiddleware", # Cross Site Request Forgeries by adding hidden form fields to POST
|
||||||
'django.contrib.auth.middleware.AuthenticationMiddleware', # Adds the user attribute, representing the currently-logged-in user
|
"django.contrib.auth.middleware.AuthenticationMiddleware", # Adds the user attribute, representing the currently-logged-in user
|
||||||
'django.contrib.admindocs.middleware.XViewMiddleware', # this and docutils needed by admindocs
|
"django.contrib.admindocs.middleware.XViewMiddleware", # this and docutils needed by admindocs
|
||||||
'django.contrib.messages.middleware.MessageMiddleware', # Cookie-based and session-based message support. Needed by admin system
|
"django.contrib.messages.middleware.MessageMiddleware", # Cookie-based and session-based message support. Needed by admin system
|
||||||
'django.middleware.clickjacking.XFrameOptionsMiddleware', # clickjacking protection via the X-Frame-Options header
|
"django.middleware.clickjacking.XFrameOptionsMiddleware", # clickjacking protection via the X-Frame-Options header
|
||||||
#'django.middleware.security.SecurityMiddleware', # SECURE_HSTS_SECONDS, SECURE_CONTENT_TYPE_NOSNIFF, SECURE_BROWSER_XSS_FILTER, SECURE_REFERRER_POLICY, and SECURE_SSL_REDIRECT
|
#'django.middleware.security.SecurityMiddleware', # SECURE_HSTS_SECONDS, SECURE_CONTENT_TYPE_NOSNIFF, SECURE_BROWSER_XSS_FILTER, SECURE_REFERRER_POLICY, and SECURE_SSL_REDIRECT
|
||||||
#'troggle.core.middleware.SmartAppendSlashMiddleware' # needs adapting after Dj2.0
|
#'troggle.core.middleware.SmartAppendSlashMiddleware' # needs adapting after Dj2.0
|
||||||
]
|
]
|
||||||
|
|
||||||
ROOT_URLCONF = 'troggle.urls'
|
ROOT_URLCONF = "troggle.urls"
|
||||||
|
|
||||||
WSGI_APPLICATION = 'troggle.wsgi.application' # change to asgi as soon as we upgrade to Django 3.0
|
WSGI_APPLICATION = "troggle.wsgi.application" # change to asgi as soon as we upgrade to Django 3.0
|
||||||
|
|
||||||
ACCOUNT_ACTIVATION_DAYS=3
|
ACCOUNT_ACTIVATION_DAYS = 3
|
||||||
|
|
||||||
# AUTH_PROFILE_MODULE = 'core.person' # used by removed profiles app ?
|
# AUTH_PROFILE_MODULE = 'core.person' # used by removed profiles app ?
|
||||||
|
|
||||||
QM_PATTERN="\[\[\s*[Qq][Mm]:([ABC]?)(\d{4})-(\d*)-(\d*)\]\]"
|
QM_PATTERN = "\[\[\s*[Qq][Mm]:([ABC]?)(\d{4})-(\d*)-(\d*)\]\]"
|
||||||
|
|
||||||
# Re-enable TinyMCE when Dj upgraded to v3. Also templates/editexpopage.html
|
# Re-enable TinyMCE when Dj upgraded to v3. Also templates/editexpopage.html
|
||||||
# TINYMCE_DEFAULT_CONFIG = {
|
# TINYMCE_DEFAULT_CONFIG = {
|
||||||
# 'plugins': "table,spellchecker,paste,searchreplace",
|
# 'plugins': "table,spellchecker,paste,searchreplace",
|
||||||
# 'theme': "advanced",
|
# 'theme': "advanced",
|
||||||
# }
|
# }
|
||||||
# TINYMCE_SPELLCHECKER = False
|
# TINYMCE_SPELLCHECKER = False
|
||||||
# TINYMCE_COMPRESSOR = True
|
# TINYMCE_COMPRESSOR = True
|
||||||
|
|
||||||
TEST_RUNNER = 'django.test.runner.DiscoverRunner'
|
TEST_RUNNER = "django.test.runner.DiscoverRunner"
|
||||||
|
|
||||||
from localsettings import *
|
from localsettings import *
|
||||||
|
|
||||||
#localsettings needs to take precedence. Call it to override any existing vars.
|
# localsettings needs to take precedence. Call it to override any existing vars.
|
||||||
|
Loading…
Reference in New Issue
Block a user