Cave import & reports fixes

This commit is contained in:
Philip Sargent 2022-03-05 12:20:26 +00:00
parent 1b9fccc2a4
commit 32377f4e6c
4 changed files with 31 additions and 17 deletions

View File

@ -6,10 +6,10 @@ from pathlib import Path
from django.conf import settings from django.conf import settings
from django.db import transaction from django.db import transaction
from troggle.settings import SURVEX_DATA, EXPOWEB from troggle.settings import SURVEX_DATA, EXPOWEB, CAVEDESCRIPTIONS, ENTRANCEDESCRIPTIONS
from troggle.core.models.troggle import DataIssue from troggle.core.models.troggle import DataIssue
from troggle.core.models.caves import Area, Cave, Entrance, CaveSlug, EntranceSlug, CaveAndEntrance from troggle.core.models.caves import Area, Cave, Entrance, CaveSlug, EntranceSlug, CaveAndEntrance
'''Reads all the cave description data by parsing the xml files (stored as e.g. :EXPOWEB:/cave-data/1623-161.html ) '''Reads all the cave description data by parsing the xml files (stored as e.g. :EXPOWEB:/cave_data/1623-161.html )
and creating the various Cave, Entrance and necessary Area objects. and creating the various Cave, Entrance and necessary Area objects.
BUT in Django 2.0 and later we cannot do any queries on data we have just entered BUT in Django 2.0 and later we cannot do any queries on data we have just entered
@ -23,7 +23,7 @@ todo='''- db Update does not work when a cave id is in the pending list but a pr
and is being imported. It should work. But currently Django aborts and the file is not read in. and is being imported. It should work. But currently Django aborts and the file is not read in.
- Cannot use Edit This Page for pendingcaves.txt_edit as Edit This Page is expecting an html file. - Cannot use Edit This Page for pendingcaves.txt_edit as Edit This Page is expecting an html file.
So we will need a separate file-editing capability just for this configuration file. So we will need a separate file-editing capability just for this configuration file ?!
''' '''
entrances_xslug = {} entrances_xslug = {}
caves_xslug = {} caves_xslug = {}
@ -140,7 +140,7 @@ def readcaves():
# For those caves which do not have cave_data/1623-xxx.html XML files even though they exist and have surveys # For those caves which do not have cave_data/1623-xxx.html XML files even though they exist and have surveys
# should put this in a simple list which can be edited using 'Edit this file' # should put this in a simple list which can be edited using 'Edit this file'
pending = set() pending = set()
fpending = Path(settings.CAVEDESCRIPTIONS, "pendingcaves.txt") fpending = Path(CAVEDESCRIPTIONS, "pendingcaves.txt")
if fpending.is_file(): if fpending.is_file():
with open(fpending, "r") as fo: with open(fpending, "r") as fo:
cids = fo.readlines() cids = fo.readlines()
@ -153,6 +153,7 @@ def readcaves():
Entrance.objects.all().delete() Entrance.objects.all().delete()
# Clear the cave data issues and the caves as we are reloading # Clear the cave data issues and the caves as we are reloading
DataIssue.objects.filter(parser='caves').delete() DataIssue.objects.filter(parser='caves').delete()
DataIssue.objects.filter(parser='caves ok').delete()
DataIssue.objects.filter(parser='entrances').delete() DataIssue.objects.filter(parser='entrances').delete()
area_1623 = Area.objects.update_or_create(short_name = "1623", parent = None) area_1623 = Area.objects.update_or_create(short_name = "1623", parent = None)
@ -176,9 +177,9 @@ def readcaves():
raise raise
with transaction.atomic(): with transaction.atomic():
print(" - settings.CAVEDESCRIPTIONS: ", settings.CAVEDESCRIPTIONS) print(" - settings.CAVEDESCRIPTIONS: ", CAVEDESCRIPTIONS)
print(" - Reading Entrances from entrance descriptions xml files") print(" - Reading Entrances from entrance descriptions xml files")
for filename in next(os.walk(settings.ENTRANCEDESCRIPTIONS))[2]: #Should be a better way of getting a list of files for filename in next(os.walk(ENTRANCEDESCRIPTIONS))[2]: #Should be a better way of getting a list of files
# if filename.endswith('.html'): # if filename.endswith('.html'):
# if Path(filename).stem[5:] in pending: # if Path(filename).stem[5:] in pending:
# print(f'Skipping pending entrance dummy file <{filename}>') # print(f'Skipping pending entrance dummy file <{filename}>')
@ -187,7 +188,7 @@ def readcaves():
readentrance(filename) readentrance(filename)
print(" - Reading Caves from cave descriptions xml files") print(" - Reading Caves from cave descriptions xml files")
for filename in next(os.walk(settings.CAVEDESCRIPTIONS))[2]: #Should be a better way of getting a list of files for filename in next(os.walk(CAVEDESCRIPTIONS))[2]: #Should be a better way of getting a list of files
if filename.endswith('.html'): if filename.endswith('.html'):
readcave(filename) readcave(filename)
@ -197,10 +198,10 @@ def readentrance(filename):
global areas_xslug global areas_xslug
# Note: these are HTML files in the EXPOWEB repo, not from the loser repo. # Note: these are HTML files in the EXPOWEB repo, not from the loser repo.
with open(os.path.join(settings.ENTRANCEDESCRIPTIONS, filename)) as f: with open(os.path.join(ENTRANCEDESCRIPTIONS, filename)) as f:
contents = f.read() contents = f.read()
context = filename context = filename
#print("Reading file ENTRANCE {} / {}".format(settings.ENTRANCEDESCRIPTIONS, filename)) #print("Reading file ENTRANCE {} / {}".format(ENTRANCEDESCRIPTIONS, filename))
entrancecontentslist = getXML(contents, "entrance", maxItems = 1, context = context) entrancecontentslist = getXML(contents, "entrance", maxItems = 1, context = context)
if len(entrancecontentslist) != 1: if len(entrancecontentslist) != 1:
message = f'! BAD ENTRANCE at "{filename}"' message = f'! BAD ENTRANCE at "{filename}"'
@ -293,7 +294,7 @@ def readcave(filename):
global areas_xslug global areas_xslug
# Note: these are HTML files in the EXPOWEB repo, not from the loser repo. # Note: these are HTML files in the EXPOWEB repo, not from the loser repo.
with open(os.path.join(settings.CAVEDESCRIPTIONS, filename)) as f: with open(os.path.join(CAVEDESCRIPTIONS, filename)) as f:
contents = f.read() contents = f.read()
context = filename context = filename
cavecontentslist = getXML(contents, "cave", maxItems = 1, context = context) cavecontentslist = getXML(contents, "cave", maxItems = 1, context = context)
@ -419,9 +420,14 @@ def readcave(filename):
DataIssue.objects.create(parser='caves', message=message, url=f'/cave/{slug}/edit/') DataIssue.objects.create(parser='caves', message=message, url=f'/cave/{slug}/edit/')
print(message) print(message)
if description_file[0]:
if description_file[0]: # if not an empty string
message = f' - {slug:12} complex description filename "{description_file[0]}" inside "{CAVEDESCRIPTIONS}/{filename}"'
DataIssue.objects.create(parser='caves ok', message=message, url=f'/cave/{slug}/edit/')
print(message)
if not (Path(EXPOWEB) / description_file[0]).is_file(): if not (Path(EXPOWEB) / description_file[0]).is_file():
message = f' ! {slug:12} description filename does not exist :{EXPOWEB}:"{description_file[0]}" in "{filename}"' message = f' ! {slug:12} description filename "{EXPOWEB}/{description_file[0]}" does not refer to a real file'
DataIssue.objects.create(parser='caves', message=message, url=f'/cave/{slug}/edit/') DataIssue.objects.create(parser='caves', message=message, url=f'/cave/{slug}/edit/')
print(message) print(message)
#c.description_file="" # done only once, to clear out cruft. #c.description_file="" # done only once, to clear out cruft.

View File

@ -1185,9 +1185,12 @@ class LoadingSurvex():
self.uniquename[fn] = [fullpath] self.uniquename[fn] = [fullpath]
else: else:
self.uniquename[fn].append(fullpath) self.uniquename[fn].append(fullpath)
message = f" ! NON-UNIQUE survex filename, overwriting in expowebcache/3d/ '{fn}' - '{self.uniquename[fn]}' #{len(self.uniquename[fn])}" # This is not an error now that we are moving .3d files to the :loser: directory tree
# message = f" ! NON-UNIQUE survex filename, '{fn}' - '{self.uniquename[fn]}' #{len(self.uniquename[fn])}"
# print(message)
# DataIssue.objects.create(parser='survex', message=message)
message = f" NOTE: non-unique survex filename, '{fn}' - '{self.uniquename[fn]}' #{len(self.uniquename[fn])}"
print(message) print(message)
DataIssue.objects.create(parser='survex', message=message)
def RunSurvexIfNeeded(self,fullpath): def RunSurvexIfNeeded(self,fullpath):

View File

@ -6,7 +6,7 @@
<h1>Loading data from files: Issues arising that need attention</h1> <h1>Loading data from files: Issues arising that need attention</h1>
<p> <p>
This is work in progress (April 2021). The URL links to the offending objects are enabled on only some types of fault as yet. This is work in progress (March 2022). The URL links to the offending objects are enabled on only some types of fault as yet.
<style> <style>
tr { text-align:center; tr { text-align:center;
font-family: Tahoma,'Trebuchet MS','Lucida Grande',Verdana, Arial, Helvetica, Sans-Serif; font-family: Tahoma,'Trebuchet MS','Lucida Grande',Verdana, Arial, Helvetica, Sans-Serif;

View File

@ -62,10 +62,12 @@ If anyone really cares, they can always look in the original survex file
</table> </table>
</div> </div>
<div style="color: slategray; background-color:lightcyan"> <div style="color: slategray; background-color:lightcyan">
<p>Horrible bug here but only when there is more than one survex block per day, <p>Horrible bug here but only when there is more than one survex block per day, or is there ?!
<p>What we thought was the bug:
e.g. see <a href="/personexpedition/Wookey/1999">Wookey 1999</a> where there are 3 eiscream survex blocks on 5th August. e.g. see <a href="/personexpedition/Wookey/1999">Wookey 1999</a> where there are 3 eiscream survex blocks on 5th August.
It duplicates the entry but gets it wrong. The length from the first block is displayed twice but there should be 3 rows: eiscream, eiscream2, eiscream3. It duplicates the entry but gets it wrong. The length from the first block is displayed twice but there should be 3 rows: eiscream, eiscream2, eiscream3.
<p>Fortunately it is <b>just this display on this page which is wrong</b>: no bad calculations get into the database.
<p>The interaction of django database query idioms with <a href="https://docs.djangoproject.com/en/1.11/ref/templates/api/">django HTML templating language</a> is a bit impenetrable here. <p>The interaction of django database query idioms with <a href="https://docs.djangoproject.com/en/1.11/ref/templates/api/">django HTML templating language</a> is a bit impenetrable here.
I blame Aaron Curtis who was too fond of being clever with the Django templating system I blame Aaron Curtis who was too fond of being clever with the Django templating system
instead of writing it in python anyone could understand.<br> instead of writing it in python anyone could understand.<br>
@ -76,5 +78,8 @@ instead or writing it in python anyone could understand.<br>
<br> <br>
- the connection between the two is made in the URL resolver in <var>troggle/urls.py</var> - the connection between the two is made in the URL resolver in <var>troggle/urls.py</var>
<p>To be fixed! <p>To be fixed!
<h3>What we now know</h3>
<p>The eiscream.svx file does indeed record 3 blocks: eiscream, eiscream2 & eiscream3. But (more) careful inspection shows that eiscream2 and eiscream3 are in
the year 2000, not in 1999. So they absolutely should not be shown here. So maybe everything is correct after all. (Well, apart from the duplication.)
</div> </div>
{% endblock %} {% endblock %}