Cave import & reports fixes

This commit is contained in:
Philip Sargent 2022-03-05 12:20:26 +00:00
parent 1b9fccc2a4
commit 32377f4e6c
4 changed files with 31 additions and 17 deletions

View File

@ -6,10 +6,10 @@ from pathlib import Path
from django.conf import settings
from django.db import transaction
from troggle.settings import SURVEX_DATA, EXPOWEB
from troggle.settings import SURVEX_DATA, EXPOWEB, CAVEDESCRIPTIONS, ENTRANCEDESCRIPTIONS
from troggle.core.models.troggle import DataIssue
from troggle.core.models.caves import Area, Cave, Entrance, CaveSlug, EntranceSlug, CaveAndEntrance
'''Reads all the cave description data by parsing the xml files (stored as e.g. :EXPOWEB:/cave-data/1623-161.html )
'''Reads all the cave description data by parsing the xml files (stored as e.g. :EXPOWEB:/cave_data/1623-161.html )
and creating the various Cave, Entrance and necessary Area objects.
BUT in Django 2.0 and later we cannot do any queries on data we have just entered
@ -23,7 +23,7 @@ todo='''- db Update does not work when a cave id is in the pending list but a pr
and is being imported. It should work. But currently Django aborts and the file is not read in.
- Cannot use Edit This Page for pendingcaves.txt_edit as Edit This Page is expecting an html file.
So we will need a separate file-editing capability just for this configuration file.
So we will need a separate file-editing capability just for this configuration file ?!
'''
entrances_xslug = {}
caves_xslug = {}
@ -140,7 +140,7 @@ def readcaves():
# For those caves which do not have cave_data/1623-xxx.html XML files even though they exist and have surveys
# should put this in a simple list which can be edited using 'Edit this file'
pending = set()
fpending = Path(settings.CAVEDESCRIPTIONS, "pendingcaves.txt")
fpending = Path(CAVEDESCRIPTIONS, "pendingcaves.txt")
if fpending.is_file():
with open(fpending, "r") as fo:
cids = fo.readlines()
@ -153,6 +153,7 @@ def readcaves():
Entrance.objects.all().delete()
# Clear the cave data issues and the caves as we are reloading
DataIssue.objects.filter(parser='caves').delete()
DataIssue.objects.filter(parser='caves ok').delete()
DataIssue.objects.filter(parser='entrances').delete()
area_1623 = Area.objects.update_or_create(short_name = "1623", parent = None)
@ -176,9 +177,9 @@ def readcaves():
raise
with transaction.atomic():
print(" - settings.CAVEDESCRIPTIONS: ", settings.CAVEDESCRIPTIONS)
print(" - settings.CAVEDESCRIPTIONS: ", CAVEDESCRIPTIONS)
print(" - Reading Entrances from entrance descriptions xml files")
for filename in next(os.walk(settings.ENTRANCEDESCRIPTIONS))[2]: #Should be a better way of getting a list of files
for filename in next(os.walk(ENTRANCEDESCRIPTIONS))[2]: #Should be a better way of getting a list of files
# if filename.endswith('.html'):
# if Path(filename).stem[5:] in pending:
# print(f'Skipping pending entrance dummy file <{filename}>')
@ -187,7 +188,7 @@ def readcaves():
readentrance(filename)
print(" - Reading Caves from cave descriptions xml files")
for filename in next(os.walk(settings.CAVEDESCRIPTIONS))[2]: #Should be a better way of getting a list of files
for filename in next(os.walk(CAVEDESCRIPTIONS))[2]: #Should be a better way of getting a list of files
if filename.endswith('.html'):
readcave(filename)
@ -197,10 +198,10 @@ def readentrance(filename):
global areas_xslug
# Note: these are HTML files in the EXPOWEB repo, not from the loser repo.
with open(os.path.join(settings.ENTRANCEDESCRIPTIONS, filename)) as f:
with open(os.path.join(ENTRANCEDESCRIPTIONS, filename)) as f:
contents = f.read()
context = filename
#print("Reading file ENTRANCE {} / {}".format(settings.ENTRANCEDESCRIPTIONS, filename))
#print("Reading file ENTRANCE {} / {}".format(ENTRANCEDESCRIPTIONS, filename))
entrancecontentslist = getXML(contents, "entrance", maxItems = 1, context = context)
if len(entrancecontentslist) != 1:
message = f'! BAD ENTRANCE at "{filename}"'
@ -293,7 +294,7 @@ def readcave(filename):
global areas_xslug
# Note: these are HTML files in the EXPOWEB repo, not from the loser repo.
with open(os.path.join(settings.CAVEDESCRIPTIONS, filename)) as f:
with open(os.path.join(CAVEDESCRIPTIONS, filename)) as f:
contents = f.read()
context = filename
cavecontentslist = getXML(contents, "cave", maxItems = 1, context = context)
@ -419,9 +420,14 @@ def readcave(filename):
DataIssue.objects.create(parser='caves', message=message, url=f'/cave/{slug}/edit/')
print(message)
if description_file[0]:
if description_file[0]: # if not an empty string
message = f' - {slug:12} complex description filename "{description_file[0]}" inside "{CAVEDESCRIPTIONS}/{filename}"'
DataIssue.objects.create(parser='caves ok', message=message, url=f'/cave/{slug}/edit/')
print(message)
if not (Path(EXPOWEB) / description_file[0]).is_file():
message = f' ! {slug:12} description filename does not exist :{EXPOWEB}:"{description_file[0]}" in "{filename}"'
message = f' ! {slug:12} description filename "{EXPOWEB}/{description_file[0]}" does not refer to a real file'
DataIssue.objects.create(parser='caves', message=message, url=f'/cave/{slug}/edit/')
print(message)
#c.description_file="" # done only once, to clear out cruft.

View File

@ -1185,9 +1185,12 @@ class LoadingSurvex():
self.uniquename[fn] = [fullpath]
else:
self.uniquename[fn].append(fullpath)
message = f" ! NON-UNIQUE survex filename, overwriting in expowebcache/3d/ '{fn}' - '{self.uniquename[fn]}' #{len(self.uniquename[fn])}"
# This is not an error now that we are moving .3d files to the :loser: directory tree
# message = f" ! NON-UNIQUE survex filename, '{fn}' - '{self.uniquename[fn]}' #{len(self.uniquename[fn])}"
# print(message)
# DataIssue.objects.create(parser='survex', message=message)
message = f" NOTE: non-unique survex filename, '{fn}' - '{self.uniquename[fn]}' #{len(self.uniquename[fn])}"
print(message)
DataIssue.objects.create(parser='survex', message=message)
def RunSurvexIfNeeded(self,fullpath):

View File

@ -6,7 +6,7 @@
<h1>Loading data from files: Issues arising that need attention</h1>
<p>
This is work in progress (April 2021).The URL links to the offending objects are enabled on only some types of fault as yet.
This is work in progress (March 2022). The URL links to the offending objects are enabled on only some types of fault as yet.
<style>
tr { text-align:center;
font-family: Tahoma,'Trebuchet MS','Lucida Grande',Verdana, Arial, Helvetica, Sans-Serif;

View File

@ -62,10 +62,12 @@ If anyone really cares, they can always look in the original survex file
</table>
</div>
<div style="color: slategray; background-color:lightcyan">
<p>Horrible bug here but only when there is more than one survex block per day,
<p>Horrible bug here but only when there is more than one survex block per day, or is there ?!
<p>What we thought was the bug:
e.g. see <a href="/personexpedition/Wookey/1999">Wookey 1999</a> where there are 3 eiscream survex blocks on 5th August.
It duplicates the entry but gets it wrong. The length from the first block is displayed twice but there should be 3 rows: eiscream, eiscream2, eiscream3.
<p>Fortunately it is <b>just this display on this page which is wrong</b>: no bad calculations get into the database.
<p>The interaction of django database query idioms with <a href="https://docs.djangoproject.com/en/1.11/ref/templates/api/">django HTML templating language</a> is a bit impenetrable here.
I blame Aaron Curtis who was too fond of being clever with the Django templating system
instead of writing it in python anyone could understand.<br>
@ -76,5 +78,8 @@ instead of writing it in python anyone could understand.<br>
<br>
- the connection between the two is made in the URL resolver in <var>troggle/urls.py</var>
<p>To be fixed!
<h3>What we now know</h3>
<p>The eiscream.svx file does indeed record 3 blocks: eiscream, eiscream2 &amp; eiscream3. But (more) careful inspection shows that eiscream2 and eiscream3 are in
the year 2000, not in 1999. So they absolutely should not be shown here. So maybe everything is correct after all. (Well, apart from the duplication.)
</div>
{% endblock %}