troggle-unchained/parsers/caves.py

import os
import re
import sys
from pathlib import Path
from django.conf import settings
from django.db import transaction
from troggle.settings import SURVEX_DATA, EXPOWEB, CAVEDESCRIPTIONS, ENTRANCEDESCRIPTIONS
from troggle.core.models.troggle import DataIssue
from troggle.core.models.caves import Area, Cave, Entrance, CaveSlug, EntranceSlug, CaveAndEntrance
'''Reads all the cave description data by parsing the xml files (stored as e.g. :EXPOWEB:/cave_data/1623-161.html )
and creating the various Cave, Entrance and necessary Area objects.

This is the first import that happens after the database is reinitialised,
so it is the first thing that creates tables.

BUT in Django 2.0 and later we cannot do any queries on data we have just entered
because this is all happening inside one transaction. Bummer.
django.db.transaction.TransactionManagementError:
An error occurred in the current transaction. You can't execute queries until the end of the 'atomic' block.
'''
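
# A minimal sketch (commented out, not executed) of how the error quoted above typically arises,
# using the Cave model imported here; this is an assumption-flagged illustration, not project code.
# If a database error is swallowed inside an atomic block, Django marks the transaction as broken
# and any further ORM query raises TransactionManagementError until the block ends:
#
#   with transaction.atomic():
#       try:
#           Cave.objects.create(unofficial_number="9999")   # suppose this fails at the db level
#       except Exception:
#           pass                                            # transaction is now marked broken...
#       Cave.objects.filter(unofficial_number="9999").exists()  # ...so this raises the error above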
todo='''- db Update does not work when a cave id is in the pending list but a proper cave description file exists
and is being imported. It should work. But currently Django aborts and the file is not read in.
- Cannot use Edit This Page for pendingcaves.txt_edit as Edit This Page is expecting an html file.
So we will need a separate file-editing capability just for this configuration file ?!
- crashes on MariaDB on the server when deleting Caves, complaining that Area needs a non-null parent, but this is not true.
The only solution we have found is to let it crash, then stop and restart MariaDB (requires a logon able to sudo)
and then run databasereset.py again. (status as of July 2022)
'''
entrances_xslug = {}
caves_xslug = {}
areas_xslug = {}
def dummy_entrance(k, slug, msg="DUMMY"):
'''Returns an empty entrance object for either a PENDING cave or a DUMMY entrance if
user forgot to provide one when creating the cave
'''
ent = Entrance(
name = k,
entrance_description = "Dummy entrance: auto-created when registering a new cave " +
"and you forgot to create an entrance for it. Click on 'Edit' to enter the correct data, then 'Submit'.",
marking = '?')
if ent:
ent.save() # must save to have id before foreign keys work.
        try: # Now create an entrance slug ID
es = EntranceSlug.objects.update_or_create(entrance = ent,
slug = slug, primary = False)
except:
message = f" ! {k:11s} {msg} entrance create failure"
DataIssue.objects.create(parser='caves', message=message, url=f'{slug}')
print(message)
ent.cached_primary_slug = slug
ent.filename = slug + ".html"
ent.save()
return ent
else:
message = f" ! {k:11s} {msg} cave SLUG '{slug}' create failure"
DataIssue.objects.create(parser='caves', message=message, url=f'{slug}')
print(message)
raise
def set_dummy_entrance(id, slug, cave, msg="DUMMY"):
'''Entrance field either missing or holds a null string instead of a filename in a cave_data file.
'''
global entrances_xslug
try:
entrance = dummy_entrance(id, slug, msg="DUMMY")
letter = ""
entrances_xslug[slug] = entrance
ce = CaveAndEntrance.objects.update_or_create(cave = cave, entrance_letter = "", entrance = entrance)
message = f' ! Warning: Dummy Entrance created for {id}'
DataIssue.objects.create(parser='caves', message=message, url=f'{cave.url}')
print(message)
except:
message = f' ! Entrance Dummy setting failure, slug:"{slug}" cave id :"{id}" '
DataIssue.objects.create(parser='caves', message=message, url=f'{cave.url}')
print(message)
def do_pending_cave(k, url, area_1623):
'''
default for a PENDING cave, should be overwritten in the db later if a real cave of the same name exists
in expoweb/cave_data/1623-"k".html
'''
slug = "1623-" + k
default_note = f"_Survex file found in loser repo but no description in expoweb <br><br><br>\n"
default_note += f"INSTRUCTIONS: First open 'This survex file' (link above the CaveView panel) to find the date and info. Then "
default_note += f"<br><br>\n\n - (1) search in the survex file for the *ref to find a "
default_note += f"relevant wallet, e.g.<a href='/survey_scans/2009%252311/'>2009#11</a> and read the notes image files <br>\n - "
default_note += f"<br><br>\n\n - (2) search in the Expo for that year e.g. <a href='/expedition/{k[0:4]}'>{k[0:4]}</a> to find a "
default_note += f"relevant logbook entry, remember that the date may have been recorded incorrectly, "
default_note += f"so check for trips i.e. logbook entries involving the same people as were listed in the survex file, "
default_note += f"and you should also check the scanned copy of the logbook (linked from each logbook entry page) "
default_note += f"just in case a vital trip was not transcribed, then <br>\n - "
default_note += f"click on 'Edit this cave' and copy the information you find in the survex file and the logbook"
default_note += f"and delete all the text in the 'Notes' section - which is the text you are reading now."
default_note += f"<br><br>\n\n - Only two fields on this form are essential. "
default_note += f"Documentation of all the fields on 'Edit this cave' form is in <a href='/handbook/survey/caveentryfields.html'>handbook/survey/caveentryfields</a>"
default_note += f"<br><br>\n\n - "
default_note += f"When you Submit it will create a new file in expoweb/cave_data/ "
default_note += f"<br><br>\n\n - Now you can edit the entrance info: click on Edit below for the dummy entrance. "
2021-04-27 14:51:04 +01:00
default_note += f"and then Submit to save it (if you forget to do this, a dummy entrance will be created for your new cave description)."
default_note += f"<br><br>\n\n - Finally, you need to find a nerd to edit the file '<var>expoweb/cave_data/pending.txt</var>' "
default_note += f"to remove the line <br><var>{slug}</var><br> as it is no longer 'pending' but 'done. Well Done."
cave = Cave(
unofficial_number = k,
underground_description = "Pending cave write-up - creating as empty object. No XML file available yet.",
survex_file = "caves-1623/" + k + "/" + k +".svx",
url = url,
notes = default_note)
if cave:
cave.save() # must save to have id before foreign keys work. This is also a ManyToMany key.
cave.area.add(area_1623)
cave.save()
message = f" ! {k:12} {cave.underground_description}"
DataIssue.objects.create(parser='caves', message=message, url=url)
print(message)
try: # Now create a cave slug ID
cs = CaveSlug.objects.update_or_create(cave = cave,
slug = slug, primary = False)
except:
message = f" ! {k:11s} PENDING cave SLUG create failure"
DataIssue.objects.create(parser='caves', message=message)
print(message)
else:
message = f' ! {k:11s} PENDING cave create failure'
DataIssue.objects.create(parser='caves', message=message)
print(message)
try:
ent = dummy_entrance(k, slug, msg="PENDING")
ceinsts = CaveAndEntrance.objects.update_or_create(cave = cave, entrance_letter = "", entrance = ent)
for ceinst in ceinsts:
if str(ceinst) == str(cave): # magic runes... why is the next value a Bool?
ceinst.cave = cave
ceinst.save()
break
except:
message = f" ! {k:11s} PENDING entrance + cave UNION create failure '{cave}' [{ent}]"
DataIssue.objects.create(parser='caves', message=message)
print(message)
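
# Illustrative sketch of the pending-cave flow driven by readcaves() below (the cave id used
# in the example is hypothetical): each line of cave_data/pendingcaves.txt holds a bare id
# such as "2018-dm-07"; do_pending_cave() then builds slug "1623-2018-dm-07", url "1623/2018-dm-07",
# a placeholder Cave and a dummy Entrance for it, roughly:
#
#   for cid in open(Path(CAVEDESCRIPTIONS, "pendingcaves.txt")):
#       k = cid.rstrip('\n')
#       do_pending_cave(k, "1623/" + k, area_1623)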
def readcaves():
'''Reads the xml-format HTML files in the EXPOWEB repo, not from the loser repo.
'''
    # Caves which do not have cave_data/1623-xxx.html XML files, even though they exist and have surveys,
    # are listed in a simple file (pendingcaves.txt) which can be edited using 'Edit this file'
pending = set()
fpending = Path(CAVEDESCRIPTIONS, "pendingcaves.txt")
if fpending.is_file():
with open(fpending, "r") as fo:
cids = fo.readlines()
for cid in cids:
pending.add(cid.rstrip('\n'))
with transaction.atomic():
print(" - Deleting Caves and Entrances")
# attempting to avoid MariaDB crash when doing this
try:
Area.objects.all().delete()
except:
pass
try:
Cave.objects.all().delete()
except:
pass
try:
Entrance.objects.all().delete()
except:
pass
# Clear the cave data issues and the caves as we are reloading
DataIssue.objects.filter(parser='areas').delete()
DataIssue.objects.filter(parser='caves').delete()
DataIssue.objects.filter(parser='caves ok').delete()
DataIssue.objects.filter(parser='entrances').delete()
print(" - Creating Areas 1623 and 1626")
# This crashes on the server with MariaDB even though a null parent is explicitly allowed.
area_1623= Area.objects.create(short_name = "1623", parent=None)
print(" - Saving Area 1623")
area_1623.save()
area_1626= Area.objects.create(short_name = "1626", parent=None)
print(" - Saving Area 1626")
area_1626.save()
print (" - Setting pending caves")
# Do this first, so that these empty entries are overwritten as they get properly created.
for k in pending:
url = "1623/" + k # Note we are not appending the .htm as we are modern folks now.
try:
do_pending_cave(k, url, area_1623)
except:
message = " ! Error. Cannot create pending cave and entrance, pending-id:{}".format(k)
DataIssue.objects.create(parser='caves', message=message)
print(message)
raise
with transaction.atomic():
print(" - settings.CAVEDESCRIPTIONS: ", CAVEDESCRIPTIONS)
print(" - Reading Entrances from entrance descriptions xml files")
for filename in next(os.walk(ENTRANCEDESCRIPTIONS))[2]: #Should be a better way of getting a list of files
# if filename.endswith('.html'):
# if Path(filename).stem[5:] in pending:
# print(f'Skipping pending entrance dummy file <{filename}>')
# else:
# readentrance(filename)
readentrance(filename)
print(" - Reading Caves from cave descriptions xml files")
for filename in next(os.walk(CAVEDESCRIPTIONS))[2]: #Should be a better way of getting a list of files
if filename.endswith('.html'):
readcave(filename)
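
# Shape of an entrance_data file as readentrance() below expects it (sketch assembled from the
# getXML() calls; element order and the sample slug are illustrative, not taken from a real file):
#
#   <entrance>
#     <non_public>False</non_public>
#     <slug>1623-290a</slug>                         <!-- one or more -->
#     <name>...</name>
#     ... plus one each of: entrance_description, explorers, map_description,
#     ... location_description, approach, underground_description, photo, marking,
#     ... marking_comment, findability, findability_description, alt, northing, easting,
#     ... tag_station, exact_station, other_station, other_description, bearings, url
#   </entrance>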
def readentrance(filename):
global entrances_xslug
global caves_xslug
global areas_xslug
# Note: these are HTML files in the EXPOWEB repo, not from the loser repo.
with open(os.path.join(ENTRANCEDESCRIPTIONS, filename)) as f:
contents = f.read()
context = filename
#print("Reading file ENTRANCE {} / {}".format(ENTRANCEDESCRIPTIONS, filename))
entrancecontentslist = getXML(contents, "entrance", maxItems = 1, context = context)
if len(entrancecontentslist) != 1:
message = f'! BAD ENTRANCE at "{filename}"'
DataIssue.objects.create(parser='caves', message=message)
print(message)
else:
entrancecontents = entrancecontentslist[0]
non_public = getXML(entrancecontents, "non_public", maxItems = 1, context = context)
name = getXML(entrancecontents, "name", maxItems = 1, context = context)
slugs = getXML(entrancecontents, "slug", context = context)
entrance_description = getXML(entrancecontents, "entrance_description", maxItems = 1, context = context)
explorers = getXML(entrancecontents, "explorers", maxItems = 1, context = context)
map_description = getXML(entrancecontents, "map_description", maxItems = 1, context = context)
location_description = getXML(entrancecontents, "location_description", maxItems = 1, context = context)
approach = getXML(entrancecontents, "approach", maxItems = 1, context = context)
underground_description = getXML(entrancecontents, "underground_description", maxItems = 1, context = context)
photo = getXML(entrancecontents, "photo", maxItems = 1, context = context)
marking = getXML(entrancecontents, "marking", maxItems = 1, context = context)
marking_comment = getXML(entrancecontents, "marking_comment", maxItems = 1, context = context)
findability = getXML(entrancecontents, "findability", maxItems = 1, context = context)
findability_description = getXML(entrancecontents, "findability_description", maxItems = 1, context = context)
alt = getXML(entrancecontents, "alt", maxItems = 1, context = context)
northing = getXML(entrancecontents, "northing", maxItems = 1, context = context)
easting = getXML(entrancecontents, "easting", maxItems = 1, context = context)
tag_station = getXML(entrancecontents, "tag_station", maxItems = 1, context = context)
exact_station = getXML(entrancecontents, "exact_station", maxItems = 1, context = context)
other_station = getXML(entrancecontents, "other_station", maxItems = 1, context = context)
other_description = getXML(entrancecontents, "other_description", maxItems = 1, context = context)
bearings = getXML(entrancecontents, "bearings", maxItems = 1, context = context)
url = getXML(entrancecontents, "url", maxItems = 1, context = context)
if len(non_public) == 1 and len(slugs) >= 1 and len(name) >= 1 and len(entrance_description) == 1 and len(explorers) == 1 and len(map_description) == 1 and len(location_description) == 1 and len(approach) == 1 and len(underground_description) == 1 and len(marking) == 1 and len(marking_comment) == 1 and len(findability) == 1 and len(findability_description) == 1 and len(alt) == 1 and len(northing) == 1 and len(easting) == 1 and len(tag_station) == 1 and len(exact_station) == 1 and len(other_station) == 1 and len(other_description) == 1 and len(bearings) == 1 and len(url) == 1:
e, state = Entrance.objects.update_or_create(name = name[0],
non_public = {"True": True, "False": False, "true": True, "false": False,}[non_public[0]],
entrance_description = entrance_description[0],
explorers = explorers[0],
map_description = map_description[0],
location_description = location_description[0],
approach = approach[0],
underground_description = underground_description[0],
photo = photo[0],
marking = marking[0],
marking_comment = marking_comment[0],
findability = findability[0],
findability_description = findability_description[0],
alt = alt[0],
northing = northing[0],
easting = easting[0],
tag_station = tag_station[0],
exact_station = exact_station[0],
other_station = other_station[0],
other_description = other_description[0],
bearings = bearings[0],
url = url[0],
filename = filename,
cached_primary_slug = slugs[0])
primary = True
for slug in slugs:
#print("entrance slug:{} filename:{}".format(slug, filename))
try:
cs = EntranceSlug.objects.update_or_create(entrance = e,
slug = slug,
primary = primary)
except:
# need to cope with duplicates
message = f" ! FAILED to get precisely one ENTRANCE when updating using: cave_entrance/{filename}"
DataIssue.objects.create(parser='caves', message=message, url=f'/cave/{slug}/edit/')
kents = EntranceSlug.objects.all().filter(entrance = e,
slug = slug,
primary = primary)
for k in kents:
message = " ! - DUPLICATE in db. entrance:"+ str(k.entrance) + ", slug:" + str(k.slug())
DataIssue.objects.create(parser='caves', message=message, url=f'/cave/{slug}/edit/')
print(message)
for k in kents:
if k.slug() != None:
print(" ! - OVERWRITING this one: slug:"+ str(k.slug()))
k.notes = "DUPLICATE entrance found on import. Please fix\n" + k.notes
c = k
primary = False
else: # more than one item in long list
message = f' ! {slug:12} ABORT loading this entrance. in "{filename}"'
DataIssue.objects.create(parser='caves', message=message, url=f'/cave/{slug}/edit/')
print(message)
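
# Shape of a cave_data file as readcave() below expects it (sketch assembled from the getXML()
# calls; the sample values are illustrative, not taken from a real file):
#
#   <cave>
#     <non_public>False</non_public>
#     <caveslug>1623-290</caveslug>
#     <official_name>...</official_name>
#     <area>1623</area>                              <!-- one or more -->
#     ... plus one each of: kataster_code, kataster_number, unofficial_number, explorers,
#     ... underground_description, equipment, references, survey, kataster_status,
#     ... underground_centre_line, notes, length, depth, extent, survex_file,
#     ... description_file, url
#     <entrance>                                     <!-- zero or more -->
#       <entranceslug>1623-290a</entranceslug>
#       <letter>a</letter>
#     </entrance>
#   </cave>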
def readcave(filename):
'''Assumes any area it hasn't seen before is a subarea of 1623
'''
global entrances_xslug
global caves_xslug
global areas_xslug
# Note: these are HTML files in the EXPOWEB repo, not from the loser repo.
with open(os.path.join(CAVEDESCRIPTIONS, filename)) as f:
contents = f.read()
context = filename
cavecontentslist = getXML(contents, "cave", maxItems = 1, context = context)
if len(cavecontentslist) != 1:
message = f'! BAD CAVE at "{filename}"'
DataIssue.objects.create(parser='caves', message=message)
print(message)
else:
cavecontents = cavecontentslist[0]
non_public = getXML(cavecontents, "non_public", maxItems = 1, context = context)
slugs = getXML(cavecontents, "caveslug", maxItems = 1, context = context)
official_name = getXML(cavecontents, "official_name", maxItems = 1, context = context)
areas = getXML(cavecontents, "area", context = context)
kataster_code = getXML(cavecontents, "kataster_code", maxItems = 1, context = context)
kataster_number = getXML(cavecontents, "kataster_number", maxItems = 1, context = context)
unofficial_number = getXML(cavecontents, "unofficial_number", maxItems = 1, context = context)
explorers = getXML(cavecontents, "explorers", maxItems = 1, context = context)
underground_description = getXML(cavecontents, "underground_description", maxItems = 1, context = context)
equipment = getXML(cavecontents, "equipment", maxItems = 1, context = context)
references = getXML(cavecontents, "references", maxItems = 1, context = context)
survey = getXML(cavecontents, "survey", maxItems = 1, context = context)
kataster_status = getXML(cavecontents, "kataster_status", maxItems = 1, context = context)
underground_centre_line = getXML(cavecontents, "underground_centre_line", maxItems = 1, context = context)
notes = getXML(cavecontents, "notes", maxItems = 1, context = context)
length = getXML(cavecontents, "length", maxItems = 1, context = context)
depth = getXML(cavecontents, "depth", maxItems = 1, context = context)
extent = getXML(cavecontents, "extent", maxItems = 1, context = context)
survex_file = getXML(cavecontents, "survex_file", maxItems = 1, context = context)
description_file = getXML(cavecontents, "description_file", maxItems = 1, context = context)
url = getXML(cavecontents, "url", maxItems = 1, context = context)
entrances = getXML(cavecontents, "entrance", context = context)
if len(non_public) == 1 and len(slugs) >= 1 and len(official_name) == 1 and len(areas) >= 1 and len(kataster_code) == 1 and len(kataster_number) == 1 and len(unofficial_number) == 1 and len(explorers) == 1 and len(underground_description) == 1 and len(equipment) == 1 and len(references) == 1 and len(survey) == 1 and len(kataster_status) == 1 and len(underground_centre_line) == 1 and len(notes) == 1 and len(length) == 1 and len(depth) == 1 and len(extent) == 1 and len(survex_file) == 1 and len(description_file ) == 1 and len(url) == 1:
try:
c, state = Cave.objects.update_or_create(non_public = {"True": True, "False": False, "true": True, "false": False,}[non_public[0]],
official_name = official_name[0],
kataster_code = kataster_code[0],
kataster_number = kataster_number[0],
unofficial_number = unofficial_number[0],
explorers = explorers[0],
underground_description = underground_description[0],
equipment = equipment[0],
references = references[0],
survey = survey[0],
kataster_status = kataster_status[0],
underground_centre_line = underground_centre_line[0],
notes = notes[0],
length = length[0],
depth = depth[0],
extent = extent[0],
survex_file = survex_file[0],
description_file = description_file[0],
url = url[0],
filename = filename)
except:
print(" ! FAILED to get only one CAVE when updating using: "+filename)
kaves = Cave.objects.all().filter(kataster_number=kataster_number[0])
for k in kaves:
message = " ! - DUPLICATES in db. kataster:"+ str(k.kataster_number) + ", slug:" + str(k.slug())
DataIssue.objects.create(parser='caves', message=message)
print(message)
for k in kaves:
if k.slug() != None:
print(" ! - OVERWRITING this one: slug:"+ str(k.slug()))
k.notes = "DUPLICATE kataster number found on import. Please fix\n" + k.notes
c = k
for area_slug in areas:
if area_slug in areas_xslug:
newArea = areas_xslug[area_slug]
else:
area = Area.objects.filter(short_name = area_slug)
if area:
newArea = area[0]
else:
newArea = Area(short_name = area_slug, parent = Area.objects.get(short_name = "1623"))
newArea.save()
areas_xslug[area_slug] = newArea
c.area.add(newArea)
primary = True
for slug in slugs:
if slug in caves_xslug:
cs = caves_xslug[slug]
else:
try: # we want to overwrite a PENDING cave if we are now importing the 1623-xxx.html file for it
cs = CaveSlug.objects.update_or_create(cave = c,
slug = slug,
primary = primary)
caves_xslug[slug] = cs
except Exception as ex:
# This fails to do an update! It just crashes.. to be fixed
message = " ! Cave update/create failure : %s, skipping file cave_data/%s with exception\nException: %s" % (slug, context, ex.__class__)
DataIssue.objects.create(parser='caves', message=message)
print(message)
primary = False
if not entrances or len(entrances) < 1:
# missing entrance link in cave_data/1623-* .html file
set_dummy_entrance(slug[5:], slug, c, msg="DUMMY")
else:
for entrance in entrances:
eslug = getXML(entrance, "entranceslug", maxItems = 1, context = context)[0]
letter = getXML(entrance, "letter", maxItems = 1, context = context)[0]
if len(entrances) == 1 and not eslug: # may be empty: <entranceslug></entranceslug>
set_dummy_entrance(slug[5:], slug, c, msg="DUMMY")
else:
try:
if eslug in entrances_xslug:
entrance = entrances_xslug[eslug]
else:
entrance = Entrance.objects.get(entranceslug__slug = eslug)
entrances_xslug[eslug] = entrance
ce = CaveAndEntrance.objects.update_or_create(cave = c, entrance_letter = letter, entrance = entrance)
except:
message = f' ! Entrance setting failure, slug:"{slug}" #entrances:{len(entrances)} {entrance} letter:"{letter}" cave:"{c}" filename:"cave_data/{filename}"'
DataIssue.objects.create(parser='caves', message=message, url=f'{c.url}_edit/')
print(message)
if survex_file[0]:
if not (Path(SURVEX_DATA) / survex_file[0]).is_file():
message = f' ! {slug:12} survex filename does not exist :LOSER:"{survex_file[0]}" in "{filename}"'
DataIssue.objects.create(parser='caves', message=message, url=f'/{slug}_cave_edit/')
print(message)
if description_file[0]: # if not an empty string
message = f' - {slug:12} complex description filename "{description_file[0]}" inside "{CAVEDESCRIPTIONS}/{filename}"'
DataIssue.objects.create(parser='caves ok', message=message, url=f'/{slug}_cave_edit/')
print(message)
if not (Path(EXPOWEB) / description_file[0]).is_file():
message = f' ! {slug:12} description filename "{EXPOWEB}/{description_file[0]}" does not refer to a real file'
DataIssue.objects.create(parser='caves', message=message, url=f'/{slug}_cave_edit/')
print(message)
#c.description_file="" # done only once, to clear out cruft.
#c.save()
else: # more than one item in long list
message = f' ! ABORT loading this cave. in "{filename}"'
DataIssue.objects.create(parser='caves', message=message, url=f'/{slug}_cave_edit/')
print(message)
def getXML(text, itemname, minItems = 1, maxItems = None, printwarnings = True, context = ""):
items = re.findall("<%(itemname)s>(.*?)</%(itemname)s>" % {"itemname": itemname}, text, re.S)
if len(items) < minItems and printwarnings:
message = " ! %(count)i x %(itemname)s found, at least %(min)i expected. Load ABORT. " % {"count": len(items),
"itemname": itemname,
"min": minItems} + " in file " + context
DataIssue.objects.create(parser='caves', message=message, url=""+context)
print(message)
if maxItems is not None and len(items) > maxItems and printwarnings:
message = " ! %(count)i x %(itemname)s found, no more than %(max)i expected in this XML unit. Load ABORT. " % {"count": len(items),
"itemname": itemname,
"max": maxItems} + " in file " + context
DataIssue.objects.create(parser='caves', message=message)
print(message)
return items
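
# Worked example of getXML() behaviour (comments only; the sample strings are made up):
#
#   >>> getXML("<cave><area>1623</area><area>new</area></cave>", "area")
#   ['1623', 'new']
#   >>> getXML("<entrance><alt>1623m</alt></entrance>", "alt", maxItems = 1, context = "1623-290.html")
#   ['1623m']
#
# The pattern is non-greedy and compiled with re.S, so multi-line element bodies are returned
# verbatim. Violating minItems/maxItems only records a DataIssue and prints a warning; the
# (possibly empty or over-long) list is still returned to the caller.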