2
0
mirror of https://expo.survex.com/repositories/troggle/.git synced 2024-11-21 23:01:52 +00:00

edit cave reads from HTML file not just db

This commit is contained in:
Philip Sargent 2023-04-22 01:24:32 +01:00
parent 116cfc7c6e
commit 2ed66fe3d0
3 changed files with 342 additions and 197 deletions

View File

@ -117,7 +117,7 @@ def write_and_commit(files, message):
kwargs = {}
try:
with open(filepath, mode, **kwargs) as f:
print(f"WRITING{cwd}---{filename} ")
print(f"WRITING {cwd}---{filename} ")
# as the wsgi process www-data, we have group write-access but are not owner, so cannot chmod.
# os.chmod(filepath, 0o664) # set file permissions to rw-rw-r--
f.write(content)

View File

@ -14,6 +14,9 @@ from troggle.core.models.caves import Cave, CaveAndEntrance, Entrance, GetCaveLo
from troggle.core.models.logbooks import CaveSlug, QM
from troggle.core.utils import write_and_commit
from troggle.core.views import expo
from troggle.settings import CAVEDESCRIPTIONS, ENTRANCEDESCRIPTIONS
from troggle.parsers.caves import readcave, readentrance
from .auth import login_required_if_public
@ -333,7 +336,7 @@ def edit_cave(request, path="", slug=None):
The format for the file being saved is in templates/dataformat/cave.xml
Warning. This uses Django deep magic.
It does save the data into into the database directly, not by parsing the file.
It saves the data into the database and into the html file, which it then commits to git.
"""
message = ""
if slug is not None:
@ -373,7 +376,7 @@ def edit_cave(request, path="", slug=None):
ceinst.save()
try:
cave_file = cave.file_output()
print(cave_file)
# print(cave_file)
write_and_commit([cave_file], f"Online edit of {cave}")
# leave other exceptions unhandled so that they bubble up to user interface
except PermissionError:
@ -388,6 +391,10 @@ def edit_cave(request, path="", slug=None):
message = f"! POST data is INVALID {cave}"
print(message)
else:
# re-read cave data from file.
filename = str(cave.slug() +".html")
readcave(filename, cave=cave)
form = CaveForm(instance=cave)
ceFormSet = CaveAndEntranceFormSet(queryset=cave.caveandentrance_set.all())

View File

@ -10,7 +10,12 @@ from troggle.core.models.logbooks import CaveSlug
from troggle.core.models.troggle import DataIssue
from troggle.settings import CAVEDESCRIPTIONS, ENTRANCEDESCRIPTIONS, EXPOWEB, SURVEX_DATA
"""Reads all the cave description data by parsing the xml files (stored as e.g. :EXPOWEB:/cave_data/1623-161.html )
"""Reads all the cave description data and entrance description data
by parsing the xml files stored as e.g.
:EXPOWEB:/cave_data/1623-161.html
or
:EXPOWEB:/entrance_data/1623-161g.html
and creating the various Cave, Entrance and necessary Area objects.
This is the first import that happens after the database is reinitialised.
@ -272,8 +277,41 @@ def do_pending_cave(k, url, area):
print(message)
return cave
def readentrance(filename):
def getXML(text, itemname, minItems=1, maxItems=None, context=""):
    """Return the text inside every <itemname>...</itemname> element found in text.

    Matching is done with a non-greedy regular expression in DOTALL mode, so an
    element's content may span several lines. Nested elements with the same
    tag name are not handled.

    Args:
        text: the string to search.
        itemname: the tag name, without angle brackets. It is matched
            literally (regex metacharacters in the name are escaped).
        minItems: fewest matches expected before a DataIssue is recorded.
        maxItems: most matches expected, or None for no upper limit.
        context: appended to the problem message (callers pass the filename).

    Returns:
        The list of matched contents, in document order. When minItems is 0
        and nothing matched, the list [""] is returned instead of an empty
        list.

    Note: a count outside the expected range is only reported (a DataIssue is
    created and the message printed); despite the "Load ABORT" wording,
    nothing is aborted here and the items found are still returned.

    Should throw exception rather than producing error message here,
    then handle exception in calling routine where it has the context.
    """
    # Escape the tag name so that it can only ever match itself.
    tag = re.escape(itemname)
    items = re.findall(f"<{tag}>(.*?)</{tag}>", text, re.S)
    count = len(items)

    if count < minItems:
        message = (
            " ! %(count)i x %(itemname)s found, at least %(min)i expected. Load ABORT. "
            % {"count": count, "itemname": itemname, "min": minItems}
            + " in file "
            + context
        )
        DataIssue.objects.create(parser="caves", message=message, url="" + context)
        print(message)

    if maxItems is not None and count > maxItems:
        message = (
            " ! %(count)i x %(itemname)s found, no more than %(max)i expected in this XML unit. Load ABORT. "
            % {"count": count, "itemname": itemname, "max": maxItems}
            + " in file "
            + context
        )
        DataIssue.objects.create(parser="caves", message=message)
        print(message)

    # An optional element that is absent is reported as one empty string.
    if minItems == 0 and not items:
        items = [""]
    return items
def readentrance(filename, ent=None):
"""Reads an entrance description from the .html file
If not called as part of initial import, then the global lists will not be correct
but this is OK, a search will find them in the db.
"""
def getXMLmax1(field):
return getXML(entrancecontents, field, maxItems=1, context=context)
@ -362,17 +400,50 @@ def readentrance(filename):
)
def readcave(filename):
def readcave(filename, cave=None):
"""Reads an entrance description from the .html file
Convoluted. Sorry.This is as I inherited it and I haven't fiddled with it. Needs rewriting
Convoluted. Sorry. Needs rewriting
Assumes any area it hasn't seen before is a subarea of 1623
If not called as part of initial import, then the global lists will not be correct
but this is OK, a search will find them in the db.
"""
def do_entrances():
for entrance in entrances:
eslug = getXML(entrance, "entranceslug", maxItems=1, context=context)[0]
letter = getXML(entrance, "letter", maxItems=1, context=context)[0]
if len(entrances) == 1 and not eslug: # may be empty: <entranceslug></entranceslug>
set_dummy_entrance(slug[5:], slug, c, msg="DUMMY: no entrance slug read from file")
else:
try:
if eslug in entrances_xslug:
entrance = entrances_xslug[eslug]
else:
# entrance = Entrance.objects.get(entranceslug__slug=eslug)
entrance = Entrance.objects.get(slug=eslug)
entrances_xslug[eslug] = entrance
CaveAndEntrance.objects.update_or_create(
cave=c, entrance_letter=letter, entrance=entrance
)
except:
message = f' ! Entrance setting failure, slug:"{slug}" #entrances:{len(entrances)} {entrance} letter:"{letter}" cave:"{c}" filename:"cave_data/{filename}"'
DataIssue.objects.create(parser="entrances", message=message, url=f"{c.url}_edit/")
print(message)
global entrances_xslug
global caves_xslug
global areas_xslug
# Note: these are HTML files in the EXPOWEB repo, not from the loser repo.
with open(os.path.join(CAVEDESCRIPTIONS, filename)) as f:
fn = settings.CAVEDESCRIPTIONS / filename
# print(f" - Reading Cave from cave descriptions file {fn}")
if not fn.exists():
message = f" ! Cave_data file reading problem filename:'cave_data/{filename}'"
DataIssue.objects.create(parser="caves", message=message, url=None)
print(message)
return
with open(fn) as f:
contents = f.read()
context = filename
cavecontentslist = getXML(contents, "cave", maxItems=1, context=context)
@ -380,204 +451,271 @@ def readcave(filename):
message = f'! BAD CAVE at "{filename}"'
DataIssue.objects.create(parser="caves", message=message)
print(message)
else:
cavecontents = cavecontentslist[0]
non_public = getXML(cavecontents, "non_public", maxItems=1, context=context)
slugs = getXML(cavecontents, "caveslug", maxItems=1, context=context)
official_name = getXML(cavecontents, "official_name", maxItems=1, context=context)
areas = getXML(cavecontents, "area", context=context)
kataster_code = getXML(cavecontents, "kataster_code", maxItems=1, context=context)
kataster_number = getXML(cavecontents, "kataster_number", maxItems=1, context=context)
unofficial_number = getXML(cavecontents, "unofficial_number", maxItems=1, context=context)
explorers = getXML(cavecontents, "explorers", maxItems=1, context=context)
underground_description = getXML(cavecontents, "underground_description", maxItems=1, context=context)
equipment = getXML(cavecontents, "equipment", maxItems=1, context=context)
references = getXML(cavecontents, "references", maxItems=1, context=context)
survey = getXML(cavecontents, "survey", maxItems=1, context=context)
kataster_status = getXML(cavecontents, "kataster_status", maxItems=1, context=context)
underground_centre_line = getXML(cavecontents, "underground_centre_line", maxItems=1, context=context)
notes = getXML(cavecontents, "notes", maxItems=1, context=context)
length = getXML(cavecontents, "length", maxItems=1, context=context)
depth = getXML(cavecontents, "depth", maxItems=1, context=context)
extent = getXML(cavecontents, "extent", maxItems=1, context=context)
survex_file = getXML(cavecontents, "survex_file", maxItems=1, context=context)
description_file = getXML(cavecontents, "description_file", maxItems=1, context=context)
url = getXML(cavecontents, "url", maxItems=1, context=context)
entrances = getXML(cavecontents, "entrance", context=context)
return
cavecontents = cavecontentslist[0]
non_public = getXML(cavecontents, "non_public", maxItems=1, context=context)
slugs = getXML(cavecontents, "caveslug", maxItems=1, context=context)
official_name = getXML(cavecontents, "official_name", maxItems=1, context=context)
areas = getXML(cavecontents, "area", context=context)
kataster_code = getXML(cavecontents, "kataster_code", maxItems=1, context=context)
kataster_number = getXML(cavecontents, "kataster_number", maxItems=1, context=context)
unofficial_number = getXML(cavecontents, "unofficial_number", maxItems=1, context=context)
explorers = getXML(cavecontents, "explorers", maxItems=1, context=context)
underground_description = getXML(cavecontents, "underground_description", maxItems=1, context=context)
equipment = getXML(cavecontents, "equipment", maxItems=1, context=context)
references = getXML(cavecontents, "references", maxItems=1, context=context)
survey = getXML(cavecontents, "survey", maxItems=1, context=context)
kataster_status = getXML(cavecontents, "kataster_status", maxItems=1, context=context)
underground_centre_line = getXML(cavecontents, "underground_centre_line", maxItems=1, context=context)
notes = getXML(cavecontents, "notes", maxItems=1, context=context)
length = getXML(cavecontents, "length", maxItems=1, context=context)
depth = getXML(cavecontents, "depth", maxItems=1, context=context)
extent = getXML(cavecontents, "extent", maxItems=1, context=context)
survex_file = getXML(cavecontents, "survex_file", maxItems=1, context=context)
description_file = getXML(cavecontents, "description_file", maxItems=1, context=context)
url = getXML(cavecontents, "url", maxItems=1, context=context)
entrances = getXML(cavecontents, "entrance", context=context)
if (
len(non_public) == 1
and len(slugs) >= 1
and len(official_name) == 1
and len(areas) >= 1
and len(kataster_code) == 1
and len(kataster_number) == 1
and len(unofficial_number) == 1
and len(explorers) == 1
and len(underground_description) == 1
and len(equipment) == 1
and len(references) == 1
and len(survey) == 1
and len(kataster_status) == 1
and len(underground_centre_line) == 1
and len(notes) == 1
and len(length) == 1
and len(depth) == 1
and len(extent) == 1
and len(survex_file) == 1
and len(description_file) == 1
and len(url) == 1
):
try:
c, state = Cave.objects.update_or_create(
non_public={
"True": True,
"False": False,
"true": True,
"false": False,
}[non_public[0]],
official_name=official_name[0],
kataster_code=kataster_code[0],
kataster_number=kataster_number[0],
unofficial_number=unofficial_number[0],
explorers=explorers[0],
underground_description=underground_description[0],
equipment=equipment[0],
references=references[0],
survey=survey[0],
kataster_status=kataster_status[0],
underground_centre_line=underground_centre_line[0],
notes=notes[0],
length=length[0],
depth=depth[0],
extent=extent[0],
survex_file=survex_file[0],
description_file=description_file[0],
url=url[0],
filename=filename,
)
except:
print(" ! FAILED to get only one CAVE when updating using: " + filename)
kaves = Cave.objects.all().filter(kataster_number=kataster_number[0])
for k in kaves:
message = " ! - DUPLICATES in db. kataster:" + str(k.kataster_number) + ", slug:" + str(k.slug())
DataIssue.objects.create(parser="caves", message=message)
print(message)
for k in kaves:
if k.slug() is not None:
print(" ! - OVERWRITING this one: slug:" + str(k.slug()))
k.notes = "DUPLICATE kataster number found on import. Please fix\n" + k.notes
c = k
for area_slug in areas:
if area_slug in areas_xslug:
newArea = areas_xslug[area_slug]
else:
area = Area.objects.filter(short_name=area_slug)
if area:
newArea = area[0]
else:
newArea = Area(short_name=area_slug, super=Area.objects.get(short_name="1623"))
newArea.save()
areas_xslug[area_slug] = newArea
c.area.add(newArea)
primary = True # this sets the first thing we find to be primary=True and all the others =False
for slug in slugs:
if slug in caves_xslug:
cs = caves_xslug[slug]
else:
try: # we want to overwrite a PENDING cave if we are now importing the 1623-xxx.html file for it
cs = CaveSlug.objects.update_or_create(cave=c, slug=slug, primary=primary)
caves_xslug[slug] = cs
except Exception as ex:
# This fails to do an update! It just crashes.. to be fixed
message = f" ! Cave update/create failure : {slug}, skipping file cave_data/{context} with exception\nException: {ex.__class__}"
DataIssue.objects.create(parser="caves", message=message)
print(message)
primary = False
if not entrances or len(entrances) < 1:
# missing entrance link in cave_data/1623-* .html file
set_dummy_entrance(slug[5:], slug, c, msg="DUMMY: no entrances")
else:
for entrance in entrances:
eslug = getXML(entrance, "entranceslug", maxItems=1, context=context)[0]
letter = getXML(entrance, "letter", maxItems=1, context=context)[0]
if len(entrances) == 1 and not eslug: # may be empty: <entranceslug></entranceslug>
set_dummy_entrance(slug[5:], slug, c, msg="DUMMY: no entrance slug read from file")
else:
try:
if eslug in entrances_xslug:
entrance = entrances_xslug[eslug]
else:
# entrance = Entrance.objects.get(entranceslug__slug=eslug)
entrance = Entrance.objects.get(slug=eslug)
entrances_xslug[eslug] = entrance
CaveAndEntrance.objects.update_or_create(
cave=c, entrance_letter=letter, entrance=entrance
)
except:
message = f' ! Entrance setting failure, slug:"{slug}" #entrances:{len(entrances)} {entrance} letter:"{letter}" cave:"{c}" filename:"cave_data/{filename}"'
DataIssue.objects.create(parser="entrances", message=message, url=f"{c.url}_edit/")
print(message)
if survex_file[0]:
if not (Path(SURVEX_DATA) / survex_file[0]).is_file():
message = f' ! {slug:12} survex filename does not exist :LOSER:"{survex_file[0]}" in "{filename}"'
DataIssue.objects.create(parser="caves", message=message, url=f"/{slug[0:4]}/{slug}_cave_edit/")
print(message)
if description_file[0]: # if not an empty string
message = f' - {slug:12} Note (not an error): complex description filename "{description_file[0]}" inside "{CAVEDESCRIPTIONS}/{filename}"'
DataIssue.objects.create(parser="caves ok", message=message, url=f"/{slug}_cave_edit/")
print(message)
if not (Path(EXPOWEB) / description_file[0]).is_file():
message = f' ! {slug:12} description filename "{EXPOWEB}/{description_file[0]}" does not refer to a real file'
DataIssue.objects.create(parser="caves", message=message, url=f"/{slug}_cave_edit/")
print(message)
# c.description_file="" # done only once, to clear out cruft.
# c.save()
else: # more than one item in long list
message = f' ! ABORT loading this cave. in "{filename}"'
DataIssue.objects.create(parser="caves", message=message, url=f"/{slug}_cave_edit/")
if not (
len(non_public) == 1
and len(slugs) >= 1 # is this really correct ?
and len(official_name) == 1
and len(areas) >= 1 # we want to stop using the sub-ares in 2023
and len(kataster_code) == 1
and len(kataster_number) == 1
and len(unofficial_number) == 1
and len(explorers) == 1
and len(underground_description) == 1
and len(equipment) == 1
and len(references) == 1
and len(survey) == 1
and len(kataster_status) == 1
and len(underground_centre_line) == 1
and len(notes) == 1
and len(length) == 1
and len(depth) == 1
and len(extent) == 1
and len(survex_file) == 1
and len(description_file) == 1
and len(url) == 1
):
# more than one item in long list
message = f' ! ABORT loading this cave. in "{filename}"'
DataIssue.objects.create(parser="caves", message=message, url=f"/{slugs}_cave_edit/")
print(message)
return
if cave:
# this is a re-load prior to editing and we already know the cave id
cave.non_public={
"True": True,
"False": False,
"true": True,
"false": False}[non_public[0]]
cave.official_name=official_name[0]
cave.kataster_code=kataster_code[0]
cave.kataster_number=kataster_number[0]
cave.unofficial_number=unofficial_number[0]
cave.explorers=explorers[0]
cave.underground_description=underground_description[0]
cave.equipment=equipment[0]
cave.references=references[0]
cave.survey=survey[0]
cave.kataster_status=kataster_status[0]
cave.underground_centre_line=underground_centre_line[0]
cave.notes=notes[0]
cave.length=length[0]
cave.depth=depth[0]
cave.extent=extent[0]
cave.survex_file=survex_file[0]
cave.description_file=description_file[0]
cave.url=url[0]
if len(slugs) > 1:
message = f" ! Cave edit failure due to more than one slug: {slugs}, skipping this field edit. "
DataIssue.objects.create(parser="caves", message=message)
print(message)
cave.areas = None
cave.save()
for area_slug in areas:
a = Area.objects.filter(short_name=area_slug)
if a:
cave.area.add(a[0])
else:
message = f" ! Cave edit failure due to unrecognised Area: {a}, skipping this field edit. "
DataIssue.objects.create(parser="caves", message=message)
print(message)
c = cave
do_entrances()
cave.save()
else:
try:
c, state = Cave.objects.update_or_create(
non_public={
"True": True,
"False": False,
"true": True,
"false": False,
}[non_public[0]],
official_name=official_name[0],
kataster_code=kataster_code[0],
kataster_number=kataster_number[0],
unofficial_number=unofficial_number[0],
explorers=explorers[0],
underground_description=underground_description[0],
equipment=equipment[0],
references=references[0],
survey=survey[0],
kataster_status=kataster_status[0],
underground_centre_line=underground_centre_line[0],
notes=notes[0],
length=length[0],
depth=depth[0],
extent=extent[0],
survex_file=survex_file[0],
description_file=description_file[0],
url=url[0],
filename=filename,
)
except:
print(" ! FAILED to get only one CAVE when updating using: " + filename)
kaves = Cave.objects.all().filter(kataster_number=kataster_number[0])
for k in kaves:
message = " ! - DUPLICATES in db. kataster:" + str(k.kataster_number) + ", slug:" + str(k.slug())
DataIssue.objects.create(parser="caves", message=message)
print(message)
for k in kaves:
if k.slug() is not None:
print(" ! - OVERWRITING this one: slug:" + str(k.slug()))
k.notes = "DUPLICATE kataster number found on import. Please fix\n" + k.notes
c = k
for area_slug in areas:
if area_slug in areas_xslug:
newArea = areas_xslug[area_slug]
else:
area = Area.objects.filter(short_name=area_slug)
if area:
newArea = area[0]
else:
newArea = Area(short_name=area_slug, super=Area.objects.get(short_name="1623"))
newArea.save()
areas_xslug[area_slug] = newArea
c.area.add(newArea)
primary = True # this sets the first thing we find to be primary=True and all the others =False
for slug in slugs:
if slug in caves_xslug:
cs = caves_xslug[slug]
else:
try: # we want to overwrite a PENDING cave if we are now importing the 1623-xxx.html file for it
cs = CaveSlug.objects.update_or_create(cave=c, slug=slug, primary=primary)
caves_xslug[slug] = cs
except Exception as ex:
#raise
# This fails to do an update! It just crashes.. to be fixed
message = f" ! Cave update/create failure : {slug}, skipping file cave_data/{context} with exception\nException: {ex.__class__}"
DataIssue.objects.create(parser="caves", message=message)
print(message)
def getXML(text, itemname, minItems=1, maxItems=None, context=""):
"""Reads a single XML tag
Should throw exception rather than producing error message here,
then handle exception in calling routine where it has the context.
"""
items = re.findall("<%(itemname)s>(.*?)</%(itemname)s>" % {"itemname": itemname}, text, re.S)
if len(items) < minItems:
message = (
" ! %(count)i x %(itemname)s found, at least %(min)i expected. Load ABORT. "
% {"count": len(items), "itemname": itemname, "min": minItems}
+ " in file "
+ context
)
DataIssue.objects.create(parser="caves", message=message, url="" + context)
primary = False
if not entrances or len(entrances) < 1:
# missing entrance link in cave_data/1623-* .html file
set_dummy_entrance(slug[5:], slug, c, msg="DUMMY: no entrances")
else:
do_entrances()
# From here on the code applies to both edited and newly-imported caves
if survex_file[0]:
if not (Path(SURVEX_DATA) / survex_file[0]).is_file():
message = f' ! {slug:12} survex filename does not exist :LOSER:"{survex_file[0]}" in "{filename}"'
DataIssue.objects.create(parser="caves", message=message, url=f"/{slug[0:4]}/{slug}_cave_edit/")
print(message)
if description_file[0]: # if not an empty string
message = f' - {slug:12} Note (not an error): complex description filename "{description_file[0]}" inside "{CAVEDESCRIPTIONS}/{filename}"'
DataIssue.objects.create(parser="caves ok", message=message, url=f"/{slug}_cave_edit/")
print(message)
if maxItems is not None and len(items) > maxItems:
message = (
" ! %(count)i x %(itemname)s found, no more than %(max)i expected in this XML unit. Load ABORT. "
% {"count": len(items), "itemname": itemname, "max": maxItems}
+ " in file "
+ context
)
DataIssue.objects.create(parser="caves", message=message)
print(message)
if minItems == 0:
if not items:
items = [""]
return items
if not (Path(EXPOWEB) / description_file[0]).is_file():
message = f' ! {slug:12} description filename "{EXPOWEB}/{description_file[0]}" does not refer to a real file'
DataIssue.objects.create(parser="caves", message=message, url=f"/{slug}_cave_edit/")
print(message)
# c.description_file="" # done only once, to clear out cruft.
c.save()
# ChatGPT replacement attempt 2023-04-21. Obviously very incomplete, but some useful ideas
# import os
# import xml.etree.ElementTree as ET
# class BadCaveException(Exception):
# pass
# class FailedCaveUpdateException(Exception):
# pass
# def readcave_chatgpt(filename, entrances_xslug, caves_xslug, areas_xslug):
# """Reads an entrance description from the .html file and updates the corresponding Cave object"""
# tree = ET.parse(os.path.join(CAVEDESCRIPTIONS, filename))
# root = tree.getroot()
# cavecontents = root.find("cave")
# if cavecontents is None:
# raise BadCaveException(f'! BAD CAVE at "{filename}"')
# non_public = cavecontents.findtext("non_public")
# slugs = cavecontents.findtext("caveslug")
# official_name = cavecontents.findtext("official_name")
# kataster_code = cavecontents.findtext("kataster_code")
# kataster_number = cavecontents.findtext("kataster_number")
# unofficial_number = cavecontents.findtext("unofficial_number")
# explorers = cavecontents.findtext("explorers")
# underground_description = cavecontents.findtext("underground_description")
# equipment = cavecontents.findtext("equipment")
# references = cavecontents.findtext("references")
# survey = cavecontents.findtext("survey")
# kataster_status = cavecontents.findtext("kataster_status")
# underground_centre_line = cavecontents.findtext("underground_centre_line")
# notes = cavecontents.findtext("notes")
# length = cavecontents.findtext("length")
# depth = cavecontents.findtext("depth")
# extent = cavecontents.findtext("extent")
# survex_file = cavecontents.findtext("survex_file")
# description_file = cavecontents.findtext("description_file")
# url = cavecontents.findtext("url")
# areas = cavecontents.findall("area")
# entrances = cavecontents.findall("entrance")
# if (
# non_public is not None
# # etc.
# # wrong, some of these should be ==1 and some >=1
# ):
# try:
# cave = caves_xslug.get(kataster_number)
# if cave is None:
# cave = Cave.objects.create(
# non_public={
# "True": True,
# "False": False,
# "true": True,
# "false": False,
# }[non_public],
# official_name=official_name,
# # kataster [truncated]
def readcaves():
"""Reads the xml-format HTML files in the EXPOWEB repo, not from the loser repo."""
"""Called from databaseReset mass importer.
Reads the xml-format HTML 'cave' files in the EXPOWEB repo, the survex files from the loser repo.
"""
# Pending is for those caves which do not have cave_data/1623-xxx.html XML files even though
# they exist and have surveys.
pending = set()