forked from expo/troggle
XML parser attempt retracted
This commit is contained in:
parent
30ef427b90
commit
c5a9bdc724
@ -1,5 +1,7 @@
|
|||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
|
|
||||||
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
@ -426,6 +428,14 @@ def read_cave(filename, cave=None):
|
|||||||
|
|
||||||
If not called as part of initial import, then the global lists will not be correct
|
If not called as part of initial import, then the global lists will not be correct
|
||||||
but this is OK, a search will find them in the db.
|
but this is OK, a search will find them in the db.
|
||||||
|
|
||||||
|
Attempted to use the standard Python 3.11 XML library (xml.etree.ElementTree), but it fails on HTML entities (2023-04-23)
|
||||||
|
import xml.etree.ElementTree as ET
|
||||||
|
tree = ET.parse(fn)
|
||||||
|
xml_root = tree.getroot()
|
||||||
|
for t in ["html", "head", "body", "cave","non_public", "caveslug", "official_name","entrance"]:
|
||||||
|
elements = xml_root.findall(t)
|
||||||
|
|
||||||
"""
|
"""
|
||||||
def getXMLmax1(field):
|
def getXMLmax1(field):
|
||||||
return getXML(cavecontents, field, maxItems=1, context=context)
|
return getXML(cavecontents, field, maxItems=1, context=context)
|
||||||
@ -492,6 +502,8 @@ def read_cave(filename, cave=None):
|
|||||||
|
|
||||||
# Note: these are HTML files in the EXPOWEB repo, not from the loser repo.
|
# Note: these are HTML files in the EXPOWEB repo, not from the loser repo.
|
||||||
fn = settings.CAVEDESCRIPTIONS / filename
|
fn = settings.CAVEDESCRIPTIONS / filename
|
||||||
|
context = filename
|
||||||
|
|
||||||
# print(f" - Reading Cave from cave descriptions file {fn}")
|
# print(f" - Reading Cave from cave descriptions file {fn}")
|
||||||
if not fn.exists():
|
if not fn.exists():
|
||||||
message = f" ! Cave_data file reading problem filename:'cave_data/{filename}'"
|
message = f" ! Cave_data file reading problem filename:'cave_data/{filename}'"
|
||||||
@ -501,7 +513,6 @@ def read_cave(filename, cave=None):
|
|||||||
|
|
||||||
with open(fn) as f:
|
with open(fn) as f:
|
||||||
contents = f.read()
|
contents = f.read()
|
||||||
context = filename
|
|
||||||
cavecontentslist = getXML(contents, "cave", maxItems=1, context=context)
|
cavecontentslist = getXML(contents, "cave", maxItems=1, context=context)
|
||||||
|
|
||||||
if len(cavecontentslist) != 1:
|
if len(cavecontentslist) != 1:
|
||||||
@ -580,7 +591,7 @@ def read_cave(filename, cave=None):
|
|||||||
cave.url=url[0]
|
cave.url=url[0]
|
||||||
|
|
||||||
areas = getXML(cavecontents, "area", context=context)
|
areas = getXML(cavecontents, "area", context=context)
|
||||||
# cave.area_set.clear() # Need to find correct syntax. Does not delete previously loaded areas.. WARNING
|
cave.area.clear() # Deletes all links to areas in db
|
||||||
for area_slug in areas:
|
for area_slug in areas:
|
||||||
if area_slug in areas_xslug:
|
if area_slug in areas_xslug:
|
||||||
newArea = areas_xslug[area_slug]
|
newArea = areas_xslug[area_slug]
|
||||||
@ -630,70 +641,6 @@ def read_cave(filename, cave=None):
|
|||||||
cave.save()
|
cave.save()
|
||||||
return cave
|
return cave
|
||||||
|
|
||||||
|
|
||||||
# ChatGPT replacement attempt 2023-04-21. Obviously very incomplete, but some useful ideas
|
|
||||||
# import os
|
|
||||||
# import xml.etree.ElementTree as ET
|
|
||||||
|
|
||||||
# class BadCaveException(Exception):
|
|
||||||
# pass
|
|
||||||
|
|
||||||
# class FailedCaveUpdateException(Exception):
|
|
||||||
# pass
|
|
||||||
|
|
||||||
# def read_cave_chatgpt(filename, entrances_xslug, caves_xslug, areas_xslug):
|
|
||||||
# """Reads an entrance description from the .html file and updates the corresponding Cave object"""
|
|
||||||
# tree = ET.parse(os.path.join(CAVEDESCRIPTIONS, filename))
|
|
||||||
# root = tree.getroot()
|
|
||||||
|
|
||||||
# cavecontents = root.find("cave")
|
|
||||||
# if cavecontents is None:
|
|
||||||
# raise BadCaveException(f'! BAD CAVE at "{filename}"')
|
|
||||||
|
|
||||||
# non_public = cavecontents.findtext("non_public")
|
|
||||||
# slugs = cavecontents.findtext("caveslug")
|
|
||||||
# official_name = cavecontents.findtext("official_name")
|
|
||||||
# kataster_code = cavecontents.findtext("kataster_code")
|
|
||||||
# kataster_number = cavecontents.findtext("kataster_number")
|
|
||||||
# unofficial_number = cavecontents.findtext("unofficial_number")
|
|
||||||
# explorers = cavecontents.findtext("explorers")
|
|
||||||
# underground_description = cavecontents.findtext("underground_description")
|
|
||||||
# equipment = cavecontents.findtext("equipment")
|
|
||||||
# references = cavecontents.findtext("references")
|
|
||||||
# survey = cavecontents.findtext("survey")
|
|
||||||
# kataster_status = cavecontents.findtext("kataster_status")
|
|
||||||
# underground_centre_line = cavecontents.findtext("underground_centre_line")
|
|
||||||
# notes = cavecontents.findtext("notes")
|
|
||||||
# length = cavecontents.findtext("length")
|
|
||||||
# depth = cavecontents.findtext("depth")
|
|
||||||
# extent = cavecontents.findtext("extent")
|
|
||||||
# survex_file = cavecontents.findtext("survex_file")
|
|
||||||
# description_file = cavecontents.findtext("description_file")
|
|
||||||
# url = cavecontents.findtext("url")
|
|
||||||
|
|
||||||
# areas = cavecontents.findall("area")
|
|
||||||
# entrances = cavecontents.findall("entrance")
|
|
||||||
|
|
||||||
# if (
|
|
||||||
# non_public is not None
|
|
||||||
# # etc.
|
|
||||||
# # wrong, some of these should be ==1 and some >=1
|
|
||||||
# ):
|
|
||||||
# try:
|
|
||||||
# cave = caves_xslug.get(kataster_number)
|
|
||||||
# if cave is None:
|
|
||||||
# cave = Cave.objects.create(
|
|
||||||
# non_public={
|
|
||||||
# "True": True,
|
|
||||||
# "False": False,
|
|
||||||
# "true": True,
|
|
||||||
# "false": False,
|
|
||||||
# }[non_public],
|
|
||||||
# official_name=official_name,
|
|
||||||
# # kataster [truncated]
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def readcaves():
|
def readcaves():
|
||||||
"""Called from databaseReset mass importer.
|
"""Called from databaseReset mass importer.
|
||||||
Reads the xml-format HTML 'cave' files in the EXPOWEB repo, the survex files from the loser repo.
|
Reads the xml-format HTML 'cave' files in the EXPOWEB repo, the survex files from the loser repo.
|
||||||
@ -732,7 +679,6 @@ def readcaves():
|
|||||||
with transaction.atomic():
|
with transaction.atomic():
|
||||||
area = get_area("1623")
|
area = get_area("1623")
|
||||||
|
|
||||||
print(" - settings.CAVEDESCRIPTIONS: ", CAVEDESCRIPTIONS)
|
|
||||||
print(" - Reading Entrances from entrance descriptions xml files")
|
print(" - Reading Entrances from entrance descriptions xml files")
|
||||||
for filename in next(os.walk(ENTRANCEDESCRIPTIONS))[2]: # Should be a better way of getting a list of files
|
for filename in next(os.walk(ENTRANCEDESCRIPTIONS))[2]: # Should be a better way of getting a list of files
|
||||||
read_entrance(filename)
|
read_entrance(filename)
|
||||||
|
@ -12,7 +12,7 @@ though, you do not need to do a data import as it happens automatically -->
|
|||||||
<body>
|
<body>
|
||||||
<b>This file is generated by troggle</b> on {{date}} UTC using the form documented at
|
<b>This file is generated by troggle</b> on {{date}} UTC using the form documented at
|
||||||
the form documented at
|
the form documented at
|
||||||
<a ="/handbook/survey/caveentry.html">handbook/survey/caveentry.html</a>
|
<a href="/handbook/survey/caveentry.html">handbook/survey/caveentry.html</a>
|
||||||
<br>
|
<br>
|
||||||
|
|
||||||
<cave>
|
<cave>
|
||||||
|
@ -15,7 +15,7 @@ though, you do not need to do a data import as it happens automatically -->
|
|||||||
</head>
|
</head>
|
||||||
<body>
|
<body>
|
||||||
<b>This file is generated by troggle</b> on {{date}} UTC using the form documented at
|
<b>This file is generated by troggle</b> on {{date}} UTC using the form documented at
|
||||||
<a ="/handbook/survey/ententry.html">handbook/survey/ententry.html</a>
|
<a href="/handbook/survey/ententry.html">handbook/survey/ententry.html</a>
|
||||||
<br>
|
<br>
|
||||||
<entrance>
|
<entrance>
|
||||||
<non_public>{{ entrance.non_public }}</non_public>
|
<non_public>{{ entrance.non_public }}</non_public>
|
||||||
|
Loading…
Reference in New Issue
Block a user