2012-06-10 16:56:12 +01:00
import os
import re
2021-04-14 18:24:08 +01:00
from pathlib import Path
2012-06-10 16:56:12 +01:00
2020-05-24 01:57:06 +01:00
from django . conf import settings
2021-04-06 22:50:57 +01:00
from django . db import transaction
2020-05-24 01:57:06 +01:00
2023-03-28 19:26:37 +01:00
from troggle . core . models . caves import Area , Cave , CaveAndEntrance , Entrance , GetCaveLookup #EntranceSlug,
2023-01-29 21:45:51 +00:00
from troggle . core . models . logbooks import CaveSlug
2021-04-13 00:43:57 +01:00
from troggle . core . models . troggle import DataIssue
2023-01-19 21:18:42 +00:00
from troggle . settings import CAVEDESCRIPTIONS , ENTRANCEDESCRIPTIONS , EXPOWEB , SURVEX_DATA
2022-07-25 00:57:00 +01:00
2023-01-19 21:18:42 +00:00
"""Reads all the cave description data by parsing the xml files (stored as e.g. :EXPOWEB:/cave_data/1623-161.html )
and creating the various Cave, Entrance and necessary Area objects.

This is the first import that happens after the database is reinitialised,
so it is the first thing that creates tables.
"""
2012-06-10 16:56:12 +01:00
2023-01-19 21:18:42 +00:00
todo = """
2023-02-02 21:50:40 +00:00
- Cannot use Edit This Page for pendingcaves . txt_edit as Edit This Page is expecting an html file .
2022-03-05 12:20:26 +00:00
So we will need a separate file - editing capability just for this configuration file ? !
2022-07-19 17:48:11 +01:00
2023-02-02 21:50:40 +00:00
- Semi - automagically import all the 1627 - pending caves and create HTML files for them to be
edited individually . ( These are caves we only know about because we have German survex files . )
- crashes on MariaDB in databasereset . py on server when deleting Caves and complains Area needs a
non null parent , But this is not true . The only solution we have found is to let it crash , then
stop and restart MariaDB ( requires a logon able to sudo ) and then restart the databasereset . py
again . ( status as of July 2022 )
2023-01-19 21:18:42 +00:00
"""
2020-07-06 20:27:31 +01:00
# Module-level lookup caches shared by the reader functions in this file, so that
# repeated references to the same slug do not need another database query.
# entrances_xslug: entrance slug -> Entrance object (filled when entrances are linked to caves).
entrances_xslug = { }
# caves_xslug: cave slug -> value stored by readcave() for that slug's CaveSlug record.
caves_xslug = { }
# areas_xslug: area short name -> Area object (filled by readcave()).
areas_xslug = { }
2023-01-19 21:18:42 +00:00
2021-04-26 17:23:23 +01:00
def dummy_entrance(k, slug, msg="DUMMY"):
    """Create, save and return a placeholder Entrance.

    Used either for a PENDING cave or as a DUMMY entrance when the user forgot to
    provide one when creating the cave.

    k    -- used as the new entrance's name and slug, and (plus ".html") as its filename
    slug -- slug of the owning cave; only used in the DataIssue recorded on failure
    msg  -- short tag (e.g. "DUMMY", "PENDING") included in the failure report

    Raises: re-raises whatever Entrance.objects.create() raised, after recording
    a DataIssue and printing the same message.
    """
    try:
        ent = Entrance.objects.create(  # creates object and saves into db
            name=k,
            slug=k,
            filename=k + ".html",
            entrance_description="Dummy entrance: auto-created when registering a new cave "
            + "and you forgot to create an entrance for it. Click on 'Edit' to enter the correct data, then 'Submit'.",
            marking="?",
        )
    except Exception:
        # objects.create() never returns a falsy object: failure shows up as an exception,
        # so that is where the problem has to be reported (and then passed on to the caller).
        message = f" ! {k:11s} {msg}-{slug} {k} entrance create failure"
        DataIssue.objects.create(parser="entrances", message=message, url=f"{slug}")
        print(message)
        raise
    return ent
2021-04-26 17:23:23 +01:00
2023-03-28 19:08:05 +01:00
2021-04-26 17:23:23 +01:00
def set_dummy_entrance(id, slug, cave, msg="DUMMY"):
    """Attach a freshly created dummy entrance to `cave`.

    Called only when reading the cave and entrance html files, when the entrance field
    is either missing or holds a null string instead of a filename in a cave_data file.

    id   -- identifier passed to dummy_entrance() as the new entrance's name/slug
            (parameter name kept for existing callers, although it shadows the builtin)
    slug -- cave slug; key under which the new entrance is cached in entrances_xslug
    cave -- the Cave the entrance is linked to (with an empty entrance letter)
    msg  -- tag forwarded to dummy_entrance() for its failure report

    Best effort: any failure is recorded as a DataIssue and printed, not raised.
    """
    try:
        entrance = dummy_entrance(id, slug, msg=msg)  # forward the caller's tag, not a fixed "DUMMY"
        entrances_xslug[slug] = entrance
        CaveAndEntrance.objects.update_or_create(cave=cave, entrance_letter="", entrance=entrance)
        message = f" - Note: Dummy Entrance successfully set for entrance {id} on cave {cave}"
        DataIssue.objects.create(parser="entrances", message=message, url=f"{cave.url}")
    except Exception:
        # Deliberately swallowed so that one bad cave does not stop the import,
        # but KeyboardInterrupt/SystemExit are no longer caught.
        message = f' ! Entrance Dummy setting failure, slug:"{slug}" cave id :"{id}" '
        DataIssue.objects.create(parser="entrances", message=message, url=f"{cave.url}")
        print(message)
2023-01-19 21:18:42 +00:00
2023-03-24 00:54:26 +00:00
def make_areas():
    """Create the four top-level Area records: 1623, 1624, 1626 and 1627.

    Each is created with no parent (super=None). objects.create() already saves the
    new row, so no separate save() call is made.
    """
    print(" - Creating Areas 1623, 1624, 1627 and 1626")
    # This crashes on the server with MariaDB even though a null parent is explicitly allowed.
    for short_name in ("1623", "1624", "1626", "1627"):
        Area.objects.create(short_name=short_name, super=None)
def get_area(areanum):
    """Given the area number as a string, return the matching Area object.

    If the Area table is empty, the four top-level areas are created first via
    make_areas(). Any value other than "1623", "1624", "1626" or "1627" falls back
    to the "1623" area.

    Raises Area.DoesNotExist if the selected area is not in the database.
    """
    if not Area.objects.exists():  # cheap emptiness test, does not load the rows
        make_areas()
    if areanum not in ("1623", "1624", "1626", "1627"):
        areanum = "1623"  # default
    return Area.objects.get(short_name=areanum)
def create_new_cave(svxpath):
    """Create a pending cave for a survex path that is not recognised as a known cave.

    This is called only when a new survex file is edited online which has a path on the
    :loser: repo which is not recognised as a known cave.

    svxpath -- path without extension, e.g. "caves-1623/666/antig". The last four
               characters of the first component give the area, the second component
               the cave id.

    Returns the existing Cave if one already has this unofficial_number, otherwise the
    newly created pending Cave with survex_file set to svxpath + ".svx". Returns None
    if do_pending_cave() declined to create one because the slug already exists.

    Raises: re-raises any exception from do_pending_cave() after recording a DataIssue.
    """
    # e.g. svxpath = "caves-1623/666/antig"
    print(f"Create new cave at {svxpath}")

    survex_file = svxpath + ".svx"
    parts = svxpath.split("/")
    a = parts[0][-4:]
    caveid = parts[1]
    print(f"parts {parts}, {a}, {caveid}")
    # double check: anything that is not a 162x area is filed under 1623
    areanum = a if a[0:3] == "162" else "1623"
    # Note we are not appending the .htm as we are modern folks now.
    # The url is built from caveid: `a` is only four characters long, so slicing it
    # past the area number always gave an empty string.
    url = f"{areanum}/{caveid}"
    k = f"{areanum}-{caveid}"
    area = get_area(areanum)

    caves = Cave.objects.filter(unofficial_number=caveid)
    if caves:
        message = f" ! Already exists, caveid:{k} in area {areanum} {caves}"
        DataIssue.objects.create(parser="caves", message=message)
        print(message)
        return caves[0]

    try:
        cave = do_pending_cave(k, url, area)
    except Exception:
        message = f" ! Error. Cannot create pending cave and entrance, pending-id:{k} in area {areanum}"
        DataIssue.objects.create(parser="caves", message=message)
        print(message)
        raise

    if cave is None:
        # do_pending_cave() returns nothing when the slug is already in the cave lookup
        # (it records its own DataIssue), so there is no new object to update here.
        return None

    # we know what the survex file is, we don't need to use the guess
    cave.survex_file = survex_file
    cave.save()
    return cave
2023-01-19 21:18:42 +00:00
2022-07-25 00:57:00 +01:00
def do_pending_cave(k, url, area):
    """Create a placeholder Cave (plus slug and dummy entrance) for a PENDING cave.

    Default for a PENDING cave; it should be overwritten in the db later if a real cave
    of the same name exists in expoweb/cave_data/1623-"k".html

    Note that at this point in importing the data we have not yet seen the survex files,
    so we can't look inside the relevant survex file to find the year and so we can't
    provide helpful links.

    k    -- pending cave id, e.g. "1623-666"; also used as the slug and unofficial_number
    url  -- url stored on the new Cave and attached to the DataIssues
    area -- Area object the new cave is added to

    Returns the new Cave, or None if the slug is already present in GetCaveLookup().
    """
    slug = k

    g = GetCaveLookup()
    with transaction.atomic():
        if slug in g:
            message = f" ! {k:18} cave listed in pendingcaves.txt already exists."
            DataIssue.objects.create(parser="caves", message=message, url=url)
            print(message)
            return None

        cave = Cave(
            unofficial_number=k,
            underground_description="Pending cave write-up - creating as empty object. No XML file available yet.",
            survex_file=_pending_survex_file(k, area),
            url=url,
            notes=_pending_cave_note(slug),
        )
        cave.save()  # must save to have id before foreign keys work. This is also a ManyToMany key.
        cave.area.add(area)
        cave.save()
        message = f" ! {k:18} {cave.underground_description} url: {url}"
        DataIssue.objects.create(parser="caves", message=message, url=url)
        print(message)

        # NOTE(review): these handlers run inside the atomic block; after a database
        # error the DataIssue write may itself fail - confirm against Django's
        # guidance on catching exceptions inside atomic().
        try:  # Now create a cave slug ID
            CaveSlug.objects.update_or_create(cave=cave, slug=slug, primary=False)
        except Exception:
            message = f" ! {k:11s} PENDING CaveSLUG {slug} create failure"
            DataIssue.objects.create(parser="caves", message=message)
            print(message)

        try:
            set_dummy_entrance(k, slug, cave, msg="PENDING")
        except Exception:
            message = f" ! {k:11s} PENDING entrance + cave UNION create failure '{cave}' [{slug}] {k}"
            DataIssue.objects.create(parser="caves", message=message)
            print(message)
    return cave


def _pending_survex_file(k, area):
    """Guess the survex file for pending cave `k`.

    Returns a path (as a string) relative to settings.SURVEX_DATA, or "" if nothing
    plausible is found. Convoluted. Needs rewriting.
    """
    cave_id = Path(k[5:]) if k[0:3] == "162" else Path(k)
    area_dir = f"caves-{area.short_name}"

    # First choice <area>/<id>/<id>.svx, second choice <area>/<id>.svx
    for candidate in (f"{area_dir}/{cave_id}/{cave_id}.svx", f"{area_dir}/{cave_id}.svx"):
        if Path(settings.SURVEX_DATA, candidate).is_file():
            return candidate

    cave_dir = Path(settings.SURVEX_DATA, f"{area_dir}/{cave_id}")
    if not cave_dir.is_dir():
        return ""

    # Otherwise take any .svx file in the cave's directory, preferring one whose name
    # starts with (up to) the same five characters as the cave id.
    survex_file = ""
    prime_suspect = ""
    for f in cave_dir.iterdir():
        if f.suffix != ".svx":
            continue
        survex_file = str(f.relative_to(settings.SURVEX_DATA))
        chk = min(5, len(f.name) - 1)
        if f.name[:chk].lower() == cave_id.name[:chk].lower():  # bodge which mostly works
            prime_suspect = survex_file
    return prime_suspect or survex_file


def _pending_cave_note(slug):
    """Return the HTML instructions stored in the 'notes' field of a pending cave."""
    return "".join(
        (
            "_Survex file found in loser repo but no description in expoweb <br><br><br>\n",
            "INSTRUCTIONS: First open 'This survex file' (link above the CaveView panel) to find the date and info. Then ",
            '<br><br>\n\n - (0) look in the <a href="/noinfo/cave-number-index">cave number index</a> for notes on this cave, ',
            "<br><br>\n\n - (1) search in the survex file for the *ref to find a ",
            "relevant wallet, e.g.<a href='/survey_scans/2009%252311/'>2009#11</a> and read the notes image files <br>\n - ",
            "<br><br>\n\n - (2) search in the Expo for that year e.g. <a href='/expedition/2009'>2009</a> to find a ",
            "relevant logbook entry, remember that the date may have been recorded incorrectly, ",
            "so check for trips i.e. logbook entries involving the same people as were listed in the survex file, ",
            "and you should also check the scanned copy of the logbook (linked from each logbook entry page) ",
            "just in case a vital trip was not transcribed, then <br>\n - ",
            "click on 'Edit this cave' and copy the information you find in the survex file and the logbook ",
            "and delete all the text in the 'Notes' section - which is the text you are reading now. ",
            "<br><br>\n\n - Only two fields on this form are essential. ",
            "Documentation of all the fields on 'Edit this cave' form is in <a href='/handbook/survey/caveentryfields.html'>handbook/survey/caveentryfields</a> ",
            "<br><br>\n\n - ",
            "You will also need to create a new entrance from the 'Edit this cave' page. Ignore the existing dummy one, it will evaporate on the next full import. ",
            "<br><br>\n\n - ",
            "When you Submit it will create a new file in expoweb/cave_data/ ",
            "<br><br>\n\n - Now you can edit the entrance info: click on Edit below for the dummy entrance. ",
            "and then Submit to save it (if you forget to do this, a dummy entrance will be created for your new cave description). ",
            "<br><br>\n\n - Finally, you need to find a nerd to edit the file '<var>expoweb/cave_data/pending.txt</var>' ",
            f"to remove the line <br><var>{slug}</var><br> as it is no longer 'pending' but 'done. Well Done.",
        )
    )
2021-04-26 02:10:45 +01:00
2012-06-10 16:56:12 +01:00
def readentrance(filename):
    """Read one entrance description from its .html file and store it as an Entrance.

    filename -- name of a file inside ENTRANCEDESCRIPTIONS.

    The file must contain exactly one <entrance> element. Every single-valued tag listed
    below, and at least one <slug>, must be present; otherwise a DataIssue is recorded
    and the file is skipped. The first <slug> is stored on the Entrance itself; extra
    slugs are only reported.
    """
    # Note: these are HTML files in the EXPOWEB repo, not from the loser repo.
    with open(os.path.join(ENTRANCEDESCRIPTIONS, filename)) as f:
        contents = f.read()
    context = filename

    entrancecontentslist = getXML(contents, "entrance", maxItems=1, context=context)
    if len(entrancecontentslist) != 1:
        message = f' ! BAD ENTRANCE at "{filename}"'
        DataIssue.objects.create(parser="entrances", message=message)
        print(message)
        return
    entrancecontents = entrancecontentslist[0]

    # Tags that map one-to-one onto Entrance fields of the same name.
    single_tags = (
        "non_public",
        "name",
        "entrance_description",
        "explorers",
        "map_description",
        "location_description",
        "approach",
        "underground_description",
        "photo",
        "marking",
        "marking_comment",
        "findability",
        "findability_description",
        "alt",
        "northing",
        "easting",
        "tag_station",
        "exact_station",
        "other_station",
        "other_description",
        "bearings",
        "url",
    )
    found = {tag: getXML(entrancecontents, tag, maxItems=1, context=context) for tag in single_tags}
    slugs = getXML(entrancecontents, "slug", context=context)
    # Optional tag: with minItems=0 getXML() returns [""] when it is absent.
    lastvisit = getXML(entrancecontents, "last visit date", maxItems=1, minItems=0, context=context)

    # getXML() only reports a missing tag, it still returns an empty list,
    # so check before indexing rather than crash the whole import.
    missing = [tag for tag, items in found.items() if not items]
    if not slugs:
        missing.append("slug")
    if missing:
        missing_tags = ", ".join(missing)
        message = f' ! BAD ENTRANCE at "{filename}", missing: {missing_tags}'
        DataIssue.objects.create(parser="entrances", message=message)
        print(message)
        return

    values = {tag: items[0] for tag, items in found.items()}
    flags = {"True": True, "False": False, "true": True, "false": False}
    if values["non_public"] not in flags:
        message = f' ! BAD ENTRANCE at "{filename}", non_public is "{values["non_public"]}"'
        DataIssue.objects.create(parser="entrances", message=message)
        print(message)
        return
    values["non_public"] = flags[values["non_public"]]

    # NOTE(review): all values are passed as lookup arguments (no defaults=), so an
    # existing row is only reused if every field matches - kept as in the original.
    e, _created = Entrance.objects.update_or_create(
        lastvisit=lastvisit[0],
        filename=filename,
        slug=slugs[0],
        **values,
    )

    if len(slugs) > 1:
        # Only ever one of these in the expo dataset. The entrance has already been
        # stored under the first slug; the others are reported, not saved.
        message = f" ! - More than one slug for an entrance: {e}, slugs: {slugs}. Aborting."
        DataIssue.objects.create(parser="entrances", message=message, url=f"/cave/{slugs[0]}/edit/")
        print(message)
2023-01-19 21:18:42 +00:00
2012-06-10 16:56:12 +01:00
def readcave(filename):
    """Read one cave description from its .html file and store it as a Cave.

    filename -- name of a file inside CAVEDESCRIPTIONS.

    Creates or updates the Cave, attaches its Areas, registers its CaveSlug(s) and links
    its entrances (creating a dummy entrance when none is given). Survex and description
    file names that do not exist on disc are reported as DataIssues.

    Assumes any area it hasn't seen before is a subarea of 1623.
    Problems are recorded as DataIssues and printed; a file with missing or repeated
    single-valued tags is skipped.
    """
    # Note: these are HTML files in the EXPOWEB repo, not from the loser repo.
    with open(os.path.join(CAVEDESCRIPTIONS, filename)) as f:
        contents = f.read()
    context = filename

    cavecontentslist = getXML(contents, "cave", maxItems=1, context=context)
    if len(cavecontentslist) != 1:
        message = f' ! BAD CAVE at "{filename}"'
        DataIssue.objects.create(parser="caves", message=message)
        print(message)
        return
    cavecontents = cavecontentslist[0]

    # Tags that map one-to-one onto Cave fields of the same name.
    single_tags = (
        "non_public",
        "official_name",
        "kataster_code",
        "kataster_number",
        "unofficial_number",
        "explorers",
        "underground_description",
        "equipment",
        "references",
        "survey",
        "kataster_status",
        "underground_centre_line",
        "notes",
        "length",
        "depth",
        "extent",
        "survex_file",
        "description_file",
        "url",
    )
    found = {tag: getXML(cavecontents, tag, maxItems=1, context=context) for tag in single_tags}
    slugs = getXML(cavecontents, "caveslug", maxItems=1, context=context)
    areas = getXML(cavecontents, "area", context=context)
    entrances = getXML(cavecontents, "entrance", context=context)

    def abort_load():
        # No slug variable exists on this path, so derive one for the edit url.
        edit_slug = slugs[0] if slugs else Path(filename).stem
        message = f' ! ABORT loading this cave. in "{filename}"'
        DataIssue.objects.create(parser="caves", message=message, url=f"/{edit_slug}_cave_edit/")
        print(message)

    if not slugs or not areas or any(len(items) != 1 for items in found.values()):
        # missing item, or more than one item, in the long list
        abort_load()
        return

    values = {tag: items[0] for tag, items in found.items()}
    survex_file = values["survex_file"]
    description_file = values["description_file"]

    c = None
    try:
        values["non_public"] = {"True": True, "False": False, "true": True, "false": False}[values["non_public"]]
        c, _created = Cave.objects.update_or_create(filename=filename, **values)
    except Exception:
        print(" ! FAILED to get only one CAVE when updating using: " + filename)
        kaves = Cave.objects.all().filter(kataster_number=values["kataster_number"])
        for k in kaves:
            message = " ! - DUPLICATES in db. kataster:" + str(k.kataster_number) + ", slug:" + str(k.slug())
            DataIssue.objects.create(parser="caves", message=message)
            print(message)
        for k in kaves:
            if k.slug() is not None:
                print(" ! - OVERWRITING this one: slug:" + str(k.slug()))
                # NOTE(review): this note is set on the object but never saved - confirm intent.
                k.notes = "DUPLICATE kataster number found on import. Please fix\n" + k.notes
                c = k
    if c is None:
        # Neither created nor found among the duplicates: nothing to attach anything to.
        abort_load()
        return

    for area_slug in areas:
        if area_slug in areas_xslug:
            newArea = areas_xslug[area_slug]
        else:
            known = Area.objects.filter(short_name=area_slug)
            if known:
                newArea = known[0]
            else:
                newArea = Area(short_name=area_slug, super=Area.objects.get(short_name="1623"))
                newArea.save()
            areas_xslug[area_slug] = newArea
        c.area.add(newArea)

    primary = True  # this sets the first thing we find to be primary=True and all the others =False
    for slug in slugs:
        if slug not in caves_xslug:
            try:  # we want to overwrite a PENDING cave if we are now importing the 1623-xxx.html file for it
                cs, _created = CaveSlug.objects.update_or_create(cave=c, slug=slug, primary=primary)
                caves_xslug[slug] = cs
            except Exception as ex:
                # This fails to do an update! It just crashes.. to be fixed
                message = f" ! Cave update/create failure : {slug}, skipping file cave_data/{context} with exception\nException: {ex.__class__}"
                DataIssue.objects.create(parser="caves", message=message)
                print(message)
        primary = False
    slug = slugs[-1]  # the slug used for dummy entrances and DataIssue urls below

    if not entrances:
        # missing entrance link in cave_data/1623-* .html file
        set_dummy_entrance(slug[5:], slug, c, msg="DUMMY: no entrances")
    else:
        for entrance in entrances:
            eslug = getXML(entrance, "entranceslug", maxItems=1, context=context)[0]
            letter = getXML(entrance, "letter", maxItems=1, context=context)[0]
            if len(entrances) == 1 and not eslug:  # may be empty: <entranceslug></entranceslug>
                set_dummy_entrance(slug[5:], slug, c, msg="DUMMY: no entrance slug read from file")
                continue
            try:
                if eslug in entrances_xslug:
                    entrance = entrances_xslug[eslug]
                else:
                    # Entrances in this file are created with their slug stored on the
                    # Entrance itself, so look it up there.
                    entrance = Entrance.objects.get(slug=eslug)
                    entrances_xslug[eslug] = entrance
                CaveAndEntrance.objects.update_or_create(cave=c, entrance_letter=letter, entrance=entrance)
            except Exception:
                message = f' ! Entrance setting failure, slug:"{slug}" #entrances:{len(entrances)} {entrance} letter:"{letter}" cave:"{c}" filename:"cave_data/{filename}"'
                DataIssue.objects.create(parser="entrances", message=message, url=f"{c.url}_edit/")
                print(message)

    if survex_file and not (Path(SURVEX_DATA) / survex_file).is_file():
        message = f' ! {slug:12} survex filename does not exist :LOSER:"{survex_file}" in "{filename}"'
        DataIssue.objects.create(parser="caves", message=message, url=f"/{slug[0:4]}/{slug}_cave_edit/")
        print(message)

    if description_file:  # if not an empty string
        message = f' - {slug:12} Note (not an error): complex description filename "{description_file}" inside "{CAVEDESCRIPTIONS}/{filename}"'
        DataIssue.objects.create(parser="caves ok", message=message, url=f"/{slug}_cave_edit/")
        print(message)
        if not (Path(EXPOWEB) / description_file).is_file():
            message = f' ! {slug:12} description filename "{EXPOWEB}/{description_file}" does not refer to a real file'
            DataIssue.objects.create(parser="caves", message=message, url=f"/{slug}_cave_edit/")
            print(message)
2012-06-10 16:56:12 +01:00
2023-01-19 21:18:42 +00:00
def getXML(text, itemname, minItems=1, maxItems=None, printwarnings=True, context=""):
    """Return the contents of every <itemname>...</itemname> element found in text.

    Matching is non-greedy and spans newlines (re.S). The tag name is matched
    literally: regex metacharacters in itemname are escaped.

    text          -- the XML-format string to search
    itemname      -- the tag name to look for
    minItems      -- fewer matches than this records a DataIssue (if printwarnings)
    maxItems      -- more matches than this records a DataIssue (if printwarnings);
                     None means there is no upper limit
    printwarnings -- when False, no DataIssue is created and nothing is printed
    context       -- appended to the warning text and used as the DataIssue url

    Returns the list of matched strings. When minItems is 0 and nothing matched,
    [""] is returned instead of an empty list, so the result always has an item [0].

    Although the warning messages say "Load ABORT", this function never aborts
    anything itself: it always returns the items it found.
    """
    tag = re.escape(itemname)  # a tag name is literal text, not a regex fragment
    items = re.findall(f"<{tag}>(.*?)</{tag}>", text, re.S)
    count = len(items)

    if printwarnings and count < minItems:
        message = (
            " ! %(count)i x %(itemname)s found, at least %(min)i expected. Load ABORT. "
            % {"count": count, "itemname": itemname, "min": minItems}
            + " in file "
            + context
        )
        DataIssue.objects.create(parser="caves", message=message, url=context)
        print(message)

    if printwarnings and maxItems is not None and count > maxItems:
        message = (
            " ! %(count)i x %(itemname)s found, no more than %(max)i expected in this XML unit. Load ABORT. "
            % {"count": count, "itemname": itemname, "max": maxItems}
            + " in file "
            + context
        )
        # Same url as the "too few" issue above, so both link back to the offending file.
        DataIssue.objects.create(parser="caves", message=message, url=context)
        print(message)

    if minItems == 0 and not items:
        # An optional tag that is absent is reported as a single empty string.
        items = [""]
    return items
2023-01-19 21:18:42 +00:00
2022-07-23 17:26:47 +01:00
def readcaves():
    """Reload all Caves and Entrances from the xml-format HTML files in the EXPOWEB repo.

    The files are not read from the loser repo. Steps, in this order:
      1. read the cave ids listed in pendingcaves.txt (if that file exists),
      2. delete every Area, Cave and Entrance object, and the DataIssues recorded
         by the "areas", "caves", "caves ok" and "entrances" parsers,
      3. read every file in ENTRANCEDESCRIPTIONS, then every .html file in
         CAVEDESCRIPTIONS,
      4. call GetCaveLookup() to set up the alias names,
      5. create the pending caves.

    Any exception raised while creating a pending cave is recorded as a DataIssue,
    printed, and then re-raised.
    """
    # Pending is for those caves which do not have cave_data/1623-xxx.html XML files even though
    # they exist and have surveys.
    pending = set()
    fpending = Path(CAVEDESCRIPTIONS, "pendingcaves.txt")
    if fpending.is_file():
        with open(fpending, "r") as fo:
            for line in fo:
                cid = line.strip().upper()
                if cid:  # a blank line is not a cave id
                    pending.add(cid)

    with transaction.atomic():
        print(" - Deleting Caves and Entrances")
        # Best effort: attempting to avoid MariaDB crash when doing this, so a failed
        # delete is reported and skipped instead of stopping the import.
        # NOTE(review): Django advises against catching database errors inside an
        # atomic block unless the failing statement is wrapped in a nested atomic
        # (savepoint). The structure is left unchanged pending a test on MariaDB.
        for model in (Area, Cave, Entrance):
            try:
                model.objects.all().delete()
            except Exception as e:
                print(f" ! Could not delete all {model.__name__} objects: {e}")

    # Clear the cave data issues and the caves as we are reloading
    for parser in ("areas", "caves", "caves ok", "entrances"):
        DataIssue.objects.filter(parser=parser).delete()

    with transaction.atomic():
        # The returned area is not used here. The call is kept in case get_area()
        # creates the area as a side effect - TODO confirm.
        get_area("1623")

        print(" - settings.CAVEDESCRIPTIONS: ", CAVEDESCRIPTIONS)
        print(" - Reading Entrances from entrance descriptions xml files")
        # Every file is read, whatever its extension and whether or not it belongs
        # to a pending cave (an earlier filter on both is disabled).
        for filename in next(os.walk(ENTRANCEDESCRIPTIONS))[2]:  # Should be a better way of getting a list of files
            readentrance(filename)

        print(" - Reading Caves from cave descriptions xml files")
        for filename in next(os.walk(CAVEDESCRIPTIONS))[2]:  # Should be a better way of getting a list of files
            if filename.endswith(".html"):
                readcave(filename)

    print(" - Setting up all the variously useful alias names")
    GetCaveLookup()

    print(" - Setting pending caves")
    # Do this last, so we can detect if they are created and no longer 'pending'
    with transaction.atomic():
        for k in pending:
            if k[0:3] == "162":
                # The id carries its own area prefix, e.g. "1626-xxx".
                areanum = k[0:4]
                url = f"{areanum}/{k[5:]}"  # Note we are not appending the .htm as we are modern folks now.
            else:
                # No area prefix: the cave is taken to be in 1623.
                areanum = "1623"
                url = f"1623/{k}"

            area = get_area(areanum)
            try:
                do_pending_cave(k, url, area)
            except Exception:
                message = f" ! Error. Cannot create pending cave and entrance, pending-id:{k} in area {areanum}"
                DataIssue.objects.create(parser="caves", message=message)
                print(message)
                raise