import os
import re

from pathlib import Path

from django.conf import settings
from django.db import transaction

from troggle.core.models.caves import Area, Cave, CaveAndEntrance, Entrance, GetCaveLookup
from troggle.core.models.logbooks import CaveSlug
from troggle.core.models.troggle import DataIssue
from troggle.settings import CAVEDESCRIPTIONS, ENTRANCEDESCRIPTIONS, EXPOWEB, SURVEX_DATA
""" Reads all the cave description data and entrance description data
by parsing the xml files stored as e . g .
: EXPOWEB : / cave_data / 1623 - 161. html
or
: EXPOWEB : / entrance_data / 1623 - 161 g . html
2021-04-06 22:50:57 +01:00
and creating the various Cave , Entrance and necessary Area objects .
2022-07-27 21:24:40 +01:00
This is the first import that happens after the database is reinitialised .
2022-07-21 19:01:57 +01:00
So is the first thing that creates tables .
2023-01-19 21:18:42 +00:00
"""
2012-06-10 16:56:12 +01:00

todo = """
- Cannot use Edit This Page for pendingcaves.txt_edit, as Edit This Page expects an HTML file.
  So we will need a separate file-editing capability just for this configuration file?!

- Semi-automagically import all the 1627-pending caves and create HTML files for them to be
  edited individually. (These are caves we only know about because we have German survex files.)

- Crashes on MariaDB in databasereset.py on the server when deleting Caves, complaining that Area
  needs a non-null parent, but this is not true. The only solution we have found is to let it crash,
  then stop and restart MariaDB (requires a logon able to sudo) and then restart databasereset.py
  again. (Status as of July 2022.)
"""

entrances_xslug = {}
caves_xslug = {}
areas_xslug = {}


def dummy_entrance(k, slug, msg="DUMMY"):
    """Returns an empty entrance object for either a PENDING cave or a DUMMY entrance if
    the user forgot to provide one when creating the cave.
    """
    ent = Entrance.objects.create(  # creates object and saves into db
        name=k,
        slug=k,
        filename=k + ".html",
        entrance_description="Dummy entrance: auto-created when registering a new cave "
        + "and you forgot to create an entrance for it. Click on 'Edit' to enter the correct data, then 'Submit'.",
        marking="?",
    )
    if ent:
        return ent
    else:
        message = f" ! {k:11s} {msg}-{slug} {k} entrance create failure"
        DataIssue.objects.create(parser="entrances", message=message, url=f"{slug}")
        print(message)
        raise RuntimeError(message)  # a bare `raise` here would fail: there is no active exception


def set_dummy_entrance(id, slug, cave, msg="DUMMY"):
    """Called only when reading the cave and entrance html files.
    The entrance field is either missing or holds a null string instead of a filename
    in a cave_data file.
    """
    global entrances_xslug
    try:
        entrance = dummy_entrance(id, slug, msg="DUMMY")
        entrances_xslug[slug] = entrance
        CaveAndEntrance.objects.update_or_create(cave=cave, entrance_letter="", entrance=entrance)
        message = f" - Note: Dummy Entrance successfully set for entrance {id} on cave {cave}"
        DataIssue.objects.create(parser="entrances", message=message, url=f"{cave.url}")
        # print(message)
    except:
        # raise
        message = f' ! Entrance Dummy setting failure, slug:"{slug}" cave id:"{id}"'
        DataIssue.objects.create(parser="entrances", message=message, url=f"{cave.url}")
        print(message)


def make_areas():
    print(" - Creating Areas 1623, 1624, 1627 and 1626")
    # This crashes on the server with MariaDB even though a null parent is explicitly allowed.
    area_1623 = Area.objects.create(short_name="1623", super=None)
    area_1623.save()
    area_1624 = Area.objects.create(short_name="1624", super=None)
    area_1624.save()
    area_1626 = Area.objects.create(short_name="1626", super=None)
    area_1626.save()
    area_1627 = Area.objects.create(short_name="1627", super=None)
    area_1627.save()


def get_area(areanum):
    """Given the area number as a string, return the Area object."""
    if Area.objects.count() == 0:
        make_areas()
    area = Area.objects.get(short_name="1623")  # default
    if areanum in ("1623", "1624", "1626", "1627"):
        area = Area.objects.get(short_name=areanum)
    return area


def create_new_cave(svxpath):
    """This is called only when a new survex file is edited online which has a path on the
    :loser: repo which is not recognised as a known cave.
    """
    # e.g. svxpath = "caves-1623/666/antig"
    print(f"Create new cave at {svxpath}")

    survex_file = svxpath + ".svx"
    parts = svxpath.split("/")
    a = parts[0][-4:]
    caveid = parts[1]
    print(f"parts {parts}, {a}, {caveid}")
    # double check
    if a[0:3] == "162":
        areanum = a[0:4]
    else:
        areanum = "1623"
    url = f"{areanum}/{caveid}"  # Note we are not appending the .htm as we are modern folks now.
    k = f"{areanum}-{caveid}"

    area = get_area(areanum)
    caves = Cave.objects.filter(unofficial_number=caveid)
    if caves:
        message = f" ! Already exists, caveid: {k} in area {areanum} {caves}"
        DataIssue.objects.create(parser="caves", message=message)
        print(message)
        return caves[0]

    try:
        cave = do_pending_cave(k, url, area)
    except:
        message = f" ! Error. Cannot create pending cave and entrance, pending-id:{k} in area {areanum}"
        DataIssue.objects.create(parser="caves", message=message)
        print(message)
        raise

    # we know what the survex file is, we don't need to use the guess
    cave.survex_file = survex_file
    cave.save()
    return cave
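
# Illustrative usage (assumed path layout, not executed):
#   create_new_cave("caves-1623/666/antig") registers pending-id "1623-666" in area 1623 via
#   do_pending_cave(), then records the known survex file "caves-1623/666/antig.svx" on the Cave.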


def do_pending_cave(k, url, area):
    """
    Default for a PENDING cave, should be overwritten in the db later if a real cave of the same name exists
    in expoweb/cave_data/1623-"k".html

    Note that at this point in importing the data we have not yet seen the survex files, so we can't
    look inside the relevant survex file to find the year, and so we can't provide helpful links.
    """

    def get_survex_file(k):
        """Guesses at and finds a survex file for this pending cave.
        Convoluted. Needs rewriting.
        """
        if k[0:3] == "162":
            id = Path(k[5:])
        else:
            id = Path(k)

        survex_file = f"caves-{area.short_name}/{id}/{id}.svx"
        if Path(settings.SURVEX_DATA, survex_file).is_file():
            return survex_file
        else:
            survex_file = f"caves-{area.short_name}/{id}.svx"
            if Path(settings.SURVEX_DATA, survex_file).is_file():
                return survex_file

        survex_file = ""
        d = Path(settings.SURVEX_DATA, f"caves-{area.short_name}/{id}")
        if d.is_dir():
            prime_suspect = ""
            dir = d.iterdir()
            for f in dir:
                if f.suffix == ".svx":
                    survex_file = f.relative_to(settings.SURVEX_DATA)
                    chk = min(5, len(f.name) - 1)
                    if str(f.name)[:chk].lower() == str(id.name)[:chk].lower():  # bodge which mostly works
                        prime_suspect = survex_file
            if prime_suspect:
                survex_file = prime_suspect
                # message = f" ! {k:14} Found a survex file which might be the right one: {survex_file}"
                # DataIssue.objects.create(parser='caves', message=message, url=url)
                # print(message)
        return survex_file
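
    # Illustrative search order for get_survex_file("1623-290") in area 1623 (not executed):
    #   1. caves-1623/290/290.svx
    #   2. caves-1623/290.svx
    #   3. any *.svx file under caves-1623/290/, preferring one whose name starts like "290"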

    slug = k

    g = GetCaveLookup()
    with transaction.atomic():
        if slug in g:
            message = f" ! {k:18} cave listed in pendingcaves.txt already exists."
            DataIssue.objects.create(parser="caves", message=message, url=url)
            print(message)
            return

        default_note = "_Survex file found in loser repo but no description in expoweb <br><br><br>\n"
        default_note += "INSTRUCTIONS: First open 'This survex file' (link above the CaveView panel) to find the date and info. Then "
        default_note += '<br><br>\n\n - (0) look in the <a href="/noinfo/cave-number-index">cave number index</a> for notes on this cave, '
        default_note += "<br><br>\n\n - (1) search in the survex file for the *ref to find a "
        default_note += "relevant wallet, e.g. <a href='/survey_scans/2009%252311/'>2009#11</a> and read the notes image files <br>\n - "
        default_note += "<br><br>\n\n - (2) search in the Expo for that year e.g. <a href='/expedition/2009'>2009</a> to find a "
        default_note += "relevant logbook entry; remember that the date may have been recorded incorrectly, "
        default_note += "so check for trips i.e. logbook entries involving the same people as were listed in the survex file, "
        default_note += "and you should also check the scanned copy of the logbook (linked from each logbook entry page) "
        default_note += "just in case a vital trip was not transcribed, then <br>\n - "
        default_note += "click on 'Edit this cave' and copy the information you find in the survex file and the logbook "
        default_note += "and delete all the text in the 'Notes' section - which is the text you are reading now. "
        default_note += "<br><br>\n\n - Only two fields on this form are essential. "
        default_note += "Documentation of all the fields on the 'Edit this cave' form is in <a href='/handbook/survey/caveentryfields.html'>handbook/survey/caveentryfields</a> "
        default_note += "<br><br>\n\n - "
        default_note += "You will also need to create a new entrance from the 'Edit this cave' page. Ignore the existing dummy one, it will evaporate on the next full import. "
        default_note += "<br><br>\n\n - "
        default_note += "When you Submit it will create a new file in expoweb/cave_data/ "
        default_note += "<br><br>\n\n - Now you can edit the entrance info: click on Edit below for the dummy entrance, "
        default_note += "and then Submit to save it (if you forget to do this, a dummy entrance will be created for your new cave description). "
        default_note += "<br><br>\n\n - Finally, you need to find a nerd to edit the file '<var>expoweb/cave_data/pending.txt</var>' "
        default_note += f"to remove the line <br><var>{slug}</var><br> as it is no longer 'pending' but 'done'. Well Done."

        survex_file = get_survex_file(k)

        cave = Cave(
            unofficial_number=k,
            underground_description="Pending cave write-up - creating as empty object. No XML file available yet.",
            survex_file=survex_file,
            url=url,
            notes=default_note,
        )
        if cave:
            cave.save()  # must save to have id before foreign keys work. This is also a ManyToMany key.
            cave.area.add(area)
            cave.save()
            message = f" ! {k:18} {cave.underground_description} url: {url}"
            DataIssue.objects.create(parser="caves", message=message, url=url)
            print(message)

            try:  # Now create a cave slug ID
                CaveSlug.objects.update_or_create(cave=cave, slug=slug, primary=False)
            except:
                message = f" ! {k:11s} PENDING CaveSLUG {slug} create failure"
                DataIssue.objects.create(parser="caves", message=message)
                print(message)
        else:
            message = f" ! {k:11s} PENDING cave create failure"
            DataIssue.objects.create(parser="caves", message=message)
            print(message)

        try:
            set_dummy_entrance(k, slug, cave, msg="PENDING")
        except:
            message = f" ! {k:11s} PENDING entrance + cave UNION create failure '{cave}' [{slug}] {k}"
            # message = f" ! {k:11s} PENDING entrance + cave UNION create failure '{cave}' [{ent}]"
            DataIssue.objects.create(parser="caves", message=message)
            print(message)

        return cave
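
# Illustrative (not executed): do_pending_cave("1623-999", "1623/999", get_area("1623")) creates a
# placeholder Cave (unofficial_number "1623-999") carrying the instruction notes above, plus a
# CaveSlug and a dummy entrance; it returns None if the id is already known to GetCaveLookup().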


def getXML(text, itemname, minItems=1, maxItems=None, context=""):
    """Reads a single XML tag.
    Should throw an exception rather than producing an error message here,
    then handle the exception in the calling routine, where it has the context.
    """
    items = re.findall("<%(itemname)s>(.*?)</%(itemname)s>" % {"itemname": itemname}, text, re.S)
    if len(items) < minItems:
        message = (
            "! %(count)i x %(itemname)s found, at least %(min)i expected. Load ABORT. "
            % {"count": len(items), "itemname": itemname, "min": minItems}
            + " in file "
            + context
        )
        DataIssue.objects.create(parser="caves", message=message, url="" + context)
        print(message)

    if maxItems is not None and len(items) > maxItems:
        message = (
            "! %(count)i x %(itemname)s found, no more than %(max)i expected in this XML unit. Load ABORT. "
            % {"count": len(items), "itemname": itemname, "max": maxItems}
            + " in file "
            + context
        )
        DataIssue.objects.create(parser="caves", message=message)
        print(message)

    if minItems == 0:
        if not items:
            items = [""]
    return items
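
# Illustrative (not executed):
#   getXML("<cave><area>1623</area><area>1626</area></cave>", "area") -> ["1623", "1626"]
#   getXML("<cave></cave>", "notes", maxItems=1, minItems=0) -> [""] (missing optional tag)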


def boolify(boolstrs):
    """Converts a single-item list of 'True'/'False' strings (as read from the XML) to a bool."""
    return {
        "True": True,
        "False": False,
        "true": True,
        "false": False,
    }[boolstrs[0]]


def readentrance(filename, ent=None):
    """Reads an entrance description from the .html file.
    If not called as part of the initial import, then the global lists will not be correct,
    but this is OK: a search will find them in the db.
    """

    def getXMLmax1(field):
        return getXML(entrancecontents, field, maxItems=1, context=context)

    global entrances_xslug
    global caves_xslug
    global areas_xslug

    # Note: these are HTML files in the EXPOWEB repo, not from the loser repo.
    with open(os.path.join(ENTRANCEDESCRIPTIONS, filename)) as f:
        contents = f.read()
    context = filename

    # print("Reading file ENTRANCE {} / {}".format(ENTRANCEDESCRIPTIONS, filename))
    entrancecontentslist = getXML(contents, "entrance", maxItems=1, context=context)
    if len(entrancecontentslist) != 1:
        message = f'! BAD ENTRANCE at "{filename}". Loading aborted.'
        DataIssue.objects.create(parser="entrances", message=message)
        print(message)
        return

    entrancecontents = entrancecontentslist[0]
    slugs = getXML(entrancecontents, "slug", context=context)
    if len(slugs) > 1:
        # Only ever one of these per entrance in the expo dataset
        message = f" ! - More than one slug for an entrance: {filename}, slugs: {slugs}. Aborting."
        DataIssue.objects.create(parser="entrances", message=message, url=f"/cave/{slugs[0]}/edit/")
        print(message)
        return

    lastvisit = getXML(entrancecontents, "last visit date", maxItems=1, minItems=0, context=context)

    alt = getXMLmax1("alt")
    approach = getXMLmax1("approach")
    bearings = getXMLmax1("bearings")
    easting = getXMLmax1("easting")
    entrance_description = getXMLmax1("entrance_description")
    exact_station = getXMLmax1("exact_station")
    explorers = getXMLmax1("explorers")
    findability = getXMLmax1("findability")
    findability_description = getXMLmax1("findability_description")
    location_description = getXMLmax1("location_description")
    map_description = getXMLmax1("map_description")
    marking = getXMLmax1("marking")
    marking_comment = getXMLmax1("marking_comment")
    name = getXMLmax1("name")
    non_public = getXMLmax1("non_public")
    northing = getXMLmax1("northing")
    other_description = getXMLmax1("other_description")
    other_station = getXMLmax1("other_station")
    photo = getXMLmax1("photo")
    tag_station = getXMLmax1("tag_station")
    underground_description = getXMLmax1("underground_description")
    url = getXMLmax1("url")

    if ent:
        ent.name = name[0]
        ent.non_public = boolify(non_public)
        ent.alt = alt[0]
        ent.approach = approach[0]
        ent.bearings = bearings[0]
        ent.easting = easting[0]
        ent.entrance_description = entrance_description[0]
        ent.exact_station = exact_station[0]
        ent.explorers = explorers[0]
        ent.filename = filename
        ent.findability = findability[0]
        ent.findability_description = findability_description[0]
        ent.lastvisit = lastvisit[0]
        ent.location_description = location_description[0]
        ent.map_description = map_description[0]
        ent.marking = marking[0]
        ent.marking_comment = marking_comment[0]
        ent.northing = northing[0]
        ent.other_description = other_description[0]
        ent.other_station = other_station[0]
        ent.photo = photo[0]
        ent.slug = slugs[0]
        ent.tag_station = tag_station[0]
        ent.underground_description = underground_description[0]
        ent.url = url[0]
        ent.save()
    else:
        e, state = Entrance.objects.update_or_create(
            name=name[0],
            non_public=boolify(non_public),
            alt=alt[0],
            approach=approach[0],
            bearings=bearings[0],
            easting=easting[0],
            entrance_description=entrance_description[0],
            exact_station=exact_station[0],
            explorers=explorers[0],
            filename=filename,
            findability=findability[0],
            findability_description=findability_description[0],
            lastvisit=lastvisit[0],
            location_description=location_description[0],
            map_description=map_description[0],
            marking=marking[0],
            marking_comment=marking_comment[0],
            northing=northing[0],
            other_description=other_description[0],
            other_station=other_station[0],
            photo=photo[0],
            slug=slugs[0],
            tag_station=tag_station[0],
            underground_description=underground_description[0],
            url=url[0],
        )
        e.save()
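
# For orientation, an entrance_data file mirrors cave_data: a single <entrance> element whose child
# tags match the field names read above. A minimal sketch (tag names from this parser, values
# illustrative only):
#
#   <entrance>
#     <slug>1623-161g</slug>
#     <name>161g</name>
#     <non_public>False</non_public>
#     <marking>?</marking>
#     ...
#   </entrance>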


def readcave(filename, cave=None):
    """Reads a cave description from the .html file.
    Convoluted. Sorry. Needs rewriting.
    Assumes any area it hasn't seen before is a subarea of 1623.
    If not called as part of the initial import, then the global lists will not be correct,
    but this is OK: a search will find them in the db.
    """

    def do_entrances():
        """For both bulk import and individual re-reading of a cave_data file,
        fix the entrances.
        """
        for e in entrances:
            eslug = getXML(e, "entranceslug", maxItems=1, context=context)[0]
            letter = getXML(e, "letter", maxItems=1, context=context)[0]
            if len(entrances) == 1 and not eslug:  # may be empty: <entranceslug></entranceslug>
                set_dummy_entrance(slugs[0][5:], slugs[0], c, msg="DUMMY: no entrance slug read from file")
            else:
                try:
                    if eslug in entrances_xslug:
                        entrance = entrances_xslug[eslug]
                    else:
                        entrance = Entrance.objects.get(slug=eslug)
                        entrances_xslug[eslug] = entrance
                    CaveAndEntrance.objects.update_or_create(cave=c, entrance_letter=letter, entrance=entrance)
                except:
                    message = f' ! Entrance setting failure, slug:"{eslug}" #entrances:{len(entrances)} letter:"{letter}" cave:"{c}" filename:"cave_data/{filename}"'
                    DataIssue.objects.create(parser="entrances", message=message, url=f"{c.url}_edit/")
                    print(message)

    def reload_entrances():
        """For individual re-reading of a cave_data file when editing,
        also re-read the entrance_data files.
        """
        for eslug in entrances_xslug:
            entrance = entrances_xslug[eslug]
            readentrance(entrance.filename, ent=entrance)
            entrance.save()

    global entrances_xslug
    global caves_xslug
    global areas_xslug

    # Note: these are HTML files in the EXPOWEB repo, not from the loser repo.
    fn = settings.CAVEDESCRIPTIONS / filename
    # print(f" - Reading Cave from cave descriptions file {fn}")
    if not fn.exists():
        message = f" ! Cave_data file reading problem filename:'cave_data/{filename}'"
        DataIssue.objects.create(parser="caves", message=message, url=None)
        print(message)
        return

    with open(fn) as f:
        contents = f.read()
    context = filename

    cavecontentslist = getXML(contents, "cave", maxItems=1, context=context)
    if len(cavecontentslist) != 1:
        message = f'! BAD CAVE at "{filename}"'
        DataIssue.objects.create(parser="caves", message=message)
        print(message)
        return
    cavecontents = cavecontentslist[0]
    non_public = getXML(cavecontents, "non_public", maxItems=1, context=context)
    slugs = getXML(cavecontents, "caveslug", maxItems=1, context=context)
    official_name = getXML(cavecontents, "official_name", maxItems=1, context=context)
    areas = getXML(cavecontents, "area", context=context)
    kataster_code = getXML(cavecontents, "kataster_code", maxItems=1, context=context)
    kataster_number = getXML(cavecontents, "kataster_number", maxItems=1, context=context)
    unofficial_number = getXML(cavecontents, "unofficial_number", maxItems=1, context=context)
    explorers = getXML(cavecontents, "explorers", maxItems=1, context=context)
    underground_description = getXML(cavecontents, "underground_description", maxItems=1, context=context)
    equipment = getXML(cavecontents, "equipment", maxItems=1, context=context)
    references = getXML(cavecontents, "references", maxItems=1, context=context)
    survey = getXML(cavecontents, "survey", maxItems=1, context=context)
    kataster_status = getXML(cavecontents, "kataster_status", maxItems=1, context=context)
    underground_centre_line = getXML(cavecontents, "underground_centre_line", maxItems=1, context=context)
    notes = getXML(cavecontents, "notes", maxItems=1, context=context)
    length = getXML(cavecontents, "length", maxItems=1, context=context)
    depth = getXML(cavecontents, "depth", maxItems=1, context=context)
    extent = getXML(cavecontents, "extent", maxItems=1, context=context)
    survex_file = getXML(cavecontents, "survex_file", maxItems=1, context=context)
    description_file = getXML(cavecontents, "description_file", maxItems=1, context=context)
    url = getXML(cavecontents, "url", maxItems=1, context=context)
    entrances = getXML(cavecontents, "entrance", context=context)

    if not (
        len(non_public) == 1
        and len(slugs) >= 1  # is this really correct?
        and len(official_name) == 1
        and len(areas) >= 1  # we want to stop using the sub-areas in 2023
        and len(kataster_code) == 1
        and len(kataster_number) == 1
        and len(unofficial_number) == 1
        and len(explorers) == 1
        and len(underground_description) == 1
        and len(equipment) == 1
        and len(references) == 1
        and len(survey) == 1
        and len(kataster_status) == 1
        and len(underground_centre_line) == 1
        and len(notes) == 1
        and len(length) == 1
        and len(depth) == 1
        and len(extent) == 1
        and len(survex_file) == 1
        and len(description_file) == 1
        and len(url) == 1
    ):
        # more than one item in a maxItems=1 list, or a required field missing entirely
        message = f'! ABORT loading this cave. in "{filename}"'
        DataIssue.objects.create(parser="caves", message=message, url=f"/{slugs}_cave_edit/")
        print(message)
        return
    if cave:
        # this is a re-load prior to editing and we already know the cave id
        cave.non_public = boolify(non_public)
        cave.official_name = official_name[0]
        cave.kataster_code = kataster_code[0]
        cave.kataster_number = kataster_number[0]
        cave.unofficial_number = unofficial_number[0]
        cave.explorers = explorers[0]
        cave.underground_description = underground_description[0]
        cave.equipment = equipment[0]
        cave.references = references[0]
        cave.survey = survey[0]
        cave.kataster_status = kataster_status[0]
        cave.underground_centre_line = underground_centre_line[0]
        cave.notes = notes[0]
        cave.length = length[0]
        cave.depth = depth[0]
        cave.extent = extent[0]
        cave.survex_file = survex_file[0]
        cave.description_file = description_file[0]
        cave.url = url[0]

        if len(slugs) > 1:
            message = f" ! Cave edit failure due to more than one slug: {slugs}, skipping this field edit."
            DataIssue.objects.create(parser="caves", message=message)
            print(message)

        cave.area.clear()  # reset the ManyToMany area links before re-adding them from the file
        cave.save()
        for area_slug in areas:
            a = Area.objects.filter(short_name=area_slug)
            if a:
                cave.area.add(a[0])
            else:
                message = f" ! Cave edit failure due to unrecognised Area: {area_slug}, skipping this field edit."
                DataIssue.objects.create(parser="caves", message=message)
                print(message)

        c = cave
        do_entrances()
        print(f" - {entrances_xslug=}")
        reload_entrances()
        cave.save()
    else:
        try:
            c, state = Cave.objects.update_or_create(
                non_public=boolify(non_public),
                official_name=official_name[0],
                kataster_code=kataster_code[0],
                kataster_number=kataster_number[0],
                unofficial_number=unofficial_number[0],
                explorers=explorers[0],
                underground_description=underground_description[0],
                equipment=equipment[0],
                references=references[0],
                survey=survey[0],
                kataster_status=kataster_status[0],
                underground_centre_line=underground_centre_line[0],
                notes=notes[0],
                length=length[0],
                depth=depth[0],
                extent=extent[0],
                survex_file=survex_file[0],
                description_file=description_file[0],
                url=url[0],
                filename=filename,
            )
        except:
            print("! FAILED to get only one CAVE when updating using: " + filename)
            kaves = Cave.objects.all().filter(kataster_number=kataster_number[0])
            for k in kaves:
                message = " ! - DUPLICATES in db. kataster:" + str(k.kataster_number) + ", slug:" + str(k.slug())
                DataIssue.objects.create(parser="caves", message=message)
                print(message)
            for k in kaves:
                if k.slug() is not None:
                    print(" ! - OVERWRITING this one: slug:" + str(k.slug()))
                    k.notes = "DUPLICATE kataster number found on import. Please fix\n" + k.notes
                    c = k

        for area_slug in areas:
            if area_slug in areas_xslug:
                newArea = areas_xslug[area_slug]
            else:
                area = Area.objects.filter(short_name=area_slug)
                if area:
                    newArea = area[0]
                else:
                    newArea = Area(short_name=area_slug, super=Area.objects.get(short_name="1623"))
                    newArea.save()
                areas_xslug[area_slug] = newArea
            c.area.add(newArea)

        primary = True  # this sets the first slug we find to be primary=True and all the others =False
        for slug in slugs:
            if slug in caves_xslug:
                cs = caves_xslug[slug]
            else:
                try:  # we want to overwrite a PENDING cave if we are now importing the 1623-xxx.html file for it
                    cs = CaveSlug.objects.update_or_create(cave=c, slug=slug, primary=primary)
                    caves_xslug[slug] = cs
                except Exception as ex:
                    # raise
                    # This fails to do an update! It just crashes.. to be fixed
                    message = f" ! Cave update/create failure: {slug}, skipping file cave_data/{context} with exception\nException: {ex.__class__}"
                    DataIssue.objects.create(parser="caves", message=message)
                    print(message)

            primary = False

        if not entrances:
            # missing entrance link in the cave_data/1623-*.html file
            set_dummy_entrance(slug[5:], slug, c, msg="DUMMY: no entrances")
        else:
            do_entrances()

    # From here on the code applies to both edited and newly-imported caves
    slug = slugs[0]  # in the re-edit branch (cave given) `slug` is not otherwise set
    if survex_file[0]:
        if not (Path(SURVEX_DATA) / survex_file[0]).is_file():
            message = f' ! {slug:12} survex filename does not exist :LOSER: "{survex_file[0]}" in "{filename}"'
            DataIssue.objects.create(parser="caves", message=message, url=f"/{slug[0:4]}/{slug}_cave_edit/")
            print(message)

    if description_file[0]:  # if not an empty string
        message = f' - {slug:12} Note (not an error): complex description filename "{description_file[0]}" inside "{CAVEDESCRIPTIONS}/{filename}"'
        DataIssue.objects.create(parser="caves ok", message=message, url=f"/{slug}_cave_edit/")
        print(message)

        if not (Path(EXPOWEB) / description_file[0]).is_file():
            message = f' ! {slug:12} description filename "{EXPOWEB}/{description_file[0]}" does not refer to a real file'
            DataIssue.objects.create(parser="caves", message=message, url=f"/{slug}_cave_edit/")
            print(message)
            # c.description_file="" # done only once, to clear out cruft.

    c.save()

# ChatGPT replacement attempt 2023-04-21. Obviously very incomplete, but some useful ideas
# import os
# import xml.etree.ElementTree as ET
# class BadCaveException(Exception):
# pass
# class FailedCaveUpdateException(Exception):
# pass
# def readcave_chatgpt(filename, entrances_xslug, caves_xslug, areas_xslug):
# """Reads an entrance description from the .html file and updates the corresponding Cave object"""
# tree = ET.parse(os.path.join(CAVEDESCRIPTIONS, filename))
# root = tree.getroot()
# cavecontents = root.find("cave")
# if cavecontents is None:
# raise BadCaveException(f'! BAD CAVE at "{filename}"')
# non_public = cavecontents.findtext("non_public")
# slugs = cavecontents.findtext("caveslug")
# official_name = cavecontents.findtext("official_name")
# kataster_code = cavecontents.findtext("kataster_code")
# kataster_number = cavecontents.findtext("kataster_number")
# unofficial_number = cavecontents.findtext("unofficial_number")
# explorers = cavecontents.findtext("explorers")
# underground_description = cavecontents.findtext("underground_description")
# equipment = cavecontents.findtext("equipment")
# references = cavecontents.findtext("references")
# survey = cavecontents.findtext("survey")
# kataster_status = cavecontents.findtext("kataster_status")
# underground_centre_line = cavecontents.findtext("underground_centre_line")
# notes = cavecontents.findtext("notes")
# length = cavecontents.findtext("length")
# depth = cavecontents.findtext("depth")
# extent = cavecontents.findtext("extent")
# survex_file = cavecontents.findtext("survex_file")
# description_file = cavecontents.findtext("description_file")
# url = cavecontents.findtext("url")
# areas = cavecontents.findall("area")
# entrances = cavecontents.findall("entrance")
# if (
# non_public is not None
# # etc.
# # wrong, some of these should be ==1 and some >=1
# ):
# try:
# cave = caves_xslug.get(kataster_number)
# if cave is None:
# cave = Cave.objects.create(
# non_public={
# "True": True,
# "False": False,
# "true": True,
# "false": False,
# }[non_public],
# official_name=official_name,
# # kataster [truncated]


def readcaves():
    """Called from the databaseReset mass importer.
    Reads the xml-format HTML 'cave' files in the EXPOWEB repo and the survex files from the loser repo.
    """
    # Pending is for those caves which do not have cave_data/1623-xxx.html XML files even though
    # they exist and have surveys.
    pending = set()
    fpending = Path(CAVEDESCRIPTIONS, "pendingcaves.txt")
    if fpending.is_file():
        with open(fpending, "r") as fo:
            cids = fo.readlines()
            for cid in cids:
                pending.add(cid.strip().upper())
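
    # Assumed format of pendingcaves.txt (not shown here): one cave id per line, e.g. "1623-007";
    # ids are upper-cased above so that later handling is case-insensitive.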

    with transaction.atomic():
        print(" - Deleting Caves and Entrances")
        # attempting to avoid the MariaDB crash when doing this
        try:
            Area.objects.all().delete()
        except:
            pass
        try:
            Cave.objects.all().delete()
        except:
            pass
        try:
            Entrance.objects.all().delete()
        except:
            pass
        # Clear the cave data issues and the caves as we are reloading
        DataIssue.objects.filter(parser="areas").delete()
        DataIssue.objects.filter(parser="caves").delete()
        DataIssue.objects.filter(parser="caves ok").delete()
        DataIssue.objects.filter(parser="entrances").delete()

    with transaction.atomic():
        area = get_area("1623")  # side-effect: creates the standard Areas if the db is empty
        print(" - settings.CAVEDESCRIPTIONS: ", CAVEDESCRIPTIONS)
        print(" - Reading Entrances from entrance descriptions xml files")
        for filename in next(os.walk(ENTRANCEDESCRIPTIONS))[2]:  # Should be a better way of getting a list of files
            # if filename.endswith('.html'):
            #     if Path(filename).stem[5:] in pending:
            #         print(f'Skipping pending entrance dummy file <{filename}>')
            #     else:
            #         readentrance(filename)
            readentrance(filename)

        print(" - Reading Caves from cave descriptions xml files")
        for filename in next(os.walk(CAVEDESCRIPTIONS))[2]:  # Should be a better way of getting a list of files
            if filename.endswith(".html"):
                readcave(filename)

    print(" - Setting up all the variously useful alias names")
    GetCaveLookup()

    print(" - Setting pending caves")
    # Do this last, so we can detect if they are created and are no longer 'pending'
    with transaction.atomic():
        for k in pending:
            if k[0:3] == "162":
                areanum = k[0:4]
                url = f"{areanum}/{k[5:]}"  # Note we are not appending the .htm as we are modern folks now.
            else:
                areanum = "1623"
                url = f"1623/{k}"

            area = get_area(areanum)
            try:
                do_pending_cave(k, url, area)
            except:
                message = f" ! Error. Cannot create pending cave and entrance, pending-id:{k} in area {areanum}"
                DataIssue.objects.create(parser="caves", message=message)
                print(message)
                raise