2012-06-10 16:56:12 +01:00
import os
import re
2021-04-25 04:04:53 +01:00
import sys
2021-04-14 18:24:08 +01:00
from pathlib import Path
2012-06-10 16:56:12 +01:00
2020-05-24 01:57:06 +01:00
from django . conf import settings
2021-04-06 22:50:57 +01:00
from django . db import transaction
2020-05-24 01:57:06 +01:00
2022-03-05 12:20:26 +00:00
from troggle . settings import SURVEX_DATA , EXPOWEB , CAVEDESCRIPTIONS , ENTRANCEDESCRIPTIONS
2021-04-13 00:43:57 +01:00
from troggle . core . models . troggle import DataIssue
2022-07-25 00:57:00 +01:00
from troggle . core . models . caves import Area , Cave , Entrance , CaveSlug , EntranceSlug , CaveAndEntrance , GetCaveLookup
2022-03-05 12:20:26 +00:00
'''Reads all the cave description data by parsing the xml files (stored as e.g. :EXPOWEB:/cave_data/1623-161.html)
and creating the various Cave, Entrance and necessary Area objects.

This is the first import that happens after the database is reinitialised,
so it is the first thing that creates tables.
'''
2012-06-10 16:56:12 +01:00
2022-07-27 21:24:40 +01:00
todo = '''
2021-04-27 00:32:01 +01:00
- Cannot use Edit This Page for pendingcaves . txt_edit as Edit This Page is expecting an html file .
2022-03-05 12:20:26 +00:00
So we will need a separate file - editing capability just for this configuration file ? !
2022-07-19 17:48:11 +01:00
2022-07-27 21:24:40 +01:00
- crashes on MariaDB in databasereset . py on server when deleting Caves and complains Area needs a non null parent , But this is not true .
2022-07-21 17:01:04 +01:00
The only solution we have found is to let it crash , then stop and restart MariaDB ( requires a logon able to sudo )
and then restart the databasereset . py again . ( status as of July 2022 )
2021-04-27 00:32:01 +01:00
'''
2020-07-06 20:27:31 +01:00
# Module-level caches filled while parsing, so repeated slugs do not hit the database again.
# entrances_xslug: entrance slug -> Entrance object
entrances_xslug = {}
# caves_xslug: cave slug -> the value returned by CaveSlug.objects.update_or_create()
caves_xslug = {}
# areas_xslug: area short_name -> Area object
areas_xslug = {}
2021-04-26 17:23:23 +01:00
def dummy_entrance(k, slug, msg="DUMMY"):
    '''Creates, saves and returns a placeholder Entrance object, for either a PENDING cave
    or a DUMMY entrance if the user forgot to provide one when creating the cave.

    k    - used as the entrance name; must be a string as it is formatted with a
           string format spec in the failure messages
    slug - stored as cached_primary_slug and, with ".html" appended, as the filename
    msg  - label ("DUMMY", "PENDING") included in any DataIssue message

    Raises RuntimeError if the freshly constructed Entrance evaluates as false.
    '''
    ent = Entrance(
        name=k,
        entrance_description="Dummy entrance: auto-created when registering a new cave " +
            "and you forgot to create an entrance for it. Click on 'Edit' to enter the correct data, then 'Submit'.",
        marking='?')
    if not ent:
        message = f" ! {k:11s} {msg} cave SLUG '{slug}' create failure"
        DataIssue.objects.create(parser='caves', message=message, url=f'{slug}')
        print(message)
        # A bare 'raise' here has no active exception to re-raise, so raise explicitly
        # (same exception type, but now carrying the message).
        raise RuntimeError(message)

    ent.save()  # must save to have id before foreign keys work.
    try:  # Now create a entrance slug ID
        # NOTE(review): the EntranceSlug is only constructed here, never saved - confirm this is intended.
        EntranceSlug(entrance=ent, slug=slug, primary=False)
    except Exception:
        message = f" ! {k:11s} {msg}-{slug} entrance create failure"
        DataIssue.objects.create(parser='caves', message=message, url=f'{slug}')
        print(message)

    ent.cached_primary_slug = slug
    ent.filename = slug + ".html"
    ent.save()
    return ent
def set_dummy_entrance(id, slug, cave, msg="DUMMY"):
    '''Entrance field either missing or holds a null string instead of a filename in a cave_data file.

    Creates a dummy entrance, caches it in entrances_xslug under 'slug' and links it to
    'cave' with an empty entrance letter. Success and failure are both recorded as a
    DataIssue and printed; failures are not re-raised.

    id   - identifier passed on as the dummy entrance name (parameter name kept for callers)
    slug - slug for the dummy entrance
    cave - the Cave the entrance is attached to
    msg  - label passed on to dummy_entrance() for its failure messages
    '''
    try:
        # Pass the caller's label on, rather than a hard-coded "DUMMY".
        entrance = dummy_entrance(id, slug, msg=msg)
        entrances_xslug[slug] = entrance
        CaveAndEntrance.objects.update_or_create(cave=cave, entrance_letter="", entrance=entrance)
        message = f' ! Warning: Dummy Entrance successfully set for entrance {id} on cave {cave}'
        DataIssue.objects.create(parser='caves', message=message, url=f'{cave.url}')
        print(message)
    except Exception:
        message = f' ! Entrance Dummy setting failure, slug:"{slug}" cave id :"{id}" '
        DataIssue.objects.create(parser='caves', message=message, url=f'{cave.url}')
        print(message)
2022-07-25 00:57:00 +01:00
def _pending_cave_note(slug):
    '''Returns the HTML instructions stored in the 'notes' field of a pending cave.'''
    return (
        "_Survex file found in loser repo but no description in expoweb <br><br><br>\n"
        "INSTRUCTIONS: First open 'This survex file' (link above the CaveView panel) to find the date and info. Then "
        "<br><br>\n\n - (1) search in the survex file for the *ref to find a "
        "relevant wallet, e.g.<a href='/survey_scans/2009%252311/'>2009#11</a> and read the notes image files <br>\n - "
        "<br><br>\n\n - (2) search in the Expo for that year e.g. <a href='/expedition/2009'>2009</a> to find a "
        "relevant logbook entry, remember that the date may have been recorded incorrectly, "
        "so check for trips i.e. logbook entries involving the same people as were listed in the survex file, "
        "and you should also check the scanned copy of the logbook (linked from each logbook entry page) "
        "just in case a vital trip was not transcribed, then <br>\n - "
        "click on 'Edit this cave' and copy the information you find in the survex file and the logbook "
        "and delete all the text in the 'Notes' section - which is the text you are reading now."
        "<br><br>\n\n - Only two fields on this form are essential. "
        "Documentation of all the fields on 'Edit this cave' form is in <a href='/handbook/survey/caveentryfields.html'>handbook/survey/caveentryfields</a>"
        "<br><br>\n\n - "
        "You will also need to create a new entrance from the 'Edit this cave' page. Ignore the existing dummy one, it will evaporate on the next full import."
        "<br><br>\n\n - "
        "When you Submit it will create a new file in expoweb/cave_data/ "
        "<br><br>\n\n - Now you can edit the entrance info: click on Edit below for the dummy entrance. "
        "and then Submit to save it (if you forget to do this, a dummy entrance will be created for your new cave description)."
        "<br><br>\n\n - Finally, you need to find a nerd to edit the file '<var>expoweb/cave_data/pending.txt</var>' "
        f"to remove the line <br><var>{slug}</var><br> as it is no longer 'pending' but 'done. Well Done."
    )


def do_pending_cave(k, url, area):
    '''
    default for a PENDING cave, should be overwritten in the db later if a real cave of the same name exists
    in expoweb/cave_data/1623-"k".html

    Note that at this point in importing the data we have not yet seen the survex files, so we can't
    look inside the relevant survex file to find the year and so we can't provide helpful links.

    k    - the pending cave id as listed in pendingcaves.txt; also used as the slug
    url  - url stored on the new Cave and on the DataIssue records
    area - the Area object the new cave is added to

    Returns None. If 'k' is already known to GetCaveLookup() a DataIssue is logged and nothing is created.
    '''
    def get_survex_file(k):
        '''Guesses at and finds a survex file for this pending cave.
        Convoluted. Sorry. Needs rewriting

        Returns a path relative to settings.SURVEX_DATA as a string, or "" if nothing was found.
        '''
        cave_id = Path(k[5:]) if k[0:3] == "162" else Path(k)
        area_dir = f"caves-{area.short_name}"

        # The two conventional locations, tried in order.
        for candidate in (f"{area_dir}/{cave_id}/{cave_id}.svx", f"{area_dir}/{cave_id}.svx"):
            if Path(settings.SURVEX_DATA, candidate).is_file():
                return candidate

        # Otherwise take any .svx file in the cave's directory, preferring one whose
        # name starts like the cave id.
        survex_file = ""
        d = Path(settings.SURVEX_DATA, f"{area_dir}/{cave_id}")
        if d.is_dir():
            prime_suspect = ""
            for f in d.iterdir():
                if f.suffix == ".svx":
                    survex_file = str(f.relative_to(settings.SURVEX_DATA))
                    chk = min(5, len(f.name) - 1)
                    if str(f.name)[:chk].lower() == str(cave_id.name)[:chk].lower():  # bodge which mostly works
                        prime_suspect = survex_file
            if prime_suspect:
                survex_file = prime_suspect
        return survex_file

    slug = k
    g = GetCaveLookup()
    with transaction.atomic():
        if slug in g:
            message = f" ! {k:18} cave listed in pendingcaves.txt already exists."
            DataIssue.objects.create(parser='caves', message=message, url=url)
            print(message)
            return

        cave = Cave(
            unofficial_number=k,
            underground_description="Pending cave write-up - creating as empty object. No XML file available yet.",
            survex_file=get_survex_file(k),
            url=url,
            notes=_pending_cave_note(slug))
        if cave:
            cave.save()  # must save to have id before foreign keys work. This is also a ManyToMany key.
            cave.area.add(area)
            cave.save()
            message = f" ! {k:18} {cave.underground_description} url: {url}"
            DataIssue.objects.create(parser='caves', message=message, url=url)
            print(message)

            try:  # Now create a cave slug ID
                CaveSlug.objects.update_or_create(cave=cave, slug=slug, primary=False)
            except Exception:
                message = f" ! {k:11s} PENDING cave SLUG create failure"
                DataIssue.objects.create(parser='caves', message=message, url=url)
                print(message)
        else:
            message = f' ! {k:11s} PENDING cave create failure'
            DataIssue.objects.create(parser='caves', message=message, url=url)
            print(message)

        ent = None  # so the failure message below can always be formatted
        try:
            ent = dummy_entrance(k, slug, msg="PENDING")
            # update_or_create() returns an (object, created) tuple; the object is already
            # saved with cave=cave, so nothing more needs doing with it.
            CaveAndEntrance.objects.update_or_create(cave=cave, entrance_letter="", entrance=ent)
        except Exception:
            message = f" ! {k:11s} PENDING entrance + cave UNION create failure '{cave}' [{ent}]"
            DataIssue.objects.create(parser='caves', message=message, url=url)
            print(message)
2021-04-26 02:10:45 +01:00
2020-05-28 04:54:53 +01:00
2012-06-10 16:56:12 +01:00
def readentrance(filename):
    '''Reads one entrance description file from ENTRANCEDESCRIPTIONS and creates the
    Entrance object and its EntranceSlug objects.

    filename - name of the file inside ENTRANCEDESCRIPTIONS

    Problems are recorded as DataIssue objects and printed. If the file does not hold
    exactly one <entrance> element, or a required element is missing, the file is skipped.
    '''
    # Note: these are HTML files in the EXPOWEB repo, not from the loser repo.
    with open(os.path.join(ENTRANCEDESCRIPTIONS, filename)) as f:
        contents = f.read()
    context = filename

    entrancecontentslist = getXML(contents, "entrance", maxItems=1, context=context)
    if len(entrancecontentslist) != 1:
        message = f'! BAD ENTRANCE at "{filename}"'
        DataIssue.objects.create(parser='caves', message=message)
        print(message)
        return
    entrancecontents = entrancecontentslist[0]

    def one(tag):
        return getXML(entrancecontents, tag, maxItems=1, context=context)

    non_public = one("non_public")
    name = one("name")
    slugs = getXML(entrancecontents, "slug", context=context)

    # Model field names; each is also the XML tag name, except 'lastvisit'.
    field_names = (
        "entrance_description", "explorers", "map_description", "location_description",
        "lastvisit", "approach", "underground_description", "photo", "marking",
        "marking_comment", "findability", "findability_description", "alt", "northing",
        "easting", "tag_station", "exact_station", "other_station", "other_description",
        "bearings", "url",
    )
    fields = {}
    for field in field_names:
        if field == "lastvisit":
            # optional element: getXML returns [""] when it is absent
            fields[field] = getXML(entrancecontents, "last visit date", maxItems=1, minItems=0, context=context)
        else:
            fields[field] = one(field)

    # getXML only reports missing elements, it does not stop the load, so check here
    # rather than fail with an IndexError on the [0] lookups below.
    required = {"non_public": non_public, "name": name, "slug": slugs, **fields}
    if any(not found for found in required.values()):
        message = f' ! ABORT loading this entrance. in "{filename}"'
        DataIssue.objects.create(parser='caves', message=message, url=f'/cave/{Path(filename).stem}/edit/')
        print(message)
        return

    e, _created = Entrance.objects.update_or_create(
        name=name[0],
        non_public={"True": True, "False": False, "true": True, "false": False}[non_public[0]],
        filename=filename,
        cached_primary_slug=slugs[0],
        **{field: found[0] for field, found in fields.items()})

    primary = True  # only the first slug listed is the primary one
    for slug in slugs:
        try:
            EntranceSlug.objects.update_or_create(entrance=e, slug=slug, primary=primary)
        except Exception:
            # need to cope with duplicates
            message = f" ! FAILED to get precisely one ENTRANCE when updating using: cave_entrance/{filename}"
            DataIssue.objects.create(parser='caves', message=message, url=f'/cave/{slug}/edit/')
            kents = EntranceSlug.objects.all().filter(entrance=e, slug=slug, primary=primary)
            for k in kents:
                # EntranceSlug.slug is a field, not a method
                message = " ! - DUPLICATE in db. entrance:" + str(k.entrance) + ", slug:" + str(k.slug)
                DataIssue.objects.create(parser='caves', message=message, url=f'/cave/{slug}/edit/')
                print(message)
            for k in kents:
                if k.slug is not None:
                    print(" ! - OVERWRITING this one: slug:" + str(k.slug))
        primary = False
2012-06-10 16:56:12 +01:00
def readcave(filename):
    '''Reads one cave description file from CAVEDESCRIPTIONS and creates the Cave object,
    its areas, its CaveSlug and its links to entrances.

    Assumes any area it hasn't seen before is a subarea of 1623

    filename - name of the file inside CAVEDESCRIPTIONS

    Problems are recorded as DataIssue objects and printed. The file is skipped if it does not
    hold exactly one <cave> element, or if any single-valued element is not present exactly once.
    '''
    # Note: these are HTML files in the EXPOWEB repo, not from the loser repo.
    with open(os.path.join(CAVEDESCRIPTIONS, filename)) as f:
        contents = f.read()
    context = filename

    cavecontentslist = getXML(contents, "cave", maxItems=1, context=context)
    if len(cavecontentslist) != 1:
        message = f'! BAD CAVE at "{filename}"'
        DataIssue.objects.create(parser='caves', message=message)
        print(message)
        return
    cavecontents = cavecontentslist[0]

    def one(tag):
        return getXML(cavecontents, tag, maxItems=1, context=context)

    # 'single' maps Cave field name (which is also the XML tag) -> list of values found.
    single = {"non_public": one("non_public")}
    slugs = one("caveslug")
    single["official_name"] = one("official_name")
    areas = getXML(cavecontents, "area", context=context)
    for field in (
            "kataster_code", "kataster_number", "unofficial_number", "explorers",
            "underground_description", "equipment", "references", "survey", "kataster_status",
            "underground_centre_line", "notes", "length", "depth", "extent", "survex_file",
            "description_file", "url"):
        single[field] = one(field)
    entrances = getXML(cavecontents, "entrance", context=context)

    if not (slugs and areas and all(len(found) == 1 for found in single.values())):
        # more than one item in long list, or one missing.
        # No slug variable exists yet at this point, so derive the url from the filename.
        message = f' ! ABORT loading this cave. in "{filename}"'
        DataIssue.objects.create(parser='caves', message=message, url=f'/{Path(filename).stem}_cave_edit/')
        print(message)
        return

    survex_file = single["survex_file"]
    description_file = single["description_file"]
    kataster_number = single["kataster_number"]

    c = None
    try:
        values = {field: found[0] for field, found in single.items()}
        values["non_public"] = {"True": True, "False": False, "true": True, "false": False}[values["non_public"]]
        c, _created = Cave.objects.update_or_create(filename=filename, **values)
    except Exception:
        print(" ! FAILED to get only one CAVE when updating using: " + filename)
        kaves = Cave.objects.all().filter(kataster_number=kataster_number[0])
        for k in kaves:
            message = " ! - DUPLICATES in db. kataster:" + str(k.kataster_number) + ", slug:" + str(k.slug())
            DataIssue.objects.create(parser='caves', message=message)
            print(message)
        for k in kaves:
            if k.slug() is not None:
                print(" ! - OVERWRITING this one: slug:" + str(k.slug()))
                # NOTE(review): this change to notes is not saved anywhere in this function.
                k.notes = "DUPLICATE kataster number found on import. Please fix\n" + k.notes
                c = k

    if c is None:
        # Neither created nor found among the duplicates: nothing to attach areas or slugs to.
        message = f' ! No Cave object could be created or found when loading "{filename}", skipping.'
        DataIssue.objects.create(parser='caves', message=message, url=f'/{Path(filename).stem}_cave_edit/')
        print(message)
        return

    for area_slug in areas:
        if area_slug in areas_xslug:
            newArea = areas_xslug[area_slug]
        else:
            area = Area.objects.filter(short_name=area_slug)
            if area:
                newArea = area[0]
            else:
                newArea = Area(short_name=area_slug, super=Area.objects.get(short_name="1623"))
                newArea.save()
            areas_xslug[area_slug] = newArea
        c.area.add(newArea)

    primary = True
    for slug in slugs:
        if slug not in caves_xslug:
            try:  # we want to overwrite a PENDING cave if we are now importing the 1623-xxx.html file for it
                caves_xslug[slug] = CaveSlug.objects.update_or_create(cave=c, slug=slug, primary=primary)
            except Exception as ex:
                # This fails to do an update! It just crashes.. to be fixed
                message = " ! Cave update/create failure : %s, skipping file cave_data/%s with exception\nException: %s" % (slug, context, ex.__class__)
                DataIssue.objects.create(parser='caves', message=message)
                print(message)
        primary = False
    # From here on 'slug' is the last cave slug seen in the loop above.

    if not entrances:
        # missing entrance link in cave_data/1623-* .html file
        set_dummy_entrance(slug[5:], slug, c, msg="DUMMY")
    else:
        for entrance in entrances:
            eslug = getXML(entrance, "entranceslug", maxItems=1, context=context)[0]
            letter = getXML(entrance, "letter", maxItems=1, context=context)[0]
            if len(entrances) == 1 and not eslug:  # may be empty: <entranceslug></entranceslug>
                set_dummy_entrance(slug[5:], slug, c, msg="DUMMY")
            else:
                try:
                    if eslug in entrances_xslug:
                        entrance = entrances_xslug[eslug]
                    else:
                        entrance = Entrance.objects.get(entranceslug__slug=eslug)
                        entrances_xslug[eslug] = entrance
                    CaveAndEntrance.objects.update_or_create(cave=c, entrance_letter=letter, entrance=entrance)
                except Exception:
                    message = f' ! Entrance setting failure, slug:"{slug}" #entrances:{len(entrances)} {entrance} letter:"{letter}" cave:"{c}" filename:"cave_data/{filename}"'
                    DataIssue.objects.create(parser='caves', message=message, url=f'{c.url}_edit/')
                    print(message)

    if survex_file[0]:
        if not (Path(SURVEX_DATA) / survex_file[0]).is_file():
            message = f' ! {slug:12} survex filename does not exist :LOSER:"{survex_file[0]}" in "{filename}"'
            DataIssue.objects.create(parser='caves', message=message, url=f'/{slug[0:4]}/{slug}_cave_edit/')
            print(message)

    if description_file[0]:  # if not an empty string
        message = f' - {slug:12} Note (not an error): complex description filename "{description_file[0]}" inside "{CAVEDESCRIPTIONS}/{filename}"'
        DataIssue.objects.create(parser='caves ok', message=message, url=f'/{slug}_cave_edit/')
        print(message)

        if not (Path(EXPOWEB) / description_file[0]).is_file():
            message = f' ! {slug:12} description filename "{EXPOWEB}/{description_file[0]}" does not refer to a real file'
            DataIssue.objects.create(parser='caves', message=message, url=f'/{slug}_cave_edit/')
            print(message)
2012-06-10 16:56:12 +01:00
def getXML(text, itemname, minItems=1, maxItems=None, printwarnings=True, context=""):
    '''Returns a list of the contents of every <itemname>...</itemname> element found in text.

    text          - the string to search
    itemname      - the element name, matched literally
    minItems      - fewer matches than this is reported; if 0 and nothing matches, [""] is returned
    maxItems      - more matches than this is reported; None means no upper limit
    printwarnings - if False nothing is reported
    context       - the filename being parsed, used in the report

    Reporting means creating a DataIssue and printing the message. The list of matches is
    returned regardless, so the caller has to check its length.
    '''
    tag = re.escape(itemname)  # the name is a literal, not a regex pattern
    items = re.findall(f"<{tag}>(.*?)</{tag}>", text, re.S)

    if printwarnings:
        if len(items) < minItems:
            message = (f" ! {len(items)} x {itemname} found, at least {minItems} expected. Load ABORT. "
                       + " in file " + context)
            DataIssue.objects.create(parser='caves', message=message, url=context)
            print(message)
        if maxItems is not None and len(items) > maxItems:
            message = (f" ! {len(items)} x {itemname} found, no more than {maxItems} expected in this XML unit. Load ABORT. "
                       + " in file " + context)
            DataIssue.objects.create(parser='caves', message=message, url=context)
            print(message)

    if minItems == 0 and not items:
        # an optional element that is absent is treated as present but empty
        items = [""]
    return items
def readcaves():
    '''Reads the xml-format HTML files in the EXPOWEB repo, not from the loser repo.

    Deletes all Area, Cave and Entrance objects and the cave-related DataIssues, recreates the
    top-level areas, reads every entrance file and every cave .html file, and finally creates
    placeholder caves for the ids listed in pendingcaves.txt.
    '''
    # For those caves which do not have cave_data/1623-xxx.html XML files even though they exist and have surveys
    # should put this in a simple list
    pending = set()
    fpending = Path(CAVEDESCRIPTIONS, "pendingcaves.txt")
    if fpending.is_file():
        with open(fpending, "r") as fo:
            for line in fo:
                cid = line.strip().upper()
                if cid:  # a blank line must not become a pending cave with an empty id
                    pending.add(cid)

    with transaction.atomic():
        print(" - Deleting Caves and Entrances")
        # attempting to avoid MariaDB crash when doing this
        # NOTE(review): Django advises against catching exceptions inside an atomic block;
        # after a database error the queries that follow may fail too.
        for model in (Area, Cave, Entrance):
            try:
                model.objects.all().delete()
            except Exception as ex:
                print(f" ! Failed to delete all {model.__name__} objects: {ex.__class__}")

        # Clear the cave data issues and the caves as we are reloading
        for parser in ('areas', 'caves', 'caves ok', 'entrances'):
            DataIssue.objects.filter(parser=parser).delete()

        print(" - Creating Areas 1623, 1624, 1627 and 1626")
        # This crashes on the server with MariaDB even though a null parent is explicitly allowed.
        # objects.create() already saves the object, so no separate save() is needed.
        top_areas = {}
        for short_name in ("1623", "1624", "1626", "1627"):
            top_areas[short_name] = Area.objects.create(short_name=short_name, super=None)

    with transaction.atomic():
        print(" - settings.CAVEDESCRIPTIONS: ", CAVEDESCRIPTIONS)
        print(" - Reading Entrances from entrance descriptions xml files")
        # Every file is read: entrance files are not filtered by extension or by pending status.
        for filename in next(os.walk(ENTRANCEDESCRIPTIONS))[2]:  # Should be a better way of getting a list of files
            readentrance(filename)

        print(" - Reading Caves from cave descriptions xml files")
        for filename in next(os.walk(CAVEDESCRIPTIONS))[2]:  # Should be a better way of getting a list of files
            if filename.endswith('.html'):
                readcave(filename)

    print(" - Setting up all the variously useful alias names")
    GetCaveLookup()  # result not needed here; called now that all the caves exist

    print(" - Setting pending caves")
    # Do this last, so we can detect if they are created and no longer 'pending'
    with transaction.atomic():
        for k in sorted(pending):  # sorted so that the import order is repeatable
            if k[0:3] == "162":
                areanum = k[0:4]
                url = f'{areanum}/{k[5:]}'  # Note we are not appending the .htm as we are modern folks now.
            else:
                areanum = "1623"
                url = f'1623/{k}'

            # an unrecognised area number falls back to 1623
            area = top_areas.get(areanum, top_areas["1623"])
            try:
                do_pending_cave(k, url, area)
            except Exception:
                message = f" ! Error. Cannot create pending cave and entrance, pending-id:{k} in area {areanum}"
                DataIssue.objects.create(parser='caves', message=message)
                print(message)
                raise
2022-07-28 16:36:57 +01:00
2022-07-25 00:57:00 +01:00