2
0
mirror of https://expo.survex.com/repositories/troggle/.git synced 2024-11-21 23:01:52 +00:00

refactoring cave id stuff in survex parser - working

This commit is contained in:
Philip Sargent 2023-09-08 18:35:47 +03:00
parent 28d1092956
commit 7779544c0c

View File

@ -22,20 +22,19 @@ It also scans the Loser repo for all the svx files, which it loads individually
"""
todo = """
- Obscure bug in the *team inheritance and rootblock initialization needs tracking down,
probably in the team cache which should NOT be global, but should be an instance variable of
LoadingSurvex
- Lots to do to cut down on unnecessary .save() calls to avoid hitting the db so much. Should
speed it up noticeably.
- Obscure bug in the *team inheritance and rootblock initialization needs tracking down
- Learn to use Django .select_related() and .prefetch_related() to speed things up
https://zerotobyte.com/how-to-use-django-select-related-and-prefetch-related/
- LoadSurvexFile() Creates a new current survexfile
The survexblock passed-in is not necessarily the parent. FIX THIS.
- Finish writing the parse_one_file() function for survexfiles edited online. Perhaps
easier if this is a completely new file rather than an existing file.. nasty.
The survexblock passed-in is not necessarily the survex parent. FIX THIS.
- When Olly implements LEG in the 'dump3d --legs' utility, then we can use that to get the length of
all the legs in a survex block instead of adding them up ourselves. Which means that we can
ignore all the Units and offset stuff, that troggle will work with survex files with backsights,
@ -62,27 +61,10 @@ class SurvexLeg:
compass = 0.0
clino = 0.0
def IdentifyCave(cavepath):
    """Look up the cave object for a survex file path or a survex-block path.

    The lower-cased path is tried first as a key in the table returned by
    GetCaveLookup(). Failing that, LoadingSurvex.rx_cave is applied to the path
    and the two captured groups are tried as "<group1>-<group2>" and then as
    "<group2>" alone (both lower-cased). On a hit the table also gains an entry
    keyed by the path exactly as given. Returns None when nothing matches.
    """
    lookup = GetCaveLookup()
    lowered = cavepath.lower()
    if lowered in lookup:
        return lookup[lowered]

    # TO DO - this predates the big revision to Gcavelookup so look at this again carefully
    found = LoadingSurvex.rx_cave.search(cavepath)  # use as Class method
    if not found:
        # Not a cave, but that is fine: deliberately silent here.
        return None

    area, ident = found.group(1), found.group(2)
    candidates = (f"{area}-{ident}".lower(), ident.lower())
    for candidate in candidates:
        if candidate in lookup:
            # Remember the path as given so later table users can find it directly.
            lookup[cavepath] = lookup[candidate]
            return lookup[candidate]

    print(f" ! Failed to find cave for {cavepath.lower()}")
    return None
def datewallet(w, earliest):
"""Gets the date of the youngest survexblock associated with the wallet
@ -141,7 +123,8 @@ def get_offending_filename(path):
"""
return "/survexfile/" + path + ".svx"
trip_people_cache = {} # per survexblock, so robust wrt PUSH/POP begin/end
# THIS SHOULD NOT BE GLOBAL ! Should be per instance of file loader.
trip_people_cache = {} # indexed by survexblock, so never needs cleaning out
def get_team_on_trip(survexblock):
"""Uses a cache to avoid a database query if it doesn't need to.
Only used for complete team."""
@ -165,8 +148,9 @@ def get_people_on_trip(survexblock):
return list(set(people))
trip_person_record = {} # per survexblock, so robust wrt PUSH/POP begin/end
trip_team_cache = {} # per survexblock, so robust wrt PUSH/POP begin/end
# THIS SHOULD NOT BE GLOBAL ! Should be per instance of file loader.
trip_person_record = {} # indexed by (survexblock, personexpedition) - so never needs cleaning out
trip_team_cache = {} # indexed by survexblock, so never needs cleaning out
def put_person_on_trip(survexblock, personexpedition, tm):
"""Uses a cache to avoid a database query if it doesn't need to.
Only used for a single person"""
@ -206,18 +190,17 @@ def confirm_team_on_trip(survexblock):
SurvexPersonRole.objects.bulk_create(trip_team_cache[survexblock])
trip_team_cache[survexblock] = [] # in database now, so empty cache
def check_team_cache():
def check_team_cache(label=None):
global trip_team_cache
message = f"! check_team_cache() called.. "
print(message)
print(message, file=sys.stderr)
for block in trip_team_cache:
message = f"! *team CACHEFAIL, already created {block.survexfile.path} ({block}) "
message = f"! *team CACHEFAIL, trip_team_cache {block.survexfile.path} ({block}). label:{label}"
print(message)
print(message, file=sys.stderr)
person_pending_cache = {} # per survexblock, so robust wrt PUSH/POP begin/end
person_pending_cache = {} # indexed per survexblock, so robust wrt PUSH/POP begin/end
def add_to_pending(survexblock, tm):
"""Collects team names before we have a date so cannot validate against
expo attendance yet"""
@ -1180,7 +1163,45 @@ class LoadingSurvex:
self.svxprim[headpath.lower()] = primary
return self.svxprim[headpath.lower()]
def ReportNonCaveIncludes(self, headpath, includelabel, depth):
def IdentifyCave(self, cavepath, svxid, depth):
    """Return the cave object for a survex file path or survex-block path, or None.

    Args:
        cavepath: path of the survex file, e.g. /1626/107/107.svx, or a
            survex-block path.
        svxid: identifier of the survex file being loaded; passed on to
            is_it_already_pending() and create_new_cave(), and quoted in the
            warning message as '<svxid>.svx'.
        depth: include-nesting value; only its len() is reported in the warning.

    Returns:
        The cave found in the GetCaveLookup() table, or None when the path is
        not recognised. A cave created here as a fallback is NOT returned.

    TODO: REWRITE ALL THIS now that it is a method on the class.
    """
    caveslist = GetCaveLookup()
    if cavepath.lower() in caveslist:  # will only work after we load in full paths as indexes, see below
        return caveslist[cavepath.lower()]

    # rx_cave = re.compile(r"(?i)caves-(\d\d\d\d)/([-\d\w]+|\d\d\d\d-?\w+-\d+)")
    path_match = self.rx_cave.search(cavepath)
    if path_match:
        # Full "1626-107" style search only: the short form (just "107") is
        # deliberately not tried because it ignores the area code.
        slug = f"{path_match.group(1)}-{path_match.group(2)}".lower()
        if slug in caveslist:
            # set "caves-1626/107/107.svx" as index to cave 1626-107
            # NOTE(review): stored under cavepath as given, but the fast-path
            # lookup above uses cavepath.lower() - confirm mixed-case paths hit.
            caveslist[cavepath] = caveslist[slug]
            return caveslist[slug]
        print(f" ! Failed to find cave for {cavepath.lower()}", file=sys.stderr)
        return None

    # No standard identifier in the path: not a cave, but that is fine.
    if not self.is_it_already_pending(cavepath, svxid, depth):
        # It is too late to add it to the pending caves list here, they were already
        # processed in parsers/caves.py So we have to do a bespoke creation.
        # NOTE(review): the created cave is not returned to the caller - confirm intended.
        create_new_cave(svxid)
        # Same guess-from-path slicing that is_it_already_pending() uses; these
        # names were previously undefined here, so this branch raised NameError.
        caveid = f"{cavepath[6:10]}-{cavepath[11:]}".upper()
        shortid = caveid[5:]
        message = (
            f" ! Warning: cave identifier '{caveid}'or {shortid} (guessed from file path) is not a known cave. "
            f"Need to add to expoweb/cave_data/pendingcaves.txt ? In '{svxid}.svx' at depth:[{len(depth)}]."
        )
        print("\n" + message)
        print("\n" + message, file=sys.stderr)
        print(f"{self.pending}", end="", file=sys.stderr)
        stash_data_issue(parser="survex", message=message, url=None, sb=svxid)

    print(f' ! No regex (standard identifier) cave match for {cavepath.lower()}', file=sys.stderr)
    return None
def is_it_already_pending(self, headpath, includelabel, depth):
"""Ignore surface, kataser and gpx *include survex files"""
if not self.pending:
self.pending = set()
@ -1199,7 +1220,7 @@ class LoadingSurvex:
message = f" - {headpath} is <ignorenoncave> (while creating '{includelabel}' sfile & sdirectory)"
# print("\n"+message)
# print("\n"+message,file=sys.stderr)
return
return True
for i in self.ignoreprefix:
if headpath.startswith(i):
message = (
@ -1207,28 +1228,17 @@ class LoadingSurvex:
)
# print("\n"+message)
# print("\n"+message,file=sys.stderr)
return
return True
caveid = f"{headpath[6:10]}-{headpath[11:]}".upper()
if caveid in self.pending:
# Yes we didn't find this cave, but we know it is a pending one. So not an error.
# print(f'! ALREADY PENDING {caveid}',file=sys.stderr)
return
print(f'! ALREADY PENDING caveid {caveid}',file=sys.stderr)
return True
id = caveid[5:]
if id in self.pending:
print(f"! ALREADY PENDING {id}", file=sys.stderr)
return
print(f"! ALREADY PENDING id {id}", file=sys.stderr)
return True
# It is too late to add it to the pending caves list here, they were already
# processed in parsers/caves.py So we have to do a bespoke creation.
svxpath= includelabel
cave = create_new_cave(svxpath)
message = f" ! Warning: cave identifier '{caveid}'or {id} (guessed from file path) is not a known cave. Need to add to expoweb/cave_data/pendingcaves.txt ? In '{includelabel}.svx' at depth:[{len(depth)}]."
print("\n" + message)
print("\n" + message, file=sys.stderr)
print(f"{self.pending}", end="", file=sys.stderr)
stash_data_issue(parser="survex", message=message, url=None, sb=(includelabel))
def LoadSurvexFile(self, svxid):
"""Creates SurvexFile in the database, and SurvexDirectory if needed
Creates a new current survexfile and valid .survexdirectory
@ -1267,15 +1277,9 @@ class LoadingSurvex:
newfile.save() # until we do this there is no internal id so no foreign key works
self.currentsurvexfile = newfile
newfile.primary = self.set_primary(headpath)
# REPLACE all this IdentifyCave() stuff with GCaveLookup ?
cave = IdentifyCave(headpath) # cave already exists in db
if not cave:
# probably a surface survey, or a cave in a new area
# e.g. 1624 not previously managed, and not in the pending list
self.ReportNonCaveIncludes(headpath, svxid, depth)
#try again
cave = IdentifyCave(headpath)
# refactor this !
cave = self.IdentifyCave(headpath, svxid, depth) # cave already exists in db?
if cave:
newfile.cave = cave
# print(f"\n - New directory '{newdirectory}' for cave '{cave}'",file=sys.stderr)
@ -1530,7 +1534,7 @@ class LoadingSurvex:
slengthtotal = 0.0
nlegstotal = 0
self.relativefilename = path
IdentifyCave(path) # this will produce null for survex files which are geographic collections
#self.IdentifyCave(path, svxid, depth) # this will produce null for survex files which are geographic collections
self.currentsurvexfile = survexblock.survexfile
self.currentsurvexfile.save() # django insists on this although it is already saved !?
@ -2198,7 +2202,6 @@ def FindAndLoadSurvex():
)
print(f" -- (but ignoring {len(removals)} of them)", file=sys.stderr)
check_team_cache()
s_date = date.today().isoformat().replace('-','.')
print(f" -- Now loading the previously-omitted survex files as {UNSEENS} *date {s_date}", file=sys.stderr)
print(f" - (except: {excpts})", file=sys.stderr)
@ -2250,8 +2253,6 @@ def FindAndLoadSurvex():
flinear.write(f"{omit_scan.depthinclude:2} {indent} *edulcni {unseensroot}\n")
fcollate.write(f";*edulcni {UNSEENS}\n")
check_team_cache()
mem1 = get_process_memory()
flinear.write(f"\n - MEM:{mem1:.2f} MB STOP {UNSEENS} Unseen Oddments\n")
@ -2294,6 +2295,7 @@ def FindAndLoadSurvex():
# ps = pstats.Stats(pr2, stream=f)
# ps.sort_stats(SortKey.CUMULATIVE)
# ps.print_stats()
mem1 = get_process_memory()
print(f"\n - MEM:{mem1:7.2f} MB STOP", file=sys.stderr)
print(f" - MEM:{mem1 - mem0:7.3f} MB ADDITIONALLY USED", file=sys.stderr)