mirror of
https://expo.survex.com/repositories/troggle/.git
synced 2024-11-22 07:11:52 +00:00
refactoring cave id stuff in survex parser - working
This commit is contained in:
parent
28d1092956
commit
7779544c0c
@ -22,19 +22,18 @@ It also scans the Loser repo for all the svx files, which it loads individually
|
||||
"""
|
||||
|
||||
todo = """
|
||||
- Obscure bug in the *team inheritance and rootblock initialization needs tracking down,
|
||||
probably in the team cache which should NOT be global, but should be an instance variable of
|
||||
LoadingSurvex
|
||||
|
||||
- Lots to do to cut down on unnecessary .save() calls to avoid hitting the db so much. Should
|
||||
speed it up noticeably.
|
||||
|
||||
- Obscure bug in the *team inheritance and rootblock initialization needs tracking down
|
||||
|
||||
- Learn to use Django .select_related() and .prefetch_related() to speed things up
|
||||
https://zerotobyte.com/how-to-use-django-select-related-and-prefetch-related/
|
||||
|
||||
- LoadSurvexFile() Creates a new current survexfile
|
||||
The survexblock passed-in is not necessarily the parent. FIX THIS.
|
||||
|
||||
- Finish writing the parse_one_file() function for survexfiles edited online. Perhaps
|
||||
easier if this is a completely new file rather than an existing file.. nasty.
|
||||
The survexblock passed-in is not necessarily the survex parent. FIX THIS.
|
||||
|
||||
- When Olly implements LEG in the 'dump3d --legs' utility, then we can use that to get the length of
|
||||
all the legs in a survex block instead of adding them up ourselves. Which means that we can
|
||||
@ -62,27 +61,10 @@ class SurvexLeg:
|
||||
compass = 0.0
|
||||
clino = 0.0
|
||||
|
||||
def IdentifyCave(cavepath):
    """Given a file path for a survex file, or a survex-block path,
    return the cave object, or None if no cave can be identified.

    The lookup table from GetCaveLookup() is consulted first using the
    lower-cased path. If that misses, the path is matched against
    LoadingSurvex.rx_cave and the resulting "<group1>-<group2>" identifier,
    then group 2 on its own, are tried as keys.

    A successful guess is memoised in the lookup table under the lower-cased
    path, i.e. the same key form the initial lookup uses, so the next call
    with this path returns immediately.
    """
    caveslist = GetCaveLookup()
    pathkey = cavepath.lower()
    if pathkey in caveslist:
        return caveslist[pathkey]

    # TO DO - this predates the big revision to Gcavelookup so look at this again carefully
    path_match = LoadingSurvex.rx_cave.search(cavepath)  # use as Class method
    if path_match is None:
        # not a cave, but that is fine.
        # print(f' ! No regex(standard identifier) cave match for {cavepath.lower()}')
        return None

    sluggy = f"{path_match.group(1)}-{path_match.group(2)}"
    for guess in (sluggy.lower(), path_match.group(2).lower()):
        if guess in caveslist:
            # Memoise under the lower-cased key: the lookup above is
            # lower-cased, so storing the raw path would never be hit for
            # mixed-case paths.
            caveslist[pathkey] = caveslist[guess]
            return caveslist[guess]

    print(f" ! Failed to find cave for {pathkey}")
    return None
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
def datewallet(w, earliest):
|
||||
"""Gets the date of the youngest survexblock associated with the wallet
|
||||
@ -141,7 +123,8 @@ def get_offending_filename(path):
|
||||
"""
|
||||
return "/survexfile/" + path + ".svx"
|
||||
|
||||
trip_people_cache = {} # per survexblock, so robust wrt PUSH/POP begin/end
|
||||
# THIS SHOULD NOT BE GLOBAL! It should be per instance of the file loader.
|
||||
trip_people_cache = {} # indexed by survexblock, so never needs cleaning out
|
||||
def get_team_on_trip(survexblock):
|
||||
"""Uses a cache to avoid a database query if it doesn't need to.
|
||||
Only used for complete team."""
|
||||
@ -165,8 +148,9 @@ def get_people_on_trip(survexblock):
|
||||
|
||||
return list(set(people))
|
||||
|
||||
trip_person_record = {} # per survexblock, so robust wrt PUSH/POP begin/end
|
||||
trip_team_cache = {} # per survexblock, so robust wrt PUSH/POP begin/end
|
||||
# THIS SHOULD NOT BE GLOBAL! It should be per instance of the file loader.
|
||||
trip_person_record = {} # indexed by (survexblock, personexpedition) - so never needs cleaning out
|
||||
trip_team_cache = {} # indexed by survexblock, so never needs cleaning out
|
||||
def put_person_on_trip(survexblock, personexpedition, tm):
|
||||
"""Uses a cache to avoid a database query if it doesn't need to.
|
||||
Only used for a single person"""
|
||||
@ -206,18 +190,17 @@ def confirm_team_on_trip(survexblock):
|
||||
SurvexPersonRole.objects.bulk_create(trip_team_cache[survexblock])
|
||||
trip_team_cache[survexblock] = [] # in database now, so empty cache
|
||||
|
||||
def check_team_cache():
|
||||
def check_team_cache(label=None):
|
||||
global trip_team_cache
|
||||
|
||||
message = f"! check_team_cache() called.. "
|
||||
print(message)
|
||||
|
||||
print(message, file=sys.stderr)
|
||||
for block in trip_team_cache:
|
||||
message = f"! *team CACHEFAIL, already created {block.survexfile.path} ({block}) "
|
||||
message = f"! *team CACHEFAIL, trip_team_cache {block.survexfile.path} ({block}). label:{label}"
|
||||
print(message)
|
||||
print(message, file=sys.stderr)
|
||||
|
||||
|
||||
person_pending_cache = {} # per survexblock, so robust wrt PUSH/POP begin/end
|
||||
person_pending_cache = {} # indexed per survexblock, so robust wrt PUSH/POP begin/end
|
||||
def add_to_pending(survexblock, tm):
|
||||
"""Collects team names before we have a date so cannot validate against
|
||||
expo attendance yet"""
|
||||
@ -1180,7 +1163,45 @@ class LoadingSurvex:
|
||||
self.svxprim[headpath.lower()] = primary
|
||||
return self.svxprim[headpath.lower()]
|
||||
|
||||
def ReportNonCaveIncludes(self, headpath, includelabel, depth):
|
||||
def IdentifyCave(self, cavepath, svxid, depth):
    """Given a file path for a survex file, e.g. /1626/107/107.svx, or a
    survex-block path, return the cave object, or None if there is none.

    cavepath -- path to identify; also passed to is_it_already_pending()
    svxid    -- id of the survex file being loaded; used to create a new cave
                and quoted in the warning message
    depth    -- include stack; only its length is reported in the warning

    Lookup order:
      1. the lower-cased path in the table returned by GetCaveLookup()
      2. the full "<area>-<cave>" identifier extracted with self.rx_cave;
         a hit is memoised in the table under the lower-cased path
      3. otherwise the path is not a standard cave identifier: unless
         is_it_already_pending() says it can be ignored, a cave is created
         with create_new_cave() and a data issue is recorded.

    NOTE(review): None is returned even after create_new_cave() has been
    called, as in the original code - confirm that the caller does not need
    the newly created cave.
    """
    caveslist = GetCaveLookup()
    pathkey = cavepath.lower()
    if pathkey in caveslist:  # only hits once full paths have been memoised, see below
        return caveslist[pathkey]

    # rx_cave = re.compile(r"(?i)caves-(\d\d\d\d)/([-\d\w]+|\d\d\d\d-?\w+-\d+)")
    path_match = self.rx_cave.search(cavepath)
    if path_match:
        # Full "1626-107" search only. The short form (just "107") is not
        # tried because it ignores the area number.
        sluggy = f"{path_match.group(1)}-{path_match.group(2)}".lower()
        if sluggy in caveslist:
            # e.g. set "caves-1626/107/107.svx" as index to cave 1626-107.
            # Stored lower-cased because the lookup above is lower-cased.
            caveslist[pathkey] = caveslist[sluggy]
            return caveslist[sluggy]
        print(f" ! Failed to find cave for {pathkey}", file=sys.stderr)
        return None

    # not a cave, but that is fine.
    if not self.is_it_already_pending(cavepath, svxid, depth):
        # It is too late to add it to the pending caves list here, they were already
        # processed in parsers/caves.py So we have to do a bespoke creation.
        create_new_cave(svxid)

        # Guess the identifiers from the path in the same way as
        # is_it_already_pending() does, so the warning names what was tested.
        caveid = f"{cavepath[6:10]}-{cavepath[11:]}".upper()
        shortid = caveid[5:]
        message = f" ! Warning: cave identifier '{caveid}'or {shortid} (guessed from file path) is not a known cave. Need to add to expoweb/cave_data/pendingcaves.txt ? In '{svxid}.svx' at depth:[{len(depth)}]."
        print("\n" + message)
        print("\n" + message, file=sys.stderr)
        print(f"{self.pending}", end="", file=sys.stderr)
        stash_data_issue(parser="survex", message=message, url=None, sb=(svxid))

    print(f' ! No regex (standard identifier) cave match for {pathkey}', file=sys.stderr)
    return None
|
||||
|
||||
def is_it_already_pending(self, headpath, includelabel, depth):
|
||||
"""Ignore surface, kataser and gpx *include survex files"""
|
||||
if not self.pending:
|
||||
self.pending = set()
|
||||
@ -1199,7 +1220,7 @@ class LoadingSurvex:
|
||||
message = f" - {headpath} is <ignorenoncave> (while creating '{includelabel}' sfile & sdirectory)"
|
||||
# print("\n"+message)
|
||||
# print("\n"+message,file=sys.stderr)
|
||||
return
|
||||
return True
|
||||
for i in self.ignoreprefix:
|
||||
if headpath.startswith(i):
|
||||
message = (
|
||||
@ -1207,27 +1228,16 @@ class LoadingSurvex:
|
||||
)
|
||||
# print("\n"+message)
|
||||
# print("\n"+message,file=sys.stderr)
|
||||
return
|
||||
return True
|
||||
caveid = f"{headpath[6:10]}-{headpath[11:]}".upper()
|
||||
if caveid in self.pending:
|
||||
# Yes we didn't find this cave, but we know it is a pending one. So not an error.
|
||||
# print(f'! ALREADY PENDING {caveid}',file=sys.stderr)
|
||||
return
|
||||
print(f'! ALREADY PENDING caveid {caveid}',file=sys.stderr)
|
||||
return True
|
||||
id = caveid[5:]
|
||||
if id in self.pending:
|
||||
print(f"! ALREADY PENDING {id}", file=sys.stderr)
|
||||
return
|
||||
|
||||
# It is too late to add it to the pending caves list here, they were already
|
||||
# processed in parsers/caves.py So we have to do a bespoke creation.
|
||||
svxpath= includelabel
|
||||
cave = create_new_cave(svxpath)
|
||||
|
||||
message = f" ! Warning: cave identifier '{caveid}'or {id} (guessed from file path) is not a known cave. Need to add to expoweb/cave_data/pendingcaves.txt ? In '{includelabel}.svx' at depth:[{len(depth)}]."
|
||||
print("\n" + message)
|
||||
print("\n" + message, file=sys.stderr)
|
||||
print(f"{self.pending}", end="", file=sys.stderr)
|
||||
stash_data_issue(parser="survex", message=message, url=None, sb=(includelabel))
|
||||
print(f"! ALREADY PENDING id {id}", file=sys.stderr)
|
||||
return True
|
||||
|
||||
def LoadSurvexFile(self, svxid):
|
||||
"""Creates SurvexFile in the database, and SurvexDirectory if needed
|
||||
@ -1268,14 +1278,8 @@ class LoadingSurvex:
|
||||
self.currentsurvexfile = newfile
|
||||
newfile.primary = self.set_primary(headpath)
|
||||
|
||||
# REPLACE all this IdentifyCave() stuff with GCaveLookup ?
|
||||
cave = IdentifyCave(headpath) # cave already exists in db
|
||||
if not cave:
|
||||
# probably a surface survey, or a cave in a new area
|
||||
# e.g. 1624 not previously managed, and not in the pending list
|
||||
self.ReportNonCaveIncludes(headpath, svxid, depth)
|
||||
#try again
|
||||
cave = IdentifyCave(headpath)
|
||||
# refactor this !
|
||||
cave = self.IdentifyCave(headpath, svxid, depth) # cave already exists in db?
|
||||
if cave:
|
||||
newfile.cave = cave
|
||||
# print(f"\n - New directory '{newdirectory}' for cave '{cave}'",file=sys.stderr)
|
||||
@ -1530,7 +1534,7 @@ class LoadingSurvex:
|
||||
slengthtotal = 0.0
|
||||
nlegstotal = 0
|
||||
self.relativefilename = path
|
||||
IdentifyCave(path) # this will produce null for survex files which are geographic collections
|
||||
#self.IdentifyCave(path, svxid, depth) # this will produce null for survex files which are geographic collections
|
||||
|
||||
self.currentsurvexfile = survexblock.survexfile
|
||||
self.currentsurvexfile.save() # django insists on this although it is already saved !?
|
||||
@ -2198,7 +2202,6 @@ def FindAndLoadSurvex():
|
||||
)
|
||||
print(f" -- (but ignoring {len(removals)} of them)", file=sys.stderr)
|
||||
|
||||
check_team_cache()
|
||||
s_date = date.today().isoformat().replace('-','.')
|
||||
print(f" -- Now loading the previously-omitted survex files as {UNSEENS} *date {s_date}", file=sys.stderr)
|
||||
print(f" - (except: {excpts})", file=sys.stderr)
|
||||
@ -2251,8 +2254,6 @@ def FindAndLoadSurvex():
|
||||
flinear.write(f"{omit_scan.depthinclude:2} {indent} *edulcni {unseensroot}\n")
|
||||
fcollate.write(f";*edulcni {UNSEENS}\n")
|
||||
|
||||
check_team_cache()
|
||||
|
||||
mem1 = get_process_memory()
|
||||
flinear.write(f"\n - MEM:{mem1:.2f} MB STOP {UNSEENS} Unseen Oddments\n")
|
||||
flinear.write(f" - MEM:{mem1 - mem0:.3f} MB ADDITIONALLY USED Unseen Oddments\n")
|
||||
@ -2294,6 +2295,7 @@ def FindAndLoadSurvex():
|
||||
# ps = pstats.Stats(pr2, stream=f)
|
||||
# ps.sort_stats(SortKey.CUMULATIVE)
|
||||
# ps.print_stats()
|
||||
|
||||
mem1 = get_process_memory()
|
||||
print(f"\n - MEM:{mem1:7.2f} MB STOP", file=sys.stderr)
|
||||
print(f" - MEM:{mem1 - mem0:7.3f} MB ADDITIONALLY USED", file=sys.stderr)
|
||||
|
Loading…
Reference in New Issue
Block a user