mirror of https://expo.survex.com/repositories/troggle/.git
synced 2024-11-25 08:41:51 +00:00

commit 7779544c0c (parent 28d1092956)

    refactoring cave id stuff in survex parser - working
@@ -22,20 +22,19 @@ It also scans the Loser repo for all the svx files, which it loads individually
 """
 
 todo = """
+- Obscure bug in the *team inheritance and rootblock initialization needs tracking down,
+probably in the team cache which should NOT be global, but should be an instance variable of
+LoadingSurvex
 
 - Lots to do to cut down on unnecessary .save() calls to avoid hitting the db so much. Should
 speed it up noticeably.
 
-- Obscure bug in the *team inheritance and rootblock initialization needs tracking down
-
 - Learn to use Django .select_related() and .prefetch_related() to speed things up
 https://zerotobyte.com/how-to-use-django-select-related-and-prefetch-related/
 
 - LoadSurvexFile() Creates a new current survexfile
-The survexblock passed-in is not necessarily the parent. FIX THIS.
+The survexblock passed-in is not necessarily the survex parent. FIX THIS.
 
-- Finish writing the parse_one_file() function for survexfiles edited online. Perhaps
-easier if this is a completely new file rather than an existing file.. nasty.
-
 - When Olly implements LEG in the 'dump3d --legs' utility, then we can use that to get the length of
 all the legs in a survex block instead of adding them up ourselves. Which means that we can
 ignore all the Units and offset stuff, that troggle will work with survex files with backsights,
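
Note: the .select_related() item in the todo above is about Django's classic N+1 query problem. A minimal sketch of the difference it makes, assuming the SurvexBlock -> survexfile foreign key that the rest of this diff uses (block.survexfile.path); a Django ORM sketch, not troggle's actual loading code:

    # Without select_related(): each access to block.survexfile fires its own query.
    for block in SurvexBlock.objects.all():
        print(block.survexfile.path)  # N+1 queries in total

    # With select_related(): one JOINed query fetches the related SurvexFile rows too.
    for block in SurvexBlock.objects.select_related("survexfile"):
        print(block.survexfile.path)  # 1 query in total
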
@@ -62,27 +61,10 @@ class SurvexLeg:
     compass = 0.0
     clino = 0.0
 
-def IdentifyCave(cavepath):
-    """Given a file path for a survex file, or a survex-block path,
-    return the cave object
-    """
-    caveslist = GetCaveLookup()
-    if cavepath.lower() in caveslist:
-        return caveslist[cavepath.lower()]
-    # TO DO - this predates the big revision to Gcavelookup so look at this again carefully
-    path_match = LoadingSurvex.rx_cave.search(cavepath)  # use as Class method
-    if path_match:
-        sluggy = f"{path_match.group(1)}-{path_match.group(2)}"
-        guesses = [sluggy.lower(), path_match.group(2).lower()]
-        for g in guesses:
-            if g in caveslist:
-                caveslist[cavepath] = caveslist[g]
-                return caveslist[g]
-        print(f" ! Failed to find cave for {cavepath.lower()}")
-    else:
-        # not a cave, but that is fine.
-        # print(f' ! No regex(standard identifier) cave match for {cavepath.lower()}')
-        return None
-
 def datewallet(w, earliest):
     """Gets the date of the youngest survexblock associated with the wallet
@@ -141,7 +123,8 @@ def get_offending_filename(path):
     """
     return "/survexfile/" + path + ".svx"
 
-trip_people_cache = {}  # per survexblock, so robust wrt PUSH/POP begin/end
+# THIS SHOULD NOT BE GLOBAL ! Should be per instance of file loader..
+trip_people_cache = {}  # indexed by survexblock, so never needs cleaning out
 def get_team_on_trip(survexblock):
     """Uses a cache to avoid a database query if it doesn't need to.
     Only used for complete team."""
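
Note: the pattern in this hunk (and the next two) is a plain dict used as a memo, so repeated lookups for the same survexblock never re-query the database. A rough sketch of the shape — the filter field name and return type here are illustrative guesses, not troggle's exact code:

    trip_people_cache = {}  # survexblock -> team members already fetched

    def get_team_on_trip(survexblock):
        if survexblock in trip_people_cache:
            return trip_people_cache[survexblock]  # cheap path: no db query
        # slow path: hit the database once, then remember the answer
        people = list(SurvexPersonRole.objects.filter(survexblock=survexblock))
        trip_people_cache[survexblock] = people
        return people
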
@@ -165,8 +148,9 @@ def get_people_on_trip(survexblock):
 
     return list(set(people))
 
-trip_person_record = {}  # per survexblock, so robust wrt PUSH/POP begin/end
-trip_team_cache = {}  # per survexblock, so robust wrt PUSH/POP begin/end
+# THIS SHOULD NOT BE GLOBAL ! Should be per instance of file loader
+trip_person_record = {}  # indexed by (survexblock, personexpedition) - so never needs cleaning out
+trip_team_cache = {}  # indexed by survexblock, so never needs cleaning out
 def put_person_on_trip(survexblock, personexpedition, tm):
     """Uses a cache to avoid a database query if it doesn't need to.
     Only used for a single person"""
@@ -206,18 +190,17 @@ def confirm_team_on_trip(survexblock):
         SurvexPersonRole.objects.bulk_create(trip_team_cache[survexblock])
         trip_team_cache[survexblock] = []  # in database now, so empty cache
 
-def check_team_cache():
+def check_team_cache(label=None):
     global trip_team_cache
 
     message = f"! check_team_cache() called.. "
     print(message)
+    print(message, file=sys.stderr)
     for block in trip_team_cache:
-        message = f"! *team CACHEFAIL, already created {block.survexfile.path} ({block}) "
+        message = f"! *team CACHEFAIL, trip_team_cache {block.survexfile.path} ({block}). label:{label}"
         print(message)
+        print(message, file=sys.stderr)
 
-person_pending_cache = {}  # per survexblock, so robust wrt PUSH/POP begin/end
+person_pending_cache = {}  # indexed per survexblock, so robust wrt PUSH/POP begin/end
 def add_to_pending(survexblock, tm):
     """Collects team names before we have a date so cannot validate against
     expo attendance yet"""
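
Note: confirm_team_on_trip() above is the write half of the same caching idea, and ties into the ".save() calls" todo at the top of the file: roles are accumulated per survexblock and flushed with a single bulk_create() instead of one save() per role. Sketched with simplified, hypothetical helper names:

    trip_team_cache = {}  # survexblock -> list of unsaved SurvexPersonRole objects

    def put_role_sketch(survexblock, role):
        # accumulate unsaved objects instead of calling role.save() every time
        trip_team_cache.setdefault(survexblock, []).append(role)

    def confirm_team_sketch(survexblock):
        if survexblock in trip_team_cache:
            # one INSERT for the whole team, then empty the cache (as in the diff)
            SurvexPersonRole.objects.bulk_create(trip_team_cache[survexblock])
            trip_team_cache[survexblock] = []
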
@@ -1180,7 +1163,45 @@ class LoadingSurvex:
             self.svxprim[headpath.lower()] = primary
         return self.svxprim[headpath.lower()]
 
-    def ReportNonCaveIncludes(self, headpath, includelabel, depth):
+    def IdentifyCave(self, cavepath, svxid, depth):
+        """Given a file path for a survex file, e.g. /1626/107/107.svx, or a survex-block path,
+        return the cave object
+
+        REWRITE ALL THIS and make a method on the class
+        """
+        caveslist = GetCaveLookup()
+        if cavepath.lower() in caveslist:  # will only work after we load in full paths as indexes, see below
+            return caveslist[cavepath.lower()]
+        # rx_cave = re.compile(r"(?i)caves-(\d\d\d\d)/([-\d\w]+|\d\d\d\d-?\w+-\d+)")
+        path_match = self.rx_cave.search(cavepath)  # use as Class method.
+        if path_match:
+            sluggy = f"{path_match.group(1)}-{path_match.group(2)}"
+            # guesses = [sluggy.lower(), path_match.group(2).lower()]  # this looks for JUST "107" and ignores 1626..
+            guesses = [sluggy.lower()]  # full 1626-107 search, don't use short-forms
+            for g in guesses:
+                if g in caveslist:
+                    caveslist[cavepath] = caveslist[g]  # set "caves-1626/107/107.svx" as index to cave 1626-107
+                    return caveslist[g]
+            print(f" ! Failed to find cave for {cavepath.lower()}", file=sys.stderr)
+        else:
+            # not a cave, but that is fine.
+            if self.is_it_already_pending(cavepath, svxid, depth):
+                pass
+            else:
+                # It is too late to add it to the pending caves list here, they were already
+                # processed in parsers/caves.py So we have to do a bespoke creation.
+                cave = create_new_cave(svxid)
+
+                message = f" ! Warning: cave identifier '{cavepath}' (guessed from file path) is not a known cave. Need to add to expoweb/cave_data/pendingcaves.txt ? In '{svxid}.svx' at depth:[{len(depth)}]."
+                print("\n" + message)
+                print("\n" + message, file=sys.stderr)
+                print(f"{self.pending}", end="", file=sys.stderr)
+                stash_data_issue(parser="survex", message=message, url=None, sb=(svxid))
+
+            print(f' ! No regex (standard identifier) cave match for {cavepath.lower()}', file=sys.stderr)
+            return None
+
+    def is_it_already_pending(self, headpath, includelabel, depth):
         """Ignore surface, kataster and gpx *include survex files"""
         if not self.pending:
             self.pending = set()
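
Note: the rx_cave pattern quoted as a comment in the new IdentifyCave() is what produces the "sluggy" guess. A self-contained check of what it captures for the docstring's example path:

    import re

    rx_cave = re.compile(r"(?i)caves-(\d\d\d\d)/([-\d\w]+|\d\d\d\d-?\w+-\d+)")

    m = rx_cave.search("caves-1626/107/107.svx")
    sluggy = f"{m.group(1)}-{m.group(2)}"
    print(sluggy)  # "1626-107" -- only this full slug is tried now; the short form "107" is not
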
@@ -1199,7 +1220,7 @@ class LoadingSurvex:
             message = f" - {headpath} is <ignorenoncave> (while creating '{includelabel}' sfile & sdirectory)"
             # print("\n"+message)
             # print("\n"+message,file=sys.stderr)
-            return
+            return True
         for i in self.ignoreprefix:
             if headpath.startswith(i):
                 message = (
@@ -1207,28 +1228,17 @@ class LoadingSurvex:
                 )
                 # print("\n"+message)
                 # print("\n"+message,file=sys.stderr)
-                return
+                return True
         caveid = f"{headpath[6:10]}-{headpath[11:]}".upper()
         if caveid in self.pending:
             # Yes we didn't find this cave, but we know it is a pending one. So not an error.
-            # print(f'! ALREADY PENDING {caveid}',file=sys.stderr)
-            return
+            print(f'! ALREADY PENDING caveid {caveid}',file=sys.stderr)
+            return True
         id = caveid[5:]
         if id in self.pending:
-            print(f"! ALREADY PENDING {id}", file=sys.stderr)
-            return
+            print(f"! ALREADY PENDING id {id}", file=sys.stderr)
+            return True
-
-        # It is too late to add it to the pending caves list here, they were already
-        # processed in parsers/caves.py So we have to do a bespoke creation.
-        svxpath = includelabel
-        cave = create_new_cave(svxpath)
-
-        message = f" ! Warning: cave identifier '{caveid}'or {id} (guessed from file path) is not a known cave. Need to add to expoweb/cave_data/pendingcaves.txt ? In '{includelabel}.svx' at depth:[{len(depth)}]."
-        print("\n" + message)
-        print("\n" + message, file=sys.stderr)
-        print(f"{self.pending}", end="", file=sys.stderr)
-        stash_data_issue(parser="survex", message=message, url=None, sb=(includelabel))
 
     def LoadSurvexFile(self, svxid):
         """Creates SurvexFile in the database, and SurvexDirectory if needed
         Creates a new current survexfile and valid .survexdirectory
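
Note: the bare return -> return True changes in these two hunks are load-bearing: is_it_already_pending() is now called as a predicate from the new IdentifyCave(), and a bare return yields None (falsy), so every early exit has to return a truthy value. The calling shape, as in the hunk further up:

    if self.is_it_already_pending(cavepath, svxid, depth):
        pass  # a known pending cave, so not an error and nothing to create
    else:
        # too late to add to the pending list (parsers/caves.py already ran),
        # so a bespoke cave is created instead
        cave = create_new_cave(svxid)
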
@@ -1267,15 +1277,9 @@ class LoadingSurvex:
         newfile.save()  # until we do this there is no internal id so no foreign key works
         self.currentsurvexfile = newfile
         newfile.primary = self.set_primary(headpath)
 
-        # REPLACE all this IdentifyCave() stuff with GCaveLookup ?
-        cave = IdentifyCave(headpath)  # cave already exists in db
-        if not cave:
-            # probably a surface survey, or a cave in a new area
-            # e.g. 1624 not previously managed, and not in the pending list
-            self.ReportNonCaveIncludes(headpath, svxid, depth)
-            # try again
-            cave = IdentifyCave(headpath)
+        # refactor this !
+        cave = self.IdentifyCave(headpath, svxid, depth)  # cave already exists in db?
         if cave:
             newfile.cave = cave
             # print(f"\n - New directory '{newdirectory}' for cave '{cave}'",file=sys.stderr)
@@ -1530,7 +1534,7 @@ class LoadingSurvex:
         slengthtotal = 0.0
         nlegstotal = 0
         self.relativefilename = path
-        IdentifyCave(path)  # this will produce null for survex files which are geographic collections
+        # self.IdentifyCave(path, svxid, depth)  # this will produce null for survex files which are geographic collections
 
         self.currentsurvexfile = survexblock.survexfile
         self.currentsurvexfile.save()  # django insists on this although it is already saved !?
@@ -2198,7 +2202,6 @@ def FindAndLoadSurvex():
     )
     print(f" -- (but ignoring {len(removals)} of them)", file=sys.stderr)
 
-    check_team_cache()
    s_date = date.today().isoformat().replace('-','.')
     print(f" -- Now loading the previously-omitted survex files as {UNSEENS} *date {s_date}", file=sys.stderr)
     print(f" - (except: {excpts})", file=sys.stderr)
@@ -2250,8 +2253,6 @@ def FindAndLoadSurvex():
 
     flinear.write(f"{omit_scan.depthinclude:2} {indent} *edulcni {unseensroot}\n")
     fcollate.write(f";*edulcni {UNSEENS}\n")
 
-    check_team_cache()
-
     mem1 = get_process_memory()
     flinear.write(f"\n - MEM:{mem1:.2f} MB STOP {UNSEENS} Unseen Oddments\n")
@@ -2294,6 +2295,7 @@ def FindAndLoadSurvex():
     # ps = pstats.Stats(pr2, stream=f)
     # ps.sort_stats(SortKey.CUMULATIVE)
     # ps.print_stats()
 
     mem1 = get_process_memory()
     print(f"\n - MEM:{mem1:7.2f} MB STOP", file=sys.stderr)
     print(f" - MEM:{mem1 - mem0:7.3f} MB ADDITIONALLY USED", file=sys.stderr)