2
0
mirror of https://expo.survex.com/repositories/troggle/.git synced 2024-11-25 08:41:51 +00:00

use generator when reading individual survex files too, saves another 6MB

This commit is contained in:
Philip Sargent 2022-10-07 11:41:46 +03:00
parent b4c4f2aefc
commit c76cd38d76

View File

@ -1225,57 +1225,15 @@ class LoadingSurvex():
self.legsnumber = nlegstotal self.legsnumber = nlegstotal
self.slength = slengthtotal self.slength = slengthtotal
def PushdownStackScan(self, survexblock, path, fin, flinear, fcollate): def PushdownStackScan(self, survexblock, path, finname, flinear, fcollate):
"""Follows the *include links in all the survex files from the root file 1623.svx """Follows the *include links in all the survex files from the root file (usually 1623.svx)
and reads only the *include and *begin and *end statements. It produces a linearised and reads only the *include and *begin and *end statements. It produces a linearised
list of the include tree and detects blocks included more than once. list of the include tree and detects blocks included more than once.
""" """
global stop_dup_warning global stop_dup_warning
thissvxline = 0
indent = " " * self.depthinclude
sys.stderr.flush();
self.callcount +=1
if self.callcount % 10 ==0 :
print(".", file=sys.stderr,end='')
if self.callcount % 500 ==0 :
print("\n ", file=sys.stderr,end='')
if path in self.svxfileslist: def process_line(svxline):
# We have already used os.normpath() so this is OK. "/../" and "//" have been simplified already.
if stop_dup_warning:
#print("D",end="", file=sys.stderr)
pass
else:
message = f" * Warning. Duplicate detected. We have already seen this *include '{path}' from another survex file. Detected at callcount:{self.callcount} depth:{self.depthinclude}"
print(message)
print(message,file=flinear)
#print(message,file=sys.stderr)
DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(path))
if self.svxfileslist.count(path) > 2:
message = " ! ERROR. Should have been caught before this. Survex file already *included 2x. Probably an infinite loop so fix your *include statements that include this. Aborting. {}".format(path)
print(message)
print(message,file=flinear)
#print(message,file=sys.stderr)
DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(path))
return
return
self.svxfileslist.append(path)
try:
svxlines = fin.read().splitlines()
except UnicodeDecodeError:
# some bugger put an umlaut in a non-UTF survex file ?!
message = f" ! ERROR *include file '{path}' in '{survexblock}' has UnicodeDecodeError"
print(message)
print(message,file=sys.stderr)
DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(path))
return # skip this survex file and all things *included in it
for svxline in svxlines:
self.lineno += 1 self.lineno += 1
thissvxline += 1
# detect a merge failure inserted by version control # detect a merge failure inserted by version control
mfail = self.rx_badmerge.match(svxline) mfail = self.rx_badmerge.match(svxline)
if mfail: if mfail:
@ -1305,13 +1263,14 @@ class LoadingSurvex():
if os.path.isfile(fullpath): if os.path.isfile(fullpath):
#-------------------------------------------------------- #--------------------------------------------------------
self.depthinclude += 1 self.depthinclude += 1
fininclude = open(fullpath,'r') # fininclude = open(fullpath,'r')
finincludename = fullpath
fcollate.write(";|*include {}\n".format(includepath)) fcollate.write(";|*include {}\n".format(includepath))
flinear.write("{:2} {} *include {}\n".format(self.depthinclude, indent, includepath)) flinear.write("{:2} {} *include {}\n".format(self.depthinclude, indent, includepath))
push = includepath.lower() push = includepath.lower()
self.includestack.append(push) self.includestack.append(push)
#----------------- #-----------------
self.PushdownStackScan(survexblock, includepath, fininclude, flinear, fcollate) self.PushdownStackScan(survexblock, includepath, finincludename, flinear, fcollate)
#----------------- #-----------------
pop = self.includestack.pop() pop = self.includestack.pop()
if pop != push: if pop != push:
@ -1322,7 +1281,7 @@ class LoadingSurvex():
DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(path)) DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(path))
flinear.write("{:2} {} *edulcni {}\n".format(self.depthinclude, indent, pop)) flinear.write("{:2} {} *edulcni {}\n".format(self.depthinclude, indent, pop))
fcollate.write(";|*edulcni {}\n".format(pop)) fcollate.write(";|*edulcni {}\n".format(pop))
fininclude.close() # fininclude.close()
self.depthinclude -= 1 self.depthinclude -= 1
#-------------------------------------------------------- #--------------------------------------------------------
else: else:
@ -1360,7 +1319,57 @@ class LoadingSurvex():
flinear.write(" {:2} {} *title {}\n".format(self.depthbegin, depth, args)) flinear.write(" {:2} {} *title {}\n".format(self.depthbegin, depth, args))
pass pass
indent = " " * self.depthinclude
sys.stderr.flush();
self.callcount +=1
if self.callcount % 10 ==0 :
print(".", file=sys.stderr,end='')
if self.callcount % 500 ==0 :
print("\n ", file=sys.stderr,end='')
if path in self.svxfileslist:
# We have already used os.normpath() so this is OK. "/../" and "//" have been simplified already.
if stop_dup_warning:
#print("D",end="", file=sys.stderr)
pass
else:
message = f" * Warning. Duplicate detected. We have already seen this *include '{path}' from another survex file. Detected at callcount:{self.callcount} depth:{self.depthinclude}"
print(message)
print(message,file=flinear)
#print(message,file=sys.stderr)
DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(path))
if self.svxfileslist.count(path) > 2:
message = " ! ERROR. Should have been caught before this. Survex file already *included 2x. Probably an infinite loop so fix your *include statements that include this. Aborting. {}".format(path)
print(message)
print(message,file=flinear)
#print(message,file=sys.stderr)
DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(path))
return
return
try:
# python generator idiom again
with open(finname, "r") as fin:
for svxline in fin:
process_line(svxline)
self.svxfileslist.append(path)
except UnicodeDecodeError:
# some bugger put an umlaut in a non-UTF survex file ?!
message = f" ! ERROR *include file '{path}' in '{survexblock}' has UnicodeDecodeError. Omitted."
print(message)
print(message,file=sys.stderr)
DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(path))
return # skip this survex file and all things *included in it
except :
message = f" ! ERROR *include file '{path}' in '{survexblock}' has unexpected error. Omitted."
print(message)
print(message,file=sys.stderr)
DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(path))
return # skip this survex file and all things *included in it
def checkUniqueness(self,fullpath): def checkUniqueness(self,fullpath):
fn = Path(fullpath).name fn = Path(fullpath).name
if fn not in self.uniquename: if fn not in self.uniquename:
@ -1485,7 +1494,7 @@ def FindAndLoadSurvex(survexblockroot):
flinear.write(" - MEM:{:7.2f} MB START {}\n".format(mem0,survexfileroot.path)) flinear.write(" - MEM:{:7.2f} MB START {}\n".format(mem0,survexfileroot.path))
print(" ", file=sys.stderr,end='') print(" ", file=sys.stderr,end='')
finroot = survexfileroot.OpenFile() finrootname = Path(settings.SURVEX_DATA, survexfileroot.path + ".svx")
fcollate.write(";*include {}\n".format(survexfileroot.path)) fcollate.write(";*include {}\n".format(survexfileroot.path))
flinear.write("{:2} {} *include {}\n".format(svx_scan.depthinclude, indent, survexfileroot.path)) flinear.write("{:2} {} *include {}\n".format(svx_scan.depthinclude, indent, survexfileroot.path))
@ -1495,7 +1504,7 @@ def FindAndLoadSurvex(survexblockroot):
pr.enable() pr.enable()
#print(f"###{survexblockroot=} {survexfileroot.path=}",file=sys.stderr) #print(f"###{survexblockroot=} {survexfileroot.path=}",file=sys.stderr)
#---------------------------------------------------------------- #----------------------------------------------------------------
svx_scan.PushdownStackScan(survexblockroot, survexfileroot.path, finroot, flinear, fcollate) svx_scan.PushdownStackScan(survexblockroot, survexfileroot.path, finrootname, flinear, fcollate)
#---------------------------------------------------------------- #----------------------------------------------------------------
pr.disable() pr.disable()
with open('PushdownStackScan.prof', 'w') as f: with open('PushdownStackScan.prof', 'w') as f:
@ -1583,12 +1592,12 @@ def FindAndLoadSurvex(survexblockroot):
flinear.write(f" - MEM:{mem0:7.2f} MB START '_unseens'\n") flinear.write(f" - MEM:{mem0:7.2f} MB START '_unseens'\n")
print(" ", file=sys.stderr,end='') print(" ", file=sys.stderr,end='')
finroot = open(fullpathtotop) finrootname = fullpathtotop
fcollate.write(";*include {}\n".format('_unseens.svx')) fcollate.write(";*include {}\n".format('_unseens.svx'))
flinear.write("{:2} {} *include {}\n".format(omit_scan.depthinclude, indent, '_unseens')) flinear.write("{:2} {} *include {}\n".format(omit_scan.depthinclude, indent, '_unseens'))
stop_dup_warning = True stop_dup_warning = True
#---------------------------------------------------------------- #----------------------------------------------------------------
omit_scan.PushdownStackScan(survexblockroot, '_unseens', finroot, flinear, fcollate) omit_scan.PushdownStackScan(survexblockroot, '_unseens', finrootname, flinear, fcollate)
#---------------------------------------------------------------- #----------------------------------------------------------------
stop_dup_warning = False stop_dup_warning = False