From c76cd38d76d40ce1aff907ccd646e1efd279c59f Mon Sep 17 00:00:00 2001 From: Philip Sargent Date: Fri, 7 Oct 2022 11:41:46 +0300 Subject: [PATCH] use generator when reading individual survex files too, saves another 6MB --- parsers/survex.py | 113 +++++++++++++++++++++++++--------------------- 1 file changed, 61 insertions(+), 52 deletions(-) diff --git a/parsers/survex.py b/parsers/survex.py index 222c676..1f97dd0 100644 --- a/parsers/survex.py +++ b/parsers/survex.py @@ -1225,57 +1225,15 @@ class LoadingSurvex(): self.legsnumber = nlegstotal self.slength = slengthtotal - def PushdownStackScan(self, survexblock, path, fin, flinear, fcollate): - """Follows the *include links in all the survex files from the root file 1623.svx + def PushdownStackScan(self, survexblock, path, finname, flinear, fcollate): + """Follows the *include links in all the survex files from the root file (usually 1623.svx) and reads only the *include and *begin and *end statements. It produces a linearised list of the include tree and detects blocks included more than once. """ global stop_dup_warning - thissvxline = 0 - indent = " " * self.depthinclude - sys.stderr.flush(); - self.callcount +=1 - - if self.callcount % 10 ==0 : - print(".", file=sys.stderr,end='') - if self.callcount % 500 ==0 : - print("\n ", file=sys.stderr,end='') - if path in self.svxfileslist: - # We have already used os.normpath() so this is OK. "/../" and "//" have been simplified already. - if stop_dup_warning: - #print("D",end="", file=sys.stderr) - pass - else: - message = f" * Warning. Duplicate detected. We have already seen this *include '{path}' from another survex file. Detected at callcount:{self.callcount} depth:{self.depthinclude}" - print(message) - print(message,file=flinear) - #print(message,file=sys.stderr) - DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(path)) - if self.svxfileslist.count(path) > 2: - message = " ! ERROR. Should have been caught before this. 
Survex file already *included 2x. Probably an infinite loop so fix your *include statements that include this. Aborting. {}".format(path) - print(message) - print(message,file=flinear) - #print(message,file=sys.stderr) - DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(path)) - return - return - self.svxfileslist.append(path) - - try: - svxlines = fin.read().splitlines() - except UnicodeDecodeError: - # some bugger put an umlaut in a non-UTF survex file ?! - message = f" ! ERROR *include file '{path}' in '{survexblock}' has UnicodeDecodeError" - print(message) - print(message,file=sys.stderr) - DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(path)) - return # skip this survex file and all things *included in it - - - for svxline in svxlines: + def process_line(svxline): self.lineno += 1 - thissvxline += 1 # detect a merge failure inserted by version control mfail = self.rx_badmerge.match(svxline) if mfail: @@ -1305,13 +1263,14 @@ class LoadingSurvex(): if os.path.isfile(fullpath): #-------------------------------------------------------- self.depthinclude += 1 - fininclude = open(fullpath,'r') + # fininclude = open(fullpath,'r') + finincludename = fullpath fcollate.write(";|*include {}\n".format(includepath)) flinear.write("{:2} {} *include {}\n".format(self.depthinclude, indent, includepath)) push = includepath.lower() self.includestack.append(push) #----------------- - self.PushdownStackScan(survexblock, includepath, fininclude, flinear, fcollate) + self.PushdownStackScan(survexblock, includepath, finincludename, flinear, fcollate) #----------------- pop = self.includestack.pop() if pop != push: @@ -1322,7 +1281,7 @@ class LoadingSurvex(): DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(path)) flinear.write("{:2} {} *edulcni {}\n".format(self.depthinclude, indent, pop)) fcollate.write(";|*edulcni {}\n".format(pop)) - fininclude.close() + # 
fininclude.close() self.depthinclude -= 1 #-------------------------------------------------------- else: @@ -1360,7 +1319,57 @@ class LoadingSurvex(): flinear.write(" {:2} {} *title {}\n".format(self.depthbegin, depth, args)) pass + indent = " " * self.depthinclude + sys.stderr.flush(); + self.callcount +=1 + + + if self.callcount % 10 ==0 : + print(".", file=sys.stderr,end='') + if self.callcount % 500 ==0 : + print("\n ", file=sys.stderr,end='') + if path in self.svxfileslist: + # We have already used os.normpath() so this is OK. "/../" and "//" have been simplified already. + if stop_dup_warning: + #print("D",end="", file=sys.stderr) + pass + else: + message = f" * Warning. Duplicate detected. We have already seen this *include '{path}' from another survex file. Detected at callcount:{self.callcount} depth:{self.depthinclude}" + print(message) + print(message,file=flinear) + #print(message,file=sys.stderr) + DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(path)) + if self.svxfileslist.count(path) > 2: + message = " ! ERROR. Should have been caught before this. Survex file already *included 2x. Probably an infinite loop so fix your *include statements that include this. Aborting. {}".format(path) + print(message) + print(message,file=flinear) + #print(message,file=sys.stderr) + DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(path)) + return + return + try: + # python generator idiom again + with open(finname, "r") as fin: + for svxline in fin: + process_line(svxline) + + self.svxfileslist.append(path) + + except UnicodeDecodeError: + # some bugger put an umlaut in a non-UTF survex file ?! + message = f" ! ERROR *include file '{path}' in '{survexblock}' has UnicodeDecodeError. Omitted." 
+ print(message) + print(message,file=sys.stderr) + DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(path)) + return # skip this survex file and all things *included in it + except : + message = f" ! ERROR *include file '{path}' in '{survexblock}' has unexpected error. Omitted." + print(message) + print(message,file=sys.stderr) + DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(path)) + return # skip this survex file and all things *included in it + def checkUniqueness(self,fullpath): fn = Path(fullpath).name if fn not in self.uniquename: @@ -1485,7 +1494,7 @@ def FindAndLoadSurvex(survexblockroot): flinear.write(" - MEM:{:7.2f} MB START {}\n".format(mem0,survexfileroot.path)) print(" ", file=sys.stderr,end='') - finroot = survexfileroot.OpenFile() + finrootname = Path(settings.SURVEX_DATA, survexfileroot.path + ".svx") fcollate.write(";*include {}\n".format(survexfileroot.path)) flinear.write("{:2} {} *include {}\n".format(svx_scan.depthinclude, indent, survexfileroot.path)) @@ -1495,7 +1504,7 @@ def FindAndLoadSurvex(survexblockroot): pr.enable() #print(f"###{survexblockroot=} {survexfileroot.path=}",file=sys.stderr) #---------------------------------------------------------------- - svx_scan.PushdownStackScan(survexblockroot, survexfileroot.path, finroot, flinear, fcollate) + svx_scan.PushdownStackScan(survexblockroot, survexfileroot.path, finrootname, flinear, fcollate) #---------------------------------------------------------------- pr.disable() with open('PushdownStackScan.prof', 'w') as f: @@ -1583,12 +1592,12 @@ def FindAndLoadSurvex(survexblockroot): flinear.write(f" - MEM:{mem0:7.2f} MB START '_unseens'\n") print(" ", file=sys.stderr,end='') - finroot = open(fullpathtotop) + finrootname = fullpathtotop fcollate.write(";*include {}\n".format('_unseens.svx')) flinear.write("{:2} {} *include {}\n".format(omit_scan.depthinclude, indent, '_unseens')) stop_dup_warning = True 
#---------------------------------------------------------------- - omit_scan.PushdownStackScan(survexblockroot, '_unseens', finroot, flinear, fcollate) + omit_scan.PushdownStackScan(survexblockroot, '_unseens', finrootname, flinear, fcollate) #---------------------------------------------------------------- stop_dup_warning = False