forked from expo/troggle
SurvexBlocks now importing in detail
This commit is contained in:
@@ -192,6 +192,8 @@ def readcave(filename):
|
||||
url = url[0],
|
||||
filename = filename)
|
||||
except:
|
||||
# this slow db query happens on every cave, but on import we have all this in memory
|
||||
# and don't need to do a db query. Fix this to speed it up!
|
||||
# need to cope with duplicates
|
||||
print(" ! FAILED to get only one CAVE when updating using: "+filename)
|
||||
kaves = models_caves.Cave.objects.all().filter(kataster_number=kataster_number[0])
|
||||
@@ -206,6 +208,8 @@ def readcave(filename):
|
||||
c = k
|
||||
|
||||
for area_slug in areas:
|
||||
# this slow db query happens on every cave, but on import we have all this in memory
|
||||
# and don't need to do a db query. Fix this to speed it up!
|
||||
area = models_caves.Area.objects.filter(short_name = area_slug)
|
||||
if area:
|
||||
newArea = area[0]
|
||||
@@ -216,6 +220,8 @@ def readcave(filename):
|
||||
primary = True
|
||||
for slug in slugs:
|
||||
try:
|
||||
# this slow db query happens on every cave, but on import we have all this in memory
|
||||
# and don't need to do a db query. Fix this to speed it up!
|
||||
cs = models_caves.CaveSlug.objects.update_or_create(cave = c,
|
||||
slug = slug,
|
||||
primary = primary)
|
||||
@@ -225,10 +231,13 @@ def readcave(filename):
|
||||
print(message)
|
||||
|
||||
primary = False
|
||||
|
||||
for entrance in entrances:
|
||||
slug = getXML(entrance, "entranceslug", maxItems = 1, context = context)[0]
|
||||
letter = getXML(entrance, "letter", maxItems = 1, context = context)[0]
|
||||
try:
|
||||
# this slow db query happens on every entrance, but on import we have all this in memory
|
||||
# and don't need to do a db query. Fix this to speed it up!
|
||||
entrance = models_caves.Entrance.objects.get(entranceslug__slug = slug)
|
||||
ce = models_caves.CaveAndEntrance.objects.update_or_create(cave = c, entrance_letter = letter, entrance = entrance)
|
||||
except:
|
||||
|
||||
@@ -16,21 +16,21 @@ import troggle.parsers.logbooks
|
||||
import troggle.parsers.QMs
|
||||
|
||||
def import_caves():
|
||||
print("Importing Caves to ",end="")
|
||||
print("-- Importing Caves to ",end="")
|
||||
print(django.db.connections.databases['default']['NAME'])
|
||||
troggle.parsers.caves.readcaves()
|
||||
|
||||
def import_people():
|
||||
print("Importing People (folk.csv) to ",end="")
|
||||
print("-- Importing People (folk.csv) to ",end="")
|
||||
print(django.db.connections.databases['default']['NAME'])
|
||||
troggle.parsers.people.LoadPersonsExpos()
|
||||
|
||||
def import_surveyscans():
|
||||
print("Importing Survey Scans")
|
||||
print("-- Importing Survey Scans")
|
||||
troggle.parsers.surveys.LoadListScans()
|
||||
|
||||
def import_logbooks():
|
||||
print("Importing Logbooks")
|
||||
print("-- Importing Logbooks")
|
||||
troggle.parsers.logbooks.LoadLogbooks()
|
||||
|
||||
def import_QMs():
|
||||
@@ -40,7 +40,7 @@ def import_QMs():
|
||||
def import_survex():
|
||||
# when this import is moved to the top with the rest it all crashes horribly
|
||||
import troggle.parsers.survex
|
||||
print("Importing Survex Blocks")
|
||||
print("-- Importing Survex Blocks")
|
||||
print(" - Survex Blocks")
|
||||
troggle.parsers.survex.LoadSurvexBlocks()
|
||||
print(" - Survex entrances x/y/z Positions")
|
||||
@@ -53,6 +53,6 @@ def import_loadpos():
|
||||
troggle.parsers.survex.LoadPos()
|
||||
|
||||
def import_drawingsfiles():
|
||||
print("Importing Drawings files")
|
||||
print("-- Importing Drawings files")
|
||||
troggle.parsers.surveys.LoadDrawingFiles()
|
||||
|
||||
|
||||
@@ -114,6 +114,7 @@ def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_
|
||||
expeditionday = expedition.get_expedition_day(date)
|
||||
lookupAttribs={'date':date, 'title':title}
|
||||
# 'cave' is converted to a string doing this, which renders as the cave slug.
|
||||
# but it is a db query which we should try to avoid - rewrite this
|
||||
nonLookupAttribs={'place':place, 'text':text, 'expedition':expedition, 'cave_slug':str(cave), 'slug':slugify(title)[:50], 'entry_type':entry_type}
|
||||
lbo, created=save_carefully(LogbookEntry, lookupAttribs, nonLookupAttribs)
|
||||
|
||||
@@ -356,6 +357,8 @@ def SetDatesFromLogbookEntries(expedition):
|
||||
Sets the date_from and date_to field for an expedition based on persontrips.
|
||||
Then sets the expedition date_from and date_to based on the personexpeditions.
|
||||
"""
|
||||
# Probably a faster way to do this. This uses a lot of db queries, but we have all this
|
||||
# in memory..
|
||||
for personexpedition in expedition.personexpedition_set.all():
|
||||
persontrips = personexpedition.persontrip_set.order_by('logbook_entry__date')
|
||||
# sequencing is difficult to do
|
||||
|
||||
@@ -324,7 +324,8 @@ class LoadingSurvex():
|
||||
return self.caveslist[g]
|
||||
print(' ! Failed to find cave for {}'.format(cavepath.lower()))
|
||||
else:
|
||||
print(' ! No regex cave match for %s' % cavepath.lower())
|
||||
# not a cave, but that is fine.
|
||||
# print(' ! No regex(standard identifier) cave match for %s' % cavepath.lower())
|
||||
return None
|
||||
|
||||
def GetSurvexDirectory(self, headpath):
|
||||
@@ -353,17 +354,17 @@ class LoadingSurvex():
|
||||
print("\n"+message,file=sys.stderr)
|
||||
models.DataIssue.objects.create(parser='survex', message=message)
|
||||
|
||||
def LoadSurvexFile(self, includelabel):
|
||||
def LoadSurvexFile(self, svxid):
|
||||
"""Creates SurvexFile in the database, and SurvexDirectory if needed
|
||||
with links to 'cave'
|
||||
Creates a new current survexblock with valid .survexfile and valid .survexdirectory
|
||||
Creates a new current survexfile and valid .survexdirectory
|
||||
The survexblock passed-in is not necessarily the parent. FIX THIS.
|
||||
"""
|
||||
depth = " " * self.depthbegin
|
||||
print("{:2}{} - NEW survexfile:'{}'".format(self.depthbegin, depth, includelabel))
|
||||
headpath, tail = os.path.split(includelabel)
|
||||
print("{:2}{} - NEW survexfile:'{}'".format(self.depthbegin, depth, svxid))
|
||||
headpath = os.path.dirname(svxid)
|
||||
|
||||
newfile = models_survex.SurvexFile(path=includelabel)
|
||||
newfile = models_survex.SurvexFile(path=svxid)
|
||||
newfile.save() # until we do this there is no internal id so no foreign key works
|
||||
self.currentsurvexfile = newfile
|
||||
newdirectory = self.GetSurvexDirectory(headpath)
|
||||
@@ -383,10 +384,10 @@ class LoadingSurvex():
|
||||
newfile.cave = cave
|
||||
#print("\n"+str(newdirectory.cave),file=sys.stderr)
|
||||
else:
|
||||
self.ReportNonCaveIncludes(headpath, includelabel)
|
||||
self.ReportNonCaveIncludes(headpath, svxid)
|
||||
|
||||
if not newfile.survexdirectory:
|
||||
message = " ! SurvexDirectory NOT SET in new SurvexFile {} ".format(includelabel)
|
||||
message = " ! SurvexDirectory NOT SET in new SurvexFile {} ".format(svxid)
|
||||
print(message)
|
||||
print(message,file=sys.stderr)
|
||||
models.DataIssue.objects.create(parser='survex', message=message)
|
||||
@@ -401,7 +402,7 @@ class LoadingSurvex():
|
||||
def ProcessIncludeLine(self, included):
|
||||
svxid = included.groups()[0]
|
||||
#depth = " " * self.depthbegin
|
||||
#print("{:2}{} - Include survexfile:'{}'".format(self.depthbegin, depth, svxid))
|
||||
#print("{:2}{} - Include survexfile:'{}' {}".format(self.depthbegin, depth, svxid, included))
|
||||
self.LoadSurvexFile(svxid)
|
||||
self.stacksvxfiles.append(self.currentsurvexfile)
|
||||
|
||||
@@ -426,8 +427,10 @@ class LoadingSurvex():
|
||||
self.LoadSurvexQM(survexblock, qmline)
|
||||
|
||||
included = self.rx_comminc.match(comment)
|
||||
# ;*include means we have been included; not 'proceed to include' which *include means
|
||||
# ;*include means 'we have been included'; whereas *include means 'proceed to include'
|
||||
if included:
|
||||
#depth = " " * self.depthbegin
|
||||
#print("{:2}{} - Include comment:'{}' {}".format(self.depthbegin, depth, comment, included))
|
||||
self.ProcessIncludeLine(included)
|
||||
|
||||
edulcni = self.rx_commcni.match(comment)
|
||||
@@ -457,7 +460,7 @@ class LoadingSurvex():
|
||||
|
||||
def LinearLoad(self, survexblock, path, svxlines):
|
||||
"""Loads a single survex file. Usually used to import all the survex files which have been collated
|
||||
into a single file. Loads the begin/end blocks recursively.
|
||||
into a single file. Loads the begin/end blocks using a stack for labels.
|
||||
"""
|
||||
self.relativefilename = path
|
||||
cave = self.IdentifyCave(path) # this will produce null for survex files which are geographic collections
|
||||
@@ -466,19 +469,25 @@ class LoadingSurvex():
|
||||
self.currentsurvexfile.save() # django insists on this although it is already saved !?
|
||||
|
||||
blockcount = 0
|
||||
lineno = 0
|
||||
def tickle():
|
||||
nonlocal blockcount
|
||||
blockcount +=1
|
||||
if blockcount % 10 ==0 :
|
||||
print(".", file=sys.stderr,end='')
|
||||
if blockcount % 500 ==0 :
|
||||
if blockcount % 200 ==0 :
|
||||
print("\n", file=sys.stderr,end='')
|
||||
sys.stderr.flush();
|
||||
print(" - MEM:{:7.3f} MB in use".format(models.get_process_memory()),file=sys.stderr)
|
||||
sys.stderr.flush()
|
||||
|
||||
for svxline in svxlines:
|
||||
sline, comment = self.rx_comment.match(svxline.strip()).groups()
|
||||
lineno += 1
|
||||
sline, comment = self.rx_comment.match(svxline).groups()
|
||||
if comment:
|
||||
depth = " " * self.depthbegin
|
||||
print("{:4} {:2}{} - Include comment:'{}' {}".format(lineno, self.depthbegin, depth, comment, sline))
|
||||
self.LoadSurvexComment(survexblock, comment) # this catches the ;*include and ;*edulcni lines too
|
||||
|
||||
if not sline:
|
||||
continue # skip blank lines
|
||||
|
||||
@@ -503,10 +512,10 @@ class LoadingSurvex():
|
||||
pathlist += "." + id
|
||||
newsurvexblock = models_survex.SurvexBlock(name=blockid, parent=survexblock,
|
||||
survexpath=pathlist,
|
||||
title = survexblock.title, # copy parent inititally
|
||||
cave=self.currentcave, survexfile=self.currentsurvexfile,
|
||||
legsall=0, legssplay=0, legssurfc=0, totalleglength=0.0)
|
||||
newsurvexblock.save()
|
||||
newsurvexblock.title = "("+survexblock.title+")" # copy parent inititally
|
||||
survexblock = newsurvexblock
|
||||
# survexblock.survexfile.save()
|
||||
survexblock.save() # django insists on this , but we want to save at the end !
|
||||
@@ -564,7 +573,7 @@ class LoadingSurvex():
|
||||
else:
|
||||
pass # ignore all other sorts of data
|
||||
|
||||
def RecursiveScan(self, survexblock, survexfile, fin, flinear, fcollate):
|
||||
def RecursiveScan(self, survexblock, path, fin, flinear, fcollate):
|
||||
"""Follows the *include links in all the survex files from the root file 1623.svx
|
||||
and reads only the *include and *begin and *end statements. It produces a linearised
|
||||
list of the include tree
|
||||
@@ -577,27 +586,27 @@ class LoadingSurvex():
|
||||
if self.callcount % 500 ==0 :
|
||||
print("\n", file=sys.stderr,end='')
|
||||
|
||||
if survexfile in self.svxfileslist:
|
||||
message = " * Warning. Survex file already seen: {}".format(survexfile.path)
|
||||
if path in self.svxfileslist:
|
||||
message = " * Warning. Duplicate in *include list at:{} depth:{} file:{}".format(self.callcount, self.depthinclude, path)
|
||||
print(message)
|
||||
print(message,file=flinear)
|
||||
print(message,file=sys.stderr)
|
||||
print("\n"+message,file=sys.stderr)
|
||||
models.DataIssue.objects.create(parser='survex', message=message)
|
||||
if self.svxfileslist.count(survexfile) > 20:
|
||||
message = " ! ERROR. Survex file already seen 20x. Probably an infinite loop so fix your *include statements that include this. Aborting. {}".format(survexfile.path)
|
||||
if self.svxfileslist.count(path) > 20:
|
||||
message = " ! ERROR. Survex file already seen 20x. Probably an infinite loop so fix your *include statements that include this. Aborting. {}".format(path)
|
||||
print(message)
|
||||
print(message,file=flinear)
|
||||
print(message,file=sys.stderr)
|
||||
models.DataIssue.objects.create(parser='survex', message=message)
|
||||
return
|
||||
self.svxfileslist.append(survexfile)
|
||||
self.svxfileslist.append(path)
|
||||
|
||||
svxlines = fin.read().splitlines()
|
||||
for svxline in svxlines:
|
||||
self.lineno += 1
|
||||
includestmt =self.rx_include.match(svxline)
|
||||
if not includestmt:
|
||||
fcollate.write("{}\n".format(svxline))
|
||||
fcollate.write("{}\n".format(svxline.strip()))
|
||||
|
||||
sline, comment = self.rx_comment.match(svxline.strip()).groups()
|
||||
mstar = self.rx_star.match(sline)
|
||||
@@ -605,40 +614,35 @@ class LoadingSurvex():
|
||||
cmd, args = mstar.groups()
|
||||
cmd = cmd.lower()
|
||||
if re.match("(?i)include$", cmd):
|
||||
includepath = os.path.normpath(os.path.join(os.path.split(survexfile.path)[0], re.sub(r"\.svx$", "", args)))
|
||||
path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", includepath)
|
||||
includepath = os.path.normpath(os.path.join(os.path.split(path)[0], re.sub(r"\.svx$", "", args)))
|
||||
#path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", includepath)
|
||||
|
||||
includesurvexfile = models_survex.SurvexFile(path=includepath)
|
||||
includesurvexfile.save()
|
||||
|
||||
if includesurvexfile.exists():
|
||||
# do not create SurvexFile in DB here by doing includesurvexfile.save(). Do it when reading data.
|
||||
fullpath = os.path.join(settings.SURVEX_DATA, includepath + ".svx")
|
||||
if os.path.isfile(fullpath):
|
||||
#--------------------------------------------------------
|
||||
self.depthinclude += 1
|
||||
fininclude = includesurvexfile.OpenFile()
|
||||
fcollate.write(";*include {}\n".format(includesurvexfile.path))
|
||||
flinear.write("{:2} {} *include {}\n".format(self.depthinclude, indent, includesurvexfile.path))
|
||||
push = includesurvexfile.path.lower()
|
||||
fininclude = open(fullpath,'r')
|
||||
fcollate.write(";*include {}\n".format(includepath))
|
||||
flinear.write("{:2} {} *include {}\n".format(self.depthinclude, indent, includepath))
|
||||
push = includepath.lower()
|
||||
self.stackinclude.append(push)
|
||||
#-----------------
|
||||
self.RecursiveScan(survexblock, includesurvexfile, fininclude, flinear, fcollate)
|
||||
self.RecursiveScan(survexblock, includepath, fininclude, flinear, fcollate)
|
||||
#-----------------
|
||||
pop = self.stackinclude.pop()
|
||||
if pop != push:
|
||||
message = "!!!!!!! ERROR pop != push {} != {} {}".format(pop, push, self.stackinclude)
|
||||
message = "!! ERROR mismatch *include pop!=push {}".format(pop, push, self.stackinclude)
|
||||
print(message)
|
||||
print(message,file=flinear)
|
||||
print(message,file=sys.stderr)
|
||||
models.DataIssue.objects.create(parser='survex', message=message)
|
||||
includesurvexfile.path += "-TEMP"
|
||||
includesurvexfile = None
|
||||
flinear.write("{:2} {} *edulcni {}\n".format(self.depthinclude, indent, pop))
|
||||
fcollate.write(";*edulcni {}\n".format(pop))
|
||||
fininclude.close()
|
||||
self.depthinclude -= 1
|
||||
#--------------------------------------------------------
|
||||
else:
|
||||
message = " ! ERROR *include file not found for [{}]:'{}'".format(includesurvexfile, includepath)
|
||||
message = " ! ERROR *include file not found for:'{}'".format(includepath)
|
||||
print(message)
|
||||
print(message,file=sys.stderr)
|
||||
models.DataIssue.objects.create(parser='survex', message=message)
|
||||
@@ -659,7 +663,7 @@ class LoadingSurvex():
|
||||
args = " "
|
||||
popargs = self.stackbegin.pop()
|
||||
if popargs != args.lower():
|
||||
message = "!!!!!!! ERROR BEGIN/END pop != push {} != {}\n{}".format(popargs, args, self. stackbegin)
|
||||
message = "!! ERROR mismatch in BEGIN/END labels pop!=push '{}'!='{}'\n{}".format(popargs, args, self. stackbegin)
|
||||
print(message)
|
||||
print(message,file=flinear)
|
||||
print(message,file=sys.stderr)
|
||||
@@ -701,7 +705,7 @@ def FindAndLoadSurvex(survexblockroot):
|
||||
fcollate.write(";*include {}\n".format(survexfileroot.path))
|
||||
flinear.write("{:2} {} *include {}\n".format(svx_scan.depthinclude, indent, survexfileroot.path))
|
||||
#----------------------------------------------------------------
|
||||
svx_scan.RecursiveScan(survexblockroot, survexfileroot, finroot, flinear, fcollate)
|
||||
svx_scan.RecursiveScan(survexblockroot, survexfileroot.path, finroot, flinear, fcollate)
|
||||
#----------------------------------------------------------------
|
||||
flinear.write("{:2} {} *edulcni {}\n".format(svx_scan.depthinclude, indent, survexfileroot.path))
|
||||
fcollate.write(";*edulcni {}\n".format(survexfileroot.path))
|
||||
@@ -712,7 +716,7 @@ def FindAndLoadSurvex(survexblockroot):
|
||||
flinear.write(" - {:,} survex files in linear include list \n".format(len(svxfileslist)))
|
||||
flinear.close()
|
||||
fcollate.close()
|
||||
svx_scan = None
|
||||
svx_scan = None # Hmm. Does this actually delete all the instance variables if they are lists, dicts etc.?
|
||||
print("\n - {:,} survex files in linear include list \n".format(len(svxfileslist)),file=sys.stderr)
|
||||
|
||||
mem1 = models.get_process_memory()
|
||||
@@ -724,7 +728,7 @@ def FindAndLoadSurvex(survexblockroot):
|
||||
# entrance locations currently loaded after this by LoadPos(), but could better be done before ?
|
||||
# look in MapLocations() for how we find the entrances
|
||||
|
||||
print('\n - Loading All Survex Blocks (LinearRecursive)',file=sys.stderr)
|
||||
print('\n - Loading All Survex Blocks (LinearLoad)',file=sys.stderr)
|
||||
svx_load = LoadingSurvex()
|
||||
|
||||
svx_load.svxdirs[""] = survexfileroot.survexdirectory
|
||||
|
||||
Reference in New Issue
Block a user