
all SurvexDirectory set OK, pending caves made

This commit is contained in:
Philip Sargent 2020-06-30 15:39:24 +01:00
parent 6b0275d035
commit 0a57ac3132
2 changed files with 75 additions and 173 deletions

View File

@@ -1,18 +1,27 @@
# -*- coding: utf-8 -*-
import os
import re
from django.conf import settings
import troggle.core.models as models
from troggle.core.models import DataIssue, get_process_memory
import troggle.core.models_caves as models_caves
def readcaves():
# Clear the cave data issues as we are reloading
models.DataIssue.objects.filter(parser='caves').delete()
print(" - Deleting Caves and Entrances")
models_caves.Cave.objects.all().delete()
models_caves.Entrance.objects.all().delete()
# Clear the cave data issues and the caves as we are reloading
DataIssue.objects.filter(parser='caves').delete()
DataIssue.objects.filter(parser='entrances').delete()
# Note: these are HTML files in the EXPOWEB repo, not from the loser repo.
area_1623 = models_caves.Area.objects.update_or_create(short_name = "1623", parent = None)
area_1626 = models_caves.Area.objects.update_or_create(short_name = "1626", parent = None)
print (" - Setting pending caves")
# Do this first, so that these empty entries are overwritten as the caves get properly created.
# This is for caves which do not have XML description files even though they exist and have surveys.
# It also needs to be done *before* entrances are read, so that the entrance-cave links work properly.
forgotten = ["2007-04", "2007-05", "2007-06", "2007-07", "2007-12", "2009-01", "2009-02",
"2010-06", "2010-07", "2012-ns-01", "2012-ns-02", "2010-04", "2012-ns-05", "2012-ns-06",
"2012-ns-07", "2012-ns-08", "2012-ns-12", "2012-ns-14", "2012-ns-15", "2014-bl888",
@@ -21,33 +30,31 @@ def readcaves():
try:
cave = models_caves.Cave(
unofficial_number = k,
official_name = "Mislaid cave - created as empty object. No XML available at this time.",
official_name = "Pending cave write-up - creating as empty object. No XML file available yet.",
notes="_Survex file found in loser repo but no description in expoweb")
if cave:
print("{} {}".format(cave.unofficial_number, cave.official_name))
cave.save() # must save to have id before foreign keys work
cave.area = area_1623
cave.save()
message = " ! {} {}".format(cave.unofficial_number, cave.official_name)
DataIssue.objects.create(parser='caves', message=message)
print(message)
else:
print("Failed to create cave {} ".format(k))
except:
message = " ! Forgotten cave error, slug: %s forgotten-id: %s" % (slug, k)
models.DataIssue.objects.create(parser='caves', message=message)
message = " ! Error. Cannot create pending cave, pending-id:{}".format(k)
DataIssue.objects.create(parser='caves', message=message)
print(message)
# Note: these are HTML files in the EXPOWEB repo, not from the loser repo.
area_1623 = models_caves.Area.objects.update_or_create(short_name = "1623", parent = None)
area_1626 = models_caves.Area.objects.update_or_create(short_name = "1626", parent = None)
print(" - Reading Entrances")
raise
print(" - Reading Entrances from entrance descriptions xml files")
for filename in next(os.walk(settings.ENTRANCEDESCRIPTIONS))[2]: # There should be a better way of getting a list of files
if filename.endswith('.html'):
readentrance(filename)
print (" - Reading Caves")
print(" - Reading Caves from cave descriptions xml files")
for filename in next(os.walk(settings.CAVEDESCRIPTIONS))[2]: # There should be a better way of getting a list of files
if filename.endswith('.html'):
readcave(filename)
def readentrance(filename):
# Note: these are HTML files in the EXPOWEB repo, not from the loser repo.
with open(os.path.join(settings.ENTRANCEDESCRIPTIONS, filename)) as f:
@@ -120,7 +127,7 @@ def readentrance(filename):
primary = primary)
for k in kents:
message = " ! - DUPLICATE in db. entrance:"+ str(k.entrance) + ", slug:" + str(k.slug())
models.DataIssue.objects.create(parser='caves', message=message)
DataIssue.objects.create(parser='caves', message=message)
print(message)
for k in kents:
if k.slug() != None:
@@ -189,7 +196,7 @@ def readcave(filename):
kaves = models_caves.Cave.objects.all().filter(kataster_number=kataster_number[0])
for k in kaves:
message = " ! - DUPLICATES in db. kataster:"+ str(k.kataster_number) + ", slug:" + str(k.slug())
models.DataIssue.objects.create(parser='caves', message=message)
DataIssue.objects.create(parser='caves', message=message)
print(message)
for k in kaves:
if k.slug() != None:
@@ -213,7 +220,7 @@ def readcave(filename):
primary = primary)
except:
message = " ! Cave update/create failure: %s, skipping file %s" % (slug, context)
models.DataIssue.objects.create(parser='caves', message=message)
DataIssue.objects.create(parser='caves', message=message)
print(message)
primary = False
@@ -225,7 +232,7 @@ def readcave(filename):
ce = models_caves.CaveAndEntrance.objects.update_or_create(cave = c, entrance_letter = letter, entrance = entrance)
except:
message = " ! Entrance setting failure, slug: %s letter: %s" % (slug, letter)
models.DataIssue.objects.create(parser='caves', message=message)
DataIssue.objects.create(parser='caves', message=message)
print(message)
@@ -235,13 +242,13 @@ def getXML(text, itemname, minItems = 1, maxItems = None, printwarnings = True,
message = " ! %(count)i %(itemname)s found, at least %(min)i expected" % {"count": len(items),
"itemname": itemname,
"min": minItems} + context
models.DataIssue.objects.create(parser='caves', message=message)
DataIssue.objects.create(parser='caves', message=message)
print(message)
if maxItems is not None and len(items) > maxItems and printwarnings:
message = " ! %(count)i %(itemname)s found, no more than %(max)i expected" % {"count": len(items),
"itemname": itemname,
"max": maxItems} + context
models.DataIssue.objects.create(parser='caves', message=message)
DataIssue.objects.create(parser='caves', message=message)
print(message)
return items
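The pattern in this first file is: delete the stale DataIssue records, create the Area objects idempotently with update_or_create, register placeholder objects for the pending caves, and only then parse the entrance and cave description files so that real data overwrites the stubs and the entrance-cave links resolve. Below is a minimal standalone sketch of that ordering, with a plain dict standing in for the Django ORM; create_pending and read_cave_file are illustrative names, not troggle's API.

caves = {}        # keyed by cave id; stands in for Cave.objects
data_issues = []  # stands in for DataIssue.objects.filter(parser='caves')

pending = ["2007-04", "2007-05", "2012-ns-01"]  # ids with surveys but no XML yet

def create_pending(k):
    # Placeholder created *before* the description files are parsed, so that
    # entrance-cave links can resolve; overwritten if a real file turns up.
    caves[k] = {"unofficial_number": k,
                "official_name": "Pending cave write-up - created as empty object."}
    message = " ! {} {}".format(k, caves[k]["official_name"])
    data_issues.append(message)
    print(message)

def read_cave_file(k, name):
    # Parsing a real description file replaces the placeholder wholesale.
    caves[k] = {"unofficial_number": k, "official_name": name}

for k in pending:
    create_pending(k)
read_cave_file("2007-04", "Example Cave (parsed from XML)")
assert caves["2007-04"]["official_name"].startswith("Example")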

View File

@@ -68,6 +68,8 @@ class LoadingSurvex():
insp = ""
callcount = 0
stardata ={}
ignoreprefix = ["surface", "kataster", "fixedpts", "gpx"]
ignorenoncave = ["caves-1623", "caves-1623/2007-neu"]
includedfilename =""
currentsurvexblock = None
currentsurvexfile = None
@@ -308,7 +310,8 @@ class LoadingSurvex():
def IdentifyCave(self, cavepath):
if cavepath.lower() in self.caveslist:
return self.caveslist[cavepath.lower()]
# TODO - some of this work is already done when generating self.caveslist, so simplify this,
# especially as it is called inside a loop.
path_match = self.rx_cave.search(cavepath)
if path_match:
sluggy = '{}-{}'.format(path_match.group(1), path_match.group(2))
@@ -329,6 +332,19 @@ class LoadingSurvex():
self.svxdirs[headpath.lower()] = models_survex.SurvexDirectory(path=headpath, primarysurvexfile=self.currentsurvexfile)
return self.svxdirs[headpath.lower()]
def ReportNonCaveIncludes(self, headpath, includelabel):
"""Ignore surface, kataser and gps *include survex files
"""
if headpath in self.ignorenoncave:
return
for i in self.ignoreprefix:
if headpath.startswith(i):
return
message = " ! {} is not a cave. (while creating {} sfile & sdirectory)".format(headpath, includelabel)
print(message)
print(message,file=sys.stderr)
models.DataIssue.objects.create(parser='survex', message=message)
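The new ReportNonCaveIncludes is a whitelist-plus-prefix filter: anything in the explicit exception list or under a known non-cave prefix is silently skipped, and everything else is logged as a DataIssue. A self-contained sketch of just that filtering, with the constants copied from the diff; should_report is an illustrative name, not a troggle function.

IGNOREPREFIX = ["surface", "kataster", "fixedpts", "gpx"]
IGNORENONCAVE = ["caves-1623", "caves-1623/2007-neu"]

def should_report(headpath):
    # Mirrors ReportNonCaveIncludes: exact-match exceptions first, then
    # prefixes; True means "flag this path as not a cave" via a DataIssue.
    if headpath in IGNORENONCAVE:
        return False
    return not any(headpath.startswith(p) for p in IGNOREPREFIX)

assert should_report("surface/1623") is False    # surface data, ignored
assert should_report("caves-1623") is False      # explicit exception, ignored
assert should_report("notacave/xyzzy") is True   # would be logged as a DataIssue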
def LoadSurvexFile(self, includelabel):
"""Creates SurvexFile in the database, and SurvexDirectory if needed
with links to 'cave'
@@ -338,28 +354,37 @@ class LoadingSurvex():
depth = " " * self.depthbegin
print("{:2}{} - NEW survexfile:'{}'".format(self.depthbegin, depth, includelabel))
newsurvexfile = models_survex.SurvexFile(path=includelabel)
newfile = models_survex.SurvexFile(path=includelabel)
headpath, tail = os.path.split(includelabel)
newsurvexdirectory = self.GetSurvexDirectory(headpath)
newsurvexfile.survexdirectory = newsurvexdirectory
newdirectory = self.GetSurvexDirectory(headpath)
if not newdirectory:
message = " ! 'None' SurvexDirectory returned from GetSurvexDirectory({})".format(headpath)
print(message)
print(message,file=sys.stderr)
models.DataIssue.objects.create(parser='survex', message=message)
newfile.survexdirectory = newdirectory
cave = self.IdentifyCave(headpath)
if cave:
newsurvexdirectory.cave = cave
newsurvexfile.cave = cave
# else:
# message = " ! Cannot identify cave from {} when creating sfile & sdirectory".format(headpath)
# print(message)
# print(message,file=sys.stderr)
# models.DataIssue.objects.create(parser='survex', message=message)
newdirectory.cave = cave
newfile.cave = cave
else:
self.ReportNonCaveIncludes(headpath, includelabel)
if not newfile.survexdirectory:
message = " ! SurvexDirectory NOT SET in new SurvexFile {} ".format(includelabel)
print(message)
print(message,file=sys.stderr)
models.DataIssue.objects.create(parser='survex', message=message)
self.currentsurvexfile.save() # django insists on this although it is already saved !?
try:
newsurvexdirectory.save()
newdirectory.save()
except:
print(newsurvexdirectory, file=sys.stderr)
print(newsurvexdirectory.primarysurvexfile, file=sys.stderr)
print(newdirectory, file=sys.stderr)
print(newdirectory.primarysurvexfile, file=sys.stderr)
raise
self.currentsurvexfile = newsurvexfile
self.currentsurvexfile = newfile
def ProcessIncludeLine(self, included):
svxid = included.groups()[0]
@@ -418,129 +443,6 @@ class LoadingSurvex():
if cave:
survexfile.cave = cave
def RecursiveRecursiveLoad(self, survexblock, survexfile, fin):
"""Follows the *include links in all the survex files from the root file 1623.svx
and reads in the survex blocks, other data and the wallet references (scansfolder) as it
goes. This stage of the data import is where the maximum memory is used and where it
crashes on memory-constrained machines. Begin-end blocks may also be nested.
"""
# self.LoadSurvexSetup(survexblock, survexfile)
# insp =self.insp
# previousnlegs = 0
# svxlines = fin.read().splitlines()
# # cannot close file now as may be recursively called with the same fin if nested *begin-end
# for svxline in svxlines:
# self.lineno += 1
# sline, comment = self.rx_comment.match(svxline.strip()).groups()
# if comment:
# self.LoadSurvexComment(survexblock, comment)
# if not sline:
# continue # skip blank lines
# # detect the star command
# mstar = self.rx_star.match(sline)
# if mstar: # yes we are reading a *cmd
# cmd, args = mstar.groups()
# cmd = cmd.lower()
# if re.match("include$(?i)", cmd):
# cave = self.IdentifyCave(args)
# if cave:
# survexfile.cave = cave
# includepath = os.path.normpath(os.path.join(os.path.split(survexfile.path)[0], re.sub(r"\.svx$", "", args)))
# print((insp+' - INCLUDE-go path found, including - ' + args))
# includesurvexfile = models_survex.SurvexFile(path=includepath)
# includesurvexfile.save()
# includesurvexfile.SetDirectory()
# if includesurvexfile.exists():
# survexblock.save()
# self.insp += "> "
# #--------------------------------------------------------
# fininclude = includesurvexfile.OpenFile()
# self.RecursiveRecursiveLoad(survexblock, includesurvexfile, fininclude)
# fininclude.close()
# #--------------------------------------------------------
# self.insp = self.insp[2:]
# insp = self.insp
# print((insp+' - INCLUDE-return from include - ' + includepath))
# else:
# print((insp+' ! ERROR *include file not found for %s' % includesurvexfile))
# elif re.match("begin$(?i)", cmd):
# # On a *begin statement we start a new survexblock.
# # There should not be any *include inside a begin-end block, so this is a simple
# # load not a recursive fileload. But there may be many blocks nested to any depth in one file.
# if args:
# newsvxpath = os.path.join(os.path.split(survexfile.path)[0], re.sub(r"\.svx$", "", args))
# # Try to find the cave in the DB if not use the string as before
# path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", newsvxpath)
# if path_match:
# pos_cave = '%s-%s' % (path_match.group(1), path_match.group(2))
# # print(insp+pos_cave)
# cave = models_caves.getCaveByReference(pos_cave)
# if cave:
# survexfile.cave = cave
# else:
# print((insp+' - No match (b) for %s' % newsvxpath))
# previousnlegs = self.survexlegsnumber
# name = args.lower()
# print(insp+' - Begin found for:{}, creating new SurvexBlock '.format(name))
# # the recursive call re-reads the entire file. This is wasteful. We should pass in only
# # the un-parsed part of the file.
# survexblockdown = models_survex.SurvexBlock(name=name, parent=survexblock,
# survexpath=survexblock.survexpath+"."+name,
# cave=survexfile.cave, survexfile=survexfile,
# legsall=0, legssplay=0, legssurfc=0, totalleglength=0.0)
# survexblockdown.save()
# survexblock.save()
# survexblock = survexblockdown
# print(insp+" - BLOCK-enter nested *begin/*end block: '{}'".format(name))
# self.insp += "> "
# #--------------------------------------------------------
# self.RecursiveRecursiveLoad(survexblockdown, survexfile, fin)
# #--------------------------------------------------------
# # do not close the file as there may be more blocks in this one
# # and it is re-read afresh with every nested begin-end block.
# self.insp = self.insp[2:]
# insp = self.insp
# else:
# self.depthbegin += 1
# elif re.match("end$(?i)", cmd):
# if self.depthbegin:
# print(insp+" - BLOCK-return from nested *begin/*end block: '{}'".format(args))
# self.depthbegin -= 1
# else:
# legsinblock = self.survexlegsnumber - previousnlegs
# print(insp+" - LEGS: {} (previous: {}, now:{})".format(legsinblock,previousnlegs,self.survexlegsnumber))
# survexblock.legsall = legsinblock
# survexblock.save()
# return
# elif cmd == "ref":
# self.LoadSurvexRef(survexblock, args)
# elif cmd == "flags":
# self.LoadSurvexFlags(args, cmd)
# elif cmd == "data":
# self.LoadSurvexDataCmd(survexblock, args)
# elif cmd == "set" and re.match("names(?i)", args):
# pass
# elif re.match("date$(?i)", cmd):
# self.LoadSurvexDate(survexblock, args)
# elif re.match("team$(?i)", cmd):
# self.LoadSurvexTeam(survexblock, args)
# else:
# self.LoadSurvexIgnore(survexblock, args, cmd)
# else: # not a *cmd so we are reading data OR rx_comment failed
# if "from" in self.stardata: # only interested in survey legs
# self.LoadSurvexLineLeg(survexblock, svxline, sline, comment)
# else:
# pass # ignore all other sorts of data
pass
def LinearRecursiveLoad(self, survexblock, path, svxlines):
"""Loads a single survex file. Usually used to import all the survex files which have been collated
into a single file. Loads the begin/end blocks recursively.
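The deleted loader above re-read the entire file for every nested *begin, which is why memory peaked and the import crashed on constrained machines; the linear replacement walks the collated lines once and tracks nesting explicitly. A simplified one-pass sketch of that begin/end handling follows; it is not troggle's actual parser, and the leg counting is deliberately crude.

import re

STAR = re.compile(r"\s*\*(\w+)\s*(.*)")  # matches survex *commands

def load_blocks(svxlines):
    # One pass over the collated survex text, tracking *begin/*end nesting
    # with an explicit stack instead of recursive re-reads of the file.
    stack = [("root", 0)]  # (block name, leg count when the block opened)
    legs = 0
    for line in svxlines:
        m = STAR.match(line)
        if not m:
            legs += 1  # crude: count every non-command line as a leg
            continue
        cmd, args = m.group(1).lower(), m.group(2)
        if cmd == "begin":
            stack.append((args.lower() or "(anonymous)", legs))
        elif cmd == "end":
            name, legs_at_open = stack.pop()
            print("block {!r}: {} legs".format(name, legs - legs_at_open))
    assert len(stack) == 1, "unbalanced *begin/*end"

load_blocks(["*begin alpha", "1 2 10.0 290 -5", "2 3 5.5 120 2", "*end alpha"])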
@@ -702,7 +604,9 @@ class LoadingSurvex():
flinear.write("{:2} {} *include {}\n".format(self.depthinclude, indent, includesurvexfile.path))
push = includesurvexfile.path.lower()
self.stackinclude.append(push)
#-----------------
self.RecursiveScan(survexblock, includesurvexfile, fininclude, flinear, fcollate)
#-----------------
pop = self.stackinclude.pop()
if pop != push:
message = "!!!!!!! ERROR pop != push {} != {} {}".format(pop, push, self.stackinclude)
@@ -710,8 +614,9 @@ class LoadingSurvex():
print(message,file=flinear)
print(message,file=sys.stderr)
models.DataIssue.objects.create(parser='survex', message=message)
flinear.write("{:2} {} *edulcni {}\n".format(self.depthinclude, indent, includesurvexfile.path))
fcollate.write(";*edulcni {}\n".format(includesurvexfile.path))
includesurvexfile = None
flinear.write("{:2} {} *edulcni {}\n".format(self.depthinclude, indent, pop))
fcollate.write(";*edulcni {}\n".format(pop))
fininclude.close()
self.depthinclude -= 1
#--------------------------------------------------------
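The fix in this hunk writes the *edulcni marker from the popped value rather than from includesurvexfile, which may have been reassigned during the recursive scan, and verifies that every push is matched by its own pop. A stripped-down sketch of that push/recurse/pop/verify discipline; scan_includes and the include tree are hypothetical, and file handles and the Django models are omitted.

def scan_includes(path, tree, depth=0, stack=None):
    # Depth-first walk of *include references, mirroring the push /
    # recurse / pop / verify pattern in RecursiveScan above.
    if stack is None:
        stack = []
    for child in tree.get(path, []):
        print("{:2} {}*include {}".format(depth + 1, "  " * depth, child))
        push = child.lower()
        stack.append(push)
        # ----------------- recurse into the included file
        scan_includes(child, tree, depth + 1, stack)
        # -----------------
        pop = stack.pop()
        if pop != push:
            raise RuntimeError("pop != push: {} != {}".format(pop, push))
        print("{:2} {}*edulcni {}".format(depth + 1, "  " * depth, pop))

tree = {"1623.svx": ["caves-1623/290/290.svx"],
        "caves-1623/290/290.svx": ["caves-1623/290/entrance.svx"]}
scan_includes("1623.svx", tree)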
@@ -820,16 +725,6 @@ def FindAndLoadSurvex(survexblockroot):
mem1 = models.get_process_memory()
svx_load = None
# print('\n - Loading All Survex Blocks (RecursiveRecursive)',file=sys.stderr)
# svxlrl = LoadingSurvex()
# finroot = survexfileroot.OpenFile()
# svxlrl.RecursiveRecursiveLoad(survexblockroot, survexfileroot, finroot)
# finroot.close()
# survexlegsnumber = svxlrl.survexlegsnumber
# survexlegsalllength = svxlrl.survexlegsalllength
# svxlrl = None
# Close the logging file and restore sys.stdout to our old saved file handle
sys.stdout.close()
print("+", file=sys.stderr)