Thorough spring clean and profiling

This commit is contained in:
Philip Sargent
2020-04-27 23:51:41 +01:00
parent 2b39dec560
commit a8460065a4
4 changed files with 129 additions and 85 deletions

View File

@@ -152,7 +152,7 @@ def readcave(filename):
slug = slug,
primary = primary)
except:
message = "Can't find text (slug): %s, skipping %s" % (slug, context)
message = " ! Can't find text (slug): %s, skipping %s" % (slug, context)
models.DataIssue.objects.create(parser='caves', message=message)
print(message)
@@ -164,7 +164,7 @@ def readcave(filename):
entrance = models.Entrance.objects.get(entranceslug__slug = slug)
ce = models.CaveAndEntrance.objects.update_or_create(cave = c, entrance_letter = letter, entrance = entrance)
except:
message = "Entrance text (slug) %s missing %s" % (slug, context)
message = " ! Entrance text (slug) %s missing %s" % (slug, context)
models.DataIssue.objects.create(parser='caves', message=message)
print(message)
@@ -172,14 +172,14 @@ def readcave(filename):
def getXML(text, itemname, minItems = 1, maxItems = None, printwarnings = True, context = ""):
items = re.findall("<%(itemname)s>(.*?)</%(itemname)s>" % {"itemname": itemname}, text, re.S)
if len(items) < minItems and printwarnings:
message = "%(count)i %(itemname)s found, at least %(min)i expected" % {"count": len(items),
message = " ! %(count)i %(itemname)s found, at least %(min)i expected" % {"count": len(items),
"itemname": itemname,
"min": minItems} + context
models.DataIssue.objects.create(parser='caves', message=message)
print(message)
if maxItems is not None and len(items) > maxItems and printwarnings:
message = "%(count)i %(itemname)s found, no more than %(max)i expected" % {"count": len(items),
message = " ! %(count)i %(itemname)s found, no more than %(max)i expected" % {"count": len(items),
"itemname": itemname,
"max": maxItems} + context
models.DataIssue.objects.create(parser='caves', message=message)

View File

@@ -58,7 +58,7 @@ def LoadPersonsExpos():
header = dict(zip(headers, range(len(headers))))
# make expeditions
print("Loading expeditions")
print(" - Loading expeditions")
years = headers[5:]
for year in years:
@@ -68,7 +68,7 @@ def LoadPersonsExpos():
save_carefully(models.Expedition, lookupAttribs, nonLookupAttribs)
# make persons
print("Loading personexpeditions")
print(" - Loading personexpeditions")
for personline in personreader:
name = personline[header["Name"]]

View File

@@ -150,17 +150,19 @@ def parseSurveyScans(expedition, logfile=None):
scanObj.save()
except (IOError, OSError):
yearPath=os.path.join(settings.SURVEY_SCANS, "surveyscans", expedition.year)
print("No folder found for " + expedition.year + " at:- " + yearPath)
print(" ! No folder found for " + expedition.year + " at:- " + yearPath)
# dead
def parseSurveys(logfile=None):
try:
readSurveysFromCSV()
except (IOError, OSError):
print("Survey CSV not found..")
print(" ! Survey CSV not found..")
pass
print " - Loading scans by expedition year"
for expedition in Expedition.objects.filter(year__gte=2000): #expos since 2000, because paths and filenames were nonstandard before then
print "%s" % expedition,
parseSurveyScans(expedition)
# dead
@@ -190,7 +192,7 @@ def GetListDir(sdir):
def LoadListScansFile(survexscansfolder):
gld = [ ]
# flatten out any directories in these book files
# flatten out any directories in these wallet folders - should not be any
for (fyf, ffyf, fisdiryf) in GetListDir(survexscansfolder.fpath):
if fisdiryf:
gld.extend(GetListDir(ffyf))
@@ -199,7 +201,7 @@ def LoadListScansFile(survexscansfolder):
for (fyf, ffyf, fisdiryf) in gld:
#assert not fisdiryf, ffyf
if re.search(r"\.(?:png|jpg|jpeg)(?i)$", fyf):
if re.search(r"\.(?:png|jpg|jpeg|pdf|jpeg|svg)(?i)$", fyf):
survexscansingle = SurvexScanSingle(ffile=ffyf, name=fyf, survexscansfolder=survexscansfolder)
survexscansingle.save()
@@ -208,7 +210,7 @@ def LoadListScansFile(survexscansfolder):
# and builds up the models we can access later
def LoadListScans():
print('Loading Survey Scans...')
print(' - Loading Survey Scans... (deleting all objects first)')
SurvexScanSingle.objects.all().delete()
SurvexScansFolder.objects.all().delete()
@@ -221,12 +223,14 @@ def LoadListScans():
# iterate into the surveyscans directory
print ' - ',
for f, ff, fisdir in GetListDir(os.path.join(settings.SURVEY_SCANS, "surveyscans")):
if not fisdir:
continue
# do the year folders
if re.match(r"\d\d\d\d$", f):
print "%s" % f,
for fy, ffy, fisdiry in GetListDir(ff):
if fisdiry:
assert fisdiry, ffy