diff --git a/core/models/caves.py b/core/models/caves.py index be44e8f..edaae9a 100644 --- a/core/models/caves.py +++ b/core/models/caves.py @@ -1,7 +1,6 @@ import string import os import datetime -import logging import re import json from subprocess import call diff --git a/core/models/troggle.py b/core/models/troggle.py index cf32893..f1f10be 100644 --- a/core/models/troggle.py +++ b/core/models/troggle.py @@ -1,7 +1,6 @@ import string import os import datetime -import logging import re import resource from subprocess import call @@ -51,7 +50,7 @@ class DataIssue(TroggleModel): This is a use of the NOTIFICATION pattern: https://martinfowler.com/eaaDev/Notification.html - And we need to use it to replace all assertions in the code too: + We have replaced all assertions in the code with messages and local fix-ups or skips: https://martinfowler.com/articles/replaceThrowWithNotification.html """ date = models.DateTimeField(auto_now_add=True, blank=True) diff --git a/core/unused.py b/core/unused.py index 76f55f4..ba4f187 100644 --- a/core/unused.py +++ b/core/unused.py @@ -1,6 +1,5 @@ import sys import re -import logging from django.conf import settings from django.shortcuts import render diff --git a/core/utils.py b/core/utils.py index 4309fee..fe85533 100644 --- a/core/utils.py +++ b/core/utils.py @@ -1,10 +1,10 @@ import string import os import datetime -import logging import re import resource import random +import logging from subprocess import call from urllib.parse import urljoin @@ -41,12 +41,12 @@ TROG = { # This is module-level executable. This is a Bad Thing. Especially when it touches the file system. try: - logging.basicConfig(level=logging.DEBUG, - filename=settings.LOGFILE, - filemode='w') + logging.basicConfig(level=logging.DEBUG, + filename=settings.LOGFILE, + filemode='w') except: -# Opening of file for writing is going to fail currently, so decide it doesn't matter for now - pass + # Opening of file for writing is going to fail currently, so decide it doesn't matter for now + pass def get_process_memory(): usage=resource.getrusage(resource.RUSAGE_SELF) diff --git a/parsers/logbooks.py b/parsers/logbooks.py index 51171d0..9d56894 100644 --- a/parsers/logbooks.py +++ b/parsers/logbooks.py @@ -35,7 +35,6 @@ def GetTripPersons(trippeople, expedition, logtime_underground): if mul: tripperson = mul.group(1).strip() if tripperson and tripperson[0] != '*': - #assert tripperson in personyearmap, "'%s' << %s\n\n %s" % (tripperson, trippeople, personyearmap) tripperson = re.sub(round_bracket_regex, "", tripperson).strip() personyear = GetPersonExpeditionNameLookup(expedition).get(tripperson.lower()) if not personyear: @@ -160,7 +159,6 @@ def Parselogwikitxt(year, expedition, txt): for triphead, triptext in trippara: logbook_entry_count += 1 tripheadp = triphead.split("|") - # assert len(tripheadp) == 3, (tripheadp, triptext) if not (len(tripheadp) == 3): message = " ! - Bad no of items in tripdate in logbook: " + tripdate + " - " + tripheadp DataIssue.objects.create(parser='logbooks', message=message) @@ -169,6 +167,12 @@ def Parselogwikitxt(year, expedition, txt): tripdate, tripplace, trippeople = tripheadp tripsplace = tripplace.split(" - ") tripcave = tripsplace[0].strip() + if len(tripsplace) == 1: + tripsplace = tripsplace[0] + else: + tripsplace = tripsplace[1] + + print(f"! LOGBOOK {year} {logbook_entry_count:2} {len(triptext):4} '{tripsplace}'") tul = re.findall(r"T/?U:?\s*(\d+(?:\.\d*)?|unknown)\s*(hrs|hours)?", triptext) if tul: @@ -193,7 +197,7 @@ def Parselogwikitxt(year, expedition, txt): def EnterLogIntoObjStore(year, date, tripcave, triptitle, text, trippeople, tu, formattype, tripid1, seq): # This will need additional functions to replicate the persontrip calculation and storage. For the # moment we leave all that to be done in the django db - global trips # should be a singleton class object in models.py eventually + global trips # should be a singleton TROG eventually global logdataissues if tripid1 is None or tripid1 =="": @@ -354,7 +358,6 @@ def Parseloghtml03(year, expedition, txt): logbook_entry_count += 1 s = re.match(r"(?s)\s*

(.*?)

(.*)$", trippara) - #assert s, trippara if not ( s ) : message = " ! - Skipping logentry on failure to parse Parseloghtml03: {} {} {}...".format(tripentry,s,trippara[:300]) DataIssue.objects.create(parser='logbooks', message=message) diff --git a/parsers/survex.py b/parsers/survex.py index ad6f27f..bcb4070 100644 --- a/parsers/survex.py +++ b/parsers/survex.py @@ -213,7 +213,11 @@ class LoadingSurvex(): expo = self.expos[year] else: expeditions = Expedition.objects.filter(year=year) - assert len(expeditions) == 1 + if len(expeditions) != 1 : + message = f"! More than one expedition in year {year} '{line}' ({survexblock}) {survexblock.survexfile.path}" + print((self.insp+message)) + DataIssue.objects.create(parser='survexunits', message=message) + expo= expeditions[0] self.expos[year]= expo @@ -411,7 +415,11 @@ class LoadingSurvex(): letterx = "X" if len(wallet)<2: wallet = "0" + wallet - assert (int(yr)>1960 and int(yr)<2039), "Wallet year out of bounds: %s" % yr + if not (int(yr)>1960 and int(yr)<2039): + message = " ! Wallet year out of bounds {yr} '{refscan}' {survexblock.survexfile.path}" + print((self.insp+message)) + DataIssue.objects.create(parser='survex', message=message) + refscan = "%s#%s%s" % (yr, letterx, wallet) try: if int(wallet)>100: diff --git a/parsers/surveys.py b/parsers/surveys.py index bba5d3f..11aa805 100644 --- a/parsers/surveys.py +++ b/parsers/surveys.py @@ -1,7 +1,6 @@ import sys import os import types -import logging import stat import csv import re @@ -46,13 +45,16 @@ def listdir(*directories): def GetListDir(sdir): res = [ ] if sdir[:7] == "http://": - assert False, "Not written" - s = urllib.request.urlopen(sdir) - else: - for f in os.listdir(sdir): - if f[0] != ".": - ff = os.path.join(sdir, f) - res.append((f, ff, os.path.isdir(ff))) + # s = urllib.request.urlopen(sdir) + message = f"! Requesting loading from http:// NOT IMPLEMENTED. [{sdir}]" + print(message) + DataIssue.objects.create(parser='Drawings', message=message) + sdir[:7] = "" + + for f in os.listdir(sdir): + if f[0] != ".": + ff = os.path.join(sdir, f) + res.append((f, ff, os.path.isdir(ff))) return res @@ -67,7 +69,6 @@ def LoadListScansFile(scansfolder): c=0 for (fyf, ffyf, fisdiryf) in gld: - #assert not fisdiryf, ffyf if re.search(r"\.(?:png|jpg|jpeg|pdf|svg|gif)(?i)$", fyf): singlescan = SingleScan(ffile=ffyf, name=fyf, scansfolder=scansfolder) singlescan.save() @@ -106,7 +107,6 @@ def LoadListScans(): print("%s" % f, end=' ') for fy, ffy, fisdiry in GetListDir(ff): if fisdiry: - assert fisdiry, ffy scansfolder = ScansFolder(fpath=ffy, walletname=fy) scansfolder.save() LoadListScansFile(scansfolder) @@ -120,20 +120,25 @@ def LoadListScans(): def find_tunnel_scan(tunnelfile, path): '''Is given a line of text 'path' which may or may not contain a recognisable name of a scanned file - which we have already seen when we imported all the files we could find in teh surveyscans direstories + which we have already seen when we imported all the files we could find in the surveyscans direstories ''' scansfolder, scansfile = None, None mscansdir = re.search(r"(\d\d\d\d#X?\d+\w?|1995-96kh|92-94Surveybookkh|1991surveybook|smkhs)/(.*?(?:png|jpg|pdf|jpeg))$", path) if mscansdir: scansfolderl = ScansFolder.objects.filter(walletname=mscansdir.group(1)) + # This should properly detect if a list of folders is returned and do something sensible, not just pick the first. if len(scansfolderl): - assert len(scansfolderl) == 1 scansfolder = scansfolderl[0] + if len(scansfolderl) > 1: + message = "! More than one scan FOLDER matches filter query. [{}]: {} {} {} {}".format(scansfilel[0], mscansdir.group(1), mscansdir.group(2), tunnelfile.tunnelpath, path) + print(message) + DataIssue.objects.create(parser='Tunnel', message=message) + if scansfolder: scansfilel = scansfolder.singlescan_set.filter(name=mscansdir.group(2)) if len(scansfilel): if len(scansfilel) > 1: - message = "! More than one image filename matches filter query. [{}]: {} {} {} {}".format(scansfilel[0], mscansdir.group(1), mscansdir.group(2), tunnelfile.tunnelpath, path) + message = "! More than one image FILENAME matches filter query. [{}]: {} {} {} {}".format(scansfilel[0], mscansdir.group(1), mscansdir.group(2), tunnelfile.tunnelpath, path) print(message) DataIssue.objects.create(parser='Tunnel', message=message) scansfile = scansfilel[0] diff --git a/settings.py b/settings.py index 98a9016..46bbbcd 100644 --- a/settings.py +++ b/settings.py @@ -76,6 +76,7 @@ LOGBOOK_PARSER_SETTINGS = { "2009": ("2009/2009logbook.txt", "Parselogwikitxt"), "2008": ("2008/2008logbook.txt", "Parselogwikitxt"), "2007": ("2007/logbook.html", "Parseloghtmltxt"), + "2006": ("2006/logbook.html", "Parseloghtmltxt"), # "2006": ("2006/logbook/logbook_06.txt", "Parselogwikitxt"), "2006": ("2006/logbook.html", "Parseloghtmltxt"), "2005": ("2005/logbook.html", "Parseloghtmltxt"),