From daf58e9e45b5c2d94cbdcd00d9883cfa6b01c6dd Mon Sep 17 00:00:00 2001
From: Philip Sargent <philip.sargent@klebos.com>
Date: Tue, 13 Apr 2021 22:27:01 +0100
Subject: [PATCH] replace assert() with message logging

---
 core/models/caves.py   |  1 -
 core/models/troggle.py |  3 +--
 core/unused.py         |  1 -
 core/utils.py          | 12 ++++++------
 parsers/logbooks.py    | 11 +++++++----
 parsers/survex.py      | 12 ++++++++++--
 parsers/surveys.py     | 31 ++++++++++++++++++-------------
 settings.py            |  1 +
 8 files changed, 43 insertions(+), 29 deletions(-)

diff --git a/core/models/caves.py b/core/models/caves.py
index be44e8f..edaae9a 100644
--- a/core/models/caves.py
+++ b/core/models/caves.py
@@ -1,7 +1,6 @@
 import string
 import os
 import datetime
-import logging
 import re
 import json
 from subprocess import call
diff --git a/core/models/troggle.py b/core/models/troggle.py
index cf32893..f1f10be 100644
--- a/core/models/troggle.py
+++ b/core/models/troggle.py
@@ -1,7 +1,6 @@
 import string
 import os
 import datetime
-import logging
 import re
 import resource
 from subprocess import call
@@ -51,7 +50,7 @@ class DataIssue(TroggleModel):
     This is a use of the NOTIFICATION pattern: 
     https://martinfowler.com/eaaDev/Notification.html
     
-    And we need to use it to replace all assertions in the code too:
+    We have replaced all assertions in the code with messages and local fix-ups or skips:
     https://martinfowler.com/articles/replaceThrowWithNotification.html
     """
     date = models.DateTimeField(auto_now_add=True, blank=True)
diff --git a/core/unused.py b/core/unused.py
index 76f55f4..ba4f187 100644
--- a/core/unused.py
+++ b/core/unused.py
@@ -1,6 +1,5 @@
 import sys
 import re
-import logging
 
 from django.conf import settings
 from django.shortcuts import render
diff --git a/core/utils.py b/core/utils.py
index 4309fee..fe85533 100644
--- a/core/utils.py
+++ b/core/utils.py
@@ -1,10 +1,10 @@
 import string
 import os
 import datetime
-import logging
 import re
 import resource
 import random
+import logging
 from subprocess import call
 
 from urllib.parse import urljoin
@@ -41,12 +41,12 @@ TROG = {
 
 # This is module-level executable. This is a Bad Thing. Especially when it touches the file system.
 try:
-      logging.basicConfig(level=logging.DEBUG,
-                           filename=settings.LOGFILE,
-                           filemode='w')
+    logging.basicConfig(level=logging.DEBUG,
+        filename=settings.LOGFILE,
+        filemode='w')
 except:
-# Opening of file for writing is going to fail currently, so decide it doesn't matter for now    
-      pass
+    # Opening of file for writing is going to fail currently, so decide it doesn't matter for now    
+    pass
 
 def get_process_memory():
     usage=resource.getrusage(resource.RUSAGE_SELF)
diff --git a/parsers/logbooks.py b/parsers/logbooks.py
index 51171d0..9d56894 100644
--- a/parsers/logbooks.py
+++ b/parsers/logbooks.py
@@ -35,7 +35,6 @@ def GetTripPersons(trippeople, expedition, logtime_underground):
         if mul:
             tripperson = mul.group(1).strip()
         if tripperson and tripperson[0] != '*':
-            #assert tripperson in personyearmap, "'%s' << %s\n\n %s" % (tripperson, trippeople, personyearmap)
             tripperson = re.sub(round_bracket_regex, "", tripperson).strip()
             personyear = GetPersonExpeditionNameLookup(expedition).get(tripperson.lower())
             if not personyear:
@@ -160,7 +159,6 @@ def Parselogwikitxt(year, expedition, txt):
     for triphead, triptext in trippara:
         logbook_entry_count += 1
         tripheadp = triphead.split("|")
-        # assert len(tripheadp) == 3, (tripheadp, triptext)
         if not (len(tripheadp) == 3):
             message = " ! - Bad no of items in tripdate in logbook: " + tripdate + " - " + tripheadp
             DataIssue.objects.create(parser='logbooks', message=message)
@@ -169,6 +167,12 @@ def Parselogwikitxt(year, expedition, txt):
         tripdate, tripplace, trippeople = tripheadp
         tripsplace = tripplace.split(" - ")
         tripcave = tripsplace[0].strip()
+        if len(tripsplace) == 1:
+            tripsplace = tripsplace[0] 
+        else:
+            tripsplace = tripsplace[1]
+            
+        print(f"! LOGBOOK {year} {logbook_entry_count:2}  {len(triptext):4}  '{tripsplace}'")
 
         tul = re.findall(r"T/?U:?\s*(\d+(?:\.\d*)?|unknown)\s*(hrs|hours)?", triptext)
         if tul:
@@ -193,7 +197,7 @@ def Parselogwikitxt(year, expedition, txt):
 def EnterLogIntoObjStore(year, date, tripcave, triptitle, text, trippeople, tu, formattype, tripid1, seq):
     # This will need additional functions to replicate the persontrip calculation and storage. For the
     # moment we leave all that to be done in the django db
-    global trips # should be a singleton class object in models.py eventually
+    global trips # should be a singleton TROG eventually
     global logdataissues
 
     if tripid1 is None or tripid1 =="":
@@ -354,7 +358,6 @@ def Parseloghtml03(year, expedition, txt):
         logbook_entry_count += 1
         
         s = re.match(r"(?s)\s*<p>(.*?)</p>(.*)$", trippara)
-        #assert s, trippara
         if not ( s ) :
             message = " ! - Skipping logentry on failure to parse Parseloghtml03: {} {} {}...".format(tripentry,s,trippara[:300])
             DataIssue.objects.create(parser='logbooks', message=message)
diff --git a/parsers/survex.py b/parsers/survex.py
index ad6f27f..bcb4070 100644
--- a/parsers/survex.py
+++ b/parsers/survex.py
@@ -213,7 +213,11 @@ class LoadingSurvex():
                 expo = self.expos[year]
             else:
                 expeditions = Expedition.objects.filter(year=year)
-                assert len(expeditions) == 1
+                if len(expeditions) != 1 :  # zero matches and several matches are both data errors
+                    message = f"! Expected exactly one expedition in year {year}, found {len(expeditions)} '{line}' ({survexblock}) {survexblock.survexfile.path}"
+                    print((self.insp+message))
+                    DataIssue.objects.create(parser='survexunits', message=message)
+                # NOTE(review): expeditions[0] below still raises IndexError when no expedition matches the year.
                 expo= expeditions[0]
                 self.expos[year]= expo
 
@@ -411,7 +415,11 @@ class LoadingSurvex():
             letterx = "X"
         if len(wallet)<2:
             wallet = "0" + wallet
-        assert (int(yr)>1960 and int(yr)<2039), "Wallet year out of bounds: %s" % yr
+        if not (int(yr)>1960 and int(yr)<2039):
+            # refscan is assigned on the line after this check, so spell the reference out from its parts here.
+            message = f" ! Wallet year out of bounds {yr} '{yr}#{letterx}{wallet}' {survexblock.survexfile.path}"
+            print((self.insp+message))
+            DataIssue.objects.create(parser='survex', message=message)
         refscan = "%s#%s%s" % (yr, letterx, wallet)
         try:
             if int(wallet)>100:
diff --git a/parsers/surveys.py b/parsers/surveys.py
index bba5d3f..11aa805 100644
--- a/parsers/surveys.py
+++ b/parsers/surveys.py
@@ -1,7 +1,6 @@
 import sys
 import os
 import types
-import logging
 import stat
 import csv
 import re
@@ -46,13 +45,16 @@ def listdir(*directories):
 def GetListDir(sdir):
     res = [ ]
     if sdir[:7] == "http://":
-        assert False, "Not written"
-        s = urllib.request.urlopen(sdir)
-    else:
-        for f in os.listdir(sdir):
-            if f[0] != ".":
-                ff = os.path.join(sdir, f)
-                res.append((f, ff, os.path.isdir(ff)))
+        # s = urllib.request.urlopen(sdir)
+        message = f"! Requesting loading from http:// NOT IMPLEMENTED. [{sdir}]"
+        print(message)
+        DataIssue.objects.create(parser='Drawings', message=message)
+        sdir = sdir[7:]  # str is immutable, so rebind to the path without the "http://" prefix
+    # Collect (name, full path, is-directory) for every entry whose name does not start with "."
+    for f in os.listdir(sdir):
+        if f[0] != ".":
+            ff = os.path.join(sdir, f)
+            res.append((f, ff, os.path.isdir(ff)))
     return res
 
 
@@ -67,7 +69,6 @@ def LoadListScansFile(scansfolder):
     
     c=0
     for (fyf, ffyf, fisdiryf) in gld:
-        #assert not fisdiryf, ffyf
         if re.search(r"\.(?:png|jpg|jpeg|pdf|svg|gif)(?i)$", fyf):
             singlescan = SingleScan(ffile=ffyf, name=fyf, scansfolder=scansfolder)
             singlescan.save()
@@ -106,7 +107,6 @@ def LoadListScans():
             print("%s" % f, end=' ')
             for fy, ffy, fisdiry in GetListDir(ff):
                 if fisdiry:
-                    assert fisdiry, ffy
                     scansfolder = ScansFolder(fpath=ffy, walletname=fy)
                     scansfolder.save()
                     LoadListScansFile(scansfolder)
@@ -120,20 +120,25 @@ def LoadListScans():
 
 def find_tunnel_scan(tunnelfile, path):
     '''Is given a line of text 'path' which may or may not contain a recognisable name of a scanned file
-    which we have already seen when we imported all the files we could find in teh surveyscans direstories
+    which we have already seen when we imported all the files we could find in the surveyscans directories
     '''
     scansfolder, scansfile = None, None
     mscansdir = re.search(r"(\d\d\d\d#X?\d+\w?|1995-96kh|92-94Surveybookkh|1991surveybook|smkhs)/(.*?(?:png|jpg|pdf|jpeg))$", path)
     if mscansdir:
         scansfolderl = ScansFolder.objects.filter(walletname=mscansdir.group(1))
+        # TODO: handle several matching folders properly; for now the first is used and the duplication is reported as a DataIssue.
         if len(scansfolderl):
-            assert len(scansfolderl) == 1
             scansfolder = scansfolderl[0]
+            if len(scansfolderl) > 1:
+                message = "! More than one scan FOLDER matches filter query. [{}]: {} {} {} {}".format(scansfolder, mscansdir.group(1), mscansdir.group(2), tunnelfile.tunnelpath, path)
+                print(message)
+                DataIssue.objects.create(parser='Tunnel', message=message)
+            
         if scansfolder:
             scansfilel = scansfolder.singlescan_set.filter(name=mscansdir.group(2))
             if len(scansfilel):
                 if len(scansfilel) > 1:
-                    message = "! More than one image filename matches filter query. [{}]: {} {} {} {}".format(scansfilel[0], mscansdir.group(1), mscansdir.group(2), tunnelfile.tunnelpath, path)
+                    message = "! More than one image FILENAME matches filter query. [{}]: {} {} {} {}".format(scansfilel[0], mscansdir.group(1), mscansdir.group(2), tunnelfile.tunnelpath, path)
                     print(message)
                     DataIssue.objects.create(parser='Tunnel', message=message)
                 scansfile = scansfilel[0]
diff --git a/settings.py b/settings.py
index 98a9016..46bbbcd 100644
--- a/settings.py
+++ b/settings.py
@@ -76,6 +76,7 @@ LOGBOOK_PARSER_SETTINGS = {
                 "2009": ("2009/2009logbook.txt", "Parselogwikitxt"), 
                 "2008": ("2008/2008logbook.txt", "Parselogwikitxt"), 
                 "2007": ("2007/logbook.html", "Parseloghtmltxt"), 
+                "2006": ("2006/logbook.html", "Parseloghtmltxt"), 
 #               "2006": ("2006/logbook/logbook_06.txt", "Parselogwikitxt"), 
                 "2006": ("2006/logbook.html", "Parseloghtmltxt"), 
                 "2005": ("2005/logbook.html", "Parseloghtmltxt"),