From 760abe1a9efd0bfa64f44b009fa9aef6a3e72c6a Mon Sep 17 00:00:00 2001
From: Philip Sargent <philip.sargent@klebos.com>
Date: Thu, 25 Aug 2022 15:54:00 +0300
Subject: [PATCH] cope with swapped people/title

---
 parsers/logbooks.py | 23 +++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/parsers/logbooks.py b/parsers/logbooks.py
index cc589f0..2f57dcf 100644
--- a/parsers/logbooks.py
+++ b/parsers/logbooks.py
@@ -20,6 +20,9 @@ from parsers.people import GetPersonExpeditionNameLookup
 '''
 Parses and imports logbooks in all their wonderful confusion
 
+The Objectstore stuff is an initial attempt to see how we can migrate away from the Django database,
+an idea which no longer seems sensible given that we rely on the database to handle multi-user access.
+
 # When we edit logbook entries, allow a "?" after any piece of data to say we've frigged it and
 # it can be checked up later from the hard-copy if necessary; or it's not possible to determin (name, trip place, etc)
 '''
@@ -59,7 +62,7 @@ noncaveplaces = [ "QMplaceholder", "Journey", "Loser Plateau", "UNKNOWN", 'plate
 logdataissues = TROG['issues']['logdataissues']
 trips ={}
 
-entries = { "2019": 44, "2018": 74, "2017": 60, "2016": 81, "2015": 79, 
+entries = { "2022": 42, "2019": 44, "2018": 74, "2017": 60, "2016": 81, "2015": 79, 
     "2014": 65, "2013": 51, "2012": 75, "2011": 68, "2010": 22, "2009": 52, 
     "2008": 49, "2007": 111, "2006": 60, "2005": 55, "2004": 76, "2003": 40, "2002": 31, 
     "2001": 48, "2000": 54, "1999": 79, "1998": 43, "1997": 53, "1996": 94, "1995": 41, 
@@ -175,6 +178,8 @@ def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_
 def ParseDate(tripdate, year):
     """ Interprets dates in the expo logbooks and returns a correct datetime.date object  """
     dummydate = date(1970, 1, 1)
+    month = 1
+    day = 1
     try:
         mdatestandard = re.match(r"(\d\d\d\d)-(\d\d)-(\d\d)", tripdate)
         mdategoof = re.match(r"(\d\d?)/0?(\d)/(20|19)?(\d\d)", tripdate)
@@ -196,6 +201,7 @@ def ParseDate(tripdate, year):
                 yadd = int(year[:2]) * 100
                 day, month, year = int(mdategoof.group(1)), int(mdategoof.group(2)), int(mdategoof.group(4)) + yadd
         else:
+            year = 1970
             message = f" ! - Bad date in logbook: {tripdate} - {year}"
             DataIssue.objects.create(parser='logbooks', message=message)
             logdataissues["tripdate"]=message
@@ -311,6 +317,19 @@ def Parseloghtmltxt(year, expedition, txt):
                             \s*(?:<div\s+class="timeug">\s*(.*?)</div>)?
                             \s*$
                      ''', trippara)
+        tripid, tripid1, tripdate, trippeople, triptitle, triptext, tu = s.groups()
+        if not s: # allow title and people to be swapped in order
+            s = re.match(r'''(?x)(?:\s*<div\sclass="tripdate"\sid=".*?">.*?</div>\s*<p>)?  # second date
+                                \s*(?:<a\s+id="(.*?)"\s*/>\s*</a>)?
+                                \s*<div\s+class="tripdate"\s*(?:id="(.*?)")?>(.*?)</div>(?:<p>)?
+                                \s*<div\s+class="triptitle">\s*(.*?)</div>
+                                \s*<div\s+class="trippeople">\s*(.*?)</div>
+                                ([\s\S]*?)
+                                \s*(?:<div\s+class="timeug">\s*(.*?)</div>)?
+                                \s*$
+                         ''', trippara)
+            tripid, tripid1, tripdate, triptitle, trippeople, triptext, tu = s.groups()
+                     
         if not s:
             if not re.search(r"Rigging Guide", trippara):
                 msg = " !- Logbook. Can't parse: {} entry:{}".format(trippara, logbook_entry_count) 
@@ -318,7 +337,7 @@ def Parseloghtmltxt(year, expedition, txt):
                 DataIssue.objects.create(parser='logbooks', message=msg)
                 logdataissues[tid]=msg
             continue
-        tripid, tripid1, tripdate, trippeople, triptitle, triptext, tu = s.groups()
+        
         ldate = ParseDate(tripdate.strip(), year)
         triptitles = triptitle.split(" - ")
         if len(triptitles) >= 2: