From 760abe1a9efd0bfa64f44b009fa9aef6a3e72c6a Mon Sep 17 00:00:00 2001
From: Philip Sargent <philip.sargent@klebos.com>
Date: Thu, 25 Aug 2022 15:54:00 +0300
Subject: [PATCH] cope with swapped people/title

Try the trippeople-then-triptitle layout first and fall back to the
triptitle-then-trippeople layout. Groups are only unpacked from a
successful match, so an entry in the swapped (or an unparseable) layout
reaches the fallback / the "Can't parse" report instead of raising
AttributeError on a None match object.

---
Review note: the originally posted hunk called s.groups() before testing
whether the first re.match() had succeeded; this revision guards both
unpackings. Hunk counts and the diffstat are updated accordingly.

 parsers/logbooks.py | 25 +++++++++++++++++++++++--
 1 file changed, 23 insertions(+), 2 deletions(-)

diff --git a/parsers/logbooks.py b/parsers/logbooks.py
index cc589f0..2f57dcf 100644
--- a/parsers/logbooks.py
+++ b/parsers/logbooks.py
@@ -20,6 +23,9 @@ from parsers.people import GetPersonExpeditionNameLookup
 '''
 Parses and imports logbooks in all their wonderful confusion
 
+The Objectstore stuff is an initial attempt to see how we can migrate away from the Django database.
+An idea which no longer seems sensible given that we rely on the database to do the multiuser bit.
+
 # When we edit logbook entries, allow a "?" after any piece of data to say we've frigged it and
 # it can be checked up later from the hard-copy if necessary; or it's not possible to determin (name, trip place, etc)
 '''
@@ -59,7 +62,7 @@ noncaveplaces = [ "QMplaceholder", "Journey", "Loser Plateau", "UNKNOWN", 'plate
 logdataissues = TROG['issues']['logdataissues']
 trips ={}
 
-entries = { "2019": 44, "2018": 74, "2017": 60, "2016": 81, "2015": 79,
+entries = { "2022": 42, "2019": 44, "2018": 74, "2017": 60, "2016": 81, "2015": 79,
     "2014": 65, "2013": 51, "2012": 75, "2011": 68, "2010": 22, "2009": 52,
     "2008": 49, "2007": 111, "2006": 60, "2005": 55, "2004": 76, "2003": 40, "2002": 31,
     "2001": 48, "2000": 54, "1999": 79, "1998": 43, "1997": 53, "1996": 94, "1995": 41,
@@ -175,6 +178,8 @@ def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_
 def ParseDate(tripdate, year):
     """ Interprets dates in the expo logbooks and returns a correct datetime.date object """
     dummydate = date(1970, 1, 1)
+    month = 1
+    day = 1
     try:
         mdatestandard = re.match(r"(\d\d\d\d)-(\d\d)-(\d\d)", tripdate)
         mdategoof = re.match(r"(\d\d?)/0?(\d)/(20|19)?(\d\d)", tripdate)
@@ -196,6 +201,7 @@ def ParseDate(tripdate, year):
             yadd = int(year[:2]) * 100
             day, month, year = int(mdategoof.group(1)), int(mdategoof.group(2)), int(mdategoof.group(4)) + yadd
         else:
+            year = 1970
             message = f" ! - Bad date in logbook: {tripdate} - {year}"
             DataIssue.objects.create(parser='logbooks', message=message)
             logdataissues["tripdate"]=message
@@ -311,6 +317,21 @@ def Parseloghtmltxt(year, expedition, txt):
                             \s*(?:<div\s+class="timeug">\s*(.*?)</div>)?
                             \s*$
                      ''', trippara)
+        if s:
+            tripid, tripid1, tripdate, trippeople, triptitle, triptext, tu = s.groups()
+        else: # allow title and people to be swapped in order
+            s = re.match(r'''(?x)(?:\s*<div\sclass="tripdate"\sid=".*?">.*?</div>\s*<p>)? # second date
+                            \s*(?:<a\s+id="(.*?)"\s*/>\s*</a>)?
+                            \s*<div\s+class="tripdate"\s*(?:id="(.*?)")?>(.*?)</div>(?:<p>)?
+                            \s*<div\s+class="triptitle">\s*(.*?)</div>
+                            \s*<div\s+class="trippeople">\s*(.*?)</div>
+                            ([\s\S]*?)
+                            \s*(?:<div\s+class="timeug">\s*(.*?)</div>)?
+                            \s*$
+                     ''', trippara)
+            if s: # only unpack when the swapped layout actually matched
+                tripid, tripid1, tripdate, triptitle, trippeople, triptext, tu = s.groups()
+
         if not s:
             if not re.search(r"Rigging Guide", trippara):
                 msg = " !- Logbook. Can't parse: {} entry:{}".format(trippara, logbook_entry_count)
@@ -318,7 +339,7 @@ def Parseloghtmltxt(year, expedition, txt):
                 DataIssue.objects.create(parser='logbooks', message=msg)
                 logdataissues[tid]=msg
             continue
-        tripid, tripid1, tripdate, trippeople, triptitle, triptext, tu = s.groups()
+
         ldate = ParseDate(tripdate.strip(), year)
         triptitles = triptitle.split(" - ")
         if len(triptitles) >= 2: