2
0
mirror of https://expo.survex.com/repositories/troggle/.git synced 2025-01-19 09:22:32 +00:00

cope with swapped people/title

This commit is contained in:
Philip Sargent 2022-08-25 15:54:00 +03:00
parent 8f03e590cc
commit 760abe1a9e

View File

@ -20,6 +20,9 @@ from parsers.people import GetPersonExpeditionNameLookup
'''
Parses and imports logbooks in all their wonderful confusion
The Objectstore stuff is an initial attempt to see how we can migrate away from the Django database.
An idea which no longer seems sensible given that we rely on the database to do the multiuser bit.
# When we edit logbook entries, allow a "?" after any piece of data to say we've frigged it and
# it can be checked up later from the hard-copy if necessary; or it's not possible to determine (name, trip place, etc.)
'''
@ -59,7 +62,7 @@ noncaveplaces = [ "QMplaceholder", "Journey", "Loser Plateau", "UNKNOWN", 'plate
logdataissues = TROG['issues']['logdataissues']
trips ={}
entries = { "2019": 44, "2018": 74, "2017": 60, "2016": 81, "2015": 79,
entries = { "2022": 42, "2019": 44, "2018": 74, "2017": 60, "2016": 81, "2015": 79,
"2014": 65, "2013": 51, "2012": 75, "2011": 68, "2010": 22, "2009": 52,
"2008": 49, "2007": 111, "2006": 60, "2005": 55, "2004": 76, "2003": 40, "2002": 31,
"2001": 48, "2000": 54, "1999": 79, "1998": 43, "1997": 53, "1996": 94, "1995": 41,
@ -175,6 +178,8 @@ def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_
def ParseDate(tripdate, year):
""" Interprets dates in the expo logbooks and returns a correct datetime.date object """
dummydate = date(1970, 1, 1)
month = 1
day = 1
try:
mdatestandard = re.match(r"(\d\d\d\d)-(\d\d)-(\d\d)", tripdate)
mdategoof = re.match(r"(\d\d?)/0?(\d)/(20|19)?(\d\d)", tripdate)
@ -196,6 +201,7 @@ def ParseDate(tripdate, year):
yadd = int(year[:2]) * 100
day, month, year = int(mdategoof.group(1)), int(mdategoof.group(2)), int(mdategoof.group(4)) + yadd
else:
year = 1970
message = f" ! - Bad date in logbook: {tripdate} - {year}"
DataIssue.objects.create(parser='logbooks', message=message)
logdataissues["tripdate"]=message
@ -311,6 +317,19 @@ def Parseloghtmltxt(year, expedition, txt):
\s*(?:<div\s+class="timeug">\s*(.*?)</div>)?
\s*$
''', trippara)
tripid, tripid1, tripdate, trippeople, triptitle, triptext, tu = s.groups()
if not s: # allow title and people to be swapped in order
s = re.match(r'''(?x)(?:\s*<div\sclass="tripdate"\sid=".*?">.*?</div>\s*<p>)? # second date
\s*(?:<a\s+id="(.*?)"\s*/>\s*</a>)?
\s*<div\s+class="tripdate"\s*(?:id="(.*?)")?>(.*?)</div>(?:<p>)?
\s*<div\s+class="triptitle">\s*(.*?)</div>
\s*<div\s+class="trippeople">\s*(.*?)</div>
([\s\S]*?)
\s*(?:<div\s+class="timeug">\s*(.*?)</div>)?
\s*$
''', trippara)
tripid, tripid1, tripdate, triptitle, trippeople, triptext, tu = s.groups()
if not s:
if not re.search(r"Rigging Guide", trippara):
msg = " !- Logbook. Can't parse: {} entry:{}".format(trippara, logbook_entry_count)
@ -318,7 +337,7 @@ def Parseloghtmltxt(year, expedition, txt):
DataIssue.objects.create(parser='logbooks', message=msg)
logdataissues[tid]=msg
continue
tripid, tripid1, tripdate, trippeople, triptitle, triptext, tu = s.groups()
ldate = ParseDate(tripdate.strip(), year)
triptitles = triptitle.split(" - ")
if len(triptitles) >= 2: