forked from expo/troggle
cope with swapped people/title
This commit is contained in:
parent
8f03e590cc
commit
760abe1a9e
@ -20,6 +20,9 @@ from parsers.people import GetPersonExpeditionNameLookup
|
|||||||
'''
|
'''
|
||||||
Parses and imports logbooks in all their wonderful confusion
|
Parses and imports logbooks in all their wonderful confusion
|
||||||
|
|
||||||
|
The Objectstore stuff is an initial attempt to see how we can migrate away from the Django database.
|
||||||
|
An idea which no longer seems sensible given that we rely on the database to do the multiuser bit.
|
||||||
|
|
||||||
# When we edit logbook entries, allow a "?" after any piece of data to say we've frigged it and
|
# When we edit logbook entries, allow a "?" after any piece of data to say we've frigged it and
|
||||||
# it can be checked up later from the hard-copy if necessary; or it's not possible to determine (name, trip place, etc)
|
# it can be checked up later from the hard-copy if necessary; or it's not possible to determine (name, trip place, etc)
|
||||||
'''
|
'''
|
||||||
@ -59,7 +62,7 @@ noncaveplaces = [ "QMplaceholder", "Journey", "Loser Plateau", "UNKNOWN", 'plate
|
|||||||
logdataissues = TROG['issues']['logdataissues']
|
logdataissues = TROG['issues']['logdataissues']
|
||||||
trips ={}
|
trips ={}
|
||||||
|
|
||||||
entries = { "2019": 44, "2018": 74, "2017": 60, "2016": 81, "2015": 79,
|
entries = { "2022": 42, "2019": 44, "2018": 74, "2017": 60, "2016": 81, "2015": 79,
|
||||||
"2014": 65, "2013": 51, "2012": 75, "2011": 68, "2010": 22, "2009": 52,
|
"2014": 65, "2013": 51, "2012": 75, "2011": 68, "2010": 22, "2009": 52,
|
||||||
"2008": 49, "2007": 111, "2006": 60, "2005": 55, "2004": 76, "2003": 40, "2002": 31,
|
"2008": 49, "2007": 111, "2006": 60, "2005": 55, "2004": 76, "2003": 40, "2002": 31,
|
||||||
"2001": 48, "2000": 54, "1999": 79, "1998": 43, "1997": 53, "1996": 94, "1995": 41,
|
"2001": 48, "2000": 54, "1999": 79, "1998": 43, "1997": 53, "1996": 94, "1995": 41,
|
||||||
@ -175,6 +178,8 @@ def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_
|
|||||||
def ParseDate(tripdate, year):
|
def ParseDate(tripdate, year):
|
||||||
""" Interprets dates in the expo logbooks and returns a correct datetime.date object """
|
""" Interprets dates in the expo logbooks and returns a correct datetime.date object """
|
||||||
dummydate = date(1970, 1, 1)
|
dummydate = date(1970, 1, 1)
|
||||||
|
month = 1
|
||||||
|
day = 1
|
||||||
try:
|
try:
|
||||||
mdatestandard = re.match(r"(\d\d\d\d)-(\d\d)-(\d\d)", tripdate)
|
mdatestandard = re.match(r"(\d\d\d\d)-(\d\d)-(\d\d)", tripdate)
|
||||||
mdategoof = re.match(r"(\d\d?)/0?(\d)/(20|19)?(\d\d)", tripdate)
|
mdategoof = re.match(r"(\d\d?)/0?(\d)/(20|19)?(\d\d)", tripdate)
|
||||||
@ -196,6 +201,7 @@ def ParseDate(tripdate, year):
|
|||||||
yadd = int(year[:2]) * 100
|
yadd = int(year[:2]) * 100
|
||||||
day, month, year = int(mdategoof.group(1)), int(mdategoof.group(2)), int(mdategoof.group(4)) + yadd
|
day, month, year = int(mdategoof.group(1)), int(mdategoof.group(2)), int(mdategoof.group(4)) + yadd
|
||||||
else:
|
else:
|
||||||
|
year = 1970
|
||||||
message = f" ! - Bad date in logbook: {tripdate} - {year}"
|
message = f" ! - Bad date in logbook: {tripdate} - {year}"
|
||||||
DataIssue.objects.create(parser='logbooks', message=message)
|
DataIssue.objects.create(parser='logbooks', message=message)
|
||||||
logdataissues["tripdate"]=message
|
logdataissues["tripdate"]=message
|
||||||
@ -311,6 +317,19 @@ def Parseloghtmltxt(year, expedition, txt):
|
|||||||
\s*(?:<div\s+class="timeug">\s*(.*?)</div>)?
|
\s*(?:<div\s+class="timeug">\s*(.*?)</div>)?
|
||||||
\s*$
|
\s*$
|
||||||
''', trippara)
|
''', trippara)
|
||||||
|
tripid, tripid1, tripdate, trippeople, triptitle, triptext, tu = s.groups()
|
||||||
|
if not s: # allow title and people to be swapped in order
|
||||||
|
s = re.match(r'''(?x)(?:\s*<div\sclass="tripdate"\sid=".*?">.*?</div>\s*<p>)? # second date
|
||||||
|
\s*(?:<a\s+id="(.*?)"\s*/>\s*</a>)?
|
||||||
|
\s*<div\s+class="tripdate"\s*(?:id="(.*?)")?>(.*?)</div>(?:<p>)?
|
||||||
|
\s*<div\s+class="triptitle">\s*(.*?)</div>
|
||||||
|
\s*<div\s+class="trippeople">\s*(.*?)</div>
|
||||||
|
([\s\S]*?)
|
||||||
|
\s*(?:<div\s+class="timeug">\s*(.*?)</div>)?
|
||||||
|
\s*$
|
||||||
|
''', trippara)
|
||||||
|
tripid, tripid1, tripdate, triptitle, trippeople, triptext, tu = s.groups()
|
||||||
|
|
||||||
if not s:
|
if not s:
|
||||||
if not re.search(r"Rigging Guide", trippara):
|
if not re.search(r"Rigging Guide", trippara):
|
||||||
msg = " !- Logbook. Can't parse: {} entry:{}".format(trippara, logbook_entry_count)
|
msg = " !- Logbook. Can't parse: {} entry:{}".format(trippara, logbook_entry_count)
|
||||||
@ -318,7 +337,7 @@ def Parseloghtmltxt(year, expedition, txt):
|
|||||||
DataIssue.objects.create(parser='logbooks', message=msg)
|
DataIssue.objects.create(parser='logbooks', message=msg)
|
||||||
logdataissues[tid]=msg
|
logdataissues[tid]=msg
|
||||||
continue
|
continue
|
||||||
tripid, tripid1, tripdate, trippeople, triptitle, triptext, tu = s.groups()
|
|
||||||
ldate = ParseDate(tripdate.strip(), year)
|
ldate = ParseDate(tripdate.strip(), year)
|
||||||
triptitles = triptitle.split(" - ")
|
triptitles = triptitle.split(" - ")
|
||||||
if len(triptitles) >= 2:
|
if len(triptitles) >= 2:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user