mirror of
https://expo.survex.com/repositories/troggle/.git
synced 2025-12-14 20:37:10 +00:00
Make the logbook parser a little more sane
Move the parser-to-expo mapping into settings. Set a default parser. Iterate over the expo years rather than the mapping list!
This commit is contained in:
@@ -26,16 +26,16 @@ from utils import save_carefully
|
|||||||
def GetTripPersons(trippeople, expedition, logtime_underground):
|
def GetTripPersons(trippeople, expedition, logtime_underground):
|
||||||
res = [ ]
|
res = [ ]
|
||||||
author = None
|
author = None
|
||||||
for tripperson in re.split(",|\+|&|&(?!\w+;)| and ", trippeople):
|
for tripperson in re.split(r",|\+|&|&(?!\w+;)| and ", trippeople):
|
||||||
tripperson = tripperson.strip()
|
tripperson = tripperson.strip()
|
||||||
mul = re.match("<u>(.*?)</u>$(?i)", tripperson)
|
mul = re.match(r"<u>(.*?)</u>$(?i)", tripperson)
|
||||||
if mul:
|
if mul:
|
||||||
tripperson = mul.group(1).strip()
|
tripperson = mul.group(1).strip()
|
||||||
if tripperson and tripperson[0] != '*':
|
if tripperson and tripperson[0] != '*':
|
||||||
#assert tripperson in personyearmap, "'%s' << %s\n\n %s" % (tripperson, trippeople, personyearmap)
|
#assert tripperson in personyearmap, "'%s' << %s\n\n %s" % (tripperson, trippeople, personyearmap)
|
||||||
personyear = GetPersonExpeditionNameLookup(expedition).get(tripperson.lower())
|
personyear = GetPersonExpeditionNameLookup(expedition).get(tripperson.lower())
|
||||||
if not personyear:
|
if not personyear:
|
||||||
print "NoMatchFor: '%s'" % tripperson
|
print(" - No name match for: '%s'" % tripperson)
|
||||||
res.append((personyear, logtime_underground))
|
res.append((personyear, logtime_underground))
|
||||||
if mul:
|
if mul:
|
||||||
author = personyear
|
author = personyear
|
||||||
@@ -65,11 +65,11 @@ def GetTripCave(place): #need to be fuzzier about matching h
|
|||||||
return tripCaveRes
|
return tripCaveRes
|
||||||
|
|
||||||
elif len(tripCaveRes)>1:
|
elif len(tripCaveRes)>1:
|
||||||
print "Ambiguous place " + str(place) + " entered. Choose from " + str(tripCaveRes)
|
print("Ambiguous place " + str(place) + " entered. Choose from " + str(tripCaveRes))
|
||||||
correctIndex=input("type list index of correct cave")
|
correctIndex=input("type list index of correct cave")
|
||||||
return tripCaveRes[correctIndex]
|
return tripCaveRes[correctIndex]
|
||||||
else:
|
else:
|
||||||
print "No cave found for place " , place
|
print("No cave found for place " , place)
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
@@ -78,7 +78,7 @@ def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_
|
|||||||
""" saves a logbook entry and related persontrips """
|
""" saves a logbook entry and related persontrips """
|
||||||
trippersons, author = GetTripPersons(trippeople, expedition, logtime_underground)
|
trippersons, author = GetTripPersons(trippeople, expedition, logtime_underground)
|
||||||
if not author:
|
if not author:
|
||||||
print "skipping logentry", title
|
print(" - skipping logentry" + title + " no author for entry")
|
||||||
return
|
return
|
||||||
|
|
||||||
# tripCave = GetTripCave(place)
|
# tripCave = GetTripCave(place)
|
||||||
@@ -102,8 +102,8 @@ def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_
|
|||||||
|
|
||||||
def ParseDate(tripdate, year):
|
def ParseDate(tripdate, year):
|
||||||
""" Interprets dates in the expo logbooks and returns a correct datetime.date object """
|
""" Interprets dates in the expo logbooks and returns a correct datetime.date object """
|
||||||
mdatestandard = re.match("(\d\d\d\d)-(\d\d)-(\d\d)", tripdate)
|
mdatestandard = re.match(r"(\d\d\d\d)-(\d\d)-(\d\d)", tripdate)
|
||||||
mdategoof = re.match("(\d\d?)/0?(\d)/(20|19)?(\d\d)", tripdate)
|
mdategoof = re.match(r"(\d\d?)/0?(\d)/(20|19)?(\d\d)", tripdate)
|
||||||
if mdatestandard:
|
if mdatestandard:
|
||||||
assert mdatestandard.group(1) == year, (tripdate, year)
|
assert mdatestandard.group(1) == year, (tripdate, year)
|
||||||
year, month, day = int(mdatestandard.group(1)), int(mdatestandard.group(2)), int(mdatestandard.group(3))
|
year, month, day = int(mdatestandard.group(1)), int(mdatestandard.group(2)), int(mdatestandard.group(3))
|
||||||
@@ -117,7 +117,7 @@ def ParseDate(tripdate, year):
|
|||||||
|
|
||||||
# 2007, 2008, 2006
|
# 2007, 2008, 2006
|
||||||
def Parselogwikitxt(year, expedition, txt):
|
def Parselogwikitxt(year, expedition, txt):
|
||||||
trippara = re.findall("===(.*?)===([\s\S]*?)(?====)", txt)
|
trippara = re.findall(r"===(.*?)===([\s\S]*?)(?====)", txt)
|
||||||
for triphead, triptext in trippara:
|
for triphead, triptext in trippara:
|
||||||
tripheadp = triphead.split("|")
|
tripheadp = triphead.split("|")
|
||||||
#print "ttt", tripheadp
|
#print "ttt", tripheadp
|
||||||
@@ -126,7 +126,7 @@ def Parselogwikitxt(year, expedition, txt):
|
|||||||
tripsplace = tripplace.split(" - ")
|
tripsplace = tripplace.split(" - ")
|
||||||
tripcave = tripsplace[0].strip()
|
tripcave = tripsplace[0].strip()
|
||||||
|
|
||||||
tul = re.findall("T/?U:?\s*(\d+(?:\.\d*)?|unknown)\s*(hrs|hours)?", triptext)
|
tul = re.findall(r"T/?U:?\s*(\d+(?:\.\d*)?|unknown)\s*(hrs|hours)?", triptext)
|
||||||
if tul:
|
if tul:
|
||||||
#assert len(tul) <= 1, (triphead, triptext)
|
#assert len(tul) <= 1, (triphead, triptext)
|
||||||
#assert tul[0][1] in ["hrs", "hours"], (triphead, triptext)
|
#assert tul[0][1] in ["hrs", "hours"], (triphead, triptext)
|
||||||
@@ -142,10 +142,14 @@ def Parselogwikitxt(year, expedition, txt):
|
|||||||
|
|
||||||
# 2002, 2004, 2005
|
# 2002, 2004, 2005
|
||||||
def Parseloghtmltxt(year, expedition, txt):
|
def Parseloghtmltxt(year, expedition, txt):
|
||||||
tripparas = re.findall("<hr\s*/>([\s\S]*?)(?=<hr)", txt)
|
print(" - Using log html parser")
|
||||||
|
tripparas = re.findall(r"<hr\s*/>([\s\S]*?)(?=<hr)", txt)
|
||||||
|
logbook_entry_count = 0
|
||||||
for trippara in tripparas:
|
for trippara in tripparas:
|
||||||
|
#print(" - HR detected - maybe a trip?")
|
||||||
|
logbook_entry_count += 1
|
||||||
|
|
||||||
s = re.match('''(?x)(?:\s*<div\sclass="tripdate"\sid=".*?">.*?</div>\s*<p>)? # second date
|
s = re.match(r'''(?x)(?:\s*<div\sclass="tripdate"\sid=".*?">.*?</div>\s*<p>)? # second date
|
||||||
\s*(?:<a\s+id="(.*?)"\s*/>\s*</a>)?
|
\s*(?:<a\s+id="(.*?)"\s*/>\s*</a>)?
|
||||||
\s*<div\s+class="tripdate"\s*(?:id="(.*?)")?>(.*?)</div>(?:<p>)?
|
\s*<div\s+class="tripdate"\s*(?:id="(.*?)")?>(.*?)</div>(?:<p>)?
|
||||||
\s*<div\s+class="trippeople">\s*(.*?)</div>
|
\s*<div\s+class="trippeople">\s*(.*?)</div>
|
||||||
@@ -155,38 +159,40 @@ def Parseloghtmltxt(year, expedition, txt):
|
|||||||
\s*$
|
\s*$
|
||||||
''', trippara)
|
''', trippara)
|
||||||
if not s:
|
if not s:
|
||||||
if not re.search("Rigging Guide", trippara):
|
if not re.search(r"Rigging Guide", trippara):
|
||||||
print "can't parse: ", trippara # this is 2007 which needs editing
|
print("can't parse: ", trippara) # this is 2007 which needs editing
|
||||||
#assert s, trippara
|
#assert s, trippara
|
||||||
continue
|
continue
|
||||||
|
|
||||||
tripid, tripid1, tripdate, trippeople, triptitle, triptext, tu = s.groups()
|
tripid, tripid1, tripdate, trippeople, triptitle, triptext, tu = s.groups()
|
||||||
ldate = ParseDate(tripdate.strip(), year)
|
ldate = ParseDate(tripdate.strip(), year)
|
||||||
#assert tripid[:-1] == "t" + tripdate, (tripid, tripdate)
|
#assert tripid[:-1] == "t" + tripdate, (tripid, tripdate)
|
||||||
trippeople = re.sub("Ol(?!l)", "Olly", trippeople)
|
trippeople = re.sub(r"Ol(?!l)", "Olly", trippeople)
|
||||||
trippeople = re.sub("Wook(?!e)", "Wookey", trippeople)
|
trippeople = re.sub(r"Wook(?!e)", "Wookey", trippeople)
|
||||||
triptitles = triptitle.split(" - ")
|
triptitles = triptitle.split(" - ")
|
||||||
if len(triptitles) >= 2:
|
if len(triptitles) >= 2:
|
||||||
tripcave = triptitles[0]
|
tripcave = triptitles[0]
|
||||||
else:
|
else:
|
||||||
tripcave = "UNKNOWN"
|
tripcave = "UNKNOWN"
|
||||||
#print "\n", tripcave, "--- ppp", trippeople, len(triptext)
|
#print "\n", tripcave, "--- ppp", trippeople, len(triptext)
|
||||||
ltriptext = re.sub("</p>", "", triptext)
|
ltriptext = re.sub(r"</p>", "", triptext)
|
||||||
ltriptext = re.sub("\s*?\n\s*", " ", ltriptext)
|
ltriptext = re.sub(r"\s*?\n\s*", " ", ltriptext)
|
||||||
ltriptext = re.sub("<p>", "\n\n", ltriptext).strip()
|
ltriptext = re.sub(r"<p>", "\n\n", ltriptext).strip()
|
||||||
EnterLogIntoDbase(date = ldate, place = tripcave, title = triptitle, text = ltriptext, trippeople=trippeople, expedition=expedition, logtime_underground=0)
|
EnterLogIntoDbase(date = ldate, place = tripcave, title = triptitle, text = ltriptext, trippeople=trippeople, expedition=expedition, logtime_underground=0)
|
||||||
|
if logbook_entry_count == 0:
|
||||||
|
print(" - No trip entrys found in logbook, check the syntax matches htmltxt format")
|
||||||
|
|
||||||
|
|
||||||
# main parser for pre-2001. simpler because the data has been hacked so much to fit it
|
# main parser for pre-2001. simpler because the data has been hacked so much to fit it
|
||||||
def Parseloghtml01(year, expedition, txt):
|
def Parseloghtml01(year, expedition, txt):
|
||||||
tripparas = re.findall("<hr[\s/]*>([\s\S]*?)(?=<hr)", txt)
|
tripparas = re.findall(r"<hr[\s/]*>([\s\S]*?)(?=<hr)", txt)
|
||||||
for trippara in tripparas:
|
for trippara in tripparas:
|
||||||
s = re.match(u"(?s)\s*(?:<p>)?(.*?)</?p>(.*)$(?i)", trippara)
|
s = re.match(u"(?s)\s*(?:<p>)?(.*?)</?p>(.*)$(?i)", trippara)
|
||||||
assert s, trippara[:300]
|
assert s, trippara[:300]
|
||||||
tripheader, triptext = s.group(1), s.group(2)
|
tripheader, triptext = s.group(1), s.group(2)
|
||||||
mtripid = re.search('<a id="(.*?)"', tripheader)
|
mtripid = re.search(r'<a id="(.*?)"', tripheader)
|
||||||
tripid = mtripid and mtripid.group(1) or ""
|
tripid = mtripid and mtripid.group(1) or ""
|
||||||
tripheader = re.sub("</?(?:[ab]|span)[^>]*>", "", tripheader)
|
tripheader = re.sub(r"</?(?:[ab]|span)[^>]*>", "", tripheader)
|
||||||
|
|
||||||
#print " ", [tripheader]
|
#print " ", [tripheader]
|
||||||
#continue
|
#continue
|
||||||
@@ -194,7 +200,7 @@ def Parseloghtml01(year, expedition, txt):
|
|||||||
tripdate, triptitle, trippeople = tripheader.split("|")
|
tripdate, triptitle, trippeople = tripheader.split("|")
|
||||||
ldate = ParseDate(tripdate.strip(), year)
|
ldate = ParseDate(tripdate.strip(), year)
|
||||||
|
|
||||||
mtu = re.search('<p[^>]*>(T/?U.*)', triptext)
|
mtu = re.search(r'<p[^>]*>(T/?U.*)', triptext)
|
||||||
if mtu:
|
if mtu:
|
||||||
tu = mtu.group(1)
|
tu = mtu.group(1)
|
||||||
triptext = triptext[:mtu.start(0)] + triptext[mtu.end():]
|
triptext = triptext[:mtu.start(0)] + triptext[mtu.end():]
|
||||||
@@ -206,17 +212,17 @@ def Parseloghtml01(year, expedition, txt):
|
|||||||
|
|
||||||
ltriptext = triptext
|
ltriptext = triptext
|
||||||
|
|
||||||
mtail = re.search('(?:<a href="[^"]*">[^<]*</a>|\s|/|-|&|</?p>|\((?:same day|\d+)\))*$', ltriptext)
|
mtail = re.search(r'(?:<a href="[^"]*">[^<]*</a>|\s|/|-|&|</?p>|\((?:same day|\d+)\))*$', ltriptext)
|
||||||
if mtail:
|
if mtail:
|
||||||
#print mtail.group(0)
|
#print mtail.group(0)
|
||||||
ltriptext = ltriptext[:mtail.start(0)]
|
ltriptext = ltriptext[:mtail.start(0)]
|
||||||
ltriptext = re.sub("</p>", "", ltriptext)
|
ltriptext = re.sub(r"</p>", "", ltriptext)
|
||||||
ltriptext = re.sub("\s*?\n\s*", " ", ltriptext)
|
ltriptext = re.sub(r"\s*?\n\s*", " ", ltriptext)
|
||||||
ltriptext = re.sub("<p>|<br>", "\n\n", ltriptext).strip()
|
ltriptext = re.sub(r"<p>|<br>", "\n\n", ltriptext).strip()
|
||||||
#ltriptext = re.sub("[^\s0-9a-zA-Z\-.,:;'!]", "NONASCII", ltriptext)
|
#ltriptext = re.sub("[^\s0-9a-zA-Z\-.,:;'!]", "NONASCII", ltriptext)
|
||||||
ltriptext = re.sub("</?u>", "_", ltriptext)
|
ltriptext = re.sub(r"</?u>", "_", ltriptext)
|
||||||
ltriptext = re.sub("</?i>", "''", ltriptext)
|
ltriptext = re.sub(r"</?i>", "''", ltriptext)
|
||||||
ltriptext = re.sub("</?b>", "'''", ltriptext)
|
ltriptext = re.sub(r"</?b>", "'''", ltriptext)
|
||||||
|
|
||||||
|
|
||||||
#print ldate, trippeople.strip()
|
#print ldate, trippeople.strip()
|
||||||
@@ -225,19 +231,19 @@ def Parseloghtml01(year, expedition, txt):
|
|||||||
|
|
||||||
|
|
||||||
def Parseloghtml03(year, expedition, txt):
|
def Parseloghtml03(year, expedition, txt):
|
||||||
tripparas = re.findall("<hr\s*/>([\s\S]*?)(?=<hr)", txt)
|
tripparas = re.findall(r"<hr\s*/>([\s\S]*?)(?=<hr)", txt)
|
||||||
for trippara in tripparas:
|
for trippara in tripparas:
|
||||||
s = re.match(u"(?s)\s*<p>(.*?)</p>(.*)$", trippara)
|
s = re.match(u"(?s)\s*<p>(.*?)</p>(.*)$", trippara)
|
||||||
assert s, trippara
|
assert s, trippara
|
||||||
tripheader, triptext = s.group(1), s.group(2)
|
tripheader, triptext = s.group(1), s.group(2)
|
||||||
tripheader = re.sub(" ", " ", tripheader)
|
tripheader = re.sub(r" ", " ", tripheader)
|
||||||
tripheader = re.sub("\s+", " ", tripheader).strip()
|
tripheader = re.sub(r"\s+", " ", tripheader).strip()
|
||||||
sheader = tripheader.split(" -- ")
|
sheader = tripheader.split(" -- ")
|
||||||
tu = ""
|
tu = ""
|
||||||
if re.match("T/U|Time underwater", sheader[-1]):
|
if re.match("T/U|Time underwater", sheader[-1]):
|
||||||
tu = sheader.pop()
|
tu = sheader.pop()
|
||||||
if len(sheader) != 3:
|
if len(sheader) != 3:
|
||||||
print "header not three pieces", sheader
|
print("header not three pieces", sheader)
|
||||||
tripdate, triptitle, trippeople = sheader
|
tripdate, triptitle, trippeople = sheader
|
||||||
ldate = ParseDate(tripdate.strip(), year)
|
ldate = ParseDate(tripdate.strip(), year)
|
||||||
triptitles = triptitle.split(" , ")
|
triptitles = triptitle.split(" , ")
|
||||||
@@ -246,37 +252,12 @@ def Parseloghtml03(year, expedition, txt):
|
|||||||
else:
|
else:
|
||||||
tripcave = "UNKNOWN"
|
tripcave = "UNKNOWN"
|
||||||
#print tripcave, "--- ppp", triptitle, trippeople, len(triptext)
|
#print tripcave, "--- ppp", triptitle, trippeople, len(triptext)
|
||||||
ltriptext = re.sub("</p>", "", triptext)
|
ltriptext = re.sub(r"</p>", "", triptext)
|
||||||
ltriptext = re.sub("\s*?\n\s*", " ", ltriptext)
|
ltriptext = re.sub(r"\s*?\n\s*", " ", ltriptext)
|
||||||
ltriptext = re.sub("<p>", "\n\n", ltriptext).strip()
|
ltriptext = re.sub(r"<p>", "\n\n", ltriptext).strip()
|
||||||
ltriptext = re.sub("[^\s0-9a-zA-Z\-.,:;'!&()\[\]<>?=+*%]", "_NONASCII_", ltriptext)
|
ltriptext = re.sub(r"[^\s0-9a-zA-Z\-.,:;'!&()\[\]<>?=+*%]", "_NONASCII_", ltriptext)
|
||||||
EnterLogIntoDbase(date = ldate, place = tripcave, title = triptitle, text = ltriptext, trippeople=trippeople, expedition=expedition, logtime_underground=0)
|
EnterLogIntoDbase(date = ldate, place = tripcave, title = triptitle, text = ltriptext, trippeople=trippeople, expedition=expedition, logtime_underground=0)
|
||||||
|
|
||||||
yearlinks = [
|
|
||||||
# ("2013", "2013/logbook.html", Parseloghtmltxt),
|
|
||||||
("2012", "2012/logbook.html", Parseloghtmltxt),
|
|
||||||
("2011", "2011/logbook.html", Parseloghtmltxt),
|
|
||||||
("2010", "2010/logbook.html", Parselogwikitxt),
|
|
||||||
("2009", "2009/2009logbook.txt", Parselogwikitxt),
|
|
||||||
("2008", "2008/2008logbook.txt", Parselogwikitxt),
|
|
||||||
("2007", "2007/logbook.html", Parseloghtmltxt),
|
|
||||||
("2006", "2006/logbook/logbook_06.txt", Parselogwikitxt),
|
|
||||||
("2005", "2005/logbook.html", Parseloghtmltxt),
|
|
||||||
("2004", "2004/logbook.html", Parseloghtmltxt),
|
|
||||||
("2003", "2003/logbook.html", Parseloghtml03),
|
|
||||||
("2002", "2002/logbook.html", Parseloghtmltxt),
|
|
||||||
("2001", "2001/log.htm", Parseloghtml01),
|
|
||||||
("2000", "2000/log.htm", Parseloghtml01),
|
|
||||||
("1999", "1999/log.htm", Parseloghtml01),
|
|
||||||
("1998", "1998/log.htm", Parseloghtml01),
|
|
||||||
("1997", "1997/log.htm", Parseloghtml01),
|
|
||||||
("1996", "1996/log.htm", Parseloghtml01),
|
|
||||||
("1995", "1995/log.htm", Parseloghtml01),
|
|
||||||
("1994", "1994/log.htm", Parseloghtml01),
|
|
||||||
("1993", "1993/log.htm", Parseloghtml01),
|
|
||||||
("1992", "1992/log.htm", Parseloghtml01),
|
|
||||||
("1991", "1991/log.htm", Parseloghtml01),
|
|
||||||
]
|
|
||||||
|
|
||||||
def SetDatesFromLogbookEntries(expedition):
|
def SetDatesFromLogbookEntries(expedition):
|
||||||
"""
|
"""
|
||||||
@@ -297,21 +278,39 @@ def SetDatesFromLogbookEntries(expedition):
|
|||||||
persontrip.save()
|
persontrip.save()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def LoadLogbookForExpedition(expedition):
|
def LoadLogbookForExpedition(expedition):
|
||||||
""" Parses all logbook entries for one expedition """
|
""" Parses all logbook entries for one expedition """
|
||||||
|
|
||||||
expowebbase = os.path.join(settings.EXPOWEB, "years")
|
expowebbase = os.path.join(settings.EXPOWEB, "years")
|
||||||
year = str(expedition.year)
|
year = str(expedition.year)
|
||||||
for lyear, lloc, parsefunc in yearlinks:
|
yearlinks = settings.LOGBOOK_PARSER_SETTINGS
|
||||||
if lyear == year:
|
|
||||||
break
|
logbook_parseable = False
|
||||||
fin = open(os.path.join(expowebbase, lloc))
|
|
||||||
print "opennning", lloc
|
if expedition.year in yearlinks:
|
||||||
txt = fin.read().decode("latin1")
|
year_settings = yearlinks[expedition.year]
|
||||||
fin.close()
|
file_in = open(os.path.join(expowebbase, year_settings[0]))
|
||||||
parsefunc(year, expedition, txt)
|
txt = file_in.read().decode("latin1")
|
||||||
|
file_in.close()
|
||||||
|
parsefunc = year_settings[1]
|
||||||
|
logbook_parseable = True
|
||||||
|
else:
|
||||||
|
try:
|
||||||
|
file_in = open(os.path.join(expowebbase, expedition.year, settings.DEFAULT_LOGBOOK_FILE))
|
||||||
|
txt = file_in.read().decode("latin1")
|
||||||
|
file_in.close()
|
||||||
|
logbook_parseable = True
|
||||||
|
print("No set parser found using default")
|
||||||
|
parsefunc = settings.DEFAULT_LOGBOOK_PARSER
|
||||||
|
except (IOError):
|
||||||
|
logbook_parseable = False
|
||||||
|
print("Couldn't open default logbook file and nothing set for expo " + expo.year)
|
||||||
|
|
||||||
|
if logbook_parseable:
|
||||||
|
parser = globals()[parsefunc]
|
||||||
|
parser(expedition.year, expedition, txt)
|
||||||
SetDatesFromLogbookEntries(expedition)
|
SetDatesFromLogbookEntries(expedition)
|
||||||
|
|
||||||
return "TOLOAD: " + year + " " + str(expedition.personexpedition_set.all()[1].logbookentry_set.count()) + " " + str(models.PersonTrip.objects.filter(personexpedition__expedition=expedition).count())
|
return "TOLOAD: " + year + " " + str(expedition.personexpedition_set.all()[1].logbookentry_set.count()) + " " + str(models.PersonTrip.objects.filter(personexpedition__expedition=expedition).count())
|
||||||
|
|
||||||
|
|
||||||
@@ -324,25 +323,49 @@ def LoadLogbooks():
|
|||||||
#yearlinks = [ ("2001", "2001/log.htm", Parseloghtml01), ] #overwrite
|
#yearlinks = [ ("2001", "2001/log.htm", Parseloghtml01), ] #overwrite
|
||||||
#yearlinks = [ ("1996", "1996/log.htm", Parseloghtml01),] # overwrite
|
#yearlinks = [ ("1996", "1996/log.htm", Parseloghtml01),] # overwrite
|
||||||
|
|
||||||
for year, lloc, parsefunc in yearlinks:
|
yearlinks = settings.LOGBOOK_PARSER_SETTINGS
|
||||||
# This will not work until the corresponding year exists in the database.
|
|
||||||
# In 2012 this needed noscript/folk.csv to be updated first.
|
|
||||||
expedition = models.Expedition.objects.filter(year = year)[0]
|
|
||||||
fin = open(os.path.join(expowebbase, lloc))
|
|
||||||
txt = fin.read().decode("latin1")
|
|
||||||
fin.close()
|
|
||||||
parsefunc(year, expedition, txt)
|
|
||||||
SetDatesFromLogbookEntries(expedition)
|
|
||||||
|
|
||||||
dateRegex = re.compile('<span\s+class="date">(\d\d\d\d)-(\d\d)-(\d\d)</span>', re.S)
|
expos = models.Expedition.objects.all()
|
||||||
expeditionYearRegex = re.compile('<span\s+class="expeditionyear">(.*?)</span>', re.S)
|
for expo in expos:
|
||||||
titleRegex = re.compile('<H1>(.*?)</H1>', re.S)
|
print("\nLoading Logbook for: " + expo.year)
|
||||||
reportRegex = re.compile('<div\s+class="report">(.*)</div>\s*</body>', re.S)
|
|
||||||
personRegex = re.compile('<div\s+class="person">(.*?)</div>', re.S)
|
logbook_parseable = False
|
||||||
nameAuthorRegex = re.compile('<span\s+class="name(,author|)">(.*?)</span>', re.S)
|
|
||||||
TURegex = re.compile('<span\s+class="TU">([0-9]*\.?[0-9]+)</span>', re.S)
|
if expo.year in yearlinks:
|
||||||
locationRegex = re.compile('<span\s+class="location">(.*?)</span>', re.S)
|
#print(yearlinks[expo.year])
|
||||||
caveRegex = re.compile('<span\s+class="cave">(.*?)</span>', re.S)
|
year_settings = yearlinks[expo.year]
|
||||||
|
file_in = open(os.path.join(expowebbase, year_settings[0]))
|
||||||
|
txt = file_in.read().decode("latin1")
|
||||||
|
file_in.close()
|
||||||
|
parsefunc = year_settings[1]
|
||||||
|
logbook_parseable = True
|
||||||
|
else:
|
||||||
|
try:
|
||||||
|
file_in = open(os.path.join(expowebbase, expo.year, settings.DEFAULT_LOGBOOK_FILE))
|
||||||
|
txt = file_in.read().decode("latin1")
|
||||||
|
file_in.close()
|
||||||
|
logbook_parseable = True
|
||||||
|
print("No set parser found using default")
|
||||||
|
parsefunc = settings.DEFAULT_LOGBOOK_PARSER
|
||||||
|
except (IOError):
|
||||||
|
logbook_parseable = False
|
||||||
|
print("Couldn't open default logbook file and nothing in settings for expo " + expo.year)
|
||||||
|
|
||||||
|
if logbook_parseable:
|
||||||
|
parser = globals()[parsefunc]
|
||||||
|
parser(expo.year, expo, txt)
|
||||||
|
SetDatesFromLogbookEntries(expo)
|
||||||
|
|
||||||
|
|
||||||
|
dateRegex = re.compile(r'<span\s+class="date">(\d\d\d\d)-(\d\d)-(\d\d)</span>', re.S)
|
||||||
|
expeditionYearRegex = re.compile(r'<span\s+class="expeditionyear">(.*?)</span>', re.S)
|
||||||
|
titleRegex = re.compile(r'<H1>(.*?)</H1>', re.S)
|
||||||
|
reportRegex = re.compile(r'<div\s+class="report">(.*)</div>\s*</body>', re.S)
|
||||||
|
personRegex = re.compile(r'<div\s+class="person">(.*?)</div>', re.S)
|
||||||
|
nameAuthorRegex = re.compile(r'<span\s+class="name(,author|)">(.*?)</span>', re.S)
|
||||||
|
TURegex = re.compile(r'<span\s+class="TU">([0-9]*\.?[0-9]+)</span>', re.S)
|
||||||
|
locationRegex = re.compile(r'<span\s+class="location">(.*?)</span>', re.S)
|
||||||
|
caveRegex = re.compile(r'<span\s+class="cave">(.*?)</span>', re.S)
|
||||||
|
|
||||||
def parseAutoLogBookEntry(filename):
|
def parseAutoLogBookEntry(filename):
|
||||||
errors = []
|
errors = []
|
||||||
@@ -435,4 +458,4 @@ def parseAutoLogBookEntry(filename):
|
|||||||
time_underground = TU,
|
time_underground = TU,
|
||||||
logbook_entry = logbookEntry,
|
logbook_entry = logbookEntry,
|
||||||
is_logbook_entry_author = author).save()
|
is_logbook_entry_author = author).save()
|
||||||
print logbookEntry
|
print(logbookEntry)
|
||||||
|
|||||||
@@ -50,7 +50,7 @@ def LoadPersonsExpos():
|
|||||||
header = dict(zip(headers, range(len(headers))))
|
header = dict(zip(headers, range(len(headers))))
|
||||||
|
|
||||||
# make expeditions
|
# make expeditions
|
||||||
print "Loading expeditions"
|
print("Loading expeditions")
|
||||||
years = headers[5:]
|
years = headers[5:]
|
||||||
|
|
||||||
for year in years:
|
for year in years:
|
||||||
@@ -61,7 +61,7 @@ def LoadPersonsExpos():
|
|||||||
|
|
||||||
|
|
||||||
# make persons
|
# make persons
|
||||||
print "Loading personexpeditions"
|
print("Loading personexpeditions")
|
||||||
#expoers2008 = """Edvin Deadman,Kathryn Hopkins,Djuke Veldhuis,Becka Lawson,Julian Todd,Natalie Uomini,Aaron Curtis,Tony Rooke,Ollie Stevens,Frank Tully,Martin Jahnke,Mark Shinwell,Jess Stirrups,Nial Peters,Serena Povia,Olly Madge,Steve Jones,Pete Harley,Eeva Makiranta,Keith Curtis""".split(",")
|
#expoers2008 = """Edvin Deadman,Kathryn Hopkins,Djuke Veldhuis,Becka Lawson,Julian Todd,Natalie Uomini,Aaron Curtis,Tony Rooke,Ollie Stevens,Frank Tully,Martin Jahnke,Mark Shinwell,Jess Stirrups,Nial Peters,Serena Povia,Olly Madge,Steve Jones,Pete Harley,Eeva Makiranta,Keith Curtis""".split(",")
|
||||||
#expomissing = set(expoers2008)
|
#expomissing = set(expoers2008)
|
||||||
|
|
||||||
@@ -127,7 +127,7 @@ def GetPersonExpeditionNameLookup(expedition):
|
|||||||
res = { }
|
res = { }
|
||||||
duplicates = set()
|
duplicates = set()
|
||||||
|
|
||||||
print "Calculating GetPersonExpeditionNameLookup for", expedition.year
|
print("Calculating GetPersonExpeditionNameLookup for " + expedition.year)
|
||||||
personexpeditions = models.PersonExpedition.objects.filter(expedition=expedition)
|
personexpeditions = models.PersonExpedition.objects.filter(expedition=expedition)
|
||||||
for personexpedition in personexpeditions:
|
for personexpedition in personexpeditions:
|
||||||
possnames = [ ]
|
possnames = [ ]
|
||||||
|
|||||||
34
settings.py
34
settings.py
@@ -53,6 +53,40 @@ SURVEYS_URL = urlparse.urljoin(URL_ROOT , '/survey_scans/')
|
|||||||
PHOTOS_URL = urlparse.urljoin(URL_ROOT , '/photos/')
|
PHOTOS_URL = urlparse.urljoin(URL_ROOT , '/photos/')
|
||||||
SVX_URL = urlparse.urljoin(URL_ROOT , '/survex/')
|
SVX_URL = urlparse.urljoin(URL_ROOT , '/survex/')
|
||||||
|
|
||||||
|
DEFAULT_LOGBOOK_PARSER = "Parseloghtmltxt"
|
||||||
|
DEFAULT_LOGBOOK_FILE = "logbook.html"
|
||||||
|
|
||||||
|
LOGBOOK_PARSER_SETTINGS = {
|
||||||
|
"2018": ("2018/logbook.html", "Parseloghtmltxt"),
|
||||||
|
"2017": ("2017/logbook.html", "Parseloghtmltxt"),
|
||||||
|
"2016": ("2016/logbook.html", "Parseloghtmltxt"),
|
||||||
|
"2015": ("2015/logbook.html", "Parseloghtmltxt"),
|
||||||
|
"2014": ("2014/logbook.html", "Parseloghtmltxt"),
|
||||||
|
"2013": ("2013/logbook.html", "Parseloghtmltxt"),
|
||||||
|
"2012": ("2012/logbook.html", "Parseloghtmltxt"),
|
||||||
|
"2011": ("2011/logbook.html", "Parseloghtmltxt"),
|
||||||
|
"2010": ("2010/logbook.html", "Parselogwikitxt"),
|
||||||
|
"2009": ("2009/2009logbook.txt", "Parselogwikitxt"),
|
||||||
|
"2008": ("2008/2008logbook.txt", "Parselogwikitxt"),
|
||||||
|
"2007": ("2007/logbook.html", "Parseloghtmltxt"),
|
||||||
|
"2006": ("2006/logbook/logbook_06.txt", "Parselogwikitxt"),
|
||||||
|
"2005": ("2005/logbook.html", "Parseloghtmltxt"),
|
||||||
|
"2004": ("2004/logbook.html", "Parseloghtmltxt"),
|
||||||
|
"2003": ("2003/logbook.html", "Parseloghtml03"),
|
||||||
|
"2002": ("2002/logbook.html", "Parseloghtmltxt"),
|
||||||
|
"2001": ("2001/log.htm", "Parseloghtml01"),
|
||||||
|
"2000": ("2000/log.htm", "Parseloghtml01"),
|
||||||
|
"1999": ("1999/log.htm", "Parseloghtml01"),
|
||||||
|
"1998": ("1998/log.htm", "Parseloghtml01"),
|
||||||
|
"1997": ("1997/log.htm", "Parseloghtml01"),
|
||||||
|
"1996": ("1996/log.htm", "Parseloghtml01"),
|
||||||
|
"1995": ("1995/log.htm", "Parseloghtml01"),
|
||||||
|
"1994": ("1994/log.htm", "Parseloghtml01"),
|
||||||
|
"1993": ("1993/log.htm", "Parseloghtml01"),
|
||||||
|
"1992": ("1992/log.htm", "Parseloghtml01"),
|
||||||
|
"1991": ("1991/log.htm", "Parseloghtml01"),
|
||||||
|
}
|
||||||
|
|
||||||
APPEND_SLASH = False
|
APPEND_SLASH = False
|
||||||
SMART_APPEND_SLASH = True
|
SMART_APPEND_SLASH = True
|
||||||
|
|
||||||
|
|||||||
@@ -47,7 +47,7 @@ an "S" for a survey trip. The colours are the same for people on the same trip.
|
|||||||
{% if persondayactivities.persontrips or persondayactivities.survexblocks %}
|
{% if persondayactivities.persontrips or persondayactivities.survexblocks %}
|
||||||
<td class="persondayactivity">
|
<td class="persondayactivity">
|
||||||
{% for persontrip in persondayactivities.persontrips %}
|
{% for persontrip in persondayactivities.persontrips %}
|
||||||
<a href="{{persontrip.logbook_entry.get_absolute_url}}" class="dayindexlog-{{persontrip.logbook_entry.DayIndex}}">T</a>
|
<a href="{{persontrip.logbook_entry.get_absolute_url}}" class="dayindexlog-1">T</a>
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
<br/>
|
<br/>
|
||||||
{% for survexblock in persondayactivities.survexblocks %}
|
{% for survexblock in persondayactivities.survexblocks %}
|
||||||
|
|||||||
Reference in New Issue
Block a user