diff --git a/expo/models_logbooks.py b/expo/models_logbooks.py index 22c0ee9..e9cf391 100644 --- a/expo/models_logbooks.py +++ b/expo/models_logbooks.py @@ -8,18 +8,18 @@ class Expedition(models.Model): start_date = models.DateField(blank=True,null=True) end_date = models.DateField(blank=True,null=True) - def __str__(self): + def __unicode__(self): return self.year def GetPersonExpedition(self, name): - personexpeditions = PersonExpedition.objects.filter(expedition=self) + person_expeditions = PersonExpedition.objects.filter(expedition=self) res = None - for personexpedition in personexpeditions: - for possiblenameform in personexpedition.GetPossibleNameForms(): - #print "nnn", possiblenameform - if name == possiblenameform: + for person_expedition in person_expeditions: + for possible_name_from in person_expedition.GetPossibleNameForms(): + #print "nnn", possiblenamefrom + if name == possible_name_from: assert not res, "Ambiguous: " + name - res = personexpedition + res = person_expedition return res @@ -28,7 +28,7 @@ class Person(models.Model): last_name = models.CharField(max_length=100) is_vfho = models.BooleanField() mug_shot = models.CharField(max_length=100, blank=True,null=True) - def __str__(self): + def __unicode__(self): return "%s %s" % (self.first_name, self.last_name) class PersonExpedition(models.Model): @@ -49,7 +49,7 @@ class PersonExpedition(models.Model): res.append(self.nickname) return res - def __str__(self): + def __unicode__(self): return "%s: (%s)" % (self.person, self.expedition) @@ -64,22 +64,22 @@ class LogbookEntry(models.Model): # several PersonTrips point in to this object - def __str__(self): + def __unicode__(self): return "%s: (%s)" % (self.date, self.title) class PersonTrip(models.Model): - personexpedition = models.ForeignKey(PersonExpedition) + person_expedition = models.ForeignKey(PersonExpedition) # this will be a foreign key of the place(s) the trip went through # possibly a trip has a plurality of triplets pointing into it place = models.CharField(max_length=100) date = models.DateField() - timeunderground = models.CharField(max_length=100) - logbookentry = models.ForeignKey(LogbookEntry) - is_logbookentryauthor = models.BooleanField() + time_underground = models.CharField(max_length=100) + logbook_entry = models.ForeignKey(LogbookEntry) + is_logbook_entry_author = models.BooleanField() - def __str__(self): - return "%s %s (%s)" % (self.personexpedition, self.place, self.date) + def __unicode__(self): + return "%s %s (%s)" % (self.person_expedition, self.place, self.date) diff --git a/parsers/logbooks.py b/parsers/logbooks.py index c9e1651..900022f 100644 --- a/parsers/logbooks.py +++ b/parsers/logbooks.py @@ -30,15 +30,15 @@ def LoadPersons(): for person in personreader: name = person[header["Name"]] name = re.sub("<.*?>", "", name) - mname = re.match("(\w+)(?:\s((?:van |ten )?\w+))?(?:\s\(([^)]*)\))?", name) + mname = re.match("(\w+)(?:\s((?:van |ten )?\w+))?(?:\s\(([^)]*)\))?", name) if mname.group(3): nickname = mname.group(3) else: nickname = "" - firstname, lastname = mname.group(1), mname.group(2) or "" - + firstname, lastname = mname.group(1), mname.group(2) or "" + #print firstname, lastname, "NNN", nickname #assert lastname == person[header[""]], person @@ -53,14 +53,14 @@ def LoadPersons(): yo = models.Expedition.objects.filter(year = year)[0] if attended == "1" or attended == "-1": pyo = models.PersonExpedition(person = pObject, expedition = yo, nickname=nickname, is_guest=is_guest) - pyo.save() - - # error - elif (firstname, lastname) == ("Mike", "Richardson") and year == "2001": - print "Mike Richardson(2001) error" + pyo.save() + + # error + elif (firstname, lastname) == ("Mike", "Richardson") and year == "2001": + print "Mike Richardson(2001) error" pyo = models.PersonExpedition(person = pObject, expedition = yo, nickname=nickname, is_guest=is_guest) - pyo.save() - + pyo.save() + if name in expoers2008: print "2008:", name @@ -98,8 +98,8 @@ def GetTripPersons(trippeople, expedition): if tripperson and tripperson[0] != '*': #assert tripperson in personyearmap, "'%s' << %s\n\n %s" % (tripperson, trippeople, personyearmap) personyear = expedition.GetPersonExpedition(tripperson) - if not personyear: - print "NoMatchFor: '%s'" % tripperson + if not personyear: + print "NoMatchFor: '%s'" % tripperson res.append(personyear) if mul: author = personyear @@ -107,31 +107,31 @@ def GetTripPersons(trippeople, expedition): author = res[-1] return res, author -def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, tu): +def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, tu): trippersons, author = GetTripPersons(trippeople, expedition) lbo = models.LogbookEntry(date=date, place=place, title=title[:50], text=text, author=author) - lbo.save() + lbo.save() print "ttt", date, place for tripperson in trippersons: - pto = models.PersonTrip(personexpedition = tripperson, place=place, date=date, timeunderground=(tu or ""), - logbookentry=lbo, is_logbookentryauthor=(tripperson == author)) + pto = models.PersonTrip(person_expedition = tripperson, place=place, date=date, time_underground=(tu or ""), + logbook_entry=lbo, is_logbook_entry_author=(tripperson == author)) pto.save() - -def ParseDate(tripdate, year): + +def ParseDate(tripdate, year): mdatestandard = re.match("(\d\d\d\d)-(\d\d)-(\d\d)", tripdate) mdategoof = re.match("(\d\d?)/0?(\d)/(20|19)?(\d\d)", tripdate) if mdatestandard: - assert mdatestandard.group(1) == year, (tripdate, year) + assert mdatestandard.group(1) == year, (tripdate, year) year, month, day = int(mdatestandard.group(1)), int(mdatestandard.group(2)), int(mdatestandard.group(3)) - elif mdategoof: - assert not mdategoof.group(3) or mdategoof.group(3) == year[:2] + elif mdategoof: + assert not mdategoof.group(3) or mdategoof.group(3) == year[:2] yadd = int(year[:2]) * 100 day, month, year = int(mdategoof.group(1)), int(mdategoof.group(2)), int(mdategoof.group(4)) + yadd else: - assert False, tripdate + assert False, tripdate return datetime.date(year, month, day) - -# 2007, 2008, 2006 + +# 2007, 2008, 2006 def Parselogwikitxt(year, expedition, txt): trippara = re.findall("===(.*?)===([\s\S]*?)(?====)", txt) for triphead, triptext in trippara: @@ -150,11 +150,11 @@ def Parselogwikitxt(year, expedition, txt): tu = "" #assert tripcave == "Journey", (triphead, triptext) - ldate = ParseDate(tripdate.strip(), year) + ldate = ParseDate(tripdate.strip(), year) #print "\n", tripcave, "--- ppp", trippeople, len(triptext) - EnterLogIntoDbase(date = ldate, place = tripcave, title = tripplace, text = triptext, trippeople=trippeople, expedition=expedition, tu=tu) + EnterLogIntoDbase(date = ldate, place = tripcave, title = tripplace, text = triptext, trippeople=trippeople, expedition=expedition, tu=tu) -# 2002, 2004, 2005 +# 2002, 2004, 2005 def Parseloghtmltxt(year, expedition, txt): tripparas = re.findall("([\s\S]*?)(?== 2: - tripcave = triptitles[0] - else: + ldate = ParseDate(tripdate.strip(), year) + #assert tripid[:-1] == "t" + tripdate, (tripid, tripdate) + trippeople = re.sub("Ol(?!l)", "Olly", trippeople) + trippeople = re.sub("Wook(?!e)", "Wookey", trippeople) + triptitles = triptitle.split(" - ") + if len(triptitles) >= 2: + tripcave = triptitles[0] + else: tripcave = "UNKNOWN" #print "\n", tripcave, "--- ppp", trippeople, len(triptext) ltriptext = re.sub("

", "", triptext) ltriptext = re.sub("\s*?\n\s*", " ", ltriptext) - ltriptext = re.sub("

", "\n\n", ltriptext).strip() EnterLogIntoDbase(date = ldate, place = tripcave, title = triptitle, text = ltriptext, trippeople=trippeople, expedition=expedition, tu=tu) + ltriptext = re.sub("

", "\n\n", ltriptext).strip() + EnterLogIntoDbase(date = ldate, place = tripcave, title = triptitle, text = ltriptext, trippeople=trippeople, expedition=expedition, tu=tu) - -# main parser for pre-2001. simpler because the data has been hacked so much to fit it + +# main parser for pre-2001. simpler because the data has been hacked so much to fit it def Parseloghtml01(year, expedition, txt): tripparas = re.findall("([\s\S]*?)(?=)?(.*?)(.*)$(?i)", trippara) - assert s, trippara[:100] + assert s, trippara[:100] tripheader, triptext = s.group(1), s.group(2) - mtripid = re.search(']*>", "", tripheader) - - #print [tripheader] - #continue - + mtripid = re.search(']*>", "", tripheader) + + #print [tripheader] + #continue + tripdate, triptitle, trippeople = tripheader.split("|") - ldate = ParseDate(tripdate.strip(), year) - - mtu = re.search(']*>(T/?U.*)', triptext) - if mtu: - tu = mtu.group(1) - triptext = triptext[:mtu.start(0)] + triptext[mtu.end():] - else: - tu = "" - - triptitles = triptitle.split(" - ") - tripcave = triptitles[0].strip() - + ldate = ParseDate(tripdate.strip(), year) + + mtu = re.search(']*>(T/?U.*)', triptext) + if mtu: + tu = mtu.group(1) + triptext = triptext[:mtu.start(0)] + triptext[mtu.end():] + else: + tu = "" + + triptitles = triptitle.split(" - ") + tripcave = triptitles[0].strip() + ltriptext = re.sub("

", "", triptext) ltriptext = re.sub("\s*?\n\s*", " ", ltriptext) ltriptext = re.sub("

", "\n\n", ltriptext).strip() - #ltriptext = re.sub("[^\s0-9a-zA-Z\-.,:;'!]", "NONASCII", ltriptext) - - #print ldate, trippeople.strip() - # could includ the tripid (url link for cross referencing) - EnterLogIntoDbase(date = ldate, place = tripcave, title = triptitle, text = ltriptext, trippeople=trippeople, expedition=expedition, tu=tu) - + #ltriptext = re.sub("[^\s0-9a-zA-Z\-.,:;'!]", "NONASCII", ltriptext) + + #print ldate, trippeople.strip() + # could includ the tripid (url link for cross referencing) + EnterLogIntoDbase(date = ldate, place = tripcave, title = triptitle, text = ltriptext, trippeople=trippeople, expedition=expedition, tu=tu) + def Parseloghtml03(year, expedition, txt): tripparas = re.findall("([\s\S]*?)(?=(.*?)

(.*)$", trippara) - assert s, trippara + assert s, trippara tripheader, triptext = s.group(1), s.group(2) - tripheader = re.sub(" ", " ", tripheader) - tripheader = re.sub("\s+", " ", tripheader).strip() - sheader = tripheader.split(" -- ") - tu = "" - if re.match("T/U|Time underwater", sheader[-1]): - tu = sheader.pop() - if len(sheader) != 3: - print sheader - # continue - tripdate, triptitle, trippeople = sheader - ldate = ParseDate(tripdate.strip(), year) - triptitles = triptitle.split(" , ") - if len(triptitles) >= 2: - tripcave = triptitles[0] - else: + tripheader = re.sub(" ", " ", tripheader) + tripheader = re.sub("\s+", " ", tripheader).strip() + sheader = tripheader.split(" -- ") + tu = "" + if re.match("T/U|Time underwater", sheader[-1]): + tu = sheader.pop() + if len(sheader) != 3: + print sheader + # continue + tripdate, triptitle, trippeople = sheader + ldate = ParseDate(tripdate.strip(), year) + triptitles = triptitle.split(" , ") + if len(triptitles) >= 2: + tripcave = triptitles[0] + else: tripcave = "UNKNOWN" #print tripcave, "--- ppp", triptitle, trippeople, len(triptext) ltriptext = re.sub("

", "", triptext) ltriptext = re.sub("\s*?\n\s*", " ", ltriptext) - ltriptext = re.sub("

", "\n\n", ltriptext).strip() - ltriptext = re.sub("[^\s0-9a-zA-Z\-.,:;'!&()\[\]<>?=+*%]", "_NONASCII_", ltriptext) - EnterLogIntoDbase(date = ldate, place = tripcave, title = triptitle, text = ltriptext, trippeople=trippeople, expedition=expedition, tu=tu) + ltriptext = re.sub("

", "\n\n", ltriptext).strip() + ltriptext = re.sub("[^\s0-9a-zA-Z\-.,:;'!&()\[\]<>?=+*%]", "_NONASCII_", ltriptext) + EnterLogIntoDbase(date = ldate, place = tripcave, title = triptitle, text = ltriptext, trippeople=trippeople, expedition=expedition, tu=tu) def LoadLogbooks(): models.LogbookEntry.objects.all().delete() @@ -272,12 +273,13 @@ def LoadLogbooks(): expedition = models.Expedition.objects.filter(year = year)[0] fin = open(os.path.join(expowebbase, lloc)) txt = fin.read() - fin.close() - parsefunc(year, expedition, txt) + fin.close() + parsefunc(year, expedition, txt) # command line run through the loading stages # you can comment out these in turn to control what gets reloaded -LoadExpos() LoadPersons() +LoadExpos() +LoadPersons() LoadLogbooks()