[svn r8076] Djangoed Julian's code

added underscores to field names
turned __str__ to __unicode__
This commit is contained in:
julian 2008-11-08 19:24:03 +01:00
parent f08d6b6ecf
commit b85c962575
2 changed files with 104 additions and 102 deletions

@ -8,18 +8,18 @@ class Expedition(models.Model):
start_date = models.DateField(blank=True,null=True) start_date = models.DateField(blank=True,null=True)
end_date = models.DateField(blank=True,null=True) end_date = models.DateField(blank=True,null=True)
def __str__(self): def __unicode__(self):
return self.year return self.year
def GetPersonExpedition(self, name): def GetPersonExpedition(self, name):
personexpeditions = PersonExpedition.objects.filter(expedition=self) person_expeditions = PersonExpedition.objects.filter(expedition=self)
res = None res = None
for personexpedition in personexpeditions: for person_expedition in person_expeditions:
for possiblenameform in personexpedition.GetPossibleNameForms(): for possible_name_from in person_expedition.GetPossibleNameForms():
#print "nnn", possiblenameform #print "nnn", possiblenamefrom
if name == possiblenameform: if name == possible_name_from:
assert not res, "Ambiguous: " + name assert not res, "Ambiguous: " + name
res = personexpedition res = person_expedition
return res return res
@ -28,7 +28,7 @@ class Person(models.Model):
last_name = models.CharField(max_length=100) last_name = models.CharField(max_length=100)
is_vfho = models.BooleanField() is_vfho = models.BooleanField()
mug_shot = models.CharField(max_length=100, blank=True,null=True) mug_shot = models.CharField(max_length=100, blank=True,null=True)
def __str__(self): def __unicode__(self):
return "%s %s" % (self.first_name, self.last_name) return "%s %s" % (self.first_name, self.last_name)
class PersonExpedition(models.Model): class PersonExpedition(models.Model):
@ -49,7 +49,7 @@ class PersonExpedition(models.Model):
res.append(self.nickname) res.append(self.nickname)
return res return res
def __str__(self): def __unicode__(self):
return "%s: (%s)" % (self.person, self.expedition) return "%s: (%s)" % (self.person, self.expedition)
@ -64,22 +64,22 @@ class LogbookEntry(models.Model):
# several PersonTrips point in to this object # several PersonTrips point in to this object
def __str__(self): def __unicode__(self):
return "%s: (%s)" % (self.date, self.title) return "%s: (%s)" % (self.date, self.title)
class PersonTrip(models.Model): class PersonTrip(models.Model):
personexpedition = models.ForeignKey(PersonExpedition) person_expedition = models.ForeignKey(PersonExpedition)
# this will be a foreign key of the place(s) the trip went through # this will be a foreign key of the place(s) the trip went through
# possibly a trip has a plurality of triplets pointing into it # possibly a trip has a plurality of triplets pointing into it
place = models.CharField(max_length=100) place = models.CharField(max_length=100)
date = models.DateField() date = models.DateField()
timeunderground = models.CharField(max_length=100) time_underground = models.CharField(max_length=100)
logbookentry = models.ForeignKey(LogbookEntry) logbook_entry = models.ForeignKey(LogbookEntry)
is_logbookentryauthor = models.BooleanField() is_logbook_entry_author = models.BooleanField()
def __str__(self): def __unicode__(self):
return "%s %s (%s)" % (self.personexpedition, self.place, self.date) return "%s %s (%s)" % (self.person_expedition, self.place, self.date)

@ -30,15 +30,15 @@ def LoadPersons():
for person in personreader: for person in personreader:
name = person[header["Name"]] name = person[header["Name"]]
name = re.sub("<.*?>", "", name) name = re.sub("<.*?>", "", name)
mname = re.match("(\w+)(?:\s((?:van |ten )?\w+))?(?:\s\(([^)]*)\))?", name) mname = re.match("(\w+)(?:\s((?:van |ten )?\w+))?(?:\s\(([^)]*)\))?", name)
if mname.group(3): if mname.group(3):
nickname = mname.group(3) nickname = mname.group(3)
else: else:
nickname = "" nickname = ""
firstname, lastname = mname.group(1), mname.group(2) or "" firstname, lastname = mname.group(1), mname.group(2) or ""
#print firstname, lastname, "NNN", nickname #print firstname, lastname, "NNN", nickname
#assert lastname == person[header[""]], person #assert lastname == person[header[""]], person
@ -53,14 +53,14 @@ def LoadPersons():
yo = models.Expedition.objects.filter(year = year)[0] yo = models.Expedition.objects.filter(year = year)[0]
if attended == "1" or attended == "-1": if attended == "1" or attended == "-1":
pyo = models.PersonExpedition(person = pObject, expedition = yo, nickname=nickname, is_guest=is_guest) pyo = models.PersonExpedition(person = pObject, expedition = yo, nickname=nickname, is_guest=is_guest)
pyo.save() pyo.save()
# error # error
elif (firstname, lastname) == ("Mike", "Richardson") and year == "2001": elif (firstname, lastname) == ("Mike", "Richardson") and year == "2001":
print "Mike Richardson(2001) error" print "Mike Richardson(2001) error"
pyo = models.PersonExpedition(person = pObject, expedition = yo, nickname=nickname, is_guest=is_guest) pyo = models.PersonExpedition(person = pObject, expedition = yo, nickname=nickname, is_guest=is_guest)
pyo.save() pyo.save()
if name in expoers2008: if name in expoers2008:
print "2008:", name print "2008:", name
@ -98,8 +98,8 @@ def GetTripPersons(trippeople, expedition):
if tripperson and tripperson[0] != '*': if tripperson and tripperson[0] != '*':
#assert tripperson in personyearmap, "'%s' << %s\n\n %s" % (tripperson, trippeople, personyearmap) #assert tripperson in personyearmap, "'%s' << %s\n\n %s" % (tripperson, trippeople, personyearmap)
personyear = expedition.GetPersonExpedition(tripperson) personyear = expedition.GetPersonExpedition(tripperson)
if not personyear: if not personyear:
print "NoMatchFor: '%s'" % tripperson print "NoMatchFor: '%s'" % tripperson
res.append(personyear) res.append(personyear)
if mul: if mul:
author = personyear author = personyear
@ -107,31 +107,31 @@ def GetTripPersons(trippeople, expedition):
author = res[-1] author = res[-1]
return res, author return res, author
def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, tu): def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, tu):
trippersons, author = GetTripPersons(trippeople, expedition) trippersons, author = GetTripPersons(trippeople, expedition)
lbo = models.LogbookEntry(date=date, place=place, title=title[:50], text=text, author=author) lbo = models.LogbookEntry(date=date, place=place, title=title[:50], text=text, author=author)
lbo.save() lbo.save()
print "ttt", date, place print "ttt", date, place
for tripperson in trippersons: for tripperson in trippersons:
pto = models.PersonTrip(personexpedition = tripperson, place=place, date=date, timeunderground=(tu or ""), pto = models.PersonTrip(person_expedition = tripperson, place=place, date=date, time_underground=(tu or ""),
logbookentry=lbo, is_logbookentryauthor=(tripperson == author)) logbook_entry=lbo, is_logbook_entry_author=(tripperson == author))
pto.save() pto.save()
def ParseDate(tripdate, year): def ParseDate(tripdate, year):
mdatestandard = re.match("(\d\d\d\d)-(\d\d)-(\d\d)", tripdate) mdatestandard = re.match("(\d\d\d\d)-(\d\d)-(\d\d)", tripdate)
mdategoof = re.match("(\d\d?)/0?(\d)/(20|19)?(\d\d)", tripdate) mdategoof = re.match("(\d\d?)/0?(\d)/(20|19)?(\d\d)", tripdate)
if mdatestandard: if mdatestandard:
assert mdatestandard.group(1) == year, (tripdate, year) assert mdatestandard.group(1) == year, (tripdate, year)
year, month, day = int(mdatestandard.group(1)), int(mdatestandard.group(2)), int(mdatestandard.group(3)) year, month, day = int(mdatestandard.group(1)), int(mdatestandard.group(2)), int(mdatestandard.group(3))
elif mdategoof: elif mdategoof:
assert not mdategoof.group(3) or mdategoof.group(3) == year[:2] assert not mdategoof.group(3) or mdategoof.group(3) == year[:2]
yadd = int(year[:2]) * 100 yadd = int(year[:2]) * 100
day, month, year = int(mdategoof.group(1)), int(mdategoof.group(2)), int(mdategoof.group(4)) + yadd day, month, year = int(mdategoof.group(1)), int(mdategoof.group(2)), int(mdategoof.group(4)) + yadd
else: else:
assert False, tripdate assert False, tripdate
return datetime.date(year, month, day) return datetime.date(year, month, day)
# 2007, 2008, 2006 # 2007, 2008, 2006
def Parselogwikitxt(year, expedition, txt): def Parselogwikitxt(year, expedition, txt):
trippara = re.findall("===(.*?)===([\s\S]*?)(?====)", txt) trippara = re.findall("===(.*?)===([\s\S]*?)(?====)", txt)
for triphead, triptext in trippara: for triphead, triptext in trippara:
@ -150,11 +150,11 @@ def Parselogwikitxt(year, expedition, txt):
tu = "" tu = ""
#assert tripcave == "Journey", (triphead, triptext) #assert tripcave == "Journey", (triphead, triptext)
ldate = ParseDate(tripdate.strip(), year) ldate = ParseDate(tripdate.strip(), year)
#print "\n", tripcave, "--- ppp", trippeople, len(triptext) #print "\n", tripcave, "--- ppp", trippeople, len(triptext)
EnterLogIntoDbase(date = ldate, place = tripcave, title = tripplace, text = triptext, trippeople=trippeople, expedition=expedition, tu=tu) EnterLogIntoDbase(date = ldate, place = tripcave, title = tripplace, text = triptext, trippeople=trippeople, expedition=expedition, tu=tu)
# 2002, 2004, 2005 # 2002, 2004, 2005
def Parseloghtmltxt(year, expedition, txt): def Parseloghtmltxt(year, expedition, txt):
tripparas = re.findall("<hr\s*/>([\s\S]*?)(?=<hr)", txt) tripparas = re.findall("<hr\s*/>([\s\S]*?)(?=<hr)", txt)
for trippara in tripparas: for trippara in tripparas:
@ -169,85 +169,86 @@ def Parseloghtmltxt(year, expedition, txt):
assert s, trippara assert s, trippara
tripid, tripid1, tripdate, trippeople, triptitle, triptext, tu = s.groups() tripid, tripid1, tripdate, trippeople, triptitle, triptext, tu = s.groups()
ldate = ParseDate(tripdate.strip(), year) ldate = ParseDate(tripdate.strip(), year)
#assert tripid[:-1] == "t" + tripdate, (tripid, tripdate) #assert tripid[:-1] == "t" + tripdate, (tripid, tripdate)
trippeople = re.sub("Ol(?!l)", "Olly", trippeople) trippeople = re.sub("Ol(?!l)", "Olly", trippeople)
trippeople = re.sub("Wook(?!e)", "Wookey", trippeople) trippeople = re.sub("Wook(?!e)", "Wookey", trippeople)
triptitles = triptitle.split(" - ") triptitles = triptitle.split(" - ")
if len(triptitles) >= 2: if len(triptitles) >= 2:
tripcave = triptitles[0] tripcave = triptitles[0]
else: else:
tripcave = "UNKNOWN" tripcave = "UNKNOWN"
#print "\n", tripcave, "--- ppp", trippeople, len(triptext) #print "\n", tripcave, "--- ppp", trippeople, len(triptext)
ltriptext = re.sub("</p>", "", triptext) ltriptext = re.sub("</p>", "", triptext)
ltriptext = re.sub("\s*?\n\s*", " ", ltriptext) ltriptext = re.sub("\s*?\n\s*", " ", ltriptext)
ltriptext = re.sub("<p>", "\n\n", ltriptext).strip() EnterLogIntoDbase(date = ldate, place = tripcave, title = triptitle, text = ltriptext, trippeople=trippeople, expedition=expedition, tu=tu) ltriptext = re.sub("<p>", "\n\n", ltriptext).strip()
EnterLogIntoDbase(date = ldate, place = tripcave, title = triptitle, text = ltriptext, trippeople=trippeople, expedition=expedition, tu=tu)
# main parser for pre-2001. simpler because the data has been hacked so much to fit it # main parser for pre-2001. simpler because the data has been hacked so much to fit it
def Parseloghtml01(year, expedition, txt): def Parseloghtml01(year, expedition, txt):
tripparas = re.findall("<hr[\s/]*>([\s\S]*?)(?=<hr)", txt) tripparas = re.findall("<hr[\s/]*>([\s\S]*?)(?=<hr)", txt)
for trippara in tripparas: for trippara in tripparas:
s = re.match(u"(?s)\s*(?:<p>)?(.*?)</?p>(.*)$(?i)", trippara) s = re.match(u"(?s)\s*(?:<p>)?(.*?)</?p>(.*)$(?i)", trippara)
assert s, trippara[:100] assert s, trippara[:100]
tripheader, triptext = s.group(1), s.group(2) tripheader, triptext = s.group(1), s.group(2)
mtripid = re.search('<a id="(.*?)"', tripheader) mtripid = re.search('<a id="(.*?)"', tripheader)
tripid = mtripid and mtripid.group(1) or "" tripid = mtripid and mtripid.group(1) or ""
tripheader = re.sub("</?(?:[ab]|span)[^>]*>", "", tripheader) tripheader = re.sub("</?(?:[ab]|span)[^>]*>", "", tripheader)
#print [tripheader] #print [tripheader]
#continue #continue
tripdate, triptitle, trippeople = tripheader.split("|") tripdate, triptitle, trippeople = tripheader.split("|")
ldate = ParseDate(tripdate.strip(), year) ldate = ParseDate(tripdate.strip(), year)
mtu = re.search('<p[^>]*>(T/?U.*)', triptext) mtu = re.search('<p[^>]*>(T/?U.*)', triptext)
if mtu: if mtu:
tu = mtu.group(1) tu = mtu.group(1)
triptext = triptext[:mtu.start(0)] + triptext[mtu.end():] triptext = triptext[:mtu.start(0)] + triptext[mtu.end():]
else: else:
tu = "" tu = ""
triptitles = triptitle.split(" - ") triptitles = triptitle.split(" - ")
tripcave = triptitles[0].strip() tripcave = triptitles[0].strip()
ltriptext = re.sub("</p>", "", triptext) ltriptext = re.sub("</p>", "", triptext)
ltriptext = re.sub("\s*?\n\s*", " ", ltriptext) ltriptext = re.sub("\s*?\n\s*", " ", ltriptext)
ltriptext = re.sub("<p>", "\n\n", ltriptext).strip() ltriptext = re.sub("<p>", "\n\n", ltriptext).strip()
#ltriptext = re.sub("[^\s0-9a-zA-Z\-.,:;'!]", "NONASCII", ltriptext) #ltriptext = re.sub("[^\s0-9a-zA-Z\-.,:;'!]", "NONASCII", ltriptext)
#print ldate, trippeople.strip() #print ldate, trippeople.strip()
# could include the tripid (url link for cross referencing) # could include the tripid (url link for cross referencing)
EnterLogIntoDbase(date = ldate, place = tripcave, title = triptitle, text = ltriptext, trippeople=trippeople, expedition=expedition, tu=tu) EnterLogIntoDbase(date = ldate, place = tripcave, title = triptitle, text = ltriptext, trippeople=trippeople, expedition=expedition, tu=tu)
def Parseloghtml03(year, expedition, txt): def Parseloghtml03(year, expedition, txt):
tripparas = re.findall("<hr\s*/>([\s\S]*?)(?=<hr)", txt) tripparas = re.findall("<hr\s*/>([\s\S]*?)(?=<hr)", txt)
for trippara in tripparas: for trippara in tripparas:
s = re.match(u"(?s)\s*<p>(.*?)</p>(.*)$", trippara) s = re.match(u"(?s)\s*<p>(.*?)</p>(.*)$", trippara)
assert s, trippara assert s, trippara
tripheader, triptext = s.group(1), s.group(2) tripheader, triptext = s.group(1), s.group(2)
tripheader = re.sub("&nbsp;", " ", tripheader) tripheader = re.sub("&nbsp;", " ", tripheader)
tripheader = re.sub("\s+", " ", tripheader).strip() tripheader = re.sub("\s+", " ", tripheader).strip()
sheader = tripheader.split(" -- ") sheader = tripheader.split(" -- ")
tu = "" tu = ""
if re.match("T/U|Time underwater", sheader[-1]): if re.match("T/U|Time underwater", sheader[-1]):
tu = sheader.pop() tu = sheader.pop()
if len(sheader) != 3: if len(sheader) != 3:
print sheader print sheader
# continue # continue
tripdate, triptitle, trippeople = sheader tripdate, triptitle, trippeople = sheader
ldate = ParseDate(tripdate.strip(), year) ldate = ParseDate(tripdate.strip(), year)
triptitles = triptitle.split(" , ") triptitles = triptitle.split(" , ")
if len(triptitles) >= 2: if len(triptitles) >= 2:
tripcave = triptitles[0] tripcave = triptitles[0]
else: else:
tripcave = "UNKNOWN" tripcave = "UNKNOWN"
#print tripcave, "--- ppp", triptitle, trippeople, len(triptext) #print tripcave, "--- ppp", triptitle, trippeople, len(triptext)
ltriptext = re.sub("</p>", "", triptext) ltriptext = re.sub("</p>", "", triptext)
ltriptext = re.sub("\s*?\n\s*", " ", ltriptext) ltriptext = re.sub("\s*?\n\s*", " ", ltriptext)
ltriptext = re.sub("<p>", "\n\n", ltriptext).strip() ltriptext = re.sub("<p>", "\n\n", ltriptext).strip()
ltriptext = re.sub("[^\s0-9a-zA-Z\-.,:;'!&()\[\]<>?=+*%]", "_NONASCII_", ltriptext) ltriptext = re.sub("[^\s0-9a-zA-Z\-.,:;'!&()\[\]<>?=+*%]", "_NONASCII_", ltriptext)
EnterLogIntoDbase(date = ldate, place = tripcave, title = triptitle, text = ltriptext, trippeople=trippeople, expedition=expedition, tu=tu) EnterLogIntoDbase(date = ldate, place = tripcave, title = triptitle, text = ltriptext, trippeople=trippeople, expedition=expedition, tu=tu)
def LoadLogbooks(): def LoadLogbooks():
models.LogbookEntry.objects.all().delete() models.LogbookEntry.objects.all().delete()
@ -272,12 +273,13 @@ def LoadLogbooks():
expedition = models.Expedition.objects.filter(year = year)[0] expedition = models.Expedition.objects.filter(year = year)[0]
fin = open(os.path.join(expowebbase, lloc)) fin = open(os.path.join(expowebbase, lloc))
txt = fin.read() txt = fin.read()
fin.close() fin.close()
parsefunc(year, expedition, txt) parsefunc(year, expedition, txt)
# command line run through the loading stages # command line run through the loading stages
# you can comment out these in turn to control what gets reloaded # you can comment out these in turn to control what gets reloaded
LoadExpos() LoadPersons() LoadExpos()
LoadPersons()
LoadLogbooks() LoadLogbooks()