[svn r8036] we can parse one 2004 logbook in here. corrections made to folk.csv

This commit is contained in:
julian
2008-10-27 00:27:31 +01:00
parent 4fc413a570
commit 9ee6f4668b
4 changed files with 136 additions and 110 deletions

View File

@@ -46,7 +46,7 @@
"Spencer Davey","Davey",,,,,,,,,,,,,,,,,,,,,1,,,,,,,,,,,,,, "Spencer Davey","Davey",,,,,,,,,,,,,,,,,,,,,1,,,,,,,,,,,,,,
"Andrew Davidson","Davidson",,,,,,,,,,,,,,,,,,,,,,1,,,,,,,,,,,,, "Andrew Davidson","Davidson",,,,,,,,,,,,,,,,,,,,,,1,,,,,,,,,,,,,
"Nicky Davies","Davies",,,"l/nicky.htm",,,1,,,,,1,,,,,,,,,,,,,,,,,,,,,,,, "Nicky Davies","Davies",,,"l/nicky.htm",,,1,,,,,1,,,,,,,,,,,,,,,,,,,,,,,,
"Anthony Day","Day",,,"l/ajday.htm",,,,,,,,,,,,,,,,,,1,1,1,1,1,,1,1,1,1,1,1,1,,11 "Anthony Day (Dour)","Day",,,"l/ajday.htm",,,,,,,,,,,,,,,,,,1,1,1,1,1,,1,1,1,1,1,1,1,,1
"Edvin Deadman","Deadman",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1 "Edvin Deadman","Deadman",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1
"Chris Densham","Densham",,,"i/chrisd.jpg",,,,,,,,,,,,1,1,1,,,,,,,1,1,,,,,,,,,, "Chris Densham","Densham",,,"i/chrisd.jpg",,,,,,,,,,,,1,1,1,,,,,,,1,1,,,,,,,,,,
"Brian Derby","Derby",,,"i/bderby.jpg",,,,,,,,,1,1,,,,,,,,,,,,,,,,,,,,,, "Brian Derby","Derby",,,"i/bderby.jpg",,,,,,,,,1,1,,,,,,,,,,,,,,,,,,,,,,
@@ -106,7 +106,7 @@
"Pete Lancaster","Lancaster",,,"l/planc.htm",,,,,,1,1,1,1,,,,,1,,,,,,,,,,,,,,,,,, "Pete Lancaster","Lancaster",,,"l/planc.htm",,,,,,1,1,1,1,,,,,1,,,,,,,,,,,,,,,,,,
"Mary Lane","Lane",,,,,,,,,,,,,,,,,,,,,,,,,1,,,,,,,,,, "Mary Lane","Lane",,,,,,,,,,,,,,,,,,,,,,,,,1,,,,,,,,,,
"Fran Lane","Lane",,,"i/fran.jpg",,,,,,,,,,,,,,,,1,1,,,,1,,,,,,,,,,, "Fran Lane","Lane",,,"i/fran.jpg",,,,,,,,,,,,,,,,1,1,,,,1,,,,,,,,,,,
"Rebecca Lawson","Lawson",,,"l/beckal.htm",,,,,,,,,,,,1,1,1,,,,,,,1,1,,1,1,1,,1,1,1,,1 "Becka Lawson","Lawson",,,"l/beckal.htm",,,,,,,,,,,,1,1,1,,,,,,,1,1,,1,1,1,,1,1,1,,1
"Carole Leach","Leach",,,,1,1,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,, "Carole Leach","Leach",,,,1,1,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"Jont Leach","Leach",,,"i/jont_leach.jpg",1,1,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,, "Jont Leach","Leach",,,"i/jont_leach.jpg",1,1,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"Rod Leach","Leach",,,,1,1,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,, "Rod Leach","Leach",,,,1,1,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1 Name Guest VfHO member Mugshot 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007
46 Spencer Davey Davey 1
47 Andrew Davidson Davidson 1
48 Nicky Davies Davies l/nicky.htm 1 1
49 Anthony Day Anthony Day (Dour) Day l/ajday.htm 1 1 1 1 1 1 1 1 1 1 1 1 11 1
50 Edvin Deadman Deadman 1
51 Chris Densham Densham i/chrisd.jpg 1 1 1 1 1
52 Brian Derby Derby i/bderby.jpg 1 1
106 Pete Lancaster Lancaster l/planc.htm 1 1 1 1 1
107 Mary Lane Lane 1
108 Fran Lane Lane i/fran.jpg 1 1 1
109 Rebecca Lawson Becka Lawson Lawson l/beckal.htm 1 1 1 1 1 1 1 1 1 1 1 1
110 Carole Leach Leach 1 1 1
111 Jont Leach Leach i/jont_leach.jpg 1 1 1
112 Rod Leach Leach 1 1 1

View File

@@ -12,27 +12,19 @@ class Expedition(models.Model):
return self.year return self.year
def GetPersonExpedition(self, name): def GetPersonExpedition(self, name):
if name == "Dour": personexpeditions = PersonExpedition.objects.filter(expedition=self)
name = "Anthony Day"
personyears = PersonExpedition.objects.filter(expedition=self)
res = None res = None
for personyear in personyears: for personexpedition in personexpeditions:
if name == "%s %s" % (personyear.person.first_name, personyear.person.last_name): for possiblenameform in personexpedition.GetPossibleNameForms():
assert not res, "Ambiguous:" + name if name == possiblenameform:
res = personyear assert not res, "Ambiguous: " + name
if name == "%s %s" % (personyear.person.first_name, personyear.person.last_name[0]): res = personexpedition
assert not res, "Ambiguous:" + name
res = personyear
if name == personyear.person.first_name:
assert not res, "Ambiguous:" + name
res = personyear
return res return res
class Person(models.Model): class Person(models.Model):
first_name = models.CharField(max_length=100) first_name = models.CharField(max_length=100)
last_name = models.CharField(max_length=100) last_name = models.CharField(max_length=100)
is_guest = models.BooleanField()
is_vfho = models.BooleanField() is_vfho = models.BooleanField()
mug_shot = models.CharField(max_length=100, blank=True,null=True) mug_shot = models.CharField(max_length=100, blank=True,null=True)
def __str__(self): def __str__(self):
@@ -43,32 +35,47 @@ class PersonExpedition(models.Model):
person = models.ForeignKey(Person) person = models.ForeignKey(Person)
from_date = models.DateField(blank=True,null=True) from_date = models.DateField(blank=True,null=True)
to_date = models.DateField(blank=True,null=True) to_date = models.DateField(blank=True,null=True)
is_guest = models.BooleanField()
nickname = models.CharField(max_length=100,blank=True,null=True) nickname = models.CharField(max_length=100,blank=True,null=True)
def GetPossibleNameForms(self):
    """Return the distinct name strings this expedition member may be logged under.

    Forms are produced in this order: "First Last" and "First L" (only when
    the person has a last name), the bare first name, then the nickname if
    one is set.

    Each form appears at most once. Expedition.GetPersonExpedition compares a
    name against every form and asserts that it has not already matched, so
    a repeated form (nickname equal to the first name, or a one-letter last
    name) would trip its "Ambiguous" assertion for a single person.
    """
    person = self.person
    candidates = []
    if person.last_name:
        # The initial form indexes last_name[0], so it must stay under this guard.
        candidates.append("%s %s" % (person.first_name, person.last_name))
        candidates.append("%s %s" % (person.first_name, person.last_name[0]))
    candidates.append(person.first_name)
    if self.nickname:
        candidates.append(self.nickname)

    # Drop repeats while keeping the first occurrence and the original order.
    res = []
    for form in candidates:
        if form not in res:
            res.append(form)
    return res
def __str__(self): def __str__(self):
return "%s: (%s)" % (self.person, self.expedition) return "%s: (%s)" % (self.person, self.expedition)
class LogbookEntry(models.Model): class LogbookEntry(models.Model):
date = models.DateField() date = models.DateField()
author = models.ForeignKey(PersonExpedition,blank=True,null=True) author = models.ForeignKey(PersonExpedition,blank=True,null=True) # the person who writes it up doesn't have to have been on the trip
title = models.CharField(max_length=100) title = models.CharField(max_length=100)
# this will be a foreign key # this will be a foreign key of the place the logbook is describing
place = models.CharField(max_length=100,blank=True,null=True) place = models.CharField(max_length=100,blank=True,null=True)
text = models.TextField() text = models.TextField()
#cavers = models.ManyToManyField(PersonYear) # several PersonTrips point in to this object
#tu = models.CharField(max_length=50)
def __str__(self): def __str__(self):
return "%s: (%s)" % (self.date, self.title) return "%s: (%s)" % (self.date, self.title)
class PersonTrip(models.Model): class PersonTrip(models.Model):
personexpedition = models.ForeignKey(PersonExpedition) personexpedition = models.ForeignKey(PersonExpedition)
place = models.CharField(max_length=100) # this will be a foreign key
# this will be a foreign key of the place(s) the trip went through
# possibly a trip has a plurality of triplets pointing into it
place = models.CharField(max_length=100)
date = models.DateField() date = models.DateField()
timeunderground = models.CharField(max_length=100) timeunderground = models.CharField(max_length=100)
logbookentry = models.ForeignKey(LogbookEntry) logbookentry = models.ForeignKey(LogbookEntry)
is_logbookentryauthor = models.BooleanField()
#is_author = models.BooleanField()
def __str__(self): def __str__(self):
return "%s %s (%s)" % (self.personexpedition, self.place, self.date) return "%s %s (%s)" % (self.personexpedition, self.place, self.date)

View File

@@ -3,7 +3,6 @@
import settings import settings
import expo.models as models import expo.models as models
import csv import csv
import sqlite3
import re import re
import os import os
import datetime import datetime
@@ -24,11 +23,12 @@ header = dict(zip(headers, range(len(headers))))
def LoadExpos(): def LoadExpos():
models.Expedition.objects.all().delete() models.Expedition.objects.all().delete()
y = models.Expedition(year = "2008", name = "CUCC expo2008") years = headers[5:]
y.save() years.append("2008")
for year in headers[5:]: for year in years:
y = models.Expedition(year = year, name = "CUCC expo%s" % y) y = models.Expedition(year = year, name = "CUCC expo%s" % year)
y.save() y.save()
print "lll", years
def LoadPersons(): def LoadPersons():
models.Person.objects.all().delete() models.Person.objects.all().delete()
@@ -40,44 +40,59 @@ def LoadPersons():
name = person[header["Name"]] name = person[header["Name"]]
name = re.sub("<.*?>", "", name) name = re.sub("<.*?>", "", name)
lname = name.split() lname = name.split()
if len(lname) >= 2: mbrack = re.match("\((.*?)\)", lname[-1])
firstname, lastname = lname[0], lname[1]
if mbrack:
nickname = mbrack.group(1)
del lname[-1]
elif name == "Anthony Day":
nickname = "Dour"
else: else:
nickname = ""
if len(lname) == 3: # van something
firstname, lastname = lname[0], "%s %s" % (lname[1], lname[2])
elif len(lname) == 2:
firstname, lastname = lname[0], lname[1]
elif len(lname) == 1:
firstname, lastname = lname[0], "" firstname, lastname = lname[0], ""
print firstname, lastname else:
assert False, lname
#print firstname, lastname
#assert lastname == person[header[""]], person #assert lastname == person[header[""]], person
pObject = models.Person(first_name = firstname, pObject = models.Person(first_name = firstname,
last_name = lastname, last_name = lastname,
is_guest = person[header["Guest"]] == "1",
is_vfho = person[header["VfHO member"]], is_vfho = person[header["VfHO member"]],
mug_shot = person[header["Mugshot"]]) mug_shot = person[header["Mugshot"]])
pObject.save() pObject.save()
is_guest = person[header["Guest"]] == "1" # this is really a per-expo category; not a permanent state
for year, attended in zip(headers, person)[5:]: for year, attended in zip(headers, person)[5:]:
yo = models.Expedition.objects.filter(year = year)[0] yo = models.Expedition.objects.filter(year = year)[0]
if attended == "1" or attended == "-1": if attended == "1" or attended == "-1":
pyo = models.PersonExpedition(person = pObject, expedition = yo) pyo = models.PersonExpedition(person = pObject, expedition = yo, nickname=nickname, is_guest=is_guest)
pyo.save() pyo.save()
if name in expoers2008: if name in expoers2008:
print "2008:", name print "2008:", name
expomissing.discard(name) expomissing.discard(name)
yo = models.Expedition.objects.filter(year = "2008")[0] yo = models.Expedition.objects.filter(year = "2008")[0]
pyo = models.PersonExpedition(person = pObject, expedition = yo) pyo = models.PersonExpedition(person = pObject, expedition = yo, is_guest=is_guest)
pyo.save() pyo.save()
print expomissing # this fills in those people for whom 2008 was their first expo
for name in expomissing: for name in expomissing:
firstname, lastname = name.split() firstname, lastname = name.split()
is_guest = name in ["Eeva Makiranta", "Kieth Curtis"]
pObject = models.Person(first_name = firstname, pObject = models.Person(first_name = firstname,
last_name = lastname, last_name = lastname,
is_guest = name in ["Eeva Makiranta", "Kieth Curtis"],
is_vfho = False, is_vfho = False,
mug_shot = "") mug_shot = "")
pObject.save() pObject.save()
yo = models.Expedition.objects.filter(year = "2008")[0] yo = models.Expedition.objects.filter(year = "2008")[0]
pyo = models.PersonExpedition(person = pObject, expedition = yo) pyo = models.PersonExpedition(person = pObject, expedition = yo, nickname="", is_guest=is_guest)
pyo.save() pyo.save()
@@ -95,7 +110,7 @@ def GetTripPersons(trippeople, expedition):
if tripperson and tripperson[0] != '*': if tripperson and tripperson[0] != '*':
#assert tripperson in personyearmap, "'%s' << %s\n\n %s" % (tripperson, trippeople, personyearmap) #assert tripperson in personyearmap, "'%s' << %s\n\n %s" % (tripperson, trippeople, personyearmap)
personyear = expedition.GetPersonExpedition(tripperson) personyear = expedition.GetPersonExpedition(tripperson)
print personyear #print personyear
res.append(personyear) res.append(personyear)
if mul: if mul:
author = personyear author = personyear
@@ -154,6 +169,7 @@ def Parseloghtmltxt(year, expedition, txt):
else: else:
assert False, tripdate assert False, tripdate
ldate = datetime.date(year, month, day) ldate = datetime.date(year, month, day)
print "ttt", tripdate
#assert tripid[:-1] == "t" + tripdate, (tripid, tripdate) #assert tripid[:-1] == "t" + tripdate, (tripid, tripdate)
trippersons, author = GetTripPersons(trippeople, expedition) trippersons, author = GetTripPersons(trippeople, expedition)
tripcave = "" tripcave = ""
@@ -162,7 +178,8 @@ def Parseloghtmltxt(year, expedition, txt):
tu = timeug or "" tu = timeug or ""
for tripperson in trippersons: for tripperson in trippersons:
pto = models.PersonTrip(personexpedition = tripperson, place=tripcave, date=ldate, timeunderground=tu, logbookentry=lbo) pto = models.PersonTrip(personexpedition = tripperson, place=tripcave, date=ldate, timeunderground=tu,
logbookentry=lbo, is_logbookentryauthor=(tripperson == author))
pto.save() pto.save()
@@ -183,15 +200,16 @@ def LoadLogbooks():
fin = open(os.path.join(expowebbase, lloc)) fin = open(os.path.join(expowebbase, lloc))
txt = fin.read() txt = fin.read()
fin.close() fin.close()
#print personyearmap
if year >= "2007": if year >= "2007":
Parselogwikitxt(year, personyearmap, txt) Parselogwikitxt(year, personyearmap, txt)
else: else:
Parseloghtmltxt(year, expedition, txt) Parseloghtmltxt(year, expedition, txt)
# command line run through the loading stages # command line run through the loading stages
LoadExpos() # you can comment out these in turn to control what gets reloaded
LoadPersons() #LoadExpos()
#LoadPersons()
LoadLogbooks() LoadLogbooks()

View File

@@ -1,68 +1,69 @@
import settings import settings
import expo.models as models import expo.models as models
import re import re
import os import os
re_include_extension = re.compile(r"^\s*\*include\s+([^\s]*).svx$", re.IGNORECASE) re_include_extension = re.compile(r"^\s*\*include\s+([^\s]*).svx$", re.IGNORECASE)
re_include_no_extension = re.compile(r"^\s*\*include\s+([^\s]*)$", re.IGNORECASE) re_include_no_extension = re.compile(r"^\s*\*include\s+([^\s]*)$", re.IGNORECASE)
re_begin = re.compile(r"^\s*\*begin\s+(.*?)\s*$", re.IGNORECASE) re_begin = re.compile(r"^\s*\*begin\s+(.*?)\s*$", re.IGNORECASE)
re_end = re.compile(r"^\s*\*end\s+(.*?)\s*$", re.IGNORECASE) re_end = re.compile(r"^\s*\*end\s+(.*?)\s*$", re.IGNORECASE)
def save(x): #There seems to be an intermittent problem with sqlite and Vista, this should fix it def save(x): #There seems to be an intermittent problem with sqlite and Vista, this should fix it
try: #try:
x.save() x.save()
except sqlite3.OperationalError: #except Exception: #sqlite3.OperationalError:
print "Error" # print "Error"
time.sleep(1) # time.sleep(1)
save(x) # save(x)
def fileIterator(directory, filename): def fileIterator(directory, filename):
survex_file = os.path.join(directory, filename + ".svx") survex_file = os.path.join(directory, filename + ".svx")
f = open(os.path.join(settings.SURVEX_DATA, survex_file), "rb") f = open(os.path.join(settings.SURVEX_DATA, survex_file), "rb")
char = 0 char = 0
for line in f.readlines(): for line in f.readlines():
line = unicode(line, "latin1") line = unicode(line, "latin1").decode("utf-8")
include_extension = re_include_extension.match(line) include_extension = re_include_extension.match(line)
include_no_extension = re_include_no_extension.match(line) include_no_extension = re_include_no_extension.match(line)
def a(include): def a(include):
link = re.split(r"/|\\", include) link = re.split(r"/|\\", include)
print os.path.join(directory, *link[:-1]), link[-1] print os.path.join(directory, *link[:-1]), link[-1]
return fileIterator(os.path.join(directory, *link[:-1]), link[-1]) return fileIterator(os.path.join(directory, *link[:-1]), link[-1])
if include_extension: if include_extension:
for sf, c, l in a(include_extension.groups()[0]): for sf, c, l in a(include_extension.groups()[0]):
yield sf, c, l yield sf, c, l
elif include_no_extension: elif include_no_extension:
for sf, c, l in a(include_no_extension.groups()[0]): for sf, c, l in a(include_no_extension.groups()[0]):
yield sf, c, l yield sf, c, l
else: else:
yield survex_file, char, line yield survex_file, char, line
char = char + len(line) char = char + len(line)
def make_model(name, parent, iter_lines, sf, c, l): def make_model(name, parent, iter_lines, sf, c, l):
if parent: if parent:
m = models.SurvexBlock(name = name, parent = parent, begin_file = sf, begin_char = c, text = l) m = models.SurvexBlock(name = name, parent = parent, begin_file = sf, begin_char = c, text = l)
else: else:
m = models.SurvexBlock(name = name, begin_file = sf, begin_char = c, text = l) m = models.SurvexBlock(name = name, begin_file = sf, begin_char = c, text = l)
save(m) save(m)
for survex_file, count, line in iter_lines: for survex_file, count, line in iter_lines:
begin = re_begin.match(line.split(";")[0]) begin = re_begin.match(line.split(";")[0])
end = re_end.match(line.split(";")[0]) end = re_end.match(line.split(";")[0])
if begin: if begin:
make_model(begin.groups()[0], m, iter_lines, survex_file, count, line) make_model(begin.groups()[0], m, iter_lines, survex_file, count, line)
elif end: elif end:
m.text = m.text + line m.text = m.text + line
m.end_file = survex_file m.end_file = survex_file
m.end_char = count m.end_char = count
save(m) print len(m.text)
assert (end.groups()[0]).lower() == (name).lower() save(m)
return None assert (end.groups()[0]).lower() == (name).lower()
else: return None
m.text = m.text + line else:
m.text = m.text + line m.text = m.text + line
m.end_file = survex_file m.text = m.text + line
m.end_char = count m.end_file = survex_file
save(m) m.end_char = count
save(m)
filename = "all"
make_model("", None, fileIterator("", filename), filename, 0, "") filename = "all"
make_model("", None, fileIterator("", filename), filename, 0, "")