[svn r8036] we can parse one 2004 logbook in here. corrections made to folk.csv

This commit is contained in:
julian
2008-10-27 00:27:31 +01:00
parent 4fc413a570
commit 9ee6f4668b
4 changed files with 136 additions and 110 deletions

View File

@@ -46,7 +46,7 @@
"Spencer Davey","Davey",,,,,,,,,,,,,,,,,,,,,1,,,,,,,,,,,,,,
"Andrew Davidson","Davidson",,,,,,,,,,,,,,,,,,,,,,1,,,,,,,,,,,,,
"Nicky Davies","Davies",,,"l/nicky.htm",,,1,,,,,1,,,,,,,,,,,,,,,,,,,,,,,,
"Anthony Day","Day",,,"l/ajday.htm",,,,,,,,,,,,,,,,,,1,1,1,1,1,,1,1,1,1,1,1,1,,11
"Anthony Day (Dour)","Day",,,"l/ajday.htm",,,,,,,,,,,,,,,,,,1,1,1,1,1,,1,1,1,1,1,1,1,,1
"Edvin Deadman","Deadman",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1
"Chris Densham","Densham",,,"i/chrisd.jpg",,,,,,,,,,,,1,1,1,,,,,,,1,1,,,,,,,,,,
"Brian Derby","Derby",,,"i/bderby.jpg",,,,,,,,,1,1,,,,,,,,,,,,,,,,,,,,,,
@@ -106,7 +106,7 @@
"Pete Lancaster","Lancaster",,,"l/planc.htm",,,,,,1,1,1,1,,,,,1,,,,,,,,,,,,,,,,,,
"Mary Lane","Lane",,,,,,,,,,,,,,,,,,,,,,,,,1,,,,,,,,,,
"Fran Lane","Lane",,,"i/fran.jpg",,,,,,,,,,,,,,,,1,1,,,,1,,,,,,,,,,,
"Rebecca Lawson","Lawson",,,"l/beckal.htm",,,,,,,,,,,,1,1,1,,,,,,,1,1,,1,1,1,,1,1,1,,1
"Becka Lawson","Lawson",,,"l/beckal.htm",,,,,,,,,,,,1,1,1,,,,,,,1,1,,1,1,1,,1,1,1,,1
"Carole Leach","Leach",,,,1,1,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"Jont Leach","Leach",,,"i/jont_leach.jpg",1,1,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"Rod Leach","Leach",,,,1,1,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1 Name Guest VfHO member Mugshot 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007
46 Spencer Davey Davey 1
47 Andrew Davidson Davidson 1
48 Nicky Davies Davies l/nicky.htm 1 1
49 Anthony Day Anthony Day (Dour) Day l/ajday.htm 1 1 1 1 1 1 1 1 1 1 1 1 11 1
50 Edvin Deadman Deadman 1
51 Chris Densham Densham i/chrisd.jpg 1 1 1 1 1
52 Brian Derby Derby i/bderby.jpg 1 1
106 Pete Lancaster Lancaster l/planc.htm 1 1 1 1 1
107 Mary Lane Lane 1
108 Fran Lane Lane i/fran.jpg 1 1 1
109 Rebecca Lawson Becka Lawson Lawson l/beckal.htm 1 1 1 1 1 1 1 1 1 1 1 1
110 Carole Leach Leach 1 1 1
111 Jont Leach Leach i/jont_leach.jpg 1 1 1
112 Rod Leach Leach 1 1 1

View File

@@ -12,27 +12,19 @@ class Expedition(models.Model):
return self.year
def GetPersonExpedition(self, name):
if name == "Dour":
name = "Anthony Day"
personyears = PersonExpedition.objects.filter(expedition=self)
personexpeditions = PersonExpedition.objects.filter(expedition=self)
res = None
for personyear in personyears:
if name == "%s %s" % (personyear.person.first_name, personyear.person.last_name):
assert not res, "Ambiguous:" + name
res = personyear
if name == "%s %s" % (personyear.person.first_name, personyear.person.last_name[0]):
assert not res, "Ambiguous:" + name
res = personyear
if name == personyear.person.first_name:
assert not res, "Ambiguous:" + name
res = personyear
for personexpedition in personexpeditions:
for possiblenameform in personexpedition.GetPossibleNameForms():
if name == possiblenameform:
assert not res, "Ambiguous: " + name
res = personexpedition
return res
class Person(models.Model):
first_name = models.CharField(max_length=100)
last_name = models.CharField(max_length=100)
is_guest = models.BooleanField()
is_vfho = models.BooleanField()
mug_shot = models.CharField(max_length=100, blank=True,null=True)
def __str__(self):
@@ -43,32 +35,47 @@ class PersonExpedition(models.Model):
person = models.ForeignKey(Person)
from_date = models.DateField(blank=True,null=True)
to_date = models.DateField(blank=True,null=True)
is_guest = models.BooleanField()
nickname = models.CharField(max_length=100,blank=True,null=True)
def GetPossibleNameForms(self):
    """Return the distinct name strings that may refer to this person.

    Forms, in priority order: "First Last" and "First L" (only when the
    person has a last name), the bare first name, and the nickname when
    one is set.

    Repeated forms are dropped, so a caller that compares a name against
    every entry cannot match the same person twice.  Duplicates would
    otherwise arise when the last name is a single letter ("First Last"
    equals "First L") or when the nickname equals another form.
    """
    person = self.person
    candidates = []
    if person.last_name:
        candidates.append("%s %s" % (person.first_name, person.last_name))
        candidates.append("%s %s" % (person.first_name, person.last_name[0]))
    candidates.append(person.first_name)
    if self.nickname:
        candidates.append(self.nickname)

    # De-duplicate while keeping the priority order above.
    res = []
    for form in candidates:
        if form not in res:
            res.append(form)
    return res
def __str__(self):
return "%s: (%s)" % (self.person, self.expedition)
class LogbookEntry(models.Model):
date = models.DateField()
author = models.ForeignKey(PersonExpedition,blank=True,null=True)
author = models.ForeignKey(PersonExpedition,blank=True,null=True) # the person who writes it up doesn't have to have been on the trip
title = models.CharField(max_length=100)
# this will be a foreign key
# this will be a foreign key of the place the logbook is describing
place = models.CharField(max_length=100,blank=True,null=True)
text = models.TextField()
#cavers = models.ManyToManyField(PersonYear)
#tu = models.CharField(max_length=50)
# several PersonTrips point in to this object
def __str__(self):
return "%s: (%s)" % (self.date, self.title)
class PersonTrip(models.Model):
personexpedition = models.ForeignKey(PersonExpedition)
place = models.CharField(max_length=100) # this will be a foreign key
# this will be a foreign key of the place(s) the trip went through
# possibly a trip has a plurality of triplets pointing into it
place = models.CharField(max_length=100)
date = models.DateField()
timeunderground = models.CharField(max_length=100)
logbookentry = models.ForeignKey(LogbookEntry)
#is_author = models.BooleanField()
is_logbookentryauthor = models.BooleanField()
def __str__(self):
return "%s %s (%s)" % (self.personexpedition, self.place, self.date)

View File

@@ -3,7 +3,6 @@
import settings
import expo.models as models
import csv
import sqlite3
import re
import os
import datetime
@@ -24,11 +23,12 @@ header = dict(zip(headers, range(len(headers))))
def LoadExpos():
models.Expedition.objects.all().delete()
y = models.Expedition(year = "2008", name = "CUCC expo2008")
y.save()
for year in headers[5:]:
y = models.Expedition(year = year, name = "CUCC expo%s" % y)
years = headers[5:]
years.append("2008")
for year in years:
y = models.Expedition(year = year, name = "CUCC expo%s" % year)
y.save()
print "lll", years
def LoadPersons():
models.Person.objects.all().delete()
@@ -40,44 +40,59 @@ def LoadPersons():
name = person[header["Name"]]
name = re.sub("<.*?>", "", name)
lname = name.split()
if len(lname) >= 2:
firstname, lastname = lname[0], lname[1]
mbrack = re.match("\((.*?)\)", lname[-1])
if mbrack:
nickname = mbrack.group(1)
del lname[-1]
elif name == "Anthony Day":
nickname = "Dour"
else:
nickname = ""
if len(lname) == 3: # van something
firstname, lastname = lname[0], "%s %s" % (lname[1], lname[2])
elif len(lname) == 2:
firstname, lastname = lname[0], lname[1]
elif len(lname) == 1:
firstname, lastname = lname[0], ""
print firstname, lastname
else:
assert False, lname
#print firstname, lastname
#assert lastname == person[header[""]], person
pObject = models.Person(first_name = firstname,
last_name = lastname,
is_guest = person[header["Guest"]] == "1",
is_vfho = person[header["VfHO member"]],
mug_shot = person[header["Mugshot"]])
pObject.save()
is_guest = person[header["Guest"]] == "1" # this is really a per-expo catagory; not a permanent state
for year, attended in zip(headers, person)[5:]:
yo = models.Expedition.objects.filter(year = year)[0]
if attended == "1" or attended == "-1":
pyo = models.PersonExpedition(person = pObject, expedition = yo)
pyo = models.PersonExpedition(person = pObject, expedition = yo, nickname=nickname, is_guest=is_guest)
pyo.save()
if name in expoers2008:
print "2008:", name
expomissing.discard(name)
yo = models.Expedition.objects.filter(year = "2008")[0]
pyo = models.PersonExpedition(person = pObject, expedition = yo)
pyo = models.PersonExpedition(person = pObject, expedition = yo, is_guest=is_guest)
pyo.save()
print expomissing
# this fills in those people for whom 2008 was their first expo
for name in expomissing:
firstname, lastname = name.split()
is_guest = name in ["Eeva Makiranta", "Kieth Curtis"]
pObject = models.Person(first_name = firstname,
last_name = lastname,
is_guest = name in ["Eeva Makiranta", "Kieth Curtis"],
is_vfho = False,
mug_shot = "")
pObject.save()
yo = models.Expedition.objects.filter(year = "2008")[0]
pyo = models.PersonExpedition(person = pObject, expedition = yo)
pyo = models.PersonExpedition(person = pObject, expedition = yo, nickname="", is_guest=is_guest)
pyo.save()
@@ -95,7 +110,7 @@ def GetTripPersons(trippeople, expedition):
if tripperson and tripperson[0] != '*':
#assert tripperson in personyearmap, "'%s' << %s\n\n %s" % (tripperson, trippeople, personyearmap)
personyear = expedition.GetPersonExpedition(tripperson)
print personyear
#print personyear
res.append(personyear)
if mul:
author = personyear
@@ -154,6 +169,7 @@ def Parseloghtmltxt(year, expedition, txt):
else:
assert False, tripdate
ldate = datetime.date(year, month, day)
print "ttt", tripdate
#assert tripid[:-1] == "t" + tripdate, (tripid, tripdate)
trippersons, author = GetTripPersons(trippeople, expedition)
tripcave = ""
@@ -162,7 +178,8 @@ def Parseloghtmltxt(year, expedition, txt):
tu = timeug or ""
for tripperson in trippersons:
pto = models.PersonTrip(personexpedition = tripperson, place=tripcave, date=ldate, timeunderground=tu, logbookentry=lbo)
pto = models.PersonTrip(personexpedition = tripperson, place=tripcave, date=ldate, timeunderground=tu,
logbookentry=lbo, is_logbookentryauthor=(tripperson == author))
pto.save()
@@ -183,15 +200,16 @@ def LoadLogbooks():
fin = open(os.path.join(expowebbase, lloc))
txt = fin.read()
fin.close()
#print personyearmap
if year >= "2007":
Parselogwikitxt(year, personyearmap, txt)
else:
Parseloghtmltxt(year, expedition, txt)
# command line run through the loading stages
LoadExpos()
LoadPersons()
# you can comment out these in turn to control what gets reloaded
#LoadExpos()
#LoadPersons()
LoadLogbooks()

View File

@@ -9,19 +9,19 @@ re_begin = re.compile(r"^\s*\*begin\s+(.*?)\s*$", re.IGNORECASE)
re_end = re.compile(r"^\s*\*end\s+(.*?)\s*$", re.IGNORECASE)
def save(x): #There seems to be an intermitent problem with sqlite and Vista, this should fix it
try:
#try:
x.save()
except sqlite3.OperationalError:
print "Error"
time.sleep(1)
save(x)
#except Exception: #sqlite3.OperationalError:
# print "Error"
# time.sleep(1)
# save(x)
def fileIterator(directory, filename):
survex_file = os.path.join(directory, filename + ".svx")
f = open(os.path.join(settings.SURVEX_DATA, survex_file), "rb")
char = 0
for line in f.readlines():
line = unicode(line, "latin1")
line = unicode(line, "latin1").decode("utf-8")
include_extension = re_include_extension.match(line)
include_no_extension = re_include_no_extension.match(line)
def a(include):
@@ -53,6 +53,7 @@ def make_model(name, parent, iter_lines, sf, c, l):
m.text = m.text + line
m.end_file = survex_file
m.end_char = count
print len(m.text)
save(m)
assert (end.groups()[0]).lower() == (name).lower()
return None