diff --git a/expo/models_logbooks.py b/expo/models_logbooks.py index 6438f73..8b95404 100644 --- a/expo/models_logbooks.py +++ b/expo/models_logbooks.py @@ -12,27 +12,19 @@ class Expedition(models.Model): return self.year def GetPersonExpedition(self, name): - if name == "Dour": - name = "Anthony Day" - personyears = PersonExpedition.objects.filter(expedition=self) + personexpeditions = PersonExpedition.objects.filter(expedition=self) res = None - for personyear in personyears: - if name == "%s %s" % (personyear.person.first_name, personyear.person.last_name): - assert not res, "Ambiguous:" + name - res = personyear - if name == "%s %s" % (personyear.person.first_name, personyear.person.last_name[0]): - assert not res, "Ambiguous:" + name - res = personyear - if name == personyear.person.first_name: - assert not res, "Ambiguous:" + name - res = personyear + for personexpedition in personexpeditions: + for possiblenameform in personexpedition.GetPossibleNameForms(): + if name == possiblenameform: + assert not res, "Ambiguous: " + name + res = personexpedition return res class Person(models.Model): first_name = models.CharField(max_length=100) last_name = models.CharField(max_length=100) - is_guest = models.BooleanField() is_vfho = models.BooleanField() mug_shot = models.CharField(max_length=100, blank=True,null=True) def __str__(self): @@ -43,32 +35,47 @@ class PersonExpedition(models.Model): person = models.ForeignKey(Person) from_date = models.DateField(blank=True,null=True) to_date = models.DateField(blank=True,null=True) + is_guest = models.BooleanField() nickname = models.CharField(max_length=100,blank=True,null=True) + + def GetPossibleNameForms(self): + res = [ ] + if self.person.last_name: + res.append("%s %s" % (self.person.first_name, self.person.last_name)) + res.append("%s %s" % (self.person.first_name, self.person.last_name[0])) + res.append(self.person.first_name) + if self.nickname: + res.append(self.nickname) + return res + def __str__(self): return 
"%s: (%s)" % (self.person, self.expedition) + class LogbookEntry(models.Model): date = models.DateField() - author = models.ForeignKey(PersonExpedition,blank=True,null=True) + author = models.ForeignKey(PersonExpedition,blank=True,null=True) # the person who writes it up doesn't have to have been on the trip title = models.CharField(max_length=100) - # this will be a foreign key + # this will be a foreign key of the place the logbook is describing place = models.CharField(max_length=100,blank=True,null=True) text = models.TextField() - #cavers = models.ManyToManyField(PersonYear) - #tu = models.CharField(max_length=50) + # several PersonTrips point in to this object + def __str__(self): return "%s: (%s)" % (self.date, self.title) class PersonTrip(models.Model): personexpedition = models.ForeignKey(PersonExpedition) - place = models.CharField(max_length=100) # this will be a foreign key + + # this will be a foreign key of the place(s) the trip went through + # possibly a trip has a plurality of triplets pointing into it + place = models.CharField(max_length=100) date = models.DateField() timeunderground = models.CharField(max_length=100) logbookentry = models.ForeignKey(LogbookEntry) - - #is_author = models.BooleanField() + is_logbookentryauthor = models.BooleanField() def __str__(self): return "%s %s (%s)" % (self.personexpedition, self.place, self.date) diff --git a/parsers/logbooks.py b/parsers/logbooks.py index 5c38d41..df2c9a9 100644 --- a/parsers/logbooks.py +++ b/parsers/logbooks.py @@ -3,7 +3,6 @@ import settings import expo.models as models import csv -import sqlite3 import re import os import datetime @@ -24,11 +23,12 @@ header = dict(zip(headers, range(len(headers)))) def LoadExpos(): models.Expedition.objects.all().delete() - y = models.Expedition(year = "2008", name = "CUCC expo2008") - y.save() - for year in headers[5:]: - y = models.Expedition(year = year, name = "CUCC expo%s" % y) + years = headers[5:] + years.append("2008") + for year in years: + y 
= models.Expedition(year = year, name = "CUCC expo%s" % year) y.save() + print "lll", years def LoadPersons(): models.Person.objects.all().delete() @@ -40,44 +40,59 @@ def LoadPersons(): name = person[header["Name"]] name = re.sub("<.*?>", "", name) lname = name.split() - if len(lname) >= 2: - firstname, lastname = lname[0], lname[1] + mbrack = re.match("\((.*?)\)", lname[-1]) + + if mbrack: + nickname = mbrack.group(1) + del lname[-1] + elif name == "Anthony Day": + nickname = "Dour" else: + nickname = "" + + if len(lname) == 3: # van something + firstname, lastname = lname[0], "%s %s" % (lname[1], lname[2]) + elif len(lname) == 2: + firstname, lastname = lname[0], lname[1] + elif len(lname) == 1: firstname, lastname = lname[0], "" - print firstname, lastname + else: + assert False, lname + #print firstname, lastname #assert lastname == person[header[""]], person + pObject = models.Person(first_name = firstname, last_name = lastname, - is_guest = person[header["Guest"]] == "1", is_vfho = person[header["VfHO member"]], mug_shot = person[header["Mugshot"]]) pObject.save() + is_guest = person[header["Guest"]] == "1" # this is really a per-expo category; not a permanent state for year, attended in zip(headers, person)[5:]: yo = models.Expedition.objects.filter(year = year)[0] if attended == "1" or attended == "-1": - pyo = models.PersonExpedition(person = pObject, expedition = yo) + pyo = models.PersonExpedition(person = pObject, expedition = yo, nickname=nickname, is_guest=is_guest) pyo.save() if name in expoers2008: print "2008:", name expomissing.discard(name) yo = models.Expedition.objects.filter(year = "2008")[0] - pyo = models.PersonExpedition(person = pObject, expedition = yo) + pyo = models.PersonExpedition(person = pObject, expedition = yo, is_guest=is_guest) pyo.save() - print expomissing + # this fills in those people for whom 2008 was their first expo for name in expomissing: firstname, lastname = name.split() + is_guest = name in ["Eeva Makiranta", "Kieth 
Curtis"] pObject = models.Person(first_name = firstname, last_name = lastname, - is_guest = name in ["Eeva Makiranta", "Kieth Curtis"], is_vfho = False, mug_shot = "") pObject.save() yo = models.Expedition.objects.filter(year = "2008")[0] - pyo = models.PersonExpedition(person = pObject, expedition = yo) + pyo = models.PersonExpedition(person = pObject, expedition = yo, nickname="", is_guest=is_guest) pyo.save() @@ -95,7 +110,7 @@ def GetTripPersons(trippeople, expedition): if tripperson and tripperson[0] != '*': #assert tripperson in personyearmap, "'%s' << %s\n\n %s" % (tripperson, trippeople, personyearmap) personyear = expedition.GetPersonExpedition(tripperson) - print personyear + #print personyear res.append(personyear) if mul: author = personyear @@ -154,6 +169,7 @@ def Parseloghtmltxt(year, expedition, txt): else: assert False, tripdate ldate = datetime.date(year, month, day) + print "ttt", tripdate #assert tripid[:-1] == "t" + tripdate, (tripid, tripdate) trippersons, author = GetTripPersons(trippeople, expedition) tripcave = "" @@ -162,7 +178,8 @@ def Parseloghtmltxt(year, expedition, txt): tu = timeug or "" for tripperson in trippersons: - pto = models.PersonTrip(personexpedition = tripperson, place=tripcave, date=ldate, timeunderground=tu, logbookentry=lbo) + pto = models.PersonTrip(personexpedition = tripperson, place=tripcave, date=ldate, timeunderground=tu, + logbookentry=lbo, is_logbookentryauthor=(tripperson == author)) pto.save() @@ -183,15 +200,16 @@ def LoadLogbooks(): fin = open(os.path.join(expowebbase, lloc)) txt = fin.read() fin.close() - #print personyearmap if year >= "2007": Parselogwikitxt(year, personyearmap, txt) else: Parseloghtmltxt(year, expedition, txt) + # command line run through the loading stages -LoadExpos() -LoadPersons() +# you can comment out these in turn to control what gets reloaded +#LoadExpos() +#LoadPersons() LoadLogbooks() diff --git a/parsers/survex.py b/parsers/survex.py index f6d11e4..99044cb 100644 --- 
a/parsers/survex.py +++ b/parsers/survex.py @@ -1,68 +1,69 @@ -import settings -import expo.models as models -import re -import os - -re_include_extension = re.compile(r"^\s*\*include\s+([^\s]*).svx$", re.IGNORECASE) -re_include_no_extension = re.compile(r"^\s*\*include\s+([^\s]*)$", re.IGNORECASE) -re_begin = re.compile(r"^\s*\*begin\s+(.*?)\s*$", re.IGNORECASE) -re_end = re.compile(r"^\s*\*end\s+(.*?)\s*$", re.IGNORECASE) - -def save(x): #There seems to be an intermitent problem with sqlite and Vista, this should fix it - try: - x.save() - except sqlite3.OperationalError: - print "Error" - time.sleep(1) - save(x) - -def fileIterator(directory, filename): - survex_file = os.path.join(directory, filename + ".svx") - f = open(os.path.join(settings.SURVEX_DATA, survex_file), "rb") - char = 0 - for line in f.readlines(): - line = unicode(line, "latin1") - include_extension = re_include_extension.match(line) - include_no_extension = re_include_no_extension.match(line) - def a(include): - link = re.split(r"/|\\", include) - print os.path.join(directory, *link[:-1]), link[-1] - return fileIterator(os.path.join(directory, *link[:-1]), link[-1]) - if include_extension: - for sf, c, l in a(include_extension.groups()[0]): - yield sf, c, l - elif include_no_extension: - for sf, c, l in a(include_no_extension.groups()[0]): - yield sf, c, l - else: - yield survex_file, char, line - char = char + len(line) - -def make_model(name, parent, iter_lines, sf, c, l): - if parent: - m = models.SurvexBlock(name = name, parent = parent, begin_file = sf, begin_char = c, text = l) - else: - m = models.SurvexBlock(name = name, begin_file = sf, begin_char = c, text = l) - save(m) - for survex_file, count, line in iter_lines: - begin = re_begin.match(line.split(";")[0]) - end = re_end.match(line.split(";")[0]) - if begin: - make_model(begin.groups()[0], m, iter_lines, survex_file, count, line) - elif end: - m.text = m.text + line - m.end_file = survex_file - m.end_char = count - save(m) - 
assert (end.groups()[0]).lower() == (name).lower() - return None - else: - m.text = m.text + line - m.text = m.text + line - m.end_file = survex_file - m.end_char = count - save(m) - - -filename = "all" -make_model("", None, fileIterator("", filename), filename, 0, "") +import settings +import expo.models as models +import re +import os + +re_include_extension = re.compile(r"^\s*\*include\s+([^\s]*).svx$", re.IGNORECASE) +re_include_no_extension = re.compile(r"^\s*\*include\s+([^\s]*)$", re.IGNORECASE) +re_begin = re.compile(r"^\s*\*begin\s+(.*?)\s*$", re.IGNORECASE) +re_end = re.compile(r"^\s*\*end\s+(.*?)\s*$", re.IGNORECASE) + +def save(x): #There seems to be an intermittent problem with sqlite and Vista, this should fix it + #try: + x.save() + #except Exception: #sqlite3.OperationalError: + # print "Error" + # time.sleep(1) + # save(x) + +def fileIterator(directory, filename): + survex_file = os.path.join(directory, filename + ".svx") + f = open(os.path.join(settings.SURVEX_DATA, survex_file), "rb") + char = 0 + for line in f.readlines(): + line = unicode(line, "latin1").decode("utf-8") + include_extension = re_include_extension.match(line) + include_no_extension = re_include_no_extension.match(line) + def a(include): + link = re.split(r"/|\\", include) + print os.path.join(directory, *link[:-1]), link[-1] + return fileIterator(os.path.join(directory, *link[:-1]), link[-1]) + if include_extension: + for sf, c, l in a(include_extension.groups()[0]): + yield sf, c, l + elif include_no_extension: + for sf, c, l in a(include_no_extension.groups()[0]): + yield sf, c, l + else: + yield survex_file, char, line + char = char + len(line) + +def make_model(name, parent, iter_lines, sf, c, l): + if parent: + m = models.SurvexBlock(name = name, parent = parent, begin_file = sf, begin_char = c, text = l) + else: + m = models.SurvexBlock(name = name, begin_file = sf, begin_char = c, text = l) + save(m) + for survex_file, count, line in iter_lines: + begin = 
re_begin.match(line.split(";")[0]) + end = re_end.match(line.split(";")[0]) + if begin: + make_model(begin.groups()[0], m, iter_lines, survex_file, count, line) + elif end: + m.text = m.text + line + m.end_file = survex_file + m.end_char = count + print len(m.text) + save(m) + assert (end.groups()[0]).lower() == (name).lower() + return None + else: + m.text = m.text + line + m.text = m.text + line + m.end_file = survex_file + m.end_char = count + save(m) + + +filename = "all" +make_model("", None, fileIterator("", filename), filename, 0, "")