[svn] Initial troggle checkin

This is a development site using Django 1.0
Copied from http://cucc@cucc.survex.com/svn/trunk/expoweb/troggle/, rev. 8034 by julian @ 10/26/2008 9:04 PM
This commit is contained in:
substantialnoninfringinguser
2009-05-13 05:13:38 +01:00
commit b503d3d588
29 changed files with 1366 additions and 0 deletions

0
parsers/__init__.py Normal file
View File

272
parsers/cavetab.py Normal file
View File

@@ -0,0 +1,272 @@
# -*- coding: utf-8 -*-
import settings
import expo.models as models
import csv
import time
import sqlite3
import re
import os
# Column layout of CAVETAB2.CSV.  Each name is bound to the zero-based index
# of the matching field in a row, in file order.
(
    KatasterNumber,           # 0
    KatStatusCode,            # 1
    Entrances,                # 2
    UnofficialNumber,         # 3
    MultipleEntrances,        # 4
    AutogenFile,              # 5
    LinkFile,                 # 6
    LinkEntrance,             # 7
    Name,                     # 8
    UnofficialName,           # 9
    Comment,                  # 10
    Area,                     # 11
    Explorers,                # 12
    UndergroundDescription,   # 13
    Equipment,                # 14
    QMList,                   # 15
    KatasterStatus,           # 16
    References,               # 17
    UndergroundCentreLine,    # 18
    UndergroundDrawnSurvey,   # 19
    SurvexFile,               # 20
    Length,                   # 21
    Depth,                    # 22
    Extent,                   # 23
    Notes,                    # 24
    EntranceName,             # 25
    TagPoint,                 # 26
    OtherPoint,               # 27
    DescriptionOfOtherPoint,  # 28
    ExactEntrance,            # 29
    TypeOfFix,                # 30
    GPSpreSA,                 # 31
    GPSpostSA,                # 32
    Northing,                 # 33
    Easting,                  # 34
    Altitude,                 # 35
    Bearings,                 # 36
    Map,                      # 37
    Location,                 # 38
    Approach,                 # 39
    EntranceDescription,      # 40
    PhotoOfLocation,          # 41
    Marking,                  # 42
    MarkingComment,           # 43
    Findability,              # 44
    FindabilityComment,       # 45
) = range(46)
# Open the cave table export found at <settings.EXPOWEB>/noinfo/CAVETAB2.CSV
# and wrap it in a CSV reader.  The reader is consumed row by row by the
# import loop at the bottom of this script.
# NOTE(review): the file object is never closed explicitly in the code shown.
cavetab = open(os.path.join(settings.EXPOWEB, "noinfo", "CAVETAB2.CSV"))
caveReader = csv.reader(cavetab)
caveReader.next() # Strip out column headers
def save(x, retries=None, delay=1):
    """Call ``x.save()``, retrying while sqlite reports an operational error.

    There seems to be an intermittent problem with sqlite on Vista where a
    save fails with ``sqlite3.OperationalError``; waiting and trying again
    works around it.

    x       -- any object with a ``save()`` method.
    retries -- maximum number of retries after the first attempt; ``None``
               (the default) keeps retrying until the save succeeds.
    delay   -- seconds to sleep between attempts.

    Raises ``sqlite3.OperationalError`` once *retries* is exhausted.
    """
    attempt = 0
    # A loop rather than recursion, so a long outage cannot exhaust the
    # interpreter's recursion limit.
    while True:
        try:
            x.save()
            return
        except sqlite3.OperationalError:
            if retries is not None and attempt >= retries:
                raise
            attempt += 1
            print("Error")
            time.sleep(delay)
def html_to_wiki(text):
    """Convert HTML list markup in *text* to wiki-style list lines.

    ``<ul>`` and ``<ol>`` lists become lines prefixed with ``*`` and ``#``
    markers, one marker per list level that is currently open.  A leading
    ``</p>`` and a trailing ``<p>`` are dropped.  All other markup and HTML
    entities are passed through unchanged.

    Values that are not ``str`` instances (for example model objects) are
    returned untouched.  On Python 2 a byte string is decoded as UTF-8, so
    the result is unicode text.
    """
    if not isinstance(text, str):
        return text
    if not isinstance(text, type(u"")):
        # Python 2 byte string: decode it so the regexes work on characters.
        text = text.decode("utf-8")

    # Drop a stray closing </p> at the start and a stray opening <p> at the end.
    text = re.sub(r"^</p>(.*)", r"\1", text)
    text = re.sub(r"(.*)<p>$", r"\1", text)

    # (action, list marker, pattern).  Group 1 of every pattern is the text
    # that precedes the construct, so its length says how early it occurs.
    list_patterns = (
        ("open", "*", r"^(.*?)<ul>\s*<li[^>]*>(.*?)</li>(.*)$"),
        ("open", "#", r"^(.*?)<ol>\s*<li[^>]*>(.*?)</li>(.*)$"),
        ("item", "", r"^(\s*)<li[^>]*>(.*?)</li>(.*)$"),
        ("close", "", r"^(\s*)</ul>(.*)$"),
        ("close", "", r"^(\s*)</ol>(.*)$"),
    )

    out = ""
    lists = ""  # markers of the lists currently open, outermost first
    while text:
        # Handle whichever construct occurs earliest in the remaining text.
        # The strict "<" keeps the earlier pattern when two tie.
        best = None
        for action, marker, pattern in list_patterns:
            match = re.match(pattern, text, re.DOTALL)
            if match and (best is None
                          or len(match.group(1)) < len(best[2].group(1))):
                best = (action, marker, match)

        if best is None:
            # No list markup left: copy the rest verbatim.
            out += text
            break

        action, marker, match = best
        if action == "open":
            # New list plus its first item.
            lists += marker
            pre, val, post = match.groups()
            out += pre + "\n" + lists + " " + val
            text = post
        elif action == "item":
            pre, val, post = match.groups()
            out += "\n" + lists + " " + val
            text = post
        else:
            # </ul> or </ol>: leave the innermost list.
            lists = lists[:-1]
            text = match.group(2)
    return out
# Make sure the two top-level kataster areas exist, creating any that the
# filter query does not find.
for katArea in ['1623', '1626']:
    if not models.Area.objects.filter(short_name = katArea):
        newArea = models.Area(short_name = katArea)
        save(newArea)
# Keep both area objects at hand for the import loop below.
area1626 = models.Area.objects.filter(short_name = '1626')[0]
area1623 = models.Area.objects.filter(short_name = '1623')[0]
counter=0  # NOTE(review): never read or updated in the code shown here
# Import loop.  Depending on its MultipleEntrances column a CSV row creates a
# Cave ('yes' or empty), an Entrance (empty, 'entrance' or 'last entrance'),
# or both (empty).
for line in caveReader :
    # Rows whose Area column is 'nonexistent' are skipped entirely.
    if line[Area] == 'nonexistent':
        continue
    entranceLetters=[] #Used in caves that have multiple entrances, which are not described on separate lines
    # NOTE(review): entranceLetters is not used anywhere else in the code shown.
    if line[MultipleEntrances] == 'yes' or line[MultipleEntrances]=='':
        # Keyword arguments for the Cave constructor, filled from non-empty columns.
        args = {}
        def addToArgs(CSVname, modelName):
            # Copy column CSVname into args[modelName] (through html_to_wiki)
            # when the column is non-empty.
            if line[CSVname]:
                args[modelName] = html_to_wiki(line[CSVname])
        addToArgs(KatasterNumber, "kataster_number")
        addToArgs(KatStatusCode, "kataster_code")
        addToArgs(UnofficialNumber, "unofficial_number")
        addToArgs(Name, "official_name")
        addToArgs(Comment, "notes")
        addToArgs(Explorers, "explorers")
        addToArgs(UndergroundDescription, "underground_description")
        addToArgs(Equipment, "equipment")
        addToArgs(KatasterStatus, "kataster_status")
        addToArgs(References, "references")
        addToArgs(UndergroundCentreLine, "underground_centre_line")
        addToArgs(UndergroundDrawnSurvey, "survey")
        addToArgs(Length, "length")
        addToArgs(Depth, "depth")
        addToArgs(Extent, "extent")
        addToArgs(SurvexFile, "survex_file")
        # NOTE(review): "notes" was already set from the Comment column above;
        # a non-empty Notes column overwrites it -- confirm this is intended.
        addToArgs(Notes, "notes")
        newCave = models.Cave(**args)
        # Saved before the area links are added below.
        save(newCave)
        if line[Area]:
            if line[Area] == "1626":
                newCave.area.add(area1626)
            else:
                # Any other area name is looked up, or created with 1623 as its parent.
                area = models.Area.objects.filter(short_name = line[Area])
                if area:
                    newArea = area[0]
                else:
                    newArea = models.Area(short_name = line[Area], parent = area1623)
                    save(newArea)
                newCave.area.add(newArea)
        else:
            # No area given: the cave is attached to 1623.
            newCave.area.add(area1623)
        save(newCave)
        if line[UnofficialName]:
            newUnofficialName = models.OtherCaveName(cave = newCave, name = line[UnofficialName])
            save(newUnofficialName)
    if line[MultipleEntrances] == '' or \
       line[MultipleEntrances] == 'entrance' or \
       line[MultipleEntrances] == 'last entrance':
        # Keyword arguments for the Entrance constructor.
        args = {}
        def addToArgs(CSVname, modelName):
            # Same helper as in the cave branch, bound to the new args dict.
            if line[CSVname]:
                args[modelName] = html_to_wiki(line[CSVname])
        def addToArgsViaDict(CSVname, modelName, dictionary):
            # Translate a non-empty column value through `dictionary`.
            # A value missing from the dictionary raises KeyError.
            if line[CSVname]:
                args[modelName] = dictionary[html_to_wiki(line[CSVname])]
        addToArgs(EntranceName, 'name')
        addToArgs(Explorers, 'explorers')
        addToArgs(Map, 'map_description')
        addToArgs(Location, 'location_description')
        addToArgs(Approach, 'approach')
        addToArgs(EntranceDescription, 'entrance_description')
        addToArgs(UndergroundDescription, 'underground_description')
        addToArgs(PhotoOfLocation, 'photo')
        # The "" keys in the two tables below are never looked up, because
        # addToArgsViaDict ignores empty columns.
        addToArgsViaDict(Marking, 'marking', {"Paint": "P",
                                              "Paint (?)": "P?",
                                              "Tag": "T",
                                              "Tag (?)": "T?",
                                              "Retagged": "R",
                                              "Retag": "R",
                                              "Spit": "S",
                                              "Spit (?)": "S?",
                                              "Unmarked": "U",
                                              "": "?",
                                              })
        addToArgs(MarkingComment, 'marking_comment')
        addToArgsViaDict(Findability, 'findability', {"Surveyed": "S",
                                                      "Lost": "L",
                                                      "Refindable": "R",
                                                      "": "?",
                                                      "?": "?",
                                                      })
        addToArgs(FindabilityComment, 'findability_description')
        addToArgs(Easting, 'easting')
        addToArgs(Northing, 'northing')
        addToArgs(Altitude, 'alt')
        addToArgs(DescriptionOfOtherPoint, 'other_description')
        def addToArgsSurveyStation(CSVname, modelName):
            # Create and save a SurveyStation named after the column value and
            # store the object itself in args (html_to_wiki returns non-str
            # values unchanged).
            if line[CSVname]:
                surveyPoint = models.SurveyStation(name = line[CSVname])
                save(surveyPoint)
                args[modelName] = html_to_wiki(surveyPoint)
        addToArgsSurveyStation(TagPoint, 'tag_station')
        addToArgsSurveyStation(ExactEntrance, 'exact_station')
        addToArgsSurveyStation(OtherPoint, 'other_station')
        # NOTE(review): this overwrites the 'other_description' taken from
        # DescriptionOfOtherPoint above with the raw OtherPoint value, and the
        # GPS columns below overwrite both 'other_station' and
        # 'other_description' again -- confirm the intended precedence.
        addToArgs(OtherPoint, 'other_description')
        if line[GPSpreSA]:
            addToArgsSurveyStation(GPSpreSA, 'other_station')
            args['other_description'] = 'pre selective availability GPS'
        if line[GPSpostSA]:
            addToArgsSurveyStation(GPSpostSA, 'other_station')
            args['other_description'] = 'post selective availability GPS'
        addToArgs(Bearings, 'bearings')
        newEntrance = models.Entrance(**args)
        save(newEntrance)
        if line[Entrances]:
            entrance_letter = line[Entrances]
        else:
            entrance_letter = ''
        # newCave is the most recently created cave: from this row when
        # MultipleEntrances is empty, otherwise from an earlier row.
        newCaveAndEntrance = models.CaveAndEntrance(cave = newCave, entrance = newEntrance, entrance_letter = entrance_letter)
        save(newCaveAndEntrance)

197
parsers/logbooks.py Normal file
View File

@@ -0,0 +1,197 @@
#.-*- coding: utf-8 -*-
import settings
import expo.models as models
import csv
import sqlite3
import re
import os
import datetime
# Dave Johnson (Stonker) is hacked -- are there two of this DJ name
# Dave Collins (Scout) is hacked
# Letty ten Harkel has middle name removed
# the <span lang=""sv""> have been removed
# Dave Milne (Lummat)
# Ben van Millingen
# Rebecca Lawson (Becka)
# Open the expedition members table at <settings.EXPOWEB>/noinfo/folk.csv.
# NOTE(review): the file object is never closed explicitly in the code shown.
persontab = open(os.path.join(settings.EXPOWEB, "noinfo", "folk.csv"))
personreader = csv.reader(persontab)
# The first row holds the column titles; the remaining rows are read by LoadPersons().
headers = personreader.next()
# Map each column title to its position within a row.
header = dict(zip(headers, range(len(headers))))
def LoadExpos():
    """Recreate the Expedition table from the folk.csv column titles.

    All existing Expedition rows are deleted.  A 2008 expedition is then
    created explicitly, followed by one expedition for every column title
    from index 5 onwards in ``headers`` (those titles are used as the year).
    """
    models.Expedition.objects.all().delete()
    y = models.Expedition(year = "2008", name = "CUCC expo2008")
    y.save()
    for year in headers[5:]:
        # The name is built from the year string, not from the previously
        # created Expedition object.
        y = models.Expedition(year = year, name = "CUCC expo%s" % year)
        y.save()
def LoadPersons():
    """Recreate the Person and PersonExpedition tables from folk.csv.

    All existing Person and PersonExpedition rows are deleted.  Each
    remaining row of ``personreader`` then becomes a Person, linked to every
    expedition whose year column holds "1" or "-1".  People on the
    hard-coded 2008 list are also linked to the 2008 expedition.  Anyone on
    that list who does not appear in folk.csv is created afterwards.

    Rows whose Name column is blank are skipped.
    """
    models.Person.objects.all().delete()
    models.PersonExpedition.objects.all().delete()
    expoers2008 = """Edvin Deadman,Kathryn Hopkins,Djuke Veldhuis,Becka Lawson,Julian Todd,Natalie Uomini,Aaron Curtis,Tony Rooke,Ollie Stevens,Frank Tully,Martin Jahnke,Mark Shinwell,Jess Stirrups,Nial Peters,Serena Povia,Olly Madge,Steve Jones,Pete Harley,Eeva Makiranta,Keith Curtis""".split(",")
    expomissing = set(expoers2008)

    for person in personreader:
        name = person[header["Name"]]
        name = re.sub("<.*?>", "", name)  # strip any markup from the name
        lname = name.split()
        if not lname:
            # A blank name cannot be split into first and last name.
            continue
        # Only the first two words are kept; further words are dropped.
        if len(lname) >= 2:
            firstname, lastname = lname[0], lname[1]
        else:
            firstname, lastname = lname[0], ""
        print("%s %s" % (firstname, lastname))
        #assert lastname == person[header[""]], person
        # NOTE(review): is_vfho receives the raw column text while is_guest is
        # compared against "1" -- confirm which form the model expects.
        pObject = models.Person(first_name = firstname,
                                last_name = lastname,
                                is_guest = person[header["Guest"]] == "1",
                                is_vfho = person[header["VfHO member"]],
                                mug_shot = person[header["Mugshot"]])
        pObject.save()

        for year, attended in list(zip(headers, person))[5:]:
            if attended == "1" or attended == "-1":
                # Query the expedition only for years actually attended.
                yo = models.Expedition.objects.filter(year = year)[0]
                pyo = models.PersonExpedition(person = pObject, expedition = yo)
                pyo.save()

        if name in expoers2008:
            print("2008: %s" % name)
            expomissing.discard(name)
            yo = models.Expedition.objects.filter(year = "2008")[0]
            pyo = models.PersonExpedition(person = pObject, expedition = yo)
            pyo.save()

    # Whoever is still listed was on the 2008 expedition but is not in folk.csv.
    print(expomissing)
    for name in expomissing:
        firstname, lastname = name.split()
        pObject = models.Person(first_name = firstname,
                                last_name = lastname,
                                is_guest = name in ("Eeva Makiranta", "Keith Curtis"),
                                is_vfho = False,
                                mug_shot = "")
        pObject.save()
        yo = models.Expedition.objects.filter(year = "2008")[0]
        pyo = models.PersonExpedition(person = pObject, expedition = yo)
        pyo.save()
#
# the logbook loading section
#
def GetTripPersons(trippeople, expedition):
    """Resolve the people named in a logbook heading.

    trippeople -- names separated by ",", "+", "&" or " and ".  A name
                  wrapped in ``<u>...</u>`` marks the author.  Empty names
                  and names starting with "*" are ignored.
    expedition -- object whose ``GetPersonExpedition(name)`` is called for
                  every name that is kept.

    Returns ``(res, author)``.  ``res`` is the list of looked-up objects in
    the order they were named.  ``author`` is the underlined person, or the
    last person listed when nobody is underlined.  When no usable name is
    found the result is ``([], None)``.
    """
    res = [ ]
    author = None
    for tripperson in re.split(r",|\+|&| and ", trippeople):
        tripperson = tripperson.strip()
        mul = re.match("<u>(.*?)</u>$", tripperson)
        if mul:
            tripperson = mul.group(1)
        if tripperson and tripperson[0] != '*':
            #assert tripperson in personyearmap, "'%s' << %s\n\n %s" % (tripperson, trippeople, personyearmap)
            personyear = expedition.GetPersonExpedition(tripperson)
            print(personyear)
            res.append(personyear)
            if mul:
                author = personyear
    # Fall back to the last person listed, if there is one.
    if not author and res:
        author = res[-1]
    return res, author
def Parselogwikitxt(year, personyearmap, txt):
    """Create a LogbookEntry for each "===date|place|people===" section of *txt*.

    A section runs from its heading up to the next "===".  The place field is
    split on " - ": the first part is stored as the cave and the last part as
    the title.  The first "T/U" figure found in the body, if any, is stored
    as the time underground.  *personyearmap* is handed to GetTripPersons to
    resolve the people, who are then attached to the entry.
    """
    for heading, body in re.findall("===(.*?)===([\s\S]*?)(?====)", txt):
        fields = heading.split("|")
        assert len(fields) == 3, fields
        tripdate, tripplace, trippeople = fields
        placeparts = tripplace.split(" - ")

        # Time underground: the first "T/U" match wins, otherwise empty.
        tu_hits = re.findall("T/?U:?\s*(\d+(?:\.\d*)?|unknown)\s*(hrs|hours)?", body)
        #assert len(tu_hits) <= 1, (heading, body)
        #assert tu_hits[0][1] in ["hrs", "hours"], (heading, body)
        triptime = tu_hits[0][0] if tu_hits else ""

        #assert placeparts[0] == "Journey", (heading, body)
        assert re.match("\d\d\d\d-\d\d-\d\d", tripdate), tripdate
        entry_date = datetime.date(int(tripdate[:4]), int(tripdate[5:7]), int(tripdate[8:10]))

        entry = models.LogbookEntry(date = entry_date, cave = placeparts[0], title = placeparts[-1], text = body, tu = triptime)
        entry.save()

        people, author = GetTripPersons(trippeople, personyearmap)
        for caver in people:
            entry.cavers.add(caver)
        # add the author
def Parseloghtmltxt(year, expedition, txt):
    """Create LogbookEntry and PersonTrip rows from an HTML logbook.

    *txt* is cut into sections that start after an "<hr />" and end before
    the next "<hr".  Each section must match the tripdate / trippeople /
    triptitle layout below, otherwise an assertion fails.  One LogbookEntry
    is saved per section and one PersonTrip per person that GetTripPersons
    returns for it.

    NOTE(review): the *year* argument is overwritten by the year parsed from
    each trip date, so the value passed in is never used.
    """
    tripparas = re.findall("<hr\s*/>([\s\S]*?)(?=<hr)", txt)
    for trippara in tripparas:
        # Verbose-mode pattern ((?x)): whitespace inside it is ignored.
        # Groups: anchor id, date div id, date, people, title, body text,
        # optional "timeug" div.
        s = re.match('''(?x)\s*(?:<a\s+id="(.*?)"\s*/>)?
\s*<div\s+class="tripdate"\s*(?:id="(.*?)")?>(.*?)</div>
\s*<div\s+class="trippeople">(.*?)</div>
\s*<div\s+class="triptitle">(.*?)</div>
([\s\S]*?)
\s*(?:<div\s+class="timeug">(.*?)</div>)?
\s*$
''', trippara)
        assert s, trippara
        # tripid and tripid1 are captured but not used below.
        tripid, tripid1, tripdate, trippeople, triptitle, triptext, timeug = s.groups()
        # Two date spellings are accepted: YYYY-MM-DD, and D/M/YY with a
        # one-digit month, to which 2000 is added for the year.
        mdatestandard = re.match("(\d\d\d\d)-(\d\d)-(\d\d)", tripdate)
        mdategoof = re.match("(\d\d?)/(\d)/(\d\d)", tripdate)
        if mdatestandard:
            year, month, day = int(mdatestandard.group(1)), int(mdatestandard.group(2)), int(mdatestandard.group(3))
        elif mdategoof:
            day, month, year = int(mdategoof.group(1)), int(mdategoof.group(2)), int(mdategoof.group(3)) + 2000
        else:
            assert False, tripdate
        ldate = datetime.date(year, month, day)
        #assert tripid[:-1] == "t" + tripdate, (tripid, tripdate)
        trippersons, author = GetTripPersons(trippeople, expedition)
        # The place is always stored as an empty string here.
        tripcave = ""
        lbo = models.LogbookEntry(date = ldate, place = tripcave, title = triptitle, text = triptext, author=author)
        lbo.save()
        # Time underground is empty when the section has no "timeug" div.
        tu = timeug or ""
        for tripperson in trippersons:
            pto = models.PersonTrip(personexpedition = tripperson, place=tripcave, date=ldate, timeunderground=tu, logbookentry=lbo)
            pto.save()
def LoadLogbooks():
    """Delete all LogbookEntry rows and re-import the enabled logbooks.

    Each enabled ``yearlinks`` entry gives a year and a logbook path relative
    to <settings.EXPOWEB>/years.  Logbooks from 2007 onwards are parsed as
    wiki text, earlier ones as HTML.  The expedition for the year must
    already exist.
    """
    models.LogbookEntry.objects.all().delete()
    expowebbase = os.path.join(settings.EXPOWEB, "years") # this could be a url
    yearlinks = [
#                ("2008", "2008/logbook/2008logbook.txt"),
#                ("2007", "2007/logbook/2007logbook.txt"),
#                ("2005", "2005/logbook.html"),
                ("2004", "2004/logbook.html"),
#                ("2003", "2003/logbook.html"),
                ]
    for year, lloc in yearlinks:
        expedition = models.Expedition.objects.filter(year = year)[0]
        fin = open(os.path.join(expowebbase, lloc))
        try:
            txt = fin.read()
        finally:
            # Close the file even if reading fails.
            fin.close()
        # Years are four-digit strings, so string comparison orders them correctly.
        if year >= "2007":
            # Both parsers hand their second argument to GetTripPersons,
            # which expects the expedition object.
            Parselogwikitxt(year, expedition, txt)
        else:
            Parseloghtmltxt(year, expedition, txt)
# command line run through the loading stages
# The order matters: LoadPersons() and LoadLogbooks() both look up the
# Expedition rows that LoadExpos() creates.  These calls run whenever the
# module is executed or imported.
LoadExpos()
LoadPersons()
LoadLogbooks()

31
parsers/survex.py Normal file
View File

@@ -0,0 +1,31 @@
import settings
import expo.models as models
import re
import os
def readFile(filename):
    """Print every line that fileIterator yields for *filename* under settings.SURVEX_DATA."""
    svxlines = fileIterator(settings.SURVEX_DATA, filename)
    for svxline in svxlines:
        print(svxline)
# "*include <name>.svx" and "*include <name>" directives, case-insensitive.
# Group 1 is the included name without the ".svx" extension.  The dot before
# "svx" is escaped so that only a real extension is stripped, and trailing
# whitespace (for example the "\r" of a CRLF line ending) is tolerated.
re_include_extension = re.compile(r"^\s*\*include\s+([^\s]*)\.svx\s*$", re.IGNORECASE)
re_include_no_extension = re.compile(r"^\s*\*include\s+([^\s]*)\s*$", re.IGNORECASE)
def fileIterator(directory, filename):
    """Yield the lines of a survex file, expanding "*include" directives.

    directory -- directory that holds the file.
    filename  -- file name without the ".svx" extension.

    Lines matching re_include_extension or re_include_no_extension are not
    yielded themselves.  The file they name is read recursively in their
    place, relative to *directory*, with "/" and "\\" both accepted as path
    separators.  Every other line is yielded unchanged.
    """
    f = open(os.path.join(directory, filename + ".svx"), "rb")
    try:
        lines = f.readlines()
    finally:
        # Close before descending into includes, so nested files do not pile
        # up open handles.
        f.close()

    for line in lines:
        include = re_include_extension.match(line) or re_include_no_extension.match(line)
        if not include:
            yield line
            continue
        # Split the included path into directory parts and the file name.
        link = re.split(r"/|\\", include.groups()[0])
        subdirectory = os.path.join(directory, *link[:-1])
        print("%s %s" % (subdirectory, link[-1]))
        for included_line in fileIterator(subdirectory, link[-1]):
            yield included_line
# Print the whole survex data set, starting from all.svx under
# settings.SURVEX_DATA.  This runs whenever the module is executed or imported.
readFile("all")