forked from expo/troggle
[svn] Initial troggle checkin
This is a development site using Django 1.0, copied from http://cucc@cucc.survex.com/svn/trunk/expoweb/troggle/, rev. 8034, by julian @ 10/26/2008 9:04 PM
This commit is contained in:
0    parsers/__init__.py    Normal file
272  parsers/cavetab.py     Normal file
@@ -0,0 +1,272 @@
# -*- coding: utf-8 -*-

import settings
import expo.models as models
import csv
import time
import sqlite3
import re
import os

## Format of CAVETAB2.CSV: column indices
KatasterNumber = 0
KatStatusCode = 1
Entrances = 2
UnofficialNumber = 3
MultipleEntrances = 4
AutogenFile = 5
LinkFile = 6
LinkEntrance = 7
Name = 8
UnofficialName = 9
Comment = 10
Area = 11
Explorers = 12
UndergroundDescription = 13
Equipment = 14
QMList = 15
KatasterStatus = 16
References = 17
UndergroundCentreLine = 18
UndergroundDrawnSurvey = 19
SurvexFile = 20
Length = 21
Depth = 22
Extent = 23
Notes = 24
EntranceName = 25
TagPoint = 26
OtherPoint = 27
DescriptionOfOtherPoint = 28
ExactEntrance = 29
TypeOfFix = 30
GPSpreSA = 31
GPSpostSA = 32
Northing = 33
Easting = 34
Altitude = 35
Bearings = 36
Map = 37
Location = 38
Approach = 39
EntranceDescription = 40
PhotoOfLocation = 41
Marking = 42
MarkingComment = 43
Findability = 44
FindabilityComment = 45
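# Each row the CSV reader below yields is a plain list indexed with the
# constants above: line[Name] is column 8 (the official name), line[Area]
# column 11 (the kataster area), and so on.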

cavetab = open(os.path.join(settings.EXPOWEB, "noinfo", "CAVETAB2.CSV"))
caveReader = csv.reader(cavetab)
caveReader.next()  # strip out column headers


def save(x):  # there seems to be an intermittent problem with sqlite and Vista; retrying after a short sleep should fix it
    try:
        x.save()
    except sqlite3.OperationalError:
        print "Error"
        time.sleep(1)
        save(x)

def html_to_wiki(text):
    if type(text) != str:
        return text
    text = unicode(text, "utf-8")
    #Characters
    #text = re.sub("&uuml;", u"\xfc", text)
    #text = re.sub("&ouml;", u"\xf6", text)
    #text = re.sub("&auml;", u"\xe4", text)
    #text = re.sub("&deg;", u"\xb0", text)
    #text = re.sub("&copy;", u"\xa9", text)
    #text = re.sub("&amp;", u"\x26", text)
    #text = re.sub("&szlig;", u"\xdf", text)
    #text = re.sub("&lt;", u"<", text)
    #text = re.sub("&gt;", u">", text)
    #text = re.sub("&egrave;", u"\xe8", text)
    #text = re.sub("&eacute;", u"\xe9", text)
    #text = re.sub("&quote;", u'"', text)
    #text = re.sub("&quot;", u'"', text)
    #text = re.sub("&Ouml;", u'\xd6', text)
    #text = re.sub("&times;", u'"', text)

    #text = re.sub("&(.*);", "/1", text)
    #if s:
    #    print s.groups()
    #Lists: convert <ul>/<ol>/<li> nesting into wiki "*"/"#" prefixes
    text = re.sub("^</p>(.*)", r"\1", text)
    text = re.sub("(.*)<p>$", r"\1", text)
    out = ""
    lists = ""
    while text:
        mstar = re.match("^(.*?)<ul>\s*<li[^>]*>(.*?)</li>(.*)$", text, re.DOTALL)
        munstar = re.match("^(\s*)</ul>(.*)$", text, re.DOTALL)
        mhash = re.match("^(.*?)<ol>\s*<li[^>]*>(.*?)</li>(.*)$", text, re.DOTALL)
        munhash = re.match("^(\s*)</ol>(.*)$", text, re.DOTALL)
        mitem = re.match("^(\s*)<li[^>]*>(.*?)</li>(.*)$", text, re.DOTALL)
        ms = [len(m.groups()[0]) for m in [mstar, munstar, mhash, munhash, mitem] if m]
        def min_(i, l):
            # True if match i exists and starts earlier in the string than all the others
            try:
                v = i.groups()[0]
                l.remove(len(v))
                return len(v) < min(l + [1000000000])  # was min(l, 1000000000), which compares a list to an int
            except:
                return False
        if min_(mstar, ms):
            lists += "*"
            pre, val, post = mstar.groups()
            out += pre + "\n" + lists + " " + val
            text = post
        elif min_(mhash, ms):
            lists += "#"
            pre, val, post = mhash.groups()
            out += pre + "\n" + lists + " " + val
            text = post
        elif min_(mitem, ms):
            pre, val, post = mitem.groups()
            out += "\n" + lists + " " + val
            text = post
        elif min_(munstar, ms):
            lists = lists[:-1]
            text = munstar.groups()[1]
        elif min_(munhash, ms):
            lists = lists[:-1]  # was lists.pop(), which fails on a str
            text = munhash.groups()[1]
        else:
            out += text
            text = ""
    # report any tags left over after the list conversion
    text2 = out
    while text2:
        mtag = re.match("^(.*?)<(.*?)>(.*)$", text2, re.DOTALL)  # was matching text, which is always empty here
        if mtag:
            text2 = mtag.groups()[2]
            print mtag.groups()[1]
        else:
            text2 = ""
    return out
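# A minimal worked example of the list conversion above (input assumed for
# illustration):
#   html_to_wiki("Entrance series:<ul><li>crawl</li><li>pitch</li></ul>")
# returns
#   u"Entrance series:\n* crawl\n* pitch"
# and the loop at the end then prints any tags still left in the output.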

for katArea in ['1623', '1626']:
    if not models.Area.objects.filter(short_name = katArea):
        newArea = models.Area(short_name = katArea)
        save(newArea)
area1626 = models.Area.objects.filter(short_name = '1626')[0]
area1623 = models.Area.objects.filter(short_name = '1623')[0]

counter = 0
for line in caveReader:
    if line[Area] == 'nonexistent':
        continue
    entranceLetters = []  # used in caves that have multiple entrances which are not described on separate lines
    if line[MultipleEntrances] == 'yes' or line[MultipleEntrances] == '':
        args = {}
        def addToArgs(CSVname, modelName):
            if line[CSVname]:
                args[modelName] = html_to_wiki(line[CSVname])
        addToArgs(KatasterNumber, "kataster_number")
        addToArgs(KatStatusCode, "kataster_code")
        addToArgs(UnofficialNumber, "unofficial_number")
        addToArgs(Name, "official_name")
        addToArgs(Comment, "notes")
        addToArgs(Explorers, "explorers")
        addToArgs(UndergroundDescription, "underground_description")
        addToArgs(Equipment, "equipment")
        addToArgs(KatasterStatus, "kataster_status")
        addToArgs(References, "references")
        addToArgs(UndergroundCentreLine, "underground_centre_line")
        addToArgs(UndergroundDrawnSurvey, "survey")
        addToArgs(Length, "length")
        addToArgs(Depth, "depth")
        addToArgs(Extent, "extent")
        addToArgs(SurvexFile, "survex_file")
        addToArgs(Notes, "notes")  # NOTE: both Comment and Notes map to "notes"; a non-empty Notes overwrites Comment

        newCave = models.Cave(**args)
        save(newCave)

        if line[Area]:
            if line[Area] == "1626":
                newCave.area.add(area1626)
            else:
                area = models.Area.objects.filter(short_name = line[Area])
                if area:
                    newArea = area[0]
                else:
                    newArea = models.Area(short_name = line[Area], parent = area1623)
                    save(newArea)
                newCave.area.add(newArea)
        else:
            newCave.area.add(area1623)

        save(newCave)

        if line[UnofficialName]:
            newUnofficialName = models.OtherCaveName(cave = newCave, name = line[UnofficialName])
            save(newUnofficialName)
    if line[MultipleEntrances] == '' or \
       line[MultipleEntrances] == 'entrance' or \
       line[MultipleEntrances] == 'last entrance':
        args = {}
        def addToArgs(CSVname, modelName):
            if line[CSVname]:
                args[modelName] = html_to_wiki(line[CSVname])
        def addToArgsViaDict(CSVname, modelName, dictionary):
            if line[CSVname]:
                args[modelName] = dictionary[html_to_wiki(line[CSVname])]
        addToArgs(EntranceName, 'name')
        addToArgs(Explorers, 'explorers')
        addToArgs(Map, 'map_description')
        addToArgs(Location, 'location_description')
        addToArgs(Approach, 'approach')
        addToArgs(EntranceDescription, 'entrance_description')
        addToArgs(UndergroundDescription, 'underground_description')
        addToArgs(PhotoOfLocation, 'photo')
        addToArgsViaDict(Marking, 'marking', {"Paint": "P",
                                              "Paint (?)": "P?",
                                              "Tag": "T",
                                              "Tag (?)": "T?",
                                              "Retagged": "R",
                                              "Retag": "R",
                                              "Spit": "S",
                                              "Spit (?)": "S?",
                                              "Unmarked": "U",
                                              "": "?",
                                              })
        addToArgs(MarkingComment, 'marking_comment')
        addToArgsViaDict(Findability, 'findability', {"Surveyed": "S",
                                                      "Lost": "L",
                                                      "Refindable": "R",
                                                      "": "?",
                                                      "?": "?",
                                                      })
        addToArgs(FindabilityComment, 'findability_description')
        addToArgs(Easting, 'easting')
        addToArgs(Northing, 'northing')
        addToArgs(Altitude, 'alt')
        addToArgs(DescriptionOfOtherPoint, 'other_description')
        def addToArgsSurveyStation(CSVname, modelName):
            if line[CSVname]:
                surveyPoint = models.SurveyStation(name = line[CSVname])
                save(surveyPoint)
                args[modelName] = surveyPoint  # was html_to_wiki(surveyPoint), a no-op on a model object
        addToArgsSurveyStation(TagPoint, 'tag_station')
        addToArgsSurveyStation(ExactEntrance, 'exact_station')
        addToArgsSurveyStation(OtherPoint, 'other_station')
        addToArgs(OtherPoint, 'other_description')
        if line[GPSpreSA]:
            addToArgsSurveyStation(GPSpreSA, 'other_station')
            args['other_description'] = 'pre selective availability GPS'
        if line[GPSpostSA]:
            addToArgsSurveyStation(GPSpostSA, 'other_station')
            args['other_description'] = 'post selective availability GPS'
        addToArgs(Bearings, 'bearings')
        newEntrance = models.Entrance(**args)
        save(newEntrance)

        if line[Entrances]:
            entrance_letter = line[Entrances]
        else:
            entrance_letter = ''

        newCaveAndEntrance = models.CaveAndEntrance(cave = newCave, entrance = newEntrance, entrance_letter = entrance_letter)
        save(newCaveAndEntrance)
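# In brief: a row whose MultipleEntrances is "yes" or "" creates a Cave; a row
# with "", "entrance" or "last entrance" creates an Entrance and links it to
# the most recent Cave through CaveAndEntrance, so a multi-entrance cave is
# one "yes" row followed by one row per entrance.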
197  parsers/logbooks.py    Normal file
@@ -0,0 +1,197 @@
# -*- coding: utf-8 -*-

import settings
import expo.models as models
import csv
import sqlite3
import re
import os
import datetime

# Dave Johnson (Stonker) is hacked -- are there two of this DJ name
# Dave Collins (Scout) is hacked
# Letty ten Harkel has middle name removed
# the <span lang="sv"> tags have been removed
# Dave Milne (Lummat)
# Ben van Millingen
# Rebecca Lawson (Becka)

persontab = open(os.path.join(settings.EXPOWEB, "noinfo", "folk.csv"))
personreader = csv.reader(persontab)
headers = personreader.next()
header = dict(zip(headers, range(len(headers))))
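# Layout of folk.csv implied by the lookups above (inferred, not documented in
# this commit): columns for Name, Guest, VfHO member and Mugshot among the
# first five, then one column per expedition year from the sixth column
# onwards, with "1" (or "-1") marking attendance in that year.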

def LoadExpos():
    models.Expedition.objects.all().delete()
    y = models.Expedition(year = "2008", name = "CUCC expo2008")
    y.save()
    for year in headers[5:]:
        y = models.Expedition(year = year, name = "CUCC expo%s" % year)  # was "% y", which formatted the previous Expedition object
        y.save()

def LoadPersons():
    models.Person.objects.all().delete()
    models.PersonExpedition.objects.all().delete()
    expoers2008 = """Edvin Deadman,Kathryn Hopkins,Djuke Veldhuis,Becka Lawson,Julian Todd,Natalie Uomini,Aaron Curtis,Tony Rooke,Ollie Stevens,Frank Tully,Martin Jahnke,Mark Shinwell,Jess Stirrups,Nial Peters,Serena Povia,Olly Madge,Steve Jones,Pete Harley,Eeva Makiranta,Keith Curtis""".split(",")
    expomissing = set(expoers2008)

    for person in personreader:
        name = person[header["Name"]]
        name = re.sub("<.*?>", "", name)
        lname = name.split()
        if len(lname) >= 2:
            firstname, lastname = lname[0], lname[1]
        else:
            firstname, lastname = lname[0], ""
        print firstname, lastname
        #assert lastname == person[header[""]], person
        pObject = models.Person(first_name = firstname,
                                last_name = lastname,
                                is_guest = person[header["Guest"]] == "1",
                                is_vfho = person[header["VfHO member"]],
                                mug_shot = person[header["Mugshot"]])
        pObject.save()

        for year, attended in zip(headers, person)[5:]:
            yo = models.Expedition.objects.filter(year = year)[0]
            if attended == "1" or attended == "-1":
                pyo = models.PersonExpedition(person = pObject, expedition = yo)
                pyo.save()

        if name in expoers2008:
            print "2008:", name
            expomissing.discard(name)
            yo = models.Expedition.objects.filter(year = "2008")[0]
            pyo = models.PersonExpedition(person = pObject, expedition = yo)
            pyo.save()

    print expomissing
    for name in expomissing:
        firstname, lastname = name.split()
        pObject = models.Person(first_name = firstname,
                                last_name = lastname,
                                is_guest = name in ["Eeva Makiranta", "Keith Curtis"],  # "Kieth" corrected to match the list above
                                is_vfho = False,
                                mug_shot = "")
        pObject.save()
        yo = models.Expedition.objects.filter(year = "2008")[0]
        pyo = models.PersonExpedition(person = pObject, expedition = yo)
        pyo.save()

#
# the logbook loading section
#
def GetTripPersons(trippeople, expedition):
    res = [ ]
    author = None
    for tripperson in re.split(",|\+|&| and ", trippeople):
        tripperson = tripperson.strip()
        mul = re.match("<u>(.*?)</u>$", tripperson)
        if mul:
            tripperson = mul.group(1)
        if tripperson and tripperson[0] != '*':
            #assert tripperson in personyearmap, "'%s' << %s\n\n %s" % (tripperson, trippeople, personyearmap)
            personyear = expedition.GetPersonExpedition(tripperson)
            print personyear
            res.append(personyear)
            if mul:
                author = personyear
    if not author:
        author = res[-1]
    return res, author
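# Illustrative call (names assumed): given the trip-people fragment
# "<u>Julian Todd</u> & Becka Lawson", GetTripPersons returns the matching
# PersonExpedition objects, with the <u>-underlined person as author:
#   trippersons, author = GetTripPersons("<u>Julian Todd</u> & Becka Lawson", expedition)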

def Parselogwikitxt(year, personyearmap, txt):
    trippara = re.findall("===(.*?)===([\s\S]*?)(?====)", txt)
    for triphead, triptext in trippara:
        tripheadp = triphead.split("|")
        assert len(tripheadp) == 3, tripheadp
        tripdate, tripplace, trippeople = tripheadp
        tripsplace = tripplace.split(" - ")
        tripcave = tripsplace[0]

        tul = re.findall("T/?U:?\s*(\d+(?:\.\d*)?|unknown)\s*(hrs|hours)?", triptext)
        if tul:
            #assert len(tul) <= 1, (triphead, triptext)
            #assert tul[0][1] in ["hrs", "hours"], (triphead, triptext)
            triptime = tul[0][0]
        else:
            triptime = ""
        #assert tripcave == "Journey", (triphead, triptext)

        assert re.match("\d\d\d\d-\d\d-\d\d", tripdate), tripdate
        ldate = datetime.date(int(tripdate[:4]), int(tripdate[5:7]), int(tripdate[8:10]))
        lbo = models.LogbookEntry(date = ldate, cave = tripcave, title = tripsplace[-1], text = triptext, tu = triptime)
        lbo.save()

        trippersons, author = GetTripPersons(trippeople, personyearmap)
        for tripperson in trippersons:
            lbo.cavers.add(tripperson)
        # add the author
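# Shape of a wiki-format logbook entry implied by the regex and the three-way
# "|" split above (date, cave and people assumed for illustration):
#   ===2007-07-15|Somecave - White Elephant|Becka Lawson, Julian Todd===
#   Trip text ... T/U: 6 hrs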

def Parseloghtmltxt(year, expedition, txt):
    tripparas = re.findall("<hr\s*/>([\s\S]*?)(?=<hr)", txt)
    for trippara in tripparas:
        s = re.match('''(?x)\s*(?:<a\s+id="(.*?)"\s*/>)?
                        \s*<div\s+class="tripdate"\s*(?:id="(.*?)")?>(.*?)</div>
                        \s*<div\s+class="trippeople">(.*?)</div>
                        \s*<div\s+class="triptitle">(.*?)</div>
                        ([\s\S]*?)
                        \s*(?:<div\s+class="timeug">(.*?)</div>)?
                        \s*$
                     ''', trippara)
        assert s, trippara

        tripid, tripid1, tripdate, trippeople, triptitle, triptext, timeug = s.groups()
        mdatestandard = re.match("(\d\d\d\d)-(\d\d)-(\d\d)", tripdate)
        mdategoof = re.match("(\d\d?)/(\d)/(\d\d)", tripdate)
        if mdatestandard:
            year, month, day = int(mdatestandard.group(1)), int(mdatestandard.group(2)), int(mdatestandard.group(3))
        elif mdategoof:
            day, month, year = int(mdategoof.group(1)), int(mdategoof.group(2)), int(mdategoof.group(3)) + 2000
        else:
            assert False, tripdate
        ldate = datetime.date(year, month, day)
        #assert tripid[:-1] == "t" + tripdate, (tripid, tripdate)
        trippersons, author = GetTripPersons(trippeople, expedition)
        tripcave = ""
        lbo = models.LogbookEntry(date = ldate, place = tripcave, title = triptitle, text = triptext, author = author)
        lbo.save()
        tu = timeug or ""

        for tripperson in trippersons:
            pto = models.PersonTrip(personexpedition = tripperson, place = tripcave, date = ldate, timeunderground = tu, logbookentry = lbo)
            pto.save()
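# Shape of an HTML logbook entry matched by the regex above (values assumed
# for illustration):
#   <hr />
#   <div class="tripdate" id="t2004-08-01">2004-08-01</div>
#   <div class="trippeople"><u>Julian Todd</u>, Becka Lawson</div>
#   <div class="triptitle">Rigging trip</div>
#   <p>Trip text ...</p>
#   <div class="timeug">5</div>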

def LoadLogbooks():
    models.LogbookEntry.objects.all().delete()
    expowebbase = os.path.join(settings.EXPOWEB, "years")  # this could be a url
    yearlinks = [
#        ("2008", "2008/logbook/2008logbook.txt"),
#        ("2007", "2007/logbook/2007logbook.txt"),
#        ("2005", "2005/logbook.html"),
        ("2004", "2004/logbook.html"),
#        ("2003", "2003/logbook.html"),
                ]

    for year, lloc in yearlinks:
        expedition = models.Expedition.objects.filter(year = year)[0]
        fin = open(os.path.join(expowebbase, lloc))
        txt = fin.read()
        fin.close()
        #print personyearmap
        if year >= "2007":
            # NOTE: personyearmap is not defined in this scope; the wiki-format
            # years above are all commented out, so this branch never runs here
            Parselogwikitxt(year, personyearmap, txt)
        else:
            Parseloghtmltxt(year, expedition, txt)

# command line run through the loading stages
LoadExpos()
LoadPersons()
LoadLogbooks()

31   parsers/survex.py      Normal file
@@ -0,0 +1,31 @@
import settings
import expo.models as models
import re
import os

def readFile(filename):
    for line in fileIterator(settings.SURVEX_DATA, filename):
        print line

re_include_extension = re.compile(r"^\s*\*include\s+([^\s]*)\.svx$", re.IGNORECASE)  # "." escaped; was ".svx"
re_include_no_extension = re.compile(r"^\s*\*include\s+([^\s]*)$", re.IGNORECASE)

def fileIterator(directory, filename):
    # yield the lines of filename.svx, recursively splicing in any *include'd files
    f = open(os.path.join(directory, filename + ".svx"), "rb")
    for line in f.readlines():
        include_extension = re_include_extension.match(line)
        include_no_extension = re_include_no_extension.match(line)
        def a(include):
            link = re.split(r"/|\\", include)
            print os.path.join(directory, *link[:-1]), link[-1]
            return fileIterator(os.path.join(directory, *link[:-1]), link[-1])
        if include_extension:
            for b in a(include_extension.groups()[0]):
                yield b
        elif include_no_extension:
            for b in a(include_no_extension.groups()[0]):
                yield b
        else:
            yield line

readFile("all")
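# Illustration (file names assumed): if all.svx contains the line
#   *include caves/204/204
# then fileIterator opens caves/204/204.svx under that subdirectory and yields
# its lines in place of the *include line, recursing into further includes.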