troggle-unchained/parsers/cavetab.py

274 lines
9.7 KiB
Python
Raw Normal View History

# -*- coding: utf-8 -*-
import settings
import expo.models as models
import csv
import time
#import sqlite3
import re
import os
##format of CAVETAB2.CSV is
KatasterNumber = 0
KatStatusCode = 1
Entrances = 2
UnofficialNumber = 3
MultipleEntrances = 4
AutogenFile = 5
LinkFile = 6
LinkEntrance = 7
Name = 8
UnofficialName = 9
Comment = 10
Area = 11
Explorers = 12
UndergroundDescription = 13
Equipment = 14
QMList = 15
KatasterStatus = 16
References = 17
UndergroundCentreLine = 18
UndergroundDrawnSurvey = 19
SurvexFile = 20
Length = 21
Depth = 22
Extent = 23
Notes = 24
EntranceName = 25
TagPoint = 26
OtherPoint = 27
DescriptionOfOtherPoint = 28
ExactEntrance = 29
TypeOfFix = 30
GPSpreSA = 31
GPSpostSA = 32
Northing = 33
Easting = 34
Altitude = 35
Bearings = 36
Map = 37
Location = 38
Approach = 39
EntranceDescription = 40
PhotoOfLocation = 41
Marking = 42
MarkingComment = 43
Findability = 44
FindabilityComment = 45
cavetab = open(os.path.join(settings.EXPOWEB, "noinfo", "CAVETAB2.CSV"))
caveReader = csv.reader(cavetab)
caveReader.next() # Strip out column headers
def save(x): #There seems to be an intermitent problem with sqlite and Vista, uncomment the lines below, and the import sqlite3 statment above to fix it
# try:
x.save()
# except sqlite3.OperationalError:
# print "Error"
# time.sleep(1)
# save(x)
def html_to_wiki(text):
if type(text) != str:
return text
text = unicode(text, "utf-8")
#Characters
#text = re.sub("ü", u"\xfc", text)
#text = re.sub("ö", u"\xf6", text)
#text = re.sub("ä", u"\xe4", text)
#text = re.sub("°", u"\xb0", text)
#text = re.sub("©", u"\xa9", text)
#text = re.sub("&", u"\x26", text)
#text = re.sub("ß", u"\xdf", text)
#text = re.sub("ß", u"\xdf", text)
#text = re.sub("&lt;", u"<", text)
#text = re.sub("&gt;", u">", text)
#text = re.sub("&egrave;", u"\xe8", text)
#text = re.sub("&eacute;", u"\xe9", text)
#text = re.sub("&quote;", u'"', text)
#text = re.sub("&quot;", u'"', text)
#text = re.sub("&Ouml;", u'\xd6', text)
#text = re.sub("&times;", u'"', text)
#text = re.sub("&(.*);", "/1", text)
#if s:
# print s.groups()
#Lists
text = re.sub("^</p>(.*)", r"\1", text)
text = re.sub("(.*)<p>$", r"\1", text)
out = ""
lists = ""
while text:
mstar = re.match("^(.*?)<ul>\s*<li[^>]*>(.*?)</li>(.*)$", text, re.DOTALL)
munstar = re.match("^(\s*)</ul>(.*)$", text, re.DOTALL)
mhash = re.match("^(.*?)<ol>\s*<li[^>]*>(.*?)</li>(.*)$", text, re.DOTALL)
munhash = re.match("^(\s*)</ol>(.*)$", text, re.DOTALL)
mitem = re.match("^(\s*)<li[^>]*>(.*?)</li>(.*)$", text, re.DOTALL)
ms = [len(m.groups()[0]) for m in [mstar, munstar, mhash, munhash, mitem] if m]
def min_(i, l):
try:
v = i.groups()[0]
l.remove(len(v))
return len(v) < min(l, 1000000000)
except:
return False
if min_(mstar, ms):
lists += "*"
pre, val, post = mstar.groups()
out += pre + "\n" + lists + " " + val
text = post
elif min_(mhash, ms):
lists += "#"
pre, val, post = mhash.groups()
out += pre + "\n" + lists + " " + val
text = post
elif min_(mitem, ms):
pre, val, post = mitem.groups()
out += "\n" + lists + " " + val
text = post
elif min_(munstar, ms):
lists = lists[:-1]
text = munstar.groups()[1]
elif min_(munhash, ms):
lists.pop()
text = munhash.groups()[1]
else:
out += text
text = ""
text2 = out
while text2:
mtag = re.match("^(.*?)<(.*?)>(.*)$", text, re.DOTALL)
if mtag:
text2 = mtag.groups()[2]
print mtag.groups()[1]
else:
text2 = ""
return out
for katArea in ['1623', '1626']:
if not models.Area.objects.filter(short_name = katArea):
newArea = models.Area(short_name = katArea)
save(newArea)
area1626 = models.Area.objects.filter(short_name = '1626')[0]
area1623 = models.Area.objects.filter(short_name = '1623')[0]
counter=0
for line in caveReader :
if line[Area] == 'nonexistent':
continue
entranceLetters=[] #Used in caves that have mulitlple entrances, which are not described on seperate lines
if line[MultipleEntrances] == 'yes' or line[MultipleEntrances]=='':
args = {}
def addToArgs(CSVname, modelName):
if line[CSVname]:
args[modelName] = html_to_wiki(line[CSVname])
addToArgs(KatasterNumber, "kataster_number")
addToArgs(KatStatusCode, "kataster_code")
addToArgs(UnofficialNumber, "unofficial_number")
addToArgs(Name, "official_name")
addToArgs(Comment, "notes")
addToArgs(Explorers, "explorers")
addToArgs(UndergroundDescription, "underground_description")
addToArgs(Equipment, "equipment")
addToArgs(KatasterStatus, "kataster_status")
addToArgs(References, "references")
addToArgs(UndergroundCentreLine, "underground_centre_line")
addToArgs(UndergroundDrawnSurvey, "survey")
addToArgs(Length, "length")
addToArgs(Depth, "depth")
addToArgs(Extent, "extent")
addToArgs(SurvexFile, "survex_file")
addToArgs(Notes, "notes")
newCave = models.Cave(**args)
save(newCave)
if line[Area]:
if line[Area] == "1626":
newCave.area.add(area1626)
else:
area = models.Area.objects.filter(short_name = line[Area])
if area:
newArea = area[0]
else:
newArea = models.Area(short_name = line[Area], parent = area1623)
save(newArea)
newCave.area.add(newArea)
else:
newCave.area.add(area1623)
save(newCave)
if line[UnofficialName]:
newUnofficialName = models.OtherCaveName(cave = newCave, name = line[UnofficialName])
save(newUnofficialName)
if line[MultipleEntrances] == '' or \
line[MultipleEntrances] == 'entrance' or \
line[MultipleEntrances] == 'last entrance':
args = {}
def addToArgs(CSVname, modelName):
if line[CSVname]:
args[modelName] = html_to_wiki(line[CSVname])
def addToArgsViaDict(CSVname, modelName, dictionary):
if line[CSVname]:
args[modelName] = dictionary[html_to_wiki(line[CSVname])]
addToArgs(EntranceName, 'name')
addToArgs(Explorers, 'explorers')
addToArgs(Map, 'map_description')
addToArgs(Location, 'location_description')
addToArgs(Approach, 'approach')
addToArgs(EntranceDescription, 'entrance_description')
addToArgs(UndergroundDescription, 'underground_description')
addToArgs(PhotoOfLocation, 'photo')
addToArgsViaDict(Marking, 'marking', {"Paint": "P",
"Paint (?)": "P?",
"Tag": "T",
"Tag (?)": "T?",
"Retagged": "R",
"Retag": "R",
"Spit": "S",
"Spit (?)": "S?",
"Unmarked": "U",
"": "?",
})
addToArgs(MarkingComment, 'marking_comment')
addToArgsViaDict(Findability, 'findability', {"Surveyed": "S",
"Lost": "L",
"Refindable": "R",
"": "?",
"?": "?",
})
addToArgs(FindabilityComment, 'findability_description')
addToArgs(Easting, 'easting')
addToArgs(Northing, 'northing')
addToArgs(Altitude, 'alt')
addToArgs(DescriptionOfOtherPoint, 'other_description')
def addToArgsSurveyStation(CSVname, modelName):
if line[CSVname]:
surveyPoint = models.SurveyStation(name = line[CSVname])
save(surveyPoint)
args[modelName] = html_to_wiki(surveyPoint)
addToArgsSurveyStation(TagPoint, 'tag_station')
addToArgsSurveyStation(ExactEntrance, 'exact_station')
addToArgsSurveyStation(OtherPoint, 'other_station')
addToArgs(OtherPoint, 'other_description')
if line[GPSpreSA]:
addToArgsSurveyStation(GPSpreSA, 'other_station')
args['other_description'] = 'pre selective availability GPS'
if line[GPSpostSA]:
addToArgsSurveyStation(GPSpostSA, 'other_station')
args['other_description'] = 'post selective availability GPS'
addToArgs(Bearings, 'bearings')
newEntrance = models.Entrance(**args)
save(newEntrance)
if line[Entrances]:
entrance_letter = line[Entrances]
else:
entrance_letter = ''
newCaveAndEntrance = models.CaveAndEntrance(cave = newCave, entrance = newEntrance, entrance_letter = entrance_letter)
save(newCaveAndEntrance)