troggle-unchained/parsers/cavetab.py

# -*- coding: utf-8 -*-

import settings
import expo.models as models
import csv
import time

#import sqlite3
import re
import os

# Column layout of CAVETAB2.CSV: each name below is the zero-based index of
# that column within a row.  The unpack raises ValueError if a name is added
# or removed without updating the count.
(
    KatasterNumber,            # 0
    KatStatusCode,             # 1
    Entrances,                 # 2
    UnofficialNumber,          # 3
    MultipleEntrances,         # 4
    AutogenFile,               # 5
    LinkFile,                  # 6
    LinkEntrance,              # 7
    Name,                      # 8
    UnofficialName,            # 9
    Comment,                   # 10
    Area,                      # 11
    Explorers,                 # 12
    UndergroundDescription,    # 13
    Equipment,                 # 14
    QMList,                    # 15
    KatasterStatus,            # 16
    References,                # 17
    UndergroundCentreLine,     # 18
    UndergroundDrawnSurvey,    # 19
    SurvexFile,                # 20
    Length,                    # 21
    Depth,                     # 22
    Extent,                    # 23
    Notes,                     # 24
    EntranceName,              # 25
    TagPoint,                  # 26
    OtherPoint,                # 27
    DescriptionOfOtherPoint,   # 28
    ExactEntrance,             # 29
    TypeOfFix,                 # 30
    GPSpreSA,                  # 31
    GPSpostSA,                 # 32
    Northing,                  # 33
    Easting,                   # 34
    Altitude,                  # 35
    Bearings,                  # 36
    Map,                       # 37
    Location,                  # 38
    Approach,                  # 39
    EntranceDescription,       # 40
    PhotoOfLocation,           # 41
    Marking,                   # 42
    MarkingComment,            # 43
    Findability,               # 44
    FindabilityComment,        # 45
) = range(46)

# Open <EXPOWEB>/noinfo/CAVETAB2.CSV and wrap it in a CSV reader.  The file
# object is kept in ``cavetab`` because the reader is consumed lazily by the
# import loop further down this module.
_cavetab_path = os.path.join(settings.EXPOWEB, "noinfo", "CAVETAB2.CSV")
cavetab = open(_cavetab_path)
caveReader = csv.reader(cavetab)

# Discard the first row, which holds the column headers.
caveReader.next()


def save(x):
    """Persist *x* by calling its ``save()`` method.

    Every database write in this module goes through this single function.
    There seems to be an intermittent problem with sqlite on Vista; to work
    around it, wrap the call below in ``try`` / ``except
    sqlite3.OperationalError``, sleep for a second and call ``save(x)``
    again (this also needs the ``import sqlite3`` at the top of the file
    to be uncommented).
    """
    x.save()

# List constructs recognised by html_to_wiki.  In every pattern group 1 is the
# text that precedes the construct and the last group is the text after it.
_LIST_PATTERNS = (
    ("ul", re.compile(r"^(.*?)<ul>\s*<li[^>]*>(.*?)</li>(.*)$", re.DOTALL)),
    ("ol", re.compile(r"^(.*?)<ol>\s*<li[^>]*>(.*?)</li>(.*)$", re.DOTALL)),
    ("li", re.compile(r"^(\s*)<li[^>]*>(.*?)</li>(.*)$", re.DOTALL)),
    ("end", re.compile(r"^(\s*)</ul>(.*)$", re.DOTALL)),
    ("end", re.compile(r"^(\s*)</ol>(.*)$", re.DOTALL)),
)

# Wiki marker appended to the nesting prefix when a list of that kind opens.
_LIST_MARKERS = {"ul": "*", "ol": "#"}


def html_to_wiki(text):
    """Convert HTML list markup in *text* into wiki list markup.

    ``<ul>`` and ``<ol>`` lists are rewritten so that each ``<li>`` item starts
    on a new line prefixed with one ``*`` (unordered) or ``#`` (ordered) per
    open list level.  A ``</p>`` at the very start and a ``<p>`` at the very
    end of the text are dropped.  Everything else is passed through as-is.

    TODO: HTML character entities (&uuml; &ouml; &auml; &deg; &copy; &amp;
    &szlig; &lt; &gt; &egrave; &eacute; &quot; &Ouml; &times;) are not
    translated.

    Args:
        text: the cell value to convert.  Only exact ``str`` instances are
            converted; on Python 2 these are byte strings and are decoded
            as UTF-8 first.

    Returns:
        The converted text (unicode), or *text* itself, untouched, when it is
        not a ``str`` (e.g. ``None``, numbers, model instances, and Python 2
        ``unicode`` objects).

    Raises:
        UnicodeDecodeError: if a byte string is not valid UTF-8.
    """
    if type(text) != str:
        return text
    if not isinstance(text, type(u"")):
        # Python 2: ``str`` is a byte string and has to be decoded.
        text = text.decode("utf-8")

    text = re.sub("^</p>(.*)", r"\1", text)
    text = re.sub("(.*)<p>$", r"\1", text)

    out = ""
    lists = ""  # one marker character per currently open list, outermost first
    while text:
        # Collect every construct that matches, then act on the one that
        # starts earliest, i.e. the one with the shortest preceding text.
        candidates = []
        for kind, pattern in _LIST_PATTERNS:
            found = pattern.match(text)
            if found:
                candidates.append((len(found.group(1)), kind, found))
        if not candidates:
            # No list markup left: emit the remainder verbatim.
            out += text
            break

        kind, found = min(candidates, key=lambda c: c[0])[1:]
        if kind in _LIST_MARKERS:
            # Opening tag together with its first item: go one level deeper.
            lists += _LIST_MARKERS[kind]
            pre, val, post = found.groups()
            out += pre + "\n" + lists + " " + val
            text = post
        elif kind == "li":
            # Further item at the current level; leading whitespace is dropped.
            pre, val, post = found.groups()
            out += "\n" + lists + " " + val
            text = post
        else:
            # Closing </ul> or </ol>: leave the innermost list.
            lists = lists[:-1]
            text = found.group(2)
    return out

# Make sure the two top-level kataster areas exist, creating whichever is
# missing, then fetch both for use by the import loop below.
for katArea in ('1623', '1626'):
    if models.Area.objects.filter(short_name=katArea):
        continue  # already in the database
    newArea = models.Area(short_name=katArea)
    save(newArea)
area1626 = models.Area.objects.filter(short_name='1626')[0]
area1623 = models.Area.objects.filter(short_name='1623')[0]

# CSV columns copied verbatim (via html_to_wiki) onto same-row Cave fields.
# Comment and Notes are handled separately because both feed "notes".
_CAVE_FIELDS = (
    (KatasterNumber, "kataster_number"),
    (KatStatusCode, "kataster_code"),
    (UnofficialNumber, "unofficial_number"),
    (Name, "official_name"),
    (Explorers, "explorers"),
    (UndergroundDescription, "underground_description"),
    (Equipment, "equipment"),
    (KatasterStatus, "kataster_status"),
    (References, "references"),
    (UndergroundCentreLine, "underground_centre_line"),
    (UndergroundDrawnSurvey, "survey"),
    (Length, "length"),
    (Depth, "depth"),
    (Extent, "extent"),
    (SurvexFile, "survex_file"),
)

# Translation of the "Marking" cell into the value passed as Entrance
# ``marking``.  The "" key is never looked up: empty cells are skipped.
_MARKING_CODES = {
    "Paint": "P",
    "Paint (?)": "P?",
    "Tag": "T",
    "Tag (?)": "T?",
    "Retagged": "R",
    "Retag": "R",
    "Spit": "S",
    "Spit (?)": "S?",
    "Unmarked": "U",
    "": "?",
}

# Translation of the "Findability" cell into the value passed as Entrance
# ``findability``.  The "" key is never looked up: empty cells are skipped.
_FINDABILITY_CODES = {
    "Surveyed": "S",
    "Lost": "L",
    "Refindable": "R",
    "": "?",
    "?": "?",
}


def _add_text(args, line, column, field):
    """Store the wiki form of ``line[column]`` in ``args[field]``; skip empty cells."""
    if line[column]:
        args[field] = html_to_wiki(line[column])


def _add_coded(args, line, column, field, codes):
    """Store ``codes[<cell>]`` in ``args[field]``; skip empty cells.

    Raises KeyError when the cell holds a value that is not a key of *codes*.
    """
    if line[column]:
        args[field] = codes[html_to_wiki(line[column])]


def _add_station(args, line, column, field):
    """Create and save a SurveyStation named ``line[column]`` and store it in
    ``args[field]``; skip empty cells."""
    if line[column]:
        station = models.SurveyStation(name=line[column])
        save(station)
        args[field] = station


def _import_cave(line):
    """Create, save and return the Cave described by CSV row *line*.

    Also links the cave to its Area (creating a sub-area of 1623 when the
    named area does not exist yet) and records the unofficial name, if any,
    as an OtherCaveName.
    """
    args = {}
    for column, field in _CAVE_FIELDS:
        _add_text(args, line, column, field)

    # Both the Comment and the Notes column belong in "notes"; keep both
    # instead of letting Notes silently replace Comment.
    notes = [html_to_wiki(line[column]) for column in (Comment, Notes) if line[column]]
    if notes:
        args["notes"] = "\n\n".join(notes)

    cave = models.Cave(**args)
    save(cave)  # must be saved before the many-to-many area link is added

    if not line[Area]:
        cave.area.add(area1623)  # no area given: file directly under 1623
    elif line[Area] == "1626":
        cave.area.add(area1626)
    else:
        matches = models.Area.objects.filter(short_name=line[Area])
        if matches:
            sub_area = matches[0]
        else:
            sub_area = models.Area(short_name=line[Area], parent=area1623)
            save(sub_area)
        cave.area.add(sub_area)

    save(cave)

    if line[UnofficialName]:
        save(models.OtherCaveName(cave=cave, name=line[UnofficialName]))
    return cave


def _import_entrance(line, cave):
    """Create and save the Entrance described by CSV row *line* and link it to
    *cave* through a CaveAndEntrance carrying the row's entrance letter."""
    args = {}
    _add_text(args, line, EntranceName, 'name')
    _add_text(args, line, Explorers, 'explorers')
    _add_text(args, line, Map, 'map_description')
    _add_text(args, line, Location, 'location_description')
    _add_text(args, line, Approach, 'approach')
    _add_text(args, line, EntranceDescription, 'entrance_description')
    _add_text(args, line, UndergroundDescription, 'underground_description')
    _add_text(args, line, PhotoOfLocation, 'photo')
    _add_coded(args, line, Marking, 'marking', _MARKING_CODES)
    _add_text(args, line, MarkingComment, 'marking_comment')
    _add_coded(args, line, Findability, 'findability', _FINDABILITY_CODES)
    _add_text(args, line, FindabilityComment, 'findability_description')
    _add_text(args, line, Easting, 'easting')
    _add_text(args, line, Northing, 'northing')
    _add_text(args, line, Altitude, 'alt')

    _add_station(args, line, TagPoint, 'tag_station')
    _add_station(args, line, ExactEntrance, 'exact_station')
    _add_station(args, line, OtherPoint, 'other_station')

    # Describe the "other" point with its explicit description; the station
    # name is only a fallback so that it cannot overwrite a real description.
    if line[DescriptionOfOtherPoint]:
        _add_text(args, line, DescriptionOfOtherPoint, 'other_description')
    else:
        _add_text(args, line, OtherPoint, 'other_description')

    # A GPS fix replaces whatever "other" station/description was set above;
    # when both fixes are present the post-SA one wins because it runs last.
    if line[GPSpreSA]:
        _add_station(args, line, GPSpreSA, 'other_station')
        args['other_description'] = 'pre selective availability GPS'
    if line[GPSpostSA]:
        _add_station(args, line, GPSpostSA, 'other_station')
        args['other_description'] = 'post selective availability GPS'

    _add_text(args, line, Bearings, 'bearings')

    entrance = models.Entrance(**args)
    save(entrance)

    link = models.CaveAndEntrance(cave=cave, entrance=entrance,
                                  entrance_letter=line[Entrances] or '')
    save(link)
    return entrance


# Import every row of CAVETAB2.CSV.  The MultipleEntrances column decides what
# a row describes:
#   ''                          a cave together with its single entrance
#   'yes'                       a cave only; its entrances follow on later rows
#   'entrance'/'last entrance'  an entrance of the most recently created cave
counter = 0  # not read anywhere in this file
for line in caveReader:
    if line[Area] == 'nonexistent':
        continue
    # Intended for caves with multiple entrances that are not described on
    # separate lines; not read anywhere in this file.
    entranceLetters = []
    if line[MultipleEntrances] in ('yes', ''):
        newCave = _import_cave(line)
    if line[MultipleEntrances] in ('', 'entrance', 'last entrance'):
        # For entrance-only rows ``newCave`` is still the cave created by an
        # earlier row.
        newEntrance = _import_entrance(line, newCave)
[svn] Initial troggle checkin This is a development site using Django 1.0 Copied from http://cucc@cucc.survex.com/svn/trunk/expoweb/troggle/, rev. 8034 by julian @ 10/26/2008 9:04 PM 2009-05-13 05:13:38 +01:00			`# -- coding: utf-8 --`

			`import settings`
			`import expo.models as models`
			`import csv`
			`import time`
[svn] Fix edit conflict screwup Copied from http://cucc@cucc.survex.com/svn/trunk/expoweb/troggle/, rev. 8041 by aaron @ 10/27/2008 6:12 PM 2009-05-13 05:15:05 +01:00
[svn] parsing of 2007 logbook. still problems Copied from http://cucc@cucc.survex.com/svn/trunk/expoweb/troggle/, rev. 8039 by julian @ 10/27/2008 2:03 AM 2009-05-13 05:14:41 +01:00			`#import sqlite3`
[svn] Initial troggle checkin This is a development site using Django 1.0 Copied from http://cucc@cucc.survex.com/svn/trunk/expoweb/troggle/, rev. 8034 by julian @ 10/26/2008 9:04 PM 2009-05-13 05:13:38 +01:00			`import re`
			`import os`

			`##format of CAVETAB2.CSV is`
			`KatasterNumber = 0`
			`KatStatusCode = 1`
			`Entrances = 2`
			`UnofficialNumber = 3`
			`MultipleEntrances = 4`
			`AutogenFile = 5`
			`LinkFile = 6`
			`LinkEntrance = 7`
			`Name = 8`
			`UnofficialName = 9`
			`Comment = 10`
			`Area = 11`
			`Explorers = 12`
			`UndergroundDescription = 13`
			`Equipment = 14`
			`QMList = 15`
			`KatasterStatus = 16`
			`References = 17`
			`UndergroundCentreLine = 18`
			`UndergroundDrawnSurvey = 19`
			`SurvexFile = 20`
			`Length = 21`
			`Depth = 22`
			`Extent = 23`
			`Notes = 24`
			`EntranceName = 25`
			`TagPoint = 26`
			`OtherPoint = 27`
			`DescriptionOfOtherPoint = 28`
			`ExactEntrance = 29`
			`TypeOfFix = 30`
			`GPSpreSA = 31`
			`GPSpostSA = 32`
			`Northing = 33`
			`Easting = 34`
			`Altitude = 35`
			`Bearings = 36`
			`Map = 37`
			`Location = 38`
			`Approach = 39`
			`EntranceDescription = 40`
			`PhotoOfLocation = 41`
			`Marking = 42`
			`MarkingComment = 43`
			`Findability = 44`
			`FindabilityComment = 45`

			`cavetab = open(os.path.join(settings.EXPOWEB, "noinfo", "CAVETAB2.CSV"))`
			`caveReader = csv.reader(cavetab)`
			`caveReader.next() # Strip out column headers`


[svn] Commented out sqlite reference for use on other databases Copied from http://cucc@cucc.survex.com/svn/trunk/expoweb/troggle/, rev. 8037 by aaron @ 10/27/2008 12:01 AM 2009-05-13 05:14:13 +01:00			`def save(x): #There seems to be an intermitent problem with sqlite and Vista, uncomment the lines below, and the import sqlite3 statment above to fix it`
			`# try:`
[svn] Initial troggle checkin This is a development site using Django 1.0 Copied from http://cucc@cucc.survex.com/svn/trunk/expoweb/troggle/, rev. 8034 by julian @ 10/26/2008 9:04 PM 2009-05-13 05:13:38 +01:00			`x.save()`
[svn] Commented out sqlite reference for use on other databases Copied from http://cucc@cucc.survex.com/svn/trunk/expoweb/troggle/, rev. 8037 by aaron @ 10/27/2008 12:01 AM 2009-05-13 05:14:13 +01:00			`# except sqlite3.OperationalError:`
			`# print "Error"`
			`# time.sleep(1)`
			`# save(x)`
[svn] Initial troggle checkin This is a development site using Django 1.0 Copied from http://cucc@cucc.survex.com/svn/trunk/expoweb/troggle/, rev. 8034 by julian @ 10/26/2008 9:04 PM 2009-05-13 05:13:38 +01:00
			`def html_to_wiki(text):`
			`if type(text) != str:`
			`return text`
			`text = unicode(text, "utf-8")`
			`#Characters`
			`#text = re.sub("ü", u"\xfc", text)`
			`#text = re.sub("ö", u"\xf6", text)`
			`#text = re.sub("ä", u"\xe4", text)`
			`#text = re.sub("°", u"\xb0", text)`
			`#text = re.sub("©", u"\xa9", text)`
			`#text = re.sub("&", u"\x26", text)`
			`#text = re.sub("ß", u"\xdf", text)`
			`#text = re.sub("ß", u"\xdf", text)`
			`#text = re.sub("<", u"<", text)`
			`#text = re.sub(">", u">", text)`
			`#text = re.sub("è", u"\xe8", text)`
			`#text = re.sub("é", u"\xe9", text)`
			`#text = re.sub("&quote;", u'"', text)`
			`#text = re.sub(""", u'"', text)`
			`#text = re.sub("Ö", u'\xd6', text)`
			`#text = re.sub("×", u'"', text)`

			`#text = re.sub("&(.*);", "/1", text)`
			`#if s:`
			`# print s.groups()`
			`#Lists`
			`text = re.sub("^</p>(.*)", r"\1", text)`
			`text = re.sub("(.*)<p>$", r"\1", text)`
			`out = ""`
			`lists = ""`
			`while text:`
			`mstar = re.match("^(.?)<ul>\s<li[^>]>(.?)</li>(.*)$", text, re.DOTALL)`
			`munstar = re.match("^(\s)</ul>(.)$", text, re.DOTALL)`
			`mhash = re.match("^(.?)<ol>\s<li[^>]>(.?)</li>(.*)$", text, re.DOTALL)`
			`munhash = re.match("^(\s)</ol>(.)$", text, re.DOTALL)`
			`mitem = re.match("^(\s)<li[^>]>(.?)</li>(.)$", text, re.DOTALL)`
			`ms = [len(m.groups()[0]) for m in [mstar, munstar, mhash, munhash, mitem] if m]`
			`def min_(i, l):`
			`try:`
			`v = i.groups()[0]`
			`l.remove(len(v))`
			`return len(v) < min(l, 1000000000)`
			`except:`
			`return False`
			`if min_(mstar, ms):`
			`lists += "*"`
			`pre, val, post = mstar.groups()`
			`out += pre + "\n" + lists + " " + val`
			`text = post`
			`elif min_(mhash, ms):`
			`lists += "#"`
			`pre, val, post = mhash.groups()`
			`out += pre + "\n" + lists + " " + val`
			`text = post`
			`elif min_(mitem, ms):`
			`pre, val, post = mitem.groups()`
			`out += "\n" + lists + " " + val`
			`text = post`
			`elif min_(munstar, ms):`
			`lists = lists[:-1]`
			`text = munstar.groups()[1]`
			`elif min_(munhash, ms):`
			`lists.pop()`
			`text = munhash.groups()[1]`
			`else:`
			`out += text`
			`text = ""`
			`text2 = out`
			`while text2:`
			`mtag = re.match("^(.?)<(.?)>(.*)$", text, re.DOTALL)`
			`if mtag:`
			`text2 = mtag.groups()[2]`
			`print mtag.groups()[1]`
			`else:`
			`text2 = ""`
			`return out`

			`for katArea in ['1623', '1626']:`
			`if not models.Area.objects.filter(short_name = katArea):`
			`newArea = models.Area(short_name = katArea)`
			`save(newArea)`
			`area1626 = models.Area.objects.filter(short_name = '1626')[0]`
			`area1623 = models.Area.objects.filter(short_name = '1623')[0]`

			`counter=0`
			`for line in caveReader :`
			`if line[Area] == 'nonexistent':`
			`continue`
			`entranceLetters=[] #Used in caves that have mulitlple entrances, which are not described on seperate lines`
			`if line[MultipleEntrances] == 'yes' or line[MultipleEntrances]=='':`
			`args = {}`
			`def addToArgs(CSVname, modelName):`
			`if line[CSVname]:`
			`args[modelName] = html_to_wiki(line[CSVname])`
			`addToArgs(KatasterNumber, "kataster_number")`
			`addToArgs(KatStatusCode, "kataster_code")`
			`addToArgs(UnofficialNumber, "unofficial_number")`
			`addToArgs(Name, "official_name")`
			`addToArgs(Comment, "notes")`
			`addToArgs(Explorers, "explorers")`
			`addToArgs(UndergroundDescription, "underground_description")`
			`addToArgs(Equipment, "equipment")`
			`addToArgs(KatasterStatus, "kataster_status")`
			`addToArgs(References, "references")`
			`addToArgs(UndergroundCentreLine, "underground_centre_line")`
			`addToArgs(UndergroundDrawnSurvey, "survey")`
			`addToArgs(Length, "length")`
			`addToArgs(Depth, "depth")`
			`addToArgs(Extent, "extent")`
			`addToArgs(SurvexFile, "survex_file")`
			`addToArgs(Notes, "notes")`

			`newCave = models.Cave(**args)`
			`save(newCave)`

			`if line[Area]:`
			`if line[Area] == "1626":`
			`newCave.area.add(area1626)`
			`else:`
			`area = models.Area.objects.filter(short_name = line[Area])`
			`if area:`
			`newArea = area[0]`
			`else:`
			`newArea = models.Area(short_name = line[Area], parent = area1623)`
			`save(newArea)`
			`newCave.area.add(newArea)`
			`else:`
			`newCave.area.add(area1623)`

			`save(newCave)`

			`if line[UnofficialName]:`
			`newUnofficialName = models.OtherCaveName(cave = newCave, name = line[UnofficialName])`
			`save(newUnofficialName)`
			`if line[MultipleEntrances] == '' or \`
			`line[MultipleEntrances] == 'entrance' or \`
			`line[MultipleEntrances] == 'last entrance':`
			`args = {}`
			`def addToArgs(CSVname, modelName):`
			`if line[CSVname]:`
			`args[modelName] = html_to_wiki(line[CSVname])`
			`def addToArgsViaDict(CSVname, modelName, dictionary):`
			`if line[CSVname]:`
			`args[modelName] = dictionary[html_to_wiki(line[CSVname])]`
			`addToArgs(EntranceName, 'name')`
			`addToArgs(Explorers, 'explorers')`
			`addToArgs(Map, 'map_description')`
			`addToArgs(Location, 'location_description')`
			`addToArgs(Approach, 'approach')`
			`addToArgs(EntranceDescription, 'entrance_description')`
			`addToArgs(UndergroundDescription, 'underground_description')`
			`addToArgs(PhotoOfLocation, 'photo')`
			`addToArgsViaDict(Marking, 'marking', {"Paint": "P",`
			`"Paint (?)": "P?",`
			`"Tag": "T",`
			`"Tag (?)": "T?",`
			`"Retagged": "R",`
			`"Retag": "R",`
			`"Spit": "S",`
			`"Spit (?)": "S?",`
			`"Unmarked": "U",`
			`"": "?",`
			`})`
			`addToArgs(MarkingComment, 'marking_comment')`
			`addToArgsViaDict(Findability, 'findability', {"Surveyed": "S",`
			`"Lost": "L",`
			`"Refindable": "R",`
			`"": "?",`
			`"?": "?",`
			`})`
			`addToArgs(FindabilityComment, 'findability_description')`
			`addToArgs(Easting, 'easting')`
			`addToArgs(Northing, 'northing')`
			`addToArgs(Altitude, 'alt')`
			`addToArgs(DescriptionOfOtherPoint, 'other_description')`
			`def addToArgsSurveyStation(CSVname, modelName):`
			`if line[CSVname]:`
			`surveyPoint = models.SurveyStation(name = line[CSVname])`
			`save(surveyPoint)`
			`args[modelName] = html_to_wiki(surveyPoint)`
			`addToArgsSurveyStation(TagPoint, 'tag_station')`
			`addToArgsSurveyStation(ExactEntrance, 'exact_station')`
			`addToArgsSurveyStation(OtherPoint, 'other_station')`
			`addToArgs(OtherPoint, 'other_description')`
			`if line[GPSpreSA]:`
			`addToArgsSurveyStation(GPSpreSA, 'other_station')`
			`args['other_description'] = 'pre selective availability GPS'`
			`if line[GPSpostSA]:`
			`addToArgsSurveyStation(GPSpostSA, 'other_station')`
			`args['other_description'] = 'post selective availability GPS'`
			`addToArgs(Bearings, 'bearings')`
			`newEntrance = models.Entrance(**args)`
			`save(newEntrance)`

			`if line[Entrances]:`
			`entrance_letter = line[Entrances]`
			`else:`
			`entrance_letter = ''`

			`newCaveAndEntrance = models.CaveAndEntrance(cave = newCave, entrance = newEntrance, entrance_letter = entrance_letter)`
[svn] parsing of 2007 logbook. still problems Copied from http://cucc@cucc.survex.com/svn/trunk/expoweb/troggle/, rev. 8039 by julian @ 10/27/2008 2:03 AM 2009-05-13 05:14:41 +01:00			`save(newCaveAndEntrance)`