2
0
mirror of https://expo.survex.com/repositories/troggle/.git synced 2025-12-14 18:37:18 +00:00

[svn] Work on turn html pages into cavedescription models.py.

Moved parser/cavetabs html_to_wiki function to utils.py
Added databaseReset.py desc to refresh the cavedescriptions.
This commit is contained in:
martin speleo
2009-07-04 16:42:17 +01:00
parent fa9a6416d1
commit c377d0376f
4 changed files with 133 additions and 93 deletions

View File

@@ -3,6 +3,7 @@ import troggle.core.models as models
from django.conf import settings
import csv, time, re, os, logging
from utils import save_carefully
from utils import html_to_wiki
##format of CAVETAB2.CSV is
KatasterNumber = 0
@@ -52,85 +53,6 @@ MarkingComment = 43
Findability = 44
FindabilityComment = 45
def html_to_wiki(text):
if type(text) != str:
return text
text = unicode(text, "utf-8")
#Characters
#text = re.sub("ü", u"\xfc", text)
#text = re.sub("ö", u"\xf6", text)
#text = re.sub("ä", u"\xe4", text)
#text = re.sub("°", u"\xb0", text)
#text = re.sub("©", u"\xa9", text)
#text = re.sub("&", u"\x26", text)
#text = re.sub("ß", u"\xdf", text)
#text = re.sub("ß", u"\xdf", text)
#text = re.sub("&lt;", u"<", text)
#text = re.sub("&gt;", u">", text)
#text = re.sub("&egrave;", u"\xe8", text)
#text = re.sub("&eacute;", u"\xe9", text)
#text = re.sub("&quote;", u'"', text)
#text = re.sub("&quot;", u'"', text)
#text = re.sub("&Ouml;", u'\xd6', text)
#text = re.sub("&times;", u'"', text)
#text = re.sub("&(.*);", "/1", text)
#if s:
# print s.groups()
#Lists
text = re.sub("</p>", r"", text)
text = re.sub("<p>$", r"", text)
text = re.sub("<p>", r"\n\n", text)
out = ""
lists = ""
while text:
mstar = re.match("^(.*?)<ul>\s*<li[^>]*>(.*?)</li>(.*)$", text, re.DOTALL)
munstar = re.match("^(\s*)</ul>(.*)$", text, re.DOTALL)
mhash = re.match("^(.*?)<ol>\s*<li[^>]*>(.*?)</li>(.*)$", text, re.DOTALL)
munhash = re.match("^(\s*)</ol>(.*)$", text, re.DOTALL)
mitem = re.match("^(\s*)<li[^>]*>(.*?)</li>(.*)$", text, re.DOTALL)
ms = [len(m.groups()[0]) for m in [mstar, munstar, mhash, munhash, mitem] if m]
def min_(i, l):
try:
v = i.groups()[0]
l.remove(len(v))
return len(v) < min(l, 1000000000)
except:
return False
if min_(mstar, ms):
lists += "*"
pre, val, post = mstar.groups()
out += pre + "\n" + lists + " " + val
text = post
elif min_(mhash, ms):
lists += "#"
pre, val, post = mhash.groups()
out += pre + "\n" + lists + " " + val
text = post
elif min_(mitem, ms):
pre, val, post = mitem.groups()
out += "\n" + lists + " " + val
text = post
elif min_(munstar, ms):
lists = lists[:-1]
text = munstar.groups()[1]
elif min_(munhash, ms):
lists.pop()
text = munhash.groups()[1]
else:
out += text
text = ""
text2 = out
while text2:
mtag = re.match("^(.*?)<(.*?)>(.*)$", text, re.DOTALL)
if mtag:
text2 = mtag.groups()[2]
print mtag.groups()[1]
else:
text2 = ""
return out
def LoadCaveTab():
cavetab = open(os.path.join(settings.EXPOWEB, "noinfo", "CAVETAB2.CSV"),'rU')
caveReader = csv.reader(cavetab)