mirror of
https://expo.survex.com/repositories/troggle/.git
synced 2025-12-14 18:37:18 +00:00
[svn] Work on turning html pages into cavedescription models.py.
Moved parser/cavetabs html_to_wiki function to utils.py. Added databaseReset.py desc to refresh the cavedescriptions.
This commit is contained in:
@@ -3,6 +3,7 @@ import troggle.core.models as models
|
||||
from django.conf import settings
|
||||
import csv, time, re, os, logging
|
||||
from utils import save_carefully
|
||||
from utils import html_to_wiki
|
||||
|
||||
##format of CAVETAB2.CSV is
|
||||
KatasterNumber = 0
|
||||
@@ -52,85 +53,6 @@ MarkingComment = 43
|
||||
Findability = 44
|
||||
FindabilityComment = 45
|
||||
|
||||
|
||||
# List markup recognised by html_to_wiki, as (action, wiki marker, pattern).
# In every pattern group 1 is the text that precedes the tag, so its length
# says how far into the remaining input the tag sits.
_WIKI_LIST_TOKENS = (
    ("open", "*", re.compile(r"^(.*?)<ul>\s*<li[^>]*>(.*?)</li>(.*)$", re.DOTALL)),
    ("close", "", re.compile(r"^(\s*)</ul>(.*)$", re.DOTALL)),
    ("open", "#", re.compile(r"^(.*?)<ol>\s*<li[^>]*>(.*?)</li>(.*)$", re.DOTALL)),
    ("close", "", re.compile(r"^(\s*)</ol>(.*)$", re.DOTALL)),
    ("item", "", re.compile(r"^(\s*)<li[^>]*>(.*?)</li>(.*)$", re.DOTALL)),
)


def html_to_wiki(text):
    """Convert the paragraph and list markup of an HTML fragment to wiki text.

    * ``</p>`` is dropped, a trailing ``<p>`` is dropped and every other
      ``<p>`` becomes a blank line.
    * ``<ul>``/``<ol>`` lists become lines starting with ``*``/``#``, one
      marker per nesting level (``**`` for a list inside a list, and so on).

    Any other tag, and all HTML entities, are passed through unchanged.

    Args:
        text: the HTML fragment. Only the native ``str`` type is converted;
            any other value (``None``, numbers, ...) is returned untouched.

    Returns:
        The converted text (always a text string, decoded from UTF-8 when the
        native ``str`` is a byte string, i.e. on Python 2), or ``text`` itself
        when it is not a ``str``.
    """
    if not isinstance(text, str):
        return text
    if isinstance(text, bytes):
        # Only true on Python 2, where str is a byte string.
        text = text.decode("utf-8")

    # Paragraphs. Order matters: the trailing <p> must go before the
    # remaining <p> tags are turned into blank lines.
    text = text.replace("</p>", "")
    text = re.sub(r"<p>$", "", text)
    text = text.replace("<p>", "\n\n")

    # Lists. Repeatedly consume the list tag nearest to the start of the
    # remaining text. Taking the *nearest* tag is essential: the "open"
    # patterns may match far ahead, past items and closing tags that have to
    # be handled first.
    pieces = []
    markers = ""  # one "*" or "#" per currently open list level
    while text:
        best = None
        for action, marker, pattern in _WIKI_LIST_TOKENS:
            match = pattern.match(text)
            if match is None:
                continue
            if best is None or len(match.group(1)) < len(best[2].group(1)):
                best = (action, marker, match)

        if best is None:
            # No list markup left: the rest is copied verbatim.
            pieces.append(text)
            break

        action, marker, match = best
        if action == "close":
            # Leave the innermost list; whitespace before the tag is dropped.
            markers = markers[:-1]
            text = match.group(2)
            continue

        before, item, text = match.groups()
        if action == "open":
            markers += marker
            pieces.append(before)  # ordinary text preceding the new list
        # For a plain item `before` is only whitespace and is dropped.
        pieces.append("\n" + markers + " " + item)

    return "".join(pieces)
|
||||
|
||||
def LoadCaveTab():
|
||||
cavetab = open(os.path.join(settings.EXPOWEB, "noinfo", "CAVETAB2.CSV"),'rU')
|
||||
caveReader = csv.reader(cavetab)
|
||||
|
||||
Reference in New Issue
Block a user