forked from expo/troggle
[svn] Work on turn html pages into cavedescription models.py.
Moved parser/cavetabs html_to_wiki function to utils.py Added databaseReset.py desc to refresh the cavedescriptions.
This commit is contained in:
parent
fa9a6416d1
commit
c377d0376f
@ -1,5 +1,10 @@
|
||||
import urllib, urlparse, string, os, datetime, logging
|
||||
import troggle.mptt as mptt
|
||||
try:
|
||||
import mptt
|
||||
except ImportError:
|
||||
#I think we should be having troggle directory as the base import place
|
||||
#but I am leaving the following line in to make sure I do not break anything
|
||||
import troggle.mptt as mptt
|
||||
from django.forms import ModelForm
|
||||
from django.db import models
|
||||
from django.contrib import admin
|
||||
@ -541,16 +546,17 @@ except mptt.AlreadyRegistered:
|
||||
print "mptt already registered"
|
||||
|
||||
class CaveDescription(TroggleModel):
|
||||
name = models.CharField(max_length=50)
|
||||
short_name = models.CharField(max_length=50, unique = True)
|
||||
long_name = models.CharField(max_length=200, blank=True, null=True)
|
||||
description = models.TextField(blank=True,null=True)
|
||||
linked_subcaves = models.ManyToManyField("Subcave")
|
||||
linked_entrances = models.ManyToManyField("Entrance")
|
||||
linked_qms = models.ManyToManyField("QM")
|
||||
def __unicode__(self):
|
||||
return unicode(self.name)
|
||||
return unicode(self.short_name)
|
||||
|
||||
class NewSubCave(TroggleModel):
|
||||
name = models.CharField(max_length=200)
|
||||
name = models.CharField(max_length=200, unique = True)
|
||||
def __unicode__(self):
|
||||
return unicode(self.name)
|
||||
|
||||
|
@ -58,6 +58,14 @@ def import_surveys():
|
||||
import parsers.surveys
|
||||
parsers.surveys.parseSurveys(logfile=settings.LOGFILE)
|
||||
|
||||
def import_descriptions():
|
||||
import parsers.descriptions
|
||||
parsers.descriptions.getDescriptions()
|
||||
|
||||
def parse_descriptions():
|
||||
import parsers.descriptions
|
||||
parsers.descriptions.parseDescriptions()
|
||||
|
||||
def reset():
|
||||
""" Wipe the troggle database and import everything from legacy data
|
||||
"""
|
||||
@ -69,6 +77,17 @@ def reset():
|
||||
import_survex()
|
||||
import_QMs()
|
||||
import_surveys()
|
||||
import_descriptions()
|
||||
parse_descriptions()
|
||||
|
||||
def resetdesc():
|
||||
""" Wipe the troggle database and import descriptions
|
||||
"""
|
||||
import core.models
|
||||
for desc in core.models.CaveDescription.objects.all():
|
||||
desc.delete()
|
||||
import_descriptions()
|
||||
parse_descriptions()
|
||||
|
||||
def export_cavetab():
|
||||
from export import tocavetab
|
||||
@ -78,7 +97,9 @@ def export_cavetab():
|
||||
|
||||
if __name__ == "__main__":
|
||||
import sys
|
||||
if "reset" in sys.argv:
|
||||
if "desc" in sys.argv:
|
||||
resetdesc()
|
||||
elif "reset" in sys.argv:
|
||||
reset()
|
||||
else:
|
||||
print "Do 'python databaseReset.py reset'"
|
||||
|
@ -3,6 +3,7 @@ import troggle.core.models as models
|
||||
from django.conf import settings
|
||||
import csv, time, re, os, logging
|
||||
from utils import save_carefully
|
||||
from utils import html_to_wiki
|
||||
|
||||
##format of CAVETAB2.CSV is
|
||||
KatasterNumber = 0
|
||||
@ -52,85 +53,6 @@ MarkingComment = 43
|
||||
Findability = 44
|
||||
FindabilityComment = 45
|
||||
|
||||
|
||||
def html_to_wiki(text):
|
||||
if type(text) != str:
|
||||
return text
|
||||
text = unicode(text, "utf-8")
|
||||
#Characters
|
||||
#text = re.sub("ü", u"\xfc", text)
|
||||
#text = re.sub("ö", u"\xf6", text)
|
||||
#text = re.sub("ä", u"\xe4", text)
|
||||
#text = re.sub("°", u"\xb0", text)
|
||||
#text = re.sub("©", u"\xa9", text)
|
||||
#text = re.sub("&", u"\x26", text)
|
||||
#text = re.sub("ß", u"\xdf", text)
|
||||
#text = re.sub("ß", u"\xdf", text)
|
||||
#text = re.sub("<", u"<", text)
|
||||
#text = re.sub(">", u">", text)
|
||||
#text = re.sub("è", u"\xe8", text)
|
||||
#text = re.sub("é", u"\xe9", text)
|
||||
#text = re.sub(""e;", u'"', text)
|
||||
#text = re.sub(""", u'"', text)
|
||||
#text = re.sub("Ö", u'\xd6', text)
|
||||
#text = re.sub("×", u'"', text)
|
||||
|
||||
#text = re.sub("&(.*);", "/1", text)
|
||||
#if s:
|
||||
# print s.groups()
|
||||
#Lists
|
||||
text = re.sub("</p>", r"", text)
|
||||
text = re.sub("<p>$", r"", text)
|
||||
text = re.sub("<p>", r"\n\n", text)
|
||||
out = ""
|
||||
lists = ""
|
||||
while text:
|
||||
mstar = re.match("^(.*?)<ul>\s*<li[^>]*>(.*?)</li>(.*)$", text, re.DOTALL)
|
||||
munstar = re.match("^(\s*)</ul>(.*)$", text, re.DOTALL)
|
||||
mhash = re.match("^(.*?)<ol>\s*<li[^>]*>(.*?)</li>(.*)$", text, re.DOTALL)
|
||||
munhash = re.match("^(\s*)</ol>(.*)$", text, re.DOTALL)
|
||||
mitem = re.match("^(\s*)<li[^>]*>(.*?)</li>(.*)$", text, re.DOTALL)
|
||||
ms = [len(m.groups()[0]) for m in [mstar, munstar, mhash, munhash, mitem] if m]
|
||||
def min_(i, l):
|
||||
try:
|
||||
v = i.groups()[0]
|
||||
l.remove(len(v))
|
||||
return len(v) < min(l, 1000000000)
|
||||
except:
|
||||
return False
|
||||
if min_(mstar, ms):
|
||||
lists += "*"
|
||||
pre, val, post = mstar.groups()
|
||||
out += pre + "\n" + lists + " " + val
|
||||
text = post
|
||||
elif min_(mhash, ms):
|
||||
lists += "#"
|
||||
pre, val, post = mhash.groups()
|
||||
out += pre + "\n" + lists + " " + val
|
||||
text = post
|
||||
elif min_(mitem, ms):
|
||||
pre, val, post = mitem.groups()
|
||||
out += "\n" + lists + " " + val
|
||||
text = post
|
||||
elif min_(munstar, ms):
|
||||
lists = lists[:-1]
|
||||
text = munstar.groups()[1]
|
||||
elif min_(munhash, ms):
|
||||
lists.pop()
|
||||
text = munhash.groups()[1]
|
||||
else:
|
||||
out += text
|
||||
text = ""
|
||||
text2 = out
|
||||
while text2:
|
||||
mtag = re.match("^(.*?)<(.*?)>(.*)$", text, re.DOTALL)
|
||||
if mtag:
|
||||
text2 = mtag.groups()[2]
|
||||
print mtag.groups()[1]
|
||||
else:
|
||||
text2 = ""
|
||||
return out
|
||||
|
||||
def LoadCaveTab():
|
||||
cavetab = open(os.path.join(settings.EXPOWEB, "noinfo", "CAVETAB2.CSV"),'rU')
|
||||
caveReader = csv.reader(cavetab)
|
||||
|
99
utils.py
99
utils.py
@ -1,5 +1,8 @@
|
||||
from django.conf import settings
|
||||
from troggle.core.models import LogbookEntry
|
||||
try:
|
||||
from django.db import models
|
||||
except:#We want to get rid of this try statement if possible
|
||||
from troggle.core.models import LogbookEntry
|
||||
import random, re, logging
|
||||
|
||||
def weighted_choice(lst):
|
||||
@ -16,11 +19,11 @@ def randomLogbookSentence():
|
||||
# needs to handle empty logbooks without crashing
|
||||
|
||||
#Choose a random logbook entry
|
||||
randSent['entry']=LogbookEntry.objects.order_by('?')[0]
|
||||
randSent['entry']=models.LogbookEntry.objects.order_by('?')[0]
|
||||
|
||||
#Choose again if there are no sentances (this happens if it is a placeholder entry)
|
||||
while len(re.findall('[A-Z].*?\.',randSent['entry'].text))==0:
|
||||
randSent['entry']=LogbookEntry.objects.order_by('?')[0]
|
||||
randSent['entry']=models.LogbookEntry.objects.order_by('?')[0]
|
||||
|
||||
#Choose a random sentence from that entry. Store the sentence as randSent['sentence'], and the number of that sentence in the entry as randSent['number']
|
||||
sentenceList=re.findall('[A-Z].*?\.',randSent['entry'].text)
|
||||
@ -61,9 +64,97 @@ def save_carefully(objectType, lookupAttribs={}, nonLookupAttribs={}):
|
||||
def render_with_context(req, *args, **kwargs):
|
||||
"""this is the snippet from http://www.djangosnippets.org/snippets/3/
|
||||
|
||||
Django uses Context, not RequestContext when you call render_to_response. We always want to use RequestContext, so that django adds the context from settings.TEMPLATE_CONTEXT_PROCESSORS. This way we automatically get necessary settings variables passed to each template. So we use a custom method, render_response instead of render_to_response. Hopefully future Django releases will make this unnecessary."""
|
||||
Django uses Context, not RequestContext when you call render_to_response.
|
||||
We always want to use RequestContext, so that django adds the context from
|
||||
settings.TEMPLATE_CONTEXT_PROCESSORS. This way we automatically get
|
||||
necessary settings variables passed to each template. So we use a custom
|
||||
method, render_response instead of render_to_response. Hopefully future
|
||||
Django releases will make this unnecessary."""
|
||||
|
||||
from django.shortcuts import render_to_response
|
||||
from django.template import RequestContext
|
||||
kwargs['context_instance'] = RequestContext(req)
|
||||
return render_to_response(*args, **kwargs)
|
||||
|
||||
re_body = re.compile(r"\<body[^>]*\>(.*)\</body\>", re.DOTALL)
|
||||
re_title = re.compile(r"\<title[^>]*\>(.*)\</title\>", re.DOTALL)
|
||||
def get_html_body(text):
|
||||
return get_single_match(re_body, text)
|
||||
|
||||
def get_html_title(text):
|
||||
return get_single_match(re_title, text)
|
||||
|
||||
def get_single_match(regex, text):
|
||||
match = regex.search(text)
|
||||
|
||||
if match:
|
||||
return match.groups()[0]
|
||||
else:
|
||||
return None
|
||||
|
||||
|
||||
re_subs = [(re.compile(r"\<b[^>]*\>(.*?)\</b\>", re.DOTALL), r"'''\1'''"),
|
||||
(re.compile(r"\<i\>(.*?)\</i\>", re.DOTALL), r"''\1''"),
|
||||
(re.compile(r"\<h1[^>]*\>(.*?)\</h1\>", re.DOTALL), r"=\1="),
|
||||
(re.compile(r"\<h2[^>]*\>(.*?)\</h2\>", re.DOTALL), r"==\1=="),
|
||||
(re.compile(r"\<h3[^>]*\>(.*?)\</h3\>", re.DOTALL), r"===\1==="),
|
||||
(re.compile(r"\<h4[^>]*\>(.*?)\</h4\>", re.DOTALL), r"====\1===="),
|
||||
(re.compile(r"\<h5[^>]*\>(.*?)\</h5\>", re.DOTALL), r"=====\1====="),
|
||||
(re.compile(r"\<h6[^>]*\>(.*?)\</h6\>", re.DOTALL), r"======\1======"),
|
||||
(re.compile(r"\<a\s+id=['\"]([^'\"]*)['\"]\s*\>(.*?)\</a\>", re.DOTALL), r"[subcave:\1|\2]"),
|
||||
#interpage link needed
|
||||
(re.compile(r"\<a\s+href=['\"]#([^'\"]*)['\"]\s*\>(.*?)\</a\>", re.DOTALL), r"[cavedescription:\1|\2]"),
|
||||
(re.compile(r"\[\<a\s+href=['\"][^'\"]*['\"]\s+id=['\"][^'\"]*['\"]\s*\>([^\s]*).*?\</a\>\]", re.DOTALL), r"![qm:\1]"),
|
||||
|
||||
]
|
||||
|
||||
def html_to_wiki(text, codec = "utf-8"):
|
||||
if type(text) == str:
|
||||
text = unicode(text, codec)
|
||||
text = re.sub("</p>", r"", text)
|
||||
text = re.sub("<p>$", r"", text)
|
||||
text = re.sub("<p>", r"\n\n", text)
|
||||
out = ""
|
||||
lists = ""
|
||||
#lists
|
||||
while text:
|
||||
mstar = re.match("^(.*?)<ul[^>]*>\s*<li[^>]*>(.*?)</li>(.*)$", text, re.DOTALL)
|
||||
munstar = re.match("^(\s*)</ul>(.*)$", text, re.DOTALL)
|
||||
mhash = re.match("^(.*?)<ol[^>]*>\s*<li[^>]*>(.*?)</li>(.*)$", text, re.DOTALL)
|
||||
munhash = re.match("^(\s*)</ol>(.*)$", text, re.DOTALL)
|
||||
mitem = re.match("^(\s*)<li[^>]*>(.*?)</li>(.*)$", text, re.DOTALL)
|
||||
ms = [len(m.groups()[0]) for m in [mstar, munstar, mhash, munhash, mitem] if m]
|
||||
def min_(i, l):
|
||||
try:
|
||||
v = i.groups()[0]
|
||||
l.remove(len(v))
|
||||
return len(v) < min(l, 1000000000)
|
||||
except:
|
||||
return False
|
||||
if min_(mstar, ms):
|
||||
lists += "*"
|
||||
pre, val, post = mstar.groups()
|
||||
out += pre + "\n" + lists + " " + val
|
||||
text = post
|
||||
elif min_(mhash, ms):
|
||||
lists += "#"
|
||||
pre, val, post = mhash.groups()
|
||||
out += pre + "\n" + lists + " " + val
|
||||
text = post
|
||||
elif min_(mitem, ms):
|
||||
pre, val, post = mitem.groups()
|
||||
out += "\n" + lists + " " + val
|
||||
text = post
|
||||
elif min_(munstar, ms):
|
||||
lists = lists[:-1]
|
||||
text = munstar.groups()[1]
|
||||
elif min_(munhash, ms):
|
||||
lists.pop()
|
||||
text = munhash.groups()[1]
|
||||
else:
|
||||
out += text
|
||||
text = ""
|
||||
#substitutions
|
||||
for regex, repl in re_subs:
|
||||
out = regex.sub(repl, out)
|
||||
return out
|
Loading…
Reference in New Issue
Block a user