forked from expo/troggle
[svn] Work on turning HTML pages into CaveDescription models in models.py.
Moved the cavetab parser's html_to_wiki function to utils.py. Added a "desc" option to databaseReset.py to refresh the cave descriptions.
parent fa9a6416d1
commit c377d0376f
core/models.py
@@ -1,5 +1,10 @@
 import urllib, urlparse, string, os, datetime, logging
-import troggle.mptt as mptt
+try:
+    import mptt
+except ImportError:
+    #I think we should be having troggle directory as the base import place
+    #but I am leaving the following line in to make sure I do not break anything
+    import troggle.mptt as mptt
 from django.forms import ModelForm
 from django.db import models
 from django.contrib import admin
@@ -539,18 +544,19 @@ try:
     mptt.register(Subcave, order_insertion_by=['title'])
 except mptt.AlreadyRegistered:
     print "mptt already registered"
 
 class CaveDescription(TroggleModel):
-    name = models.CharField(max_length=50)
+    short_name = models.CharField(max_length=50, unique = True)
+    long_name = models.CharField(max_length=200, blank=True, null=True)
     description = models.TextField(blank=True,null=True)
     linked_subcaves = models.ManyToManyField("Subcave")
     linked_entrances = models.ManyToManyField("Entrance")
     linked_qms = models.ManyToManyField("QM")
     def __unicode__(self):
-        return unicode(self.name)
+        return unicode(self.short_name)
 
 class NewSubCave(TroggleModel):
-    name = models.CharField(max_length=200)
+    name = models.CharField(max_length=200, unique = True)
     def __unicode__(self):
         return unicode(self.name)
 
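CaveDescription now keys on a unique short_name and gains an optional long_name; the many-to-many fields tie a description page to existing Subcave, Entrance and QM records. A minimal sketch of how the model might be used from a Django shell (the values are made-up examples; Subcave is assumed to have the title field implied by the mptt registration above):

    from troggle.core.models import CaveDescription, Subcave

    # look up by the new unique key, creating the row if it is missing
    desc, created = CaveDescription.objects.get_or_create(
        short_name="161",
        defaults={"long_name": "Kaninchenhoehle", "description": ""})

    # link an existing subcave through the new many-to-many field
    desc.linked_subcaves.add(Subcave.objects.get(title="Entrance series"))

    print unicode(desc)    # the new __unicode__ returns short_name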
databaseReset.py
@@ -58,6 +58,14 @@ def import_surveys():
     import parsers.surveys
     parsers.surveys.parseSurveys(logfile=settings.LOGFILE)
 
+def import_descriptions():
+    import parsers.descriptions
+    parsers.descriptions.getDescriptions()
+
+def parse_descriptions():
+    import parsers.descriptions
+    parsers.descriptions.parseDescriptions()
+
 def reset():
     """ Wipe the troggle database and import everything from legacy data
     """
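The two new helpers above delegate to a parsers.descriptions module that is not included in this diff. Purely as an illustration of the intended flow (everything below except the two entry-point names is a guess):

    # parsers/descriptions.py -- hypothetical sketch, not the committed file
    import troggle.core.models as models
    from utils import html_to_wiki

    def getDescriptions():
        # read the legacy HTML description pages and store them as CaveDescription rows
        pass

    def parseDescriptions():
        # convert the stored HTML into wiki markup using the new shared helper
        for desc in models.CaveDescription.objects.all():
            desc.description = html_to_wiki(desc.description)
            desc.save()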
@@ -69,16 +77,29 @@ def reset():
     import_survex()
     import_QMs()
     import_surveys()
+    import_descriptions()
+    parse_descriptions()
+
+def resetdesc():
+    """ Wipe the troggle database and import descriptions
+    """
+    import core.models
+    for desc in core.models.CaveDescription.objects.all():
+        desc.delete()
+    import_descriptions()
+    parse_descriptions()
 
 def export_cavetab():
     from export import tocavetab
     outfile=file(os.path.join(settings.EXPOWEB, "noinfo", "CAVETAB2.CSV"),'w')
     tocavetab.writeCaveTab(outfile)
     outfile.close()
 
 if __name__ == "__main__":
     import sys
-    if "reset" in sys.argv:
+    if "desc" in sys.argv:
+        resetdesc()
+    elif "reset" in sys.argv:
         reset()
     else:
         print "Do 'python databaseReset.py reset'"
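With the new entry point, cave descriptions can be rebuilt without a full reset. Usage, as implied by the argument handling above:

    python databaseReset.py desc    # resetdesc(): drop all CaveDescription objects, then re-import and re-parse them
    python databaseReset.py reset   # full rebuild, which now also imports and parses descriptions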
parsers/cavetab.py
@@ -3,6 +3,7 @@ import troggle.core.models as models
 from django.conf import settings
 import csv, time, re, os, logging
 from utils import save_carefully
+from utils import html_to_wiki
 
 ##format of CAVETAB2.CSV is
 KatasterNumber = 0
@@ -52,85 +53,6 @@ MarkingComment = 43
 Findability = 44
 FindabilityComment = 45
 
-
-def html_to_wiki(text):
-    if type(text) != str:
-        return text
-    text = unicode(text, "utf-8")
-    #Characters
-    #text = re.sub("&uuml;", u"\xfc", text)
-    #text = re.sub("&ouml;", u"\xf6", text)
-    #text = re.sub("&auml;", u"\xe4", text)
-    #text = re.sub("&deg;", u"\xb0", text)
-    #text = re.sub("&copy;", u"\xa9", text)
-    #text = re.sub("&amp;", u"\x26", text)
-    #text = re.sub("&szlig;", u"\xdf", text)
-    #text = re.sub("&szlig;", u"\xdf", text)
-    #text = re.sub("&lt;", u"<", text)
-    #text = re.sub("&gt;", u">", text)
-    #text = re.sub("&egrave;", u"\xe8", text)
-    #text = re.sub("&eacute;", u"\xe9", text)
-    #text = re.sub("&quote;", u'"', text)
-    #text = re.sub("&quot;", u'"', text)
-    #text = re.sub("&Ouml;", u'\xd6', text)
-    #text = re.sub("&times;", u'"', text)
-
-    #text = re.sub("&(.*);", "/1", text)
-    #if s:
-    #    print s.groups()
-    #Lists
-    text = re.sub("</p>", r"", text)
-    text = re.sub("<p>$", r"", text)
-    text = re.sub("<p>", r"\n\n", text)
-    out = ""
-    lists = ""
-    while text:
-        mstar = re.match("^(.*?)<ul>\s*<li[^>]*>(.*?)</li>(.*)$", text, re.DOTALL)
-        munstar = re.match("^(\s*)</ul>(.*)$", text, re.DOTALL)
-        mhash = re.match("^(.*?)<ol>\s*<li[^>]*>(.*?)</li>(.*)$", text, re.DOTALL)
-        munhash = re.match("^(\s*)</ol>(.*)$", text, re.DOTALL)
-        mitem = re.match("^(\s*)<li[^>]*>(.*?)</li>(.*)$", text, re.DOTALL)
-        ms = [len(m.groups()[0]) for m in [mstar, munstar, mhash, munhash, mitem] if m]
-        def min_(i, l):
-            try:
-                v = i.groups()[0]
-                l.remove(len(v))
-                return len(v) < min(l, 1000000000)
-            except:
-                return False
-        if min_(mstar, ms):
-            lists += "*"
-            pre, val, post = mstar.groups()
-            out += pre + "\n" + lists + " " + val
-            text = post
-        elif min_(mhash, ms):
-            lists += "#"
-            pre, val, post = mhash.groups()
-            out += pre + "\n" + lists + " " + val
-            text = post
-        elif min_(mitem, ms):
-            pre, val, post = mitem.groups()
-            out += "\n" + lists + " " + val
-            text = post
-        elif min_(munstar, ms):
-            lists = lists[:-1]
-            text = munstar.groups()[1]
-        elif min_(munhash, ms):
-            lists.pop()
-            text = munhash.groups()[1]
-        else:
-            out += text
-            text = ""
-    text2 = out
-    while text2:
-        mtag = re.match("^(.*?)<(.*?)>(.*)$", text, re.DOTALL)
-        if mtag:
-            text2 = mtag.groups()[2]
-            print mtag.groups()[1]
-        else:
-            text2 = ""
-    return out
-
 def LoadCaveTab():
     cavetab = open(os.path.join(settings.EXPOWEB, "noinfo", "CAVETAB2.CSV"),'rU')
     caveReader = csv.reader(cavetab)
utils.py
@@ -1,5 +1,8 @@
 from django.conf import settings
-from troggle.core.models import LogbookEntry
+try:
+    from django.db import models
+except:#We want to get rid of this try statement if possible
+    from troggle.core.models import LogbookEntry
 import random, re, logging
 
 def weighted_choice(lst):
@@ -16,11 +19,11 @@ def randomLogbookSentence():
     # needs to handle empty logbooks without crashing
 
     #Choose a random logbook entry
-    randSent['entry']=LogbookEntry.objects.order_by('?')[0]
+    randSent['entry']=models.LogbookEntry.objects.order_by('?')[0]
 
     #Choose again if there are no sentances (this happens if it is a placeholder entry)
     while len(re.findall('[A-Z].*?\.',randSent['entry'].text))==0:
-        randSent['entry']=LogbookEntry.objects.order_by('?')[0]
+        randSent['entry']=models.LogbookEntry.objects.order_by('?')[0]
 
     #Choose a random sentence from that entry. Store the sentence as randSent['sentence'], and the number of that sentence in the entry as randSent['number']
     sentenceList=re.findall('[A-Z].*?\.',randSent['entry'].text)
@@ -60,10 +63,98 @@ def save_carefully(objectType, lookupAttribs={}, nonLookupAttribs={}):
 
 def render_with_context(req, *args, **kwargs):
     """this is the snippet from http://www.djangosnippets.org/snippets/3/
 
-    Django uses Context, not RequestContext when you call render_to_response. We always want to use RequestContext, so that django adds the context from settings.TEMPLATE_CONTEXT_PROCESSORS. This way we automatically get necessary settings variables passed to each template. So we use a custom method, render_response instead of render_to_response. Hopefully future Django releases will make this unnecessary."""
+    Django uses Context, not RequestContext when you call render_to_response.
+    We always want to use RequestContext, so that django adds the context from
+    settings.TEMPLATE_CONTEXT_PROCESSORS. This way we automatically get
+    necessary settings variables passed to each template. So we use a custom
+    method, render_response instead of render_to_response. Hopefully future
+    Django releases will make this unnecessary."""
     from django.shortcuts import render_to_response
     from django.template import RequestContext
     kwargs['context_instance'] = RequestContext(req)
     return render_to_response(*args, **kwargs)
 
+re_body = re.compile(r"\<body[^>]*\>(.*)\</body\>", re.DOTALL)
+re_title = re.compile(r"\<title[^>]*\>(.*)\</title\>", re.DOTALL)
+
+def get_html_body(text):
+    return get_single_match(re_body, text)
+
+def get_html_title(text):
+    return get_single_match(re_title, text)
+
+def get_single_match(regex, text):
+    match = regex.search(text)
+
+    if match:
+        return match.groups()[0]
+    else:
+        return None
+
+
+re_subs = [(re.compile(r"\<b[^>]*\>(.*?)\</b\>", re.DOTALL), r"'''\1'''"),
+           (re.compile(r"\<i\>(.*?)\</i\>", re.DOTALL), r"''\1''"),
+           (re.compile(r"\<h1[^>]*\>(.*?)\</h1\>", re.DOTALL), r"=\1="),
+           (re.compile(r"\<h2[^>]*\>(.*?)\</h2\>", re.DOTALL), r"==\1=="),
+           (re.compile(r"\<h3[^>]*\>(.*?)\</h3\>", re.DOTALL), r"===\1==="),
+           (re.compile(r"\<h4[^>]*\>(.*?)\</h4\>", re.DOTALL), r"====\1===="),
+           (re.compile(r"\<h5[^>]*\>(.*?)\</h5\>", re.DOTALL), r"=====\1====="),
+           (re.compile(r"\<h6[^>]*\>(.*?)\</h6\>", re.DOTALL), r"======\1======"),
+           (re.compile(r"\<a\s+id=['\"]([^'\"]*)['\"]\s*\>(.*?)\</a\>", re.DOTALL), r"[subcave:\1|\2]"),
+           #interpage link needed
+           (re.compile(r"\<a\s+href=['\"]#([^'\"]*)['\"]\s*\>(.*?)\</a\>", re.DOTALL), r"[cavedescription:\1|\2]"),
+           (re.compile(r"\[\<a\s+href=['\"][^'\"]*['\"]\s+id=['\"][^'\"]*['\"]\s*\>([^\s]*).*?\</a\>\]", re.DOTALL), r"![qm:\1]"),
+           ]
+
+def html_to_wiki(text, codec = "utf-8"):
+    if type(text) == str:
+        text = unicode(text, codec)
+    text = re.sub("</p>", r"", text)
+    text = re.sub("<p>$", r"", text)
+    text = re.sub("<p>", r"\n\n", text)
+    out = ""
+    lists = ""
+    #lists
+    while text:
+        mstar = re.match("^(.*?)<ul[^>]*>\s*<li[^>]*>(.*?)</li>(.*)$", text, re.DOTALL)
+        munstar = re.match("^(\s*)</ul>(.*)$", text, re.DOTALL)
+        mhash = re.match("^(.*?)<ol[^>]*>\s*<li[^>]*>(.*?)</li>(.*)$", text, re.DOTALL)
+        munhash = re.match("^(\s*)</ol>(.*)$", text, re.DOTALL)
+        mitem = re.match("^(\s*)<li[^>]*>(.*?)</li>(.*)$", text, re.DOTALL)
+        ms = [len(m.groups()[0]) for m in [mstar, munstar, mhash, munhash, mitem] if m]
+        def min_(i, l):
+            try:
+                v = i.groups()[0]
+                l.remove(len(v))
+                return len(v) < min(l, 1000000000)
+            except:
+                return False
+        if min_(mstar, ms):
+            lists += "*"
+            pre, val, post = mstar.groups()
+            out += pre + "\n" + lists + " " + val
+            text = post
+        elif min_(mhash, ms):
+            lists += "#"
+            pre, val, post = mhash.groups()
+            out += pre + "\n" + lists + " " + val
+            text = post
+        elif min_(mitem, ms):
+            pre, val, post = mitem.groups()
+            out += "\n" + lists + " " + val
+            text = post
+        elif min_(munstar, ms):
+            lists = lists[:-1]
+            text = munstar.groups()[1]
+        elif min_(munhash, ms):
+            lists.pop()
+            text = munhash.groups()[1]
+        else:
+            out += text
+            text = ""
+    #substitutions
+    for regex, repl in re_subs:
+        out = regex.sub(repl, out)
+    return out
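A couple of illustrative calls against the new helpers; the expected results are worked out by hand from the regular expressions above, not taken from a test run:

    from utils import html_to_wiki, get_html_title

    html_to_wiki("<p>Hello <b>world</b></p>")
    # -> u"\n\nHello '''world'''"   (paragraph tags become blank lines, <b>...</b> becomes '''...''')

    html_to_wiki("<ul><li>one</li><li>two</li></ul>")
    # -> u"\n* one\n* two"          (each <li> becomes a "*" wiki list item)

    get_html_title("<html><head><title>Cave page</title></head><body></body></html>")
    # -> "Cave page"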