diff --git a/core/models.py b/core/models.py index 1424f40..f794775 100644 --- a/core/models.py +++ b/core/models.py @@ -1,5 +1,10 @@ import urllib, urlparse, string, os, datetime, logging -import troggle.mptt as mptt +try: + import mptt +except ImportError: + #I think we should be having troggle directory as the base import place + #but I am leaving the following line in to make sure I do not break anything + import troggle.mptt as mptt from django.forms import ModelForm from django.db import models from django.contrib import admin @@ -539,18 +544,19 @@ try: mptt.register(Subcave, order_insertion_by=['title']) except mptt.AlreadyRegistered: print "mptt already registered" - + class CaveDescription(TroggleModel): - name = models.CharField(max_length=50) + short_name = models.CharField(max_length=50, unique = True) + long_name = models.CharField(max_length=200, blank=True, null=True) description = models.TextField(blank=True,null=True) linked_subcaves = models.ManyToManyField("Subcave") linked_entrances = models.ManyToManyField("Entrance") linked_qms = models.ManyToManyField("QM") def __unicode__(self): - return unicode(self.name) + return unicode(self.short_name) class NewSubCave(TroggleModel): - name = models.CharField(max_length=200) + name = models.CharField(max_length=200, unique = True) def __unicode__(self): return unicode(self.name) diff --git a/databaseReset.py b/databaseReset.py index 7bc1021..e6bd13c 100644 --- a/databaseReset.py +++ b/databaseReset.py @@ -58,6 +58,14 @@ def import_surveys(): import parsers.surveys parsers.surveys.parseSurveys(logfile=settings.LOGFILE) +def import_descriptions(): + import parsers.descriptions + parsers.descriptions.getDescriptions() + +def parse_descriptions(): + import parsers.descriptions + parsers.descriptions.parseDescriptions() + def reset(): """ Wipe the troggle database and import everything from legacy data """ @@ -69,16 +77,29 @@ def reset(): import_survex() import_QMs() import_surveys() + import_descriptions() + parse_descriptions() +def resetdesc(): + """ Wipe the troggle database and import descriptions + """ + import core.models + for desc in core.models.CaveDescription.objects.all(): + desc.delete() + import_descriptions() + parse_descriptions() + def export_cavetab(): from export import tocavetab outfile=file(os.path.join(settings.EXPOWEB, "noinfo", "CAVETAB2.CSV"),'w') tocavetab.writeCaveTab(outfile) outfile.close() - -if __name__ == "__main__": + +if __name__ == "__main__": import sys - if "reset" in sys.argv: + if "desc" in sys.argv: + resetdesc() + elif "reset" in sys.argv: reset() else: print "Do 'python databaseReset.py reset'" diff --git a/parsers/cavetab.py b/parsers/cavetab.py index 0c7b985..20c7658 100644 --- a/parsers/cavetab.py +++ b/parsers/cavetab.py @@ -3,6 +3,7 @@ import troggle.core.models as models from django.conf import settings import csv, time, re, os, logging from utils import save_carefully +from utils import html_to_wiki ##format of CAVETAB2.CSV is KatasterNumber = 0 @@ -52,85 +53,6 @@ MarkingComment = 43 Findability = 44 FindabilityComment = 45 - -def html_to_wiki(text): - if type(text) != str: - return text - text = unicode(text, "utf-8") - #Characters - #text = re.sub("ü", u"\xfc", text) - #text = re.sub("ö", u"\xf6", text) - #text = re.sub("ä", u"\xe4", text) - #text = re.sub("°", u"\xb0", text) - #text = re.sub("©", u"\xa9", text) - #text = re.sub("&", u"\x26", text) - #text = re.sub("ß", u"\xdf", text) - #text = re.sub("ß", u"\xdf", text) - #text = re.sub("<", u"<", text) - #text = re.sub(">", u">", text) - #text = re.sub("è", u"\xe8", text) - #text = re.sub("é", u"\xe9", text) - #text = re.sub(""e;", u'"', text) - #text = re.sub(""", u'"', text) - #text = re.sub("Ö", u'\xd6', text) - #text = re.sub("×", u'"', text) - - #text = re.sub("&(.*);", "/1", text) - #if s: - # print s.groups() - #Lists - text = re.sub("

", r"", text) - text = re.sub("

$", r"", text) - text = re.sub("

", r"\n\n", text) - out = "" - lists = "" - while text: - mstar = re.match("^(.*?)

(.*)$", text, re.DOTALL) - mhash = re.match("^(.*?)
    \s*]*>(.*?)(.*)$", text, re.DOTALL) - munhash = re.match("^(\s*)
(.*)$", text, re.DOTALL) - mitem = re.match("^(\s*)]*>(.*?)(.*)$", text, re.DOTALL) - ms = [len(m.groups()[0]) for m in [mstar, munstar, mhash, munhash, mitem] if m] - def min_(i, l): - try: - v = i.groups()[0] - l.remove(len(v)) - return len(v) < min(l, 1000000000) - except: - return False - if min_(mstar, ms): - lists += "*" - pre, val, post = mstar.groups() - out += pre + "\n" + lists + " " + val - text = post - elif min_(mhash, ms): - lists += "#" - pre, val, post = mhash.groups() - out += pre + "\n" + lists + " " + val - text = post - elif min_(mitem, ms): - pre, val, post = mitem.groups() - out += "\n" + lists + " " + val - text = post - elif min_(munstar, ms): - lists = lists[:-1] - text = munstar.groups()[1] - elif min_(munhash, ms): - lists.pop() - text = munhash.groups()[1] - else: - out += text - text = "" - text2 = out - while text2: - mtag = re.match("^(.*?)<(.*?)>(.*)$", text, re.DOTALL) - if mtag: - text2 = mtag.groups()[2] - print mtag.groups()[1] - else: - text2 = "" - return out - def LoadCaveTab(): cavetab = open(os.path.join(settings.EXPOWEB, "noinfo", "CAVETAB2.CSV"),'rU') caveReader = csv.reader(cavetab) diff --git a/utils.py b/utils.py index 4eced09..1db7e0b 100644 --- a/utils.py +++ b/utils.py @@ -1,5 +1,8 @@ from django.conf import settings -from troggle.core.models import LogbookEntry +try: + from django.db import models +except:#We want to get rid of this try statement if possible + from troggle.core.models import LogbookEntry import random, re, logging def weighted_choice(lst): @@ -16,11 +19,11 @@ def randomLogbookSentence(): # needs to handle empty logbooks without crashing #Choose a random logbook entry - randSent['entry']=LogbookEntry.objects.order_by('?')[0] + randSent['entry']=models.LogbookEntry.objects.order_by('?')[0] #Choose again if there are no sentances (this happens if it is a placeholder entry) while len(re.findall('[A-Z].*?\.',randSent['entry'].text))==0: - randSent['entry']=LogbookEntry.objects.order_by('?')[0] + randSent['entry']=models.LogbookEntry.objects.order_by('?')[0] #Choose a random sentence from that entry. Store the sentence as randSent['sentence'], and the number of that sentence in the entry as randSent['number'] sentenceList=re.findall('[A-Z].*?\.',randSent['entry'].text) @@ -60,10 +63,98 @@ def save_carefully(objectType, lookupAttribs={}, nonLookupAttribs={}): def render_with_context(req, *args, **kwargs): """this is the snippet from http://www.djangosnippets.org/snippets/3/ - - Django uses Context, not RequestContext when you call render_to_response. We always want to use RequestContext, so that django adds the context from settings.TEMPLATE_CONTEXT_PROCESSORS. This way we automatically get necessary settings variables passed to each template. So we use a custom method, render_response instead of render_to_response. Hopefully future Django releases will make this unnecessary.""" + + Django uses Context, not RequestContext when you call render_to_response. + We always want to use RequestContext, so that django adds the context from + settings.TEMPLATE_CONTEXT_PROCESSORS. This way we automatically get + necessary settings variables passed to each template. So we use a custom + method, render_response instead of render_to_response. Hopefully future + Django releases will make this unnecessary.""" from django.shortcuts import render_to_response from django.template import RequestContext kwargs['context_instance'] = RequestContext(req) - return render_to_response(*args, **kwargs) \ No newline at end of file + return render_to_response(*args, **kwargs) + +re_body = re.compile(r"\]*\>(.*)\", re.DOTALL) +re_title = re.compile(r"\]*\>(.*)\", re.DOTALL) +def get_html_body(text): + return get_single_match(re_body, text) + +def get_html_title(text): + return get_single_match(re_title, text) + +def get_single_match(regex, text): + match = regex.search(text) + + if match: + return match.groups()[0] + else: + return None + + +re_subs = [(re.compile(r"\]*\>(.*?)\", re.DOTALL), r"'''\1'''"), + (re.compile(r"\(.*?)\", re.DOTALL), r"''\1''"), + (re.compile(r"\]*\>(.*?)\", re.DOTALL), r"=\1="), + (re.compile(r"\]*\>(.*?)\", re.DOTALL), r"==\1=="), + (re.compile(r"\]*\>(.*?)\", re.DOTALL), r"===\1==="), + (re.compile(r"\]*\>(.*?)\", re.DOTALL), r"====\1===="), + (re.compile(r"\]*\>(.*?)\", re.DOTALL), r"=====\1====="), + (re.compile(r"\]*\>(.*?)\", re.DOTALL), r"======\1======"), + (re.compile(r"\(.*?)\", re.DOTALL), r"[subcave:\1|\2]"), + #interpage link needed + (re.compile(r"\(.*?)\", re.DOTALL), r"[cavedescription:\1|\2]"), + (re.compile(r"\[\([^\s]*).*?\\]", re.DOTALL), r"![qm:\1]"), + + ] + +def html_to_wiki(text, codec = "utf-8"): + if type(text) == str: + text = unicode(text, codec) + text = re.sub("

", r"", text) + text = re.sub("

$", r"", text) + text = re.sub("

", r"\n\n", text) + out = "" + lists = "" + #lists + while text: + mstar = re.match("^(.*?)]*>\s*]*>(.*?)(.*)$", text, re.DOTALL) + munstar = re.match("^(\s*)(.*)$", text, re.DOTALL) + mhash = re.match("^(.*?)]*>\s*]*>(.*?)(.*)$", text, re.DOTALL) + munhash = re.match("^(\s*)(.*)$", text, re.DOTALL) + mitem = re.match("^(\s*)]*>(.*?)(.*)$", text, re.DOTALL) + ms = [len(m.groups()[0]) for m in [mstar, munstar, mhash, munhash, mitem] if m] + def min_(i, l): + try: + v = i.groups()[0] + l.remove(len(v)) + return len(v) < min(l, 1000000000) + except: + return False + if min_(mstar, ms): + lists += "*" + pre, val, post = mstar.groups() + out += pre + "\n" + lists + " " + val + text = post + elif min_(mhash, ms): + lists += "#" + pre, val, post = mhash.groups() + out += pre + "\n" + lists + " " + val + text = post + elif min_(mitem, ms): + pre, val, post = mitem.groups() + out += "\n" + lists + " " + val + text = post + elif min_(munstar, ms): + lists = lists[:-1] + text = munstar.groups()[1] + elif min_(munhash, ms): + lists.pop() + text = munhash.groups()[1] + else: + out += text + text = "" + #substitutions + for regex, repl in re_subs: + out = regex.sub(repl, out) + return out \ No newline at end of file