From c377d0376f38e5461ab2a6e32236d8ded27a9b68 Mon Sep 17 00:00:00 2001
From: martin speleo
Date: Sat, 4 Jul 2009 16:42:17 +0100
Subject: [PATCH] [svn] Work on turning html pages into CaveDescription models
(core/models.py). Moved the html_to_wiki function from parsers/cavetab.py to
utils.py. Added a "desc" option to databaseReset.py to refresh the cavedescriptions.
---
core/models.py | 16 ++++---
databaseReset.py | 27 ++++++++++--
parsers/cavetab.py | 80 +----------------------------------
utils.py | 103 ++++++++++++++++++++++++++++++++++++++++++---
4 files changed, 133 insertions(+), 93 deletions(-)
diff --git a/core/models.py b/core/models.py
index 1424f40..f794775 100644
--- a/core/models.py
+++ b/core/models.py
@@ -1,5 +1,10 @@
import urllib, urlparse, string, os, datetime, logging
-import troggle.mptt as mptt
+try:
+ import mptt
+except ImportError:
+ #I think we should be having troggle directory as the base import place
+ #but I am leaving the following line in to make sure I do not break anything
+ import troggle.mptt as mptt
from django.forms import ModelForm
from django.db import models
from django.contrib import admin
@@ -539,18 +544,19 @@ try:
mptt.register(Subcave, order_insertion_by=['title'])
except mptt.AlreadyRegistered:
print "mptt already registered"
-
+
class CaveDescription(TroggleModel):
+ # Text description with many-to-many links to Subcave, Entrance and QM.
- name = models.CharField(max_length=50)
+ # short_name must be unique; it is also what __unicode__ returns.
+ short_name = models.CharField(max_length=50, unique = True)
+ # long_name is optional (may be blank or NULL).
+ long_name = models.CharField(max_length=200, blank=True, null=True)
description = models.TextField(blank=True,null=True)
linked_subcaves = models.ManyToManyField("Subcave")
linked_entrances = models.ManyToManyField("Entrance")
linked_qms = models.ManyToManyField("QM")
def __unicode__(self):
- return unicode(self.name)
+ return unicode(self.short_name)
class NewSubCave(TroggleModel):
- name = models.CharField(max_length=200)
+ name = models.CharField(max_length=200, unique = True)
def __unicode__(self):
return unicode(self.name)
diff --git a/databaseReset.py b/databaseReset.py
index 7bc1021..e6bd13c 100644
--- a/databaseReset.py
+++ b/databaseReset.py
@@ -58,6 +58,14 @@ def import_surveys():
import parsers.surveys
parsers.surveys.parseSurveys(logfile=settings.LOGFILE)
+def import_descriptions():
+ """Call parsers.descriptions.getDescriptions().
+
+ The parser module is imported inside the function, like the other
+ import_* helpers in this script.
+ """
+ import parsers.descriptions
+ parsers.descriptions.getDescriptions()
+
+def parse_descriptions():
+ """Call parsers.descriptions.parseDescriptions().
+
+ reset() and resetdesc() both run this straight after import_descriptions().
+ """
+ import parsers.descriptions
+ parsers.descriptions.parseDescriptions()
+
def reset():
""" Wipe the troggle database and import everything from legacy data
"""
@@ -69,16 +77,29 @@ def reset():
import_survex()
import_QMs()
import_surveys()
+ import_descriptions()
+ parse_descriptions()
+def resetdesc():
+ """ Delete all CaveDescription objects and rebuild them by calling
+ import_descriptions() and parse_descriptions(). No other model is
+ deleted by this function itself.
+ """
+ import core.models
+ for desc in core.models.CaveDescription.objects.all():
+ desc.delete()
+ import_descriptions()
+ parse_descriptions()
+
def export_cavetab():
from export import tocavetab
outfile=file(os.path.join(settings.EXPOWEB, "noinfo", "CAVETAB2.CSV"),'w')
tocavetab.writeCaveTab(outfile)
outfile.close()
-
-if __name__ == "__main__":
+
+if __name__ == "__main__":
import sys
- if "reset" in sys.argv:
+ if "desc" in sys.argv:
+ resetdesc()
+ elif "reset" in sys.argv:
reset()
else:
print "Do 'python databaseReset.py reset'"
diff --git a/parsers/cavetab.py b/parsers/cavetab.py
index 0c7b985..20c7658 100644
--- a/parsers/cavetab.py
+++ b/parsers/cavetab.py
@@ -3,6 +3,7 @@ import troggle.core.models as models
from django.conf import settings
import csv, time, re, os, logging
from utils import save_carefully
+from utils import html_to_wiki
##format of CAVETAB2.CSV is
KatasterNumber = 0
@@ -52,85 +53,6 @@ MarkingComment = 43
Findability = 44
FindabilityComment = 45
-
-def html_to_wiki(text):
- if type(text) != str:
- return text
- text = unicode(text, "utf-8")
- #Characters
- #text = re.sub("ü", u"\xfc", text)
- #text = re.sub("ö", u"\xf6", text)
- #text = re.sub("ä", u"\xe4", text)
- #text = re.sub("°", u"\xb0", text)
- #text = re.sub("©", u"\xa9", text)
- #text = re.sub("&", u"\x26", text)
- #text = re.sub("ß", u"\xdf", text)
- #text = re.sub("ß", u"\xdf", text)
- #text = re.sub("<", u"<", text)
- #text = re.sub(">", u">", text)
- #text = re.sub("è", u"\xe8", text)
- #text = re.sub("é", u"\xe9", text)
- #text = re.sub(""e;", u'"', text)
- #text = re.sub(""", u'"', text)
- #text = re.sub("Ö", u'\xd6', text)
- #text = re.sub("×", u'"', text)
-
- #text = re.sub("&(.*);", "/1", text)
- #if s:
- # print s.groups()
- #Lists
- text = re.sub("
", r"", text)
- text = re.sub("$", r"", text)
- text = re.sub("
", r"\n\n", text)
- out = ""
- lists = ""
- while text:
- mstar = re.match("^(.*?)
\s*- ]*>(.*?)
(.*)$", text, re.DOTALL)
- munstar = re.match("^(\s*)
(.*)$", text, re.DOTALL)
- mhash = re.match("^(.*?)\s*- ]*>(.*?)
(.*)$", text, re.DOTALL)
- munhash = re.match("^(\s*)
(.*)$", text, re.DOTALL)
- mitem = re.match("^(\s*)]*>(.*?)(.*)$", text, re.DOTALL)
- ms = [len(m.groups()[0]) for m in [mstar, munstar, mhash, munhash, mitem] if m]
- def min_(i, l):
- try:
- v = i.groups()[0]
- l.remove(len(v))
- return len(v) < min(l, 1000000000)
- except:
- return False
- if min_(mstar, ms):
- lists += "*"
- pre, val, post = mstar.groups()
- out += pre + "\n" + lists + " " + val
- text = post
- elif min_(mhash, ms):
- lists += "#"
- pre, val, post = mhash.groups()
- out += pre + "\n" + lists + " " + val
- text = post
- elif min_(mitem, ms):
- pre, val, post = mitem.groups()
- out += "\n" + lists + " " + val
- text = post
- elif min_(munstar, ms):
- lists = lists[:-1]
- text = munstar.groups()[1]
- elif min_(munhash, ms):
- lists.pop()
- text = munhash.groups()[1]
- else:
- out += text
- text = ""
- text2 = out
- while text2:
- mtag = re.match("^(.*?)<(.*?)>(.*)$", text, re.DOTALL)
- if mtag:
- text2 = mtag.groups()[2]
- print mtag.groups()[1]
- else:
- text2 = ""
- return out
-
def LoadCaveTab():
cavetab = open(os.path.join(settings.EXPOWEB, "noinfo", "CAVETAB2.CSV"),'rU')
caveReader = csv.reader(cavetab)
diff --git a/utils.py b/utils.py
index 4eced09..1db7e0b 100644
--- a/utils.py
+++ b/utils.py
@@ -1,5 +1,8 @@
from django.conf import settings
-from troggle.core.models import LogbookEntry
+try:
+ from django.db import models
+except:#We want to get rid of this try statement if possible
+ from troggle.core.models import LogbookEntry
import random, re, logging
def weighted_choice(lst):
@@ -16,11 +19,11 @@ def randomLogbookSentence():
# needs to handle empty logbooks without crashing
#Choose a random logbook entry
- randSent['entry']=LogbookEntry.objects.order_by('?')[0]
+ randSent['entry']=models.LogbookEntry.objects.order_by('?')[0]
#Choose again if there are no sentances (this happens if it is a placeholder entry)
while len(re.findall('[A-Z].*?\.',randSent['entry'].text))==0:
- randSent['entry']=LogbookEntry.objects.order_by('?')[0]
+ randSent['entry']=models.LogbookEntry.objects.order_by('?')[0]
#Choose a random sentence from that entry. Store the sentence as randSent['sentence'], and the number of that sentence in the entry as randSent['number']
sentenceList=re.findall('[A-Z].*?\.',randSent['entry'].text)
@@ -60,10 +63,98 @@ def save_carefully(objectType, lookupAttribs={}, nonLookupAttribs={}):
def render_with_context(req, *args, **kwargs):
"""this is the snippet from http://www.djangosnippets.org/snippets/3/
-
- Django uses Context, not RequestContext when you call render_to_response. We always want to use RequestContext, so that django adds the context from settings.TEMPLATE_CONTEXT_PROCESSORS. This way we automatically get necessary settings variables passed to each template. So we use a custom method, render_response instead of render_to_response. Hopefully future Django releases will make this unnecessary."""
+
+ Django uses Context, not RequestContext when you call render_to_response.
+ We always want to use RequestContext, so that django adds the context from
+ settings.TEMPLATE_CONTEXT_PROCESSORS. This way we automatically get
+ necessary settings variables passed to each template. So we use a custom
+ method, render_with_context, instead of render_to_response. Hopefully future
+ Django releases will make this unnecessary.
+
+ req is used to build the RequestContext, which is stored in
+ kwargs['context_instance']; all other arguments are passed straight
+ through to render_to_response, whose result is returned."""
from django.shortcuts import render_to_response
from django.template import RequestContext
kwargs['context_instance'] = RequestContext(req)
- return render_to_response(*args, **kwargs)
\ No newline at end of file
+ return render_to_response(*args, **kwargs)
+
+# Patterns whose single group captures everything between the opening and
+# closing tag of the <body> / <title> element.  The group is greedy and
+# DOTALL lets it span newlines.
+re_body = re.compile(r"\<body[^>]*\>(.*)\</body\>", re.DOTALL)
+re_title = re.compile(r"\<title[^>]*\>(.*)\</title\>", re.DOTALL)
+
+
+def get_html_body(text):
+    """Return the contents of the <body> element of text, or None if absent."""
+    return get_single_match(re_body, text)
+
+
+def get_html_title(text):
+    """Return the contents of the <title> element of text, or None if absent."""
+    return get_single_match(re_title, text)
+
+
+def get_single_match(regex, text):
+    """Return the first group of the first match of regex found in text.
+
+    regex -- a compiled pattern containing at least one group.
+    text  -- the string to search.
+
+    Returns None when regex does not match anywhere in text.
+    """
+    match = regex.search(text)
+    if match is None:
+        return None
+    return match.group(1)
+
+
+# (compiled pattern, replacement) pairs.  html_to_wiki applies them in this
+# order, after its list handling, using regex.sub(repl, out).
+# NOTE(review): the HTML tag text inside these patterns appears to have been
+# stripped from this copy of the patch (some literals are even split across
+# lines) -- recover the original patterns from the repository and confirm
+# them before applying this hunk.
+re_subs = [(re.compile(r"\]*\>(.*?)\", re.DOTALL), r"'''\1'''"),
+ (re.compile(r"\(.*?)\", re.DOTALL), r"''\1''"),
+ (re.compile(r"\]*\>(.*?)\
", re.DOTALL), r"=\1="),
+ (re.compile(r"\]*\>(.*?)\
", re.DOTALL), r"==\1=="),
+ (re.compile(r"\]*\>(.*?)\
", re.DOTALL), r"===\1==="),
+ (re.compile(r"\]*\>(.*?)\
", re.DOTALL), r"====\1===="),
+ (re.compile(r"\]*\>(.*?)\
", re.DOTALL), r"=====\1====="),
+ (re.compile(r"\]*\>(.*?)\
", re.DOTALL), r"======\1======"),
+ (re.compile(r"\(.*?)\", re.DOTALL), r"[subcave:\1|\2]"),
+ #interpage link needed
+ (re.compile(r"\(.*?)\", re.DOTALL), r"[cavedescription:\1|\2]"),
+ (re.compile(r"\[\([^\s]*).*?\\]", re.DOTALL), r"![qm:\1]"),
+
+ ]
+
+def html_to_wiki(text, codec = "utf-8"):
+    """Convert a fragment of HTML into wiki markup and return it.
+
+    text  -- the HTML.  A byte string (str) is first decoded with codec;
+             any other value is used as it is.
+    codec -- encoding used to decode a byte string (default "utf-8").
+
+    Paragraph tags are turned into blank lines, <ul> / <ol> lists become
+    lines prefixed with one "*" / "#" per nesting level, and finally every
+    (pattern, replacement) pair in re_subs is applied in order.
+    """
+    if type(text) == str:
+        text = unicode(text, codec)
+    text = re.sub("</p>", r"", text)
+    text = re.sub("<p>$", r"", text)
+    text = re.sub("<p>", r"\n\n", text)
+    out = ""
+    lists = ""  # one "*" or "#" for each list that is currently open
+    #lists
+    while text:
+        mstar = re.match(r"^(.*?)<ul[^>]*>\s*<li[^>]*>(.*?)</li>(.*)$", text, re.DOTALL)
+        munstar = re.match(r"^(\s*)</ul>(.*)$", text, re.DOTALL)
+        mhash = re.match(r"^(.*?)<ol[^>]*>\s*<li[^>]*>(.*?)</li>(.*)$", text, re.DOTALL)
+        munhash = re.match(r"^(\s*)</ol>(.*)$", text, re.DOTALL)
+        mitem = re.match(r"^(\s*)<li[^>]*>(.*?)</li>(.*)$", text, re.DOTALL)
+        # Act on whichever tag comes first in the remaining text, i.e. the
+        # match with the shortest leading group.  (The previous test,
+        # len(v) < min(l, 1000000000), compared a list with an int and so
+        # always picked the first pattern that matched at all.)
+        candidates = [(len(found.group(1)), action, found)
+                      for action, found in (("*", mstar),
+                                            ("#", mhash),
+                                            ("item", mitem),
+                                            ("close", munstar),
+                                            ("close", munhash))
+                      if found]
+        if not candidates:
+            # No list markup left: keep the rest of the text unchanged.
+            out += text
+            break
+        offset, action, found = min(candidates, key=lambda c: c[0])
+        if action in ("*", "#"):
+            # A list opens: go one level deeper and emit its first item.
+            lists += action
+            pre, val, post = found.groups()
+            out += pre + "\n" + lists + " " + val
+            text = post
+        elif action == "item":
+            # A further item of the innermost open list.
+            pre, val, post = found.groups()
+            out += "\n" + lists + " " + val
+            text = post
+        else:
+            # A list closes: go up one level.  lists is a string, so it is
+            # shortened by slicing (it has no pop() method).
+            lists = lists[:-1]
+            text = found.group(2)
+    #substitutions
+    for regex, repl in re_subs:
+        out = regex.sub(repl, out)
+    return out
\ No newline at end of file