undosify lineends

This commit is contained in:
Wookey 2011-07-11 01:49:03 +01:00
parent bab92cb88c
commit f766df597c
2 changed files with 327 additions and 327 deletions

298
urls.py
View File

@ -1,149 +1,149 @@
from django.conf.urls.defaults import *
from django.conf import settings
from core.views import * # flat import
from core.views_other import *
from core.views_caves import *
from core.views_survex import *
from core.models import *
from django.views.generic.create_update import create_object
from django.contrib import admin
from django.views.generic.list_detail import object_list
from django.contrib import admin
admin.autodiscover()
# type url probably means it's used.
actualurlpatterns = patterns('',
url(r'^$', views_other.frontpage, name="frontpage"),
url(r'^todo/$', views_other.todo, name="todo"),
url(r'^caves/?$', views_caves.caveindex, name="caveindex"),
url(r'^people/?$', views_logbooks.personindex, name="personindex"),
url(r'^newqmnumber/?$', views_other.ajax_QM_number, ),
url(r'^lbo_suggestions/?$', logbook_entry_suggestions),
#(r'^person/(?P<person_id>\d*)/?$', views_logbooks.person),
url(r'^person/(?P<first_name>[A-Z]*[a-z\-\']*)[^a-zA-Z]*(?P<last_name>[a-z\-\']*[^a-zA-Z]*[A-Z]*[a-z\-]*)/?', views_logbooks.person, name="person"),
#url(r'^person/(\w+_\w+)$', views_logbooks.person, name="person"),
url(r'^expedition/(\d+)$', views_logbooks.expedition, name="expedition"),
url(r'^expeditions/?$', object_list, {'queryset':Expedition.objects.all(),'template_name':'object_list.html'},name="expeditions"),
url(r'^personexpedition/(?P<first_name>[A-Z]*[a-z]*)[^a-zA-Z]*(?P<last_name>[A-Z]*[a-z]*)/(?P<year>\d+)/?$', views_logbooks.personexpedition, name="personexpedition"),
url(r'^logbookentry/(?P<date>.*)/(?P<slug>.*)/?$', views_logbooks.logbookentry,name="logbookentry"),
url(r'^newlogbookentry/(?P<expeditionyear>.*)$', views_logbooks.newLogbookEntry, name="newLogBookEntry"),
url(r'^editlogbookentry/(?P<expeditionyear>[^/]*)/(?P<pdate>[^/]*)/(?P<pslug>[^/]*)/$', views_logbooks.newLogbookEntry, name="editLogBookEntry"),
url(r'^deletelogbookentry/(?P<expeditionyear>[^/]*)/(?P<date>[^/]*)/(?P<slug>[^/]*)/$', views_logbooks.deleteLogbookEntry, name="deleteLogBookEntry"),
url(r'^newfile', views_other.newFile, name="newFile"),
url(r'^getEntrances/(?P<caveslug>.*)', views_caves.get_entrances, name = "get_entrances"),
url(r'^getQMs/(?P<caveslug>.*)', views_caves.get_qms, name = "get_qms"),
url(r'^getPeople/(?P<expeditionslug>.*)', views_logbooks.get_people, name = "get_people"),
url(r'^getLogBookEntries/(?P<expeditionslug>.*)', views_logbooks.get_logbook_entries, name = "get_logbook_entries"),
url(r'^cave/(?P<cave_id>[^/]+)/?$', views_caves.cave, name="cave"),
url(r'^caveslug/([^/]+)/?$', views_caves.caveSlug, name="caveSlug"),
url(r'^cave/entrance/([^/]+)/?$', views_caves.caveEntrance),
url(r'^cave/description/([^/]+)/?$', views_caves.caveDescription),
url(r'^cave/qms/([^/]+)/?$', views_caves.caveQMs),
url(r'^cave/logbook/([^/]+)/?$', views_caves.caveLogbook),
#url(r'^cavedescription/(?P<cavedescription_name>[^/]+)/?$', views_caves.cave_description, name="cavedescription"),
#url(r'^cavedescription/?$', object_list, {'queryset':CaveDescription.objects.all(),'template_name':'object_list.html'}, name="cavedescriptions"),
#url(r'^cavehref/(.+)$', views_caves.cave, name="cave"),url(r'cave'),
url(r'^jgtfile/(.*)$', view_surveys.jgtfile, name="jgtfile"),
url(r'^jgtuploadfile$', view_surveys.jgtuploadfile, name="jgtuploadfile"),
url(r'^cave/(?P<cave_id>[^/]+)/?(?P<ent_letter>[^/])$', ent),
#(r'^cave/(?P<cave_id>[^/]+)/edit/$', edit_cave),
#(r'^cavesearch', caveSearch),
url(r'^cave/(?P<cave_id>[^/]+)/(?P<year>\d\d\d\d)-(?P<qm_id>\d*)(?P<grade>[ABCDX]?)?$', views_caves.qm, name="qm"),
url(r'^logbooksearch/(.*)/?$', views_logbooks.logbookSearch),
url(r'^statistics/?$', views_other.stats, name="stats"),
url(r'^survey/?$', surveyindex, name="survey"),
url(r'^survey/(?P<year>\d\d\d\d)\#(?P<wallet_number>\d*)$', survey, name="survey"),
url(r'^controlpanel/?$', views_other.controlPanel, name="controlpanel"),
url(r'^CAVETAB2\.CSV/?$', views_other.downloadCavetab, name="downloadcavetab"),
url(r'^Surveys\.csv/?$', views_other.downloadSurveys, name="downloadsurveys"),
url(r'^logbook(?P<year>\d\d\d\d)\.(?P<extension>.*)/?$',views_other.downloadLogbook),
url(r'^logbook/?$',views_other.downloadLogbook, name="downloadlogbook"),
url(r'^cave/(?P<cave_id>[^/]+)/qm\.csv/?$', views_other.downloadQMs, name="downloadqms"),
(r'^downloadqms$', views_other.downloadQMs),
url(r'^eyecandy$', views_other.eyecandy),
(r'^admin/doc/?', include('django.contrib.admindocs.urls')),
url(r'^admin/(.*)', admin.site.root, name="admin"),
# don't know why this needs troggle/ in here. nice to get it out
url(r'^troggle/media-admin/(?P<path>.*)$', 'django.views.static.serve', {'document_root': settings.MEDIA_ADMIN_DIR, 'show_indexes':True}),
(r'^accounts/', include('registration.urls')),
(r'^profiles/', include('profiles.urls')),
# (r'^personform/(.*)$', personForm),
(r'^site_media/(?P<path>.*)$', 'django.views.static.serve',
{'document_root': settings.MEDIA_ROOT, 'show_indexes': True}),
(r'^tinymce_media/(?P<path>.*)$', 'django.views.static.serve',
{'document_root': settings.TINY_MCE_MEDIA_ROOT, 'show_indexes': True}),
url(r'^survexblock/(.+)$', views_caves.survexblock, name="survexblock"),
url(r'^survexfile/(?P<survex_file>.*?)\.svx$', views_survex.svx, name="svx"),
url(r'^survexfile/(?P<survex_file>.*?)\.3d$', views_survex.threed, name="threed"),
url(r'^survexfile/(?P<survex_file>.*?)\.log$', views_survex.svxraw),
url(r'^survexfile/(?P<survex_file>.*?)\.err$', views_survex.err),
url(r'^survexfile/caves/$', views_survex.survexcaveslist, name="survexcaveslist"),
url(r'^survexfile/caves/(?P<survex_cave>.*)$', views_survex.survexcavesingle, name="survexcavessingle"),
url(r'^survexfileraw/(?P<survex_file>.*?)\.svx$', views_survex.svxraw, name="svxraw"),
(r'^survey_files/listdir/(?P<path>.*)$', view_surveys.listdir),
(r'^survey_files/download/(?P<path>.*)$', view_surveys.download),
#(r'^survey_files/upload/(?P<path>.*)$', view_surveys.upload),
#(r'^survey_scans/(?P<path>.*)$', 'django.views.static.serve', {'document_root': settings.SURVEY_SCANS, 'show_indexes':True}),
url(r'^survey_scans/$', view_surveys.surveyscansfolders, name="surveyscansfolders"),
url(r'^survey_scans/(?P<path>[^/]+)/$', view_surveys.surveyscansfolder, name="surveyscansfolder"),
url(r'^survey_scans/(?P<path>[^/]+)/(?P<file>[^/]+(?:png|jpg))$',
view_surveys.surveyscansingle, name="surveyscansingle"),
url(r'^tunneldata/$', view_surveys.tunneldata, name="tunneldata"),
url(r'^tunneldataraw/(?P<path>.+?\.xml)$', view_surveys.tunnelfile, name="tunnelfile"),
url(r'^tunneldataraw/(?P<path>.+?\.xml)/upload$',view_surveys.tunnelfileupload, name="tunnelfileupload"),
#url(r'^tunneldatainfo/(?P<path>.+?\.xml)$', view_surveys.tunnelfileinfo, name="tunnelfileinfo"),
(r'^photos/(?P<path>.*)$', 'django.views.static.serve',
{'document_root': settings.PHOTOS_ROOT, 'show_indexes':True}),
# for those silly ideas
url(r'^experimental.*$', views_logbooks.experimental, name="experimental"),
#url(r'^trip_report/?$',views_other.tripreport,name="trip_report")
url(r'^(.*)_edit$', 'flatpages.views.editflatpage', name="editflatpage"),
url(r'^(.*)$', 'flatpages.views.flatpage', name="flatpage"),
)
#Allow prefix to all urls
urlpatterns = patterns ('',
('^%s' % settings.DIR_ROOT, include(actualurlpatterns))
)
from django.conf.urls.defaults import *
from django.conf import settings
from core.views import * # flat import
from core.views_other import *
from core.views_caves import *
from core.views_survex import *
from core.models import *
from django.views.generic.create_update import create_object
from django.contrib import admin
from django.views.generic.list_detail import object_list
from django.contrib import admin
admin.autodiscover()
# type url probably means it's used.
actualurlpatterns = patterns('',
url(r'^$', views_other.frontpage, name="frontpage"),
url(r'^todo/$', views_other.todo, name="todo"),
url(r'^caves/?$', views_caves.caveindex, name="caveindex"),
url(r'^people/?$', views_logbooks.personindex, name="personindex"),
url(r'^newqmnumber/?$', views_other.ajax_QM_number, ),
url(r'^lbo_suggestions/?$', logbook_entry_suggestions),
#(r'^person/(?P<person_id>\d*)/?$', views_logbooks.person),
url(r'^person/(?P<first_name>[A-Z]*[a-z\-\']*)[^a-zA-Z]*(?P<last_name>[a-z\-\']*[^a-zA-Z]*[A-Z]*[a-z\-]*)/?', views_logbooks.person, name="person"),
#url(r'^person/(\w+_\w+)$', views_logbooks.person, name="person"),
url(r'^expedition/(\d+)$', views_logbooks.expedition, name="expedition"),
url(r'^expeditions/?$', object_list, {'queryset':Expedition.objects.all(),'template_name':'object_list.html'},name="expeditions"),
url(r'^personexpedition/(?P<first_name>[A-Z]*[a-z]*)[^a-zA-Z]*(?P<last_name>[A-Z]*[a-z]*)/(?P<year>\d+)/?$', views_logbooks.personexpedition, name="personexpedition"),
url(r'^logbookentry/(?P<date>.*)/(?P<slug>.*)/?$', views_logbooks.logbookentry,name="logbookentry"),
url(r'^newlogbookentry/(?P<expeditionyear>.*)$', views_logbooks.newLogbookEntry, name="newLogBookEntry"),
url(r'^editlogbookentry/(?P<expeditionyear>[^/]*)/(?P<pdate>[^/]*)/(?P<pslug>[^/]*)/$', views_logbooks.newLogbookEntry, name="editLogBookEntry"),
url(r'^deletelogbookentry/(?P<expeditionyear>[^/]*)/(?P<date>[^/]*)/(?P<slug>[^/]*)/$', views_logbooks.deleteLogbookEntry, name="deleteLogBookEntry"),
url(r'^newfile', views_other.newFile, name="newFile"),
url(r'^getEntrances/(?P<caveslug>.*)', views_caves.get_entrances, name = "get_entrances"),
url(r'^getQMs/(?P<caveslug>.*)', views_caves.get_qms, name = "get_qms"),
url(r'^getPeople/(?P<expeditionslug>.*)', views_logbooks.get_people, name = "get_people"),
url(r'^getLogBookEntries/(?P<expeditionslug>.*)', views_logbooks.get_logbook_entries, name = "get_logbook_entries"),
url(r'^cave/(?P<cave_id>[^/]+)/?$', views_caves.cave, name="cave"),
url(r'^caveslug/([^/]+)/?$', views_caves.caveSlug, name="caveSlug"),
url(r'^cave/entrance/([^/]+)/?$', views_caves.caveEntrance),
url(r'^cave/description/([^/]+)/?$', views_caves.caveDescription),
url(r'^cave/qms/([^/]+)/?$', views_caves.caveQMs),
url(r'^cave/logbook/([^/]+)/?$', views_caves.caveLogbook),
#url(r'^cavedescription/(?P<cavedescription_name>[^/]+)/?$', views_caves.cave_description, name="cavedescription"),
#url(r'^cavedescription/?$', object_list, {'queryset':CaveDescription.objects.all(),'template_name':'object_list.html'}, name="cavedescriptions"),
#url(r'^cavehref/(.+)$', views_caves.cave, name="cave"),url(r'cave'),
url(r'^jgtfile/(.*)$', view_surveys.jgtfile, name="jgtfile"),
url(r'^jgtuploadfile$', view_surveys.jgtuploadfile, name="jgtuploadfile"),
url(r'^cave/(?P<cave_id>[^/]+)/?(?P<ent_letter>[^/])$', ent),
#(r'^cave/(?P<cave_id>[^/]+)/edit/$', edit_cave),
#(r'^cavesearch', caveSearch),
url(r'^cave/(?P<cave_id>[^/]+)/(?P<year>\d\d\d\d)-(?P<qm_id>\d*)(?P<grade>[ABCDX]?)?$', views_caves.qm, name="qm"),
url(r'^logbooksearch/(.*)/?$', views_logbooks.logbookSearch),
url(r'^statistics/?$', views_other.stats, name="stats"),
url(r'^survey/?$', surveyindex, name="survey"),
url(r'^survey/(?P<year>\d\d\d\d)\#(?P<wallet_number>\d*)$', survey, name="survey"),
url(r'^controlpanel/?$', views_other.controlPanel, name="controlpanel"),
url(r'^CAVETAB2\.CSV/?$', views_other.downloadCavetab, name="downloadcavetab"),
url(r'^Surveys\.csv/?$', views_other.downloadSurveys, name="downloadsurveys"),
url(r'^logbook(?P<year>\d\d\d\d)\.(?P<extension>.*)/?$',views_other.downloadLogbook),
url(r'^logbook/?$',views_other.downloadLogbook, name="downloadlogbook"),
url(r'^cave/(?P<cave_id>[^/]+)/qm\.csv/?$', views_other.downloadQMs, name="downloadqms"),
(r'^downloadqms$', views_other.downloadQMs),
url(r'^eyecandy$', views_other.eyecandy),
(r'^admin/doc/?', include('django.contrib.admindocs.urls')),
url(r'^admin/(.*)', admin.site.root, name="admin"),
# don't know why this needs troggle/ in here. nice to get it out
url(r'^troggle/media-admin/(?P<path>.*)$', 'django.views.static.serve', {'document_root': settings.MEDIA_ADMIN_DIR, 'show_indexes':True}),
(r'^accounts/', include('registration.urls')),
(r'^profiles/', include('profiles.urls')),
# (r'^personform/(.*)$', personForm),
(r'^site_media/(?P<path>.*)$', 'django.views.static.serve',
{'document_root': settings.MEDIA_ROOT, 'show_indexes': True}),
(r'^tinymce_media/(?P<path>.*)$', 'django.views.static.serve',
{'document_root': settings.TINY_MCE_MEDIA_ROOT, 'show_indexes': True}),
url(r'^survexblock/(.+)$', views_caves.survexblock, name="survexblock"),
url(r'^survexfile/(?P<survex_file>.*?)\.svx$', views_survex.svx, name="svx"),
url(r'^survexfile/(?P<survex_file>.*?)\.3d$', views_survex.threed, name="threed"),
url(r'^survexfile/(?P<survex_file>.*?)\.log$', views_survex.svxraw),
url(r'^survexfile/(?P<survex_file>.*?)\.err$', views_survex.err),
url(r'^survexfile/caves/$', views_survex.survexcaveslist, name="survexcaveslist"),
url(r'^survexfile/caves/(?P<survex_cave>.*)$', views_survex.survexcavesingle, name="survexcavessingle"),
url(r'^survexfileraw/(?P<survex_file>.*?)\.svx$', views_survex.svxraw, name="svxraw"),
(r'^survey_files/listdir/(?P<path>.*)$', view_surveys.listdir),
(r'^survey_files/download/(?P<path>.*)$', view_surveys.download),
#(r'^survey_files/upload/(?P<path>.*)$', view_surveys.upload),
#(r'^survey_scans/(?P<path>.*)$', 'django.views.static.serve', {'document_root': settings.SURVEY_SCANS, 'show_indexes':True}),
url(r'^survey_scans/$', view_surveys.surveyscansfolders, name="surveyscansfolders"),
url(r'^survey_scans/(?P<path>[^/]+)/$', view_surveys.surveyscansfolder, name="surveyscansfolder"),
url(r'^survey_scans/(?P<path>[^/]+)/(?P<file>[^/]+(?:png|jpg))$',
view_surveys.surveyscansingle, name="surveyscansingle"),
url(r'^tunneldata/$', view_surveys.tunneldata, name="tunneldata"),
url(r'^tunneldataraw/(?P<path>.+?\.xml)$', view_surveys.tunnelfile, name="tunnelfile"),
url(r'^tunneldataraw/(?P<path>.+?\.xml)/upload$',view_surveys.tunnelfileupload, name="tunnelfileupload"),
#url(r'^tunneldatainfo/(?P<path>.+?\.xml)$', view_surveys.tunnelfileinfo, name="tunnelfileinfo"),
(r'^photos/(?P<path>.*)$', 'django.views.static.serve',
{'document_root': settings.PHOTOS_ROOT, 'show_indexes':True}),
# for those silly ideas
url(r'^experimental.*$', views_logbooks.experimental, name="experimental"),
#url(r'^trip_report/?$',views_other.tripreport,name="trip_report")
url(r'^(.*)_edit$', 'flatpages.views.editflatpage', name="editflatpage"),
url(r'^(.*)$', 'flatpages.views.flatpage', name="flatpage"),
)
#Allow prefix to all urls
urlpatterns = patterns ('',
('^%s' % settings.DIR_ROOT, include(actualurlpatterns))
)

356
utils.py
View File

@ -1,178 +1,178 @@
from django.conf import settings
import random, re, logging
from core.models import CaveDescription
def weighted_choice(lst):
n = random.uniform(0,1)
for item, weight in lst:
if n < weight:
break
n = n - weight
return item
def randomLogbookSentence():
from troggle.core.models import LogbookEntry
randSent={}
# needs to handle empty logbooks without crashing
#Choose a random logbook entry
randSent['entry']=LogbookEntry.objects.order_by('?')[0]
#Choose again if there are no sentances (this happens if it is a placeholder entry)
while len(re.findall('[A-Z].*?\.',randSent['entry'].text))==0:
randSent['entry']=LogbookEntry.objects.order_by('?')[0]
#Choose a random sentence from that entry. Store the sentence as randSent['sentence'], and the number of that sentence in the entry as randSent['number']
sentenceList=re.findall('[A-Z].*?\.',randSent['entry'].text)
randSent['number']=random.randrange(0,len(sentenceList))
randSent['sentence']=sentenceList[randSent['number']]
return randSent
def save_carefully(objectType, lookupAttribs={}, nonLookupAttribs={}):
"""Looks up instance using lookupAttribs and carries out the following:
-if instance does not exist in DB: add instance to DB, return (new instance, True)
-if instance exists in DB and was modified using Troggle: do nothing, return (existing instance, False)
-if instance exists in DB and was not modified using Troggle: overwrite instance, return (instance, False)
The checking is accomplished using Django's get_or_create and the new_since_parsing boolean field
defined in core.models.TroggleModel.
"""
instance, created=objectType.objects.get_or_create(defaults=nonLookupAttribs, **lookupAttribs)
if not created and not instance.new_since_parsing:
for k, v in nonLookupAttribs.items(): #overwrite the existing attributes from the logbook text (except date and title)
setattr(instance, k, v)
instance.save()
if created:
logging.info(str(instance) + ' was just added to the database for the first time. \n')
if not created and instance.new_since_parsing:
logging.info(str(instance) + " has been modified using Troggle, so the current script left it as is. \n")
if not created and not instance.new_since_parsing:
logging.info(str(instance) + " existed in the database unchanged since last parse. It was overwritten by the current script. \n")
return (instance, created)
def render_with_context(req, *args, **kwargs):
"""this is the snippet from http://www.djangosnippets.org/snippets/3/
Django uses Context, not RequestContext when you call render_to_response.
We always want to use RequestContext, so that django adds the context from
settings.TEMPLATE_CONTEXT_PROCESSORS. This way we automatically get
necessary settings variables passed to each template. So we use a custom
method, render_response instead of render_to_response. Hopefully future
Django releases will make this unnecessary."""
from django.shortcuts import render_to_response
from django.template import RequestContext
kwargs['context_instance'] = RequestContext(req)
return render_to_response(*args, **kwargs)
re_body = re.compile(r"\<body[^>]*\>(.*)\</body\>", re.DOTALL)
re_title = re.compile(r"\<title[^>]*\>(.*)\</title\>", re.DOTALL)
def get_html_body(text):
return get_single_match(re_body, text)
def get_html_title(text):
return get_single_match(re_title, text)
def get_single_match(regex, text):
match = regex.search(text)
if match:
return match.groups()[0]
else:
return None
def href_to_wikilinks(matchobj):
"""
Given an html link, checks for possible valid wikilinks.
Returns the first valid wikilink. Valid means the target
object actually exists.
"""
res=CaveDescription.objects.filter(long_name__icontains=matchobj.groupdict()['text'])
if res and res[0]:
return r'[[cavedescription:'+res[0].short_name+'|'+res[0].long_name+']]'
else:
return matchobj.group()
#except:
#print 'fail'
re_subs = [(re.compile(r"\<b[^>]*\>(.*?)\</b\>", re.DOTALL), r"'''\1'''"),
(re.compile(r"\<i\>(.*?)\</i\>", re.DOTALL), r"''\1''"),
(re.compile(r"\<h1[^>]*\>(.*?)\</h1\>", re.DOTALL), r"=\1="),
(re.compile(r"\<h2[^>]*\>(.*?)\</h2\>", re.DOTALL), r"==\1=="),
(re.compile(r"\<h3[^>]*\>(.*?)\</h3\>", re.DOTALL), r"===\1==="),
(re.compile(r"\<h4[^>]*\>(.*?)\</h4\>", re.DOTALL), r"====\1===="),
(re.compile(r"\<h5[^>]*\>(.*?)\</h5\>", re.DOTALL), r"=====\1====="),
(re.compile(r"\<h6[^>]*\>(.*?)\</h6\>", re.DOTALL), r"======\1======"),
(re.compile(r'(<a href="?(?P<target>.*)"?>)?<img class="?(?P<class>\w*)"? src="?t/?(?P<source>[\w/\.]*)"?(?P<rest>></img>|\s/>(</a>)?)', re.DOTALL),r'[[display:\g<class> photo:\g<source>]]'), #
(re.compile(r"\<a\s+id=['\"]([^'\"]*)['\"]\s*\>(.*?)\</a\>", re.DOTALL), r"[[subcave:\1|\2]]"), #assumes that all links with id attributes are subcaves. Not great.
#interpage link needed
(re.compile(r"\<a\s+href=['\"]#([^'\"]*)['\"]\s*\>(.*?)\</a\>", re.DOTALL), r"[[cavedescription:\1|\2]]"), #assumes that all links with target ids are cave descriptions. Not great.
(re.compile(r"\[\<a\s+href=['\"][^'\"]*['\"]\s+id=['\"][^'\"]*['\"]\s*\>([^\s]*).*?\</a\>\]", re.DOTALL), r"[[qm:\1]]"),
(re.compile(r'<a\shref="?(?P<target>.*)"?>(?P<text>.*)</a>'),href_to_wikilinks),
]
def html_to_wiki(text, codec = "utf-8"):
if type(text) == str:
text = unicode(text, codec)
text = re.sub("</p>", r"", text)
text = re.sub("<p>$", r"", text)
text = re.sub("<p>", r"\n\n", text)
out = ""
lists = ""
#lists
while text:
mstar = re.match("^(.*?)<ul[^>]*>\s*<li[^>]*>(.*?)</li>(.*)$", text, re.DOTALL)
munstar = re.match("^(\s*)</ul>(.*)$", text, re.DOTALL)
mhash = re.match("^(.*?)<ol[^>]*>\s*<li[^>]*>(.*?)</li>(.*)$", text, re.DOTALL)
munhash = re.match("^(\s*)</ol>(.*)$", text, re.DOTALL)
mitem = re.match("^(\s*)<li[^>]*>(.*?)</li>(.*)$", text, re.DOTALL)
ms = [len(m.groups()[0]) for m in [mstar, munstar, mhash, munhash, mitem] if m]
def min_(i, l):
try:
v = i.groups()[0]
l.remove(len(v))
return len(v) < min(l, 1000000000)
except:
return False
if min_(mstar, ms):
lists += "*"
pre, val, post = mstar.groups()
out += pre + "\n" + lists + " " + val
text = post
elif min_(mhash, ms):
lists += "#"
pre, val, post = mhash.groups()
out += pre + "\n" + lists + " " + val
text = post
elif min_(mitem, ms):
pre, val, post = mitem.groups()
out += "\n" + lists + " " + val
text = post
elif min_(munstar, ms):
lists = lists[:-1]
text = munstar.groups()[1]
elif min_(munhash, ms):
lists.pop()
text = munhash.groups()[1]
else:
out += text
text = ""
#substitutions
for regex, repl in re_subs:
out = regex.sub(repl, out)
return out
from django.conf import settings
import random, re, logging
from core.models import CaveDescription
def weighted_choice(lst):
n = random.uniform(0,1)
for item, weight in lst:
if n < weight:
break
n = n - weight
return item
def randomLogbookSentence():
from troggle.core.models import LogbookEntry
randSent={}
# needs to handle empty logbooks without crashing
#Choose a random logbook entry
randSent['entry']=LogbookEntry.objects.order_by('?')[0]
#Choose again if there are no sentances (this happens if it is a placeholder entry)
while len(re.findall('[A-Z].*?\.',randSent['entry'].text))==0:
randSent['entry']=LogbookEntry.objects.order_by('?')[0]
#Choose a random sentence from that entry. Store the sentence as randSent['sentence'], and the number of that sentence in the entry as randSent['number']
sentenceList=re.findall('[A-Z].*?\.',randSent['entry'].text)
randSent['number']=random.randrange(0,len(sentenceList))
randSent['sentence']=sentenceList[randSent['number']]
return randSent
def save_carefully(objectType, lookupAttribs={}, nonLookupAttribs={}):
"""Looks up instance using lookupAttribs and carries out the following:
-if instance does not exist in DB: add instance to DB, return (new instance, True)
-if instance exists in DB and was modified using Troggle: do nothing, return (existing instance, False)
-if instance exists in DB and was not modified using Troggle: overwrite instance, return (instance, False)
The checking is accomplished using Django's get_or_create and the new_since_parsing boolean field
defined in core.models.TroggleModel.
"""
instance, created=objectType.objects.get_or_create(defaults=nonLookupAttribs, **lookupAttribs)
if not created and not instance.new_since_parsing:
for k, v in nonLookupAttribs.items(): #overwrite the existing attributes from the logbook text (except date and title)
setattr(instance, k, v)
instance.save()
if created:
logging.info(str(instance) + ' was just added to the database for the first time. \n')
if not created and instance.new_since_parsing:
logging.info(str(instance) + " has been modified using Troggle, so the current script left it as is. \n")
if not created and not instance.new_since_parsing:
logging.info(str(instance) + " existed in the database unchanged since last parse. It was overwritten by the current script. \n")
return (instance, created)
def render_with_context(req, *args, **kwargs):
"""this is the snippet from http://www.djangosnippets.org/snippets/3/
Django uses Context, not RequestContext when you call render_to_response.
We always want to use RequestContext, so that django adds the context from
settings.TEMPLATE_CONTEXT_PROCESSORS. This way we automatically get
necessary settings variables passed to each template. So we use a custom
method, render_response instead of render_to_response. Hopefully future
Django releases will make this unnecessary."""
from django.shortcuts import render_to_response
from django.template import RequestContext
kwargs['context_instance'] = RequestContext(req)
return render_to_response(*args, **kwargs)
re_body = re.compile(r"\<body[^>]*\>(.*)\</body\>", re.DOTALL)
re_title = re.compile(r"\<title[^>]*\>(.*)\</title\>", re.DOTALL)
def get_html_body(text):
return get_single_match(re_body, text)
def get_html_title(text):
return get_single_match(re_title, text)
def get_single_match(regex, text):
match = regex.search(text)
if match:
return match.groups()[0]
else:
return None
def href_to_wikilinks(matchobj):
"""
Given an html link, checks for possible valid wikilinks.
Returns the first valid wikilink. Valid means the target
object actually exists.
"""
res=CaveDescription.objects.filter(long_name__icontains=matchobj.groupdict()['text'])
if res and res[0]:
return r'[[cavedescription:'+res[0].short_name+'|'+res[0].long_name+']]'
else:
return matchobj.group()
#except:
#print 'fail'
re_subs = [(re.compile(r"\<b[^>]*\>(.*?)\</b\>", re.DOTALL), r"'''\1'''"),
(re.compile(r"\<i\>(.*?)\</i\>", re.DOTALL), r"''\1''"),
(re.compile(r"\<h1[^>]*\>(.*?)\</h1\>", re.DOTALL), r"=\1="),
(re.compile(r"\<h2[^>]*\>(.*?)\</h2\>", re.DOTALL), r"==\1=="),
(re.compile(r"\<h3[^>]*\>(.*?)\</h3\>", re.DOTALL), r"===\1==="),
(re.compile(r"\<h4[^>]*\>(.*?)\</h4\>", re.DOTALL), r"====\1===="),
(re.compile(r"\<h5[^>]*\>(.*?)\</h5\>", re.DOTALL), r"=====\1====="),
(re.compile(r"\<h6[^>]*\>(.*?)\</h6\>", re.DOTALL), r"======\1======"),
(re.compile(r'(<a href="?(?P<target>.*)"?>)?<img class="?(?P<class>\w*)"? src="?t/?(?P<source>[\w/\.]*)"?(?P<rest>></img>|\s/>(</a>)?)', re.DOTALL),r'[[display:\g<class> photo:\g<source>]]'), #
(re.compile(r"\<a\s+id=['\"]([^'\"]*)['\"]\s*\>(.*?)\</a\>", re.DOTALL), r"[[subcave:\1|\2]]"), #assumes that all links with id attributes are subcaves. Not great.
#interpage link needed
(re.compile(r"\<a\s+href=['\"]#([^'\"]*)['\"]\s*\>(.*?)\</a\>", re.DOTALL), r"[[cavedescription:\1|\2]]"), #assumes that all links with target ids are cave descriptions. Not great.
(re.compile(r"\[\<a\s+href=['\"][^'\"]*['\"]\s+id=['\"][^'\"]*['\"]\s*\>([^\s]*).*?\</a\>\]", re.DOTALL), r"[[qm:\1]]"),
(re.compile(r'<a\shref="?(?P<target>.*)"?>(?P<text>.*)</a>'),href_to_wikilinks),
]
def html_to_wiki(text, codec = "utf-8"):
if type(text) == str:
text = unicode(text, codec)
text = re.sub("</p>", r"", text)
text = re.sub("<p>$", r"", text)
text = re.sub("<p>", r"\n\n", text)
out = ""
lists = ""
#lists
while text:
mstar = re.match("^(.*?)<ul[^>]*>\s*<li[^>]*>(.*?)</li>(.*)$", text, re.DOTALL)
munstar = re.match("^(\s*)</ul>(.*)$", text, re.DOTALL)
mhash = re.match("^(.*?)<ol[^>]*>\s*<li[^>]*>(.*?)</li>(.*)$", text, re.DOTALL)
munhash = re.match("^(\s*)</ol>(.*)$", text, re.DOTALL)
mitem = re.match("^(\s*)<li[^>]*>(.*?)</li>(.*)$", text, re.DOTALL)
ms = [len(m.groups()[0]) for m in [mstar, munstar, mhash, munhash, mitem] if m]
def min_(i, l):
try:
v = i.groups()[0]
l.remove(len(v))
return len(v) < min(l, 1000000000)
except:
return False
if min_(mstar, ms):
lists += "*"
pre, val, post = mstar.groups()
out += pre + "\n" + lists + " " + val
text = post
elif min_(mhash, ms):
lists += "#"
pre, val, post = mhash.groups()
out += pre + "\n" + lists + " " + val
text = post
elif min_(mitem, ms):
pre, val, post = mitem.groups()
out += "\n" + lists + " " + val
text = post
elif min_(munstar, ms):
lists = lists[:-1]
text = munstar.groups()[1]
elif min_(munhash, ms):
lists.pop()
text = munhash.groups()[1]
else:
out += text
text = ""
#substitutions
for regex, repl in re_subs:
out = regex.sub(repl, out)
return out