troggle-unchained/utils.py
2021-04-12 23:58:48 +01:00

173 lines
7.1 KiB
Python

import sys
import random
import re
import logging
from django.conf import settings
from django.shortcuts import render
"""Oddball mixture of critical, superfluous and useful functions which should
be relocated more sensibly to other modules:
- various HTML/wiki conversion functions, presumably for logbooks
- functions whose use is unknown:
    weighted_choice(lst)
    randomLogbookSentence()
"""
def weighted_choice(lst):
    """Pick one item from ``lst`` at random, biased by per-item weights.

    ``lst`` is an iterable of ``(item, weight)`` pairs.  A number is drawn
    with ``random.uniform(0, 1)`` and the pairs are walked in order: the
    first pair whose weight exceeds what is left of the draw wins, otherwise
    its weight is subtracted and the walk continues.  If no pair wins, the
    last item is returned, so the weights are expected to sum to about 1
    (this is not enforced).

    Raises:
        IndexError: if ``lst`` yields no pairs (same exception type as
            ``random.choice`` on an empty sequence).
    """
    remaining = random.uniform(0, 1)
    seen_any = False
    item = None
    for item, weight in lst:
        seen_any = True
        if remaining < weight:
            break
        remaining -= weight
    if not seen_any:
        # Previously this fell through to ``return item`` with ``item``
        # unbound, producing an opaque UnboundLocalError.
        raise IndexError("weighted_choice() called with no (item, weight) pairs")
    return item
def randomLogbookSentence():
    """Return a random sentence taken from a randomly chosen logbook entry.

    A "sentence" is any run of text that starts with an ASCII capital letter
    and ends at the next full stop.  Entries containing no such run (e.g.
    placeholder entries) are skipped and another random entry is drawn.

    Returns:
        dict with keys
        ``'entry'``    - the chosen LogbookEntry,
        ``'number'``   - index of the sentence within that entry's sentences,
        ``'sentence'`` - the sentence text itself.

    TODO: an empty logbook table is still not handled (indexing ``[0]`` on
    the query result is expected to fail), and if no entry contains a
    sentence this loops forever.
    """
    from troggle.core.models import LogbookEntry

    # Raw string: the original non-raw literal contained the invalid escape
    # "\." which newer Python versions warn about.
    sentence_pattern = re.compile(r'[A-Z].*?\.')

    while True:
        # Choose a random logbook entry; choose again if it has no sentences.
        entry = LogbookEntry.objects.order_by('?')[0]
        sentenceList = sentence_pattern.findall(entry.text)
        if sentenceList:
            break

    # Choose a random sentence from that entry and remember its position.
    number = random.randrange(0, len(sentenceList))
    return {
        'entry': entry,
        'number': number,
        'sentence': sentenceList[number],
    }
def _report_save_failure(objectType, stage, lookupAttribs, nonLookupAttribs):
    """Print diagnostics for a database call that failed inside save_carefully()."""
    print(" !! - SAVE CAREFULLY ===================", objectType)
    print(" !! - -- {}".format(stage))
    print(" !! - lookupAttribs:{}\n !! - nonLookupAttribs:{}".format(lookupAttribs, nonLookupAttribs))


def save_carefully(objectType, lookupAttribs=None, nonLookupAttribs=None):
    """Looks up instance using lookupAttribs and carries out the following:
    -if instance does not exist in DB: add instance to DB, return (new instance, True)
    -if instance exists in DB and was modified using Troggle: do nothing, return (existing instance, False)
    -if instance exists in DB and was not modified using Troggle: overwrite instance, return (instance, False)

    The checking is accomplished using Django's get_or_create and the new_since_parsing boolean field
    defined in core.models.TroggleModel.

    Args:
        objectType: model class exposing ``objects.get_or_create``.
        lookupAttribs: dict of field values identifying the instance
            (defaults to an empty dict).
        nonLookupAttribs: dict of the remaining field values; used as the
            creation defaults and as the values written on overwrite
            (defaults to an empty dict).

    Returns:
        ``(instance, created)`` exactly as produced by ``get_or_create``.

    Raises:
        Whatever ``get_or_create`` or ``instance.save()`` raise; diagnostics
        are printed first and the exception is re-raised unchanged.
    """
    # None instead of {} as default avoids sharing one dict between calls.
    lookupAttribs = {} if lookupAttribs is None else lookupAttribs
    nonLookupAttribs = {} if nonLookupAttribs is None else nonLookupAttribs

    try:
        instance, created = objectType.objects.get_or_create(defaults=nonLookupAttribs, **lookupAttribs)
    except Exception:
        _report_save_failure(objectType, "objects.get_or_create()", lookupAttribs, nonLookupAttribs)
        raise

    if not created and not instance.new_since_parsing:
        # Overwrite the existing attributes from the logbook text (except date and title).
        for k, v in nonLookupAttribs.items():
            setattr(instance, k, v)
        try:
            instance.save()
        except Exception:
            _report_save_failure(objectType, "instance.save()", lookupAttribs, nonLookupAttribs)
            raise

    # __str__ of a model can itself fail; fall back to a description built
    # from the lookup attributes so that logging never aborts the import.
    try:
        msg = str(instance)
    except Exception:
        msg = "FAULT getting __str__ for instance with lookupattribs: {}:".format(lookupAttribs)

    # new_since_parsing is re-read here (not cached) because the overwrite
    # above may have changed it.
    if created:
        logging.info(msg + ' was just added to the database for the first time. \n')
    elif instance.new_since_parsing:
        logging.info(msg + " has been modified using Troggle, so the current script left it as is. \n")
    else:
        logging.info(msg + " existed in the database unchanged since last parse. It was overwritten by the current script. \n")
    return (instance, created)
# Captures everything between an opening <body ...> tag and a closing </body>
# tag.  The capture is greedy and DOTALL, so it spans newlines and runs to the
# last </body> in the text.
re_body = re.compile(r"\<body[^>]*\>(.*)\</body\>", re.DOTALL)
# Same as re_body, but for <title ...> ... </title>.
re_title = re.compile(r"\<title[^>]*\>(.*)\</title\>", re.DOTALL)
def get_html_body(text):
    """Return what lies between <body ...> and </body> in ``text``, or None if absent."""
    body = get_single_match(regex=re_body, text=text)
    return body
def get_html_title(text):
    """Return what lies between <title ...> and </title> in ``text``, or None if absent."""
    title = get_single_match(regex=re_title, text=text)
    return title
def get_single_match(regex, text):
    """Search ``text`` with the compiled ``regex`` and return its first group.

    Returns None when the pattern is not found anywhere in ``text``.
    """
    found = regex.search(text)
    if not found:
        return None
    captured = found.groups()
    return captured[0]
# Ordered (compiled pattern, replacement) pairs that html_to_wiki() applies,
# one after another, to turn inline HTML into wiki markup.
# NOTE: the first pattern's "<b[^>]*>" also matches other tags that merely
# start with "<b" (for example "<br>").
re_subs = [
    (re.compile(r"\<b[^>]*\>(.*?)\</b\>", re.DOTALL), r"'''\1'''"),
    (re.compile(r"\<i\>(.*?)\</i\>", re.DOTALL), r"''\1''"),
    (re.compile(r"\<h1[^>]*\>(.*?)\</h1\>", re.DOTALL), r"=\1="),
    (re.compile(r"\<h2[^>]*\>(.*?)\</h2\>", re.DOTALL), r"==\1=="),
    (re.compile(r"\<h3[^>]*\>(.*?)\</h3\>", re.DOTALL), r"===\1==="),
    (re.compile(r"\<h4[^>]*\>(.*?)\</h4\>", re.DOTALL), r"====\1===="),
    (re.compile(r"\<h5[^>]*\>(.*?)\</h5\>", re.DOTALL), r"=====\1====="),
    (re.compile(r"\<h6[^>]*\>(.*?)\</h6\>", re.DOTALL), r"======\1======"),
    # Image (optionally preceded by an opening link) -> [[display:... photo:...]]
    (re.compile(r'(<a href="?(?P<target>.*)"?>)?<img class="?(?P<class>\w*)"? src="?t/?(?P<source>[\w/\.]*)"?(?P<rest>></img>|\s/>(</a>)?)', re.DOTALL),r'[[display:\g<class> photo:\g<source>]]'), #
    (re.compile(r"\<a\s+id=['\"]([^'\"]*)['\"]\s*\>(.*?)\</a\>", re.DOTALL), r"[[subcave:\1|\2]]"), #assumes that all links with id attributes are subcaves. Not great.
    #interpage link needed
    (re.compile(r"\<a\s+href=['\"]#([^'\"]*)['\"]\s*\>(.*?)\</a\>", re.DOTALL), r"[[cavedescription:\1|\2]]"), #assumes that all links with target ids are cave descriptions. Not great.
    (re.compile(r"\[\<a\s+href=['\"][^'\"]*['\"]\s+id=['\"][^'\"]*['\"]\s*\>([^\s]*).*?\</a\>\]", re.DOTALL), r"[[qm:\1]]"),
    # (re.compile(r'<a\shref="?(?P<target>.*)"?>(?P<text>.*)</a>'),href_to_wikilinks),
]

# List constructs recognised by html_to_wiki(), in tie-break order.  Group 1
# of every pattern is the text that precedes the construct.  The kind is the
# wiki marker to push for an opening tag ("*" or "#"), "item" for a further
# <li>, or "close" for a closing tag.
_LIST_TOKENS = (
    ("*", re.compile(r"^(.*?)<ul[^>]*>\s*<li[^>]*>(.*?)</li>(.*)$", re.DOTALL)),
    ("#", re.compile(r"^(.*?)<ol[^>]*>\s*<li[^>]*>(.*?)</li>(.*)$", re.DOTALL)),
    ("item", re.compile(r"^(\s*)<li[^>]*>(.*?)</li>(.*)$", re.DOTALL)),
    ("close", re.compile(r"^(\s*)</ul>(.*)$", re.DOTALL)),
    ("close", re.compile(r"^(\s*)</ol>(.*)$", re.DOTALL)),
)


def html_to_wiki(text, codec="utf-8"):
    """Convert a fragment of HTML into wiki markup.

    Steps, in order:
      1. ``</p>`` is dropped, a ``<p>`` at the very end is dropped, and every
         other ``<p>`` becomes a blank line.
      2. ``<ul>``/``<ol>``/``<li>`` structures become lines prefixed with one
         ``*`` or ``#`` per nesting level.
      3. Every substitution in ``re_subs`` is applied to the result.

    Args:
        text: the HTML, as ``str`` or as bytes.
        codec: encoding used to decode ``text`` when it is bytes.

    Returns:
        The converted text as ``str``.
    """
    # Only bytes need decoding (the previous check decoded ``str`` and so
    # raised TypeError for every string input).
    if isinstance(text, (bytes, bytearray)):
        text = text.decode(codec)

    text = text.replace("</p>", "")
    text = re.sub("<p>$", "", text)
    text = text.replace("<p>", "\n\n")

    pieces = []
    lists = ""  # current nesting, one marker character per open list
    while text:
        matches = ((kind, pattern.match(text)) for kind, pattern in _LIST_TOKENS)
        candidates = [(kind, found) for kind, found in matches if found]
        if not candidates:
            # No list markup left: the remainder passes through untouched.
            pieces.append(text)
            break

        # Handle whichever construct occurs earliest, i.e. the one with the
        # shortest preceding text.
        kind, found = min(candidates, key=lambda pair: len(pair[1].group(1)))
        if kind == "close":
            lists = lists[:-1]
            text = found.group(2)
        elif kind == "item":
            pieces.append("\n" + lists + " " + found.group(2))
            text = found.group(3)
        else:
            lists += kind
            pre, val, post = found.groups()
            pieces.append(pre + "\n" + lists + " " + val)
            text = post

    out = "".join(pieces)
    for regex, repl in re_subs:
        out = regex.sub(repl, out)
    return out