"""Oddball mixture of critical, superfluous and useful functions which should
be re-located more sensibly to other modules:

ChaosMonkey(n)   - used by survex import to regenerate some .3d files
save_carefully() - core function that saves troggle objects in the database

various HTML/wiki functions presumably for logbooks?

Use unknown:
weighted_choice(lst)
randomLogbookSentence()
"""

import logging
import random
import re
import sys

from django.conf import settings
from django.shortcuts import render


def ChaosMonkey(n):
    """Return True on average once every n calls, chosen at random.

    Args:
        n: positive integer; the function returns True with probability 1/n.

    Returns:
        bool: True when the random draw from range(0, n) is 0, else False.
    """
    if random.randrange(0, n) != 0:
        return False
    # print("CHAOS strikes !", file=sys.stderr)
    return True


def weighted_choice(lst):
    """Pick an item from a sequence of (item, weight) pairs.

    A number is drawn uniformly from [0, 1] and the weights are subtracted
    from it in order; the first item whose weight exceeds what is left is
    returned.  If the weights are exhausted first, the last item is returned.

    Args:
        lst: iterable of (item, weight) pairs.

    Returns:
        The selected item.

    Raises:
        IndexError: if lst yields no pairs.
    """
    remaining = random.uniform(0, 1)
    item = None
    saw_any = False
    for item, weight in lst:
        saw_any = True
        if remaining < weight:
            break
        remaining = remaining - weight
    if not saw_any:
        # Previously this surfaced as an obscure UnboundLocalError.
        raise IndexError("weighted_choice() called with an empty sequence")
    return item


# A "sentence" is a capital letter followed by the shortest run of characters
# (not crossing a newline) that ends in a full stop.
_RE_SENTENCE = re.compile(r'[A-Z].*?\.')


def randomLogbookSentence():
    """Return a random sentence taken from a random logbook entry.

    Entries are drawn at random until one is found whose text contains at
    least one sentence (placeholder entries have none).

    Returns:
        dict with keys:
            'entry'    - the LogbookEntry the sentence came from
            'number'   - index of the sentence within that entry
            'sentence' - the sentence text

    Note:
        TODO: needs to handle empty logbooks without crashing.  Indexing [0]
        on an empty queryset raises IndexError, and if no entry contains a
        sentence this loops forever.
    """
    # Imported inside the function, as in the original code
    # (presumably to avoid a circular import - TODO confirm).
    from troggle.core.models import LogbookEntry

    while True:
        # Choose a random logbook entry; choose again if it has no sentences.
        entry = LogbookEntry.objects.order_by('?')[0]
        sentenceList = _RE_SENTENCE.findall(entry.text)
        if sentenceList:
            break

    # Choose a random sentence from that entry and remember its position.
    number = random.randrange(0, len(sentenceList))
    return {
        'entry': entry,
        'number': number,
        'sentence': sentenceList[number],
    }


def _report_save_failure(objectType, stage, lookupAttribs, nonLookupAttribs):
    """Print diagnostics for a failed database operation in save_carefully()."""
    print(" !! - SAVE CAREFULLY ===================", objectType)
    print(" !! - -- {}".format(stage))
    print(" !! - lookupAttribs:{}\n !! - nonLookupAttribs:{}".format(lookupAttribs, nonLookupAttribs))


def save_carefully(objectType, lookupAttribs=None, nonLookupAttribs=None):
    """Looks up instance using lookupAttribs and carries out the following:
            -if instance does not exist in DB: add instance to DB, return (new instance, True)
            -if instance exists in DB and was modified using Troggle: do nothing, return (existing instance, False)
            -if instance exists in DB and was not modified using Troggle: overwrite instance, return (instance, False)

    The checking is accomplished using Django's get_or_create and the new_since_parsing boolean field
    defined in core.models.TroggleModel.

    Args:
        objectType: model class; must expose `objects.get_or_create`.
        lookupAttribs: dict of field values used to find the instance
            (defaults to an empty dict).
        nonLookupAttribs: dict of field values used as creation defaults and,
            when overwriting, set on the existing instance (defaults to an
            empty dict).

    Returns:
        (instance, created) exactly as returned by get_or_create.

    Raises:
        Whatever get_or_create() or instance.save() raise; diagnostics are
        printed before the exception is re-raised.
    """
    # None replaces the original shared mutable {} defaults.
    lookupAttribs = {} if lookupAttribs is None else lookupAttribs
    nonLookupAttribs = {} if nonLookupAttribs is None else nonLookupAttribs

    try:
        instance, created = objectType.objects.get_or_create(defaults=nonLookupAttribs, **lookupAttribs)
    except Exception:
        _report_save_failure(objectType, "objects.get_or_create()", lookupAttribs, nonLookupAttribs)
        raise

    # Decide once, before any attribute is overwritten, so that the log
    # message below describes what was actually done.
    overwrite = not created and not instance.new_since_parsing
    if overwrite:
        # overwrite the existing attributes from the logbook text (except date and title)
        for k, v in nonLookupAttribs.items():
            setattr(instance, k, v)
        try:
            instance.save()
        except Exception:
            _report_save_failure(objectType, "instance.save()", lookupAttribs, nonLookupAttribs)
            raise

    # __str__ on a model can itself fail; never let logging break the save.
    try:
        msg = str(instance)
    except Exception:
        msg = "FAULT getting __str__ for instance with lookupattribs: {}:".format(lookupAttribs)

    if created:
        logging.info(msg + ' was just added to the database for the first time. \n')
    elif overwrite:
        logging.info(msg + " existed in the database unchanged since last parse. It was overwritten by the current script. \n")
    else:
        logging.info(msg + " has been modified using Troggle, so the current script left it as is. \n")

    return (instance, created)


re_body = re.compile(r"\<body[^>]*\>(.*)\</body\>", re.DOTALL)
re_title = re.compile(r"\<title[^>]*\>(.*)\</title\>", re.DOTALL)


def get_html_body(text):
    """Return the contents of the <body> element of text, or None if absent."""
    return get_single_match(re_body, text)


def get_html_title(text):
    """Return the contents of the <title> element of text, or None if absent."""
    return get_single_match(re_title, text)


def get_single_match(regex, text):
    """Return the first capture group of the first match of regex in text.

    Returns None when regex does not match anywhere in text.
    """
    match = regex.search(text)
    if match is None:
        return None
    return match.group(1)


# (compiled pattern, replacement) pairs applied in order by html_to_wiki().
re_subs = [
    (re.compile(r"\<b[^>]*\>(.*?)\</b\>", re.DOTALL), r"'''\1'''"),
    (re.compile(r"\<i\>(.*?)\</i\>", re.DOTALL), r"''\1''"),
    (re.compile(r"\<h1[^>]*\>(.*?)\</h1\>", re.DOTALL), r"=\1="),
    (re.compile(r"\<h2[^>]*\>(.*?)\</h2\>", re.DOTALL), r"==\1=="),
    (re.compile(r"\<h3[^>]*\>(.*?)\</h3\>", re.DOTALL), r"===\1==="),
    (re.compile(r"\<h4[^>]*\>(.*?)\</h4\>", re.DOTALL), r"====\1===="),
    (re.compile(r"\<h5[^>]*\>(.*?)\</h5\>", re.DOTALL), r"=====\1====="),
    (re.compile(r"\<h6[^>]*\>(.*?)\</h6\>", re.DOTALL), r"======\1======"),
    (re.compile(r'(<a href="?(?P<target>.*)"?>)?<img class="?(?P<class>\w*)"? src="?t/?(?P<source>[\w/\.]*)"?(?P<rest>></img>|\s/>(</a>)?)', re.DOTALL),r'[[display:\g<class> photo:\g<source>]]'), #
    (re.compile(r"\<a\s+id=['\"]([^'\"]*)['\"]\s*\>(.*?)\</a\>", re.DOTALL), r"[[subcave:\1|\2]]"), #assumes that all links with id attributes are subcaves. Not great.
    #interpage link needed
    (re.compile(r"\<a\s+href=['\"]#([^'\"]*)['\"]\s*\>(.*?)\</a\>", re.DOTALL), r"[[cavedescription:\1|\2]]"), #assumes that all links with target ids are cave descriptions. Not great.
    (re.compile(r"\[\<a\s+href=['\"][^'\"]*['\"]\s+id=['\"][^'\"]*['\"]\s*\>([^\s]*).*?\</a\>\]", re.DOTALL), r"[[qm:\1]]"),
#    (re.compile(r'<a\shref="?(?P<target>.*)"?>(?P<text>.*)</a>'),href_to_wikilinks),
]

# List-markup patterns used by html_to_wiki().  In every pattern group 1 is
# the text that precedes the tag, so its length tells how far into the
# remaining text the tag occurs.
_RE_UL_OPEN = re.compile(r"^(.*?)<ul[^>]*>\s*<li[^>]*>(.*?)</li>(.*)$", re.DOTALL)
_RE_UL_CLOSE = re.compile(r"^(\s*)</ul>(.*)$", re.DOTALL)
_RE_OL_OPEN = re.compile(r"^(.*?)<ol[^>]*>\s*<li[^>]*>(.*?)</li>(.*)$", re.DOTALL)
_RE_OL_CLOSE = re.compile(r"^(\s*)</ol>(.*)$", re.DOTALL)
_RE_LIST_ITEM = re.compile(r"^(\s*)<li[^>]*>(.*?)</li>(.*)$", re.DOTALL)


def html_to_wiki(text, codec = "utf-8"):
    """Convert a fragment of HTML into wiki markup.

    Paragraph tags become blank lines, <ul>/<ol>/<li> become "*" / "#"
    prefixed lines (one marker per nesting level), and the substitutions in
    re_subs are then applied to the result.

    Args:
        text: HTML as str, or as bytes encoded with codec.
        codec: encoding used to decode text when it is bytes.

    Returns:
        str: the wiki-markup version of text.
    """
    # Python 3: only bytes need decoding (str(str_obj, codec) raises TypeError).
    if isinstance(text, bytes):
        text = text.decode(codec)

    text = re.sub("</p>", r"", text)
    text = re.sub("<p>$", r"", text)
    text = re.sub("<p>", r"\n\n", text)

    pieces = []
    lists = ""  # current nesting, one "*" or "#" per open list

    while text:
        # Order matters only as a tie-break and mirrors the original
        # if/elif priority.
        candidates = [
            (kind, match)
            for kind, match in (
                ("ul_open", _RE_UL_OPEN.match(text)),
                ("ol_open", _RE_OL_OPEN.match(text)),
                ("item", _RE_LIST_ITEM.match(text)),
                ("ul_close", _RE_UL_CLOSE.match(text)),
                ("ol_close", _RE_OL_CLOSE.match(text)),
            )
            if match
        ]
        if not candidates:
            # No more list markup: keep the rest verbatim.
            pieces.append(text)
            break

        # Handle whichever tag occurs earliest in the remaining text.
        kind, match = min(candidates, key=lambda c: len(c[1].group(1)))

        if kind in ("ul_open", "ol_open"):
            lists += "*" if kind == "ul_open" else "#"
            pre, val, post = match.groups()
            pieces.append(pre + "\n" + lists + " " + val)
            text = post
        elif kind == "item":
            _, val, post = match.groups()
            pieces.append("\n" + lists + " " + val)
            text = post
        else:
            # Closing </ul> or </ol>: drop one nesting level.
            lists = lists[:-1]
            text = match.group(2)

    out = "".join(pieces)

    # substitutions
    for regex, repl in re_subs:
        out = regex.sub(repl, out)
    return out