2020-07-02 16:26:05 +01:00
import sys
2020-05-28 01:38:35 +01:00
import random
import re
import logging
2011-07-11 01:49:03 +01:00
from django . conf import settings
2019-03-30 17:02:07 +00:00
from django . shortcuts import render
2020-07-26 02:26:04 +01:00
2020-07-18 16:23:54 +01:00
"""Oddball mixture of critical, superfluous and useful functions which should
be relocated more sensibly to other modules:
    various HTML/wiki functions, presumably for logbooks?
Use unknown:
    weighted_choice(lst)
    randomLogbookSentence()
"""
2011-07-11 01:49:03 +01:00
2020-07-02 16:26:05 +01:00
2011-07-11 01:49:03 +01:00
def weighted_choice(lst):
    """Pick one item at random from a sequence of ``(item, weight)`` pairs.

    A single number is drawn uniformly from [0, 1] and the pairs are walked
    in order, subtracting each weight from the draw until it falls inside a
    pair's weight.  The weights are therefore expected to sum to 1; if they
    sum to less than the draw, the last item is returned.

    Args:
        lst: iterable of ``(item, weight)`` pairs.

    Returns:
        The selected item.

    Raises:
        IndexError: if ``lst`` contains no pairs (mirrors ``random.choice``).
    """
    pairs = list(lst)
    if not pairs:
        # Without this guard the loop body never runs and ``item`` is unbound.
        raise IndexError("weighted_choice() needs at least one (item, weight) pair")

    n = random.uniform(0, 1)
    for item, weight in pairs:
        if n < weight:
            break
        n -= weight
    # Falls through to the last item when the draw exceeds the total weight.
    return item
2011-07-11 01:49:03 +01:00
# A "sentence" is a capital letter followed by the shortest run of characters
# (not crossing a newline) that ends in a full stop.
_SENTENCE_RE = re.compile(r'[A-Z].*?\.')

# Upper bound on random draws, so a logbook consisting only of placeholder
# entries (no sentences) cannot make the function loop forever.
_MAX_SENTENCE_ATTEMPTS = 100


def randomLogbookSentence():
    """Return a random sentence taken from a random logbook entry.

    Entries are drawn at random until one containing at least one sentence is
    found (placeholder entries have none).

    Returns:
        dict: ``{'entry': <LogbookEntry>, 'number': <index of the sentence
        within the entry>, 'sentence': <the sentence text>}``.  An empty dict
        is returned when the logbook has no entries, or when no entry with a
        sentence turned up within ``_MAX_SENTENCE_ATTEMPTS`` draws.
        NOTE(review): callers are not visible from here -- confirm they
        tolerate the empty dict (previously this case raised IndexError or
        never returned).
    """
    # Imported here rather than at module level, as in the original code.
    from troggle.core.models import LogbookEntry

    for _ in range(_MAX_SENTENCE_ATTEMPTS):
        # Choose a random logbook entry; first() gives None on an empty table
        # instead of raising IndexError like [0] does.
        entry = LogbookEntry.objects.order_by('?').first()
        if entry is None:
            return {}

        # Choose again if there are no sentences (placeholder entry).
        sentences = _SENTENCE_RE.findall(entry.text)
        if not sentences:
            continue

        # Choose a random sentence from that entry and remember its position.
        number = random.randrange(len(sentences))
        return {
            'entry': entry,
            'number': number,
            'sentence': sentences[number],
        }

    return {}
2021-04-13 00:11:08 +01:00
2011-07-11 01:49:03 +01:00
# Capture everything between the opening and closing <body> / <title> tags.
# DOTALL lets the captured content span several lines; the capture is greedy,
# so it runs up to the LAST closing tag found in the text.
re_body = re.compile(r"\<body[^>]*\>(.*)\</body\>", re.DOTALL)
re_title = re.compile(r"\<title[^>]*\>(.*)\</title\>", re.DOTALL)
def get_html_body(text):
    """Return the text captured by ``re_body`` in *text*, or None if no match."""
    body = get_single_match(re_body, text)
    return body
def get_html_title(text):
    """Return the text captured by ``re_title`` in *text*, or None if no match."""
    title = get_single_match(re_title, text)
    return title
def get_single_match(regex, text):
    """Search *text* with the compiled pattern *regex*.

    Returns the first capture group of the first match, or None when the
    pattern does not occur in *text*.
    """
    found = regex.search(text)
    if found is None:
        return None
    return found.group(1)
# Ordered (compiled pattern, replacement template) pairs applied one after the
# other by html_to_wiki() to turn inline HTML markup into wiki markup.
re_subs = [
    # Bold and italic.
    # NOTE(review): the bold pattern accepts any tag whose name starts with
    # "b" (e.g. <br>, <body>) as an opener -- confirm whether that matters.
    (re.compile(r"\<b[^>]*\>(.*?)\</b\>", re.DOTALL), r"'''\1'''"),
    (re.compile(r"\<i\>(.*?)\</i\>", re.DOTALL), r"''\1''"),
    # Headings: <hN> becomes N equals signs on either side.
    (re.compile(r"\<h1[^>]*\>(.*?)\</h1\>", re.DOTALL), r"=\1="),
    (re.compile(r"\<h2[^>]*\>(.*?)\</h2\>", re.DOTALL), r"==\1=="),
    (re.compile(r"\<h3[^>]*\>(.*?)\</h3\>", re.DOTALL), r"===\1==="),
    (re.compile(r"\<h4[^>]*\>(.*?)\</h4\>", re.DOTALL), r"====\1===="),
    (re.compile(r"\<h5[^>]*\>(.*?)\</h5\>", re.DOTALL), r"=====\1====="),
    (re.compile(r"\<h6[^>]*\>(.*?)\</h6\>", re.DOTALL), r"======\1======"),
    # Images (optionally wrapped in a link) whose src starts with "t":
    # only the class and the source path are kept.
    (re.compile(r'(<a href="?(?P<target>.*)"?>)?<img class="?(?P<class>\w*)"? src="?t/?(?P<source>[\w/\.]*)"?(?P<rest>></img>|\s/>(</a>)?)', re.DOTALL), r'[[display:\g<class> photo:\g<source>]]'),
    # Assumes that all links with id attributes are subcaves. Not great.
    (re.compile(r"\<a\s+id=['\"]([^'\"]*)['\"]\s*\>(.*?)\</a\>", re.DOTALL), r"[[subcave:\1|\2]]"),
    # Interpage link needed.
    # Assumes that all links with target ids are cave descriptions. Not great.
    (re.compile(r"\<a\s+href=['\"]#([^'\"]*)['\"]\s*\>(.*?)\</a\>", re.DOTALL), r"[[cavedescription:\1|\2]]"),
    # A bracketed link carrying both href and id: keep the first word of the
    # link text as the QM reference.
    (re.compile(r"\[\<a\s+href=['\"][^'\"]*['\"]\s+id=['\"][^'\"]*['\"]\s*\>([^\s]*).*?\</a\>\]", re.DOTALL), r"[[qm:\1]]"),
    # Disabled rule, kept for reference:
    # (re.compile(r'<a\shref="?(?P<target>.*)"?>(?P<text>.*)</a>'),href_to_wikilinks),
]
# Patterns used by html_to_wiki() to walk through HTML list markup.  Group 1
# of every pattern is the text in front of the tag, so the match with the
# shortest group 1 belongs to the tag that comes next in the input.
_RE_UL_OPEN = re.compile(r"^(.*?)<ul[^>]*>\s*<li[^>]*>(.*?)</li>(.*)$", re.DOTALL)
_RE_UL_CLOSE = re.compile(r"^(\s*)</ul>(.*)$", re.DOTALL)
_RE_OL_OPEN = re.compile(r"^(.*?)<ol[^>]*>\s*<li[^>]*>(.*?)</li>(.*)$", re.DOTALL)
_RE_OL_CLOSE = re.compile(r"^(\s*)</ol>(.*)$", re.DOTALL)
_RE_LI = re.compile(r"^(\s*)<li[^>]*>(.*?)</li>(.*)$", re.DOTALL)


def html_to_wiki(text, codec="utf-8", subs=None):
    """Convert a fragment of HTML into wiki markup.

    Paragraph tags become blank lines, ``<ul>``/``<ol>`` lists become lines
    prefixed with ``*``/``#`` (one marker per nesting level), and finally the
    inline substitution rules are applied to the result.

    Args:
        text: the HTML, as ``str`` or as ``bytes`` encoded in *codec*.
        codec: encoding used to decode *text* when it is ``bytes``.
        subs: optional iterable of ``(compiled_pattern, replacement)`` pairs
            applied in order after list conversion.  Defaults to the
            module-level ``re_subs`` table.

    Returns:
        str: the converted wiki text.
    """
    # Only bytes need decoding; calling str(text, codec) on a str raises
    # TypeError on Python 3.
    if isinstance(text, bytes):
        text = text.decode(codec)
    if subs is None:
        subs = re_subs

    text = re.sub("</p>", r"", text)
    text = re.sub("<p>$", r"", text)
    text = re.sub("<p>", r"\n\n", text)

    pieces = []
    lists = ""  # current nesting, one '*' or '#' per open list
    while text:
        # Candidates are listed in the priority order of the original
        # if/elif chain; min() keeps the first one on a tie.
        found = [
            (kind, match)
            for kind, match in (
                ("ul_open", _RE_UL_OPEN.match(text)),
                ("ol_open", _RE_OL_OPEN.match(text)),
                ("item", _RE_LI.match(text)),
                ("ul_close", _RE_UL_CLOSE.match(text)),
                ("ol_close", _RE_OL_CLOSE.match(text)),
            )
            if match
        ]
        if not found:
            # No more list markup: keep the remainder untouched.
            pieces.append(text)
            break

        # The shortest prefix identifies the tag that occurs next.
        kind, match = min(found, key=lambda pair: len(pair[1].group(1)))

        if kind in ("ul_open", "ol_open"):
            lists += "*" if kind == "ul_open" else "#"
            pre, val, post = match.groups()
            pieces.append(pre + "\n" + lists + " " + val)
            text = post
        elif kind == "item":
            # Whitespace in front of the <li> is dropped.
            _pre, val, post = match.groups()
            pieces.append("\n" + lists + " " + val)
            text = post
        else:
            # Closing </ul> or </ol>: leave one nesting level.
            lists = lists[:-1]
            text = match.group(2)

    out = "".join(pieces)

    # Inline substitutions (bold, headings, links, ...).
    for regex, repl in subs:
        out = regex.sub(repl, out)
    return out