forked from expo/troggle
2003 logbook export/re-import as now HTML format
This commit is contained in:
parent
17b2b7b89c
commit
cabcada0b8
@ -148,15 +148,13 @@ def controlpanel(request):
|
||||
|
||||
|
||||
def exportlogbook(request,year=None,extension=None):
|
||||
'''Constructs, from the database, a complete HTML formatted logbook - but TEXT ONLY
|
||||
for the current year. Formats available are HTML2005 or HTML2022 (planned)
|
||||
'''Constructs, from the database, a complete HTML formatted logbook
|
||||
for the current year. Formats available are HTML2005 (others old & broken or not written yet)
|
||||
|
||||
There are no images stored in the database, so this is only a tool for a first pass, to be followed by
|
||||
extensive hand-editing.
|
||||
hand-editing. However, links to images work in the HTML text of a logbook entry
|
||||
|
||||
NEED TO ADD IN THE MATERIAL WHICH IS NOT IN ANY LBE ! e.g. front matter.
|
||||
|
||||
This function DOES NOT WORK.
|
||||
NEED TO ADD IN THE MATERIAL WHICH IS NOT IN ANY LBE ! e.g. front matter.
|
||||
|
||||
This function is the recipient of the POST action of the export form in the control panel
|
||||
'''
|
||||
@ -170,26 +168,18 @@ def exportlogbook(request,year=None,extension=None):
|
||||
else:
|
||||
print(f'Logbook export {request.POST}')
|
||||
|
||||
if request.POST.get("year", '2016'):
|
||||
year = request.POST['year']
|
||||
if request.POST.get("extension", 'html'):
|
||||
extension = request.POST['extension'] # e.g. html
|
||||
|
||||
year = request.POST['year']
|
||||
current_expedition=Expedition.objects.get(year=year)
|
||||
logbook_entries=LogbookEntry.objects.filter(expedition=current_expedition).order_by('date') # need to be sorted by date!
|
||||
|
||||
#print(f'Logbook has {len(logbook_entries)} entries in it.')
|
||||
print(f'Logbook has {len(logbook_entries)} entries in it.')
|
||||
|
||||
if extension == 'html2005':
|
||||
response = HttpResponse(content_type='text/html')
|
||||
style='2005'
|
||||
else :
|
||||
extension == 'html2022'
|
||||
response = HttpResponse(content_type='text/html')
|
||||
style='2022'
|
||||
|
||||
filename='newlogbook.' + extension
|
||||
template='logbook'+style+'style.'+extension
|
||||
extension ='html'
|
||||
response = HttpResponse(content_type='text/html')
|
||||
style='2005'
|
||||
|
||||
filename='logbook-new-format.' + extension
|
||||
template='logbook'+style+'style.'+ extension
|
||||
response['Content-Disposition'] = 'attachment; filename='+filename
|
||||
t=loader.get_template(template)
|
||||
logbookfile = (t.render({'logbook_entries':logbook_entries}))
|
||||
|
@ -47,7 +47,7 @@ from django.db import transaction
|
||||
from troggle.core.utils import get_process_memory
|
||||
from troggle.core.models.caves import Cave, Entrance
|
||||
from troggle.parsers.imports import import_caves, import_people, import_surveyscans, \
|
||||
import_logbooks, import_QMs, import_survex, import_loadpos, import_drawingsfiles
|
||||
import_logbooks, import_logbook, import_QMs, import_survex, import_loadpos, import_drawingsfiles
|
||||
|
||||
if os.geteuid() == 0:
|
||||
# This protects the server from having the wrong file permissions written on logs and caches
|
||||
@ -343,7 +343,8 @@ def usage():
|
||||
drawings - read in the Tunnel & Therion files - which scans the survey scans too
|
||||
survex - read in the survex files - all the survex blocks and entrances x/y/z
|
||||
|
||||
dumplogbooks - Not used. write out autologbooks (not working?)
|
||||
dumplogbooks - Not used. write out autologbooks (not working? use http://localhost:8000/controlpanel )
|
||||
logbook - read a single logbook. Defautl set in python code
|
||||
|
||||
and [runlabel] is an optional string identifying this run of the script
|
||||
in the stored profiling data 'import-profile.json'
|
||||
@ -394,6 +395,8 @@ if __name__ == "__main__":
|
||||
jq.enq("caves",import_caves)
|
||||
elif "logbooks" in sys.argv:
|
||||
jq.enq("logbooks",import_logbooks)
|
||||
elif "logbook" in sys.argv:
|
||||
jq.enq("logbooks",import_logbook) # default year set in imports.py
|
||||
elif "people" in sys.argv:
|
||||
jq.enq("people",import_people)
|
||||
elif "QMs" in sys.argv:
|
||||
|
@ -41,6 +41,11 @@ def import_logbooks():
|
||||
with transaction.atomic():
|
||||
troggle.parsers.logbooks.LoadLogbooks()
|
||||
|
||||
def import_logbook(year=2003):
|
||||
print(f"-- Importing Logbook {year}")
|
||||
with transaction.atomic():
|
||||
troggle.parsers.logbooks.LoadLogbook(year)
|
||||
|
||||
def import_QMs():
|
||||
print("-- Importing old QMs for 161, 204, 234 from CSV files")
|
||||
with transaction.atomic():
|
||||
|
@ -26,6 +26,8 @@ Parses and imports logbooks in all their wonderful confusion
|
||||
todo='''
|
||||
- refactor everything with some urgency, esp. LoadLogbookForExpedition()
|
||||
|
||||
- remove the TROG and lbo things since we need the database for multiuser access? Or not?
|
||||
|
||||
- profile the code to find bad repetitive things, of which there are many.
|
||||
|
||||
- far too many uses of Django field dereferencing to get values, which is SLOW
|
||||
@ -55,15 +57,15 @@ DEFAULT_LOGBOOK_FILE = "logbook.html"
|
||||
# but several don't work, and are skipped by the parsing code, e.g. 1983
|
||||
LOGBOOK_PARSER_SETTINGS = {
|
||||
"2010": ("logbook.html", "parser_html"),
|
||||
"2009": ("2009logbook.txt", "parser_wiki"),
|
||||
"2008": ("2008logbook.txt", "parser_wiki"),
|
||||
"2009": ("2009logbook.txt", "wiki_parser"),
|
||||
"2008": ("2008logbook.txt", "wiki_parser"),
|
||||
"2007": ("logbook.html", "parser_html"),
|
||||
"2006": ("logbook.html", "parser_html"),
|
||||
# "2006": ("logbook/logbook_06.txt", "parser_wiki"),
|
||||
# "2006": ("logbook/logbook_06.txt", "wiki_parser"),
|
||||
"2006": ("logbook.html", "parser_html"),
|
||||
"2005": ("logbook.html", "parser_html"),
|
||||
"2004": ("logbook.html", "parser_html"),
|
||||
"2003": ("logbook.html", "parser_html_03"),
|
||||
"2003": ("logbook.html", "parser_html"),
|
||||
"2002": ("logbook.html", "parser_html"),
|
||||
"2001": ("log.htm", "parser_html_01"),
|
||||
"2000": ("log.htm", "parser_html_01"),
|
||||
@ -88,7 +90,7 @@ LOGBOOK_PARSER_SETTINGS = {
|
||||
|
||||
entries = { "2022": 64, "2019": 56, "2018": 74, "2017": 60, "2016": 81, "2015": 79,
|
||||
"2014": 65, "2013": 51, "2012": 75, "2011": 68, "2010": 22, "2009": 52,
|
||||
"2008": 49, "2007": 111, "2006": 60, "2005": 55, "2004": 76, "2003": 40, "2002": 31,
|
||||
"2008": 49, "2007": 111, "2006": 60, "2005": 55, "2004": 76, "2003": 42, "2002": 31,
|
||||
"2001": 48, "2000": 54, "1999": 79, "1998": 43, "1997": 53, "1996": 94, "1995": 41,
|
||||
"1994": 32, "1993": 41, "1992": 61, "1991": 38, "1990": 87, "1989": 1,"1988": 1,"1987": 1,
|
||||
"1985": 24,"1984": 32,"1983": 52,"1982": 42,}
|
||||
@ -114,8 +116,9 @@ rx_round_bracket = re.compile(r"[\(\[].*?[\)\]]")
|
||||
def GetTripPersons(trippeople, expedition, logtime_underground, tid=None):
|
||||
res = [ ]
|
||||
author = None
|
||||
#print(f'# {tid}')
|
||||
|
||||
# print(f'# {tid}')
|
||||
# print(f" - {tid} '{trippeople}' ")
|
||||
|
||||
for tripperson in re.split(r",|\+|&|&(?!\w+;)| and ", trippeople):
|
||||
tripperson = tripperson.strip()
|
||||
# mul = re.match(r"(?i)<u>(.*?)</u>$", tripperson)
|
||||
@ -147,6 +150,7 @@ def GetTripPersons(trippeople, expedition, logtime_underground, tid=None):
|
||||
return None, None
|
||||
author = res[-1][0]
|
||||
|
||||
#print(f" - {tid} [{author.person}] '{res[0][0].person}'...")
|
||||
return res, author
|
||||
|
||||
def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_underground, tid=None):
|
||||
@ -195,9 +199,10 @@ def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_
|
||||
#NEW slug for a logbook entry here! Unique id + slugified title fragment
|
||||
|
||||
if tid is not None:
|
||||
slug = tid + "_" + slugify(title)[:10].replace('-','_')
|
||||
slug = tid
|
||||
# slug = tid + "_" + slugify(title)[:10].replace('-','_')
|
||||
else:
|
||||
slug = str(randint(1000,10000)) + "_" + slugify(title)[:10].replace('-','_')
|
||||
slug = str(randint(1000,9999)) + "_" + slugify(title)[:10].replace('-','_')
|
||||
nonLookupAttribs={'place':place, 'text':text, 'expedition':expedition, 'cave_slug':str(cave), 'slug': slug}
|
||||
|
||||
# This creates the lbo instance of LogbookEntry
|
||||
@ -205,6 +210,7 @@ def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_
|
||||
|
||||
|
||||
for tripperson, time_underground in trippersons:
|
||||
# print(f" - {tid} '{tripperson}' author:{tripperson == author}")
|
||||
lookupAttribs={'personexpedition':tripperson, 'logbook_entry':lbo}
|
||||
nonLookupAttribs={'time_underground':time_underground, 'is_logbook_entry_author':(tripperson == author)}
|
||||
# this creates the PersonTrip instance.
|
||||
@ -251,7 +257,7 @@ def ParseDate(tripdate, year):
|
||||
return datetime.date(1970, 1, 1)
|
||||
|
||||
# (2006 - not any more), 2008 - 2009
|
||||
def parser_wiki(year, expedition, txt):
|
||||
def wiki_parser(year, expedition, txt):
|
||||
global logentries
|
||||
global logdataissues
|
||||
|
||||
@ -316,6 +322,11 @@ def parser_html(year, expedition, txt):
|
||||
if s:
|
||||
tripid, tripid1, tripdate, trippeople, triptitle, triptext, tu = s.groups()
|
||||
else: # allow title and people to be swapped in order
|
||||
msg = f" !- {year} Can't parse:{logbook_entry_count} '{trippara[:40]}'..."
|
||||
print(msg)
|
||||
DataIssue.objects.create(parser='logbooks', message=msg)
|
||||
logdataissues[tid]=msg
|
||||
|
||||
s2 = re.match(r'''(?x)(?:\s*<div\sclass="tripdate"\sid=".*?">.*?</div>\s*<p>)? # second date
|
||||
\s*(?:<a\s+id="(.*?)"\s*/>\s*</a>)?
|
||||
\s*<div\s+class="tripdate"\s*(?:id="(.*?)")?>(.*?)</div>(?:<p>)?
|
||||
@ -329,7 +340,7 @@ def parser_html(year, expedition, txt):
|
||||
tripid, tripid1, tripdate, triptitle, trippeople, triptext, tu = s2.groups()
|
||||
else:
|
||||
if not re.search(r"Rigging Guide", trippara):
|
||||
msg = f" !- Logbook. Can't parse {tripid1}: {trippara} entry:{logbook_entry_count} "
|
||||
msg = f" !- Logbook. Can't parse entry on 2nd pass:{logbook_entry_count} '{trippara[:40]}'..."
|
||||
print(msg)
|
||||
DataIssue.objects.create(parser='logbooks', message=msg)
|
||||
logdataissues[tid]=msg
|
||||
@ -343,7 +354,7 @@ def parser_html(year, expedition, txt):
|
||||
tripcave = "UNKNOWN"
|
||||
ltriptext = re.sub(r"</p>", "", triptext)
|
||||
ltriptext = re.sub(r"\s*?\n\s*", " ", ltriptext)
|
||||
ltriptext = re.sub(r"<p>", "</br></br>", ltriptext).strip()
|
||||
ltriptext = re.sub(r"<p>", "<br /><br />", ltriptext).strip()
|
||||
|
||||
entrytuple = (ldate, tripcave, triptitle, ltriptext,
|
||||
trippeople, expedition, tu, tripid1)
|
||||
@ -430,11 +441,11 @@ def parser_html_01(year, expedition, txt):
|
||||
ltriptext = ltriptext[:mtail.start(0)]
|
||||
ltriptext = re.sub(r"</p>", "", ltriptext)
|
||||
ltriptext = re.sub(r"\s*?\n\s*", " ", ltriptext)
|
||||
ltriptext = re.sub(r"<p>|<br>", "\n\n", ltriptext).strip()
|
||||
ltriptext = re.sub(r"</?u>", "_", ltriptext)
|
||||
ltriptext = re.sub(r"</?i>", "''", ltriptext)
|
||||
ltriptext = re.sub(r"</?b>", "'''", ltriptext)
|
||||
|
||||
ltriptext = re.sub(r"<p>", "<br /><br />", ltriptext).strip()
|
||||
|
||||
if ltriptext == "":
|
||||
message = " ! - Zero content for logbook entry!: " + tid
|
||||
DataIssue.objects.create(parser='logbooks', message=message)
|
||||
@ -469,7 +480,7 @@ def parser_html_03(year, expedition, txt):
|
||||
logbook_entry_count = 0
|
||||
for trippara in tripparas:
|
||||
logbook_entry_count += 1
|
||||
tid = set_trip_id(year,logbook_entry_count)
|
||||
tid = set_trip_id(year,logbook_entry_count) # default trip id, before we read the date
|
||||
|
||||
s = re.match(r"(?s)\s*<p>(.*?)</p>(.*)$", trippara)
|
||||
if not ( s ) :
|
||||
@ -485,23 +496,30 @@ def parser_html_03(year, expedition, txt):
|
||||
sheader = tripheader.split(" -- ")
|
||||
tu = ""
|
||||
if re.match("T/U|Time underwater", sheader[-1]):
|
||||
tu = sheader.pop()
|
||||
tu = sheader.pop() # not a number in 2003 usually
|
||||
# print(f" - {logbook_entry_count} '{tu}' ")
|
||||
if len(sheader) != 3:
|
||||
print(" ! Header not three pieces", sheader)
|
||||
print(" ! Header not three pieces for parser_html_03() ", sheader)
|
||||
tripdate, triptitle, trippeople = sheader
|
||||
ldate = ParseDate(tripdate.strip(), year)
|
||||
triptitles = triptitle.split(" , ")
|
||||
if len(triptitles) >= 2:
|
||||
tripcave = triptitles[0]
|
||||
# print(f" - {logbook_entry_count} '{ldate}' from '{tripdate.strip()}' ")
|
||||
# print(f" - {logbook_entry_count} '{trippeople}' ")
|
||||
titlelist = triptitle.split(" , ")
|
||||
if len(titlelist) >= 2:
|
||||
location, *namelist = titlelist # list unpacking operator
|
||||
tripname = ", ".join(namelist) # concatenate strings
|
||||
# print(f" - {logbook_entry_count} {location} '{tripname}'")
|
||||
else:
|
||||
tripcave = "UNKNOWN"
|
||||
ltriptext = re.sub(r"</p>", "", triptext)
|
||||
ltriptext = re.sub(r"\s*?\n\s*", " ", ltriptext)
|
||||
ltriptext = re.sub(r"<p>", "\n\n", ltriptext).strip()
|
||||
ltriptext = re.sub(r"[^\s0-9a-zA-Z\-.,:;'!&()\[\]<>?=+*%]", "_NONASCII_", ltriptext)
|
||||
location = "UNKNOWN"
|
||||
|
||||
ltriptext = triptext + "<br /><br />\n\n" + tu
|
||||
ltriptext = re.sub(r"</p>", "", ltriptext)
|
||||
#ltriptext = re.sub(r"\s*?\n\s*", " ", ltriptext)
|
||||
ltriptext = re.sub(r"<p>", "<br /><br />\n\n", ltriptext).strip()
|
||||
#ltriptext = re.sub(r"[^\s0-9a-zA-Z\-.,:;'!&()\[\]<>?=+*%]", "_NONASCII_", ltriptext)
|
||||
|
||||
|
||||
entrytuple = (ldate, tripcave, triptitle, ltriptext,
|
||||
entrytuple = (ldate, location, tripname, ltriptext,
|
||||
trippeople, expedition, tu, tid)
|
||||
logentries.append(entrytuple)
|
||||
|
||||
@ -509,8 +527,8 @@ def parser_html_03(year, expedition, txt):
|
||||
def LoadLogbookForExpedition(expedition):
|
||||
""" Parses all logbook entries for one expedition
|
||||
"""
|
||||
# absolutely horrid. REFACTOR THIS (all my fault..)
|
||||
global logentries
|
||||
# absolutely horrid. REFACTOR THIS (all my fault..)
|
||||
global logdataissues
|
||||
global entries
|
||||
|
||||
@ -556,6 +574,10 @@ def LoadLogbookForExpedition(expedition):
|
||||
parsefunc = DEFAULT_LOGBOOK_PARSER
|
||||
|
||||
expedition.save()
|
||||
|
||||
lbes = LogbookEntry.objects.filter(expedition=expedition)
|
||||
for lbe in lbes:
|
||||
lbe.delete()
|
||||
|
||||
try:
|
||||
file_in = open(logbookpath,'rb')
|
||||
@ -594,6 +616,14 @@ def LoadLogbookForExpedition(expedition):
|
||||
|
||||
return len(logentries)
|
||||
|
||||
def LoadLogbook(year):
|
||||
nlbe={}
|
||||
TROG['pagecache']['expedition'][year] = None # clear cache
|
||||
|
||||
expo = Expedition.objects.get(year=year)
|
||||
nlbe[expo] = LoadLogbookForExpedition(expo) # this actually loads the logbook for one expo
|
||||
|
||||
|
||||
def LoadLogbooks():
|
||||
""" This is the master function for parsing all logbooks into the Troggle database.
|
||||
This should be rewritten to use coroutines to load all logbooks from disc in parallel,
|
||||
|
@ -101,7 +101,7 @@
|
||||
|
||||
|
||||
<h3>Export to a different format:</h3>
|
||||
<p>This creates 'newlogbook.html' in the years/<year>/ folder
|
||||
<p>This creates 'logbook-new-format.html' in the years/<year>/ folder
|
||||
<table>
|
||||
<tr>
|
||||
|
||||
@ -128,8 +128,7 @@
|
||||
Output style:
|
||||
<select name="extension">
|
||||
<option value="html2005">.html file - 2005 style</option>
|
||||
<option value="html2022">.html file - 2022 style</option>
|
||||
</select>
|
||||
</select>
|
||||
</p>
|
||||
<p>
|
||||
<input name="download_logbook" type="submit" value="Download logbook" />
|
||||
|
@ -1,26 +1,27 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head><title>{{logbook_entries.0.expedition}} Expo Logbook</title></head>
|
||||
<link rel="stylesheet" type="text/css" href="../../css/main2.css" />
|
||||
<style type="text/css">
|
||||
.tripdate { float: left;}
|
||||
.trippeople { float: right;}
|
||||
.triptitle { font-size: 120%; text-align: center; font-weight: bold; clear: both }
|
||||
.timeug { text-align: right; font-weight: bold }
|
||||
p { clear: both }
|
||||
</style>
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
|
||||
<title>{{logbook_entries.0.expedition}} Expo Logbook</title>
|
||||
<link rel="stylesheet" href="../../css/main2.css" />
|
||||
</head>
|
||||
<!-- Exported by troggle in this format after having been imported using a different format and a different parser.
|
||||
This is because we are steadily converting old formats to a new common format so that we do not need to maintain half
|
||||
a dozen parser functions.
|
||||
Exported on {% now 'Y-m-d D' %} using control panel webpage and exportlogbook() in troggle/code/views/other.py
|
||||
-->
|
||||
<body>
|
||||
|
||||
<h1>Expo {{logbook_entries.0.expedition}}</h1>
|
||||
|
||||
{%for logbook_entry in logbook_entries%}
|
||||
<hr />
|
||||
|
||||
<div class="tripdate" id="t{{logbook_entry.date}}A">{{logbook_entry.date}}</div>
|
||||
<div class="trippeople"><u>{{logbook_entry.author.person}}</u>
|
||||
{% for persontrip in logbook_entry.persontrip_set.all %}{{ persontrip.personexpedition.person }} {{ persontrip.personexpedition.time_underground }}, {% endfor %}
|
||||
</div>
|
||||
|
||||
<div class="tripdate" id="{{logbook_entry.slug}}">{{logbook_entry.date|date:'Y-m-d'}}</div>
|
||||
<div class="trippeople">{% for persontrip in logbook_entry.persontrip_set.all %}{% if persontrip.is_logbook_entry_author %}<u>{{persontrip.personexpedition.person}}</u>{% else %}{{ persontrip.personexpedition.person }}{% endif %}, {% endfor %}</div>
|
||||
<div class="triptitle">{{logbook_entry.place}} - {{logbook_entry.title}}</div>
|
||||
|
||||
{{logbook_entry.text|safe}}
|
||||
<div class="timeug">T/U: {{logbook_entry.time_underground}}</div>
|
||||
{% endfor %}
|
||||
<hr />
|
||||
</body>
|
||||
</html>
|
||||
|
Loading…
Reference in New Issue
Block a user