From cabcada0b8738366bce33173ad1b3d376e8fb73c Mon Sep 17 00:00:00 2001 From: Philip Sargent Date: Fri, 9 Dec 2022 23:45:07 +0000 Subject: [PATCH] 2003 logbook export/re-import as now HTML format --- core/views/other.py | 34 +++++-------- databaseReset.py | 7 ++- parsers/imports.py | 5 ++ parsers/logbooks.py | 84 ++++++++++++++++++++++----------- templates/controlPanel.html | 5 +- templates/logbook2005style.html | 33 ++++++------- 6 files changed, 98 insertions(+), 70 deletions(-) diff --git a/core/views/other.py b/core/views/other.py index aadb613..88d0589 100644 --- a/core/views/other.py +++ b/core/views/other.py @@ -148,15 +148,13 @@ def controlpanel(request): def exportlogbook(request,year=None,extension=None): - '''Constructs, from the database, a complete HTML formatted logbook - but TEXT ONLY - for the current year. Formats available are HTML2005 or HTML2022 (planned) + '''Constructs, from the database, a complete HTML formatted logbook + for the current year. Formats available are HTML2005 (others old & broken or not written yet) There are no images stored in the database, so this is only a tool for a first pass, to be followed by - extensive hand-editing. + hand-editing. However links to images work int he HTML text of a logbook entry - NEED TO ADD IN THE MATERIAL WHICH IS NOT IN ANY LBE ! e.g. front matter. - - This function DOES NOT WORK. + NEED TO ADD IN THE MATERIAL WHICH IS NOT IN ANY LBE ! e.g. front matter. This function is the recipient of the POST action os the export form in the control panel ''' @@ -170,26 +168,18 @@ def exportlogbook(request,year=None,extension=None): else: print(f'Logbook export {request.POST}') - if request.POST.get("year", '2016'): - year = request.POST['year'] - if request.POST.get("extension", 'html'): - extension = request.POST['extension'] # e.g. html - + year = request.POST['year'] current_expedition=Expedition.objects.get(year=year) logbook_entries=LogbookEntry.objects.filter(expedition=current_expedition).order_by('date') # need to be sorted by date! - #print(f'Logbook has {len(logbook_entries)} entries in it.') + print(f'Logbook has {len(logbook_entries)} entries in it.') - if extension == 'html2005': - response = HttpResponse(content_type='text/html') - style='2005' - else : - extension == 'html2022' - response = HttpResponse(content_type='text/html') - style='2022' - - filename='newlogbook.' + extension - template='logbook'+style+'style.'+extension + extension ='html' + response = HttpResponse(content_type='text/html') + style='2005' + + filename='logbook-new-format.' + extension + template='logbook'+style+'style.'+ extension response['Content-Disposition'] = 'attachment; filename='+filename t=loader.get_template(template) logbookfile = (t.render({'logbook_entries':logbook_entries})) diff --git a/databaseReset.py b/databaseReset.py index 630d3c6..db7a781 100644 --- a/databaseReset.py +++ b/databaseReset.py @@ -47,7 +47,7 @@ from django.db import transaction from troggle.core.utils import get_process_memory from troggle.core.models.caves import Cave, Entrance from troggle.parsers.imports import import_caves, import_people, import_surveyscans, \ - import_logbooks, import_QMs, import_survex, import_loadpos, import_drawingsfiles + import_logbooks, import_logbook, import_QMs, import_survex, import_loadpos, import_drawingsfiles if os.geteuid() == 0: # This protects the server from having the wrong file permissions written on logs and caches @@ -343,7 +343,8 @@ def usage(): drawings - read in the Tunnel & Therion files - which scans the survey scans too survex - read in the survex files - all the survex blocks and entrances x/y/z - dumplogbooks - Not used. write out autologbooks (not working?) + dumplogbooks - Not used. write out autologbooks (not working? use http://localhost:8000/controlpanel ) + logbook - read a single logbook. Defautl set in python code and [runlabel] is an optional string identifying this run of the script in the stored profiling data 'import-profile.json' @@ -394,6 +395,8 @@ if __name__ == "__main__": jq.enq("caves",import_caves) elif "logbooks" in sys.argv: jq.enq("logbooks",import_logbooks) + elif "logbook" in sys.argv: + jq.enq("logbooks",import_logbook) # default year set in imports.py elif "people" in sys.argv: jq.enq("people",import_people) elif "QMs" in sys.argv: diff --git a/parsers/imports.py b/parsers/imports.py index df6e583..9cc945c 100644 --- a/parsers/imports.py +++ b/parsers/imports.py @@ -41,6 +41,11 @@ def import_logbooks(): with transaction.atomic(): troggle.parsers.logbooks.LoadLogbooks() +def import_logbook(year=2003): + print(f"-- Importing Logbook {year}") + with transaction.atomic(): + troggle.parsers.logbooks.LoadLogbook(year) + def import_QMs(): print("-- Importing old QMs for 161, 204, 234 from CSV files") with transaction.atomic(): diff --git a/parsers/logbooks.py b/parsers/logbooks.py index b67369c..889387d 100644 --- a/parsers/logbooks.py +++ b/parsers/logbooks.py @@ -26,6 +26,8 @@ Parses and imports logbooks in all their wonderful confusion todo=''' - refactor everything with some urgency, esp. LoadLogbookForExpedition() +- remove the TROG and lbo things since we need the database for multiuser access? Or not? + - profile the code to find bad repetitive things, of which there are many. - far too many uses of Django field dereferencing to get values, which is SLOW @@ -55,15 +57,15 @@ DEFAULT_LOGBOOK_FILE = "logbook.html" # but several don't work, and are skipped by the parsing code, e.g. 1983 LOGBOOK_PARSER_SETTINGS = { "2010": ("logbook.html", "parser_html"), - "2009": ("2009logbook.txt", "parser_wiki"), - "2008": ("2008logbook.txt", "parser_wiki"), + "2009": ("2009logbook.txt", "wiki_parser"), + "2008": ("2008logbook.txt", "wiki_parser"), "2007": ("logbook.html", "parser_html"), "2006": ("logbook.html", "parser_html"), -# "2006": ("logbook/logbook_06.txt", "parser_wiki"), +# "2006": ("logbook/logbook_06.txt", "wiki_parser"), "2006": ("logbook.html", "parser_html"), "2005": ("logbook.html", "parser_html"), "2004": ("logbook.html", "parser_html"), - "2003": ("logbook.html", "parser_html_03"), + "2003": ("logbook.html", "parser_html"), "2002": ("logbook.html", "parser_html"), "2001": ("log.htm", "parser_html_01"), "2000": ("log.htm", "parser_html_01"), @@ -88,7 +90,7 @@ LOGBOOK_PARSER_SETTINGS = { entries = { "2022": 64, "2019": 56, "2018": 74, "2017": 60, "2016": 81, "2015": 79, "2014": 65, "2013": 51, "2012": 75, "2011": 68, "2010": 22, "2009": 52, - "2008": 49, "2007": 111, "2006": 60, "2005": 55, "2004": 76, "2003": 40, "2002": 31, + "2008": 49, "2007": 111, "2006": 60, "2005": 55, "2004": 76, "2003": 42, "2002": 31, "2001": 48, "2000": 54, "1999": 79, "1998": 43, "1997": 53, "1996": 94, "1995": 41, "1994": 32, "1993": 41, "1992": 61, "1991": 38, "1990": 87, "1989": 1,"1988": 1,"1987": 1, "1985": 24,"1984": 32,"1983": 52,"1982": 42,} @@ -114,8 +116,9 @@ rx_round_bracket = re.compile(r"[\(\[].*?[\)\]]") def GetTripPersons(trippeople, expedition, logtime_underground, tid=None): res = [ ] author = None - #print(f'# {tid}') - + # print(f'# {tid}') + # print(f" - {tid} '{trippeople}' ") + for tripperson in re.split(r",|\+|&|&(?!\w+;)| and ", trippeople): tripperson = tripperson.strip() # mul = re.match(r"(?i)(.*?)$", tripperson) @@ -147,6 +150,7 @@ def GetTripPersons(trippeople, expedition, logtime_underground, tid=None): return None, None author = res[-1][0] + #print(f" - {tid} [{author.person}] '{res[0][0].person}'...") return res, author def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_underground, tid=None): @@ -195,9 +199,10 @@ def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_ #NEW slug for a logbook entry here! Unique id + slugified title fragment if tid is not None: - slug = tid + "_" + slugify(title)[:10].replace('-','_') + slug = tid + # slug = tid + "_" + slugify(title)[:10].replace('-','_') else: - slug = str(randint(1000,10000)) + "_" + slugify(title)[:10].replace('-','_') + slug = str(randint(1000,9999)) + "_" + slugify(title)[:10].replace('-','_') nonLookupAttribs={'place':place, 'text':text, 'expedition':expedition, 'cave_slug':str(cave), 'slug': slug} # This creates the lbo instance of LogbookEntry @@ -205,6 +210,7 @@ def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_ for tripperson, time_underground in trippersons: + # print(f" - {tid} '{tripperson}' author:{tripperson == author}") lookupAttribs={'personexpedition':tripperson, 'logbook_entry':lbo} nonLookupAttribs={'time_underground':time_underground, 'is_logbook_entry_author':(tripperson == author)} # this creates the PersonTrip instance. @@ -251,7 +257,7 @@ def ParseDate(tripdate, year): return datetime.date(1970, 1, 1) # (2006 - not any more), 2008 - 2009 -def parser_wiki(year, expedition, txt): +def wiki_parser(year, expedition, txt): global logentries global logdataissues @@ -316,6 +322,11 @@ def parser_html(year, expedition, txt): if s: tripid, tripid1, tripdate, trippeople, triptitle, triptext, tu = s.groups() else: # allow title and people to be swapped in order + msg = f" !- {year} Can't parse:{logbook_entry_count} '{trippara[:40]}'..." + print(msg) + DataIssue.objects.create(parser='logbooks', message=msg) + logdataissues[tid]=msg + s2 = re.match(r'''(?x)(?:\s*.*?\s*

)? # second date \s*(?:\s*)? \s*(.*?)(?:

)? @@ -329,7 +340,7 @@ def parser_html(year, expedition, txt): tripid, tripid1, tripdate, triptitle, trippeople, triptext, tu = s2.groups() else: if not re.search(r"Rigging Guide", trippara): - msg = f" !- Logbook. Can't parse {tripid1}: {trippara} entry:{logbook_entry_count} " + msg = f" !- Logbook. Can't parse entry on 2nd pass:{logbook_entry_count} '{trippara[:40]}'..." print(msg) DataIssue.objects.create(parser='logbooks', message=msg) logdataissues[tid]=msg @@ -343,7 +354,7 @@ def parser_html(year, expedition, txt): tripcave = "UNKNOWN" ltriptext = re.sub(r"

", "", triptext) ltriptext = re.sub(r"\s*?\n\s*", " ", ltriptext) - ltriptext = re.sub(r"

", "

", ltriptext).strip() + ltriptext = re.sub(r"

", "

", ltriptext).strip() entrytuple = (ldate, tripcave, triptitle, ltriptext, trippeople, expedition, tu, tripid1) @@ -430,11 +441,11 @@ def parser_html_01(year, expedition, txt): ltriptext = ltriptext[:mtail.start(0)] ltriptext = re.sub(r"

", "", ltriptext) ltriptext = re.sub(r"\s*?\n\s*", " ", ltriptext) - ltriptext = re.sub(r"

|
", "\n\n", ltriptext).strip() ltriptext = re.sub(r"", "_", ltriptext) ltriptext = re.sub(r"", "''", ltriptext) ltriptext = re.sub(r"", "'''", ltriptext) - + ltriptext = re.sub(r"

", "

", ltriptext).strip() + if ltriptext == "": message = " ! - Zero content for logbook entry!: " + tid DataIssue.objects.create(parser='logbooks', message=message) @@ -469,7 +480,7 @@ def parser_html_03(year, expedition, txt): logbook_entry_count = 0 for trippara in tripparas: logbook_entry_count += 1 - tid = set_trip_id(year,logbook_entry_count) + tid = set_trip_id(year,logbook_entry_count) # default trip id, before we read the date s = re.match(r"(?s)\s*

(.*?)

(.*)$", trippara) if not ( s ) : @@ -485,23 +496,30 @@ def parser_html_03(year, expedition, txt): sheader = tripheader.split(" -- ") tu = "" if re.match("T/U|Time underwater", sheader[-1]): - tu = sheader.pop() + tu = sheader.pop() # not a number in 2003 usually + # print(f" - {logbook_entry_count} '{tu}' ") if len(sheader) != 3: - print(" ! Header not three pieces", sheader) + print(" ! Header not three pieces for parser_html_03() ", sheader) tripdate, triptitle, trippeople = sheader ldate = ParseDate(tripdate.strip(), year) - triptitles = triptitle.split(" , ") - if len(triptitles) >= 2: - tripcave = triptitles[0] + # print(f" - {logbook_entry_count} '{ldate}' from '{tripdate.strip()}' ") + # print(f" - {logbook_entry_count} '{trippeople}' ") + titlelist = triptitle.split(" , ") + if len(titlelist) >= 2: + location, *namelist = titlelist # list unpacking operator + tripname = ", ".join(namelist) # concatenate strings + # print(f" - {logbook_entry_count} {location} '{tripname}'") else: - tripcave = "UNKNOWN" - ltriptext = re.sub(r"

", "", triptext) - ltriptext = re.sub(r"\s*?\n\s*", " ", ltriptext) - ltriptext = re.sub(r"

", "\n\n", ltriptext).strip() - ltriptext = re.sub(r"[^\s0-9a-zA-Z\-.,:;'!&()\[\]<>?=+*%]", "_NONASCII_", ltriptext) + location = "UNKNOWN" + + ltriptext = triptext + "

\n\n" + tu + ltriptext = re.sub(r"

", "", ltriptext) + #ltriptext = re.sub(r"\s*?\n\s*", " ", ltriptext) + ltriptext = re.sub(r"

", "

\n\n", ltriptext).strip() + #ltriptext = re.sub(r"[^\s0-9a-zA-Z\-.,:;'!&()\[\]<>?=+*%]", "_NONASCII_", ltriptext) - entrytuple = (ldate, tripcave, triptitle, ltriptext, + entrytuple = (ldate, location, tripname, ltriptext, trippeople, expedition, tu, tid) logentries.append(entrytuple) @@ -509,8 +527,8 @@ def parser_html_03(year, expedition, txt): def LoadLogbookForExpedition(expedition): """ Parses all logbook entries for one expedition """ - # absolutely horrid. REFACTOR THIS (all my fault..) global logentries + # absolutely horrid. REFACTOR THIS (all my fault..) global logdataissues global entries @@ -556,6 +574,10 @@ def LoadLogbookForExpedition(expedition): parsefunc = DEFAULT_LOGBOOK_PARSER expedition.save() + + lbes = LogbookEntry.objects.filter(expedition=expedition) + for lbe in lbes: + lbe.delete() try: file_in = open(logbookpath,'rb') @@ -594,6 +616,14 @@ def LoadLogbookForExpedition(expedition): return len(logentries) +def LoadLogbook(year): + nlbe={} + TROG['pagecache']['expedition'][year] = None # clear cache + + expo = Expedition.objects.get(year=year) + nlbe[expo] = LoadLogbookForExpedition(expo) # this actually loads the logbook for one expo + + def LoadLogbooks(): """ This is the master function for parsing all logbooks into the Troggle database. This should be rewritten to use coroutines to load all logbooks from disc in parallel, diff --git a/templates/controlPanel.html b/templates/controlPanel.html index 5d4cac0..52b0d7e 100644 --- a/templates/controlPanel.html +++ b/templates/controlPanel.html @@ -101,7 +101,7 @@

Export to a different format:

-

This creates 'newlogbook.html' in the years/<year>/ folder +

This creates 'logbook-new-format.html' in the years/<year>/ folder @@ -128,8 +128,7 @@ Output style: +

diff --git a/templates/logbook2005style.html b/templates/logbook2005style.html index af320a3..846e88d 100644 --- a/templates/logbook2005style.html +++ b/templates/logbook2005style.html @@ -1,26 +1,27 @@ + -{{logbook_entries.0.expedition}} Expo Logbook - - + + +{{logbook_entries.0.expedition}} Expo Logbook + + + -

Expo {{logbook_entries.0.expedition}}

- {%for logbook_entry in logbook_entries%}
-
{{logbook_entry.date}}
-
{{logbook_entry.author.person}} -{% for persontrip in logbook_entry.persontrip_set.all %}{{ persontrip.personexpedition.person }} {{ persontrip.personexpedition.time_underground }}, {% endfor %} -
- +
{{logbook_entry.date|date:'Y-m-d'}}
+
{% for persontrip in logbook_entry.persontrip_set.all %}{% if persontrip.is_logbook_entry_author %}{{persontrip.personexpedition.person}}{% else %}{{ persontrip.personexpedition.person }}{% endif %}, {% endfor %}
{{logbook_entry.place}} - {{logbook_entry.title}}
{{logbook_entry.text|safe}} +
T/U: {{logbook_entry.time_underground}}
{% endfor %} +
+ +