From ca9fd8ec5552b26b133e31a3ded97976185cc629 Mon Sep 17 00:00:00 2001 From: Philip Sargent Date: Sun, 21 Sep 2025 20:45:21 +0300 Subject: [PATCH] comments updated --- parsers/logbooks.py | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/parsers/logbooks.py b/parsers/logbooks.py index b609ebfbe..531103197 100644 --- a/parsers/logbooks.py +++ b/parsers/logbooks.py @@ -25,22 +25,19 @@ Parses and imports logbooks in all their wonderful confusion https://expo.survex.com/handbook/computing/logbooks-parsing.html """ todo = """ -- check cross-references in other logbooks and other HTML frahments +- check cross-references to specific logbook entries in other logbooks and other HTML fragments, e.g. cave descriptions - Most of the time is during the database writing (6s out of 8s). +profile the code to find bad repetitive things, of which there are many. But probably we just have too many Django database operations. +Currently we store each entry individually. It should be done using Django bulk entry. +Look at Person & PersonExpedition all in python in parsers/people.py and then commit as two bulk transactions. Test whether links between them work when done like that. -- profile the code to find bad repetitive things, of which there are many. 
- -- attach or link a DataIssue to an individual expo (logbook) so that it can be found and deleted +- attach or link a DataIssue to an individual expo (logbook) so that it can be found and deleted in the DataIssue bug output - rewrite to use generators rather than storing everything intermediate in lists - to - reduce memory impact [low priority] + reduce memory impact [very low priority] -- We should ensure logbook.html is utf-8 and stop this crap: - file_in = open(logbookfile,'rb') - txt = file_in.read().decode("latin1") - """ @dataclass @@ -57,7 +54,7 @@ class LogbookEntryData: guests: List[str] expedition: Any tu: float # time underground, not actually used anywhere - tid: str + tid: str # trip identifier MAX_LOGBOOK_ENTRY_TITLE_LENGTH = 200 BLOG_PARSER_SETTINGS = { # no default, must be explicit @@ -70,14 +67,14 @@ BLOG_PARSER_SETTINGS = { # no default, must be explicit DEFAULT_LOGBOOK_FILE = "logbook.html" DEFAULT_LOGBOOK_PARSER = "parser_html" # All years now (Jan.2023) use the default value for Logbook parser -# dont forget to update expoweb/pubs.htm to match. 1982 left as reminder of expected format. +# don't forget to update expoweb/pubs.htm to match. 1982 left here as a reminder of expected format: LOGBOOK_PARSER_SETTINGS = { "1982": ("logbook.html", "parser_html"), } LOGBOOKS_DIR = "years" # subfolder of settings.EXPOWEB ENTRIES = { - "2025": 78, + "2025": 114, "2024": 127, "2023": 131, "2022": 93,