2
0
mirror of https://expo.survex.com/repositories/troggle/.git synced 2025-12-17 18:37:11 +00:00

comments updated

This commit is contained in:
2025-09-21 20:45:21 +03:00
parent 64419ffb7c
commit ca9fd8ec55

View File

@@ -25,22 +25,19 @@ Parses and imports logbooks in all their wonderful confusion
https://expo.survex.com/handbook/computing/logbooks-parsing.html
"""
todo = """
- check cross-references in other logbooks and other HTML fragments
- check cross-references to specific logbook entries in other logbooks and other HTML fragments
e.g. cave descriptions
- Most of the time is during the database writing (6s out of 8s).
profile the code to find bad repetitive things, of which there are many. But probably we just have too many Django database operations.
Currently we store each entry individually. It should be done using Django bulk entry.
Look at Person & PersonExpedition all in python in parsers/people.py and then commit as two bulk transactions. test if links between them work when done like that.
- profile the code to find bad repetitive things, of which there are many.
- attach or link a DataIssue to an individual expo (logbook) so that it can be found and deleted
- attach or link a DataIssue to an individual expo (logbook) so that it can be found and deleted in the DataIssue bug output
- rewrite to use generators rather than storing everything intermediate in lists - to
reduce memory impact [low priority]
reduce memory impact [very low priority]
- We should ensure logbook.html is utf-8 and stop this crap:
file_in = open(logbookfile,'rb')
txt = file_in.read().decode("latin1")
"""
@dataclass
@@ -57,7 +54,7 @@ class LogbookEntryData:
guests: List[str]
expedition: Any
tu: float # time underground, not actually used anywhere
tid: str
tid: str # trip identifier
MAX_LOGBOOK_ENTRY_TITLE_LENGTH = 200
BLOG_PARSER_SETTINGS = { # no default, must be explicit
@@ -70,14 +67,14 @@ BLOG_PARSER_SETTINGS = { # no default, must be explicit
DEFAULT_LOGBOOK_FILE = "logbook.html"
DEFAULT_LOGBOOK_PARSER = "parser_html"
# All years now (Jan.2023) use the default value for Logbook parser
# don't forget to update expoweb/pubs.htm to match. 1982 left as reminder of expected format.
# don't forget to update expoweb/pubs.htm to match. 1982 left here as reminder of expected format:
LOGBOOK_PARSER_SETTINGS = {
"1982": ("logbook.html", "parser_html"),
}
LOGBOOKS_DIR = "years" # subfolder of settings.EXPOWEB
ENTRIES = {
"2025": 78,
"2025": 114,
"2024": 127,
"2023": 131,
"2022": 93,