diff --git a/core/utils.py b/core/utils.py index a24a1dcf6..2aba92452 100644 --- a/core/utils.py +++ b/core/utils.py @@ -1,14 +1,15 @@ +import hashlib import logging +import os import random import resource +import string import subprocess -import os from decimal import getcontext from pathlib import Path getcontext().prec = 2 # use 2 significant figures for decimal calculations - import settings """This file declares TROG a globally visible object for caches. @@ -30,6 +31,8 @@ thread. """ TROG = {"pagecache": {"expedition": {}}, "caves": {"gcavelookup": {}, "gcavecount": {}}} +alphabet = [] +sha = hashlib.new('sha256') # This is module-level executable. This is a Bad Thing. Especially when it touches the file system. try: @@ -50,7 +53,27 @@ def chaosmonkey(n): return False # print("CHAOS strikes !", file=sys.stderr) return True - + +def unique_slug(text, n): + """This gives an almost-unique id based on the text, + 2 hex digits would seem adequate, but we might get a collision. + Not used anywhere. A fresh hash is built on every call, so the id depends only on text and not on earlier calls. 
+ """ + digest = hashlib.sha256(text.encode('utf-8')).hexdigest() + return digest[0:n] + +def alphabet_suffix(n): + """This is called repeatedly during initial parsing import, hence the cached list + """ + global alphabet + if not alphabet: + alphabet = list(string.ascii_lowercase) + + if n < len(alphabet): + suffix = alphabet[n] + else: + suffix = "_X_" + random.choice(string.ascii_lowercase) + random.choice(string.ascii_lowercase) + return suffix def only_commit(fname, message): """Only used to commit a survex file edited and saved in view/survex.py""" diff --git a/core/views/uploads.py b/core/views/uploads.py index f0dedfae6..5c2f4f426 100644 --- a/core/views/uploads.py +++ b/core/views/uploads.py @@ -1,5 +1,4 @@ import subprocess -import hashlib import string from datetime import datetime from pathlib import Path @@ -9,10 +8,11 @@ from django.core.files.storage import FileSystemStorage from django.shortcuts import render, redirect import settings -from troggle.core.models.caves import GetCaveLookup + from troggle.core.models.logbooks import LogbookEntry, writelogbook, PersonLogEntry from troggle.core.models.survex import DrawingFile from troggle.core.models.troggle import DataIssue, Expedition, PersonExpedition +from troggle.core.utils import alphabet_suffix from troggle.parsers.people import GetPersonExpeditionNameLookup, known_foreigner # from databaseReset import reinit_db # don't do this. databaseRest runs code *at import time* @@ -45,29 +45,15 @@ todo = """ - Make file rename utility less ugly. """ -sha = hashlib.new('sha256') - -def unique_slug(text, n): - """This gives each logbook entry a unique id based on the date+content, so the order of entries on a particular day - does not matter. This is a change (August 2023) from previous process. - - 2 hex digits would seem adequate for each expo day, but we might get a collision. - The hash is based on the content after substitution of
so should be stable. Which means these ids
- can be used elsewhere in the troggle system as permanent slugs.
-
- When SAVING an edited entry (as opposed to a new one) we will have a different hash so we will have to
- delete the original database object
- """
- sha.update(text.encode('utf-8'))
- return sha.hexdigest()[0:n]
def create_new_lbe_slug(date):
onthisdate = LogbookEntry.objects.filter(date=date)
n = len(onthisdate)
# print(f" Already entries on this date: {n}\n {onthisdate}")
- alphabet = list(string.ascii_lowercase)
- tid = f"{date}{alphabet[n]}"
+ suffix = alphabet_suffix(n)
+
+ tid = f"{date}{suffix}"
print(tid)
return tid
diff --git a/parsers/logbooks.py b/parsers/logbooks.py
index a5f663179..511ed4766 100644
--- a/parsers/logbooks.py
+++ b/parsers/logbooks.py
@@ -15,8 +15,7 @@ from parsers.people import GetPersonExpeditionNameLookup, load_people_expos, kno
from troggle.core.models.caves import GetCaveLookup
from troggle.core.models.logbooks import LogbookEntry, PersonLogEntry
from troggle.core.models.troggle import DataIssue, Expedition
-from troggle.core.utils import get_process_memory
-from troggle.core.views.uploads import unique_slug
+from troggle.core.utils import get_process_memory, alphabet_suffix, unique_slug
"""
Parses and imports logbooks in all their wonderful confusion
@@ -109,7 +108,7 @@ ENTRIES = {
logentries = [] # the entire logbook for one year is a single object: a list of entries
noncaveplaces = ["travel", "Journey", "Loser Plateau", "UNKNOWN", "plateau", "base camp", "basecamp", "top camp", "topcamp"]
tripsdate = {}
-alphabet = []
+
def set_trip_seq_id(year, seq):
'''We have not parsed the trip date yet, so this is a sequence numer
@@ -121,15 +120,14 @@ def reset_trip_id(date):
'''Now we have the date, we can set the tripid (the lbe slug) to be in our standard form
of