2
0
mirror of https://expo.survex.com/repositories/troggle/.git synced 2024-11-22 07:11:52 +00:00

more robust tripid labelling

This commit is contained in:
Philip Sargent 2023-09-01 20:31:19 +03:00
parent 1cf02afec9
commit 98412c140d
3 changed files with 38 additions and 31 deletions

View File

@ -1,14 +1,15 @@
import hashlib
import logging import logging
import os
import random import random
import resource import resource
import string
import subprocess import subprocess
import os
from decimal import getcontext from decimal import getcontext
from pathlib import Path from pathlib import Path
getcontext().prec = 2 # use 2 significant figures for decimal calculations getcontext().prec = 2 # use 2 significant figures for decimal calculations
import settings import settings
"""This file declares TROG a globally visible object for caches. """This file declares TROG a globally visible object for caches.
@ -30,6 +31,8 @@ thread.
""" """
TROG = {"pagecache": {"expedition": {}}, "caves": {"gcavelookup": {}, "gcavecount": {}}} TROG = {"pagecache": {"expedition": {}}, "caves": {"gcavelookup": {}, "gcavecount": {}}}
alphabet = []
sha = hashlib.new('sha256')
# This is module-level executable. This is a Bad Thing. Especially when it touches the file system. # This is module-level executable. This is a Bad Thing. Especially when it touches the file system.
try: try:
@ -50,7 +53,27 @@ def chaosmonkey(n):
return False return False
# print("CHAOS strikes !", file=sys.stderr) # print("CHAOS strikes !", file=sys.stderr)
return True return True
def unique_slug(text, n):
    """Return the first n hex digits of the SHA-256 digest of text.

    This gives an almost-unique id based on the text alone.
    2 hex digits would seem adequate, but we might get a collision.

    A fresh hash object is created on every call. Feeding a shared hash
    object through update() accumulates every text ever passed in, so the
    same text would produce a different slug on each call.

    text: the string to hash; it is encoded as UTF-8 first.
    n: how many leading hex characters of the 64-character digest to return.
    """
    return hashlib.sha256(text.encode("utf-8")).hexdigest()[:n]
def alphabet_suffix(n):
    """Return the lowercase letter for a zero-based index: 0 -> 'a' ... 25 -> 'z'.

    This is called repeatedly during initial parsing import, so it indexes
    the constant string.ascii_lowercase directly; no list has to be built
    or cached.

    If n is outside 0..25 the result is "_X_" followed by two random
    lowercase letters. That fallback is NOT guaranteed to be unique.
    Negative n is treated as out of range, so it can never wrap round
    and silently pick a letter from the end of the alphabet.
    """
    letters = string.ascii_lowercase
    if 0 <= n < len(letters):
        return letters[n]
    return "_X_" + random.choice(letters) + random.choice(letters)
def only_commit(fname, message): def only_commit(fname, message):
"""Only used to commit a survex file edited and saved in view/survex.py""" """Only used to commit a survex file edited and saved in view/survex.py"""

View File

@ -1,5 +1,4 @@
import subprocess import subprocess
import hashlib
import string import string
from datetime import datetime from datetime import datetime
from pathlib import Path from pathlib import Path
@ -9,10 +8,11 @@ from django.core.files.storage import FileSystemStorage
from django.shortcuts import render, redirect from django.shortcuts import render, redirect
import settings import settings
from troggle.core.models.caves import GetCaveLookup
from troggle.core.models.logbooks import LogbookEntry, writelogbook, PersonLogEntry from troggle.core.models.logbooks import LogbookEntry, writelogbook, PersonLogEntry
from troggle.core.models.survex import DrawingFile from troggle.core.models.survex import DrawingFile
from troggle.core.models.troggle import DataIssue, Expedition, PersonExpedition from troggle.core.models.troggle import DataIssue, Expedition, PersonExpedition
from troggle.core.utils import alphabet_suffix
from troggle.parsers.people import GetPersonExpeditionNameLookup, known_foreigner from troggle.parsers.people import GetPersonExpeditionNameLookup, known_foreigner
# from databaseReset import reinit_db # don't do this. databaseReset runs code *at import time* # from databaseReset import reinit_db # don't do this. databaseReset runs code *at import time*
@ -45,29 +45,15 @@ todo = """
- Make file rename utility less ugly. - Make file rename utility less ugly.
""" """
sha = hashlib.new('sha256')
def unique_slug(text, n):
    """This gives each logbook entry a unique id based on the date+content, so the order of entries on a particular day
    does not matter. This is a change (August 2023) from previous process.

    2 hex digits would seem adequate for each expo day, but we might get a collision.
    The hash is based on the content after substitution of <p> so should be stable. Which means these ids
    can be used elsewhere in the troggle system as permanent slugs.

    When SAVING an edited entry (as opposed to a new one) we will have a different hash so we will have to
    delete the original database object

    A fresh hash object is created on every call. Feeding a shared hash object
    through update() accumulates every text ever passed in, so the same text
    would produce a different slug on each call and the ids would not be stable.

    text: the string to hash; it is encoded as UTF-8 first.
    n: how many leading hex characters of the 64-character digest to return.
    """
    return hashlib.sha256(text.encode("utf-8")).hexdigest()[:n]
def create_new_lbe_slug(date):
    """Return the slug for a new logbook entry on date, in the form <date><suffix>.

    The suffix comes from alphabet_suffix(), indexed by the number of
    LogbookEntry objects that already exist for that date.
    """
    # count() asks the database for the number of rows instead of loading
    # every entry for the day just to measure the length of the result.
    n = LogbookEntry.objects.filter(date=date).count()
    suffix = alphabet_suffix(n)
    tid = f"{date}{suffix}"
    print(tid)
    return tid

View File

@ -15,8 +15,7 @@ from parsers.people import GetPersonExpeditionNameLookup, load_people_expos, kno
from troggle.core.models.caves import GetCaveLookup from troggle.core.models.caves import GetCaveLookup
from troggle.core.models.logbooks import LogbookEntry, PersonLogEntry from troggle.core.models.logbooks import LogbookEntry, PersonLogEntry
from troggle.core.models.troggle import DataIssue, Expedition from troggle.core.models.troggle import DataIssue, Expedition
from troggle.core.utils import get_process_memory from troggle.core.utils import get_process_memory, alphabet_suffix, unique_slug
from troggle.core.views.uploads import unique_slug
""" """
Parses and imports logbooks in all their wonderful confusion Parses and imports logbooks in all their wonderful confusion
@ -109,7 +108,7 @@ ENTRIES = {
logentries = [] # the entire logbook for one year is a single object: a list of entries logentries = [] # the entire logbook for one year is a single object: a list of entries
noncaveplaces = ["travel", "Journey", "Loser Plateau", "UNKNOWN", "plateau", "base camp", "basecamp", "top camp", "topcamp"] noncaveplaces = ["travel", "Journey", "Loser Plateau", "UNKNOWN", "plateau", "base camp", "basecamp", "top camp", "topcamp"]
tripsdate = {} tripsdate = {}
alphabet = []
def set_trip_seq_id(year, seq): def set_trip_seq_id(year, seq):
'''We have not parsed the trip date yet, so this is a sequence number '''We have not parsed the trip date yet, so this is a sequence number
def reset_trip_id(date):
    '''Now we have the date, we can set the tripid (the lbe slug) to be in our standard form
    of <date><letter>, i.e. '2003-07-30b'
    BUT this gets re-set every time the logbook is imported.
    However these are persistent as the entries are ordered on this field.

    tripsdate holds, for each date, how many trip ids have been handed out so far.
    '''
    already = tripsdate.get(date, 0)  # returns zero if none found
    tripsdate[date] = already + 1
    # alphabet_suffix() is zero-based (0 -> 'a'), so pass the count of ids
    # issued BEFORE this one. Passing already + 1 would label the first trip
    # of a day 'b' and waste one of the 26 letters.
    suffix = alphabet_suffix(already)
    tid = f"{date}{suffix}"
    return tid