diff --git a/parsers/logbooks.py b/parsers/logbooks.py index 9007d4f..6b6c121 100644 --- a/parsers/logbooks.py +++ b/parsers/logbooks.py @@ -1,10 +1,12 @@ import csv -from datetime import datetime, date, time import os import re -#import time import pickle import shelve +import time +from random import randint +from datetime import datetime, date +from pathlib import Path from django.conf import settings from django.template.defaultfilters import slugify @@ -24,6 +26,9 @@ Parses and imports logbooks in all their wonderful confusion todo=''' - Put the object store 'trips' and the 'logdataissues' into TROG global object +- works parsing logbooks but when reading cache files fails on storing data + and is slower than parsing from scratch now! + - refactor everything with some urgency, esp. LoadLogbookForExpedition() - Logbooks log.htm exist for 1983, 84, 85, 87, 88, 89 but have no full-working parser, @@ -141,7 +146,11 @@ def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_ # but it is a db query which we should try to avoid - rewrite this #NEW slug for a logbook entry here! Unique id + slugified title fragment - slug = tid + "_" + slugify(title)[:10].replace('-','_') + # working for all cache files 2019-2005, failed on 2004; but fine when parsing logbook and not reading cache. Hmm. 
+ if tid is not None: + slug = tid + "_" + slugify(title)[:10].replace('-','_') + else: + slug = str(randint(1000,10000)) + "_" + slugify(title)[:10].replace('-','_') nonLookupAttribs={'place':place, 'text':text, 'expedition':expedition, 'cave_slug':str(cave), 'slug': slug, 'entry_type':entry_type} lbo, created=save_carefully(LogbookEntry, lookupAttribs, nonLookupAttribs) @@ -565,12 +574,14 @@ def LoadLogbookForExpedition(expedition, expect): logbookfile = os.path.join(expologbase, expedition.year, settings.DEFAULT_LOGBOOK_FILE) expedition.logbookfile = settings.DEFAULT_LOGBOOK_FILE parsefunc = settings.DEFAULT_LOGBOOK_PARSER - cache_filename = logbookfile + ".cache" - expedition.save() + cache_filename = Path(logbookfile + ".cache") + if not cache_filename.is_file(): + print(" - Cache file does not exist \"" + str(cache_filename) +"\"") + expedition.save() + now = time.time() + bad_cache = True # temporarily disable reading the cache - buggy try: - bad_cache = False - now = time.time() cache_t = os.path.getmtime(cache_filename) if os.path.getmtime(logbookfile) - cache_t > 2: # at least 2 secs later bad_cache= True @@ -582,7 +593,7 @@ def LoadLogbookForExpedition(expedition, expect): logentries=[] print(" ! 
Removed stale or corrupt cache file") raise - print(" - Reading cache: " + cache_filename, end='') + print(" - Reading cache: " + str(cache_filename), end='') try: with open(cache_filename, "rb") as f: year,n,logentries = pickle.load(f) @@ -597,8 +608,8 @@ def LoadLogbookForExpedition(expedition, expect): os.remove(cache_filename) logentries=[] raise - except : # no cache found - #print(" - No cache \"" + cache_filename +"\"") + except : + print(" - Cache de-pickle failure \"" + str(cache_filename) +"\"") try: file_in = open(logbookfile,'rb') txt = file_in.read().decode("latin1") @@ -627,10 +638,11 @@ def LoadLogbookForExpedition(expedition, expect): i=0 for entrytuple in range(len(logentries)): date, tripcave, triptitle, text, trippeople, expedition, logtime_underground, entry_type, tripid1 = logentries[i] + #print(" - entry tuple " , i, " tid", tripid1) EnterLogIntoDbase(date, tripcave, triptitle, text, trippeople, expedition, 0, - entry_type) + entry_type, tripid1) EnterLogIntoObjStore(expedition.year, date, tripcave, triptitle, text, trippeople, logtime_underground, - entry_type, tripid1, i) + entry_type, tripid1, i) i +=1 SetDatesFromLogbookEntries(expedition) return len(logentries) diff --git a/pre-run.sh b/pre-run.sh index 4153294..876a374 100755 --- a/pre-run.sh +++ b/pre-run.sh @@ -24,4 +24,4 @@ echo "" echo `tail -1 lines-of-python.txt` non-comment lines of python. echo `tail -1 lines-of-templates.txt` non-comment lines of HTML templates. -echo 'If you have an error running manage.py, maybe you are not in an activated venv ? \ No newline at end of file +echo 'If you have an error running manage.py, maybe you are not in an activated venv ?' \ No newline at end of file