From a60a495c83ab6eb8910f0778ee90e5f5fe8f5d82 Mon Sep 17 00:00:00 2001
From: Philip Sargent
Date: Mon, 29 Jun 2020 21:15:42 +0100
Subject: [PATCH] Creating forgotten caves & better GetCaveLookup()

---
 .gitignore           |   5 ++
 core/models_caves.py | 167 +++++++++++++++++++++++++++++++++++++++++++
 parsers/caves.py     |  53 ++++++++++----
 parsers/logbooks.py  |  29 +-------
 4 files changed, 211 insertions(+), 43 deletions(-)

diff --git a/.gitignore b/.gitignore
index 76c0d7c..b5a2e9a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -30,3 +30,8 @@ posnotfound
 troggle.sqlite-journal
 loadsurvexblks.log
 logbktrips.shelve
+cave-lookup.json
+svxblks.log
+svxlinear.log
+loadlogbk.log
+_1623.svx
diff --git a/core/models_caves.py b/core/models_caves.py
index e20b17f..60a62a9 100644
--- a/core/models_caves.py
+++ b/core/models_caves.py
@@ -3,6 +3,7 @@ import os
 import datetime
 import logging
 import re
+import json
 
 from subprocess import call
 from urllib.parse import urljoin
@@ -574,3 +575,169 @@ class PersonTrip(TroggleModel):
 
     def __str__(self):
         return "%s (%s)" % (self.personexpedition, self.logbook_entry.date)
+
+# Cached lookup from cave names, numbers, slugs and hand-written aliases to Cave objects (modelled on GetPersonExpeditionNameLookup).
+# NOTE(review): an alias naming a cave absent from the database raises KeyError and leaves a part-built cache; needs refactoring.
+Gcavelookup = None
+def GetCaveLookup():
+    global Gcavelookup
+    if Gcavelookup:
+        return Gcavelookup
+    Gcavelookup = {"NONEPLACEHOLDER":None}
+    for cave in Cave.objects.all():
+        Gcavelookup[cave.official_name.lower()] = cave
+        if cave.kataster_number:
+            Gcavelookup[cave.kataster_number] = cave
+        if cave.unofficial_number:
+            Gcavelookup[cave.unofficial_number.lower()] = cave
+        if cave.filename:
+            # this is the slug - usually..
+            Gcavelookup[cave.filename.replace(".html","").lower()] = cave
+        slug = cave.slug()
+        if slug:
+            Gcavelookup[slug.lower()] = cave
+    # These are exact matches! edit to check for prefix only!
+    # mostly taken from expoweb/noinfo/cave-number-index
+    # and Becka's email of 25 may 2020 on new kataster numbers
+    # this should be re-done as a JSON file upload
+    Gcavelookup["1987-02"] = Gcavelookup["267"]
+    Gcavelookup["1990-01"] = Gcavelookup["171"]
+    Gcavelookup["1990-02"] = Gcavelookup["172"]
+    Gcavelookup["1990-03"] = Gcavelookup["173"]
+    Gcavelookup["1990-04"] = Gcavelookup["174"]
+    Gcavelookup["1990-05"] = Gcavelookup["175"]
+    Gcavelookup["1990-06"] = Gcavelookup["176"]
+    Gcavelookup["1990-07"] = Gcavelookup["177"]
+    Gcavelookup["1990-08"] = Gcavelookup["178"]
+    Gcavelookup["1990-09"] = Gcavelookup["179"]
+    Gcavelookup["1990-10"] = Gcavelookup["180"]
+    Gcavelookup["1990-11"] = Gcavelookup["181"]
+    Gcavelookup["1990-12"] = Gcavelookup["182"]
+    Gcavelookup["1990-13"] = Gcavelookup["183"]
+    Gcavelookup["1990-14"] = Gcavelookup["184"]
+    Gcavelookup["1990-18"] = Gcavelookup["188"]
+    Gcavelookup["1990-adam"] = Gcavelookup["225"]
+    Gcavelookup["1993-01"] = Gcavelookup["200"]
+    Gcavelookup["1996-02"] = Gcavelookup["224"]
+    Gcavelookup["1996-03"] = Gcavelookup["223"]
+    Gcavelookup["1996-04"] = Gcavelookup["222"]
+    Gcavelookup["1996wk2"] = Gcavelookup["207"]
+    Gcavelookup["1996wk3"] = Gcavelookup["208"]
+    Gcavelookup["1996wk5"] = Gcavelookup["219"]
+    Gcavelookup["1996wk6"] = Gcavelookup["218"]
+    Gcavelookup["1996wk8"] = Gcavelookup["209"]
+    Gcavelookup["1996wk11"] = Gcavelookup["268"]
+    Gcavelookup["96wk11"] = Gcavelookup["268"]
+    Gcavelookup["1998-01"] = Gcavelookup["201"]
+    Gcavelookup["1998-03"] = Gcavelookup["210"]
+    Gcavelookup["1999-03"] = Gcavelookup["204"]
+    Gcavelookup["1999-04"] = Gcavelookup["230"]
+    Gcavelookup["1999-10"] = Gcavelookup["162"]
+    Gcavelookup["1999-bo-01"] = Gcavelookup["205"]
+    Gcavelookup["1999-ob-01"] = Gcavelookup["205"]
+    Gcavelookup["1999-ob-03"] = Gcavelookup["226"]
+    Gcavelookup["1999-ob-04"] = Gcavelookup["227"]
+    Gcavelookup["2000-01"] = Gcavelookup["231"]
+    Gcavelookup["2000-03"] = Gcavelookup["214"]
+    Gcavelookup["2000-04"] = Gcavelookup["220"]
+    Gcavelookup["2000-05"] = Gcavelookup["215"]
+    Gcavelookup["2000-06"] = Gcavelookup["216"]
+    Gcavelookup["2000-07"] = Gcavelookup["217"]
+    Gcavelookup["2000-09"] = Gcavelookup["234"]
+    Gcavelookup["2000-aa-01"] = Gcavelookup["250"]
+    Gcavelookup["2001-04"] = Gcavelookup["239"]
+    Gcavelookup["2001-05"] = Gcavelookup["243"]
+    Gcavelookup["2002-01"] = Gcavelookup["249"]
+    Gcavelookup["2002-02"] = Gcavelookup["234"]
+    Gcavelookup["2002-04"] = Gcavelookup["242"]
+    Gcavelookup["2002-05"] = Gcavelookup["294"]
+    Gcavelookup["2003-01"] = Gcavelookup["256"]
+    Gcavelookup["2003-02"] = Gcavelookup["248"]
+    Gcavelookup["2003-03"] = Gcavelookup["247"]
+    Gcavelookup["2003-04"] = Gcavelookup["241"]
+    Gcavelookup["2003-05"] = Gcavelookup["246"]
+    Gcavelookup["2003-06"] = Gcavelookup["161"]
+    Gcavelookup["2003-08"] = Gcavelookup["240"]
+    Gcavelookup["2003-09"] = Gcavelookup["245"]
+    Gcavelookup["2003-10"] = Gcavelookup["244"]
+    Gcavelookup["2004-01"] = Gcavelookup["269"]
+    Gcavelookup["2004-03"] = Gcavelookup["270"]
+    Gcavelookup["2004-11"] = Gcavelookup["251"]
+    Gcavelookup["2004-12"] = Gcavelookup["161"]
+    Gcavelookup["2004-15"] = Gcavelookup["253"]
+    Gcavelookup["2004-19"] = Gcavelookup["254"]
+    Gcavelookup["2004-20"] = Gcavelookup["255"]
+    Gcavelookup["2005-04"] = Gcavelookup["204"]
+    Gcavelookup["2005-05"] = Gcavelookup["264"]
+    Gcavelookup["2005-07"] = Gcavelookup["257"]
+    Gcavelookup["2006-08"] = Gcavelookup["285"]
+    Gcavelookup["2006-09"] = Gcavelookup["298"]
+    Gcavelookup["2007-71"] = Gcavelookup["271"]
+    Gcavelookup["2010-01"] = Gcavelookup["263"]
+    Gcavelookup["2010-03"] = Gcavelookup["293"]
+    Gcavelookup["2011-01"] = Gcavelookup["292"]  # NOTE(review): replaced by the "190" assignment to the same key further down
+    Gcavelookup["2012-dd-05"] = Gcavelookup["286"]
+    Gcavelookup["2012-ns-13"] = Gcavelookup["292"]
+    Gcavelookup["2014-neo-01"] = Gcavelookup["273"]
+    Gcavelookup["2014-sd-01"] = Gcavelookup["274"]
+    Gcavelookup["2014-ms-14"] = Gcavelookup["287"]
+    Gcavelookup["2015-mf-06"] = Gcavelookup["288"]
+    Gcavelookup["2016-jb-01"] = Gcavelookup["289"]
+    Gcavelookup["2017-pw-01"] = Gcavelookup["277"]
+    Gcavelookup["2018-dm-07"] = Gcavelookup["359"]
+    Gcavelookup["2017_cucc_24"] = Gcavelookup["291"]
+    Gcavelookup["2017_cucc_23"] = Gcavelookup["295"]
+    Gcavelookup["2017_cucc_28"] = Gcavelookup["290"]
+    Gcavelookup["bs17"] = Gcavelookup["283"]
+
+    Gcavelookup["1976/b11"] = Gcavelookup["198"]
+    Gcavelookup["1976/b8"] = Gcavelookup["197"]
+    Gcavelookup["1976/b9"] = Gcavelookup["190"]
+    Gcavelookup["b11"] = Gcavelookup["1976/b11"]
+    Gcavelookup["b8"] = Gcavelookup["1976/b8"]
+    Gcavelookup["b9"] = Gcavelookup["1976/b9"]
+
+    Gcavelookup["2011-01-bs30"] = Gcavelookup["190"]
+    Gcavelookup["bs30"] = Gcavelookup["190"]
+    Gcavelookup["2011-01"] = Gcavelookup["190"]
+
+    Gcavelookup["2002-x11"] = Gcavelookup["2005-08"]
+    Gcavelookup["2002-x12"] = Gcavelookup["2005-07"]
+    Gcavelookup["2002-x13"] = Gcavelookup["2005-06"]
+    Gcavelookup["2002-x14"] = Gcavelookup["2005-05"]
+
+    Gcavelookup["kh"] = Gcavelookup["161"]
+    Gcavelookup["161-kh"] = Gcavelookup["161"]
+    Gcavelookup["204-steinBH"] = Gcavelookup["204"]
+    Gcavelookup["stonebridge"] = Gcavelookup["204"]
+    Gcavelookup["hauchhole"] = Gcavelookup["234"]
+    Gcavelookup["hauch"] = Gcavelookup["234"]
+    Gcavelookup["234-hauch"] = Gcavelookup["234"]
+    Gcavelookup["tunnocks"] = Gcavelookup["258"]
+    Gcavelookup["balcony"] = Gcavelookup["264"]
+    Gcavelookup["balkon"] = Gcavelookup["264"]
+    Gcavelookup["fgh"] = Gcavelookup["290"]
+    Gcavelookup["gsh"] = Gcavelookup["291"]
+
+    Gcavelookup["homecoming"] = Gcavelookup["2018-dm-07"]
+    Gcavelookup["99ob02"] = Gcavelookup["1999-ob-02"]
+
+    addmore = {}
+    for key in Gcavelookup:
+        addmore[key.replace("-","_")] = Gcavelookup[key]
+        addmore[key.replace("_","-")] = Gcavelookup[key]
+    Gcavelookup = {**addmore, **Gcavelookup}
+
+    addmore ={}
+    for key, found in Gcavelookup.items():
+        if not found:
+            pass
+        elif found.kataster_number:
+            # the kataster number, when the cave has one, is the id written to the JSON dump
+            addmore[key] = found.kataster_number
+        elif found.unofficial_number:
+            addmore[key] = found.unofficial_number.lower()
+    with open("cave-lookup.json", 'w') as f:
+        json.dump(addmore, f)
+
+    return Gcavelookup
diff --git a/parsers/caves.py b/parsers/caves.py
index 7f7364c..3c5d98e 100644
--- a/parsers/caves.py
+++ b/parsers/caves.py
@@ -8,21 +8,44 @@ import troggle.core.models as models
 import troggle.core.models_caves as models_caves
 
 def readcaves():
-    # Clear the cave data issues as we are reloading
-    # Note: these are HTML files in the EXPOWEB repo, not from the loser repo.
-    models.DataIssue.objects.filter(parser='caves').delete()
+    # Clear the cave data issues as we are reloading
+    models.DataIssue.objects.filter(parser='caves').delete()
+
+    # Do this first, so that these empty entries are overwritten as they get properly created.
+    # For those caves which do not have XML files even though they exist and have surveys
+    forgotten = ["2007-04", "2007-05", "2007-06", "2007-07", "2007-12", "2009-01", "2009-02",
+        "2010-06", "2010-07", "2012-ns-01", "2012-ns-02", "2010-04", "2012-ns-05", "2012-ns-06",
+        "2012-ns-07", "2012-ns-08", "2012-ns-12", "2012-ns-14", "2012-ns-15", "2014-bl888",
+        "2018-pf-01", "2018-pf-02", "haldenloch", "gruenstein"]
+    for k in forgotten:
+        try:
+            cave = models_caves.Cave(
+                unofficial_number = k,
+                official_name = "Mislaid cave - created as empty object. No XML available at this time.",
+                notes="_Survex file found in loser repo but no description in expoweb")
+            if cave:
+                print("{} {}".format(cave.unofficial_number, cave.official_name))
+                cave.save()
+            else:  # NOTE(review): a freshly constructed model is presumably always truthy - confirm whether this branch can run
+                print("Failed to create cave {} ".format(k))
+        except Exception:
+            message = " ! Forgotten cave error, forgotten-id: %s" % k
+            models.DataIssue.objects.create(parser='caves', message=message)
+            print(message)
+
+    # Note: these are HTML files in the EXPOWEB repo, not from the loser repo.
+    area_1623 = models_caves.Area.objects.update_or_create(short_name = "1623", parent = None)
+    area_1626 = models_caves.Area.objects.update_or_create(short_name = "1626", parent = None)
+    print(" - Reading Entrances")
+    for filename in next(os.walk(settings.ENTRANCEDESCRIPTIONS))[2]: #Should be a better way of getting a list of files
+        if filename.endswith('.html'):
+            readentrance(filename)
+    print (" - Reading Caves")
+    for filename in next(os.walk(settings.CAVEDESCRIPTIONS))[2]: #Should be a better way of getting a list of files
+        if filename.endswith('.html'):
+            readcave(filename)
+
 
-    area_1623 = models_caves.Area.objects.update_or_create(short_name = "1623", parent = None)
-    area_1626 = models_caves.Area.objects.update_or_create(short_name = "1626", parent = None)
-    print(" - Reading Entrances")
-    #print "list of "
-    for filename in next(os.walk(settings.ENTRANCEDESCRIPTIONS))[2]: #Should be a better way of getting a list of files
-        if filename.endswith('.html'):
-            readentrance(filename)
-    print (" - Reading Caves")
-    for filename in next(os.walk(settings.CAVEDESCRIPTIONS))[2]: #Should be a better way of getting a list of files
-        if filename.endswith('.html'):
-            readcave(filename)
 
 
 def readentrance(filename):
@@ -204,7 +227,7 @@ def readcave(filename):
             message = " ! Entrance setting failure, slug: %s letter: %s" % (slug, letter)
             models.DataIssue.objects.create(parser='caves', message=message)
             print(message)
-    
+
 
 def getXML(text, itemname, minItems = 1, maxItems = None, printwarnings = True, context = ""):
     items = re.findall("<%(itemname)s>(.*?)" % {"itemname": itemname}, text, re.S)
diff --git a/parsers/logbooks.py b/parsers/logbooks.py
index a492135..c4f2c9c 100644
--- a/parsers/logbooks.py
+++ b/parsers/logbooks.py
@@ -12,7 +12,7 @@ from django.template.defaultfilters import slugify
 from django.utils.timezone import get_current_timezone, make_aware
 
 from troggle.core.models import DataIssue, Expedition
-from troggle.core.models_caves import Cave, OtherCaveName, LogbookEntry, PersonTrip
+from troggle.core.models_caves import Cave, OtherCaveName, LogbookEntry, PersonTrip, GetCaveLookup
 from parsers.people import GetPersonExpeditionNameLookup
 from utils import save_carefully
 
@@ -78,33 +78,6 @@ def GetTripCave(place):
         print(("No cave found for place " , place))
         return None
 
-# lookup function modelled on GetPersonExpeditionNameLookup
-# repeated assignment each call, needs refactoring
-Gcavelookup = None
-def GetCaveLookup():
-    global Gcavelookup
-    if Gcavelookup:
-        return Gcavelookup
-    Gcavelookup = {"NONEPLACEHOLDER":None}
-    for cave in Cave.objects.all():
-        Gcavelookup[cave.official_name.lower()] = cave
-        if cave.kataster_number:
-            Gcavelookup[cave.kataster_number] = cave
-        if cave.unofficial_number:
-            Gcavelookup[cave.unofficial_number.lower()] = cave
-        if cave.filename:
-            # this is the slug - usually..
-            Gcavelookup[cave.filename.replace(".html","").lower()] = cave
-    # These are exact matches! edit to check for prefix only!
-    Gcavelookup["tunnocks"] = Gcavelookup["258"]
-    Gcavelookup["hauchhole"] = Gcavelookup["234"]
-    Gcavelookup["KH"] = Gcavelookup["161"]
-    Gcavelookup["Balcony"] = Gcavelookup["264"]
-    Gcavelookup["Balkon"] = Gcavelookup["264"]
-    Gcavelookup["FGH"] = Gcavelookup["290"]
-    Gcavelookup["GSH"] = Gcavelookup["291"]
-    Gcavelookup["Homecoming"] = Gcavelookup["2018-dm-07"]
-    return Gcavelookup
 
 
 logentries = [] # the entire logbook for one year is a single object: a list of entries