2
0
mirror of https://expo.survex.com/repositories/troggle/.git synced 2025-12-15 04:47:11 +00:00

Fixing multiple caves with same kataster no

This commit is contained in:
Philip Sargent
2021-03-28 23:47:47 +01:00
parent 0ecaa9b8ee
commit 623483f3b1
5 changed files with 97 additions and 53 deletions

View File

@@ -5,6 +5,7 @@ import logging
import re
import json
from subprocess import call
from collections import defaultdict
from urllib.parse import urljoin
@@ -21,6 +22,7 @@ from django.template import Context, loader
from troggle.core.models import TroggleModel, Person, Expedition
from troggle.core.models_survex import SurvexStation
from troggle.core.models import DataIssue
class Area(TroggleModel):
short_name = models.CharField(max_length=100)
@@ -35,7 +37,7 @@ class Area(TroggleModel):
return str(self.short_name)
def kat_area(self):
if self.short_name in ["1623", "1626"]:
if self.short_name in ["1623", "1626", "1624", "1627"]:
return self.short_name
elif self.parent:
return self.parent.kat_area()
@@ -515,30 +517,47 @@ def get_scan_path(instance, filename):
return os.path.join('./',year,year+r'#'+number,str(instance.contents)+str(instance.number_in_wallet)+r'.jpg')
Gcavelookup = None
Gcave_count = None
def GetCaveLookup():
"""lookup function modelled on GetPersonExpeditionNameLookup
repeated assignment each call, needs refactoring
Does NOT detect duplicates! Needs fixing.
Needs to be a proper function that raises an exception if there is a duplicate.
OR we could set it to return None if there are duplicates, and require the caller to
fall back on doing the actual database query it wants rather than using this cache shortcut
"""
global Gcavelookup
if Gcavelookup:
return Gcavelookup
Gcavelookup = {"NONEPLACEHOLDER":None}
Gcavelookup = {"NONEPLACEHOLDER": None}
global Gcave_count
Gcave_count = defaultdict(int) # sets default value to int(0)
for cave in Cave.objects.all():
Gcavelookup[cave.official_name.lower()] = cave
key = cave.official_name.lower()
Gcavelookup[key] = cave
Gcave_count[key] += 1
if cave.kataster_number:
Gcavelookup[cave.kataster_number] = cave
Gcavelookup[cave.kataster_number] = cave # DUPLICATE as we have 1623-55 and 1626-55
Gcave_count[cave.kataster_number] += 1
if cave.unofficial_number:
Gcavelookup[cave.unofficial_number.lower()] = cave
Gcave_count[cave.unofficial_number.lower()] += 1
if cave.filename:
# this is the slug - usually..
Gcavelookup[cave.filename.replace(".html","").lower()] = cave
Gcave_count[cave.filename.replace(".html","").lower()] += 1
if cave.slug():
slug = cave.slug()
Gcavelookup[slug.lower()] = cave
Gcave_count[slug.lower()] += 1
# These are exact matches! edit to check for prefix only!
# mostly taken from expoweb/noinfo/cave-number-index
# and Becka's email of 25 may 2020 on new kataster numbers
# this should be re-done as a JSON file upload
# These might also create more duplicate entries, so re-write it to check
Gcavelookup["1987-02"] = Gcavelookup["267"]
Gcavelookup["1990-01"] = Gcavelookup["171"]
Gcavelookup["1990-02"] = Gcavelookup["172"]
@@ -624,7 +643,7 @@ def GetCaveLookup():
Gcavelookup["2016-jb-01"] = Gcavelookup["289"]
Gcavelookup["2017-pw-01"] = Gcavelookup["277"]
Gcavelookup["2018-dm-07"] = Gcavelookup["359"]
Gcavelookup["2017_cucc_24"] = Gcavelookup["291"]
Gcavelookup["2017_cucc_24"] = Gcavelookup["291"] # note _ not - here
Gcavelookup["2017_cucc_23"] = Gcavelookup["295"]
Gcavelookup["2017_cucc_28"] = Gcavelookup["290"]
Gcavelookup["bs17"] = Gcavelookup["283"]
@@ -679,5 +698,12 @@ def GetCaveLookup():
addmore[id] = Gcavelookup[id].unofficial_number.lower()
with open("cave-lookup.json", 'w') as f:
json.dump(addmore, f)
for c in Gcave_count:
if Gcave_count[c] > 1:
message = " ** Duplicate cave id: {}:{}:{}".format(Gcave_count[c], Gcavelookup[c], c)
#print(message)
#DataIssue.objects.create(parser='caves', message=message)
# logdataissues[Gcavelookup[c]]=message # pending troggle-wide issues logging system
return Gcavelookup