From d2833d26ccc365d1f24620271cfe74236bcef1de Mon Sep 17 00:00:00 2001 From: Philip Sargent Date: Mon, 6 Jul 2020 20:27:31 +0100 Subject: [PATCH] fix schema and try cache caves import --- core/models.py | 7 ----- parsers/caves.py | 71 ++++++++++++++++++++++++++++------------------- parsers/people.py | 21 +------------- 3 files changed, 44 insertions(+), 55 deletions(-) diff --git a/core/models.py b/core/models.py index 9900dd4..c8eb965 100644 --- a/core/models.py +++ b/core/models.py @@ -160,12 +160,6 @@ class Person(TroggleModel): notability += Decimal(1) / (max_expo_val - int(personexpedition.expedition.year)) return notability - def legslength(self): - for personexpedition in self.personexpedition_set.all(): - if not personexpedition.is_guest: - length += personexpedition.legslength - return length - def bisnotable(self): return self.notability() > Decimal(1)/Decimal(3) @@ -183,7 +177,6 @@ class PersonExpedition(TroggleModel): expedition = models.ForeignKey(Expedition,on_delete=models.CASCADE) person = models.ForeignKey(Person,on_delete=models.CASCADE) slugfield = models.SlugField(max_length=50,blank=True, null=True) - legslength = models.FloatField(null=True) is_guest = models.BooleanField(default=False) COMMITTEE_CHOICES = ( diff --git a/parsers/caves.py b/parsers/caves.py index 2bb2ccc..4a058fd 100644 --- a/parsers/caves.py +++ b/parsers/caves.py @@ -6,6 +6,10 @@ from django.conf import settings from troggle.core.models import DataIssue, get_process_memory import troggle.core.models_caves as models_caves +entrances_xslug = {} +caves_xslug = {} +areas_xslug = {} + def readcaves(): print(" - Deleting Caves and Entrances") models_caves.Cave.objects.all().delete() @@ -37,7 +41,7 @@ def readcaves(): cave.save() # must save to have id before foreign keys work cave.area = area_1623 cave.save() - message = " ! {} {}".format(cave.unofficial_number, cave.underground_description) + message = " ! {:11s} {}".format(cave.unofficial_number, cave.underground_description) DataIssue.objects.create(parser='caves', message=message) print(message) else: @@ -47,17 +51,23 @@ def readcaves(): DataIssue.objects.create(parser='caves', message=message) print(message) raise + print(" - Reading Entrances from entrance descriptions xml files") for filename in next(os.walk(settings.ENTRANCEDESCRIPTIONS))[2]: #Should be a better way of getting a list of files if filename.endswith('.html'): readentrance(filename) + print(" - Reading Caves from cave descriptions xml files") for filename in next(os.walk(settings.CAVEDESCRIPTIONS))[2]: #Should be a better way of getting a list of files if filename.endswith('.html'): readcave(filename) def readentrance(filename): - # Note: these are HTML files in the EXPOWEB repo, not from the loser repo. + global entrances_xslug + global caves_xslug + global areas_xslug + + # Note: these are HTML files in the EXPOWEB repo, not from the loser repo. with open(os.path.join(settings.ENTRANCEDESCRIPTIONS, filename)) as f: contents = f.read() context = "in file %s" % filename @@ -138,13 +148,15 @@ def readentrance(filename): primary = False def readcave(filename): - # Note: these are HTML files in the EXPOWEB repo, not from the loser repo. + global entrances_xslug + global caves_xslug + global areas_xslug + + # Note: these are HTML files in the EXPOWEB repo, not from the loser repo. with open(os.path.join(settings.CAVEDESCRIPTIONS, filename)) as f: contents = f.read() context = " in file %s" % filename - #print("Reading file CAVE {}".format(filename)) cavecontentslist = getXML(contents, "cave", maxItems = 1, context = context) - #print cavecontentslist if len(cavecontentslist) == 1: cavecontents = cavecontentslist[0] non_public = getXML(cavecontents, "non_public", maxItems = 1, context = context) @@ -192,9 +204,6 @@ def readcave(filename): url = url[0], filename = filename) except: - # this slow db query happens on every cave, but on import we have all this in memory - # and don't need to do a db query. Fix this to speed it up! - # need to cope with duplicates print(" ! FAILED to get only one CAVE when updating using: "+filename) kaves = models_caves.Cave.objects.all().filter(kataster_number=kataster_number[0]) for k in kaves: @@ -208,27 +217,31 @@ def readcave(filename): c = k for area_slug in areas: - # this slow db query happens on every cave, but on import we have all this in memory - # and don't need to do a db query. Fix this to speed it up! - area = models_caves.Area.objects.filter(short_name = area_slug) - if area: - newArea = area[0] + if area_slug in areas_xslug: + newArea = areas_xslug[area_slug] else: - newArea = models_caves.Area(short_name = area_slug, parent = models_caves.Area.objects.get(short_name = "1623")) - newArea.save() + area = models_caves.Area.objects.filter(short_name = area_slug) + if area: + newArea = area[0] + else: + newArea = models_caves.Area(short_name = area_slug, parent = models_caves.Area.objects.get(short_name = "1623")) + newArea.save() + areas_xslug[area_slug] = newArea c.area.add(newArea) primary = True for slug in slugs: - try: - # this slow db query happens on every cave, but on import we have all this in memory - # and don't need to do a db query. Fix this to speed it up! - cs = models_caves.CaveSlug.objects.update_or_create(cave = c, - slug = slug, - primary = primary) - except: - message = " ! Cave update/create failure: %s, skipping file %s" % (slug, context) - DataIssue.objects.create(parser='caves', message=message) - print(message) + if slug in caves_xslug: + cs = caves_xslug[slug] + else: + try: + cs = models_caves.CaveSlug.objects.update_or_create(cave = c, + slug = slug, + primary = primary) + caves_xslug[slug] = cs + except: + message = " ! Cave update/create failure: %s, skipping file %s" % (slug, context) + DataIssue.objects.create(parser='caves', message=message) + print(message) primary = False @@ -236,9 +249,11 @@ def readcave(filename): slug = getXML(entrance, "entranceslug", maxItems = 1, context = context)[0] letter = getXML(entrance, "letter", maxItems = 1, context = context)[0] try: - # this slow db query happens on every entrance, but on import we have all this in memory - # and don't need to do a db query. Fix this to speed it up! - entrance = models_caves.Entrance.objects.get(entranceslug__slug = slug) + if slug in entrances_xslug: + entrance = entrances_xslug[slug] + else: + entrance = models_caves.Entrance.objects.get(entranceslug__slug = slug) + entrances_xslug[slug] = entrance ce = models_caves.CaveAndEntrance.objects.update_or_create(cave = c, entrance_letter = letter, entrance = entrance) except: message = " ! Entrance setting failure, slug: %s letter: %s" % (slug, letter) diff --git a/parsers/people.py b/parsers/people.py index d0fa242..8fd39b6 100644 --- a/parsers/people.py +++ b/parsers/people.py @@ -107,29 +107,10 @@ def LoadPersonsExpos(): expedition = models.Expedition.objects.get(year=year) if attended == "1" or attended == "-1": lookupAttribs = {'person':person, 'expedition':expedition} - nonLookupAttribs = {'nickname':nickname, 'legslength':0.0,'is_guest':(personline[header["Guest"]] == "1")} + nonLookupAttribs = {'nickname':nickname, 'is_guest':(personline[header["Guest"]] == "1")} save_carefully(models.PersonExpedition, lookupAttribs, nonLookupAttribs) - # this fills in those people for whom 2008 was their first expo - #print "Loading personexpeditions 2008" - #expoers2008 = """Edvin Deadman,Kathryn Hopkins,Djuke Veldhuis,Becka Lawson,Julian Todd,Natalie Uomini,Aaron Curtis,Tony Rooke,Ollie Stevens,Frank Tully,Martin Jahnke,Mark Shinwell,Jess Stirrups,Nial Peters,Serena Povia,Olly Madge,Steve Jones,Pete Harley,Eeva Makiranta,Keith Curtis""".split(",") - #expomissing = set(expoers2008) - #for name in expomissing: - # firstname, lastname = name.split() - # is_guest = name in ["Eeva Makiranta", "Keith Curtis"] - # print "2008:", name - # persons = list(models.Person.objects.filter(first_name=firstname, last_name=lastname)) - # if not persons: - # person = models.Person(first_name=firstname, last_name = lastname, is_vfho = False, mug_shot = "") - # #person.Sethref() - # person.save() - # else: - # person = persons[0] - # expedition = models.Expedition.objects.get(year="2008") - # personexpedition = models.PersonExpedition(person=person, expedition=expedition, nickname="", is_guest=is_guest) - # personexpedition.save() - # used in other referencing parser functions # expedition name lookup cached for speed (it's a very big list) Gpersonexpeditionnamelookup = { }