
refactoring people import from folk.csv

2025-07-21 15:41:05 +02:00
parent 1cfcbccf76
commit 2039501672
8 changed files with 141 additions and 34 deletions

View File

@@ -67,7 +67,7 @@ class Expedition(TroggleModel):
     logbookfile = models.CharField(max_length=100, blank=True, null=True)
 
     def __str__(self):
-        return self.year
+        return str(self.year)
 
     class Meta:
         ordering = ("-year",)
@@ -85,11 +85,13 @@ class Person(TroggleModel):
     definied in django.contrib.auth.models
     """
 
-    first_name = models.CharField(max_length=100)
-    last_name = models.CharField(max_length=100)
-    fullname = models.CharField(max_length=200) # display name, but should not be used for lookups
-    nickname = models.CharField(max_length=200, blank=True)
-    slug = models.SlugField(max_length=50, blank=True, null=True) # unique, enforced in code not in db
+    input_name = models.CharField(max_length=100, blank=True, null=True)
+    input_surname = models.CharField(max_length=100, blank=True, null=True)
+    first_name = models.CharField(max_length=100, blank=True, null=True)
+    last_name = models.CharField(max_length=100, blank=True, null=True)
+    fullname = models.CharField(max_length=200, blank=True, null=True) # display name, but should not be used for lookups
+    nickname = models.CharField(max_length=200, blank=True, null=True)
+    slug = models.SlugField(max_length=50) # unique, enforced in code not in db
 
     # no delete cascade. We have users without Persons, and Persons without users
     user = models.OneToOneField(User, models.SET_NULL, blank=True, null=True)
@@ -99,6 +101,7 @@ class Person(TroggleModel):
     )
     is_guest = models.BooleanField(default=False) # This is per-Person, not per-PersonExpedition
     mug_shot = models.CharField(max_length=100, blank=True, null=True)
+    blurbfile = models.CharField(max_length=100, blank=True, null=True)
     blurb = models.TextField(blank=True, null=True)
     orderref = models.CharField(max_length=200, blank=True) # for alphabetic
@@ -164,6 +167,10 @@ class PersonExpedition(TroggleModel):
     expedition = models.ForeignKey(Expedition, on_delete=models.CASCADE, db_index=True)
     person = models.ForeignKey(Person, on_delete=models.CASCADE, db_index=True)
     slugfield = models.SlugField(max_length=50, blank=True, null=True) # 2022 to be used in future
+    noncaver = models.BooleanField(
+        help_text="if flagged as -1 in the folk.csv input file",
+        default=True,
+    )
     # is_guest = models.BooleanField(default=False) # This is per-Person, not per-PersonExpedition

View File

@@ -316,7 +316,7 @@ def kataster(request, slug=None):
     if dir.is_dir():
         error += f"STOP: a target directory exists. REMOVE or RENAME it first: {dir}<br>\n"
-    # 1623 : fixedpts/gps/gps23.svx:26:*fix p2023-mg-03 reference 13.81514 47.69169 1767
+    # 1623 : fixedpts/gps/gps23.svx:26:*fix p2023-mg-03 13.81514 47.69169 1767
     # 1626: fixedpts/1626-no-schoenberg-hs-not-tied-to-caves.svx
     return l_script, error

View File

@@ -1,4 +1,5 @@
 import re
+from datetime import datetime
 from pathlib import Path
 
 from django.conf import settings
@@ -10,7 +11,8 @@ from troggle.core.models.caves import Cave
 from troggle.core.models.logbooks import LogbookEntry, writelogbook # , PersonLogEntry
 
 # from databaseReset import reinit_db # don't do this. databaseRest runs code *at import time*
-from troggle.core.models.troggle import Expedition
+from troggle.core.models.troggle import Expedition, Person, PersonExpedition
 from troggle.core.utils import current_expo, COOKIE_SHORT_TIMEOUT, PUBLIC_LAPTOP_COOKIE_NAME, PUBLIC_LAPTOP_COOKIE_TEXT
 from troggle.parsers.imports import (
     import_caves,
@@ -145,6 +147,67 @@ def controlpanel(request):
         {"expeditions": Expedition.objects.all(), "year": current_expo()},
     )
 
+
+def folk_export(request):
+    """Recreates the folk.csv file from the database contents
+    WORK IN PROGRESS JULY 2025
+    """
+    def deslugify(slug):
+        deslug = slug.replace("-"," ",1).title()
+        return deslug
+
+    def real_surname(person):
+        return deslugify(person.slug).replace(person.first_name,"").strip()
+
+    expo = {}
+    yearlist = range(1976, int(current_expo())+1)
+    newfile = settings.EXPOWEB / 'folk' / 'new_folk.csv'
+    with open(newfile, 'w') as f:
+        f.write("Name,Lastname,Guest,VfHO member,Mugshot")
+        for y in yearlist:
+            try:
+                expo[y] = Expedition.objects.get(year=y)
+            except:
+                expo[y] = None
+            f.write(f",{y}")
+        f.write("\n")
+        for person in Person.objects.all().order_by("last_name"):
+            f.write(f"{person.input_name}")
+            if person.nickname:
+                f.write(f" ({person.nickname})")
+            f.write(f",{person.input_surname}")
+            if person.is_guest:
+                f.write(",1")
+            else:
+                f.write(",")
+            if person.is_vfho:
+                f.write(",1")
+            else:
+                f.write(",")
+            if person.mug_shot:
+                f.write(f",{person.mug_shot.replace("/folk/","")}")
+            elif person.blurbfile:
+                f.write(f",{person.blurbfile.replace("/folk/","")}")
+            else:
+                f.write(",")
+            for y in yearlist:
+                present = PersonExpedition.objects.filter(expedition=expo[y], person=person)
+                if present.count() == 1:
+                    if present[0].noncaver:
+                        f.write(f",-1")
+                    else:
+                        f.write(f",1")
+                else:
+                    f.write(",")
+            f.write("\n")
+    return render(
+        request,
+        "controlPanel.html",
+        {"expeditions": Expedition.objects.all(),
+         "jobs_completed": f"GENERATED {str(newfile)} {datetime.now().strftime("%Y-%m-%d %H:%M")}"}
+    )
+
+
 def exportlogbook(request, year=None):
     """Constructs, from the database, a complete HTML formatted logbook

View File

@@ -61,7 +61,7 @@ ENTRIES = {
     "2025": 27,
     "2024": 127,
     "2023": 131,
-    "2022": 94,
+    "2022": 93,
     "2019": 55,
     "2018": 98,
     "2017": 74,

View File

@@ -22,38 +22,40 @@ todo = """
 - [copy these from paper notes]
 """
 
-def parse_blurb(personline, header, person):
+def parse_blurb(personline, header):
     """create mugshot Photo instance
     Would be better if all this was done before the Person object was created in the db, then it would not
     need re-saving (which is slow)"""
+    person = {}
     ms_filename = personline[header["Mugshot"]]
     ms_path = Path(settings.EXPOWEB, "folk", ms_filename)
     if ms_filename:
         if not ms_path.is_file():
-            message = f"! INVALID mug_shot field '{ms_filename}' for {person.fullname}"
+            message = f"! INVALID mug_shot field '{ms_filename}' for {personline[header["Mugshot"]]}"
             print(message)
-            DataIssue.objects.create(parser="people", message=message, url=f"/person/{person.fullname}")
+            DataIssue.objects.create(parser="people", message=message, url=f"/person/{personline[header["fullname"]]}")
             return
 
         if ms_filename.startswith("i/"):
             # if person just has an image, add it. It has format 'i/adama2018.jpg'
-            person.mug_shot = str(Path("/folk", ms_filename))
-            person.blurb = None
+            person["mug_shot"] = str(Path("/folk", ms_filename))
+            person["blurb"] = None
 
         elif ms_filename.startswith("l/"):
             # it has the format 'l/ollybetts.htm' the file may contain <img src="../i/mymug.jpg"> images
+            person["blurbfile"] = str(Path("/folk", ms_filename))
             with open(ms_path, "r") as blurbfile:
                 blrb = blurbfile.read()
                 pblurb = re.search(r"<body>.*<hr", blrb, re.DOTALL)
                 if pblurb:
-                    person.mug_shot = None
+                    person["mug_shot"] = None
                     fragment = re.search("<body>(.*)<hr", blrb, re.DOTALL).group(1)
                     fragment = fragment.replace('src="../i/', 'src="/folk/i/')
                     fragment = fragment.replace("src='../i/", "src='/folk/i/")
                     fragment = re.sub(r"<h.*>[^<]*</h.>", "", fragment)
                     # replace src="../i/ with src="/folk/i
-                    person.blurb = fragment
+                    person["blurb"] = fragment
                 else:
                     message = f"! Blurb parse error in {ms_filename}"
                     print(message)
@@ -66,7 +68,8 @@ def parse_blurb(personline, header, person):
         print(message)
         DataIssue.objects.create(parser="people", message=message, url="/folk/")
 
-    person.save()
+    return person
+
 
 slug_cache = {}
 def troggle_slugify(longname):
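With this refactor parse_blurb no longer touches the database: it builds and returns a plain dict of Person field values, which load_people_expos unpacks into a single Person.objects.create() call further down. Purely for illustration (hypothetical entries, following the 'i/adama2018.jpg' and 'l/ollybetts.htm' formats mentioned in the comments above), the returned dict looks roughly like this:

# mugshot-only row in folk.csv, e.g. "i/adama2018.jpg"
{"mug_shot": "/folk/i/adama2018.jpg", "blurb": None}

# blurb-page row, e.g. "l/ollybetts.htm"
{"blurbfile": "/folk/l/ollybetts.htm", "mug_shot": None, "blurb": "...body fragment extracted from the HTML..."}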
@@ -104,6 +107,8 @@ def load_people_expos():
     those created here, if it does not exist.
     """
     DataIssue.objects.filter(parser="people").delete()
+    Person.objects.all().delete()
+    PersonExpedition.objects.all().delete()
 
     persontab = open(os.path.join(settings.EXPOWEB, "folk", "folk.csv")) # should really be EXPOFOLK I guess
     personreader = csv.reader(persontab) # this is an iterator
@@ -120,8 +125,9 @@ def load_people_expos():
             otherAttribs = {"name": f"CUCC expo {year}"}
             e = Expedition.objects.create(**otherAttribs, **coUniqueAttribs)
 
-    print(" - Loading personexpeditions")
+    print(" - Loading persons and personexpeditions")
+    pe_list = []
     for personline in personreader:
         # This is all horrible: refactor it.
         # CSV: Name,Lastname,Guest,VfHO member,Mugshot,..
@@ -130,13 +136,16 @@ def load_people_expos():
         plainname = re.sub(r"<.*?>", "", name) # now in slugify
 
         match = re.match(r"^([^(]*)(\(([^)]*)\))?", name) # removes nickname in brackets
-        displayname = match.group(1)
+        displayname = match.group(1).strip()
+        input_name = displayname
         slug = troggle_slugify(displayname)
 
         firstname = ""
         nick = ""
 
         rawlastname = personline[header["Lastname"]].strip()
+        input_surname = rawlastname
         if rawlastname == "":
             print(f"MISSING SURNAME FIELD for {name} - check against similar names in the list to see what you have done.")
         if matchlastname := re.match(r"^([\w&;\s]+)(?:\(([^)]*)\))?", rawlastname):
@@ -148,6 +157,8 @@ def load_people_expos():
         splitnick = re.match(r"^([\w&;\s\-]+)(?:\(([^)]*)\))?", plainname)
         fullname = splitnick.group(1) # removes Nickname in brackets, but also cuts hyphenated names
         nick = splitnick.group(2) or ""
+        if nick:
+            nick = nick.strip()
 
         fullname = fullname.strip()
@@ -164,21 +175,32 @@ def load_people_expos():
         else:
            vfho = True
 
-        # would be better to just create the python object, and only cmmit to db once all done inc blurb
+        # would be better to just create the python object, and only commit to db once all done inc blurb
         # and better to save all the Persons in a bulk update, then do all the PersonExpeditions
-        coUniqueAttribs = {"slug": slug}
-        otherAttribs = {"first_name": firstname, "last_name": (lastname or ""), "is_vfho": vfho, "fullname": fullname, "nickname": nick,"is_guest": (personline[header["Guest"]] == "1")}
-        person = Person.objects.create(**otherAttribs, **coUniqueAttribs)
-        parse_blurb(personline=personline, header=header, person=person) # saves to db too
+        blurb_fields = parse_blurb(personline=personline, header=header)
+
+        attribs = {"slug": slug, "first_name": firstname, "last_name": (lastname or ""),
+                   "is_vfho": vfho, "fullname": fullname, "nickname": nick, "input_name": input_name, "input_surname": input_surname,
+                   "is_guest": (personline[header["Guest"]] == "1")}
+        person = Person.objects.create(**attribs, **blurb_fields)
+        #person.save()
 
-        # make person expedition from table
+        #print(" - Loading personexpeditions")
+        # make person expedition
         for year, attended in list(zip(headers, personline))[5:]:
             expedition = Expedition.objects.get(year=year)
-            if attended == "1" or attended == "-1":
-                coUniqueAttribs = {"person": person, "expedition": expedition}
-                # otherAttribs = {"is_guest": (personline[header["Guest"]] == "1")}
-                pe = PersonExpedition.objects.create(**coUniqueAttribs)
+            if attended in ("1", "-1"):
+                pe_list.append(PersonExpedition(person=person, expedition=expedition, noncaver=(attended == "-1")))
+                # pe = PersonExpedition.objects.create(
+                #     person=person,
+                #     expedition=expedition,
+                #     noncaver=(attended == "-1")
+                # )
+    PersonExpedition.objects.bulk_create(pe_list)
 
     print("", flush=True)
     ensure_users_are_persons()
     most_recent = Expedition.objects.all().first()
@@ -188,7 +210,7 @@ def check_new_signups(expedition):
     signups_clear = read_signups()
     # print(signups_clear)
     for slug in signups_clear:
-        print(f"check_new_signups: {slug}")
+        print(f" - Checking signups {slug}")
         p = Person.objects.get(slug=slug)
         pe = PersonExpedition.objects.update_or_create(person=p, expedition=expedition)
         # print("ADDING ",pe, expedition)
@@ -196,7 +218,7 @@ def check_new_signups(expedition):
 
 def ensure_users_are_persons():
     # Just ensure this is up to date.
-    print(f"# ensure_users_are_persons() - except for expo and expoadmin of course")
+    print(f" - Ensure_users_are_persons() - except for expo and expoadmin of course")
     users = User.objects.all()
     for u in users:
         ps = Person.objects.filter(slug=u.username)
@@ -204,7 +226,7 @@ def ensure_users_are_persons():
             p = ps[0]
             p.user = u
             p.save()
-            print(f" - {p.user} {u=}")
+            print(f" {p.user} {u=}")
 
 def who_is_this(year, possibleid):
     expo = Expedition.objects.filter(year=year)

View File

@@ -676,6 +676,18 @@ class LoadingSurvex:
        SO we have to recognise the '*fix' too
        """
        # *fix|36|reference|36359.40|82216.08|2000.00\n
+       # *fix|36|36359.40|82216.08|2000.00\n
+       # Regex explanation:
+       # (?i)                # Case-insensitive matching
+       # ^\s*[*]fix\s+       # Line starts with optional whitespace, then "*fix" followed by one or more spaces
+       # ([\w\d_.\-]+)       # Capture group 1: an identifier (alphanumeric, underscore, dot, or hyphen)
+       # \s+                 # One or more spaces
+       # (?:reference)?      # Optional literal word "reference" (non-capturing)
+       # \s*([\d\.]*)        # Capture group 2: optional number (digits and periods)
+       # \s+([\d\.]*)        # Capture group 3: another number (digits and periods)
+       # \s+([\d\.]*)        # Capture group 4: yet another number (digits and periods)
+       # \s*;?               # Optional whitespace and optional semicolon
+       # (.*)$               # Capture group 5: remainder of the line (any characters), a comment
        rx_fixline = re.compile(r"(?i)^\s*[*]fix\s+([\w\d_\.\-]+)\s+(?:reference)?\s*([\d\.]*)\s+([\d\.]*)\s+([\d\.]*)\s*;?(.*)$")
        line = line.replace("\n","")
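As a quick sanity check of the pattern documented above (illustrative only, not part of the commit), the regex accepts a *fix line with or without the optional reference keyword. The sample lines below are space-separated forms of the two pipe-delimited examples in the comments:

# Demonstration of rx_fixline against the two documented forms of a *fix line.
import re

rx_fixline = re.compile(r"(?i)^\s*[*]fix\s+([\w\d_\.\-]+)\s+(?:reference)?\s*([\d\.]*)\s+([\d\.]*)\s+([\d\.]*)\s*;?(.*)$")

for line in ("*fix 36 reference 36359.40 82216.08 2000.00",
             "*fix 36 36359.40 82216.08 2000.00 ; a trailing comment"):
    print(rx_fixline.match(line).groups())
# ('36', '36359.40', '82216.08', '2000.00', '')
# ('36', '36359.40', '82216.08', '2000.00', ' a trailing comment')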

View File

@@ -24,5 +24,7 @@
 <li><a href="/admin/">Django admin</a> - Deep magic access to all models and data <span style="color:red">{{error}}</span>
 </ul>
 
+<p>
+{{jobs_completed}}
 
 {% endblock %}

View File

@@ -55,7 +55,7 @@ from troggle.core.views.logbooks import (
     person,
     personexpedition,
 )
-from troggle.core.views.other import controlpanel, exportlogbook, frontpage, todos, public_laptop
+from troggle.core.views.other import controlpanel, exportlogbook, frontpage, todos, public_laptop, folk_export
 from troggle.core.views.prospect import prospecting
 from troggle.core.views.user_registration import register, newregister, reset_done, ExpoPasswordResetForm
 from troggle.core.views.scans import allscans, cavewallets, scansingle, walletslistperson, walletslistyear
@@ -155,6 +155,7 @@ trogglepatterns = [
     re_path(r'^indxal.htm$', caveindex, name="caveindex"), # ~420 hrefs to this url in expoweb files
     re_path(r'^people/?$', notablepersons, name="notablepersons"),
     path('people_ids', people_ids, name="people_ids"),
+    path('folk_export', folk_export, name="folk_export"),
     path('caveslist', caveslist, name="caveslist"),
     path('entrances', entranceindex, name="entranceindex"),
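With the new URL pattern in place, the export can be exercised without clicking through the control panel. A minimal sketch using Django's test client, assuming the route needs no extra authentication (which may not hold on the live server):

# Illustrative only - run inside `python manage.py shell`.
from django.test import Client

client = Client()
response = client.get("/folk_export")
print(response.status_code)  # expect 200 if the route is reachable without login
# The regenerated file is then at settings.EXPOWEB / 'folk' / 'new_folk.csv'.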