mirror of
https://expo.survex.com/repositories/troggle/.git
synced 2026-02-08 11:28:23 +00:00
refactoring people import from folk.csv
This commit is contained in:
@@ -61,7 +61,7 @@ ENTRIES = {
|
||||
"2025": 27,
|
||||
"2024": 127,
|
||||
"2023": 131,
|
||||
"2022": 94,
|
||||
"2022": 93,
|
||||
"2019": 55,
|
||||
"2018": 98,
|
||||
"2017": 74,
|
||||
|
||||
@@ -22,38 +22,40 @@ todo = """
|
||||
- [copy these from paper notes]
|
||||
"""
|
||||
|
||||
def parse_blurb(personline, header, person):
|
||||
def parse_blurb(personline, header):
|
||||
"""create mugshot Photo instance
|
||||
Would be better if all this was done before the Person object was created in the db, then it would not
|
||||
need re-saving (which is slow)"""
|
||||
person = {}
|
||||
ms_filename = personline[header["Mugshot"]]
|
||||
ms_path = Path(settings.EXPOWEB, "folk", ms_filename)
|
||||
|
||||
if ms_filename:
|
||||
if not ms_path.is_file():
|
||||
message = f"! INVALID mug_shot field '{ms_filename}' for {person.fullname}"
|
||||
message = f"! INVALID mug_shot field '{ms_filename}' for {personline[header["Mugshot"]]}"
|
||||
print(message)
|
||||
DataIssue.objects.create(parser="people", message=message, url=f"/person/{person.fullname}")
|
||||
DataIssue.objects.create(parser="people", message=message, url=f"/person/{personline[header["fullname"]]}")
|
||||
return
|
||||
|
||||
if ms_filename.startswith("i/"):
|
||||
# if person just has an image, add it. It has format 'i/adama2018.jpg'
|
||||
person.mug_shot = str(Path("/folk", ms_filename))
|
||||
person.blurb = None
|
||||
person["mug_shot"] = str(Path("/folk", ms_filename))
|
||||
person["blurb"] = None
|
||||
|
||||
elif ms_filename.startswith("l/"):
|
||||
# it has the format 'l/ollybetts.htm' the file may contain <img src="../i/mymug.jpg"> images
|
||||
person["blurbfile"] = str(Path("/folk", ms_filename))
|
||||
with open(ms_path, "r") as blurbfile:
|
||||
blrb = blurbfile.read()
|
||||
pblurb = re.search(r"<body>.*<hr", blrb, re.DOTALL)
|
||||
if pblurb:
|
||||
person.mug_shot = None
|
||||
person["mug_shot"] = None
|
||||
fragment = re.search("<body>(.*)<hr", blrb, re.DOTALL).group(1)
|
||||
fragment = fragment.replace('src="../i/', 'src="/folk/i/')
|
||||
fragment = fragment.replace("src='../i/", "src='/folk/i/")
|
||||
fragment = re.sub(r"<h.*>[^<]*</h.>", "", fragment)
|
||||
# replace src="../i/ with src="/folk/i
|
||||
person.blurb = fragment
|
||||
person["blurb"] = fragment
|
||||
else:
|
||||
message = f"! Blurb parse error in {ms_filename}"
|
||||
print(message)
|
||||
@@ -65,8 +67,9 @@ def parse_blurb(personline, header, person):
|
||||
message = f"! Unrecognised type of file at mug_shot field '{ms_filename}' for {person.fullname}"
|
||||
print(message)
|
||||
DataIssue.objects.create(parser="people", message=message, url="/folk/")
|
||||
|
||||
return person
|
||||
|
||||
person.save()
|
||||
|
||||
slug_cache = {}
|
||||
def troggle_slugify(longname):
|
||||
@@ -104,6 +107,8 @@ def load_people_expos():
|
||||
those created here, if it does not exist.
|
||||
"""
|
||||
DataIssue.objects.filter(parser="people").delete()
|
||||
Person.objects.all().delete()
|
||||
PersonExpedition.objects.all().delete()
|
||||
|
||||
persontab = open(os.path.join(settings.EXPOWEB, "folk", "folk.csv")) # should really be EXPOFOLK I guess
|
||||
personreader = csv.reader(persontab) # this is an iterator
|
||||
@@ -120,8 +125,9 @@ def load_people_expos():
|
||||
otherAttribs = {"name": f"CUCC expo {year}"}
|
||||
e = Expedition.objects.create(**otherAttribs, **coUniqueAttribs)
|
||||
|
||||
print(" - Loading personexpeditions")
|
||||
print(" - Loading persons and personexpeditions")
|
||||
|
||||
pe_list = []
|
||||
for personline in personreader:
|
||||
# This is all horrible: refactor it.
|
||||
# CSV: Name,Lastname,Guest,VfHO member,Mugshot,..
|
||||
@@ -130,13 +136,16 @@ def load_people_expos():
|
||||
plainname = re.sub(r"<.*?>", "", name) # now in slugify
|
||||
|
||||
match = re.match(r"^([^(]*)(\(([^)]*)\))?", name) # removes nickname in brackets
|
||||
displayname = match.group(1)
|
||||
displayname = match.group(1).strip()
|
||||
input_name = displayname
|
||||
slug = troggle_slugify(displayname)
|
||||
|
||||
firstname = ""
|
||||
nick = ""
|
||||
|
||||
rawlastname = personline[header["Lastname"]].strip()
|
||||
input_surname = rawlastname
|
||||
|
||||
if rawlastname == "":
|
||||
print(f"MISSING SURNAME FIELD for {name} - check against similar names in the list to see what you have done.")
|
||||
if matchlastname := re.match(r"^([\w&;\s]+)(?:\(([^)]*)\))?", rawlastname):
|
||||
@@ -148,6 +157,8 @@ def load_people_expos():
|
||||
splitnick = re.match(r"^([\w&;\s\-]+)(?:\(([^)]*)\))?", plainname)
|
||||
fullname = splitnick.group(1) # removes Nickname in brackets, but also cuts hyphenated names
|
||||
nick = splitnick.group(2) or ""
|
||||
if nick:
|
||||
nick = nick.strip()
|
||||
|
||||
fullname = fullname.strip()
|
||||
|
||||
@@ -164,21 +175,32 @@ def load_people_expos():
|
||||
else:
|
||||
vfho = True
|
||||
|
||||
# would be better to just create the python object, and only cmmit to db once all done inc blurb
|
||||
# would be better to just create the python object, and only commit to db once all done inc blurb
|
||||
# and better to save all the Persons in a bulk update, then do all the PersonExpeditions
|
||||
coUniqueAttribs = {"slug": slug}
|
||||
otherAttribs = {"first_name": firstname, "last_name": (lastname or ""), "is_vfho": vfho, "fullname": fullname, "nickname": nick,"is_guest": (personline[header["Guest"]] == "1")}
|
||||
person = Person.objects.create(**otherAttribs, **coUniqueAttribs)
|
||||
blurb_fields = parse_blurb(personline=personline, header=header)
|
||||
|
||||
attribs = {"slug": slug, "first_name": firstname, "last_name": (lastname or ""),
|
||||
"is_vfho": vfho, "fullname": fullname, "nickname": nick, "input_name": input_name, "input_surname": input_surname,
|
||||
"is_guest": (personline[header["Guest"]] == "1")}
|
||||
person = Person.objects.create(**attribs, **blurb_fields)
|
||||
|
||||
parse_blurb(personline=personline, header=header, person=person) # saves to db too
|
||||
#person.save()
|
||||
|
||||
# make person expedition from table
|
||||
#print(" - Loading personexpeditions")
|
||||
|
||||
# make person expedition
|
||||
for year, attended in list(zip(headers, personline))[5:]:
|
||||
expedition = Expedition.objects.get(year=year)
|
||||
if attended == "1" or attended == "-1":
|
||||
coUniqueAttribs = {"person": person, "expedition": expedition}
|
||||
# otherAttribs = {"is_guest": (personline[header["Guest"]] == "1")}
|
||||
pe = PersonExpedition.objects.create(**coUniqueAttribs)
|
||||
|
||||
if attended in ("1", "-1"):
|
||||
pe_list.append(PersonExpedition(person=person, expedition=expedition, noncaver=(attended == "-1")))
|
||||
# pe = PersonExpedition.objects.create(
|
||||
# person=person,
|
||||
# expedition=expedition,
|
||||
# noncaver=(attended == "-1")
|
||||
# )
|
||||
PersonExpedition.objects.bulk_create(pe_list)
|
||||
|
||||
print("", flush=True)
|
||||
ensure_users_are_persons()
|
||||
most_recent = Expedition.objects.all().first()
|
||||
@@ -188,7 +210,7 @@ def check_new_signups(expedition):
|
||||
signups_clear = read_signups()
|
||||
# print(signups_clear)
|
||||
for slug in signups_clear:
|
||||
print(f"check_new_signups: {slug}")
|
||||
print(f" - Checking signups {slug}")
|
||||
p = Person.objects.get(slug=slug)
|
||||
pe = PersonExpedition.objects.update_or_create(person=p, expedition=expedition)
|
||||
# print("ADDING ",pe, expedition)
|
||||
@@ -196,7 +218,7 @@ def check_new_signups(expedition):
|
||||
|
||||
def ensure_users_are_persons():
|
||||
# Just ensure this is up to date.
|
||||
print(f"# ensure_users_are_persons() - except for expo and expoadmin of course")
|
||||
print(f" - Ensure_users_are_persons() - except for expo and expoadmin of course")
|
||||
users = User.objects.all()
|
||||
for u in users:
|
||||
ps = Person.objects.filter(slug=u.username)
|
||||
@@ -204,7 +226,7 @@ def ensure_users_are_persons():
|
||||
p = ps[0]
|
||||
p.user = u
|
||||
p.save()
|
||||
print(f" - {p.user} {u=}")
|
||||
print(f" {p.user} {u=}")
|
||||
|
||||
def who_is_this(year, possibleid):
|
||||
expo = Expedition.objects.filter(year=year)
|
||||
|
||||
@@ -676,6 +676,18 @@ class LoadingSurvex:
|
||||
SO we have to recognise the '*fix' too
|
||||
"""
|
||||
# *fix|36|reference|36359.40|82216.08|2000.00\n
|
||||
# *fix|36|36359.40|82216.08|2000.00\n
|
||||
# Regex explanation:
|
||||
# (?i) # Case-insensitive matching
|
||||
# ^\s*[*]fix\s+ # Line starts with optional whitespace, then "*fix" followed by one or more spaces
|
||||
# ([\w\d_.\-]+) # Capture group 1: an identifier (alphanumeric, underscore, dot, or hyphen)
|
||||
# \s+ # One or more spaces
|
||||
# (?:reference)? # Optional literal word "reference" (non-capturing)
|
||||
# \s*([\d\.]*) # Capture group 2: optional number (digits and periods)
|
||||
# \s+([\d\.]*) # Capture group 3: another number (digits and periods)
|
||||
# \s+([\d\.]*) # Capture group 4: yet another number (digits and periods)
|
||||
# \s*;? # Optional whitespace and optional semicolon
|
||||
# (.*)$ # Capture group 5: remainder of the line (any characters), a comment
|
||||
rx_fixline = re.compile(r"(?i)^\s*[*]fix\s+([\w\d_\.\-]+)\s+(?:reference)?\s*([\d\.]*)\s+([\d\.]*)\s+([\d\.]*)\s*;?(.*)$")
|
||||
|
||||
line = line.replace("\n","")
|
||||
|
||||
Reference in New Issue
Block a user