2
0
mirror of https://expo.survex.com/repositories/troggle/.git synced 2025-01-19 01:12:32 +00:00

Fixed parsers

This commit is contained in:
Philip Sargent 2023-09-02 17:49:37 +03:00
parent c9729c046c
commit 1a8bc17f80
5 changed files with 49 additions and 33 deletions

View File

@ -69,8 +69,8 @@ def alphabet_suffix(n):
if not alphabet:
alphabet = list(string.ascii_lowercase)
if n < len(alphabet):
suffix = alphabet[n]
if n < len(alphabet) and n > 0:
suffix = alphabet[n-1]
else:
suffix = "_X_" + random.choice(string.ascii_lowercase) + random.choice(string.ascii_lowercase)
return suffix

View File

@ -4,6 +4,7 @@ from django.contrib.auth import authenticate
from django.contrib.auth import forms as auth_forms
from django.contrib.auth import login, logout
from django.contrib.auth.decorators import login_required
from django.http import HttpResponse, HttpResponseRedirect
from django.shortcuts import redirect, render
from django.utils.http import url_has_allowed_host_and_scheme
@ -22,7 +23,7 @@ class login_required_if_public(object):
def __call__(self, *args, **kwargs):
return self.f(*args, **kwargs)
# This is copied from CUYC.cuy.website.view.auth
# If we want to do the whole online-email thing, we would also need to copy across the code in these

View File

@ -8,6 +8,7 @@ from django.shortcuts import render, redirect
import settings
from troggle.core.models.caves import GetCaveLookup
from troggle.core.models.logbooks import LogbookEntry, writelogbook, PersonLogEntry
from troggle.core.models.survex import DrawingFile
from troggle.core.models.troggle import DataIssue, Expedition, PersonExpedition
@ -340,7 +341,7 @@ def logbookedit(request, year=None, slug=None):
"textrows": rows,
},
)
else: # no slug
else: # no slug or bad slug for an lbe which does not exist
# NEW logbook entry
return render(
request,

View File

@ -60,8 +60,8 @@ LOGBOOK_PARSER_SETTINGS = {
LOGBOOKS_DIR = "years" # subfolder of settings.EXPOWEB
ENTRIES = {
"2023": 81,
"2022": 93,
"2023": 83,
"2022": 94,
"2019": 55,
"2018": 95,
"2017": 74,
@ -127,7 +127,7 @@ def reset_trip_id(date):
suffix = alphabet_suffix(n)
tid = f"{date}{suffix}"
# print(tid)
# print(already, n, tid)
return tid
rx_tripperson = re.compile(r"(?i)<u>(.*?)</u>$")
@ -136,6 +136,7 @@ rx_round_bracket = re.compile(r"[\(\[].*?[\)\]]")
def GetTripPersons(trippeople, expedition, logtime_underground, tid=None):
res = []
author = None
guests = []
# print(f'# {tid}')
# print(f" - {tid} '{trippeople}' ")
@ -154,11 +155,12 @@ def GetTripPersons(trippeople, expedition, logtime_underground, tid=None):
try:
personyear = GetPersonExpeditionNameLookup(expedition).get(tripperson.lower())
if not personyear:
if known_foreigner(tripperson):
message = f" ! - {expedition.year} Known foreigner: '{tripperson}' in entry {tid=}"
guests.append(nickname_used)
if known_foreigner(nickname_used):
message = f" ! - {expedition.year} Known foreigner: '{nickname_used}' in entry {tid=}"
print(message)
else:
message = f" ! - {expedition.year} No name match for: '{tripperson}' in entry {tid=} for this year."
message = f" ! - {expedition.year} No name match for: '{nickname_used}' in entry {tid=} for this year."
print(message)
DataIssue.objects.create(parser="logbooks", message=message)
res.append((personyear, nickname_used, logtime_underground))
@ -170,10 +172,9 @@ def GetTripPersons(trippeople, expedition, logtime_underground, tid=None):
raise
if author_u:
author = personyear
else:
# a person but with * prefix. Ignored everywhere.
# print(f" ! - {expedition.year} * person : {tripperson}")
pass
else: # *guest
guests.append(tripperson)
# print(f" ! - {expedition.year} * GUEST : {tripperson}")
if not author:
if not res:
@ -181,7 +182,7 @@ def GetTripPersons(trippeople, expedition, logtime_underground, tid=None):
author = res[-1][0] # the previous valid person and a time of 0 hours
# print(f" - {tid} [{author.person}] '{res[0][0].person}'...")
return res, author
return res, author, guests
def tidy_time_underground(logtime_underground):
# Nasty hack, must tidy this up..
@ -202,7 +203,7 @@ def tidy_time_underground(logtime_underground):
def tidy_trip_persons(trippeople, title, expedition, logtime_underground, tid):
try:
trippersons, author = GetTripPersons(trippeople, expedition, logtime_underground, tid=tid)
trippersons, author, guests = GetTripPersons(trippeople, expedition, logtime_underground, tid=tid)
# trippersons is a list of tuples (personyear, nickname_used, logtime_underground)
except:
message = f" ! - {expedition.year} Logentry: {title} - GetTripPersons FAIL to recognise nickname"
@ -216,7 +217,7 @@ def tidy_trip_persons(trippeople, title, expedition, logtime_underground, tid):
DataIssue.objects.create(parser="logbooks", message=message)
print(message)
return trippersons, author
return trippersons, author, guests
def tidy_trip_cave(place):
# GetCaveLookup() needs to work better. None of this data is *used* though?
@ -251,16 +252,18 @@ def tidy_tid(tid, title):
tid = str(randint(1000, 9999)) + "_" + slugify(title)[:10].replace("-", "_")
return tid
def store_entry_into_database(date, place, tripcave, title, text, trippersons, author, expedition, logtime_underground, tid):
def store_entry_into_database(date, place, tripcave, title, text, trippersons, author, guests, expedition, logtime_underground, tid):
"""saves a single logbook entry and related personlogentry items
We could do a bulk update to save all the entries, but then we would need to do a query on
each one to get the primary key to asign to the PersonLogEntries. So overall probably not much
each one to get the primary key to assign to the PersonLogEntries. So overall probably not much
faster ?
"""
other_people = ", ".join(guests) # join list members separated by comma
nonLookupAttribs = {
"place": place,
"other_people": other_people, # *Ol's Mum, foreigners..
"text": text,
"expedition": expedition,
"time_underground": logtime_underground,
@ -324,6 +327,17 @@ def parser_date(tripdate, year):
def parser_html(year, expedition, txt, seq=""):
"""This uses some of the more obscure capabilities of regular expressions,
see https://docs.python.org/3/library/re.html
e.g.
* is greedy
*? is non-greedy
(?x) flag means VERBOSE
(?: ) non-capturing parentheses
\s whitespace
\S NOT whitespace
You can't see it here, but a round-trip export-then-import will move
the endmatter up to the frontmatter. This made sense when translating
@ -357,7 +371,7 @@ def parser_html(year, expedition, txt, seq=""):
for trippara in tripparas:
logbook_entry_count += 1
tid = set_trip_seq_id(year, logbook_entry_count)
# print(f' - new tid:{tid} lbe count: {logbook_entry_count}')
# print(f' - new seq tid:{tid} lbe count: {logbook_entry_count}')
s = re.match(
r"""(?x)(?:\s*<div\sclass="tripdate"\sid=".*?">.*?</div>\s*<p>)? # second date
@ -367,15 +381,17 @@ def parser_html(year, expedition, txt, seq=""):
\s*<div\s+class="triptitle">\s*(.*?)</div>
([\s\S]*?)
\s*(?:<div\s+class="timeug">\s*(.*?)</div>)?
\s*(?:<div\s+class="editentry"\s*.*?</div>)?
\s*$
""",
trippara,
)
if s:
tripid, tripid1, tripdate, trippeople, triptitle, triptext, tu = s.groups()
# print(f"#{logbook_entry_count} {tu} {len(triptext)} ")
else:
# if not re.search(r"Rigging Guide", trippara):
msg = f" !- Logbook. Can't parse entry, skipping:{logbook_entry_count} '{trippara[:55]}'...'{trippara}'"
msg = f" !- Logbook. Can't parse entry, skipping:{logbook_entry_count} '{trippara[:75]}'..."
print(msg)
DataIssue.objects.create(parser="logbooks", message=msg)
continue
@ -403,12 +419,12 @@ def parser_html(year, expedition, txt, seq=""):
dupl[check] = 1
tu = tidy_time_underground(tu)
trippersons, author = tidy_trip_persons(trippeople, triptitle, expedition, tu, tid)
trippersons, author, guests = tidy_trip_persons(trippeople, triptitle, expedition, tu, tid)
tripcave = tidy_trip_cave(place)
tripcontent = tidy_trip_image_urls(tripcontent, ldate)
tid = tidy_tid(tid, triptitle)
entrytuple = (ldate, place, tripcave, triptitle, tripcontent, trippersons, author, expedition, tu, tid)
entrytuple = (ldate, place, tripcave, triptitle, tripcontent, trippersons, author, guests, expedition, tu, tid)
logentries.append(entrytuple)
return logentries
@ -509,13 +525,13 @@ def parser_blog(year, expedition, txt, sq=""):
tripcontent = f"\n\n<!-- Content parsed from UK Caving Blog -->\nBlog Author: {trippeople}" + tripcontent
logtime_underground = 0
trippersons, author = tidy_trip_persons(trippeople, triptitle, expedition, logtime_underground, tid)
trippersons, author, guests = tidy_trip_persons(trippeople, triptitle, expedition, logtime_underground, tid)
# print(f" - author: {author}")
tripcave = tidy_trip_cave(place)
tripcontent = tidy_trip_image_urls(tripcontent, year)
tid = tidy_tid(tid, triptitle)
entrytuple = (tripdate, place, tripcave, triptitle, tripcontent, trippersons, author, expedition, tu, tid)
entrytuple = (tripdate, place, tripcave, triptitle, tripcontent, trippersons, author, guests, expedition, tu, tid)
logentries.append(entrytuple)
return logentries
@ -621,10 +637,10 @@ def LoadLogbook(year):
f" - Not a year with extant blog entries to import: '{year}' not in BLOG_PARSER_SETTINGS {BLOG_PARSER_SETTINGS}"
)
for entrytuple in logentries:
date, place, tripcave, triptitle, text, trippersons, author, expedition, tu, tid = entrytuple
date, place, tripcave, triptitle, text, trippersons, author, guests, expedition, tu, tid = entrytuple
if expo == expedition: # unneeded check, we zeroed it before filling it
# print(f" -- {triptitle}")
store_entry_into_database(date, place, tripcave, triptitle, text, trippersons, author, expedition, tu, tid)
store_entry_into_database(date, place, tripcave, triptitle, text, trippersons, author, guests, expedition, tu, tid)
else:
print(f" ! unexpected log entry labelled as '{expedition}' {tid}" )
expo.save() # to save logbook name property
@ -708,8 +724,8 @@ def LoadLogbooks():
# - LogBookEntry (text, who when etc.)
# - PersonLogEntry (who was on that specific trip mentioned in the logbook entry)
for entrytuple in allentries:
date, place, tripcave, triptitle, text, trippersons, author, expedition, tu, tid = entrytuple
store_entry_into_database(date, place, tripcave, triptitle, text, trippersons, author, expedition, tu, tid)
date, place, tripcave, triptitle, text, trippersons, author, guests, expedition, tu, tid = entrytuple
store_entry_into_database(date, place, tripcave, triptitle, text, trippersons, author, guests, expedition, tu, tid)
for expo in expos:
expo.save() # to save logbook name property

View File

@ -22,13 +22,11 @@ See troggle/code/views/other.py and core.models/logbooks.py writelogbook(year, f
<hr />
<div class="tripdate" id="{{logbook_entry.slug}}">{{logbook_entry.date|date:'Y-m-d'}}</div>
<div class="trippeople">{% for personlogentry in logbook_entry.personlogentry_set.all %}{% if personlogentry.is_logbook_entry_author %}<u>{{personlogentry.personexpedition.person|safe}}</u>{% else %}{{ personlogentry.personexpedition.person|safe }}{% endif %}, {% endfor %}</div>
<div class="trippeople">{% for personlogentry in logbook_entry.personlogentry_set.all %}{% if personlogentry.is_logbook_entry_author %}<u>{{personlogentry.personexpedition.person|safe}}</u>{% else %}{{ personlogentry.personexpedition.person|safe }}{% endif %}, {% endfor %}{% if logbook_entry.other_people %}, {{logbook_entry.other_people}}{% endif %}</div>
<div class="triptitle">{{logbook_entry.title|safe}}</div>
<br />
<a href="/logbookedit/{{logbook_entry.slug}}">Edit this entry</a>
<br />
{{logbook_entry.text|safe}}
<div class="timeug">T/U: {{logbook_entry.time_underground|safe}} hours</div>
<div class="editentry"><br /><a href="/logbookedit/{{logbook_entry.slug}}">Edit this entry</a><br /></div>
{% endfor %}
<hr />
</body>