mirror of
https://expo.survex.com/repositories/troggle/.git
synced 2024-11-21 23:01:52 +00:00
Fixed parsers
This commit is contained in:
parent
c9729c046c
commit
1a8bc17f80
@ -69,8 +69,8 @@ def alphabet_suffix(n):
|
||||
if not alphabet:
|
||||
alphabet = list(string.ascii_lowercase)
|
||||
|
||||
if n < len(alphabet):
|
||||
suffix = alphabet[n]
|
||||
if n < len(alphabet) and n > 0:
|
||||
suffix = alphabet[n-1]
|
||||
else:
|
||||
suffix = "_X_" + random.choice(string.ascii_lowercase) + random.choice(string.ascii_lowercase)
|
||||
return suffix
|
||||
|
@ -4,6 +4,7 @@ from django.contrib.auth import authenticate
|
||||
from django.contrib.auth import forms as auth_forms
|
||||
from django.contrib.auth import login, logout
|
||||
from django.contrib.auth.decorators import login_required
|
||||
from django.http import HttpResponse, HttpResponseRedirect
|
||||
from django.shortcuts import redirect, render
|
||||
from django.utils.http import url_has_allowed_host_and_scheme
|
||||
|
||||
@ -22,7 +23,7 @@ class login_required_if_public(object):
|
||||
|
||||
def __call__(self, *args, **kwargs):
|
||||
return self.f(*args, **kwargs)
|
||||
|
||||
|
||||
|
||||
# This is copied from CUYC.cuy.website.view.auth
|
||||
# If we want to do the whole online-email thing, we would also need to copy across the code in these
|
||||
|
@ -8,6 +8,7 @@ from django.shortcuts import render, redirect
|
||||
|
||||
import settings
|
||||
|
||||
from troggle.core.models.caves import GetCaveLookup
|
||||
from troggle.core.models.logbooks import LogbookEntry, writelogbook, PersonLogEntry
|
||||
from troggle.core.models.survex import DrawingFile
|
||||
from troggle.core.models.troggle import DataIssue, Expedition, PersonExpedition
|
||||
@ -340,7 +341,7 @@ def logbookedit(request, year=None, slug=None):
|
||||
"textrows": rows,
|
||||
},
|
||||
)
|
||||
else: # no slug
|
||||
else: # no slug or bad slug for an lbe which does not exist
|
||||
# NEW logbook entry
|
||||
return render(
|
||||
request,
|
||||
|
@ -60,8 +60,8 @@ LOGBOOK_PARSER_SETTINGS = {
|
||||
LOGBOOKS_DIR = "years" # subfolder of settings.EXPOWEB
|
||||
|
||||
ENTRIES = {
|
||||
"2023": 81,
|
||||
"2022": 93,
|
||||
"2023": 83,
|
||||
"2022": 94,
|
||||
"2019": 55,
|
||||
"2018": 95,
|
||||
"2017": 74,
|
||||
@ -127,7 +127,7 @@ def reset_trip_id(date):
|
||||
suffix = alphabet_suffix(n)
|
||||
|
||||
tid = f"{date}{suffix}"
|
||||
# print(tid)
|
||||
# print(already, n, tid)
|
||||
return tid
|
||||
|
||||
rx_tripperson = re.compile(r"(?i)<u>(.*?)</u>$")
|
||||
@ -136,6 +136,7 @@ rx_round_bracket = re.compile(r"[\(\[].*?[\)\]]")
|
||||
def GetTripPersons(trippeople, expedition, logtime_underground, tid=None):
|
||||
res = []
|
||||
author = None
|
||||
guests = []
|
||||
# print(f'# {tid}')
|
||||
# print(f" - {tid} '{trippeople}' ")
|
||||
|
||||
@ -154,11 +155,12 @@ def GetTripPersons(trippeople, expedition, logtime_underground, tid=None):
|
||||
try:
|
||||
personyear = GetPersonExpeditionNameLookup(expedition).get(tripperson.lower())
|
||||
if not personyear:
|
||||
if known_foreigner(tripperson):
|
||||
message = f" ! - {expedition.year} Known foreigner: '{tripperson}' in entry {tid=}"
|
||||
guests.append(nickname_used)
|
||||
if known_foreigner(nickname_used):
|
||||
message = f" ! - {expedition.year} Known foreigner: '{nickname_used}' in entry {tid=}"
|
||||
print(message)
|
||||
else:
|
||||
message = f" ! - {expedition.year} No name match for: '{tripperson}' in entry {tid=} for this year."
|
||||
message = f" ! - {expedition.year} No name match for: '{nickname_used}' in entry {tid=} for this year."
|
||||
print(message)
|
||||
DataIssue.objects.create(parser="logbooks", message=message)
|
||||
res.append((personyear, nickname_used, logtime_underground))
|
||||
@ -170,10 +172,9 @@ def GetTripPersons(trippeople, expedition, logtime_underground, tid=None):
|
||||
raise
|
||||
if author_u:
|
||||
author = personyear
|
||||
else:
|
||||
# a person but with * prefix. Ignored everywhere.
|
||||
# print(f" ! - {expedition.year} * person : {tripperson}")
|
||||
pass
|
||||
else: # *guest
|
||||
guests.append(tripperson)
|
||||
# print(f" ! - {expedition.year} * GUEST : {tripperson}")
|
||||
|
||||
if not author:
|
||||
if not res:
|
||||
@ -181,7 +182,7 @@ def GetTripPersons(trippeople, expedition, logtime_underground, tid=None):
|
||||
author = res[-1][0] # the previous valid person and a time of 0 hours
|
||||
|
||||
# print(f" - {tid} [{author.person}] '{res[0][0].person}'...")
|
||||
return res, author
|
||||
return res, author, guests
|
||||
|
||||
def tidy_time_underground(logtime_underground):
|
||||
# Nasty hack, must tidy this up..
|
||||
@ -202,7 +203,7 @@ def tidy_time_underground(logtime_underground):
|
||||
|
||||
def tidy_trip_persons(trippeople, title, expedition, logtime_underground, tid):
|
||||
try:
|
||||
trippersons, author = GetTripPersons(trippeople, expedition, logtime_underground, tid=tid)
|
||||
trippersons, author, guests = GetTripPersons(trippeople, expedition, logtime_underground, tid=tid)
|
||||
# trippersons is a list of tuples (personyear, nickname_used, logtime_underground)
|
||||
except:
|
||||
message = f" ! - {expedition.year} Logentry: {title} - GetTripPersons FAIL to recognise nickname"
|
||||
@ -216,7 +217,7 @@ def tidy_trip_persons(trippeople, title, expedition, logtime_underground, tid):
|
||||
DataIssue.objects.create(parser="logbooks", message=message)
|
||||
print(message)
|
||||
|
||||
return trippersons, author
|
||||
return trippersons, author, guests
|
||||
|
||||
def tidy_trip_cave(place):
|
||||
# GetCaveLookup() needs to work better. None of this data is *used* though?
|
||||
@ -251,16 +252,18 @@ def tidy_tid(tid, title):
|
||||
tid = str(randint(1000, 9999)) + "_" + slugify(title)[:10].replace("-", "_")
|
||||
return tid
|
||||
|
||||
def store_entry_into_database(date, place, tripcave, title, text, trippersons, author, expedition, logtime_underground, tid):
|
||||
def store_entry_into_database(date, place, tripcave, title, text, trippersons, author, guests, expedition, logtime_underground, tid):
|
||||
"""saves a single logbook entry and related personlogentry items
|
||||
|
||||
We could do a bulk update to save all the entries, but then we would need to do a query on
|
||||
each one to get the primary key to asign to the PersonLogEntries. So overall probably not much
|
||||
each one to get the primary key to assign to the PersonLogEntries. So overall probably not much
|
||||
faster ?
|
||||
"""
|
||||
other_people = ", ".join(guests) # join list members separated by comma
|
||||
|
||||
nonLookupAttribs = {
|
||||
"place": place,
|
||||
"other_people": other_people, # *Ol's Mum, foreigners..
|
||||
"text": text,
|
||||
"expedition": expedition,
|
||||
"time_underground": logtime_underground,
|
||||
@ -324,6 +327,17 @@ def parser_date(tripdate, year):
|
||||
def parser_html(year, expedition, txt, seq=""):
|
||||
"""This uses some of the more obscure capabilities of regular expressions,
|
||||
see https://docs.python.org/3/library/re.html
|
||||
|
||||
e.g.
|
||||
* is greedy
|
||||
*? is non-greedy
|
||||
|
||||
(?x) flag means VERBOSE
|
||||
|
||||
(?: ) non-capturing parentheses
|
||||
|
||||
\s whitespace
|
||||
\S NOT whitespace
|
||||
|
||||
You can't see it here, but a round-trip export-then-import will move
|
||||
the endmatter up to the frontmatter. This made sense when translating
|
||||
@ -357,7 +371,7 @@ def parser_html(year, expedition, txt, seq=""):
|
||||
for trippara in tripparas:
|
||||
logbook_entry_count += 1
|
||||
tid = set_trip_seq_id(year, logbook_entry_count)
|
||||
# print(f' - new tid:{tid} lbe count: {logbook_entry_count}')
|
||||
# print(f' - new seq tid:{tid} lbe count: {logbook_entry_count}')
|
||||
|
||||
s = re.match(
|
||||
r"""(?x)(?:\s*<div\sclass="tripdate"\sid=".*?">.*?</div>\s*<p>)? # second date
|
||||
@ -367,15 +381,17 @@ def parser_html(year, expedition, txt, seq=""):
|
||||
\s*<div\s+class="triptitle">\s*(.*?)</div>
|
||||
([\s\S]*?)
|
||||
\s*(?:<div\s+class="timeug">\s*(.*?)</div>)?
|
||||
\s*(?:<div\s+class="editentry"\s*.*?</div>)?
|
||||
\s*$
|
||||
""",
|
||||
trippara,
|
||||
)
|
||||
if s:
|
||||
tripid, tripid1, tripdate, trippeople, triptitle, triptext, tu = s.groups()
|
||||
# print(f"#{logbook_entry_count} {tu} {len(triptext)} ")
|
||||
else:
|
||||
# if not re.search(r"Rigging Guide", trippara):
|
||||
msg = f" !- Logbook. Can't parse entry, skipping:{logbook_entry_count} '{trippara[:55]}'...'{trippara}'"
|
||||
msg = f" !- Logbook. Can't parse entry, skipping:{logbook_entry_count} '{trippara[:75]}'..."
|
||||
print(msg)
|
||||
DataIssue.objects.create(parser="logbooks", message=msg)
|
||||
continue
|
||||
@ -403,12 +419,12 @@ def parser_html(year, expedition, txt, seq=""):
|
||||
dupl[check] = 1
|
||||
|
||||
tu = tidy_time_underground(tu)
|
||||
trippersons, author = tidy_trip_persons(trippeople, triptitle, expedition, tu, tid)
|
||||
trippersons, author, guests = tidy_trip_persons(trippeople, triptitle, expedition, tu, tid)
|
||||
tripcave = tidy_trip_cave(place)
|
||||
tripcontent = tidy_trip_image_urls(tripcontent, ldate)
|
||||
tid = tidy_tid(tid, triptitle)
|
||||
|
||||
entrytuple = (ldate, place, tripcave, triptitle, tripcontent, trippersons, author, expedition, tu, tid)
|
||||
entrytuple = (ldate, place, tripcave, triptitle, tripcontent, trippersons, author, guests, expedition, tu, tid)
|
||||
logentries.append(entrytuple)
|
||||
return logentries
|
||||
|
||||
@ -509,13 +525,13 @@ def parser_blog(year, expedition, txt, sq=""):
|
||||
tripcontent = f"\n\n<!-- Content parsed from UK Caving Blog -->\nBlog Author: {trippeople}" + tripcontent
|
||||
|
||||
logtime_underground = 0
|
||||
trippersons, author = tidy_trip_persons(trippeople, triptitle, expedition, logtime_underground, tid)
|
||||
trippersons, author, guests = tidy_trip_persons(trippeople, triptitle, expedition, logtime_underground, tid)
|
||||
# print(f" - author: {author}")
|
||||
tripcave = tidy_trip_cave(place)
|
||||
tripcontent = tidy_trip_image_urls(tripcontent, year)
|
||||
tid = tidy_tid(tid, triptitle)
|
||||
|
||||
entrytuple = (tripdate, place, tripcave, triptitle, tripcontent, trippersons, author, expedition, tu, tid)
|
||||
entrytuple = (tripdate, place, tripcave, triptitle, tripcontent, trippersons, author, guests, expedition, tu, tid)
|
||||
logentries.append(entrytuple)
|
||||
return logentries
|
||||
|
||||
@ -621,10 +637,10 @@ def LoadLogbook(year):
|
||||
f" - Not a year with extant blog entries to import: '{year}' not in BLOG_PARSER_SETTINGS {BLOG_PARSER_SETTINGS}"
|
||||
)
|
||||
for entrytuple in logentries:
|
||||
date, place, tripcave, triptitle, text, trippersons, author, expedition, tu, tid = entrytuple
|
||||
date, place, tripcave, triptitle, text, trippersons, author, guests, expedition, tu, tid = entrytuple
|
||||
if expo == expedition: # unneeded check, we zeroed it before filling it
|
||||
# print(f" -- {triptitle}")
|
||||
store_entry_into_database(date, place, tripcave, triptitle, text, trippersons, author, expedition, tu, tid)
|
||||
store_entry_into_database(date, place, tripcave, triptitle, text, trippersons, author, guests, expedition, tu, tid)
|
||||
else:
|
||||
print(f" ! unexpected log entry labelled as '{expedition}' {tid}" )
|
||||
expo.save() # to save logbook name property
|
||||
@ -708,8 +724,8 @@ def LoadLogbooks():
|
||||
# - LogBookEntry (text, who when etc.)
|
||||
# - PersonLogEntry (who was on that specific trip mentioned in the logbook entry)
|
||||
for entrytuple in allentries:
|
||||
date, place, tripcave, triptitle, text, trippersons, author, expedition, tu, tid = entrytuple
|
||||
store_entry_into_database(date, place, tripcave, triptitle, text, trippersons, author, expedition, tu, tid)
|
||||
date, place, tripcave, triptitle, text, trippersons, author, guests, expedition, tu, tid = entrytuple
|
||||
store_entry_into_database(date, place, tripcave, triptitle, text, trippersons, author, guests, expedition, tu, tid)
|
||||
|
||||
for expo in expos:
|
||||
expo.save() # to save logbook name property
|
||||
|
@ -22,13 +22,11 @@ See troggle/code/views/other.py and core.models/logbooks.py writelogbook(year, f
|
||||
<hr />
|
||||
|
||||
<div class="tripdate" id="{{logbook_entry.slug}}">{{logbook_entry.date|date:'Y-m-d'}}</div>
|
||||
<div class="trippeople">{% for personlogentry in logbook_entry.personlogentry_set.all %}{% if personlogentry.is_logbook_entry_author %}<u>{{personlogentry.personexpedition.person|safe}}</u>{% else %}{{ personlogentry.personexpedition.person|safe }}{% endif %}, {% endfor %}</div>
|
||||
<div class="trippeople">{% for personlogentry in logbook_entry.personlogentry_set.all %}{% if personlogentry.is_logbook_entry_author %}<u>{{personlogentry.personexpedition.person|safe}}</u>{% else %}{{ personlogentry.personexpedition.person|safe }}{% endif %}, {% endfor %}{% if logbook_entry.other_people %}, {{logbook_entry.other_people}}{% endif %}</div>
|
||||
<div class="triptitle">{{logbook_entry.title|safe}}</div>
|
||||
<br />
|
||||
<a href="/logbookedit/{{logbook_entry.slug}}">Edit this entry</a>
|
||||
<br />
|
||||
{{logbook_entry.text|safe}}
|
||||
<div class="timeug">T/U: {{logbook_entry.time_underground|safe}} hours</div>
|
||||
<div class="editentry"><br /><a href="/logbookedit/{{logbook_entry.slug}}">Edit this entry</a><br /></div>
|
||||
{% endfor %}
|
||||
<hr />
|
||||
</body>
|
||||
|
Loading…
Reference in New Issue
Block a user