mirror of
https://expo.survex.com/repositories/troggle/.git
synced 2024-11-22 07:11:52 +00:00
Fixed parsers
This commit is contained in:
parent
c9729c046c
commit
1a8bc17f80
@ -69,8 +69,8 @@ def alphabet_suffix(n):
|
|||||||
if not alphabet:
|
if not alphabet:
|
||||||
alphabet = list(string.ascii_lowercase)
|
alphabet = list(string.ascii_lowercase)
|
||||||
|
|
||||||
if n < len(alphabet):
|
if n < len(alphabet) and n > 0:
|
||||||
suffix = alphabet[n]
|
suffix = alphabet[n-1]
|
||||||
else:
|
else:
|
||||||
suffix = "_X_" + random.choice(string.ascii_lowercase) + random.choice(string.ascii_lowercase)
|
suffix = "_X_" + random.choice(string.ascii_lowercase) + random.choice(string.ascii_lowercase)
|
||||||
return suffix
|
return suffix
|
||||||
|
@ -4,6 +4,7 @@ from django.contrib.auth import authenticate
|
|||||||
from django.contrib.auth import forms as auth_forms
|
from django.contrib.auth import forms as auth_forms
|
||||||
from django.contrib.auth import login, logout
|
from django.contrib.auth import login, logout
|
||||||
from django.contrib.auth.decorators import login_required
|
from django.contrib.auth.decorators import login_required
|
||||||
|
from django.http import HttpResponse, HttpResponseRedirect
|
||||||
from django.shortcuts import redirect, render
|
from django.shortcuts import redirect, render
|
||||||
from django.utils.http import url_has_allowed_host_and_scheme
|
from django.utils.http import url_has_allowed_host_and_scheme
|
||||||
|
|
||||||
@ -22,7 +23,7 @@ class login_required_if_public(object):
|
|||||||
|
|
||||||
def __call__(self, *args, **kwargs):
|
def __call__(self, *args, **kwargs):
|
||||||
return self.f(*args, **kwargs)
|
return self.f(*args, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
# This is copied from CUYC.cuy.website.view.auth
|
# This is copied from CUYC.cuy.website.view.auth
|
||||||
# If we want to do the whole online-email thing, we would also need to copy across the code in these
|
# If we want to do the whole online-email thing, we would also need to copy across the code in these
|
||||||
|
@ -8,6 +8,7 @@ from django.shortcuts import render, redirect
|
|||||||
|
|
||||||
import settings
|
import settings
|
||||||
|
|
||||||
|
from troggle.core.models.caves import GetCaveLookup
|
||||||
from troggle.core.models.logbooks import LogbookEntry, writelogbook, PersonLogEntry
|
from troggle.core.models.logbooks import LogbookEntry, writelogbook, PersonLogEntry
|
||||||
from troggle.core.models.survex import DrawingFile
|
from troggle.core.models.survex import DrawingFile
|
||||||
from troggle.core.models.troggle import DataIssue, Expedition, PersonExpedition
|
from troggle.core.models.troggle import DataIssue, Expedition, PersonExpedition
|
||||||
@ -340,7 +341,7 @@ def logbookedit(request, year=None, slug=None):
|
|||||||
"textrows": rows,
|
"textrows": rows,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
else: # no slug
|
else: # no slug or bad slug for an lbe which does not exist
|
||||||
# NEW logbook entry
|
# NEW logbook entry
|
||||||
return render(
|
return render(
|
||||||
request,
|
request,
|
||||||
|
@ -60,8 +60,8 @@ LOGBOOK_PARSER_SETTINGS = {
|
|||||||
LOGBOOKS_DIR = "years" # subfolder of settings.EXPOWEB
|
LOGBOOKS_DIR = "years" # subfolder of settings.EXPOWEB
|
||||||
|
|
||||||
ENTRIES = {
|
ENTRIES = {
|
||||||
"2023": 81,
|
"2023": 83,
|
||||||
"2022": 93,
|
"2022": 94,
|
||||||
"2019": 55,
|
"2019": 55,
|
||||||
"2018": 95,
|
"2018": 95,
|
||||||
"2017": 74,
|
"2017": 74,
|
||||||
@ -127,7 +127,7 @@ def reset_trip_id(date):
|
|||||||
suffix = alphabet_suffix(n)
|
suffix = alphabet_suffix(n)
|
||||||
|
|
||||||
tid = f"{date}{suffix}"
|
tid = f"{date}{suffix}"
|
||||||
# print(tid)
|
# print(already, n, tid)
|
||||||
return tid
|
return tid
|
||||||
|
|
||||||
rx_tripperson = re.compile(r"(?i)<u>(.*?)</u>$")
|
rx_tripperson = re.compile(r"(?i)<u>(.*?)</u>$")
|
||||||
@ -136,6 +136,7 @@ rx_round_bracket = re.compile(r"[\(\[].*?[\)\]]")
|
|||||||
def GetTripPersons(trippeople, expedition, logtime_underground, tid=None):
|
def GetTripPersons(trippeople, expedition, logtime_underground, tid=None):
|
||||||
res = []
|
res = []
|
||||||
author = None
|
author = None
|
||||||
|
guests = []
|
||||||
# print(f'# {tid}')
|
# print(f'# {tid}')
|
||||||
# print(f" - {tid} '{trippeople}' ")
|
# print(f" - {tid} '{trippeople}' ")
|
||||||
|
|
||||||
@ -154,11 +155,12 @@ def GetTripPersons(trippeople, expedition, logtime_underground, tid=None):
|
|||||||
try:
|
try:
|
||||||
personyear = GetPersonExpeditionNameLookup(expedition).get(tripperson.lower())
|
personyear = GetPersonExpeditionNameLookup(expedition).get(tripperson.lower())
|
||||||
if not personyear:
|
if not personyear:
|
||||||
if known_foreigner(tripperson):
|
guests.append(nickname_used)
|
||||||
message = f" ! - {expedition.year} Known foreigner: '{tripperson}' in entry {tid=}"
|
if known_foreigner(nickname_used):
|
||||||
|
message = f" ! - {expedition.year} Known foreigner: '{nickname_used}' in entry {tid=}"
|
||||||
print(message)
|
print(message)
|
||||||
else:
|
else:
|
||||||
message = f" ! - {expedition.year} No name match for: '{tripperson}' in entry {tid=} for this year."
|
message = f" ! - {expedition.year} No name match for: '{nickname_used}' in entry {tid=} for this year."
|
||||||
print(message)
|
print(message)
|
||||||
DataIssue.objects.create(parser="logbooks", message=message)
|
DataIssue.objects.create(parser="logbooks", message=message)
|
||||||
res.append((personyear, nickname_used, logtime_underground))
|
res.append((personyear, nickname_used, logtime_underground))
|
||||||
@ -170,10 +172,9 @@ def GetTripPersons(trippeople, expedition, logtime_underground, tid=None):
|
|||||||
raise
|
raise
|
||||||
if author_u:
|
if author_u:
|
||||||
author = personyear
|
author = personyear
|
||||||
else:
|
else: # *guest
|
||||||
# a person but with * prefix. Ignored everywhere.
|
guests.append(tripperson)
|
||||||
# print(f" ! - {expedition.year} * person : {tripperson}")
|
# print(f" ! - {expedition.year} * GUEST : {tripperson}")
|
||||||
pass
|
|
||||||
|
|
||||||
if not author:
|
if not author:
|
||||||
if not res:
|
if not res:
|
||||||
@ -181,7 +182,7 @@ def GetTripPersons(trippeople, expedition, logtime_underground, tid=None):
|
|||||||
author = res[-1][0] # the previous valid person and a time of 0 hours
|
author = res[-1][0] # the previous valid person and a time of 0 hours
|
||||||
|
|
||||||
# print(f" - {tid} [{author.person}] '{res[0][0].person}'...")
|
# print(f" - {tid} [{author.person}] '{res[0][0].person}'...")
|
||||||
return res, author
|
return res, author, guests
|
||||||
|
|
||||||
def tidy_time_underground(logtime_underground):
|
def tidy_time_underground(logtime_underground):
|
||||||
# Nasty hack, must tidy this up..
|
# Nasty hack, must tidy this up..
|
||||||
@ -202,7 +203,7 @@ def tidy_time_underground(logtime_underground):
|
|||||||
|
|
||||||
def tidy_trip_persons(trippeople, title, expedition, logtime_underground, tid):
|
def tidy_trip_persons(trippeople, title, expedition, logtime_underground, tid):
|
||||||
try:
|
try:
|
||||||
trippersons, author = GetTripPersons(trippeople, expedition, logtime_underground, tid=tid)
|
trippersons, author, guests = GetTripPersons(trippeople, expedition, logtime_underground, tid=tid)
|
||||||
# trippersons is a list of tuples (personyear, nickname_used, logtime_underground)
|
# trippersons is a list of tuples (personyear, nickname_used, logtime_underground)
|
||||||
except:
|
except:
|
||||||
message = f" ! - {expedition.year} Logentry: {title} - GetTripPersons FAIL to recognise nickname"
|
message = f" ! - {expedition.year} Logentry: {title} - GetTripPersons FAIL to recognise nickname"
|
||||||
@ -216,7 +217,7 @@ def tidy_trip_persons(trippeople, title, expedition, logtime_underground, tid):
|
|||||||
DataIssue.objects.create(parser="logbooks", message=message)
|
DataIssue.objects.create(parser="logbooks", message=message)
|
||||||
print(message)
|
print(message)
|
||||||
|
|
||||||
return trippersons, author
|
return trippersons, author, guests
|
||||||
|
|
||||||
def tidy_trip_cave(place):
|
def tidy_trip_cave(place):
|
||||||
# GetCaveLookup() needs to work better. None of this data is *used* though?
|
# GetCaveLookup() needs to work better. None of this data is *used* though?
|
||||||
@ -251,16 +252,18 @@ def tidy_tid(tid, title):
|
|||||||
tid = str(randint(1000, 9999)) + "_" + slugify(title)[:10].replace("-", "_")
|
tid = str(randint(1000, 9999)) + "_" + slugify(title)[:10].replace("-", "_")
|
||||||
return tid
|
return tid
|
||||||
|
|
||||||
def store_entry_into_database(date, place, tripcave, title, text, trippersons, author, expedition, logtime_underground, tid):
|
def store_entry_into_database(date, place, tripcave, title, text, trippersons, author, guests, expedition, logtime_underground, tid):
|
||||||
"""saves a single logbook entry and related personlogentry items
|
"""saves a single logbook entry and related personlogentry items
|
||||||
|
|
||||||
We could do a bulk update to save all the entries, but then we would need to do a query on
|
We could do a bulk update to save all the entries, but then we would need to do a query on
|
||||||
each one to get the primary key to asign to the PersonLogEntries. So overall probably not much
|
each one to get the primary key to assign to the PersonLogEntries. So overall probably not much
|
||||||
faster ?
|
faster ?
|
||||||
"""
|
"""
|
||||||
|
other_people = ", ".join(guests) # join list members separated by comma
|
||||||
|
|
||||||
nonLookupAttribs = {
|
nonLookupAttribs = {
|
||||||
"place": place,
|
"place": place,
|
||||||
|
"other_people": other_people, # *Ol's Mum, foreigners..
|
||||||
"text": text,
|
"text": text,
|
||||||
"expedition": expedition,
|
"expedition": expedition,
|
||||||
"time_underground": logtime_underground,
|
"time_underground": logtime_underground,
|
||||||
@ -324,6 +327,17 @@ def parser_date(tripdate, year):
|
|||||||
def parser_html(year, expedition, txt, seq=""):
|
def parser_html(year, expedition, txt, seq=""):
|
||||||
"""This uses some of the more obscure capabilities of regular expressions,
|
"""This uses some of the more obscure capabilities of regular expressions,
|
||||||
see https://docs.python.org/3/library/re.html
|
see https://docs.python.org/3/library/re.html
|
||||||
|
|
||||||
|
e.g.
|
||||||
|
* is greedy
|
||||||
|
*? is non-greedy
|
||||||
|
|
||||||
|
(?x) flag means VERBOSE
|
||||||
|
|
||||||
|
(?: ) non-capturing parentheses
|
||||||
|
|
||||||
|
\s whitespace
|
||||||
|
\S NOT whitespace
|
||||||
|
|
||||||
You can't see it here, but a round-trip export-then-import will move
|
You can't see it here, but a round-trip export-then-import will move
|
||||||
the endmatter up to the frontmatter. This made sense when translating
|
the endmatter up to the frontmatter. This made sense when translating
|
||||||
@ -357,7 +371,7 @@ def parser_html(year, expedition, txt, seq=""):
|
|||||||
for trippara in tripparas:
|
for trippara in tripparas:
|
||||||
logbook_entry_count += 1
|
logbook_entry_count += 1
|
||||||
tid = set_trip_seq_id(year, logbook_entry_count)
|
tid = set_trip_seq_id(year, logbook_entry_count)
|
||||||
# print(f' - new tid:{tid} lbe count: {logbook_entry_count}')
|
# print(f' - new seq tid:{tid} lbe count: {logbook_entry_count}')
|
||||||
|
|
||||||
s = re.match(
|
s = re.match(
|
||||||
r"""(?x)(?:\s*<div\sclass="tripdate"\sid=".*?">.*?</div>\s*<p>)? # second date
|
r"""(?x)(?:\s*<div\sclass="tripdate"\sid=".*?">.*?</div>\s*<p>)? # second date
|
||||||
@ -367,15 +381,17 @@ def parser_html(year, expedition, txt, seq=""):
|
|||||||
\s*<div\s+class="triptitle">\s*(.*?)</div>
|
\s*<div\s+class="triptitle">\s*(.*?)</div>
|
||||||
([\s\S]*?)
|
([\s\S]*?)
|
||||||
\s*(?:<div\s+class="timeug">\s*(.*?)</div>)?
|
\s*(?:<div\s+class="timeug">\s*(.*?)</div>)?
|
||||||
|
\s*(?:<div\s+class="editentry"\s*.*?</div>)?
|
||||||
\s*$
|
\s*$
|
||||||
""",
|
""",
|
||||||
trippara,
|
trippara,
|
||||||
)
|
)
|
||||||
if s:
|
if s:
|
||||||
tripid, tripid1, tripdate, trippeople, triptitle, triptext, tu = s.groups()
|
tripid, tripid1, tripdate, trippeople, triptitle, triptext, tu = s.groups()
|
||||||
|
# print(f"#{logbook_entry_count} {tu} {len(triptext)} ")
|
||||||
else:
|
else:
|
||||||
# if not re.search(r"Rigging Guide", trippara):
|
# if not re.search(r"Rigging Guide", trippara):
|
||||||
msg = f" !- Logbook. Can't parse entry, skipping:{logbook_entry_count} '{trippara[:55]}'...'{trippara}'"
|
msg = f" !- Logbook. Can't parse entry, skipping:{logbook_entry_count} '{trippara[:75]}'..."
|
||||||
print(msg)
|
print(msg)
|
||||||
DataIssue.objects.create(parser="logbooks", message=msg)
|
DataIssue.objects.create(parser="logbooks", message=msg)
|
||||||
continue
|
continue
|
||||||
@ -403,12 +419,12 @@ def parser_html(year, expedition, txt, seq=""):
|
|||||||
dupl[check] = 1
|
dupl[check] = 1
|
||||||
|
|
||||||
tu = tidy_time_underground(tu)
|
tu = tidy_time_underground(tu)
|
||||||
trippersons, author = tidy_trip_persons(trippeople, triptitle, expedition, tu, tid)
|
trippersons, author, guests = tidy_trip_persons(trippeople, triptitle, expedition, tu, tid)
|
||||||
tripcave = tidy_trip_cave(place)
|
tripcave = tidy_trip_cave(place)
|
||||||
tripcontent = tidy_trip_image_urls(tripcontent, ldate)
|
tripcontent = tidy_trip_image_urls(tripcontent, ldate)
|
||||||
tid = tidy_tid(tid, triptitle)
|
tid = tidy_tid(tid, triptitle)
|
||||||
|
|
||||||
entrytuple = (ldate, place, tripcave, triptitle, tripcontent, trippersons, author, expedition, tu, tid)
|
entrytuple = (ldate, place, tripcave, triptitle, tripcontent, trippersons, author, guests, expedition, tu, tid)
|
||||||
logentries.append(entrytuple)
|
logentries.append(entrytuple)
|
||||||
return logentries
|
return logentries
|
||||||
|
|
||||||
@ -509,13 +525,13 @@ def parser_blog(year, expedition, txt, sq=""):
|
|||||||
tripcontent = f"\n\n<!-- Content parsed from UK Caving Blog -->\nBlog Author: {trippeople}" + tripcontent
|
tripcontent = f"\n\n<!-- Content parsed from UK Caving Blog -->\nBlog Author: {trippeople}" + tripcontent
|
||||||
|
|
||||||
logtime_underground = 0
|
logtime_underground = 0
|
||||||
trippersons, author = tidy_trip_persons(trippeople, triptitle, expedition, logtime_underground, tid)
|
trippersons, author, guests = tidy_trip_persons(trippeople, triptitle, expedition, logtime_underground, tid)
|
||||||
# print(f" - author: {author}")
|
# print(f" - author: {author}")
|
||||||
tripcave = tidy_trip_cave(place)
|
tripcave = tidy_trip_cave(place)
|
||||||
tripcontent = tidy_trip_image_urls(tripcontent, year)
|
tripcontent = tidy_trip_image_urls(tripcontent, year)
|
||||||
tid = tidy_tid(tid, triptitle)
|
tid = tidy_tid(tid, triptitle)
|
||||||
|
|
||||||
entrytuple = (tripdate, place, tripcave, triptitle, tripcontent, trippersons, author, expedition, tu, tid)
|
entrytuple = (tripdate, place, tripcave, triptitle, tripcontent, trippersons, author, guests, expedition, tu, tid)
|
||||||
logentries.append(entrytuple)
|
logentries.append(entrytuple)
|
||||||
return logentries
|
return logentries
|
||||||
|
|
||||||
@ -621,10 +637,10 @@ def LoadLogbook(year):
|
|||||||
f" - Not a year with extant blog entries to import: '{year}' not in BLOG_PARSER_SETTINGS {BLOG_PARSER_SETTINGS}"
|
f" - Not a year with extant blog entries to import: '{year}' not in BLOG_PARSER_SETTINGS {BLOG_PARSER_SETTINGS}"
|
||||||
)
|
)
|
||||||
for entrytuple in logentries:
|
for entrytuple in logentries:
|
||||||
date, place, tripcave, triptitle, text, trippersons, author, expedition, tu, tid = entrytuple
|
date, place, tripcave, triptitle, text, trippersons, author, guests, expedition, tu, tid = entrytuple
|
||||||
if expo == expedition: # unneeded check, we zeroed it before filling it
|
if expo == expedition: # unneeded check, we zeroed it before filling it
|
||||||
# print(f" -- {triptitle}")
|
# print(f" -- {triptitle}")
|
||||||
store_entry_into_database(date, place, tripcave, triptitle, text, trippersons, author, expedition, tu, tid)
|
store_entry_into_database(date, place, tripcave, triptitle, text, trippersons, author, guests, expedition, tu, tid)
|
||||||
else:
|
else:
|
||||||
print(f" ! unexpected log entry labelled as '{expedition}' {tid}" )
|
print(f" ! unexpected log entry labelled as '{expedition}' {tid}" )
|
||||||
expo.save() # to save logbook name property
|
expo.save() # to save logbook name property
|
||||||
@ -708,8 +724,8 @@ def LoadLogbooks():
|
|||||||
# - LogBookEntry (text, who when etc.)
|
# - LogBookEntry (text, who when etc.)
|
||||||
# - PersonLogEntry (who was on that specific trip mentioned in the logbook entry)
|
# - PersonLogEntry (who was on that specific trip mentioned in the logbook entry)
|
||||||
for entrytuple in allentries:
|
for entrytuple in allentries:
|
||||||
date, place, tripcave, triptitle, text, trippersons, author, expedition, tu, tid = entrytuple
|
date, place, tripcave, triptitle, text, trippersons, author, guests, expedition, tu, tid = entrytuple
|
||||||
store_entry_into_database(date, place, tripcave, triptitle, text, trippersons, author, expedition, tu, tid)
|
store_entry_into_database(date, place, tripcave, triptitle, text, trippersons, author, guests, expedition, tu, tid)
|
||||||
|
|
||||||
for expo in expos:
|
for expo in expos:
|
||||||
expo.save() # to save logbook name property
|
expo.save() # to save logbook name property
|
||||||
|
@ -22,13 +22,11 @@ See troggle/code/views/other.py and core.models/logbooks.py writelogbook(year, f
|
|||||||
<hr />
|
<hr />
|
||||||
|
|
||||||
<div class="tripdate" id="{{logbook_entry.slug}}">{{logbook_entry.date|date:'Y-m-d'}}</div>
|
<div class="tripdate" id="{{logbook_entry.slug}}">{{logbook_entry.date|date:'Y-m-d'}}</div>
|
||||||
<div class="trippeople">{% for personlogentry in logbook_entry.personlogentry_set.all %}{% if personlogentry.is_logbook_entry_author %}<u>{{personlogentry.personexpedition.person|safe}}</u>{% else %}{{ personlogentry.personexpedition.person|safe }}{% endif %}, {% endfor %}</div>
|
<div class="trippeople">{% for personlogentry in logbook_entry.personlogentry_set.all %}{% if personlogentry.is_logbook_entry_author %}<u>{{personlogentry.personexpedition.person|safe}}</u>{% else %}{{ personlogentry.personexpedition.person|safe }}{% endif %}, {% endfor %}{% if logbook_entry.other_people %}, {{logbook_entry.other_people}}{% endif %}</div>
|
||||||
<div class="triptitle">{{logbook_entry.title|safe}}</div>
|
<div class="triptitle">{{logbook_entry.title|safe}}</div>
|
||||||
<br />
|
|
||||||
<a href="/logbookedit/{{logbook_entry.slug}}">Edit this entry</a>
|
|
||||||
<br />
|
|
||||||
{{logbook_entry.text|safe}}
|
{{logbook_entry.text|safe}}
|
||||||
<div class="timeug">T/U: {{logbook_entry.time_underground|safe}} hours</div>
|
<div class="timeug">T/U: {{logbook_entry.time_underground|safe}} hours</div>
|
||||||
|
<div class="editentry"><br /><a href="/logbookedit/{{logbook_entry.slug}}">Edit this entry</a><br /></div>
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
<hr />
|
<hr />
|
||||||
</body>
|
</body>
|
||||||
|
Loading…
Reference in New Issue
Block a user