fix comments and 2019 LB parsing

This commit is contained in:
Philip Sargent 2022-12-07 18:22:09 +00:00
parent 1eab261b30
commit 17b2b7b89c
5 changed files with 29 additions and 10 deletions

View File

@ -135,6 +135,7 @@ def do_pending_cave(k, url, area):
default_note = f"_Survex file found in loser repo but no description in expoweb <br><br><br>\n"
default_note += f"INSTRUCTIONS: First open 'This survex file' (link above the CaveView panel) to find the date and info. Then "
default_note += f"<br><br>\n\n - (0) look in the <a href=\"/noinfo/cave-number-index\">cave number index</a> for notes on this cave, "
default_note += f"<br><br>\n\n - (1) search in the survex file for the *ref to find a "
default_note += f"relevant wallet, e.g.<a href='/survey_scans/2009%252311/'>2009#11</a> and read the notes image files <br>\n - "
default_note += f"<br><br>\n\n - (2) search in the Expo for that year e.g. <a href='/expedition/2009'>2009</a> to find a "

View File

@ -20,14 +20,23 @@ for tunnel and therion files
todo='''- Rename functions more consistently between tunnel and therion variants
- Recode rx_valid_ext to use profile suffix() function
- Recode to use pathlib instead of whacky resetting of loop variable inside loop
to scan sub-folders.
- Recode rx_valid_ext to use pathlib suffix() function
- Recode load_drawings_files() to use a list of suffices not huge if-else monstrosity
'''
rx_valid_ext = re.compile(r'(?i)\.(?:png|jpg|pdf|jpeg|gif|txt)$')
def find_dwg_file(dwgfile, path):
'''Is given a line of text 'path' which may or may not contain a recognisable name of a scanned file
which we have already seen when we imported all the files we could find in the surveyscans directories
which we have already seen when we imported all the files we could find in the surveyscans directories.
The purpose is to find cross-references between Tunnel drawing files. But this is not reported anywhere yet ?
What is all this really for ?! Is this data used anywhere ??
'''
wallet, scansfile = None, None
mscansdir = re.search(r"(\d\d\d\d#X?\d+\w?|1995-96kh|92-94Surveybookkh|1991surveybook|smkhs)/(.*?(?:png|jpg|pdf|jpeg|gif|txt))$", path)
@ -58,15 +67,15 @@ def find_dwg_file(dwgfile, path):
if scansfile:
dwgfile.scans.add(scansfile)
elif path and not rx_valid_ext.search(path):
elif path and not rx_valid_ext.search(path): # ie not recognised as a path where wallets live and not an image file type
name = os.path.split(path)[1]
rdwgfilel = DrawingFile.objects.filter(dwgname=name)
rdwgfilel = DrawingFile.objects.filter(dwgname=name) # Check if it is another drawing file we have already seen
if len(rdwgfilel):
if len(rdwgfilel) > 1:
plist =[]
for df in rdwgfilel:
plist.append(df.dwgname)
message = f"! {len(rdwgfilel)} paths found with same name '{path}' {plist}"
plist.append(df.dwgpath)
message = f"- Warning {len(rdwgfilel)} files named '{name}' {plist}" # should not be a problem?
print(message)
DataIssue.objects.create(parser='Tunnel', message=message, url=f'/dwgdataraw/{path}')
rdwgfile = rdwgfilel[0]
@ -224,6 +233,7 @@ def setdrwfileinfo(dwgfile):
def load_drawings_files():
'''Breadth first search of drawings directory looking for sub-directories and *.xml files
This is brain-damaged very early code. Should be replaced with proper use of pathlib.
Why do we have all this detection of file types?! Why not use get_mime_types ?
What is it all for ??
@ -251,7 +261,7 @@ def load_drawings_files():
lf = os.path.join(drawdir, f)
ff = os.path.join(drawdatadir, lf)
if os.path.isdir(ff):
drawingsdirs.append(lf) # lunatic! adding to list in middle of list while loop!
drawingsdirs.append(lf) # lunatic! adding to list in middle of list while loop! Replace with pathlib functions.
elif Path(f).suffix.lower() == ".txt":
# Always creates new
dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1])

View File

@ -44,6 +44,9 @@ todo='''
- this is a slow and uncertain function: cave = getCaveByReference(caveRef)
- use Fixtures https://docs.djangoproject.com/en/4.1/ref/django-admin/#django-admin-loaddata to cache
data for old logbooks. New design needed, with a mechanism for flagging fixtures as outdated after edits.
'''
MAX_LOGBOOK_ENTRY_TITLE_LENGTH = 200
DEFAULT_LOGBOOK_PARSER = "parser_html"
@ -83,7 +86,7 @@ LOGBOOK_PARSER_SETTINGS = {
"1982": ("log.htm", "parser_html_01"),
}
entries = { "2022": 64, "2019": 55, "2018": 74, "2017": 60, "2016": 81, "2015": 79,
entries = { "2022": 64, "2019": 56, "2018": 74, "2017": 60, "2016": 81, "2015": 79,
"2014": 65, "2013": 51, "2012": 75, "2011": 68, "2010": 22, "2009": 52,
"2008": 49, "2007": 111, "2006": 60, "2005": 55, "2004": 76, "2003": 40, "2002": 31,
"2001": 48, "2000": 54, "1999": 79, "1998": 43, "1997": 53, "1996": 94, "1995": 41,

View File

@ -17,8 +17,11 @@
{% endif %}
{% endfor %}
</p>
<p>See also the <a href="/years/{{expedition.year}}/">documentation index</a> for this Expo
<p>See also the <a href="/wallets/year/{{expedition.year}}">wallet completion status</a> for this Expo
<p>See also the
<ul>
<li> <a href="/years/{{expedition.year}}/">documentation index</a> for this Expo
<li> <a href="/wallets/year/{{expedition.year}}">wallet completion status</a> for this Expo
<li> <a href="/years/{{expedition.year}}/{{expedition.logbookfile}}">full logbook</a> for this Expo
</ul>
{% if logged_in %}
<p>Reparse and reload this year's logbook by clicking here: <a href="/expedition/{{expedition.year}}?reload">RELOAD</a>
{% endif %}

View File

@ -64,6 +64,7 @@ If anyone really cares, they can always look in the original survex file
{% endfor %}
</table>
</div>
{% comment %}
<div style="color: slategray; background-color:lightcyan">
<p>Horrible convoluted bug here, but only for some survex files. Mostly, a survex block is duplicated. But not always.
@ -82,4 +83,5 @@ instead or writing it in python anyone could understand.<br>
- the connection between the two is made in the URL resolver in <var>troggle/urls.py</var>
<p>To be fixed!
</div>
{% endcomment %}
{% endblock %}