forked from expo/troggle
2526 lines
114 KiB
Python
2526 lines
114 KiB
Python
import copy
|
|
import os
|
|
import re
|
|
import subprocess
|
|
import sys
|
|
import time
|
|
from datetime import datetime, timezone
|
|
from pathlib import Path
|
|
|
|
import troggle.settings as settings
|
|
from troggle.core.models.caves import Cave, Entrance
|
|
from troggle.core.models.logbooks import QM
|
|
from troggle.core.models.survex import SurvexBlock, SurvexDirectory, SurvexFile, SurvexPersonRole, SurvexStation
|
|
from troggle.core.models.wallets import Wallet
|
|
from troggle.core.models.troggle import DataIssue, Expedition
|
|
from troggle.core.utils import chaosmonkey, get_process_memory
|
|
from troggle.parsers.logbooks import GetCaveLookup
|
|
from troggle.parsers.people import GetPersonExpeditionNameLookup, known_foreigner
|
|
|
|
"""Imports the tree of survex files following from a defined root .svx file
|
|
It also scans the Loser repo for all the svx files, which it loads individually afterwards.
|
|
"""
|
|
|
|
todo = """
|
|
- Lots to do to cut down on unnecessary .save() calls to avoid hitting the db so much. Should
|
|
speed it up noticeably.
|
|
|
|
- Obscure bug in the *team inheritance and rootblock initialization needs tracking down
|
|
|
|
- Learn to use Django .select_related() and .prefetch_related() to speed things up
|
|
https://zerotobyte.com/how-to-use-django-select-related-and-prefetch-related/
|
|
|
|
- LoadSurvexFile() Creates a new current survexfile and valid .survexdirectory
|
|
The survexblock passed-in is not necessarily the parent. FIX THIS.
|
|
|
|
- Finish writing the parse_one_file() function for survexfiles edited online. Perhaps
|
|
easier if this is a completely new file rather than an existing file.. nasty.
|
|
|
|
- When Olly implements LEG in the 'dump3d --legs' utility, then we can use that to get the length of
|
|
all the legs in a survex block instead of adding them up ourselves. Which means that we can
|
|
ignore all the Units and offset stuff, that troggle will work with survex files with backsights,
|
|
repeated readings from distox etc.. Not actually useful for pre 2022 survey data,
|
|
but good future-proofing.
|
|
Also it will be a tiny bit more accurate as these leg lengths are after loop closure fixup.
|
|
"""
|
|
# Module-level state; both start as None in this part of the file.
survexblockroot = None
survexomitsroot = None
# String constants; presumably the names of the two root blocks - TODO confirm against their users.
ROOTBLOCK = "rootblock"
OMITBLOCK = "omitblock"
# NOTE: despite the name, the value is the number of feet in one metre (1 m = 3.28084 ft).
METRESINFEET = 3.28084
UNSEENS = "_unseens.svx"

stop_dup_warning = False
dup_includes = 0
debugprint = False  # Turns on debug printout for just one *include file
debugprinttrigger = "!"

# In-memory stash of (parser, message, url, sb) tuples appended by stash_data_issue()
# and written to the database in one go by store_data_issues().
dataissues = []
|
|
|
|
class SurvexLeg:
    """Plain holder for the three readings of one survey leg.

    No longer a models.Model subclass, so no longer a database table.
    """

    # All three readings default to zero.
    tape = compass = clino = 0.0
|
|
|
|
def datewallet(w, earliest):
    """Derive a wallet date from the survex blocks attached to the wallet.

    Looks at every SurvexBlock whose ``scanswallet`` is *w* and keeps the
    earliest block date that is earlier than *earliest*.

    REFACTOR this to do the whole date-getting task

    Currently there is only one SurvexBlock, but this is in anticipation of
    changing the schema to allow many.

    Side effect: sets ``w.date`` to the ISO-format string of the date found,
    or to None when no block had a date earlier than *earliest*.
    Returns the new value of ``w.date``.
    """
    oldest = earliest
    for blk in SurvexBlock.objects.filter(scanswallet=w):  # only ONE I think ?!
        if not blk.date:
            continue  # undated block, nothing to compare
        if blk.date < oldest:
            oldest = blk.date

    # Unchanged means no usable date was found on any block.
    w.date = None if oldest == earliest else oldest.isoformat()
    return w.date
|
|
|
|
def set_walletdate(w):
    """Make sure the wallet has a date, then save it.

    ``w.date()`` is tried first (it sets .walletdate as a side-effect if it
    gets it from JSON). If that yields nothing, the date is taken from the
    wallet's survex blocks via datewallet(), using today as the upper bound.
    The wallet is saved in both cases.
    """
    today = datetime.now().date()
    already_dated = w.date()
    if not already_dated:
        # Not in JSON, so checks all the survex blocks
        w.walletdate = datewallet(w, today)
    w.save()
|
|
|
|
def stash_data_issue(parser=None, message=None, url=None, sb=None):
    """Queue one data issue in memory instead of writing it to the database.

    The four values are appended as a single tuple to the module-level
    ``dataissues`` list, so the database is not hit until the end of the import.
    """
    issue = (parser, message, url, sb)
    dataissues.append(issue)
|
|
|
|
def store_data_issues():
    """Write every stashed data issue to the database, then empty the stash.

    All DataIssue objects are built first and committed with one bulk_create().
    When an issue has no url but does have ``sb``, the url is derived from
    ``sb`` with get_offending_filename().
    """
    global dataissues
    print(f" - Storing {len(dataissues)} Data Issues into database")

    def resolve_url(url, sb):
        # Only fall back to the survex path when no explicit url was given.
        if url is None and sb is not None:
            return get_offending_filename(sb)
        return url

    # make a list of objects, but don't commit to database yet
    unsaved = [
        DataIssue(parser=parser, message=message, url=resolve_url(url, sb))
        for parser, message, url, sb in dataissues
    ]
    # Now commit to db
    DataIssue.objects.bulk_create(unsaved)
    dataissues = []  # in database now, so empty cache
|
|
|
|
def get_offending_filename(path):
    """Build the URL shown for a line in the DataErrors page.

    That page reports problems on importing data into troggle; *path* is the
    survex path without its extension.
    """
    prefix, suffix = "/survexfile/", ".svx"
    return prefix + path + suffix
|
|
|
|
trip_people_cache = {}  # per survexblock, so robust wrt PUSH/POP begin/end


def get_team_on_trip(survexblock):
    """Return the SurvexPersonRole entries for a survexblock, using a cache.

    The database is queried only on the first request for a given block; the
    result (a query list) is remembered in ``trip_people_cache``.
    Only used for complete team.
    """
    try:
        return trip_people_cache[survexblock]
    except KeyError:
        pass  # first request for this block: fall through to the query

    team = SurvexPersonRole.objects.filter(survexblock=survexblock)  # not very good Django style
    trip_people_cache[survexblock] = team
    return team
|
|
|
|
def get_people_on_trip(survexblock):
    """Return the distinct displayable names of the people on a survexblock trip.

    Names come from the ``personname`` of each entry returned by
    get_team_on_trip(). Only used for complete team.
    """
    names = [f"{role.personname}" for role in get_team_on_trip(survexblock)]
    return list(set(names))
|
|
|
|
trip_person_record = {}  # per survexblock, so robust wrt PUSH/POP begin/end
trip_team_cache = {}  # per survexblock, so robust wrt PUSH/POP begin/end


def put_person_on_trip(survexblock, personexpedition, tm):
    """Queue one person as a team member of a survexblock, without touching the db.

    Uses a cache to avoid a database query if it doesn't need to.
    Only used for a single person.

    The SurvexPersonRole object is created in memory and appended to
    ``trip_team_cache[survexblock]``; confirm_team_on_trip() commits it later.

    Args:
        survexblock: the block the person surveyed in.
        personexpedition: the person's expedition record; its ``.person`` is read.
        tm: the name as written on the *team line.

    Returns:
        True if this (survexblock, personexpedition) pair was already recorded,
        otherwise False (including when the role object could not be created).
    """
    key = (survexblock, personexpedition)
    if key in trip_person_record:
        return True

    try:
        personrole = SurvexPersonRole(  # does not commit to db yet
            survexblock=survexblock,
            person=personexpedition.person,
            personexpedition=personexpedition,
            personname=tm,
        )
    except Exception:
        # This is a module-level function, so there is no self.insp indent
        # prefix to print, and there is no personrole to cache: report and stop.
        message = f"! *team '{tm}' FAIL, already created {survexblock.survexfile.path} ({survexblock}) "
        print(message)
        stash_data_issue(
            parser="survex", message=message, url=None, sb=(survexblock.survexfile.path)
        )
        return False

    trip_team_cache.setdefault(survexblock, []).append(personrole)
    trip_person_record[key] = 1
    return False
|
|
|
|
def confirm_team_on_trip(survexblock):
    """Commit the queued SurvexPersonRole objects of a survexblock to the db.

    Does nothing when the block has no entry in ``trip_team_cache``. After the
    bulk_create() the block's queue is reset to an empty list.
    """
    if survexblock in trip_team_cache:
        # Now commit to db
        SurvexPersonRole.objects.bulk_create(trip_team_cache[survexblock])
        trip_team_cache[survexblock] = []  # in database now, so empty cache
|
|
|
|
def check_team_cache():
    """Print a CACHEFAIL line for the call itself and for every block in trip_team_cache."""
    print("! *team CACHEFAIL check_team_cache() called ")

    for block in trip_team_cache:
        print(f"! *team CACHEFAIL, already created {block.survexfile.path} ({block}) ")
|
|
|
|
|
|
person_pending_cache = {}  # per survexblock, so robust wrt PUSH/POP begin/end


def add_to_pending(survexblock, tm):
    """Collect a team name before we have a date.

    Without a date the name cannot be validated against expo attendance yet,
    so it is parked in the per-block set in ``person_pending_cache``.
    """
    person_pending_cache.setdefault(survexblock, set()).add(tm)


def get_team_pending(survexblock):
    """Hand over the *team names collected before the *date line of a survexblock.

    Returns:
        The set of pending names for the block (emptying the block's entry),
        or None if nothing was ever queued for this block.
    """
    if survexblock not in person_pending_cache:
        return None

    teamnames = person_pending_cache[survexblock]  # a set of names
    # Reset to an empty *set*, not a tuple, so that add_to_pending() can keep
    # calling .add() on this entry afterwards.
    person_pending_cache[survexblock] = set()
    return teamnames
|
|
|
|
class LoadingSurvex:
    """A 'survex block' is a *begin...*end set of cave data.
    A survex file can contain many begin-end blocks, which can be nested, and which can *include
    other survex files.
    A 'scanswallet' is what we today call a "survey scans folder" or a "wallet".
    """

    # python regex flags (?i) means case-insensitive, (?s) means . matches newline too
    # see https://docs.python.org/3/library/re.html
    rx_begin = re.compile(r"(?i)begin")
    rx_begin2 = re.compile("(?i)begin$")

    rx_end = re.compile(r"(?i)end$")
    rx_end2 = re.compile("(?i)end$")
    rx_title = re.compile(r"(?i)title$")
    rx_title2 = re.compile("(?i)title$")
    rx_ref = re.compile(r"(?i)ref$")
    rx_data = re.compile(r"(?i)data$")
    rx_flags = re.compile(r"(?i)flags$")
    rx_alias = re.compile(r"(?i)alias$")
    rx_entrance = re.compile(r"(?i)entrance$")
    rx_date = re.compile(r"(?i)date$")
    rx_units = re.compile(r"(?i)units$")
    rx_team = re.compile(r"(?i)team$")
    rx_set = re.compile(r"(?i)set$")

    rx_names = re.compile(r"(?i)names")
    rx_flagsnot = re.compile(r"not\s")
    rx_linelen = re.compile(r"[\d\-+.]+$")
    instruments = "(bitch|bodger|bolt|bolter|bolting|book|clino|comp|compass|consultant|disto|distox|distox2|dog|dogsbody|drawing|drill|gps|helper|inst|instr|instrument|monkey|nagging|nail|nail_polish|nail_polish_bitch|nail_polish_monkey|nail_varnish|nail_varnish_bitch|note|paint|photo|pic|point|polish|powerdrill|rig|rigger|rigging|sketch|slacker|something|tape|topodroid|unknown|useless|varnish|waiting_patiently)"
    # Every fragment containing a backslash is a raw string, so that "\s" is
    # passed to the regex engine and is not an invalid Python string escape.
    rx_teammem = re.compile(r"(?i)" + instruments + r"?(?:es|s)?\s+(.*)$")
    rx_teamold = re.compile(r"(?i)(.*)\s+" + instruments + "?(?:es|s)?$")
    rx_teamabs = re.compile(r"(?i)^\s*(" + instruments + r")?(?:es|s)?\s*$")
    rx_person = re.compile(r"(?i) and |/| / |, | , |&| & | \+ |^both$|^none$")
    rx_qm = re.compile(
        # r"(?i)^\s*QM(\d+)\s+(.+)\s+([\w\-\_]+)\.([\w\.\-]+)\s+(([\w\-]+)\.([\w\.\-]+)|\-)\s+(.+)$"
        r"(?i)^\s*QM(\d+)\s+([^\s]+)\s+([^\s]+)\s+([^\s]+)\s+(.+)$"
    )
    # does not recognise non numeric suffix survey point ids
    rx_qm0 = re.compile(r"(?i)^\s*QM(\d+)\s+(.+)$")
    rx_qm_tick = re.compile(r"(?i)^\s*QM(\d+)\s+TICK\s([\d\-]+)\s(.*)$")
    # remember there is also QM_PATTERN used in views.other and set in settings.py
    rx_tapelng = re.compile(r"(?i).*(tape|length).*$")

    rx_cave = re.compile(r"(?i)caves-(\d\d\d\d)/([-\d\w]+|\d\d\d\d-?\w+-\d+)")
    rx_comment = re.compile(r"([^;]*?)\s*(?:;\s*(.*))?\n?$")
    rx_comminc = re.compile(r"(?i)^\|\*include[\s]*([-\w/]*).*$")  # inserted by linear collate ;|*include
    rx_commcni = re.compile(r"(?i)^\|\*edulcni[\s]*([-\w/]*).*$")  # inserted by linear collate ;|*edulcni
    rx_include = re.compile(r"(?i)^\s*(\*include[\s].*)$")
    rx_include2 = re.compile("(?i)include$")
    rx_commref = re.compile(r"(?i)^\s*ref(?:erence)?[\s.:]*(\d+)\s*#\s*(X)?\s*(\d+)")
    rx_ref_text = re.compile(r'(?i)^\s*\"[^"]*\"\s*$')
    rx_star = re.compile(r"(?i)\s*\*[\s,]*(\w+)\s*(.*?)\s*(?:;.*)?$")
    rx_starref = re.compile(r"(?i)^\s*\*ref[\s.:]*((?:19[6789]\d)|(?:20[0123]\d))\s*#?\s*(X)?\s*(.*?\d+.*?)$")
    rx_argsref = re.compile(r"(?i)^[\s.:]*((?:19[6789]\d)|(?:20[012345]\d))\s*#?\s*(X)?\s*(.*?\d+.*?)$")
    rx_badmerge = re.compile(r"(?i).*(\>\>\>\>\>)|(\=\=\=\=\=)|(\<\<\<\<\<).*$")
    rx_ref2 = re.compile(r"(?i)\s*ref[.;]?")
    rx_commteam = re.compile(r"(?i)\s*(Messteam|Zeichner)\s*[:]?(.*)")
    rx_quotedtitle = re.compile(r'(?i)^"(.*)"$')

    # This interprets the survex "*data normal" command which sets out the order of the fields in the data, e.g.
    # *DATA normal from to length gradient bearing ignore ignore ignore ignore
    datastardefault = {"type": "normal", "from": 0, "to": 1, "tape": 2, "compass": 3, "clino": 4}
    flagsdefault = {"duplicate": False, "surface": False, "splay": False, "skiplegs": False, "splayalias": False}

    # NOTE: the lists, dicts and sets below are class attributes, so they are
    # shared by all instances unless rebound on an instance.
    datastar = {}
    flagsstar = {}
    units = "metres"
    unitsfactor = None
    slength = 0.0
    legsnumber = 0
    depthbegin = 0
    depthinclude = 0
    unitsstack = []
    legsnumberstack = []
    slengthstack = []
    teaminheritstack = []
    teamcurrentstack = []
    dateinheritstack = []
    datecurrentstack = []
    stackbegin = []
    flagsstack = []
    datastack = []
    includestack = []
    stacksvxfiles = []
    svxfileslist = []
    svxdirs = {}
    uniquefile = {}  # each key is a survex path, and its value is a list of parent files
    expos = {}
    survexdict = {}  # each key is a directory, and its value is a list of files
    lineno = 0
    insp = ""
    callcount = 0
    caverncount = 0
    ignoreprefix = ["surface", "kataster", "fixedpts", "gpx"]
    ignorenoncave = [
        "caves-1623",
        "caves-1623/2007-NEU",
        "caves-1626",
        "caves-1624",
        "caves-1627",
        "fixedpts/gps/gps00raw",
        "",
    ]
    TREE = "tree"
    ODDS = "oddments"
    svxpass = TREE
    includedfilename = ""
    currentsurvexblock = None
    currentsurvexfile = None
    currentcave = None
    caverndate = None
    currentteam = set()
    inheritteam = set()
    currentdate = None
    inheritdate = None
    pending = []
    adhocload = False
|
|
|
|
def __init__(self):
    """Store the result of GetCaveLookup() on the instance as ``caveslist``."""
    self.caveslist = GetCaveLookup()
|
|
|
|
def LoadSurvexFallThrough(self, survexblock, line, cmd):
    """Deal with a survex *command that no other handler has consumed.

    Commands we deliberately ignore return silently. Anything else is printed
    and stashed as a data issue: as a warning when it is a command this parser
    normally handles, as a bad command otherwise.
    """
    # "require": should we check survex version available for processing?
    # The rest we ignore, which is fine.
    ignored = (
        "require",
        "equate", "fix", "calibrate", "cs", "export", "case", "declination", "infer", "instrument", "sd",
    )
    if cmd in ignored:
        return

    normally_parsed = ("include", "data", "flags", "title", "entrance", "set", "units", "alias", "ref")
    if cmd in normally_parsed:
        message = (
            f"! Warning. Unparsed [*{cmd}]: '{line}' {survexblock.survexfile.path} - not an error (probably)"
        )
    else:
        message = (
            f"! Bad unrecognised svx command: [*{cmd}] {line} ({survexblock}) {survexblock.survexfile.path}"
        )

    print(self.insp + message)
    stash_data_issue(
        parser="survex", message=message, url=None, sb=(survexblock.survexfile.path)
    )
|
|
|
|
def get_team_inherited(self, survexblock): # survexblock only used for debug mesgs
|
|
"""See get_team_pending(survexblock) which gets called at the same time,
|
|
when we see a *date line"""
|
|
global person_pending_cache
|
|
|
|
if self.inheritteam:
|
|
message = (
|
|
f"- INHERITING ({survexblock.parent})>({survexblock}) {survexblock.survexfile.path} '{self.inheritteam}'"
|
|
)
|
|
print(self.insp + message)
|
|
# stash_data_issue(
|
|
# parser="survex", message=message, url=None, sb=(survexblock.survexfile.path)
|
|
# )
|
|
return self.inheritteam
|
|
|
|
def fix_undated(self, survexblock):
|
|
"""Called when we reach *end of a block OR when a QM is seen.
|
|
Checks to see if the block has no *date, in which case it uses the
|
|
inherited date.
|
|
This is fine if the inherited date is from the same SurvexFile,
|
|
but inheriting dates across *include files is almost certainly NOT
|
|
expected behaviour, even though it is syntactically "correct",
|
|
so triggers a Warning.
|
|
"""
|
|
if survexblock.parent.name == "troggle_unseens":
|
|
# Bolluxed up if we try to inherit from this random junk, so don't.
|
|
return
|
|
|
|
if self.currentdate:
|
|
# already set
|
|
if not survexblock.date:
|
|
# error
|
|
message = (
|
|
f"! no survexblock.date but currentdate is set. ({survexblock})-{survexblock.survexfile.path} {self.currentdate=}"
|
|
)
|
|
print(self.insp + message)
|
|
stash_data_issue(
|
|
parser="survex", message=message, url=None, sb=(survexblock.survexfile.path)
|
|
)
|
|
return
|
|
|
|
if self.inheritdate:
|
|
survexblock.date = self.inheritdate
|
|
self.currentdate = self.inheritdate # unecessary duplication
|
|
# Not an error, so not put in DataIssues, but is printed to debug output
|
|
message = (
|
|
f"- No *date. INHERITING date from ({survexblock.parent})-{survexblock.parent.survexfile.path} to ({survexblock})-{survexblock.survexfile.path} {self.inheritdate:%Y-%m-%d}"
|
|
)
|
|
print(self.insp + message)
|
|
# stash_data_issue(
|
|
# parser="survex", message=message, url=None, sb=(survexblock.survexfile.path) # child
|
|
# )
|
|
if survexblock.survexfile != survexblock.parent.survexfile:
|
|
# This is noteworthy, however.
|
|
message = (
|
|
f"- Warning *date INHERITED from DIFFERENT file:\n ({survexblock.parent})-{survexblock.parent.survexfile.path} to ({survexblock})-{survexblock.survexfile.path} {self.inheritdate:%Y-%m-%d}\n {self.stackbegin} {self.inheritdate:%Y-%m-%d}"
|
|
)
|
|
print(self.insp + message)
|
|
stash_data_issue(
|
|
parser="survex", message=message, url=None, sb=(survexblock.parent.survexfile.path) # PARENT
|
|
)
|
|
|
|
return self.inheritdate
|
|
else:
|
|
# This is not an error in the Expo dataset.
|
|
# Many files just holding *include lines do not have dates.
|
|
# Hardly _any_ of the ARGE survex files have dates !
|
|
pass
|
|
# message = f" ! No survexblock.date inheritable in '{survexblock}' in '{survexblock.survexfile.path}', setting to 1976"
|
|
# print(self.insp + message)
|
|
# stash_data_issue(
|
|
# parser="survex", message=message, url=None, sb=(survexblock.survexfile.path)
|
|
# )
|
|
# expoyear = "1976"
|
|
return
|
|
|
|
def fix_anonymous(self, survexblock):
|
|
"""Called when we reach *end of a block
|
|
Checks to see if the block has no team attached, in which case it uses the
|
|
inherited team.
|
|
This is fine if the inherited team is from the same SurvexFile,
|
|
but inheriting team across *include files is almost certainly NOT
|
|
expected behaviour, even though it is syntactically "correct".
|
|
|
|
If the block has no date, then it is assumed to be an abstract container,
|
|
with no relevant team, and anyway we can't attach a PersonExpedition without
|
|
knowing the year. Unless its parent has an identified expo"""
|
|
|
|
if survexblock.parent.name == "troggle_unseens":
|
|
# Bolluxed up if we try to inherit from this random junk, so don't.
|
|
return
|
|
|
|
expo = survexblock.expedition # may be None if no *date yet
|
|
if not expo:
|
|
expo = survexblock.parent.expedition # immediate parent works mostly
|
|
if not expo:
|
|
return
|
|
|
|
if not self.currentteam: # i.e. if it is a dated block and has no team
|
|
if teamnames := self.get_team_inherited(survexblock):# WALRUS
|
|
for tm in teamnames:
|
|
personexpedition = GetPersonExpeditionNameLookup(expo).get(tm.lower())
|
|
if personexpedition:
|
|
put_person_on_trip(survexblock, personexpedition, tm)
|
|
return
|
|
|
|
def LoadSurvexTeam(self, survexblock, line):
|
|
"""Interpeting the *team fields means interpreting older style survex as well as current survex standard,
|
|
*team Insts Anthony Day - this is how most of our files specify the team member
|
|
*team "Anthony Day" notes pictures tape - this is how the survex documentation says it should be done
|
|
We have a huge variety of abbreviations and mispellings. The most laconic being
|
|
*team gb, bl
|
|
|
|
personrole is used to record that a person was on a survex trip, NOT the role they played.
|
|
(NB PersonLogEntry is a logbook thing, not a survex thing. )
|
|
"""
|
|
|
|
def record_team_member(tm, survexblock):
|
|
tm = tm.strip("\"'").strip()
|
|
# Refactor. The dict GetPersonExpeditionNameLookup(expo) indexes by name and has values of personexpedition
|
|
# This is convoluted
|
|
|
|
# we need the current expedition, but if there has been no date yet in the survex file, we don't know which one it is.
|
|
# so we can't validate whether the person was on expo or not.
|
|
# we will have to attach them to the survexblock anyway, and then do a
|
|
# later check on whether they are valid when we get the date.
|
|
|
|
self.currentteam.add(tm) # used in push/pop block code
|
|
expo = survexblock.expedition # may be None if no *date yet
|
|
|
|
if expo:
|
|
personexpedition = GetPersonExpeditionNameLookup(expo).get(tm.lower())
|
|
if personexpedition:
|
|
put_person_on_trip(survexblock, personexpedition, tm)
|
|
|
|
elif known_foreigner(tm): # note, not using .lower()
|
|
message = f"- *team {expo.year} '{tm}' known foreigner on *team {survexblock.survexfile.path} ({survexblock}) in '{line}'"
|
|
print(self.insp + message)
|
|
# stash_data_issue(parser='survex', message=message, url=None, sb=(survexblock.survexfile.path))
|
|
else:
|
|
# we know the date and expo, but can't find the person
|
|
message = f"! *team {expo.year} '{tm}' FAIL personexpedition lookup on *team {survexblock.survexfile.path} ({survexblock}) in '{line}'"
|
|
print(self.insp + message)
|
|
stash_data_issue(
|
|
parser="survex", message=message, url=None, sb=(survexblock.survexfile.path)
|
|
)
|
|
else:
|
|
add_to_pending(survexblock, tm)
|
|
# don't know the date yet, so cannot query the table about validity.
|
|
# assume the person is valid. It will get picked up with the *date appears
|
|
# There are hundreds of these..
|
|
message = (
|
|
f"- Team before Date: {line} ({survexblock}) {survexblock.survexfile.path}"
|
|
)
|
|
# print(self.insp + message)
|
|
# stash_data_issue(
|
|
# parser="survex team", message=message, url=None, sb=(survexblock.survexfile.path)
|
|
# )
|
|
|
|
mteammember = self.rx_teammem.match(line) # matches the role at the beginning
|
|
if not mteammember:
|
|
moldstyle = self.rx_teamold.match(line) # matches the role at the the end of the string
|
|
if moldstyle:
|
|
for tm in self.rx_person.split(moldstyle.group(1)):
|
|
if tm:
|
|
record_team_member(tm, survexblock)
|
|
# seems to be working
|
|
# msg = "! OLD tm='{}' line: '{}' ({}) {}".format(tm, line, survexblock, survexblock.survexfile.path)
|
|
# print(msg, file=sys.stderr)
|
|
else:
|
|
message = f"! *team {survexblock.survexfile.path} ({survexblock}) Weird '{mteammember.group(1)}' oldstyle line: '{line}'"
|
|
print(self.insp + message)
|
|
stash_data_issue(
|
|
parser="survex", message=message, url=None, sb=(survexblock.survexfile.path)
|
|
)
|
|
else:
|
|
nullmember = self.rx_teamabs.match(line) # matches empty role line. Ignore these.
|
|
if not nullmember:
|
|
message = f"! *team {survexblock.survexfile.path} ({survexblock}) Bad line: '{line}'"
|
|
print(self.insp + message)
|
|
stash_data_issue(
|
|
parser="survex", message=message, url=None, sb=(survexblock.survexfile.path)
|
|
)
|
|
else:
|
|
for tm in self.rx_person.split(mteammember.group(2)):
|
|
if tm:
|
|
record_team_member(tm, survexblock)
|
|
else:
|
|
if mteammember.group(2).lower() not in ("none", "both"):
|
|
message = f"! Weird *team '{mteammember.group(2)}' newstyle line: '{line}' ({survexblock}) {survexblock.survexfile.path}"
|
|
print(self.insp + message)
|
|
stash_data_issue(
|
|
parser="survex", message=message, url=None, sb=(survexblock.survexfile.path)
|
|
)
|
|
|
|
def LoadSurvexEntrance(self, survexblock, line):
    """Placeholder for *entrance handling. Not using this yet."""
    return None
|
|
|
|
def LoadSurvexAlias(self, survexblock, line):
|
|
# *alias station - ..
|
|
splayalias = re.match("(?i)\s*station\s*\-\s*\.\.\s*$", line)
|
|
if splayalias:
|
|
self.flagsstar["splayalias"] = True
|
|
print(line)
|
|
else:
|
|
message = f"! Bad *ALIAS: '{line}' ({survexblock}) {survexblock.survexfile.path}"
|
|
print(self.insp + message)
|
|
stash_data_issue(parser="survex", message=message)
|
|
|
|
def LoadSurvexUnits(self, survexblock, line):
|
|
# all for 4 survex files with measurements in feet. bugger.
|
|
# Won't need this once we move to using cavern or d3dump output for lengths
|
|
tapeunits = self.rx_tapelng.match(line) # tape|length
|
|
if not tapeunits:
|
|
return
|
|
convert = re.match("(?i)(\w*)\s*([\.\d]+)\s*(\w*)", line)
|
|
if convert:
|
|
factor = convert.groups()[1]
|
|
self.unitsfactor = float(factor)
|
|
if debugprint:
|
|
message = (
|
|
f"! *UNITS NUMERICAL conversion [{factor}x] '{line}' ({survexblock}) {survexblock.survexfile.path}"
|
|
)
|
|
print(self.insp + message)
|
|
stash_data_issue(parser="survexunits", message=message)
|
|
|
|
feet = re.match("(?i).*feet$", line)
|
|
metres = re.match("(?i).*(METRIC|METRES|METERS)$", line)
|
|
if feet:
|
|
self.units = "feet"
|
|
elif metres:
|
|
self.units = "metres"
|
|
else:
|
|
message = f"! *UNITS in YARDS!? - not converted '{line}' ({survexblock}) {survexblock.survexfile.path}"
|
|
print(self.insp + message)
|
|
stash_data_issue(parser="survexunits", message=message)
|
|
|
|
def get_expo_from_year(self, year, line, survexblock):
|
|
# cacheing to save DB query on every block
|
|
if year in self.expos:
|
|
expo = self.expos[year]
|
|
else:
|
|
expeditions = Expedition.objects.filter(year=year)
|
|
if len(expeditions) != 1:
|
|
message = (
|
|
f"! More than one expedition in year {year} '{line}' ({survexblock}) {survexblock.survexfile.path}"
|
|
)
|
|
print(self.insp + message)
|
|
stash_data_issue(
|
|
parser="survex", message=message, url=None, sb=(survexblock.survexfile.path)
|
|
)
|
|
if expeditions:
|
|
expo = expeditions[0]
|
|
self.expos[year] = expo
|
|
else:
|
|
expo = Expedition.objects.get(year="1976")
|
|
message = f"! DATE INCORRECT. There is no expedition for the year {year}. {survexblock.survexfile.path} ({survexblock}) - set to 1976."
|
|
print(self.insp + message)
|
|
stash_data_issue(parser='survex', message=message, url=None, sb=(survexblock.survexfile.path))
|
|
return expo
|
|
|
|
def LoadSurvexDate(self, survexblock, line):
    """Parse a *date line, date the survexblock and validate its team.

    We now have a valid date for this survexblock, so we now know the expo
    it relates to and can use GetPersonExpeditionNameLookup(expo) to check whether
    the people are correct.

    Note that a *team line can come before AND after a *date line.

    Accepted forms are a full date (10 characters), a month (7) or a year (4),
    with "." or "-" as separators; for a date range only the first date is
    used. An unrecognised form is reported and the block is left untouched.
    Otherwise the block gets its date and expedition, ``self.currentdate`` is
    updated and the block is saved.
    """

    def setdate_on_survexblock(year):
        """Either *date comes before any *team, in which case there are no prior
        PersonRoles attached, or
        *team came before this *date, in which case the names are only in 'pending'"""
        expo = self.get_expo_from_year(year, line, survexblock)
        survexblock.expedition = expo

        team = get_team_on_trip(survexblock)  # should be empty, should only be in 'pending'
        if len(team) > 0:
            message = f"! *team {expo.year} Multiple *date in one block? Already someone on team when *date seen. {survexblock.survexfile.path} ({survexblock}) in '{line}'"
            print(self.insp + message)
            stash_data_issue(parser='survex', message=message, url=None, sb=(survexblock.survexfile.path))

        if teamnames := get_team_pending(survexblock):
            for tm in teamnames:
                if known_foreigner(tm):
                    message = f"- *team {expo.year} '{tm}' known foreigner *date (misordered) {survexblock.survexfile.path} ({survexblock}) in '{line}'"
                    print(self.insp + message)
                    continue

                pe = GetPersonExpeditionNameLookup(expo).get(tm.lower())
                if pe:
                    put_person_on_trip(survexblock, pe, tm)
                else:
                    message = f"! *team {year} '{tm}' FAIL personexpedition lookup on *date {survexblock.survexfile.path} ({survexblock}) "
                    print(self.insp + message)
                    stash_data_issue(
                        parser="survex",
                        message=message,
                        url=None, sb=(survexblock.survexfile.path),
                    )

    oline = line
    if len(line) > 10:
        message = f"! DATE Warning LONG DATE '{oline}' ({survexblock}) {survexblock.survexfile.path}"
        print(self.insp + message)
        stash_data_issue(parser='svxdate', message=message, url=None, sb=(survexblock.survexfile.path))
        if line[10] == "-":  # ie a range, just look at first date
            line = line[0:10]

    if len(line) == 10:
        year = line[:4]
        # TO DO set to correct Austrian timezone Europe/Vienna ?
        # %m and %d need leading zeros. Source svx files require them.
        survexblock.date = datetime.strptime(line.replace(".", "-"), "%Y-%m-%d")
    elif len(line) == 7:
        year = line[:4]
        perps = get_people_on_trip(survexblock)  # What, you don't know Judge Dredd slang ?
        message = f"! DATE Warning only accurate to the month, setting to 1st '{oline}' ({survexblock}) {survexblock.survexfile.path} {perps}"
        print(self.insp + message)
        stash_data_issue(
            parser="svxdate", message=message, url=None, sb=(survexblock.survexfile.path)
        )
        survexblock.date = datetime.strptime(line.replace(".", "-"), "%Y-%m")  # sets to first of month
    elif len(line) == 4:
        year = line[:4]
        perps = get_people_on_trip(survexblock)
        message = f"! DATE WARNING only accurate to the YEAR, setting to 1st January '{oline}' ({survexblock}) {survexblock.survexfile.path} {perps}"
        print(self.insp + message)
        stash_data_issue(
            parser="svxdate", message=message, url=None, sb=(survexblock.survexfile.path)
        )
        survexblock.date = datetime.strptime(line, "%Y")  # sets to January 1st
    else:
        # these errors are reporting the wrong survexblock, which is actually a SurvexFile (!)
        # see To Do notes on how to trigger this. Still needs investigating..
        message = (
            f"! DATE Error unrecognised '{oline}-{survexblock}' ({type(survexblock)}) {survexblock.survexfile.path}"
        )
        print(self.insp + message)
        stash_data_issue(
            parser="survex", message=message, url=None, sb=(survexblock.survexfile.path)
        )
        print(f" {type(survexblock)=}")  # survexblock.parent fails as a SurvexFile has no .parent ...ugh.
        print(f" {survexblock.survexpath=}")
        print(f" {survexblock.survexfile=}")
        # No year could be extracted, so there is no expedition to look up and
        # nothing new to save: stop here rather than use an unbound 'year'.
        return

    setdate_on_survexblock(year)
    if survexblock.date:
        # do not actually need a distinct variable 'currentdate' but it makes the code clearer
        self.currentdate = survexblock.date
    survexblock.save()
|
|
|
|
def LoadSurvexLeg(self, survexblock, sline, comment, svxline):
    """Add the tape length of one survey data line to the running totals.

    Compass, clino and tape are read from ``sline`` using the column order held
    in ``self.datastar``, but only the tape length is kept: it is added to
    ``survexblock.legslength`` and ``self.slength``, and ``self.legsnumber`` is
    incremented. Legs are ignored when ``self.flagsstar["skiplegs"]`` is set,
    when the current ``*data`` type is anything other than "normal", or when the
    line is a splay.

    REPLACE ALL THIS by reading the .log output of cavern for the file.
    But we need the lengths per Block, not by File. dump3d will do lengths per block.

    Args:
        survexblock: block the leg belongs to; its ``legslength`` is updated.
        sline: the data line with the comment removed.
        comment: the comment part of the line (only used in diagnostics).
        svxline: the original unprocessed line (only used in diagnostics).
    """
    invalid_clino = 180.0
    invalid_compass = 720.0
    invalid_tape = 0.0
    svxpath = survexblock.survexfile.path

    def stash(message):
        # every problem in this method is recorded against the same parser and file
        stash_data_issue(parser="survexleg", message=message, url=None, sb=(svxpath))

    if self.flagsstar["skiplegs"]:
        if debugprint:
            print("skip in ", self.flagsstar, svxpath)
        return

    if debugprint:
        print(f"! LEG datastar type:{self.datastar['type'].upper()}++{svxpath}\n{sline} ")
    if self.datastar["type"] in ("passage", "cartesian", "nosurvey", "diving", "cylpolar"):
        return
    if debugprint:
        print(
            f" !! LEG data lineno:{self.lineno}\n !! sline:'{sline}'\n !! datastar['tape']: {self.datastar['tape']}"
        )

    if self.datastar["type"] != "normal":
        return

    ls = sline.lower().split()
    # NORMAL, so there should be 5 fields
    # from the content, this is clearly reading fixedpts/gps/gps00raw.svx, but not reporting it by that name
    if len(ls) < 5:
        # The original printed an undefined name 'survexfile' here, which raised NameError
        # instead of reporting the problem. Processing deliberately continues afterwards.
        print("! Fewer than 5 fields in NORMAL in ", svxpath, survexblock.survexfile, survexblock)
        print(" datastar NORMAL:", self.datastar)
        print(f" Line (split): {ls}, comment: {comment}")
        print(f" Line: {sline}\nsvxline: {svxline}")
        message = f" ! Not 5 fields in line '{sline.lower()}' {self.datastar=} {ls=} in\n{survexblock}\n{survexblock.survexfile}\n{survexblock.survexfile.path}"
        stash(message)

    datastar = self.datastar  # alias, not a copy: nothing below modifies it
    survexleg = SurvexLeg()

    # skip all splay legs
    try:
        station_from = ls[datastar["from"]]
        station_to = ls[datastar["to"]]
        if "splayalias" in self.flagsstar:
            if station_from == "-" or station_to == "-":
                if debugprint:
                    print("Aliased splay in ", svxpath)
                return

        if station_from in ("..", "."):
            if debugprint:
                print("Splay in ", svxpath)
            return
        if station_to in ("..", "."):
            if debugprint:
                print("Splay in ", svxpath)
            return

        if station_to == "-":
            message = f" ! Suspected splay, not declared, in line {ls} in {survexblock.survexfile.path}"
            print(self.insp + message)
            stash(message)
            return
    except (KeyError, IndexError, TypeError):
        message = f" ! datastar parsing from/to incorrect in line {ls} in {survexblock.survexfile.path}"
        print(self.insp + message)
        stash(message)
        return

    try:
        tape = ls[datastar["tape"]]
    except (KeyError, IndexError, TypeError):
        message = f" ! datastar parsing incorrect in line {ls} in {survexblock.survexfile.path}"
        print(self.insp + message)
        stash(message)
        survexleg.tape = invalid_tape
        return
    # e.g. '29/09' or '(06.05)' in the tape measurement: the single offending
    # source file was edited rather than cleaning the value up here.
    try:
        if self.unitsfactor:
            tape = float(tape) * self.unitsfactor
            if debugprint:
                message = f" ! Units: Length scaled {tape}m '{ls}' in ({survexblock.survexfile.path}) units:{self.units} factor:{self.unitsfactor}x"
                print(self.insp + message)
                stash(message)
        if self.units == "feet":
            tape = float(tape) / METRESINFEET
            if debugprint:
                message = f" ! Units: converted to {tape:.3f}m from {self.units} '{ls}' in ({survexblock.survexfile.path})"
                print(self.insp + message)
                stash(message)
        survexleg.tape = float(tape)
        self.legsnumber += 1
    except ValueError:
        message = f" ! Value Error: Tape misread in line'{ls}' in {survexblock.survexfile.path} units:{self.units}"
        print(self.insp + message)
        stash(message)
        survexleg.tape = invalid_tape
    try:
        survexblock.legslength += survexleg.tape
        self.slength += survexleg.tape
    except ValueError:
        message = (
            f" ! Value Error: Tape length not added '{ls}' in {survexblock.survexfile.path} units:{self.units}"
        )
        print(self.insp + message)
        stash(message)

    # Compass and clino are read only so that malformed lines get reported;
    # the leg object is thrown away at the end of this method.
    try:
        lcompass = ls[datastar["compass"]]
    except (KeyError, IndexError, TypeError):
        message = f" ! Value Error: Compass not found in line {ls} in {survexblock.survexfile.path}"
        print(self.insp + message)
        stash(message)
        lcompass = invalid_compass

    try:
        lclino = ls[datastar["clino"]]
    except (KeyError, IndexError, TypeError):
        print("! Clino misread in", svxpath)
        print(" datastar:", datastar)
        print(" Line:", ls)
        message = f" ! Value Error: Clino misread in line '{sline.lower()}' {datastar=} {self.datastar=} {ls=} in\n{survexblock}\n{survexblock.survexfile}\n{survexblock.survexfile.path}"
        stash(message)
        lclino = invalid_clino

    if lclino == "up":
        survexleg.clino = 90.0
        lcompass = invalid_compass  # plumbed leg: the compass field is not a bearing
    elif lclino == "down":
        survexleg.clino = -90.0
        lcompass = invalid_compass
    elif lclino == "-" or lclino == "level":
        # omitted or horizontal reading; the original stored -90.0 here
        survexleg.clino = 0.0

    try:
        survexleg.compass = float(lcompass)
    except ValueError:
        print("! Compass misread in", svxpath)
        print(" datastar:", datastar)
        print(" Line:", ls)
        message = f" ! Value Error: lcompass:'{lcompass}' line {ls} in '{survexblock.survexfile.path}'"
        stash(message)
        survexleg.compass = invalid_compass

    # drop the reference so the temporary leg object can be reclaimed
    survexleg = None
|
|
|
|
def LoadSurvexRef(self, survexblock, args):
    """Interpret a *ref record (and its many variants) and link the block to its wallet.

    ``args`` is matched against ``self.rx_argsref`` to obtain year, optional
    letter and wallet number, from which the wallet name ``<year>#<X><nn>`` is
    built. If a Wallet with that name exists and the block has no wallet yet,
    the block's ``scanswallet`` is set and saved, and the wallet date is
    refreshed. Every problem found is printed and recorded as a data issue;
    nothing is raised for bad input.

    Args:
        survexblock: the block in which the *ref statement appears.
        args: the text following the *ref keyword.
    """
    # print(self.insp+ "*REF ---- '"+ args +"'")
    url = get_offending_filename(survexblock.survexfile.path)

    def report(message):
        print(self.insp + message)
        stash_data_issue(parser="survex", message=message, url=url)

    # *REF but also ; Ref years from 1960 to 2039
    if self.rx_ref_text.match(args):
        # a textual reference such as "1996-1999 Not-KH survey book pp 92-95"
        print(f"{self.insp} *REF quoted text so ignored:{args} in {survexblock.survexfile.path}")
        return

    if len(args) < 4:
        report(f" ! Empty or BAD *REF statement '{args}' in '{survexblock.survexfile.path}'")
        return

    argsgps = self.rx_argsref.match(args)
    if not argsgps:
        perps = get_people_on_trip(survexblock)
        report(f" ! Wallet *REF bad in '{survexblock.survexfile.path}' malformed id '{args}' {perps}")
        return
    yr, letterx, wallet = argsgps.groups()

    letterx = "X" if letterx else ""
    if len(wallet) < 2:
        wallet = "0" + wallet
    # Build the wallet name before the year check so the warning can quote it.
    # The original message was missing its f-prefix and used refscan before assignment.
    refscan = f"{yr}#{letterx}{wallet}"

    if not 1960 < int(yr) < 2050:
        report(f" ! Wallet year out of bounds {yr} '{refscan}' {survexblock.survexfile.path}")

    try:
        if int(wallet) > 99:
            report(
                f" ! Wallet *REF {refscan} - very big (more than 99) so probably wrong in '{survexblock.survexfile.path}'"
            )
    except ValueError:
        report(f" ! Wallet *REF {refscan} - not numeric in '{survexblock.survexfile.path}'")

    # assumes all wallets found in earlier pass of data import
    manywallets = Wallet.objects.filter(walletname=refscan)
    if not manywallets:
        perps = get_people_on_trip(survexblock)
        report(
            f" ! Wallet *REF bad in '{survexblock.survexfile.path}' '{refscan}' NOT in database i.e. wallet does not exist {perps}."
        )
        return

    if len(manywallets) > 1:
        report(
            f" ! Wallet *REF {refscan} - more than one found {len(manywallets)} wallets in db with same id {survexblock.survexfile.path}"
        )

    if survexblock.scanswallet:
        if survexblock.scanswallet.walletname != refscan:
            report(
                f" ! Wallet *REF {refscan} in {survexblock.survexfile.path} - Already a DIFFERENT wallet is set for this block '{survexblock.scanswallet.walletname}'"
            )
    else:
        survexblock.scanswallet = manywallets[0]  # this is a ForeignKey field
        survexblock.save()
        # This is where we should check that the wallet JSON contains a link to the survexfile
        # and that the JSON date and walletdate are set correctly to the survexblock date.
        set_walletdate(survexblock.scanswallet)
|
|
|
|
def LoadSurvexDataNormal(self, survexblock, args):
    """Set the column order for data lines in this and following blocks, e.g.
        *data normal from to compass clino tape
        *data normal from to tape compass clino
    We are only collecting length data so we are disinterested in from, to, LRUD etc.

    Updates ``self.datastar``. ``self.datastardefault`` is never modified.
    For reference, the default has the shape:
        {"type": "normal", "t": "leg", "from": 0, "to": 1, "tape": 2, "compass": 3, "clino": 4}

    Args:
        survexblock: block containing the statement (used only in diagnostics).
        args: text following ``*data``.

    Returns:
        False if the statement contains NEWLINE (not parsed by troggle),
        otherwise None.
    """
    # Always work on a private copy. The original rebound this name to
    # self.datastardefault itself and then wrote the new column indexes into it,
    # so one "*data normal ..." statement silently changed the defaults.
    datastar = copy.deepcopy(self.datastardefault)

    # DEFAULT | NORMAL | CARTESIAN| NOSURVEY |PASSAGE | TOPOFIL | CYLPOLAR | DIVING
    ls = args.lower().split()
    if not ls:
        # naked '*data' which is relevant only for passages. Ignore. Continue with previous settings.
        return
    style = ls[0]

    def report(message):
        print(message)
        print(message, file=sys.stderr)
        stash_data_issue(parser="survex", message=message, url=None, sb=(survexblock.survexfile.path))

    if style == "default":
        self.datastar = copy.deepcopy(self.datastardefault)
    elif style in ("normal", "topofil"):
        # NOTE(review): this tests the copied defaults, not the statement itself,
        # so it only fires if the defaults lack "from"/"to". Kept as-is.
        if not ("from" in datastar and "to" in datastar):
            report(f" ! - Unrecognised *data normal statement '{args}' {survexblock.name}|{survexblock.survexpath}")
            return
        # ls = ["normal", "from", "to", "tape", "compass", "clino" ]
        for position, field in enumerate(ls[1:]):  # ls[0] is the style keyword
            if field == "newline":
                report(
                    f" ! - ABORT *data statement has NEWLINE in it in {survexblock.survexfile.path}. Not parsed by troggle. '{args}'"
                )
                return False

            if field in ("bearing", "compass"):
                datastar["compass"] = position
            if field in ("clino", "gradient"):
                datastar["clino"] = position
            if field in ("tape", "length"):
                datastar["tape"] = position
        self.datastar = copy.deepcopy(datastar)
        return
    elif style in ("passage", "nosurvey", "diving", "cylpolar"):
        # these data styles are not reported; legs in them are ignored later
        self.datastar["type"] = style
    elif style == "cartesian":  # We should not ignore this ?! Default for Germans ?
        self.datastar["type"] = style
    else:
        report(f" ! - Unrecognised *data statement '{args}' {survexblock.name}|{survexblock.survexpath}")
        self.datastar["type"] = style
|
|
|
|
def LoadSurvexFlags(self, args):
    """Reset the current flags to the defaults, then apply one *flags statement.

    Valid flags are DUPLICATE, SPLAY, and SURFACE, and a flag may be preceded
    with NOT to turn it off. Default values are NOT any of them.
    ``self.rx_flagsnot`` is used to rewrite the text so that a negated flag
    appears as a single token such as "notsplay". When the result has surface
    or splay switched on, "skiplegs" is set so the legs are not counted.
    """
    self.flagsstar = copy.deepcopy(self.flagsdefault)

    flags = self.rx_flagsnot.sub("not", args).split()
    if debugprint:
        print(
            f" ^ flagslist:{flags}",
        )

    known = ("duplicate", "surface", "splay")
    # switch on everything named first, then apply the negated forms
    for flagname in known:
        if flagname in flags:
            self.flagsstar[flagname] = True
    for flagname in known:
        if "not" + flagname in flags:
            self.flagsstar[flagname] = False

    # actually we do want to count duplicates as this is for "effort expended in surveying underground"
    # so "duplicate" does not contribute to skiplegs
    if self.flagsstar["surface"] is True or self.flagsstar["splay"] is True:
        self.flagsstar["skiplegs"] = True
        if debugprint:
            print(
                f" $ flagslist:{flags}",
            )
|
|
|
|
def IdentifyCave(self, cavepath):
    """Return the cave for a survex directory path, or None if it is not a known cave.

    The lowercased path is looked up in ``self.caveslist`` first. Otherwise
    ``self.rx_cave`` is searched for two groups, and "<group1>-<group2>" and
    then "<group2>" alone are tried as keys. A successful guess is cached in
    ``self.caveslist`` under the lowercased path, the same key form used for
    the initial lookup, so the next call for this path is a direct hit.

    Args:
        cavepath: directory part of a survex file path.
    """
    key = cavepath.lower()
    if key in self.caveslist:
        return self.caveslist[key]
    # TO DO - this predates the big revision to Gcavelookup so look at this again carefully
    path_match = self.rx_cave.search(cavepath)
    if not path_match:
        # not a cave, but that is fine.
        return None

    first, second = path_match.group(1), path_match.group(2)
    for guess in (f"{first}-{second}".lower(), second.lower()):
        if guess in self.caveslist:
            # The original cached under the raw path, which the lowercased
            # lookup above never finds for mixed-case paths.
            self.caveslist[key] = self.caveslist[guess]
            return self.caveslist[guess]
    print(f" ! Failed to find cave for {key}")
    return None
|
|
|
|
def GetSurvexDirectory(self, headpath):
    """Return the SurvexDirectory for ``headpath``, creating and saving it on first sight.

    A newly created directory gets ``self.currentsurvexfile`` as its
    primarysurvexfile, i.e. the first file met in that directory in *include
    order. An empty ``headpath`` returns the entry stored under the "" key.
    Lookups are keyed on the lowercased path.
    """
    if not headpath:
        return self.svxdirs[""]

    key = headpath.lower()
    if key in self.svxdirs:
        return self.svxdirs[key]

    directory = SurvexDirectory(path=headpath, primarysurvexfile=self.currentsurvexfile)
    self.svxdirs[key] = directory
    directory.save()
    # only register it as a key once it has been saved
    self.survexdict[directory] = []  # list of the files in the directory
    return directory
|
|
|
|
def ReportNonCaveIncludes(self, headpath, includelabel, depth):
    """Report an *include whose directory is not a known cave, unless it is expected.

    Nothing is reported when ``headpath`` is listed in ``self.ignorenoncave``,
    starts with one of ``self.ignoreprefix`` (surface, kataster, gpx and the
    like), or corresponds to an identifier in the pending-caves list. That list
    is read from ``pendingcaves.txt`` in ``settings.CAVEDESCRIPTIONS`` whenever
    ``self.pending`` is empty. Otherwise a warning is printed and recorded as a
    data issue.

    Args:
        headpath: directory part of the included file's path.
        includelabel: path of the included survex file without the .svx suffix.
        depth: indentation string; only its length is reported.
    """
    if not self.pending:
        self.pending = set()
        fpending = Path(settings.CAVEDESCRIPTIONS, "pendingcaves.txt")
        if fpending.is_file():
            with open(fpending, "r") as fo:
                for rawline in fo:
                    pending_id = rawline.strip().upper()
                    if not pending_id:
                        continue  # a blank line would otherwise add the bogus id "1623-"
                    # test the cleaned-up id, not the raw line, so leading
                    # whitespace cannot defeat the area-prefix check
                    if pending_id.startswith("162"):
                        self.pending.add(pending_id)
                    else:
                        self.pending.add("1623-" + pending_id)

    if headpath in self.ignorenoncave:
        return
    if any(headpath.startswith(prefix) for prefix in self.ignoreprefix):
        return

    # headpath is expected to look like "caves-1623/<cave id>"
    caveid = f"{headpath[6:10]}-{headpath[11:]}".upper()
    if caveid in self.pending:
        # Yes we didn't find this cave, but we know it is a pending one. So not an error.
        return
    shortid = caveid[5:]
    if shortid in self.pending:
        print(f"! ALREADY PENDING {shortid}", file=sys.stderr)
        return

    message = f" ! Warning: cave identifier '{caveid}'or {shortid} (guessed from file path) is not a known cave. Need to add to expoweb/cave_data/pending.txt ? In '{includelabel}.svx' at depth:[{len(depth)}]."
    print("\n" + message)
    print("\n" + message, file=sys.stderr)
    print(f"{self.pending}", end="", file=sys.stderr)
    stash_data_issue(parser="survex", message=message, url=None, sb=(includelabel))
|
|
# print(f' # datastack in LoadSurvexFile:{includelabel}', file=sys.stderr)
|
|
# for dict in self.datastack:
|
|
# print(f' type: <{dict["type"].upper()} >', file=sys.stderr)
|
|
|
|
def LoadSurvexFile(self, svxid):
    """Create the SurvexFile for ``svxid`` and make it the current survexfile.

    Also obtains the SurvexDirectory for the file's parent folder (creating it
    if needed) and uses that folder to decide whether this is a cave we know,
    linking both file and directory to the cave when it is.

    If the SurvexFile already exists this is TOO LATE to do anything about it:
    the file has already been included into the long linear file. Duplication
    is prevented when the long linear file is created, so seeing a duplicate
    here is a serious error. It is counted in ``dup_includes``, reported, and
    the existing object becomes the current survexfile with nothing else done.
    """
    global dup_includes

    def show_datastack(heading):
        # debugging aid: the 'type' of every entry on the *data stack, on one line
        print(heading, end="")
        print("".join(f"'{frame['type'].upper()}' " for frame in self.datastack), end="")
        print("")

    def report(message, to_stderr=True, **extra):
        print(message)
        if to_stderr:
            print(message, file=sys.stderr)
        stash_data_issue(parser="survex", message=message, **extra)

    if debugprint:
        show_datastack(f" # datastack in LoadSurvexFile:{svxid} 'type':")

    depth = " " * self.depthbegin
    print(f"{self.depthbegin:2}{depth} - NEW survexfile:'{svxid}'")
    headpath = os.path.dirname(svxid)

    newfile, created = SurvexFile.objects.update_or_create(path=svxid)
    if not created:
        dup_includes += 1
        report(
            f" ! DUPLICATE SurvexFile '{svxid}' create attempt in LoadSurvexFile()",
            to_stderr=False,
            url=f"/survexfile/{svxid}",
        )
        self.currentsurvexfile = newfile
        return  # abort as everything already done for object creation

    newfile.save()  # until we do this there is no internal id so no foreign key works
    self.currentsurvexfile = newfile
    newdirectory = self.GetSurvexDirectory(headpath)
    newdirectory.save()
    newfile.survexdirectory = newdirectory
    self.survexdict[newdirectory].append(newfile)
    cave = self.IdentifyCave(headpath)  # cave already exists in db

    if not newdirectory:
        report(
            f" ! 'None' SurvexDirectory returned from GetSurvexDirectory({headpath})",
            url=f"/survexfile/{svxid}",
        )

    if not cave:
        # probably a surface survey, or a cave in a new area e.g. 1624 not previously managed, and not in the pending list
        self.ReportNonCaveIncludes(headpath, svxid, depth)
    else:
        newdirectory.cave = cave
        newfile.cave = cave

    if not newfile.survexdirectory:
        report(f" ! SurvexDirectory NOT SET in new SurvexFile {svxid} ")
    self.currentsurvexfile.save()  # django insists on this although it is already saved !?
    try:
        newdirectory.save()
    except BaseException:
        # show what we were trying to save, then let the error propagate
        print(newdirectory, file=sys.stderr)
        print(newdirectory.primarysurvexfile, file=sys.stderr)
        raise

    if debugprint:
        show_datastack(f" # datastack end LoadSurvexFile:{svxid} 'type':")
|
|
|
|
|
|
def ProcessIncludeLine(self, included):
    """Handle the marker, met while reading the long linear file, which says that
    the content from this point on comes from a particular included file.

    Loads that file as the current survexfile and pushes it onto
    ``self.stacksvxfiles``. Debug printing is switched on if the file is the
    one named by ``debugprinttrigger``.
    """
    global debugprint
    svxid = included.group(1)
    trigger_hit = svxid.lower() == debugprinttrigger.lower()
    if trigger_hit:
        debugprint = True
    self.LoadSurvexFile(svxid)
    self.stacksvxfiles.append(self.currentsurvexfile)
|
|
|
|
def ProcessEdulcniLine(self, edulcni):
    """Handle the marker, met while reading the long linear file, which says that
    we are about to pop back out of the contents of an included file.

    Saves the current survexfile object in the db to include the data parsed
    from it, then makes the entry popped from ``self.stacksvxfiles`` the
    current survexfile. Debug printing is switched off again when the file
    named by ``debugprinttrigger`` ends.
    """
    global debugprint
    svxid = edulcni.group(1)
    if debugprint:
        indent = " " * self.depthbegin
        print(f"{self.depthbegin:2}{indent} - Edulcni survexfile:'{svxid}'")
    trigger_hit = svxid.lower() == debugprinttrigger.lower()
    if trigger_hit:
        debugprint = False
    self.currentsurvexfile.save()
    self.currentsurvexfile = self.stacksvxfiles.pop()
|
|
|
|
def TickSurvexQM(self, survexblock, qmtick):
    """Interpret the specially formatted comment which is a QM TICKED statement.

    Finds the QM with the same number in the same block, marks it ticked and
    stores the completion description. A failed lookup, no matching QM, or
    several matching QMs are each recorded as a data issue; in the first two
    cases nothing is changed.

    Args:
        survexblock: block containing the TICK comment.
        qmtick: regex match; group(1) is the QM number, group(2) the date and
            group(3) the completion description.
    """

    def report(message):
        print(message)
        stash_data_issue(parser="survex", message=message, url=None, sb=(survexblock.survexfile.path))

    # Now we need to find the correct QM object. It will be in the same block and have the same number.
    try:
        # could try to search on blockname instead?
        # but the QMn TICK has to be in the same block anyway
        qm = QM.objects.filter(block=survexblock, number=int(qmtick.group(1)))
    except Exception as e:
        # The original handler formatted 'qm' into this message and then carried on
        # to use it, but 'qm' is unbound whenever this branch runs.
        report(
            f' ! QM TICK find FAIL QM{qmtick.group(1)} date:"{qmtick.group(2)}" ({e}) in "{survexblock.survexfile.path}" + completion_description:"{qmtick.group(3)}" '
        )
        return

    found = len(qm)
    if found == 0:
        report(
            f' ! QM TICK NOT found FAIL QM{qmtick.group(1)} date:"{qmtick.group(2)}" in "{survexblock.survexfile.path}" + completion_description:"{qmtick.group(3)}" '
        )
        return
    if found > 1:
        report(
            f' ! QM TICK MULTIPLE found FAIL QM{qmtick.group(1)} date:"{qmtick.group(2)}" in "{survexblock.survexfile.path}" + completion_description:"{qmtick.group(3)}" '
        )

    # modify and save one and the same object
    ticked = qm[0]
    ticked.ticked = True
    # ticked.ticked_date = qmtick.group(2) # not in data model yet
    ticked.completion_description = qmtick.group(3)
    ticked.save()
|
|
|
|
def LoadSurvexQM(self, survexblock, qmline):
    """Interpret the specially formatted comment which is a QM definition.

    Spec of QM in SVX files:
        ;Serial number   grade(A/B/C/D/V/X)  nearest-station  resolution-station description
        ;QM1 a hobnob_hallway_2.42 hobnob-hallway_3.42 junction of keyhole passage
        ;QM1 a hobnob_hallway_2.42 - junction of keyhole passage
        ;QM1 A B6 - see plan drawing there is definitely a QM

    A grade of TICK is handed to TickSurvexQM() instead. Any other grade
    outside A, B, C, D, X, V, ? is reported as a data issue, but the QM is
    still created. Failure to create the QM is reported, not raised.

    Args:
        survexblock: block containing the QM comment.
        qmline: regex match with groups (number, grade, nearest station,
            resolution station, notes).
    """
    insp = self.insp
    svxpath = survexblock.survexfile.path

    def report(message):
        print(insp + message)
        stash_data_issue(parser="survex", message=message, url=None, sb=(svxpath))

    # create a short, hopefully-unique name for this block to be used in the QM id
    blockname = survexblock.name[:6] + survexblock.name[-1:]

    qm_no = qmline.group(1)  # this is NOT unique across multiple survex files
    qm_grade = qmline.group(2).strip().upper()  # TICK or [a-dA-DvVxX?]
    if qm_grade == "TICK":
        self.TickSurvexQM(survexblock, qmline)
        return

    if qm_grade not in ["A", "B", "C", "D", "X", "V", "?"]:
        report(f" ! QM{qm_no} INVALID code '{qm_grade}' [{blockname}] '{survexblock.survexfile.path}'")

    qm_nearest = qmline.group(3)
    qm_resolve_station = qmline.group(4)
    qm_notes = qmline.group(5)

    # NB none of the SurveyStations are in the DB now, so if we want to link to aSurvexStation
    # we would have to create one. But that is not obligatory and no QMs loaded from CSVs have one

    # Older troggle/CSV assumes a logbook entry 'found_by' for each QM, with a date.
    # We don't need this anymore so we don't need to create a placeholder logbook entry.

    if survexblock.survexfile.cave:
        # NOTE(review): the result is discarded; call kept in case slug() has side effects - confirm
        survexblock.survexfile.cave.slug()

    self.fix_undated(survexblock)  # null-op if already set
    expoyear = str(survexblock.date.year)

    # one definition of the fields, used both to create the QM and to look for
    # existing ones when creation fails
    qm_fields = dict(
        number=qm_no,
        # nearest_station=a_survex_station_object, # can be null
        nearest_station_description=qm_resolve_station,
        nearest_station_name=qm_nearest,
        grade=qm_grade,  # already uppercased above
        location_description=qm_notes,
        block=survexblock,  # only set for survex-imported QMs
        blockname=blockname,  # only set for survex-imported QMs
        expoyear=expoyear,
        cave=survexblock.survexfile.cave,
    )
    try:
        # create() saves the object; the original followed it with a bare
        # 'qm.save' (no call), which did nothing
        QM.objects.create(**qm_fields)
    except Exception:
        qms = QM.objects.filter(**qm_fields)
        report(f" ! QM{qm_no} FAIL to create {qm_nearest} in'{survexblock.survexfile.path}' found {len(qms)}:{qms}")
|
|
|
|
|
|
def ProcessQM(self, survexblock, qml, comment):
    """Process the line beginning
    ;QM
    which is a QM new declaration or a QM TICK closing declaration.

    The comment is matched against the full pattern ``self.rx_qm``; a match is
    passed to LoadSurvexQM(), anything else is recorded as a data issue using
    the number and remainder captured in ``qml``.

    It _should_ recognise a non-numeric survey station ID, but currently doesn't.
    Valid QM types are [a-dA-DvVxX?] A-D, V for Vertical, X for horrible and ? for unknown
    """
    qmline = self.rx_qm.match(comment)
    if qmline:
        self.LoadSurvexQM(survexblock, qmline)
        return

    svxpath = survexblock.survexfile.path
    message = f' ! QM Unrecognised as valid in "{svxpath}" QM{qml.group(1)} "{qml.group(2)}" : regex failure typo?'
    print(message)
    stash_data_issue(parser="survex", message=message, url=None, sb=(svxpath))
|
|
|
|
def LoadSurvexComment(self, survexblock, comment):
    """Act on the few kinds of comment that carry meaning for troggle.

    All comments are ignored except ;ref, ;QM, and the ;*include / ;*edulcni
    markers of the collated survex file. The checks are independent and run
    in that order. When the comment is a ref, the ref prefix is stripped from
    it and the later checks see the stripped text.

    This _should_ also check that the QM survey point exists in the block.
    """
    indent = " " * self.depthbegin

    if self.rx_commref.match(comment):
        comment = self.rx_ref2.sub("", comment.strip())
        print(f"{self.depthbegin:2}{indent} - rx_ref2 -- {comment=} in {survexblock.survexfile.path} :: {survexblock}")
        self.LoadSurvexRef(survexblock, comment)

    # Team comments such as
    # ; Messteam: Jörg Haussmann, Robert Eckardt, Thilo Müller
    # ; Zeichner: Thilo Müller
    # are recognised but nothing is done with them, because none of these will
    # be valid teammembers: they are not actually on our expo.
    self.rx_commteam.match(comment)

    # rx_qm0 is the loose pattern: QM, a number, then anything
    qml = self.rx_qm0.match(comment)
    if qml:
        self.ProcessQM(survexblock, qml, comment)

    # ;|*include means 'we have been included'; whereas *include means 'proceed to include'
    # No test here to check that this file has not already been included. Ouch.
    included = self.rx_comminc.match(comment)
    if included:
        self.ProcessIncludeLine(included)

    # ;*edulcni means we are returning from an included file
    edulcni = self.rx_commcni.match(comment)
    if edulcni:
        self.ProcessEdulcniLine(edulcni)
|
|
|
|
def LoadSurvexSetup(self, survexblock, survexfile):
    """Reset per-file parser state before reading ``survexfile``.

    Resets the begin depth, the *data column order and the line number, prints
    a memory/progress line to stdout and progress dots to stderr, and, if the
    path of the parent block's file looks like a cave directory, tries to set
    ``survexfile.cave`` from it.

    Args:
        survexblock: the parent block; its file path is used to guess the cave.
        survexfile: the file about to be read.
    """
    self.depthbegin = 0
    # Take a private copy. Assigning the default dict itself (as the original did)
    # would let later "self.datastar[...] = ..." writes corrupt the defaults.
    self.datastar = copy.deepcopy(self.datastardefault)
    print(
        self.insp
        + f" - MEM:{get_process_memory():.3f} Reading. parent:{survexblock.survexfile.path} <> {survexfile.path} "
    )
    self.lineno = 0
    sys.stderr.flush()
    self.callcount += 1
    # progress indicator: a dot every 10 calls, a new line every 500
    if self.callcount % 10 == 0:
        print(".", file=sys.stderr, end="")
    if self.callcount % 500 == 0:
        print("\n", file=sys.stderr, end="")
    # Try to find the cave in the DB if not use the string as before
    path_match = re.search(r"caves-(\d\d\d\d)/(\d+|\d\d\d\d-?\w+-\d+)/", survexblock.survexfile.path)
    if path_match:
        pos_cave = f"{path_match.group(1)}-{path_match.group(2)}"
        cave = getCaveByReference(pos_cave)
        if cave:
            survexfile.cave = cave
|
|
|
|
def LinearLoad(self, survexblock, path, collatefilename):
    """Parse one collated survex file line by line, creating SurvexBlocks.

    Usually used to import all the survex files which have been collated
    into a single file: either the tree or the _unseens.
    Also used for loading a single file which has been edited online.

    *begin/*end nesting is tracked with explicit stacks: one for the block
    labels and one each for the per-block state (units, leg count, length,
    team, date, *data settings and *flags settings).
    The file is iterated lazily so that the whole collated file (21MB) is
    never held in memory.

    survexblock     -- becomes the parent of the outermost *begin block;
                       its .survexfile becomes self.currentsurvexfile
    path            -- stored as self.relativefilename, passed to
                       IdentifyCave() and quoted in error messages
    collatefilename -- name of the file that is actually opened and parsed

    On return self.legsnumber and self.slength hold the totals accumulated
    over every block that was closed by a *end.
    """
    blkid = None
    pathlist = None
    args = None
    oldflags = None
    blockcount = 0
    self.lineno = 0
    slengthtotal = 0.0
    nlegstotal = 0
    self.relativefilename = path
    self.IdentifyCave(path)  # this will produce null for survex files which are geographic collections

    self.currentsurvexfile = survexblock.survexfile
    self.currentsurvexfile.save()  # django insists on this although it is already saved !?

    self.datastar = copy.deepcopy(self.datastardefault)
    self.flagsstar = copy.deepcopy(self.flagsdefault)

    def tickle():
        """Progress indicator on stderr: a dot every 20 blocks, memory use every 800."""
        nonlocal blockcount

        blockcount += 1
        if blockcount % 20 == 0:
            print(".", file=sys.stderr, end="")
        if blockcount % 800 == 0:
            print("\n", file=sys.stderr, end="")
            mem = get_process_memory()
            print(f" - MEM: {mem:7.2f} MB in use", file=sys.stderr)
            print(" ", file=sys.stderr, end="")
            sys.stderr.flush()

    def dump_datastack(header):
        """Debug aid: print header, then the 'type' of every stacked *data setting."""
        print(header, end="")
        for entry in self.datastack:
            print(f"'{entry['type'].upper()}' ", end="")
        print("")
        print(f"'{self.datastar['type'].upper()}' self.datastar ")

    def printbegin():
        """Set self.insp and rebuild the dotted survexpath from the label stack."""
        nonlocal pathlist

        depth = " " * self.depthbegin
        self.insp = depth
        if debugprint:
            print(f"{self.depthbegin:2}{depth} - Begin for :'{blkid}'")
        # anonymous blocks have an empty label and contribute nothing to the path
        pathlist = "".join("." + label for label in self.stackbegin if len(label) > 0)

    def printend():
        """Debug aid: report the leg count and length of the block being closed."""
        depth = " " * self.depthbegin
        if debugprint:
            print(f"{self.depthbegin:2}{depth} - End from:'{args}'")
            # the count goes in the LEGS/n slots and the length in the length slot
            print(
                "{:2}{} - LEGS: {} (n: {}, length:{} units:{})".format(
                    self.depthbegin, depth, self.legsnumber, self.legsnumber, self.slength, self.units
                )
            )

    def pushblock():
        """Save copies of the current *data and *flags settings on entering a block."""
        if debugprint:
            dump_datastack(f" # datastack at 1 *begin {blkid} 'type':")
        # ------------ * DATA
        self.datastack.append(copy.deepcopy(self.datastar))
        # ------------ * DATA
        if debugprint:
            dump_datastack(f" # datastack at 2 *begin {blkid} 'type':")

        # ------------ * FLAGS
        self.flagsstack.append(copy.deepcopy(self.flagsstar))
        # ------------ * FLAGS

    def popblock():
        """Restore the *data and *flags settings saved by the matching pushblock()."""
        if debugprint:
            dump_datastack(f" # datastack at *end '{blkid} 'type':")
        # ------------ * DATA
        self.datastar = copy.deepcopy(self.datastack.pop())
        # ------------ * DATA
        if debugprint:
            dump_datastack(f" # datastack after *end '{blkid} 'type':")

        # ------------ * FLAGS
        self.flagsstar = copy.deepcopy(self.flagsstack.pop())
        # ------------ * FLAGS
        # oldflags stays None until the first *flags or *end has been processed
        if debugprint and oldflags is not None:
            if oldflags["skiplegs"] != self.flagsstar["skiplegs"]:
                print(f" # POP 'any' flag now:'{self.flagsstar['skiplegs']}' was:{oldflags['skiplegs']} ")

    def starstatement(star):
        """Interprets a survex command where * is the first character on the line, e.g. *begin"""
        nonlocal survexblock
        nonlocal blkid
        nonlocal args
        nonlocal oldflags
        nonlocal slengthtotal
        nonlocal nlegstotal

        cmd, args = star.groups()
        cmd = cmd.lower()

        # ------------------------BEGIN
        if self.rx_begin.match(cmd):
            blkid = args.lower()
            # PUSH state ++++++++++++++
            self.depthbegin += 1
            self.stackbegin.append(blkid)
            self.unitsstack.append((self.units, self.unitsfactor))
            self.legsnumberstack.append(self.legsnumber)
            self.slengthstack.append(self.slength)
            self.teaminheritstack.append(self.inheritteam)
            self.teamcurrentstack.append(self.currentteam)
            self.dateinheritstack.append(self.inheritdate)
            self.datecurrentstack.append(self.currentdate)
            pushblock()
            # PUSH state ++++++++++++++
            self.legsnumber = 0
            self.slength = 0.0
            self.units = "metres"
            self.inheritteam = self.currentteam
            self.currentteam = set()  # zero the current team when we start a new block
            self.inheritdate = self.currentdate
            self.currentdate = None  # zero the current date when we start a new block
            printbegin()
            newsurvexblock = SurvexBlock(
                name=blkid,
                parent=survexblock,
                survexpath=pathlist,
                survexfile=self.currentsurvexfile,
                legsall=0,
                legslength=0.0,
            )
            newsurvexblock.save()
            # copy parent's title initially, overwritten if the block has its own *title
            newsurvexblock.title = "(" + survexblock.title + ")"
            survexblock = newsurvexblock
            survexblock.save()  # django insists on this , but we want to save at the end !
            tickle()

        # ---------------------------END
        elif self.rx_end.match(cmd):
            survexblock.legsall = self.legsnumber
            survexblock.legslength = self.slength
            printend()
            slengthtotal += self.slength
            nlegstotal += self.legsnumber

            self.fix_undated(survexblock)
            self.fix_anonymous(survexblock)
            try:
                survexblock.parent.save()  # django insists on this although it is already saved !?
            except Exception:
                # say which block failed before the exception propagates
                print(survexblock.parent, file=sys.stderr)
                raise
            try:
                survexblock.save()  # save to db at end of block
            except Exception:
                print(survexblock, file=sys.stderr)
                raise
            confirm_team_on_trip(survexblock)
            # POP state ++++++++++++++
            popblock()
            self.inheritteam = self.teaminheritstack.pop()
            self.currentteam = self.teamcurrentstack.pop()
            self.inheritdate = self.dateinheritstack.pop()
            self.currentdate = self.datecurrentstack.pop()
            self.legsnumber = self.legsnumberstack.pop()
            self.units, self.unitsfactor = self.unitsstack.pop()
            self.slength = self.slengthstack.pop()
            blkid = self.stackbegin.pop()
            self.currentsurvexblock = survexblock.parent
            survexblock = survexblock.parent
            oldflags = self.flagsstar
            self.depthbegin -= 1
            # POP state ++++++++++++++

        # -----------------------------
        elif self.rx_title.match(cmd):
            quotedtitle = self.rx_quotedtitle.match(args)
            if quotedtitle:
                survexblock.title = quotedtitle.groups()[0]
            else:
                survexblock.title = args
        elif self.rx_ref.match(cmd):
            self.LoadSurvexRef(survexblock, args)
        elif self.rx_flags.match(cmd):
            # NOTE(review): oldflags is the same object as self.flagsstar here, so
            # the comparison below only detects a change if LoadSurvexFlags()
            # rebinds self.flagsstar rather than mutating it - confirm.
            oldflags = self.flagsstar
            self.LoadSurvexFlags(args)
            if debugprint:
                if oldflags["skiplegs"] != self.flagsstar["skiplegs"]:
                    print(f" # CHANGE 'any' flag now:'{self.flagsstar['skiplegs']}' was:{oldflags['skiplegs']} ")

        elif self.rx_data.match(cmd):
            # A false result means we do not cope with this *data format;
            # either way there is nothing more to do for this line.
            self.LoadSurvexDataNormal(survexblock, args)
        elif self.rx_alias.match(cmd):
            self.LoadSurvexAlias(survexblock, args)
        elif self.rx_entrance.match(cmd):
            self.LoadSurvexEntrance(survexblock, args)
        elif self.rx_date.match(cmd):
            self.LoadSurvexDate(survexblock, args)
        elif self.rx_units.match(cmd):
            self.LoadSurvexUnits(survexblock, args)
        elif self.rx_team.match(cmd):
            self.LoadSurvexTeam(survexblock, args)
        # NOTE(review): both patterns are tested against cmd; if this is meant to
        # ignore "*set names ..." the second test presumably wants args - confirm
        # against the rx_set / rx_names definitions before changing.
        elif self.rx_set.match(cmd) and self.rx_names.match(cmd):
            pass
        elif self.rx_include.match(cmd):
            message = f" ! -ERROR *include command not expected here {path}. Re-run a full Survex import."
            print(message)
            print(message, file=sys.stderr)
            stash_data_issue(
                parser="survex",
                message=message,
            )
        else:
            self.LoadSurvexFallThrough(survexblock, args, cmd)

    # this is a python generator idiom.
    # see https://realpython.com/introduction-to-python-generators/
    # this is the first use of generators in troggle (Oct.2022) and save 21 MB of memory
    with open(collatefilename, "r") as fcollate:
        for svxline in fcollate:
            self.lineno += 1
            sline, comment = self.rx_comment.match(svxline).groups()
            if comment:
                # this catches the ;|*include NEWFILE and ;|*edulcni ENDOFFILE lines too
                self.LoadSurvexComment(survexblock, comment)

            if not sline:
                continue  # skip blank lines

            # detect a merge failure inserted by version control
            if self.rx_badmerge.match(sline):
                message = f"\n ! - ERROR version control merge failure\n - '{sline}'\n"
                message = (
                    message + f" - line {self.lineno} in {blkid} in {survexblock}\n - NERD++ needed to fix it"
                )
                print(message)
                print(message, file=sys.stderr)
                stash_data_issue(parser="survex", message=message)
                continue  # skip this line

            # detect a star command
            star = self.rx_star.match(sline)
            if star:
                # yes we are reading a *command
                starstatement(star)
            else:  # not a *cmd so we are reading data OR a ";" rx_comment failed. We hope.
                self.LoadSurvexLeg(survexblock, sline, comment, svxline)

    self.legsnumber = nlegstotal
    self.slength = slengthtotal
|
|
|
|
def PushdownStackScan(self, survexblock, path, finname, flinear, fcollate):
    """Follows the *include links in all the survex files from the root file (usually 1623.svx)
    and reads only the *include and *begin and *end statements. It produces a linearised
    list of the include tree and detects blocks included more than once.

    survexblock -- passed unchanged to recursive calls and quoted in messages
    path        -- survex path of this file (no .svx suffix); relative *include
                   arguments are resolved against its directory and it is
                   appended to self.svxfileslist once the file has been read
    finname     -- name of the file actually opened for this path
    flinear     -- open text file receiving the indented include/begin/end log
    fcollate    -- open text file receiving every non-*include line, with
                   ";|*include" / ";|*edulcni" marker lines around each
                   included file

    A path already present in self.svxfileslist is reported and not read again.
    A UnicodeDecodeError while reading is recorded and the file is omitted;
    any other exception is recorded and re-raised.
    """
    global stop_dup_warning

    def process_line(svxline):
        """Copy one line to fcollate, recursing into the target of any *include."""
        self.lineno += 1
        # detect a merge failure inserted by version control
        if self.rx_badmerge.match(svxline):
            message = f"\n!! - ERROR version control merge failure\n - '{svxline}'\n"
            message = message + f" - in '{path}' at line {self.lineno}\n"
            message = (
                message + f" - line {self.lineno} {survexblock}\n - Parsing aborted. NERD++ needed to fix it"
            )
            print(message)
            print(message, file=sys.stderr)
            stash_data_issue(parser="survex", message=message, url=None, sb=(path))
            # NOTE(review): this only drops the offending line; the caller's loop
            # carries on with the rest of the file.
            return

        if not self.rx_include.match(svxline):
            fcollate.write(f"{svxline.strip()}\n")

        sline, comment = self.rx_comment.match(svxline.strip()).groups()
        star = self.rx_star.match(sline)
        if not star:
            return
        # yes we are reading a *cmd
        cmd, args = star.groups()
        cmd = cmd.lower()
        if self.rx_include2.match(cmd):
            # normalises path syntax
            includepath = os.path.normpath(os.path.join(os.path.split(path)[0], re.sub(r"\.svx$", "", args)))
            if self.never_seen(includepath, path):
                fullpath = os.path.join(settings.SURVEX_DATA, includepath + ".svx")
                self.RunSurvexIfNeeded(os.path.join(settings.SURVEX_DATA, includepath), path)
                self.check_unique_name(os.path.join(settings.SURVEX_DATA, includepath))
                if os.path.isfile(fullpath):
                    # --------------------------------------------------------
                    self.depthinclude += 1
                    fcollate.write(f";|*include {includepath}\n")
                    flinear.write(f"{self.depthinclude:2} {indent} *include {includepath}\n")
                    push = includepath.lower()
                    self.includestack.append(push)
                    # -----------------
                    self.PushdownStackScan(survexblock, includepath, fullpath, flinear, fcollate)
                    # -----------------
                    pop = self.includestack.pop()
                    if pop != push:
                        message = (
                            f"!! ERROR mismatch *include pop!=push '{pop}'!='{push}'\n{self.includestack}"
                        )
                        print(message)
                        print(message, file=flinear)
                        print(message, file=sys.stderr)
                        stash_data_issue(parser="survex", message=message, url=None, sb=(path))
                    flinear.write(f"{self.depthinclude:2} {indent} *edulcni {pop}\n")
                    fcollate.write(f";|*edulcni {pop}\n")
                    self.depthinclude -= 1
                    # --------------------------------------------------------
                else:
                    message = f" ! ERROR *include file '{includepath}' not found, listed in '{finname}'"
                    print(message)
                    print(message, file=sys.stderr)
                    stash_data_issue(parser="survex", message=message, url=None, sb=(path))
        elif self.rx_begin2.match(cmd):
            self.depthbegin += 1
            depth = " " * self.depthbegin
            # an anonymous *begin is stacked as a single space
            pushargs = args if args else " "
            self.stackbegin.append(pushargs.lower())
            flinear.write(f" {self.depthbegin:2} {depth} *begin {args}\n")
        elif self.rx_end2.match(cmd):
            depth = " " * self.depthbegin
            flinear.write(f" {self.depthbegin:2} {depth} *end {args}\n")
            if not args:
                args = " "  # matches what an anonymous *begin pushed
            popargs = self.stackbegin.pop()
            if popargs != args.lower():
                message = (
                    f"!! ERROR mismatch in BEGIN/END labels pop!=push '{popargs}'!='{args}'\n{self.stackbegin}"
                )
                print(message)
                print(message, file=flinear)
                print(message, file=sys.stderr)
                stash_data_issue(parser="survex", message=message, url=None, sb=(path))

            self.depthbegin -= 1
        elif self.rx_title2.match(cmd):
            depth = " " * self.depthbegin
            flinear.write(f" {self.depthbegin:2} {depth} *title {args}\n")

    indent = " " * self.depthinclude
    sys.stderr.flush()
    self.callcount += 1

    # progress indicator
    if self.callcount % 10 == 0:
        print(".", file=sys.stderr, end="")
    if self.callcount % 500 == 0:
        print("\n ", file=sys.stderr, end="")

    if path in self.svxfileslist:
        # We have already used os.normpath() so this is OK. "/../" and "//" have been simplified already.
        if not stop_dup_warning:
            message = f" * Warning. Duplicate detected. We have already seen this *include '{path}' from another survex file. Detected at callcount:{self.callcount} depth:{self.depthinclude}"
            print(message)
            print(message, file=flinear)
            stash_data_issue(parser="survex", message=message, url=None, sb=(path))
        if self.svxfileslist.count(path) > 2:
            message = f" ! ERROR. Should have been caught before this. Survex file already *included 2x. Probably an infinite loop so fix your *include statements that include this. Aborting. {path}"
            print(message)
            print(message, file=flinear)
            stash_data_issue(parser="survex", message=message, url=None, sb=(path))
        return  # never read the same file twice

    try:
        # python generator idiom again. Not important here as these are small files
        with open(finname, "r") as fin:
            for svxline in fin:
                process_line(svxline)

        self.svxfileslist.append(path)

    except UnicodeDecodeError:
        # some bugger put an umlaut in a non-UTF survex file ?!
        message = f" ! ERROR *include file '{path}' in '{survexblock}' has UnicodeDecodeError. Omitted."
        print(message)
        print(message, file=sys.stderr)
        stash_data_issue(parser="survex", message=message, url=None, sb=(path))
        return  # skip this survex file and all things *included in it
    except Exception:
        message = f" ! ERROR *include file '{path}' in '{survexblock}' has unexpected error on opening file. OMITTED!"
        print(message)
        print(message, file=sys.stderr)
        stash_data_issue(parser="survex", message=message, url=None, sb=(path))
        raise
|
|
|
|
def never_seen(self, incpath, parent):
    """Record that parent *includes incpath; return True only the first time.

    The _unseen files may include survex files we have already seen, and we
    do not want to process them again. For the _unseens this is not an
    error, but for the main *include tree it is: during the TREE pass a
    repeat is printed and stashed as a data issue.

    self.uniquefile maps each included path to the list of every parent
    that has included it, in the order encountered.
    """
    includers = self.uniquefile.get(incpath)
    if includers is None:
        # first sighting: start the list of includers
        self.uniquefile[incpath] = [parent]
        return True

    includers.append(parent)
    if self.svxpass == self.TREE:
        message = (
            f" DUP: skipping non-unique survex filepath, '{incpath}' - #{len(includers)} '{includers}'"
        )
        print(message)
        stash_data_issue(parser='survex', message=message)
        # show who included each of the includers, where that is known
        for includer in includers:
            if includer in self.uniquefile:
                print(f"{includer} <- {self.uniquefile[includer]}")
    return False
|
|
|
|
def check_unique_name(self, fullpath):
    """Deliberate no-op, kept so that existing callers need not change.

    This used to check only whether the last bit of the name of the survex
    file was unique, e.g. "bigpitch", not whether the whole path of the
    survexfile had been seen before. We don't care about this any more.
    """
    return None
|
|
|
|
|
|
def RunSurvexIfNeeded(self, fullpath, calledpath):
    """Re-run survex 'cavern' on fullpath + ".svx" if its .log output is missing or stale.

    fullpath   -- path of the survex file, without the ".svx" suffix
    calledpath -- the file whose *include named it; used in messages only

    cavern is run when any of these holds, tested in this order:
      - there is no .log file
      - the .svx file is newer than the .log file
      - the .log file is more than 60 days old
      - self.caverndate is newer than the .log file
      - chaosmonkey(350) says so
    Nothing is run, and a data issue is stashed, if the .svx file is missing.
    """
    now = time.time()
    # Placeholder timestamps, so that the diagnostic print in runcavern() still
    # works when cavern is run before the real file times have been read.
    cav_t = now - 365 * 24 * 3600
    log_t = now - 365 * 24 * 3600
    svx_t = now - 365 * 24 * 3600

    def runcavern():
        """regenerates the .3d file from the .svx if it is older than the svx file, or older than the software,
        or randomly using chaosmonkey() just to keep things ticking over.
        """
        try:
            print(
                f" - Regenerating stale (or chaos-monkeyed) cavern .log and .3d for '{fullpath}'\n at '{logpath}'\n"
            )
            print(
                f"days svx old: {(svx_t - log_t)/(24*3600):.1f} cav:{(cav_t - log_t)/(24*3600):.1f} log old: { (now - log_t)/(24*3600):.1f}"
            )

            outputdir = Path(str(f"{fullpath}.svx")).parent
            sp = subprocess.run(
                [settings.CAVERN, "--log", f"--output={outputdir}", f"{fullpath}.svx"],
                capture_output=True,
                check=False,
                text=True,
            )
            if sp.returncode != 0:
                message = f" ! Error running {settings.CAVERN}: {fullpath}"
                url = f"/survexfile{fullpath}.svx".replace(str(settings.SURVEX_DATA), "")
                stash_data_issue(parser="xEntrances", message=message, url=url)
                print(message)
                print(
                    "stderr:\n\n" + str(sp.stderr) + "\n\n" + str(sp.stdout) + "\n\nreturn code: " + str(sp.returncode)
                )
            self.caverncount += 1

            # should also collect all the .err files too and create a DataIssue for each one which
            # - is nonzero in size AND
            # - has Error greater than 5% anywhere, or some other more serious error

            errpath = Path(fullpath + ".err")
            if errpath.is_file():
                if errpath.stat().st_size == 0:
                    errpath.unlink()  # delete empty closure error file
        except Exception:
            # Best effort: a failure on one file must not stop the whole import.
            # KeyboardInterrupt and SystemExit are deliberately not caught here.
            message = f' ! FAIL running cavern on survex file "{fullpath}" specified in *include in {calledpath} '
            stash_data_issue(parser="survex", message=message)
            print(message)

    svxpath = Path(fullpath + ".svx")
    logpath = Path(fullpath + ".log")

    if not svxpath.is_file():
        message = f' ! BAD. "{fullpath}" is not a file, specified in *include in {calledpath} '
        stash_data_issue(parser="survex", message=message)
        print(message)
        return

    if not logpath.is_file():  # always run if logfile not there
        runcavern()
        return

    # The modification time of the cavern executable is not consulted:
    # caverndate is pinned to two years ago. The "new version of cavern" test
    # below can therefore only be true for a log which the 60-day rule has
    # already caught.
    self.caverndate = now - 2 * 365 * 24 * 3600
    cav_t = self.caverndate
    log_t = os.path.getmtime(logpath)
    svx_t = os.path.getmtime(svxpath)
    now = time.time()

    if svx_t - log_t > 0:  # stale, svx file is newer than log
        runcavern()
        return
    if now - log_t > 60 * 24 * 60 * 60:  # >60 days, re-run anyway
        runcavern()
        return
    if cav_t - log_t > 0:  # new version of cavern
        runcavern()
        return
    if chaosmonkey(350):  # one in every 350 runs
        runcavern()
|
|
|
|
|
|
def FindAndLoadSurvex(survexblockroot):
    """Follows the *include links successively to find survex files
    This proceeds in 3 phases:
    1. The root survex file is read and all the *include files are found, using PushdownStackScan()
    2. All the other survex files in the :loser: repo are found, and their *includes found,
       using another PushdownStackScan() [duplicates omitted]
    3. The combined expanded file containing all the survex data is parsed as a single file,
       using LinearLoad()

    While this runs sys.stdout is redirected to "svxblks.log"; progress goes to
    sys.stderr. The original sys.stdout is restored, and the log files are
    closed, even if one of the phases raises.

    Files written in the current directory: svxblks.log, svxlinear.log,
    PushdownStackScan.prof and the collated file "_<root path>.svx".
    The UNSEENS file is written in settings.SURVEX_DATA.

    Returns the total number of survex legs counted by LinearLoad().
    """
    global stop_dup_warning
    import cProfile
    import pstats
    from pstats import SortKey

    print(" - redirecting stdout to svxblks.log...")
    stdout_orig = sys.stdout
    # Redirect sys.stdout to the file
    sys.stdout = open("svxblks.log", "w")
    try:
        print(f" - Scanning Survex Blocks tree from {settings.SURVEX_TOPNAME}.svx ...", file=sys.stderr)
        survexfileroot = survexblockroot.survexfile  # i.e. SURVEX_TOPNAME only
        collatefilename = "_" + survexfileroot.path + ".svx"

        svx_scan = LoadingSurvex()
        svx_scan.callcount = 0
        svx_scan.depthinclude = 0
        fullpathtotop = os.path.join(survexfileroot.survexdirectory.path, survexfileroot.path)

        print(f" - RunSurvexIfNeeded cavern on '{fullpathtotop}'", file=sys.stderr)
        svx_scan.RunSurvexIfNeeded(fullpathtotop, fullpathtotop)
        svx_scan.check_unique_name(fullpathtotop)
        svx_scan.uniquefile[str(survexfileroot)] = ["0"]

        indent = ""
        # Both logs stay open across phases 1 and 2 and must be closed before
        # phase 3, which reads the collated file back in.
        with open(collatefilename, "w") as fcollate, open("svxlinear.log", "w") as flinear:
            mem0 = get_process_memory()
            print(f" - MEM:{mem0:7.2f} MB START '{survexfileroot}'", file=sys.stderr)
            flinear.write(f" - MEM:{mem0:7.2f} MB START '{survexfileroot.path}'\n")
            print(" ", file=sys.stderr, end="")

            finrootname = Path(settings.SURVEX_DATA, survexfileroot.path + ".svx")
            fcollate.write(f";*include {survexfileroot.path}\n")
            flinear.write(f"{svx_scan.depthinclude:2} {indent} *include {survexfileroot.path}\n")

            pr = cProfile.Profile()
            pr.enable()
            svx_scan.svxpass = svx_scan.TREE
            # ----------------------------------------------------------------
            svx_scan.PushdownStackScan(survexblockroot, survexfileroot.path, finrootname, flinear, fcollate)
            # ----------------------------------------------------------------
            svx_scan.svxpass = ""
            pr.disable()
            with open("PushdownStackScan.prof", "w") as f:
                ps = pstats.Stats(pr, stream=f)
                ps.sort_stats(SortKey.CUMULATIVE)
                ps.print_stats()

            flinear.write(f"{svx_scan.depthinclude:2} {indent} *edulcni {survexfileroot.path}\n")
            fcollate.write(f";*edulcni {survexfileroot.path}\n")
            mem1 = get_process_memory()
            flinear.write(f"\n - MEM:{mem1:.2f} MB STOP {survexfileroot.path}\n")
            flinear.write(f" - MEM:{mem1 - mem0:.3f} MB ADDITIONALLY USED\n")
            flinear.write(f" - {len(svx_scan.svxfileslist):,} survex files in linear include list \n")
            flinear.write(f" - {len(svx_scan.uniquefile):,} unique survex files in linear include list \n")
            for j in svx_scan.svxfileslist:
                if j not in svx_scan.uniquefile:
                    flinear.write(f" - '{j}' {type(j)} not in unique list \n")
            for f in svx_scan.uniquefile:
                if len(svx_scan.uniquefile[f]) > 1:
                    flinear.write(f" - '{f}' {type(f)} {svx_scan.uniquefile[f]} dup survex files \n")

            print(f"\n - {svx_scan.caverncount:,} runs of survex 'cavern' refreshing .3d files", file=sys.stderr)
            print(f" - {len(svx_scan.svxfileslist):,} survex files from tree in linear include list", file=sys.stderr)
            print(f" - {len(svx_scan.uniquefile):,} unique survex files from tree in linear include list", file=sys.stderr)
            mem1 = get_process_memory()
            print(f" - MEM:{mem1:7.2f} MB END ", file=sys.stderr)
            print(f" - MEM:{mem1 - mem0:7.3f} MB ADDITIONALLY USED", file=sys.stderr)

            #
            # Process all the omitted files in :loser: with some exceptions
            seen_in_tree = set(svx_scan.svxfileslist)  # built once: a set lookup per file found
            unseens = set()
            b = []

            for p in Path(settings.SURVEX_DATA).rglob("*.svx"):
                if p.is_file():
                    po = p.relative_to(Path(settings.SURVEX_DATA))
                    pox = po.with_suffix("")
                    if str(pox) not in seen_in_tree:
                        unseens.add(pox)
                    else:
                        b.append(pox)

            if len(b) != len(svx_scan.svxfileslist):
                print(
                    f" ! Mismatch. {len(b)} survex files found which should be {len(svx_scan.svxfileslist)} in main tree)",
                    file=sys.stderr,
                )

            unseensroot = re.sub(r"\.svx$", "", UNSEENS)
            excpts = ["surface/terrain", "kataster/kataster-boundaries", "template", "docs", unseensroot]
            removals = [x for x in unseens if str(x).strip().startswith(tuple(excpts))]
            # special fix for .svx file not actually in survex format.
            # discard(), not remove(): the file may be absent or already excluded.
            unseens.discard(Path("fixedpts/gps/gps00raw"))
            unseens.difference_update(removals)
            print(
                f"\n - {len(unseens)} survex files found which were not included in main tree. ({len(svx_scan.svxfileslist)} in main tree)",
                file=sys.stderr,
            )
            check_team_cache()
            print(" -- Now loading the previously-omitted survex files.", file=sys.stderr)

            with open(Path(settings.SURVEX_DATA, UNSEENS), "w") as u:
                u.write(
                    f"; {len(unseens):,} survex files not *included by {settings.SURVEX_TOPNAME} (which are {len(svx_scan.svxfileslist):,} files)\n"
                )
                u.write(f"; autogenerated by parser/survex.py from databasereset.py on '{datetime.now(timezone.utc)}'\n")
                u.write(f"; omitting any file beginning with {excpts}\n\n")
                u.write("*begin troggle_unseens\n")
                u.write("*title \"Collated unseen and unlinked survex files\"\n")
                for x in sorted(unseens):
                    u.write(f" *include {x}\n")
                u.write("*end troggle_unseens\n")

            survexfileroot = survexblockroot.survexfile  # i.e. SURVEX_TOPNAME only

            omit_scan = LoadingSurvex()
            omit_scan.callcount = 0
            omit_scan.depthinclude = 0
            fullpathtotop = os.path.join(survexfileroot.survexdirectory.path, UNSEENS)

            # copy the list to prime the next pass through the files
            omit_scan.svxfileslist = svx_scan.svxfileslist[:]
            svx_scan.svxfileslist = []  # free memory
            svx_scan = None  # drop our reference to the first scanner

            omit_scan.check_unique_name(fullpathtotop)
            omit_scan.uniquefile[unseensroot] = ["0"]

            mem0 = get_process_memory()
            print(f" - MEM:{mem0:7.2f} MB START '{unseensroot}'", file=sys.stderr)
            flinear.write(f" - MEM:{mem0:7.2f} MB START '{unseensroot}'\n")
            print(" ", file=sys.stderr, end="")

            # this is a bit tricky as some unseen files will *include files we have already seen, which
            # we should not process again.
            finrootname = fullpathtotop
            fcollate.write(f";*include {UNSEENS}\n")
            flinear.write(f"{omit_scan.depthinclude:2} {indent} *include {unseensroot}\n")
            omit_scan.svxpass = omit_scan.ODDS
            # stop_dup_warning = True
            # ----------------------------------------------------------------
            omit_scan.PushdownStackScan(survexblockroot, unseensroot, finrootname, flinear, fcollate)
            # ----------------------------------------------------------------
            # stop_dup_warning = False
            omit_scan.svxpass = ""

            flinear.write(f"{omit_scan.depthinclude:2} {indent} *edulcni {unseensroot}\n")
            fcollate.write(f";*edulcni {UNSEENS}\n")

            check_team_cache()

            mem1 = get_process_memory()
            flinear.write(f"\n - MEM:{mem1:.2f} MB STOP {UNSEENS} Unseen Oddments\n")
            flinear.write(f" - MEM:{mem1 - mem0:.3f} MB ADDITIONALLY USED Unseen Oddments\n")
            flinear.write(f" - {len(omit_scan.svxfileslist):,} survex files in linear include list Unseen Oddments \n")

        print(
            f"\n - {omit_scan.caverncount:,} runs of survex 'cavern' refreshing .3d files in the unseen list",
            file=sys.stderr,
        )

        print(
            f" - {len(omit_scan.svxfileslist):,} survex files in linear include list including previously unseen ones \n",
            file=sys.stderr,
        )
        omit_scan = None  # drop our reference to the second scanner

        mem1 = get_process_memory()
        print(f" - MEM:{mem1:7.2f} MB END ", file=sys.stderr)
        print(f" - MEM:{mem1 - mem0:7.3f} MB ADDITIONALLY USED", file=sys.stderr)

        # Before doing this, it would be good to identify the *equate and *entrance we need that are relevant to the
        # entrance locations currently loaded after this by LoadPos(), but could better be done before ?
        # look in MapLocations() for how we find the entrances

        print("\n - Loading All Survex Blocks (LinearLoad)", file=sys.stderr)
        svx_load = LoadingSurvex()

        svx_load.survexdict[survexfileroot.survexdirectory] = []
        svx_load.survexdict[survexfileroot.survexdirectory].append(survexfileroot)
        svx_load.svxdirs[""] = survexfileroot.survexdirectory

        print(" ", file=sys.stderr, end="")
        # ----------------------------------------------------------------
        svx_load.LinearLoad(survexblockroot, survexfileroot.path, collatefilename)
        # ----------------------------------------------------------------
        mem1 = get_process_memory()
        print(f"\n - MEM:{mem1:7.2f} MB STOP", file=sys.stderr)
        print(f" - MEM:{mem1 - mem0:7.3f} MB ADDITIONALLY USED", file=sys.stderr)
    finally:
        # Close the logging file, Restore sys.stdout to our old saved file handle
        sys.stdout.close()
        sys.stdout = stdout_orig

    print("+", file=sys.stderr)
    sys.stderr.flush()

    legsnumber = svx_load.legsnumber
    mem1 = get_process_memory()

    print(f" - Number of SurvexDirectories: {len(svx_load.survexdict):,}")
    tf = 0
    for d in svx_load.survexdict:
        tf += len(svx_load.survexdict[d])
    print(f" - Number of SurvexFiles: {tf:,}")
    print(f" - Number of Survex legs: {legsnumber:,}")
    svx_load = None

    return legsnumber
|
|
|
|
def display_contents(blocks):
    """Debug aid: print each block followed by the database rows that refer to it.

    For every block in blocks this prints the block with its parent and
    expedition, then its SurvexFiles (each with its survexdirectory and that
    directory's cave), then its Wallets, QMs and SurvexStations.
    """
    for b in blocks:
        # The loop variable must stay named 'b': the '=' specifier in the
        # f-string echoes the expression text into the output.
        print(f"B {b} {b.parent=} {b.expedition=}")
        for svxfile in SurvexFile.objects.filter(survexblock=b):
            print(f" SF {svxfile}")
            print(f" SD {svxfile.survexdirectory} {svxfile.survexdirectory.cave}")

        # (output tag, model, name of the model's field that points at the block)
        referrers = (
            (" W", Wallet, "survexblock"),
            (" QM", QM, "block"),
            (" SS", SurvexStation, "block"),
        )
        for tag, model, fieldname in referrers:
            for row in model.objects.filter(**{fieldname: b}):
                print(f"{tag} {row}")
|
|
|
|
def parse_one_file(fpath):  # --------------------------------------in progress-------------------
    """Parse just one survex file and import it. Use when re-loading after editing.

    NOTE: *include lines are ignored.
    In the initial file parsing in databaseReset, the *include expansion is done
    in an earlier stage than LinearLoad(). By the time LinearLoad() is called,
    all the *include expansion has happened.

    WORK IN PROGRESS.
    Works fine for a completely new survex file.

    For an edited, pre-existing survex file the import is refused, because getting
    the 'parent' block to work correctly is an unsolved problem: it gets overwritten,
    and then nullified, on repeated SAVE & import. In that case nothing is written
    to the database and a complete databaseReset is needed to pick up the edit.

    fpath: the survex file path as held in SurvexFile.path, i.e. relative to
           settings.SURVEX_DATA and without the ".svx" extension.

    Returns True if nothing was imported (the file is already in the database, or
    more than one SurvexFile object has this path), otherwise None.
    """

    def parse_new_svx(fpath):
        """Import a survex file which has no SurvexFile object in the database yet."""
        newfileroot = MakeFileRoot(fpath)
        survexblockparent = SurvexBlock(
            name="adhoc_parent", survexpath="", survexfile=newfileroot, legsall=0, legslength=0.0
        )
        survexblockparent.save()

        # Tell the loader which directory and file this import belongs to.
        svx_load.survexdict[newfileroot.survexdirectory] = [newfileroot]
        svx_load.svxdirs[""] = newfileroot.survexdirectory

        # ----------------------------------------------------------------
        svx_load.LinearLoad(survexblockparent, newfileroot.path, fname)
        # ----------------------------------------------------------------

    def reparse_existing_svx(svxs):
        """Refuse to re-import a survex file which already exists in the database.

        The aim is to keep the parent survexblock but replace everything else by
        parsing the file, without deleting and recreating the SurvexFile object, as
        other survex files may have this as the parent and we are not processing any
        *include we find.

        That does not work yet, so this only prints what is attached to the file and
        gives up. It deliberately changes nothing in the database: zeroing the totals
        on the parent block is only correct if the re-parse which rebuilds them
        actually happens afterwards.

        TODO once the parent-block bug is understood, the intended steps are:
          - zero legsall and legslength on the parent block (this also obliterates
            the survey lengths from all 'sibling' survex files, which needs solving)
          - delete all pre-existing SurvexBlocks attached to this SurvexFile
          - if the file had no blocks, create a "fresh_parent" SurvexBlock instead
          - register the file and its directory with svx_load and run
            svx_load.LinearLoad(survexblockparent, fpath, fname)
          - find out why the parent block then appears among the child blocks of the
            survexfile, and why /expedition/1996 was seen to crash with a template
            error after a re-import

        Returns True, always, meaning the import was aborted.
        """
        svx = svxs[0]  # first and only member of QuerySet
        blocks = SurvexBlock.objects.filter(survexfile=svx)

        if len(blocks) >= 1:
            print(f"Blocks in '{svx}': {blocks}")
            display_contents(blocks)

        print("ABORTING - UNSOLVED BUGS. Do a complete databaseReset")
        return True

    print(f"\n - Loading One Survex file '{fpath}'", file=sys.stderr)
    svx_load = LoadingSurvex()

    fname = Path(settings.SURVEX_DATA, (fpath + ".svx"))

    svxs = SurvexFile.objects.filter(path=fpath)
    if svxs:
        if len(svxs) > 1:
            print(f" ! Mistake? More than one survex file object in database with the same file-path {svxs}")
            print(" - Aborting file parsing & import into database.")
            return True
        print(f" - Pre-existing survexfile {svxs}.")
        # Pass the abort on to the caller rather than printing the summary of an
        # import which never happened.
        return reparse_existing_svx(svxs)

    print(f" - Not seen this survexfile before '{fpath}' Loading...")
    parse_new_svx(fpath)

    legsnumber = svx_load.legsnumber

    print(f" - Number of SurvexDirectories: {len(svx_load.survexdict):,}")
    print(f" - SurvexDirectories: {svx_load.survexdict}")

    tf = sum(len(files) for files in svx_load.survexdict.values())
    print(f" - Number of SurvexFiles: {tf:,}")
    print(f" - Number of Survex legs: {legsnumber:,}")
    print(f" - Length of Survex legs: {svx_load.slength:.2f}")
|
|
|
|
def MakeSurvexFileRoot():
    """Create the root SurvexFile and the root SurvexDirectory, and return the file.

    The file object has path = SURVEX_TOPNAME and the directory object has
    path = SURVEX_DATA. The directory has the file as its primarysurvexfile and
    the file has the directory as its survexdirectory.

    Returns the saved SurvexFile object.
    """
    # Find a cave, any cave.. Cave "000" is preferred, but any other will do
    # rather than dying with an IndexError when "000" has not been imported.
    # MariaDB doesn't like a null cave here. Complains about non-null cave_id EVEN THOUGH
    # our model file says this is OK:
    # cave = models.ForeignKey('Cave', blank=True, null=True,on_delete=models.SET_NULL)
    # so the cave is only left as None when there are no caves in the database at all.
    cave = Cave.objects.filter(kataster_number="000").first()
    if cave is None:
        cave = Cave.objects.first()

    fileroot = SurvexFile(path=settings.SURVEX_TOPNAME, cave=None)
    fileroot.save()

    directoryroot = SurvexDirectory(path=settings.SURVEX_DATA, cave=cave, primarysurvexfile=fileroot)
    directoryroot.save()

    fileroot.survexdirectory = directoryroot  # i.e. SURVEX_DATA/SURVEX_TOPNAME
    fileroot.save()  # mutually dependent objects need a double-save like this
    return fileroot
|
|
|
|
|
|
def MakeFileRoot(fn):
    """Create and return a SurvexFile with path = fn, attached to the first SurvexDirectory.

    fn: the value for the 'path' field of the new SurvexFile, e.g. the name used
        for the file which collects the otherwise-unseen survex files.

    Returns the saved SurvexFile object.
    Raises SurvexDirectory.DoesNotExist if there is no SurvexDirectory to attach it to.

    CHANGE THIS to just use the same block root as for SURVEX_TOPNAME ?
    """
    # Just re-use the first thing we made. That is the directory with the lowest id,
    # which is not necessarily id=1: deleting all the rows before an import does not
    # reset the database's id counter.
    directory = SurvexDirectory.objects.order_by("id").first()
    if directory is None:
        raise SurvexDirectory.DoesNotExist("No SurvexDirectory exists yet, so there is nothing to attach the file to")

    fileroot = SurvexFile(path=fn, cave=None)
    fileroot.survexdirectory = directory
    fileroot.save()
    return fileroot
|
|
|
|
|
|
def LoadSurvexBlocks():
    """Flush all the survex data from the database and re-import it from the survex files.

    In order, this:
      - deletes all SurvexBlock, SurvexFile, SurvexDirectory, SurvexPersonRole and
        SurvexStation objects, and the DataIssues raised by the survex parsers
      - creates the root SurvexFile and SurvexBlock, and the file and block for
        the unseen files
      - calls FindAndLoadSurvex() on the root block
      - reports any team list left pending as a data issue, and stores the data issues

    Progress and memory use are printed as it goes. Returns nothing.
    """
    global dataissues  # rebound to a fresh list below
    global dup_includes
    global person_pending_cache

    mem1 = get_process_memory()
    print(f" - MEM:{mem1:7.2f} MB now ", file=sys.stderr)

    print(" - Flushing All Survex Blocks...")
    # why does this increase memory use by 20 MB ?!
    # We have foreign keys, Django needs to load the related objects
    # in order to resolve how the relation should handle the deletion:
    # https://docs.djangoproject.com/en/dev/ref/models/fields/#django.db.models.ForeignKey.on_delete
    SurvexBlock.objects.all().delete()
    SurvexFile.objects.all().delete()
    SurvexDirectory.objects.all().delete()
    SurvexPersonRole.objects.all().delete()
    SurvexStation.objects.all().delete()
    mem1 = get_process_memory()
    print(f" - MEM:{mem1:7.2f} MB now. Foreign key objects loaded on deletion. ", file=sys.stderr)

    print(" - Flushing survex Data Issues ")
    dataissues = []
    # One query for all the parser labels flushed by this import.
    survex_parsers = ["survex", "svxdate", "survexleg", "survexunits", "survex team", "entrances", "xEntrances"]
    DataIssue.objects.filter(parser__in=survex_parsers).delete()
    print(" - survex Data Issues flushed")
    mem1 = get_process_memory()
    print(f" - MEM:{mem1:7.2f} MB now ", file=sys.stderr)

    survexfileroot = MakeSurvexFileRoot()
    # this next makes a block_object associated with a file_object.path = SURVEX_TOPNAME
    survexblockroot = SurvexBlock(
        name=ROOTBLOCK, survexpath="", survexfile=survexfileroot, legsall=0, legslength=0.0
    )
    # crashes here sometimes on MariaDB complaining that cave_id should not be null. But it should be.
    # django.db.utils.IntegrityError: (1048, "Column 'cave_id' cannot be null")
    # fix by restarting db on server
    # sudo service mariadb stop
    # sudo service mariadb start
    survexblockroot.save()

    omitsfileroot = MakeFileRoot(UNSEENS)
    survexomitsroot = SurvexBlock(
        name=OMITBLOCK, survexpath="", survexfile=omitsfileroot, legsall=0, legslength=0.0
    )
    survexomitsroot.save()

    print(" - Loading Survex Blocks...")
    memstart = get_process_memory()
    # ----------------------------------------------------------------
    FindAndLoadSurvex(survexblockroot)
    # ----------------------------------------------------------------
    memend = get_process_memory()
    print(f" - MEMORY start:{memstart:.3f} MB end:{memend:.3f} MB increase={memend - memstart:.3f} MB")

    survexblockroot.save()

    # Anything still in the pending cache is a team list which was never emptied.
    for sb, pending in person_pending_cache.items():
        if len(pending) > 0:
            print(" ")
            message = f" ! PENDING team list not emptied {sb.survexfile.path} {len(pending)} people: {pending}"
            stash_data_issue(parser="survex", message=message, url=None, sb=(sb.survexfile.path))
            print(message)

    store_data_issues()

    if dup_includes > 0:
        print(f" - ERROR: There are {dup_includes} duplicate *includes in the final list")
    print(" - Loaded All Survex Blocks.")
|
|
|