2
0
mirror of https://expo.survex.com/repositories/troggle/.git synced 2025-12-14 05:55:06 +00:00

better errors for drawings parsing & upload

This commit is contained in:
Philip Sargent
2022-03-05 20:29:01 +00:00
parent 88f5df0f19
commit a3a65524b8
3 changed files with 48 additions and 24 deletions

View File

@@ -38,7 +38,7 @@ def find_dwg_file(dwgfile, path):
which we have already seen when we imported all the files we could find in the surveyscans directories
'''
wallet, scansfile = None, None
mscansdir = re.search(r"(\d\d\d\d#X?\d+\w?|1995-96kh|92-94Surveybookkh|1991surveybook|smkhs)/(.*?(?:png|jpg|pdf|jpeg|gif))$", path)
mscansdir = re.search(r"(\d\d\d\d#X?\d+\w?|1995-96kh|92-94Surveybookkh|1991surveybook|smkhs)/(.*?(?:png|jpg|pdf|jpeg|gif|txt))$", path)
if mscansdir:
scanswalletl = Wallet.objects.filter(walletname=mscansdir.group(1))
# This should be changed to properly detect if a list of folders is returned and do something sensible, not just pick the first.
@@ -66,7 +66,7 @@ def find_dwg_file(dwgfile, path):
if scansfile:
dwgfile.scans.add(scansfile)
elif path and not re.search(r"\.(?:png|jpg|pdf|jpeg|gif)$(?i)", path):
elif path and not re.search(r"\.(?:png|jpg|pdf|jpeg|gif|txt)$(?i)", path):
name = os.path.split(path)[1]
rdwgfilel = DrawingFile.objects.filter(dwgname=name)
if len(rdwgfilel):
@@ -76,7 +76,7 @@ def find_dwg_file(dwgfile, path):
plist.append(df.dwgname)
message = f"! {len(rdwgfilel)} paths found with same name '{path}' {plist}"
print(message)
DataIssue.objects.create(parser='Tunnel', message=message)
DataIssue.objects.create(parser='Tunnel', message=message, url=f'/dwgdataraw/{path}')
rdwgfile = rdwgfilel[0]
dwgfile.dwgcontains.add(rdwgfile)
@@ -106,7 +106,7 @@ def settherionfileinfo(filetuple):
if therionfile.filesize <= 0:
message = "! Zero length therion file {}".format(ff)
print(message)
DataIssue.objects.create(parser='Therion', message=message)
DataIssue.objects.create(parser='Therion', message=message, url=f'/dwgdataraw/{therionfile.dwgpath}')
return
fin = open(ff,'r')
ttext = fin.read()
@@ -130,7 +130,7 @@ def settherionfileinfo(filetuple):
for xth_me in rx_xth_me.findall(ttext):
message = f'! Un-parsed image filename: {therionfile.dwgname} : {xth_me.split()[-3]} - {therionfile.dwgpath}'
#print(message)
DataIssue.objects.create(parser='Therion', message=message)
DataIssue.objects.create(parser='xTherion', message=message, url=f'/dwgdataraw/{therionfile.dwgpath}')
findimageinsert(therionfile, xth_me)
for inp in rx_input.findall(ttext):
@@ -138,7 +138,7 @@ def settherionfileinfo(filetuple):
# but we would need to disentangle to get the current path properly
message = f'! Un-set Therion .th2 input: - {therionfile.dwgname} : {inp} - {therionfile.dwgpath}'
#print(message)
DataIssue.objects.create(parser='Therion', message=message)
DataIssue.objects.create(parser='xTherion', message=message, url=f'/dwgdataraw/{therionfile.dwgpath}')
findimportinsert(therionfile, inp)
therionfile.save()
@@ -146,7 +146,7 @@ def settherionfileinfo(filetuple):
rx_skpath = re.compile(rb'<skpath')
rx_pcpath = re.compile(rb'<pcarea area_signal="frame".*?sfsketch="([^"]*)" sfstyle="([^"]*)"')
def setdwgfileinfo(dwgfile):
def settnlfileinfo(dwgfile):
'''Read in the drawing file contents and sets values on the dwgfile object
Should try to read the date too e.g. tunneldate="2010-08-16 22:51:57
then we could display on the master calendar per expo.
@@ -154,9 +154,9 @@ def setdwgfileinfo(dwgfile):
ff = os.path.join(settings.DRAWINGS_DATA, dwgfile.dwgpath)
dwgfile.filesize = os.stat(ff)[stat.ST_SIZE]
if dwgfile.filesize <= 0:
message = "! Zero length xml file {}".format(ff)
message = "! Zero length tunnel file {}".format(ff)
print(message)
DataIssue.objects.create(parser='Drawings', message=message)
DataIssue.objects.create(parser='Tunnel', message=message, url=f'/dwgdataraw/{dwgfile.dwgpath}')
return
fin = open(ff,'rb')
ttext = fin.read()
@@ -177,6 +177,20 @@ def setdwgfileinfo(dwgfile):
dwgfile.save()
def setdrwfileinfo(dwgfile):
    '''Record the on-disk size of a generic drawing file on the dwgfile object.

    These are PDFs or .txt files, so there is no useful internal format to
    search; only the file size is read. This function is a placeholder in
    case we think of a way to recognise generic survex filenames.

    The file is located at settings.DRAWINGS_DATA / dwgfile.dwgpath and its
    size is stored in dwgfile.filesize. A zero-length file is reported by
    printing a message and creating a DataIssue with parser='drawings'.

    NOTE(review): filesize is assigned here but save() is not called in this
    function - confirm that the caller persists the object.
    '''
    drawing_path = Path(settings.DRAWINGS_DATA) / dwgfile.dwgpath
    dwgfile.filesize = drawing_path.stat().st_size

    # Nothing further to do for a file that has content.
    if dwgfile.filesize > 0:
        return

    message = f"! Zero length drawing file {drawing_path}"
    print(message)
    DataIssue.objects.create(
        parser='drawings',
        message=message,
        url=f'/dwgdataraw/{dwgfile.dwgpath}',
    )
def load_drawings_files():
'''Breadth first search of drawings directory looking for sub-directories and *.xml filesize
@@ -184,13 +198,14 @@ def load_drawings_files():
Why do we have all this detection of file types/! Why not use get_mime_types ?
What is it all for ??
ALL THIS NEEDS TO DETCT UPPER CASE suffices
ALL THIS NEEDS TO DETECT UPPER CASE suffices
'''
all_xml = []
drawdatadir = settings.DRAWINGS_DATA
DrawingFile.objects.all().delete()
DataIssue.objects.filter(parser='Drawings').delete()
DataIssue.objects.filter(parser='drawings').delete()
DataIssue.objects.filter(parser='Therion').delete()
DataIssue.objects.filter(parser='xTherion').delete()
DataIssue.objects.filter(parser='Tunnel').delete()
drawingsdirs = [ "" ]
@@ -203,32 +218,37 @@ def load_drawings_files():
ff = os.path.join(drawdatadir, lf)
if os.path.isdir(ff):
drawingsdirs.append(lf) # lunatic! adding to list in middle of list while loop!
elif f[-4:] == ".xml":
elif Path(f).suffix.lower() == ".txt":
# Always creates new
dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1])
dwgfile.save()
all_xml.append(('txt',dwgfile))
elif Path(f).suffix.lower() == ".xml":
# Always creates new
dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1])
dwgfile.save()
all_xml.append(('xml',dwgfile))
elif f[-3:] == ".th":
elif Path(f).suffix.lower() == ".th":
# Always creates new
dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1])
dwgfile.save()
all_xml.append(('th',dwgfile))
elif f[-4:] == ".th2":
elif Path(f).suffix.lower() == ".th2":
# Always creates new
dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1])
dwgfile.save()
all_xml.append(('th2',dwgfile))
elif f[-4:] == ".pdf":
elif Path(f).suffix.lower() == ".pdf":
# Always creates new
dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1])
dwgfile.save()
all_xml.append(('pdf',dwgfile))
elif f[-4:] == ".svg":
elif Path(f).suffix.lower() == ".svg":
# Always creates new
dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1])
dwgfile.save()
all_xml.append(('svg',dwgfile))
elif f[-4:] == ".jpg":
elif Path(f).suffix.lower() == ".jpg":
# Always creates new
dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1])
dwgfile.save()
@@ -242,8 +262,10 @@ def load_drawings_files():
print(f' - {len(all_xml)} Drawings files found')
for d in all_xml:
if d[0] in ['pdf', 'txt', '']:
setdrwfileinfo(d[1])
if d[0] == 'xml':
setdwgfileinfo(d[1])
settnlfileinfo(d[1])
# important to import .th2 files before .th so that we can assign them when found in .th files
if d[0] == 'th2':
settherionfileinfo(d)