better errors for drawings parsing & upload

This commit is contained in:
Philip Sargent 2022-03-05 20:29:01 +00:00
parent 88f5df0f19
commit a3a65524b8
3 changed files with 48 additions and 24 deletions

View File

@ -109,12 +109,12 @@ def dwgupload(request, folder=None, gitdisable='no'):
We use get_or_create instead of simply creating a new object in case someone uploads the same file We use get_or_create instead of simply creating a new object in case someone uploads the same file
several times in one session, and expects them to be overwritten in the database. Although several times in one session, and expects them to be overwritten in the database. Although
the actual file will be duplicated in the filesystem with different random name ammendation. the actual file will be duplicated in the filesystem with different random name ending.
''' '''
def dwgvalid(name): def dwgvalid(name):
if name in [ '.gitignore', '.hgignore', ]: if name in [ '.gitignore', ]:
return False return False
if Path(name).suffix.lower() in ['.xml', '.th', '.th2', '', '.svg', '.jpg', '.pdf', 'jpeg']: if Path(name).suffix.lower() in ['.xml', '.th', '.th2', '', '.svg', '.jpg', '.pdf', '.jpeg', '.txt']:
return True # dangerous, we should check the actual file binary signature return True # dangerous, we should check the actual file binary signature
return False return False
@ -163,10 +163,11 @@ def dwgupload(request, folder=None, gitdisable='no'):
dwgfile.save() dwgfile.save()
else: else:
refused.append(f.name) refused.append(f.name)
print(f'REFUSED {f.name}')
if actual_saved: # maybe all were refused by the suffix test in dwgvalid() if actual_saved: # maybe all were refused by the suffix test in dwgvalid()
filesaved = True filesaved = True
if gitdisable != 'yes': if gitdisable != 'yes':
subprocess.call([git, "commit", "-m", 'dwgupload'], cwd=dirpath) subprocess.call([git, "commit", "-m", f'Drawings upload - {list(multiple)}'], cwd=dirpath)
files = [] files = []

View File

@ -38,7 +38,7 @@ def find_dwg_file(dwgfile, path):
which we have already seen when we imported all the files we could find in the surveyscans directories which we have already seen when we imported all the files we could find in the surveyscans directories
''' '''
wallet, scansfile = None, None wallet, scansfile = None, None
mscansdir = re.search(r"(\d\d\d\d#X?\d+\w?|1995-96kh|92-94Surveybookkh|1991surveybook|smkhs)/(.*?(?:png|jpg|pdf|jpeg|gif))$", path) mscansdir = re.search(r"(\d\d\d\d#X?\d+\w?|1995-96kh|92-94Surveybookkh|1991surveybook|smkhs)/(.*?(?:png|jpg|pdf|jpeg|gif|txt))$", path)
if mscansdir: if mscansdir:
scanswalletl = Wallet.objects.filter(walletname=mscansdir.group(1)) scanswalletl = Wallet.objects.filter(walletname=mscansdir.group(1))
# This should be changed to properly detect if a list of folders is returned and do something sensible, not just pick the first. # This should be changed to properly detect if a list of folders is returned and do something sensible, not just pick the first.
@ -66,7 +66,7 @@ def find_dwg_file(dwgfile, path):
if scansfile: if scansfile:
dwgfile.scans.add(scansfile) dwgfile.scans.add(scansfile)
elif path and not re.search(r"\.(?:png|jpg|pdf|jpeg|gif)$(?i)", path): elif path and not re.search(r"\.(?:png|jpg|pdf|jpeg|gif|txt)$(?i)", path):
name = os.path.split(path)[1] name = os.path.split(path)[1]
rdwgfilel = DrawingFile.objects.filter(dwgname=name) rdwgfilel = DrawingFile.objects.filter(dwgname=name)
if len(rdwgfilel): if len(rdwgfilel):
@ -76,7 +76,7 @@ def find_dwg_file(dwgfile, path):
plist.append(df.dwgname) plist.append(df.dwgname)
message = f"! {len(rdwgfilel)} paths found with same name '{path}' {plist}" message = f"! {len(rdwgfilel)} paths found with same name '{path}' {plist}"
print(message) print(message)
DataIssue.objects.create(parser='Tunnel', message=message) DataIssue.objects.create(parser='Tunnel', message=message, url=f'/dwgdataraw/{path}')
rdwgfile = rdwgfilel[0] rdwgfile = rdwgfilel[0]
dwgfile.dwgcontains.add(rdwgfile) dwgfile.dwgcontains.add(rdwgfile)
@ -106,7 +106,7 @@ def settherionfileinfo(filetuple):
if therionfile.filesize <= 0: if therionfile.filesize <= 0:
message = "! Zero length therion file {}".format(ff) message = "! Zero length therion file {}".format(ff)
print(message) print(message)
DataIssue.objects.create(parser='Therion', message=message) DataIssue.objects.create(parser='Therion', message=message, url=f'/dwgdataraw/{therionfile.dwgpath}')
return return
fin = open(ff,'r') fin = open(ff,'r')
ttext = fin.read() ttext = fin.read()
@ -130,7 +130,7 @@ def settherionfileinfo(filetuple):
for xth_me in rx_xth_me.findall(ttext): for xth_me in rx_xth_me.findall(ttext):
message = f'! Un-parsed image filename: {therionfile.dwgname} : {xth_me.split()[-3]} - {therionfile.dwgpath}' message = f'! Un-parsed image filename: {therionfile.dwgname} : {xth_me.split()[-3]} - {therionfile.dwgpath}'
#print(message) #print(message)
DataIssue.objects.create(parser='Therion', message=message) DataIssue.objects.create(parser='xTherion', message=message, url=f'/dwgdataraw/{therionfile.dwgpath}')
findimageinsert(therionfile, xth_me) findimageinsert(therionfile, xth_me)
for inp in rx_input.findall(ttext): for inp in rx_input.findall(ttext):
@ -138,7 +138,7 @@ def settherionfileinfo(filetuple):
# but we would need to disentangle to get the current path properly # but we would need to disentangle to get the current path properly
message = f'! Un-set Therion .th2 input: - {therionfile.dwgname} : {inp} - {therionfile.dwgpath}' message = f'! Un-set Therion .th2 input: - {therionfile.dwgname} : {inp} - {therionfile.dwgpath}'
#print(message) #print(message)
DataIssue.objects.create(parser='Therion', message=message) DataIssue.objects.create(parser='xTherion', message=message, url=f'/dwgdataraw/{therionfile.dwgpath}')
findimportinsert(therionfile, inp) findimportinsert(therionfile, inp)
therionfile.save() therionfile.save()
@ -146,7 +146,7 @@ def settherionfileinfo(filetuple):
rx_skpath = re.compile(rb'<skpath') rx_skpath = re.compile(rb'<skpath')
rx_pcpath = re.compile(rb'<pcarea area_signal="frame".*?sfsketch="([^"]*)" sfstyle="([^"]*)"') rx_pcpath = re.compile(rb'<pcarea area_signal="frame".*?sfsketch="([^"]*)" sfstyle="([^"]*)"')
def setdwgfileinfo(dwgfile): def settnlfileinfo(dwgfile):
'''Read in the drawing file contents and sets values on the dwgfile object '''Read in the drawing file contents and sets values on the dwgfile object
Should try to read the date too e.g. tunneldate="2010-08-16 22:51:57 Should try to read the date too e.g. tunneldate="2010-08-16 22:51:57
then we could display on the master calendar per expo. then we could display on the master calendar per expo.
@ -154,9 +154,9 @@ def setdwgfileinfo(dwgfile):
ff = os.path.join(settings.DRAWINGS_DATA, dwgfile.dwgpath) ff = os.path.join(settings.DRAWINGS_DATA, dwgfile.dwgpath)
dwgfile.filesize = os.stat(ff)[stat.ST_SIZE] dwgfile.filesize = os.stat(ff)[stat.ST_SIZE]
if dwgfile.filesize <= 0: if dwgfile.filesize <= 0:
message = "! Zero length xml file {}".format(ff) message = "! Zero length tunnel file {}".format(ff)
print(message) print(message)
DataIssue.objects.create(parser='Drawings', message=message) DataIssue.objects.create(parser='Tunnel', message=message, url=f'/dwgdataraw/{dwgfile.dwgpath}')
return return
fin = open(ff,'rb') fin = open(ff,'rb')
ttext = fin.read() ttext = fin.read()
@ -177,6 +177,20 @@ def setdwgfileinfo(dwgfile):
dwgfile.save() dwgfile.save()
def setdrwfileinfo(dwgfile):
    '''Record the on-disk size of a generic drawing file on the dwgfile object.

    These are PDFs or .txt files, so there is no useful internal format to
    search for. Apart from the size check this function is a placeholder in
    case we think of a way to do something to recognise generic survex
    filenames.

    dwgfile: object with a dwgpath attribute (a path relative to
             settings.DRAWINGS_DATA); its filesize attribute is set here.

    A zero-length file is reported as a DataIssue (parser 'drawings') and the
    function returns without saving. Otherwise the object is saved so that
    the measured filesize is actually persisted rather than discarded.

    Raises OSError (e.g. FileNotFoundError) if the file cannot be stat'ed.
    '''
    ff = Path(settings.DRAWINGS_DATA) / dwgfile.dwgpath
    dwgfile.filesize = ff.stat().st_size
    if dwgfile.filesize <= 0:
        message = "! Zero length drawing file {}".format(ff)
        print(message)
        DataIssue.objects.create(parser='drawings', message=message, url=f'/dwgdataraw/{dwgfile.dwgpath}')
        return

    # Without this the filesize set above never reaches the database.
    dwgfile.save()
def load_drawings_files(): def load_drawings_files():
'''Breadth first search of drawings directory looking for sub-directories and *.xml filesize '''Breadth first search of drawings directory looking for sub-directories and *.xml filesize
@ -184,13 +198,14 @@ def load_drawings_files():
Why do we have all this detection of file types/! Why not use get_mime_types ? Why do we have all this detection of file types/! Why not use get_mime_types ?
What is it all for ?? What is it all for ??
ALL THIS NEEDS TO DETCT UPPER CASE suffices ALL THIS NEEDS TO DETECT UPPER CASE suffices
''' '''
all_xml = [] all_xml = []
drawdatadir = settings.DRAWINGS_DATA drawdatadir = settings.DRAWINGS_DATA
DrawingFile.objects.all().delete() DrawingFile.objects.all().delete()
DataIssue.objects.filter(parser='Drawings').delete() DataIssue.objects.filter(parser='drawings').delete()
DataIssue.objects.filter(parser='Therion').delete() DataIssue.objects.filter(parser='Therion').delete()
DataIssue.objects.filter(parser='xTherion').delete()
DataIssue.objects.filter(parser='Tunnel').delete() DataIssue.objects.filter(parser='Tunnel').delete()
drawingsdirs = [ "" ] drawingsdirs = [ "" ]
@ -203,32 +218,37 @@ def load_drawings_files():
ff = os.path.join(drawdatadir, lf) ff = os.path.join(drawdatadir, lf)
if os.path.isdir(ff): if os.path.isdir(ff):
drawingsdirs.append(lf) # lunatic! adding to list in middle of list while loop! drawingsdirs.append(lf) # lunatic! adding to list in middle of list while loop!
elif f[-4:] == ".xml": elif Path(f).suffix.lower() == ".txt":
# Always creates new
dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1])
dwgfile.save()
all_xml.append(('txt',dwgfile))
elif Path(f).suffix.lower() == ".xml":
# Always creates new # Always creates new
dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1]) dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1])
dwgfile.save() dwgfile.save()
all_xml.append(('xml',dwgfile)) all_xml.append(('xml',dwgfile))
elif f[-3:] == ".th": elif Path(f).suffix.lower() == ".th":
# Always creates new # Always creates new
dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1]) dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1])
dwgfile.save() dwgfile.save()
all_xml.append(('th',dwgfile)) all_xml.append(('th',dwgfile))
elif f[-4:] == ".th2": elif Path(f).suffix.lower() == ".th2":
# Always creates new # Always creates new
dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1]) dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1])
dwgfile.save() dwgfile.save()
all_xml.append(('th2',dwgfile)) all_xml.append(('th2',dwgfile))
elif f[-4:] == ".pdf": elif Path(f).suffix.lower() == ".pdf":
# Always creates new # Always creates new
dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1]) dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1])
dwgfile.save() dwgfile.save()
all_xml.append(('pdf',dwgfile)) all_xml.append(('pdf',dwgfile))
elif f[-4:] == ".svg": elif Path(f).suffix.lower() == ".svg":
# Always creates new # Always creates new
dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1]) dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1])
dwgfile.save() dwgfile.save()
all_xml.append(('svg',dwgfile)) all_xml.append(('svg',dwgfile))
elif f[-4:] == ".jpg": elif Path(f).suffix.lower() == ".jpg":
# Always creates new # Always creates new
dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1]) dwgfile = DrawingFile(dwgpath=lf, dwgname=os.path.split(f[:-4])[1])
dwgfile.save() dwgfile.save()
@ -242,8 +262,10 @@ def load_drawings_files():
print(f' - {len(all_xml)} Drawings files found') print(f' - {len(all_xml)} Drawings files found')
for d in all_xml: for d in all_xml:
if d[0] in ['pdf', 'txt', '']:
setdrwfileinfo(d[1])
if d[0] == 'xml': if d[0] == 'xml':
setdwgfileinfo(d[1]) settnlfileinfo(d[1])
# important to import .th2 files before .th so that we can assign them when found in .th files # important to import .th2 files before .th so that we can assign them when found in .th files
if d[0] == 'th2': if d[0] == 'th2':
settherionfileinfo(d) settherionfileinfo(d)

View File

@ -61,8 +61,9 @@
{% empty %} {% empty %}
<p>&lt;No subdirectories&gt; <p>&lt;No subdirectories&gt;
{% endfor %} {% endfor %}
<p>Clicking on a filename only works if the drawing file has been imported into the system as part of a bulk-import <p>Clicking on a filename only shows the file if the drawing file had already been imported into the system as part of a bulk-import
as we are matching it against a file recorded in the database. as we are matching it against a file recorded in the database. If you only just uploaded it, you will get an error message.
<p>You cannot create folders here, but you can put files into any of the pre-existing folders.
{% endif %} {% endif %}
</div> </div>