forked from expo/troggle
211 lines
8.6 KiB
Python
211 lines
8.6 KiB
Python
|
|
import os
import re
import subprocess
import xml.etree.ElementTree as ET #this is used to parse XML's

from django.conf import settings

import troggle.core.models as models #import models for various objects
|
|
|
|
#
# This parser has to find several things:
# There are files in .html format in the expoweb area - they contain some of the important information
# There is a similar number of .svx files in the loser area - they contain all the measurements
#
# The previous version was incredibly slow due to various poor ideas about finding things
# and over-reliance on Python when handling regular expressions; the new version delegates the heavy lifting to the shell
# and handles only the more sophisticated bits itself
#
|
|
|
|
def load():
    """Entry point of the caves parser: print the start-up banners, then import area 1623."""
    banners = (
        "Hi! I'm caves parser. Ready to work",
        "Loading caves of 1623 area",
    )
    for banner in banners:
        print(banner)

    loadarea('1623')
|
|
|
|
|
|
def _save_parser_warning(content):
    """Store a 'warn' message from the 'caves' parser in the database."""
    models.Parser_messageM(parsername='caves', content=content, message_type='warn').save()


def _number_in(token):
    """Return the first decimal number embedded in *token* (e.g. '123.45m') as a float.

    Raises ValueError when the token contains no number.
    """
    found = re.search(r'-?\d+(?:\.\d+)?', token)
    if found is None:
        raise ValueError('no number in %r' % (token,))
    return float(found.group(0))


def _mentions_survey(record, tag):
    """Tell whether a dump3d record refers to the survey label *tag* or to something below it."""
    return (tag+'.' in record) or (tag+']' in record)


def _read_survey_summary(cavernout, svxpath):
    """Return (depth, length, surveyname) for one cave, or None if anything cannot be parsed.

    depth and length are taken from the 'Total vertical length' and 'Total length'
    lines of the cavern report; surveyname is the lower-cased argument of the first
    '*begin ' line of the survex file at *svxpath*.
    """
    try:
        report = cavernout.splitlines()
        depth_line = [x for x in report if 'Total vertical length' in x][0]
        length_line = [x for x in report if 'Total length' in x][0]
        # the value is the last field of the vertical line and field 6 of the length
        # line; the regex drops whatever unit suffix is glued to the number
        depth = _number_in(depth_line.split()[-1])
        length = _number_in(length_line.split()[6])
        with open(svxpath, 'r') as cavefile:
            svxlines = cavefile.read().splitlines()
        begin_line = [x for x in svxlines if '*begin ' in x][0]
        surveyname = begin_line.split()[1].lower()
    except (IndexError, ValueError):
        return None
    return depth, length, surveyname


def _latest_leg_date(leg_lines, default='1900.01.01'):
    """Return the greatest date-like trailing field among dump3d LINE records.

    Only records whose last field starts with YYYY.MM.DD are considered, so undated
    legs (last field such as 'STYLE=...') no longer mask the dated ones.
    *default* is returned when no record carries a date.
    """
    # presumably `dump3d -d` appends the survey date as the final field of a
    # LINE record - TODO confirm against the installed survex version
    dates = []
    for line in leg_lines:
        fields = line.split()
        if fields and re.match(r'\d{4}\.\d{2}\.\d{2}', fields[-1]):
            dates.append(fields[-1])
    return max(dates) if dates else default


def _load_survex_caves(areacode):
    """Create one CaveM record for every cave directory of *areacode* that has a usable .svx file."""
    master_3d = settings.SURVEX_DATA+'1623-and-1626.3d'
    if not file_exists(master_3d):
        print('Computing master .3d file')
        bash('cavern -o'+settings.SURVEX_DATA+' '+settings.SURVEX_DATA+'1623-and-1626.svx')
    else:
        print('Loading from existing master .3d file')

    master3d = bash('dump3d -d '+master_3d).splitlines()
    master3dN = [x for x in master3d if 'NODE' in x] #station records of the master survex file
    master3dL = [x for x in master3d if 'LINE' in x] #leg records of the master survex file

    print('Searching all cave dirs files')
    basedir = settings.SURVEX_DATA+'caves-'+areacode+'/'

    # NOTE: paths are pasted into shell commands unquoted, so directory names
    # containing spaces or shell metacharacters are not supported
    cavedirs = bash("find "+basedir+" -maxdepth 1 -type d").splitlines() #this command finds all directories
    print('Obtained list of directories! (#dirs='+str(len(cavedirs))+')')

    for cavedir in cavedirs:
        if cavedir == basedir:
            continue #skip the basedir - a non-proper subdirectory
        cavename = os.path.basename(cavedir) #get final bit of the directory
        fullname = cavedir+'/'+cavename+'.svx'

        if not file_exists(fullname):
            print('Cave missing'+cavename+' :(')
            _save_parser_warning(cavedir+'/'+cavename+' MISSING!')
            continue
        print('Found cave:'+cavename)

        cavernout = bash('cavern -o '+cavedir+' '+fullname) #make cavern process the thing
        summary = None
        if 'cavern: error:' not in cavernout:
            summary = _read_survey_summary(cavernout, fullname)
        if summary is None:
            # cavern reported an error, or its report / the .svx file lacks the expected lines
            print('Fucked svx'+cavename+' :(')
            _save_parser_warning(cavedir+'/'+cavename+' Survex file messed up!')
            continue
        depth, length, surveyname = summary
        # The survex *title is not stored here; CaveM.title is filled in from the
        # description slug when the cave descriptions are read.

        tag = '['+areacode+'.'+surveyname
        relevant_nodes = [x for x in master3dN if _mentions_survey(x, tag)]
        entrance_nodes = [x for x in relevant_nodes if 'ENTRANCE' in x]
        surface_nodes = [x for x in relevant_nodes if 'SURFACE' in x]
        print('rel_nodes'+str(len(relevant_nodes)))
        # prefer entrance stations, then surface stations, then any station of the survey
        location_nodes = entrance_nodes or surface_nodes or relevant_nodes

        try:
            # the station with the greatest value in field 3 wins (last one on ties);
            # assumes NODE records look like 'NODE x y z [label] flags' - TODO confirm
            location = sorted(location_nodes, key=lambda y: float(y.split()[3]))[-1]
            entrance = ' '.join(location.split()[1:3])
        except (IndexError, ValueError):
            print(location_nodes)
            entrance = 'Not found'

        relevant_lines = [x for x in master3dL if _mentions_survey(x, tag)]
        lastdate = _latest_leg_date(relevant_lines)

        print((('depth','length','surv name','entr','date'),(depth,length,surveyname,entrance,lastdate))) #sanity check print

        models.CaveM(
            survex_file=fullname,
            total_length=length,
            name=areacode+'.'+surveyname,
            total_depth=depth,
            date=lastdate,
            entrance=entrance).save()


def _load_cave_descriptions():
    """Read every *.html cave description, link it to its CaveM record and store a Cave_descriptionM."""
    print ("Reading cave descriptions")
    cavefiles = bash('find '+settings.CAVEDESCRIPTIONS+' -name \'*.html\'').splitlines()
    for fn in cavefiles:
        print(fn)
        with open(fn, "r") as f:
            contents = f.read()

        raw_slug = extractXML(contents, 'caveslug')
        desc = extractXML(contents, 'underground_description')
        if raw_slug is None or desc is None:
            print('Fucked description '+fn+' :(')
            _save_parser_warning(fn+' Description meesed up!')
            continue

        slug = re.sub(r"\s+", "", raw_slug)
        name = slug[5:] #get survex compatible name
        area = slug[0:4]
        print([area, name])

        svx_suffix = area+'/'+name+'/'+name+'.svx'
        print(svx_suffix)

        matches = models.CaveM.objects.filter(survex_file__icontains=svx_suffix)
        nmatches = len(matches)
        if nmatches == 0:
            print('Cave with no survex data:'+name)
            continue
        if nmatches > 1:
            print('Non unique solution - skipping. Name:'+name)
        else: #exactly one match
            print('Adding desc:'+name)
            cave = matches[0]
            cave.description = '/cave/descriptionM/'+slug #area-name
            cave.title = name
            cave.save()

        # NOTE(review): the indentation of the original was lost; the explicit
        # `continue` in the no-match branch suggests the description record is
        # stored for every cave that has survex data, unique or not - confirm.
        models.Cave_descriptionM(
            slug=slug,
            explorers=extractXML(contents, 'explorers'),
            underground_description=desc,
            equipment=extractXML(contents, 'equipment'),
            references=extractXML(contents, 'references'),
            survey=extractXML(contents, 'survey'),
            kataster_status=extractXML(contents, 'kataster_status'),
            underground_centre_line=extractXML(contents, 'underground_centre_line'),
            survex_file=extractXML(contents, 'survex_file'),
            notes=extractXML(contents, 'notes')).save()


def loadarea(areacode):
    """Import the caves of one area into the database.

    First pass: make sure the master 1623-and-1626 .3d file exists, dump it, and for
    every sub-directory of SURVEX_DATA/caves-<areacode>/ run cavern on
    <dir>/<dirname>.svx and save a CaveM with its length, depth, entrance
    coordinates and latest leg date. Missing or unparseable survex files are
    recorded as 'warn' parser messages and skipped.

    Second pass: read every *.html file under settings.CAVEDESCRIPTIONS, attach
    the description link and title to the matching CaveM and save a
    Cave_descriptionM built from the file's XML-like fields.

    areacode -- area number as a string, e.g. '1623'
    """
    _load_survex_caves(areacode)
    #end of reading survex masterfiles

    _load_cave_descriptions()
    #end of reading cave descriptions
|
|
|
|
def file_exists(filename):
    """Return True if *filename* names an existing regular file, False otherwise.

    The check is done with os.path.isfile rather than by running `[ -f ... ]` in
    a shell: no subprocess is spawned, and names containing spaces or shell
    metacharacters (or an empty name) are answered correctly.
    """
    return os.path.isfile(filename)
|
|
|
|
def extractXML(contents,tag):
    """Return the text between the first <tag> and the next </tag> in *contents*.

    contents -- the whole document as one string
    tag      -- element name without angle brackets; it is matched literally

    Returns the enclosed text (which may span several lines, joined with '\n'),
    or None when no <tag>...</tag> pair is present. A stray closing tag that
    appears before the opening tag is ignored instead of raising IndexError.
    """
    # splitlines()/join turns every kind of line terminator into a plain '\n'
    text = '\n'.join(contents.splitlines())
    escaped = re.escape(tag)
    # non-greedy, so two elements on one line are not merged into a single hit
    found = re.search('<'+escaped+'>(.*?)</'+escaped+'>', text, re.S)
    if found is None:
        return None
    return found.group(1)
|
|
|
|
def bash(cmd): #calls command in a shell, returns output
    """Run *cmd* through the system shell and return its standard output as text.

    cmd -- complete command line as a single string; it is handed to the shell
           unmodified, so the caller is responsible for any quoting.

    The output is returned as str (universal_newlines=True), which keeps the
    callers' `'...' in output` tests and splitlines() comparisons working on
    Python 3 as well as Python 2. Standard error is not captured and the exit
    status is ignored.
    """
    process = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE,
                               universal_newlines=True)
    output, _ = process.communicate()
    return output
|