forked from expo/troggle
130 lines
5.3 KiB
Python
130 lines
5.3 KiB
Python
|
|
||
|
import troggle.core.models as models #import models for various objects
|
||
|
from django.conf import settings
|
||
|
import xml.etree.ElementTree as ET #this is used to parse XML's
|
||
|
import subprocess
|
||
|
import re
|
||
|
|
||
|
#
|
||
|
# This parser has to find several things:
|
||
|
# There are files of .html format in expoweb area - they contain some of the important information
|
||
|
# There is a similar number of .svx files in the loser area - they contain all the measurements
|
||
|
#
|
||
|
# The previous version was incredibly slow due to various poorly chosen ways of finding things
|
||
|
# and over-reliance on Python when handling regular expressions; the new version delegates heavy lifting to the shell
|
||
|
# and handles more sophisticated bits only
|
||
|
#
|
||
|
|
||
|
def load():
    """Entry point of the caves parser.

    Prints a start-up banner and then imports the caves stored under the
    'caves-1623/' area directory by delegating to loadarea().
    """
    greeting = "Hi! I'm caves parser. Ready to work"
    print(greeting)

    area_notice = 'Loading caves of 1623 area'
    print(area_notice)
    loadarea('caves-1623/')
|
||
|
|
||
|
|
||
|
def loadarea(areacode):
    """Import the caves of one area into the database.

    Works in two passes:

    1. Every immediate subdirectory of settings.SURVEX_DATA+areacode is
       expected to hold a survex master file named <dirname>/<dirname>.svx.
       Each such file is run through `cavern -q`; the total length and total
       vertical length are parsed from cavern's output and stored, together
       with the first *title found in the file, as a new models.CaveM row.
       Missing or unprocessable files are recorded as models.Parser_messageM
       warnings and skipped.
    2. Every *.html file below settings.CAVEDESCRIPTIONS is read; its
       <underground_description> and <caveslug> elements are extracted and
       attached to the CaveM row whose survex_file path contains
       '/<slug>.svx'. If no such row exists, a new one is created.

    areacode -- directory name of the area relative to settings.SURVEX_DATA,
                including the trailing slash (e.g. 'caves-1623/').

    NOTE(review): all shell commands are built by plain string concatenation,
    so paths containing spaces or shell metacharacters will break them.
    """
    print('Searching all cave dirs files')
    basedir = settings.SURVEX_DATA+areacode

    # Run cavern over the combined master file; presumably this produces the
    # 1623-and-1626.3d file that dump3d reads further down - TODO confirm.
    bash('cavern -o'+settings.SURVEX_DATA+' '+settings.SURVEX_DATA+'1623-and-1626.svx')

    cavedirs = bash("find "+basedir+" -maxdepth 1 -type d").splitlines() #this command finds all directories
    print('Obtained list of directories! (#dirs='+str(len(cavedirs))+')')

    for cavedir in cavedirs:
        if cavedir==basedir:
            continue #skip the basedir - a non-proper subdirectory
        cavename = bash("echo "+cavedir+" | rev | cut -f1 -d '/' | rev").splitlines()[0] #get final bit of the directory
        fullname = cavedir+'/'+cavename+'.svx'

        test = bash('if [ ! -f '+fullname+' ] ; then echo MISSING; fi') #test for file existence
        if 'MISSING' in test: #send error message to the database
            msg = models.Parser_messageM(parsername='caves',content=cavedir+'/'+cavename+' MISSING!',message_type='warn')
            print('Cave missing'+cavename+' :(')
            msg.save()
            continue

        print('Found cave:'+cavename)
        cavernout = bash('cavern -q '+fullname) #make cavern process the thing
        if 'cavern: error:' in cavernout:
            msg = models.Parser_messageM(parsername='caves',content=cavedir+'/'+cavename+' Survex file messed up!',message_type='warn')
            print('Fucked svx'+cavename+' :(')
            msg.save()
            continue

        cavernout = cavernout.splitlines()
        # The numbers are taken from fixed positions in cavern's summary lines;
        # the trailing slices strip the characters that follow each number.
        depth_line = [x for x in cavernout if ('Total vertical length' in x)][0]
        depth = float(depth_line.split()[-1][:-2])
        length_line = [x for x in cavernout if ('Total length' in x)][0]
        length = float(length_line.split()[6][:-1])

        # Second space-separated field of the first *begin / *title line.
        surveyname = bash("cat "+fullname+" | grep '\\*begin' | head -n1 | cut -f2 -d ' ' ").splitlines().pop()
        title = (bash("cat "+fullname+" | grep '\\*title' | head -n1 | cut -f2 -d ' ' ").splitlines() or ["Not found"])[0]
        print((('depth','length','surv name'),(depth,length,surveyname)))

        # Debug output only: the NODE lines of this survey are listed but not stored.
        print('dump3d '+settings.SURVEX_DATA+'1623-and-1626.3d | grep NODE | grep \'\\[\\.'+surveyname+'.*\\]\'')
        nodes = bash('dump3d '+settings.SURVEX_DATA+'1623-and-1626.3d | grep NODE | grep \'\\[.*\\.'+surveyname+'.*\\]\'').splitlines()
        print(nodes)

        newcave = models.CaveM(survex_file = fullname, total_length = length, name=title, total_depth = depth)
        newcave.save()
    #end of reading survex masterfiles

    print ("Reading cave descriptions")
    cavefiles = bash("find "+settings.CAVEDESCRIPTIONS+" -name '*.html'").splitlines()
    for fn in cavefiles:
        # Context manager so the file handle is closed after reading.
        with open(fn, "r") as f:
            print(fn)
            contents = f.read()

        desc = extractXML(contents,'underground_description')

        # Parse the slug defensively: a missing or malformed <caveslug> must
        # reach the warning branch below instead of raising here.
        slug = extractXML(contents,'caveslug')
        slugmatch = re.search(r'>.*<', slug) if slug is not None else None
        # [6:-1] drops the leading '>' plus a 5-character prefix and the trailing '<'
        # (presumably an area prefix such as '1623-' - TODO confirm).
        name = slugmatch.group()[6:-1] if slugmatch is not None else None

        if desc is None or name is None:
            msg = models.Parser_messageM(parsername='caves',content=fn+' Description meesed up!',message_type='warn')
            print('Fucked description '+fn+' :(')
            msg.save()
            continue

        updatecave = models.CaveM.objects.filter(survex_file__icontains='/'+name+'.svx')
        if len(updatecave)>1:
            print('Non unique solution - skipping. Name:'+name)
        elif len(updatecave)==0:
            print('Cave with no survex data'+name)
            newcave = models.CaveM(description = desc, name = name)
            newcave.save()
        else: #exactly one match
            updatecave = updatecave[0]
            updatecave.description = desc
            if updatecave.name=="Not found":
                updatecave.name=name
            # NOTE(review): the reviewed copy of this file had lost its indentation;
            # title is assumed to be set unconditionally - confirm against the repository.
            updatecave.title=name
            updatecave.save()

    #end of reading cave descriptions
|
||
|
|
||
|
|
||
|
|
||
|
def extractXML(contents,tag):
    """Return the text of the first <tag>...</tag> element in contents.

    The search is line based, not a real XML parse: the result runs from the
    first line containing '<tag>' up to and including the first line, at or
    after it, containing '</tag>'. Whole lines are returned (joined with
    '\n'), so any text sharing those lines with the tags is included.

    contents -- the document text to search
    tag      -- element name without angle brackets

    Returns the matching line(s) as a single string, or None when the
    opening tag is missing or no closing tag follows it.
    """
    lines = contents.splitlines()
    opening = '<'+tag+'>'
    closing = '</'+tag+'>'

    #find the line carrying the opening tag
    begi = next((i for i, line in enumerate(lines) if opening in line), None)
    if begi is None:
        return None

    #find the closing tag, looking only from the opening line onwards so a
    #stray earlier closing tag cannot produce an empty or inverted range
    endi = next((i for i in range(begi, len(lines)) if closing in lines[i]), None)
    if endi is None:
        return None

    #always hand back a string: a one-line element yields that line unchanged,
    #a multi-line element yields all of its lines rather than one character
    return '\n'.join(lines[begi:endi+1])
|
||
|
|
||
|
|
||
|
def bash(cmd): #calls command in a shell, returns output
    """Run cmd through the system shell and return what it wrote to stdout.

    cmd -- complete command line as one string; it is passed to
           subprocess.Popen with shell=True, so pipes and redirections work.

    Returns the captured standard output as text (str). Standard error is not
    captured and the exit status is not checked.

    NOTE(review): callers build cmd by string concatenation; arguments that
    contain spaces or shell metacharacters are not quoted.
    """
    process = subprocess.Popen(cmd,shell=True,stdout=subprocess.PIPE)
    output, _ = process.communicate() #stderr is not piped, so the second item is unused

    # communicate() yields bytes on Python 3, but callers concatenate the
    # result with str and test it with `in`; decode so those operations work.
    # On Python 2 the output already is a str and is returned untouched.
    if not isinstance(output, str):
        output = output.decode('utf-8', 'replace')
    return output
|