import os
import re
import shlex
import subprocess
import xml.etree.ElementTree as ET  # this is used to parse XMLs

from django.conf import settings

import troggle.core.models as models  # import models for various objects

#
# This parser has to find several things:
# There are files of .html format in the expoweb area - they contain some of
# the important information.
# There is a similar number of .svx files in the loser area - they contain all
# the measurements.
#
# The previous version was incredibly slow due to various poor ideas about
# finding things and over-reliance on Python when handling regular
# expressions; this version delegates the heavy lifting to the shell and
# handles only the more sophisticated bits itself.
#


def load():
    """Entry point of the caves parser: load the caves of the 1623 area."""
    print("Hi! I'm caves parser. Ready to work")
    print('Loading caves of 1623 area')
    loadarea('caves-1623/')


def _warn(content):
    """Store a 'warn' message from the caves parser in the database."""
    msg = models.Parser_messageM(parsername='caves', content=content, message_type='warn')
    msg.save()


def loadarea(areacode):
    """Load all caves of one area into the database.

    First pass: every sub-directory ``<name>`` of ``settings.SURVEX_DATA +
    areacode`` is expected to hold a master file ``<name>/<name>.svx``.  The
    file is run through ``cavern`` and a ``CaveM`` record is saved with the
    total length, total depth and title read from it.

    Second pass: every ``*.html`` file found under
    ``settings.CAVEDESCRIPTIONS`` is read; its underground description is
    attached to the ``CaveM`` whose survex file matches the cave slug, or a
    new ``CaveM`` is created when no survex data matches.

    Problems (missing or broken survex file, unreadable description) are
    recorded as ``Parser_messageM`` warnings and the offending item skipped.

    areacode -- directory name of the area relative to SURVEX_DATA, with a
                trailing slash (e.g. 'caves-1623/').
    """
    print('Searching all cave dirs files')
    basedir = settings.SURVEX_DATA + areacode
    master_3d = settings.SURVEX_DATA + '1623-and-1626.3d'

    # Process the combined master file once; its .3d output is queried with
    # dump3d for every cave below.
    bash('cavern -o' + shlex.quote(settings.SURVEX_DATA) + ' '
         + shlex.quote(settings.SURVEX_DATA + '1623-and-1626.svx'))

    # this command finds all directories
    cavedirs = bash('find ' + shlex.quote(basedir) + ' -maxdepth 1 -type d').splitlines()
    print('Obtained list of directories! (#dirs=' + str(len(cavedirs)) + ')')

    for cavedir in cavedirs:
        if cavedir == basedir:
            continue  # skip the basedir - a non-proper subdirectory

        cavename = os.path.basename(cavedir)  # final bit of the directory
        stem = cavedir + '/' + cavename
        fullname = stem + '.svx'

        if not os.path.isfile(fullname):
            # send error message to the database
            print('Cave missing' + cavename + ' :(')
            _warn(stem + ' MISSING!')
            continue

        print('Found cave:' + cavename)
        quoted_fullname = shlex.quote(fullname)
        cavernout = bash('cavern -q ' + quoted_fullname)  # make cavern process the thing
        if 'cavern: error:' in cavernout:
            print('Fucked svx' + cavename + ' :(')
            _warn(stem + ' Survex file messed up!')
            continue

        cavernout = cavernout.splitlines()
        # Both figures are taken from the first matching line of cavern's
        # summary; the slices strip the trailing characters after the number.
        depth_line = [x for x in cavernout if 'Total vertical length' in x][0]
        length_line = [x for x in cavernout if 'Total length' in x][0]
        depth = float(depth_line.split()[-1][:-2])
        length = float(length_line.split()[6][:-1])

        # NOTE(review): a file without any *begin line raises IndexError here,
        # whereas a missing *title falls back to "Not found".
        surveyname = bash('cat ' + quoted_fullname
                          + " | grep '\\*begin' | head -n1 | cut -f2 -d ' ' ").splitlines().pop()
        title = (bash('cat ' + quoted_fullname
                      + " | grep '\\*title' | head -n1 | cut -f2 -d ' ' ").splitlines()
                 or ["Not found"])[0]
        print((('depth', 'length', 'surv name'), (depth, length, surveyname)))

        # Build the command once so that the debug output shows exactly what
        # is executed.
        node_cmd = ('dump3d ' + shlex.quote(master_3d)
                    + " | grep NODE | grep '\\[.*\\." + surveyname + ".*\\]'")
        print(node_cmd)
        nodes = bash(node_cmd).splitlines()
        print(nodes)

        newcave = models.CaveM(survex_file=fullname, total_length=length,
                               name=title, total_depth=depth)
        newcave.save()
    # end of reading survex masterfiles

    print("Reading cave descriptions")
    cavefiles = bash('find ' + shlex.quote(settings.CAVEDESCRIPTIONS)
                     + " -name '*.html'").splitlines()
    for fn in cavefiles:
        print(fn)
        with open(fn, "r") as f:
            contents = f.read()

        desc = extractXML(contents, 'underground_description')
        slug_xml = extractXML(contents, 'caveslug')
        slug_match = re.search(r'>.*<', slug_xml) if slug_xml is not None else None
        # The slice drops the leading '>' plus a five-character prefix and
        # the trailing '<' from the matched '>...<' text.
        name = slug_match.group()[6:-1] if slug_match else None

        if desc is None or name is None:
            print('Fucked description ' + fn + ' :(')
            _warn(fn + ' Description meesed up!')
            continue

        updatecave = models.CaveM.objects.filter(survex_file__icontains='/' + name + '.svx')
        nmatches = len(updatecave)
        if nmatches > 1:
            print('Non unique solution - skipping. Name:' + name)
        elif nmatches == 0:
            print('Cave with no survex data' + name)
            newcave = models.CaveM(description=desc, name=name)
            newcave.save()
        else:  # exactly one match
            updatecave = updatecave[0]
            updatecave.description = desc
            if updatecave.name == "Not found":
                updatecave.name = name
            updatecave.title = name
            updatecave.save()
    # end of reading cave descriptions


def extractXML(contents, tag):
    """Return the lines of *contents* spanning ``<tag>`` ... ``</tag>``.

    The result runs from the first line containing the opening tag to the
    first line containing the closing tag (both lines included, tags not
    stripped), joined with newlines.  Returns None when either tag is
    missing or the closing tag appears before the opening one.
    """
    lines = contents.splitlines()
    opening = '<' + tag + '>'
    closing = '</' + tag + '>'

    begi = next((i for i, line in enumerate(lines) if opening in line), None)
    endi = next((i for i, line in enumerate(lines) if closing in line), None)
    if begi is None or endi is None or endi < begi:
        return None
    return '\n'.join(lines[begi:endi + 1])


def bash(cmd):
    """Run *cmd* in a shell and return its standard output as text.

    Only stdout is captured; stderr is left attached to the parent process.
    Undecodable bytes are replaced rather than raising, so that odd
    characters in tool output cannot abort the whole import.
    """
    process = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE)
    output, _ = process.communicate()
    # communicate() yields bytes; callers do str operations on the result.
    return output.decode('utf-8', errors='replace')