import troggle.core.models as models  # import models for various objects
from django.conf import settings
import xml.etree.ElementTree as ET  # this is used to parse XML's
import os
import subprocess
import re

#
# This parser has to find several things:
# There are files of .html format in the expoweb area - they contain some of
# the important information.
# There is a similar number of .svx files in the loser area - they contain all
# the measurements.
#
# The previous version was incredibly slow due to poor ideas about finding
# things and over-reliance on Python when handling regular expressions; this
# version delegates the heavy lifting (cavern, dump3d, find) to the shell and
# handles only the more sophisticated bits itself.
#

# Date stored for a cave when no dated survey leg can be found for it.
_DEFAULT_DATE = '1900.01.01'


def load():
    """Load the caves of the 1623 and 1626 areas, in that order."""
    print('Hi! I\'m caves parser. Ready to work')
    for areacode in ('1623', '1626'):
        print('Loading caves of ' + areacode + ' area')
        loadarea(areacode)


def loadarea(areacode):
    """Create CaveM records for one area, then read the cave descriptions.

    Ensures the master ``1623-and-1626.3d`` file exists (running ``cavern`` if
    it does not), dumps it with ``dump3d -d``, processes every direct
    subdirectory of ``settings.SURVEX_DATA + 'caves-<areacode>/'`` and finally
    reads the HTML cave descriptions.

    Args:
        areacode: area code as a string, e.g. ``'1623'``.
    """
    master3d_path = settings.SURVEX_DATA + '1623-and-1626.3d'
    if not file_exists(master3d_path):
        print('Computing master .3d file')
        bash('cavern -o ' + settings.SURVEX_DATA + ' '
             + settings.SURVEX_DATA + '1623-and-1626.svx')
    else:
        print('Loading from existing master .3d file')

    master3d = bash('dump3d -d ' + master3d_path).splitlines()
    master3dN = [x for x in master3d if 'NODE' in x]  # nodes of master survex file
    master3dL = [x for x in master3d if 'LINE' in x]  # legs of master survex file

    print('Searching all cave dirs files')
    basedir = settings.SURVEX_DATA + 'caves-' + areacode + '/'
    # find lists basedir itself plus each of its direct subdirectories
    cavedirs = bash("find " + basedir + " -maxdepth 1 -type d").splitlines()
    print('Obtained list of directories! (#dirs=' + str(len(cavedirs)) + ')')

    for cavedir in cavedirs:
        if cavedir == basedir:
            continue  # skip the basedir - a non-proper subdirectory
        _load_cave(areacode, cavedir, master3dN, master3dL)
    # end of reading survex masterfiles

    # NOTE(review): descriptions are read on every loadarea() call, i.e. once
    # per area when driven from load() - confirm this repetition is intended.
    print("Reading cave descriptions")
    _load_descriptions()
    # end of reading cave descriptions


def _warn(content):
    """Store a 'warn' parser message for the caves parser in the database."""
    msg = models.Parser_messageM(parsername='caves', content=content,
                                 message_type='warn')
    msg.save()


def _last_survey_date(lines):
    """Return the last whitespace-separated token of the leg that sorts last.

    Legs are ordered by their final token. ``_DEFAULT_DATE`` is returned when
    there are no legs or when the chosen token contains ``STYLE`` (i.e. it is
    not a date).
    """
    if not lines:
        return _DEFAULT_DATE
    lastleg = sorted(lines, key=lambda y: y.split()[-1])[-1]
    lastdate = lastleg.split()[-1]
    if 'STYLE' in lastdate:
        return _DEFAULT_DATE
    return lastdate


def _load_cave(areacode, cavedir, master3dN, master3dL):
    """Process one cave directory and save a CaveM record for it.

    Expects ``<cavedir>/<dirname>.svx``; a missing file or a cavern error is
    recorded as a parser warning and the cave is skipped.

    Args:
        areacode: area code string used to build the survey label prefix.
        cavedir: path of the cave directory, as printed by ``find``.
        master3dN: ``NODE`` lines of the master .3d dump.
        master3dL: ``LINE`` lines of the master .3d dump.
    """
    cavename = os.path.basename(cavedir)  # final bit of the directory
    fullname = cavedir + '/' + cavename + '.svx'
    if not file_exists(fullname):
        print('Cave missing' + cavename + ' :(')
        _warn(cavedir + '/' + cavename + ' MISSING!')
        return

    print('Found cave:' + cavename)
    cavernout = bash('cavern -o ' + cavedir + ' ' + fullname)  # make cavern process the thing
    if 'cavern: error:' in cavernout:
        print('Fucked svx' + cavename + ' :(')
        _warn(cavedir + '/' + cavename + ' Survex file messed up!')
        return

    cavernout = cavernout.splitlines()
    # NOTE(review): [:-2] drops the last two characters of the token while the
    # length below drops one - confirm both against cavern's output format.
    depth = float(([x for x in cavernout if 'Total vertical length' in x][0].split()[-1])[:-2])
    length = float(([x for x in cavernout if 'Total length' in x][0].split()[6])[:-1])

    with open(fullname, 'r') as cavefile:
        cavefilecontents = cavefile.read().splitlines()

    # The survey name is the word following the first "*begin". The "*title"
    # line is deliberately not read here: it was never stored, and the title
    # field is filled in from the HTML description instead.
    try:
        surveyname = [x for x in cavefilecontents if '*begin ' in x][0].split()[1].lower()
    except IndexError:
        print('No *begin in ' + fullname + ' :(')
        _warn(cavedir + '/' + cavename + ' has no *begin line!')
        return

    prefix = '[' + areacode + '.' + surveyname

    def belongs(line):
        # label is either exactly this survey or something nested inside it
        return (prefix + '.' in line) or (prefix + ']' in line)

    relevant_nodes = [x for x in master3dN if belongs(x)]
    entrance_nodes = [x for x in relevant_nodes if 'ENTRANCE' in x]
    surface_nodes = [x for x in relevant_nodes if 'SURFACE' in x]
    print('rel_nodes' + str(len(relevant_nodes)))

    # prefer entrance stations, then surface stations, then any station
    location_nodes = entrance_nodes or surface_nodes or relevant_nodes
    try:
        # pick the node with the largest fourth field
        location = sorted(location_nodes, key=lambda y: float(y.split()[3]))[-1]
    except (IndexError, ValueError):
        print(location_nodes)
        location = 'Not found'

    lastdate = _last_survey_date([x for x in master3dL if belongs(x)])

    # NOTE(review): for 'Not found' this yields 'found'; kept unchanged because
    # stored data may already depend on it - confirm before altering.
    entrance = ' '.join(location.split()[1:3])

    print((('depth', 'length', 'surv name', 'entr', 'date'),
           (depth, length, surveyname, entrance, lastdate)))  # sanity check
    print('')

    newcave = models.CaveM(
        survex_file=fullname,
        total_length=length,
        name=areacode + '.' + surveyname,
        total_depth=depth,
        date=lastdate,
        entrance=entrance)
    newcave.save()


def _load_descriptions():
    """Read every *.html under settings.CAVEDESCRIPTIONS into the database.

    For each file with both a ``caveslug`` and an ``underground_description``
    element, the single matching CaveM (if exactly one matches) gets its
    description link and title set, and a Cave_descriptionM is saved. Files
    whose cave has no survex data are skipped.
    """
    cavefiles = bash('find ' + settings.CAVEDESCRIPTIONS + ' -name \'*.html\'').splitlines()
    for fn in cavefiles:
        print(fn)
        with open(fn, "r") as f:
            contents = f.read()

        raw_slug = extractXML(contents, 'caveslug')
        desc = extractXML(contents, 'underground_description')
        if raw_slug is None or desc is None:
            print('Fucked description ' + fn + ' :(')
            _warn(fn + ' Description meesed up!')
            continue

        slug = re.sub(r"\s+", "", raw_slug)
        name = slug[5:]  # get survex compatible name
        area = slug[0:4]
        print([area, name])

        survex_suffix = area + '/' + name + '/' + name + '.svx'
        print(survex_suffix)
        updatecave = models.CaveM.objects.filter(survex_file__icontains=survex_suffix)
        if len(updatecave) > 1:
            # ambiguous: no cave is updated, but the description is still saved
            print('Non unique solution - skipping. Name:' + name)
        elif len(updatecave) == 0:
            print('Cave with no survex data:' + name)
            continue
        else:  # exactly one match
            print('Adding desc:' + name)
            updatecave = updatecave[0]
            updatecave.description = '/cave/descriptionM/' + slug  # area-name
            updatecave.title = name
            updatecave.save()

        newcavedesc = models.Cave_descriptionM(
            slug=slug,
            explorers=extractXML(contents, 'explorers'),
            underground_description=desc,
            equipment=extractXML(contents, 'equipment'),
            references=extractXML(contents, 'references'),
            survey=extractXML(contents, 'survey'),
            kataster_status=extractXML(contents, 'kataster_status'),
            underground_centre_line=extractXML(contents, 'underground_centre_line'),
            survex_file=extractXML(contents, 'survex_file'),
            notes=extractXML(contents, 'notes'))
        newcavedesc.save()


def file_exists(filename):
    """Return True if ``filename`` is an existing regular file, else False."""
    return os.path.isfile(filename)


def extractXML(contents, tag):
    """Return the text between the first ``<tag>`` and the next ``</tag>``.

    Args:
        contents: the document text to search.
        tag: element name without angle brackets.

    Returns:
        The enclosed text (may be empty or span several lines), or ``None``
        when the opening tag, or a closing tag after it, is missing.
    """
    open_tag = '<' + tag + '>'
    close_tag = '</' + tag + '>'
    start = contents.find(open_tag)
    if start < 0:
        return None
    start += len(open_tag)
    end = contents.find(close_tag, start)
    if end < 0:
        return None
    return contents[start:end]


def bash(cmd):
    """Run ``cmd`` through the shell and return its standard output as text.

    stderr is not captured. ``universal_newlines=True`` makes the result a
    text string on both Python 2 and Python 3, so callers can use ``in`` and
    ``splitlines()`` with ordinary string literals.
    """
    process = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE,
                               universal_newlines=True)
    output, _ = process.communicate()
    return output