survex parser added

This commit is contained in:
Rad 2019-02-28 18:07:50 +00:00
parent 6b59e3a689
commit 240c7eff10
2 changed files with 92 additions and 0 deletions

27
parsers/peopleM.py Normal file
View File

@ -0,0 +1,27 @@
from django.conf import settings
import troggle.core.models as models
def load():
    """Import expeditions and people from ``noinfo/folk.csv``.

    The header row's columns from index 5 onwards are taken as expedition
    dates and one ``ExpeditionM`` is saved per such column.  Every later
    row saves a ``PersonM`` named after its first column and adds to its
    ``expos_attended`` the expedition of each year column whose cell
    contains a ``'1'``.
    """
    # Local import: this block cannot extend the module's import list.
    import csv

    first_year_column = 5

    # csv.reader handles quoted fields (e.g. names containing commas) and
    # yields [] for blank lines, which are dropped here.
    with open(settings.EXPOWEB+"noinfo/folk.csv") as folkfile:
        persontable = [row for row in csv.reader(folkfile) if row]

    # An empty file simply produces no expeditions and no people.
    header = persontable[0] if persontable else []

    # Keep the saved expeditions by date so linking people does not need
    # one database lookup per attendance.
    expeditions = {}
    for year in header[first_year_column:]:
        newexpedition = models.ExpeditionM(date=year)
        newexpedition.save()
        expeditions[year] = newexpedition

    for row in persontable[1:]:  # skip header
        # Cells beyond the header width have no year to map to; ignore them.
        attendedyears = [
            header[i]
            for i, cell in enumerate(row)
            if first_year_column <= i < len(header) and '1' in cell
        ]
        name = row[0]
        print(name+' has attended: '+', '.join(attendedyears))
        newperson = models.PersonM(name=name)
        newperson.save()
        for year in attendedyears:
            newperson.expos_attended.add(expeditions[year])
    print('Person -> Expo table created!')

65
parsers/surveysM.py Normal file
View File

@ -0,0 +1,65 @@
from django.conf import settings
import subprocess, re
import troggle.core.models as models
def load():
    """Entry point: import survex files and their relations to caves.

    Only area '1623' is processed.
    """
    print('Load survex files and relations')
    for areacode in ('1623',):
        load_area(areacode)
def load_area(areacode):
    """Save a ``SurveyM`` for every dated survex file of one area.

    Each immediate subdirectory of
    ``settings.SURVEX_DATA + 'caves-' + areacode + '/'`` is matched against
    ``CaveM.survex_file`` (case-insensitive substring match on the
    directory path).  Directories with no match, or with more than one,
    are reported and skipped.  For the others, every ``*.svx`` file found
    below the directory that contains a ``*date`` line is saved as a
    ``SurveyM`` and added to the matched cave's ``surveys``.

    areacode -- area identifier used in the directory name, e.g. '1623'.
    """
    # Local import: this block cannot extend the module's import list.
    import os

    date_re = re.compile(r'\d\d\d\d\.\d\d\.\d\d')

    print('Searching all cave dirs files')
    basedir = settings.SURVEX_DATA+'caves-'+areacode+'/'
    try:
        entries = sorted(os.listdir(basedir))
    except OSError:
        # A missing or unreadable base directory yields no caves rather
        # than aborting the whole import.
        print('Error! cannot list directory:'+basedir)
        entries = []
    # Real subdirectories only (symlinks are not followed); the base
    # directory itself is never part of this list.
    cavedirs = [
        basedir+entry
        for entry in entries
        if os.path.isdir(basedir+entry) and not os.path.islink(basedir+entry)
    ]
    print('Obtained list of directories! (#dirs='+str(len(cavedirs))+')')

    for cavedir in cavedirs:
        parentname = os.path.basename(cavedir)  # final bit of the directory
        matches = models.CaveM.objects.filter(survex_file__icontains=cavedir)
        if len(matches) > 1:
            print('Non unique parent - skipping. Name:'+parentname)
            continue  # without this the queryset was used as if it were a cave
        if len(matches) == 0:
            print('Error! parent not created:'+parentname)
            continue
        # exactly one match
        print('Adding relations of:'+parentname)
        parentcave = matches[0]

        # Collect every *.svx file anywhere below the cave directory.
        surveyfiles = []
        for dirpath, dirnames, filenames in os.walk(cavedir):
            dirnames.sort()  # deterministic traversal order
            surveyfiles.extend(
                os.path.join(dirpath, filename)
                for filename in sorted(filenames)
                if filename.endswith('.svx')
            )

        for fn in surveyfiles:
            print(fn)
            with open(fn, 'r') as svxfile:
                svxcontents = svxfile.read().splitlines()
            datelines = [line for line in svxcontents if '*date' in line]
            if not datelines:
                continue  # skip dateless files
            found = date_re.search(datelines[0])
            if found:
                date = found.group(0)
            else:
                # The first *date line has no YYYY.MM.DD; store a placeholder.
                print('Date format error in '+fn)
                print('Dateline = '+ '"'.join(datelines))
                date = '1900.01.01'
            newsurvex = models.SurveyM(survex_file=fn, date=date)
            newsurvex.save()
            parentcave.surveys.add(newsurvex)
        parentcave.save()
def file_exists(filename):
    """Return True if *filename* names an existing regular file.

    The check is done in-process instead of through a shell ``[ -f ... ]``
    test, so names containing spaces or shell metacharacters are handled
    correctly and an empty name is reported as missing.
    """
    # Local import: this block cannot extend the module's import list.
    import os

    return os.path.isfile(filename)
def bash(cmd):
    """Run *cmd* through the shell and return what it wrote to stdout.

    The output is returned as text, so callers can split it into lines
    and compare or concatenate it with ordinary strings.  stderr is not
    captured.

    NOTE: *cmd* is interpreted by the shell (``shell=True``); callers must
    not build it from untrusted or unquoted text.
    """
    process = subprocess.Popen(
        cmd,
        shell=True,
        stdout=subprocess.PIPE,
        universal_newlines=True,  # text output instead of raw bytes
    )
    output, _ = process.communicate()
    return output