forked from expo/troggle
Merge lots of troggle fixes
This commit is contained in:
@@ -17,19 +17,19 @@ def parseCaveQMs(cave,inputFile):
|
||||
try:
|
||||
steinBr=Cave.objects.get(official_name="Steinbrückenhöhle")
|
||||
except Cave.DoesNotExist:
|
||||
print "Steinbruckenhoehle is not in the database. Please run parsers.cavetab first."
|
||||
print("Steinbruckenhoehle is not in the database. Please run parsers.cavetab first.")
|
||||
return
|
||||
elif cave=='hauch':
|
||||
try:
|
||||
hauchHl=Cave.objects.get(official_name="Hauchhöhle")
|
||||
except Cave.DoesNotExist:
|
||||
print "Hauchhoele is not in the database. Please run parsers.cavetab first."
|
||||
print("Hauchhoele is not in the database. Please run parsers.cavetab first.")
|
||||
return
|
||||
elif cave =='kh':
|
||||
try:
|
||||
kh=Cave.objects.get(official_name="Kaninchenhöhle")
|
||||
except Cave.DoesNotExist:
|
||||
print "KH is not in the database. Please run parsers.cavetab first."
|
||||
print("KH is not in the database. Please run parsers.cavetab first.")
|
||||
parse_KH_QMs(kh, inputFile=inputFile)
|
||||
return
|
||||
|
||||
@@ -48,7 +48,7 @@ def parseCaveQMs(cave,inputFile):
|
||||
elif cave=='hauch':
|
||||
placeholder, hadToCreate = LogbookEntry.objects.get_or_create(date__year=year, title="placeholder for QMs in 234", text="QMs temporarily attached to this should be re-attached to their actual trips", defaults={"date": date(year, 1, 1),"cave":hauchHl})
|
||||
if hadToCreate:
|
||||
print cave+" placeholder logbook entry for " + str(year) + " added to database"
|
||||
print(cave + " placeholder logbook entry for " + str(year) + " added to database")
|
||||
QMnum=re.match(r".*?-\d*?-X?(?P<numb>\d*)",line[0]).group("numb")
|
||||
newQM = QM()
|
||||
newQM.found_by=placeholder
|
||||
@@ -71,19 +71,18 @@ def parseCaveQMs(cave,inputFile):
|
||||
if preexistingQM.new_since_parsing==False: #if the pre-existing QM has not been modified, overwrite it
|
||||
preexistingQM.delete()
|
||||
newQM.save()
|
||||
print "overwriting " + str(preexistingQM) +"\r",
|
||||
|
||||
print("overwriting " + str(preexistingQM) +"\r")
|
||||
else: # otherwise, print that it was ignored
|
||||
print "preserving "+ str(preexistingQM) + ", which was edited in admin \r",
|
||||
print("preserving " + str(preexistingQM) + ", which was edited in admin \r")
|
||||
|
||||
except QM.DoesNotExist: #if there is no pre-existing QM, save the new one
|
||||
newQM.save()
|
||||
print "QM "+str(newQM) + ' added to database\r',
|
||||
print("QM "+str(newQM) + ' added to database\r')
|
||||
|
||||
except KeyError: #check on this one
|
||||
continue
|
||||
except IndexError:
|
||||
print "Index error in " + str(line)
|
||||
print("Index error in " + str(line))
|
||||
continue
|
||||
|
||||
def parse_KH_QMs(kh, inputFile):
|
||||
@@ -104,7 +103,7 @@ def parse_KH_QMs(kh, inputFile):
|
||||
}
|
||||
nonLookupArgs={
|
||||
'grade':res['grade'],
|
||||
'nearest_station':res['nearest_station'],
|
||||
'nearest_station_name':res['nearest_station'],
|
||||
'location_description':res['description']
|
||||
}
|
||||
|
||||
@@ -115,3 +114,4 @@ parseCaveQMs(cave='stein',inputFile=r"1623/204/qm.csv")
|
||||
parseCaveQMs(cave='hauch',inputFile=r"1623/234/qm.csv")
|
||||
parseCaveQMs(cave='kh', inputFile="1623/161/qmtodo.htm")
|
||||
#parseCaveQMs(cave='balkonhoehle',inputFile=r"1623/264/qm.csv")
|
||||
|
||||
|
||||
@@ -45,7 +45,7 @@ def GetTripPersons(trippeople, expedition, logtime_underground):
|
||||
author = res[-1][0]
|
||||
return res, author
|
||||
|
||||
def GetTripCave(place): #need to be fuzzier about matching here. Already a very slow function...
|
||||
def GetTripCave(place): #need to be fuzzier about matching here. Already a very slow function...
|
||||
# print "Getting cave for " , place
|
||||
try:
|
||||
katastNumRes=[]
|
||||
@@ -74,23 +74,23 @@ def GetTripCave(place): #need to be fuzzier about matching h
|
||||
|
||||
|
||||
noncaveplaces = [ "Journey", "Loser Plateau" ]
|
||||
def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_underground):
|
||||
def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_underground, entry_type="wiki"):
|
||||
""" saves a logbook entry and related persontrips """
|
||||
trippersons, author = GetTripPersons(trippeople, expedition, logtime_underground)
|
||||
if not author:
|
||||
print(" - skipping logentry" + title + " no author for entry")
|
||||
print(" - Skipping logentry: " + title + " no author for entry")
|
||||
return
|
||||
|
||||
# tripCave = GetTripCave(place)
|
||||
#
|
||||
|
||||
#tripCave = GetTripCave(place)
|
||||
|
||||
lplace = place.lower()
|
||||
if lplace not in noncaveplaces:
|
||||
cave=GetCaveLookup().get(lplace)
|
||||
|
||||
#Check for an existing copy of the current entry, and save
|
||||
expeditionday = expedition.get_expedition_day(date)
|
||||
lookupAttribs={'date':date, 'title':title}
|
||||
nonLookupAttribs={'place':place, 'text':text, 'expedition':expedition, 'cave':cave, 'slug':slugify(title)[:50]}
|
||||
lookupAttribs={'date':date, 'title':title}
|
||||
nonLookupAttribs={'place':place, 'text':text, 'expedition':expedition, 'cave':cave, 'slug':slugify(title)[:50], 'entry_type':entry_type}
|
||||
lbo, created=save_carefully(models.LogbookEntry, lookupAttribs, nonLookupAttribs)
|
||||
|
||||
for tripperson, time_underground in trippersons:
|
||||
@@ -115,7 +115,7 @@ def ParseDate(tripdate, year):
|
||||
assert False, tripdate
|
||||
return datetime.date(year, month, day)
|
||||
|
||||
# 2007, 2008, 2006
|
||||
# 2006, 2008 - 2010
|
||||
def Parselogwikitxt(year, expedition, txt):
|
||||
trippara = re.findall(r"===(.*?)===([\s\S]*?)(?====)", txt)
|
||||
for triphead, triptext in trippara:
|
||||
@@ -140,9 +140,9 @@ def Parselogwikitxt(year, expedition, txt):
|
||||
#print "\n", tripcave, "--- ppp", trippeople, len(triptext)
|
||||
EnterLogIntoDbase(date = ldate, place = tripcave, title = tripplace, text = triptext, trippeople=trippeople, expedition=expedition, logtime_underground=0)
|
||||
|
||||
# 2002, 2004, 2005
|
||||
# 2002, 2004, 2005, 2007, 2011 - 2018
|
||||
def Parseloghtmltxt(year, expedition, txt):
|
||||
print(" - Using log html parser")
|
||||
#print(" - Starting log html parser")
|
||||
tripparas = re.findall(r"<hr\s*/>([\s\S]*?)(?=<hr)", txt)
|
||||
logbook_entry_count = 0
|
||||
for trippara in tripparas:
|
||||
@@ -163,7 +163,6 @@ def Parseloghtmltxt(year, expedition, txt):
|
||||
print("can't parse: ", trippara) # this is 2007 which needs editing
|
||||
#assert s, trippara
|
||||
continue
|
||||
|
||||
tripid, tripid1, tripdate, trippeople, triptitle, triptext, tu = s.groups()
|
||||
ldate = ParseDate(tripdate.strip(), year)
|
||||
#assert tripid[:-1] == "t" + tripdate, (tripid, tripdate)
|
||||
@@ -174,16 +173,18 @@ def Parseloghtmltxt(year, expedition, txt):
|
||||
tripcave = triptitles[0]
|
||||
else:
|
||||
tripcave = "UNKNOWN"
|
||||
#print "\n", tripcave, "--- ppp", trippeople, len(triptext)
|
||||
#print("\n", tripcave, "--- ppp", trippeople, len(triptext))
|
||||
ltriptext = re.sub(r"</p>", "", triptext)
|
||||
ltriptext = re.sub(r"\s*?\n\s*", " ", ltriptext)
|
||||
ltriptext = re.sub(r"<p>", "\n\n", ltriptext).strip()
|
||||
EnterLogIntoDbase(date = ldate, place = tripcave, title = triptitle, text = ltriptext, trippeople=trippeople, expedition=expedition, logtime_underground=0)
|
||||
EnterLogIntoDbase(date = ldate, place = tripcave, title = triptitle, text = ltriptext,
|
||||
trippeople=trippeople, expedition=expedition, logtime_underground=0,
|
||||
entry_type="html")
|
||||
if logbook_entry_count == 0:
|
||||
print(" - No trip entrys found in logbook, check the syntax matches htmltxt format")
|
||||
|
||||
|
||||
# main parser for pre-2001. simpler because the data has been hacked so much to fit it
|
||||
# main parser for 1991 - 2001. simpler because the data has been hacked so much to fit it
|
||||
def Parseloghtml01(year, expedition, txt):
|
||||
tripparas = re.findall(r"<hr[\s/]*>([\s\S]*?)(?=<hr)", txt)
|
||||
for trippara in tripparas:
|
||||
@@ -227,9 +228,11 @@ def Parseloghtml01(year, expedition, txt):
|
||||
|
||||
#print ldate, trippeople.strip()
|
||||
# could include the tripid (url link for cross referencing)
|
||||
EnterLogIntoDbase(date=ldate, place=tripcave, title=triptitle, text=ltriptext, trippeople=trippeople, expedition=expedition, logtime_underground=0)
|
||||
|
||||
EnterLogIntoDbase(date=ldate, place=tripcave, title=triptitle, text=ltriptext,
|
||||
trippeople=trippeople, expedition=expedition, logtime_underground=0,
|
||||
entry_type="html")
|
||||
|
||||
# parser for 2003
|
||||
def Parseloghtml03(year, expedition, txt):
|
||||
tripparas = re.findall(r"<hr\s*/>([\s\S]*?)(?=<hr)", txt)
|
||||
for trippara in tripparas:
|
||||
@@ -256,7 +259,9 @@ def Parseloghtml03(year, expedition, txt):
|
||||
ltriptext = re.sub(r"\s*?\n\s*", " ", ltriptext)
|
||||
ltriptext = re.sub(r"<p>", "\n\n", ltriptext).strip()
|
||||
ltriptext = re.sub(r"[^\s0-9a-zA-Z\-.,:;'!&()\[\]<>?=+*%]", "_NONASCII_", ltriptext)
|
||||
EnterLogIntoDbase(date = ldate, place = tripcave, title = triptitle, text = ltriptext, trippeople=trippeople, expedition=expedition, logtime_underground=0)
|
||||
EnterLogIntoDbase(date = ldate, place = tripcave, title = triptitle,
|
||||
text = ltriptext, trippeople=trippeople, expedition=expedition,
|
||||
logtime_underground=0, entry_type="html")
|
||||
|
||||
|
||||
def SetDatesFromLogbookEntries(expedition):
|
||||
@@ -281,8 +286,7 @@ def SetDatesFromLogbookEntries(expedition):
|
||||
def LoadLogbookForExpedition(expedition):
|
||||
""" Parses all logbook entries for one expedition """
|
||||
|
||||
expowebbase = os.path.join(settings.EXPOWEB, "years")
|
||||
#year = str(expedition.year)
|
||||
expowebbase = os.path.join(settings.EXPOWEB, "years")
|
||||
yearlinks = settings.LOGBOOK_PARSER_SETTINGS
|
||||
|
||||
logbook_parseable = False
|
||||
@@ -294,6 +298,7 @@ def LoadLogbookForExpedition(expedition):
|
||||
file_in.close()
|
||||
parsefunc = year_settings[1]
|
||||
logbook_parseable = True
|
||||
print(" - Parsing logbook: " + year_settings[0] + "\n - Using parser: " + year_settings[1])
|
||||
else:
|
||||
try:
|
||||
file_in = open(os.path.join(expowebbase, expedition.year, settings.DEFAULT_LOGBOOK_FILE))
|
||||
@@ -304,7 +309,7 @@ def LoadLogbookForExpedition(expedition):
|
||||
parsefunc = settings.DEFAULT_LOGBOOK_PARSER
|
||||
except (IOError):
|
||||
logbook_parseable = False
|
||||
print("Couldn't open default logbook file and nothing set for expo " + expedition.year)
|
||||
print("Couldn't open default logbook file and nothing in settings for expo " + expedition.year)
|
||||
|
||||
if logbook_parseable:
|
||||
parser = globals()[parsefunc]
|
||||
|
||||
@@ -59,22 +59,19 @@ def LoadPersonsExpos():
|
||||
|
||||
save_carefully(models.Expedition, lookupAttribs, nonLookupAttribs)
|
||||
|
||||
|
||||
# make persons
|
||||
print("Loading personexpeditions")
|
||||
#expoers2008 = """Edvin Deadman,Kathryn Hopkins,Djuke Veldhuis,Becka Lawson,Julian Todd,Natalie Uomini,Aaron Curtis,Tony Rooke,Ollie Stevens,Frank Tully,Martin Jahnke,Mark Shinwell,Jess Stirrups,Nial Peters,Serena Povia,Olly Madge,Steve Jones,Pete Harley,Eeva Makiranta,Keith Curtis""".split(",")
|
||||
#expomissing = set(expoers2008)
|
||||
|
||||
for personline in personreader:
|
||||
name = personline[header["Name"]]
|
||||
name = re.sub("<.*?>", "", name)
|
||||
mname = re.match("(\w+)(?:\s((?:van |ten )?\w+))?(?:\s\(([^)]*)\))?", name)
|
||||
name = re.sub(r"<.*?>", "", name)
|
||||
mname = re.match(r"(\w+)(?:\s((?:van |ten )?\w+))?(?:\s\(([^)]*)\))?", name)
|
||||
nickname = mname.group(3) or ""
|
||||
|
||||
|
||||
lookupAttribs={'first_name':mname.group(1), 'last_name':(mname.group(2) or "")}
|
||||
nonLookupAttribs={'is_vfho':personline[header["VfHO member"]],}
|
||||
person, created = save_carefully(models.Person, lookupAttribs, nonLookupAttribs)
|
||||
|
||||
|
||||
parseMugShotAndBlurb(personline=personline, header=header, person=person)
|
||||
|
||||
# make person expedition from table
|
||||
@@ -88,6 +85,8 @@ def LoadPersonsExpos():
|
||||
|
||||
# this fills in those people for whom 2008 was their first expo
|
||||
#print "Loading personexpeditions 2008"
|
||||
#expoers2008 = """Edvin Deadman,Kathryn Hopkins,Djuke Veldhuis,Becka Lawson,Julian Todd,Natalie Uomini,Aaron Curtis,Tony Rooke,Ollie Stevens,Frank Tully,Martin Jahnke,Mark Shinwell,Jess Stirrups,Nial Peters,Serena Povia,Olly Madge,Steve Jones,Pete Harley,Eeva Makiranta,Keith Curtis""".split(",")
|
||||
#expomissing = set(expoers2008)
|
||||
#for name in expomissing:
|
||||
# firstname, lastname = name.split()
|
||||
# is_guest = name in ["Eeva Makiranta", "Keith Curtis"]
|
||||
@@ -103,18 +102,6 @@ def LoadPersonsExpos():
|
||||
# personexpedition = models.PersonExpedition(person=person, expedition=expedition, nickname="", is_guest=is_guest)
|
||||
# personexpedition.save()
|
||||
|
||||
#Notability is now a method of person. Makes no sense to store it in the database; it would need to be recalculated every time something changes. - AC 16 Feb 09
|
||||
# could rank according to surveying as well
|
||||
#print "Setting person notability"
|
||||
#for person in models.Person.objects.all():
|
||||
#person.notability = 0.0
|
||||
#for personexpedition in person.personexpedition_set.all():
|
||||
#if not personexpedition.is_guest:
|
||||
#person.notability += 1.0 / (2012 - int(personexpedition.expedition.year))
|
||||
#person.bisnotable = person.notability > 0.3 # I don't know how to filter by this
|
||||
#person.save()
|
||||
|
||||
|
||||
# used in other referencing parser functions
|
||||
# expedition name lookup cached for speed (it's a very big list)
|
||||
Gpersonexpeditionnamelookup = { }
|
||||
|
||||
@@ -92,13 +92,13 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines):
|
||||
teammembers = [ ]
|
||||
|
||||
# prints the path of every file as it is read during parsing; comment out the print below to silence it
|
||||
print("Reading file:", survexblock.survexfile.path)
|
||||
print("Reading file: " + survexblock.survexfile.path)
|
||||
while True:
|
||||
svxline = fin.readline().decode("latin1")
|
||||
if not svxline:
|
||||
return
|
||||
textlines.append(svxline)
|
||||
|
||||
|
||||
# break the line at the comment
|
||||
sline, comment = re.match(r"([^;]*?)\s*(?:;\s*(.*))?\n?$", svxline.strip()).groups()
|
||||
|
||||
|
||||
@@ -99,7 +99,7 @@ def parseSurveyScans(expedition, logfile=None):
|
||||
#scanList = listdir(expedition.year, surveyFolder)
|
||||
scanList=os.listdir(os.path.join(yearPath,surveyFolder))
|
||||
except AttributeError:
|
||||
print(surveyFolder + " ignored\r",)
|
||||
print("Folder: " + surveyFolder + " ignored\r")
|
||||
continue
|
||||
|
||||
for scan in scanList:
|
||||
@@ -107,7 +107,7 @@ def parseSurveyScans(expedition, logfile=None):
|
||||
scanChopped=re.match(r'(?i).*(notes|elev|plan|elevation|extend)(\d*)\.(png|jpg|jpeg)',scan).groups()
|
||||
scanType,scanNumber,scanFormat=scanChopped
|
||||
except AttributeError:
|
||||
print(scan + " ignored\r",)
|
||||
print("File: " + scan + " ignored\r")
|
||||
continue
|
||||
if scanType == 'elev' or scanType == 'extend':
|
||||
scanType = 'elevation'
|
||||
|
||||
Reference in New Issue
Block a user