Merge lots of troggle fixes

This commit is contained in:
Wookey
2019-04-02 00:57:54 +01:00
24 changed files with 239 additions and 255 deletions

View File

@@ -17,19 +17,19 @@ def parseCaveQMs(cave,inputFile):
try:
steinBr=Cave.objects.get(official_name="Steinbrückenhöhle")
except Cave.DoesNotExist:
print "Steinbruckenhoehle is not in the database. Please run parsers.cavetab first."
print("Steinbruckenhoehle is not in the database. Please run parsers.cavetab first.")
return
elif cave=='hauch':
try:
hauchHl=Cave.objects.get(official_name="Hauchhöhle")
except Cave.DoesNotExist:
print "Hauchhoele is not in the database. Please run parsers.cavetab first."
print("Hauchhoele is not in the database. Please run parsers.cavetab first.")
return
elif cave =='kh':
try:
kh=Cave.objects.get(official_name="Kaninchenhöhle")
except Cave.DoesNotExist:
print "KH is not in the database. Please run parsers.cavetab first."
print("KH is not in the database. Please run parsers.cavetab first.")
parse_KH_QMs(kh, inputFile=inputFile)
return
@@ -48,7 +48,7 @@ def parseCaveQMs(cave,inputFile):
elif cave=='hauch':
placeholder, hadToCreate = LogbookEntry.objects.get_or_create(date__year=year, title="placeholder for QMs in 234", text="QMs temporarily attached to this should be re-attached to their actual trips", defaults={"date": date(year, 1, 1),"cave":hauchHl})
if hadToCreate:
print cave+" placeholder logbook entry for " + str(year) + " added to database"
print(cave + " placeholder logbook entry for " + str(year) + " added to database")
QMnum=re.match(r".*?-\d*?-X?(?P<numb>\d*)",line[0]).group("numb")
newQM = QM()
newQM.found_by=placeholder
@@ -71,19 +71,18 @@ def parseCaveQMs(cave,inputFile):
if preexistingQM.new_since_parsing==False: #if the pre-existing QM has not been modified, overwrite it
preexistingQM.delete()
newQM.save()
print "overwriting " + str(preexistingQM) +"\r",
print("overwriting " + str(preexistingQM) +"\r")
else: # otherwise, print that it was ignored
print "preserving "+ str(preexistingQM) + ", which was edited in admin \r",
print("preserving " + str(preexistingQM) + ", which was edited in admin \r")
except QM.DoesNotExist: #if there is no pre-existing QM, save the new one
newQM.save()
print "QM "+str(newQM) + ' added to database\r',
print("QM "+str(newQM) + ' added to database\r')
except KeyError: #check on this one
continue
except IndexError:
print "Index error in " + str(line)
print("Index error in " + str(line))
continue
def parse_KH_QMs(kh, inputFile):
@@ -104,7 +103,7 @@ def parse_KH_QMs(kh, inputFile):
}
nonLookupArgs={
'grade':res['grade'],
'nearest_station':res['nearest_station'],
'nearest_station_name':res['nearest_station'],
'location_description':res['description']
}
@@ -115,3 +114,4 @@ parseCaveQMs(cave='stein',inputFile=r"1623/204/qm.csv")
parseCaveQMs(cave='hauch',inputFile=r"1623/234/qm.csv")
parseCaveQMs(cave='kh', inputFile="1623/161/qmtodo.htm")
#parseCaveQMs(cave='balkonhoehle',inputFile=r"1623/264/qm.csv")

View File

@@ -45,7 +45,7 @@ def GetTripPersons(trippeople, expedition, logtime_underground):
author = res[-1][0]
return res, author
def GetTripCave(place): #need to be fuzzier about matching here. Already a very slow function...
def GetTripCave(place): #need to be fuzzier about matching here. Already a very slow function...
# print "Getting cave for " , place
try:
katastNumRes=[]
@@ -74,23 +74,23 @@ def GetTripCave(place): #need to be fuzzier about matching h
noncaveplaces = [ "Journey", "Loser Plateau" ]
def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_underground):
def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_underground, entry_type="wiki"):
""" saves a logbook entry and related persontrips """
trippersons, author = GetTripPersons(trippeople, expedition, logtime_underground)
if not author:
print(" - skipping logentry" + title + " no author for entry")
print(" - Skipping logentry: " + title + " no author for entry")
return
# tripCave = GetTripCave(place)
#
#tripCave = GetTripCave(place)
lplace = place.lower()
if lplace not in noncaveplaces:
cave=GetCaveLookup().get(lplace)
#Check for an existing copy of the current entry, and save
expeditionday = expedition.get_expedition_day(date)
lookupAttribs={'date':date, 'title':title}
nonLookupAttribs={'place':place, 'text':text, 'expedition':expedition, 'cave':cave, 'slug':slugify(title)[:50]}
lookupAttribs={'date':date, 'title':title}
nonLookupAttribs={'place':place, 'text':text, 'expedition':expedition, 'cave':cave, 'slug':slugify(title)[:50], 'entry_type':entry_type}
lbo, created=save_carefully(models.LogbookEntry, lookupAttribs, nonLookupAttribs)
for tripperson, time_underground in trippersons:
@@ -115,7 +115,7 @@ def ParseDate(tripdate, year):
assert False, tripdate
return datetime.date(year, month, day)
# 2007, 2008, 2006
# 2006, 2008 - 2010
def Parselogwikitxt(year, expedition, txt):
trippara = re.findall(r"===(.*?)===([\s\S]*?)(?====)", txt)
for triphead, triptext in trippara:
@@ -140,9 +140,9 @@ def Parselogwikitxt(year, expedition, txt):
#print "\n", tripcave, "--- ppp", trippeople, len(triptext)
EnterLogIntoDbase(date = ldate, place = tripcave, title = tripplace, text = triptext, trippeople=trippeople, expedition=expedition, logtime_underground=0)
# 2002, 2004, 2005
# 2002, 2004, 2005, 2007, 2011 - 2018
def Parseloghtmltxt(year, expedition, txt):
print(" - Using log html parser")
#print(" - Starting log html parser")
tripparas = re.findall(r"<hr\s*/>([\s\S]*?)(?=<hr)", txt)
logbook_entry_count = 0
for trippara in tripparas:
@@ -163,7 +163,6 @@ def Parseloghtmltxt(year, expedition, txt):
print("can't parse: ", trippara) # this is 2007 which needs editing
#assert s, trippara
continue
tripid, tripid1, tripdate, trippeople, triptitle, triptext, tu = s.groups()
ldate = ParseDate(tripdate.strip(), year)
#assert tripid[:-1] == "t" + tripdate, (tripid, tripdate)
@@ -174,16 +173,18 @@ def Parseloghtmltxt(year, expedition, txt):
tripcave = triptitles[0]
else:
tripcave = "UNKNOWN"
#print "\n", tripcave, "--- ppp", trippeople, len(triptext)
#print("\n", tripcave, "--- ppp", trippeople, len(triptext))
ltriptext = re.sub(r"</p>", "", triptext)
ltriptext = re.sub(r"\s*?\n\s*", " ", ltriptext)
ltriptext = re.sub(r"<p>", "\n\n", ltriptext).strip()
EnterLogIntoDbase(date = ldate, place = tripcave, title = triptitle, text = ltriptext, trippeople=trippeople, expedition=expedition, logtime_underground=0)
EnterLogIntoDbase(date = ldate, place = tripcave, title = triptitle, text = ltriptext,
trippeople=trippeople, expedition=expedition, logtime_underground=0,
entry_type="html")
if logbook_entry_count == 0:
print(" - No trip entrys found in logbook, check the syntax matches htmltxt format")
# main parser for pre-2001. simpler because the data has been hacked so much to fit it
# main parser for 1991 - 2001. simpler because the data has been hacked so much to fit it
def Parseloghtml01(year, expedition, txt):
tripparas = re.findall(r"<hr[\s/]*>([\s\S]*?)(?=<hr)", txt)
for trippara in tripparas:
@@ -227,9 +228,11 @@ def Parseloghtml01(year, expedition, txt):
#print ldate, trippeople.strip()
# could include the tripid (url link for cross referencing)
EnterLogIntoDbase(date=ldate, place=tripcave, title=triptitle, text=ltriptext, trippeople=trippeople, expedition=expedition, logtime_underground=0)
EnterLogIntoDbase(date=ldate, place=tripcave, title=triptitle, text=ltriptext,
trippeople=trippeople, expedition=expedition, logtime_underground=0,
entry_type="html")
# parser for 2003
def Parseloghtml03(year, expedition, txt):
tripparas = re.findall(r"<hr\s*/>([\s\S]*?)(?=<hr)", txt)
for trippara in tripparas:
@@ -256,7 +259,9 @@ def Parseloghtml03(year, expedition, txt):
ltriptext = re.sub(r"\s*?\n\s*", " ", ltriptext)
ltriptext = re.sub(r"<p>", "\n\n", ltriptext).strip()
ltriptext = re.sub(r"[^\s0-9a-zA-Z\-.,:;'!&()\[\]<>?=+*%]", "_NONASCII_", ltriptext)
EnterLogIntoDbase(date = ldate, place = tripcave, title = triptitle, text = ltriptext, trippeople=trippeople, expedition=expedition, logtime_underground=0)
EnterLogIntoDbase(date = ldate, place = tripcave, title = triptitle,
text = ltriptext, trippeople=trippeople, expedition=expedition,
logtime_underground=0, entry_type="html")
def SetDatesFromLogbookEntries(expedition):
@@ -281,8 +286,7 @@ def SetDatesFromLogbookEntries(expedition):
def LoadLogbookForExpedition(expedition):
""" Parses all logbook entries for one expedition """
expowebbase = os.path.join(settings.EXPOWEB, "years")
#year = str(expedition.year)
expowebbase = os.path.join(settings.EXPOWEB, "years")
yearlinks = settings.LOGBOOK_PARSER_SETTINGS
logbook_parseable = False
@@ -294,6 +298,7 @@ def LoadLogbookForExpedition(expedition):
file_in.close()
parsefunc = year_settings[1]
logbook_parseable = True
print(" - Parsing logbook: " + year_settings[0] + "\n - Using parser: " + year_settings[1])
else:
try:
file_in = open(os.path.join(expowebbase, expedition.year, settings.DEFAULT_LOGBOOK_FILE))
@@ -304,7 +309,7 @@ def LoadLogbookForExpedition(expedition):
parsefunc = settings.DEFAULT_LOGBOOK_PARSER
except (IOError):
logbook_parseable = False
print("Couldn't open default logbook file and nothing set for expo " + expedition.year)
print("Couldn't open default logbook file and nothing in settings for expo " + expedition.year)
if logbook_parseable:
parser = globals()[parsefunc]

View File

@@ -59,22 +59,19 @@ def LoadPersonsExpos():
save_carefully(models.Expedition, lookupAttribs, nonLookupAttribs)
# make persons
print("Loading personexpeditions")
#expoers2008 = """Edvin Deadman,Kathryn Hopkins,Djuke Veldhuis,Becka Lawson,Julian Todd,Natalie Uomini,Aaron Curtis,Tony Rooke,Ollie Stevens,Frank Tully,Martin Jahnke,Mark Shinwell,Jess Stirrups,Nial Peters,Serena Povia,Olly Madge,Steve Jones,Pete Harley,Eeva Makiranta,Keith Curtis""".split(",")
#expomissing = set(expoers2008)
for personline in personreader:
name = personline[header["Name"]]
name = re.sub("<.*?>", "", name)
mname = re.match("(\w+)(?:\s((?:van |ten )?\w+))?(?:\s\(([^)]*)\))?", name)
name = re.sub(r"<.*?>", "", name)
mname = re.match(r"(\w+)(?:\s((?:van |ten )?\w+))?(?:\s\(([^)]*)\))?", name)
nickname = mname.group(3) or ""
lookupAttribs={'first_name':mname.group(1), 'last_name':(mname.group(2) or "")}
nonLookupAttribs={'is_vfho':personline[header["VfHO member"]],}
person, created = save_carefully(models.Person, lookupAttribs, nonLookupAttribs)
parseMugShotAndBlurb(personline=personline, header=header, person=person)
# make person expedition from table
@@ -88,6 +85,8 @@ def LoadPersonsExpos():
# this fills in those people for whom 2008 was their first expo
#print "Loading personexpeditions 2008"
#expoers2008 = """Edvin Deadman,Kathryn Hopkins,Djuke Veldhuis,Becka Lawson,Julian Todd,Natalie Uomini,Aaron Curtis,Tony Rooke,Ollie Stevens,Frank Tully,Martin Jahnke,Mark Shinwell,Jess Stirrups,Nial Peters,Serena Povia,Olly Madge,Steve Jones,Pete Harley,Eeva Makiranta,Keith Curtis""".split(",")
#expomissing = set(expoers2008)
#for name in expomissing:
# firstname, lastname = name.split()
# is_guest = name in ["Eeva Makiranta", "Keith Curtis"]
@@ -103,18 +102,6 @@ def LoadPersonsExpos():
# personexpedition = models.PersonExpedition(person=person, expedition=expedition, nickname="", is_guest=is_guest)
# personexpedition.save()
#Notability is now a method of person. Makes no sense to store it in the database; it would need to be recalculated every time something changes. - AC 16 Feb 09
# could rank according to surveying as well
#print "Setting person notability"
#for person in models.Person.objects.all():
#person.notability = 0.0
#for personexpedition in person.personexpedition_set.all():
#if not personexpedition.is_guest:
#person.notability += 1.0 / (2012 - int(personexpedition.expedition.year))
#person.bisnotable = person.notability > 0.3 # I don't know how to filter by this
#person.save()
# used in other referencing parser functions
# expedition name lookup cached for speed (it's a very big list)
Gpersonexpeditionnamelookup = { }

View File

@@ -92,13 +92,13 @@ def RecursiveLoad(survexblock, survexfile, fin, textlines):
teammembers = [ ]
# prints out each file as it is read during parsing (comment out the print below to silence)
print("Reading file:", survexblock.survexfile.path)
print("Reading file: " + survexblock.survexfile.path)
while True:
svxline = fin.readline().decode("latin1")
if not svxline:
return
textlines.append(svxline)
# break the line at the comment
sline, comment = re.match(r"([^;]*?)\s*(?:;\s*(.*))?\n?$", svxline.strip()).groups()

View File

@@ -99,7 +99,7 @@ def parseSurveyScans(expedition, logfile=None):
#scanList = listdir(expedition.year, surveyFolder)
scanList=os.listdir(os.path.join(yearPath,surveyFolder))
except AttributeError:
print(surveyFolder + " ignored\r",)
print("Folder: " + surveyFolder + " ignored\r")
continue
for scan in scanList:
@@ -107,7 +107,7 @@ def parseSurveyScans(expedition, logfile=None):
scanChopped=re.match(r'(?i).*(notes|elev|plan|elevation|extend)(\d*)\.(png|jpg|jpeg)',scan).groups()
scanType,scanNumber,scanFormat=scanChopped
except AttributeError:
print(scan + " ignored\r",)
print("File: " + scan + " ignored\r")
continue
if scanType == 'elev' or scanType == 'extend':
scanType = 'elevation'