diff --git a/parsers/survex.py b/parsers/survex.py index ccc4ed6..540ea0b 100644 --- a/parsers/survex.py +++ b/parsers/survex.py @@ -3,11 +3,29 @@ import expo.models as models import re import os +roles = {"Insts": "Insts", + "insts": "Insts", + "Instruments": "Insts", + "instruments": "Insts", + "Inst": "Insts", + "inst": "Insts", + "dog": "Other", + "Dog": "Other", + "other": "Other", + "Other": "Other", + "Notes": "Notes", + "notes": "notes", + "pics": "Pics", + "Pics": "Pics", + "Tape": "Tape", + "tape": "Tape"} + re_include_extension = re.compile(r"^\s*\*include\s+([^\s]*).svx$", re.IGNORECASE) re_include_no_extension = re.compile(r"^\s*\*include\s+([^\s]*)$", re.IGNORECASE) -re_begin = re.compile(r"^\s*\*begin\s+(.*?)\s*$", re.IGNORECASE) -re_end = re.compile(r"^\s*\*end\s+(.*?)\s*$", re.IGNORECASE) -re_date = re.compile(r"^\s*\*date\s+(.*?)\s*$", re.IGNORECASE) +flags = {"begin": re.compile(r"^\s*\*begin\s+(.*?)\s*$", re.IGNORECASE), + "end": re.compile(r"^\s*\*end\s+(.*?)\s*$", re.IGNORECASE), + "date": re.compile(r"^\s*\*date\s+(.*?)\s*$", re.IGNORECASE), + "team": re.compile(r"^\s*\*team\s+(.*?)\s*$", re.IGNORECASE)} def fileIterator(directory, filename): survex_file = os.path.join(directory, filename + ".svx") @@ -39,29 +57,68 @@ def make_model(name, parent, iter_lines, sf, c, l): else: m = models.SurvexBlock(name = name, begin_file = sf, begin_char = c, text = l) m.save() - for survex_file, count, line in iter_lines: - begin = re_begin.match(line.split(";")[0]) - end = re_end.match(line.split(";")[0]) - date = re_date.match(line.split(";")[0]) - if begin: - make_model(begin.groups()[0], m, iter_lines, survex_file, count, line) - elif end: - m.text = m.text + line + + def saveEnd(survex_file, count): + if m.start_year and team: + try: + exp = models.Expedition.objects.get(year = str(file_year[1])) + for file_, (role, names) in team: + if names.strip("\t").strip(" ") == "both" or names.strip("\t").strip(" ") == "Both": + names = reduce(lambda x, y: x + u" & " + y, + [names for file_, (role, names) in team + if names.strip("\t").strip(" ") != "both" + and names.strip("\t").strip(" ") != "Both"]) + for name in re.split("&|/|\+|,|;", names): + try: + models.PersonRole(person = exp.GetPersonExpedition(name.strip(" ")).person, + survex_block = m, + role = models.Role.objects.get(name = roles[role])).save() + except AttributeError: + print "Person not found: " + name + " in " + file_ + except AssertionError, inst: + print inst, ": ", file_year[0] + m.end_file = survex_file m.end_char = count m.save() - assert (end.groups()[0]).lower() == (name).lower() - return None - elif date: - #print date.groups()[0] - m.text = m.text + line + team = [] + file_year = None + for survex_file, count, line in iter_lines: + #Dictionary compreshension + res = dict([(key, regex.match(line.split(";")[0])) for key, regex in flags.iteritems()]) + if res["begin"]: + make_model(res["begin"].groups()[0], m, iter_lines, survex_file, count, line) else: m.text = m.text + line + if res["end"]: + saveEnd(survex_file, count) + assert (res["end"].groups()[0]).lower() == (name).lower() + return None + elif res["date"]: + datere = re.match("(\d+)(?:\.(\d+))?(?:\.(\d+))?(?:-(\d+))?(?:\.(\d+))?(?:\.(\d+))?", + res["date"].groups()[0]) + if datere is not None: + startYear, startMonth, startDay, endYear, endMonth, endDay = datere.groups() + m.start_year = startYear + m.start_month = startMonth + m.start_day = startDay + m.end_year = endYear + m.end_month = endMonth + m.end_day = endDay + file_year = survex_file, startYear + elif res["team"]: + h = re.match("((?:[Ii]nst(?:s|ruments)?)|(?:[Pp]ics)|(?:[Tt]ape)|(?:[Nn]otes)|(?:[Oo]ther))\s*(.*)", + res["team"].groups()[0]) + if h: + team.append((survex_file, h.groups())) + else: + print "Role not found: " + line + " in: " + sf m.text = m.text + line - m.end_file = survex_file - m.end_char = count - m.save() + saveEnd(survex_file, count) +for role in ["Insts", "Notes", "Pics", "Tape", "Other"]: + models.Role(name = role).save() + filename = "all" make_model("", None, fileIterator("", filename), filename, 0, "")