2009-05-13 05:14:03 +01:00
|
|
|
import settings
|
|
|
|
import expo.models as models
|
|
|
|
import re
|
|
|
|
import os
|
|
|
|
|
2009-05-13 05:21:16 +01:00
|
|
|
# Maps every spelling of a role accepted on a ``*team`` line to the canonical
# Role name stored in the database.  Every value must be one of the Role names
# created at import time ("Insts", "Notes", "Pics", "Tape", "Other"), because
# the value is used directly in a ``Role.objects.get(name=...)`` lookup.
roles = {
    "Insts": "Insts",
    "insts": "Insts",
    "Instruments": "Insts",
    "instruments": "Insts",
    "Inst": "Insts",
    "inst": "Insts",
    "dog": "Other",
    "Dog": "Other",
    "other": "Other",
    "Other": "Other",
    "Notes": "Notes",
    # Previously mapped to lower-case "notes", which is not a Role name and
    # made the lookup fail for "*team notes ..." lines.
    "notes": "Notes",
    "pics": "Pics",
    "Pics": "Pics",
    "Tape": "Tape",
    "tape": "Tape",
}
|
|
|
|
|
2009-05-13 05:14:03 +01:00
|
|
|
# "*include <path>.svx" -- group 1 is the path without the extension.
# The dot is escaped so that only a literal ".svx" suffix is stripped, and
# trailing whitespace (including the "\r" of CRLF line endings, which survives
# because the files are read in binary mode) is tolerated.
re_include_extension = re.compile(r"^\s*\*include\s+(\S*)\.svx\s*$", re.IGNORECASE)
# "*include <path>" -- group 1 is the path exactly as written.  This also
# matches lines with an extension, so callers must try re_include_extension
# first.
re_include_no_extension = re.compile(r"^\s*\*include\s+(\S*)\s*$", re.IGNORECASE)
|
2009-05-13 05:21:16 +01:00
|
|
|
# One compiled pattern per survex command that the importer reacts to.  Each
# pattern matches "*<command> <argument>" case-insensitively, with optional
# leading whitespace, and captures the argument (trailing whitespace removed)
# as group 1.
flags = dict(
    (keyword, re.compile(r"^\s*\*%s\s+(.*?)\s*$" % keyword, re.IGNORECASE))
    for keyword in ("begin", "end", "date", "team")
)
|
2009-05-13 05:14:03 +01:00
|
|
|
|
|
|
|
def fileIterator(directory, filename):
    """Yield every line of a survex file, following ``*include`` directives.

    The file ``<directory>/<filename>.svx`` is looked up below
    ``settings.SURVEX_DATA``.  Lines that are ``*include`` directives are not
    yielded themselves; instead the lines of the included file are yielded in
    their place (recursively), with include paths resolved relative to
    ``directory``.

    Yields:
        ``(survex_file, char, line)`` tuples where ``survex_file`` is the path
        (relative to ``settings.SURVEX_DATA``) of the file the line came from,
        ``char`` is the number of characters that precede the line in that
        file, and ``line`` is the line decoded as latin1, line ending included.

    Raises:
        IOError: if the file cannot be opened under either its given name or
            the all-lower-case fallback.
    """
    survex_file = os.path.join(directory, filename + ".svx")
    full_path = os.path.join(settings.SURVEX_DATA, survex_file)
    try:
        f = open(full_path, "rb")
    except (IOError, OSError):
        # Fall back to the all-lower-case spelling.  As before, the whole
        # joined path is lower-cased, including settings.SURVEX_DATA.
        f = open(full_path.lower(), "rb")
    # Read everything up front and close the handle straight away, so that a
    # deep chain of includes does not keep one open file per nesting level.
    try:
        raw_lines = f.readlines()
    finally:
        f.close()

    def included_lines(include):
        # Include paths may use either "/" or "\" as separator.
        link = re.split(r"/|\\", include)
        return fileIterator(os.path.join(directory, *link[:-1]), link[-1])

    char = 0
    for raw_line in raw_lines:
        # latin1 maps every byte to exactly one character, so decoding never
        # fails and character offsets equal byte offsets.
        line = raw_line.decode("latin1")
        # The pattern with the ".svx" extension must be tried first: the
        # extension-less pattern would match too and keep the suffix.
        include = re_include_extension.match(line) or re_include_no_extension.match(line)
        if include:
            for item in included_lines(include.groups()[0]):
                yield item
        else:
            yield survex_file, char, line
        # Include lines still count towards the offsets of this file.
        char = char + len(line)
|
|
|
|
|
|
|
|
# Patterns used by make_model, compiled once instead of on every line.
# "*date" argument: start[.month[.day]][-end[.month[.day]]]
_SURVEX_DATE_RE = re.compile(
    r"(\d+)(?:\.(\d+))?(?:\.(\d+))?(?:-(\d+))?(?:\.(\d+))?(?:\.(\d+))?")
# "*team" argument: a role keyword followed by the names.
_SURVEX_TEAM_RE = re.compile(
    r"((?:[Ii]nst(?:s|ruments)?)|(?:[Pp]ics)|(?:[Tt]ape)|(?:[Nn]otes)|(?:[Oo]ther))\s*(.*)")
# Separators allowed between several names on one "*team" line.
_SURVEX_NAME_SPLIT_RE = re.compile(r"&|/|\+|,|;")


def _is_both(names):
    """Return True when a ``*team`` entry says "both" instead of naming people."""
    return names.strip("\t").strip(" ") in ("both", "Both")


def _named_team_members(team):
    """Return the names text of every team entry that is not a "both" entry."""
    named = []
    for entry in team:
        entry_names = entry[1][1]
        if not _is_both(entry_names):
            named.append(entry_names)
    return named


def make_model(name, parent, iter_lines, sf, c, l):
    """Create and save a SurvexBlock, consuming lines up to its ``*end``.

    Lines are pulled from ``iter_lines`` until the matching ``*end`` line or
    until the iterator is exhausted.  A nested ``*begin`` starts a recursive
    call that shares the same iterator, so the child consumes its own lines.
    ``*date`` lines set the block's start/end date fields and ``*team`` lines
    are collected; when the block is closed, one PersonRole is saved per
    person named on the collected team lines.

    Args:
        name: block name, compared case-insensitively with the ``*end`` name.
        parent: enclosing SurvexBlock, or a false value for the root block.
        iter_lines: iterator of ``(survex_file, char, line)`` tuples.
        sf: file in which the block begins.
        c: character offset at which the block begins.
        l: initial text of the block (the ``*begin`` line for nested blocks).

    Returns:
        None.

    Raises:
        AssertionError: if the name on the ``*end`` line does not match
            ``name``.
    """
    block_fields = dict(name=name, begin_file=sf, begin_char=c, text=l)
    if parent:
        block_fields["parent"] = parent
    m = models.SurvexBlock(**block_fields)
    m.save()

    team = []          # (survex_file, (role, names)) for every "*team" line
    file_year = None   # (survex_file, start year) of the last "*date" line

    def saveEnd(survex_file, count):
        """Record team roles and the end position, then save the block."""
        if m.start_year and team:
            try:
                exp = models.Expedition.objects.get(year=str(file_year[1]))
                named = _named_team_members(team)
                for file_, (role, names) in team:
                    if _is_both(names):
                        if not named:
                            # "both" with nobody else listed: nothing to
                            # expand, so report it instead of failing.
                            print("Person not found: " + names + " in " + file_)
                            continue
                        # "both" stands for everyone named on the other team
                        # lines.  The names are gathered by a helper so that
                        # role and file_ of this entry are not overwritten by
                        # leaked list-comprehension variables (Python 2).
                        names = u" & ".join(named)
                    for person_name in _SURVEX_NAME_SPLIT_RE.split(names):
                        try:
                            models.PersonRole(
                                person=exp.GetPersonExpedition(person_name.strip(" ")).person,
                                survex_block=m,
                                role=models.Role.objects.get(name=roles[role])).save()
                        except AttributeError:
                            print("Person not found: " + person_name + " in " + file_)
            except AssertionError:
                # NOTE(review): which call raises AssertionError is not
                # visible from here -- presumably one of the model helpers.
                # sys.exc_info() keeps this valid on every Python version.
                import sys
                inst = sys.exc_info()[1]
                print("%s :  %s" % (inst, file_year[0]))
        m.end_file = survex_file
        m.end_char = count
        m.save()

    # If the iterator is already exhausted the block ends where it began.
    survex_file, count = sf, c
    for survex_file, count, line in iter_lines:
        # Anything after ";" is a comment and is ignored when matching.
        command = line.split(";")[0]
        res = dict((key, regex.match(command)) for key, regex in flags.items())
        if res["begin"]:
            # The child block takes the "*begin" line as its initial text.
            make_model(res["begin"].groups()[0], m, iter_lines, survex_file, count, line)
        else:
            m.text = m.text + line
        if res["end"]:
            saveEnd(survex_file, count)
            # Raised explicitly so the check survives "python -O".
            if res["end"].groups()[0].lower() != name.lower():
                raise AssertionError(
                    "*end %s does not close *begin %s" % (res["end"].groups()[0], name))
            return None
        elif res["date"]:
            datere = _SURVEX_DATE_RE.match(res["date"].groups()[0])
            if datere is not None:
                startYear, startMonth, startDay, endYear, endMonth, endDay = datere.groups()
                m.start_year = startYear
                m.start_month = startMonth
                m.start_day = startDay
                m.end_year = endYear
                m.end_month = endMonth
                m.end_day = endDay
                file_year = survex_file, startYear
        elif res["team"]:
            h = _SURVEX_TEAM_RE.match(res["team"].groups()[0])
            if h:
                team.append((survex_file, h.groups()))
            else:
                # Report the file the line came from, not the file in which
                # the block began.
                print("Role not found: " + line + " in: " + survex_file)
    # Input ran out without an "*end".  The last line was already added to
    # the text inside the loop, so it is not appended a second time here.
    saveEnd(survex_file, count)
|
|
|
|
|
2009-05-13 05:14:03 +01:00
|
|
|
|
2009-05-13 05:21:16 +01:00
|
|
|
# Make sure one Role row exists for each canonical role name.  get_or_create
# is used instead of an unconditional save so that running the import again
# does not add duplicate rows, which would make later
# ``Role.objects.get(name=...)`` lookups fail.
for role in ["Insts", "Notes", "Pics", "Tape", "Other"]:
    models.Role.objects.get_or_create(name=role)
|
2009-05-13 05:14:03 +01:00
|
|
|
|
|
|
|
# Import the dataset: start from "all.svx" and build the unnamed, parentless
# root block, whose initial text is empty and which begins at offset 0.
filename = "all"
make_model(
    name="",
    parent=None,
    iter_lines=fileIterator("", filename),
    sf=filename,
    c=0,
    l="",
)
|