34,13 → 34,13 |
(u"sami", u"si", u"", u"am", u"", (()), (()), False), |
(u"to", u"to", u"", u"", u"", (()), (()), False), |
#(u"frato", u"to", u"", u"", u"", [[u"fra"]], (()), False), |
(u"soaiä", u"soaia", u"", u"", u"", (()), [[u"ä"]], False), |
(u"soaiä", u"soaia", u"", u"", u"", (()), [[(u"ä", None)]], False), |
(u"mengenga", u"ngenga", u"", u"", u"", [[u"me"]], (()), False), |
(u"pxengenga", u"ngenga", u"", u"", u"", [[u"pxe"]], (()), False), |
(u"kìmä", u"kä", u"", u"ìm", u"", (()), (()), False), |
(u"apxay", u"pxay", u"", u"", u"", [[u"a"]], (()), False), |
(u"akawng", u"kawng", u"", u"", u"", [[u"a"]], (()), False), # TODO remember why on earth this is needed; how is awng interpreted as awnga? |
(u"kawnga", u"kawng", u"", u"", u"", (()), [[u"a"]], False), |
(u"kawnga", u"kawng", u"", u"", u"", (()), [[(u"a", None)]], False), |
(u"kawng", u"kawng", u"", u"", u"", (()), (()), False), |
(u"ka", u"ka", u"", u"", u"", (()), (()), False), |
(u"uo", u"uo", u"", u"", u"", (()), (()), False), |
57,7 → 57,7 |
#prefixesn = ur"(?P<npr>(?:(?:fì|tsa)?(?:me|pxe|ay|fra)?|(?:fay)?|(?:tsay)?)(?:fne)?(?:tì|sä)?" |
#prefixesv = ur"(?P<vpr>(?:nì|sä|tì|rä'ä |ke )?)" |
|
EXTRAADP = ("to", "sì") # words that act like adpositions but technically aren't |
EXTRAADP = (("to", [x["id"] for x in wordlist if x["navi"] == "to"][0]), ("sì", [x["id"] for x in wordlist if x["navi"] == "sì"][0])) # words that act like adpositions but technically aren't |
|
LENIT = ((u"px", u"p"), (u"tx", u"t"), (u"kx", u"k"), (u"ts", u"s"), (u"t", u"s"), (u"p", u"f"), (u"k", u"h"), (u"'", u"")) |
|
89,7 → 89,7 |
foundprefs.append([]) |
foundposts.append([]) |
center = u"" |
if u"<1>" in splitword[wor]: |
if u"<0>" in splitword[wor]: |
tempin1 = [] |
tempin2 = [] |
tempin3 = [] |
105,8 → 105,8 |
for in1 in tempin1: |
for in2 in tempin2: |
for in3 in tempin3: |
if splitword[wor].replace(u"<1><2>", in1 + in2).replace(u"<3>", in3).replace(u"lll", u"l").replace(u"rrr", u"r") in wordin[wor]: |
center = splitword[wor].replace(u"<1><2>", in1 + in2).replace(u"<3>", in3).replace(u"lll", u"l").replace(u"rrr", u"r") |
if splitword[wor].replace(u"<0><1>", in1 + in2).replace(u"<2>", in3).replace(u"lll", u"l").replace(u"rrr", u"r") in wordin[wor]: |
center = splitword[wor].replace(u"<0><1>", in1 + in2).replace(u"<2>", in3).replace(u"lll", u"l").replace(u"rrr", u"r") |
foundins = [in1, in2, in3] |
break |
if center != u"": |
167,13 → 167,13 |
last = u"" |
while last != posf: |
last = posf |
for pos in [x["navi"] for x in postfixes] + [x["navi"] for x in wordlist if x["type"] == "adp."] + list(EXTRAADP): |
for pos, posid in [(x["navi"], None) for x in postfixes] + [(x["navi"], x["id"]) for x in wordlist if x["type"] == "adp."] + list(EXTRAADP): |
if posf != u"": |
if posf.startswith(pos): |
if pos in foundposts[wor]: |
if (pos, posid) in foundposts[wor]: |
break |
if pos != u"ä" or word["navi"] != u"pey": # XXX HACK - fix for peyä. THIS SHOULD NOT BE HERE! |
foundposts[wor].append(pos) |
foundposts[wor].append((pos, posid)) |
posf = posf[len(pos):] |
break |
else: |