/tsimapiak/dbconnector.py |
---|
36,7 → 36,7 |
if row["infixes"] and row["infixes"] != "NULL": # yeah seriously |
ret.append({"id": row["id"], "navi": row["navi"].replace("+", "").replace("-", ""), "infix": row["infixes"].lower(), "type": row["partOfSpeech"]}) |
else: |
ret.append({"id": row["id"], "navi": row["navi"].replace("+", "").replace("-", ""), "infix": row["navi"].lower(), "type": row["partOfSpeech"]}) |
ret.append({"id": row["id"], "navi": row["navi"].replace("+", "").replace("-", ""), "infix": row["navi"].replace("+", "").replace("-", "").lower(), "type": row["partOfSpeech"]}) |
cur.close() |
db.close() |
return ret |
/tsimapiak/parse.py |
---|
39,9 → 39,9 |
(u"pxengenga", u"ngenga", u"", u"", u"", [[u"pxe"]], (()), False), |
(u"kìmä", u"kä", u"", u"ìm", u"", (()), (()), False), |
(u"apxay", u"pxay", u"", u"", u"", [[u"a"]], (()), False), |
(u"akawng", u"kawng", u"", u"", u"", [[u"a"]], (()), False), |
(u"kawnga", u"kawng", u"", u"", u"", (()), [[(u"a", None)]], False), |
(u"kawng", u"kawng", u"", u"", u"", (()), (()), False), |
#(u"akawng", u"kawng", u"", u"", u"", [[u"a"]], (()), False), |
#(u"kawnga", u"kawng", u"", u"", u"", (()), [[(u"a", None)]], False), |
#(u"kawng", u"kawng", u"", u"", u"", (()), (()), False), |
(u"ka", u"ka", u"", u"", u"", (()), (()), False), |
(u"uo", u"uo", u"", u"", u"", (()), (()), False), |
(u"sìk", u"sìk", u"", u"", u"", (()), (()), False), |
57,10 → 57,33 |
#prefixesn = ur"(?P<npr>(?:(?:fì|tsa)?(?:me|pxe|ay|fra)?|(?:fay)?|(?:tsay)?)(?:fne)?(?:tì|sä)?" |
#prefixesv = ur"(?P<vpr>(?:nì|sä|tì|rä'ä |ke )?)" |
# Module-level affix tables: hand-written additions are merged into the |
# prefix/infix/postfix lists (which come from elsewhere in this module), and |
# each list is then ordered longest-spelling-first. |
# |
# Extra infix records appended to `infixes` below. Both use position 2 and |
# negative string ids. |
# NOTE(review): "eiy"/"eng" look like alternate spellings of existing infixes |
# that the source data lacks - confirm. |
EXTRAINFIXES = [ |
{"id": "-1", "navi": "eiy", "gloss": "LAUD.", "position": 2}, |
{"id": "-2", "navi": "eng", "gloss": "PEJ.", "position": 2}, |
] |
# Extra postfix record appended to `postfixes` below. |
EXTRAPOSTFIXES = [ |
{"id": "-3", "navi": "eyä", "gloss": "GEN."}, |
] |
# (navi, id) pairs for "to" and "sì"; each id is taken from the first matching |
# entry in `wordlist`. The [0] index raises IndexError at import time if |
# `wordlist` has no entry with that spelling. |
# NOTE(review): "sì" is a plain literal here while LENIT uses u"..." literals; |
# if `wordlist` holds unicode strings under Python 2 this comparison may never |
# match - confirm. |
EXTRAADP = (("to", [x["id"] for x in wordlist if x["navi"] == "to"][0]), ("sì", [x["id"] for x in wordlist if x["navi"] == "sì"][0])) # words that act like adpositions but technically aren't |
# Lenition table as (initial sound, lenited replacement) pairs. The digraphs |
# px/tx/kx/ts are listed before the single letters t/p/k they begin with. |
LENIT = ((u"px", u"p"), (u"tx", u"t"), (u"kx", u"k"), (u"ts", u"s"), (u"t", u"s"), (u"p", u"f"), (u"k", u"h"), (u"'", u"")) |
# Build lenited variants of the prefixes: when a prefix starts with a LENIT |
# initial, replace only that first occurrence and record the result under the |
# original prefix id with ".LENTD" appended to its gloss. |
# NOTE(review): indentation is not visible in this view, so the scope of the |
# `break` cannot be confirmed; presumably it ends the LENIT scan after the |
# first matching initial, so "px" is not also treated as "p" - verify. |
extraprefixes = [] |
for prefix in prefixes: |
for letter, replacement in LENIT: |
if prefix["navi"].startswith(letter): |
new_prefix = prefix["navi"].replace(letter, replacement, 1) |
if not [x for x in prefixes if x["navi"] == new_prefix]: # skip if an existing prefix is already spelled this way (the real prefix wins over a lenited one) |
extraprefixes.append({"id": prefix["id"], "navi": new_prefix, "gloss": prefix["gloss"] + ".LENTD"}) |
break |
# Merge the extra records in and order each list by descending spelling length |
# (presumably so longer affixes are tried before shorter ones - confirm |
# against the code that consumes these lists). |
prefixes = sorted(prefixes + extraprefixes, key=lambda x: len(x["navi"]), reverse=True) |
infixes = sorted(infixes + EXTRAINFIXES, key=lambda x: len(x["navi"]), reverse=True) |
postfixes = sorted(postfixes + EXTRAPOSTFIXES, key=lambda x: len(x["navi"]), reverse=True) |
def parseword(wordin): |
tempid = 0 |
temptype = u"" |
126,21 → 149,45 |
center = temp |
if center == u"": |
if splitword[wor].endswith(u"nga"): |
temp = splitword[wor][:-3] + u"ng" |
temp = splitword[wor][:-3] + u"nge" |
if temp in wordin[wor]: |
center = temp |
if splitword[wor].endswith(u"fo"): |
temp = splitword[wor][:-2] + u"f" |
temp = splitword[wor][:-2] + u"fe" |
if temp in wordin[wor]: |
center = temp |
if splitword[wor].endswith(u"po"): |
temp = splitword[wor][:-2] + u"p" |
temp = splitword[wor][:-2] + u"pe" |
if temp in wordin[wor]: |
center = temp |
if splitword[wor].endswith(u"tsa"): |
temp = splitword[wor][:-3] + u"ts" |
temp = splitword[wor][:-3] + u"tse" |
if temp in wordin[wor]: |
center = temp |
if splitword[wor].endswith(u"fko"): |
temp = splitword[wor][:-3] + u"fke" |
if temp in wordin[wor]: |
center = temp |
if splitword[wor].endswith(u"tsa'u"): |
temp = splitword[wor][:-5] + u"tse" |
if temp in wordin[wor]: |
center = temp |
if splitword[wor].endswith(u"sa'u"): |
temp = splitword[wor][:-4] + u"se" |
if temp in wordin[wor]: |
center = temp |
if splitword[wor].endswith(u"sa"): |
temp = splitword[wor][:-2] + u"se" |
if temp in wordin[wor]: |
center = temp |
if splitword[wor].endswith(u"sno"): |
temp = splitword[wor][:-3] + u"sne" |
if temp in wordin[wor]: |
center = temp |
# Stem-change fallback: a root ending in "ayla" may surface as "ayle".
# The whole four-letter ending is replaced; slicing off only three letters
# (as before) kept a stray "a" and produced "...aayle", which could never
# match the input word.
if splitword[wor].endswith(u"ayla"):
    temp = splitword[wor][:-4] + u"ayle"
    if temp in wordin[wor]:
        center = temp
if center == u"": |
foundit = False |
break |
/tsimapiak/translate.py |
---|
31,8 → 31,6 |
#ADPOSITIONS = ((u"mungwrr", u"except"), (u"kxamlä", u"through"), (u"pximaw", u"right.after"), (u"pxisre", u"right.before"), (u"tafkip", u"from.up.among"), (u"nemfa", u"into.inside"), (u"takip", u"from among"), (u"mìkam", u"between"), (u"teri", u"about.concerning"), (u"fkip", u"up.among"), (u"luke", u"without"), (u"pxel", u"like.as"), (u"pxaw", u"around"), (u"rofa", u"beside.alongside"), (u"ìlä", u"by.via.following"), (u"fpi", u"for.the.sake/benefit.of"), (u"ftu", u"from.direction"), (u"kip", u"among"), (u"lok", u"close.to"), (u"maw", u"after.time"), (u"sre", u"before.time"), (u"sìn", u"on.onto"), (u"vay", u"up.to"), (u"eo", u"before.in.front.of"), (u"fa", u"with.by.means.of"), (u"hu", u"with.accompaniment"), (u"io", u"above"), (u"ka", u"across"), (u"mì", u"in.on"), (u"na", u"like.as"), (u"ne", u"to.towards"), (u"ro", u"at.locative"), (u"ta", u"from"), (u"uo", u"behind"), (u"wä", u"against.opposition"), (u"äo", u"below"), (u"to", u"than"), (u"sì", u"and")) |
#POSTFIXES = ADPOSITIONS + ((u"tsyìp", u"DIM."), (u"eyä", u"GEN."), (u"ìri", u"TOP."), (u"ari", u"TOP."), (u"ayä", u"GEN."), (u"aru", u"DAT."), (u"ati", u"ACC."), (u"ay", u"GEN."), (u"ìl", u"ERG."), (u"it", u"ACC"), (u"lo", u"MULT."), (u"ri", u"TOP."), (u"ru", u"DAT."), (u"ti", u"ACC."), (u"ur", u"DAT."), (u"ve", u"ORD."), (u"yä", u"GEN."), (u"ya", u"VOC."), (u"tu", u"OBJD."), (u"vi", u"PART."), (u"yu", u"AGENTD."), (u"an", u"MASC."), (u"ng", u"INCL."), (u"ke", u"not"), (u"al", u"ERG."), (u"at", u"ACC."), (u"ar", u"DAT."), (u"ey", u"GEN."), (u"e", u"FEM."), (u"o", u"INDEF."), (u"l", u"ERG."), (u"t", u"ACC."), (u"y", u"GEN."), (u"a", u"ADJ.PRE"), (u"ä", u"GEN."), (u"r", u"DAT.")) |
prefixes, infixes, postfixes = dbconnector.getaffixlists() |
def translatesent(sent, lang): |
sent = parse.parsesent(sent) |
for word in sent: |
46,25 → 44,25 |
if word["translated"] == u"": |
word["translated"] = word["word"]["navi"] |
if word["inf"][0] != u"": |
for fix in [(x["navi"], x["gloss"]) for x in infixes if x["position"] == 0]: |
for fix in [(x["navi"], x["gloss"]) for x in parse.infixes if x["position"] == 0]: |
if fix[0] == word["inf"][0]: |
word["translated"] += '-' + fix[1] |
if word["inf"][1] != u"": |
for fix in [(x["navi"], x["gloss"]) for x in infixes if x["position"] == 1]: |
for fix in [(x["navi"], x["gloss"]) for x in parse.infixes if x["position"] == 1]: |
if fix[0] == word["inf"][1]: |
word["translated"] += '-' + fix[1] |
if word["inf"][2] != u"": |
for fix in [(x["navi"], x["gloss"]) for x in infixes if x["position"] == 2]: |
for fix in [(x["navi"], x["gloss"]) for x in parse.infixes if x["position"] == 2]: |
if fix[0] == word["inf"][2]: |
word["translated"] += '-' + fix[1] |
for temp in word["pref"]: |
for navf in temp: |
for fix in [(x["navi"], x["gloss"]) for x in prefixes]: |
for fix in [(x["navi"], x["gloss"]) for x in parse.prefixes]: |
if fix[0] == navf: |
word["translated"] += '-' + fix[1] |
for temp in word["post"]: |
for navf, navfid in temp: |
for fix in [(x["navi"], x["gloss"]) for x in postfixes]: |
for fix in [(x["navi"], x["gloss"]) for x in parse.postfixes]: |
if fix[0] == navf: |
word["translated"] += '-' + fix[1] |
break |