/tsimapiak/parsenum.py |
---|
65,7 → 65,7 |
def parse(numin): |
if u"mm" in numin: |
return None |
if (numin[0] == u"a") and (numin[len(numin)-1] == u"a"): |
if (numin == u"") or ((numin[0] == u"a") and (numin[len(numin)-1] == u"a")): |
return None |
prefs = [] |
posts = [] |
101,41 → 101,48 |
ret["oct"] = outoct |
return ret |
#other numbers |
notbase = False |
for n in range(len(base)): |
if numin.startswith(base[n] + u"zazam"): |
outoct += (n+1) * (10**4) |
outdec += (n+1) * (8**4) |
numin = numin[len(base[n]) + 5:] |
notbase = True |
for n in range(len(base)): |
if numin.startswith(base[n] + u"vozam"): |
outoct += (n+1) * (10**3) |
outdec += (n+1) * (8**3) |
numin = numin[len(base[n]) + 5:] |
notbase = True |
for n in range(len(base)): |
if numin.startswith(base[n] + u"zam"): |
outoct += (n+1) * (10**2) |
outdec += (n+1) * (8**2) |
numin = numin[len(base[n]) + 3:] |
notbase = True |
for n in range(len(base)): |
if numin.startswith(base[n] + u"vol"): |
outoct += (n+1) * 10 |
outdec += (n+1) * 8 |
numin = numin[len(base[n]) + 3:] |
notbase = True |
if numin.startswith(base[n] + u"vo"): |
outoct += (n+1) * 10 |
outdec += (n+1) * 8 |
numin = numin[len(base[n]) + 2:] |
for n in range(len(rem)): |
if u"ve" in posts: |
if numin == remord[n]: |
outoct += n + 1 |
outdec += n + 1 |
numin = u"" |
else: |
if numin == rem[n]: |
outoct += n + 1 |
outdec += n + 1 |
numin = u"" |
notbase = True |
if notbase: |
for n in range(len(rem)): |
if u"ve" in posts: |
if numin == remord[n]: |
outoct += n + 1 |
outdec += n + 1 |
numin = u"" |
else: |
if numin == rem[n]: |
outoct += n + 1 |
outdec += n + 1 |
numin = u"" |
if numin == u"": |
ret["word"]["navi"] = unicode(outdec) if not u"ve" in posts else unicode(outdec) + u"." |
ret["dec"] = outdec |
/tsimapiak/parse.py |
---|
24,8 → 24,10 |
import dbconnector |
import parsenum |
# Search list iterated by parseword(): the database word list, with two
# hand-written entries (id 0) added around it -- "tawtute" in front and
# "na'vi" at the end. Built with a single getnavilist() call.
wordlist = (
    [{"id": 0, "navi": u"tawtute", "infix": u"tawtute", "type": u"n."}]
    + dbconnector.getnavilist()
    + [{"id": 0, "navi": u"na'vi", "infix": u"na'vi", "type": u"n."}]
)

# These are words that are either not in Eana Eltu, or that get interpreted
# wrongly for whatever reason. The latter should be removed from this list
# when the parser gets more sophisticated. The former should also have an
# entry in the equivalent array in the translator! If a word can take
# infixes, consider adding it to the main wordlist above (see the examples).
#
# Each entry has eight fields, in this order:
#   0: original (the surface form that is matched against the input)
#   1: Na'vi root
#   2: 0-pos infix
#   3: 1-pos infix
#   4: 2-pos infix
#   5: prefixes
#   6: postfixes
#   7: boolean returned by parseword() under the "len" key
#      (NOTE(review): presumably "was lenited" -- confirm)
brokenwords = (
    (u"sami", u"si", u"", u"am", u"", (), (), False),
    (u"to", u"to", u"", u"", u"", (), (), False),
    (u"poltxe", u"plltxe", u"", u"ol", u"", (), (), False),
)
infixes1 = (u"awn", u"eyk", u"us", u"äp", u"") |
infixes2 = (u"ìyev", u"iyev", u"ìmìy", u"arm", u"asy", u"ilv", u"ìmv", u"imv", u"ìrm", u"irv", u"ìsy", u"aly", u"ary", u"ìly", u"ìry", u"ìlm", u"alm", u"am", u"ay", u"er", u"ìm", u"iv", u"ìy", u"ol", u"") |
infixes3 = (u"äng", u"ats", u"eiy", u"ei", u"uy", u"") |
38,6 → 40,15 |
# Pairs of (leading consonant, replacement). parseword() walks this table in
# order and tests each first element with startswith(), so the digraphs
# (px, tx, kx, ts) are listed before the single letters they begin with.
# NOTE(review): the name suggests these are the Na'vi lenition rules -- confirm.
lenit = ((u"px", u"p"), (u"tx", u"t"), (u"kx", u"k"), (u"ts", u"s"), (u"t", u"s"), (u"p", u"f"), (u"k", u"h"), (u"'", u"")) |
def parseword(wordin): |
tempid = 0 |
temptype = u"" |
for brokenword in brokenwords: |
if wordin[0] == brokenword[0]: |
for word in wordlist: |
if brokenword[1] == word["navi"]: |
tempid = word["id"] |
temptype = word["type"] |
return {"word": {"id": tempid, "navi": brokenword[1], "infix": u"", "type": temptype}, "pref": brokenword[5], "post": brokenword[6], "len": brokenword[7], "inf": (brokenword[2], brokenword[3], brokenword[4]) } |
ret = {"word": {"id": 0, "navi": u"[" + wordin[0] + u"]", "infix": u"", "type": u""}} |
for word in wordlist: |
foundit = True |
82,7 → 93,7 |
else: |
if splitword[wor] in wordin[wor]: |
center = splitword[wor] |
if center == u"": |
if center == u"" and (wordin[wor] == u"paya" or splitword[wor] != u"pxay"): # XXX - maybe fixable without hardcoding? |
for i in lenit: |
temp = u"" |
if splitword[wor].startswith(i[0]): |
129,9 → 140,12 |
if posf.startswith(pos): |
if pos in foundposts[wor]: |
break |
foundposts[wor].append(pos) |
posf = posf[len(pos):] |
break |
if pos != ä || word["navi"] != "pey" |
foundposts[wor].append(pos) |
posf = posf[len(pos):] |
break |
else |
break |
if posf != u"": |
foundit = False |
break |
/tsimapiak/translate.py |
---|
23,6 → 23,7 |
import parse |
import dbconnector |
brokenwords = ((u"to", u"than"), (u"tawtute", u"skyperson"), (u"na'vi", u"The People")) # words not in EE |
infixes1 = ((u"awn", u"P.PART"), (u"eyk", u"CAUS"), (u"us", u"A.PART"), (u"äp", u"REFL.")) |
infixes2 = ((u"ìyev", u"FUT.SUBJ"), (u"iyev", u"FUT.SUBJ"), (u"ìmìy", u"REC.PAST.REC.FUT"), (u"arm", u"IMPF.PAST"), (u"asy", u"FUT.D"), (u"ilv", u"PRES.PER.SUBJ"), (u"ìmv", u"REC.PAST.SUBJ"), (u"imv", u"PAST.SUBJ"), (u"ìrm", u"IMPF.REC.PAST"), (u"irv", u"PRES.IMPF.SUBJ"), (u"ìsy", u"IMM.FUT.D"), (u"aly", u"PERF.FUT"), (u"ary", u"IMPF.FUT"), (u"ìly", u"PERF.IMM.FUT"), (u"ìry", u"IMPF.IMM.FUT"), (u"ìlm", u"PERF.REC.PAST"), (u"alm", u"PERF.PAST"), (u"am", u"PAST."), (u"ay", u"FUT."), (u"er", u"IMPF."), (u"ìm", u"REC.PAST"), (u"iv", u"SUBJ."), (u"ìy", u"IMM.FUT"), (u"ol", u"PERF.")) |
infixes3 = ((u"äng", u"PEJ."), (u"ats", u"INFR."), (u"eiy", u"LAUD."), (u"ei", u"LAUD."), (u"uy", u"HON.")) |
33,10 → 34,15 |
def translatesent(sent, lang): |
sent = parse.parsesent(sent) |
for word in sent: |
word["translated"] = "" |
if word["word"]["id"] != 0: |
word["translated"] = dbconnector.translate(word["word"]["id"],lang) |
else: |
word["translated"] = word["word"]["navi"] |
for brokenword in brokenwords: |
if brokenword[0] == word["word"]["navi"]: |
word["translated"] = brokenword[1] |
if word["translated"] == u"": |
word["translated"] = word["word"]["navi"] |
if word["inf"][0] != u"": |
for fix in infixes1: |
if fix[0] == word["inf"][0]: |
49,12 → 55,12 |
for fix in infixes3: |
if fix[0] == word["inf"][2]: |
word["translated"] += '-' + fix[1] |
for temp in word["pref"]: # double array? WTF? |
for temp in word["pref"]: |
for navf in temp: |
for fix in prefixes: |
if fix[0] == navf: |
word["translated"] += '-' + fix[1] |
for temp in word["post"]: # double array? WTF? |
for temp in word["post"]: |
for navf in temp: |
for fix in postfixes: |
if fix[0] == navf: |
/webapp/main.py |
---|
58,14 → 58,6 |
def get(self): |
# Handle GET by running the external command /usr/bin/restartnavi through
# os.system, which blocks until the command exits. The exit status is
# discarded and nothing is written to the response in this method.
os.system("/usr/bin/restartnavi") |
class TestDB(tornado.web.RequestHandler):
    """Handler that writes out everything dbconnector.getnavilist() returns.

    The response is a header line followed by one line per entry, with the
    entry's items joined by " | " and the lines separated by "<br />".
    """

    def get(self):
        entries = dbconnector.getnavilist()
        header = u"id | navi | infix | partofspeech<br />"
        rendered = []
        for entry in entries:
            cells = [unicode(cell) for cell in entry]
            rendered.append(u" | ".join(cells))
        self.write(header + u"<br />".join(rendered))
class Parse(tornado.web.RequestHandler): |
def get(self): |
self.render("templates/parse.html", last="", out=None) |
99,7 → 91,6 |
("/", Index), |
("/number", Number), |
("/restart", Restart), |
("/testdb", TestDB), |
("/parse", Parse), |
("/translate", Translate) |
], **settings) |