/tsimapiak/translate.py |
---|
0,0 → 1,41 |
# -*- coding: utf-8 -*- |
import parse |
import dbconnector |
# Affix -> interlinear-gloss tables (Na'vi form, gloss abbreviation).
# Position-1 infixes: participles, causative, reflexive.
infixes1 = ((u"awn", u"P.PART"), (u"eyk", u"CAUS"), (u"us", u"A.PART"), (u"äp", u"REFL."))
# Position-2 infixes: tense / aspect / mood combinations.
# Longest forms first so compound glosses (e.g. FUT.SUBJ) win over their parts.
infixes2 = ((u"ìyev", u"FUT.SUBJ"), (u"iyev", u"FUT.SUBJ"), (u"ìmìy", u"REC.PAST.REC.FUT"), (u"arm", u"IMPF.PAST"), (u"asy", u"FUT.D"), (u"ilv", u"PRES.PER.SUBJ"), (u"ìmv", u"REC.PAST.SUBJ"), (u"imv", u"PAST.SUBJ"), (u"ìrm", u"IMPF.REC.PAST"), (u"irv", u"PRES.IMPF.SUBJ"), (u"ìsy", u"IMM.FUT.D"), (u"aly", u"PERF.FUT"), (u"ary", u"IMPF.FUT"), (u"ìly", u"PERF.IMM.FUT"), (u"ìry", u"IMPF.IMM.FUT"), (u"ìlm", u"PERF.REC.PAST"), (u"alm", u"PERF.PAST"), (u"am", u"PAST."), (u"ay", u"FUT."), (u"er", u"IMPF."), (u"ìm", u"REC.PAST"), (u"iv", u"SUBJ."), (u"ìy", u"IMM.FUT"), (u"ol", u"PERF."))
# Position-3 infixes: speaker-attitude markers.
infixes3 = ((u"äng", u"PEJ."), (u"ats", u"INFR."), (u"eiy", u"LAUD."), (u"ei", u"LAUD."), (u"uy", u"HON."))
# Prefixes; "-LENTD" entries are the lenited variants of the preceding form.
prefixes = ((u"tsay", u"those"), (u"say", u"those-LENTD"), (u"fay", u"these"), (u"fra", u"every"), (u"pxe", u"TRI."), (u"fne", u"type"), (u"tsa", u"that"), (u"sa", u"that-LENTD"), (u"pe", u"what"), (u"fe", u"what-LENTD"), (u"le", u"ADJD."), (u"nì", u"ADVD."), (u"sä", u"INSTD."), (u"tì", u"NOUND."), (u"sì", u"NOUND.-LENTD"), (u"ay", u"PL."), (u"me", u"DU."), (u"fì", u"this"), (u"ke", u"not"), (u"he", u"not-LENTD"), (u"a", u"ADJA."))
# Adpositions double as postfixes in Na'vi, hence their inclusion below.
adpositions = ((u"mungwrr", u"except"), (u"kxamlä", u"through"), (u"pximaw", u"right.after"), (u"pxisre", u"right.before"), (u"tafkip", u"from.up.among"), (u"nemfa", u"into.inside"), (u"takip", u"from among"), (u"mìkam", u"between"), (u"teri", u"about.concerning"), (u"fkip", u"up.among"), (u"luke", u"without"), (u"pxel", u"like.as"), (u"pxaw", u"around"), (u"rofa", u"beside.alongside"), (u"ìlä", u"by.via.following"), (u"fpi", u"for.the.sake/benefit.of"), (u"ftu", u"from.direction"), (u"kip", u"among"), (u"lok", u"close.to"), (u"maw", u"after.time"), (u"sre", u"before.time"), (u"sìn", u"on.onto"), (u"vay", u"up.to"), (u"eo", u"before.in.front.of"), (u"fa", u"with.by.means.of"), (u"hu", u"with.accompaniment"), (u"io", u"above"), (u"ka", u"across"), (u"mì", u"in.on"), (u"na", u"like.as"), (u"ne", u"to.towards"), (u"ro", u"at.locative"), (u"ta", u"from"), (u"uo", u"behind"), (u"wä", u"against.opposition"), (u"äo", u"below"), (u"to", u"than"))
# Case endings and other suffixes, appended after the adpositions.
# NOTE(review): (u"it", u"ACC") lacks the trailing dot that (u"ti", u"ACC.")
# has — confirm whether the glosses should be uniform.
postfixes = adpositions + ((u"tsyìp", u"DIM."), (u"eyä", u"GEN."), (u"ìri", u"TOP."), (u"ìl", u"ERG."), (u"it", u"ACC"), (u"lo", u"MULT."), (u"ri", u"TOP."), (u"ru", u"DAT."), (u"ti", u"ACC."), (u"ur", u"DAT."), (u"ve", u"ORD."), (u"yä", u"GEN."), (u"ya", u"VOC."), (u"tu", u"OBJD."), (u"vi", u"PART."), (u"yu", u"AGENTD."), (u"an", u"MASC."), (u"ng", u"INCL."), (u"ke", u"not"), (u"e", u"FEM."), (u"o", u"INDEF."), (u"l", u"ERG."), (u"t", u"ACC."), (u"y", u"GEN."), (u"a", u"ADJA."), (u"ä", u"GEN."), (u"r", u"DAT."))
def translatesent(sent, lang):
    """Parse a Na'vi sentence and attach a gloss to every word.

    sent -- raw Na'vi sentence (unicode string)
    lang -- language code for the dictionary lookup (e.g. u"eng")

    Returns the word list produced by parse.parsesent() with a
    "translated" entry added to each word dict: the dictionary
    translation (or the bracketed Na'vi form for unknown words) with
    the gloss of every recognised infix, prefix and postfix appended.
    """
    sent = parse.parsesent(sent)
    # The affix tables are (form, gloss) pairs with unique forms, so a
    # dict gives the same result as scanning the tuples, in O(1).
    inf_tables = (dict(infixes1), dict(infixes2), dict(infixes3))
    pref_table = dict(prefixes)
    post_table = dict(postfixes)
    for word in sent:
        if word["word"]["id"] != 0:
            word["translated"] = dbconnector.translate(word["word"]["id"], lang)
        else:
            # Unknown word: fall back to the (bracketed) Na'vi text itself.
            word["translated"] = word["word"]["navi"]
        for slot in range(3):
            fix = word["inf"][slot]
            if fix != u"" and fix in inf_tables[slot]:
                word["translated"] += inf_tables[slot][fix]
        # "pref"/"post" hold one list of affixes per dictionary token
        # (see parse.parseword / parsenum.parse), so iterate the nested
        # lists. The original compared whole lists against affix strings
        # and therefore never glossed any prefix or postfix.
        if word["pref"] is not None:
            for part in word["pref"]:
                for origfix in part:
                    if origfix in pref_table:
                        word["translated"] += pref_table[origfix]
        if word["post"] is not None:
            for part in word["post"]:
                for origfix in part:
                    if origfix in post_table:
                        word["translated"] += post_table[origfix]
    return sent
/tsimapiak/parse.py |
---|
0,0 → 1,131 |
#!/usr/bin/python |
# -*- coding: utf-8 -*- |
import re |
import dbconnector |
import parsenum |
# Full Na'vi dictionary, fetched once at import time; entries are sorted
# longest-first by dbconnector so the longest stem wins during matching.
wordlist = dbconnector.getnavilist()
# Recognised infixes per slot; the trailing u"" lets the matcher try
# "no infix in this slot" as one of the combinations.
infixes1 = (u"awn", u"eyk", u"us", u"äp", u"")
infixes2 = (u"ìyev", u"iyev", u"ìmìy", u"arm", u"asy", u"ilv", u"ìmv", u"imv", u"ìrm", u"irv", u"ìsy", u"aly", u"ary", u"ìly", u"ìry", u"ìlm", u"alm", u"am", u"ay", u"er", u"ìm", u"iv", u"ìy", u"ol", u"")
infixes3 = (u"äng", u"ats", u"eiy", u"ei", u"uy", u"")
# Affix inventories, longest-first so greedy stripping prefers the
# longest match (e.g. "tsay" before "say" before "ay" before "a").
prefixes = (u"tsay", u"say", u"fay", u"fra", u"pxe", u"fne", u"tsa", u"sa", u"pe", u"fe", u"le", u"nì", u"sä", u"tì", u"sì", u"ay", u"me", u"fì", u"ke", u"he", u"a")
adpositions = (u"mungwrr", u"kxamlä", u"pximaw", u"pxisre", u"tafkip", u"nemfa", u"takip", u"mìkam", u"teri", u"fkip", u"luke", u"pxel", u"pxaw", u"rofa", u"ìlä", u"fpi", u"ftu", u"kip", u"lok", u"maw", u"sre", u"sìn", u"vay", u"eo", u"fa", u"hu", u"io", u"ka", u"mì", u"na", u"ne", u"ro", u"ta", u"uo", u"wä", u"äo", u"to")
# Adpositions can be suffixed to the noun, so they count as postfixes too.
postfixes = adpositions + (u"tsyìp", u"eyä", u"ìri", u"ìl", u"it", u"lo", u"ri", u"ru", u"ti", u"ur", u"ve", u"yä", u"ya", u"tu", u"vi", u"yu", u"an", u"ng", u"ke", u"e", u"o", u"l", u"t", u"y", u"a", u"ä", u"r")
#prefixesn = ur"(?P<npr>(?:(?:fì|tsa)?(?:me|pxe|ay|fra)?|(?:fay)?|(?:tsay)?)(?:fne)?(?:tì|sä)?"
#prefixesv = ur"(?P<vpr>(?:nì|sä|tì|rä'ä |ke )?)"
# Lenition pairs (plain consonant, lenited consonant); checked in order,
# so the digraphs must come before their single-letter counterparts.
lenit = ((u"px", u"p"), (u"tx", u"t"), (u"kx", u"k"), (u"ts", u"s"), (u"t", u"s"), (u"p", u"f"), (u"k", u"h"), (u"'", u""))
def parseword(wordin):
    """Match a sentence word against the dictionary, stripping affixes.

    wordin -- list of sentence tokens starting at the word to parse;
    tokens beyond the first are consulted only for multi-token
    dictionary entries (e.g. u"tìng nari").

    Returns a dict with keys "word" (the matched dictionary entry, or a
    placeholder with id 0 and the Na'vi form wrapped in brackets),
    "pref"/"post" (one list of matched affixes per dictionary token),
    "inf" (the three infix slots) and "len" (lenition flag).
    """
    # Fallback result for a word we cannot match at all.
    ret = {"word": {"id": 0, "navi": u"[" + wordin[0] + u"]", "infix": u"", "type": u""}}
    for word in wordlist:
        foundit = True
        foundprefs = []   # per-token prefix lists
        foundposts = []   # per-token postfix lists
        lenited = False
        # Dictionary form with infix slot markers, split into tokens,
        # e.g. u"t<1><2>ìng nari" -> [u"t<1><2>ìng", u"nari"].
        splitword = word["infix"].split(u" ")
        if len(wordin) < len(splitword):
            # Not enough sentence tokens left for this multi-word entry.
            foundit = False
            next  # NOTE(review): no-op (evaluates the builtin, not `continue`); the foundit check below still skips the token loop.
        for wor in range(len(splitword)):
            if not foundit:
                break
            foundprefs.append([])
            foundposts.append([])
            center = u""                 # dictionary stem located inside the token
            foundins = [u"", u"", u""]   # infixes found in slots <1>, <2>, <3>
            pre = []   # NOTE(review): unused; rebound as loop variables below — confirm dead
            post = []
            if u"<1>" in splitword[wor]:
                # Brute-force every infix combination (each tuple ends in
                # u"", so the bare stem is tried too) until the filled-in
                # stem occurs inside the sentence token.
                for in1 in infixes1:
                    for in2 in infixes2:
                        for in3 in infixes3:
                            if splitword[wor].replace(u"<1><2>",in1+in2).replace(u"<3>",in3) in wordin[wor]:
                                center = splitword[wor].replace(u"<1><2>",in1+in2).replace(u"<3>",in3)
                                foundins = [in1, in2, in3]
                                break
                        if center != u"": break
                    if center != u"": break
            else:
                if splitword[wor] in wordin[wor]:
                    center = splitword[wor]
            if center == u"":
                # No direct match: try the lenited form of the stem.
                for i in lenit:
                    temp = u""
                    if splitword[wor].startswith(i[0]):
                        temp = i[1] + splitword[wor][len(i[0]):]
                    if temp in wordin[wor]:
                        # NOTE(review): when the stem does not start with
                        # i[0], temp stays u"" and `u"" in x` is always
                        # True, so lenited can be set spuriously while
                        # center remains empty — verify intended.
                        lenited = True
                        center = temp
            if center == u"":
                # Special-cased short forms of "nga" and "po".
                if splitword[wor].endswith(u"nga"):
                    temp = splitword[wor][:-3] + u"ng"
                    if temp in wordin[wor]:
                        center = temp
                if splitword[wor].endswith(u"po"):
                    temp = splitword[wor][:-3] + u"p"  # NOTE(review): drops 3 chars for a 2-char ending — confirm not [:-2]
                    if temp in wordin[wor]:
                        center = temp
            if center == u"":
                foundit = False
                break
            # The stem must occur exactly once, leaving one prefix
            # cluster and one postfix cluster around it.
            temp = wordin[wor].split(center)
            if len(temp) != 2:
                foundit = False
                break
            pref, posf = temp
            # Greedily peel known prefixes off the inner (right) end of
            # the prefix cluster until a full pass changes nothing.
            last = u""
            while last != pref:
                last = pref
                for pre in prefixes:
                    if pref != u"":
                        if pref.endswith(pre):
                            if pre in foundprefs[wor]:
                                # Same prefix seen twice: stop stripping.
                                break
                            foundprefs[wor].append(pre)
                            pref = pref[:-len(pre)]
                            break
            if pref != u"":
                # Unexplained leading material: this entry does not fit.
                foundit = False
                break
            # Likewise peel known postfixes off the left of the suffix cluster.
            last = u""
            while last != posf:
                last = posf
                for pos in postfixes:
                    if posf != u"":
                        if posf.startswith(pos):
                            if pos in foundposts[wor]:
                                break
                            foundposts[wor].append(pos)
                            posf = posf[len(pos):]
                            break
            if posf != u"":
                foundit = False
                break
        if foundit == True:
            foundword = word
            break
    # NOTE(review): foundit/foundins come from the loop above; an empty
    # wordlist, or no candidate ever entering the token loop, would raise
    # NameError here — presumably impossible in practice, verify.
    ret["pref"] = foundprefs
    ret["post"] = foundposts
    ret["inf"] = foundins
    ret["len"] = lenited
    if foundit == True:
        ret["word"] = foundword
    return ret
def parsesent(sent): |
sent = sent.strip().lower().replace(u"’", u"'") |
sent = re.sub(ur"[^\wìä' ]",u"",sent) |
sent = re.sub(ur"\ +",u" ",sent) |
sent = sent.split(u" ") |
ret = [] |
left = len(sent) |
while left: |
word = parsenum.parse(sent[len(sent)-left]) |
if word == None: |
word = parseword(sent[-left:]) |
left -= len(word["word"]["navi"].split(" ")) |
ret.append(word) |
return ret |
/tsimapiak/parsenum.py |
---|
1,81 → 1,129 |
#!/usr/bin/python |
# -*- coding: utf-8 -*- |
import re |
num = [u"kew", |
u"'aw", |
u"mune", |
u"pxey", |
u"tsìng", |
u"mrr", |
u"pukap", |
u"kinä"] |
rem = [u"aw", |
u"mun", |
u"pey", |
u"sìng", |
u"mrr", |
u"fu", |
u"hin"] |
base = [u"", |
u"me", |
u"pxe", |
u"tsì", |
u"mrr", |
u"pu", |
u"ki"] |
numre = \ |
u"^(?:(" + "|".join(base) + u")zazam??)?" + \ |
u"(?:(" + "|".join(base) + u")vozam??)?" + \ |
u"(?:(" + "|".join(base) + u")zam??)?" + \ |
u"(?:(" + "|".join(base) + u")vo(?:l(?=a|))?)?" + \ |
u"((?:" + "|".join(rem) + u")|" + \ |
u"(?:" + "|".join(num) + u"))?$" |
numre = re.compile(numre) |
def parse(numin): |
if type(numin) != unicode: |
return None |
if numin == u"": |
return None |
numin = numin.replace(u"í",u"ì").replace(u"á",u"ä") |
try: |
mat = numre.match(numin).groups() |
except: |
return None |
numout = 0 |
numoct = 0 |
try: |
numout += rem.index(mat[4]) + 1 |
numoct += rem.index(mat[4]) + 1 |
except: |
try: |
numout += num.index(mat[4]) |
numoct += num.index(mat[4]) |
except: pass |
try: |
numout += (base.index(mat[3]) + 1) * 8 |
numoct += (base.index(mat[3]) + 1) * 10 |
except: pass |
try: |
numout += (base.index(mat[2]) + 1) * 8**2 |
numoct += (base.index(mat[2]) + 1) * 10**2 |
except: pass |
try: |
numout += (base.index(mat[1]) + 1) * 8**3 |
numoct += (base.index(mat[1]) + 1) * 10**3 |
except: pass |
try: |
numout += (base.index(mat[0]) + 1) * 8**4 |
numoct += (base.index(mat[0]) + 1) * 10**4 |
except: pass |
return numout, numoct |
if __name__ == "__main__": |
print parse(u"mrrvolaw") |
#!/usr/bin/python |
# -*- coding: utf-8 -*- |
num = [u"kew", |
u"'aw", |
u"mune", |
u"pxey", |
u"tsìng", |
u"mrr", |
u"pukap", |
u"kinä"] |
numord = [u"kew", |
u"'aw", |
u"mu", |
u"pxey", |
u"tsì", |
u"mrr", |
u"pu", |
u"ki"] |
rem = [u"aw", |
u"mun", |
u"pey", |
u"sìng", |
u"mrr", |
u"fu", |
u"hin"] |
remord = [u"aw", |
u"mu", |
u"pey", |
u"sì", |
u"mrr", |
u"fu", |
u"hi"] |
base = [u"", |
u"me", |
u"pxe", |
u"tsì", |
u"mrr", |
u"pu", |
u"ki"] |
def parse(numin): |
if u"mm" in numin: |
return None |
if (numin[0] == u"a") and (numin[len(numin)-1] == u"a"): |
return None |
prefs = [] |
posts = [] |
outoct = 0 |
outdec = 0 |
ret = {"word": {"id": 0, "navi": u"", "infix": u"", "type": u""}, "pref": [prefs], "post": [posts], "inf": [u"", u"", u""], "len": False, "dec": outdec, "oct": outoct} |
if numin[0] == u"a": |
prefs.append(u"a") |
numin = numin[1:] |
if numin[len(numin)-1] == u"a": |
posts.append(u"a") |
numin = numin[:-1] |
if numin[-2:] == u"ve": |
posts.append(u"ve") |
numin = numin[:-2] |
#base numbers |
for n in range(len(num)): |
if u"ve" in posts: |
if numin == numord[n]: |
outoct = n |
outdec = n |
ret["word"]["navi"] = unicode(outdec) + u"." |
ret["dec"] = outdec |
ret["oct"] = outoct |
return ret |
else: |
if numin == num[n]: |
outoct = n |
outdec = n |
ret["word"]["navi"] = unicode(outdec) |
ret["dec"] = outdec |
ret["oct"] = outoct |
return ret |
#other numbers |
for n in range(len(base)): |
if numin.startswith(base[n] + u"zazam"): |
outoct += (n+1) * (10**4) |
outdec += (n+1) * (8**4) |
numin = numin[len(base[n]) + 5:] |
for n in range(len(base)): |
if numin.startswith(base[n] + u"vozam"): |
outoct += (n+1) * (10**3) |
outdec += (n+1) * (8**3) |
numin = numin[len(base[n]) + 5:] |
for n in range(len(base)): |
if numin.startswith(base[n] + u"zam"): |
outoct += (n+1) * (10**2) |
outdec += (n+1) * (8**2) |
numin = numin[len(base[n]) + 3:] |
for n in range(len(base)): |
if numin.startswith(base[n] + u"vol"): |
outoct += (n+1) * 10 |
outdec += (n+1) * 8 |
numin = numin[len(base[n]) + 3:] |
if numin.startswith(base[n] + u"vo"): |
outoct += (n+1) * 10 |
outdec += (n+1) * 8 |
numin = numin[len(base[n]) + 2:] |
for n in range(len(rem)): |
if u"ve" in posts: |
if numin == remord[n]: |
outoct += n + 1 |
outdec += n + 1 |
numin = u"" |
else: |
if numin == rem[n]: |
outoct += n + 1 |
outdec += n + 1 |
numin = u"" |
if numin == u"": |
ret["word"]["navi"] = unicode(outdec) if not u"ve" in posts else unicode(outdec) + u"." |
ret["dec"] = outdec |
ret["oct"] = outoct |
return ret |
else: |
return None |
if __name__ == "__main__": |
print parse(u"mevolawve") |
/tsimapiak/dbconnector.py |
---|
9,44 → 9,24 |
current = u"" |
db = tornado.database.Connection("127.0.0.1", "navi", user="navi", password="navi") |
for row in db.query(""" |
SELECT *, CHAR_LENGTH(navi) AS NL |
SELECT * |
FROM `metaWords` |
ORDER BY NL DESC"""): |
if row["partOfSpeech"] in (u"v.", u"vin.", u"vtr."): |
current = unicode(row["ipa"]) |
current = current.replace(ur"ɛ",ur"e").replace(ur".",ur"").replace(ur"ɾ",ur"r") \ |
.replace(ur"ɪ",ur"ì").replace(ur"ˈ",ur"").replace(ur"'",ur"x") \ |
.replace(ur"ŋ",ur"ng").replace(ur"j",ur"y").replace(ur"ʔ",ur"'") \ |
.replace(ur"æ",ur"ä").replace(ur"ˌ",ur"").replace(ur"\t{ts}",ur"ts") \ |
.replace(ur"ṛ",ur"rr").replace(ur"ḷ",ur"ll").replace(ur"k̚",ur"k ") \ |
.replace(ur"p̚",ur"p ").replace(ur"t̚",ur"t ").replace(ur"'̚",ur"' ") \ |
.replace(u"\\",ur"").replace(ur"(",ur"").replace(ur")",ur"") \ |
.replace(ur"[",ur"").replace(ur"]",ur"").replace(ur" "," ") \ |
.strip() |
current = re.sub(ur" or.*","",current) |
current = re.sub(ur"z(.*)engk(.*)e",ur"z\1enk\2e",current) |
current = re.sub(ur"t(.*)ì(m|n)\ ",ur"t\1ìng ",current) |
current = current.split(ur"$cdot$") |
if len(current) == 3: |
current = current[0] + u"<0><1>" + current[1] + u"<2>" + current[2] |
elif len(current) == 2: |
current = current[0] + u"<0><1><2>" + current[1] |
else: |
current = u"<0><1><2>" + current[0] |
WHERE partOfSpeech <> 'num.' AND partOfSpeech <> "prefix" |
ORDER BY CHAR_LENGTH(navi) DESC"""): |
if row["infixes"]: |
ret.append({"id": row["id"], "navi": row["navi"], "infix": row["infixes"].lower(), "type": row["partOfSpeech"]}) |
else: |
current = unicode(row["navi"]) |
ret.append([row["id"], row["navi"], current, row["partOfSpeech"]]) |
ret.append({"id": row["id"], "navi": row["navi"], "infix": row["navi"].lower(), "type": row["partOfSpeech"]}) |
db.close() |
return ret |
def getnavi(word): |
ret = [] |
def translate(wid,language): |
db = tornado.database.Connection("127.0.0.1", "navi", user="navi", password="navi") |
for row in db.query(""" |
SELECT * |
FROM `metaWords` |
WHERE navi = ?""",word): |
ret.append([row["id"],row["navi"], row["infix"], row["partOfSpeech"]]) |
FROM `localizedWords` |
WHERE id = %s AND languageCode = %s""",wid,language): |
ret = row["localized"] |
break |
db.close() |
return ret |
return ret |
/webapp/templates/translate.html |
---|
0,0 → 1,68 |
{% extends "base.html" %} |
{% block title %}Translator{% end %} |
{% block body %} |
<b>Na'vi sentence:</b><br /> |
<form action="/translate" method="post"> |
<input id="word" name="word" type="text" value="{{last}}" style="width: 100%;" /> |
<select name="lang" id="lang"> |
<option value="eng" selected="selected">English</option> |
<option value="hu">Hungarian - Magyar</option> |
<option value="de">German - Deutsch</option> |
<option value="est">Estonian - Eesti</option> |
<option value="ptbr">Brazilian Portuguese - Português do Brasil</option> |
</select> |
<input name="btn" type="submit" value="Translate!" /> |
</form> |
{% if out %} |
<table border="1"> |
<tr> |
<th>Words</th> |
<th>Translated</th> |
<th>Parts</th> |
<th>Data</th> |
</tr> |
{% for wor in out %} |
<tr> |
<td rowspan="4">{{ wor["word"]["navi"] }}</td> |
<td rowspan="4">{{ wor["translated"] }}</td> |
<td>Infixes:</td> |
<td>{{ u", ".join(wor["inf"]) }}</td> |
</tr> |
<tr> |
<td>Prefixes:</td> |
<td>{{ u"; ".join(u", ".join(x) for x in wor["pref"]) }}</td> |
</tr> |
<tr> |
<td>Postfixes:</td> |
<td>{{ u"; ".join(u", ".join(x) for x in wor["post"]) }}</td> |
</tr> |
<tr> |
<td>Lenited:</td> |
<td>{{ str(wor["len"]) }}</td> |
</tr> |
{% end %} |
</table> |
{% end %} |
<p>This program uses Eana Eltu for the list of words and infix positions (but nothing else), created by Tuiq and Taronyu. Thanks also go to the rest of the Learn Na'vi community!</p> |
<script type="text/javascript"> |
document.getElementById("word").focus(); |
</script> |
{% if lang != "eng" %} |
<script type="text/javascript"> |
if("{{ lang }}" == "hu"){ |
document.getElementById("lang").selectedIndex = 1 |
} |
if("{{ lang }}" == "de"){ |
document.getElementById("lang").selectedIndex = 2 |
} |
if("{{ lang }}" == "est"){ |
document.getElementById("lang").selectedIndex = 3 |
} |
if("{{ lang }}" == "ptbr"){ |
document.getElementById("lang").selectedIndex = 4 |
} |
</script> |
{% end %} |
{% end %} |
/webapp/templates/parse.html |
---|
0,0 → 1,43 |
{% extends "base.html" %} |
{% block title %}Sentence parser{% end %} |
{% block body %} |
<b>Na'vi sentence:</b><br /> |
<form action="/parse" method="post"> |
<input id="word" name="word" type="text" value="{{last}}" style="width: 100%;" /> |
<input name="btn" type="submit" value="Parse!" /> |
</form> |
{% if out %} |
<table border="1"> |
<tr> |
<th>Words</th> |
<th>Parts</th> |
<th>Data</th> |
</tr> |
{% for wor in out %} |
<tr> |
<td rowspan="4">{{ wor["word"]["navi"] }}</td> |
<td>Infixes:</td> |
<td>{{ u", ".join(wor["inf"]) }}</td> |
</tr> |
<tr> |
<td>Prefixes:</td> |
<td>{{ u"; ".join(u", ".join(x) for x in wor["pref"]) }}</td> |
</tr> |
<tr> |
<td>Postfixes:</td> |
<td>{{ u"; ".join(u", ".join(x) for x in wor["post"]) }}</td> |
</tr> |
<tr> |
<td>Lenited:</td> |
<td>{{ str(wor["len"]) }}</td> |
</tr> |
{% end %} |
</table> |
{% end %} |
<p>This program uses Eana Eltu for the list of words and infix positions (but nothing else), created by Tuiq and Taronyu. Thanks also go to the rest of the Learn Na'vi community!</p> |
<script type="text/javascript"> |
document.getElementById("word").focus(); |
</script> |
{% end %} |
/webapp/templates/base.html |
---|
1,6 → 1,7 |
<html> |
<head> |
<title>Tsim Apiak - {% block title %}Title{% end %}</title> |
<link rel="shortcut icon" type="image/x-icon" href="static/favicon.ico" /> |
<style type="text/css"> |
body { |
background: #145179; |
/webapp/templates/index.html |
---|
0,0 → 1,8 |
{% extends "base.html" %} |
{% block title %}Home{% end %} |
{% block body %} |
<a href="/number"><b>Number translator</b></a> - this webapp allows you to translate written-out Na'vi numbers into decimal and octal.<br />
<a href="/parse"><b>Parser</b></a> - this webapp can parse Na'vi sentences into the base words, prefixes, infixes and suffixes.<br />
<a href="/translate"><b>Translator</b></a> - this webapp parses Na'vi sentences and glosses each word and its affixes in your chosen language.
{% end %} |
/webapp/templates/number.html |
---|
17,4 → 17,4 |
<script type="text/javascript"> |
document.getElementById("num").focus(); |
</script> |
{% end %} |
{% end %} |
/webapp/main.py |
---|
9,12 → 9,14 |
import os |
import re |
from tsimapiak.parsenum import parse |
from tsimapiak.dbconnector import getnavilist |
from tsimapiak import parsenum |
from tsimapiak import dbconnector |
from tsimapiak import parse |
from tsimapiak import translate |
class Index(tornado.web.RequestHandler): |
def get(self): |
self.redirect("/number") |
self.render("templates/index.html") |
class Number(tornado.web.RequestHandler): |
def get(self): |
25,10 → 27,12 |
num = self.get_argument("num").strip() |
except: |
self.redirect("/number") |
numout = parse(num.replace(" ","")) |
numout = parsenum.parse(num.replace(" ","")) |
if numout == None: |
numout = -1 |
self.render("templates/number.html", last=num, numout=numout) |
numoutt = -1 |
else: |
numoutt = (numout["dec"], numout["oct"]) |
self.render("templates/number.html", last=num, numout=numoutt) |
class Restart(tornado.web.RequestHandler): |
def get(self): |
37,20 → 41,51 |
class TestDB(tornado.web.RequestHandler): |
def get(self): |
lis = getnavilist() |
lis = dbconnector.getnavilist() |
text = u"id | navi | infix | partofspeech<br />" |
text += u"<br />".join(u" | ".join(unicode(y) for y in x) for x in lis) |
self.write(text) |
class Parse(tornado.web.RequestHandler): |
def get(self): |
self.render("templates/parse.html", last="", out=None) |
def post(self): |
try: |
word = self.get_argument("word") |
except: |
self.redirect("/parse") |
out = parse.parsesent(word) |
self.render("templates/parse.html", last=word, out=out) |
class Translate(tornado.web.RequestHandler): |
def get(self): |
self.render("templates/translate.html", last="", out=None, lang="eng") |
def post(self): |
try: |
word = self.get_argument("word") |
lang = self.get_argument("lang") |
except: |
self.redirect("/translate") |
out = translate.translatesent(word, lang) |
self.render("templates/translate.html", last=word, out=out, lang=lang) |
settings = { |
"static_path": os.path.join(os.path.dirname(__file__), "static") |
} |
application = tornado.web.Application([ |
("/", Index), |
("/number", Number), |
("/restart", Restart), |
("/testdb", TestDB) |
]) |
("/testdb", TestDB), |
("/parse", Parse), |
("/translate", Translate) |
], **settings) |
if __name__ == "__main__": |
http_server = tornado.httpserver.HTTPServer(application) |
http_server.listen(1337) |
tornado.autoreload.start() |
tornado.ioloop.IOLoop.instance().start() |
#tornado.autoreload.start() |
tornado.ioloop.IOLoop.instance().start() |
/webapp/static/favicon.ico |
---|
Cannot display: file marked as a binary type. |
svn:mime-type = application/octet-stream |
/webapp/static/favicon.ico |
---|
Property changes: |
Added: svn:mime-type |
## -0,0 +1 ## |
+application/octet-stream |
\ No newline at end of property |
Index: dev/naviparse.py |
=================================================================== |
--- dev/naviparse.py (revision 48) |
+++ dev/naviparse.py (nonexistent) |
@@ -1,92 +0,0 @@ |
-# -*- coding: utf-8 -*- |
-import re |
- |
-strings = [ u"oe", u"nga", u"k<0><1>am<2>e", u"t<0><1><2>ìng nari", u"s<0><1><2>i", u"t<0><1><2>ìng" ] |
- |
-infixes0 = [ u"awn", u"eyk", u"us", u"äp" ] |
-infixes1 = [ u"ìyev", u"iyev", u"arm", u"asy", u"ilv", u"ìmv", u"imv" u"ìrm", u"irv", u"ìsy", u"aly", u"ary", u"ìly", u"ìry", u"ìlm", u"alm", u"am", u"ay", u"er", u"ìm", u"iv", u"ìy", u"ol" ] |
-infixes2 = [ u"äng", u"ats", u"eiy", u"ei", u"uy" ] |
- |
-# Returns array with Word,Infix 0,Infix 1,Infix 2,Case,Gender,Number suffixes,Inclusive,Indefinite,Vocative (suffix),Plural,Adposition,Adject pre,Adject suff,am/ay/tu/vi/yu,adverbial,nominalise,sä,fne,lenited? |
-def parsefix(original): |
- realword = u"" |
- infix0 = u"" |
- infix1 = u"" |
- infix2 = u"" |
- infix01 = u"" |
- infix_1 = u"" |
- infix_2 = u"" |
- for eachword in strings: |
- regex = re.sub(u" ",u"[^ ]* [^ ]*",eachword) |
- regex = re.sub(u"^",u"[^ ]*",regex) |
- regex = re.sub(u"$",u"[^ ]*",regex) |
- regex = re.sub(u"<0><1>",u"[^ ]*",regex) |
- regex = re.sub(u"<2>",u"[^ ]*",regex) |
- if re.match(regex,original): |
- realword = eachword |
- break |
- if realword == u"": |
- return [u"[" + original + u"]",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u""] |
- else: |
- if re.search(u"<",realword): |
- beginning = re.sub(u"<0><1>.*",u"",realword) |
- middle = re.sub(u".*<0><1>(.*)<2>.*",ur"\1",realword) |
- end = re.sub(u".*<2>",u"",realword) |
- infix01 = re.sub(u".*?" + re.sub(u"<0><1>",u"([^ ]*)",re.sub(u"<2>",u"[^ ]*",realword)) + u".*?",ur"\1",original) |
- infix_2 = re.sub(u".*?" + re.sub(u"<2>",u"([^ ]*)",re.sub(u"<0><1>",u"[^ ]*",realword)) + u".*?",ur"\1",original) |
- for eachinfix in infixes0: |
- if infix01.startswith(eachinfix): |
- infix0 = eachinfix |
- infix_1 = infix01[len(eachinfix):] |
- break |
- else: |
- infix0 = u"" |
- infix_1 = infix01 |
- gotinfix1 = False |
- for eachinfix in infixes1: |
- if infix_1.startswith(eachinfix): |
- infix1 = eachinfix |
- infix_1 = infix_1[len(eachinfix):] |
- if infix_1 != u"": |
- if re.search(u"<0><1><2>",realword): |
- infix_2 = infix_1 |
- else: |
- return [u"[" + original + u"]",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u""] |
- gotinfix1 = True |
- break |
- if gotinfix1 == False: |
- if re.search(u"<0><1><2>",realword): |
- if infix_1 == u"": |
- infix_2 = infix_1 |
- infix1 = u"" |
- elif infix_1 == u"": |
- infix1 = u"" |
- else: |
- return [u"[" + original + u"]",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u""] |
- gotinfix2 = False |
- for eachinfix in infixes2: |
- if infix_2.startswith(eachinfix): |
- infix2 = infix_2 |
- gotinfix2 = True |
- break |
- if gotinfix2 == False: |
- if infix_2.startswith(end): |
- suffixes = infix2[len(end) - 1:] + end |
- elif infix_2 == u"": |
- infix2 = u"" |
- else: |
- return [u"[" + original + u"]",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u""] |
-# print u"0" + unicode(infix0) + u" 1" + unicode(infix1) + u" 2" + unicode(infix2) |
- return [realword,infix0,infix1,infix2,u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u""] |
- else: |
- return [realword,u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u""] |
- |
-print parsefix(u"oel") |
-print parsefix(u"ngati") |
-print parsefix(u"kameie") |
-print parsefix(u"kìyevame") |
-print parsefix(u"English") |
-print parsefix(u"keykivame") |
-print parsefix(u"tìsusiti") |
-print parsefix(u"tayìng nari") |
-print parsefix(u"tìtusìng") |