Subversion Repositories navi

Compare Revisions

Ignore whitespace Rev 48 → Rev 172

/tsimapiak/parse.py
0,0 → 1,131
#!/usr/bin/python
# -*- coding: utf-8 -*-
 
import re
import dbconnector
import parsenum
 
wordlist = dbconnector.getnavilist()
 
# Affix tables used by parseword().  Within every table the entries are listed
# longest first; presumably so that a long affix is tried before a shorter one
# it contains -- keep that order when adding entries.

# Candidates for the "<1><2>" placeholder of a dictionary pattern: one entry of
# infixes1 followed by one entry of infixes2.  The trailing u"" lets either
# position stay empty.
infixes1 = (u"awn", u"eyk", u"us", u"äp", u"")
infixes2 = (u"ìyev", u"iyev", u"ìmìy", u"arm", u"asy", u"ilv", u"ìmv", u"imv", u"ìrm", u"irv", u"ìsy", u"aly", u"ary", u"ìly", u"ìry", u"ìlm", u"alm", u"am", u"ay", u"er", u"ìm", u"iv", u"ìy", u"ol", u"")
# Candidates for the "<3>" placeholder; u"" again means "no infix here".
infixes3 = (u"äng", u"ats", u"eiy", u"ei", u"uy", u"")
# Prefixes: peeled off the END of the text that precedes the matched stem.
prefixes = (u"tsay", u"say", u"fay", u"fra", u"pxe", u"fne", u"tsa", u"sa", u"pe", u"fe", u"le", u"nì", u"sä", u"tì", u"sì", u"ay", u"me", u"fì", u"ke", u"he", u"a")
# Adpositions; only used here as the first part of `postfixes`.
adpositions = (u"mungwrr", u"kxamlä", u"pximaw", u"pxisre", u"tafkip", u"nemfa", u"takip", u"mìkam", u"teri", u"fkip", u"luke", u"pxel", u"pxaw", u"rofa", u"ìlä", u"fpi", u"ftu", u"kip", u"lok", u"maw", u"sre", u"sìn", u"vay", u"eo", u"fa", u"hu", u"io", u"ka", u"mì", u"na", u"ne", u"ro", u"ta", u"uo", u"wä", u"äo", u"to", u"sì")
# Postfixes: peeled off the START of the text that follows the matched stem.
postfixes = adpositions + (u"tsyìp", u"eyä", u"ìri", u"ìl", u"it", u"lo", u"ri", u"ru", u"ti", u"ur", u"ve", u"yä", u"ya", u"tu", u"vi", u"yu", u"an", u"ng", u"ke", u"e", u"o", u"l", u"t", u"y", u"a", u"ä", u"r")
# Disabled regex-based prefix matchers; nothing in this module reads them.
#prefixesn = ur"(?P<npr>(?:(?:fì|tsa)?(?:me|pxe|ay|fra)?|(?:fay)?|(?:tsay)?)(?:fne)?(?:tì|sä)?"
#prefixesv = ur"(?P<vpr>(?:nì|sä|tì|rä'ä |ke )?)"

# Lenition rules as (initial, replacement) pairs: a stem that starts with
# `initial` may surface starting with `replacement` instead.
lenit = ((u"px", u"p"), (u"tx", u"t"), (u"kx", u"k"), (u"ts", u"s"), (u"t", u"s"), (u"p", u"f"), (u"k", u"h"), (u"'", u""))
 
def _find_center(pattern, token):
    """Locate the surface form of one dictionary pattern inside ``token``.

    Returns ``(center, infixes, lenited)``.  ``center`` is the substring of
    ``token`` that realises ``pattern`` (``u""`` when nothing fits),
    ``infixes`` is the ``[in1, in2, in3]`` combination that was substituted
    for the placeholders (``None`` for patterns without placeholders) and
    ``lenited`` tells whether the lenited form of the pattern was used.
    """
    if u"<1>" in pattern:
        # Try every infix combination in table order; the first hit wins.
        for in1 in infixes1:
            for in2 in infixes2:
                for in3 in infixes3:
                    candidate = pattern.replace(u"<1><2>", in1 + in2).replace(u"<3>", in3)
                    if candidate != u"" and candidate in token:
                        return candidate, [in1, in2, in3], False
    elif pattern != u"" and pattern in token:
        return pattern, None, False

    # Lenited initial.  Only the first rule that applies to the pattern is
    # used: the digraph rules come first in `lenit`, and letting a later
    # single-letter rule run as well would build forms such as "fxel".
    for initial, replacement in lenit:
        if pattern.startswith(initial):
            candidate = replacement + pattern[len(initial):]
            if candidate in token:
                return candidate, None, True
            break

    # Contracted stems: final "nga" -> "ng" and final "po" -> "p".
    center = u""
    if pattern.endswith(u"nga"):
        candidate = pattern[:-3] + u"ng"
        if candidate in token:
            center = candidate
    if pattern.endswith(u"po"):
        # Drop exactly the two characters of "po" before adding "p".
        candidate = pattern[:-2] + u"p"
        if candidate in token:
            center = candidate
    return center, None, False


def _peel_affixes(rest, affixes, at_end):
    """Strip known affixes from ``rest``; return ``(found, leftover)``.

    Affixes are taken from the end of ``rest`` when ``at_end`` is true
    (prefix side, next to the stem) and from its start otherwise (postfix
    side).  Each round uses the first table entry that fits; stripping stops
    when nothing fits or when that entry was already taken once.
    """
    found = []
    while rest != u"":
        for affix in affixes:
            if at_end:
                hit = rest.endswith(affix)
            else:
                hit = rest.startswith(affix)
            if hit:
                break
        else:
            break  # no table entry fits what is left
        if affix in found:
            break  # the same affix twice is not accepted
        found.append(affix)
        if at_end:
            rest = rest[:-len(affix)]
        else:
            rest = rest[len(affix):]
    return found, rest


def parseword(wordin):
    """Match the beginning of a token list against the dictionary.

    ``wordin`` is a non-empty list of tokens.  Every entry of ``wordlist`` is
    tried in order; an entry whose "infix" pattern consists of several
    space-separated parts has to match that many leading tokens.

    Returns a dict with the keys:
      "word" -- the matching ``wordlist`` entry, or a placeholder with id 0
                and the first token in square brackets when nothing matched;
      "pref" -- per matched token, the prefixes found (nearest the stem first);
      "post" -- per matched token, the postfixes found (nearest the stem first);
      "inf"  -- the three infixes found (u"" where there is none);
      "len"  -- True when a lenited form of the stem was matched.
    When nothing matches, the affix fields are empty and "len" is False.
    """
    ret = {
        "word": {"id": 0, "navi": u"[" + wordin[0] + u"]", "infix": u"", "type": u""},
        "pref": [[]],
        "post": [[]],
        "inf": [u"", u"", u""],
        "len": False,
    }
    for word in wordlist:
        splitword = word["infix"].split(u" ")
        if len(wordin) < len(splitword):
            continue  # not enough tokens left for this multi-word entry
        foundprefs = []
        foundposts = []
        foundins = [u"", u"", u""]
        lenited = False
        for wor, pattern in enumerate(splitword):
            token = wordin[wor]
            center, infixes, softened = _find_center(pattern, token)
            if center == u"":
                break
            if infixes is not None:
                foundins = infixes
            if softened:
                lenited = True
            pieces = token.split(center)
            if len(pieces) != 2:
                break  # the stem occurs more than once in the token
            prefs, leftover = _peel_affixes(pieces[0], prefixes, True)
            if leftover != u"":
                break
            posts, leftover = _peel_affixes(pieces[1], postfixes, False)
            if leftover != u"":
                break
            foundprefs.append(prefs)
            foundposts.append(posts)
        else:
            # Every part of the entry matched: report this candidate.
            ret["word"] = word
            ret["pref"] = foundprefs
            ret["post"] = foundposts
            ret["inf"] = foundins
            ret["len"] = lenited
            return ret
    return ret
 
def parsesent(sent):
    """Split a sentence into tokens and parse them from left to right.

    The text is stripped, lower-cased, typographic apostrophes are replaced
    by "'" and every character that is not a word character, "ì", "ä", "'"
    or a space is removed.  Each position is first offered to
    ``parsenum.parse`` (one token); when that returns ``None`` the remaining
    tokens go to ``parseword``.  The parse result says how many tokens it
    consumed through the number of space-separated words in its "navi" field.

    Returns the list of parse results; an input without any usable token
    gives an empty list.
    """
    sent = sent.strip().lower().replace(u"’", u"'")
    sent = re.sub(u"[^\\wìä' ]", u"", sent)
    # Dropping empty pieces also collapses runs of spaces and removes the
    # empty token that a blank or punctuation-only input would leave behind
    # (the parsers index into the token and cannot handle u"").
    tokens = [token for token in sent.split(u" ") if token != u""]
    ret = []
    pos = 0
    while pos < len(tokens):
        word = parsenum.parse(tokens[pos])
        if word is None:
            word = parseword(tokens[pos:])
        consumed = len(word["word"]["navi"].split(" "))
        # Always advance by at least one token so the loop terminates.
        pos += max(consumed, 1)
        ret.append(word)
    return ret
/tsimapiak/translate.py
0,0 → 1,43
# -*- coding: utf-8 -*-
import parse
import dbconnector
 
# Gloss tables used by translatesent(): each entry is an (affix, gloss) pair,
# and the gloss of every affix found on a word is appended to its translation
# after a '-'.

# Glosses for the first, second and third infix slot of a parse result.
infixes1 = ((u"awn", u"P.PART"), (u"eyk", u"CAUS"), (u"us", u"A.PART"), (u"äp", u"REFL."))
infixes2 = ((u"ìyev", u"FUT.SUBJ"), (u"iyev", u"FUT.SUBJ"), (u"ìmìy", u"REC.PAST.REC.FUT"), (u"arm", u"IMPF.PAST"), (u"asy", u"FUT.D"), (u"ilv", u"PRES.PER.SUBJ"), (u"ìmv", u"REC.PAST.SUBJ"), (u"imv", u"PAST.SUBJ"), (u"ìrm", u"IMPF.REC.PAST"), (u"irv", u"PRES.IMPF.SUBJ"), (u"ìsy", u"IMM.FUT.D"), (u"aly", u"PERF.FUT"), (u"ary", u"IMPF.FUT"), (u"ìly", u"PERF.IMM.FUT"), (u"ìry", u"IMPF.IMM.FUT"), (u"ìlm", u"PERF.REC.PAST"), (u"alm", u"PERF.PAST"), (u"am", u"PAST."), (u"ay", u"FUT."), (u"er", u"IMPF."), (u"ìm", u"REC.PAST"), (u"iv", u"SUBJ."), (u"ìy", u"IMM.FUT"), (u"ol", u"PERF."))
infixes3 = ((u"äng", u"PEJ."), (u"ats", u"INFR."), (u"eiy", u"LAUD."), (u"ei", u"LAUD."), (u"uy", u"HON."))
# Glosses for prefixes; the "-LENTD" entries are the lenited spellings.
prefixes = ((u"tsay", u"those"), (u"say", u"those-LENTD"), (u"fay", u"these"), (u"fra", u"every"), (u"pxe", u"TRI."), (u"fne", u"type"), (u"tsa", u"that"), (u"sa", u"that-LENTD"), (u"pe", u"what"), (u"fe", u"what-LENTD"), (u"le", u"ADJD."), (u"nì", u"ADVD."), (u"sä", u"INSTD."), (u"tì", u"NOUND."), (u"sì", u"NOUND.-LENTD"), (u"ay", u"PL."), (u"me", u"DU."), (u"fì", u"this"), (u"ke", u"not"), (u"he", u"not-LENTD"), (u"a", u"ADJ.POST"))
# Glosses for adpositions; only used here as the first part of `postfixes`.
adpositions = ((u"mungwrr", u"except"), (u"kxamlä", u"through"), (u"pximaw", u"right.after"), (u"pxisre", u"right.before"), (u"tafkip", u"from.up.among"), (u"nemfa", u"into.inside"), (u"takip", u"from among"), (u"mìkam", u"between"), (u"teri", u"about.concerning"), (u"fkip", u"up.among"), (u"luke", u"without"), (u"pxel", u"like.as"), (u"pxaw", u"around"), (u"rofa", u"beside.alongside"), (u"ìlä", u"by.via.following"), (u"fpi", u"for.the.sake/benefit.of"), (u"ftu", u"from.direction"), (u"kip", u"among"), (u"lok", u"close.to"), (u"maw", u"after.time"), (u"sre", u"before.time"), (u"sìn", u"on.onto"), (u"vay", u"up.to"), (u"eo", u"before.in.front.of"), (u"fa", u"with.by.means.of"), (u"hu", u"with.accompaniment"), (u"io", u"above"), (u"ka", u"across"), (u"mì", u"in.on"), (u"na", u"like.as"), (u"ne", u"to.towards"), (u"ro", u"at.locative"), (u"ta", u"from"), (u"uo", u"behind"), (u"wä", u"against.opposition"), (u"äo", u"below"), (u"to", u"than"), (u"sì", u"and"))
# Glosses for everything that can follow the stem: adpositions plus suffixes.
postfixes = adpositions + ((u"tsyìp", u"DIM."), (u"eyä", u"GEN."), (u"ìri", u"TOP."), (u"ìl", u"ERG."), (u"it", u"ACC"), (u"lo", u"MULT."), (u"ri", u"TOP."), (u"ru", u"DAT."), (u"ti", u"ACC."), (u"ur", u"DAT."), (u"ve", u"ORD."), (u"yä", u"GEN."), (u"ya", u"VOC."), (u"tu", u"OBJD."), (u"vi", u"PART."), (u"yu", u"AGENTD."), (u"an", u"MASC."), (u"ng", u"INCL."), (u"ke", u"not"), (u"e", u"FEM."), (u"o", u"INDEF."), (u"l", u"ERG."), (u"t", u"ACC."), (u"y", u"GEN."), (u"a", u"ADJ.PRE"), (u"ä", u"GEN."), (u"r", u"DAT."))
 
def translatesent(sent, lang):
    """Parse ``sent`` and attach a gloss string to every parsed word.

    The sentence is handed to ``parse.parsesent``.  Each result gets a
    "translated" entry: the ``dbconnector.translate`` value for dictionary
    words (id other than 0), otherwise the word's own "navi" text.  The
    glosses of the infixes, prefixes and postfixes found on the word are then
    appended, each after a '-', followed by '-LENTD' for lenited words.

    Returns the list of parse results, modified in place.
    """
    parsed = parse.parsesent(sent)
    for entry in parsed:
        lemma = entry["word"]
        if lemma["id"] == 0:
            entry["translated"] = lemma["navi"]
        else:
            entry["translated"] = dbconnector.translate(lemma["id"], lang)

        # Work list of (gloss table, affixes to look up), in output order:
        # the three infix slots first, then prefixes, then postfixes.
        lookups = []
        for table, infix in ((infixes1, entry["inf"][0]),
                             (infixes2, entry["inf"][1]),
                             (infixes3, entry["inf"][2])):
            if infix != u"":
                lookups.append((table, [infix]))
        # "pref" and "post" hold one list of affixes per token of the word.
        lookups.extend((prefixes, group) for group in entry["pref"])
        lookups.extend((postfixes, group) for group in entry["post"])

        for table, affixes in lookups:
            for affix in affixes:
                for key, gloss in table:
                    if key == affix:
                        entry["translated"] += '-' + gloss

        if entry["len"]:
            entry["translated"] += '-' + 'LENTD'
    return parsed
/tsimapiak/parsenum.py
1,81 → 1,129
#!/usr/bin/python
# -*- coding: utf-8 -*-
 
import re
 
# Stand-alone numerals; the list index is the value (u"kew" is 0).
num = [u"kew",
u"'aw",
u"mune",
u"pxey",
u"tsìng",
u"mrr",
u"pukap",
u"kinä"]

# Unit endings used after a higher part; the value is the list index plus one.
rem = [u"aw",
u"mun",
u"pey",
u"sìng",
u"mrr",
u"fu",
u"hin"]

# Multiplier prefixes for the powers of eight; the multiplier is the list
# index plus one, so the empty prefix stands for "one times".
base = [u"",
u"me",
u"pxe",
u"tsì",
u"mrr",
u"pu",
u"ki"]


# One optional group per power of eight, highest first (zazam, vozam, zam,
# vo), followed by an optional unit part taken from `rem` or `num`.  Each of
# the first four groups captures only the multiplier prefix.  In the "vo"
# group the lookahead `(?=a|)` has an empty alternative, so the optional "l"
# is accepted whatever follows it.
numre = \
u"^(?:(" + "|".join(base) + u")zazam??)?" + \
u"(?:(" + "|".join(base) + u")vozam??)?" + \
u"(?:(" + "|".join(base) + u")zam??)?" + \
u"(?:(" + "|".join(base) + u")vo(?:l(?=a|))?)?" + \
u"((?:" + "|".join(rem) + u")|" + \
u"(?:" + "|".join(num) + u"))?$"
numre = re.compile(numre)
 
def parse(numin):
    """Convert a written-out octal numeral into its value.

    ``numin`` must be a non-empty unicode string; "í" and "á" are accepted as
    spellings of "ì" and "ä".  The string is matched against ``numre``.

    Returns ``(numout, numoct)`` where ``numout`` is the value as an ordinary
    integer and ``numoct`` is the integer whose decimal digits are the octal
    digits of that value.  Returns ``None`` when the input is not a unicode
    string, is empty, or does not match ``numre``.
    """
    if not isinstance(numin, unicode):
        return None
    if numin == u"":
        return None
    numin = numin.replace(u"í", u"ì").replace(u"á", u"ä")
    match = numre.match(numin)
    if match is None:
        return None
    zazam, vozam, zam, vo, unit = match.groups()

    # Unit part: `rem` entries count from one, `num` entries from zero.  A
    # group that did not take part in the match is None and adds nothing.
    if unit in rem:
        numout = rem.index(unit) + 1
    elif unit in num:
        numout = num.index(unit)
    else:
        numout = 0
    numoct = numout

    # Higher parts: the captured prefix selects the multiplier of its power.
    for power, prefix in ((1, vo), (2, zam), (3, vozam), (4, zazam)):
        if prefix in base:
            multiplier = base.index(prefix) + 1
            numout += multiplier * 8 ** power
            numoct += multiplier * 10 ** power
    return numout, numoct
 
 
if __name__ == "__main__":
print parse(u"mrrvolaw")
#!/usr/bin/python
# -*- coding: utf-8 -*-
 
# Stand-alone numerals; the list index is the value (u"kew" is 0).
num = [u"kew",
u"'aw",
u"mune",
u"pxey",
u"tsìng",
u"mrr",
u"pukap",
u"kinä"]

# Stems of the stand-alone numerals as they appear before the "ve" suffix;
# same indexing as `num`.
numord = [u"kew",
u"'aw",
u"mu",
u"pxey",
u"tsì",
u"mrr",
u"pu",
u"ki"]

# Unit endings used after a higher part; the value is the list index plus one.
rem = [u"aw",
u"mun",
u"pey",
u"sìng",
u"mrr",
u"fu",
u"hin"]

# Stems of the unit endings as they appear before the "ve" suffix; same
# indexing as `rem`.
remord = [u"aw",
u"mu",
u"pey",
u"sì",
u"mrr",
u"fu",
u"hi"]

# Multiplier prefixes for the powers of eight; the multiplier is the list
# index plus one, so the empty prefix stands for "one times".
base = [u"",
u"me",
u"pxe",
u"tsì",
u"mrr",
u"pu",
u"ki"]
 
def _take_power(numin, marker):
    """Strip one ``<multiplier prefix><marker>`` group from the front.

    Returns ``(multiplier, rest)``; the multiplier is 0 and ``rest`` is the
    unchanged input when ``numin`` does not start with such a group.
    """
    for n, prefix in enumerate(base):
        head = prefix + marker
        if numin.startswith(head):
            return n + 1, numin[len(head):]
    return 0, numin


def parse(numin):
    """Parse one token as a written-out octal numeral.

    An attributive "a" at exactly one end and the suffix "ve" are recognised
    and recorded as prefix / postfixes.  The rest must be a stand-alone
    numeral, or at most one group per power of eight (zazam, vozam, zam,
    vol/vo, in that order) optionally followed by a unit ending.

    Returns ``None`` when the token is not a numeral.  Otherwise returns a
    dict shaped like a word parse: "word" (id 0; "navi" is the value as
    text, with a trailing "." when the "ve" suffix is present), "pref",
    "post", "inf", "len", plus "dec" (the value) and "oct" (the integer whose
    decimal digits are the octal digits of the value).
    """
    if numin == u"" or u"mm" in numin:
        return None
    if numin[0] == u"a" and numin[-1] == u"a":
        return None
    prefs = []
    posts = []
    if numin[0] == u"a":
        prefs.append(u"a")
        numin = numin[1:]
    if numin.endswith(u"a"):
        posts.append(u"a")
        numin = numin[:-1]
    if numin.endswith(u"ve"):
        posts.append(u"ve")
        numin = numin[:-2]
    if numin == u"":
        # Nothing but affixes: not a numeral (and must not count as zero).
        return None
    ordinal = u"ve" in posts

    outdec = 0
    outoct = 0
    standalone = numord if ordinal else num
    if numin in standalone:
        # base numbers
        outdec = outoct = standalone.index(numin)
        numin = u""
    else:
        # other numbers: at most one group per power, highest power first;
        # "vol" and "vo" are two spellings of the same power.
        for markers, power in (((u"zazam",), 4), ((u"vozam",), 3),
                               ((u"zam",), 2), ((u"vol", u"vo"), 1)):
            for marker in markers:
                multiplier, numin = _take_power(numin, marker)
                if multiplier:
                    outoct += multiplier * 10 ** power
                    outdec += multiplier * 8 ** power
                    break
        units = remord if ordinal else rem
        if numin in units:
            digit = units.index(numin) + 1
            outoct += digit
            outdec += digit
            numin = u""

    if numin != u"":
        return None  # something was left that is not part of a numeral
    navi = unicode(outdec)
    if ordinal:
        navi += u"."
    return {"word": {"id": 0, "navi": navi, "infix": u"", "type": u""},
            "pref": [prefs], "post": [posts], "inf": [u"", u"", u""],
            "len": False, "dec": outdec, "oct": outoct}
 
if __name__ == "__main__":
print parse(u"mevolawve")
/tsimapiak/dbconnector.py
9,44 → 9,24
current = u""
db = tornado.database.Connection("127.0.0.1", "navi", user="navi", password="navi")
for row in db.query("""
SELECT *, CHAR_LENGTH(navi) AS NL
SELECT *
FROM `metaWords`
ORDER BY NL DESC"""):
if row["partOfSpeech"] in (u"v.", u"vin.", u"vtr."):
current = unicode(row["ipa"])
current = current.replace(ur"ɛ",ur"e").replace(ur".",ur"").replace(ur"ɾ",ur"r") \
.replace(ur"ɪ",ur"ì").replace(ur"ˈ",ur"").replace(ur"'",ur"x") \
.replace(ur"ŋ",ur"ng").replace(ur"j",ur"y").replace(ur"ʔ",ur"'") \
.replace(ur"æ",ur"ä").replace(ur"ˌ",ur"").replace(ur"\t{ts}",ur"ts") \
.replace(ur"ṛ",ur"rr").replace(ur"ḷ",ur"ll").replace(ur"k̚",ur"k ") \
.replace(ur"p̚",ur"p ").replace(ur"t̚",ur"t ").replace(ur"'̚",ur"' ") \
.replace(u"\\",ur"").replace(ur"(",ur"").replace(ur")",ur"") \
.replace(ur"[",ur"").replace(ur"]",ur"").replace(ur" "," ") \
.strip()
current = re.sub(ur" or.*","",current)
current = re.sub(ur"z(.*)engk(.*)e",ur"z\1enk\2e",current)
current = re.sub(ur"t(.*)ì(m|n)\ ",ur"t\1ìng ",current)
current = current.split(ur"$cdot$")
if len(current) == 3:
current = current[0] + u"<0><1>" + current[1] + u"<2>" + current[2]
elif len(current) == 2:
current = current[0] + u"<0><1><2>" + current[1]
else:
current = u"<0><1><2>" + current[0]
WHERE partOfSpeech <> 'num.' AND partOfSpeech <> "prefix"
ORDER BY CHAR_LENGTH(navi) DESC"""):
if row["infixes"]:
ret.append({"id": row["id"], "navi": row["navi"], "infix": row["infixes"].lower(), "type": row["partOfSpeech"]})
else:
current = unicode(row["navi"])
ret.append([row["id"], row["navi"], current, row["partOfSpeech"]])
ret.append({"id": row["id"], "navi": row["navi"], "infix": row["navi"].lower(), "type": row["partOfSpeech"]})
db.close()
return ret
 
def getnavi(word):
ret = []
def translate(wid,language):
db = tornado.database.Connection("127.0.0.1", "navi", user="navi", password="navi")
for row in db.query("""
SELECT *
FROM `metaWords`
WHERE navi = ?""",word):
ret.append([row["id"],row["navi"], row["infix"], row["partOfSpeech"]])
FROM `localizedWords`
WHERE id = %s AND languageCode = %s""",wid,language):
ret = row["localized"]
break
db.close()
return ret
return ret
/webapp/templates/translate.html
0,0 → 1,68
{% extends "base.html" %}
 
{% block title %}Translator{% end %}
 
{% block body %}
<b>Na'vi sentence:</b><br />
<form action="/translate" method="post">
<input id="word" name="word" type="text" value="{{last}}" style="width: 100%;" />
<select name="lang" id="lang">
<option value="eng" selected="selected">English</option>
<option value="hu">Hungarian - Magyar</option>
<option value="de">German - Deutsch</option>
<option value="est">Estonian - Eesti</option>
<option value="ptbr">Brazilian Portuguese - Português do Brasil</option>
</select>
<input name="btn" type="submit" value="Translate!" />
</form>
{% if out %}
<table border="1">
<tr>
<th>Words</th>
<th>Translated</th>
<th>Parts</th>
<th>Data</th>
</tr>
{% for wor in out %}
<tr>
<td rowspan="4">{{ wor["word"]["navi"] }}</td>
<td rowspan="4">{{ wor["translated"] }}</td>
<td>Infixes:</td>
<td>{{ u", ".join(wor["inf"]) }}</td>
</tr>
<tr>
<td>Prefixes:</td>
<td>{{ u"; ".join(u", ".join(x) for x in wor["pref"]) }}</td>
</tr>
<tr>
<td>Postfixes:</td>
<td>{{ u"; ".join(u", ".join(x) for x in wor["post"]) }}</td>
</tr>
<tr>
<td>Lenited:</td>
<td>{{ str(wor["len"]) }}</td>
</tr>
{% end %}
</table>
{% end %}
<p>This program uses Eana Eltu for the list of words and infix positions (but nothing else), created by Tuiq and Taronyu. Thanks also go to the rest of the Learn Na'vi community!</p>
<script type="text/javascript">
document.getElementById("word").focus();
</script>
{% if lang != "eng" %}
<script type="text/javascript">
if("{{ lang }}" == "hu"){
document.getElementById("lang").selectedIndex = 1
}
if("{{ lang }}" == "de"){
document.getElementById("lang").selectedIndex = 2
}
if("{{ lang }}" == "est"){
document.getElementById("lang").selectedIndex = 3
}
if("{{ lang }}" == "ptbr"){
document.getElementById("lang").selectedIndex = 4
}
</script>
{% end %}
{% end %}
/webapp/templates/parse.html
0,0 → 1,43
{% extends "base.html" %}
 
{% block title %}Sentence parser{% end %}
 
{% block body %}
<b>Na'vi sentence:</b><br />
<form action="/parse" method="post">
<input id="word" name="word" type="text" value="{{last}}" style="width: 100%;" />
<input name="btn" type="submit" value="Parse!" />
</form>
{% if out %}
<table border="1">
<tr>
<th>Words</th>
<th>Parts</th>
<th>Data</th>
</tr>
{% for wor in out %}
<tr>
<td rowspan="4">{{ wor["word"]["navi"] }}</td>
<td>Infixes:</td>
<td>{{ u", ".join(wor["inf"]) }}</td>
</tr>
<tr>
<td>Prefixes:</td>
<td>{{ u"; ".join(u", ".join(x) for x in wor["pref"]) }}</td>
</tr>
<tr>
<td>Postfixes:</td>
<td>{{ u"; ".join(u", ".join(x) for x in wor["post"]) }}</td>
</tr>
<tr>
<td>Lenited:</td>
<td>{{ str(wor["len"]) }}</td>
</tr>
{% end %}
</table>
{% end %}
<p>This program uses Eana Eltu for the list of words and infix positions (but nothing else), created by Tuiq and Taronyu. Thanks also go to the rest of the Learn Na'vi community!</p>
<script type="text/javascript">
document.getElementById("word").focus();
</script>
{% end %}
/webapp/templates/base.html
1,6 → 1,7
<html>
<head>
<title>Tsim Apiak - {% block title %}Title{% end %}</title>
<link rel="shortcut icon" type="image/x-icon" href="static/favicon.ico" />
<style type="text/css">
body {
background: #145179;
/webapp/templates/index.html
0,0 → 1,8
{% extends "base.html" %}
 
{% block title %}Home{% end %}
 
{% block body %}
<a href="/number"><b>Number translator</b></a> - this webapp allows you to translate written-out Na'vi numbers into decimal and octal.<br />
<a href="/parse"><b>Parser</b></a> - this webapp can parse Na'vi sentences into the base words, prefixes, infixes and suffixes. It does not currently translate the words, but that will come.
{% end %}
/webapp/templates/number.html
17,4 → 17,4
<script type="text/javascript">
document.getElementById("num").focus();
</script>
{% end %}
{% end %}
/webapp/main.py
9,12 → 9,14
import os
import re
 
from tsimapiak.parsenum import parse
from tsimapiak.dbconnector import getnavilist
from tsimapiak import parsenum
from tsimapiak import dbconnector
from tsimapiak import parse
from tsimapiak import translate
 
class Index(tornado.web.RequestHandler):
def get(self):
self.redirect("/number")
self.render("templates/index.html")
 
class Number(tornado.web.RequestHandler):
def get(self):
25,10 → 27,12
num = self.get_argument("num").strip()
except:
self.redirect("/number")
numout = parse(num.replace(" ",""))
numout = parsenum.parse(num.replace(" ",""))
if numout == None:
numout = -1
self.render("templates/number.html", last=num, numout=numout)
numoutt = -1
else:
numoutt = (numout["dec"], numout["oct"])
self.render("templates/number.html", last=num, numout=numoutt)
 
class Restart(tornado.web.RequestHandler):
def get(self):
37,20 → 41,51
 
class TestDB(tornado.web.RequestHandler):
def get(self):
lis = getnavilist()
lis = dbconnector.getnavilist()
text = u"id | navi | infix | partofspeech<br />"
text += u"<br />".join(u" | ".join(unicode(y) for y in x) for x in lis)
self.write(text)
 
class Parse(tornado.web.RequestHandler):
    """Handler for /parse: shows the parser form and its results."""

    def get(self):
        # Empty form, no result table.
        self.render("templates/parse.html", last="", out=None)

    def post(self):
        try:
            word = self.get_argument("word")
        except Exception:
            # No usable "word" field: send the client back to the empty
            # form.  Returning here is essential -- the response is already
            # finished by the redirect and `word` is not bound.
            self.redirect("/parse")
            return
        out = parse.parsesent(word)
        self.render("templates/parse.html", last=word, out=out)
 
class Translate(tornado.web.RequestHandler):
    """Handler for /translate: shows the translator form and its results."""

    def get(self):
        # Empty form, no result table, "eng" preselected.
        self.render("templates/translate.html", last="", out=None, lang="eng")

    def post(self):
        try:
            word = self.get_argument("word")
            lang = self.get_argument("lang")
        except Exception:
            # A form field is missing or unusable: send the client back to
            # the empty form.  Returning here is essential -- the response is
            # already finished by the redirect and `word` / `lang` may not
            # be bound.
            self.redirect("/translate")
            return
        out = translate.translatesent(word, lang)
        self.render("templates/translate.html", last=word, out=out, lang=lang)
 
settings = {
"static_path": os.path.join(os.path.dirname(__file__), "static")
}
 
application = tornado.web.Application([
("/", Index),
("/number", Number),
("/restart", Restart),
("/testdb", TestDB)
])
("/testdb", TestDB),
("/parse", Parse),
("/translate", Translate)
], **settings)
 
if __name__ == "__main__":
http_server = tornado.httpserver.HTTPServer(application)
http_server.listen(1337)
tornado.autoreload.start()
tornado.ioloop.IOLoop.instance().start()
#tornado.autoreload.start()
tornado.ioloop.IOLoop.instance().start()
/webapp/static/favicon.ico
Cannot display: file marked as a binary type.
svn:mime-type = application/octet-stream
/webapp/static/favicon.ico
Property changes:
Added: svn:mime-type
## -0,0 +1 ##
+application/octet-stream
\ No newline at end of property
Index: dev/naviparse.py
===================================================================
--- dev/naviparse.py (revision 48)
+++ dev/naviparse.py (nonexistent)
@@ -1,92 +0,0 @@
-# -*- coding: utf-8 -*-
-import re
-
-strings = [ u"oe", u"nga", u"k<0><1>am<2>e", u"t<0><1><2>ìng nari", u"s<0><1><2>i", u"t<0><1><2>ìng" ]
-
-infixes0 = [ u"awn", u"eyk", u"us", u"äp" ]
-infixes1 = [ u"ìyev", u"iyev", u"arm", u"asy", u"ilv", u"ìmv", u"imv" u"ìrm", u"irv", u"ìsy", u"aly", u"ary", u"ìly", u"ìry", u"ìlm", u"alm", u"am", u"ay", u"er", u"ìm", u"iv", u"ìy", u"ol" ]
-infixes2 = [ u"äng", u"ats", u"eiy", u"ei", u"uy" ]
-
-# Returns array with Word,Infix 0,Infix 1,Infix 2,Case,Gender,Number suffixes,Inclusive,Indefinite,Vocative (suffix),Plural,Adposition,Adject pre,Adject suff,am/ay/tu/vi/yu,adverbial,nominalise,sä,fne,lenited?
-def parsefix(original):
- realword = u""
- infix0 = u""
- infix1 = u""
- infix2 = u""
- infix01 = u""
- infix_1 = u""
- infix_2 = u""
- for eachword in strings:
- regex = re.sub(u" ",u"[^ ]* [^ ]*",eachword)
- regex = re.sub(u"^",u"[^ ]*",regex)
- regex = re.sub(u"$",u"[^ ]*",regex)
- regex = re.sub(u"<0><1>",u"[^ ]*",regex)
- regex = re.sub(u"<2>",u"[^ ]*",regex)
- if re.match(regex,original):
- realword = eachword
- break
- if realword == u"":
- return [u"[" + original + u"]",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u""]
- else:
- if re.search(u"<",realword):
- beginning = re.sub(u"<0><1>.*",u"",realword)
- middle = re.sub(u".*<0><1>(.*)<2>.*",ur"\1",realword)
- end = re.sub(u".*<2>",u"",realword)
- infix01 = re.sub(u".*?" + re.sub(u"<0><1>",u"([^ ]*)",re.sub(u"<2>",u"[^ ]*",realword)) + u".*?",ur"\1",original)
- infix_2 = re.sub(u".*?" + re.sub(u"<2>",u"([^ ]*)",re.sub(u"<0><1>",u"[^ ]*",realword)) + u".*?",ur"\1",original)
- for eachinfix in infixes0:
- if infix01.startswith(eachinfix):
- infix0 = eachinfix
- infix_1 = infix01[len(eachinfix):]
- break
- else:
- infix0 = u""
- infix_1 = infix01
- gotinfix1 = False
- for eachinfix in infixes1:
- if infix_1.startswith(eachinfix):
- infix1 = eachinfix
- infix_1 = infix_1[len(eachinfix):]
- if infix_1 != u"":
- if re.search(u"<0><1><2>",realword):
- infix_2 = infix_1
- else:
- return [u"[" + original + u"]",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u""]
- gotinfix1 = True
- break
- if gotinfix1 == False:
- if re.search(u"<0><1><2>",realword):
- if infix_1 == u"":
- infix_2 = infix_1
- infix1 = u""
- elif infix_1 == u"":
- infix1 = u""
- else:
- return [u"[" + original + u"]",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u""]
- gotinfix2 = False
- for eachinfix in infixes2:
- if infix_2.startswith(eachinfix):
- infix2 = infix_2
- gotinfix2 = True
- break
- if gotinfix2 == False:
- if infix_2.startswith(end):
- suffixes = infix2[len(end) - 1:] + end
- elif infix_2 == u"":
- infix2 = u""
- else:
- return [u"[" + original + u"]",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u""]
-# print u"0" + unicode(infix0) + u" 1" + unicode(infix1) + u" 2" + unicode(infix2)
- return [realword,infix0,infix1,infix2,u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u""]
- else:
- return [realword,u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u""]
-
-print parsefix(u"oel")
-print parsefix(u"ngati")
-print parsefix(u"kameie")
-print parsefix(u"kìyevame")
-print parsefix(u"English")
-print parsefix(u"keykivame")
-print parsefix(u"tìsusiti")
-print parsefix(u"tayìng nari")
-print parsefix(u"tìtusìng")