Subversion Repositories navi

Compare Revisions

Ignore whitespace Rev 2 → Rev 103

/tsimapiak/parsenum.py
30,12 → 30,12
 
 
numre = \
u"^(?:(" + "|".join(base) + u")zazam??)?" + \
u"^(a?)(?:(" + "|".join(base) + u")zazam??)?" + \
u"(?:(" + "|".join(base) + u")vozam??)?" + \
u"(?:(" + "|".join(base) + u")zam??)?" + \
u"(?:(" + "|".join(base) + u")vo(?:l(?=a|))?)?" + \
u"((?:" + "|".join(rem) + u")|" + \
u"(?:" + "|".join(num) + u"))?$"
u"(?:" + "|".join(num) + u"))?(ve?)(a?)$"
numre = re.compile(numre)
 
def parse(numin):
51,30 → 51,34
numout = 0
numoct = 0
try:
numout += rem.index(mat[4]) + 1
numoct += rem.index(mat[4]) + 1
numout += rem.index(mat[5]) + 1
numoct += rem.index(mat[5]) + 1
except:
try:
numout += num.index(mat[4])
numoct += num.index(mat[4])
numout += num.index(mat[5])
numoct += num.index(mat[5])
except: pass
try:
numout += (base.index(mat[3]) + 1) * 8
numoct += (base.index(mat[3]) + 1) * 10
numout += (base.index(mat[4]) + 1) * 8
numoct += (base.index(mat[4]) + 1) * 10
except: pass
try:
numout += (base.index(mat[2]) + 1) * 8**2
numoct += (base.index(mat[2]) + 1) * 10**2
numout += (base.index(mat[3]) + 1) * 8**2
numoct += (base.index(mat[3]) + 1) * 10**2
except: pass
try:
numout += (base.index(mat[1]) + 1) * 8**3
numoct += (base.index(mat[1]) + 1) * 10**3
numout += (base.index(mat[2]) + 1) * 8**3
numoct += (base.index(mat[2]) + 1) * 10**3
except: pass
try:
numout += (base.index(mat[0]) + 1) * 8**4
numoct += (base.index(mat[0]) + 1) * 10**4
numout += (base.index(mat[1]) + 1) * 8**4
numoct += (base.index(mat[1]) + 1) * 10**4
except: pass
return numout, numoct
retnum = unicode(numout)
if mat[6] != u"":
retnum += u"."
return {"word": {"id": 0, "navi": retnum, "infix": u"", "type": u""}, "pref": [mat[0]], "post": [mat[6], mat[7]], "inf": [u"", u"", u""], "len": False, "dec": numout, "oct": numdec}
#return numout, numoct
 
 
if __name__ == "__main__":
/tsimapiak/parse2.py
0,0 → 1,119
#!/usr/bin/python
# -*- coding: utf-8 -*-
 
import re
import dbconnector
import parsenum
 
# Dictionary entries, fetched once when this module is imported.
wordlist = dbconnector.getnavilist()

# Infix candidates that parseword() substitutes for the "<1><2>" marker
# (infixes1 + infixes2) and the "<3>" marker (infixes3) of a dictionary
# pattern. The trailing empty string stands for "no infix in this slot".
infixes1 = (u"awn", u"eyk", u"us", u"äp", u"")
infixes2 = (u"ìyev", u"iyev", u"ìmìy", u"arm", u"asy", u"ilv", u"ìmv", u"imv", u"ìrm", u"irv", u"ìsy", u"aly", u"ary", u"ìly", u"ìry", u"ìlm", u"alm", u"am", u"ay", u"er", u"ìm", u"iv", u"ìy", u"ol", u"")
infixes3 = (u"äng", u"ats", u"eiy", u"ei", u"uy", u"")
# Prefixes that parseword() strips, in this order, from the end of the text
# preceding the matched stem.
prefixes = (u"a", u"pe", u"le", u"nì", u"sä", u"tì", u"fne", u"tsay", u"fay", u"fra", u"pxe", u"ay", u"me", u"tsa", u"fì", u"ke")
# Adpositions; they are appended to the postfix table below.
adpositions = (u"kxamlä", u"mungwrr", u"nemfa", u"pximaw", u"pxisre", u"tafkip", u"takip", u"teri", u"mìkam", u"ìla", u"fkip", u"fpi", u"ftu", u"kip", u"lok", u"luke", u"maw", u"pxel", u"pxaw", u"rofa", u"sre", u"sìn", u"vay", u"eo", u"fa", u"hu", u"io", u"ka", u"mì", u"na", u"ne", u"ro", u"ta", u"uo", u"wä", u"äo")
# Postfixes that parseword() strips, in this order, from the start of the text
# following the matched stem.
postfixes = (u"an", u"ng", u"eyä", u"e", u"tsyìp", u"o", u"ìri", u"ìl", u"it", u"lo", u"ri", u"ru", u"ti", u"ur", u"ve", u"yä", u"ya", u"tu", u"vi", u"yu", u"l", u"t", u"y", u"a", u"ä") + adpositions
#prefixesn = ur"(?P<npr>(?:(?:fì|tsa)?(?:me|pxe|ay|fra)?|(?:fay)?|(?:tsay)?)(?:fne)?(?:tì|sä)?"
#prefixesv = ur"(?P<vpr>(?:nì|sä|tì|rä'ä |ke )?)"

# Lenition table: (initial sound, replacement) pairs. parseword() swaps a
# pattern's leading initial for its replacement when the plain form is not found.
lenit = ((u"px", u"p"), (u"tx", u"t"), (u"kx", u"k"), (u"ts", u"s"), (u"t", u"s"), (u"p", u"f"), (u"k", u"h"), (u"'", u""))
 
def _findcenter(pattern, token):
    """Find the surface form of one dictionary word part inside *token*.

    Tried in order: the pattern with every infix combination filled in (or
    the plain pattern when it has no infix markers), the lenited pattern, and
    the shortened pronoun stems (-nga -> -ng, -po -> -p).

    Returns a tuple (center, infixes, lenited). center is u"" when no form of
    the pattern occurs in the token.
    """
    noinfixes = [u"", u"", u""]
    if u"<1>" in pattern:
        for in1 in infixes1:
            for in2 in infixes2:
                for in3 in infixes3:
                    filled = pattern.replace(u"<1><2>", in1 + in2).replace(u"<3>", in3)
                    # An empty candidate is "in" every string, so skip it.
                    if filled and filled in token:
                        return filled, [in1, in2, in3], False
    elif pattern and pattern in token:
        return pattern, noinfixes, False

    # Lenition: swap the leading sound for its lenited counterpart.
    for plain, soft in lenit:
        if pattern.startswith(plain):
            softened = soft + pattern[len(plain):]
            if softened and softened in token:
                return softened, noinfixes, True

    # Pronoun stems lose their final vowel before some endings. The slice
    # length follows the ending, so the two-letter "po" drops two characters.
    for ending, stem in ((u"nga", u"ng"), (u"po", u"p")):
        if pattern.endswith(ending):
            shortened = pattern[:-len(ending)] + stem
            if shortened in token:
                return shortened, noinfixes, False

    return u"", noinfixes, False


def _stripaffixes(before, after):
    """Peel prefixes off the end of *before* and postfixes off the start of *after*.

    Each table is walked once in its declared order. Returns a tuple
    (prefixes found, postfixes found), or None when unrecognised text remains
    on either side.
    """
    foundpre = []
    for pre in prefixes:
        if before and before.endswith(pre):
            foundpre.append(pre)
            before = before[:-len(pre)]
    if before != u"":
        return None

    foundpost = []
    for pos in postfixes:
        if after and after.startswith(pos):
            foundpost.append(pos)
            after = after[len(pos):]
    if after != u"":
        return None

    return foundpre, foundpost


def parseword(wordin):
    """Match the start of a list of words against the dictionary.

    wordin is a non-empty list of words. Every entry of wordlist is tried in
    order; an entry matches when each space-separated part of its "infix"
    pattern is found in the corresponding input word and everything around it
    is made of known prefixes and postfixes.

    Returns a dict with the keys:
      "word" - the matching wordlist entry, or a placeholder whose "navi" is
               the first input word in square brackets when nothing matches
      "pref" - one list of prefixes per matched word part
      "post" - one list of postfixes per matched word part
      "inf"  - the three infixes found in the last matched word part
      "len"  - True when a lenited form was matched
    """
    # Defaults describe "nothing matched": no affixes, no lenition. This keeps
    # leftovers from failed attempts out of the result.
    ret = {
        "word": {"id": 0, "navi": u"[" + wordin[0] + u"]", "infix": u"", "type": u""},
        "pref": [[]],
        "post": [[]],
        "inf": [u"", u"", u""],
        "len": False,
    }
    for word in wordlist:
        splitword = word["infix"].split(u" ")
        if len(wordin) < len(splitword):
            # The entry has more parts than there are words left.
            continue
        foundprefs = []
        foundposts = []
        foundins = [u"", u"", u""]
        lenited = False
        for pattern, token in zip(splitword, wordin):
            center, foundins, softened = _findcenter(pattern, token)
            if center == u"":
                break
            pieces = token.split(center)
            if len(pieces) != 2:
                # The stem occurs more than once; the split is ambiguous.
                break
            affixes = _stripaffixes(pieces[0], pieces[1])
            if affixes is None:
                break
            lenited = lenited or softened
            foundprefs.append(affixes[0])
            foundposts.append(affixes[1])
        else:
            # Every part of the entry matched.
            ret["word"] = word
            ret["pref"] = foundprefs
            ret["post"] = foundposts
            ret["inf"] = foundins
            ret["len"] = lenited
            return ret
    return ret
 
def parsesent(sent):
    """Split a sentence into words and parse each one.

    The text is stripped, lower-cased, has typographic apostrophes replaced
    by "'", loses every character matched by the clean-up pattern below, and
    has runs of spaces collapsed. Each position is first offered to
    parsenum.parse(); when that returns None the remaining words are handed
    to parseword(), which may consume more than one word.

    Returns the list of parse results in sentence order.
    """
    sent = sent.strip().lower().replace(u"’", u"'")
    # Same patterns as before, written with escaped backslashes instead of the
    # Python-2-only ur"" prefix.
    sent = re.sub(u"[^\\wìä' ]", u"", sent)
    sent = re.sub(u"\\ +", u" ", sent)
    words = sent.split(u" ")
    ret = []
    pos = 0
    while pos < len(words):
        # Parse the word at the current position, not always the first one.
        word = parsenum.parse(words[pos])
        if word is None:
            word = parseword(words[pos:])
        # Advance by the number of words the match covers (always at least one).
        pos += len(word["word"]["navi"].split(" "))
        ret.append(word)
    return ret
/tsimapiak/dbconnector.py
0,0 → 1,49
#!/usr/bin/python
# -*- coding: utf-8 -*-
 
import tornado.database
import re
 
def getnavilist():
    """Load every dictionary word except numbers and prefixes.

    Rows come from `metaWords`, longest `navi` first. Returns a list of dicts
    with the keys "id", "navi", "infix" and "type". "infix" is the lower-cased
    `infixes` column, or the lower-cased `navi` column when `infixes` is empty.
    """
    ret = []
    db = tornado.database.Connection("127.0.0.1", "navi", user="navi", password="navi")
    try:
        rows = db.query("""
            SELECT *
            FROM `metaWords`
            WHERE partOfSpeech <> 'num.' AND partOfSpeech <> "prefix"
            ORDER BY CHAR_LENGTH(navi) DESC""")
        for row in rows:
            ret.append({
                "id": row["id"],
                "navi": row["navi"],
                "infix": (row["infixes"] or row["navi"]).lower(),
                "type": row["partOfSpeech"],
            })
    finally:
        # Release the connection even when the query fails.
        db.close()
    return ret
 
def getnavi(word):
    """Load the dictionary rows whose `navi` column equals *word*.

    Returns a list of dicts with the keys "id", "navi", "infix" and "type".
    "infix" is the lower-cased `infixes` column, or the lower-cased `navi`
    column when `infixes` is empty.
    """
    ret = []
    db = tornado.database.Connection("127.0.0.1", "navi", user="navi", password="navi")
    try:
        # The MySQL driver behind tornado.database uses %s as its parameter
        # marker; a "?" is not substituted and the call fails.
        rows = db.query("""
            SELECT *
            FROM `metaWords`
            WHERE navi = %s""", word)
        for row in rows:
            ret.append({
                "id": row["id"],
                "navi": row["navi"],
                "infix": (row["infixes"] or row["navi"]).lower(),
                "type": row["partOfSpeech"],
            })
    finally:
        # Release the connection even when the query fails.
        db.close()
    return ret
 
#def gettrans(id, cod):
#ret = []
#if cod not in (u"est",u"ptbr",u"de",u"eng",u"all"):
#return ret
#db = tornado.database.Connection("127.0.0.1", "navi", user="navi", password="navi")
#if cod == "all":
#for row in db.query("""
#SELECT *
#FROM `metaWords`
#WHERE id = ?""",idd):
#infix = makeinfix(row)
#ret.append([row["id"],row["navi"], infix, row["partOfSpeech"]])
#db.close()
/tsimapiak/parse.py
0,0 → 1,86
#!/usr/bin/python
# -*- coding: utf-8 -*-
 
import re
from dbconnector import getnavilist
 
wordlist = getnavilist()
 
# Infix tables for the three infix positions. parsefix() tests them with
# startswith() in list order.
infixes0 = [u"awn", u"eyk", u"us", u"äp"]
# u"imv" and u"ìrm" are separate entries; without the comma between them
# Python joins the adjacent literals into a single bogus infix.
infixes1 = [u"ìyev", u"iyev", u"ìmìy", u"arm", u"asy", u"ilv", u"ìmv", u"imv", u"ìrm", u"irv", u"ìsy", u"aly", u"ary", u"ìly", u"ìry", u"ìlm", u"alm", u"am", u"ay", u"er", u"ìm", u"iv", u"ìy", u"ol"]
infixes2 = [u"äng", u"ats", u"eiy", u"ei", u"uy"]
 
# Returns array with Word,Infix 0,Infix 1,Infix 2,Case,Gender,Number suffixes,Inclusive,Indefinite,Vocative (suffix),Plural,Adposition,Adject pre,Adject suff,am/ay/tu/vi/yu,adverbial,nominalise,sä,fne,lenited?
def parsefix(original):
    """Look up *original* in the word list and work out which infixes it carries.

    Returns a list whose first element is the matched dictionary pattern and
    whose next three elements are the infixes found for positions 0, 1 and 2;
    all remaining elements are empty strings. When no dictionary pattern
    matches, or the infix text cannot be decomposed, the first element is the
    input wrapped in square brackets and every other element is empty.
    """
    realword = u""
    infix0 = u""
    infix1 = u""
    infix2 = u""
    infix01 = u""
    infix_1 = u""
    infix_2 = u""
    # Turn each dictionary pattern into a regex: "[^ ]*" is allowed before and
    # after every word and in place of the infix markers. The first pattern
    # that matches wins.
    # NOTE(review): the dictionary text is used as a regex without escaping —
    # confirm entries never contain regex metacharacters.
    for eachword in wordlist:
        regex = re.sub(u" ",u"[^ ]* [^ ]*",eachword["infix"])
        regex = re.sub(u"^",u"[^ ]*",regex)
        regex = re.sub(u"$",u"[^ ]*",regex)
        regex = re.sub(u"<1><2>",u"[^ ]*",regex)
        regex = re.sub(u"<3>",u"[^ ]*",regex)
        if re.match(regex,original):
            realword = eachword["infix"]
            break
    if realword == u"":
        return [u"[" + original + u"]",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u""]
    else:
        if re.search(u"<",realword):
            # Split the pattern around its markers. Of these three only `end`
            # is read later in this function.
            beginning = re.sub(u"<1><2>.*",u"",realword)
            middle = re.sub(u".*<1><2>(.*)<3>.*",ur"\1",realword)
            end = re.sub(u".*<3>",u"",realword)
            # Replace the matched span with the text captured at one marker;
            # text outside the match stays in the result.
            infix01 = re.sub(u".*?" + re.sub(u"<1><2>",u"([^ ]*)",re.sub(u"<3>",u"[^ ]*",realword)) + u".*?",ur"\1",original)
            infix_2 = re.sub(u".*?" + re.sub(u"<3>",u"([^ ]*)",re.sub(u"<1><2>",u"[^ ]*",realword)) + u".*?",ur"\1",original)
            # Position 0: take a leading infixes0 entry off the captured text.
            for eachinfix in infixes0:
                if infix01.startswith(eachinfix):
                    infix0 = eachinfix
                    infix_1 = infix01[len(eachinfix):]
                    break
                else:
                    infix0 = u""
                    infix_1 = infix01
            # Position 1: take a leading infixes1 entry off what is left.
            gotinfix1 = False
            for eachinfix in infixes1:
                if infix_1.startswith(eachinfix):
                    infix1 = eachinfix
                    infix_1 = infix_1[len(eachinfix):]
                    if infix_1 != u"":
                        # Leftover text is only accepted when the three markers
                        # are adjacent; it is then treated as position-2 text.
                        if re.search(u"<1><2><3>",realword):
                            infix_2 = infix_1
                        else:
                            return [u"[" + original + u"]",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u""]
                    gotinfix1 = True
                    break
            if gotinfix1 == False:
                if re.search(u"<1><2><3>",realword):
                    if infix_1 == u"":
                        infix_2 = infix_1
                    infix1 = u""
                elif infix_1 == u"":
                    infix1 = u""
                else:
                    # Unrecognised text in the position-1 slot.
                    return [u"[" + original + u"]",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u""]
            # Position 2: take a leading infixes2 entry off the position-2 text.
            gotinfix2 = False
            for eachinfix in infixes2:
                if infix_2.startswith(eachinfix):
                    infix2 = infix_2[:len(eachinfix)]
                    # NOTE(review): this slice starts at len(eachinfix) - 1, so
                    # the last character of the matched infix stays in infix_2 —
                    # confirm whether len(eachinfix) was intended.
                    infix_2 = infix_2[len(eachinfix) - 1:]
                    gotinfix2 = True
                    break
            if gotinfix2 == False or infix_2 != u"":
                if infix_2.startswith(end):
                    # NOTE(review): `suffixes` is assigned but never read in
                    # this function.
                    suffixes = infix2[len(end) - 1:] + end
                elif infix_2 == u"":
                    infix2 = u""
                else:
                    return [u"[" + original + u"]",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u""]
#           print u"0" + unicode(infix0) + u" 1" + unicode(infix1) + u" 2" + unicode(infix2)
            return [realword,infix0,infix1,infix2,u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u""]
        else:
            # The pattern has no infix markers; report it with empty infixes.
            return [realword,u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u""]
/webapp/main.py
5,11 → 5,14
import tornado.ioloop
import tornado.web
import tornado.autoreload
import tornado.database
 
import os
import re
 
from navi.parsenum import parse
from tsimapiak import parsenum
from tsimapiak import dbconnector
from tsimapiak import parse
from tsimapiak import parse2
 
class Index(tornado.web.RequestHandler):
def get(self):
24,32 → 27,60
num = self.get_argument("num").strip()
except:
self.redirect("/number")
numout = parse(num.replace(" ",""))
numout = parsenum.parse(num.replace(" ",""))
if numout == None:
numout = -1
self.render("templates/number.html", last=num, numout=numout)
numoutt = -1
else:
numoutt = [numout["dec"], numout["oct"]]
self.render("templates/number.html", last=num, numout=numoutt)
 
# Request handler whose GET runs the /usr/bin/restartnavi command.
# NOTE(review): no access check is visible in this handler — confirm the
# route is protected elsewhere.
class Restart(tornado.web.RequestHandler):
    def get(self):
        os.system("/usr/bin/restartnavi")
 
 
class TestDB(tornado.web.RequestHandler):
def get(self):
text = ""
db = tornado.database.Connection("127.0.0.1", "navi", user="navi", password="navi")
for thing in db.query("SELECT * FROM test"):
text = "<br />".join((text, str(thing.id) + " - " + thing.asd))
lis = dbconnector.getnavilist()
text = u"id | navi | infix | partofspeech<br />"
text += u"<br />".join(u" | ".join(unicode(y) for y in x) for x in lis)
self.write(text)
 
class Parse(tornado.web.RequestHandler):
    """Form handler for the single-word parser backed by parse.parsefix()."""

    def get(self):
        """Render the empty form."""
        self.render("templates/parse.html", last="", out=None)

    def post(self):
        """Parse the submitted "word" field and render the result."""
        try:
            word = self.get_argument("word").strip()
        except tornado.web.HTTPError:
            # The field is missing: send the client back to the form and stop,
            # since there is no word to parse and the response is finished.
            self.redirect("/parse")
            return
        out = parse.parsefix(word)
        self.render("templates/parse.html", last=word, out=out)
 
class Parse2(tornado.web.RequestHandler):
    """Form handler for the sentence parser backed by parse2.parsesent()."""

    def get(self):
        """Render the empty form."""
        self.render("templates/parse2.html", last="", out=None)

    def post(self):
        """Parse the submitted "word" field and render the result."""
        try:
            word = self.get_argument("word")
        except tornado.web.HTTPError:
            # The field is missing: send the client back to the form and stop,
            # since there is no text to parse and the response is finished.
            self.redirect("/parse2")
            return
        out = parse2.parsesent(word)
        self.render("templates/parse2.html", last=word, out=out)
 
application = tornado.web.Application([
("/", Index),
("/number", Number),
("/restart", Restart),
("/testdb", TestDB)
("/testdb", TestDB),
("/parse", Parse),
("/parse2", Parse2)
])
 
if __name__ == "__main__":
http_server = tornado.httpserver.HTTPServer(application)
http_server.listen(1337)
tornado.autoreload.start()
#tornado.autoreload.start()
tornado.ioloop.IOLoop.instance().start()
/webapp/templates/parse2.html
0,0 → 1,42
{% extends "base.html" %}
 
{% block title %}Word parser{% end %}
 
{% block body %}
<b>Na'vi word:</b><br />
<form action="/parse2" method="post">
<input id="word" name="word" type="text" value="{{last}}" style="width: 100%;" />
<input name="btn" type="submit" value="Parse!" />
</form>
{% if out %}
<table border="1">
<tr>
<th>Words</th>
<th>Parts</th>
<th>Data</th>
</tr>
{% for wor in out %}
<tr>
<td rowspan="4">{{ wor["word"]["navi"] }}</td>
<td>Infixes:</td>
<td>{{ u", ".join(wor["inf"]) }}</td>
</tr>
<tr>
<td>Prefixes:</td>
<td>{{ u"; ".join(u", ".join(x) for x in wor["pref"]) }}</td>
</tr>
<tr>
<td>Postfixes:</td>
<td>{{ u"; ".join(u", ".join(x) for x in wor["post"]) }}</td>
</tr>
<tr>
<td>Lenited:</td>
<td>{{ str(wor["len"]) }}</td>
</tr>
{% end %}
</table>
{% end %}
<script type="text/javascript">
document.getElementById("word").focus();
</script>
{% end %}
/webapp/templates/parse.html
0,0 → 1,20
{% extends "base.html" %}
 
{% block title %}Word parser{% end %}
 
{% block body %}
<b>Na'vi word:</b><br />
<form action="/parse" method="post">
<input id="word" name="word" type="text" value="{{last}}" style="width: 100%;" />
<input name="btn" type="submit" value="Parse!" />
</form>
{% if type(out) == list %}
{{ out[0]}} <br />
{{ out[1]}} <br />
{{ out[2]}} <br />
{{ out[3]}}
{% end %}
<script type="text/javascript">
document.getElementById("word").focus();
</script>
{% end %}
/webapp/templates/base.html
1,6 → 1,6
<html>
<head>
<title>{% block title %}Title{% end %}</title>
<title>Tsim Apiak - {% block title %}Title{% end %}</title>
<style type="text/css">
body {
background: #145179;
14,6 → 14,10
text-align: center;
font-size: 52px;
}
h2 {
text-align: center;
font-size: 24px;
}
#center {
background: #2594DE;
width: 760px;
25,7 → 29,8
</head>
<body>
<div id="center">
<h1>{% block title %}Title{% end %}</h1>
<h1>Tsim Apiak</h1>
<h2>{% block title %}Title{% end %}</h2>
{% block body %}Body{% end %}
</div>
</body>
/webapp/templates/number.html
1,6 → 1,6
{% extends "base.html" %}
 
{% block title %}Na'vi number translator{% end %}
{% block title %}Number translator{% end %}
 
{% block body %}
<b>Na'vi number:</b><br />