Subversion Repositories navi

Compare Revisions

Ignore whitespace Rev 3 → Rev 86

/webapp/templates/parse2.html
0,0 → 1,38
{% extends "base.html" %}
 
{% block title %}Word parser{% end %}
 
{% block body %}
<b>Na'vi word:</b><br />
<form action="/parse2" method="post">
<input id="word" name="word" type="text" value="{{last}}" style="width: 100%;" />
<input name="btn" type="submit" value="Parse!" />
</form>
{% if out %}
<table border="1">
<tr>
<th>Words</th>
<th>Parts</th>
<th>Data</th>
</tr>
{% for wor in out %}
<tr>
<td rowspan="3">{{ wor["word"]["navi"] }}</td>
<td>Infixes:</td>
<td>{{ u", ".join(wor["inf"]) }}</td>
</tr>
<tr>
<td>Prefixes:</td>
<td>{{ u"; ".join(u", ".join(x) for x in wor["pref"]) }}</td>
</tr>
<tr>
<td>Postfixes:</td>
<td>{{ u"; ".join(u", ".join(x) for x in wor["post"]) }}</td>
</tr>
{% end %}
</table>
{% end %}
<script type="text/javascript">
document.getElementById("word").focus();
</script>
{% end %}
/webapp/templates/parse.html
0,0 → 1,20
{% extends "base.html" %}
 
{% block title %}Word parser{% end %}
 
{% block body %}
<b>Na'vi word:</b><br />
<form action="/parse" method="post">
<input id="word" name="word" type="text" value="{{last}}" style="width: 100%;" />
<input name="btn" type="submit" value="Parse!" />
</form>
{% if type(out) == list %}
{{ out[0]}} <br />
{{ out[1]}} <br />
{{ out[2]}} <br />
{{ out[3]}}
{% end %}
<script type="text/javascript">
document.getElementById("word").focus();
</script>
{% end %}
/webapp/templates/base.html
14,7 → 14,7
text-align: center;
font-size: 52px;
}
h1 {
h2 {
text-align: center;
font-size: 24px;
}
/webapp/main.py
5,11 → 5,14
import tornado.ioloop
import tornado.web
import tornado.autoreload
import tornado.database
 
import os
import re
 
from navi.parsenum import parse
from tsimapiak import parsenum
from tsimapiak import dbconnector
from tsimapiak import parse
from tsimapiak import parse2
 
class Index(tornado.web.RequestHandler):
def get(self):
24,7 → 27,7
num = self.get_argument("num").strip()
except:
self.redirect("/number")
numout = parse(num.replace(" ",""))
numout = parsenum.parse(num.replace(" ",""))
if numout == None:
numout = -1
self.render("templates/number.html", last=num, numout=numout)
33,23 → 36,49
def get(self):
os.system("/usr/bin/restartnavi")
 
 
class TestDB(tornado.web.RequestHandler):
def get(self):
text = ""
db = tornado.database.Connection("127.0.0.1", "navi", user="navi", password="navi")
for thing in db.query("SELECT * FROM test"):
text = "<br />".join((text, str(thing.id) + " - " + thing.asd))
lis = dbconnector.getnavilist()
text = u"id | navi | infix | partofspeech<br />"
text += u"<br />".join(u" | ".join(unicode(y) for y in x) for x in lis)
self.write(text)
 
class Parse(tornado.web.RequestHandler):
def get(self):
self.render("templates/parse.html", last="", out=None)
def post(self):
try:
word = self.get_argument("word").strip()
except:
self.redirect("/parse")
out = parse.parsefix(word)
self.render("templates/parse.html", last=word, out=out)
 
class Parse2(tornado.web.RequestHandler):
def get(self):
self.render("templates/parse2.html", last="", out=None)
def post(self):
try:
word = self.get_argument("word").strip()
except:
self.redirect("/parse2")
out = parse2.parsesent(word.split(u" "))
self.render("templates/parse2.html", last=word, out=out)
 
application = tornado.web.Application([
("/", Index),
("/number", Number),
("/restart", Restart),
("/testdb", TestDB)
("/testdb", TestDB),
("/parse", Parse),
("/parse2", Parse2)
])
 
if __name__ == "__main__":
http_server = tornado.httpserver.HTTPServer(application)
http_server.listen(1337)
tornado.autoreload.start()
#tornado.autoreload.start()
tornado.ioloop.IOLoop.instance().start()
/tsimapiak/parse2.py
0,0 → 1,84
#!/usr/bin/python
# -*- coding: utf-8 -*-
 
import re
import dbconnector
 
wordlist = dbconnector.getnavilist()
 
infixes1 = [u"awn", u"eyk", u"us", u"äp", u""]
infixes2 = [u"ìyev", u"iyev", u"ìmìy", u"arm", u"asy", u"ilv", u"ìmv", u"imv", u"ìrm", u"irv", u"ìsy", u"aly", u"ary", u"ìly", u"ìry", u"ìlm", u"alm", u"am", u"ay", u"er", u"ìm", u"iv", u"ìy", u"ol", u""]
infixes3 = [u"äng", u"ats", u"eiy", u"ei", u"uy", u""]
prefixes = [u"tsay", u"fay", u"tsa", u"fra", u"pxe", u"ay", u"me", u"pe", u"le", u"nì", u"sä", u"tì", u"ke", u"fì", u"a"]
postfixes = [u"eyä", u"ìri", u"an", u"ìl", u"it", u"lo", u"ng", u"ri", u"ru", u"ti", u"ur", u"ve", u"yä", u"ya", u"tu", u"vi", u"yu", u"e", u"l", u"o", u"t", u"y", u"a", u"ä"]
#prefixesn = ur"(?P<npr>(?:(?:fì|tsa)?(?:me|pxe|ay|fra)?|(?:fay)?|(?:tsay)?)(?:fne)?(?:tì|sä)?"
#prefixesv = ur"(?P<vpr>(?:nì|sä|tì|rä'ä |ke )?)"
 
def parseword(wordin):
ret = {"word": {"id": 0, "navi": u" ".join(wordin), "infix": u"", "type": u""}}
for word in wordlist:
foundit = True
foundprefs = []
foundposts = []
splitword = word["infix"].split(u" ")
if len(wordin) < len(splitword):
foundit = False
next
for wor in range(len(splitword)):
if not foundit:
break
foundprefs.append([])
foundposts.append([])
center = u""
foundins = [u"", u"", u""]
pre = []
post = []
if u"<1>" in splitword[wor]:
for in1 in infixes1:
for in2 in infixes2:
for in3 in infixes3:
if splitword[wor].replace(u"<1><2>",in1+in2).replace(u"<3>",in3) in wordin[wor]:
center = splitword[wor].replace(u"<1><2>",in1+in2).replace(u"<3>",in3)
foundins = [in1, in2, in3]
break
if center != u"": break
if center != u"": break
else:
if splitword[wor] in wordin[wor]:
center = splitword[wor]
if center == u"":
foundit = False
break
pref, posf = wordin[wor].split(center)
for pre in prefixes:
if pref.endswith(pre):
foundprefs[wor].append(pre)
pref = pref[:-len(pre)]
if pref != u"":
foundit = False
break
for pos in postfixes:
if posf.startswith(pos):
foundposts[wor].append(pos)
posf = posf[len(pos):]
if posf != u"":
foundit = False
break
if foundit == True:
foundword = word
break
if foundit == True:
ret["pref"] = foundprefs
ret["post"] = foundposts
ret["inf"] = foundins
ret["word"] = foundword
return ret
 
def parsesent(sent):
ret = []
left = len(sent)
while left:
word = parseword(sent[-left:])
left -= len(word["word"]["navi"].split(" "))
ret.append(word)
return ret
/tsimapiak/parse.py
0,0 → 1,86
#!/usr/bin/python
# -*- coding: utf-8 -*-
 
import re
from dbconnector import getnavilist
 
wordlist = getnavilist()
 
infixes0 = [ u"awn", u"eyk", u"us", u"äp" ]
infixes1 = [ u"ìyev", u"iyev", u"ìmìy", u"arm", u"asy", u"ilv", u"ìmv", u"imv" u"ìrm", u"irv", u"ìsy", u"aly", u"ary", u"ìly", u"ìry", u"ìlm", u"alm", u"am", u"ay", u"er", u"ìm", u"iv", u"ìy", u"ol" ]
infixes2 = [ u"äng", u"ats", u"eiy", u"ei", u"uy" ]
 
# Returns array with Word,Infix 0,Infix 1,Infix 2,Case,Gender,Number suffixes,Inclusive,Indefinite,Vocative (suffix),Plural,Adposition,Adject pre,Adject suff,am/ay/tu/vi/yu,adverbial,nominalise,sä,fne,lenited?
def parsefix(original):
realword = u""
infix0 = u""
infix1 = u""
infix2 = u""
infix01 = u""
infix_1 = u""
infix_2 = u""
for eachword in wordlist:
regex = re.sub(u" ",u"[^ ]* [^ ]*",eachword["infix"])
regex = re.sub(u"^",u"[^ ]*",regex)
regex = re.sub(u"$",u"[^ ]*",regex)
regex = re.sub(u"<1><2>",u"[^ ]*",regex)
regex = re.sub(u"<3>",u"[^ ]*",regex)
if re.match(regex,original):
realword = eachword["infix"]
break
if realword == u"":
return [u"[" + original + u"]",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u""]
else:
if re.search(u"<",realword):
beginning = re.sub(u"<1><2>.*",u"",realword)
middle = re.sub(u".*<1><2>(.*)<3>.*",ur"\1",realword)
end = re.sub(u".*<3>",u"",realword)
infix01 = re.sub(u".*?" + re.sub(u"<1><2>",u"([^ ]*)",re.sub(u"<3>",u"[^ ]*",realword)) + u".*?",ur"\1",original)
infix_2 = re.sub(u".*?" + re.sub(u"<3>",u"([^ ]*)",re.sub(u"<1><2>",u"[^ ]*",realword)) + u".*?",ur"\1",original)
for eachinfix in infixes0:
if infix01.startswith(eachinfix):
infix0 = eachinfix
infix_1 = infix01[len(eachinfix):]
break
else:
infix0 = u""
infix_1 = infix01
gotinfix1 = False
for eachinfix in infixes1:
if infix_1.startswith(eachinfix):
infix1 = eachinfix
infix_1 = infix_1[len(eachinfix):]
if infix_1 != u"":
if re.search(u"<1><2><3>",realword):
infix_2 = infix_1
else:
return [u"[" + original + u"]",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u""]
gotinfix1 = True
break
if gotinfix1 == False:
if re.search(u"<1><2><3>",realword):
if infix_1 == u"":
infix_2 = infix_1
infix1 = u""
elif infix_1 == u"":
infix1 = u""
else:
return [u"[" + original + u"]",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u""]
gotinfix2 = False
for eachinfix in infixes2:
if infix_2.startswith(eachinfix):
infix2 = infix_2[:len(eachinfix)]
infix_2 = infix_2[len(eachinfix) - 1:]
gotinfix2 = True
break
if gotinfix2 == False or infix_2 != u"":
if infix_2.startswith(end):
suffixes = infix2[len(end) - 1:] + end
elif infix_2 == u"":
infix2 = u""
else:
return [u"[" + original + u"]",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u""]
# print u"0" + unicode(infix0) + u" 1" + unicode(infix1) + u" 2" + unicode(infix2)
return [realword,infix0,infix1,infix2,u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u""]
else:
return [realword,u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u""]
/tsimapiak/dbconnector.py
0,0 → 1,48
#!/usr/bin/python
# -*- coding: utf-8 -*-
 
import tornado.database
import re
 
def getnavilist():
ret = []
current = u""
db = tornado.database.Connection("127.0.0.1", "navi", user="navi", password="navi")
for row in db.query("""
SELECT *, CHAR_LENGTH(navi) AS NL
FROM `metaWords`
ORDER BY NL DESC"""):
if row["infixes"]:
ret.append({"id": row["id"], "navi": row["navi"], "infix": row["infixes"], "type": row["partOfSpeech"]})
else:
ret.append({"id": row["id"], "navi": row["navi"], "infix": row["navi"], "type": row["partOfSpeech"]})
db.close()
return ret
 
def getnavi(word):
ret = []
db = tornado.database.Connection("127.0.0.1", "navi", user="navi", password="navi")
for row in db.query("""
SELECT *
FROM `metaWords`
WHERE navi = ?""",word):
if row["infixes"]:
ret.append({"id": row["id"], "navi": row["navi"], "infix": row["infixes"], "type": row["partOfSpeech"]})
else:
ret.append({"id": row["id"], "navi": row["navi"], "infix": row["navi"], "type": row["partOfSpeech"]})
db.close()
return ret
 
#def gettrans(id, cod):
#ret = []
#if cod not in (u"est",u"ptbr",u"de",u"eng",u"all"):
#return ret
#db = tornado.database.Connection("127.0.0.1", "navi", user="navi", password="navi")
#if cod == "all":
#for row in db.query("""
#SELECT *
#FROM `metaWords`
#WHERE id = ?""",idd):
#infix = makeinfix(row)
#ret.append([row["id"],row["navi"], infix, row["partOfSpeech"]])
#db.close()