Subversion Repositories: navi

Compare Revisions

Rev 103 → Rev 132

/webapp/main.py
12,11 → 12,10
from tsimapiak import parsenum
from tsimapiak import dbconnector
from tsimapiak import parse
from tsimapiak import parse2

class Index(tornado.web.RequestHandler):
    def get(self):
        self.redirect("/number")
        self.render("templates/index.html")

class Number(tornado.web.RequestHandler):
    def get(self):
31,7 → 30,7
        if numout == None:
            numoutt = -1
        else:
            numoutt = [numout["dec"], numout["oct"]]
            numoutt = (numout["dec"], numout["oct"])
        self.render("templates/number.html", last=num, numout=numoutt)
 
class Restart(tornado.web.RequestHandler):
52,23 → 51,15
    def post(self):
        try:
            word = self.get_argument("word").strip()
            word = self.get_argument("word")
        except:
            self.redirect("/parse")
        out = parse.parsefix(word)
        out = parse.parsesent(word)
        self.render("templates/parse.html", last=word, out=out)
 
class Parse2(tornado.web.RequestHandler):
    def get(self):
        self.render("templates/parse2.html", last="", out=None)
    def post(self):
        try:
            word = self.get_argument("word")
        except:
            self.redirect("/parse2")
        out = parse2.parsesent(word)
        self.render("templates/parse2.html", last=word, out=out)

settings = {
    "static_path": os.path.join(os.path.dirname(__file__), "static")
}
 
application = tornado.web.Application([
    ("/", Index),
75,12 → 66,11
    ("/number", Number),
    ("/restart", Restart),
    ("/testdb", TestDB),
    ("/parse", Parse),
    ("/parse2", Parse2)
])
    ("/parse", Parse)
], **settings)
 
if __name__ == "__main__":
    http_server = tornado.httpserver.HTTPServer(application)
    http_server.listen(1337)
    #tornado.autoreload.start()
    tornado.ioloop.IOLoop.instance().start()
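The practical effect of the `], **settings)` change above is that `static_path` is now passed to `tornado.web.Application`, so Tornado itself serves `/static/...` (including the favicon added below) without a dedicated handler. A minimal sketch of that wiring, assuming nothing beyond the names and port visible in the diff:

import os

import tornado.httpserver
import tornado.ioloop
import tornado.web

class Index(tornado.web.RequestHandler):
    def get(self):
        self.render("templates/index.html")

settings = {
    # files in ./static are served at /static/<name>, e.g. /static/favicon.ico
    "static_path": os.path.join(os.path.dirname(__file__), "static")
}

application = tornado.web.Application([
    ("/", Index)
], **settings)

if __name__ == "__main__":
    http_server = tornado.httpserver.HTTPServer(application)
    http_server.listen(1337)
    tornado.ioloop.IOLoop.instance().start()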
/webapp/templates/parse2.html
File deleted
\ No newline at end of file
/webapp/templates/base.html
1,6 → 1,7
<html>
<head>
<title>Tsim Apiak - {% block title %}Title{% end %}</title>
<link rel="shortcut icon" type="image/x-icon" href="static/favicon.ico" />
<style type="text/css">
body {
background: #145179;
/webapp/templates/index.html
0,0 → 1,8
{% extends "base.html" %}
 
{% block title %}Home{% end %}
 
{% block body %}
<a href="/number"><b>Number translator</b></a> - this webapp allows you to translate written-out Na'vi numbers into decimal and octal.<br />
<a href="/parse"><b>Parser</b></a> - this webapp can parse Na'vi sentences into the base words, prefixes, infixes and suffixes. It does not currently translate the words, but that will come.
{% end %}
/webapp/templates/number.html
17,4 → 17,4
<script type="text/javascript">
document.getElementById("num").focus();
</script>
{% end %}
/webapp/templates/parse.html
8,13 → 8,36
<input id="word" name="word" type="text" value="{{last}}" style="width: 100%;" />
<input name="btn" type="submit" value="Parse!" />
</form>
{% if type(out) == list %}
{{ out[0] }} <br />
{{ out[1] }} <br />
{{ out[2] }} <br />
{{ out[3] }}
{% if out %}
<table border="1">
  <tr>
    <th>Words</th>
    <th>Parts</th>
    <th>Data</th>
  </tr>
  {% for wor in out %}
  <tr>
    <td rowspan="4">{{ wor["word"]["navi"] }}</td>
    <td>Infixes:</td>
    <td>{{ u", ".join(wor["inf"]) }}</td>
  </tr>
  <tr>
    <td>Prefixes:</td>
    <td>{{ u"; ".join(u", ".join(x) for x in wor["pref"]) }}</td>
  </tr>
  <tr>
    <td>Postfixes:</td>
    <td>{{ u"; ".join(u", ".join(x) for x in wor["post"]) }}</td>
  </tr>
  <tr>
    <td>Lenited:</td>
    <td>{{ str(wor["len"]) }}</td>
  </tr>
  {% end %}
</table>
{% end %}
<script type="text/javascript">
  document.getElementById("word").focus();
</script>
{% end %}
<p>This program uses Eana Eltu for the list of words and infix positions (but nothing else), created by Tuiq and Taronyu. Thanks also go to the rest of the Learn Na'vi community!</p>
{% end %}
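For reference, the dict shape the new template loop consumes, inferred from the template expressions above and from parse.parsesent() below; the concrete word and affix values are illustrative only:

# One entry per parsed word. "pref" and "post" hold one list per
# space-separated part of the dictionary form, hence the double join
# in the template; "inf" is always the three infix slots.
out = [
    {
        "word": {"id": 0, "navi": u"taron", "infix": u"t<1><2>ar<3>on", "type": u"v."},
        "inf": [u"", u"er", u""],
        "pref": [[]],
        "post": [[u"ti"]],
        "len": False,  # rendered via str() in the Lenited row
    }
]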
/webapp/static/favicon.ico
Cannot display: file marked as a binary type.
svn:mime-type = application/octet-stream
/webapp/static/favicon.ico
Property changes:
Added: svn:mime-type
## -0,0 +1 ##
+application/octet-stream
\ No newline at end of property
Index: tsimapiak/parse2.py
===================================================================
--- tsimapiak/parse2.py (revision 103)
+++ tsimapiak/parse2.py (nonexistent)
@@ -1,119 +0,0 @@
-#!/usr/bin/python
-# -*- coding: utf-8 -*-
-
-import re
-import dbconnector
-import parsenum
-
-wordlist = dbconnector.getnavilist()
-
-infixes1 = (u"awn", u"eyk", u"us", u"äp", u"")
-infixes2 = (u"ìyev", u"iyev", u"ìmìy", u"arm", u"asy", u"ilv", u"ìmv", u"imv", u"ìrm", u"irv", u"ìsy", u"aly", u"ary", u"ìly", u"ìry", u"ìlm", u"alm", u"am", u"ay", u"er", u"ìm", u"iv", u"ìy", u"ol", u"")
-infixes3 = (u"äng", u"ats", u"eiy", u"ei", u"uy", u"")
-prefixes = (u"a", u"pe", u"le", u"nì", u"sä", u"tì", u"fne", u"tsay", u"fay", u"fra", u"pxe", u"ay", u"me", u"tsa", u"fì", u"ke")
-adpositions = (u"kxamlä", u"mungwrr", u"nemfa", u"pximaw", u"pxisre", u"tafkip", u"takip", u"teri", u"mìkam", u"ìla", u"fkip", u"fpi", u"ftu", u"kip", u"lok", u"luke", u"maw", u"pxel", u"pxaw", u"rofa", u"sre", u"sìn", u"vay", u"eo", u"fa", u"hu", u"io", u"ka", u"mì", u"na", u"ne", u"ro", u"ta", u"uo", u"wä", u"äo")
-postfixes = (u"an", u"ng", u"eyä", u"e", u"tsyìp", u"o", u"ìri", u"ìl", u"it", u"lo", u"ri", u"ru", u"ti", u"ur", u"ve", u"yä", u"ya", u"tu", u"vi", u"yu", u"l", u"t", u"y", u"a", u"ä") + adpositions
-#prefixesn = ur"(?P(?:(?:fì|tsa)?(?:me|pxe|ay|fra)?|(?:fay)?|(?:tsay)?)(?:fne)?(?:tì|sä)?"
-#prefixesv = ur"(?P(?:nì|sä|tì|rä'ä |ke )?)"
-
-lenit = ((u"px", u"p"), (u"tx", u"t"), (u"kx", u"k"), (u"ts", u"s"), (u"t", u"s"), (u"p", u"f"), (u"k", u"h"), (u"'", u""))
-
-def parseword(wordin):
- ret = {"word": {"id": 0, "navi": u"[" + wordin[0] + u"]", "infix": u"", "type": u""}}
- for word in wordlist:
- foundit = True
- foundprefs = []
- foundposts = []
- lenited = False
- splitword = word["infix"].split(u" ")
- if len(wordin) < len(splitword):
- foundit = False
- next
- for wor in range(len(splitword)):
- if not foundit:
- break
- foundprefs.append([])
- foundposts.append([])
- center = u""
- foundins = [u"", u"", u""]
- pre = []
- post = []
- if u"<1>" in splitword[wor]:
- for in1 in infixes1:
- for in2 in infixes2:
- for in3 in infixes3:
- if splitword[wor].replace(u"<1><2>",in1+in2).replace(u"<3>",in3) in wordin[wor]:
- center = splitword[wor].replace(u"<1><2>",in1+in2).replace(u"<3>",in3)
- foundins = [in1, in2, in3]
- break
- if center != u"": break
- if center != u"": break
- else:
- if splitword[wor] in wordin[wor]:
- center = splitword[wor]
- if center == u"":
- for i in lenit:
- temp = u""
- if splitword[wor].startswith(i[0]):
- temp = i[1] + splitword[wor][len(i[0]):]
- if temp in wordin[wor]:
- lenited = True
- center = temp
- if center == u"":
- if splitword[wor].endswith(u"nga"):
- temp = splitword[wor][:-3] + u"ng"
- if temp in wordin[wor]:
- center = temp
- if splitword[wor].endswith(u"po"):
- temp = splitword[wor][:-3] + u"p"
- if temp in wordin[wor]:
- center = temp
- if center == u"":
- foundit = False
- break
- temp = wordin[wor].split(center)
- if len(temp) != 2:
- foundit = False
- break
- pref, posf = temp
- for pre in prefixes:
- if pref != u"":
- if pref.endswith(pre):
- foundprefs[wor].append(pre)
- pref = pref[:-len(pre)]
- if pref != u"":
- foundit = False
- break
- for pos in postfixes:
- if posf != u"":
- if posf.startswith(pos):
- foundposts[wor].append(pos)
- posf = posf[len(pos):]
- if posf != u"":
- foundit = False
- break
- if foundit == True:
- foundword = word
- break
- ret["pref"] = foundprefs
- ret["post"] = foundposts
- ret["inf"] = foundins
- ret["len"] = lenited
- if foundit == True:
- ret["word"] = foundword
- return ret
-
-def parsesent(sent):
- sent = sent.strip().lower().replace(u"’", u"'")
- sent = re.sub(ur"[^\wìä' ]",u"",sent)
- sent = re.sub(ur"\ +",u" ",sent)
- sent = sent.split(u" ")
- ret = []
- left = len(sent)
- while left:
- word = parsenum.parse(sent[0])
- if word == None:
- word = parseword(sent[-left:])
- left -= len(word["word"]["navi"].split(" "))
- ret.append(word)
- return ret
\ No newline at end of file
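The heart of the deleted parser (and of its replacement in parse.py below) is a brute-force search over every <1><2>/<3> infix combination. A condensed, runnable sketch of that step, with shortened infix tuples and a hypothetical dictionary form:

# -*- coding: utf-8 -*-
# Condensed sketch of the <1><2>/<3> matching loop in parseword().
# The tuples are shortened here; the real ones are defined above.
infixes1 = (u"eyk", u"")
infixes2 = (u"am", u"er", u"")
infixes3 = (u"ei", u"")

def find_infixes(template, word):
    # Substitute every combination into the dictionary form until the
    # result appears in the input word; empty strings mean "no infix".
    for in1 in infixes1:
        for in2 in infixes2:
            for in3 in infixes3:
                cand = template.replace(u"<1><2>", in1 + in2).replace(u"<3>", in3)
                if cand in word:
                    return cand, [in1, in2, in3]
    return None

print find_infixes(u"t<1><2>ar<3>on", u"teraron")  # (u"teraron", [u"", u"er", u""])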
Index: tsimapiak/parse.py
===================================================================
--- tsimapiak/parse.py (revision 103)
+++ tsimapiak/parse.py (revision 132)
@@ -2,85 +2,130 @@
# -*- coding: utf-8 -*-
import re
-from dbconnector import getnavilist
+import dbconnector
+import parsenum
-wordlist = getnavilist()
+wordlist = dbconnector.getnavilist()
-infixes0 = [ u"awn", u"eyk", u"us", u"äp" ]
-infixes1 = [ u"ìyev", u"iyev", u"ìmìy", u"arm", u"asy", u"ilv", u"ìmv", u"imv" u"ìrm", u"irv", u"ìsy", u"aly", u"ary", u"ìly", u"ìry", u"ìlm", u"alm", u"am", u"ay", u"er", u"ìm", u"iv", u"ìy", u"ol" ]
-infixes2 = [ u"äng", u"ats", u"eiy", u"ei", u"uy" ]
+infixes1 = (u"awn", u"eyk", u"us", u"äp", u"")
+infixes2 = (u"ìyev", u"iyev", u"ìmìy", u"arm", u"asy", u"ilv", u"ìmv", u"imv", u"ìrm", u"irv", u"ìsy", u"aly", u"ary", u"ìly", u"ìry", u"ìlm", u"alm", u"am", u"ay", u"er", u"ìm", u"iv", u"ìy", u"ol", u"")
+infixes3 = (u"äng", u"ats", u"eiy", u"ei", u"uy", u"")
+prefixes = (u"tsay", u"fay", u"fra", u"pxe", u"fne", u"tsa", u"pe", u"le", u"nì", u"sä", u"tì", u"ay", u"me", u"fì", u"ke", u"a")
+adpositions = (u"mungwrr", u"kxamlä", u"pximaw", u"pxisre", u"tafkip", u"nemfa", u"takip", u"mìkam", u"teri", u"fkip", u"luke", u"pxel", u"pxaw", u"rofa", u"ìla", u"fpi", u"ftu", u"kip", u"lok", u"maw", u"sre", u"sìn", u"vay", u"eo", u"fa", u"hu", u"io", u"ka", u"mì", u"na", u"ne", u"ro", u"ta", u"uo", u"wä", u"äo", u"to")
+postfixes = adpositions + (u"tsyìp", u"eyä", u"ìri", u"ìl", u"it", u"lo", u"ri", u"ru", u"ti", u"ur", u"ve", u"yä", u"ya", u"tu", u"vi", u"yu", u"an", u"ng", u"ke", u"e", u"o", u"l", u"t", u"y", u"a", u"ä", u"r")
+#prefixesn = ur"(?P(?:(?:fì|tsa)?(?:me|pxe|ay|fra)?|(?:fay)?|(?:tsay)?)(?:fne)?(?:tì|sä)?"
+#prefixesv = ur"(?P(?:nì|sä|tì|rä'ä |ke )?)"
-# Returns array with Word,Infix 0,Infix 1,Infix 2,Case,Gender,Number suffixes,Inclusive,Indefinite,Vocative (suffix),Plural,Adposition,Adject pre,Adject suff,am/ay/tu/vi/yu,adverbial,nominalise,sä,fne,lenited?
-def parsefix(original):
- realword = u""
- infix0 = u""
- infix1 = u""
- infix2 = u""
- infix01 = u""
- infix_1 = u""
- infix_2 = u""
- for eachword in wordlist:
- regex = re.sub(u" ",u"[^ ]* [^ ]*",eachword["infix"])
- regex = re.sub(u"^",u"[^ ]*",regex)
- regex = re.sub(u"$",u"[^ ]*",regex)
- regex = re.sub(u"<1><2>",u"[^ ]*",regex)
- regex = re.sub(u"<3>",u"[^ ]*",regex)
- if re.match(regex,original):
- realword = eachword["infix"]
- break
- if realword == u"":
- return [u"[" + original + u"]",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u""]
- else:
- if re.search(u"<",realword):
- beginning = re.sub(u"<1><2>.*",u"",realword)
- middle = re.sub(u".*<1><2>(.*)<3>.*",ur"\1",realword)
- end = re.sub(u".*<3>",u"",realword)
- infix01 = re.sub(u".*?" + re.sub(u"<1><2>",u"([^ ]*)",re.sub(u"<3>",u"[^ ]*",realword)) + u".*?",ur"\1",original)
- infix_2 = re.sub(u".*?" + re.sub(u"<3>",u"([^ ]*)",re.sub(u"<1><2>",u"[^ ]*",realword)) + u".*?",ur"\1",original)
- for eachinfix in infixes0:
- if infix01.startswith(eachinfix):
- infix0 = eachinfix
- infix_1 = infix01[len(eachinfix):]
- break
- else:
- infix0 = u""
- infix_1 = infix01
- gotinfix1 = False
- for eachinfix in infixes1:
- if infix_1.startswith(eachinfix):
- infix1 = eachinfix
- infix_1 = infix_1[len(eachinfix):]
- if infix_1 != u"":
- if re.search(u"<1><2><3>",realword):
- infix_2 = infix_1
+lenit = ((u"px", u"p"), (u"tx", u"t"), (u"kx", u"k"), (u"ts", u"s"), (u"t", u"s"), (u"p", u"f"), (u"k", u"h"), (u"'", u""))
+
+def parseword(wordin):
+ ret = {"word": {"id": 0, "navi": u"[" + wordin[0] + u"]", "infix": u"", "type": u""}}
+ for word in wordlist:
+ foundit = True
+ foundprefs = []
+ foundposts = []
+ lenited = False
+ splitword = word["infix"].split(u" ")
+ if len(wordin) < len(splitword):
+ foundit = False
+ next
+ for wor in range(len(splitword)):
+ if not foundit:
+ break
+ foundprefs.append([])
+ foundposts.append([])
+ center = u""
+ foundins = [u"", u"", u""]
+ pre = []
+ post = []
+ if u"<1>" in splitword[wor]:
+ for in1 in infixes1:
+ for in2 in infixes2:
+ for in3 in infixes3:
+ if splitword[wor].replace(u"<1><2>",in1+in2).replace(u"<3>",in3) in wordin[wor]:
+ center = splitword[wor].replace(u"<1><2>",in1+in2).replace(u"<3>",in3)
+ foundins = [in1, in2, in3]
+ break
+ if center != u"": break
+ if center != u"": break
else:
- return [u"[" + original + u"]",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u""]
- gotinfix1 = True
- break
- if gotinfix1 == False:
- if re.search(u"<1><2><3>",realword):
- if infix_1 == u"":
- infix_2 = infix_1
- infix1 = u""
- elif infix_1 == u"":
- infix1 = u""
- else:
- return [u"[" + original + u"]",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u""]
- gotinfix2 = False
- for eachinfix in infixes2:
- if infix_2.startswith(eachinfix):
- infix2 = infix_2[:len(eachinfix)]
- infix_2 = infix_2[len(eachinfix) - 1:]
- gotinfix2 = True
- break
- if gotinfix2 == False or infix_2 != u"":
- if infix_2.startswith(end):
- suffixes = infix2[len(end) - 1:] + end
- elif infix_2 == u"":
- infix2 = u""
- else:
- return [u"[" + original + u"]",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u""]
-# print u"0" + unicode(infix0) + u" 1" + unicode(infix1) + u" 2" + unicode(infix2)
- return [realword,infix0,infix1,infix2,u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u""]
- else:
- return [realword,u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u""]
+ if splitword[wor] in wordin[wor]:
+ center = splitword[wor]
+ if center == u"":
+ for i in lenit:
+ temp = u""
+ if splitword[wor].startswith(i[0]):
+ temp = i[1] + splitword[wor][len(i[0]):]
+ if temp in wordin[wor]:
+ lenited = True
+ center = temp
+ if center == u"":
+ if splitword[wor].endswith(u"nga"):
+ temp = splitword[wor][:-3] + u"ng"
+ if temp in wordin[wor]:
+ center = temp
+ if splitword[wor].endswith(u"po"):
+ temp = splitword[wor][:-3] + u"p"
+ if temp in wordin[wor]:
+ center = temp
+ if center == u"":
+ foundit = False
+ break
+ temp = wordin[wor].split(center)
+ if len(temp) != 2:
+ foundit = False
+ break
+ pref, posf = temp
+ last = u""
+ while last != pref:
+ last = pref
+ for pre in prefixes:
+ if pref != u"":
+ if pref.endswith(pre):
+ if pre in foundprefs[wor]:
+ break
+ foundprefs[wor].append(pre)
+ pref = pref[:-len(pre)]
+ break
+ if pref != u"":
+ foundit = False
+ break
+ last = u""
+ while last != posf:
+ last = posf
+ for pos in postfixes:
+ if posf != u"":
+ if posf.startswith(pos):
+ if pos in foundposts[wor]:
+ break
+ foundposts[wor].append(pos)
+ posf = posf[len(pos):]
+ break
+ if posf != u"":
+ foundit = False
+ break
+ if foundit == True:
+ foundword = word
+ break
+ ret["pref"] = foundprefs
+ ret["post"] = foundposts
+ ret["inf"] = foundins
+ ret["len"] = lenited
+ if foundit == True:
+ ret["word"] = foundword
+ return ret
+
+def parsesent(sent):
+ sent = sent.strip().lower().replace(u"’", u"'")
+ sent = re.sub(ur"[^\wìä' ]",u"",sent)
+ sent = re.sub(ur"\ +",u" ",sent)
+ sent = sent.split(u" ")
+ ret = []
+ left = len(sent)
+ while left:
+ word = parsenum.parse(sent[len(sent)-left])
+ if word == None:
+ word = parseword(sent[-left:])
+ left -= len(word["word"]["navi"].split(" "))
+ ret.append(word)
+ return ret
\ No newline at end of file
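Besides absorbing parse2.py, the substantive change to parseword() is the new outer while loop around affix stripping: instead of a single pass over the prefix and postfix tables, it keeps peeling affixes until a full pass removes nothing, and the `in foundprefs[wor]` / `in foundposts[wor]` guards reject a second use of the same affix. A standalone sketch of the prefix side, with a shortened prefix tuple:

# Repeat-stripping as in the new parseword(): at most one affix removed
# per pass, looping until a pass changes nothing; duplicates are refused.
prefixes = (u"tsay", u"fne", u"ay", u"me", u"tsa")  # shortened, for illustration

def strip_prefixes(pref):
    found = []
    last = None
    while last != pref:
        last = pref
        for pre in prefixes:
            if pref != u"" and pref.endswith(pre):
                if pre in found:  # the same prefix may only match once
                    break
                found.append(pre)  # innermost prefix first, stripping from the right
                pref = pref[:-len(pre)]
                break
    return found, pref  # a non-empty remainder means the candidate fails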
/tsimapiak/dbconnector.py
32,18 → 32,4
        else:
            ret.append({"id": row["id"], "navi": row["navi"], "infix": row["navi"].lower(), "type": row["partOfSpeech"]})
    db.close()
    return ret
 
#def gettrans(id, cod):
#ret = []
#if cod not in (u"est",u"ptbr",u"de",u"eng",u"all"):
#return ret
#db = tornado.database.Connection("127.0.0.1", "navi", user="navi", password="navi")
#if cod == "all":
#for row in db.query("""
#SELECT *
#FROM `metaWords`
#WHERE id = ?""",idd):
#infix = makeinfix(row)
#ret.append([row["id"],row["navi"], infix, row["partOfSpeech"]])
#db.close()
return ret
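After this cleanup, getnavilist() is the only live query left in the module, returning one dict per word. An illustrative row, as built in the else branch shown above (the values are made up):

{"id": 42, "navi": u"Taron", "infix": u"taron", "type": u"v."}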
/tsimapiak/parsenum.py
35,15 → 35,12
        u"(?:(" + "|".join(base) + u")zam??)?" + \
        u"(?:(" + "|".join(base) + u")vo(?:l(?=a|))?)?" + \
        u"((?:" + "|".join(rem) + u")|" + \
        u"(?:" + "|".join(num) + u"))?(ve?)(a?)$"
        u"(?:" + "|".join(num) + u"))?((?:ve)?)(a?)$"
numre = re.compile(numre)
 
def parse(numin):
    if type(numin) != unicode:
    if numin in (u"a", u"aa", u"ave", u"avea", u"ve", u"vea"):
        return None
    if numin == u"":
        return None
    numin = numin.replace(u"í",u"ì").replace(u"á",u"ä")
    try:
        mat = numre.match(numin).groups()
    except:
77,9 → 74,17
    retnum = unicode(numout)
    if mat[6] != u"":
        retnum += u"."
    return {"word": {"id": 0, "navi": retnum, "infix": u"", "type": u""}, "pref": [mat[0]], "post": [mat[6], mat[7]], "inf": [u"", u"", u""], "len": False, "dec": numout, "oct": numdec}
    prefs = []
    posts = []
    if mat[0] != u"":
        prefs.append(mat[0])
    if mat[6] != u"":
        posts.append(mat[6])
    if mat[7] != u"":
        posts.append(mat[7])
    return {"word": {"id": 0, "navi": retnum, "infix": u"", "type": u""}, "pref": [prefs], "post": [posts], "inf": [u"", u"", u""], "len": False, "dec": numout, "oct": numoct}
#return numout, numoct
 
 
if __name__ == "__main__":
    print parse(u"mrrvolaw")
    print parse(u"mrrvolawvea")
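Two things change in the final return value: the octal field now reads numoct instead of numdec, and empty regex groups are no longer passed through, so "pref" and "post" carry nested lists of only the non-empty matches, the same shape parseword() produces. Assuming the usual octal readings (mrr = 5, vol marks the eights place, aw = 1), the first test word works out as:

# parse(u"mrrvolaw"): 5*8 + 1 = 41 decimal, which is 51 in octal.
# old shape: "pref": [mat[0]], "post": [mat[6], mat[7]]  ->  [u""], [u"", u""]
# new shape (all affix groups empty, so nothing is appended):
{"word": {"id": 0, "navi": u"41", "infix": u"", "type": u""},
 "pref": [[]], "post": [[]], "inf": [u"", u"", u""],
 "len": False, "dec": 41, "oct": 51}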