Subversion Repositories navi

Compare Revisions

Ignore whitespace Rev 48 → Rev 126

/webapp/static/favicon.ico
Cannot display: file marked as a binary type.
svn:mime-type = application/octet-stream
/webapp/static/favicon.ico
Property changes:
Added: svn:mime-type
## -0,0 +1 ##
+application/octet-stream
\ No newline at end of property
Index: webapp/templates/index.html
===================================================================
--- webapp/templates/index.html (nonexistent)
+++ webapp/templates/index.html (revision 126)
@@ -0,0 +1,8 @@
+{% extends "base.html" %}
+
+{% block title %}Home{% end %}
+
+{% block body %}
+Number translator - this webapp allows you to translate written-out Na'vi numbers into decimal and octal.
+Parser - this webapp can parse Na'vi sentences into the base words, prefixes, infixes and suffixes. It does not currently translate the words, but that will come.
+{% end %}
Index: webapp/templates/number.html
===================================================================
--- webapp/templates/number.html (revision 48)
+++ webapp/templates/number.html (revision 126)
@@ -17,4 +17,4 @@
-{% end %}
+{% end %}
\ No newline at end of file
Index: webapp/templates/parse.html
===================================================================
--- webapp/templates/parse.html (nonexistent)
+++ webapp/templates/parse.html (revision 126)
@@ -0,0 +1,43 @@
+{% extends "base.html" %}
+
+{% block title %}Word parser{% end %}
+
+{% block body %}
+Na'vi word:
+
+
+
+
+{% if out %}
+
+
+ Words
+ Parts
+ Data
+
+{% for wor in out %}
+
+ {{ wor["word"]["navi"] }}
+ Infixes:
+ {{ u", ".join(wor["inf"]) }}
+
+
+ Prefixes:
+ {{ u"; ".join(u", ".join(x) for x in wor["pref"]) }}
+
+
+ Postfixes:
+ {{ u"; ".join(u", ".join(x) for x in wor["post"]) }}
+
+
+ Lenited:
+ {{ str(wor["len"]) }}
+
+{% end %}
+
+{% end %}
+
+

This program uses Eana Eltu for the list of words and infix positions (but nothing else), created by Tuiq and Taronyu. Thanks also go to the rest of the Learn Na'vi community!

+{% end %}
Index: webapp/main.py
===================================================================
--- webapp/main.py (revision 48)
+++ webapp/main.py (revision 126)
@@ -9,12 +9,13 @@
import os
import re
-from tsimapiak.parsenum import parse
-from tsimapiak.dbconnector import getnavilist
+from tsimapiak import parsenum
+from tsimapiak import dbconnector
+from tsimapiak import parse
class Index(tornado.web.RequestHandler):
def get(self):
- self.redirect("/number")
+ self.render("templates/index.html")
class Number(tornado.web.RequestHandler):
def get(self):
@@ -25,10 +26,12 @@
num = self.get_argument("num").strip()
except:
self.redirect("/number")
- numout = parse(num.replace(" ",""))
+ numout = parsenum.parse(num.replace(" ",""))
if numout == None:
- numout = -1
- self.render("templates/number.html", last=num, numout=numout)
+ numoutt = -1
+ else:
+ numoutt = (numout["dec"], numout["oct"])
+ self.render("templates/number.html", last=num, numout=numoutt)
class Restart(tornado.web.RequestHandler):
def get(self):
@@ -37,20 +40,33 @@
class TestDB(tornado.web.RequestHandler):
def get(self):
- lis = getnavilist()
+ lis = dbconnector.getnavilist()
text = u"id | navi | infix | partofspeech
"
text += u"
".join(u" | ".join(unicode(y) for y in x) for x in lis)
self.write(text)
+class Parse(tornado.web.RequestHandler):
+ def get(self):
+ self.render("templates/parse.html", last="", out=None)
+
+ def post(self):
+ try:
+ word = self.get_argument("word")
+ except:
+ self.redirect("/parse")
+ out = parse.parsesent(word)
+ self.render("templates/parse.html", last=word, out=out)
+
application = tornado.web.Application([
("/", Index),
("/number", Number),
("/restart", Restart),
- ("/testdb", TestDB)
+ ("/testdb", TestDB),
+ ("/parse", Parse)
])
if __name__ == "__main__":
http_server = tornado.httpserver.HTTPServer(application)
http_server.listen(1337)
- tornado.autoreload.start()
- tornado.ioloop.IOLoop.instance().start()
+ #tornado.autoreload.start()
+ tornado.ioloop.IOLoop.instance().start()
\ No newline at end of file
/tsimapiak/parse.py
0,0 → 1,131
#!/usr/bin/python
# -*- coding: utf-8 -*-
 
import re
import dbconnector
import parsenum
 
# Dictionary entries, fetched once when this module is imported.
# parseword() iterates over this list in order and reads each entry's
# "infix" key: the word form, optionally carrying <1><2> and <3> infix
# placeholders, with multi-word entries separated by single spaces.
wordlist = dbconnector.getnavilist()

# Candidate fillers for the infix placeholders. parseword() substitutes
# infixes1 + infixes2 for "<1><2>" and infixes3 for "<3>". Every table ends
# with u"" so the form without an infix in that position is tried last.
infixes1 = (u"awn", u"eyk", u"us", u"äp", u"")
infixes2 = (u"ìyev", u"iyev", u"ìmìy", u"arm", u"asy", u"ilv", u"ìmv", u"imv", u"ìrm", u"irv", u"ìsy", u"aly", u"ary", u"ìly", u"ìry", u"ìlm", u"alm", u"am", u"ay", u"er", u"ìm", u"iv", u"ìy", u"ol", u"")
infixes3 = (u"äng", u"ats", u"eiy", u"ei", u"uy", u"")
# Affixes peeled off the text before (prefixes) and after (postfixes) the
# matched stem. parseword() takes the FIRST entry that matches, so the order
# of these tuples is significant (longer entries precede their own
# substrings, e.g. u"tsay" before u"ay").
prefixes = (u"tsay", u"fay", u"fra", u"pxe", u"fne", u"tsa", u"pe", u"le", u"nì", u"sä", u"tì", u"ay", u"me", u"fì", u"ke", u"a")
adpositions = (u"mungwrr", u"kxamlä", u"pximaw", u"pxisre", u"tafkip", u"nemfa", u"takip", u"mìkam", u"teri", u"fkip", u"luke", u"pxel", u"pxaw", u"rofa", u"ìla", u"fpi", u"ftu", u"kip", u"lok", u"maw", u"sre", u"sìn", u"vay", u"eo", u"fa", u"hu", u"io", u"ka", u"mì", u"na", u"ne", u"ro", u"ta", u"uo", u"wä", u"äo", u"to")
# All adpositions are tried before the remaining suffixes.
postfixes = adpositions + (u"tsyìp", u"eyä", u"ìri", u"ìl", u"it", u"lo", u"ri", u"ru", u"ti", u"ur", u"ve", u"yä", u"ya", u"tu", u"vi", u"yu", u"an", u"ng", u"ke", u"e", u"o", u"l", u"t", u"y", u"a", u"ä", u"r")
# NOTE(review): the two patterns below are disabled and not referenced by the
# code in this module - presumably an earlier regex-based approach; confirm
# before deleting.
#prefixesn = ur"(?P<npr>(?:(?:fì|tsa)?(?:me|pxe|ay|fra)?|(?:fay)?|(?:tsay)?)(?:fne)?(?:tì|sä)?"
#prefixesv = ur"(?P<vpr>(?:nì|sä|tì|rä'ä |ke )?)"

# (initial, replacement) pairs used by parseword() for lenition: when a
# dictionary form starts with the first element, the form with the second
# element substituted is also tried against the input.
lenit = ((u"px", u"p"), (u"tx", u"t"), (u"kx", u"k"), (u"ts", u"s"), (u"t", u"s"), (u"p", u"f"), (u"k", u"h"), (u"'", u""))
 
def _match_stem(template, token):
    """Locate the dictionary form ``template`` inside ``token``.

    Returns ``(center, infixes, lenited)``. ``center`` is the substring of
    ``token`` that corresponds to the dictionary form (u"" if none was
    found), ``infixes`` the three infixes that were filled in, and
    ``lenited`` whether the lenited spelling was the one that matched.
    """
    center = u""
    if u"<1>" in template:
        # Try every infix combination in table order; the first hit wins.
        for in1 in infixes1:
            for in2 in infixes2:
                for in3 in infixes3:
                    candidate = template.replace(u"<1><2>", in1 + in2).replace(u"<3>", in3)
                    if candidate != u"" and candidate in token:
                        return candidate, [in1, in2, in3], False
    elif template in token:
        center = template

    lenited = False
    if center == u"":
        # Retry with the initial consonant lenited. Every pair is tried, so
        # the last matching pair decides (unchanged from the original loop).
        for plain, soft in lenit:
            if template.startswith(plain):
                candidate = soft + template[len(plain):]
                if candidate in token:
                    center = candidate
                    lenited = True

    if center == u"":
        # Forms ending in "nga"/"po" drop their final vowel in some
        # inflected forms, so also try the shortened stem. The slice length
        # follows the ending (the original cut three characters for the
        # two-character "po" as well, which removed one character too many).
        for ending, short in ((u"nga", u"ng"), (u"po", u"p")):
            if template.endswith(ending):
                candidate = template[:-len(ending)] + short
                if candidate in token:
                    center = candidate

    return center, [u"", u"", u""], lenited


def _strip_affixes(rest, affixes, from_end):
    """Peel known affixes off ``rest`` one at a time.

    Affixes are removed from the end of ``rest`` when ``from_end`` is true
    (prefixes, working outwards from the stem) and from the start otherwise
    (postfixes). Within one pass the first table entry that fits is taken;
    peeling stops when nothing fits or the same affix would be used twice.

    Returns ``(found, remainder)``; a non-empty remainder means the text
    could not be fully explained by the affix table.
    """
    found = []
    while rest != u"":
        hit = None
        for affix in affixes:
            if rest.endswith(affix) if from_end else rest.startswith(affix):
                hit = affix
                break
        if hit is None or hit in found:
            break
        found.append(hit)
        rest = rest[:-len(hit)] if from_end else rest[len(hit):]
    return found, rest


def _match_entry(parts, tokens):
    """Match the space-separated ``parts`` of one entry against ``tokens``.

    Returns a dict with the "pref", "post", "inf" and "len" keys of a
    parseword() result, or None if any part fails to match.
    """
    found_prefs = []
    found_posts = []
    found_infixes = [u"", u"", u""]
    lenited = False
    for template, token in zip(parts, tokens):
        # As before, "inf" reflects the last part of a multi-word entry.
        center, found_infixes, softened = _match_stem(template, token)
        lenited = lenited or softened
        if center == u"":
            return None
        pieces = token.split(center)
        if len(pieces) != 2:
            # The stem occurs more than once; the split is ambiguous.
            return None
        before, after = pieces
        prefs, leftover = _strip_affixes(before, prefixes, True)
        if leftover != u"":
            return None
        posts, leftover = _strip_affixes(after, postfixes, False)
        if leftover != u"":
            return None
        found_prefs.append(prefs)
        found_posts.append(posts)
    return {"pref": found_prefs, "post": found_posts, "inf": found_infixes, "len": lenited}


def parseword(wordin):
    """Identify the dictionary entry at the start of a list of tokens.

    ``wordin`` is a non-empty list of unicode tokens (the remaining words of
    a sentence). Entries of the module-level ``wordlist`` are tried in
    order; an entry whose "infix" form has several space-separated parts is
    matched against that many leading tokens.

    Returns a dict with the keys:
      "word" - the matching wordlist entry, or a placeholder entry with id 0
               and "navi" set to the first token in square brackets when
               nothing matched;
      "pref" - one list of stripped prefixes per matched part;
      "post" - one list of stripped postfixes per matched part;
      "inf"  - the three infixes found (for the last part);
      "len"  - True if a lenited form was matched.

    When nothing matches, the affix fields are empty instead of carrying
    leftovers from whichever entry happened to be tried last, and an empty
    ``wordlist`` no longer raises NameError.
    """
    ret = {
        "word": {"id": 0, "navi": u"[" + wordin[0] + u"]", "infix": u"", "type": u""},
        "pref": [[]],
        "post": [[]],
        "inf": [u"", u"", u""],
        "len": False,
    }
    for word in wordlist:
        parts = word["infix"].split(u" ")
        if len(wordin) < len(parts):
            # Not enough tokens left for this entry (the original had a
            # bare, no-op ``next`` here where ``continue`` was meant).
            continue
        match = _match_entry(parts, wordin)
        if match is not None:
            ret["word"] = word
            ret.update(match)
            break
    return ret
 
def parsesent(sent):
    """Split a sentence into words and parse each one.

    The input is stripped, lower-cased, has typographic apostrophes replaced
    by "'" and has every character outside the pattern's allowed set
    removed. Each position is first offered to ``parsenum.parse``; if that
    returns None the remaining words are handed to ``parseword``, which may
    consume more than one word for multi-word entries.

    Returns a list of result dicts in sentence order. An empty sentence, or
    one that is empty after clean-up, yields an empty list.
    """
    sent = sent.strip().lower().replace(u"’", u"'")
    # Same pattern values as before, spelled without the Python-2-only
    # ``ur`` prefix.
    sent = re.sub(u"[^\\wìä' ]", u"", sent)
    sent = re.sub(u"\\ +", u" ", sent)
    # Removing punctuation can leave leading/trailing spaces behind; drop
    # the empty tokens they produce so they are not reported as unknown
    # words.
    words = [token for token in sent.split(u" ") if token != u""]
    ret = []
    left = len(words)
    # "> 0" rather than plain truthiness: if an entry's display name has
    # more words than were left, the counter goes negative and the loop
    # must still stop.
    while left > 0:
        word = parsenum.parse(words[-left])
        if word is None:
            word = parseword(words[-left:])
        # Advance by the number of words in the recognised entry's name.
        left -= len(word["word"]["navi"].split(" "))
        ret.append(word)
    return ret
/tsimapiak/dbconnector.py
9,33 → 9,14
current = u""
db = tornado.database.Connection("127.0.0.1", "navi", user="navi", password="navi")
for row in db.query("""
SELECT *, CHAR_LENGTH(navi) AS NL
SELECT *
FROM `metaWords`
ORDER BY NL DESC"""):
if row["partOfSpeech"] in (u"v.", u"vin.", u"vtr."):
current = unicode(row["ipa"])
current = current.replace(ur"ɛ",ur"e").replace(ur".",ur"").replace(ur"ɾ",ur"r") \
.replace(ur"ɪ",ur"ì").replace(ur"ˈ",ur"").replace(ur"'",ur"x") \
.replace(ur"ŋ",ur"ng").replace(ur"j",ur"y").replace(ur"ʔ",ur"'") \
.replace(ur"æ",ur"ä").replace(ur"ˌ",ur"").replace(ur"\t{ts}",ur"ts") \
.replace(ur"ṛ",ur"rr").replace(ur"ḷ",ur"ll").replace(ur"k̚",ur"k ") \
.replace(ur"p̚",ur"p ").replace(ur"t̚",ur"t ").replace(ur"'̚",ur"' ") \
.replace(u"\\",ur"").replace(ur"(",ur"").replace(ur")",ur"") \
.replace(ur"[",ur"").replace(ur"]",ur"").replace(ur" "," ") \
.strip()
current = re.sub(ur" or.*","",current)
current = re.sub(ur"z(.*)engk(.*)e",ur"z\1enk\2e",current)
current = re.sub(ur"t(.*)ì(m|n)\ ",ur"t\1ìng ",current)
current = current.split(ur"$cdot$")
if len(current) == 3:
current = current[0] + u"<0><1>" + current[1] + u"<2>" + current[2]
elif len(current) == 2:
current = current[0] + u"<0><1><2>" + current[1]
else:
current = u"<0><1><2>" + current[0]
WHERE partOfSpeech <> 'num.' AND partOfSpeech <> "prefix"
ORDER BY CHAR_LENGTH(navi) DESC"""):
if row["infixes"]:
ret.append({"id": row["id"], "navi": row["navi"], "infix": row["infixes"].lower(), "type": row["partOfSpeech"]})
else:
current = unicode(row["navi"])
ret.append([row["id"], row["navi"], current, row["partOfSpeech"]])
ret.append({"id": row["id"], "navi": row["navi"], "infix": row["navi"].lower(), "type": row["partOfSpeech"]})
db.close()
return ret
 
46,7 → 27,9
SELECT *
FROM `metaWords`
WHERE navi = ?""",word):
ret.append([row["id"],row["navi"], row["infix"], row["partOfSpeech"]])
if row["infixes"]:
ret.append({"id": row["id"], "navi": row["navi"], "infix": row["infixes"].lower(), "type": row["partOfSpeech"]})
else:
ret.append({"id": row["id"], "navi": row["navi"], "infix": row["navi"].lower(), "type": row["partOfSpeech"]})
db.close()
return ret
return ret
/tsimapiak/parsenum.py
30,20 → 30,17
 
 
numre = \
u"^(?:(" + "|".join(base) + u")zazam??)?" + \
u"^(a?)(?:(" + "|".join(base) + u")zazam??)?" + \
u"(?:(" + "|".join(base) + u")vozam??)?" + \
u"(?:(" + "|".join(base) + u")zam??)?" + \
u"(?:(" + "|".join(base) + u")vo(?:l(?=a|))?)?" + \
u"((?:" + "|".join(rem) + u")|" + \
u"(?:" + "|".join(num) + u"))?$"
u"(?:" + "|".join(num) + u"))?((?:ve)?)(a?)$"
numre = re.compile(numre)
 
def parse(numin):
if type(numin) != unicode:
if numin in (u"a", u"aa", u"ave", u"avea", u"ve", u"vea"):
return None
if numin == u"":
return None
numin = numin.replace(u"í",u"ì").replace(u"á",u"ä")
try:
mat = numre.match(numin).groups()
except:
51,31 → 48,43
numout = 0
numoct = 0
try:
numout += rem.index(mat[4]) + 1
numoct += rem.index(mat[4]) + 1
numout += rem.index(mat[5]) + 1
numoct += rem.index(mat[5]) + 1
except:
try:
numout += num.index(mat[4])
numoct += num.index(mat[4])
numout += num.index(mat[5])
numoct += num.index(mat[5])
except: pass
try:
numout += (base.index(mat[3]) + 1) * 8
numoct += (base.index(mat[3]) + 1) * 10
numout += (base.index(mat[4]) + 1) * 8
numoct += (base.index(mat[4]) + 1) * 10
except: pass
try:
numout += (base.index(mat[2]) + 1) * 8**2
numoct += (base.index(mat[2]) + 1) * 10**2
numout += (base.index(mat[3]) + 1) * 8**2
numoct += (base.index(mat[3]) + 1) * 10**2
except: pass
try:
numout += (base.index(mat[1]) + 1) * 8**3
numoct += (base.index(mat[1]) + 1) * 10**3
numout += (base.index(mat[2]) + 1) * 8**3
numoct += (base.index(mat[2]) + 1) * 10**3
except: pass
try:
numout += (base.index(mat[0]) + 1) * 8**4
numoct += (base.index(mat[0]) + 1) * 10**4
numout += (base.index(mat[1]) + 1) * 8**4
numoct += (base.index(mat[1]) + 1) * 10**4
except: pass
return numout, numoct
retnum = unicode(numout)
if mat[6] != u"":
retnum += u"."
prefs = []
posts = []
if mat[0] != u"":
prefs.append(mat[0])
if mat[6] != u"":
posts.append(mat[6])
if mat[7] != u"":
posts.append(mat[7])
return {"word": {"id": 0, "navi": retnum, "infix": u"", "type": u""}, "pref": [prefs], "post": [posts], "inf": [u"", u"", u""], "len": False, "dec": numout, "oct": numoct}
#return numout, numoct
 
 
if __name__ == "__main__":
print parse(u"mrrvolaw")
print parse(u"mrrvolawvea")
/dev/naviparse.py
File deleted