WebSVN - navi - Path Comparison - / Rev 48 and / Rev 126

Ignore whitespace Rev 48 → Rev 126

/webapp/static/favicon.ico
Cannot display: file marked as a binary type.
svn:mime-type = application/octet-stream

/webapp/static/favicon.ico

Property changes:

Added: svn:mime-type

## -0,0 +1 ##

+application/octet-stream

\ No newline at end of property

Index: webapp/templates/index.html

===================================================================

--- webapp/templates/index.html (nonexistent)

+++ webapp/templates/index.html (revision 126)

@@ -0,0 +1,8 @@

+{% extends "base.html" %}

+{% block title %}Home{% end %}

+{% block body %}

+Number translator - this webapp allows you to translate written-out Na'vi numbers into decimal and octal.

+Parser - this webapp can parse Na'vi sentences into the base words, prefixes, infixes and suffixes. It does not currently translate the words, but that will come.

+{% end %}

Index: webapp/templates/number.html

===================================================================

--- webapp/templates/number.html (revision 48)

+++ webapp/templates/number.html (revision 126)

@@ -17,4 +17,4 @@

-{% end %}

+{% end %}

\ No newline at end of file

Index: webapp/templates/parse.html

===================================================================

--- webapp/templates/parse.html (nonexistent)

+++ webapp/templates/parse.html (revision 126)

@@ -0,0 +1,43 @@

+{% extends "base.html" %}

+{% block title %}Word parser{% end %}

+{% block body %}

+Na'vi word:

+{% if out %}

+
+	Words
+	Parts
+	Data
+
+{% for wor in out %}
+
+	{{ wor["word"]["navi"] }}
+		Infixes:
+		{{ u", ".join(wor["inf"]) }}
+
+
+	Prefixes:
+	{{ u"; ".join(u", ".join(x) for x in wor["pref"]) }}
+
+
+	Postfixes:
+	{{ u"; ".join(u", ".join(x) for x in wor["post"]) }}
+
+
+	Lenited:
+	{{ str(wor["len"]) }}
+
+{% end %}
+

+{% end %}

This program uses Eana Eltu for the list of words and infix positions (but nothing else), created by Tuiq and Taronyu. Thanks also go to the rest of the Learn Na'vi community!

+{% end %}

Index: webapp/main.py

===================================================================

--- webapp/main.py (revision 48)

+++ webapp/main.py (revision 126)

@@ -9,12 +9,13 @@

import os

import re

-from tsimapiak.parsenum import parse

-from tsimapiak.dbconnector import getnavilist

+from tsimapiak import parsenum

+from tsimapiak import dbconnector

+from tsimapiak import parse

class Index(tornado.web.RequestHandler):

def get(self):

- self.redirect("/number")

+ self.render("templates/index.html")

class Number(tornado.web.RequestHandler):

def get(self):

@@ -25,10 +26,12 @@

num = self.get_argument("num").strip()

except:

self.redirect("/number")

- numout = parse(num.replace(" ",""))

+ numout = parsenum.parse(num.replace(" ",""))

if numout == None:

- numout = -1

- self.render("templates/number.html", last=num, numout=numout)

+ numoutt = -1

+ else:

+ numoutt = (numout["dec"], numout["oct"])

+ self.render("templates/number.html", last=num, numout=numoutt)

class Restart(tornado.web.RequestHandler):

def get(self):

@@ -37,20 +40,33 @@

class TestDB(tornado.web.RequestHandler):

def get(self):

- lis = getnavilist()

+ lis = dbconnector.getnavilist()

text = u"id | navi | infix | partofspeech
"

text += u"
".join(u" | ".join(unicode(y) for y in x) for x in lis)

self.write(text)

+class Parse(tornado.web.RequestHandler):
+    """Request handler for the /parse page (sentence parser form)."""
+
+    def get(self):
+        # Initial page view: empty form, no results yet.
+        self.render("templates/parse.html", last="", out=None)
+
+    def post(self):
+        """Parse the submitted "word" argument and render the result table."""
+        try:
+            word = self.get_argument("word")
+        except tornado.web.HTTPError:
+            # get_argument() raises an HTTPError when the argument is missing.
+            # Send the client back to the empty form and stop here: without
+            # the return, execution fell through to parsesent() with `word`
+            # unbound and then tried to render after the redirect had
+            # already finished the request.
+            self.redirect("/parse")
+            return
+        out = parse.parsesent(word)
+        self.render("templates/parse.html", last=word, out=out)

application = tornado.web.Application([

("/", Index),

("/number", Number),

("/restart", Restart),

- ("/testdb", TestDB)

+ ("/testdb", TestDB),

+ ("/parse", Parse)

])

if __name__ == "__main__":

http_server = tornado.httpserver.HTTPServer(application)

http_server.listen(1337)

- tornado.autoreload.start()

- tornado.ioloop.IOLoop.instance().start()

+ #tornado.autoreload.start()

+ tornado.ioloop.IOLoop.instance().start()

\ No newline at end of file

 /tsimapiak/parse.py
 ,0 → 1,131
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+import re
+import dbconnector
+import parsenum
+# Dictionary entries, loaded once at import time. parseword() walks this list
+# in order, reads each entry's "infix" key and returns the first entry that
+# matches.
+wordlist = dbconnector.getnavilist()
+# Candidate fillers for the infix slots of a dictionary pattern: parseword()
+# substitutes infixes1 + infixes2 for "<1><2>" and infixes3 for "<3>".
+# Each tuple ends with u"" so that a slot may also be left empty.
+infixes1 = (u"awn", u"eyk", u"us", u"äp", u"")
+infixes2 = (u"ìyev", u"iyev", u"ìmìy", u"arm", u"asy", u"ilv", u"ìmv", u"imv", u"ìrm", u"irv", u"ìsy", u"aly", u"ary", u"ìly", u"ìry", u"ìlm", u"alm", u"am", u"ay", u"er", u"ìm", u"iv", u"ìy", u"ol", u"")
+infixes3 = (u"äng", u"ats", u"eiy", u"ei", u"uy", u"")
+# Affixes parseword() peels off the text before the matched stem. They are
+# tried in tuple order and the first one matching the end of the text is taken.
+prefixes = (u"tsay", u"fay", u"fra", u"pxe", u"fne", u"tsa", u"pe", u"le", u"nì", u"sä", u"tì", u"ay", u"me", u"fì", u"ke", u"a")
+# Adpositions are accepted as suffixes too; they are listed ahead of the
+# other suffixes in `postfixes`, which parseword() peels off the text after
+# the matched stem, again in tuple order.
+adpositions = (u"mungwrr", u"kxamlä", u"pximaw", u"pxisre", u"tafkip", u"nemfa", u"takip", u"mìkam", u"teri", u"fkip", u"luke", u"pxel", u"pxaw", u"rofa", u"ìla", u"fpi", u"ftu", u"kip", u"lok", u"maw", u"sre", u"sìn", u"vay", u"eo", u"fa", u"hu", u"io", u"ka", u"mì", u"na", u"ne", u"ro", u"ta", u"uo", u"wä", u"äo", u"to")
+postfixes = adpositions + (u"tsyìp", u"eyä", u"ìri", u"ìl", u"it", u"lo", u"ri", u"ru", u"ti", u"ur", u"ve", u"yä", u"ya", u"tu", u"vi", u"yu", u"an", u"ng", u"ke", u"e", u"o", u"l", u"t", u"y", u"a", u"ä", u"r")
+#prefixesn = ur"(?P<npr>(?:(?:fì|tsa)?(?:me|pxe|ay|fra)?|(?:fay)?|(?:tsay)?)(?:fne)?(?:tì|sä)?"
+#prefixesv = ur"(?P<vpr>(?:nì|sä|tì|rä'ä |ke )?)"
+# (initial, replacement) pairs: when a dictionary form does not occur in the
+# input as-is, parseword() retries with a leading `initial` swapped for
+# `replacement` and flags the result as lenited.
+lenit = ((u"px", u"p"), (u"tx", u"t"), (u"kx", u"k"), (u"ts", u"s"), (u"t", u"s"), (u"p", u"f"), (u"k", u"h"), (u"'", u""))
+def _findcenter(pattern, token):
+    """Locate one dictionary pattern inside one input token.
+
+    Returns a (center, infixes, lenited) tuple. `center` is the substring of
+    `token` that realises `pattern`, or u"" when nothing matched; `infixes`
+    is the [in1, in2, in3] list used to fill the pattern's infix slots;
+    `lenited` is True when the match needed an initial swapped via `lenit`.
+    """
+    blank = [u"", u"", u""]
+    if u"<1>" in pattern:
+        # Try every infix combination in tuple order; the first hit wins.
+        for in1 in infixes1:
+            for in2 in infixes2:
+                for in3 in infixes3:
+                    cand = pattern.replace(u"<1><2>", in1 + in2).replace(u"<3>", in3)
+                    if cand != u"" and cand in token:
+                        return cand, [in1, in2, in3], False
+        return u"", blank, False
+    if pattern != u"" and pattern in token:
+        return pattern, blank, False
+    # Retry with the initial consonant swapped; the first pair that produces
+    # a hit is used.
+    for hard, soft in lenit:
+        if pattern.startswith(hard):
+            cand = soft + pattern[len(hard):]
+            if cand != u"" and cand in token:
+                return cand, blank, True
+    # Entries ending in "nga"/"po" may appear shortened to "ng"/"p" before a
+    # suffix. The slice uses the real ending length: the previous hard-coded
+    # [:-3] removed one character too many for the two-character "po".
+    for ending, short in ((u"nga", u"ng"), (u"po", u"p")):
+        if pattern.endswith(ending):
+            cand = pattern[:-len(ending)] + short
+            if cand in token:
+                return cand, blank, False
+    return u"", blank, False
+
+def _stripaffixes(text, affixes, fromend):
+    """Peel known affixes off `text` until nothing more can be removed.
+
+    Affixes are taken from the end of `text` when `fromend` is true, from the
+    start otherwise. On each pass the first entry of `affixes` that fits is
+    used; an affix is never accepted twice. Returns (found, leftover), where
+    a non-empty `leftover` means the text could not be fully explained.
+    """
+    found = []
+    previous = None
+    while text != u"" and text != previous:
+        previous = text
+        for affix in affixes:
+            fits = text.endswith(affix) if fromend else text.startswith(affix)
+            if fits:
+                if affix not in found:
+                    found.append(affix)
+                    text = text[:-len(affix)] if fromend else text[len(affix):]
+                # Either way stop at the first fitting affix; if it was a
+                # repeat, `text` is unchanged and the outer loop ends.
+                break
+    return found, text
+
+def parseword(wordin):
+    """Match the start of a token list against the dictionary.
+
+    wordin -- non-empty list of tokens (the not-yet-parsed rest of a sentence).
+
+    Returns a dict with the keys:
+      "word" -- the matching entry from `wordlist`, or a placeholder whose
+                "navi" is the first token in square brackets when no entry
+                matched;
+      "pref" -- one list of stripped prefixes per consumed token;
+      "post" -- one list of stripped suffixes per consumed token;
+      "inf"  -- the three infixes found ([u"", u"", u""] if none);
+      "len"  -- True when a lenited form was matched.
+    For an unmatched token the affix fields are empty instead of carrying
+    over whatever the last dictionary entry tried happened to leave behind.
+    """
+    ret = {"word": {"id": 0, "navi": u"[" + wordin[0] + u"]", "infix": u"", "type": u""},
+           "pref": [[]], "post": [[]], "inf": [u"", u"", u""], "len": False}
+    for word in wordlist:
+        parts = word["infix"].split(u" ")
+        if len(wordin) < len(parts):
+            # Multi-word entry needs more tokens than are left.
+            continue
+        foundprefs = []
+        foundposts = []
+        foundins = [u"", u"", u""]
+        lenited = False
+        for pattern, token in zip(parts, wordin):
+            center, ins, soft = _findcenter(pattern, token)
+            if center == u"":
+                break
+            # The stem must occur exactly once so that the token splits
+            # cleanly into "before" and "after".
+            pieces = token.split(center)
+            if len(pieces) != 2:
+                break
+            prefs, rest = _stripaffixes(pieces[0], prefixes, True)
+            if rest != u"":
+                break
+            posts, rest = _stripaffixes(pieces[1], postfixes, False)
+            if rest != u"":
+                break
+            foundprefs.append(prefs)
+            foundposts.append(posts)
+            if u"<1>" in pattern:
+                # Only the part carrying the infix slots reports infixes, so
+                # later parts of a multi-word entry cannot blank them out.
+                foundins = ins
+            lenited = lenited or soft
+        else:
+            # Every part of the entry matched its token.
+            ret["word"] = word
+            ret["pref"] = foundprefs
+            ret["post"] = foundposts
+            ret["inf"] = foundins
+            ret["len"] = lenited
+            break
+    return ret
+def parsesent(sent):
+    """Split a sentence into tokens and parse each one.
+
+    sent -- unicode string typed by the user.
+
+    The text is lower-cased, the typographic apostrophe is normalised to "'",
+    and every character other than \\w, "ì", "ä", "'" and space is dropped.
+    Each token is first offered to parsenum.parse(); if that returns None the
+    remaining tokens are handed to parseword(), which may consume several of
+    them for a multi-word dictionary entry.
+
+    Returns a list of result dicts, one per parsed word; an input with no
+    usable characters yields an empty list.
+    """
+    sent = sent.strip().lower().replace(u"’", u"'")
+    sent = re.sub(ur"[^\wìä' ]",u"",sent)
+    # Drop empty tokens: removing punctuation can leave leading/trailing or
+    # doubled spaces, and an empty token must not reach the parsers.
+    tokens = [tok for tok in sent.split(u" ") if tok != u""]
+    ret = []
+    left = len(tokens)
+    # "> 0" rather than plain truthiness so that an entry consuming more
+    # tokens than remain cannot send the counter negative and loop on.
+    while left > 0:
+        word = parsenum.parse(tokens[len(tokens)-left])
+        if word is None:
+            word = parseword(tokens[-left:])
+        # A matched entry's "navi" may span several space-separated words.
+        left -= len(word["word"]["navi"].split(" "))
+        ret.append(word)
+    return ret

 /tsimapiak/dbconnector.py
 ,33 → 9,14
     current = u""
     db = tornado.database.Connection("127.0.0.1", "navi", user="navi", password="navi")
     for row in db.query("""
-    SELECT *, CHAR_LENGTH(navi) AS NL
+    SELECT *
     FROM `metaWords`
-    ORDER BY NL DESC"""):
-        if row["partOfSpeech"] in (u"v.", u"vin.", u"vtr."):
-            current = unicode(row["ipa"])
-            current = current.replace(ur"ɛ",ur"e").replace(ur".",ur"").replace(ur"ɾ",ur"r") \
-           .replace(ur"ɪ",ur"ì").replace(ur"ˈ",ur"").replace(ur"'",ur"x") \
-           .replace(ur"ŋ",ur"ng").replace(ur"j",ur"y").replace(ur"ʔ",ur"'") \
-           .replace(ur"æ",ur"ä").replace(ur"ˌ",ur"").replace(ur"\t{ts}",ur"ts") \
-           .replace(ur"ṛ",ur"rr").replace(ur"ḷ",ur"ll").replace(ur"k̚",ur"k ") \
-           .replace(ur"p̚",ur"p ").replace(ur"t̚",ur"t ").replace(ur"'̚",ur"' ") \
-           .replace(u"\\",ur"").replace(ur"(",ur"").replace(ur")",ur"") \
-           .replace(ur"[",ur"").replace(ur"]",ur"").replace(ur"  "," ") \
-           .strip()
-            current = re.sub(ur" or.*","",current)
-            current = re.sub(ur"z(.*)engk(.*)e",ur"z\1enk\2e",current)
-            current = re.sub(ur"t(.*)ì(m|n)\ ",ur"t\1ìng ",current)
-            current = current.split(ur"$cdot$")
-            if len(current) == 3:
-                current = current[0] + u"<0><1>" + current[1] + u"<2>" + current[2]
-            elif len(current) == 2:
-                current = current[0] + u"<0><1><2>" + current[1]
-            else:
-                current = u"<0><1><2>" + current[0]
+    WHERE partOfSpeech <> 'num.' AND partOfSpeech <> "prefix"
+    ORDER BY CHAR_LENGTH(navi) DESC"""):
+        if row["infixes"]:
+            ret.append({"id": row["id"], "navi": row["navi"], "infix": row["infixes"].lower(), "type": row["partOfSpeech"]})
         else:
-            current = unicode(row["navi"])
-        ret.append([row["id"], row["navi"], current, row["partOfSpeech"]])
+            ret.append({"id": row["id"], "navi": row["navi"], "infix": row["navi"].lower(), "type": row["partOfSpeech"]})
     db.close()
     return ret
 ,7 → 27,9
     SELECT *
     FROM `metaWords`
     WHERE navi = ?""",word):
-        ret.append([row["id"],row["navi"], row["infix"], row["partOfSpeech"]])
+        if row["infixes"]:
+            ret.append({"id": row["id"], "navi": row["navi"], "infix": row["infixes"].lower(), "type": row["partOfSpeech"]})
+        else:
+            ret.append({"id": row["id"], "navi": row["navi"], "infix": row["navi"].lower(), "type": row["partOfSpeech"]})
     db.close()
-    return ret
+    return ret

 /tsimapiak/parsenum.py
 ,20 → 30,17
 numre = \
-      u"^(?:(" + "|".join(base) + u")zazam??)?" + \
+      u"^(a?)(?:(" + "|".join(base) + u")zazam??)?" + \
       u"(?:(" + "|".join(base) + u")vozam??)?" + \
       u"(?:(" + "|".join(base) + u")zam??)?" + \
       u"(?:(" + "|".join(base) + u")vo(?:l(?=a|))?)?" + \
       u"((?:" + "|".join(rem) + u")|" + \
-      u"(?:" + "|".join(num) + u"))?$"
+      u"(?:" + "|".join(num) + u"))?((?:ve)?)(a?)$"
 numre = re.compile(numre)
 def parse(numin):
-    if type(numin) != unicode:
+    if numin in (u"a", u"aa", u"ave", u"avea", u"ve", u"vea"):
         return None
-    if numin == u"":
-        return None
-    numin = numin.replace(u"í",u"ì").replace(u"á",u"ä")
     try:
         mat = numre.match(numin).groups()
     except:
 ,31 → 48,43
     numout = 0
     numoct = 0
     try:
-        numout += rem.index(mat[4]) + 1
-        numoct += rem.index(mat[4]) + 1
+        numout += rem.index(mat[5]) + 1
+        numoct += rem.index(mat[5]) + 1
     except:
         try:
-            numout += num.index(mat[4])
-            numoct += num.index(mat[4])
+            numout += num.index(mat[5])
+            numoct += num.index(mat[5])
         except: pass
     try:
-        numout += (base.index(mat[3]) + 1) * 8
-        numoct += (base.index(mat[3]) + 1) * 10
+        numout += (base.index(mat[4]) + 1) * 8
+        numoct += (base.index(mat[4]) + 1) * 10
     except: pass
     try:
-        numout += (base.index(mat[2]) + 1) * 8**2
-        numoct += (base.index(mat[2]) + 1) * 10**2
+        numout += (base.index(mat[3]) + 1) * 8**2
+        numoct += (base.index(mat[3]) + 1) * 10**2
     except: pass
     try:
-        numout += (base.index(mat[1]) + 1) * 8**3
-        numoct += (base.index(mat[1]) + 1) * 10**3
+        numout += (base.index(mat[2]) + 1) * 8**3
+        numoct += (base.index(mat[2]) + 1) * 10**3
     except: pass
     try:
-        numout += (base.index(mat[0]) + 1) * 8**4
-        numoct += (base.index(mat[0]) + 1) * 10**4
+        numout += (base.index(mat[1]) + 1) * 8**4
+        numoct += (base.index(mat[1]) + 1) * 10**4
     except: pass
-    return numout, numoct
+    retnum = unicode(numout)
+    if mat[6] != u"":
+        retnum += u"."
+    prefs = []
+    posts = []
+    if mat[0] != u"":
+        prefs.append(mat[0])
+    if mat[6] != u"":
+        posts.append(mat[6])
+    if mat[7] != u"":
+        posts.append(mat[7])
+    return {"word": {"id": 0, "navi": retnum, "infix": u"", "type": u""}, "pref": [prefs], "post": [posts], "inf": [u"", u"", u""], "len": False, "dec": numout, "oct": numoct}
+    #return numout, numoct
 if __name__ == "__main__":
-    print parse(u"mrrvolaw")
+    print parse(u"mrrvolawvea")

Subversion Repositories navi

Compare Revisions

Ignore whitespace Rev 48 → Rev 126