WebSVN - navi - Path Comparison - / Rev 3 and / Rev 122

Ignore whitespace Rev 3 → Rev 122

 /tsimapiak/parse.py
 ,0 → 1,131
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+import re
+import dbconnector
+import parsenum
+# Dictionary entries, fetched from the database once at import time;
+# parseword() scans them in the order returned.
+wordlist = dbconnector.getnavilist()
+# Candidate infixes for the <1>, <2> and <3> slots of an entry's infix
+# pattern.  The trailing empty string in each tuple means "slot left empty".
+infixes1 = (u"awn", u"eyk", u"us", u"äp", u"")
+infixes2 = (u"ìyev", u"iyev", u"ìmìy", u"arm", u"asy", u"ilv", u"ìmv", u"imv", u"ìrm", u"irv", u"ìsy", u"aly", u"ary", u"ìly", u"ìry", u"ìlm", u"alm", u"am", u"ay", u"er", u"ìm", u"iv", u"ìy", u"ol", u"")
+infixes3 = (u"äng", u"ats", u"eiy", u"ei", u"uy", u"")
+# Prefixes parseword() strips from the text in front of the matched stem.
+prefixes = (u"tsay", u"fay", u"fra", u"pxe", u"fne", u"tsa", u"pe", u"le", u"nì", u"sä", u"tì", u"ay", u"me", u"fì", u"ke", u"a")
+# Adpositions; they are also accepted as suffixes (see postfixes below).
+adpositions = (u"mungwrr", u"kxamlä", u"pximaw", u"pxisre", u"tafkip", u"nemfa", u"takip", u"mìkam", u"teri", u"fkip", u"luke", u"pxel", u"pxaw", u"rofa", u"ìla", u"fpi", u"ftu", u"kip", u"lok", u"maw", u"sre", u"sìn", u"vay", u"eo", u"fa", u"hu", u"io", u"ka", u"mì", u"na", u"ne", u"ro", u"ta", u"uo", u"wä", u"äo", u"to")
+# Suffixes parseword() strips from the text after the matched stem.
+postfixes = adpositions + (u"tsyìp", u"eyä", u"ìri", u"ìl", u"it", u"lo", u"ri", u"ru", u"ti", u"ur", u"ve", u"yä", u"ya", u"tu", u"vi", u"yu", u"an", u"ng", u"ke", u"e", u"o", u"l", u"t", u"y", u"a", u"ä", u"r")
+#prefixesn = ur"(?P<npr>(?:(?:fì|tsa)?(?:me|pxe|ay|fra)?|(?:fay)?|(?:tsay)?)(?:fne)?(?:tì|sä)?"
+#prefixesv = ur"(?P<vpr>(?:nì|sä|tì|rä'ä |ke )?)"
+# (initial, replacement) pairs: when a stem is not found as-is, parseword()
+# retries it with a leading `initial` swapped for `replacement`.
+lenit = ((u"px", u"p"), (u"tx", u"t"), (u"kx", u"k"), (u"ts", u"s"), (u"t", u"s"), (u"p", u"f"), (u"k", u"h"), (u"'", u""))
+def parseword(wordin):
+    """Match the leading word(s) of ``wordin`` against the dictionary.
+
+    ``wordin`` is a non-empty sequence of unicode words.  Every entry of the
+    module-level ``wordlist`` is tried in order; an entry whose ``infix``
+    pattern consists of several space-separated parts is compared against
+    that many leading items of ``wordin``.  For each part the stem is located
+    inside the input word (trying infix combinations, then lenition, then the
+    "nga"/"po" stem shortenings) and whatever surrounds it must decompose
+    completely into known prefixes and postfixes.
+
+    Returns a dict with the keys:
+      "word" -- the matching dictionary entry, or a placeholder entry whose
+                "navi" is the first input word in square brackets
+      "pref" -- list (one item per part) of lists of prefixes found
+      "post" -- list (one item per part) of lists of postfixes found
+      "inf"  -- the three infixes found for the last part examined
+      "len"  -- True if a lenited stem was matched
+    When nothing matches, the affix fields are empty.
+    """
+    ret = {"word": {"id": 0, "navi": u"[" + wordin[0] + u"]", "infix": u"", "type": u""}}
+    # Bind the loop state up front so an empty wordlist cannot leave these
+    # names undefined.
+    foundit = False
+    foundword = None
+    foundprefs = []
+    foundposts = []
+    foundins = [u"", u"", u""]
+    lenited = False
+    for word in wordlist:
+        foundit = True
+        foundprefs = []
+        foundposts = []
+        lenited = False
+        splitword = word["infix"].split(u" ")
+        if len(wordin) < len(splitword):
+            # Not enough input words left for this multi-word entry.
+            foundit = False
+            continue
+        for wor in range(len(splitword)):
+            foundprefs.append([])
+            foundposts.append([])
+            center = u""
+            foundins = [u"", u"", u""]
+            if u"<1>" in splitword[wor]:
+                # Try every infix combination until the filled-in pattern
+                # occurs inside the input word.
+                for in1 in infixes1:
+                    for in2 in infixes2:
+                        for in3 in infixes3:
+                            candidate = splitword[wor].replace(u"<1><2>",in1+in2).replace(u"<3>",in3)
+                            if candidate in wordin[wor]:
+                                center = candidate
+                                foundins = [in1, in2, in3]
+                                break
+                        if center != u"": break
+                    if center != u"": break
+            else:
+                if splitword[wor] in wordin[wor]:
+                    center = splitword[wor]
+                if center == u"":
+                    # Retry with the initial consonant lenited.
+                    for i in lenit:
+                        temp = u""
+                        if splitword[wor].startswith(i[0]):
+                            temp = i[1] + splitword[wor][len(i[0]):]
+                            if temp in wordin[wor]:
+                                lenited = True
+                                center = temp
+                if center == u"":
+                    # Stems ending in "nga"/"po" may appear without their
+                    # final vowel.
+                    if splitword[wor].endswith(u"nga"):
+                        temp = splitword[wor][:-3] + u"ng"
+                        if temp in wordin[wor]:
+                            center = temp
+                    if splitword[wor].endswith(u"po"):
+                        # "po" is two characters long: drop exactly those
+                        # two before re-adding the "p".
+                        temp = splitword[wor][:-2] + u"p"
+                        if temp in wordin[wor]:
+                            center = temp
+            if center == u"":
+                foundit = False
+                break
+            temp = wordin[wor].split(center)
+            if len(temp) != 2:
+                # The stem must occur exactly once in the input word.
+                foundit = False
+                break
+            pref, posf = temp
+            # Peel known prefixes off the end of the leading remainder until
+            # nothing changes; each prefix may be used once per part.
+            last = u""
+            while last != pref:
+                last = pref
+                for pre in prefixes:
+                    if pref != u"":
+                        if pref.endswith(pre):
+                            if pre in foundprefs[wor]:
+                                break
+                            foundprefs[wor].append(pre)
+                            pref = pref[:-len(pre)]
+                            break
+            if pref != u"":
+                foundit = False
+                break
+            # Same for postfixes, peeled off the front of the trailing
+            # remainder.
+            last = u""
+            while last != posf:
+                last = posf
+                for pos in postfixes:
+                    if posf != u"":
+                        if posf.startswith(pos):
+                            if pos in foundposts[wor]:
+                                break
+                            foundposts[wor].append(pos)
+                            posf = posf[len(pos):]
+                            break
+            if posf != u"":
+                foundit = False
+                break
+        if foundit:
+            foundword = word
+            break
+    if foundit:
+        ret["word"] = foundword
+        ret["pref"] = foundprefs
+        ret["post"] = foundposts
+        ret["inf"] = foundins
+        ret["len"] = lenited
+    else:
+        # Do not report affixes collected while testing a candidate that
+        # ultimately failed.
+        ret["pref"] = [[]]
+        ret["post"] = [[]]
+        ret["inf"] = [u"", u"", u""]
+        ret["len"] = False
+    return ret
+def parsesent(sent):
+    """Split a sentence into words and parse each of them.
+
+    The input is lower-cased, typographic apostrophes are normalised to "'",
+    and every character that is not a word character, "ì", "ä", "'" or a
+    space is removed.  Each remaining word is first offered to
+    ``parsenum.parse``; if that returns None the rest of the sentence is
+    handed to ``parseword``, which may consume more than one word.
+
+    Returns a list of the result dicts, in sentence order (empty for input
+    that contains no words).
+    """
+    sent = sent.strip().lower().replace(u"’", u"'")
+    sent = re.sub(ur"[^\wìä' ]",u"",sent)
+    sent = re.sub(ur"\ +",u" ",sent)
+    # Stripping punctuation can leave blanks at either end; drop the empty
+    # tokens so they are not parsed as words.
+    sent = [w for w in sent.split(u" ") if w != u""]
+    ret = []
+    left = len(sent)
+    # "> 0" rather than truthiness: a result spanning more words than remain
+    # must end the loop instead of indexing past the sentence.
+    while left > 0:
+        word = parsenum.parse(sent[len(sent)-left])
+        if word is None:
+            word = parseword(sent[-left:])
+        # Advance by the number of words the matched entry covers.
+        left -= len(word["word"]["navi"].split(" "))
+        ret.append(word)
+    return ret

 /tsimapiak/dbconnector.py
 ,0 → 1,35
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+import tornado.database
+import re
+def getnavilist():
+    """Return all dictionary entries except numbers and prefixes.
+
+    Rows come from the ``metaWords`` table ordered by descending length of
+    the ``navi`` column.  Each row becomes a dict with the keys "id",
+    "navi", "infix" and "type"; "infix" is the lower-cased ``infixes``
+    column, falling back to the lower-cased ``navi`` column when ``infixes``
+    is empty.
+    """
+    ret = []
+    db = tornado.database.Connection("127.0.0.1", "navi", user="navi", password="navi")
+    try:
+        for row in db.query("""
+    SELECT *
+    FROM `metaWords`
+    WHERE partOfSpeech <> 'num.' AND partOfSpeech <> "prefix"
+    ORDER BY CHAR_LENGTH(navi) DESC"""):
+            infix = (row["infixes"] or row["navi"]).lower()
+            ret.append({"id": row["id"], "navi": row["navi"], "infix": infix, "type": row["partOfSpeech"]})
+    finally:
+        # Release the connection even if the query fails.
+        db.close()
+    return ret
+def getnavi(word):
+    """Return the dictionary entries whose ``navi`` column equals ``word``.
+
+    Each row becomes a dict with the keys "id", "navi", "infix" and "type";
+    "infix" is the lower-cased ``infixes`` column, falling back to the
+    lower-cased ``navi`` column when ``infixes`` is empty.
+    """
+    ret = []
+    db = tornado.database.Connection("127.0.0.1", "navi", user="navi", password="navi")
+    try:
+        # tornado.database hands the parameters to the MySQLdb cursor, which
+        # uses "%s" placeholders rather than "?".
+        for row in db.query("""
+    SELECT *
+    FROM `metaWords`
+    WHERE navi = %s""",word):
+            infix = (row["infixes"] or row["navi"]).lower()
+            ret.append({"id": row["id"], "navi": row["navi"], "infix": infix, "type": row["partOfSpeech"]})
+    finally:
+        # Release the connection even if the query fails.
+        db.close()
+    return ret

 /tsimapiak/parsenum.py
 ,20 → 30,17
 numre = \
-      u"^(?:(" + "|".join(base) + u")zazam??)?" + \
+      u"^(a?)(?:(" + "|".join(base) + u")zazam??)?" + \
       u"(?:(" + "|".join(base) + u")vozam??)?" + \
       u"(?:(" + "|".join(base) + u")zam??)?" + \
       u"(?:(" + "|".join(base) + u")vo(?:l(?=a|))?)?" + \
       u"((?:" + "|".join(rem) + u")|" + \
-      u"(?:" + "|".join(num) + u"))?$"
+      u"(?:" + "|".join(num) + u"))?((?:ve)?)(a?)$"
 numre = re.compile(numre)
 def parse(numin):
-    if type(numin) != unicode:
+    if numin in (u"a", u"aa", u"ave", u"avea", u"ve", u"vea"):
         return None
-    if numin == u"":
-        return None
-    numin = numin.replace(u"í",u"ì").replace(u"á",u"ä")
     try:
         mat = numre.match(numin).groups()
     except:
 ,31 → 48,43
     numout = 0
     numoct = 0
     try:
-        numout += rem.index(mat[4]) + 1
-        numoct += rem.index(mat[4]) + 1
+        numout += rem.index(mat[5]) + 1
+        numoct += rem.index(mat[5]) + 1
     except:
         try:
-            numout += num.index(mat[4])
-            numoct += num.index(mat[4])
+            numout += num.index(mat[5])
+            numoct += num.index(mat[5])
         except: pass
     try:
-        numout += (base.index(mat[3]) + 1) * 8
-        numoct += (base.index(mat[3]) + 1) * 10
+        numout += (base.index(mat[4]) + 1) * 8
+        numoct += (base.index(mat[4]) + 1) * 10
     except: pass
     try:
-        numout += (base.index(mat[2]) + 1) * 8**2
-        numoct += (base.index(mat[2]) + 1) * 10**2
+        numout += (base.index(mat[3]) + 1) * 8**2
+        numoct += (base.index(mat[3]) + 1) * 10**2
     except: pass
     try:
-        numout += (base.index(mat[1]) + 1) * 8**3
-        numoct += (base.index(mat[1]) + 1) * 10**3
+        numout += (base.index(mat[2]) + 1) * 8**3
+        numoct += (base.index(mat[2]) + 1) * 10**3
     except: pass
     try:
-        numout += (base.index(mat[0]) + 1) * 8**4
-        numoct += (base.index(mat[0]) + 1) * 10**4
+        numout += (base.index(mat[1]) + 1) * 8**4
+        numoct += (base.index(mat[1]) + 1) * 10**4
     except: pass
-    return numout, numoct
+    retnum = unicode(numout)
+    if mat[6] != u"":
+        retnum += u"."
+    prefs = []
+    posts = []
+    if mat[0] != u"":
+        prefs.append(mat[0])
+    if mat[6] != u"":
+        posts.append(mat[6])
+    if mat[7] != u"":
+        posts.append(mat[7])
+    return {"word": {"id": 0, "navi": retnum, "infix": u"", "type": u""}, "pref": [prefs], "post": [posts], "inf": [u"", u"", u""], "len": False, "dec": numout, "oct": numoct}
+    #return numout, numoct
 if __name__ == "__main__":
-    print parse(u"mrrvolaw")
+    print parse(u"mrrvolawvea")

 /webapp/main.py
 ,15 → 5,17
 import tornado.ioloop
 import tornado.web
 import tornado.autoreload
-import tornado.database
 import os
+import re
-from navi.parsenum import parse
+from tsimapiak import parsenum
+from tsimapiak import dbconnector
+from tsimapiak import parse
 class Index(tornado.web.RequestHandler):
     def get(self):
-        self.redirect("/number")
+        self.render("templates/index.html")
 class Number(tornado.web.RequestHandler):
     def get(self):
 ,32 → 26,47
             num = self.get_argument("num").strip()
         except:
             self.redirect("/number")
-        numout = parse(num.replace(" ",""))
+        numout = parsenum.parse(num.replace(" ",""))
         if numout == None:
-            numout = -1
-        self.render("templates/number.html", last=num, numout=numout)
+            numoutt = -1
+        else:
+            numoutt = (numout["dec"], numout["oct"])
+        self.render("templates/number.html", last=num, numout=numoutt)
 class Restart(tornado.web.RequestHandler):
     def get(self):
         os.system("/usr/bin/restartnavi")
 class TestDB(tornado.web.RequestHandler):
     def get(self):
-        text = ""
-        db = tornado.database.Connection("127.0.0.1", "navi", user="navi", password="navi")
-        for thing in db.query("SELECT * FROM test"):
-            text = "<br />".join((text, str(thing.id) + " - " + thing.asd))
+        # Dump the word list, one entry per line, columns in header order.
+        lis = dbconnector.getnavilist()
+        text = u"id | navi | infix | partofspeech<br />"
+        # Each entry is a dict: look the values up by key, because iterating
+        # the dict itself would yield the key names instead.
+        text += u"<br />".join(u" | ".join(unicode(x[k]) for k in ("id", "navi", "infix", "type")) for x in lis)
         self.write(text)
+class Parse(tornado.web.RequestHandler):
+    """Word parser page: GET shows the empty form, POST shows the parse."""
+    def get(self):
+        self.render("templates/parse.html", last="", out=None)
+    def post(self):
+        try:
+            word = self.get_argument("word")
+        except tornado.web.HTTPError:
+            # No "word" field submitted: go back to the empty form and stop,
+            # since there is nothing to parse.
+            self.redirect("/parse")
+            return
+        out = parse.parsesent(word)
+        self.render("templates/parse.html", last=word, out=out)
 application = tornado.web.Application([
     ("/", Index),
     ("/number", Number),
     ("/restart", Restart),
-    ("/testdb", TestDB)
+    ("/testdb", TestDB),
+    ("/parse", Parse)
 ])
 if __name__ == "__main__":
     http_server = tornado.httpserver.HTTPServer(application)
     http_server.listen(1337)
-    tornado.autoreload.start()
-    tornado.ioloop.IOLoop.instance().start()
+    #tornado.autoreload.start()
+    tornado.ioloop.IOLoop.instance().start()

/webapp/templates/number.html
17,4 → 17,4
<script type="text/javascript">
document.getElementById("num").focus();
</script>
{% end %}
{% end %}

 /webapp/templates/parse.html
 ,0 → 1,43
+{% extends "base.html" %}
+{% block title %}Word parser{% end %}
+{% block body %}
+<b>Na'vi word:</b><br />
+<form action="/parse" method="post">
+<input id="word" name="word" type="text" value="{{last}}" style="width: 100%;" />
+<input name="btn" type="submit" value="Parse!" />
+</form>
+{% if out %}
+<table border="1">
+<tr>
+    <th>Words</th>
+    <th>Parts</th>
+    <th>Data</th>
+</tr>
+{% for wor in out %}
+<tr>
+    <td rowspan="4">{{ wor["word"]["navi"] }}</td>
+    <td>Infixes:</td>
+    <td>{{ u", ".join(wor["inf"]) }}</td>
+</tr>
+<tr>
+    <td>Prefixes:</td>
+    <td>{{ u"; ".join(u", ".join(x) for x in wor["pref"]) }}</td>
+</tr>
+<tr>
+    <td>Postfixes:</td>
+    <td>{{ u"; ".join(u", ".join(x) for x in wor["post"]) }}</td>
+</tr>
+<tr>
+    <td>Lenited:</td>
+    <td>{{ str(wor["len"]) }}</td>
+</tr>
+{% end %}
+</table>
+{% end %}
+<script type="text/javascript">
+document.getElementById("word").focus();
+</script>
+<p>This program uses Eana Eltu for the list of words and infix positions (but nothing else), created by Tuiq and Taronyu. Thanks also go to the rest of the Learn Na'vi community!</p>
+{% end %}

 /webapp/templates/base.html
 ,7 → 14,7
         text-align: center;
         font-size: 52px;
 }
-h1 {
+h2 {
         text-align: center;
         font-size: 24px;
 }

Subversion Repositories navi

Compare Revisions

Ignore whitespace Rev 3 → Rev 122