WebSVN - navi - Path Comparison - / Rev 89 and / Rev 113

Ignore whitespace Rev 89 → Rev 113

 /webapp/templates/parse2.html
 ,7 → 17,7
 </tr>
 {% for wor in out %}
 <tr>
-    <td rowspan="3">{{ wor["word"]["navi"] }}</td>
+    <td rowspan="4">{{ wor["word"]["navi"] }}</td>
     <td>Infixes:</td>
     <td>{{ u", ".join(wor["inf"]) }}</td>
 </tr>
 ,6 → 29,10
     <td>Postfixes:</td>
     <td>{{ u"; ".join(u", ".join(x) for x in wor["post"]) }}</td>
 </tr>
+<tr>
+    <td>Lenited:</td>
+    <td>{{ str(wor["len"]) }}</td>
+</tr>
 {% end %}
 </table>
 {% end %}
 ,4 → 39,5
 <script type="text/javascript">
 document.getElementById("word").focus();
 </script>
-{% end %}
+<b>This program uses Eana Eltu for the list of words and infix positions (but nothing else), created by Tuiq and Taronyu. Thanks also go to the rest of the Learn Na'vi community!</b>
+{% end %}

 /webapp/main.py
 ,8 → 29,10
             self.redirect("/number")
         numout = parsenum.parse(num.replace(" ",""))
         if numout == None:
-            numout = -1
-        self.render("templates/number.html", last=num, numout=numout)
+            numoutt = -1
+        else:
+            numoutt = [numout["dec"], numout["oct"]]
+        self.render("templates/number.html", last=num, numout=numoutt)
 class Restart(tornado.web.RequestHandler):
     def get(self):

 /tsimapiak/parse2.py
 ,23 → 3,28
 import re
 import dbconnector
+import parsenum
 wordlist = dbconnector.getnavilist()
-infixes1 = [u"awn", u"eyk", u"us", u"äp", u""]
-infixes2 = [u"ìyev", u"iyev", u"ìmìy", u"arm", u"asy", u"ilv", u"ìmv", u"imv", u"ìrm", u"irv", u"ìsy", u"aly", u"ary", u"ìly", u"ìry", u"ìlm", u"alm", u"am", u"ay", u"er", u"ìm", u"iv", u"ìy", u"ol", u""]
-infixes3 = [u"äng", u"ats", u"eiy", u"ei", u"uy", u""]
-prefixes = [u"tsay", u"fay", u"tsa", u"fra", u"pxe", u"ay", u"me", u"pe", u"le", u"nì", u"sä", u"tì", u"ke", u"fì", u"a"]
-postfixes = [u"eyä", u"ìri", u"an", u"ìl", u"it", u"lo", u"ng", u"ri", u"ru", u"ti", u"ur", u"ve", u"yä", u"ya", u"tu", u"vi", u"yu", u"e", u"l", u"o", u"t", u"y", u"a", u"ä"]
+infixes1 = (u"awn", u"eyk", u"us", u"äp", u"")
+infixes2 = (u"ìyev", u"iyev", u"ìmìy", u"arm", u"asy", u"ilv", u"ìmv", u"imv", u"ìrm", u"irv", u"ìsy", u"aly", u"ary", u"ìly", u"ìry", u"ìlm", u"alm", u"am", u"ay", u"er", u"ìm", u"iv", u"ìy", u"ol", u"")
+infixes3 = (u"äng", u"ats", u"eiy", u"ei", u"uy", u"")
+prefixes = (u"tsay", u"fay", u"fra", u"pxe", u"fne", u"tsa", u"pe", u"le", u"nì", u"sä", u"tì", u"ay", u"me", u"fì", u"ke", u"a")
+adpositions = (u"mungwrr", u"kxamlä", u"pximaw", u"pxisre", u"tafkip", u"nemfa", u"takip", u"mìkam", u"teri", u"fkip", u"luke", u"pxel", u"pxaw", u"rofa", u"ìla", u"fpi", u"ftu", u"kip", u"lok", u"maw", u"sre", u"sìn", u"vay", u"eo", u"fa", u"hu", u"io", u"ka", u"mì", u"na", u"ne", u"ro", u"ta", u"uo", u"wä", u"äo", u"to")
+postfixes = adpositions + (u"tsyìp", u"eyä", u"ìri", u"ìl", u"it", u"lo", u"ri", u"ru", u"ti", u"ur", u"ve", u"yä", u"ya", u"tu", u"vi", u"yu", u"an", u"ng", u"e", u"o", u"l", u"t", u"y", u"a", u"ä")
 #prefixesn = ur"(?P<npr>(?:(?:fì|tsa)?(?:me|pxe|ay|fra)?|(?:fay)?|(?:tsay)?)(?:fne)?(?:tì|sä)?"
 #prefixesv = ur"(?P<vpr>(?:nì|sä|tì|rä'ä |ke )?)"
+lenit = ((u"px", u"p"), (u"tx", u"t"), (u"kx", u"k"), (u"ts", u"s"), (u"t", u"s"), (u"p", u"f"), (u"k", u"h"), (u"'", u""))
 def parseword(wordin):
-    ret = {"word": {"id": 0, "navi": u"[" + u" ".wordin[0] + u"]", "infix": u"", "type": u""}}
+    ret = {"word": {"id": 0, "navi": u"[" + wordin[0] + u"]", "infix": u"", "type": u""}}
     for word in wordlist:
         foundit = True
         foundprefs = []
         foundposts = []
+        lenited = False
         splitword = word["infix"].split(u" ")
         if len(wordin) < len(splitword):
             foundit = False
 ,21 → 51,44
             else:
                 if splitword[wor] in wordin[wor]:
                     center = splitword[wor]
+                if center == u"":
+                    for i in lenit:
+                        temp = u""
+                        if splitword[wor].startswith(i[0]):
+                            temp = i[1] + splitword[wor][len(i[0]):]
+                            if temp in wordin[wor]:
+                                lenited = True
+                                center = temp
+                if center == u"":
+                    if splitword[wor].endswith(u"nga"):
+                        temp = splitword[wor][:-3] + u"ng"
+                        if temp in wordin[wor]:
+                            center = temp
+                    if splitword[wor].endswith(u"po"):
+                        temp = splitword[wor][:-3] + u"p"
+                        if temp in wordin[wor]:
+                            center = temp
             if center == u"":
                 foundit = False
                 break
-            pref, posf = wordin[wor].split(center)
+            temp = wordin[wor].split(center)
+            if len(temp) != 2:
+                foundit = False
+                break
+            pref, posf = temp
             for pre in prefixes:
-                if pref.endswith(pre):
-                    foundprefs[wor].append(pre)
-                    pref = pref[:-len(pre)]
+                if pref != u"":
+                    if pref.endswith(pre):
+                        foundprefs[wor].append(pre)
+                        pref = pref[:-len(pre)]
             if pref != u"":
                 foundit = False
                 break
             for pos in postfixes:
-                if posf.startswith(pos):
-                    foundposts[wor].append(pos)
-                    posf = posf[len(pos):]
+                if posf != u"":
+                    if posf.startswith(pos):
+                        foundposts[wor].append(pos)
+                        posf = posf[len(pos):]
             if posf != u"":
                 foundit = False
                 break
 ,19 → 98,22
     ret["pref"] = foundprefs
     ret["post"] = foundposts
     ret["inf"] = foundins
+    ret["len"] = lenited
     if foundit == True:
         ret["word"] = foundword
     return ret
 def parsesent(sent):
-    sent = sent.strip().lower()
-    sent = re.sub(ur"[^\w ]",u"",sent)
+    sent = sent.strip().lower().replace(u"’", u"'")
+    sent = re.sub(ur"[^\wìä' ]",u"",sent)
     sent = re.sub(ur"\ +",u" ",sent)
     sent = sent.split(u" ")
     ret = []
     left = len(sent)
     while left:
-        word = parseword(sent[-left:])
+        word = parsenum.parse(sent[len(sent)-left])
+        if word == None:
+            word = parseword(sent[-left:])
         left -= len(word["word"]["navi"].split(" "))
         ret.append(word)
-    return ret
+    return ret

 /tsimapiak/parsenum.py
 ,20 → 30,15
 numre = \
-      u"^(?:(" + "|".join(base) + u")zazam??)?" + \
+      u"^(a?)(?:(" + "|".join(base) + u")zazam??)?" + \
       u"(?:(" + "|".join(base) + u")vozam??)?" + \
       u"(?:(" + "|".join(base) + u")zam??)?" + \
       u"(?:(" + "|".join(base) + u")vo(?:l(?=a|))?)?" + \
       u"((?:" + "|".join(rem) + u")|" + \
-      u"(?:" + "|".join(num) + u"))?$"
+      u"(?:" + "|".join(num) + u"))?((?:ve)?)(a?)$"
 numre = re.compile(numre)
 def parse(numin):
-    if type(numin) != unicode:
-        return None
-    if numin == u"":
-        return None
-    numin = numin.replace(u"í",u"ì").replace(u"á",u"ä")
     try:
         mat = numre.match(numin).groups()
     except:
 ,31 → 46,43
     numout = 0
     numoct = 0
     try:
-        numout += rem.index(mat[4]) + 1
-        numoct += rem.index(mat[4]) + 1
+        numout += rem.index(mat[5]) + 1
+        numoct += rem.index(mat[5]) + 1
     except:
         try:
-            numout += num.index(mat[4])
-            numoct += num.index(mat[4])
+            numout += num.index(mat[5])
+            numoct += num.index(mat[5])
         except: pass
     try:
-        numout += (base.index(mat[3]) + 1) * 8
-        numoct += (base.index(mat[3]) + 1) * 10
+        numout += (base.index(mat[4]) + 1) * 8
+        numoct += (base.index(mat[4]) + 1) * 10
     except: pass
     try:
-        numout += (base.index(mat[2]) + 1) * 8**2
-        numoct += (base.index(mat[2]) + 1) * 10**2
+        numout += (base.index(mat[3]) + 1) * 8**2
+        numoct += (base.index(mat[3]) + 1) * 10**2
     except: pass
     try:
-        numout += (base.index(mat[1]) + 1) * 8**3
-        numoct += (base.index(mat[1]) + 1) * 10**3
+        numout += (base.index(mat[2]) + 1) * 8**3
+        numoct += (base.index(mat[2]) + 1) * 10**3
     except: pass
     try:
-        numout += (base.index(mat[0]) + 1) * 8**4
-        numoct += (base.index(mat[0]) + 1) * 10**4
+        numout += (base.index(mat[1]) + 1) * 8**4
+        numoct += (base.index(mat[1]) + 1) * 10**4
     except: pass
-    return numout, numoct
+    retnum = unicode(numout)
+    if mat[6] != u"":
+        retnum += u"."
+    prefs = []
+    posts = []
+    if mat[0] != u"":
+        prefs.append(mat[0])
+    if mat[6] != u"":
+        posts.append(mat[6])
+    if mat[7] != u"":
+        posts.append(mat[7])
+    return {"word": {"id": 0, "navi": retnum, "infix": u"", "type": u""}, "pref": [prefs], "post": [posts], "inf": [u"", u"", u""], "len": False, "dec": numout, "oct": numoct}
+    #return numout, numoct
 if __name__ == "__main__":
-    print parse(u"mrrvolaw")
+    print parse(u"mrrvolawvea")

 /tsimapiak/dbconnector.py
 ,9 → 9,10
     current = u""
     db = tornado.database.Connection("127.0.0.1", "navi", user="navi", password="navi")
     for row in db.query("""
-    SELECT *, CHAR_LENGTH(navi) AS NL
+    SELECT *
     FROM `metaWords`
-    ORDER BY NL DESC"""):
+    WHERE partOfSpeech <> 'num.' AND partOfSpeech <> "prefix"
+    ORDER BY CHAR_LENGTH(navi) DESC"""):
         if row["infixes"]:
             ret.append({"id": row["id"], "navi": row["navi"], "infix": row["infixes"].lower(), "type": row["partOfSpeech"]})
         else:

Subversion Repositories navi

Compare Revisions

Ignore whitespace Rev 89 → Rev 113