Subversion Repositories navi

Compare Revisions

Ignore whitespace

Rev 99 → Rev 113

/webapp/templates/parse2.html
39,4 → 39,5
<script type="text/javascript">
document.getElementById("word").focus();
</script>
{% end %}
<b>This program uses Eana Eltu for the list of words and infix positions (but nothing else), created by Tuiq and Taronyu. Thanks also go to the rest of the Learn Na'vi community!</b>
{% end %}
/webapp/main.py
29,8 → 29,10
self.redirect("/number")
numout = parsenum.parse(num.replace(" ",""))
if numout == None:
numout = -1
self.render("templates/number.html", last=num, numout=numout)
numoutt = -1
else:
numoutt = [numout["dec"], numout["oct"]]
self.render("templates/number.html", last=num, numout=numoutt)
 
class Restart(tornado.web.RequestHandler):
def get(self):
/tsimapiak/parse2.py
3,6 → 3,7
 
import re
import dbconnector
import parsenum
 
wordlist = dbconnector.getnavilist()
 
9,9 → 10,9
infixes1 = (u"awn", u"eyk", u"us", u"äp", u"")
infixes2 = (u"ìyev", u"iyev", u"ìmìy", u"arm", u"asy", u"ilv", u"ìmv", u"imv", u"ìrm", u"irv", u"ìsy", u"aly", u"ary", u"ìly", u"ìry", u"ìlm", u"alm", u"am", u"ay", u"er", u"ìm", u"iv", u"ìy", u"ol", u"")
infixes3 = (u"äng", u"ats", u"eiy", u"ei", u"uy", u"")
prefixes = (u"a", u"pe", u"le", u"nì", u"sä", u"tì", u"fne", u"tsay", u"fay", u"fra", u"pxe", u"ay", u"me", u"tsa", u"fì", u"ke")
adpositions = (u"kxamlä", u"mungwrr", u"nemfa", u"pximaw", u"pxisre", u"tafkip", u"takip", u"teri", u"mìkam", u"ìla", u"fkip", u"fpi", u"ftu", u"kip", u"lok", u"luke", u"maw", u"pxel", u"pxaw", u"rofa", u"sre", u"sìn", u"vay", u"eo", u"fa", u"hu", u"io", u"ka", u"mì", u"na", u"ne", u"ro", u"ta", u"uo", u"wä", u"äo")
postfixes = (u"an", u"eyä", u"e", u"tsyìp", u"o", u"ìri", u"ìl", u"it", u"lo", u"ng", u"ri", u"ru", u"ti", u"ur", u"ve", u"yä", u"ya", u"tu", u"vi", u"yu", u"l", u"t", u"y", u"a", u"ä") + adpositions
prefixes = (u"tsay", u"fay", u"fra", u"pxe", u"fne", u"tsa", u"pe", u"le", u"nì", u"sä", u"tì", u"ay", u"me", u"fì", u"ke", u"a")
adpositions = (u"mungwrr", u"kxamlä", u"pximaw", u"pxisre", u"tafkip", u"nemfa", u"takip", u"mìkam", u"teri", u"fkip", u"luke", u"pxel", u"pxaw", u"rofa", u"ìla", u"fpi", u"ftu", u"kip", u"lok", u"maw", u"sre", u"sìn", u"vay", u"eo", u"fa", u"hu", u"io", u"ka", u"mì", u"na", u"ne", u"ro", u"ta", u"uo", u"wä", u"äo", u"to")
postfixes = adpositions + (u"tsyìp", u"eyä", u"ìri", u"ìl", u"it", u"lo", u"ri", u"ru", u"ti", u"ur", u"ve", u"yä", u"ya", u"tu", u"vi", u"yu", u"an", u"ng", u"e", u"o", u"l", u"t", u"y", u"a", u"ä")
#prefixesn = ur"(?P<npr>(?:(?:fì|tsa)?(?:me|pxe|ay|fra)?|(?:fay)?|(?:tsay)?)(?:fne)?(?:tì|sä)?"
#prefixesv = ur"(?P<vpr>(?:nì|sä|tì|rä'ä |ke )?)"
 
63,6 → 64,10
temp = splitword[wor][:-3] + u"ng"
if temp in wordin[wor]:
center = temp
if splitword[wor].endswith(u"po"):
temp = splitword[wor][:-3] + u"p"
if temp in wordin[wor]:
center = temp
if center == u"":
foundit = False
break
99,14 → 104,16
return ret
 
def parsesent(sent):
sent = sent.strip().lower()
sent = re.sub(ur"[^\wìä ]",u"",sent)
sent = sent.strip().lower().replace(u"’", u"'")
sent = re.sub(ur"[^\wìä' ]",u"",sent)
sent = re.sub(ur"\ +",u" ",sent)
sent = sent.split(u" ")
ret = []
left = len(sent)
while left:
word = parseword(sent[-left:])
word = parsenum.parse(sent[len(sent)-left])
if word == None:
word = parseword(sent[-left:])
left -= len(word["word"]["navi"].split(" "))
ret.append(word)
return ret
return ret
/tsimapiak/parsenum.py
30,20 → 30,15
 
 
numre = \
u"^(?:(" + "|".join(base) + u")zazam??)?" + \
u"^(a?)(?:(" + "|".join(base) + u")zazam??)?" + \
u"(?:(" + "|".join(base) + u")vozam??)?" + \
u"(?:(" + "|".join(base) + u")zam??)?" + \
u"(?:(" + "|".join(base) + u")vo(?:l(?=a|))?)?" + \
u"((?:" + "|".join(rem) + u")|" + \
u"(?:" + "|".join(num) + u"))?$"
u"(?:" + "|".join(num) + u"))?((?:ve)?)(a?)$"
numre = re.compile(numre)
 
def parse(numin):
if type(numin) != unicode:
return None
if numin == u"":
return None
numin = numin.replace(u"í",u"ì").replace(u"á",u"ä")
try:
mat = numre.match(numin).groups()
except:
51,31 → 46,43
numout = 0
numoct = 0
try:
numout += rem.index(mat[4]) + 1
numoct += rem.index(mat[4]) + 1
numout += rem.index(mat[5]) + 1
numoct += rem.index(mat[5]) + 1
except:
try:
numout += num.index(mat[4])
numoct += num.index(mat[4])
numout += num.index(mat[5])
numoct += num.index(mat[5])
except: pass
try:
numout += (base.index(mat[3]) + 1) * 8
numoct += (base.index(mat[3]) + 1) * 10
numout += (base.index(mat[4]) + 1) * 8
numoct += (base.index(mat[4]) + 1) * 10
except: pass
try:
numout += (base.index(mat[2]) + 1) * 8**2
numoct += (base.index(mat[2]) + 1) * 10**2
numout += (base.index(mat[3]) + 1) * 8**2
numoct += (base.index(mat[3]) + 1) * 10**2
except: pass
try:
numout += (base.index(mat[1]) + 1) * 8**3
numoct += (base.index(mat[1]) + 1) * 10**3
numout += (base.index(mat[2]) + 1) * 8**3
numoct += (base.index(mat[2]) + 1) * 10**3
except: pass
try:
numout += (base.index(mat[0]) + 1) * 8**4
numoct += (base.index(mat[0]) + 1) * 10**4
numout += (base.index(mat[1]) + 1) * 8**4
numoct += (base.index(mat[1]) + 1) * 10**4
except: pass
return numout, numoct
retnum = unicode(numout)
if mat[6] != u"":
retnum += u"."
prefs = []
posts = []
if mat[0] != u"":
prefs.append(mat[0])
if mat[6] != u"":
posts.append(mat[6])
if mat[7] != u"":
posts.append(mat[7])
return {"word": {"id": 0, "navi": retnum, "infix": u"", "type": u""}, "pref": [prefs], "post": [posts], "inf": [u"", u"", u""], "len": False, "dec": numout, "oct": numoct}
#return numout, numoct
 
 
if __name__ == "__main__":
print parse(u"mrrvolaw")
print parse(u"mrrvolawvea")
/tsimapiak/dbconnector.py
9,9 → 9,10
current = u""
db = tornado.database.Connection("127.0.0.1", "navi", user="navi", password="navi")
for row in db.query("""
SELECT *, CHAR_LENGTH(navi) AS NL
SELECT *
FROM `metaWords`
ORDER BY NL DESC"""):
WHERE partOfSpeech <> 'num.' AND partOfSpeech <> "prefix"
ORDER BY CHAR_LENGTH(navi) DESC"""):
if row["infixes"]:
ret.append({"id": row["id"], "navi": row["navi"], "infix": row["infixes"].lower(), "type": row["partOfSpeech"]})
else: