Subversion Repositories navi

Compare Revisions

Ignore whitespace — Rev 114 → Rev 117

/tsimapiak/parse2.py
File deleted
/tsimapiak/parsenum.py
43,6 → 43,8
mat = numre.match(numin).groups()
except:
return None
if mat[5] == u"" and mat[4] == u"" and mat[3] == u"" and mat[2] == u"" and mat[1] == u"":
return None
numout = 0
numoct = 0
try:
/tsimapiak/parse.py
2,85 → 2,118
# -*- coding: utf-8 -*-
 
import re
from dbconnector import getnavilist
import dbconnector
import parsenum
 
wordlist = getnavilist()
wordlist = dbconnector.getnavilist()
 
infixes0 = [ u"awn", u"eyk", u"us", u"äp" ]
infixes1 = [ u"ìyev", u"iyev", u"ìmìy", u"arm", u"asy", u"ilv", u"ìmv", u"imv" u"ìrm", u"irv", u"ìsy", u"aly", u"ary", u"ìly", u"ìry", u"ìlm", u"alm", u"am", u"ay", u"er", u"ìm", u"iv", u"ìy", u"ol" ]
infixes2 = [ u"äng", u"ats", u"eiy", u"ei", u"uy" ]
infixes1 = (u"awn", u"eyk", u"us", u"äp", u"")
infixes2 = (u"ìyev", u"iyev", u"ìmìy", u"arm", u"asy", u"ilv", u"ìmv", u"imv", u"ìrm", u"irv", u"ìsy", u"aly", u"ary", u"ìly", u"ìry", u"ìlm", u"alm", u"am", u"ay", u"er", u"ìm", u"iv", u"ìy", u"ol", u"")
infixes3 = (u"äng", u"ats", u"eiy", u"ei", u"uy", u"")
prefixes = (u"tsay", u"fay", u"fra", u"pxe", u"fne", u"tsa", u"pe", u"le", u"nì", u"sä", u"tì", u"ay", u"me", u"fì", u"ke", u"a")
adpositions = (u"mungwrr", u"kxamlä", u"pximaw", u"pxisre", u"tafkip", u"nemfa", u"takip", u"mìkam", u"teri", u"fkip", u"luke", u"pxel", u"pxaw", u"rofa", u"ìla", u"fpi", u"ftu", u"kip", u"lok", u"maw", u"sre", u"sìn", u"vay", u"eo", u"fa", u"hu", u"io", u"ka", u"mì", u"na", u"ne", u"ro", u"ta", u"uo", u"wä", u"äo", u"to")
postfixes = adpositions + (u"tsyìp", u"eyä", u"ìri", u"ìl", u"it", u"lo", u"ri", u"ru", u"ti", u"ur", u"ve", u"yä", u"ya", u"tu", u"vi", u"yu", u"an", u"ng", u"e", u"o", u"l", u"t", u"y", u"a", u"ä", u"r")
#prefixesn = ur"(?P<npr>(?:(?:fì|tsa)?(?:me|pxe|ay|fra)?|(?:fay)?|(?:tsay)?)(?:fne)?(?:tì|sä)?"
#prefixesv = ur"(?P<vpr>(?:nì|sä|tì|rä'ä |ke )?)"
 
# Returns array with Word,Infix 0,Infix 1,Infix 2,Case,Gender,Number suffixes,Inclusive,Indefinite,Vocative (suffix),Plural,Adposition,Adject pre,Adject suff,am/ay/tu/vi/yu,adverbial,nominalise,sä,fne,lenited?
def parsefix(original):
realword = u""
infix0 = u""
infix1 = u""
infix2 = u""
infix01 = u""
infix_1 = u""
infix_2 = u""
for eachword in wordlist:
regex = re.sub(u" ",u"[^ ]* [^ ]*",eachword["infix"])
regex = re.sub(u"^",u"[^ ]*",regex)
regex = re.sub(u"$",u"[^ ]*",regex)
regex = re.sub(u"<1><2>",u"[^ ]*",regex)
regex = re.sub(u"<3>",u"[^ ]*",regex)
if re.match(regex,original):
realword = eachword["infix"]
break
if realword == u"":
return [u"[" + original + u"]",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u""]
else:
if re.search(u"<",realword):
beginning = re.sub(u"<1><2>.*",u"",realword)
middle = re.sub(u".*<1><2>(.*)<3>.*",ur"\1",realword)
end = re.sub(u".*<3>",u"",realword)
infix01 = re.sub(u".*?" + re.sub(u"<1><2>",u"([^ ]*)",re.sub(u"<3>",u"[^ ]*",realword)) + u".*?",ur"\1",original)
infix_2 = re.sub(u".*?" + re.sub(u"<3>",u"([^ ]*)",re.sub(u"<1><2>",u"[^ ]*",realword)) + u".*?",ur"\1",original)
for eachinfix in infixes0:
if infix01.startswith(eachinfix):
infix0 = eachinfix
infix_1 = infix01[len(eachinfix):]
break
else:
infix0 = u""
infix_1 = infix01
gotinfix1 = False
for eachinfix in infixes1:
if infix_1.startswith(eachinfix):
infix1 = eachinfix
infix_1 = infix_1[len(eachinfix):]
if infix_1 != u"":
if re.search(u"<1><2><3>",realword):
infix_2 = infix_1
lenit = ((u"px", u"p"), (u"tx", u"t"), (u"kx", u"k"), (u"ts", u"s"), (u"t", u"s"), (u"p", u"f"), (u"k", u"h"), (u"'", u""))
 
def parseword(wordin):
ret = {"word": {"id": 0, "navi": u"[" + wordin[0] + u"]", "infix": u"", "type": u""}}
for word in wordlist:
foundit = True
foundprefs = []
foundposts = []
lenited = False
splitword = word["infix"].split(u" ")
if len(wordin) < len(splitword):
foundit = False
next
for wor in range(len(splitword)):
if not foundit:
break
foundprefs.append([])
foundposts.append([])
center = u""
foundins = [u"", u"", u""]
pre = []
post = []
if u"<1>" in splitword[wor]:
for in1 in infixes1:
for in2 in infixes2:
for in3 in infixes3:
if splitword[wor].replace(u"<1><2>",in1+in2).replace(u"<3>",in3) in wordin[wor]:
center = splitword[wor].replace(u"<1><2>",in1+in2).replace(u"<3>",in3)
foundins = [in1, in2, in3]
break
if center != u"": break
if center != u"": break
else:
return [u"[" + original + u"]",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u""]
gotinfix1 = True
break
if gotinfix1 == False:
if re.search(u"<1><2><3>",realword):
if infix_1 == u"":
infix_2 = infix_1
infix1 = u""
elif infix_1 == u"":
infix1 = u""
else:
return [u"[" + original + u"]",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u""]
gotinfix2 = False
for eachinfix in infixes2:
if infix_2.startswith(eachinfix):
infix2 = infix_2[:len(eachinfix)]
infix_2 = infix_2[len(eachinfix) - 1:]
gotinfix2 = True
break
if gotinfix2 == False or infix_2 != u"":
if infix_2.startswith(end):
suffixes = infix2[len(end) - 1:] + end
elif infix_2 == u"":
infix2 = u""
else:
return [u"[" + original + u"]",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u""]
# print u"0" + unicode(infix0) + u" 1" + unicode(infix1) + u" 2" + unicode(infix2)
return [realword,infix0,infix1,infix2,u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u""]
else:
return [realword,u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u""]
if splitword[wor] in wordin[wor]:
center = splitword[wor]
if center == u"":
for i in lenit:
temp = u""
if splitword[wor].startswith(i[0]):
temp = i[1] + splitword[wor][len(i[0]):]
if temp in wordin[wor]:
lenited = True
center = temp
if center == u"":
if splitword[wor].endswith(u"nga"):
temp = splitword[wor][:-3] + u"ng"
if temp in wordin[wor]:
center = temp
if splitword[wor].endswith(u"po"):
temp = splitword[wor][:-3] + u"p"
if temp in wordin[wor]:
center = temp
if center == u"":
foundit = False
break
temp = wordin[wor].split(center)
if len(temp) != 2:
foundit = False
break
pref, posf = temp
for pre in prefixes:
if pref != u"":
if pref.endswith(pre):
foundprefs[wor].append(pre)
pref = pref[:-len(pre)]
if pref != u"":
foundit = False
break
for pos in postfixes:
if posf != u"":
if posf.startswith(pos):
foundposts[wor].append(pos)
posf = posf[len(pos):]
if posf != u"":
foundit = False
break
if foundit == True:
foundword = word
break
ret["pref"] = foundprefs
ret["post"] = foundposts
ret["inf"] = foundins
ret["len"] = lenited
if foundit == True:
ret["word"] = foundword
return ret
 
def parsesent(sent):
sent = sent.strip().lower().replace(u"’", u"'")
sent = re.sub(ur"[^\wìä' ]",u"",sent)
sent = re.sub(ur"\ +",u" ",sent)
sent = sent.split(u" ")
ret = []
left = len(sent)
while left:
word = parsenum.parse(sent[len(sent)-left])
if word == None:
word = parseword(sent[-left:])
left -= len(word["word"]["navi"].split(" "))
ret.append(word)
return ret
/webapp/main.py
12,7 → 12,6
from tsimapiak import parsenum
from tsimapiak import dbconnector
from tsimapiak import parse
from tsimapiak import parse2
 
class Index(tornado.web.RequestHandler):
def get(self):
31,7 → 30,7
if numout == None:
numoutt = -1
else:
numoutt = [numout["dec"], numout["oct"]]
numoutt = (numout["dec"], numout["oct"])
self.render("templates/number.html", last=num, numout=numoutt)
 
class Restart(tornado.web.RequestHandler):
52,31 → 51,18
def post(self):
try:
word = self.get_argument("word").strip()
word = self.get_argument("word")
except:
self.redirect("/parse")
out = parse.parsefix(word)
out = parse.parsesent(word)
self.render("templates/parse.html", last=word, out=out)
 
class Parse2(tornado.web.RequestHandler):
def get(self):
self.render("templates/parse2.html", last="", out=None)
def post(self):
try:
word = self.get_argument("word")
except:
self.redirect("/parse2")
out = parse2.parsesent(word)
self.render("templates/parse2.html", last=word, out=out)
 
application = tornado.web.Application([
("/", Index),
("/number", Number),
("/restart", Restart),
("/testdb", TestDB),
("/parse", Parse),
("/parse2", Parse2)
("/parse", Parse)
])
 
if __name__ == "__main__":
/webapp/templates/parse2.html
File deleted
/webapp/templates/parse.html
8,13 → 8,36
<input id="word" name="word" type="text" value="{{last}}" style="width: 100%;" />
<input name="btn" type="submit" value="Parse!" />
</form>
{% if type(out) == list %}
{{ out[0]}} <br />
{{ out[1]}} <br />
{{ out[2]}} <br />
{{ out[3]}}
{% if out %}
<table border="1">
<tr>
<th>Words</th>
<th>Parts</th>
<th>Data</th>
</tr>
{% for wor in out %}
<tr>
<td rowspan="4">{{ wor["word"]["navi"] }}</td>
<td>Infixes:</td>
<td>{{ u", ".join(wor["inf"]) }}</td>
</tr>
<tr>
<td>Prefixes:</td>
<td>{{ u"; ".join(u", ".join(x) for x in wor["pref"]) }}</td>
</tr>
<tr>
<td>Postfixes:</td>
<td>{{ u"; ".join(u", ".join(x) for x in wor["post"]) }}</td>
</tr>
<tr>
<td>Lenited:</td>
<td>{{ str(wor["len"]) }}</td>
</tr>
{% end %}
</table>
{% end %}
<script type="text/javascript">
document.getElementById("word").focus();
</script>
{% end %}
<p>This program uses Eana Eltu for the list of words and infix positions (but nothing else), created by Tuiq and Taronyu. Thanks also go to the rest of the Learn Na'vi community!</p>
{% end %}