WebSVN - navi - Path Comparison - /tsimapiak Rev 283 and /tsimapiak Rev 284

Ignore whitespace Rev 283 → Rev 284

 /tsimapiak/dbconnector.py
 33,10 → 33,10
     WHERE partOfSpeech <> 'num.' AND partOfSpeech <> 'prefix' AND partOfSpeech <> 'affix'
     ORDER BY CHAR_LENGTH(navi) DESC""")
     for row in cur:
-        if row["infixes"]:
-            ret.append({"id": row["id"], "navi": row["navi"], "infix": row["infixes"].lower(), "type": row["partOfSpeech"]})
+        if row["infixes"] and row["infixes"] != "NULL": # yeah seriously
+            ret.append({"id": row["id"], "navi": row["navi"].replace("+", "").replace("-", ""), "infix": row["infixes"].lower(), "type": row["partOfSpeech"]})
         else:
-            ret.append({"id": row["id"], "navi": row["navi"], "infix": row["navi"].lower(), "type": row["partOfSpeech"]})
+            ret.append({"id": row["id"], "navi": row["navi"].replace("+", "").replace("-", ""), "infix": row["navi"].lower(), "type": row["partOfSpeech"]})
     cur.close()
     db.close()
     return ret
 55,7 → 55,7
         elif row["navi"] and row["navi"][-1] in ("-", "+"):
             ret[0].append({"id": row["id"], "navi": row["navi"].replace("-", ""), "gloss": row["shorthand"].upper()})
         else:
-            if not row["position"]:
+            if not row["position"] or row["position"] == "NULL":
                 # not actually an affix
                 continue
             ret[1].append({"id": row["id"], "navi": row["navi"].replace("-", ""), "gloss": row["shorthand"].upper(), "position": int(row["position"])})

 /tsimapiak/parse.py
 34,13 → 34,13
     (u"sami", u"si", u"", u"am", u"", (()), (()), False),
     (u"to", u"to", u"", u"", u"", (()), (()), False),
     #(u"frato", u"to", u"", u"", u"", [[u"fra"]],  (()), False),
-    (u"soaiä", u"soaia", u"", u"", u"", (()), [[u"ä"]], False),
+    (u"soaiä", u"soaia", u"", u"", u"", (()), [[(u"ä", None)]], False),
     (u"mengenga", u"ngenga", u"", u"", u"", [[u"me"]], (()), False),
     (u"pxengenga", u"ngenga", u"", u"", u"", [[u"pxe"]], (()), False),
     (u"kìmä", u"kä", u"", u"ìm", u"", (()), (()), False),
     (u"apxay", u"pxay", u"", u"", u"", [[u"a"]], (()), False),
     (u"akawng", u"kawng", u"", u"", u"", [[u"a"]], (()), False), # TODO remember why on earth this is needed; how is awng interpreted as awnga?
-    (u"kawnga", u"kawng", u"", u"", u"", (()), [[u"a"]], False),
+    (u"kawnga", u"kawng", u"", u"", u"", (()), [[(u"a", None)]], False),
     (u"kawng", u"kawng", u"", u"", u"", (()), (()), False),
     (u"ka", u"ka", u"", u"", u"", (()), (()), False),
     (u"uo", u"uo", u"", u"", u"", (()), (()), False),
 57,7 → 57,7
 #prefixesn = ur"(?P<npr>(?:(?:fì|tsa)?(?:me|pxe|ay|fra)?|(?:fay)?|(?:tsay)?)(?:fne)?(?:tì|sä)?"
 #prefixesv = ur"(?P<vpr>(?:nì|sä|tì|rä'ä |ke )?)"
-EXTRAADP = ("to", "sì") # words that act like adpositions but technically aren't
+EXTRAADP = (("to", [x["id"] for x in wordlist if x["navi"] == "to"][0]), ("sì", [x["id"] for x in wordlist if x["navi"] == "sì"][0])) # words that act like adpositions but technically aren't
 LENIT = ((u"px", u"p"), (u"tx", u"t"), (u"kx", u"k"), (u"ts", u"s"), (u"t", u"s"), (u"p", u"f"), (u"k", u"h"), (u"'", u""))
 89,7 → 89,7
             foundprefs.append([])
             foundposts.append([])
             center = u""
-            if u"<1>" in splitword[wor]:
+            if u"<0>" in splitword[wor]:
                 tempin1 = []
                 tempin2 = []
                 tempin3 = []
 105,8 → 105,8
                 for in1 in tempin1:
                     for in2 in tempin2:
                         for in3 in tempin3:
-                            if splitword[wor].replace(u"<1><2>", in1 + in2).replace(u"<3>", in3).replace(u"lll", u"l").replace(u"rrr", u"r") in wordin[wor]:
-                                center = splitword[wor].replace(u"<1><2>", in1 + in2).replace(u"<3>", in3).replace(u"lll", u"l").replace(u"rrr", u"r")
+                            if splitword[wor].replace(u"<0><1>", in1 + in2).replace(u"<2>", in3).replace(u"lll", u"l").replace(u"rrr", u"r") in wordin[wor]:
+                                center = splitword[wor].replace(u"<0><1>", in1 + in2).replace(u"<2>", in3).replace(u"lll", u"l").replace(u"rrr", u"r")
                                 foundins = [in1, in2, in3]
                                 break
                         if center != u"":
 167,13 → 167,13
             last = u""
             while last != posf:
                 last = posf
-                for pos in [x["navi"] for x in postfixes] + [x["navi"] for x in wordlist if x["type"] == "adp."] + list(EXTRAADP):
+                for pos, posid in [(x["navi"], None) for x in postfixes] + [(x["navi"], x["id"]) for x in wordlist if x["type"] == "adp."] + list(EXTRAADP):
                     if posf != u"":
                         if posf.startswith(pos):
-                            if pos in foundposts[wor]:
+                            if (pos, posid) in foundposts[wor]:
                                 break
                             if pos != u"ä" or word["navi"] != u"pey": # XXX HACK - fix for peyä. THIS SHOULD NOT BE HERE!
-                                foundposts[wor].append(pos)
+                                foundposts[wor].append((pos, posid))
                                 posf = posf[len(pos):]
                                 break
                             else:

 /tsimapiak/parsenum.py
 78,15 → 78,15
         prefs.append(u"a")
         numin = numin[1:]
     if numin[len(numin) - 1] == u"a":
-        posts.append(u"a")
+        posts.append((u"a", None))
         numin = numin[:-1]
     if numin[-2:] == u"ve":
-        posts.append(u"ve")
+        posts.append((u"ve", None))
         numin = numin[:-2]
     #BASE numbers
     for n in range(len(NUM)):
-        if u"ve" in posts:
+        if (u"ve", None) in posts:
             if numin == NUMORD[n]:
                 outoct = n
                 outdec = n
 153,7 → 153,7
             notbase = True
     if notbase:
         for n in range(len(REM)):
-            if u"ve" in posts:
+            if (u"ve", None) in posts:
                 if numin == REMORD[n]:
                     outoct += n + 1
                     outdec += n + 1
 164,7 → 164,7
                     outdec += n + 1
                     numin = u""
     if numin == u"":
-        ret["word"]["navi"] = str(outdec) if not u"ve" in posts else str(outdec) + u"."
+        ret["word"]["navi"] = str(outdec) if not (u"ve", None) in posts else str(outdec) + u"."
         ret["dec"] = outdec
         ret["oct"] = outoct
         return ret

 /tsimapiak/translate.py
 63,7 → 63,7
                     if fix[0] == navf:
                         word["translated"] += '-' + fix[1]
         for temp in word["post"]:
-            for navf in temp:
+            for navf, navfid in temp:
                 for fix in [(x["navi"], x["gloss"]) for x in postfixes]:
                     if fix[0] == navf:
                         word["translated"] += '-' + fix[1]
 70,7 → 70,7
                         break
                 else:
                     # adpositions and the like
-                    word["translated"] += "-" + dbconnector.translate(navf, lang)
+                    word["translated"] += "-" + dbconnector.translate(navfid, lang)
         if word["len"]:
             word["translated"] += '-' + 'LENTD'
     return sent

Subversion Repositories navi

Compare Revisions

Ignore whitespace Rev 283 → Rev 284