Subversion Repositories navi

Compare Revisions

Ignore whitespace Rev 299 → Rev 300

/tsimapiak/parse.py
48,8 → 48,13
#(u"sìk", u"sìk", u"", u"", u"", (()), (()), False),
#(u"sim", u"sim", u"", u"", u"", (()), (()), False), # probably not tsim lenited
(u"tse", u"tse", u"", u"", u"", (()), (()), False, "tse"), # otherwise parses as tsa'u abbreviated (special case)
(u"por", u"po", u"", u"", u"", (()), [[("r", None)]], False, "po"), # otherwise parses as lenited pxor which is unlikely
)
 
# Set of surface forms that are never handed to the number parser.
# The sentence loop tests each incoming word for membership here before
# calling parsenum.parse; a banned word is left as None and therefore falls
# through to the ordinary dictionary lookup (parseword).
BANNEDNUMBERS = { # words which must not be parsed by the number parser
"pey" # more likely dictionary word pey than lenited pxey 3
}
 
#INFIXES1 = (u"awn", u"eyk", u"us", u"äp", u"")
#INFIXES2 = (u"ìyev", u"iyev", u"ìmìy", u"arm", u"asy", u"ilv", u"ìmv", u"imv", u"ìrm", u"irv", u"ìsy", u"aly", u"ary", u"ìly", u"ìry", u"ìlm", u"alm", u"am", u"ay", u"er", u"ìm", u"iv", u"ìy", u"ol", u"")
#INFIXES3 = (u"äng", u"ats", u"eiy", u"ei", u"uy", u"")
66,6 → 71,7
 
# Hand-written postfix records. Each entry is a dict with the string keys
# "id", "navi", "orig_navi" and "gloss"; the ids are negative numbers stored
# as strings.
# NOTE(review): presumably these supplement postfixes loaded from elsewhere,
# with "navi" the spelling matched in text and "orig_navi" the canonical
# spelling -- confirm against the code that consumes this list.
EXTRAPOSTFIXES = [
{"id": "-3", "navi": "eyä", "orig_navi": "yä", "gloss": "GEN."},
{"id": "-4", "navi": "pxì", "orig_navi": "pxì", "gloss": "FRAC."},
]
 
# Pairs of (navi spelling, id) for "to" and "sì". Each id is taken from the
# first wordlist entry whose "navi" field equals the spelling, so this line
# raises IndexError at module load time if wordlist has no such entry.
EXTRAADP = (("to", [x["id"] for x in wordlist if x["navi"] == "to"][0]), ("sì", [x["id"] for x in wordlist if x["navi"] == "sì"][0])) # words that act like adpositions but technically aren't
130,7 → 136,7
tempid = word["id"]
temptype = word["type"]
return {"word": {"id": tempid, "navi": brokenword[1], "infix": u"", "type": temptype, "orig_navi": brokenword[8]}, "pref": brokenword[5], "post": brokenword[6], "len": brokenword[7], "inf": (brokenword[2], brokenword[3], brokenword[4]) }
ret = {"word": {"id": 0, "navi": u"[" + wordin[0] + u"]", "infix": u"", "type": u"", "orig_navi": "[" + wordin[0] + "]"}, "len": False}
ret = {"word": {"id": 0, "navi": u"[" + wordin[0] + u"]", "infix": u"", "type": u"", "orig_navi": "[" + wordin[0] + "]"}, "len": False, "pref": [], "post": [], "inf": ["", "", ""]}
for word in wordlist:
word["navi"] = word["navi"].lower()
foundit = True
255,10 → 261,10
if foundit == True:
foundword = word
break
ret["pref"] = foundprefs
ret["post"] = foundposts
ret["inf"] = foundins
if foundit == True:
ret["pref"] = foundprefs
ret["post"] = foundposts
ret["inf"] = foundins
ret["len"] = word["lenited"]
ret["word"] = foundword
return ret
271,7 → 277,9
ret = []
left = len(sent)
while left:
word = parsenum.parse(sent[len(sent) - left])
word = None
if sent[len(sent) - left] not in BANNEDNUMBERS:
word = parsenum.parse(sent[len(sent) - left])
if word == None:
word = parseword(sent[-left:])
left -= len(word["word"]["navi"].split(" "))
/tsimapiak/parsenum.py
31,6 → 31,15
u"pukap",
u"kinä"]
 
# Lenited spellings of the cardinal numbers, index-aligned with NUM
# (position n is the value n). When parse() matches the input against
# NUMLEN[n] it reports the value n, sets "len" to True and records the
# unlenited NUM[n] as "orig_navi".
NUMLEN = [u"hew",
u"aw",
u"mune",
u"pey",
u"sìng",
u"mrr",
u"fukap",
u"hinä"]
 
NUMORD = [u"kew",
u"'aw",
u"mu",
40,6 → 49,15
u"pu",
u"ki"]
 
# Lenited spellings of the ordinal stems, index-aligned with NUMORD
# (position n is the value n). parse() compares the input against
# NUMORDLEN[n] after a "ve" postfix has been stripped, and after a "pxì"
# postfix has been stripped when n > 3. On a match it sets "len" to True
# and records the unlenited NUMORD[n] as "orig_navi".
NUMORDLEN = [u"hew",
u"aw",
u"mu",
u"pey",
u"sì",
u"mrr",
u"fu",
u"hi"]
 
REM = [u"aw",
u"mun",
u"pey",
64,19 → 82,31
u"pu",
u"ki"]
 
# Lenited spellings of the multiplier prefixes, index-aligned with BASE:
# position n stands for the multiplier n + 1, so the empty first entry is
# the multiplier 1. parse() indexes this list with range(len(BASE)), so it
# needs at least as many entries as BASE. A BASELEN prefix is only tried
# while no earlier component of the number has been consumed (notbase is
# still False); when the matched prefix is not the BASE spelling, "len" is
# set to True.
BASELEN = [u"",
u"me",
u"pe",
u"sì",
u"mrr",
u"fu",
u"hi"]
 
def parse(numin):
if u"mm" in numin:
return None
if (numin == u"") or ((numin[0] == u"a") and (numin[len(numin) - 1] == u"a")):
if (numin == u"") or len(numin) == 1 or ((numin[0] == u"a" and numin[1] != "w") and (numin[len(numin) - 1] == u"a")):
return None
prefs = []
posts = []
outoct = 0
outdec = 0
ret = {"word": {"id": 0, "navi": u"", "infix": u"", "type": u""}, "pref": [prefs], "post": [posts], "inf": [u"", u"", u""], "len": False, "dec": outdec, "oct": outoct}
if numin[0] == u"a":
frac = False
ret = {"word": {"id": 0, "navi": u"", "orig_navi": "", "infix": u"", "type": u""}, "pref": [prefs], "post": [posts], "inf": [u"", u"", u""], "len": False, "dec": outdec, "oct": outoct}
if numin[0] == u"a" and len(numin) > 1 and numin[1] != "w":
prefs.append((u"a", "a"))
numin = numin[1:]
elif numin[0:2] == "nì":
prefs.append(("nì", "nì"))
numin = numin[2:]
if numin[len(numin) - 1] == u"a":
posts.append((u"a", None))
numin = numin[:-1]
83,7 → 113,21
if numin[-2:] == u"ve":
posts.append((u"ve", None))
numin = numin[:-2]
if numin[-3:] == u"pxì":
posts.append((u"pxì", None))
numin = numin[:-3]
 
# Special fractions
if numin in ("mawl", "pan", "fan"):
outoct = 2 if numin == "mawl" else 3
outdec = 2 if numin == "mawl" else 3
ret["word"]["navi"] = "1/" + str(outdec)
ret["word"]["orig_navi"] = "mawl" if numin == "mawl" else "pan"
ret["dec"] = outdec
ret["oct"] = outoct
ret["len"] = True if numin == "fan" else False
return ret
 
#BASE numbers
for n in range(len(NUM)):
if (u"ve", None) in posts:
91,80 → 135,156
outoct = n
outdec = n
ret["word"]["navi"] = str(outdec) + u"."
ret["word"]["orig_navi"] = NUMORD[n]
ret["dec"] = outdec
ret["oct"] = outoct
return ret
if numin == NUMORDLEN[n]:
outoct = n
outdec = n
ret["word"]["navi"] = str(outdec) + u"."
ret["word"]["orig_navi"] = NUMORD[n]
ret["dec"] = outdec
ret["oct"] = outoct
ret["len"] = True
return ret
elif ("pxì", None) in posts and n > 3:
if numin == NUMORD[n]:
outoct = n
outdec = n
ret["word"]["navi"] = "1/" + str(outdec)
ret["word"]["orig_navi"] = NUMORD[n]
ret["dec"] = outdec
ret["oct"] = outoct
return ret
if numin == NUMORDLEN[n]:
outoct = n
outdec = n
ret["word"]["navi"] = "1/" + str(outdec)
ret["word"]["orig_navi"] = NUMORD[n]
ret["dec"] = outdec
ret["oct"] = outoct
ret["len"] = True
return ret
else:
if numin == NUM[n]:
outoct = n
outdec = n
ret["word"]["navi"] = str(outdec)
ret["word"]["orig_navi"] = NUM[n]
ret["dec"] = outdec
ret["oct"] = outoct
return ret
if numin == NUMLEN[n]:
outoct = n
outdec = n
ret["word"]["navi"] = str(outdec)
ret["word"]["orig_navi"] = NUM[n]
ret["dec"] = outdec
ret["oct"] = outoct
ret["len"] = True
return ret
#other numbers
notbase = False
orig_navi = ""
for n in range(len(BASE)):
if numin.startswith(BASE[n] + u"vozazam"):
if numin.startswith(BASE[n] + u"vozaza") or (not notbase and numin.startswith(BASELEN[n] + "vozaza")):
base = BASE[n]
if not numin.startswith(BASE[n]):
base = BASELEN[n]
ret["len"] = True
outoct += (n + 1) * (10 ** 5)
outdec += (n + 1) * (8 ** 5)
if numin[len(BASE[n]) + 6:].startswith(u"mrr") or numin[len(BASE[n]) + 6:].startswith(u"me"):
numin = numin[len(BASE[n]) + 6:]
if numin[len(base) + 6:].startswith(u"mrr") or numin[len(base) + 6:].startswith(u"me") or numin[len(base) + 6:].startswith("mu") or not numin[len(base) + 6:].startswith("m"):
orig_navi += BASE[n] + "vozaza"
numin = numin[len(base) + 6:]
else:
numin = numin[len(BASE[n]) + 7:]
orig_navi += BASE[n] + "vozazam"
numin = numin[len(base) + 7:]
notbase = True
for n in range(len(BASE)):
if numin.startswith(BASE[n] + u"zazam"):
if numin.startswith(BASE[n] + u"zaza") or (not notbase and numin.startswith(BASELEN[n] + "zaza")):
base = BASE[n]
if not numin.startswith(BASE[n]):
base = BASELEN[n]
ret["len"] = True
outoct += (n + 1) * (10 ** 4)
outdec += (n + 1) * (8 ** 4)
if numin[len(BASE[n]) + 4:].startswith(u"mrr") or numin[len(BASE[n]) + 4:].startswith(u"me"):
numin = numin[len(BASE[n]) + 4:]
if numin[len(base) + 4:].startswith(u"mrr") or numin[len(base) + 4:].startswith(u"me") or numin[len(base) + 4:].startswith("mu") or not numin[len(base) + 4:].startswith("m"):
orig_navi += BASE[n] + "zaza"
numin = numin[len(base) + 4:]
else:
numin = numin[len(BASE[n]) + 5:]
orig_navi += BASE[n] + "zazam"
numin = numin[len(base) + 5:]
notbase = True
for n in range(len(BASE)):
if numin.startswith(BASE[n] + u"vozam"):
if numin.startswith(BASE[n] + u"voza") or (not notbase and numin.startswith(BASELEN[n] + "voza")):
base = BASE[n]
if not numin.startswith(BASE[n]):
base = BASELEN[n]
ret["len"] = True
outoct += (n + 1) * (10 ** 3)
outdec += (n + 1) * (8 ** 3)
if numin[len(BASE[n]) + 4:].startswith(u"mrr") or numin[len(BASE[n]) + 4:].startswith(u"me"):
numin = numin[len(BASE[n]) + 4:]
if numin[len(base) + 4:].startswith(u"mrr") or numin[len(base) + 4:].startswith(u"me") or numin[len(base) + 4:].startswith("mu") or not numin[len(base) + 4:].startswith("m"):
orig_navi += BASE[n] + "voza"
numin = numin[len(base) + 4:]
else:
numin = numin[len(BASE[n]) + 5:]
orig_navi += BASE[n] + "vozam"
numin = numin[len(base) + 5:]
notbase = True
for n in range(len(BASE)):
if numin.startswith(BASE[n] + u"zam"):
if numin.startswith(BASE[n] + u"za") or (not notbase and numin.startswith(BASELEN[n] + "za")):
base = BASE[n]
if not numin.startswith(BASE[n]):
base = BASELEN[n]
ret["len"] = True
outoct += (n + 1) * (10 ** 2)
outdec += (n + 1) * (8 ** 2)
if numin[len(BASE[n]) + 2:].startswith(u"mrr") or numin[len(BASE[n]) + 2:].startswith(u"me"):
numin = numin[len(BASE[n]) + 2:]
if numin[len(base) + 2:].startswith(u"mrr") or numin[len(base) + 2:].startswith(u"me") or numin[len(base) + 2:].startswith("mu") or not numin[len(base) + 2:].startswith("m"):
orig_navi += BASE[n] + "za"
numin = numin[len(base) + 2:]
else:
numin = numin[len(BASE[n]) + 3:]
orig_navi += BASE[n] + "zam"
numin = numin[len(base) + 3:]
notbase = True
for n in range(len(BASE)):
if numin.startswith(BASE[n] + u"vol"):
if numin.startswith(BASE[n] + u"vol") or (not notbase and numin.startswith(BASELEN[n] + "vol")):
base = BASE[n]
if not numin.startswith(BASE[n]):
base = BASELEN[n]
ret["len"] = True
outoct += (n + 1) * 10
outdec += (n + 1) * 8
numin = numin[len(BASE[n]) + 3:]
numin = numin[len(base) + 3:]
notbase = True
if numin.startswith(BASE[n] + u"vo"):
orig_navi += BASE[n] + "vol"
if numin.startswith(BASE[n] + u"vo") or (not notbase and numin.startswith(BASELEN[n] + "vo")):
base = BASE[n]
if not numin.startswith(BASE[n]):
base = BASELEN[n]
ret["len"] = True
outoct += (n + 1) * 10
outdec += (n + 1) * 8
numin = numin[len(BASE[n]) + 2:]
numin = numin[len(base) + 2:]
notbase = True
orig_navi += BASE[n] + "vo"
if notbase:
for n in range(len(REM)):
if (u"ve", None) in posts:
if (u"ve", None) in posts or ("pxì", None) in posts:
if numin == REMORD[n]:
orig_navi += numin
outoct += n + 1
outdec += n + 1
numin = u""
else:
if numin == REM[n]:
orig_navi += numin
outoct += n + 1
outdec += n + 1
numin = u""
if numin == u"":
ret["word"]["navi"] = str(outdec) if not (u"ve", None) in posts else str(outdec) + u"."
ret["word"]["navi"] = ("" if not ("pxì", None) in posts else "1/") + str(outdec) + ("" if not (u"ve", None) in posts else ".")
ret["word"]["orig_navi"] = orig_navi
ret["dec"] = outdec
ret["oct"] = outoct
return ret
/tsimapiak/translate.py
42,7 → 42,7
# if brokenword[0] == word["word"]["navi"]:
# word["translated"] = brokenword[1]
if word["translated"] == u"":
word["translated"] = word["word"]["orig_navi"]
word["translated"] = word["word"]["navi"]
if word["inf"][0] != u"":
for fix in [(x["navi"], x["gloss"]) for x in parse.infixes if x["position"] == 0]:
if fix[0] == word["inf"][0]: