Subversion Repositories navi

Compare Revisions

Rev 113 → Rev 99

/tsimapiak/parse2.py
3,7 → 3,6
 
import re
import dbconnector
import parsenum
 
wordlist = dbconnector.getnavilist()
 
10,9 → 9,9
infixes1 = (u"awn", u"eyk", u"us", u"äp", u"")
infixes2 = (u"ìyev", u"iyev", u"ìmìy", u"arm", u"asy", u"ilv", u"ìmv", u"imv", u"ìrm", u"irv", u"ìsy", u"aly", u"ary", u"ìly", u"ìry", u"ìlm", u"alm", u"am", u"ay", u"er", u"ìm", u"iv", u"ìy", u"ol", u"")
infixes3 = (u"äng", u"ats", u"eiy", u"ei", u"uy", u"")
prefixes = (u"tsay", u"fay", u"fra", u"pxe", u"fne", u"tsa", u"pe", u"le", u"nì", u"sä", u"tì", u"ay", u"me", u"fì", u"ke", u"a")
adpositions = (u"mungwrr", u"kxamlä", u"pximaw", u"pxisre", u"tafkip", u"nemfa", u"takip", u"mìkam", u"teri", u"fkip", u"luke", u"pxel", u"pxaw", u"rofa", u"ìla", u"fpi", u"ftu", u"kip", u"lok", u"maw", u"sre", u"sìn", u"vay", u"eo", u"fa", u"hu", u"io", u"ka", u"mì", u"na", u"ne", u"ro", u"ta", u"uo", u"wä", u"äo", u"to")
postfixes = adpositions + (u"tsyìp", u"eyä", u"ìri", u"ìl", u"it", u"lo", u"ri", u"ru", u"ti", u"ur", u"ve", u"yä", u"ya", u"tu", u"vi", u"yu", u"an", u"ng", u"e", u"o", u"l", u"t", u"y", u"a", u"ä")
prefixes = (u"a", u"pe", u"le", u"nì", u"sä", u"tì", u"fne", u"tsay", u"fay", u"fra", u"pxe", u"ay", u"me", u"tsa", u"fì", u"ke")
adpositions = (u"kxamlä", u"mungwrr", u"nemfa", u"pximaw", u"pxisre", u"tafkip", u"takip", u"teri", u"mìkam", u"ìla", u"fkip", u"fpi", u"ftu", u"kip", u"lok", u"luke", u"maw", u"pxel", u"pxaw", u"rofa", u"sre", u"sìn", u"vay", u"eo", u"fa", u"hu", u"io", u"ka", u"mì", u"na", u"ne", u"ro", u"ta", u"uo", u"wä", u"äo")
postfixes = (u"an", u"eyä", u"e", u"tsyìp", u"o", u"ìri", u"ìl", u"it", u"lo", u"ng", u"ri", u"ru", u"ti", u"ur", u"ve", u"yä", u"ya", u"tu", u"vi", u"yu", u"l", u"t", u"y", u"a", u"ä") + adpositions
#prefixesn = ur"(?P<npr>(?:(?:fì|tsa)?(?:me|pxe|ay|fra)?|(?:fay)?|(?:tsay)?)(?:fne)?(?:tì|sä)?"
#prefixesv = ur"(?P<vpr>(?:nì|sä|tì|rä'ä |ke )?)"
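The reordered tuples on the Rev 113 side appear to put longer affixes ahead of the shorter forms they begin with (u"tsay" before u"tsa" before u"a", u"ay" before u"a"). If the candidates are tried in tuple order, or joined into a regex alternation, that ordering decides which affix wins. A minimal sketch of the effect, with a hypothetical helper that is not part of the repository:

# -*- coding: utf-8 -*-
# Hypothetical helper: return the first listed prefix that a word starts with.
def strip_prefix(word, prefixes):
    for pre in prefixes:
        if word.startswith(pre):
            return pre, word[len(pre):]
    return u"", word

print strip_prefix(u"aylì'u", (u"ay", u"a"))   # (u'ay', u"lì'u") - plural prefix found first
print strip_prefix(u"aylì'u", (u"a", u"ay"))   # (u'a', u"ylì'u") - shadowed by the shorter u"a"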
 
64,10 → 63,6
            temp = splitword[wor][:-3] + u"ng"
            if temp in wordin[wor]:
                center = temp
        if splitword[wor].endswith(u"po"):
            temp = splitword[wor][:-3] + u"p"
            if temp in wordin[wor]:
                center = temp
        if center == u"":
            foundit = False
            break
104,16 → 99,14
    return ret

def parsesent(sent):
    sent = sent.strip().lower().replace(u"’", u"'")
    sent = re.sub(ur"[^\wìä' ]",u"",sent)
    sent = sent.strip().lower()
    sent = re.sub(ur"[^\wìä ]",u"",sent)
    sent = re.sub(ur"\ +",u" ",sent)
    sent = sent.split(u" ")
    ret = []
    left = len(sent)
    while left:
        word = parsenum.parse(sent[len(sent)-left])
        if word == None:
            word = parseword(sent[-left:])
        word = parseword(sent[-left:])
        left -= len(word["word"]["navi"].split(" "))
        ret.append(word)
    return ret
    return ret
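For orientation, a hypothetical usage sketch of the Rev 113 parsesent (not part of the repository): each whitespace-separated token is offered to parsenum first, then to parseword, and the results come back as a list of dicts. The import path and the result keys other than "word"/"navi" are assumptions, and the module needs the navi database reachable at import time because wordlist is loaded from dbconnector.

# -*- coding: utf-8 -*-
from tsimapiak import parse2          # assumes tsimapiak is an importable package

for result in parse2.parsesent(u"oel ngati kameie"):
    # "word"/"navi" is the matched dictionary entry; "pref", "inf" and "post"
    # are assumed to hold the stripped affixes, mirroring the dict that
    # parsenum.parse returns
    print result["word"]["navi"], result["pref"], result["inf"], result["post"]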
/tsimapiak/dbconnector.py
9,10 → 9,9
    current = u""
    db = tornado.database.Connection("127.0.0.1", "navi", user="navi", password="navi")
    for row in db.query("""
            SELECT *
            SELECT *, CHAR_LENGTH(navi) AS NL
            FROM `metaWords`
            WHERE partOfSpeech <> 'num.' AND partOfSpeech <> "prefix"
            ORDER BY CHAR_LENGTH(navi) DESC"""):
            ORDER BY NL DESC"""):
        if row["infixes"]:
            ret.append({"id": row["id"], "navi": row["navi"], "infix": row["infixes"].lower(), "type": row["partOfSpeech"]})
        else:
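Both revisions sort the word list by descending Na'vi length (ORDER BY CHAR_LENGTH(navi) DESC versus the aliased NL), presumably so that longer dictionary entries are tried before the shorter entries they contain. A small illustration of the same idea in Python, with an assumed dict shape and made-up entries, not repository code:

# -*- coding: utf-8 -*-
wordlist = [{"navi": u"ka"}, {"navi": u"kame"}, {"navi": u"kameie"}]
# longest-first: the in-Python equivalent of ORDER BY CHAR_LENGTH(navi) DESC
wordlist.sort(key=lambda row: len(row["navi"]), reverse=True)
print [row["navi"] for row in wordlist]   # [u'kameie', u'kame', u'ka']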
/tsimapiak/parsenum.py
30,15 → 30,20
 
 
numre = \
u"^(a?)(?:(" + "|".join(base) + u")zazam??)?" + \
u"^(?:(" + "|".join(base) + u")zazam??)?" + \
u"(?:(" + "|".join(base) + u")vozam??)?" + \
u"(?:(" + "|".join(base) + u")zam??)?" + \
u"(?:(" + "|".join(base) + u")vo(?:l(?=a|))?)?" + \
u"((?:" + "|".join(rem) + u")|" + \
u"(?:" + "|".join(num) + u"))?((?:ve)?)(a?)$"
u"(?:" + "|".join(num) + u"))?$"
numre = re.compile(numre)
 
def parse(numin):
    if type(numin) != unicode:
        return None
    if numin == u"":
        return None
    numin = numin.replace(u"í",u"ì").replace(u"á",u"ä")
    try:
        mat = numre.match(numin).groups()
    except:
46,43 → 51,31
    numout = 0
    numoct = 0
    try:
        numout += rem.index(mat[5]) + 1
        numoct += rem.index(mat[5]) + 1
        numout += rem.index(mat[4]) + 1
        numoct += rem.index(mat[4]) + 1
    except:
        try:
            numout += num.index(mat[5])
            numoct += num.index(mat[5])
            numout += num.index(mat[4])
            numoct += num.index(mat[4])
        except: pass
    try:
        numout += (base.index(mat[4]) + 1) * 8
        numoct += (base.index(mat[4]) + 1) * 10
        numout += (base.index(mat[3]) + 1) * 8
        numoct += (base.index(mat[3]) + 1) * 10
    except: pass
    try:
        numout += (base.index(mat[3]) + 1) * 8**2
        numoct += (base.index(mat[3]) + 1) * 10**2
        numout += (base.index(mat[2]) + 1) * 8**2
        numoct += (base.index(mat[2]) + 1) * 10**2
    except: pass
    try:
        numout += (base.index(mat[2]) + 1) * 8**3
        numoct += (base.index(mat[2]) + 1) * 10**3
        numout += (base.index(mat[1]) + 1) * 8**3
        numoct += (base.index(mat[1]) + 1) * 10**3
    except: pass
    try:
        numout += (base.index(mat[1]) + 1) * 8**4
        numoct += (base.index(mat[1]) + 1) * 10**4
        numout += (base.index(mat[0]) + 1) * 8**4
        numoct += (base.index(mat[0]) + 1) * 10**4
    except: pass
    retnum = unicode(numout)
    if mat[6] != u"":
        retnum += u"."
    prefs = []
    posts = []
    if mat[0] != u"":
        prefs.append(mat[0])
    if mat[6] != u"":
        posts.append(mat[6])
    if mat[7] != u"":
        posts.append(mat[7])
    return {"word": {"id": 0, "navi": retnum, "infix": u"", "type": u""}, "pref": [prefs], "post": [posts], "inf": [u"", u"", u""], "len": False, "dec": numout, "oct": numoct}
    #return numout, numoct
    return numout, numoct
 
 
if __name__ == "__main__":
    print parse(u"mrrvolawvea")
    print parse(u"mrrvolaw")
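A worked example of the arithmetic above, assuming the usual Na'vi digit values (aw = 1, mrr = 5, vol marking the eights place); the figures are illustrative, not taken from the repository's output:

# u"mrrvolaw" splits as mrr + vol + aw: five eights plus one
numout = 5 * 8 + 1     # 41, the decimal value
numoct = 5 * 10 + 1    # 51, the same digits read as base-8 notation
# the Rev 113 parse(u"mrrvolawvea") would additionally strip the ordinal u"ve"
# and attributive u"a" into the "post" list around the same 41/51 result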
/webapp/templates/parse2.html
39,5 → 39,4
<script type="text/javascript">
document.getElementById("word").focus();
</script>
<b>This program uses Eana Eltu for the list of words and infix positions (but nothing else), created by Tuiq and Taronyu. Thanks also go to the rest of the Learn Na'vi community!</b>
{% end %}
{% end %}
/webapp/main.py
29,10 → 29,8
            self.redirect("/number")
        numout = parsenum.parse(num.replace(" ",""))
        if numout == None:
            numoutt = -1
        else:
            numoutt = [numout["dec"], numout["oct"]]
        self.render("templates/number.html", last=num, numout=numoutt)
            numout = -1
        self.render("templates/number.html", last=num, numout=numout)

class Restart(tornado.web.RequestHandler):
    def get(self):
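A consolidated sketch of how the Rev 113 handler presumably wires parsenum into the number page; the class name and form-field name are assumptions, only the body mirrors the hunk above:

# -*- coding: utf-8 -*-
import tornado.web
from tsimapiak import parsenum

class NumberHandler(tornado.web.RequestHandler):    # hypothetical name
    def post(self):
        num = self.get_argument("num", u"").strip() # assumed form field
        numout = parsenum.parse(num.replace(" ", ""))
        if numout == None:
            numoutt = -1                            # template treats -1 as "not a number"
        else:
            numoutt = [numout["dec"], numout["oct"]]
        self.render("templates/number.html", last=num, numout=numoutt)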