Subversion Repositories navi

Compare Revisions

Ignore whitespace Rev 89 → Rev 113

/webapp/templates/parse2.html
17,7 → 17,7
</tr>
{% for wor in out %}
<tr>
<td rowspan="3">{{ wor["word"]["navi"] }}</td>
<td rowspan="4">{{ wor["word"]["navi"] }}</td>
<td>Infixes:</td>
<td>{{ u", ".join(wor["inf"]) }}</td>
</tr>
29,6 → 29,10
<td>Postfixes:</td>
<td>{{ u"; ".join(u", ".join(x) for x in wor["post"]) }}</td>
</tr>
<tr>
<td>Lenited:</td>
<td>{{ str(wor["len"]) }}</td>
</tr>
{% end %}
</table>
{% end %}
35,4 → 39,5
<script type="text/javascript">
document.getElementById("word").focus();
</script>
{% end %}
<b>This program uses Eana Eltu for the list of words and infix positions (but nothing else), created by Tuiq and Taronyu. Thanks also go to the rest of the Learn Na'vi community!</b>
{% end %}
/webapp/main.py
29,8 → 29,10
self.redirect("/number")
numout = parsenum.parse(num.replace(" ",""))
if numout == None:
numout = -1
self.render("templates/number.html", last=num, numout=numout)
numoutt = -1
else:
numoutt = [numout["dec"], numout["oct"]]
self.render("templates/number.html", last=num, numout=numoutt)
 
class Restart(tornado.web.RequestHandler):
def get(self):
/tsimapiak/parse2.py
3,23 → 3,28
 
import re
import dbconnector
import parsenum
 
wordlist = dbconnector.getnavilist()
 
infixes1 = [u"awn", u"eyk", u"us", u"äp", u""]
infixes2 = [u"ìyev", u"iyev", u"ìmìy", u"arm", u"asy", u"ilv", u"ìmv", u"imv", u"ìrm", u"irv", u"ìsy", u"aly", u"ary", u"ìly", u"ìry", u"ìlm", u"alm", u"am", u"ay", u"er", u"ìm", u"iv", u"ìy", u"ol", u""]
infixes3 = [u"äng", u"ats", u"eiy", u"ei", u"uy", u""]
prefixes = [u"tsay", u"fay", u"tsa", u"fra", u"pxe", u"ay", u"me", u"pe", u"le", u"nì", u"sä", u"tì", u"ke", u"fì", u"a"]
postfixes = [u"eyä", u"ìri", u"an", u"ìl", u"it", u"lo", u"ng", u"ri", u"ru", u"ti", u"ur", u"ve", u"yä", u"ya", u"tu", u"vi", u"yu", u"e", u"l", u"o", u"t", u"y", u"a", u"ä"]
infixes1 = (u"awn", u"eyk", u"us", u"äp", u"")
infixes2 = (u"ìyev", u"iyev", u"ìmìy", u"arm", u"asy", u"ilv", u"ìmv", u"imv", u"ìrm", u"irv", u"ìsy", u"aly", u"ary", u"ìly", u"ìry", u"ìlm", u"alm", u"am", u"ay", u"er", u"ìm", u"iv", u"ìy", u"ol", u"")
infixes3 = (u"äng", u"ats", u"eiy", u"ei", u"uy", u"")
prefixes = (u"tsay", u"fay", u"fra", u"pxe", u"fne", u"tsa", u"pe", u"le", u"nì", u"sä", u"tì", u"ay", u"me", u"fì", u"ke", u"a")
adpositions = (u"mungwrr", u"kxamlä", u"pximaw", u"pxisre", u"tafkip", u"nemfa", u"takip", u"mìkam", u"teri", u"fkip", u"luke", u"pxel", u"pxaw", u"rofa", u"ìla", u"fpi", u"ftu", u"kip", u"lok", u"maw", u"sre", u"sìn", u"vay", u"eo", u"fa", u"hu", u"io", u"ka", u"mì", u"na", u"ne", u"ro", u"ta", u"uo", u"wä", u"äo", u"to")
postfixes = adpositions + (u"tsyìp", u"eyä", u"ìri", u"ìl", u"it", u"lo", u"ri", u"ru", u"ti", u"ur", u"ve", u"yä", u"ya", u"tu", u"vi", u"yu", u"an", u"ng", u"e", u"o", u"l", u"t", u"y", u"a", u"ä")
#prefixesn = ur"(?P<npr>(?:(?:fì|tsa)?(?:me|pxe|ay|fra)?|(?:fay)?|(?:tsay)?)(?:fne)?(?:tì|sä)?"
#prefixesv = ur"(?P<vpr>(?:nì|sä|tì|rä'ä |ke )?)"
 
lenit = ((u"px", u"p"), (u"tx", u"t"), (u"kx", u"k"), (u"ts", u"s"), (u"t", u"s"), (u"p", u"f"), (u"k", u"h"), (u"'", u""))
 
def parseword(wordin):
ret = {"word": {"id": 0, "navi": u"[" + u" ".wordin[0] + u"]", "infix": u"", "type": u""}}
ret = {"word": {"id": 0, "navi": u"[" + wordin[0] + u"]", "infix": u"", "type": u""}}
for word in wordlist:
foundit = True
foundprefs = []
foundposts = []
lenited = False
splitword = word["infix"].split(u" ")
if len(wordin) < len(splitword):
foundit = False
46,21 → 51,44
else:
if splitword[wor] in wordin[wor]:
center = splitword[wor]
if center == u"":
for i in lenit:
temp = u""
if splitword[wor].startswith(i[0]):
temp = i[1] + splitword[wor][len(i[0]):]
if temp in wordin[wor]:
lenited = True
center = temp
if center == u"":
if splitword[wor].endswith(u"nga"):
temp = splitword[wor][:-3] + u"ng"
if temp in wordin[wor]:
center = temp
if splitword[wor].endswith(u"po"):
temp = splitword[wor][:-3] + u"p"
if temp in wordin[wor]:
center = temp
if center == u"":
foundit = False
break
pref, posf = wordin[wor].split(center)
temp = wordin[wor].split(center)
if len(temp) != 2:
foundit = False
break
pref, posf = temp
for pre in prefixes:
if pref.endswith(pre):
foundprefs[wor].append(pre)
pref = pref[:-len(pre)]
if pref != u"":
if pref.endswith(pre):
foundprefs[wor].append(pre)
pref = pref[:-len(pre)]
if pref != u"":
foundit = False
break
for pos in postfixes:
if posf.startswith(pos):
foundposts[wor].append(pos)
posf = posf[len(pos):]
if posf != u"":
if posf.startswith(pos):
foundposts[wor].append(pos)
posf = posf[len(pos):]
if posf != u"":
foundit = False
break
70,19 → 98,22
ret["pref"] = foundprefs
ret["post"] = foundposts
ret["inf"] = foundins
ret["len"] = lenited
if foundit == True:
ret["word"] = foundword
return ret
 
def parsesent(sent):
sent = sent.strip().lower()
sent = re.sub(ur"[^\w ]",u"",sent)
sent = sent.strip().lower().replace(u"’", u"'")
sent = re.sub(ur"[^\wìä' ]",u"",sent)
sent = re.sub(ur"\ +",u" ",sent)
sent = sent.split(u" ")
ret = []
left = len(sent)
while left:
word = parseword(sent[-left:])
word = parsenum.parse(sent[len(sent)-left])
if word == None:
word = parseword(sent[-left:])
left -= len(word["word"]["navi"].split(" "))
ret.append(word)
return ret
return ret
/tsimapiak/parsenum.py
30,20 → 30,15
 
 
numre = \
u"^(?:(" + "|".join(base) + u")zazam??)?" + \
u"^(a?)(?:(" + "|".join(base) + u")zazam??)?" + \
u"(?:(" + "|".join(base) + u")vozam??)?" + \
u"(?:(" + "|".join(base) + u")zam??)?" + \
u"(?:(" + "|".join(base) + u")vo(?:l(?=a|))?)?" + \
u"((?:" + "|".join(rem) + u")|" + \
u"(?:" + "|".join(num) + u"))?$"
u"(?:" + "|".join(num) + u"))?((?:ve)?)(a?)$"
numre = re.compile(numre)
 
def parse(numin):
if type(numin) != unicode:
return None
if numin == u"":
return None
numin = numin.replace(u"í",u"ì").replace(u"á",u"ä")
try:
mat = numre.match(numin).groups()
except:
51,31 → 46,43
numout = 0
numoct = 0
try:
numout += rem.index(mat[4]) + 1
numoct += rem.index(mat[4]) + 1
numout += rem.index(mat[5]) + 1
numoct += rem.index(mat[5]) + 1
except:
try:
numout += num.index(mat[4])
numoct += num.index(mat[4])
numout += num.index(mat[5])
numoct += num.index(mat[5])
except: pass
try:
numout += (base.index(mat[3]) + 1) * 8
numoct += (base.index(mat[3]) + 1) * 10
numout += (base.index(mat[4]) + 1) * 8
numoct += (base.index(mat[4]) + 1) * 10
except: pass
try:
numout += (base.index(mat[2]) + 1) * 8**2
numoct += (base.index(mat[2]) + 1) * 10**2
numout += (base.index(mat[3]) + 1) * 8**2
numoct += (base.index(mat[3]) + 1) * 10**2
except: pass
try:
numout += (base.index(mat[1]) + 1) * 8**3
numoct += (base.index(mat[1]) + 1) * 10**3
numout += (base.index(mat[2]) + 1) * 8**3
numoct += (base.index(mat[2]) + 1) * 10**3
except: pass
try:
numout += (base.index(mat[0]) + 1) * 8**4
numoct += (base.index(mat[0]) + 1) * 10**4
numout += (base.index(mat[1]) + 1) * 8**4
numoct += (base.index(mat[1]) + 1) * 10**4
except: pass
return numout, numoct
retnum = unicode(numout)
if mat[6] != u"":
retnum += u"."
prefs = []
posts = []
if mat[0] != u"":
prefs.append(mat[0])
if mat[6] != u"":
posts.append(mat[6])
if mat[7] != u"":
posts.append(mat[7])
return {"word": {"id": 0, "navi": retnum, "infix": u"", "type": u""}, "pref": [prefs], "post": [posts], "inf": [u"", u"", u""], "len": False, "dec": numout, "oct": numoct}
#return numout, numoct
 
 
if __name__ == "__main__":
print parse(u"mrrvolaw")
print parse(u"mrrvolawvea")
/tsimapiak/dbconnector.py
9,9 → 9,10
current = u""
db = tornado.database.Connection("127.0.0.1", "navi", user="navi", password="navi")
for row in db.query("""
SELECT *, CHAR_LENGTH(navi) AS NL
SELECT *
FROM `metaWords`
ORDER BY NL DESC"""):
WHERE partOfSpeech <> 'num.' AND partOfSpeech <> "prefix"
ORDER BY CHAR_LENGTH(navi) DESC"""):
if row["infixes"]:
ret.append({"id": row["id"], "navi": row["navi"], "infix": row["infixes"].lower(), "type": row["partOfSpeech"]})
else: