/tsimapiak/parse2.py |
---|
0,0 → 1,23 |
#!/usr/bin/python |
# -*- coding: utf-8 -*- |
import re |
import dbconnector |
# Regex fragments for the three infix positions.  Each is an optional
# alternation captured in a named group (in0 / in1 / in2), so the group matches
# the empty string when no infix is present.  Plain u"" literals are used
# because the patterns contain no backslashes.
infixes0 = u"(?P<in0>(?:awn|eyk|us|äp)?)"
infixes1 = u"(?P<in1>(?:ìyev|iyev|ìmìy|arm|asy|ilv|ìmv|imv|ìrm|irv|ìsy|aly|ary|ìly|ìry|ìlm|alm|am|ay|er|ìm|iv|ìy|ol)?)"
infixes2 = u"(?P<in2>(?:äng|ats|eiy|ei|uy)?)"
#prefixesn = ur"(?P<pre>(?:(?:fì|tsa)?(?:me|pxe|ay|fra)?|(?:fay)?|(?:tsay)?)(?:fne)?(?:tì|sä)?"


def parseword(wordin):
    """Find the first dictionary entry whose infix template matches *wordin*.

    Each entry returned by ``dbconnector.getnavilist()`` is expected to carry
    its template at index 2: space-separated parts in which the marker
    ``<0><1>`` stands for infix positions 0 and 1 and ``<2>`` for position 2.
    Every part is wrapped in ``(?:.*)`` on both sides, so surrounding text in
    *wordin* is accepted.

    Returns a four-item list ``[entry, infix0, infix1, infix2]``.  When no
    entry matches, ``entry`` is the placeholder ``[0, u"", u"", u""]`` and all
    infixes are empty strings.
    """
    for word in dbconnector.getnavilist():
        wordre = u" ".join(
            u"(?:.*)" + part + u"(?:.*)" for part in word[2].split(u" ")
        ).replace(u"<0><1>", infixes0 + infixes1).replace(u"<2>", infixes2)
        rem = re.match(wordre, wordin)
        if rem:
            # A template without <0><1> or <2> markers never gets the
            # corresponding named groups, and rem.group("in0") would raise
            # IndexError.  Read through groupdict() and default to u"".
            found = rem.groupdict()
            return [
                word,
                found.get("in0") or u"",
                found.get("in1") or u"",
                found.get("in2") or u"",
            ]
    return [[0, u"", u"", u""], u"", u"", u""]
/tsimapiak/parse.py |
---|
7,7 → 7,7 |
wordlist = getnavilist() |
# Infix tables, one list per infix position.  The position-2 list is scanned in
# order with startswith() and the first hit wins, so a longer infix must come
# before any shorter infix that is its prefix (e.g. "eiy" before "ei").
infixes0 = [u"awn", u"eyk", u"us", u"äp"]
# Every entry needs its own comma: adjacent string literals are concatenated,
# which previously fused "imv" and "ìrm" into the single bogus entry "imvìrm".
infixes1 = [
    u"ìyev", u"iyev", u"ìmìy", u"arm", u"asy", u"ilv", u"ìmv", u"imv",
    u"ìrm", u"irv", u"ìsy", u"aly", u"ary", u"ìly", u"ìry", u"ìlm",
    u"alm", u"am", u"ay", u"er", u"ìm", u"iv", u"ìy", u"ol",
]
infixes2 = [u"äng", u"ats", u"eiy", u"ei", u"uy"]
# Returns array with Word,Infix 0,Infix 1,Infix 2,Case,Gender,Number suffixes,Inclusive,Indefinite,Vocative (suffix),Plural,Adposition,Adject pre,Adject suff,am/ay/tu/vi/yu,adverbial,nominalise,sä,fne,lenited? |
69,10 → 69,11 |
gotinfix2 = False |
for eachinfix in infixes2: |
if infix_2.startswith(eachinfix): |
infix2 = infix_2 |
infix2 = infix_2[:len(eachinfix)] |
infix_2 = infix_2[len(eachinfix) - 1:] |
gotinfix2 = True |
break |
if gotinfix2 == False: |
if gotinfix2 == False or infix_2 != u"": |
if infix_2.startswith(end): |
suffixes = infix2[len(end) - 1:] + end |
elif infix_2 == u"": |
/webapp/main.py |
---|
12,6 → 12,7 |
from tsimapiak import parsenum |
from tsimapiak import dbconnector |
from tsimapiak import parse |
from tsimapiak import parse2 |
class Index(tornado.web.RequestHandler): |
def get(self): |
43,7 → 44,7 |
text += u"<br />".join(u" | ".join(unicode(y) for y in x) for x in lis) |
self.write(text) |
class ParseTest(tornado.web.RequestHandler): |
class Parse(tornado.web.RequestHandler): |
def get(self): |
self.render("templates/parse.html", last="", out=None) |
55,12 → 56,25 |
out = parse.parsefix(word) |
self.render("templates/parse.html", last=word, out=out) |
class Parse2(tornado.web.RequestHandler):
    """Handler for ``/parse2``: parses a submitted word with ``parse2.parseword``."""

    def get(self):
        """Render the parse form with no previous input and no result."""
        self.render("templates/parse.html", last="", out=None)

    def post(self):
        """Parse the submitted ``word`` field and render the result.

        When the ``word`` argument is missing, the client is redirected back
        to the form instead.
        """
        # Supplying a default makes get_argument return it rather than raise,
        # so no broad except clause is needed.
        word = self.get_argument("word", None)
        if word is None:
            # redirect() finishes the response; return so we neither touch the
            # missing word nor try to render a second time.
            self.redirect("/parse2")
            return
        word = word.strip()
        out = parse2.parseword(word)
        self.render("templates/parse.html", last=word, out=out)
# URL routing table: each pair maps a request path to its handler class.
# Only the current handler is listed for "/parse"; a stale ParseTest entry
# sitting next to it without a comma would be parsed as a call on a tuple.
application = tornado.web.Application([
    ("/", Index),
    ("/number", Number),
    ("/restart", Restart),
    ("/testdb", TestDB),
    ("/parse", Parse),
    ("/parse2", Parse2),
])
if __name__ == "__main__": |
/webapp/templates/parse.html |
---|
9,10 → 9,10 |
<input name="btn" type="submit" value="Parse!" /> |
</form> |
{% if type(out) == list %} |
{{ out[0] }} <br />
{{ out[1] }} <br />
{{ out[2] }} <br />
{{ out[3] }}
{% end %} |
<script type="text/javascript"> |
document.getElementById("word").focus(); |