Subversion Repositories navi

Compare Revisions

Ignore whitespace Rev 55 → Rev 56

/tsimapiak/parse2.py
0,0 → 1,23
#!/usr/bin/python
# -*- coding: utf-8 -*-
 
import re
import dbconnector
 
# Regex fragments that parseword() substitutes for the infix markers found in
# dictionary entries: "<0><1>" becomes infixes0 + infixes1 and "<2>" becomes
# infixes2.  Each fragment is a named group wrapping an optional alternation,
# so the group always participates in a match and captures u"" when none of
# its alternatives is present.
#
# The literals use the plain u"" prefix instead of ur"": the patterns contain
# no backslashes, so the values are identical, and u"" is accepted by both
# Python 2 and Python 3.3+ (ur"" is a syntax error on Python 3).
infixes0 = u"(?P<in0>(?:awn|eyk|us|äp)?)"
infixes1 = u"(?P<in1>(?:ìyev|iyev|ìmìy|arm|asy|ilv|ìmv|imv|ìrm|irv|ìsy|aly|ary|ìly|ìry|ìlm|alm|am|ay|er|ìm|iv|ìy|ol)?)"
# Longer alternatives come first ("eiy" before "ei") so the longest spelling
# is the one captured.
infixes2 = u"(?P<in2>(?:äng|ats|eiy|ei|uy)?)"
# TODO: draft prefix pattern, not used by the visible code.  As written the
# outer "(?P<pre>" group is never closed, so it would not compile if enabled.
#prefixesn = u"(?P<pre>(?:(?:fì|tsa)?(?:me|pxe|ay|fra)?|(?:fay)?|(?:tsay)?)(?:fne)?(?:tì|sä)?"
 
def parseword(wordin):
    """Find the first dictionary entry whose pattern matches ``wordin``.

    Every entry returned by ``dbconnector.getnavilist()`` is turned into a
    regular expression built from ``entry[2]``: the text is split on single
    spaces, each piece is wrapped in ``(?:.*)`` on both sides, the pieces are
    re-joined with a space, and the markers ``<0><1>`` and ``<2>`` are
    replaced by the module-level infix patterns.  The entries are tried in
    order and the first one that matches wins.

    :param wordin: the (unicode) text to analyse.
    :returns: a 4-item list ``[entry, in0, in1, in2]`` where ``entry`` is the
        matching dictionary row and ``in0``/``in1``/``in2`` are the captured
        infix strings (``u""`` when absent).  When nothing matches the result
        is ``[[0, u"", u"", u""], u"", u"", u""]``.

    NOTE(review): ``entry[2]`` is inserted into the pattern unescaped, so any
    regex metacharacters in the data are interpreted as pattern syntax.
    """
    wildcard = u"(?:.*)"
    for word in dbconnector.getnavilist():
        pieces = (wildcard + piece + wildcard for piece in word[2].split(u" "))
        wordre = u" ".join(pieces)
        wordre = wordre.replace(u"<0><1>", infixes0 + infixes1)
        wordre = wordre.replace(u"<2>", infixes2)
        rem = re.match(wordre, wordin)
        if rem:
            # Entries without infix markers produce a pattern that lacks the
            # named groups; rem.group("in0") would raise IndexError for them,
            # so read the groups through groupdict() with an empty default.
            captured = rem.groupdict()
            return [word] + [captured.get(name, u"") for name in ("in0", "in1", "in2")]
    # No entry matched: return the placeholder row with empty infixes.
    return [[0, u"", u"", u""], u"", u"", u""]