Blame |
Compare with Previous |
Last modification |
View Log
| RSS feed
# -*- coding: utf-8 -*-
import re
# Dictionary entries.  "<0><1>" and "<2>" mark the places inside a stem where
# infixes may be inserted; entries without markers take no infixes.
strings = [ "oe", "nga", "k<0><1>am<2>e", "t<0><1><2>ìng nari" ]
# Infixes tried at position 0 (the start of the "<0><1>" slot).
infixes0 = [ "awn", "eyk", "us", "äp" ]
# Infixes tried at position 1.  The first startswith() hit wins, so a longer
# infix has to be listed before any shorter one it begins with ("ìyev"/"ìy").
infixes1 = [ "ìyev", "iyev", "arm", "asy", "ilv", "ìmv", "ìrm", "irv", "ìsy", "am", "ay", "er", "ìm", "iv", "ìy", "ol" ]
# Infixes tried at position 2 (the "<2>" slot).
infixes2 = [ "äng", "ats", "ei", "uy" ]

_ANY_RUN = "[^ ]*"  # regex fragment: any run of non-space characters
_ADJACENT_SLOTS = "<0><1><2>"  # entry whose three infix positions share one slot


def _make_result(word, infix0=u"", infix1=u"", infix2=u""):
    """Build the 18-field result list; only the first four fields are filled."""
    return [word, infix0, infix1, infix2] + [u""] * 14


def _unparsed(original):
    """Return the result for input that cannot be analysed: "[input]" + blanks."""
    return _make_result("[" + original + "]")


def _slot_text(realword, original, wanted, other):
    """Return the part of *original* sitting at the *wanted* marker of *realword*.

    The *other* marker is turned into a non-capturing wildcard and *wanted*
    into a capturing one; the whole match is then replaced by the capture.
    """
    pattern = realword.replace(other, _ANY_RUN)
    pattern = pattern.replace(wanted, "(" + _ANY_RUN + ")")
    return re.sub(".*" + pattern + ".*", "\\1", original)


def parsefix(original):
    """Match *original* against the dictionary and split out its infixes.

    The entries of ``strings`` are tried in order; the first one whose stem
    (with wildcards at the infix markers, around it, and at word boundaries)
    matches *original* is used.

    Returns a list of 18 strings:
    Word, Infix 0, Infix 1, Infix 2, Case, Gender, Number suffixes, Inclusive,
    Indefinite, Vocative (suffix), Plural, Adposition, Adject pre,
    Adject suff, am/ay/tu/vi/yu, adverbial, nominalise, sä.
    Only the first four are ever filled in here; the rest are empty strings.
    "Word" is the matching dictionary entry, markers included.  When nothing
    matches, or a slot holds text that is not a known infix, "Word" is the
    input wrapped in square brackets and every other field is empty.
    """
    # NOTE(review): stems go into the regex unescaped, so an entry containing
    # regex metacharacters would be misread -- confirm the dictionary is clean.
    realword = u""
    for eachword in strings:
        entry_regex = eachword.replace(" ", _ANY_RUN + " " + _ANY_RUN)
        entry_regex = entry_regex.replace("<0><1>", _ANY_RUN)
        entry_regex = entry_regex.replace("<2>", _ANY_RUN)
        if re.match(_ANY_RUN + entry_regex + _ANY_RUN, original):
            realword = eachword
            break
    if not realword:
        return _unparsed(original)
    if "<" not in realword:
        # Entry has no infix markers: nothing further to analyse.
        return _make_result(realword)

    end = realword.rpartition("<2>")[2]  # stem text after the last marker
    slot01 = _slot_text(realword, original, "<0><1>", "<2>")
    slot2 = _slot_text(realword, original, "<2>", "<0><1>")

    # Position 0: strip a leading infix from the first slot, if present.
    infix0 = u""
    rest = slot01
    for candidate in infixes0:
        if slot01.startswith(candidate):
            infix0 = candidate
            rest = slot01[len(candidate):]
            break

    # Position 1: strip the next infix from what remains.
    infix1 = u""
    for candidate in infixes1:
        if rest.startswith(candidate):
            infix1 = candidate
            rest = rest[len(candidate):]
            break

    if rest != u"":
        if _ADJACENT_SLOTS not in realword:
            # Unrecognised text inside the "<0><1>" slot.
            return _unparsed(original)
        # With adjacent slots the first capture swallows everything, so the
        # leftover is the position-2 text.  Previously this hand-over was
        # skipped when no position-1 infix had matched, dropping the infix.
        slot2 = rest

    # Position 2.
    infix2 = u""
    if slot2.startswith(tuple(infixes2)):
        # NOTE(review): the whole slot text is reported, not only the matched
        # infix, so trailing characters are kept -- confirm this is intended.
        infix2 = slot2
    elif slot2.startswith(end):
        # NOTE(review): slot text beginning with the stem tail is accepted and
        # reported as "no infix"; suffix extraction is not implemented.
        pass
    elif slot2 == u"":
        infix2 = ""  # plain "" (not u"") keeps the Python 2 output unchanged
    else:
        return _unparsed(original)
    return _make_result(realword, infix0, infix1, infix2)
# Demo: print the analysis of a few sample words.  The parenthesised
# single-argument form prints the same text under Python 2's print statement
# and is also valid under Python 3's print() function.
print(parsefix("oel"))
print(parsefix("ngati"))
print(parsefix("kameie"))
print(parsefix("kìyevame"))
print(parsefix("English"))