0,0 → 1,91 |
# -*- coding: utf-8 -*- |
import re |
|
# Dictionary templates that parsefix() tries in order (the first match wins).
# "<0><1>" and "<2>" mark the slots where infixes may appear; entries without
# markers never carry infixes.
strings = [
    u"oe",
    u"nga",
    u"k<0><1>am<2>e",
    u"t<0><1><2>ìng nari",
    u"s<0><1><2>i",
]

# Infixes recognised at the very start of the "<0><1>" slot.
infixes0 = [u"awn", u"eyk", u"us", u"äp"]

# Infixes recognised in the "<0><1>" slot after an optional infixes0 entry.
# parsefix() takes the first entry that is a prefix of the slot text, so the
# order of this list is significant.
infixes1 = [
    u"ìyev", u"iyev",
    u"arm", u"asy", u"ilv", u"ìmv", u"ìrm", u"irv", u"ìsy",
    u"am", u"ay", u"er", u"ìm", u"iv", u"ìy", u"ol",
]

# Infixes recognised in the "<2>" slot.
infixes2 = [u"äng", u"ats", u"ei", u"uy"]
|
def _compile_template(template):
    """Compile a dictionary *template* into a regex for its inflected forms.

    Literal text is escaped so regex metacharacters in a template match
    themselves.  The "<0><1>" and "<2>" markers become the named groups
    ``slot01`` and ``slot2``, each capturing the non-space text found in that
    slot.  Every space also admits extra non-space text on both sides, and a
    lazy leading ``[^ ]*?`` admits a prefix while preferring the earliest place
    where the template can start.  Each marker is expected at most once.
    """
    pieces = [u"[^ ]*?"]
    for token in re.split(u"(<0><1>|<2>| )", template):
        if token == u"<0><1>":
            pieces.append(u"(?P<slot01>[^ ]*)")
        elif token == u"<2>":
            pieces.append(u"(?P<slot2>[^ ]*)")
        elif token == u" ":
            pieces.append(u"[^ ]* [^ ]*")
        else:
            pieces.append(re.escape(token))
    return re.compile(u"".join(pieces))


def _leading_infix(text, candidates):
    """Return the first entry of *candidates* that *text* starts with, else u""."""
    for candidate in candidates:
        if text.startswith(candidate):
            return candidate
    return u""


def parsefix(original, words=None, infixes=None):
    """Identify the dictionary template behind *original* and its infixes.

    Args:
        original: The inflected text to analyse.
        words: Optional list of templates to try, in order.  Defaults to the
            module-level ``strings`` list.
        infixes: Optional 3-sequence ``(position0, position1, position2)`` of
            infix lists.  Defaults to the module-level ``infixes0``,
            ``infixes1`` and ``infixes2``.

    Returns:
        A list of 18 strings: Word, Infix 0, Infix 1, Infix 2, Case, Gender,
        Number suffixes, Inclusive, Indefinite, Vocative (suffix), Plural,
        Adposition, Adject pre, Adject suff, am/ay/tu/vi/yu, adverbial,
        nominalise, sä.  Only the first four fields are filled in; the rest
        are empty strings.  When no template fits, or the slot contents cannot
        be explained by the infix tables, the first field is *original*
        wrapped in square brackets and every other field is empty.
    """
    if words is None:
        words = strings
    if infixes is None:
        infixes = (infixes0, infixes1, infixes2)
    position0, position1, position2 = infixes

    field_count = 18
    unknown = [u"[" + original + u"]"] + [u""] * (field_count - 1)

    # The first template whose pattern matches decides the analysis.
    template = None
    match = None
    for candidate in words:
        match = _compile_template(candidate).match(original)
        if match:
            template = candidate
            break
    if template is None:
        return unknown

    if u"<" not in template:
        return [template] + [u""] * (field_count - 1)

    captured = match.groupdict()
    slot01 = captured.get("slot01") or u""
    slot2 = captured.get("slot2") or u""

    # Peel the position-0 and then the position-1 infix off the first slot.
    infix0 = _leading_infix(slot01, position0)
    rest = slot01[len(infix0):]
    infix1 = _leading_infix(rest, position1)
    rest = rest[len(infix1):]

    if rest:
        # When the slots are adjacent the greedy first group swallows the
        # text of both, so whatever is left belongs to the "<2>" slot.
        # Otherwise the first slot holds something that is not an infix.
        if u"<0><1><2>" not in template:
            return unknown
        slot2 = rest

    infix2 = _leading_infix(slot2, position2)
    leftover = slot2[len(infix2):]

    # Greedy matching can pull the template's own ending (followed by suffix
    # text that repeats it) into the slot; that is the only leftover accepted.
    ending = template.rpartition(u"<2>")[2]
    if leftover and not leftover.startswith(ending):
        return unknown

    return [template, infix0, infix1, infix2] + [u""] * (field_count - 4)
|
# Demonstration: analyse a few sample inputs and print each result list.
# print(...) with a single argument produces the same output on Python 2
# and Python 3.
for sample in (
    u"oel",
    u"ngati",
    u"kameie",
    u"kìyevame",
    u"English",
    u"keykivame",
    u"tìsusiti",
    u"tayìng nari",
):
    print(parsefix(sample))