Subversion Repositories navi

Rev

Blame | Compare with Previous | Last modification | View Log | RSS feed

# -*- coding: utf-8 -*-
import re

# Lexicon entries matched by parsefix.  "<0><1>" and "<2>" mark the two
# places inside an entry where parsefix looks for infixes.
strings = [
  "oe",
  "nga",
  "k<0><1>am<2>e",
  "t<0><1><2>ìng nari",
]

# Infix candidates, one list per slot.  parsefix walks each list in order and
# keeps the first entry that prefixes the captured text, so an entry that is
# a prefix of another one (e.g. "ìy" / "ìyev") must come after the longer one.
infixes0 = [
  "awn", "eyk", "us", "äp",
]
infixes1 = [
  "ìyev", "iyev",
  "arm", "asy", "ilv", "ìmv", "ìrm", "irv", "ìsy",
  "am", "ay", "er", "ìm", "iv", "ìy", "ol",
]
infixes2 = [
  "äng", "ats", "ei", "uy",
]

def _fields(word, infix0=u"", infix1=u"", infix2=u""):
  """Build the 18-slot result list; only the first four slots are filled in."""
  return [word, infix0, infix1, infix2] + [u""] * 14


def _split_infixes(early, late, adjacent, end, pos0, pos1, pos2):
  """Split captured slot text into (infix0, infix1, infix2); None if it does not parse."""
  infix0 = u""
  for candidate in pos0:
    if early.startswith(candidate):
      infix0 = candidate
      early = early[len(candidate):]
      break

  infix1 = u""
  for candidate in pos1:
    if early.startswith(candidate):
      infix1 = candidate
      early = early[len(candidate):]
      break

  if adjacent:
    # "<0><1><2>" sit next to each other, so the first capture swallowed the
    # text of all three slots: whatever is left over belongs to slot <2>.
    late = early
  elif early:
    # Unrecognised text between the <0>/<1> infixes and the next literal part.
    return None

  for candidate in pos2:
    if late.startswith(candidate):
      tail = late[len(candidate):]
      # The <2> capture is greedy and may run on past the literal text that
      # follows the marker; such a tail is fine, anything else is not.
      if not tail or tail.startswith(end):
        return (infix0, infix1, candidate)

  if late and not late.startswith(end):
    return None
  return (infix0, infix1, u"")


def parsefix(original, words=None, pos0=None, pos1=None, pos2=None):
  """Look up `original` in the lexicon and pull out its infixes.

  Returns a list of 18 items laid out as:
  Word, Infix 0, Infix 1, Infix 2, Case, Gender, Number suffixes, Inclusive,
  Indefinite, Vocative (suffix), Plural, Adposition, Adject pre, Adject suff,
  am/ay/tu/vi/yu, adverbial, nominalise, sa.
  Only the first four items are filled in; the others are always empty
  strings.

  original -- the text to analyse.
  words    -- lexicon entries; "<0><1>" and "<2>" mark the infix slots.
              Defaults to the module-level `strings`.
  pos0, pos1, pos2 -- infix candidates for slots <0>, <1> and <2>, tried in
              list order.  Default to `infixes0`, `infixes1`, `infixes2`.

  The first lexicon entry whose pattern matches `original` is used.  If no
  entry matches, or the text found in the slots cannot be explained by the
  infix lists, the first item is "[" + original + "]" and every other item
  is empty.

  NOTE: the lookup pattern tolerates extra non-space text around a space
  inside an entry, but the slot extraction requires the entry's literal text
  (space included) to appear verbatim, so such forms come back unparsed.
  """
  if words is None:
    words = strings
  if pos0 is None:
    pos0 = infixes0
  if pos1 is None:
    pos1 = infixes1
  if pos2 is None:
    pos2 = infixes2

  realword = u""
  for eachword in words:
    # Spaces must be expanded before the markers are: the marker replacement
    # text itself contains a space.
    lookup = "[^ ]*" + eachword.replace(" ", "[^ ]* [^ ]*") + "[^ ]*"
    lookup = lookup.replace("<0><1>", "[^ ]*").replace("<2>", "[^ ]*")
    if re.match(lookup, original):
      realword = eachword
      break
  if not realword:
    return _fields("[" + original + "]")
  if "<" not in realword:
    return _fields(realword)

  # One pattern with a named group per slot; a missing marker simply leaves
  # its group out instead of producing an invalid group reference.
  body = realword.replace("<0><1>", "(?P<early>[^ ]*)")
  body = re.compile(body.replace("<2>", "(?P<late>[^ ]*)"))
  adjacent = "<0><1><2>" in realword
  end = realword.rpartition("<2>")[2]

  # Try every place the entry could start, earliest first, and keep the first
  # alignment whose slots parse.  Anchoring on the last possible start would
  # hide any infix that contains the entry's first letter (e.g. "eyk" in an
  # entry starting with "k").
  for start in range(len(original) + 1):
    slots = body.match(original, start)
    if slots is None:
      continue
    found = slots.groupdict()
    parsed = _split_infixes(found.get("early", u""), found.get("late", u""),
                            adjacent, end, pos0, pos1, pos2)
    if parsed is not None:
      return _fields(realword, *parsed)
  return _fields("[" + original + "]")

# Smoke-test samples.  Single-argument print(...) gives the same output under
# the Python 2 print statement and the Python 3 print function.
print(parsefix("oel"))
print(parsefix("ngati"))
print(parsefix("kameie"))
print(parsefix("kìyevame"))
print(parsefix("English"))