Subversion Repositories navi

Rev

Rev 54 | Blame | Compare with Previous | Last modification | View Log | RSS feed

#!/usr/bin/python
# -*- coding: utf-8 -*-

import re
from dbconnector import getnavilist

# Dictionary rows, fetched once when this module is imported.
# parsefix() reads index 2 of each row as the word pattern, which may carry
# the infix placeholders "<0><1>" and "<2>".
# NOTE(review): the remaining row fields are not used in this file; their
# meaning depends on dbconnector.getnavilist -- confirm there.
wordlist = getnavilist()

# Infix inventories for the three infix positions. parsefix() scans each list
# with startswith() and stops at the first hit, so an entry must be listed
# before any shorter entry that is a prefix of it (e.g. u"eiy" before u"ei").
infixes0 = [ u"awn", u"eyk", u"us", u"äp" ]
# Every entry needs its own comma: adjacent string literals are silently
# concatenated into one (previously u"imv" u"ìrm" became u"imvìrm").
infixes1 = [
  u"ìyev", u"iyev", u"ìmìy", u"arm", u"asy", u"ilv", u"ìmv", u"imv",
  u"ìrm", u"irv", u"ìsy", u"aly", u"ary", u"ìly", u"ìry", u"ìlm",
  u"alm", u"am", u"ay", u"er", u"ìm", u"iv", u"ìy", u"ol",
]
infixes2 = [ u"äng", u"ats", u"eiy", u"ei", u"uy" ]

# Build the 20-field result list; only the word and the three infix slots are
# filled in so far, the other 16 fields stay empty.
def _parse_result(word, infix0=u"", infix1=u"", infix2=u""):
  return [word, infix0, infix1, infix2] + [u""] * 16

# Build the "could not parse" result: the input wrapped in square brackets.
def _parse_failure(original):
  return _parse_result(u"[" + original + u"]")

# Returns array with Word,Infix 0,Infix 1,Infix 2,Case,Gender,Number suffixes,Inclusive,Indefinite,Vocative (suffix),Plural,Adposition,Adject pre,Adject suff,am/ay/tu/vi/yu,adverbial,nominalise,sä,fne,lenited?
def parsefix(original):
  """Find the dictionary pattern behind *original* and split out its infixes.

  Each row of the module-level ``wordlist`` supplies a pattern at index 2 in
  which ``<0><1>`` marks the slot for infix positions 0 and 1 and ``<2>``
  marks the slot for infix position 2. The first row whose pattern loosely
  matches *original* (anything without spaces may surround the word and fill
  the slots) is used.

  Returns a 20-item list laid out as described in the comment above. Only
  the first four items (pattern, infix 0, infix 1, infix 2) are filled in
  here; the rest are empty strings. If no row matches, or the text found in
  an infix slot cannot be explained by ``infixes0`` / ``infixes1`` /
  ``infixes2``, the first item is ``"[" + original + "]"`` and all others
  are empty.
  """
  realword = u""
  for eachword in wordlist:
    regex = re.sub(u" ", u"[^ ]* [^ ]*", eachword[2])
    regex = re.sub(u"^", u"[^ ]*", regex)
    regex = re.sub(u"$", u"[^ ]*", regex)
    regex = re.sub(u"<0><1>", u"[^ ]*", regex)
    regex = re.sub(u"<2>", u"[^ ]*", regex)
    if re.match(regex, original):
      realword = eachword[2]
      break
  if realword == u"":
    return _parse_failure(original)
  if u"<" not in realword:
    # No infix slots in this pattern: nothing to split.
    return _parse_result(realword)

  # True when all three infix positions share a single slot in the pattern.
  adjacent = u"<0><1><2>" in realword
  # Literal tail of the pattern that follows the <2> slot.
  end = re.sub(u".*<2>", u"", realword)

  # Capture what *original* has in the <0><1> slot and in the <2> slot.
  # u"\\1" (not ur"\1") keeps this valid on both Python 2 and Python 3.
  slot01_regex = re.sub(u"<0><1>", u"([^ ]*)", re.sub(u"<2>", u"[^ ]*", realword))
  slot2_regex = re.sub(u"<2>", u"([^ ]*)", re.sub(u"<0><1>", u"[^ ]*", realword))
  infix01 = re.sub(u".*?" + slot01_regex + u".*?", u"\\1", original)
  infix_2 = re.sub(u".*?" + slot2_regex + u".*?", u"\\1", original)

  # Position 0: strip a known infix from the front of the slot text.
  infix0 = u""
  infix_1 = infix01
  for eachinfix in infixes0:
    if infix01.startswith(eachinfix):
      infix0 = eachinfix
      infix_1 = infix01[len(eachinfix):]
      break

  # Position 1: same again on whatever is left.
  infix1 = u""
  gotinfix1 = False
  for eachinfix in infixes1:
    if infix_1.startswith(eachinfix):
      infix1 = eachinfix
      infix_1 = infix_1[len(eachinfix):]
      gotinfix1 = True
      break

  if gotinfix1:
    if infix_1 != u"":
      if adjacent:
        # Shared slot: the leftover has to be the position-2 infix.
        infix_2 = infix_1
      else:
        return _parse_failure(original)
  elif adjacent:
    # Shared slot and no position-1 infix: hand the leftover (possibly
    # empty) to position 2. The old test was inverted (== u"" instead of
    # != u""), which dropped a lone infix 2 and let junk through.
    infix_2 = infix_1
  elif infix_1 != u"":
    return _parse_failure(original)

  # Position 2.
  infix2 = u""
  for eachinfix in infixes2:
    if infix_2.startswith(eachinfix):
      infix2 = eachinfix
      # Drop the whole matched infix. The old slice started one character
      # early, so its last letter was left over and the word was rejected.
      infix_2 = infix_2[len(eachinfix):]
      break

  # Unexplained leftover is an error, unless it begins with the literal tail
  # of the pattern (leniency kept from the original code).
  if infix_2 != u"" and not infix_2.startswith(end):
    return _parse_failure(original)
  return _parse_result(realword, infix0, infix1, infix2)