17,23 → 17,23 |
infix_1 = u"" |
infix_2 = u"" |
for eachword in strings: |
regex = re.sub(" ","[^ ]* [^ ]*",eachword) |
regex = re.sub("^","[^ ]*",regex) |
regex = re.sub("$","[^ ]*",regex) |
regex = re.sub("<0><1>","[^ ]*",regex) |
regex = re.sub("<2>","[^ ]*",regex) |
regex = re.sub(u" ",u"[^ ]* [^ ]*",eachword) |
regex = re.sub(u"^",u"[^ ]*",regex) |
regex = re.sub(u"$",u"[^ ]*",regex) |
regex = re.sub(u"<0><1>",u"[^ ]*",regex) |
regex = re.sub(u"<2>",u"[^ ]*",regex) |
if re.match(regex,original): |
realword = eachword |
break |
if realword == u"": |
return ["[" + original + "]",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u""] |
return [u"[" + original + u"]",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u""] |
else: |
if re.search("<",realword): |
beginning = re.sub("<0><1>.*",u"",realword) |
middle = re.sub(".*<0><1>(.*)<2>.*","\\1",realword) |
end = re.sub(".*<2>",u"",realword) |
infix01 = re.sub(".*?" + re.sub("<0><1>","([^ ]*)",re.sub("<2>","[^ ]*",realword)) + ".*?","\\1",original) |
infix_2 = re.sub(".*?" + re.sub("<2>","([^ ]*)",re.sub("<0><1>","[^ ]*",realword)) + ".*?","\\1",original) |
if re.search(u"<",realword): |
beginning = re.sub(u"<0><1>.*",u"",realword) |
middle = re.sub(u".*<0><1>(.*)<2>.*",ur"\1",realword) |
end = re.sub(u".*<2>",u"",realword) |
infix01 = re.sub(u".*?" + re.sub(u"<0><1>",u"([^ ]*)",re.sub(u"<2>",u"[^ ]*",realword)) + u".*?",ur"\1",original) |
infix_2 = re.sub(u".*?" + re.sub(u"<2>",u"([^ ]*)",re.sub(u"<0><1>",u"[^ ]*",realword)) + u".*?",ur"\1",original) |
for eachinfix in infixes0: |
if infix01.startswith(eachinfix): |
infix0 = eachinfix |
47,15 → 47,15 |
if infix_1.startswith(eachinfix): |
infix1 = eachinfix |
infix_1 = infix_1[len(eachinfix):] |
if infix_1 != "": |
if re.search("<0><1><2>",realword): |
if infix_1 != u"": |
if re.search(u"<0><1><2>",realword): |
infix_2 = infix_1 |
else: |
return ["[" + original + "]",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u""] |
return [u"[" + original + u"]",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u""] |
gotinfix1 = True |
break |
if gotinfix1 == False: |
if re.search("<0><1><2>",realword): |
if re.search(u"<0><1><2>",realword): |
if infix_1 == u"": |
infix_2 = infix_1 |
infix1 = u"" |
62,7 → 62,7 |
elif infix_1 == u"": |
infix1 = u"" |
else: |
return ["[" + original + "]",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u""] |
return [u"[" + original + u"]",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u""] |
gotinfix2 = False |
for eachinfix in infixes2: |
if infix_2.startswith(eachinfix): |
73,10 → 73,10 |
if infix_2.startswith(end): |
suffixes = infix2[len(end) - 1:] + end |
elif infix_2 == u"": |
infix2 = "" |
infix2 = u"" |
else: |
return ["[" + original + "]",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u""] |
# print "0" + unicode(infix0) + " 1" + unicode(infix1) + " 2" + unicode(infix2) |
return [u"[" + original + u"]",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u""] |
# print u"0" + unicode(infix0) + u" 1" + unicode(infix1) + u" 2" + unicode(infix2) |
return [realword,infix0,infix1,infix2,u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u""] |
else: |
return [realword,u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u"",u""] |