Rev 256 | Rev 262 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
175 | szabot | 1 | #!/usr/bin/python |
159 | muzer | 2 | # -*- coding: utf-8 -*- |
176 | muzer | 3 | # This file is part of Tsim Apiak. |
4 | # |
||
5 | # Tsim Apiak is free software: you can redistribute it and/or modify |
||
6 | # it under the terms of the GNU General Public Licence as published by |
||
7 | # the Free Software Foundation, either version 3 of the Licence, or |
||
8 | # (at your option) any later version. |
||
9 | # |
||
10 | # In addition to this, you must also comply with clause 4 of the |
||
11 | # Apache Licence, version 2.0, concerning attribution. Where there |
||
12 | # is a contradiction between the two licences, the GPL |
||
13 | # takes preference. |
||
14 | # |
||
186 | szabot | 15 | # Tsim Apiak is distributed in the hope that it will be useful, |
176 | muzer | 16 | # but WITHOUT ANY WARRANTY; without even the implied warranty of |
17 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||
18 | # GNU General Public License for more details. |
||
19 | # |
||
20 | # You should have received a copy of the GNU General Public License |
||
21 | # along with Tsim Apiak. If not, see <http://www.gnu.org/licenses/>. |
||
175 | szabot | 22 | |
246 | szabot | 23 | import dbconnector |
146 | szabot | 24 | import parse |
158 | muzer | 25 | |
# Gloss tables: each entry is a (Na'vi affix, gloss label) pair.  The label is
# appended to a word's translation when the parser reports that affix.
# Entry order is preserved from the original tables in case other modules
# rely on it (e.g. longest-first matching) -- TODO confirm before reordering.

# Disabled hack for words missing from the dictionary, kept for reference:
#BROKENWORDS = [[u"", u"", u"", u"", u"", u"", u""]] #, (u"tawtute", u"skyperson"), (u"na'vi", u"The People")) # XXX HACK - words not in EE

# Infixes reported in the first infix slot (word["inf"][0]).
INFIXES1 = (
    (u"awn", u"P.PART"),
    (u"eyk", u"CAUS"),
    (u"us", u"A.PART"),
    (u"äp", u"REFL."),
)

# Infixes reported in the second infix slot (word["inf"][1]).
INFIXES2 = (
    (u"ìyev", u"FUT.SUBJ"),
    (u"iyev", u"FUT.SUBJ"),
    (u"ìmìy", u"REC.PAST.REC.FUT"),
    (u"arm", u"IMPF.PAST"),
    (u"asy", u"FUT.D"),
    (u"ilv", u"PRES.PER.SUBJ"),
    (u"ìmv", u"REC.PAST.SUBJ"),
    (u"imv", u"PAST.SUBJ"),
    (u"ìrm", u"IMPF.REC.PAST"),
    (u"irv", u"PRES.IMPF.SUBJ"),
    (u"ìsy", u"IMM.FUT.D"),
    (u"aly", u"PERF.FUT"),
    (u"ary", u"IMPF.FUT"),
    (u"ìly", u"PERF.IMM.FUT"),
    (u"ìry", u"IMPF.IMM.FUT"),
    (u"ìlm", u"PERF.REC.PAST"),
    (u"alm", u"PERF.PAST"),
    (u"am", u"PAST."),
    (u"ay", u"FUT."),
    (u"er", u"IMPF."),
    (u"ìm", u"REC.PAST"),
    (u"iv", u"SUBJ."),
    (u"ìy", u"IMM.FUT"),
    (u"ol", u"PERF."),
)

# Infixes reported in the third infix slot (word["inf"][2]).
INFIXES3 = (
    (u"äng", u"PEJ."),
    (u"ats", u"INFR."),
    (u"eiy", u"LAUD."),
    (u"ei", u"LAUD."),
    (u"uy", u"HON."),
)

# Prefixes, matched against the entries of word["pref"].
PREFIXES = (
    (u"tsay", u"those"),
    (u"fray", u"every-PL."),
    (u"say", u"those-LENTD"),
    (u"fay", u"these"),
    (u"fra", u"every"),
    (u"pxe", u"TRI."),
    (u"fne", u"type"),
    (u"tsa", u"that"),
    (u"sa", u"that-LENTD"),
    (u"pe", u"what"),
    (u"fe", u"what-LENTD"),
    (u"le", u"ADJD."),
    (u"nì", u"ADVD."),
    (u"sä", u"INSTD."),
    (u"tì", u"NOUND."),
    (u"sì", u"NOUND.-LENTD"),
    (u"ay", u"PL."),
    (u"me", u"DU."),
    (u"fì", u"this"),
    (u"ke", u"not"),
    (u"he", u"not-LENTD"),
    (u"px", u"TRI."),
    (u"a", u"ADJ.POST"),
    (u"m", u"DU."),
)

# Adpositions; these are also accepted as postfixes (see POSTFIXES below).
ADPOSITIONS = (
    (u"mungwrr", u"except"),
    (u"kxamlä", u"through"),
    (u"pximaw", u"right.after"),
    (u"pxisre", u"right.before"),
    (u"tafkip", u"from.up.among"),
    (u"nemfa", u"into.inside"),
    # Was u"from among"; multi-word glosses are dot-joined everywhere else.
    (u"takip", u"from.among"),
    (u"mìkam", u"between"),
    (u"teri", u"about.concerning"),
    (u"fkip", u"up.among"),
    (u"luke", u"without"),
    (u"pxel", u"like.as"),
    (u"pxaw", u"around"),
    (u"rofa", u"beside.alongside"),
    (u"ìlä", u"by.via.following"),
    (u"fpi", u"for.the.sake/benefit.of"),
    (u"ftu", u"from.direction"),
    (u"kip", u"among"),
    (u"lok", u"close.to"),
    (u"maw", u"after.time"),
    (u"sre", u"before.time"),
    (u"sìn", u"on.onto"),
    (u"vay", u"up.to"),
    (u"eo", u"before.in.front.of"),
    (u"fa", u"with.by.means.of"),
    (u"hu", u"with.accompaniment"),
    (u"io", u"above"),
    (u"ka", u"across"),
    (u"mì", u"in.on"),
    (u"na", u"like.as"),
    (u"ne", u"to.towards"),
    (u"ro", u"at.locative"),
    (u"ta", u"from"),
    (u"uo", u"behind"),
    (u"wä", u"against.opposition"),
    (u"äo", u"below"),
    (u"to", u"than"),
    (u"sì", u"and"),
)

# Postfixes, matched against the entries of word["post"]: every adposition
# followed by the case endings and other suffixes.
POSTFIXES = ADPOSITIONS + (
    (u"tsyìp", u"DIM."),
    (u"eyä", u"GEN."),
    (u"ìri", u"TOP."),
    (u"ari", u"TOP."),
    (u"ayä", u"GEN."),
    (u"aru", u"DAT."),
    (u"ati", u"ACC."),
    (u"ay", u"GEN."),
    (u"ìl", u"ERG."),
    # Was u"ACC" (no period), unlike every other accusative entry here.
    (u"it", u"ACC."),
    (u"lo", u"MULT."),
    (u"ri", u"TOP."),
    (u"ru", u"DAT."),
    (u"ti", u"ACC."),
    (u"ur", u"DAT."),
    (u"ve", u"ORD."),
    (u"yä", u"GEN."),
    (u"ya", u"VOC."),
    (u"tu", u"OBJD."),
    (u"vi", u"PART."),
    (u"yu", u"AGENTD."),
    (u"an", u"MASC."),
    (u"ng", u"INCL."),
    (u"ke", u"not"),
    (u"al", u"ERG."),
    (u"at", u"ACC."),
    (u"ar", u"DAT."),
    (u"ey", u"GEN."),
    (u"e", u"FEM."),
    (u"o", u"INDEF."),
    (u"l", u"ERG."),
    (u"t", u"ACC."),
    (u"y", u"GEN."),
    (u"a", u"ADJ.PRE"),
    (u"ä", u"GEN."),
    (u"r", u"DAT."),
)
158 | muzer | 33 | |
def translatesent(sent, lang):
    """Parse a sentence and attach a gloss-style translation to every word.

    The sentence is handed to ``parse.parsesent``; each word record it
    returns gains a ``"translated"`` entry built from:

    * the dictionary translation from ``dbconnector.translate`` when the
      word's ``id`` is positive, otherwise the word's own ``navi`` form;
    * ``-GLOSS`` for each recognised infix (one slot per infix table);
    * ``-GLOSS`` for each recognised prefix, then each recognised postfix;
    * ``-LENTD`` when the record's ``"len"`` flag is truthy.

    Args:
        sent: the sentence to translate, passed unchanged to the parser.
        lang: language selector forwarded to ``dbconnector.translate``.

    Returns:
        The parsed word records (the object returned by
        ``parse.parsesent``), each updated in place.
    """
    words = parse.parsesent(sent)

    for entry in words:
        # Base translation: dictionary lookup for known words, raw form else.
        entry["translated"] = ""
        if entry["word"]["id"] > 0:
            entry["translated"] = dbconnector.translate(entry["word"]["id"], lang)
        elif entry["translated"] == u"":
            # A lookup in the disabled BROKENWORDS table used to run before
            # this fallback; with it gone the raw form is always used here.
            entry["translated"] = entry["word"]["navi"]

        # Infix slot N is glossed from infix table N; empty slots are skipped.
        for slot, table in enumerate((INFIXES1, INFIXES2, INFIXES3)):
            infix = entry["inf"][slot]
            if infix == u"":
                continue
            for navi, gloss in table:
                if navi == infix:
                    entry["translated"] += '-' + gloss

        # Prefixes first, then postfixes; both are nested collections.
        for key, table in (("pref", PREFIXES), ("post", POSTFIXES)):
            for group in entry[key]:
                for affix in group:
                    for navi, gloss in table:
                        if navi == affix:
                            entry["translated"] += '-' + gloss

        # presumably "len" marks a lenited word -- verify against parse module
        if entry["len"]:
            entry["translated"] += '-' + 'LENTD'

    return words