Subversion Repositories navi

Compare Revisions

Ignore whitespace Rev 270 → Rev 247

/ircbot/bot.py
58,7 → 58,7
if (cmd.split(" ")[0] == "tr") or (cmd.split(" ")[0] == "translate"):
lang = "eng"
if len(cmd.split(" ")) > 1 and cmd.split(" ")[1].startswith("-"):
if cmd.split(" ")[1][1:] in ("hu", "de", "ptbr", "est", "sv", "nl"):
if cmd.split(" ")[1][1:] in ("hu", "de", "ptbr", "est", "sv"):
lang = cmd.split(" ")[1][1:]
sent = " ".join(cmd.split(" ")[2:])
else:
/cliapp/README
File deleted
/cliapp/tsimapiakcli.py
23,13 → 23,8
import sys
 
for line in sys.stdin:
try:
line = line.decode("utf-8")
except:
line = line.decode("iso-8859-1")
translated = []
for word in translate.translatesent(line, "eng"):
for word in translate.translatesent(line, lang):
translated.append(word["translated"])
translated = " | ".join(translated)
translated = nm_to_n(e.source()) + ": " + " | ".join(translated)
print translated
 
/tsimapiak/parse.py
28,14 → 28,13
wordlist = dbconnector.getnavilist()
 
 
BROKENWORDS = ((u"sami", u"si", u"", u"am", u"", (()), (()), False), (u"to", u"to", u"", u"", u"", (()), (()), False), (u"frato", u"to", u"", u"", u"", [[u"fra"]], (()), False), (u"mengenga", u"ngenga", u"", u"", u"", [[u"me"]], (()), False), (u"pxengenga", u"ngenga", u"", u"", u"", [[u"pxe"]], (()), False), (u"kìmä", u"kä", u"", u"ìm", u"", (()), (()), False), (u"apxay", u"pxay", u"", u"", u"", [[u"a"]], (()), False), (u"akawng", u"kawng", u"", u"", u"", [[u"a"]], (()), False))
# XXX HACK - These are words that are either not in Eana Eltu, or that get interpreted wrongly for whatever reason. The latter should be removed from this list when the parser gets more sophisticated. The former should also have an entry in the equivalent array in the translator! If it can take infixes, consider adding it to the main wordlist above (see the examples). The order is - original, Na'vi root, 0-pos infix, 1-pos infix, 2-pos infix, PREFIXES, suffixes. Things that can take affixes should go in the above list instead.
BROKENWORDS = ((u"sami", u"si", u"", u"am", u"", (()), (()), False), (u"to", u"to", u"", u"", u"", (()), (()), False), (u"frato", u"to", u"", u"", u"", [[u"fra"]], (()), False)) # XXX HACK - These are words that are either not in Eana Eltu, or that get interpreted wrongly for whatever reason. The latter should be removed from this list when the parser gets more sophisticated. The former should also have an entry in the equivalent array in the translator! If it can take infixes, consider adding it to the main wordlist above (see the examples). The order is - original, Na'vi root, 0-pos infix, 1-pos infix, 2-pos infix, PREFIXES, suffixes. Things that can take affixes should go in the above list instead.
INFIXES1 = (u"awn", u"eyk", u"us", u"äp", u"")
INFIXES2 = (u"ìyev", u"iyev", u"ìmìy", u"arm", u"asy", u"ilv", u"ìmv", u"imv", u"ìrm", u"irv", u"ìsy", u"aly", u"ary", u"ìly", u"ìry", u"ìlm", u"alm", u"am", u"ay", u"er", u"ìm", u"iv", u"ìy", u"ol", u"")
INFIXES3 = (u"äng", u"ats", u"eiy", u"ei", u"uy", u"")
PREFIXES = (u"tsay", u"fray", u"say", u"fay", u"fra", u"pxe", u"fne", u"tsa", u"kel", u"lek", u"sa", u"pe", u"fe", u"le", u"nì", u"sä", u"tì", u"sì", u"ay", u"me", u"fì", u"ke", u"he", u"px", u"a", u"m", u"k")
PREFIXES = (u"tsay", u"say", u"fay", u"fra", u"pxe", u"fne", u"tsa", u"sa", u"pe", u"fe", u"le", u"nì", u"sä", u"tì", u"sì", u"ay", u"me", u"fì", u"ke", u"he", u"a")
ADPOSITIONS = (u"mungwrr", u"kxamlä", u"pximaw", u"pxisre", u"tafkip", u"nemfa", u"takip", u"mìkam", u"teri", u"fkip", u"luke", u"pxel", u"pxaw", u"rofa", u"ìlä", u"fpi", u"ftu", u"kip", u"lok", u"maw", u"sre", u"sìn", u"vay", u"eo", u"fa", u"hu", u"io", u"ka", u"mì", u"na", u"ne", u"ro", u"ta", u"uo", u"wä", u"äo", u"to", u"sì")
POSTFIXES = ADPOSITIONS + (u"tsyìp", u"eyä", u"ìri", u"aru", u"ati", u"ayä", u"ari", u"ay", u"ìl", u"it", u"lo", u"ri", u"ru", u"ti", u"ur", u"ve", u"yä", u"ya", u"tu", u"vi", u"yu", u"an", u"ng", u"ke", u"al", u"at", u"ar", u"ey", u"e", u"o", u"l", u"t", u"y", u"a", u"ä", u"r")
POSTFIXES = ADPOSITIONS + (u"tsyìp", u"eyä", u"ìri", u"ìl", u"it", u"lo", u"ri", u"ru", u"ti", u"ur", u"ve", u"yä", u"ya", u"tu", u"vi", u"yu", u"an", u"ng", u"ke", u"e", u"o", u"l", u"t", u"y", u"a", u"ä", u"r")
#prefixesn = ur"(?P<npr>(?:(?:fì|tsa)?(?:me|pxe|ay|fra)?|(?:fay)?|(?:tsay)?)(?:fne)?(?:tì|sä)?"
#prefixesv = ur"(?P<vpr>(?:nì|sä|tì|rä'ä |ke )?)"
 
113,10 → 112,6
temp = splitword[wor][:-2] + u"p"
if temp in wordin[wor]:
center = temp
if splitword[wor].endswith(u"tsa"):
temp = splitword[wor][:-3] + u"ts"
if temp in wordin[wor]:
center = temp
if center == u"":
foundit = False
break
/tsimapiak/parsenum.py
103,15 → 103,6
#other numbers
notbase = False
for n in range(len(BASE)):
if numin.startswith(BASE[n] + u"vozazam"):
outoct += (n + 1) * (10 ** 5)
outdec += (n + 1) * (8 ** 5)
if numin[len(BASE[n]) + 6:].startswith(u"mrr") or numin[len(BASE[n]) + 6:].startswith(u"me"):
numin = numin[len(BASE[n]) + 6:]
else:
numin = numin[len(BASE[n]) + 7:]
notbase = True
for n in range(len(BASE)):
if numin.startswith(BASE[n] + u"zazam"):
outoct += (n + 1) * (10 ** 4)
outdec += (n + 1) * (8 ** 4)
/tsimapiak/translate.py
27,9 → 27,9
INFIXES1 = ((u"awn", u"P.PART"), (u"eyk", u"CAUS"), (u"us", u"A.PART"), (u"äp", u"REFL."))
INFIXES2 = ((u"ìyev", u"FUT.SUBJ"), (u"iyev", u"FUT.SUBJ"), (u"ìmìy", u"REC.PAST.REC.FUT"), (u"arm", u"IMPF.PAST"), (u"asy", u"FUT.D"), (u"ilv", u"PRES.PER.SUBJ"), (u"ìmv", u"REC.PAST.SUBJ"), (u"imv", u"PAST.SUBJ"), (u"ìrm", u"IMPF.REC.PAST"), (u"irv", u"PRES.IMPF.SUBJ"), (u"ìsy", u"IMM.FUT.D"), (u"aly", u"PERF.FUT"), (u"ary", u"IMPF.FUT"), (u"ìly", u"PERF.IMM.FUT"), (u"ìry", u"IMPF.IMM.FUT"), (u"ìlm", u"PERF.REC.PAST"), (u"alm", u"PERF.PAST"), (u"am", u"PAST."), (u"ay", u"FUT."), (u"er", u"IMPF."), (u"ìm", u"REC.PAST"), (u"iv", u"SUBJ."), (u"ìy", u"IMM.FUT"), (u"ol", u"PERF."))
INFIXES3 = ((u"äng", u"PEJ."), (u"ats", u"INFR."), (u"eiy", u"LAUD."), (u"ei", u"LAUD."), (u"uy", u"HON."))
PREFIXES = ((u"tsay", u"those"), (u"fray", u"every-PL."), (u"say", u"those-LENTD"), (u"fay", u"these"), (u"fra", u"every"), (u"pxe", u"TRI."), (u"fne", u"type"), (u"tsa", u"that"), (u"kel", u"ADJD.-not"), (u"lek", u"not-ADJD."), (u"sa", u"that-LENTD"), (u"pe", u"what"), (u"fe", u"what-LENTD"), (u"le", u"ADJD."), (u"nì", u"ADVD."), (u"sä", u"INSTD."), (u"tì", u"NOUND."), (u"sì", u"NOUND.-LENTD"), (u"ay", u"PL."), (u"me", u"DU."), (u"fì", u"this"), (u"ke", u"not"), (u"he", u"not-LENTD"), (u"px", u"TRI."), (u"a", u"ADJ.POST"), (u"m", u"DU."), (u"k", u"not"))
PREFIXES = ((u"tsay", u"those"), (u"say", u"those-LENTD"), (u"fay", u"these"), (u"fra", u"every"), (u"pxe", u"TRI."), (u"fne", u"type"), (u"tsa", u"that"), (u"sa", u"that-LENTD"), (u"pe", u"what"), (u"fe", u"what-LENTD"), (u"le", u"ADJD."), (u"nì", u"ADVD."), (u"sä", u"INSTD."), (u"tì", u"NOUND."), (u"sì", u"NOUND.-LENTD"), (u"ay", u"PL."), (u"me", u"DU."), (u"fì", u"this"), (u"ke", u"not"), (u"he", u"not-LENTD"), (u"a", u"ADJ.POST"))
ADPOSITIONS = ((u"mungwrr", u"except"), (u"kxamlä", u"through"), (u"pximaw", u"right.after"), (u"pxisre", u"right.before"), (u"tafkip", u"from.up.among"), (u"nemfa", u"into.inside"), (u"takip", u"from among"), (u"mìkam", u"between"), (u"teri", u"about.concerning"), (u"fkip", u"up.among"), (u"luke", u"without"), (u"pxel", u"like.as"), (u"pxaw", u"around"), (u"rofa", u"beside.alongside"), (u"ìlä", u"by.via.following"), (u"fpi", u"for.the.sake/benefit.of"), (u"ftu", u"from.direction"), (u"kip", u"among"), (u"lok", u"close.to"), (u"maw", u"after.time"), (u"sre", u"before.time"), (u"sìn", u"on.onto"), (u"vay", u"up.to"), (u"eo", u"before.in.front.of"), (u"fa", u"with.by.means.of"), (u"hu", u"with.accompaniment"), (u"io", u"above"), (u"ka", u"across"), (u"mì", u"in.on"), (u"na", u"like.as"), (u"ne", u"to.towards"), (u"ro", u"at.locative"), (u"ta", u"from"), (u"uo", u"behind"), (u"wä", u"against.opposition"), (u"äo", u"below"), (u"to", u"than"), (u"sì", u"and"))
POSTFIXES = ADPOSITIONS + ((u"tsyìp", u"DIM."), (u"eyä", u"GEN."), (u"ìri", u"TOP."), (u"ari", u"TOP."), (u"ayä", u"GEN."), (u"aru", u"DAT."), (u"ati", u"ACC."), (u"ay", u"GEN."), (u"ìl", u"ERG."), (u"it", u"ACC"), (u"lo", u"MULT."), (u"ri", u"TOP."), (u"ru", u"DAT."), (u"ti", u"ACC."), (u"ur", u"DAT."), (u"ve", u"ORD."), (u"yä", u"GEN."), (u"ya", u"VOC."), (u"tu", u"OBJD."), (u"vi", u"PART."), (u"yu", u"AGENTD."), (u"an", u"MASC."), (u"ng", u"INCL."), (u"ke", u"not"), (u"al", u"ERG."), (u"at", u"ACC."), (u"ar", u"DAT."), (u"ey", u"GEN."), (u"e", u"FEM."), (u"o", u"INDEF."), (u"l", u"ERG."), (u"t", u"ACC."), (u"y", u"GEN."), (u"a", u"ADJ.PRE"), (u"ä", u"GEN."), (u"r", u"DAT."))
POSTFIXES = ADPOSITIONS + ((u"tsyìp", u"DIM."), (u"eyä", u"GEN."), (u"ìri", u"TOP."), (u"ìl", u"ERG."), (u"it", u"ACC"), (u"lo", u"MULT."), (u"ri", u"TOP."), (u"ru", u"DAT."), (u"ti", u"ACC."), (u"ur", u"DAT."), (u"ve", u"ORD."), (u"yä", u"GEN."), (u"ya", u"VOC."), (u"tu", u"OBJD."), (u"vi", u"PART."), (u"yu", u"AGENTD."), (u"an", u"MASC."), (u"ng", u"INCL."), (u"ke", u"not"), (u"e", u"FEM."), (u"o", u"INDEF."), (u"l", u"ERG."), (u"t", u"ACC."), (u"y", u"GEN."), (u"a", u"ADJ.PRE"), (u"ä", u"GEN."), (u"r", u"DAT."))
 
def translatesent(sent, lang):
sent = parse.parsesent(sent)
/tsimapiak/dbconnector.py
29,7 → 29,7
for row in db.query("""
SELECT *
FROM `metaWords`
WHERE partOfSpeech <> 'num.' AND partOfSpeech <> 'prefix'
WHERE partOfSpeech <> 'num.' AND partOfSpeech <> "prefix"
ORDER BY CHAR_LENGTH(navi) DESC"""):
if row["infixes"]:
ret.append({"id": row["id"], "navi": row["navi"], "infix": row["infixes"].lower(), "type": row["partOfSpeech"]})
/webapp/templates/translate.html
35,7 → 35,6
<option value="est">Estonian - Eesti</option>
<option value="ptbr">Brazilian Portuguese - Português do Brasil</option>
<option value="sv">Swedish - Svenska</option>
<option value="nl">Dutch - Nederlands</option>
</select>
<input name="btn" type="submit" value="Translate!" />
</form>
90,9 → 89,6
if("{{ lang }}" == "sv"){
document.getElementById("lang").selectedIndex = 5
}
if("{{ lang }}" == "nl"){
document.getElementById("lang").selectedIndex = 6
}
</script>
{% end %}
{% end %}