/tsimapiak/parse.py |
---|
34,13 → 34,13 |
(u"sami", u"si", u"", u"am", u"", (()), (()), False), |
(u"to", u"to", u"", u"", u"", (()), (()), False), |
#(u"frato", u"to", u"", u"", u"", [[u"fra"]], (()), False), |
(u"soaiä", u"soaia", u"", u"", u"", (()), [[u"ä"]], False), |
(u"soaiä", u"soaia", u"", u"", u"", (()), [[(u"ä", None)]], False), |
(u"mengenga", u"ngenga", u"", u"", u"", [[u"me"]], (()), False), |
(u"pxengenga", u"ngenga", u"", u"", u"", [[u"pxe"]], (()), False), |
(u"kìmä", u"kä", u"", u"ìm", u"", (()), (()), False), |
(u"apxay", u"pxay", u"", u"", u"", [[u"a"]], (()), False), |
(u"akawng", u"kawng", u"", u"", u"", [[u"a"]], (()), False), # TODO remember why on earth this is needed; how is awng interpreted as awnga? |
(u"kawnga", u"kawng", u"", u"", u"", (()), [[u"a"]], False), |
(u"akawng", u"kawng", u"", u"", u"", [[u"a"]], (()), False), |
(u"kawnga", u"kawng", u"", u"", u"", (()), [[(u"a", None)]], False), |
(u"kawng", u"kawng", u"", u"", u"", (()), (()), False), |
(u"ka", u"ka", u"", u"", u"", (()), (()), False), |
(u"uo", u"uo", u"", u"", u"", (()), (()), False), |
57,10 → 57,20 |
#prefixesn = ur"(?P<npr>(?:(?:fì|tsa)?(?:me|pxe|ay|fra)?|(?:fay)?|(?:tsay)?)(?:fne)?(?:tì|sä)?" |
#prefixesv = ur"(?P<vpr>(?:nì|sä|tì|rä'ä |ke )?)" |
EXTRAADP = ("to", "sì") # words that act like adpositions but technically aren't |
EXTRAADP = (("to", [x["id"] for x in wordlist if x["navi"] == "to"][0]), ("sì", [x["id"] for x in wordlist if x["navi"] == "sì"][0])) # words that act like adpositions but technically aren't |
LENIT = ((u"px", u"p"), (u"tx", u"t"), (u"kx", u"k"), (u"ts", u"s"), (u"t", u"s"), (u"p", u"f"), (u"k", u"h"), (u"'", u"")) |
# Let's lenit the prefixes |
extraprefixes = [] |
for prefix in prefixes: |
for letter, replacement in LENIT: |
if prefix["navi"].startswith(letter): |
extraprefixes.append({"id": prefix["id"], "navi": prefix["navi"].replace(letter, replacement, 1), "gloss": prefix["gloss"] + ".LENTD"}) |
break |
prefixes = sorted(prefixes + extraprefixes, key=lambda x: len(x["navi"]), reverse=True) |
def parseword(wordin): |
tempid = 0 |
temptype = u"" |
89,7 → 99,7 |
foundprefs.append([]) |
foundposts.append([]) |
center = u"" |
if u"<1>" in splitword[wor]: |
if u"<0>" in splitword[wor]: |
tempin1 = [] |
tempin2 = [] |
tempin3 = [] |
105,8 → 115,8 |
for in1 in tempin1: |
for in2 in tempin2: |
for in3 in tempin3: |
if splitword[wor].replace(u"<1><2>", in1 + in2).replace(u"<3>", in3).replace(u"lll", u"l").replace(u"rrr", u"r") in wordin[wor]: |
center = splitword[wor].replace(u"<1><2>", in1 + in2).replace(u"<3>", in3).replace(u"lll", u"l").replace(u"rrr", u"r") |
if splitword[wor].replace(u"<0><1>", in1 + in2).replace(u"<2>", in3).replace(u"lll", u"l").replace(u"rrr", u"r") in wordin[wor]: |
center = splitword[wor].replace(u"<0><1>", in1 + in2).replace(u"<2>", in3).replace(u"lll", u"l").replace(u"rrr", u"r") |
foundins = [in1, in2, in3] |
break |
if center != u"": |
167,13 → 177,13 |
last = u"" |
while last != posf: |
last = posf |
for pos in [x["navi"] for x in postfixes] + [x["navi"] for x in wordlist if x["type"] == "adp."] + list(EXTRAADP): |
for pos, posid in sorted([(x["navi"], None) for x in postfixes] + [(x["navi"], x["id"]) for x in wordlist if x["type"] == "adp."] + list(EXTRAADP), key=lambda x: len(x[0]), reverse=True): |
if posf != u"": |
if posf.startswith(pos): |
if pos in foundposts[wor]: |
if (pos, posid) in foundposts[wor]: |
break |
if pos != u"ä" or word["navi"] != u"pey": # XXX HACK - fix for peyä. THIS SHOULD NOT BE HERE! |
foundposts[wor].append(pos) |
foundposts[wor].append((pos, posid)) |
posf = posf[len(pos):] |
break |
else: |
/tsimapiak/translate.py |
---|
31,8 → 31,6 |
#ADPOSITIONS = ((u"mungwrr", u"except"), (u"kxamlä", u"through"), (u"pximaw", u"right.after"), (u"pxisre", u"right.before"), (u"tafkip", u"from.up.among"), (u"nemfa", u"into.inside"), (u"takip", u"from among"), (u"mìkam", u"between"), (u"teri", u"about.concerning"), (u"fkip", u"up.among"), (u"luke", u"without"), (u"pxel", u"like.as"), (u"pxaw", u"around"), (u"rofa", u"beside.alongside"), (u"ìlä", u"by.via.following"), (u"fpi", u"for.the.sake/benefit.of"), (u"ftu", u"from.direction"), (u"kip", u"among"), (u"lok", u"close.to"), (u"maw", u"after.time"), (u"sre", u"before.time"), (u"sìn", u"on.onto"), (u"vay", u"up.to"), (u"eo", u"before.in.front.of"), (u"fa", u"with.by.means.of"), (u"hu", u"with.accompaniment"), (u"io", u"above"), (u"ka", u"across"), (u"mì", u"in.on"), (u"na", u"like.as"), (u"ne", u"to.towards"), (u"ro", u"at.locative"), (u"ta", u"from"), (u"uo", u"behind"), (u"wä", u"against.opposition"), (u"äo", u"below"), (u"to", u"than"), (u"sì", u"and")) |
#POSTFIXES = ADPOSITIONS + ((u"tsyìp", u"DIM."), (u"eyä", u"GEN."), (u"ìri", u"TOP."), (u"ari", u"TOP."), (u"ayä", u"GEN."), (u"aru", u"DAT."), (u"ati", u"ACC."), (u"ay", u"GEN."), (u"ìl", u"ERG."), (u"it", u"ACC"), (u"lo", u"MULT."), (u"ri", u"TOP."), (u"ru", u"DAT."), (u"ti", u"ACC."), (u"ur", u"DAT."), (u"ve", u"ORD."), (u"yä", u"GEN."), (u"ya", u"VOC."), (u"tu", u"OBJD."), (u"vi", u"PART."), (u"yu", u"AGENTD."), (u"an", u"MASC."), (u"ng", u"INCL."), (u"ke", u"not"), (u"al", u"ERG."), (u"at", u"ACC."), (u"ar", u"DAT."), (u"ey", u"GEN."), (u"e", u"FEM."), (u"o", u"INDEF."), (u"l", u"ERG."), (u"t", u"ACC."), (u"y", u"GEN."), (u"a", u"ADJ.PRE"), (u"ä", u"GEN."), (u"r", u"DAT.")) |
prefixes, infixes, postfixes = dbconnector.getaffixlists() |
def translatesent(sent, lang): |
sent = parse.parsesent(sent) |
for word in sent: |
46,31 → 44,31 |
if word["translated"] == u"": |
word["translated"] = word["word"]["navi"] |
if word["inf"][0] != u"": |
for fix in [(x["navi"], x["gloss"]) for x in infixes if x["position"] == 0]: |
for fix in [(x["navi"], x["gloss"]) for x in parse.infixes if x["position"] == 0]: |
if fix[0] == word["inf"][0]: |
word["translated"] += '-' + fix[1] |
if word["inf"][1] != u"": |
for fix in [(x["navi"], x["gloss"]) for x in infixes if x["position"] == 1]: |
for fix in [(x["navi"], x["gloss"]) for x in parse.infixes if x["position"] == 1]: |
if fix[0] == word["inf"][1]: |
word["translated"] += '-' + fix[1] |
if word["inf"][2] != u"": |
for fix in [(x["navi"], x["gloss"]) for x in infixes if x["position"] == 2]: |
for fix in [(x["navi"], x["gloss"]) for x in parse.infixes if x["position"] == 2]: |
if fix[0] == word["inf"][2]: |
word["translated"] += '-' + fix[1] |
for temp in word["pref"]: |
for navf in temp: |
for fix in [(x["navi"], x["gloss"]) for x in prefixes]: |
for fix in [(x["navi"], x["gloss"]) for x in parse.prefixes]: |
if fix[0] == navf: |
word["translated"] += '-' + fix[1] |
for temp in word["post"]: |
for navf in temp: |
for fix in [(x["navi"], x["gloss"]) for x in postfixes]: |
for navf, navfid in temp: |
for fix in [(x["navi"], x["gloss"]) for x in parse.postfixes]: |
if fix[0] == navf: |
word["translated"] += '-' + fix[1] |
break |
else: |
# adpositions and the like |
word["translated"] += "-" + dbconnector.translate(navf, lang) |
word["translated"] += "-" + dbconnector.translate(navfid, lang) |
if word["len"]: |
word["translated"] += '-' + 'LENTD' |
return sent |
/tsimapiak/dbconnector.py |
---|
33,10 → 33,10 |
WHERE partOfSpeech <> 'num.' AND partOfSpeech <> 'prefix' AND partOfSpeech <> 'affix' |
ORDER BY CHAR_LENGTH(navi) DESC""") |
for row in cur: |
if row["infixes"]: |
ret.append({"id": row["id"], "navi": row["navi"], "infix": row["infixes"].lower(), "type": row["partOfSpeech"]}) |
if row["infixes"] and row["infixes"] != "NULL": # yeah seriously |
ret.append({"id": row["id"], "navi": row["navi"].replace("+", "").replace("-", ""), "infix": row["infixes"].lower(), "type": row["partOfSpeech"]}) |
else: |
ret.append({"id": row["id"], "navi": row["navi"], "infix": row["navi"].lower(), "type": row["partOfSpeech"]}) |
ret.append({"id": row["id"], "navi": row["navi"].replace("+", "").replace("-", ""), "infix": row["navi"].lower(), "type": row["partOfSpeech"]}) |
cur.close() |
db.close() |
return ret |
50,17 → 50,24 |
FROM `metaInfixes` |
ORDER BY CHAR_LENGTH(navi) DESC""") |
for row in cur: |
endfix = False |
if row["navi"] and row["navi"][0] == "-": |
ret[2].append({"id": row["id"], "navi": row["navi"].replace("-", ""), "gloss": row["shorthand"].upper()}) |
elif row["navi"] and row["navi"][-1] in ("-", "+"): |
ret[0].append({"id": row["id"], "navi": row["navi"].replace("-", ""), "gloss": row["shorthand"].upper()}) |
else: |
if not row["position"]: |
endfix = True |
if row["navi"] and row["navi"][-1] in ("-", "+"): |
ret[0].append({"id": row["id"], "navi": row["navi"].replace("-", "").replace("+", ""), "gloss": row["shorthand"].upper()}) |
endfix = True |
if not endfix: |
if not row["position"] or row["position"] == "NULL": |
# not actually an affix |
continue |
ret[1].append({"id": row["id"], "navi": row["navi"].replace("-", ""), "gloss": row["shorthand"].upper(), "position": int(row["position"])}) |
ret[1].append({"id": row["id"], "navi": row["navi"], "gloss": row["shorthand"].upper(), "position": int(row["position"])}) |
cur.close() |
db.close() |
for subret in ret: |
subret.sort(key=lambda x: len(x["navi"]), reverse=True) |
return ret |
def translate(wid, language): |
/tsimapiak/parsenum.py |
---|
78,15 → 78,15 |
prefs.append(u"a") |
numin = numin[1:] |
if numin[len(numin) - 1] == u"a": |
posts.append(u"a") |
posts.append((u"a", None)) |
numin = numin[:-1] |
if numin[-2:] == u"ve": |
posts.append(u"ve") |
posts.append((u"ve", None)) |
numin = numin[:-2] |
#BASE numbers |
for n in range(len(NUM)): |
if u"ve" in posts: |
if (u"ve", None) in posts: |
if numin == NUMORD[n]: |
outoct = n |
outdec = n |
153,7 → 153,7 |
notbase = True |
if notbase: |
for n in range(len(REM)): |
if u"ve" in posts: |
if (u"ve", None) in posts: |
if numin == REMORD[n]: |
outoct += n + 1 |
outdec += n + 1 |
164,7 → 164,7 |
outdec += n + 1 |
numin = u"" |
if numin == u"": |
ret["word"]["navi"] = str(outdec) if not u"ve" in posts else str(outdec) + u"." |
ret["word"]["navi"] = str(outdec) if not (u"ve", None) in posts else str(outdec) + u"." |
ret["dec"] = outdec |
ret["oct"] = outoct |
return ret |
/discordbot/README.txt |
---|
0,0 → 1,3 |
This is a Discord bot for Tsim Apiak. |
To use it, put the tsimapiak directory inside this directory, set the DISCORD_TOKEN environment variable (or define it in a .env file), and run bot.py. |
/discordbot/bot.py |
---|
0,0 → 1,60 |
#! /usr/bin/env python |
# This file is part of Tsim Apiak. |
# |
# Tsim Apiak is free software: you can redistribute it and/or modify |
# it under the terms of the GNU General Public Licence as published by |
# the Free Software Foundation, either version 3 of the Licence, or |
# (at your option) any later version. |
# |
# In addition to this, you must also comply with clause 4 of the |
# Apache Licence, version 2.0, concerning attribution. Where there |
# is a contradiction between the two licences, the GPL |
# takes preference. |
# |
# Tsim Apiak is distributed in the hope that it will be useful, |
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
# GNU General Public License for more details. |
# |
# You should have received a copy of the GNU General Public License |
# along with Tsim Apiak. If not, see <http://www.gnu.org/licenses/>. |
import os |
import discord |
import dotenv |
from typing import Optional |
from tsimapiak import translate |
# Load settings (notably DISCORD_TOKEN, which main() reads) from a .env file
# into the process environment.
dotenv.load_dotenv(dotenv.find_dotenv())
# The client is created with discord.py's default set of gateway intents.
intents = discord.Intents.default()
client = discord.Client(intents=intents)
# Command tree on which the slash commands defined below are registered.
tree = discord.app_commands.CommandTree(client)
@client.event
async def on_ready():
    """Log the connection and register the slash commands with Discord.

    discord.py may dispatch ``on_ready`` more than once per process (it is
    fired again when a session resume fails), so the command tree is synced
    only the first time to avoid repeated, rate-limited sync requests.
    """
    print(f"{client.user} Connected to discord")
    # A function attribute serves as the run-once flag so the guard stays
    # local to this handler and needs no extra module-level state.
    if getattr(on_ready, "commands_synced", False):
        return
    await tree.sync()
    # Set only after a successful sync so a failed attempt is retried on the
    # next on_ready dispatch.
    on_ready.commands_synced = True
@tree.command(
    name="translate",
    description="Translate (gloss) Na'vi to English"
)
async def on_translate(interaction: discord.Interaction, argument: str, language: Optional[str] = None):
    """Slash command: reply with the input text followed by its word-by-word gloss.

    Args:
        interaction: The Discord interaction that invoked the command.
        argument: The Na'vi text to gloss.
        language: Language code handed to ``translate.translatesent``;
            ``"en"`` is used when it is omitted or empty.
    """
    # Discord rejects message content longer than 2000 characters.
    max_message_length = 2000
    if not language:
        language = "en"
    # translatesent() is synchronous and performs database lookups, so
    # acknowledge the interaction first; otherwise a slow lookup can miss
    # Discord's deadline for the initial response.
    await interaction.response.defer()
    glosses = [word["translated"] for word in translate.translatesent(argument, language)]
    reply = argument + "\n" + " | ".join(glosses)
    if len(reply) > max_message_length:
        # Truncate instead of letting the send fail on an over-long message.
        reply = reply[:max_message_length - 1] + "\u2026"
    await interaction.followup.send(reply)
def main():
    """Start the bot with the token from the DISCORD_TOKEN environment variable.

    Raises:
        SystemExit: if DISCORD_TOKEN is unset or empty, so the operator gets
            an actionable message instead of a failure inside the client.
    """
    token = os.getenv('DISCORD_TOKEN')
    if not token:
        raise SystemExit("DISCORD_TOKEN is not set; export it or add it to a .env file")
    client.run(token)
if __name__ == "__main__": |
main() |
Property changes: |
Added: svn:executable |
## -0,0 +1 ## |
+* |
\ No newline at end of property |
Index: cliapp/tsimapiakcli.py |
=================================================================== |
--- cliapp/tsimapiakcli.py (revision 283) |
+++ cliapp/tsimapiakcli.py (revision 289) |
@@ -28,7 +28,7 @@ |
except: |
line = line.decode("iso-8859-1") |
translated = [] |
- for word in translate.translatesent(line, "eng"): |
+ for word in translate.translatesent(line, "en"): |
translated.append(word["translated"]) |
translated = " | ".join(translated) |
print translated |
Index: ircbot/bot.py |
=================================================================== |
--- ircbot/bot.py (revision 283) |
+++ ircbot/bot.py (revision 289) |
@@ -64,7 +64,7 @@ |
c = self.connection |
if (cmd.split(" ")[0] == "tr") or (cmd.split(" ")[0] == "translate"): |
- lang = "eng" |
+ lang = "en" |
if len(cmd.split(" ")) > 1 and cmd.split(" ")[1].startswith("-"): |
if cmd.split(" ")[1][1:] in ("hu", "de", "ptbr", "est", "sv", "nl"): |
lang = cmd.split(" ")[1][1:] |
/webapp/main.py |
---|
67,7 → 67,7 |
class Translate(tornado.web.RequestHandler): |
def get(self): |
self.render("templates/translate.html", last="", out=None, lang="eng") |
self.render("templates/translate.html", last="", out=None, lang="en") |
def post(self): |
try: |
101,7 → 101,8 |
("/number", Number), |
("/restart", Restart), |
("/parse", Parse), |
("/translate", Translate) |
("/translate", Translate), |
("/(\\.well-known/.*)", tornado.web.StaticFileHandler, dict(path=settings["static_path"])) |
], **settings) |
if __name__ == "__main__": |
/webapp/templates/parse.html |
---|
49,7 → 49,7 |
</tr> |
<tr> |
<td>Postfixes:</td> |
<td>{{ u"; ".join(u", ".join(x) for x in wor["post"]) }}</td> |
<td>{{ u"; ".join(u", ".join([y[0] for y in x]) for x in wor["post"]) }}</td> |
</tr> |
<tr> |
<td>Lenited:</td> |
/webapp/templates/translate.html |
---|
29,11 → 29,11 |
<form action="/translate" method="post"> |
<input id="word" name="word" type="text" value="{{last}}" style="width: 100%;" /> |
<select name="lang" id="lang"> |
<option value="eng" selected="selected">English</option> |
<option value="en" selected="selected">English</option> |
<option value="hu">Hungarian - Magyar</option> |
<option value="de">German - Deutsch</option> |
<option value="est">Estonian - Eesti</option> |
<option value="ptbr">Brazilian Portuguese - Português do Brasil</option> |
<option value="et">Estonian - Eesti</option> |
<option value="pt">Brazilian Portuguese - Português do Brasil</option> |
<option value="sv">Swedish - Svenska</option> |
<option value="nl">Dutch - Nederlands</option> |
</select> |
60,7 → 60,7 |
</tr> |
<tr> |
<td>Postfixes:</td> |
<td>{{ u"; ".join(u", ".join(x) for x in wor["post"]) }}</td> |
<td>{{ u"; ".join(u", ".join([y[0] for y in x]) for x in wor["post"]) }}</td> |
</tr> |
<tr> |
<td>Lenited:</td> |
73,7 → 73,7 |
<script type="text/javascript"> |
document.getElementById("word").focus(); |
</script> |
{% if lang != "eng" %} |
{% if lang != "en" %} |
<script type="text/javascript"> |
if("{{ lang }}" == "hu"){ |
document.getElementById("lang").selectedIndex = 1 |