From 5116a224e39282fc5055cead3308678cc27975ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20Cort=C3=A9z?= Date: Tue, 16 Feb 2016 17:05:10 -0600 Subject: [PATCH] Fixed translator --- src/controller/messages.py | 46 -------- src/extra/translator/translator.py | 163 ++--------------------------- src/extra/translator/wx_ui.py | 46 ++++---- 3 files changed, 33 insertions(+), 222 deletions(-) diff --git a/src/controller/messages.py b/src/controller/messages.py index 9f0f303..61e401a 100644 --- a/src/controller/messages.py +++ b/src/controller/messages.py @@ -13,7 +13,6 @@ class post(object): self.message.set_title(title) widgetUtils.connect_event(self.message.spellcheck, widgetUtils.BUTTON_PRESSED, self.spellcheck) widgetUtils.connect_event(self.message.translateButton, widgetUtils.BUTTON_PRESSED, self.translate) -# self.text_processor() self.image = None # widgetUtils.connect_event(self.message.upload_image, widgetUtils.BUTTON_PRESSED, self.upload_image) @@ -38,51 +37,6 @@ class post(object): else: return -# def shorten(self, event=None): -# urls = utils.find_urls_in_text(self.message.get_text()) -# if len(urls) == 0: -# output.speak(_(u"There's no URL to be shortened")) -# self.message.text_focus() -# elif len(urls) == 1: -# self.message.set_text(self.message.get_text().replace(urls[0], url_shortener.shorten(urls[0]))) -# output.speak(_(u"URL shortened")) -# self.message.text_focus() -# elif len(urls) > 1: -# list_urls = urlList.urlList() -# list_urls.populate_list(urls) -# if list_urls.get_response() == widgetUtils.OK: -# self.message.set_text(self.message.get_text().replace(urls[list_urls.get_item()], url_shortener.shorten(list_urls.get_string()))) -# output.speak(_(u"URL shortened")) -# self.message.text_focus() - -# def unshorten(self, event=None): -# urls = utils.find_urls_in_text(self.message.get_text()) -# if len(urls) == 0: -# output.speak(_(u"There's no URL to be expanded")) -# self.message.text_focus() -# elif len(urls) == 1: -# self.message.set_text(self.message.get_text().replace(urls[0], url_shortener.unshorten(urls[0]))) -# output.speak(_(u"URL expanded")) -# self.message.text_focus() -# elif len(urls) > 1: -# list_urls = urlList.urlList() -# list_urls.populate_list(urls) -# if list_urls.get_response() == widgetUtils.OK: -# self.message.set_text(self.message.get_text().replace(urls[list_urls.get_item()], url_shortener.unshorten(list_urls.get_string()))) -# output.speak(_(u"URL expanded")) -# self.message.text_focus() - -# def text_processor(self, *args, **kwargs): -# self.message.set_title(_(u"%s - %s of 140 characters") % (self.title, len(self.message.get_text()))) -# if len(self.message.get_text()) > 1: -# self.message.enable_button("shortenButton") -# self.message.enable_button("unshortenButton") -# else: -# self.message.disable_button("shortenButton") -# self.message.disable_button("unshortenButton") -# if len(self.message.get_text()) > 140: -# self.session.sound.play("max_length.ogg") - def spellcheck(self, event=None): text = self.message.get_text() checker = SpellChecker.spellchecker.spellChecker(text, "") diff --git a/src/extra/translator/translator.py b/src/extra/translator/translator.py index dbf0d92..0ec6e4c 100644 --- a/src/extra/translator/translator.py +++ b/src/extra/translator/translator.py @@ -1,153 +1,10 @@ -# encoding: utf-8 -# -# Copyright (C) 2013 Mesar Hameed -# This file is covered by the GNU General Public License. +# -*- coding: utf-8 -*- +from microsofttranslator import Translator -import os -import re -import sys -import threading -from time import sleep -from random import randint -import logging -log = logging.getLogger("translator") -import urllib2 +def translate(text="", source="auto", target="en"): + t = Translator("twblue", "4KZA26GYIfmVAqQA/z16Hlucbg64hVSDTIpRjT2FqIU=") + return t.translate(text, target) -# Each group has to be a class of possible breaking points for the writing script. -# Usually this is the major syntax marks, such as: -# full stop, comma, exclaim, question, etc. -arabicBreaks = u'[،؛؟]' -# Thanks to Talori in the NVDA irc room: -# U+3000 to U+303F, U+FE10 to U+FE1F, U+FE30 to U+FE6F, U+FF01 to U+FF60 -chineseBreaks = u'[ -〿︐-︟︰-﹯!-⦆]' -latinBreaks = r'[.,!?;:\n]' -splitReg = re.compile(u"{arabic}|{chinese}|{latin}".format(arabic=arabicBreaks, chinese=chineseBreaks, latin=latinBreaks)) - -def translate(text, source="auto", target="en"): - if source == "": source = "auto" - t = Translator(lang_from=source, lang_to=target, text=text) - t.start() - while t.isAlive(): - sleep(0.1) - t.join() - return t.translation - -def splitChunks(text, chunksize): - pos = 0 - potentialPos = 0 - for splitMark in splitReg.finditer(text): - if (splitMark.start() - pos +1) < chunksize: - potentialPos = splitMark.start() - continue - else: - yield text[pos:potentialPos+1] - pos = potentialPos + 1 - potentialPos = splitMark.start() - yield text[pos:] - -class Translator(threading.Thread): - - def __init__(self, lang_from, lang_to, text, lang_swap=None, chunksize=350, *args, **kwargs): - super(Translator, self).__init__(*args, **kwargs) - self._stop = threading.Event() - self.text = text - self.chunksize = chunksize - self.lang_to = lang_to - self.lang_from = lang_from - self.lang_swap = lang_swap - self.translation = '' - self.lang_translated = '' - self.firstChunk = True - - def stop(self): - self._stop.set() - - def run(self): - for chunk in splitChunks(self.text, self.chunksize): - # Make sure we don't send requests to google too often. - # Try to simulate a human. - if not self.firstChunk: - sleep(randint(1, 10)) - req = self.buildRequest(chunk, self.lang_from, self.lang_to) - try: - response = urllib2.urlopen(req) - translation, lang_translated = self.parseData(response) - if self.firstChunk and self.lang_from == "auto" and lang_translated == self.lang_to and self.lang_swap is not None: - self.lang_to = self.lang_swap - self.firstChunk = False - req = self.buildRequest(chunk.encode('utf-8'), self.lang_from, self.lang_to) - response = urllib2.urlopen(req) - translation, lang_translated = self.parseData(response) - except Exception as e: - log.exception("Can not translate text '%s'" %chunk) - # We have probably been blocked, so stop trying to translate. - raise e - self.translation += translation - # some adjustment, better to do on full text - self.translation = self.fixNewlines(self.translation) - self.lang_translated = lang_translated - - def buildRequest(self, text, lang_from, lang_to): - """Build POST request which will be sent to Google.""" - urlTemplate = 'http://translate.google.com/translate_a/single?client=t&sl={lang_from}&tl={lang_to}&ie=utf-8&oe=utf-8&dt=t&dt=bd&tk=' - url = urlTemplate.format(lang_from=lang_from, lang_to=lang_to) - header = {'User-agent': 'Mozilla/5.0', 'Content-Type': 'application/x-www-form-urlencoded'} - data = 'text=%s' %urllib2.quote(text) - req = urllib2.Request(url, data, header) - return req - - def parseData(self, response): - """Parse unstructured response.""" - data = response.readlines()[0] - # get segments with couples ["translation","original text"] - l1, l2 = data.split(']],', 1) - translation = l1[3:] - if l2.startswith('[[\"'): - # get list of synonyms - syn = l2[l2.find(',[')+1:l2.find(']')].split(',') - temp = ', '.join([x.replace('\"', '') for x in syn]) - else: - # get a list with each couple as item - sentences = translation.split('],[') - temp = '' - # get translation, removing first char (quote symbol) - for item in sentences: - item = item.split('\",\"', 1)[0][1:] - # join all translations - temp = ' '.join([temp, item]) - translation = temp.decode('string-escape').decode('utf-8') - translation = self.fixPunctuation(translation) - # get the language of original text - tempLang = data.partition(']],,\"')[2] - lang = tempLang[:tempLang.find('\"')] - if lang == '': - lang = _("unavailable") - return translation, lang - - def fixPunctuation(self, translation): - """Clean text from space before punctuation symbol.""" - # list of potentially positions of spaces to remove - spacePos = [] - for puncMark in splitReg.finditer(translation): - spacePos.append(puncMark.start()-1) - if len(spacePos) == 0: - return translation - fixedTranslation = '' - for n in xrange(0,len(translation)): - temp = translation[n] - if n in spacePos and temp == ' ': - continue - else: - fixedTranslation += temp - return fixedTranslation - - def fixNewlines(self, translation): - """Adjust newlines and (subsequent or double) spaces.""" - fixes = [('\r\n ', '\r\n'), ('\n ', '\r\n'), (' ', ' ')] - for fix in fixes: - translation = translation.replace(fix[0], fix[1]) - # first char is a space, so... - return translation[1:] languages = { "af": _(u"Afrikaans"), @@ -244,8 +101,8 @@ languages = { } def available_languages(): - l = languages.keys() - d = languages.values() - l.insert(0, '') - d.insert(0, _(u"autodetect")) - return sorted(zip(l, d)) + l = languages.keys() + d = languages.values() + l.insert(0, '') + d.insert(0, _(u"autodetect")) + return sorted(zip(l, d)) diff --git a/src/extra/translator/wx_ui.py b/src/extra/translator/wx_ui.py index 95ab705..6ea0803 100644 --- a/src/extra/translator/wx_ui.py +++ b/src/extra/translator/wx_ui.py @@ -18,28 +18,28 @@ ############################################################ import translator import wx -from widgetUtils import BaseDialog +import widgetUtils -class translateDialog(BaseDialog): - def __init__(self): - super(translateDialog, self).__init__(None, -1, title=_(u"Translate message")) - panel = wx.Panel(self) - sizer = wx.BoxSizer(wx.VERTICAL) - staticSource = wx.StaticText(panel, -1, _(u"Source language")) - self.source_lang = wx.ComboBox(panel, -1, choices=[x[1] for x in translator.available_languages()], style = wx.CB_READONLY) - self.source_lang.SetFocus() - staticDest = wx.StaticText(panel, -1, _(u"Target language")) - self.source_lang.SetSelection(0) - self.dest_lang = wx.ComboBox(panel, -1, choices=[x[1] for x in translator.available_languages()], style = wx.CB_READONLY) - listSizer = wx.BoxSizer(wx.HORIZONTAL) - listSizer.Add(staticSource) - listSizer.Add(self.source_lang) - listSizer.Add(staticDest) - listSizer.Add(self.dest_lang) - ok = wx.Button(panel, wx.ID_OK) - ok.SetDefault() - cancel = wx.Button(panel, wx.ID_CANCEL) - self.SetEscapeId(wx.ID_CANCEL) +class translateDialog(widgetUtils.BaseDialog): + def __init__(self): + super(translateDialog, self).__init__(None, -1, title=_(u"Translate message")) + panel = wx.Panel(self) + sizer = wx.BoxSizer(wx.VERTICAL) + staticSource = wx.StaticText(panel, -1, _(u"Source language")) + self.source_lang = wx.ComboBox(panel, -1, choices=[x[1] for x in translator.available_languages()], style = wx.CB_READONLY) + self.source_lang.SetFocus() + staticDest = wx.StaticText(panel, -1, _(u"Target language")) + self.source_lang.SetSelection(0) + self.dest_lang = wx.ComboBox(panel, -1, choices=[x[1] for x in translator.available_languages()], style = wx.CB_READONLY) + listSizer = wx.BoxSizer(wx.HORIZONTAL) + listSizer.Add(staticSource) + listSizer.Add(self.source_lang) + listSizer.Add(staticDest) + listSizer.Add(self.dest_lang) + ok = wx.Button(panel, wx.ID_OK) + ok.SetDefault() + cancel = wx.Button(panel, wx.ID_CANCEL) + self.SetEscapeId(wx.ID_CANCEL) - def get(self, control): - return getattr(self, control).GetSelection() \ No newline at end of file + def get(self, control): + return getattr(self, control).GetSelection() \ No newline at end of file