Fixed translator
This commit is contained in:
parent
4336558f22
commit
5116a224e3
@ -13,7 +13,6 @@ class post(object):
|
|||||||
self.message.set_title(title)
|
self.message.set_title(title)
|
||||||
widgetUtils.connect_event(self.message.spellcheck, widgetUtils.BUTTON_PRESSED, self.spellcheck)
|
widgetUtils.connect_event(self.message.spellcheck, widgetUtils.BUTTON_PRESSED, self.spellcheck)
|
||||||
widgetUtils.connect_event(self.message.translateButton, widgetUtils.BUTTON_PRESSED, self.translate)
|
widgetUtils.connect_event(self.message.translateButton, widgetUtils.BUTTON_PRESSED, self.translate)
|
||||||
# self.text_processor()
|
|
||||||
self.image = None
|
self.image = None
|
||||||
# widgetUtils.connect_event(self.message.upload_image, widgetUtils.BUTTON_PRESSED, self.upload_image)
|
# widgetUtils.connect_event(self.message.upload_image, widgetUtils.BUTTON_PRESSED, self.upload_image)
|
||||||
|
|
||||||
@ -38,51 +37,6 @@ class post(object):
|
|||||||
else:
|
else:
|
||||||
return
|
return
|
||||||
|
|
||||||
# def shorten(self, event=None):
|
|
||||||
# urls = utils.find_urls_in_text(self.message.get_text())
|
|
||||||
# if len(urls) == 0:
|
|
||||||
# output.speak(_(u"There's no URL to be shortened"))
|
|
||||||
# self.message.text_focus()
|
|
||||||
# elif len(urls) == 1:
|
|
||||||
# self.message.set_text(self.message.get_text().replace(urls[0], url_shortener.shorten(urls[0])))
|
|
||||||
# output.speak(_(u"URL shortened"))
|
|
||||||
# self.message.text_focus()
|
|
||||||
# elif len(urls) > 1:
|
|
||||||
# list_urls = urlList.urlList()
|
|
||||||
# list_urls.populate_list(urls)
|
|
||||||
# if list_urls.get_response() == widgetUtils.OK:
|
|
||||||
# self.message.set_text(self.message.get_text().replace(urls[list_urls.get_item()], url_shortener.shorten(list_urls.get_string())))
|
|
||||||
# output.speak(_(u"URL shortened"))
|
|
||||||
# self.message.text_focus()
|
|
||||||
|
|
||||||
# def unshorten(self, event=None):
|
|
||||||
# urls = utils.find_urls_in_text(self.message.get_text())
|
|
||||||
# if len(urls) == 0:
|
|
||||||
# output.speak(_(u"There's no URL to be expanded"))
|
|
||||||
# self.message.text_focus()
|
|
||||||
# elif len(urls) == 1:
|
|
||||||
# self.message.set_text(self.message.get_text().replace(urls[0], url_shortener.unshorten(urls[0])))
|
|
||||||
# output.speak(_(u"URL expanded"))
|
|
||||||
# self.message.text_focus()
|
|
||||||
# elif len(urls) > 1:
|
|
||||||
# list_urls = urlList.urlList()
|
|
||||||
# list_urls.populate_list(urls)
|
|
||||||
# if list_urls.get_response() == widgetUtils.OK:
|
|
||||||
# self.message.set_text(self.message.get_text().replace(urls[list_urls.get_item()], url_shortener.unshorten(list_urls.get_string())))
|
|
||||||
# output.speak(_(u"URL expanded"))
|
|
||||||
# self.message.text_focus()
|
|
||||||
|
|
||||||
# def text_processor(self, *args, **kwargs):
|
|
||||||
# self.message.set_title(_(u"%s - %s of 140 characters") % (self.title, len(self.message.get_text())))
|
|
||||||
# if len(self.message.get_text()) > 1:
|
|
||||||
# self.message.enable_button("shortenButton")
|
|
||||||
# self.message.enable_button("unshortenButton")
|
|
||||||
# else:
|
|
||||||
# self.message.disable_button("shortenButton")
|
|
||||||
# self.message.disable_button("unshortenButton")
|
|
||||||
# if len(self.message.get_text()) > 140:
|
|
||||||
# self.session.sound.play("max_length.ogg")
|
|
||||||
|
|
||||||
def spellcheck(self, event=None):
|
def spellcheck(self, event=None):
|
||||||
text = self.message.get_text()
|
text = self.message.get_text()
|
||||||
checker = SpellChecker.spellchecker.spellChecker(text, "")
|
checker = SpellChecker.spellchecker.spellChecker(text, "")
|
||||||
|
@ -1,153 +1,10 @@
|
|||||||
# encoding: utf-8
|
# -*- coding: utf-8 -*-
|
||||||
#
|
from microsofttranslator import Translator
|
||||||
# Copyright (C) 2013 Mesar Hameed <mhameed@src.gnome.org>
|
|
||||||
# This file is covered by the GNU General Public License.
|
|
||||||
|
|
||||||
import os
|
def translate(text="", source="auto", target="en"):
|
||||||
import re
|
t = Translator("twblue", "4KZA26GYIfmVAqQA/z16Hlucbg64hVSDTIpRjT2FqIU=")
|
||||||
import sys
|
return t.translate(text, target)
|
||||||
import threading
|
|
||||||
from time import sleep
|
|
||||||
from random import randint
|
|
||||||
import logging
|
|
||||||
log = logging.getLogger("translator")
|
|
||||||
import urllib2
|
|
||||||
|
|
||||||
# Each group has to be a class of possible breaking points for the writing script.
|
|
||||||
# Usually this is the major syntax marks, such as:
|
|
||||||
# full stop, comma, exclaim, question, etc.
|
|
||||||
arabicBreaks = u'[،؛؟]'
|
|
||||||
# Thanks to Talori in the NVDA irc room:
|
|
||||||
# U+3000 to U+303F, U+FE10 to U+FE1F, U+FE30 to U+FE6F, U+FF01 to U+FF60
|
|
||||||
chineseBreaks = u'[ -〿︐-︰-!-⦆]'
|
|
||||||
latinBreaks = r'[.,!?;:\n]'
|
|
||||||
splitReg = re.compile(u"{arabic}|{chinese}|{latin}".format(arabic=arabicBreaks, chinese=chineseBreaks, latin=latinBreaks))
|
|
||||||
|
|
||||||
def translate(text, source="auto", target="en"):
|
|
||||||
if source == "": source = "auto"
|
|
||||||
t = Translator(lang_from=source, lang_to=target, text=text)
|
|
||||||
t.start()
|
|
||||||
while t.isAlive():
|
|
||||||
sleep(0.1)
|
|
||||||
t.join()
|
|
||||||
return t.translation
|
|
||||||
|
|
||||||
def splitChunks(text, chunksize):
|
|
||||||
pos = 0
|
|
||||||
potentialPos = 0
|
|
||||||
for splitMark in splitReg.finditer(text):
|
|
||||||
if (splitMark.start() - pos +1) < chunksize:
|
|
||||||
potentialPos = splitMark.start()
|
|
||||||
continue
|
|
||||||
else:
|
|
||||||
yield text[pos:potentialPos+1]
|
|
||||||
pos = potentialPos + 1
|
|
||||||
potentialPos = splitMark.start()
|
|
||||||
yield text[pos:]
|
|
||||||
|
|
||||||
class Translator(threading.Thread):
|
|
||||||
|
|
||||||
def __init__(self, lang_from, lang_to, text, lang_swap=None, chunksize=350, *args, **kwargs):
|
|
||||||
super(Translator, self).__init__(*args, **kwargs)
|
|
||||||
self._stop = threading.Event()
|
|
||||||
self.text = text
|
|
||||||
self.chunksize = chunksize
|
|
||||||
self.lang_to = lang_to
|
|
||||||
self.lang_from = lang_from
|
|
||||||
self.lang_swap = lang_swap
|
|
||||||
self.translation = ''
|
|
||||||
self.lang_translated = ''
|
|
||||||
self.firstChunk = True
|
|
||||||
|
|
||||||
def stop(self):
|
|
||||||
self._stop.set()
|
|
||||||
|
|
||||||
def run(self):
|
|
||||||
for chunk in splitChunks(self.text, self.chunksize):
|
|
||||||
# Make sure we don't send requests to google too often.
|
|
||||||
# Try to simulate a human.
|
|
||||||
if not self.firstChunk:
|
|
||||||
sleep(randint(1, 10))
|
|
||||||
req = self.buildRequest(chunk, self.lang_from, self.lang_to)
|
|
||||||
try:
|
|
||||||
response = urllib2.urlopen(req)
|
|
||||||
translation, lang_translated = self.parseData(response)
|
|
||||||
if self.firstChunk and self.lang_from == "auto" and lang_translated == self.lang_to and self.lang_swap is not None:
|
|
||||||
self.lang_to = self.lang_swap
|
|
||||||
self.firstChunk = False
|
|
||||||
req = self.buildRequest(chunk.encode('utf-8'), self.lang_from, self.lang_to)
|
|
||||||
response = urllib2.urlopen(req)
|
|
||||||
translation, lang_translated = self.parseData(response)
|
|
||||||
except Exception as e:
|
|
||||||
log.exception("Can not translate text '%s'" %chunk)
|
|
||||||
# We have probably been blocked, so stop trying to translate.
|
|
||||||
raise e
|
|
||||||
self.translation += translation
|
|
||||||
# some adjustment, better to do on full text
|
|
||||||
self.translation = self.fixNewlines(self.translation)
|
|
||||||
self.lang_translated = lang_translated
|
|
||||||
|
|
||||||
def buildRequest(self, text, lang_from, lang_to):
|
|
||||||
"""Build POST request which will be sent to Google."""
|
|
||||||
urlTemplate = 'http://translate.google.com/translate_a/single?client=t&sl={lang_from}&tl={lang_to}&ie=utf-8&oe=utf-8&dt=t&dt=bd&tk='
|
|
||||||
url = urlTemplate.format(lang_from=lang_from, lang_to=lang_to)
|
|
||||||
header = {'User-agent': 'Mozilla/5.0', 'Content-Type': 'application/x-www-form-urlencoded'}
|
|
||||||
data = 'text=%s' %urllib2.quote(text)
|
|
||||||
req = urllib2.Request(url, data, header)
|
|
||||||
return req
|
|
||||||
|
|
||||||
def parseData(self, response):
|
|
||||||
"""Parse unstructured response."""
|
|
||||||
data = response.readlines()[0]
|
|
||||||
# get segments with couples ["translation","original text"]
|
|
||||||
l1, l2 = data.split(']],', 1)
|
|
||||||
translation = l1[3:]
|
|
||||||
if l2.startswith('[[\"'):
|
|
||||||
# get list of synonyms
|
|
||||||
syn = l2[l2.find(',[')+1:l2.find(']')].split(',')
|
|
||||||
temp = ', '.join([x.replace('\"', '') for x in syn])
|
|
||||||
else:
|
|
||||||
# get a list with each couple as item
|
|
||||||
sentences = translation.split('],[')
|
|
||||||
temp = ''
|
|
||||||
# get translation, removing first char (quote symbol)
|
|
||||||
for item in sentences:
|
|
||||||
item = item.split('\",\"', 1)[0][1:]
|
|
||||||
# join all translations
|
|
||||||
temp = ' '.join([temp, item])
|
|
||||||
translation = temp.decode('string-escape').decode('utf-8')
|
|
||||||
translation = self.fixPunctuation(translation)
|
|
||||||
# get the language of original text
|
|
||||||
tempLang = data.partition(']],,\"')[2]
|
|
||||||
lang = tempLang[:tempLang.find('\"')]
|
|
||||||
if lang == '':
|
|
||||||
lang = _("unavailable")
|
|
||||||
return translation, lang
|
|
||||||
|
|
||||||
def fixPunctuation(self, translation):
|
|
||||||
"""Clean text from space before punctuation symbol."""
|
|
||||||
# list of potentially positions of spaces to remove
|
|
||||||
spacePos = []
|
|
||||||
for puncMark in splitReg.finditer(translation):
|
|
||||||
spacePos.append(puncMark.start()-1)
|
|
||||||
if len(spacePos) == 0:
|
|
||||||
return translation
|
|
||||||
fixedTranslation = ''
|
|
||||||
for n in xrange(0,len(translation)):
|
|
||||||
temp = translation[n]
|
|
||||||
if n in spacePos and temp == ' ':
|
|
||||||
continue
|
|
||||||
else:
|
|
||||||
fixedTranslation += temp
|
|
||||||
return fixedTranslation
|
|
||||||
|
|
||||||
def fixNewlines(self, translation):
|
|
||||||
"""Adjust newlines and (subsequent or double) spaces."""
|
|
||||||
fixes = [('\r\n ', '\r\n'), ('\n ', '\r\n'), (' ', ' ')]
|
|
||||||
for fix in fixes:
|
|
||||||
translation = translation.replace(fix[0], fix[1])
|
|
||||||
# first char is a space, so...
|
|
||||||
return translation[1:]
|
|
||||||
|
|
||||||
languages = {
|
languages = {
|
||||||
"af": _(u"Afrikaans"),
|
"af": _(u"Afrikaans"),
|
||||||
|
@ -18,9 +18,9 @@
|
|||||||
############################################################
|
############################################################
|
||||||
import translator
|
import translator
|
||||||
import wx
|
import wx
|
||||||
from widgetUtils import BaseDialog
|
import widgetUtils
|
||||||
|
|
||||||
class translateDialog(BaseDialog):
|
class translateDialog(widgetUtils.BaseDialog):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
super(translateDialog, self).__init__(None, -1, title=_(u"Translate message"))
|
super(translateDialog, self).__init__(None, -1, title=_(u"Translate message"))
|
||||||
panel = wx.Panel(self)
|
panel = wx.Panel(self)
|
||||||
|
Loading…
Reference in New Issue
Block a user