Added some code for starting

2016-02-13 17:06:36 -06:00
parent 364472da5c
commit 0266100d66
110 changed files with 6044 additions and 0 deletions
--- a/src/extra/translator/init.py
+++ b/src/extra/translator/init.py
@@ -0,0 +1,6 @@
+# -*- coding: utf-8 -*-
+import translator
+import platform
+if platform.system() == "Windows":
+ import wx_ui as gui
+ 
--- a/src/extra/translator/translator.py
+++ b/src/extra/translator/translator.py
@@ -0,0 +1,251 @@
+# encoding: utf-8
+#
+# Copyright (C) 2013 Mesar Hameed <mhameed@src.gnome.org>
+# This file is covered by the GNU General Public License.
+
+import os
+import re
+import sys
+import threading
+from time import sleep
+from random import randint
+import logging
+log = logging.getLogger("translator")
+import urllib2
+
+# Each group has to be a class of possible breaking points for the writing script.
+# Usually this is the major syntax marks, such as:
+# full stop, comma, exclaim, question, etc.
+arabicBreaks = u'[،؛؟]'
+# Thanks to Talori in the NVDA irc room:
+# U+3000 to U+303F, U+FE10 to U+FE1F, U+FE30 to U+FE6F, U+FF01 to U+FF60
+chineseBreaks = u'[　-〿︐-︟︰-﹯！-｠]'
+latinBreaks = r'[.,!?;:\n]'
+splitReg = re.compile(u"{arabic}|{chinese}|{latin}".format(arabic=arabicBreaks, chinese=chineseBreaks, latin=latinBreaks))
+
+def translate(text, source="auto", target="en"):
+	if source == "": source = "auto"
+	t = Translator(lang_from=source, lang_to=target, text=text)
+	t.start()
+	while t.isAlive():
+		sleep(0.1)
+	t.join()
+	return t.translation
+
+def splitChunks(text, chunksize):
+	pos = 0
+	potentialPos = 0
+	for splitMark in splitReg.finditer(text):
+		if (splitMark.start() - pos +1) < chunksize:
+			potentialPos = splitMark.start()
+			continue
+		else:
+			yield text[pos:potentialPos+1]
+			pos = potentialPos + 1
+			potentialPos = splitMark.start()
+	yield text[pos:]
+
+class Translator(threading.Thread):
+
+	def __init__(self, lang_from, lang_to, text, lang_swap=None, chunksize=350, *args, **kwargs):
+		super(Translator, self).__init__(*args, **kwargs)
+		self._stop = threading.Event()
+		self.text = text
+		self.chunksize = chunksize
+		self.lang_to = lang_to
+		self.lang_from = lang_from
+		self.lang_swap = lang_swap
+		self.translation = ''
+		self.lang_translated = ''
+		self.firstChunk = True
+
+	def stop(self):
+		self._stop.set()
+
+	def run(self):
+		for chunk in splitChunks(self.text, self.chunksize):
+			# Make sure we don't send requests to google too often.
+			# Try to simulate a human.
+			if not self.firstChunk:
+				sleep(randint(1, 10))
+			req = self.buildRequest(chunk, self.lang_from, self.lang_to)
+			try:
+				response = urllib2.urlopen(req)
+				translation, lang_translated = self.parseData(response)
+				if self.firstChunk and self.lang_from == "auto" and  lang_translated == self.lang_to and self.lang_swap is not None:
+					self.lang_to = self.lang_swap
+					self.firstChunk = False
+					req = self.buildRequest(chunk.encode('utf-8'), self.lang_from, self.lang_to)
+					response = urllib2.urlopen(req)
+					translation, lang_translated = self.parseData(response)
+			except Exception as e:
+				log.exception("Can not translate text '%s'" %chunk)
+				# We have probably been blocked, so stop trying to translate.
+				raise e
+			self.translation += translation
+		# some adjustment, better to do on full text
+		self.translation = self.fixNewlines(self.translation)
+		self.lang_translated = lang_translated
+
+	def buildRequest(self, text, lang_from, lang_to):
+		"""Build POST request which will be sent to Google."""
+		urlTemplate = 'http://translate.google.com/translate_a/single?client=t&sl={lang_from}&tl={lang_to}&ie=utf-8&oe=utf-8&dt=t&dt=bd&tk='
+		url = urlTemplate.format(lang_from=lang_from, lang_to=lang_to)
+		header = {'User-agent': 'Mozilla/5.0', 'Content-Type': 'application/x-www-form-urlencoded'}
+		data = 'text=%s' %urllib2.quote(text)
+		req = urllib2.Request(url, data, header)
+		return req
+
+	def parseData(self, response):
+		"""Parse unstructured response."""
+		data = response.readlines()[0]
+		# get segments with couples ["translation","original text"]
+		l1, l2 = data.split(']],', 1)
+		translation = l1[3:]
+		if l2.startswith('[[\"'):
+			# get list of synonyms
+			syn = l2[l2.find(',[')+1:l2.find(']')].split(',')
+			temp = ', '.join([x.replace('\"', '') for x in syn])
+		else:
+			# get a list with each couple as item
+			sentences = translation.split('],[')
+			temp = ''
+			# get translation, removing first char (quote symbol)
+			for item in sentences:
+				item = item.split('\",\"', 1)[0][1:]
+				# join all translations
+				temp = ' '.join([temp, item])
+		translation = temp.decode('string-escape').decode('utf-8')
+		translation = self.fixPunctuation(translation)
+		# get the language of original text
+		tempLang = data.partition(']],,\"')[2]
+		lang = tempLang[:tempLang.find('\"')]
+		if lang == '':
+			lang = _("unavailable")
+		return translation, lang
+
+	def fixPunctuation(self, translation):
+		"""Clean text from space before punctuation symbol."""
+		# list of potentially positions of spaces to remove
+		spacePos = []
+		for puncMark in splitReg.finditer(translation):
+			spacePos.append(puncMark.start()-1)
+		if len(spacePos) == 0:
+			return translation
+		fixedTranslation = ''
+		for n in xrange(0,len(translation)):
+			temp = translation[n]
+			if n in spacePos and temp == ' ':
+				continue
+			else:
+				fixedTranslation += temp
+		return fixedTranslation
+
+	def fixNewlines(self, translation):
+		"""Adjust newlines and (subsequent or double) spaces."""
+		fixes = [('\r\n ', '\r\n'), ('\n ', '\r\n'), ('  ', ' ')]
+		for fix in fixes:
+			translation = translation.replace(fix[0], fix[1])
+		# first char is a space, so...
+		return translation[1:]
+
+languages = {
+  "af": _(u"Afrikaans"),
+  "sq": _(u"Albanian"),
+  "am": _(u"Amharic"),
+  "ar": _(u"Arabic"),
+  "hy": _(u"Armenian"),
+  "az": _(u"Azerbaijani"),
+  "eu": _(u"Basque"),
+  "be": _(u"Belarusian"),
+  "bn": _(u"Bengali"),
+  "bh": _(u"Bihari"),
+  "bg": _(u"Bulgarian"),
+  "my": _(u"Burmese"),
+  "ca": _(u"Catalan"),
+  "chr": _(u"Cherokee"),
+  "zh": _(u"Chinese"),
+  "zh-CN": _(u"Chinese_simplified"),
+  "zh-TW": _(u"Chinese_traditional"),
+  "hr": _(u"Croatian"),
+  "cs": _(u"Czech"),
+  "da": _(u"Danish"),
+  "dv": _(u"Dhivehi"),
+  "nl": _(u"Dutch"),
+  "en": _(u"English"),
+  "eo": _(u"Esperanto"),
+  "et": _(u"Estonian"),
+  "tl": _(u"Filipino"),
+  "fi": _(u"Finnish"),
+  "fr": _(u"French"),
+  "gl": _(u"Galician"),
+  "ka": _(u"Georgian"),
+  "de": _(u"German"),
+  "el": _(u"Greek"),
+  "gn": _(u"Guarani"),
+  "gu": _(u"Gujarati"),
+  "iw": _(u"Hebrew"),
+  "hi": _(u"Hindi"),
+  "hu": _(u"Hungarian"),
+  "is": _(u"Icelandic"),
+  "id": _(u"Indonesian"),
+  "iu": _(u"Inuktitut"),
+  "ga": _(u"Irish"),
+  "it": _(u"Italian"),
+  "ja": _(u"Japanese"),
+  "kn": _(u"Kannada"),
+  "kk": _(u"Kazakh"),
+  "km": _(u"Khmer"),
+  "ko": _(u"Korean"),
+  "ku": _(u"Kurdish"),
+  "ky": _(u"Kyrgyz"),
+  "lo": _(u"Laothian"),
+  "lv": _(u"Latvian"),
+  "lt": _(u"Lithuanian"),
+  "mk": _(u"Macedonian"),
+  "ms": _(u"Malay"),
+  "ml": _(u"Malayalam"),
+  "mt": _(u"Maltese"),
+  "mr": _(u"Marathi"),
+  "mn": _(u"Mongolian"),
+  "ne": _(u"Nepali"),
+  "no": _(u"Norwegian"),
+  "or": _(u"Oriya"),
+  "ps": _(u"Pashto"),
+  "fa": _(u"Persian"),
+  "pl": _(u"Polish"),
+  "pt-PT": _(u"Portuguese"),
+  "pa": _(u"Punjabi"),
+  "ro": _(u"Romanian"),
+  "ru": _(u"Russian"),
+  "sa": _(u"Sanskrit"),
+  "sr": _(u"Serbian"),
+  "sd": _(u"Sindhi"),
+  "si": _(u"Sinhalese"),
+  "sk": _(u"Slovak"),
+  "sl": _(u"Slovenian"),
+  "es": _(u"Spanish"),
+  "sw": _(u"Swahili"),
+  "sv": _(u"Swedish"),
+  "tg": _(u"Tajik"),
+  "ta": _(u"Tamil"),
+  "tl": _(u"Tagalog"),
+  "te": _(u"Telugu"),
+  "th": _(u"Thai"),
+  "bo": _(u"Tibetan"),
+  "tr": _(u"Turkish"),
+  "uk": _(u"Ukrainian"),
+  "ur": _(u"Urdu"),
+  "uz": _(u"Uzbek"),
+  "ug": _(u"Uighur"),
+  "vi": _(u"Vietnamese"),
+  "cy": _(u"Welsh"),
+  "yi": _(u"Yiddish")
+}
+
+def available_languages():
+    l = languages.keys()
+    d = languages.values()
+    l.insert(0, '')
+    d.insert(0, _(u"autodetect"))
+    return sorted(zip(l, d))
--- a/src/extra/translator/wx_ui.py
+++ b/src/extra/translator/wx_ui.py
@@ -0,0 +1,45 @@
+# -*- coding: utf-8 -*-
+############################################################
+#    Copyright (c) 2013, 2014 Manuel Eduardo Cortéz Vallejo <manuel@manuelcortez.net>
+#       
+#    This program is free software: you can redistribute it and/or modify
+#    it under the terms of the GNU General Public License as published by
+#    the Free Software Foundation, either version 2 of the License, or
+#    (at your option) any later version.
+#
+#    This program is distributed in the hope that it will be useful,
+#    but WITHOUT ANY WARRANTY; without even the implied warranty of
+#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#    GNU General Public License for more details.
+#
+#    You should have received a copy of the GNU General Public License
+#    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+############################################################
+import translator
+import wx
+from widgetUtils  import BaseDialog
+
+class translateDialog(BaseDialog):
+ def __init__(self):
+  super(translateDialog, self).__init__(None, -1, title=_(u"Translate message"))
+  panel = wx.Panel(self)
+  sizer = wx.BoxSizer(wx.VERTICAL)
+  staticSource = wx.StaticText(panel, -1, _(u"Source language"))
+  self.source_lang = wx.ComboBox(panel, -1, choices=[x[1] for x in translator.available_languages()], style = wx.CB_READONLY)
+  self.source_lang.SetFocus()
+  staticDest = wx.StaticText(panel, -1, _(u"Target language"))
+  self.source_lang.SetSelection(0)
+  self.dest_lang = wx.ComboBox(panel, -1, choices=[x[1] for x in translator.available_languages()], style = wx.CB_READONLY)
+  listSizer = wx.BoxSizer(wx.HORIZONTAL)
+  listSizer.Add(staticSource)
+  listSizer.Add(self.source_lang)
+  listSizer.Add(staticDest)
+  listSizer.Add(self.dest_lang)
+  ok = wx.Button(panel, wx.ID_OK)
+  ok.SetDefault()
+  cancel = wx.Button(panel, wx.ID_CANCEL)
+  self.SetEscapeId(wx.ID_CANCEL)
+
+ def get(self, control):
+  return getattr(self, control).GetSelection()