Added back the enchant-based spelling correction module
This commit is contained in:
		| @@ -1,4 +1,6 @@ | ||||
| from __future__ import absolute_import | ||||
| from __future__ import unicode_literals | ||||
| from . import spellchecker | ||||
| import platform | ||||
| if platform.system() == "Windows": | ||||
| 	from .wx_ui import * | ||||
|  from .wx_ui import * | ||||
| @@ -1,115 +0,0 @@ | ||||
| # -*- coding: utf-8 -*- | ||||
| """ High level Spell checker module by using the SymSpellPy library. """ | ||||
| import os | ||||
| import glob | ||||
| import shutil | ||||
| import logging | ||||
| import paths | ||||
| from symspellpy.symspellpy import SymSpell, Verbosity | ||||
| from codecs import open as open_ | ||||
|  | ||||
| log = logging.getLogger("SpellChecker.checker") | ||||
|  | ||||
| loaded_dicts = dict() | ||||
| ready = False | ||||
|  | ||||
def load_dicts():
	""" Rebuild the module-level cache of SymSpell dictionaries.
	Every "*.txt" file found in the "dicts" folder of the config directory is loaded into its own SymSpell instance and stored in loaded_dicts, keyed by the file name without extension.
	The module-level ready flag is set to True at the end, even if the folder is missing and nothing was loaded.
	"""
	global loaded_dicts, ready
	log.debug("Start dictionary loading for spelling checker module...")
	# Drop whatever a previous call may have cached.
	if loaded_dicts:
		loaded_dicts = dict()
	dicts_dir = os.path.join(paths.config_path(), "dicts")
	if os.path.isdir(dicts_dir):
		log.debug("Loading language dictionaries from path %s" % (dicts_dir,))
		dict_files = glob.glob(os.path.join(dicts_dir, "*.txt"))
		log.debug("%r files found." % (len(dict_files)))
		for dict_file in dict_files:
			# The language key is the file name without its extension.
			language = os.path.splitext(os.path.basename(dict_file))[0]
			speller = SymSpell()
			speller.load_dictionary(dict_file, 0, 1, encoding="utf-8")
			loaded_dicts[language] = speller
			log.debug("Added dictionary for language %s " % (language,))
	ready = True
	log.debug("All dicts were loaded.")
|  | ||||
def prepare_dicts(language):
	""" Copy the main dictionary file to the user's config directory so it can be modified and read without needing to require privileged sessions.
	The "dicts" folder is created inside the config directory when missing. The file is copied only when the application ships a dictionary for the language and the user does not have a copy yet, so an existing user dictionary is never overwritten.
	@ language: two letter language code.
	"""
	log.debug("preparing dictionary data...")
	dicts_path = os.path.join(paths.config_path(), "dicts")
	if not os.path.exists(dicts_path):
		log.debug("Creating dicts folder in config directory...")
		# makedirs also creates the config directory itself if it does not exist yet; os.mkdir would fail in that case.
		os.makedirs(dicts_path)
	filename = language+".txt"
	original_file = os.path.join(paths.app_path(), "dictionaries", filename)
	user_file = os.path.join(dicts_path, filename)
	if os.path.exists(original_file) and not os.path.exists(user_file):
		log.debug("Dictionary for language %s is not present in user config. Coppying... " % (language,))
		shutil.copy(original_file, user_file)
|  | ||||
class SpellChecker(object):
	""" Word by word spell checker backed by one of the dictionaries stored in loaded_dicts.
	The text is split on whitespace by set_text and joined back with single spaces by the text property, so the original spacing and line breaks are not preserved.
	Typical usage: set_language(), set_text(), iterate over check_words() calling replace(), replace_all() or ignore_word() between iterations, then read text.
	"""

	def __init__(self, wordlist=None, *args, **kwargs):
		""" Create an empty checker. wordlist and positional arguments are accepted but not used; keyword arguments are stored in self.kwargs. """
		self.kwargs = kwargs
		self.dictionary = None
		self.ignored_words = []
		# Initialised here so the text and word properties work before set_text() is called.
		self.transformed_words = []
		self.word_index = 0

	def set_language(self, lang):
		""" Select the dictionary to be used for lookups.
		@ lang: key of the dictionary in loaded_dicts.
		Raises ValueError if there is no dictionary loaded for that key.
		"""
		dictionary = loaded_dicts.get(lang)
		if dictionary is None:
			raise ValueError("Dictionary not found for the specified language")
		self.dictionary = dictionary

	def set_text(self, text):
		""" Set the text to be checked, splitting it into words, and reset the current position. """
		self.transformed_words = text.split()
		self.word_index = 0

	def check_words(self):
		""" Generator yielding a tuple (suggestions, context, index) for every word considered misspelled.
		A word is skipped when it is in the ignored list, when the dictionary returns no suggestions at all, or when any suggestion has distance 0 (the word itself is in the dictionary).
		self.word_index is updated before yielding, so replace() and the word property refer to the word just reported.
		"""
		for index in range(0, len(self.transformed_words)):
			# Read the word on every iteration: replace_all() may have changed it since the loop started.
			current = self.transformed_words[index]
			if current in self.ignored_words:
				continue
			# NOTE(review): the positional 2 is presumably SymSpell's maximum edit distance; confirm against symspellpy.
			suggestions = self.dictionary.lookup(current, Verbosity.CLOSEST, 2, transfer_casing=True)
			if len(suggestions) == 0:
				continue
			if any(s.distance == 0 for s in suggestions):
				continue
			self.word_index = index
			yield (suggestions, self._context_for(index), index)

	def _context_for(self, index):
		""" Return up to ten words surrounding the word at index, joined by spaces. The returned window always contains the word itself. """
		words = self.transformed_words
		total = len(words)
		if total <= 10:
			return " ".join(words)
		if index < 10:
			return " ".join(words[:10])
		if index >= total-9:
			# A slice is needed here: indexing with [-10] would join the characters of a single word.
			return " ".join(words[-10:])
		return " ".join(words[index-5:index+5])

	def replace(self, suggestion):
		""" Replace the current word (the one at self.word_index) with suggestion.
		Raises ValueError if the current index does not point to a word in the text.
		"""
		if self.word_index >= len(self.transformed_words):
			raise ValueError("Word index is not present in the current text")
		self.transformed_words[self.word_index] = suggestion

	def replace_all(self, word):
		""" Replace every occurrence of the current word in the text with word. Does nothing when there is no current word. """
		existing_word = self.word
		if existing_word is None:
			return
		for i in range(0, len(self.transformed_words)):
			if self.transformed_words[i] == existing_word:
				self.transformed_words[i] = word

	def ignore_word(self, word):
		""" Add word to the list of words skipped by check_words(). """
		self.ignored_words.append(word)

	@property
	def text(self):
		""" The current text, with words joined by single spaces. """
		return " ".join(self.transformed_words)

	@property
	def word(self):
		""" The word at the current position, or None if the text is empty or the position is out of range. """
		if len(self.transformed_words) == 0 or self.word_index >= len(self.transformed_words):
			return None
		return self.transformed_words[self.word_index]
| @@ -1,70 +1,80 @@ | ||||
| # -*- coding: utf-8 -*- | ||||
| from __future__ import absolute_import | ||||
| from __future__ import unicode_literals | ||||
| from builtins import next | ||||
| from builtins import object | ||||
| import os | ||||
| import logging | ||||
| from . import wx_ui | ||||
| import widgetUtils | ||||
| import output | ||||
| import config | ||||
| import languageHandler | ||||
| from platform_utils import paths | ||||
| from . import checker | ||||
| from . import wx_ui | ||||
|  | ||||
| import enchant | ||||
| import paths | ||||
| from . import twitterFilter | ||||
| from enchant.checker import SpellChecker | ||||
| from enchant.errors import DictNotFoundError | ||||
| from enchant import tokenize | ||||
| log = logging.getLogger("extra.SpellChecker.spellChecker") | ||||
|  | ||||
| class spellChecker(object): | ||||
| 	def __init__(self, text): | ||||
| 		super(spellChecker, self).__init__() | ||||
| 		self.active = True | ||||
| 		self.checker = checker.SpellChecker() | ||||
| 		log.debug("Using language: %s" % (languageHandler.getLanguage(),)) | ||||
| 		try: | ||||
| 			self.checker.set_language(languageHandler.curLang[:2]) | ||||
| 		except ValueError: | ||||
| 			log.exception("Dictionary for language %s not found." % (languageHandler.curLang,)) | ||||
| 			wx_ui.dict_not_found_error() | ||||
| 			self.active = False | ||||
| 		self.checker.set_text(text) | ||||
| 		self.generator = self.checker.check_words() | ||||
| 		if self.active == True: | ||||
| 			log.debug("Creating dialog...") | ||||
| 			self.dialog = wx_ui.spellCheckerDialog() | ||||
| 			widgetUtils.connect_event(self.dialog.ignore, widgetUtils.BUTTON_PRESSED, self.ignore) | ||||
| 			widgetUtils.connect_event(self.dialog.ignoreAll, widgetUtils.BUTTON_PRESSED, self.ignoreAll) | ||||
| 			widgetUtils.connect_event(self.dialog.replace, widgetUtils.BUTTON_PRESSED, self.replace) | ||||
| 			widgetUtils.connect_event(self.dialog.replaceAll, widgetUtils.BUTTON_PRESSED, self.replaceAll) | ||||
| 			self.check() | ||||
| 			self.dialog.get_response() | ||||
| 			self.fixed_text = self.checker.text | ||||
|  def __init__(self, text): | ||||
|   super(spellChecker, self).__init__() | ||||
|   self.active = True | ||||
|   try: | ||||
|    if config.app["app-settings"]["language"] == "system": | ||||
|     log.debug("Using the system language") | ||||
|     self.dict = enchant.DictWithPWL(languageHandler.curLang[:2], os.path.join(paths.config_path(), "wordlist.dict")) | ||||
|    else: | ||||
|     log.debug("Using language: %s" % (languageHandler.getLanguage(),)) | ||||
|     self.dict = enchant.DictWithPWL(languageHandler.getLanguage()[:2], os.path.join(paths.config_path(), "wordlist.dict")) | ||||
|   except DictNotFoundError: | ||||
|    log.exception("Dictionary for language %s not found." % (dictionary,)) | ||||
|    wx_ui.dict_not_found_error() | ||||
|    self.active = False | ||||
|   self.checker = SpellChecker(self.dict, filters=[twitterFilter.TwitterFilter, tokenize.EmailFilter, tokenize.URLFilter]) | ||||
|   self.checker.set_text(text) | ||||
|   if self.active == True: | ||||
|    log.debug("Creating dialog...") | ||||
|    self.dialog = wx_ui.spellCheckerDialog() | ||||
|    widgetUtils.connect_event(self.dialog.ignore, widgetUtils.BUTTON_PRESSED, self.ignore) | ||||
|    widgetUtils.connect_event(self.dialog.ignoreAll, widgetUtils.BUTTON_PRESSED, self.ignoreAll) | ||||
|    widgetUtils.connect_event(self.dialog.replace, widgetUtils.BUTTON_PRESSED, self.replace) | ||||
|    widgetUtils.connect_event(self.dialog.replaceAll, widgetUtils.BUTTON_PRESSED, self.replaceAll) | ||||
|    widgetUtils.connect_event(self.dialog.add, widgetUtils.BUTTON_PRESSED, self.add) | ||||
|    self.check() | ||||
|    self.dialog.get_response() | ||||
|    self.fixed_text = self.checker.get_text() | ||||
|  | ||||
| 	def check(self): | ||||
| 		try: | ||||
| 			suggestions, context, self.wordIndex = next(self.generator) | ||||
| 			textToSay = _("Misspelled word: %s") % (self.checker.word,) | ||||
| 			context = context | ||||
| 			self.dialog.set_title(textToSay) | ||||
| 			output.speak(textToSay) | ||||
| 			self.dialog.set_word_and_suggestions(word=self.checker.word, context=context, suggestions=[suggestion.term for suggestion in suggestions]) | ||||
| 		except StopIteration: | ||||
| 			log.debug("Process finished.") | ||||
| 			wx_ui.finished() | ||||
| 			self.dialog.Destroy() | ||||
|  def check(self): | ||||
|   try: | ||||
|    next(self.checker) | ||||
|    textToSay = _(u"Misspelled word: %s") % (self.checker.word,) | ||||
|    context = u"... %s %s %s" % (self.checker.leading_context(10), self.checker.word, self.checker.trailing_context(10)) | ||||
|    self.dialog.set_title(textToSay) | ||||
|    output.speak(textToSay) | ||||
|    self.dialog.set_word_and_suggestions(word=self.checker.word, context=context, suggestions=self.checker.suggest()) | ||||
|   except StopIteration: | ||||
|    log.debug("Process finished.") | ||||
|    wx_ui.finished() | ||||
|    self.dialog.Destroy() | ||||
|  | ||||
| 	def ignore(self, ev): | ||||
| 		self.check() | ||||
|  def ignore(self, ev): | ||||
|   self.check() | ||||
|  | ||||
| 	def ignoreAll(self, ev): | ||||
| 		self.checker.ignore_word(word=self.checker.word) | ||||
| 		self.check() | ||||
|  def ignoreAll(self, ev): | ||||
|   self.checker.ignore_always(word=self.checker.word) | ||||
|   self.check() | ||||
|  | ||||
| 	def replace(self, ev): | ||||
| 		self.checker.replace(self.dialog.get_selected_suggestion()) | ||||
| 		self.check() | ||||
|  def replace(self, ev): | ||||
|   self.checker.replace(self.dialog.get_selected_suggestion()) | ||||
|   self.check() | ||||
|  | ||||
| 	def replaceAll(self, ev): | ||||
| 		self.checker.replace_all(self.dialog.get_selected_suggestion()) | ||||
| 		self.check() | ||||
|  def replaceAll(self, ev): | ||||
|   self.checker.replace_always(self.dialog.get_selected_suggestion()) | ||||
|   self.check() | ||||
|  | ||||
| 	def clean(self): | ||||
| 		if hasattr(self, "dialog"): | ||||
| 			self.dialog.Destroy() | ||||
|  def add(self, ev): | ||||
|   self.checker.add() | ||||
|   self.check() | ||||
|   | ||||
							
								
								
									
										16
									
								
								src/extra/SpellChecker/twitterFilter.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										16
									
								
								src/extra/SpellChecker/twitterFilter.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,16 @@ | ||||
| # -*- coding: utf-8 -*- | ||||
| from __future__ import unicode_literals | ||||
| import re | ||||
| from enchant.tokenize import Filter | ||||
|  | ||||
class TwitterFilter(Filter):
    r"""Filter skipping over twitter usernames and hashtags.

    This filter skips any words matching the following regular expression:
    ^[#@](\S){1,}$
    That is, any word made of a leading "#" or "@" followed by one or more
    non-whitespace characters, which is what users and hashtags look like.
    """
    # The docstring above is a raw string because it contains the "\S" escape.
    _pattern = re.compile(r"^[#@](\S){1,}$")

    def _skip(self, word):
        """Return True if word looks like a username or a hashtag, False otherwise."""
        return self._pattern.match(word) is not None
| @@ -21,60 +21,63 @@ import wx | ||||
| import application | ||||
|  | ||||
| class spellCheckerDialog(wx.Dialog): | ||||
| 	def __init__(self): | ||||
| 		super(spellCheckerDialog, self).__init__(None, 1) | ||||
| 		panel = wx.Panel(self) | ||||
| 		sizer = wx.BoxSizer(wx.VERTICAL) | ||||
| 		word = wx.StaticText(panel, -1, _("&Misspelled word")) | ||||
| 		self.word = wx.TextCtrl(panel, -1) | ||||
| 		wordBox = wx.BoxSizer(wx.HORIZONTAL) | ||||
| 		wordBox.Add(word, 0, wx.ALL, 5) | ||||
| 		wordBox.Add(self.word, 0, wx.ALL, 5) | ||||
| 		context = wx.StaticText(panel, -1, _("Con&text")) | ||||
| 		self.context = wx.TextCtrl(panel, -1) | ||||
| 		contextBox = wx.BoxSizer(wx.HORIZONTAL) | ||||
| 		contextBox.Add(context, 0, wx.ALL, 5) | ||||
| 		contextBox.Add(self.context, 0, wx.ALL, 5) | ||||
| 		suggest = wx.StaticText(panel, -1, _("&Suggestions")) | ||||
| 		self.suggestions = wx.ListBox(panel, -1, choices=[], style=wx.LB_SINGLE) | ||||
| 		suggestionsBox = wx.BoxSizer(wx.HORIZONTAL) | ||||
| 		suggestionsBox.Add(suggest, 0, wx.ALL, 5) | ||||
| 		suggestionsBox.Add(self.suggestions, 0, wx.ALL, 5) | ||||
| 		self.ignore = wx.Button(panel, -1, _("&Ignore")) | ||||
| 		self.ignoreAll = wx.Button(panel, -1, _("Ignore &all")) | ||||
| 		self.replace = wx.Button(panel, -1, _("&Replace")) | ||||
| 		self.replaceAll = wx.Button(panel, -1, _("Replace a&ll")) | ||||
| 		close = wx.Button(panel, wx.ID_CANCEL) | ||||
| 		btnBox = wx.BoxSizer(wx.HORIZONTAL) | ||||
| 		btnBox.Add(self.ignore, 0, wx.ALL, 5) | ||||
| 		btnBox.Add(self.ignoreAll, 0, wx.ALL, 5) | ||||
| 		btnBox.Add(self.replace, 0, wx.ALL, 5) | ||||
| 		btnBox.Add(self.replaceAll, 0, wx.ALL, 5) | ||||
| 		btnBox.Add(close, 0, wx.ALL, 5) | ||||
| 		sizer.Add(wordBox, 0, wx.ALL, 5) | ||||
| 		sizer.Add(contextBox, 0, wx.ALL, 5) | ||||
| 		sizer.Add(suggestionsBox, 0, wx.ALL, 5) | ||||
| 		sizer.Add(btnBox, 0, wx.ALL, 5) | ||||
| 		panel.SetSizer(sizer) | ||||
| 		self.SetClientSize(sizer.CalcMin()) | ||||
|  def __init__(self): | ||||
|   super(spellCheckerDialog, self).__init__(None, 1) | ||||
|   panel = wx.Panel(self) | ||||
|   sizer = wx.BoxSizer(wx.VERTICAL) | ||||
|   word = wx.StaticText(panel, -1, _(u"Misspelled word")) | ||||
|   self.word = wx.TextCtrl(panel, -1) | ||||
|   wordBox = wx.BoxSizer(wx.HORIZONTAL) | ||||
|   wordBox.Add(word, 0, wx.ALL, 5) | ||||
|   wordBox.Add(self.word, 0, wx.ALL, 5) | ||||
|   context = wx.StaticText(panel, -1, _(u"Context")) | ||||
|   self.context = wx.TextCtrl(panel, -1) | ||||
|   contextBox = wx.BoxSizer(wx.HORIZONTAL) | ||||
|   contextBox.Add(context, 0, wx.ALL, 5) | ||||
|   contextBox.Add(self.context, 0, wx.ALL, 5) | ||||
|   suggest = wx.StaticText(panel, -1, _(u"Suggestions")) | ||||
|   self.suggestions = wx.ListBox(panel, -1, choices=[], style=wx.LB_SINGLE) | ||||
|   suggestionsBox = wx.BoxSizer(wx.HORIZONTAL) | ||||
|   suggestionsBox.Add(suggest, 0, wx.ALL, 5) | ||||
|   suggestionsBox.Add(self.suggestions, 0, wx.ALL, 5) | ||||
|   self.ignore = wx.Button(panel, -1, _(u"&Ignore")) | ||||
|   self.ignoreAll = wx.Button(panel, -1, _(u"I&gnore all")) | ||||
|   self.replace = wx.Button(panel, -1, _(u"&Replace")) | ||||
|   self.replaceAll = wx.Button(panel, -1, _(u"R&eplace all")) | ||||
|   self.add = wx.Button(panel, -1, _(u"&Add to personal dictionary")) | ||||
|   close = wx.Button(panel, wx.ID_CANCEL) | ||||
|   btnBox = wx.BoxSizer(wx.HORIZONTAL) | ||||
|   btnBox.Add(self.ignore, 0, wx.ALL, 5) | ||||
|   btnBox.Add(self.ignoreAll, 0, wx.ALL, 5) | ||||
|   btnBox.Add(self.replace, 0, wx.ALL, 5) | ||||
|   btnBox.Add(self.replaceAll, 0, wx.ALL, 5) | ||||
|   btnBox.Add(self.add, 0, wx.ALL, 5) | ||||
|   btnBox.Add(close, 0, wx.ALL, 5) | ||||
|   sizer.Add(wordBox, 0, wx.ALL, 5) | ||||
|   sizer.Add(contextBox, 0, wx.ALL, 5) | ||||
|   sizer.Add(suggestionsBox, 0, wx.ALL, 5) | ||||
|   sizer.Add(btnBox, 0, wx.ALL, 5) | ||||
|   panel.SetSizer(sizer) | ||||
|   self.SetClientSize(sizer.CalcMin()) | ||||
|  | ||||
| 	def get_response(self): | ||||
| 		return self.ShowModal() | ||||
|  | ||||
| 	def set_title(self, title): | ||||
| 		return self.SetTitle(title) | ||||
|  def get_response(self): | ||||
|   return self.ShowModal() | ||||
|  | ||||
| 	def set_word_and_suggestions(self, word, context, suggestions): | ||||
| 		self.word.SetValue(word) | ||||
| 		self.context.ChangeValue(context) | ||||
| 		self.suggestions.Set(suggestions) | ||||
| 		self.suggestions.SetFocus() | ||||
|  def set_title(self, title): | ||||
|   return self.SetTitle(title) | ||||
|  | ||||
| 	def get_selected_suggestion(self): | ||||
| 		return self.suggestions.GetStringSelection() | ||||
|  def set_word_and_suggestions(self, word, context, suggestions): | ||||
|   self.word.SetValue(word) | ||||
|   self.context.ChangeValue(context) | ||||
|   self.suggestions.Set(suggestions) | ||||
|   self.suggestions.SetFocus() | ||||
|  | ||||
|  def get_selected_suggestion(self): | ||||
|   return self.suggestions.GetStringSelection() | ||||
|  | ||||
| def dict_not_found_error(): | ||||
| 	wx.MessageDialog(None, _("An error has occurred. There are no dictionaries available for the selected language in {0}").format(application.name,), _("Error"), wx.ICON_ERROR).ShowModal() | ||||
|  wx.MessageDialog(None, _(u"An error has occurred. There are no dictionaries available for the selected language in {0}").format(application.name,), _(u"Error"), wx.ICON_ERROR).ShowModal() | ||||
|  | ||||
| def finished(): | ||||
| 	wx.MessageDialog(None, _("Spell check complete."), application.name, style=wx.OK).ShowModal() | ||||
|  wx.MessageDialog(None, _(u"Spell check complete."), application.name, style=wx.OK).ShowModal() | ||||
|   | ||||
| @@ -18,7 +18,6 @@ if hasattr(sys, "frozen"): | ||||
| 	sys.excepthook = lambda x, y, z: logging.critical(''.join(traceback.format_exception(x, y, z))) | ||||
| from mysc.thread_utils import call_threaded | ||||
| from wxUI import commonMessages | ||||
| from extra.SpellChecker import checker # Load dictionaries in advance for spelling correction | ||||
|  | ||||
| log = logging.getLogger("main") | ||||
|  | ||||
| @@ -57,8 +56,6 @@ def setup(): | ||||
| 	del sm | ||||
| 	log.debug("Loading dictionaries for spelling correction...") | ||||
| 	# Let's copy dictionary files for the selected language just in case it is not present already. | ||||
| 	checker.prepare_dicts(languageHandler.curLang[:2]) | ||||
| 	call_threaded(checker.load_dicts) | ||||
| 	r = mainController.Controller() | ||||
| 	call_threaded(r.login) | ||||
| 	app.run() | ||||
|   | ||||
| @@ -36,7 +36,7 @@ build_exe_options = dict( | ||||
| 	include_msvcr=True, | ||||
| 	zip_include_packages=["accessible_output2", "sound_lib", "arrow"], | ||||
| 	replace_paths = [("*", "")], | ||||
| 	include_files=["session.defaults", "cacert.pem", "app-configuration.defaults", "locales", "sounds", "documentation", "../windows-dependencies/x86/oggenc2.exe", "../windows-dependencies/x86/bootstrap.exe", "../windows-dependencies/dictionaries", find_sound_lib_datafiles(), find_accessible_output2_datafiles(), ("../windows-dependencies/msvc32", ".")], | ||||
| 	include_files=["session.defaults", "cacert.pem", "app-configuration.defaults", "locales", "sounds", "documentation", "../windows-dependencies/x86/oggenc2.exe", "../windows-dependencies/x86/bootstrap.exe", "../windows-dependencies/dictionaries", find_sound_lib_datafiles(), find_accessible_output2_datafiles(), ("../windows-dependencies/msvc32", "."), ("../windows-dependencies/dictionaries", "lib/enchant/data/mingw32/share/enchant/hunspell")], | ||||
| 	packages=["interactors", "presenters", "views", "wxUI"], | ||||
| 	) | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user