Added a new module for performing spelling correction. Needs testing.
This commit is contained in:
		
							
								
								
									
										119
									
								
								src/extra/SpellChecker/checker.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										119
									
								
								src/extra/SpellChecker/checker.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,119 @@ | ||||
| # -*- coding: utf-8 -*- | ||||
| """ High level Spell checker module by using the SymSpellPy library. """ | ||||
| import os | ||||
| import glob | ||||
| import shutil | ||||
| import logging | ||||
| import paths | ||||
| from symspellpy.symspellpy import SymSpell, Verbosity | ||||
| from codecs import open as open_ | ||||
|  | ||||
| log = logging.getLogger("SpellChecker.checker") | ||||
|  | ||||
| loaded_dicts = dict() | ||||
| ready = False | ||||
|  | ||||
def load_dicts():
	""" Load every dictionary file found in the "dicts" folder of the config directory.

	Each "*.txt" file is loaded into its own SymSpell instance and stored in the
	module-level loaded_dicts mapping, using the file name without its extension as key.
	Previously loaded dictionaries are discarded first. The module-level ready flag is
	set to True once the function finishes, even if the folder does not exist.
	"""
	global loaded_dicts, ready
	log.debug("Start dictionary loading for spelling checker module...")
	# Start from a clean mapping when something was loaded by a previous call.
	if loaded_dicts:
		loaded_dicts = {}
	dicts_dir = os.path.join(paths.config_path(), "dicts")
	if os.path.isdir(dicts_dir):
		log.debug("Loading language dictionaries from path %s", dicts_dir)
		dict_files = glob.glob(os.path.join(dicts_dir, "*.txt"))
		log.debug("%r files found.", len(dict_files))
		for dict_file in dict_files:
			# "es.txt" -> "es": the file name doubles as the language key.
			language = os.path.splitext(os.path.basename(dict_file))[0]
			speller = SymSpell()
			# Positional 0 and 1 are the term and count column indexes of each line.
			speller.load_dictionary(dict_file, 0, 1, encoding="utf-8")
			loaded_dicts[language] = speller
			log.debug("Added dictionary for language %s ", language)
	ready = True
	log.debug("All dicts were loaded.")
|  | ||||
def prepare_dicts(language):
	""" Copy the main dictionary file to the user's config directory so it can be modified and read without needing to require privileged sessions.

	The "dicts" folder is created inside the config directory when it is missing.
	Nothing is copied if the application does not ship a dictionary for the language,
	or if the user's config directory already contains one.
	@ language: two letter language code.
	"""
	log.debug("preparing dictionary data...")
	path = os.path.join(paths.config_path(), "dicts")
	if not os.path.exists(path):
		log.debug("Creating dicts folder in config directory...")
		os.mkdir(path)
	filename = language+".txt"
	original_file = os.path.join(paths.app_path(), "dictionaries", filename)
	user_file = os.path.join(path, filename)
	# Never overwrite an existing user copy: it may contain the user's own changes.
	if os.path.exists(original_file) and not os.path.exists(user_file):
		log.debug("Dictionary for language %s is not present in user config. Coppying... " % (language,))
		shutil.copy(original_file, user_file)
|  | ||||
class SpellChecker(object):
	""" Word by word spell checker working on top of a SymSpell dictionary.

	Typical usage: call set_language() and set_text(), then iterate over check_words()
	and call replace(), replace_all() or ignore_word() for every reported word.
	The corrected text is available in the text property.
	"""

	def __init__(self, wordlist=None, *args, **kwargs):
		""" Create an empty checker with no language and no text.
		@ wordlist: not used by this class at the moment.
		@ kwargs: stored in self.kwargs, not used otherwise.
		"""
		self.kwargs = kwargs
		self.dictionary = None
		self.ignored_words = []
		# Initialised here so the text and word properties work before set_text() is called.
		self.transformed_words = []
		self.word_index = 0

	def set_language(self, lang):
		""" Select the dictionary to be used for checking.
		@ lang: key of the dictionary in the module-level loaded_dicts mapping.
		Raises ValueError if no dictionary is loaded for that key.
		"""
		dictionary = loaded_dicts.get(lang)
		if dictionary is None:
			raise ValueError("Dictionary not found for the specified language")
		self.dictionary = dictionary

	def set_text(self, text):
		""" Set the text to check. The text is split on whitespace and the current word is reset to the first one.
		@ text: string to be checked.
		"""
		self.transformed_words = text.split()
		self.word_index = 0

	def _context_for(self, index):
		""" Return up to ten words surrounding the word at the given index, joined by spaces.
		@ index: position of the word in self.transformed_words.
		"""
		words = self.transformed_words
		if len(words) <= 10:
			return " ".join(words)
		# Near the start: the first ten words (indexes 0 to 9) contain the word.
		if index < 10:
			return " ".join(words[:10])
		# Near the end: the last ten words contain the word.
		if index >= len(words)-9:
			return " ".join(words[-10:])
		return " ".join(words[index-5:index+5])

	def check_words(self):
		""" Generator reporting the words which look misspelled.

		Words present in self.ignored_words, words for which the dictionary returns no
		suggestions and words for which some suggestion has distance 0 (the word itself is
		in the dictionary) are skipped. For every other word, self.word_index is updated and
		a tuple (suggestions, context, index) is yielded, where suggestions is the result of
		the dictionary lookup, context is a string with the surrounding words and index is
		the position of the word in the text.
		"""
		for index, current in enumerate(self.transformed_words):
			if current in self.ignored_words:
				continue
			suggestions = self.dictionary.lookup(current, Verbosity.CLOSEST, 2, transfer_casing=True)
			if len(suggestions) == 0:
				continue
			# A suggestion at distance 0 means the word was found as is in the dictionary.
			if any(s.distance == 0 for s in suggestions):
				continue
			context = self._context_for(index)
			self.word_index = index
			yield (suggestions, context, index)

	def replace(self, suggestion):
		""" Replace the current word (the one at self.word_index) with the given text.
		@ suggestion: replacement string.
		Raises ValueError if self.word_index is past the end of the current text.
		"""
		if self.word_index >= len(self.transformed_words):
			raise ValueError("Word index is not present in the current text")
		self.transformed_words[self.word_index] = suggestion

	def replace_all(self, word):
		""" Replace every occurrence of the current word in the text.
		@ word: replacement string.
		"""
		# Read the current word once: it changes as soon as its own position is replaced.
		existing_word = self.word
		for i, candidate in enumerate(self.transformed_words):
			if candidate == existing_word:
				self.transformed_words[i] = word

	def ignore_word(self, word):
		""" Add a word to the list of words skipped by check_words().
		@ word: word to be ignored.
		"""
		self.ignored_words.append(word)

	@property
	def text(self):
		""" Current text, including any replacement made so far, with words joined by single spaces. """
		return " ".join(self.transformed_words)

	@property
	def word(self):
		""" Word at self.word_index, or None if there is no text or the index is out of range. """
		if len(self.transformed_words) == 0 or self.word_index >= len(self.transformed_words):
			return None
		return self.transformed_words[self.word_index]
| @@ -1,35 +1,31 @@ | ||||
| # -*- coding: utf-8 -*- | ||||
| from __future__ import unicode_literals | ||||
| import os | ||||
| import logging | ||||
| import widgetUtils | ||||
| import output | ||||
| import config | ||||
| import languageHandler | ||||
| from enchant.checker import SpellChecker | ||||
| from enchant.errors import DictNotFoundError | ||||
| from enchant import tokenize | ||||
| from platform_utils import paths | ||||
| from . import checker | ||||
| from . import wx_ui | ||||
|  | ||||
| log = logging.getLogger("extra.SpellChecker.spellChecker") | ||||
|  | ||||
| class spellChecker(object): | ||||
| 	def __init__(self, text, dictionary): | ||||
| 	def __init__(self, text): | ||||
| 		super(spellChecker, self).__init__() | ||||
| 		log.debug("Creating the SpellChecker object. Dictionary: %s" % (dictionary,)) | ||||
| 		self.active = True | ||||
| 		self.checker = checker.SpellChecker() | ||||
| 		log.debug("Using language: %s" % (languageHandler.getLanguage(),)) | ||||
| 		try: | ||||
| 			if config.app["app-settings"]["language"] == "system": | ||||
| 				log.debug("Using the system language") | ||||
| 				self.checker = SpellChecker(languageHandler.curLang, filters=[tokenize.EmailFilter, tokenize.URLFilter]) | ||||
| 			else: | ||||
| 				log.debug("Using language: %s" % (languageHandler.getLanguage(),)) | ||||
| 				self.checker = SpellChecker(languageHandler.curLang, filters=[tokenize.EmailFilter, tokenize.URLFilter]) | ||||
| 			self.checker.set_text(text) | ||||
| 		except DictNotFoundError: | ||||
| 			print("no dict") | ||||
| 			log.exception("Dictionary for language %s not found." % (dictionary,)) | ||||
| 			self.checker.set_language(languageHandler.curLang) | ||||
| 		except ValueError: | ||||
| 			log.exception("Dictionary for language %s not found." % (languageHandler.curLang,)) | ||||
| 			wx_ui.dict_not_found_error() | ||||
| 			self.active = False | ||||
| 		self.checker.set_text(text) | ||||
| 		self.generator = self.checker.check_words() | ||||
| 		if self.active == True: | ||||
| 			log.debug("Creating dialog...") | ||||
| 			self.dialog = wx_ui.spellCheckerDialog() | ||||
| @@ -39,16 +35,16 @@ class spellChecker(object): | ||||
| 			widgetUtils.connect_event(self.dialog.replaceAll, widgetUtils.BUTTON_PRESSED, self.replaceAll) | ||||
| 			self.check() | ||||
| 			self.dialog.get_response() | ||||
| 			self.fixed_text = self.checker.get_text() | ||||
| 			self.fixed_text = self.checker.text | ||||
|  | ||||
| 	def check(self): | ||||
| 		try: | ||||
| 			next(self.checker) | ||||
| 			suggestions, context, self.wordIndex = next(self.generator) | ||||
| 			textToSay = _("Misspelled word: %s") % (self.checker.word,) | ||||
| 			context = "... %s %s %s" % (self.checker.leading_context(10), self.checker.word, self.checker.trailing_context(10)) | ||||
| 			context = context | ||||
| 			self.dialog.set_title(textToSay) | ||||
| 			output.speak(textToSay) | ||||
| 			self.dialog.set_word_and_suggestions(word=self.checker.word, context=context, suggestions=self.checker.suggest()) | ||||
| 			self.dialog.set_word_and_suggestions(word=self.checker.word, context=context, suggestions=[suggestion.term for suggestion in suggestions]) | ||||
| 		except StopIteration: | ||||
| 			log.debug("Process finished.") | ||||
| 			wx_ui.finished() | ||||
| @@ -58,7 +54,7 @@ class spellChecker(object): | ||||
| 		self.check() | ||||
|  | ||||
| 	def ignoreAll(self, ev): | ||||
| 		self.checker.ignore_always(word=self.checker.word) | ||||
| 		self.checker.ignore_word(word=self.checker.word) | ||||
| 		self.check() | ||||
|  | ||||
| 	def replace(self, ev): | ||||
| @@ -66,7 +62,7 @@ class spellChecker(object): | ||||
| 		self.check() | ||||
|  | ||||
| 	def replaceAll(self, ev): | ||||
| 		self.checker.replace_always(self.dialog.get_selected_suggestion()) | ||||
| 		self.checker.replace_all(self.dialog.get_selected_suggestion()) | ||||
| 		self.check() | ||||
|  | ||||
| 	def clean(self): | ||||
|   | ||||
| @@ -19,6 +19,7 @@ if hasattr(sys, "frozen"): | ||||
| 	sys.excepthook = lambda x, y, z: logging.critical(''.join(traceback.format_exception(x, y, z))) | ||||
| from mysc.thread_utils import call_threaded | ||||
| from wxUI import commonMessages | ||||
| from extra.SpellChecker import checker # Load dictionaries in advance for spelling correction | ||||
|  | ||||
| log = logging.getLogger("main") | ||||
|  | ||||
| @@ -58,6 +59,10 @@ def setup(): | ||||
| 	sm = sessionManager.sessionManagerController() | ||||
| 	sm.show() | ||||
| 	del sm | ||||
| 	log.debug("Loading dictionaries for spelling correction...") | ||||
| 	# Let's copy dictionary files for the selected language just in case it is not present already. | ||||
| 	checker.prepare_dicts(languageHandler.curLang) | ||||
| 	call_threaded(checker.load_dicts) | ||||
| 	r = mainController.Controller() | ||||
| 	call_threaded(r.login) | ||||
| 	app.run() | ||||
|   | ||||
| @@ -47,7 +47,7 @@ class createPostPresenter(base.basePresenter): | ||||
| 		output.speak(_("Translated")) | ||||
|  | ||||
| 	def spellcheck(self, text): | ||||
| 		checker = SpellChecker.spellchecker.spellChecker(text, "") | ||||
| 		checker = SpellChecker.spellchecker.spellChecker(text) | ||||
| 		if hasattr(checker, "fixed_text"): | ||||
| 			self.send_message("set", control="text", value=checker.fixed_text) | ||||
| 			self.send_message("focus_control", control="text") | ||||
|   | ||||
| @@ -33,7 +33,7 @@ class sessionManagerController(object): | ||||
| 		self.sessions = [] | ||||
| 		log.debug("Filling the session list...") | ||||
| 		for i in os.listdir(paths.config_path()): | ||||
| 			if os.path.isdir(os.path.join(paths.config_path(), i)): | ||||
| 			if i != "dicts" and os.path.isdir(os.path.join(paths.config_path(), i)): | ||||
| 				log.debug("Adding session %s" % (i,)) | ||||
| 				config_test = Configuration(os.path.join(paths.config_path(), i, "session.conf")) | ||||
| 				name = config_test["vk"]["user"] | ||||
|   | ||||
| @@ -34,7 +34,7 @@ build_exe_options = dict( | ||||
| 	optimize=2, | ||||
| 	include_msvcr=True, | ||||
| 	zip_include_packages=["accessible_output2", "sound_lib", "arrow"], | ||||
| 	include_files=["session.defaults", "cacert.pem", "app-configuration.defaults", "locales", "sounds", "documentation", "../windows-dependencies/x86/oggenc2.exe", "../windows-dependencies/x86/bootstrap.exe", ("../windows-dependencies/dictionaries", "lib/enchant/share/enchant/myspell"), find_sound_lib_datafiles(), find_accessible_output2_datafiles()], | ||||
| 	include_files=["session.defaults", "cacert.pem", "app-configuration.defaults", "locales", "sounds", "documentation", "../windows-dependencies/x86/oggenc2.exe", "../windows-dependencies/x86/bootstrap.exe", "../windows-dependencies/dictionaries", find_sound_lib_datafiles(), find_accessible_output2_datafiles()], | ||||
| 	packages=["interactors", "presenters", "views", "wxUI"], | ||||
| 	) | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user