Added a new module for performing spelling correction. Needs testing.
This commit is contained in:
		
							
								
								
									
										119
									
								
								src/extra/SpellChecker/checker.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										119
									
								
								src/extra/SpellChecker/checker.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,119 @@
 | 
			
		||||
# -*- coding: utf-8 -*-
 | 
			
		||||
""" High level Spell checker module by using the SymSpellPy library. """
 | 
			
		||||
import os
 | 
			
		||||
import glob
 | 
			
		||||
import shutil
 | 
			
		||||
import logging
 | 
			
		||||
import paths
 | 
			
		||||
from symspellpy.symspellpy import SymSpell, Verbosity
 | 
			
		||||
from codecs import open as open_
 | 
			
		||||
 | 
			
		||||
log = logging.getLogger("SpellChecker.checker")
 | 
			
		||||
 | 
			
		||||
loaded_dicts = dict()
 | 
			
		||||
ready = False
 | 
			
		||||
 | 
			
		||||
def load_dicts():
	""" Rebuild the module-level registry of spelling dictionaries.

	Every *.txt file found in the "dicts" folder of the user's config directory is loaded into its own SymSpell instance and stored in loaded_dicts, keyed by the file name without its extension.
	The ready flag is set to True at the end, even when the folder is missing or contains no dictionaries.
	"""
	global loaded_dicts, ready
	log.debug("Start dictionary loading for spelling checker module...")
	# Start from a fresh registry when dictionaries were already loaded.
	if loaded_dicts:
		loaded_dicts = {}
	dicts_folder = os.path.join(paths.config_path(), "dicts")
	if os.path.isdir(dicts_folder):
		log.debug("Loading language dictionaries from path %s", dicts_folder)
		dict_files = glob.glob(os.path.join(dicts_folder, "*.txt"))
		log.debug("%r files found.", len(dict_files))
		for dict_file in dict_files:
			# The language key is the bare file name: "en.txt" -> "en".
			file_name = os.path.basename(dict_file)
			language_key = os.path.splitext(file_name)[0]
			sym_spell = SymSpell()
			# 0 and 1 are passed as the term and count indexes -- TODO confirm against the symspellpy version in use.
			sym_spell.load_dictionary(dict_file, 0, 1, encoding="utf-8")
			loaded_dicts[language_key] = sym_spell
			log.debug("Added dictionary for language %s ", language_key)
	ready = True
	log.debug("All dicts were loaded.")
 | 
			
		||||
 | 
			
		||||
def prepare_dicts(language):
	""" Copy the main dictionary file to the user's config directory so it can be modified and read without needing to require privileged sessions.

	The "dicts" folder is created inside the config directory when it does not exist yet.
	The copy only happens when the application ships a dictionary for the language and the user's folder does not already contain one, so an existing user dictionary is never overwritten.
	@ language: two letter language code.
	"""
	log.debug("preparing dictionary data...")
	path = os.path.join(paths.config_path(), "dicts")
	if not os.path.exists(path):
		log.debug("Creating dicts folder in config directory...")
		# makedirs also creates missing parent folders; exist_ok avoids a crash if the folder appears between the check above and this call.
		os.makedirs(path, exist_ok=True)
	original_file = os.path.join(paths.app_path(), "dictionaries", language+".txt")
	user_file = os.path.join(path, language+".txt")
	if os.path.exists(original_file) and not os.path.exists(user_file):
		log.debug("Dictionary for language %s is not present in user config. Coppying... " % (language,))
		shutil.copy(original_file, path)
 | 
			
		||||
 | 
			
		||||
class SpellChecker(object):
	""" Word by word spell checker working on top of one of the dictionaries stored in loaded_dicts.

	Typical usage: call set_language() and set_text(), iterate over check_words() and, for every reported word, call replace(), replace_all() or ignore_word(). The corrected text is available in the text property.
	"""

	def __init__(self, wordlist=None, *args, **kwargs):
		""" Create an empty checker.
		wordlist and the extra positional arguments are accepted but not used; keyword arguments are stored in self.kwargs.
		"""
		self.kwargs = kwargs
		self.dictionary = None
		self.ignored_words = []
		# Initialised here so the text and word properties work before set_text() is called.
		self.transformed_words = []
		self.word_index = 0

	def set_language(self, lang):
		""" Select the dictionary used by check_words().
		@ lang: key of the dictionary in loaded_dicts.
		Raises ValueError when no dictionary was loaded for that key.
		"""
		dictionary = loaded_dicts.get(lang)
		if dictionary is None:
			raise ValueError("Dictionary not found for the specified language")
		self.dictionary = dictionary

	def set_text(self, text):
		""" Split text on whitespace and restart the checking position.
		@ text: string to be checked.
		"""
		self.transformed_words = text.split()
		self.word_index = 0

	def _context_for(self, index):
		""" Return up to ten words of the text surrounding the word at index, joined by spaces. """
		words = self.transformed_words
		if index < 10:
			# Near the beginning (or for short texts): the first ten words.
			window = words[:10]
		elif index >= len(words)-9:
			# Near the end: the last ten words.
			window = words[-10:]
		else:
			window = words[index-5:index+5]
		return " ".join(window)

	def check_words(self):
		""" Generator yielding a tuple (suggestions, context, index) for every word considered misspelled.

		Ignored words, words without any suggestion and words for which the dictionary returns a suggestion at distance 0 are skipped.
		self.word_index is updated to the index of the word being reported before each yield, so replace() and replace_all() operate on it.
		"""
		for index, current in enumerate(self.transformed_words):
			if current in self.ignored_words:
				continue
			suggestions = self.dictionary.lookup(current, Verbosity.CLOSEST, 2, transfer_casing=True)
			if not suggestions:
				continue
			# A suggestion at distance 0 means the word itself is in the dictionary.
			if any(suggestion.distance == 0 for suggestion in suggestions):
				continue
			self.word_index = index
			yield (suggestions, self._context_for(index), index)

	def replace(self, suggestion):
		""" Replace the word at the current position with suggestion.
		Raises ValueError when the current position is outside of the text.
		"""
		if self.word_index >= len(self.transformed_words):
			raise ValueError("Word index is not present in the current text")
		self.transformed_words[self.word_index] = suggestion

	def replace_all(self, word):
		""" Replace every occurrence of the word at the current position with word. """
		existing_word = self.word
		if existing_word is None:
			# There is no current word, so there is nothing to replace.
			return
		self.transformed_words = [word if item == existing_word else item for item in self.transformed_words]

	def ignore_word(self, word):
		""" Make check_words() skip word from now on. """
		self.ignored_words.append(word)

	@property
	def text(self):
		""" The text, including the replacements made so far, with words joined by single spaces. """
		return " ".join(self.transformed_words)

	@property
	def word(self):
		""" The word at the current position, or None when the position is outside of the text. """
		if self.word_index >= len(self.transformed_words):
			return None
		return self.transformed_words[self.word_index]
 | 
			
		||||
@@ -1,35 +1,31 @@
 | 
			
		||||
# -*- coding: utf-8 -*-
 | 
			
		||||
from __future__ import unicode_literals
 | 
			
		||||
import os
 | 
			
		||||
import logging
 | 
			
		||||
import widgetUtils
 | 
			
		||||
import output
 | 
			
		||||
import config
 | 
			
		||||
import languageHandler
 | 
			
		||||
from enchant.checker import SpellChecker
 | 
			
		||||
from enchant.errors import DictNotFoundError
 | 
			
		||||
from enchant import tokenize
 | 
			
		||||
from platform_utils import paths
 | 
			
		||||
from . import checker
 | 
			
		||||
from . import wx_ui
 | 
			
		||||
 | 
			
		||||
log = logging.getLogger("extra.SpellChecker.spellChecker")
 | 
			
		||||
 | 
			
		||||
class spellChecker(object):
 | 
			
		||||
	def __init__(self, text, dictionary):
 | 
			
		||||
	def __init__(self, text):
 | 
			
		||||
		super(spellChecker, self).__init__()
 | 
			
		||||
		log.debug("Creating the SpellChecker object. Dictionary: %s" % (dictionary,))
 | 
			
		||||
		self.active = True
 | 
			
		||||
		self.checker = checker.SpellChecker()
 | 
			
		||||
		log.debug("Using language: %s" % (languageHandler.getLanguage(),))
 | 
			
		||||
		try:
 | 
			
		||||
			if config.app["app-settings"]["language"] == "system":
 | 
			
		||||
				log.debug("Using the system language")
 | 
			
		||||
				self.checker = SpellChecker(languageHandler.curLang, filters=[tokenize.EmailFilter, tokenize.URLFilter])
 | 
			
		||||
			else:
 | 
			
		||||
				log.debug("Using language: %s" % (languageHandler.getLanguage(),))
 | 
			
		||||
				self.checker = SpellChecker(languageHandler.curLang, filters=[tokenize.EmailFilter, tokenize.URLFilter])
 | 
			
		||||
			self.checker.set_text(text)
 | 
			
		||||
		except DictNotFoundError:
 | 
			
		||||
			print("no dict")
 | 
			
		||||
			log.exception("Dictionary for language %s not found." % (dictionary,))
 | 
			
		||||
			self.checker.set_language(languageHandler.curLang)
 | 
			
		||||
		except ValueError:
 | 
			
		||||
			log.exception("Dictionary for language %s not found." % (languageHandler.curLang,))
 | 
			
		||||
			wx_ui.dict_not_found_error()
 | 
			
		||||
			self.active = False
 | 
			
		||||
		self.checker.set_text(text)
 | 
			
		||||
		self.generator = self.checker.check_words()
 | 
			
		||||
		if self.active == True:
 | 
			
		||||
			log.debug("Creating dialog...")
 | 
			
		||||
			self.dialog = wx_ui.spellCheckerDialog()
 | 
			
		||||
@@ -39,16 +35,16 @@ class spellChecker(object):
 | 
			
		||||
			widgetUtils.connect_event(self.dialog.replaceAll, widgetUtils.BUTTON_PRESSED, self.replaceAll)
 | 
			
		||||
			self.check()
 | 
			
		||||
			self.dialog.get_response()
 | 
			
		||||
			self.fixed_text = self.checker.get_text()
 | 
			
		||||
			self.fixed_text = self.checker.text
 | 
			
		||||
 | 
			
		||||
	def check(self):
 | 
			
		||||
		try:
 | 
			
		||||
			next(self.checker)
 | 
			
		||||
			suggestions, context, self.wordIndex = next(self.generator)
 | 
			
		||||
			textToSay = _("Misspelled word: %s") % (self.checker.word,)
 | 
			
		||||
			context = "... %s %s %s" % (self.checker.leading_context(10), self.checker.word, self.checker.trailing_context(10))
 | 
			
		||||
			context = context
 | 
			
		||||
			self.dialog.set_title(textToSay)
 | 
			
		||||
			output.speak(textToSay)
 | 
			
		||||
			self.dialog.set_word_and_suggestions(word=self.checker.word, context=context, suggestions=self.checker.suggest())
 | 
			
		||||
			self.dialog.set_word_and_suggestions(word=self.checker.word, context=context, suggestions=[suggestion.term for suggestion in suggestions])
 | 
			
		||||
		except StopIteration:
 | 
			
		||||
			log.debug("Process finished.")
 | 
			
		||||
			wx_ui.finished()
 | 
			
		||||
@@ -58,7 +54,7 @@ class spellChecker(object):
 | 
			
		||||
		self.check()
 | 
			
		||||
 | 
			
		||||
	def ignoreAll(self, ev):
 | 
			
		||||
		self.checker.ignore_always(word=self.checker.word)
 | 
			
		||||
		self.checker.ignore_word(word=self.checker.word)
 | 
			
		||||
		self.check()
 | 
			
		||||
 | 
			
		||||
	def replace(self, ev):
 | 
			
		||||
@@ -66,7 +62,7 @@ class spellChecker(object):
 | 
			
		||||
		self.check()
 | 
			
		||||
 | 
			
		||||
	def replaceAll(self, ev):
 | 
			
		||||
		self.checker.replace_always(self.dialog.get_selected_suggestion())
 | 
			
		||||
		self.checker.replace_all(self.dialog.get_selected_suggestion())
 | 
			
		||||
		self.check()
 | 
			
		||||
 | 
			
		||||
	def clean(self):
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user