Added a new module for performing the spelling correction. Needs testing
This commit is contained in:
parent
28e2d3df08
commit
c74c6f6f5d
@ -11,6 +11,8 @@
|
||||
|
||||
### Changes
|
||||
|
||||
* Replaced the underlying library we were using for spelling correction, as it is no longer in development. Instead, we started to use a new approach in socializer which, in theory, should allow us to switch the language used for spelling correction, along with other benefits, a bit later. For now, the available languages are Russian, Ukrainian, English, Polish and Spanish, but more languages can be added on request.
|
||||
|
||||
## Changes in Version 0.23 (11.11.2019)
|
||||
|
||||
### New additions
|
||||
|
119
src/extra/SpellChecker/checker.py
Normal file
119
src/extra/SpellChecker/checker.py
Normal file
@ -0,0 +1,119 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
""" High level Spell checker module by using the SymSpellPy library. """
|
||||
import os
|
||||
import glob
|
||||
import shutil
|
||||
import logging
|
||||
import paths
|
||||
from symspellpy.symspellpy import SymSpell, Verbosity
|
||||
from codecs import open as open_
|
||||
|
||||
# Logger shared by the functions and classes of this module.
log = logging.getLogger("SpellChecker.checker")

# Registry of loaded dictionaries: maps a dictionary file name (without its
# extension) to the SymSpell instance that load_dicts() built from that file.
loaded_dicts = {}

# Becomes True once load_dicts() has finished running.
ready = False
|
||||
|
||||
def load_dicts():
    """Load every dictionary stored in the user's config directory.

    Each ``*.txt`` file found in the ``dicts`` folder of ``paths.config_path()``
    is loaded into its own SymSpell instance and registered in the module-level
    ``loaded_dicts`` mapping, keyed by the file name without its extension.

    The new mapping is built locally and published with a single assignment,
    so code reading ``loaded_dicts`` while this function runs (it may be
    started in a background thread) never observes a half-filled registry.
    ``ready`` is False while loading is in progress and True once it is done.
    """
    global loaded_dicts, ready
    log.debug("Start dictionary loading for spelling checker module...")
    # A reload is in progress: do not advertise the registry as ready.
    ready = False
    fresh_dicts = dict()
    path = os.path.join(paths.config_path(), "dicts")
    if os.path.isdir(path):
        log.debug("Loading language dictionaries from path %s" % (path,))
        files = glob.glob(os.path.join(path, "*.txt"))
        log.debug("%r files found." % (len(files)))
        for dict_file in files:
            key = os.path.splitext(os.path.basename(dict_file))[0]
            dictionary = SymSpell()
            # Column 0 of every line holds the term, column 1 its count.
            # load_dictionary reports failure through its return value, so a
            # file that could not be read must not be registered as usable.
            if not dictionary.load_dictionary(dict_file, 0, 1, encoding="utf-8"):
                log.error("Could not load dictionary file %s" % (dict_file,))
                continue
            fresh_dicts[key] = dictionary
            log.debug("Added dictionary for language %s " % (key,))
    # Publish the complete registry at once, replacing any previous content.
    loaded_dicts = fresh_dicts
    ready = True
    log.debug("All dicts were loaded.")
|
||||
|
||||
def prepare_dicts(language):
    """Copy the bundled dictionary for a language into the user's config directory.

    The copy lets the dictionary be read and modified without needing a
    privileged session. Nothing is copied when the application ships no
    dictionary for ``language`` or when the user already has a copy.

    @ language: language code; the dictionary file is expected to be named
        ``<language>.txt`` inside the application's ``dictionaries`` folder.
    """
    log.debug("preparing dictionary data...")
    dicts_dir = os.path.join(paths.config_path(), "dicts")
    if not os.path.exists(dicts_dir):
        log.debug("Creating dicts folder in config directory...")
        # makedirs also creates the config directory itself when it is still
        # missing, a case in which a plain mkdir would fail.
        os.makedirs(dicts_dir)
    file_name = language + ".txt"
    original_file = os.path.join(paths.app_path(), "dictionaries", file_name)
    user_file = os.path.join(dicts_dir, file_name)
    if os.path.exists(original_file) and not os.path.exists(user_file):
        log.debug("Dictionary for language %s is not present in user config. Coppying... " % (language,))
        shutil.copy(original_file, user_file)
|
||||
|
||||
class SpellChecker(object):
    """Word-by-word spell checker backed by one of the loaded SymSpell dictionaries.

    Typical usage: call set_language(), then set_text(), then iterate over
    check_words(). After each yielded result the caller may use replace(),
    replace_all() or ignore_word(), and finally read the corrected text from
    the ``text`` property.
    """

    def __init__(self, wordlist=None, *args, **kwargs):
        """Create a checker with no dictionary and no text.

        ``wordlist`` and ``args`` are accepted but not used; ``kwargs`` is
        stored unchanged in ``self.kwargs``.
        """
        self.kwargs = kwargs
        self.dictionary = None
        self.ignored_words = []
        # Initialised here so the ``text`` and ``word`` properties are usable
        # even before set_text() has been called.
        self.transformed_words = []
        self.word_index = 0

    def set_language(self, lang):
        """Select the dictionary registered under ``lang`` in ``loaded_dicts``.

        Raises ValueError when no dictionary is loaded for that language.
        """
        dictionary = loaded_dicts.get(lang)
        if dictionary is None:
            raise ValueError("Dictionary not found for the specified language")
        self.dictionary = dictionary

    def set_text(self, text):
        """Split ``text`` on whitespace into the word list to check and rewind to the first word."""
        self.transformed_words = text.split()
        self.word_index = 0

    def _context_for(self, index):
        """Return up to ten words of text surrounding the word at ``index``."""
        words = self.transformed_words
        if index < 10:
            # Near the beginning (or short text): the leading words.
            return " ".join(words[:10])
        if index >= len(words) - 10:
            # Near the end: the trailing ten words.
            return " ".join(words[-10:])
        return " ".join(words[index - 5:index + 5])

    def check_words(self):
        """Yield one result per word for which the dictionary proposes corrections.

        Each result is a tuple ``(suggestions, context, index)``: the objects
        returned by the dictionary lookup, a string with the words around the
        checked one, and the position of the word in the text. Before
        yielding, ``word_index`` is set to that position so replace() and the
        ``word`` property refer to the reported word.

        Words listed in ``ignored_words``, words for which the lookup returns
        nothing, and words with a suggestion at edit distance 0 are skipped.
        """
        # The word list is only ever mutated in place by replace() and
        # replace_all(), so corrections applied between two yields are seen
        # by the remaining iterations.
        for index, token in enumerate(self.transformed_words):
            if token in self.ignored_words:
                continue
            suggestions = self.dictionary.lookup(token, Verbosity.CLOSEST, 2, transfer_casing=True)
            if not suggestions:
                continue
            # A suggestion at distance 0 is the word itself: spelled correctly.
            if any(suggestion.distance == 0 for suggestion in suggestions):
                continue
            self.word_index = index
            yield (suggestions, self._context_for(index), index)

    def replace(self, suggestion):
        """Replace the word at ``word_index`` with ``suggestion``.

        Raises ValueError when ``word_index`` is past the end of the text.
        """
        if self.word_index >= len(self.transformed_words):
            raise ValueError("Word index is not present in the current text")
        self.transformed_words[self.word_index] = suggestion

    def replace_all(self, word):
        """Replace every occurrence of the current word with ``word``."""
        existing_word = self.word
        for position, current in enumerate(self.transformed_words):
            if current == existing_word:
                self.transformed_words[position] = word

    def ignore_word(self, word):
        """Add ``word`` to the list of words check_words() will skip."""
        self.ignored_words.append(word)

    @property
    def text(self):
        """The current words joined with single spaces."""
        return " ".join(self.transformed_words)

    @property
    def word(self):
        """The word at ``word_index``, or None when that position is past the end of the text."""
        if len(self.transformed_words) == 0 or self.word_index >= len(self.transformed_words):
            return None
        return self.transformed_words[self.word_index]
|
@ -1,35 +1,31 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import unicode_literals
|
||||
import os
|
||||
import logging
|
||||
import widgetUtils
|
||||
import output
|
||||
import config
|
||||
import languageHandler
|
||||
from enchant.checker import SpellChecker
|
||||
from enchant.errors import DictNotFoundError
|
||||
from enchant import tokenize
|
||||
from platform_utils import paths
|
||||
from . import checker
|
||||
from . import wx_ui
|
||||
|
||||
log = logging.getLogger("extra.SpellChecker.spellChecker")
|
||||
|
||||
class spellChecker(object):
|
||||
def __init__(self, text, dictionary):
|
||||
def __init__(self, text):
|
||||
super(spellChecker, self).__init__()
|
||||
log.debug("Creating the SpellChecker object. Dictionary: %s" % (dictionary,))
|
||||
self.active = True
|
||||
self.checker = checker.SpellChecker()
|
||||
log.debug("Using language: %s" % (languageHandler.getLanguage(),))
|
||||
try:
|
||||
if config.app["app-settings"]["language"] == "system":
|
||||
log.debug("Using the system language")
|
||||
self.checker = SpellChecker(languageHandler.curLang, filters=[tokenize.EmailFilter, tokenize.URLFilter])
|
||||
else:
|
||||
log.debug("Using language: %s" % (languageHandler.getLanguage(),))
|
||||
self.checker = SpellChecker(languageHandler.curLang, filters=[tokenize.EmailFilter, tokenize.URLFilter])
|
||||
self.checker.set_text(text)
|
||||
except DictNotFoundError:
|
||||
print("no dict")
|
||||
log.exception("Dictionary for language %s not found." % (dictionary,))
|
||||
self.checker.set_language(languageHandler.curLang)
|
||||
except ValueError:
|
||||
log.exception("Dictionary for language %s not found." % (languageHandler.curLang,))
|
||||
wx_ui.dict_not_found_error()
|
||||
self.active = False
|
||||
self.checker.set_text(text)
|
||||
self.generator = self.checker.check_words()
|
||||
if self.active == True:
|
||||
log.debug("Creating dialog...")
|
||||
self.dialog = wx_ui.spellCheckerDialog()
|
||||
@ -39,16 +35,16 @@ class spellChecker(object):
|
||||
widgetUtils.connect_event(self.dialog.replaceAll, widgetUtils.BUTTON_PRESSED, self.replaceAll)
|
||||
self.check()
|
||||
self.dialog.get_response()
|
||||
self.fixed_text = self.checker.get_text()
|
||||
self.fixed_text = self.checker.text
|
||||
|
||||
def check(self):
|
||||
try:
|
||||
next(self.checker)
|
||||
suggestions, context, self.wordIndex = next(self.generator)
|
||||
textToSay = _("Misspelled word: %s") % (self.checker.word,)
|
||||
context = "... %s %s %s" % (self.checker.leading_context(10), self.checker.word, self.checker.trailing_context(10))
|
||||
context = context
|
||||
self.dialog.set_title(textToSay)
|
||||
output.speak(textToSay)
|
||||
self.dialog.set_word_and_suggestions(word=self.checker.word, context=context, suggestions=self.checker.suggest())
|
||||
self.dialog.set_word_and_suggestions(word=self.checker.word, context=context, suggestions=[suggestion.term for suggestion in suggestions])
|
||||
except StopIteration:
|
||||
log.debug("Process finished.")
|
||||
wx_ui.finished()
|
||||
@ -58,7 +54,7 @@ class spellChecker(object):
|
||||
self.check()
|
||||
|
||||
def ignoreAll(self, ev):
|
||||
self.checker.ignore_always(word=self.checker.word)
|
||||
self.checker.ignore_word(word=self.checker.word)
|
||||
self.check()
|
||||
|
||||
def replace(self, ev):
|
||||
@ -66,7 +62,7 @@ class spellChecker(object):
|
||||
self.check()
|
||||
|
||||
def replaceAll(self, ev):
|
||||
self.checker.replace_always(self.dialog.get_selected_suggestion())
|
||||
self.checker.replace_all(self.dialog.get_selected_suggestion())
|
||||
self.check()
|
||||
|
||||
def clean(self):
|
||||
|
@ -19,6 +19,7 @@ if hasattr(sys, "frozen"):
|
||||
sys.excepthook = lambda x, y, z: logging.critical(''.join(traceback.format_exception(x, y, z)))
|
||||
from mysc.thread_utils import call_threaded
|
||||
from wxUI import commonMessages
|
||||
from extra.SpellChecker import checker # Load dictionaries in advance for spelling correction
|
||||
|
||||
log = logging.getLogger("main")
|
||||
|
||||
@ -58,6 +59,10 @@ def setup():
|
||||
sm = sessionManager.sessionManagerController()
|
||||
sm.show()
|
||||
del sm
|
||||
log.debug("Loading dictionaries for spelling correction...")
|
||||
# Let's copy dictionary files for the selected language just in case it is not present already.
|
||||
checker.prepare_dicts(languageHandler.curLang)
|
||||
call_threaded(checker.load_dicts)
|
||||
r = mainController.Controller()
|
||||
call_threaded(r.login)
|
||||
app.run()
|
||||
|
@ -47,7 +47,7 @@ class createPostPresenter(base.basePresenter):
|
||||
output.speak(_("Translated"))
|
||||
|
||||
def spellcheck(self, text):
|
||||
checker = SpellChecker.spellchecker.spellChecker(text, "")
|
||||
checker = SpellChecker.spellchecker.spellChecker(text)
|
||||
if hasattr(checker, "fixed_text"):
|
||||
self.send_message("set", control="text", value=checker.fixed_text)
|
||||
self.send_message("focus_control", control="text")
|
||||
|
@ -33,7 +33,7 @@ class sessionManagerController(object):
|
||||
self.sessions = []
|
||||
log.debug("Filling the session list...")
|
||||
for i in os.listdir(paths.config_path()):
|
||||
if os.path.isdir(os.path.join(paths.config_path(), i)):
|
||||
if i != "dicts" and os.path.isdir(os.path.join(paths.config_path(), i)):
|
||||
log.debug("Adding session %s" % (i,))
|
||||
config_test = Configuration(os.path.join(paths.config_path(), i, "session.conf"))
|
||||
name = config_test["vk"]["user"]
|
||||
|
@ -34,7 +34,7 @@ build_exe_options = dict(
|
||||
optimize=2,
|
||||
include_msvcr=True,
|
||||
zip_include_packages=["accessible_output2", "sound_lib", "arrow"],
|
||||
include_files=["session.defaults", "cacert.pem", "app-configuration.defaults", "locales", "sounds", "documentation", "../windows-dependencies/x86/oggenc2.exe", "../windows-dependencies/x86/bootstrap.exe", ("../windows-dependencies/dictionaries", "lib/enchant/share/enchant/myspell"), find_sound_lib_datafiles(), find_accessible_output2_datafiles()],
|
||||
include_files=["session.defaults", "cacert.pem", "app-configuration.defaults", "locales", "sounds", "documentation", "../windows-dependencies/x86/oggenc2.exe", "../windows-dependencies/x86/bootstrap.exe", "../windows-dependencies/dictionaries", find_sound_lib_datafiles(), find_accessible_output2_datafiles()],
|
||||
packages=["interactors", "presenters", "views", "wxUI"],
|
||||
)
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user