Added a new module for performing the spelling correction. Needs testing
This commit is contained in:
parent
28e2d3df08
commit
c74c6f6f5d
@ -11,6 +11,8 @@
|
|||||||
|
|
||||||
### Changes
|
### Changes
|
||||||
|
|
||||||
|
* Replaced the underlying library we were using for spelling correction, as it is no longer in development. Instead, we started to use a new approach in socializer which, in theory, should allow us to switch the language used for spelling correction and bring other benefits a bit later. For now, the available languages are Russian, Ukrainian, English, Polish and Spanish, but more languages can be added on request.
|
||||||
|
|
||||||
## Changes in Version 0.23 (11.11.2019)
|
## Changes in Version 0.23 (11.11.2019)
|
||||||
|
|
||||||
### New additions
|
### New additions
|
||||||
|
119
src/extra/SpellChecker/checker.py
Normal file
119
src/extra/SpellChecker/checker.py
Normal file
@ -0,0 +1,119 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
""" High level Spell checker module by using the SymSpellPy library. """
|
||||||
|
import os
|
||||||
|
import glob
|
||||||
|
import shutil
|
||||||
|
import logging
|
||||||
|
import paths
|
||||||
|
from symspellpy.symspellpy import SymSpell, Verbosity
|
||||||
|
from codecs import open as open_
|
||||||
|
|
||||||
|
log = logging.getLogger("SpellChecker.checker")
|
||||||
|
|
||||||
|
loaded_dicts = dict()
|
||||||
|
ready = False
|
||||||
|
|
||||||
|
def load_dicts():
    """ Load every dictionary found in the "dicts" folder of the config directory.

    Each "*.txt" file in that folder is loaded in a SymSpell object and stored in the
    module-level loaded_dicts mapping, using the file name without extension as key.
    Files that SymSpell reports as not loaded are skipped, so they are never offered
    as an (empty) dictionary. The module-level flag ready is set to True at the end.
    """
    global loaded_dicts, ready
    log.debug("Start dictionary loading for spelling checker module...")
    # Start from scratch if dictionaries were loaded by a previous call.
    if loaded_dicts:
        loaded_dicts = dict()
    path = os.path.join(paths.config_path(), "dicts")
    if os.path.isdir(path):
        log.debug("Loading language dictionaries from path %s", path)
        files = glob.glob(os.path.join(path, "*.txt"))
        log.debug("%r files found.", len(files))
        for dict_file in files:
            key = os.path.splitext(os.path.basename(dict_file))[0]
            dictionary = SymSpell()
            # Positional arguments 0 and 1 are passed as they were originally;
            # presumably the term and count column indexes - confirm in symspellpy docs.
            if not dictionary.load_dictionary(dict_file, 0, 1, encoding="utf-8"):
                log.warning("Dictionary file %s could not be loaded, skipping.", dict_file)
                continue
            loaded_dicts[key] = dictionary
            log.debug("Added dictionary for language %s ", key)
    ready = True
    log.debug("All dicts were loaded.")
|
||||||
|
|
||||||
|
def prepare_dicts(language):
    """ Copy the main dictionary file to the user's config directory so it can be modified and read without needing to require privileged sessions.

    The "dicts" folder is created inside the config directory if needed. The file
    "<language>.txt" is copied from the application's "dictionaries" folder only when
    it exists there and is not already present in the user's "dicts" folder.

    @ language: two letter language code.
    """
    log.debug("preparing dictionary data...")
    path = os.path.join(paths.config_path(), "dicts")
    if not os.path.isdir(path):
        log.debug("Creating dicts folder in config directory...")
        # exist_ok avoids failing if the folder appears between the check and the call.
        # If "dicts" exists as a regular file this raises instead of letting the copy
        # below silently overwrite that file.
        os.makedirs(path, exist_ok=True)
    filename = language + ".txt"
    original_file = os.path.join(paths.app_path(), "dictionaries", filename)
    user_file = os.path.join(path, filename)
    if os.path.exists(original_file) and not os.path.exists(user_file):
        log.debug("Dictionary for language %s is not present in user config. Coppying... " % (language,))
        shutil.copy(original_file, user_file)
|
||||||
|
|
||||||
|
class SpellChecker(object):
    """ Word by word spell checker that uses one of the loaded SymSpell dictionaries.

    Expected usage: call set_language() and set_text(), iterate over check_words() and,
    for every reported word, call replace(), replace_all() or ignore_word(). The
    resulting text can be read from the text property.
    """

    # Amount of words included in the context reported for a misspelled word.
    _CONTEXT_SIZE = 10

    def __init__(self, wordlist=None, *args, **kwargs):
        """ Create a checker without dictionary and without text.

        @ wordlist: accepted but not used by this class.
        @ kwargs: stored in self.kwargs, not used otherwise by this class.
        """
        self.kwargs = kwargs
        self.dictionary = None
        self.ignored_words = []
        # Initialised here so the text and word properties work before set_text() is called.
        self.transformed_words = []
        self.word_index = 0

    def set_language(self, lang):
        """ Select the dictionary stored for lang in the module-level loaded_dicts mapping.

        @ lang: key of the dictionary to use.
        Raises ValueError if there is no dictionary for the specified language.
        """
        dictionary = loaded_dicts.get(lang)
        if dictionary is None:
            raise ValueError("Dictionary not found for the specified language")
        self.dictionary = dictionary

    def set_text(self, text):
        """ Split text in whitespace separated words and restart from the first word.

        @ text: string to be checked.
        """
        self.transformed_words = text.split()
        self.word_index = 0

    def _get_context(self, index):
        """ Return up to _CONTEXT_SIZE words of the text, joined by spaces, containing the word at index. """
        words = self.transformed_words
        size = self._CONTEXT_SIZE
        # Short text, or word placed within the first words: use the beginning of the text.
        if len(words) <= size or index < size:
            return " ".join(words[:size])
        # Word placed near the end: use the last words of the text.
        if index >= len(words) - (size - 1):
            return " ".join(words[-size:])
        half = size // 2
        return " ".join(words[index - half:index + half])

    def check_words(self):
        """ Generator that yields a tuple (suggestions, context, index) for every misspelled word.

        A word is skipped when it is in ignored_words, when the dictionary returns no
        suggestions for it, or when any suggestion has distance 0 (the word itself is
        in the dictionary). Before yielding, word_index is set to the word's position
        so replace() and the word property refer to it.
        """
        for index, current_word in enumerate(self.transformed_words):
            if current_word in self.ignored_words:
                continue
            suggestions = self.dictionary.lookup(current_word, Verbosity.CLOSEST, 2, transfer_casing=True)
            if not suggestions:
                continue
            # A suggestion at distance 0 means the word is spelled correctly.
            if any(suggestion.distance == 0 for suggestion in suggestions):
                continue
            context = self._get_context(index)
            self.word_index = index
            yield (suggestions, context, index)

    def replace(self, suggestion):
        """ Replace the word at word_index with suggestion.

        @ suggestion: string to place in the text.
        Raises ValueError if word_index is past the end of the current text.
        """
        if self.word_index >= len(self.transformed_words):
            raise ValueError("Word index is not present in the current text")
        self.transformed_words[self.word_index] = suggestion

    def replace_all(self, word):
        """ Replace every occurrence of the current word (see the word property) with word.

        @ word: string to place in the text.
        """
        existing_word = self.word
        for index, candidate in enumerate(self.transformed_words):
            if candidate == existing_word:
                self.transformed_words[index] = word

    def ignore_word(self, word):
        """ Add word to the list of words skipped by check_words().

        @ word: string to ignore.
        """
        self.ignored_words.append(word)

    @property
    def text(self):
        """ Current words joined by single spaces. """
        return " ".join(self.transformed_words)

    @property
    def word(self):
        """ Word at word_index, or None if there is no text or the index is past its end. """
        if self.word_index >= len(self.transformed_words):
            return None
        return self.transformed_words[self.word_index]
|
@ -1,35 +1,31 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
import os
|
||||||
import logging
|
import logging
|
||||||
import widgetUtils
|
import widgetUtils
|
||||||
import output
|
import output
|
||||||
import config
|
import config
|
||||||
import languageHandler
|
import languageHandler
|
||||||
from enchant.checker import SpellChecker
|
from platform_utils import paths
|
||||||
from enchant.errors import DictNotFoundError
|
from . import checker
|
||||||
from enchant import tokenize
|
|
||||||
from . import wx_ui
|
from . import wx_ui
|
||||||
|
|
||||||
log = logging.getLogger("extra.SpellChecker.spellChecker")
|
log = logging.getLogger("extra.SpellChecker.spellChecker")
|
||||||
|
|
||||||
class spellChecker(object):
|
class spellChecker(object):
|
||||||
def __init__(self, text, dictionary):
|
def __init__(self, text):
|
||||||
super(spellChecker, self).__init__()
|
super(spellChecker, self).__init__()
|
||||||
log.debug("Creating the SpellChecker object. Dictionary: %s" % (dictionary,))
|
|
||||||
self.active = True
|
self.active = True
|
||||||
try:
|
self.checker = checker.SpellChecker()
|
||||||
if config.app["app-settings"]["language"] == "system":
|
|
||||||
log.debug("Using the system language")
|
|
||||||
self.checker = SpellChecker(languageHandler.curLang, filters=[tokenize.EmailFilter, tokenize.URLFilter])
|
|
||||||
else:
|
|
||||||
log.debug("Using language: %s" % (languageHandler.getLanguage(),))
|
log.debug("Using language: %s" % (languageHandler.getLanguage(),))
|
||||||
self.checker = SpellChecker(languageHandler.curLang, filters=[tokenize.EmailFilter, tokenize.URLFilter])
|
try:
|
||||||
self.checker.set_text(text)
|
self.checker.set_language(languageHandler.curLang)
|
||||||
except DictNotFoundError:
|
except ValueError:
|
||||||
print("no dict")
|
log.exception("Dictionary for language %s not found." % (languageHandler.curLang,))
|
||||||
log.exception("Dictionary for language %s not found." % (dictionary,))
|
|
||||||
wx_ui.dict_not_found_error()
|
wx_ui.dict_not_found_error()
|
||||||
self.active = False
|
self.active = False
|
||||||
|
self.checker.set_text(text)
|
||||||
|
self.generator = self.checker.check_words()
|
||||||
if self.active == True:
|
if self.active == True:
|
||||||
log.debug("Creating dialog...")
|
log.debug("Creating dialog...")
|
||||||
self.dialog = wx_ui.spellCheckerDialog()
|
self.dialog = wx_ui.spellCheckerDialog()
|
||||||
@ -39,16 +35,16 @@ class spellChecker(object):
|
|||||||
widgetUtils.connect_event(self.dialog.replaceAll, widgetUtils.BUTTON_PRESSED, self.replaceAll)
|
widgetUtils.connect_event(self.dialog.replaceAll, widgetUtils.BUTTON_PRESSED, self.replaceAll)
|
||||||
self.check()
|
self.check()
|
||||||
self.dialog.get_response()
|
self.dialog.get_response()
|
||||||
self.fixed_text = self.checker.get_text()
|
self.fixed_text = self.checker.text
|
||||||
|
|
||||||
def check(self):
|
def check(self):
|
||||||
try:
|
try:
|
||||||
next(self.checker)
|
suggestions, context, self.wordIndex = next(self.generator)
|
||||||
textToSay = _("Misspelled word: %s") % (self.checker.word,)
|
textToSay = _("Misspelled word: %s") % (self.checker.word,)
|
||||||
context = "... %s %s %s" % (self.checker.leading_context(10), self.checker.word, self.checker.trailing_context(10))
|
context = context
|
||||||
self.dialog.set_title(textToSay)
|
self.dialog.set_title(textToSay)
|
||||||
output.speak(textToSay)
|
output.speak(textToSay)
|
||||||
self.dialog.set_word_and_suggestions(word=self.checker.word, context=context, suggestions=self.checker.suggest())
|
self.dialog.set_word_and_suggestions(word=self.checker.word, context=context, suggestions=[suggestion.term for suggestion in suggestions])
|
||||||
except StopIteration:
|
except StopIteration:
|
||||||
log.debug("Process finished.")
|
log.debug("Process finished.")
|
||||||
wx_ui.finished()
|
wx_ui.finished()
|
||||||
@ -58,7 +54,7 @@ class spellChecker(object):
|
|||||||
self.check()
|
self.check()
|
||||||
|
|
||||||
def ignoreAll(self, ev):
|
def ignoreAll(self, ev):
|
||||||
self.checker.ignore_always(word=self.checker.word)
|
self.checker.ignore_word(word=self.checker.word)
|
||||||
self.check()
|
self.check()
|
||||||
|
|
||||||
def replace(self, ev):
|
def replace(self, ev):
|
||||||
@ -66,7 +62,7 @@ class spellChecker(object):
|
|||||||
self.check()
|
self.check()
|
||||||
|
|
||||||
def replaceAll(self, ev):
|
def replaceAll(self, ev):
|
||||||
self.checker.replace_always(self.dialog.get_selected_suggestion())
|
self.checker.replace_all(self.dialog.get_selected_suggestion())
|
||||||
self.check()
|
self.check()
|
||||||
|
|
||||||
def clean(self):
|
def clean(self):
|
||||||
|
@ -19,6 +19,7 @@ if hasattr(sys, "frozen"):
|
|||||||
sys.excepthook = lambda x, y, z: logging.critical(''.join(traceback.format_exception(x, y, z)))
|
sys.excepthook = lambda x, y, z: logging.critical(''.join(traceback.format_exception(x, y, z)))
|
||||||
from mysc.thread_utils import call_threaded
|
from mysc.thread_utils import call_threaded
|
||||||
from wxUI import commonMessages
|
from wxUI import commonMessages
|
||||||
|
from extra.SpellChecker import checker # Load dictionaries in advance for spelling correction
|
||||||
|
|
||||||
log = logging.getLogger("main")
|
log = logging.getLogger("main")
|
||||||
|
|
||||||
@ -58,6 +59,10 @@ def setup():
|
|||||||
sm = sessionManager.sessionManagerController()
|
sm = sessionManager.sessionManagerController()
|
||||||
sm.show()
|
sm.show()
|
||||||
del sm
|
del sm
|
||||||
|
log.debug("Loading dictionaries for spelling correction...")
|
||||||
|
# Let's copy dictionary files for the selected language just in case it is not present already.
|
||||||
|
checker.prepare_dicts(languageHandler.curLang)
|
||||||
|
call_threaded(checker.load_dicts)
|
||||||
r = mainController.Controller()
|
r = mainController.Controller()
|
||||||
call_threaded(r.login)
|
call_threaded(r.login)
|
||||||
app.run()
|
app.run()
|
||||||
|
@ -47,7 +47,7 @@ class createPostPresenter(base.basePresenter):
|
|||||||
output.speak(_("Translated"))
|
output.speak(_("Translated"))
|
||||||
|
|
||||||
def spellcheck(self, text):
|
def spellcheck(self, text):
|
||||||
checker = SpellChecker.spellchecker.spellChecker(text, "")
|
checker = SpellChecker.spellchecker.spellChecker(text)
|
||||||
if hasattr(checker, "fixed_text"):
|
if hasattr(checker, "fixed_text"):
|
||||||
self.send_message("set", control="text", value=checker.fixed_text)
|
self.send_message("set", control="text", value=checker.fixed_text)
|
||||||
self.send_message("focus_control", control="text")
|
self.send_message("focus_control", control="text")
|
||||||
|
@ -33,7 +33,7 @@ class sessionManagerController(object):
|
|||||||
self.sessions = []
|
self.sessions = []
|
||||||
log.debug("Filling the session list...")
|
log.debug("Filling the session list...")
|
||||||
for i in os.listdir(paths.config_path()):
|
for i in os.listdir(paths.config_path()):
|
||||||
if os.path.isdir(os.path.join(paths.config_path(), i)):
|
if i != "dicts" and os.path.isdir(os.path.join(paths.config_path(), i)):
|
||||||
log.debug("Adding session %s" % (i,))
|
log.debug("Adding session %s" % (i,))
|
||||||
config_test = Configuration(os.path.join(paths.config_path(), i, "session.conf"))
|
config_test = Configuration(os.path.join(paths.config_path(), i, "session.conf"))
|
||||||
name = config_test["vk"]["user"]
|
name = config_test["vk"]["user"]
|
||||||
|
@ -34,7 +34,7 @@ build_exe_options = dict(
|
|||||||
optimize=2,
|
optimize=2,
|
||||||
include_msvcr=True,
|
include_msvcr=True,
|
||||||
zip_include_packages=["accessible_output2", "sound_lib", "arrow"],
|
zip_include_packages=["accessible_output2", "sound_lib", "arrow"],
|
||||||
include_files=["session.defaults", "cacert.pem", "app-configuration.defaults", "locales", "sounds", "documentation", "../windows-dependencies/x86/oggenc2.exe", "../windows-dependencies/x86/bootstrap.exe", ("../windows-dependencies/dictionaries", "lib/enchant/share/enchant/myspell"), find_sound_lib_datafiles(), find_accessible_output2_datafiles()],
|
include_files=["session.defaults", "cacert.pem", "app-configuration.defaults", "locales", "sounds", "documentation", "../windows-dependencies/x86/oggenc2.exe", "../windows-dependencies/x86/bootstrap.exe", "../windows-dependencies/dictionaries", find_sound_lib_datafiles(), find_accessible_output2_datafiles()],
|
||||||
packages=["interactors", "presenters", "views", "wxUI"],
|
packages=["interactors", "presenters", "views", "wxUI"],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user