Added back the enchant-based spelling correction module

This commit is contained in:
2020-06-14 08:39:43 -05:00
parent 45465b246f
commit f43e9ffccf
27 changed files with 929638 additions and 250225 deletions

View File

@@ -1,4 +1,6 @@
from __future__ import absolute_import
from __future__ import unicode_literals
from . import spellchecker
import platform
if platform.system() == "Windows":
from .wx_ui import *
from .wx_ui import *

View File

@@ -1,115 +0,0 @@
# -*- coding: utf-8 -*-
""" High level Spell checker module by using the SymSpellPy library. """
import os
import glob
import shutil
import logging
import paths
from symspellpy.symspellpy import SymSpell, Verbosity
from codecs import open as open_
log = logging.getLogger("SpellChecker.checker")
loaded_dicts = dict()
ready = False
def load_dicts():
    """Load every ``*.txt`` dictionary found in the user's ``dicts`` folder.

    Rebuilds the module-level ``loaded_dicts`` mapping, keyed by the file
    name without its extension, with one ``SymSpell`` instance per file, and
    sets the module-level ``ready`` flag when done.
    """
    global loaded_dicts, ready
    log.debug("Start dictionary loading for spelling checker module...")
    # Start from a clean mapping when this is a reload.
    if loaded_dicts:
        loaded_dicts = dict()
    dicts_dir = os.path.join(paths.config_path(), "dicts")
    if os.path.isdir(dicts_dir):
        log.debug("Loading language dictionaries from path %s" % (dicts_dir,))
        dict_files = glob.glob(os.path.join(dicts_dir, "*.txt"))
        log.debug("%r files found." % (len(dict_files)))
        for dict_file in dict_files:
            language = os.path.splitext(os.path.basename(dict_file))[0]
            speller = SymSpell()
            # presumably 0 and 1 are the term and count column indexes -- confirm
            # against the symspellpy documentation.
            speller.load_dictionary(dict_file, 0, 1, encoding="utf-8")
            loaded_dicts[language] = speller
            log.debug("Added dictionary for language %s " % (language,))
    # NOTE(review): assumed to run even when the dicts folder is missing;
    # the original indentation was not visible -- confirm.
    ready = True
    log.debug("All dicts were loaded.")
def prepare_dicts(language):
    """Ensure the user's config folder holds a copy of a language dictionary.

    The bundled dictionary file is copied to the ``dicts`` folder of the
    config directory so it can be modified and read without needing
    privileged sessions. Nothing is copied when the bundled file is missing
    or the user already has a copy.

    @ language: two letter language code.
    """
    log.debug("preparing dictionary data...")
    user_dicts = os.path.join(paths.config_path(), "dicts")
    if not os.path.exists(user_dicts):
        log.debug("Creating dicts folder in config directory...")
        os.mkdir(user_dicts)
    file_name = language+".txt"
    bundled = os.path.join(paths.app_path(), "dictionaries", file_name)
    if os.path.exists(bundled) and not os.path.exists(os.path.join(paths.config_path(), "dicts", file_name)):
        log.debug("Dictionary for language %s is not present in user config. Coppying... " % (language,))
        shutil.copy(bundled, os.path.join(paths.config_path(), "dicts"))
class SpellChecker(object):
    """Word-by-word spell checker backed by a SymSpell dictionary.

    Typical use: ``set_language()``, ``set_text()``, then iterate
    ``check_words()`` and call ``replace()``, ``replace_all()`` or
    ``ignore_word()`` for each reported word. The edited result is
    available through the ``text`` property.
    """

    def __init__(self, wordlist=None, *args, **kwargs):
        """Create a checker with no dictionary and no text loaded.

        ``wordlist`` and ``*args`` are accepted but not used; ``**kwargs``
        is stored in ``self.kwargs``.
        """
        self.kwargs = kwargs
        self.dictionary = None
        self.ignored_words = []
        # Initialised here so ``text`` and ``word`` work before set_text().
        self.transformed_words = []
        self.word_index = 0

    def set_language(self, lang):
        """Select the preloaded dictionary registered under ``lang``.

        Raises ValueError when the module-level ``loaded_dicts`` mapping has
        no dictionary for that language.
        """
        dictionary = loaded_dicts.get(lang)
        if dictionary is None:
            raise ValueError("Dictionary not found for the specified language")
        self.dictionary = dictionary

    def set_text(self, text):
        """Split ``text`` on whitespace and restart checking from word 0."""
        self.transformed_words = text.split()
        self.word_index = 0

    def check_words(self):
        """Yield ``(suggestions, context, index)`` for each misspelled word.

        A word is skipped when it is in ``ignored_words``, when the
        dictionary returns no suggestion at all, or when one of the
        suggestions is at edit distance 0 (the word itself is known).
        ``word_index`` is updated before each yield so ``replace()`` and
        ``word`` refer to the word just reported.
        """
        # Index-based on purpose: replace() edits the list between yields and
        # each word must be re-read at the time it is checked.
        for index in range(len(self.transformed_words)):
            current = self.transformed_words[index]
            if current in self.ignored_words:
                continue
            suggestions = self.dictionary.lookup(current, Verbosity.CLOSEST, 2, transfer_casing=True)
            if len(suggestions) == 0:
                continue
            # A suggestion at distance 0 means the word is spelled correctly.
            if any(s.distance == 0 for s in suggestions):
                continue
            context = self._context_for(index)
            self.word_index = index
            yield (suggestions, context, index)

    def _context_for(self, index):
        """Return up to ten words of text surrounding the word at ``index``."""
        words = self.transformed_words
        if len(words) <= 10:
            return " ".join(words)
        if index < 10:
            # Strictly below 10 so the reported word is part of the slice.
            return " ".join(words[:10])
        if index >= len(words) - 9:
            # Slice, not single item: joining one word would space out its letters.
            return " ".join(words[-10:])
        return " ".join(words[index-5:index+5])

    def replace(self, suggestion):
        """Replace the current word (at ``word_index``) with ``suggestion``.

        Raises ValueError when ``word_index`` is past the end of the text.
        """
        if self.word_index >= len(self.transformed_words):
            raise ValueError("Word index is not present in the current text")
        self.transformed_words[self.word_index] = suggestion

    def replace_all(self, word):
        """Replace every occurrence of the current word with ``word``."""
        existing_word = self.word
        # Slice assignment keeps the same list object, as the original loop did.
        self.transformed_words[:] = [
            word if item == existing_word else item
            for item in self.transformed_words
        ]

    def ignore_word(self, word):
        """Stop reporting ``word`` for the rest of this checker's life."""
        self.ignored_words.append(word)

    @property
    def text(self):
        """The current words joined with single spaces."""
        return " ".join(self.transformed_words)

    @property
    def word(self):
        """The word at ``word_index``, or None when there is no such word."""
        if len(self.transformed_words) == 0 or self.word_index >= len(self.transformed_words):
            return None
        return self.transformed_words[self.word_index]

View File

@@ -1,70 +1,80 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import unicode_literals
from builtins import next
from builtins import object
import os
import logging
from . import wx_ui
import widgetUtils
import output
import config
import languageHandler
from platform_utils import paths
from . import checker
from . import wx_ui
import enchant
import paths
from . import twitterFilter
from enchant.checker import SpellChecker
from enchant.errors import DictNotFoundError
from enchant import tokenize
log = logging.getLogger("extra.SpellChecker.spellChecker")
class spellChecker(object):
def __init__(self, text):
"""Spell check ``text`` using the SymSpell-based ``checker`` module.

Looks up a dictionary for the first two letters of ``languageHandler.curLang``,
wires the dialog buttons to the handlers of this class, shows the dialog and
stores the corrected text in ``self.fixed_text``.

NOTE(review): indentation is not visible in this view, so the nesting described
in the comments below is inferred from the statements themselves -- confirm
against the real file.
"""
super(spellChecker, self).__init__()
# Stays True unless no dictionary exists for the current language.
self.active = True
self.checker = checker.SpellChecker()
log.debug("Using language: %s" % (languageHandler.getLanguage(),))
try:
self.checker.set_language(languageHandler.curLang[:2])
except ValueError:
# set_language() raises ValueError when the language has no loaded dictionary.
log.exception("Dictionary for language %s not found." % (languageHandler.curLang,))
wx_ui.dict_not_found_error()
self.active = False
self.checker.set_text(text)
# check_words() is a generator; check() pulls one misspelled word per call.
self.generator = self.checker.check_words()
if self.active == True:
log.debug("Creating dialog...")
self.dialog = wx_ui.spellCheckerDialog()
widgetUtils.connect_event(self.dialog.ignore, widgetUtils.BUTTON_PRESSED, self.ignore)
widgetUtils.connect_event(self.dialog.ignoreAll, widgetUtils.BUTTON_PRESSED, self.ignoreAll)
widgetUtils.connect_event(self.dialog.replace, widgetUtils.BUTTON_PRESSED, self.replace)
widgetUtils.connect_event(self.dialog.replaceAll, widgetUtils.BUTTON_PRESSED, self.replaceAll)
# Load the first misspelled word before the dialog is shown.
self.check()
self.dialog.get_response()
self.fixed_text = self.checker.text
def __init__(self, text):
    """Spell check ``text`` interactively using an enchant dictionary.

    The dictionary language is the first two letters of
    ``languageHandler.curLang`` when the configured language is "system",
    otherwise the first two letters of ``languageHandler.getLanguage()``.
    The personal word list is ``wordlist.dict`` in the config directory.

    When no dictionary exists for that language an error dialog is shown,
    ``self.active`` is set to False and construction stops: no checker, no
    dialog and no ``fixed_text`` attribute are created. Otherwise the
    dialog is shown and the corrected text is stored in ``self.fixed_text``.
    """
    super(spellChecker, self).__init__()
    self.active = True
    if config.app["app-settings"]["language"] == "system":
        log.debug("Using the system language")
        language = languageHandler.curLang[:2]
    else:
        log.debug("Using language: %s" % (languageHandler.getLanguage(),))
        language = languageHandler.getLanguage()[:2]
    try:
        self.dict = enchant.DictWithPWL(language, os.path.join(paths.config_path(), "wordlist.dict"))
    except DictNotFoundError:
        # Log the language actually requested; the previous code referenced
        # an undefined name here and raised NameError instead.
        log.exception("Dictionary for language %s not found." % (language,))
        wx_ui.dict_not_found_error()
        self.active = False
        # Without a dictionary there is nothing to build a checker from.
        return
    self.checker = SpellChecker(self.dict, filters=[twitterFilter.TwitterFilter, tokenize.EmailFilter, tokenize.URLFilter])
    self.checker.set_text(text)
    log.debug("Creating dialog...")
    self.dialog = wx_ui.spellCheckerDialog()
    widgetUtils.connect_event(self.dialog.ignore, widgetUtils.BUTTON_PRESSED, self.ignore)
    widgetUtils.connect_event(self.dialog.ignoreAll, widgetUtils.BUTTON_PRESSED, self.ignoreAll)
    widgetUtils.connect_event(self.dialog.replace, widgetUtils.BUTTON_PRESSED, self.replace)
    widgetUtils.connect_event(self.dialog.replaceAll, widgetUtils.BUTTON_PRESSED, self.replaceAll)
    widgetUtils.connect_event(self.dialog.add, widgetUtils.BUTTON_PRESSED, self.add)
    # Load the first misspelled word before the dialog is shown.
    self.check()
    self.dialog.get_response()
    self.fixed_text = self.checker.get_text()
def check(self):
    """Show the next misspelled word in the dialog, or finish.

    Pulls the next ``(suggestions, context, index)`` item from
    ``self.generator``, stores the index in ``self.wordIndex``, announces
    the word and fills the dialog. When the generator is exhausted the
    "finished" message is shown and the dialog is destroyed.
    """
    try:
        found, surrounding, self.wordIndex = next(self.generator)
        announcement = _("Misspelled word: %s") % (self.checker.word,)
        self.dialog.set_title(announcement)
        output.speak(announcement)
        # The dialog lists plain strings, so keep only each suggestion's term.
        terms = [item.term for item in found]
        self.dialog.set_word_and_suggestions(
            word=self.checker.word,
            context=surrounding,
            suggestions=terms,
        )
    except StopIteration:
        log.debug("Process finished.")
        wx_ui.finished()
        self.dialog.Destroy()
def check(self):
    """Show the next misspelled word in the dialog, or finish.

    Advances ``self.checker`` to its next error, announces the word and
    fills the dialog with the word, its surrounding context and the
    checker's suggestions. When the checker raises StopIteration the
    "finished" message is shown and the dialog is destroyed.
    """
    try:
        next(self.checker)
        wrong_word = self.checker.word
        announcement = _(u"Misspelled word: %s") % (wrong_word,)
        # presumably 10 is a size in characters -- confirm in the enchant docs.
        before = self.checker.leading_context(10)
        after = self.checker.trailing_context(10)
        surrounding = u"... %s %s %s" % (before, wrong_word, after)
        self.dialog.set_title(announcement)
        output.speak(announcement)
        self.dialog.set_word_and_suggestions(
            word=wrong_word,
            context=surrounding,
            suggestions=self.checker.suggest(),
        )
    except StopIteration:
        log.debug("Process finished.")
        wx_ui.finished()
        self.dialog.Destroy()
def ignore(self, ev):
"""Button handler: leave the current word unchanged and move on to the next one. ``ev`` is unused."""
self.check()
def ignore(self, ev):
"""Button handler: leave the current word unchanged and move on to the next one. ``ev`` is unused."""
self.check()
def ignoreAll(self, ev):
"""Button handler: pass the current word to the checker's ``ignore_word`` and move on. ``ev`` is unused."""
self.checker.ignore_word(word=self.checker.word)
self.check()
def ignoreAll(self, ev):
"""Button handler: pass the current word to the checker's ``ignore_always`` and move on. ``ev`` is unused."""
self.checker.ignore_always(word=self.checker.word)
self.check()
def replace(self, ev):
"""Button handler: replace the current word with the suggestion selected in the dialog, then move on. ``ev`` is unused."""
# NOTE(review): the selection is used as-is; check what get_selected_suggestion() returns when nothing is selected.
self.checker.replace(self.dialog.get_selected_suggestion())
self.check()
def replace(self, ev):
"""Button handler: replace the current word with the suggestion selected in the dialog, then move on. ``ev`` is unused."""
# NOTE(review): the selection is used as-is; check what get_selected_suggestion() returns when nothing is selected.
self.checker.replace(self.dialog.get_selected_suggestion())
self.check()
def replaceAll(self, ev):
"""Button handler: pass the selected suggestion to the checker's ``replace_all``, then move on. ``ev`` is unused."""
self.checker.replace_all(self.dialog.get_selected_suggestion())
self.check()
def replaceAll(self, ev):
"""Button handler: pass the selected suggestion to the checker's ``replace_always``, then move on. ``ev`` is unused."""
self.checker.replace_always(self.dialog.get_selected_suggestion())
self.check()
def clean(self):
"""Destroy the dialog if one was created."""
# The dialog attribute only exists when __init__ got as far as building it.
if hasattr(self, "dialog"):
self.dialog.Destroy()
def add(self, ev):
"""Button handler: call the checker's ``add()`` and move on to the next word. ``ev`` is unused."""
# presumably add() stores the current word in the personal word list -- confirm in the enchant docs.
self.checker.add()
self.check()

View File

@@ -0,0 +1,16 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import re
from enchant.tokenize import Filter
class TwitterFilter(Filter):
    r"""Filter skipping over twitter usernames and hashtags.

    This filter skips any words matching the following regular expression:

        ^[#@](\S){1,}$

    That is, a ``#`` or ``@`` followed by one or more non-whitespace
    characters: any words that resemble users and hashtags.
    """
    _pattern = re.compile(r"^[#@](\S){1,}$")

    def _skip(self, word):
        """Return True when ``word`` matches the username/hashtag pattern."""
        return self._pattern.match(word) is not None

View File

@@ -21,60 +21,63 @@ import wx
import application
class spellCheckerDialog(wx.Dialog):
def __init__(self):
"""Build the spell checker dialog: word field, context field, suggestion list and action buttons."""
# NOTE(review): statement order is left untouched; widget creation order presumably drives keyboard navigation order -- confirm.
super(spellCheckerDialog, self).__init__(None, 1)
panel = wx.Panel(self)
sizer = wx.BoxSizer(wx.VERTICAL)
# Row 1: label and text field for the misspelled word.
word = wx.StaticText(panel, -1, _("&Misspelled word"))
self.word = wx.TextCtrl(panel, -1)
wordBox = wx.BoxSizer(wx.HORIZONTAL)
wordBox.Add(word, 0, wx.ALL, 5)
wordBox.Add(self.word, 0, wx.ALL, 5)
# Row 2: label and text field for the surrounding context.
context = wx.StaticText(panel, -1, _("Con&text"))
self.context = wx.TextCtrl(panel, -1)
contextBox = wx.BoxSizer(wx.HORIZONTAL)
contextBox.Add(context, 0, wx.ALL, 5)
contextBox.Add(self.context, 0, wx.ALL, 5)
# Row 3: label and single-selection list of suggestions.
suggest = wx.StaticText(panel, -1, _("&Suggestions"))
self.suggestions = wx.ListBox(panel, -1, choices=[], style=wx.LB_SINGLE)
suggestionsBox = wx.BoxSizer(wx.HORIZONTAL)
suggestionsBox.Add(suggest, 0, wx.ALL, 5)
suggestionsBox.Add(self.suggestions, 0, wx.ALL, 5)
# Row 4: action buttons; the controller connects handlers to these attributes.
self.ignore = wx.Button(panel, -1, _("&Ignore"))
self.ignoreAll = wx.Button(panel, -1, _("Ignore &all"))
self.replace = wx.Button(panel, -1, _("&Replace"))
self.replaceAll = wx.Button(panel, -1, _("Replace a&ll"))
close = wx.Button(panel, wx.ID_CANCEL)
btnBox = wx.BoxSizer(wx.HORIZONTAL)
btnBox.Add(self.ignore, 0, wx.ALL, 5)
btnBox.Add(self.ignoreAll, 0, wx.ALL, 5)
btnBox.Add(self.replace, 0, wx.ALL, 5)
btnBox.Add(self.replaceAll, 0, wx.ALL, 5)
btnBox.Add(close, 0, wx.ALL, 5)
# Stack the four rows vertically and size the dialog to the sizer's minimum.
sizer.Add(wordBox, 0, wx.ALL, 5)
sizer.Add(contextBox, 0, wx.ALL, 5)
sizer.Add(suggestionsBox, 0, wx.ALL, 5)
sizer.Add(btnBox, 0, wx.ALL, 5)
panel.SetSizer(sizer)
self.SetClientSize(sizer.CalcMin())
def __init__(self):
"""Build the spell checker dialog: word field, context field, suggestion list and action buttons, including "Add to personal dictionary"."""
# NOTE(review): statement order is left untouched; widget creation order presumably drives keyboard navigation order -- confirm.
super(spellCheckerDialog, self).__init__(None, 1)
panel = wx.Panel(self)
sizer = wx.BoxSizer(wx.VERTICAL)
# Row 1: label and text field for the misspelled word.
word = wx.StaticText(panel, -1, _(u"Misspelled word"))
self.word = wx.TextCtrl(panel, -1)
wordBox = wx.BoxSizer(wx.HORIZONTAL)
wordBox.Add(word, 0, wx.ALL, 5)
wordBox.Add(self.word, 0, wx.ALL, 5)
# Row 2: label and text field for the surrounding context.
context = wx.StaticText(panel, -1, _(u"Context"))
self.context = wx.TextCtrl(panel, -1)
contextBox = wx.BoxSizer(wx.HORIZONTAL)
contextBox.Add(context, 0, wx.ALL, 5)
contextBox.Add(self.context, 0, wx.ALL, 5)
# Row 3: label and single-selection list of suggestions.
suggest = wx.StaticText(panel, -1, _(u"Suggestions"))
self.suggestions = wx.ListBox(panel, -1, choices=[], style=wx.LB_SINGLE)
suggestionsBox = wx.BoxSizer(wx.HORIZONTAL)
suggestionsBox.Add(suggest, 0, wx.ALL, 5)
suggestionsBox.Add(self.suggestions, 0, wx.ALL, 5)
# Row 4: action buttons; the controller connects handlers to these attributes.
self.ignore = wx.Button(panel, -1, _(u"&Ignore"))
self.ignoreAll = wx.Button(panel, -1, _(u"I&gnore all"))
self.replace = wx.Button(panel, -1, _(u"&Replace"))
self.replaceAll = wx.Button(panel, -1, _(u"R&eplace all"))
self.add = wx.Button(panel, -1, _(u"&Add to personal dictionary"))
close = wx.Button(panel, wx.ID_CANCEL)
btnBox = wx.BoxSizer(wx.HORIZONTAL)
btnBox.Add(self.ignore, 0, wx.ALL, 5)
btnBox.Add(self.ignoreAll, 0, wx.ALL, 5)
btnBox.Add(self.replace, 0, wx.ALL, 5)
btnBox.Add(self.replaceAll, 0, wx.ALL, 5)
btnBox.Add(self.add, 0, wx.ALL, 5)
btnBox.Add(close, 0, wx.ALL, 5)
# Stack the four rows vertically and size the dialog to the sizer's minimum.
sizer.Add(wordBox, 0, wx.ALL, 5)
sizer.Add(contextBox, 0, wx.ALL, 5)
sizer.Add(suggestionsBox, 0, wx.ALL, 5)
sizer.Add(btnBox, 0, wx.ALL, 5)
panel.SetSizer(sizer)
self.SetClientSize(sizer.CalcMin())
def get_response(self):
"""Show the dialog modally and return the result of ``ShowModal()``."""
return self.ShowModal()
def set_title(self, title):
"""Set the dialog's window title to ``title``; returns whatever ``SetTitle()`` returns."""
return self.SetTitle(title)
def get_response(self):
"""Show the dialog modally and return the result of ``ShowModal()``."""
return self.ShowModal()
def set_word_and_suggestions(self, word, context, suggestions):
"""Fill the dialog for one misspelled word.

word: text placed in the word field.
context: text placed in the context field.
suggestions: list of strings that replaces the contents of the suggestion list.
"""
self.word.SetValue(word)
self.context.ChangeValue(context)
self.suggestions.Set(suggestions)
# Put keyboard focus on the list of suggestions.
self.suggestions.SetFocus()
def set_title(self, title):
"""Set the dialog's window title to ``title``; returns whatever ``SetTitle()`` returns."""
return self.SetTitle(title)
def get_selected_suggestion(self):
"""Return the string currently selected in the suggestion list."""
# NOTE(review): presumably an empty string when nothing is selected -- confirm in the wxPython docs.
return self.suggestions.GetStringSelection()
def set_word_and_suggestions(self, word, context, suggestions):
"""Fill the dialog for one misspelled word.

word: text placed in the word field.
context: text placed in the context field.
suggestions: list of strings that replaces the contents of the suggestion list.
"""
self.word.SetValue(word)
self.context.ChangeValue(context)
self.suggestions.Set(suggestions)
# Put keyboard focus on the list of suggestions.
self.suggestions.SetFocus()
def get_selected_suggestion(self):
"""Return the string currently selected in the suggestion list."""
# NOTE(review): presumably an empty string when nothing is selected -- confirm in the wxPython docs.
return self.suggestions.GetStringSelection()
def dict_not_found_error():
"""Show a modal error dialog saying no dictionary is available for the selected language."""
# NOTE(review): the two calls below differ only in the u"" prefix and look like the removed and added sides of one diff line -- confirm only one is meant to remain.
wx.MessageDialog(None, _("An error has occurred. There are no dictionaries available for the selected language in {0}").format(application.name,), _("Error"), wx.ICON_ERROR).ShowModal()
wx.MessageDialog(None, _(u"An error has occurred. There are no dictionaries available for the selected language in {0}").format(application.name,), _(u"Error"), wx.ICON_ERROR).ShowModal()
def finished():
"""Show a modal message saying the spell check is complete, titled with the application name."""
# NOTE(review): the two calls below differ only in the u"" prefix and look like the removed and added sides of one diff line -- confirm only one is meant to remain.
wx.MessageDialog(None, _("Spell check complete."), application.name, style=wx.OK).ShowModal()
wx.MessageDialog(None, _(u"Spell check complete."), application.name, style=wx.OK).ShowModal()

View File

@@ -18,7 +18,6 @@ if hasattr(sys, "frozen"):
sys.excepthook = lambda x, y, z: logging.critical(''.join(traceback.format_exception(x, y, z)))
from mysc.thread_utils import call_threaded
from wxUI import commonMessages
from extra.SpellChecker import checker # Load dictionaries in advance for spelling correction
log = logging.getLogger("main")
@@ -57,8 +56,6 @@ def setup():
del sm
log.debug("Loading dictionaries for spelling correction...")
# Let's copy dictionary files for the selected language just in case it is not present already.
checker.prepare_dicts(languageHandler.curLang[:2])
call_threaded(checker.load_dicts)
r = mainController.Controller()
call_threaded(r.login)
app.run()

View File

@@ -36,7 +36,7 @@ build_exe_options = dict(
include_msvcr=True,
zip_include_packages=["accessible_output2", "sound_lib", "arrow"],
replace_paths = [("*", "")],
include_files=["session.defaults", "cacert.pem", "app-configuration.defaults", "locales", "sounds", "documentation", "../windows-dependencies/x86/oggenc2.exe", "../windows-dependencies/x86/bootstrap.exe", "../windows-dependencies/dictionaries", find_sound_lib_datafiles(), find_accessible_output2_datafiles(), ("../windows-dependencies/msvc32", ".")],
include_files=["session.defaults", "cacert.pem", "app-configuration.defaults", "locales", "sounds", "documentation", "../windows-dependencies/x86/oggenc2.exe", "../windows-dependencies/x86/bootstrap.exe", "../windows-dependencies/dictionaries", find_sound_lib_datafiles(), find_accessible_output2_datafiles(), ("../windows-dependencies/msvc32", "."), ("../windows-dependencies/dictionaries", "lib/enchant/data/mingw32/share/enchant/hunspell")],
packages=["interactors", "presenters", "views", "wxUI"],
)