Added back the enchant-based spelling correction module
This commit is contained in:
@@ -1,4 +1,6 @@
|
||||
from __future__ import absolute_import
|
||||
from __future__ import unicode_literals
|
||||
from . import spellchecker
|
||||
import platform
|
||||
if platform.system() == "Windows":
|
||||
from .wx_ui import *
|
||||
from .wx_ui import *
|
@@ -1,115 +0,0 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
""" High level Spell checker module by using the SymSpellPy library. """
|
||||
import os
|
||||
import glob
|
||||
import shutil
|
||||
import logging
|
||||
import paths
|
||||
from symspellpy.symspellpy import SymSpell, Verbosity
|
||||
from codecs import open as open_
|
||||
|
||||
log = logging.getLogger("SpellChecker.checker")
|
||||
|
||||
loaded_dicts = dict()
|
||||
ready = False
|
||||
|
||||
def load_dicts():
    """ Fill the module-level ``loaded_dicts`` registry with one SymSpell instance per dictionary file.

    Every ``*.txt`` file inside the ``dicts`` folder of the user's config directory is loaded
    and stored under the file's base name (without extension). A non-empty registry is
    replaced by a fresh one before loading, and ``ready`` is set to True once done.
    """
    global loaded_dicts, ready
    log.debug("Start dictionary loading for spelling checker module...")
    if loaded_dicts:
        # Start over with a new registry instead of mixing old and new entries.
        loaded_dicts = dict()
    dicts_dir = os.path.join(paths.config_path(), "dicts")
    if os.path.isdir(dicts_dir):
        log.debug("Loading language dictionaries from path %s" % (dicts_dir,))
        dict_files = glob.glob(os.path.join(dicts_dir, "*.txt"))
        log.debug("%r files found." % (len(dict_files),))
        for dict_file in dict_files:
            # The registry key is the file name without its extension.
            key, _ext = os.path.splitext(os.path.basename(dict_file))
            speller = SymSpell()
            speller.load_dictionary(dict_file, 0, 1, encoding="utf-8")
            loaded_dicts[key] = speller
            log.debug("Added dictionary for language %s " % (key,))
    ready = True
    log.debug("All dicts were loaded.")
|
||||
|
||||
def prepare_dicts(language):
    """ Make sure the user's config directory holds its own copy of a language dictionary.

    The bundled dictionary file is copied so it can be modified and read without needing
    privileged sessions. Nothing is copied when the bundled file does not exist or when
    the user's config already contains a file for that language.

    @ language: two letter language code.
    """
    log.debug("preparing dictionary data...")
    filename = language+".txt"
    user_dicts = os.path.join(paths.config_path(), "dicts")
    if not os.path.exists(user_dicts):
        log.debug("Creating dicts folder in config directory...")
        os.mkdir(user_dicts)
    bundled_file = os.path.join(paths.app_path(), "dictionaries", filename)
    user_file = os.path.join(user_dicts, filename)
    # Nothing to do without a bundled dictionary, or when the user already has a copy.
    if not os.path.exists(bundled_file) or os.path.exists(user_file):
        return
    log.debug("Dictionary for language %s is not present in user config. Coppying... " % (language,))
    shutil.copy(bundled_file, user_dicts)
|
||||
|
||||
class SpellChecker(object):
|
||||
|
||||
def __init__(self, wordlist=None, *args, **kwargs):
|
||||
self.kwargs = kwargs
|
||||
self.dictionary = None
|
||||
self.ignored_words = []
|
||||
self.word_index = 0
|
||||
|
||||
def set_language(self, lang):
|
||||
global loaded_dicts
|
||||
if loaded_dicts.get(lang) != None:
|
||||
self.dictionary = loaded_dicts[lang]
|
||||
else:
|
||||
raise ValueError("Dictionary not found for the specified language")
|
||||
|
||||
def set_text(self, text):
|
||||
self.transformed_words = text.split()
|
||||
self.word_index = 0
|
||||
|
||||
def check_words(self):
|
||||
for word in range(0, len(self.transformed_words)):
|
||||
if self.transformed_words[word] in self.ignored_words:
|
||||
continue
|
||||
suggestions = self.dictionary.lookup(self.transformed_words[word], Verbosity.CLOSEST, 2, transfer_casing=True)
|
||||
valid_word = True
|
||||
if len(suggestions) == 0:
|
||||
continue
|
||||
for s in suggestions:
|
||||
if s.distance == 0:
|
||||
valid_word = False
|
||||
if valid_word == False:
|
||||
continue
|
||||
if word <= 10:
|
||||
if len(self.transformed_words) <= 10:
|
||||
context = " ".join(self.transformed_words)
|
||||
else:
|
||||
context = " ".join(self.transformed_words[0:10])
|
||||
elif word >= len(self.transformed_words)-9:
|
||||
context = " ".join(self.transformed_words[-10])
|
||||
else:
|
||||
context = " ".join(self.transformed_words[word-5:word+5])
|
||||
self.word_index = word
|
||||
yield (suggestions, context, word)
|
||||
|
||||
def replace(self, suggestion):
|
||||
if len(self.transformed_words) < self.word_index:
|
||||
raise ValueError("Word index is not present in the current text")
|
||||
self.transformed_words[self.word_index] = suggestion
|
||||
|
||||
def replace_all(self, word):
|
||||
existing_word = self.word
|
||||
for i in range(0, len(self.transformed_words)):
|
||||
if self.transformed_words[i] == existing_word:
|
||||
self.transformed_words[i] = word
|
||||
|
||||
def ignore_word(self, word):
|
||||
self.ignored_words.append(word)
|
||||
|
||||
@property
|
||||
def text(self):
|
||||
return " ".join(self.transformed_words)
|
||||
|
||||
@property
|
||||
def word(self):
|
||||
if len(self.transformed_words) == 0 or self.word_index >= len(self.transformed_words):
|
||||
return None
|
||||
return self.transformed_words[self.word_index]
|
@@ -1,70 +1,80 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import absolute_import
|
||||
from __future__ import unicode_literals
|
||||
from builtins import next
|
||||
from builtins import object
|
||||
import os
|
||||
import logging
|
||||
from . import wx_ui
|
||||
import widgetUtils
|
||||
import output
|
||||
import config
|
||||
import languageHandler
|
||||
from platform_utils import paths
|
||||
from . import checker
|
||||
from . import wx_ui
|
||||
|
||||
import enchant
|
||||
import paths
|
||||
from . import twitterFilter
|
||||
from enchant.checker import SpellChecker
|
||||
from enchant.errors import DictNotFoundError
|
||||
from enchant import tokenize
|
||||
log = logging.getLogger("extra.SpellChecker.spellChecker")
|
||||
|
||||
class spellChecker(object):
    """ Controller for the interactive spelling correction dialog, using enchant.

    Creating an instance runs the whole process: the text is checked, the dialog is
    shown and, when it closes, the resulting text is stored in ``fixed_text``.
    If no dictionary is available for the language, ``active`` is set to False and
    neither ``dialog`` nor ``fixed_text`` are created.
    """

    def __init__(self, text):
        super(spellChecker, self).__init__()
        self.active = True
        if config.app["app-settings"]["language"] == "system":
            log.debug("Using the system language")
            language = languageHandler.curLang[:2]
        else:
            log.debug("Using language: %s" % (languageHandler.getLanguage(),))
            language = languageHandler.getLanguage()[:2]
        try:
            self.dict = enchant.DictWithPWL(language, os.path.join(paths.config_path(), "wordlist.dict"))
        except DictNotFoundError:
            log.exception("Dictionary for language %s not found." % (language,))
            wx_ui.dict_not_found_error()
            self.active = False
            # Without a dictionary there is nothing to build the checker from.
            return
        self.checker = SpellChecker(self.dict, filters=[twitterFilter.TwitterFilter, tokenize.EmailFilter, tokenize.URLFilter])
        self.checker.set_text(text)
        log.debug("Creating dialog...")
        self.dialog = wx_ui.spellCheckerDialog()
        widgetUtils.connect_event(self.dialog.ignore, widgetUtils.BUTTON_PRESSED, self.ignore)
        widgetUtils.connect_event(self.dialog.ignoreAll, widgetUtils.BUTTON_PRESSED, self.ignoreAll)
        widgetUtils.connect_event(self.dialog.replace, widgetUtils.BUTTON_PRESSED, self.replace)
        widgetUtils.connect_event(self.dialog.replaceAll, widgetUtils.BUTTON_PRESSED, self.replaceAll)
        widgetUtils.connect_event(self.dialog.add, widgetUtils.BUTTON_PRESSED, self.add)
        self.check()
        self.dialog.get_response()
        self.fixed_text = self.checker.get_text()

    def check(self):
        """ Advance to the next misspelled word and present it in the dialog.

        When the checker has no more words, the user is notified and the dialog is destroyed.
        """
        try:
            next(self.checker)
        except StopIteration:
            log.debug("Process finished.")
            wx_ui.finished()
            self.dialog.Destroy()
            return
        textToSay = _(u"Misspelled word: %s") % (self.checker.word,)
        context = u"... %s %s %s" % (self.checker.leading_context(10), self.checker.word, self.checker.trailing_context(10))
        self.dialog.set_title(textToSay)
        output.speak(textToSay)
        self.dialog.set_word_and_suggestions(word=self.checker.word, context=context, suggestions=self.checker.suggest())

    def ignore(self, ev):
        """ Button handler: leave the current word untouched and go to the next one. """
        self.check()

    def ignoreAll(self, ev):
        """ Button handler: ignore the current word for the rest of the check. """
        self.checker.ignore_always(word=self.checker.word)
        self.check()

    def replace(self, ev):
        """ Button handler: replace the current word with the selected suggestion. """
        self.checker.replace(self.dialog.get_selected_suggestion())
        self.check()

    def replaceAll(self, ev):
        """ Button handler: always replace the current word with the selected suggestion. """
        self.checker.replace_always(self.dialog.get_selected_suggestion())
        self.check()

    def clean(self):
        """ Destroy the dialog, if one was created. """
        if hasattr(self, "dialog"):
            self.dialog.Destroy()

    def add(self, ev):
        """ Button handler: add the current word to the personal word list. """
        self.checker.add()
        self.check()
|
||||
|
16
src/extra/SpellChecker/twitterFilter.py
Normal file
16
src/extra/SpellChecker/twitterFilter.py
Normal file
@@ -0,0 +1,16 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import unicode_literals
|
||||
import re
|
||||
from enchant.tokenize import Filter
|
||||
|
||||
class TwitterFilter(Filter):
    r"""Filter that makes the tokenizer skip twitter usernames and hashtags.

    A word is skipped when it matches the following regular expression:

        ^[#@](\S){1,}$

    That is, a ``#`` or ``@`` sign followed by one or more non-whitespace characters.
    """
    _pattern = re.compile(r"^[#@](\S){1,}$")

    def _skip(self, word):
        """Return True if ``word`` looks like a username or a hashtag, False otherwise."""
        return self._pattern.match(word) is not None
|
@@ -21,60 +21,63 @@ import wx
|
||||
import application
|
||||
|
||||
class spellCheckerDialog(wx.Dialog):
    """ Dialog showing a misspelled word, its context and a list of suggestions.

    The action buttons (``ignore``, ``ignoreAll``, ``replace``, ``replaceAll`` and
    ``add``) are exposed as attributes so a controller can bind its own handlers.
    """

    def __init__(self):
        super(spellCheckerDialog, self).__init__(None, 1)
        panel = wx.Panel(self)
        sizer = wx.BoxSizer(wx.VERTICAL)
        # Misspelled word row.
        word = wx.StaticText(panel, -1, _(u"Misspelled word"))
        self.word = wx.TextCtrl(panel, -1)
        wordBox = wx.BoxSizer(wx.HORIZONTAL)
        wordBox.Add(word, 0, wx.ALL, 5)
        wordBox.Add(self.word, 0, wx.ALL, 5)
        # Context row.
        context = wx.StaticText(panel, -1, _(u"Context"))
        self.context = wx.TextCtrl(panel, -1)
        contextBox = wx.BoxSizer(wx.HORIZONTAL)
        contextBox.Add(context, 0, wx.ALL, 5)
        contextBox.Add(self.context, 0, wx.ALL, 5)
        # Suggestions row.
        suggest = wx.StaticText(panel, -1, _(u"Suggestions"))
        self.suggestions = wx.ListBox(panel, -1, choices=[], style=wx.LB_SINGLE)
        suggestionsBox = wx.BoxSizer(wx.HORIZONTAL)
        suggestionsBox.Add(suggest, 0, wx.ALL, 5)
        suggestionsBox.Add(self.suggestions, 0, wx.ALL, 5)
        # Action buttons.
        self.ignore = wx.Button(panel, -1, _(u"&Ignore"))
        self.ignoreAll = wx.Button(panel, -1, _(u"I&gnore all"))
        self.replace = wx.Button(panel, -1, _(u"&Replace"))
        self.replaceAll = wx.Button(panel, -1, _(u"R&eplace all"))
        self.add = wx.Button(panel, -1, _(u"&Add to personal dictionary"))
        close = wx.Button(panel, wx.ID_CANCEL)
        btnBox = wx.BoxSizer(wx.HORIZONTAL)
        btnBox.Add(self.ignore, 0, wx.ALL, 5)
        btnBox.Add(self.ignoreAll, 0, wx.ALL, 5)
        btnBox.Add(self.replace, 0, wx.ALL, 5)
        btnBox.Add(self.replaceAll, 0, wx.ALL, 5)
        btnBox.Add(self.add, 0, wx.ALL, 5)
        btnBox.Add(close, 0, wx.ALL, 5)
        sizer.Add(wordBox, 0, wx.ALL, 5)
        sizer.Add(contextBox, 0, wx.ALL, 5)
        sizer.Add(suggestionsBox, 0, wx.ALL, 5)
        sizer.Add(btnBox, 0, wx.ALL, 5)
        panel.SetSizer(sizer)
        self.SetClientSize(sizer.CalcMin())

    def get_response(self):
        """ Show the dialog modally and return the value given by ShowModal(). """
        return self.ShowModal()

    def set_title(self, title):
        """ Set the title of the dialog. """
        return self.SetTitle(title)

    def set_word_and_suggestions(self, word, context, suggestions):
        """ Display a new word with its context and suggestions, and focus the suggestions list. """
        self.word.SetValue(word)
        self.context.ChangeValue(context)
        self.suggestions.Set(suggestions)
        self.suggestions.SetFocus()

    def get_selected_suggestion(self):
        """ Return the string currently selected in the suggestions list. """
        return self.suggestions.GetStringSelection()
|
||||
|
||||
def dict_not_found_error():
    """ Show a modal error dialog reporting that no dictionary exists for the selected language. """
    wx.MessageDialog(None, _(u"An error has occurred. There are no dictionaries available for the selected language in {0}").format(application.name,), _(u"Error"), wx.ICON_ERROR).ShowModal()
|
||||
|
||||
def finished():
    """ Show a modal dialog reporting that the spell check is complete. """
    wx.MessageDialog(None, _(u"Spell check complete."), application.name, style=wx.OK).ShowModal()
|
||||
|
@@ -18,7 +18,6 @@ if hasattr(sys, "frozen"):
|
||||
sys.excepthook = lambda x, y, z: logging.critical(''.join(traceback.format_exception(x, y, z)))
|
||||
from mysc.thread_utils import call_threaded
|
||||
from wxUI import commonMessages
|
||||
from extra.SpellChecker import checker # Load dictionaries in advance for spelling correction
|
||||
|
||||
log = logging.getLogger("main")
|
||||
|
||||
@@ -57,8 +56,6 @@ def setup():
|
||||
del sm
|
||||
log.debug("Loading dictionaries for spelling correction...")
|
||||
# Let's copy dictionary files for the selected language just in case it is not present already.
|
||||
checker.prepare_dicts(languageHandler.curLang[:2])
|
||||
call_threaded(checker.load_dicts)
|
||||
r = mainController.Controller()
|
||||
call_threaded(r.login)
|
||||
app.run()
|
||||
|
@@ -36,7 +36,7 @@ build_exe_options = dict(
|
||||
include_msvcr=True,
|
||||
zip_include_packages=["accessible_output2", "sound_lib", "arrow"],
|
||||
replace_paths = [("*", "")],
|
||||
include_files=["session.defaults", "cacert.pem", "app-configuration.defaults", "locales", "sounds", "documentation", "../windows-dependencies/x86/oggenc2.exe", "../windows-dependencies/x86/bootstrap.exe", "../windows-dependencies/dictionaries", find_sound_lib_datafiles(), find_accessible_output2_datafiles(), ("../windows-dependencies/msvc32", ".")],
|
||||
include_files=["session.defaults", "cacert.pem", "app-configuration.defaults", "locales", "sounds", "documentation", "../windows-dependencies/x86/oggenc2.exe", "../windows-dependencies/x86/bootstrap.exe", "../windows-dependencies/dictionaries", find_sound_lib_datafiles(), find_accessible_output2_datafiles(), ("../windows-dependencies/msvc32", "."), ("../windows-dependencies/dictionaries", "lib/enchant/data/mingw32/share/enchant/hunspell")],
|
||||
packages=["interactors", "presenters", "views", "wxUI"],
|
||||
)
|
||||
|
||||
|
Reference in New Issue
Block a user