Added back the enchant-based spelling correction module

This commit is contained in:
Manuel Cortez 2020-06-14 08:39:43 -05:00
parent 45465b246f
commit f43e9ffccf
27 changed files with 929638 additions and 250225 deletions

View File

@ -2,6 +2,16 @@
## News in this version ## News in this version
### New additions
* The spelling correction module is now able to add words to the dictionary, so it will learn which words it should start to ignore.
### Changes
* The spelling correction module has been rewritten to take advantage of the newest enchant Python module, which is more stable and can be added properly to the distribution, as opposed to the first enchant module we tried.
## News in Version 0.24
### New additions ### New additions
* Socializer will ask for confirmation before closing the application. * Socializer will ask for confirmation before closing the application.

View File

@ -16,9 +16,8 @@ yandex.translate
mutagen mutagen
mock mock
babel babel
# cx_freeze for building the executable files.
cx_freeze cx_freeze
symspellpy pyenchant
# forked repositories previously found at http://q-continuum.net # forked repositories previously found at http://q-continuum.net
git+https://code.manuelcortez.net/manuelcortez/libloader git+https://code.manuelcortez.net/manuelcortez/libloader
git+https://code.manuelcortez.net/manuelcortez/platform_utils git+https://code.manuelcortez.net/manuelcortez/platform_utils

View File

@ -1,3 +1,5 @@
from __future__ import absolute_import
from __future__ import unicode_literals
from . import spellchecker from . import spellchecker
import platform import platform
if platform.system() == "Windows": if platform.system() == "Windows":

View File

@ -1,115 +0,0 @@
# -*- coding: utf-8 -*-
""" High level Spell checker module by using the SymSpellPy library. """
import os
import glob
import shutil
import logging
import paths
from symspellpy.symspellpy import SymSpell, Verbosity
from codecs import open as open_
# Module-level logger used by every function and class in this module.
log = logging.getLogger("SpellChecker.checker")
# Maps the base name of each dictionary file (its name without ".txt") to the
# SymSpell instance loaded from it; populated by load_dicts().
loaded_dicts = dict()
# Set to True by load_dicts() once it has finished loading dictionaries.
ready = False
def load_dicts():
    """Load every ``*.txt`` dictionary from the ``dicts`` folder in the config directory.

    Each file becomes a SymSpell instance stored in the module-level
    ``loaded_dicts`` mapping under the file's base name (without extension).
    Any previously loaded dictionaries are discarded first. The module-level
    ``ready`` flag is set to True when the function finishes, whether or not
    the ``dicts`` folder exists.
    """
    global loaded_dicts, ready
    log.debug("Start dictionary loading for spelling checker module...")
    if loaded_dicts:
        # Start from a fresh mapping so stale dictionaries are not kept around.
        loaded_dicts = dict()
    dicts_dir = os.path.join(paths.config_path(), "dicts")
    if os.path.isdir(dicts_dir):
        log.debug("Loading language dictionaries from path %s" % (dicts_dir,))
        dict_files = glob.glob(os.path.join(dicts_dir, "*.txt"))
        log.debug("%r files found." % (len(dict_files)))
        for dict_file in dict_files:
            # The file name without its extension is used as the lookup key.
            language = os.path.splitext(os.path.basename(dict_file))[0]
            symspell = SymSpell()
            symspell.load_dictionary(dict_file, 0, 1, encoding="utf-8")
            loaded_dicts[language] = symspell
            log.debug("Added dictionary for language %s " % (language,))
    ready = True
    log.debug("All dicts were loaded.")
def prepare_dicts(language):
    """ Copy the main dictionary file to the user's config directory so it can be modified and read without needing to require privileged sessions.

    Nothing is copied when the application ships no dictionary for the
    language, or when a dictionary for it is already present in the user's
    ``dicts`` folder.

    @ language: two letter language code.
    """
    log.debug("preparing dictionary data...")
    user_dicts = os.path.join(paths.config_path(), "dicts")
    if not os.path.exists(user_dicts):
        log.debug("Creating dicts folder in config directory...")
        # makedirs also creates a missing parent directory, where mkdir would fail.
        os.makedirs(user_dicts)
    filename = language + ".txt"
    original_file = os.path.join(paths.app_path(), "dictionaries", filename)
    user_file = os.path.join(user_dicts, filename)
    if os.path.exists(original_file) and not os.path.exists(user_file):
        log.debug("Dictionary for language %s is not present in user config. Coppying... " % (language,))
        shutil.copy(original_file, user_dicts)
class SpellChecker(object):
    """Word-by-word spell checker backed by a dictionary from ``loaded_dicts``.

    Intended flow: call set_language() and set_text(), iterate over
    check_words(), and handle each reported word with replace(),
    replace_all() or ignore_word(). The resulting text is available through
    the ``text`` property.
    """

    def __init__(self, wordlist=None, *args, **kwargs):
        """Create a checker with no language and no text set.

        ``wordlist`` and ``args`` are accepted but not used; ``kwargs`` is
        stored as-is in ``self.kwargs``.
        """
        self.kwargs = kwargs
        self.dictionary = None
        self.ignored_words = []
        # Initialised here so ``text`` and ``word`` work before set_text().
        self.transformed_words = []
        self.word_index = 0

    def set_language(self, lang):
        """Select the loaded dictionary registered under ``lang``.

        Raises ValueError if no dictionary was loaded for that key.
        """
        dictionary = loaded_dicts.get(lang)
        if dictionary is None:
            raise ValueError("Dictionary not found for the specified language")
        self.dictionary = dictionary

    def set_text(self, text):
        """Split ``text`` on whitespace and restart checking from the first word."""
        self.transformed_words = text.split()
        self.word_index = 0

    def _context_for(self, index):
        """Return up to ten words surrounding the word at ``index``, joined by spaces."""
        words = self.transformed_words
        if index < 10:
            # Near the beginning: the first ten words (or all of them if fewer).
            return " ".join(words[:10])
        if index >= len(words) - 9:
            # Near the end: the last ten words.
            return " ".join(words[-10:])
        return " ".join(words[index - 5:index + 5])

    def check_words(self):
        """Yield ``(suggestions, context, index)`` for every word needing attention.

        A word is skipped when it is in ``ignored_words``, when the dictionary
        returns no suggestions for it, or when any suggestion has distance 0
        (the word itself is in the dictionary). ``word_index`` is updated to
        the reported word before each yield so that replace() and ``word``
        refer to it.
        """
        for index, candidate in enumerate(self.transformed_words):
            if candidate in self.ignored_words:
                continue
            suggestions = self.dictionary.lookup(candidate, Verbosity.CLOSEST, 2, transfer_casing=True)
            if not suggestions:
                continue
            if any(suggestion.distance == 0 for suggestion in suggestions):
                # An exact match means the word is spelled correctly.
                continue
            self.word_index = index
            yield (suggestions, self._context_for(index), index)

    def replace(self, suggestion):
        """Replace the current word with ``suggestion``.

        Raises ValueError if ``word_index`` points past the end of the text.
        """
        if self.word_index >= len(self.transformed_words):
            raise ValueError("Word index is not present in the current text")
        self.transformed_words[self.word_index] = suggestion

    def replace_all(self, word):
        """Replace every occurrence of the current word with ``word``."""
        existing_word = self.word
        if existing_word is None:
            # No current word, so there is nothing to replace.
            return
        for position, current in enumerate(self.transformed_words):
            if current == existing_word:
                self.transformed_words[position] = word

    def ignore_word(self, word):
        """Stop reporting ``word`` in subsequent check_words() iterations."""
        self.ignored_words.append(word)

    @property
    def text(self):
        """The current words joined by single spaces."""
        return " ".join(self.transformed_words)

    @property
    def word(self):
        """The word at ``word_index``, or None if the index is past the end of the text."""
        if self.word_index >= len(self.transformed_words):
            return None
        return self.transformed_words[self.word_index]

View File

@ -1,31 +1,40 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import unicode_literals from __future__ import unicode_literals
from builtins import next
from builtins import object
import os import os
import logging import logging
from . import wx_ui
import widgetUtils import widgetUtils
import output import output
import config import config
import languageHandler import languageHandler
from platform_utils import paths import enchant
from . import checker import paths
from . import wx_ui from . import twitterFilter
from enchant.checker import SpellChecker
from enchant.errors import DictNotFoundError
from enchant import tokenize
log = logging.getLogger("extra.SpellChecker.spellChecker") log = logging.getLogger("extra.SpellChecker.spellChecker")
class spellChecker(object): class spellChecker(object):
def __init__(self, text): def __init__(self, text):
super(spellChecker, self).__init__() super(spellChecker, self).__init__()
self.active = True self.active = True
self.checker = checker.SpellChecker()
log.debug("Using language: %s" % (languageHandler.getLanguage(),))
try: try:
self.checker.set_language(languageHandler.curLang[:2]) if config.app["app-settings"]["language"] == "system":
except ValueError: log.debug("Using the system language")
log.exception("Dictionary for language %s not found." % (languageHandler.curLang,)) self.dict = enchant.DictWithPWL(languageHandler.curLang[:2], os.path.join(paths.config_path(), "wordlist.dict"))
else:
log.debug("Using language: %s" % (languageHandler.getLanguage(),))
self.dict = enchant.DictWithPWL(languageHandler.getLanguage()[:2], os.path.join(paths.config_path(), "wordlist.dict"))
except DictNotFoundError:
log.exception("Dictionary for language %s not found." % (dictionary,))
wx_ui.dict_not_found_error() wx_ui.dict_not_found_error()
self.active = False self.active = False
self.checker = SpellChecker(self.dict, filters=[twitterFilter.TwitterFilter, tokenize.EmailFilter, tokenize.URLFilter])
self.checker.set_text(text) self.checker.set_text(text)
self.generator = self.checker.check_words()
if self.active == True: if self.active == True:
log.debug("Creating dialog...") log.debug("Creating dialog...")
self.dialog = wx_ui.spellCheckerDialog() self.dialog = wx_ui.spellCheckerDialog()
@ -33,18 +42,19 @@ class spellChecker(object):
widgetUtils.connect_event(self.dialog.ignoreAll, widgetUtils.BUTTON_PRESSED, self.ignoreAll) widgetUtils.connect_event(self.dialog.ignoreAll, widgetUtils.BUTTON_PRESSED, self.ignoreAll)
widgetUtils.connect_event(self.dialog.replace, widgetUtils.BUTTON_PRESSED, self.replace) widgetUtils.connect_event(self.dialog.replace, widgetUtils.BUTTON_PRESSED, self.replace)
widgetUtils.connect_event(self.dialog.replaceAll, widgetUtils.BUTTON_PRESSED, self.replaceAll) widgetUtils.connect_event(self.dialog.replaceAll, widgetUtils.BUTTON_PRESSED, self.replaceAll)
widgetUtils.connect_event(self.dialog.add, widgetUtils.BUTTON_PRESSED, self.add)
self.check() self.check()
self.dialog.get_response() self.dialog.get_response()
self.fixed_text = self.checker.text self.fixed_text = self.checker.get_text()
def check(self): def check(self):
try: try:
suggestions, context, self.wordIndex = next(self.generator) next(self.checker)
textToSay = _("Misspelled word: %s") % (self.checker.word,) textToSay = _(u"Misspelled word: %s") % (self.checker.word,)
context = context context = u"... %s %s %s" % (self.checker.leading_context(10), self.checker.word, self.checker.trailing_context(10))
self.dialog.set_title(textToSay) self.dialog.set_title(textToSay)
output.speak(textToSay) output.speak(textToSay)
self.dialog.set_word_and_suggestions(word=self.checker.word, context=context, suggestions=[suggestion.term for suggestion in suggestions]) self.dialog.set_word_and_suggestions(word=self.checker.word, context=context, suggestions=self.checker.suggest())
except StopIteration: except StopIteration:
log.debug("Process finished.") log.debug("Process finished.")
wx_ui.finished() wx_ui.finished()
@ -54,7 +64,7 @@ class spellChecker(object):
self.check() self.check()
def ignoreAll(self, ev): def ignoreAll(self, ev):
self.checker.ignore_word(word=self.checker.word) self.checker.ignore_always(word=self.checker.word)
self.check() self.check()
def replace(self, ev): def replace(self, ev):
@ -62,9 +72,9 @@ class spellChecker(object):
self.check() self.check()
def replaceAll(self, ev): def replaceAll(self, ev):
self.checker.replace_all(self.dialog.get_selected_suggestion()) self.checker.replace_always(self.dialog.get_selected_suggestion())
self.check() self.check()
def clean(self): def add(self, ev):
if hasattr(self, "dialog"): self.checker.add()
self.dialog.Destroy() self.check()

View File

@ -0,0 +1,16 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import re
from enchant.tokenize import Filter
class TwitterFilter(Filter):
    r"""Filter skipping over twitter usernames and hashtags.

    This filter skips any words matching the following regular expression:

        ^[#@](\S){1,}$

    That is, any words that resemble users and hashtags.
    """
    _pattern = re.compile(r"^[#@](\S){1,}$")

    def _skip(self, word):
        """Return True when ``word`` starts with ``#`` or ``@`` followed by non-space characters."""
        return self._pattern.match(word) is not None

View File

@ -25,31 +25,33 @@ class spellCheckerDialog(wx.Dialog):
super(spellCheckerDialog, self).__init__(None, 1) super(spellCheckerDialog, self).__init__(None, 1)
panel = wx.Panel(self) panel = wx.Panel(self)
sizer = wx.BoxSizer(wx.VERTICAL) sizer = wx.BoxSizer(wx.VERTICAL)
word = wx.StaticText(panel, -1, _("&Misspelled word")) word = wx.StaticText(panel, -1, _(u"Misspelled word"))
self.word = wx.TextCtrl(panel, -1) self.word = wx.TextCtrl(panel, -1)
wordBox = wx.BoxSizer(wx.HORIZONTAL) wordBox = wx.BoxSizer(wx.HORIZONTAL)
wordBox.Add(word, 0, wx.ALL, 5) wordBox.Add(word, 0, wx.ALL, 5)
wordBox.Add(self.word, 0, wx.ALL, 5) wordBox.Add(self.word, 0, wx.ALL, 5)
context = wx.StaticText(panel, -1, _("Con&text")) context = wx.StaticText(panel, -1, _(u"Context"))
self.context = wx.TextCtrl(panel, -1) self.context = wx.TextCtrl(panel, -1)
contextBox = wx.BoxSizer(wx.HORIZONTAL) contextBox = wx.BoxSizer(wx.HORIZONTAL)
contextBox.Add(context, 0, wx.ALL, 5) contextBox.Add(context, 0, wx.ALL, 5)
contextBox.Add(self.context, 0, wx.ALL, 5) contextBox.Add(self.context, 0, wx.ALL, 5)
suggest = wx.StaticText(panel, -1, _("&Suggestions")) suggest = wx.StaticText(panel, -1, _(u"Suggestions"))
self.suggestions = wx.ListBox(panel, -1, choices=[], style=wx.LB_SINGLE) self.suggestions = wx.ListBox(panel, -1, choices=[], style=wx.LB_SINGLE)
suggestionsBox = wx.BoxSizer(wx.HORIZONTAL) suggestionsBox = wx.BoxSizer(wx.HORIZONTAL)
suggestionsBox.Add(suggest, 0, wx.ALL, 5) suggestionsBox.Add(suggest, 0, wx.ALL, 5)
suggestionsBox.Add(self.suggestions, 0, wx.ALL, 5) suggestionsBox.Add(self.suggestions, 0, wx.ALL, 5)
self.ignore = wx.Button(panel, -1, _("&Ignore")) self.ignore = wx.Button(panel, -1, _(u"&Ignore"))
self.ignoreAll = wx.Button(panel, -1, _("Ignore &all")) self.ignoreAll = wx.Button(panel, -1, _(u"I&gnore all"))
self.replace = wx.Button(panel, -1, _("&Replace")) self.replace = wx.Button(panel, -1, _(u"&Replace"))
self.replaceAll = wx.Button(panel, -1, _("Replace a&ll")) self.replaceAll = wx.Button(panel, -1, _(u"R&eplace all"))
self.add = wx.Button(panel, -1, _(u"&Add to personal dictionary"))
close = wx.Button(panel, wx.ID_CANCEL) close = wx.Button(panel, wx.ID_CANCEL)
btnBox = wx.BoxSizer(wx.HORIZONTAL) btnBox = wx.BoxSizer(wx.HORIZONTAL)
btnBox.Add(self.ignore, 0, wx.ALL, 5) btnBox.Add(self.ignore, 0, wx.ALL, 5)
btnBox.Add(self.ignoreAll, 0, wx.ALL, 5) btnBox.Add(self.ignoreAll, 0, wx.ALL, 5)
btnBox.Add(self.replace, 0, wx.ALL, 5) btnBox.Add(self.replace, 0, wx.ALL, 5)
btnBox.Add(self.replaceAll, 0, wx.ALL, 5) btnBox.Add(self.replaceAll, 0, wx.ALL, 5)
btnBox.Add(self.add, 0, wx.ALL, 5)
btnBox.Add(close, 0, wx.ALL, 5) btnBox.Add(close, 0, wx.ALL, 5)
sizer.Add(wordBox, 0, wx.ALL, 5) sizer.Add(wordBox, 0, wx.ALL, 5)
sizer.Add(contextBox, 0, wx.ALL, 5) sizer.Add(contextBox, 0, wx.ALL, 5)
@ -58,6 +60,7 @@ class spellCheckerDialog(wx.Dialog):
panel.SetSizer(sizer) panel.SetSizer(sizer)
self.SetClientSize(sizer.CalcMin()) self.SetClientSize(sizer.CalcMin())
def get_response(self): def get_response(self):
return self.ShowModal() return self.ShowModal()
@ -74,7 +77,7 @@ class spellCheckerDialog(wx.Dialog):
return self.suggestions.GetStringSelection() return self.suggestions.GetStringSelection()
def dict_not_found_error(): def dict_not_found_error():
wx.MessageDialog(None, _("An error has occurred. There are no dictionaries available for the selected language in {0}").format(application.name,), _("Error"), wx.ICON_ERROR).ShowModal() wx.MessageDialog(None, _(u"An error has occurred. There are no dictionaries available for the selected language in {0}").format(application.name,), _(u"Error"), wx.ICON_ERROR).ShowModal()
def finished(): def finished():
wx.MessageDialog(None, _("Spell check complete."), application.name, style=wx.OK).ShowModal() wx.MessageDialog(None, _(u"Spell check complete."), application.name, style=wx.OK).ShowModal()

View File

@ -18,7 +18,6 @@ if hasattr(sys, "frozen"):
sys.excepthook = lambda x, y, z: logging.critical(''.join(traceback.format_exception(x, y, z))) sys.excepthook = lambda x, y, z: logging.critical(''.join(traceback.format_exception(x, y, z)))
from mysc.thread_utils import call_threaded from mysc.thread_utils import call_threaded
from wxUI import commonMessages from wxUI import commonMessages
from extra.SpellChecker import checker # Load dictionaries in advance for spelling correction
log = logging.getLogger("main") log = logging.getLogger("main")
@ -57,8 +56,6 @@ def setup():
del sm del sm
log.debug("Loading dictionaries for spelling correction...") log.debug("Loading dictionaries for spelling correction...")
# Let's copy dictionary files for the selected language just in case it is not present already. # Let's copy dictionary files for the selected language just in case it is not present already.
checker.prepare_dicts(languageHandler.curLang[:2])
call_threaded(checker.load_dicts)
r = mainController.Controller() r = mainController.Controller()
call_threaded(r.login) call_threaded(r.login)
app.run() app.run()

View File

@ -36,7 +36,7 @@ build_exe_options = dict(
include_msvcr=True, include_msvcr=True,
zip_include_packages=["accessible_output2", "sound_lib", "arrow"], zip_include_packages=["accessible_output2", "sound_lib", "arrow"],
replace_paths = [("*", "")], replace_paths = [("*", "")],
include_files=["session.defaults", "cacert.pem", "app-configuration.defaults", "locales", "sounds", "documentation", "../windows-dependencies/x86/oggenc2.exe", "../windows-dependencies/x86/bootstrap.exe", "../windows-dependencies/dictionaries", find_sound_lib_datafiles(), find_accessible_output2_datafiles(), ("../windows-dependencies/msvc32", ".")], include_files=["session.defaults", "cacert.pem", "app-configuration.defaults", "locales", "sounds", "documentation", "../windows-dependencies/x86/oggenc2.exe", "../windows-dependencies/x86/bootstrap.exe", "../windows-dependencies/dictionaries", find_sound_lib_datafiles(), find_accessible_output2_datafiles(), ("../windows-dependencies/msvc32", "."), ("../windows-dependencies/dictionaries", "lib/enchant/data/mingw32/share/enchant/hunspell")],
packages=["interactors", "presenters", "views", "wxUI"], packages=["interactors", "presenters", "views", "wxUI"],
) )

View File

@ -46,5 +46,5 @@
## Other ## Other
* distribution: * distribution:
- Create an installer for alpha versions - (done) Create an installer for alpha versions
- create a 64 bits distribution for both alpha and stable versions. - create a 64 bits distribution for both alpha and stable versions.

View File

@ -0,0 +1,729 @@
# this is the affix file of the de_DE Hunspell dictionary
# derived from the igerman98 dictionary
#
# Version: 20161207
#
# Copyright (C) 1998-2015 Bjoern Jacke <bjoern@j3e.de>
#
# License: GPLv2, GPLv3
# There should be a copy of both of this licenses included
# with every distribution of this dictionary. Modified
# versions using the GPL may only include the GPL
SET ISO8859-1
TRY esijanrtolcdugmphbyfvkwqxzäüößáéêàâñESIJANRTOLCDUGMPHBYFVKWQXZÄÜÖÉ-.
PFX U Y 1
PFX U 0 un .
PFX V Y 1
PFX V 0 ver .
SFX F Y 35
SFX F 0 nen in
SFX F e in e
SFX F e innen e
SFX F 0 in [^i]n
SFX F 0 innen [^i]n
SFX F 0 in [^enr]
SFX F 0 innen [^enr]
SFX F 0 in [^e]r
SFX F 0 innen [^e]r
SFX F 0 in [^r]er
SFX F 0 innen [^r]er
SFX F 0 in [^e]rer
SFX F 0 innen [^e]rer
SFX F 0 in ierer
SFX F 0 innen ierer
SFX F er in [^i]erer
SFX F er innen [^i]erer
SFX F in In in
SFX F in Innen in
SFX F e In e
SFX F e Innen e
SFX F 0 In [^i]n
SFX F 0 Innen [^i]n
SFX F 0 In [^en]
SFX F 0 Innen [^en]
SFX F 0 In [^e]r
SFX F 0 Innen [^e]r
SFX F 0 In [^r]er
SFX F 0 Innen [^r]er
SFX F 0 In [^e]rer
SFX F 0 Innen [^e]rer
SFX F 0 In ierer
SFX F 0 Innen ierer
SFX F er In [^i]erer
SFX F er Innen [^i]erer
#SFX F en innen en
#SFX F en Innen en
SFX L N 12
SFX L 0 tlich n
SFX L 0 tliche n
SFX L 0 tlicher n
SFX L 0 tliches n
SFX L 0 tlichem n
SFX L 0 tlichen n
SFX L 0 lich [^n]
SFX L 0 liche [^n]
SFX L 0 licher [^n]
SFX L 0 liches [^n]
SFX L 0 lichem [^n]
SFX L 0 lichen [^n]
#SFX H N 2
#SFX H 0 heit .
#SFX H 0 heiten .
#SFX K N 2
#SFX K 0 keit .
#SFX K 0 keiten .
SFX M N 10
SFX M 0 chen [^se]
SFX M 0 chens [^se]
SFX M ass ässchen ass
SFX M ass ässchens ass
SFX M oss össchen oss
SFX M oss össchens oss
SFX M uss üsschen uss
SFX M uss üsschens uss
SFX M e chen e
SFX M e chens e
SFX A Y 46
SFX A 0 r e
SFX A 0 n e
SFX A 0 m e
SFX A 0 s e
SFX A 0 e [^elr]
SFX A 0 er [^elr]
SFX A 0 en [^elr]
SFX A 0 em [^elr]
SFX A 0 es [^elr]
SFX A 0 e [^e][rl]
SFX A 0 er [^e][rl]
SFX A 0 en [^e][rl]
SFX A 0 em [^e][rl]
SFX A 0 es [^e][rl]
SFX A 0 e [^u]er
SFX A 0 er [^u]er
SFX A 0 en [^u]er
SFX A 0 em [^u]er
SFX A 0 es [^u]er
SFX A er re uer
SFX A er rer uer
SFX A er ren uer
SFX A er rem uer
SFX A er res uer
SFX A 0 e [eil]el
SFX A 0 er [eil]el
SFX A 0 en [eil]el
SFX A 0 em [eil]el
SFX A 0 es [eil]el
SFX A el le [^eil]el
SFX A el ler [^eil]el
SFX A el len [^eil]el
SFX A el lem [^eil]el
SFX A el les [^eil]el
SFX A lig elig [^aeiouhlräüö]lig
SFX A lig elige [^aeiouhlräüö]lig
SFX A lig eliger [^aeiouhlräüö]lig
SFX A lig eligen [^aeiouhlräüö]lig
SFX A lig eligem [^aeiouhlräüö]lig
SFX A lig eliges [^aeiouhlräüö]lig
SFX A erig rig [^hi]erig
SFX A erig rige [^hi]erig
SFX A erig riger [^hi]erig
SFX A erig rigen [^hi]erig
SFX A erig rigem [^hi]erig
SFX A erig riges [^hi]erig
SFX C Y 100
SFX C 0 ere [^elr]
SFX C 0 erer [^elr]
SFX C 0 eren [^elr]
SFX C 0 erem [^elr]
SFX C 0 eres [^elr]
SFX C 0 re e
SFX C 0 rer e
SFX C 0 ren e
SFX C 0 rem e
SFX C 0 res e
SFX C 0 ere [^e][lr]
SFX C 0 erer [^e][lr]
SFX C 0 eren [^e][lr]
SFX C 0 erem [^e][lr]
SFX C 0 eres [^e][lr]
SFX C el lere el
SFX C el lerer el
SFX C el leren el
SFX C el lerem el
SFX C el leres el
SFX C er rere uer
SFX C er rerer uer
SFX C er reren uer
SFX C er rerem uer
SFX C er reres uer
SFX C 0 ere [^u]er
SFX C 0 erer [^u]er
SFX C 0 eren [^u]er
SFX C 0 erem [^u]er
SFX C 0 eres [^u]er
SFX C lig eligere [^aeiouhlräüö]lig
SFX C lig eligerer [^aeiouhlräüö]lig
SFX C lig eligeren [^aeiouhlräüö]lig
SFX C lig eligerem [^aeiouhlräüö]lig
SFX C lig eligeres [^aeiouhlräüö]lig
SFX C erig rigere [^hi]erig
SFX C erig rigerer [^hi]erig
SFX C erig rigeren [^hi]erig
SFX C erig rigerem [^hi]erig
SFX C erig rigeres [^hi]erig
SFX C 0 est [kßsuxz]
SFX C 0 este [kßsuxz]
SFX C 0 ester [kßsuxz]
SFX C 0 esten [kßsuxz]
SFX C 0 estem [kßsuxz]
SFX C 0 estes [kßsuxz]
SFX C 0 st et
SFX C 0 ste et
SFX C 0 ster et
SFX C 0 sten et
SFX C 0 stem et
SFX C 0 stes et
SFX C 0 st igt
SFX C 0 ste igt
SFX C 0 ster igt
SFX C 0 sten igt
SFX C 0 stem igt
SFX C 0 stes igt
SFX C 0 est [^i]gt
SFX C 0 este [^i]gt
SFX C 0 ester [^i]gt
SFX C 0 esten [^i]gt
SFX C 0 estem [^i]gt
SFX C 0 estes [^i]gt
SFX C 0 est [^eg]t
SFX C 0 este [^eg]t
SFX C 0 ester [^eg]t
SFX C 0 esten [^eg]t
SFX C 0 estem [^eg]t
SFX C 0 estes [^eg]t
SFX C 0 st [^kßstxz]
SFX C 0 ste [^kßstxz]
SFX C 0 ster [^kßstxz]
SFX C 0 sten [^kßstxz]
SFX C 0 stem [^kßstxz]
SFX C 0 stes [^kßstxz]
SFX C 0 st nd
SFX C 0 ste nd
SFX C 0 ster nd
SFX C 0 sten nd
SFX C 0 stem nd
SFX C 0 stes nd
SFX C 0 est [^n]d
SFX C 0 este [^n]d
SFX C 0 ester [^n]d
SFX C 0 esten [^n]d
SFX C 0 estem [^n]d
SFX C 0 estes [^n]d
SFX C lig eligst [^aeiouhlräüö]lig
SFX C lig eligste [^aeiouhlräüö]lig
SFX C lig eligster [^aeiouhlräüö]lig
SFX C lig eligsten [^aeiouhlräüö]lig
SFX C lig eligstem [^aeiouhlräüö]lig
SFX C lig eligstes [^aeiouhlräüö]lig
SFX C erig rigst [^hi]erig
SFX C erig rigste [^hi]erig
SFX C erig rigster [^hi]erig
SFX C erig rigsten [^hi]erig
SFX C erig rigstem [^hi]erig
SFX C erig rigstes [^hi]erig
SFX E Y 1
SFX E 0 e .
SFX f Y 4
SFX f ph f ph
SFX f ph fen ph
SFX f phie fie phie
SFX f phie fien phie
SFX N Y 1
SFX N 0 n .
SFX P Y 1
SFX P 0 en .
SFX p Y 26
SFX p auf äufe auf
SFX p auf äufen auf
SFX p aus äuser [hH]aus
SFX p aus äusern [hH]aus
SFX p arkt ärkte [mM]arkt
SFX p arkt ärkten [mM]arkt
SFX p ang änge ang
SFX p ang ängen ang
SFX p uß üße uß
SFX p uß üßen uß
SFX p oß öße oß
SFX p oß ößen oß
SFX p aum äume aum
SFX p aum äumen aum
SFX p ag äge ag
SFX p ag ägen ag
SFX p ug üge ug
SFX p ug ügen ug
SFX p all älle all
SFX p all ällen all
SFX p ass ässe ass
SFX p ass ässen ass
SFX p uss üsse uss
SFX p uss üssen uss
SFX p oss össe oss
SFX p oss össen oss
# last ...oss rules are for swiss de_CH only - but do not affect de_DE
SFX R Y 3
SFX R 0 er [^e]
SFX R 0 ern [^e]
SFX R 0 r e
SFX S Y 1
SFX S 0 s .
SFX q Y 2
SFX q 0 se s
SFX q 0 sen s
SFX Q Y 1
SFX Q 0 ses s
#SFX Q 0 se s
#SFX Q 0 sen s
SFX T Y 1
SFX T 0 es .
SFX J Y 12
SFX J n ung [bgkpßsz]eln
SFX J n ungen [bgkpßsz]eln
SFX J eln lung eln
SFX J n ung ern
SFX J en ung en
SFX J eln lungen eln
SFX J n ungen ern
SFX J en ungen en
SFX J 0 ung [^n]
SFX J 0 ungen [^n]
SFX J el lung el
SFX J el lungen el
SFX B N 12
SFX B n bar e[lr]n
SFX B n bare e[lr]n
SFX B n baren e[lr]n
SFX B n barer e[lr]n
SFX B n bares e[lr]n
SFX B n barem e[lr]n
SFX B en bar en
SFX B en bare en
SFX B en baren en
SFX B en barer en
SFX B en bares en
SFX B en barem en
SFX D Y 6
SFX D 0 d n
SFX D 0 de n
SFX D 0 den n
SFX D 0 der n
SFX D 0 des n
SFX D 0 dem n
SFX W Y 5
SFX W en 0 en
SFX W n 0 [^e]n
SFX W st 0 [^s]st
SFX W t 0 sst
SFX W t 0 [^s]t
SFX I Y 16
SFX I n 0 en
SFX I eln le eln
SFX I n e eln
SFX I ern re ern
SFX I n e ern
SFX I n t e[lr]n
SFX I n t [dt]en
SFX I en t [^dimnt]en
SFX I en t eien
SFX I n t [^e]ien
SFX I n t chnen
SFX I en t [^c]h[mn]en
SFX I n t [^aäehilmnoöuür][mn]en
SFX I en t [aäeilmnoöuür][mn]en
SFX I n e un
SFX I n t un
SFX X Y 26
SFX X n t e[lr]n
SFX X n t [dtw]en
SFX X en t eien
SFX X n t [^e]ien
SFX X en t [^ditmnw]en
SFX X n t chnen
SFX X en t [^c]h[mn]en
SFX X n t [^aäehilmnoöuür][mn]en
SFX X en t [aäeilmnoöuür][mn]en
SFX X n t un
SFX X st 0 tst
SFX X n st e[lr]n
SFX X n st [dtw]en
SFX X en st [^dimnßstwzx]en
SFX X en st eien
SFX X n st [^e]ien
SFX X n st chnen
SFX X en st [^c]h[mn]en
SFX X n st [^aäehilmnoöuür][mn]en
SFX X en st [aäeilmnoöuür][mn]en
SFX X n st un
SFX X n st [ßsxz]en
SFX X n st ssen
SFX X n st schen
SFX X t st [^sz]t
SFX X t est zt
SFX Y Y 36
SFX Y n te e[lr]n
SFX Y n te [dtw]en
SFX Y en te [^dimntw]en
SFX Y en te eien
SFX Y n te [^e]ien
SFX Y n te chnen
SFX Y en te [^c]h[mn]en
SFX Y n te [^aäehilmnoöuür][mn]en
SFX Y en te [aäeilmnoöuür][mn]en
SFX Y n test e[lr]n
SFX Y n test [dtw]en
SFX Y en test [^dimntw]en
SFX Y en test eien
SFX Y n test [^e]ien
SFX Y n test chnen
SFX Y en test [^c]h[mn]en
SFX Y n test [^aäehilmnoöuür][mn]en
SFX Y en test [aäeilmnoöuür][mn]en
SFX Y n tet e[lr]n
SFX Y n tet [dtw]en
SFX Y en tet [^dimntw]en
SFX Y en tet eien
SFX Y n tet [^e]ien
SFX Y n tet chnen
SFX Y en tet [^c]h[mn]en
SFX Y n tet [^aäehilmnoöuür][mn]en
SFX Y en tet [aäeilmnoöuür][mn]en
SFX Y n ten e[lr]n
SFX Y n ten [dtw]en
SFX Y en ten [^dimntw]en
SFX Y en ten eien
SFX Y n ten [^e]ien
SFX Y n ten chnen
SFX Y en ten [^c]h[mn]en
SFX Y n ten [^aäehilmnoöuür][mn]en
SFX Y en ten [aäeilmnoöuür][mn]en
SFX Z Y 15
SFX Z 0 st [^hßsz]
SFX Z 0 st [^c]h
SFX Z 0 st [^s]ch
SFX Z 0 est [dfkstz]
SFX Z 0 est ch
SFX Z 0 est [au]ß
SFX Z 0 est ieß
SFX Z 0 est [io]ss
SFX Z 0 t [^dt]
SFX Z 0 et [dt]
SFX Z 0 n e
SFX Z 0 en ie
SFX Z 0 en [^e]
SFX Z 0 est iess
SFX Z 0 est [au]ss
# last two ...ss rules only used for swiss de_CH - but de_DE is unaffected
SFX O Y 21
SFX O n tes e[lr]n
SFX O n tes [dtw]en
SFX O en tes [^dmntw]en
SFX O n tes chnen
SFX O en tes [^c]h[mn]en
SFX O n tes [^aäehilmnoöuür][mn]en
SFX O en tes [aäeilmnoöuür][mn]en
SFX O n ter e[lr]n
SFX O n ter [dtw]en
SFX O en ter [^dmntw]en
SFX O n ter chnen
SFX O en ter [^c]h[mn]en
SFX O n ter [^aäehilmnoöuür][mn]en
SFX O en ter [aäeilmnoöuür][mn]en
SFX O n tem e[lr]n
SFX O n tem [dtw]en
SFX O en tem [^dmntw]en
SFX O n tem chnen
SFX O en tem [^c]h[mn]en
SFX O n tem [^aäehilmnoöuür][mn]en
SFX O en tem [aäeilmnoöuür][mn]en
REP 28
REP f ph
REP ph f
REP ß ss
REP ss ß
REP s ss
REP ss s
REP i ie
REP ie i
REP ee e
REP o oh
REP oh o
REP a ah
REP ah a
REP e eh
REP eh e
REP ae ä
REP oe ö
REP ue ü
REP Ae Ä
REP Oe Ö
REP Ue Ü
REP d t
REP t d
REP th t
REP t th
REP r rh
REP ch k
REP k ch
#REP eee ee-E
# this one will allow "-Eltern" - Hunspell 1.1.5 bug, but CHECKSHARPS obsoletes LANG de_DE
#LANG de_DE
CHECKSHARPS
COMPOUNDBEGIN x
COMPOUNDMIDDLE y
COMPOUNDEND z
FORBIDDENWORD d
# Prefixes are allowed at the beginning of compounds,
# suffixes are allowed at the end of compounds by default:
# (prefix)?(root)+(affix)?
# Affixes with COMPOUNDPERMITFLAG may be inside of compounds.
COMPOUNDPERMITFLAG c
ONLYINCOMPOUND o
# my PSEUDOROOT h(elper) flag
NEEDAFFIX h
# forbid uppercase characters at compound word bounds
# BUT I want to take care about it myself ;-)
# CHECKCOMPOUNDCASE
KEEPCASE w
# Affixes signed with CIRCUMFIX flag may be on a word when this word also has a prefix with CIRCUMFIX flag and vice versa.
# for decapitalizing nouns with fogemorphemes
CIRCUMFIX f
# this one would make a separate dict entry "Denkmalsschutz" invalidate the
# compound of "Denkmal"+"schutz". We do not want this feature here...
# CHECKCOMPOUNDREP
# make not all possible suggestions for typos of Flicken or some rare words
NOSUGGEST n
WORDCHARS ß-.
# - setting this to 2 decreases performance by 1/10 but is needed for "öl" and "ei"
# - setting this to 1 for handling Fuge-elements with dashes (Arbeits-) dash will
# be a special word but - is handled as a affix now
COMPOUNDMIN 2
# this ones are for Duden R36 (old orthography)
#CHECKCOMPOUNDPATTERN 2 #oldspell
#CHECKCOMPOUNDPATTERN ee e #oldspell
#CHECKCOMPOUNDPATTERN oo o #oldspell
# also need oo o
# this one needs to be flagable to be used for old orthography
#CHECKCOMPOUNDTRIPLE
PFX i Y 1
PFX i 0 -/coyf .
SFX j Y 3
SFX j 0 0/xoc .
SFX j 0 -/zocf .
SFX j 0 -/cz .
# Female forms for compound/Compound words:
# attention: [^e][^n] does also filter out "...er" !
SFX g Y 12
SFX g 0 innen/xyoc [^n]
SFX g en innen/xyoc en
SFX g 0 Innen/xyoc [^n]
SFX g en Innen/xyoc en
SFX g 0 innen/xyocf [^n]
SFX g en innen/xyocf en
SFX g 0 Innen/xyocf [^n]
SFX g en Innen/xyocf en
SFX g 0 innen-/cz [^n]
SFX g en innen-/cz en
SFX g 0 Innen-/cz [^n]
SFX g en Innen-/cz en
PFX k Y 2
PFX k 0 -/coxf .
PFX k 0 0/coy .
SFX e Y 2
SFX e 0 0/yoc .
SFX e 0 -/zc .
# for Uppercased end-words to prepend - and lowercase: (Tier/EPSm) (EX: Bettbezüge und *-laken*)
# AND
# for lowercased end-words to prepend - and re-uppercase : (tier/EPSozm) (EX: Arbeits*-Tier*)
#PFX m A -a/co A
#PFX m a -/ a
PFX m Y 58
PFX m A -a A
PFX m B -b B
PFX m C -c C
PFX m D -d D
PFX m E -e E
PFX m F -f F
PFX m G -g G
PFX m H -h H
PFX m I -i I
PFX m J -j J
PFX m K -k K
PFX m L -l L
PFX m M -m M
PFX m N -n N
PFX m O -o O
PFX m P -p P
PFX m Q -q Q
PFX m R -r R
PFX m S -s S
PFX m T -t T
PFX m U -u U
PFX m V -v V
PFX m W -w W
PFX m X -x X
PFX m Y -y Y
PFX m Z -z Z
PFX m Ä -ä Ä
PFX m Ö -ö Ö
PFX m Ü -ü Ü
PFX m a -A/co a
PFX m b -B/co b
PFX m c -C/co c
PFX m d -D/co d
PFX m e -E/co e
PFX m f -F/co f
PFX m g -G/co g
PFX m h -H/co h
PFX m i -I/co i
PFX m j -J/co j
PFX m k -K/co k
PFX m l -L/co l
PFX m m -M/co m
PFX m n -N/co n
PFX m o -O/co o
PFX m p -P/co p
PFX m q -Q/co q
PFX m r -R/co r
PFX m s -S/co s
PFX m t -T/co t
PFX m u -U/co u
PFX m v -V/co v
PFX m w -W/co w
PFX m x -X/co x
PFX m y -Y/co y
PFX m z -Z/co z
PFX m ä -Ä/co ä
PFX m ö -Ö/co ö
PFX m ü -Ü/co ü
# Decapitalizing: (not used ATM... )
# /co(f) : compound permit, in coumpount only, (decapitalizing with fogemorphemes)
#PFX l Y 29
#PFX l A a/co A
#PFX l Ä ä/co Ä
#PFX l B b/co B
#PFX l C c/co C
#PFX l D d/co D
#PFX l E e/co E
#PFX l F f/co F
#PFX l G g/co G
#PFX l H h/co H
#PFX l I i/co I
#PFX l J j/co J
#PFX l K k/co K
#PFX l L l/co L
#PFX l M m/co M
#PFX l N n/co N
#PFX l O o/co O
#PFX l Ö ö/co Ö
#PFX l P p/co P
#PFX l Q q/co Q
#PFX l R r/co R
#PFX l S s/co S
#PFX l T t/co T
#PFX l U u/co U
#PFX l Ü ü/co Ü
#PFX l V v/co V
#PFX l W w/co W
#PFX l X x/co X
#PFX l Y y/co Y
#PFX l Z z/co Z
# private hunspell flags:
# --x : not for capmain (rare words)
# With "BREAK -" some wrong forms are accepted but that is needed for US-Wirtschaft etc.
# So enabling this is the lesser evil. No perfect solution found so far...
BREAK 2
BREAK -
BREAK .

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,205 @@
SET UTF-8
TRY esianrtolcdugmphbyfvkwzESIANRTOLCDUGMPHBYFVKWZ'
ICONV 1
ICONV '
NOSUGGEST !
# ordinal numbers
COMPOUNDMIN 1
# only in compounds: 1th, 2th, 3th
ONLYINCOMPOUND c
# compound rules:
# 1. [0-9]*1[0-9]th (10th, 11th, 12th, 56714th, etc.)
# 2. [0-9]*[02-9](1st|2nd|3rd|[4-9]th) (21st, 22nd, 123rd, 1234th, etc.)
COMPOUNDRULE 2
COMPOUNDRULE n*1t
COMPOUNDRULE n*mp
WORDCHARS 0123456789
PFX A Y 1
PFX A 0 re .
PFX I Y 1
PFX I 0 in .
PFX U Y 1
PFX U 0 un .
PFX C Y 1
PFX C 0 de .
PFX E Y 1
PFX E 0 dis .
PFX F Y 1
PFX F 0 con .
PFX K Y 1
PFX K 0 pro .
SFX V N 2
SFX V e ive e
SFX V 0 ive [^e]
SFX N Y 3
SFX N e ion e
SFX N y ication y
SFX N 0 en [^ey]
SFX X Y 3
SFX X e ions e
SFX X y ications y
SFX X 0 ens [^ey]
SFX H N 2
SFX H y ieth y
SFX H 0 th [^y]
SFX Y Y 1
SFX Y 0 ly .
SFX G Y 2
SFX G e ing e
SFX G 0 ing [^e]
SFX J Y 2
SFX J e ings e
SFX J 0 ings [^e]
SFX D Y 4
SFX D 0 d e
SFX D y ied [^aeiou]y
SFX D 0 ed [^ey]
SFX D 0 ed [aeiou]y
SFX T N 4
SFX T 0 st e
SFX T y iest [^aeiou]y
SFX T 0 est [aeiou]y
SFX T 0 est [^ey]
SFX R Y 4
SFX R 0 r e
SFX R y ier [^aeiou]y
SFX R 0 er [aeiou]y
SFX R 0 er [^ey]
SFX Z Y 4
SFX Z 0 rs e
SFX Z y iers [^aeiou]y
SFX Z 0 ers [aeiou]y
SFX Z 0 ers [^ey]
SFX S Y 4
SFX S y ies [^aeiou]y
SFX S 0 s [aeiou]y
SFX S 0 es [sxzh]
SFX S 0 s [^sxzhy]
SFX P Y 3
SFX P y iness [^aeiou]y
SFX P 0 ness [aeiou]y
SFX P 0 ness [^y]
SFX M Y 1
SFX M 0 's .
SFX B Y 3
SFX B 0 able [^aeiou]
SFX B 0 able ee
SFX B e able [^aeiou]e
SFX L Y 1
SFX L 0 ment .
REP 90
REP a ei
REP ei a
REP a ey
REP ey a
REP ai ie
REP ie ai
REP alot a_lot
REP are air
REP are ear
REP are eir
REP air are
REP air ere
REP ere air
REP ere ear
REP ere eir
REP ear are
REP ear air
REP ear ere
REP eir are
REP eir ere
REP ch te
REP te ch
REP ch ti
REP ti ch
REP ch tu
REP tu ch
REP ch s
REP s ch
REP ch k
REP k ch
REP f ph
REP ph f
REP gh f
REP f gh
REP i igh
REP igh i
REP i uy
REP uy i
REP i ee
REP ee i
REP j di
REP di j
REP j gg
REP gg j
REP j ge
REP ge j
REP s ti
REP ti s
REP s ci
REP ci s
REP k cc
REP cc k
REP k qu
REP qu k
REP kw qu
REP o eau
REP eau o
REP o ew
REP ew o
REP oo ew
REP ew oo
REP ew ui
REP ui ew
REP oo ui
REP ui oo
REP ew u
REP u ew
REP oo u
REP u oo
REP u oe
REP oe u
REP u ieu
REP ieu u
REP ue ew
REP ew ue
REP uff ough
REP oo ieu
REP ieu oo
REP ier ear
REP ear ier
REP ear air
REP air ear
REP w qu
REP qu w
REP z ss
REP ss z
REP shun tion
REP shun sion
REP shun cion
REP size cise

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,106 @@
SET ISO8859-2
TRY ABCDEFGHIJKLMNOPQRSTUVWXYæÆèÈðо®abcdefghijklmnopqrstuvwxy
SFX A Y 13
SFX A 0 a [^aeiou]
SFX A 0 u [^aeiou]
SFX A 0 e [^aeiou]
SFX A 0 om [^aeiou]
SFX A a e a
SFX A a i a
SFX A a om a
SFX A o a o
SFX A o u o
SFX A o om o
SFX A e a e
SFX A e u e
SFX A e em e
SFX B Y 5
SFX B ti m ti
SFX B ti ¹ ti
SFX B ti mo ti
SFX B ti te ti
SFX B ti u ti
SFX C Y 4
SFX C ti h ti
SFX C ti smo ti
SFX C ti ste ti
SFX C ti ¹e ti
SFX D Y 5
SFX D ti h ti
SFX D ti ¹e ti
SFX D ti smo ti
SFX D ti ste ti
SFX D ti hu ti
SFX E Y 3
SFX E ti o ti
SFX E ti li ti
SFX E ti la ti
SFX F Y 5
SFX F ti h ti
SFX F ti ¹e ti
SFX F ti smo ti
SFX F ti ste ti
SFX F iti hu ti
SFX G Y 5
SFX G ti m ti
SFX G ti ¹ ti
SFX G ti mo ti
SFX G ti te ti
SFX G iti e iti
SFX H Y 6
SFX H 0 a .
SFX H 0 u .
SFX H 0 i .
SFX H 0 ih .
SFX H 0 im .
SFX H 0 e .
SFX I Y 6
SFX I 0 a .
SFX I 0 u .
SFX I 0 i .
SFX I 0 ih .
SFX I 0 ima .
SFX I 0 e .
SFX J Y 6
SFX J 0 a .
SFX J 0 u .
SFX J 0 i .
SFX J 0 ih .
SFX J 0 im .
SFX J 0 o .
SFX K Y 6
SFX K 0 a .
SFX K 0 u .
SFX K 0 i .
SFX K 0 ih .
SFX K 0 ima .
SFX K 0 o .
SFX L Y 2
SFX L 0 oj .
SFX L 0 om .
SFX M Y 4
SFX M a e a
SFX M a i a
SFX M a o a
SFX M a u a

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff