User actions dialog now works, the spell checker module ignores twitter usernames, hashtags, URL and email addresses

This commit is contained in:
2015-02-01 00:49:03 -06:00
parent ff65b73232
commit e93f0f4980
9 changed files with 195 additions and 12 deletions

View File

@@ -8,6 +8,8 @@ import config
import languageHandler
from enchant.checker import SpellChecker
from enchant.errors import DictNotFoundError
from enchant import tokenize
import twitterFilter
class spellChecker(object):
def __init__(self, text, dictionary):
@@ -17,10 +19,10 @@ class spellChecker(object):
try:
if config.app["app-settings"]["language"] == "system":
log.debug("Using the system language")
self.checker = SpellChecker()
self.checker = SpellChecker(filters=[twitterFilter.TwitterFilter, tokenize.EmailFilter, tokenize.URLFilter])
else:
log.debug("Using language: %s" % (languageHandler.getLanguage(),))
self.checker = SpellChecker(languageHandler.getLanguage())
self.checker = SpellChecker(languageHandler.getLanguage(), filters=[twitterFilter.TwitterFilter, tokenize.EmailFilter, tokenize.URLFilter])
self.checker.set_text(text)
except DictNotFoundError:
log.exception("Dictionary for language %s not found." % (dictionary,))

View File

@@ -0,0 +1,15 @@
# -*- coding: utf-8 -*-
import re
from enchant.tokenize import Filter
class TwitterFilter(Filter):
"""Filter skipping over twitter usernames and hashtags.
This filter skips any words matching the following regular expression:
^[#@](\S){1, }$
That is, any words that resemble users and hashtags.
"""
_pattern = re.compile(r"^[#@](\S){1,}$")
def _skip(self,word):
if self._pattern.match(word):
return True
return False