From 301e3d436170570369168d4134a068e12e049065 Mon Sep 17 00:00:00 2001 From: Manuel Cortez Date: Mon, 10 Jan 2022 05:30:14 -0600 Subject: [PATCH] Display properly HTML Entities in tweets --- doc/changelog.md | 1 + src/controller/messages.py | 1 + src/sessions/twitter/compose.py | 15 +-------------- src/sessions/twitter/templates.py | 2 +- src/sessions/twitter/utils.py | 14 ++++++++++++++ 5 files changed, 18 insertions(+), 15 deletions(-) diff --git a/doc/changelog.md b/doc/changelog.md index 6916756a..83d7022f 100644 --- a/doc/changelog.md +++ b/doc/changelog.md @@ -7,6 +7,7 @@ TWBlue Changelog * TWBlue can display image descriptions within Tweet templates. For that, you can use the $image_description variable in your template. * We have restored conversation and threads support powered by Twitter API V2 thanks to a set of improvements we have done in the application, as well as more generous limits to Tweet monthly cap by Twitter. * In the Windows 11 Keymap, the default shortcut to open the keystrokes editor is now CTRL+Alt+Windows+K to avoid conflicts with the new global mute microphone shortcut. +* TWBlue should now properly display HTML entities in a tweet's text. * TWBlue should no longer load old tweets in buffers. * Fixed issue when uploading attachments (images, videos or gif files) while sending tweets or replies. * Fixed an error that was making TWBlue to ask for a restart after saving account settings, even if such restart was not required. 
([#413,](https://github.com/manuelcortez/TWBlue/issues/413)) diff --git a/src/controller/messages.py b/src/controller/messages.py index 421be6d7..9b93ed58 100644 --- a/src/controller/messages.py +++ b/src/controller/messages.py @@ -367,6 +367,7 @@ class viewTweet(basicTweet): pass def clear_text(self, text): + text = utils.StripChars(text) urls = utils.find_urls_in_text(text) for i in urls: if "https://twitter.com/" in i: diff --git a/src/sessions/twitter/compose.py b/src/sessions/twitter/compose.py index 1b761390..539b4df7 100644 --- a/src/sessions/twitter/compose.py +++ b/src/sessions/twitter/compose.py @@ -3,7 +3,6 @@ import platform system = platform.system() from . import utils import re -import html.entities import time import output import languageHandler @@ -11,21 +10,9 @@ import arrow import logging import config from .long_tweets import twishort, tweets +from .utils import StripChars log = logging.getLogger("compose") -def StripChars(s): - """Converts any html entities in s to their unicode-decoded equivalents and returns a string.""" - entity_re = re.compile(r"&(#\d+|\w+);") - def matchFunc(match): - """Nested function to handle a match object. - If we match &blah; and it's not found, &blah; will be returned. - if we match #\d+, unichr(digits) will be returned. 
- Else, a unicode string will be returned.""" - if match.group(1).startswith('#'): return chr(int(match.group(1)[1:])) - replacement = html.entities.entitydefs.get(match.group(1), "&%s;" % match.group(1)) - return replacement - return str(entity_re.sub(matchFunc, s)) - chars = "abcdefghijklmnopqrstuvwxyz" def compose_tweet(tweet, db, relative_times, show_screen_names=False, session=None): diff --git a/src/sessions/twitter/templates.py b/src/sessions/twitter/templates.py index 156a2d67..bd4067b0 100644 --- a/src/sessions/twitter/templates.py +++ b/src/sessions/twitter/templates.py @@ -32,7 +32,7 @@ def process_text(tweet): elif hasattr(tweet, "text"): text = tweet.text # Cleanup mentions, so we'll remove more than 2 mentions to make the tweet easier to read. - text = utils.clean_mentions(text) + text = utils.clean_mentions(utils.StripChars(text)) # Replace URLS for extended version of those. if hasattr(tweet, "entities"): text = utils.expand_urls(text, tweet.entities) diff --git a/src/sessions/twitter/utils.py b/src/sessions/twitter/utils.py index 47cef7a7..c13d3088 100644 --- a/src/sessions/twitter/utils.py +++ b/src/sessions/twitter/utils.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- import re +import html.entities import output import logging import requests @@ -16,6 +17,19 @@ url_re = re.compile(r"(?i)\b((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4 url_re2 = re.compile("(?:\w+://|www\.)[^ ,.?!#%=+][^ \\n\\t]*") bad_chars = '\'\\\n.,[](){}:;"' +def StripChars(s): + """Converts any html entities in s to their unicode-decoded equivalents and returns a string.""" + entity_re = re.compile(r"&(#\d+|\w+);") + def matchFunc(match): + """Nested function to handle a match object. + If we match &blah; and it's not found, &blah; will be returned. + if we match #\d+, unichr(digits) will be returned. 
+ Else, a unicode string will be returned.""" + if match.group(1).startswith('#'): return chr(int(match.group(1)[1:])) + replacement = html.entities.entitydefs.get(match.group(1), "&%s;" % match.group(1)) + return replacement + return str(entity_re.sub(matchFunc, s)) + def find_urls_in_text(text): return url_re2.findall(text)