From 301e3d436170570369168d4134a068e12e049065 Mon Sep 17 00:00:00 2001
From: Manuel Cortez <manuel@manuelcortez.net>
Date: Mon, 10 Jan 2022 05:30:14 -0600
Subject: [PATCH] Properly display HTML entities in tweets

---
 doc/changelog.md                  |  1 +
 src/controller/messages.py        |  1 +
 src/sessions/twitter/compose.py   | 15 +--------------
 src/sessions/twitter/templates.py |  2 +-
 src/sessions/twitter/utils.py     | 14 ++++++++++++++
 5 files changed, 18 insertions(+), 15 deletions(-)

diff --git a/doc/changelog.md b/doc/changelog.md
index 6916756a..83d7022f 100644
--- a/doc/changelog.md
+++ b/doc/changelog.md
@@ -7,6 +7,7 @@ TWBlue Changelog
     * TWBlue can display image descriptions within Tweet templates. For that, you can use the $image_description variable in your template.
 * We have restored conversation and threads support powered by Twitter API V2 thanks to a set of improvements we have done in the application, as well as more generous limits to Tweet monthly cap by Twitter.
 * In the Windows 11 Keymap, the default shortcut to open the keystrokes editor is now CTRL+Alt+Windows+K to avoid conflicts with the new global mute microphone shortcut.
+* TWBlue should now properly display HTML entities in the text of tweets.
 * TWBlue should no longer load old tweets in buffers.
 * Fixed issue when uploading attachments (images, videos or gif files) while sending tweets or replies.
 * Fixed an error that was making TWBlue to ask for a restart after saving account settings, even if such restart was not required. ([#413,](https://github.com/manuelcortez/TWBlue/issues/413))
diff --git a/src/controller/messages.py b/src/controller/messages.py
index 421be6d7..9b93ed58 100644
--- a/src/controller/messages.py
+++ b/src/controller/messages.py
@@ -367,6 +367,7 @@ class viewTweet(basicTweet):
         pass
 
     def clear_text(self, text):
+        text = utils.StripChars(text)
         urls = utils.find_urls_in_text(text)
         for i in urls:
             if "https://twitter.com/" in i:
diff --git a/src/sessions/twitter/compose.py b/src/sessions/twitter/compose.py
index 1b761390..539b4df7 100644
--- a/src/sessions/twitter/compose.py
+++ b/src/sessions/twitter/compose.py
@@ -3,7 +3,6 @@ import platform
 system = platform.system()
 from . import utils
 import re
-import html.entities
 import time
 import output
 import languageHandler
@@ -11,21 +10,9 @@ import arrow
 import logging
 import config
 from .long_tweets import twishort, tweets
+from .utils import StripChars
 log = logging.getLogger("compose")
 
-def StripChars(s):
-    """Converts any html entities in s to their unicode-decoded equivalents and returns a string."""
-    entity_re = re.compile(r"&(#\d+|\w+);")
-    def matchFunc(match):
-        """Nested function to handle a match object.
-       If we match &blah; and it's not found, &blah; will be returned.
-       if we match #\d+, unichr(digits) will be returned.
-       Else, a unicode string will be returned."""
-        if match.group(1).startswith('#'): return chr(int(match.group(1)[1:]))
-        replacement = html.entities.entitydefs.get(match.group(1), "&%s;" % match.group(1))
-        return replacement
-    return str(entity_re.sub(matchFunc, s))
-
 chars = "abcdefghijklmnopqrstuvwxyz"
 
 def compose_tweet(tweet, db, relative_times, show_screen_names=False, session=None):
diff --git a/src/sessions/twitter/templates.py b/src/sessions/twitter/templates.py
index 156a2d67..bd4067b0 100644
--- a/src/sessions/twitter/templates.py
+++ b/src/sessions/twitter/templates.py
@@ -32,7 +32,7 @@ def process_text(tweet):
     elif hasattr(tweet, "text"):
         text = tweet.text
     # Cleanup mentions, so we'll remove more than 2 mentions to make the tweet easier to read.
-    text = utils.clean_mentions(text)
+    text = utils.clean_mentions(utils.StripChars(text))
     # Replace URLS for extended version of those.
     if hasattr(tweet, "entities"):
         text = utils.expand_urls(text, tweet.entities)
diff --git a/src/sessions/twitter/utils.py b/src/sessions/twitter/utils.py
index 47cef7a7..c13d3088 100644
--- a/src/sessions/twitter/utils.py
+++ b/src/sessions/twitter/utils.py
@@ -1,5 +1,6 @@
 # -*- coding: utf-8 -*-
 import re
+import html.entities
 import output
 import logging
 import requests
@@ -16,6 +17,19 @@ url_re = re.compile(r"(?i)\b((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4
 url_re2 = re.compile("(?:\w+://|www\.)[^ ,.?!#%=+][^ \\n\\t]*")
 bad_chars = '\'\\\n.,[](){}:;"'
 
+def StripChars(s):
+    """Return s with named (&amp;) and decimal (&#38;) HTML entities decoded to characters.
+    Unknown names and code points rejected by chr() are left in the text unchanged."""
+    def decode(match):
+        ref = match.group(1)
+        if not ref.startswith("#"):
+            return html.entities.entitydefs.get(ref, match.group(0))
+        try:  # chr() raises above U+10FFFF; keep the raw reference instead of crashing.
+            return chr(int(ref[1:]))
+        except (ValueError, OverflowError):
+            return match.group(0)
+    return re.sub(r"&(#\d+|\w+);", decode, s)
+
 def find_urls_in_text(text):
     return  url_re2.findall(text)