Display properly HTML Entities in tweets

This commit is contained in:
Manuel Cortez 2022-01-10 05:30:14 -06:00
parent 7a78accd1f
commit 301e3d4361
No known key found for this signature in database
GPG Key ID: 262CC30FA01B5CBF
5 changed files with 18 additions and 15 deletions

View File

@ -7,6 +7,7 @@ TWBlue Changelog
* TWBlue can display image descriptions within Tweet templates. For that, you can use the $image_description variable in your template.
* We have restored conversation and threads support powered by Twitter API V2 thanks to a set of improvements we have done in the application, as well as more generous limits to Tweet monthly cap by Twitter.
* In the Windows 11 Keymap, the default shortcut to open the keystrokes editor is now CTRL+Alt+Windows+K to avoid conflicts with the new global mute microphone shortcut.
* TWBlue show display properly HTML entities in tweet's text.
* TWBlue should no longer load old tweets in buffers.
* Fixed issue when uploading attachments (images, videos or gif files) while sending tweets or replies.
* Fixed an error that was making TWBlue to ask for a restart after saving account settings, even if such restart was not required. ([#413,](https://github.com/manuelcortez/TWBlue/issues/413))

View File

@ -367,6 +367,7 @@ class viewTweet(basicTweet):
pass
def clear_text(self, text):
text = utils.StripChars(text)
urls = utils.find_urls_in_text(text)
for i in urls:
if "https://twitter.com/" in i:

View File

@ -3,7 +3,6 @@ import platform
system = platform.system()
from . import utils
import re
import html.entities
import time
import output
import languageHandler
@ -11,21 +10,9 @@ import arrow
import logging
import config
from .long_tweets import twishort, tweets
from .utils import StripChars
log = logging.getLogger("compose")
def StripChars(s):
"""Converts any html entities in s to their unicode-decoded equivalents and returns a string."""
entity_re = re.compile(r"&(#\d+|\w+);")
def matchFunc(match):
"""Nested function to handle a match object.
If we match &blah; and it's not found, &blah; will be returned.
if we match #\d+, unichr(digits) will be returned.
Else, a unicode string will be returned."""
if match.group(1).startswith('#'): return chr(int(match.group(1)[1:]))
replacement = html.entities.entitydefs.get(match.group(1), "&%s;" % match.group(1))
return replacement
return str(entity_re.sub(matchFunc, s))
chars = "abcdefghijklmnopqrstuvwxyz"
def compose_tweet(tweet, db, relative_times, show_screen_names=False, session=None):

View File

@ -32,7 +32,7 @@ def process_text(tweet):
elif hasattr(tweet, "text"):
text = tweet.text
# Cleanup mentions, so we'll remove more than 2 mentions to make the tweet easier to read.
text = utils.clean_mentions(text)
text = utils.clean_mentions(utils.StripChars(text))
# Replace URLS for extended version of those.
if hasattr(tweet, "entities"):
text = utils.expand_urls(text, tweet.entities)

View File

@ -1,5 +1,6 @@
# -*- coding: utf-8 -*-
import re
import html.entities
import output
import logging
import requests
@ -16,6 +17,19 @@ url_re = re.compile(r"(?i)\b((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4
url_re2 = re.compile("(?:\w+://|www\.)[^ ,.?!#%=+][^ \\n\\t]*")
bad_chars = '\'\\\n.,[](){}:;"'
def StripChars(s):
"""Converts any html entities in s to their unicode-decoded equivalents and returns a string."""
entity_re = re.compile(r"&(#\d+|\w+);")
def matchFunc(match):
"""Nested function to handle a match object.
If we match &blah; and it's not found, &blah; will be returned.
if we match #\d+, unichr(digits) will be returned.
Else, a unicode string will be returned."""
if match.group(1).startswith('#'): return chr(int(match.group(1)[1:]))
replacement = html.entities.entitydefs.get(match.group(1), "&%s;" % match.group(1))
return replacement
return str(entity_re.sub(matchFunc, s))
def find_urls_in_text(text):
return url_re2.findall(text)