Properly display HTML entities in tweets

This commit is contained in:
2022-01-10 05:30:14 -06:00
parent 7a78accd1f
commit 301e3d4361
5 changed files with 18 additions and 15 deletions

View File

@@ -3,7 +3,6 @@ import platform
system = platform.system()
from . import utils
import re
import html.entities
import time
import output
import languageHandler
@@ -11,21 +10,9 @@ import arrow
import logging
import config
from .long_tweets import twishort, tweets
from .utils import StripChars
log = logging.getLogger("compose")
# Matches named ("&amp;"), decimal ("&#39;") and hexadecimal ("&#x27;")
# character references. Compiled once at import time instead of on every call.
_ENTITY_RE = re.compile(r"&(#[xX][0-9a-fA-F]+|#\d+|\w+);")


def StripChars(s):
    """Convert HTML entities in s to their unicode equivalents and return a string.

    Handles three forms of reference:

    * decimal numeric references such as ``&#39;``
    * hexadecimal numeric references such as ``&#x27;``
    * named references such as ``&amp;`` (HTML 4 names first, then HTML5 names)

    Anything that cannot be decoded (an unknown name, or a numeric reference
    that is not a valid code point) is left in the text exactly as written.
    """
    def matchFunc(match):
        """Return the replacement text for a single matched reference."""
        body = match.group(1)
        if body.startswith("#"):
            digits = body[1:]
            try:
                if digits[:1] in ("x", "X"):
                    return chr(int(digits[1:], 16))
                return chr(int(digits))
            except (ValueError, OverflowError):
                # Out-of-range code point: keep the original text rather than
                # letting malformed input raise.
                return match.group(0)
        replacement = html.entities.entitydefs.get(body)
        if replacement is None:
            # entitydefs only covers HTML 4 names; html5 keys include the
            # trailing semicolon (for example "apos;").
            replacement = html.entities.html5.get(body + ";", match.group(0))
        return replacement

    return str(_ENTITY_RE.sub(matchFunc, s))
# Lowercase ASCII letters "a" through "z".
chars = "abcdefghijklmnopqrstuvwxyz"
def compose_tweet(tweet, db, relative_times, show_screen_names=False, session=None):