mirror of
https://github.com/MCV-Software/TWBlue.git
synced 2024-11-22 19:28:09 -06:00
Improved HTML parsing for toots. Removed hashtag links from the URL list.
This commit is contained in:
parent
f151d6554d
commit
0aad2f0ab3
@ -17,7 +17,8 @@ from mysc.thread_utils import call_threaded
|
||||
from pubsub import pub
|
||||
from extra import ocr
|
||||
from wxUI import buffers, dialogs, commonMessageDialogs
|
||||
from wxUI.dialogs.mastodon import dialogs, menus
|
||||
from wxUI.dialogs.mastodon import menus
|
||||
from wxUI.dialogs.mastodon import dialogs as mastodon_dialogs
|
||||
|
||||
log = logging.getLogger("controller.buffers.mastodon.base")
|
||||
|
||||
@ -330,7 +331,7 @@ class BaseBuffer(base.Buffer):
|
||||
toot = self.get_item()
|
||||
id = toot.id
|
||||
if self.session.settings["general"]["boost_mode"] == "ask":
|
||||
answer = dialogs.boost_question()
|
||||
answer = mastodon_dialogs.boost_question()
|
||||
if answer == True:
|
||||
self._direct_boost(id)
|
||||
else:
|
||||
@ -378,9 +379,9 @@ class BaseBuffer(base.Buffer):
|
||||
if url == '':
|
||||
toot = self.get_item()
|
||||
if toot.reblog != None:
|
||||
urls = utils.find_urls(toot.reblog.content)
|
||||
urls = utils.find_urls(toot.REBLOG)
|
||||
else:
|
||||
urls = utils.find_urls(toot.reblog.content)
|
||||
urls = utils.find_urls(toot)
|
||||
if len(urls) == 1:
|
||||
url=urls[0]
|
||||
elif len(urls) > 1:
|
||||
@ -406,7 +407,7 @@ class BaseBuffer(base.Buffer):
|
||||
if item.account.id != self.session.db["user_id"] or item.reblog != None:
|
||||
output.speak(_("You can delete only your own toots."))
|
||||
return
|
||||
answer = dialogs.delete_toot_dialog()
|
||||
answer = mastodon_dialogs.delete_toot_dialog()
|
||||
if answer == True:
|
||||
items = self.session.db[self.name]
|
||||
try:
|
||||
|
@ -1,13 +1,19 @@
|
||||
import re
|
||||
from html.parser import HTMLParser
|
||||
|
||||
url_re = re.compile("(?:\w+://|www\.)[^ ,.?!#%=+][^ \\n\\t]*")
|
||||
# Captures the href value of anchors written as `<a href="...">` or
# `<a href='...'>` (href must be the first attribute). Written as a raw
# string so `\s` reaches the regex engine instead of being treated as an
# invalid string escape; the quote classes hold only the two quote characters
# (a stray `|` used to be accepted as an opening quote), and at least one
# whitespace character is required between `<a` and `href`.
url_re = re.compile(r"""<a\s+href=["'](.*?)["'].*?>""")
|
||||
|
||||
class HTMLFilter(HTMLParser):
    """HTML parser that accumulates a plain-text rendering in ``text``.

    Character data is appended verbatim; a ``<br>`` start tag contributes one
    newline and a ``<p>`` start tag contributes a blank line. No other tag
    adds anything to the output.
    """

    # Plain text collected so far. The first append shadows this class-level
    # default with a per-instance value.
    text = ""

    # Line-break text emitted when the given start tag is encountered.
    _TAG_BREAKS = {"br": "\n", "p": "\n\n"}

    def handle_starttag(self, tag, attrs):
        """Append the line break associated with *tag*, if it has one."""
        separator = self._TAG_BREAKS.get(tag)
        if separator is not None:
            self.text += separator

    def handle_data(self, data):
        """Append a run of character data to the collected text."""
        self.text = self.text + data
|
||||
|
||||
def html_filter(data):
|
||||
f = HTMLFilter()
|
||||
f.feed(data)
|
||||
@ -45,5 +51,11 @@ def get_media_urls(toot):
|
||||
urls.append(media.get("url"))
|
||||
return urls
|
||||
|
||||
def find_urls(text):
|
||||
return url_re.findall(html_filter(text))
|
||||
def find_urls(toot, include_tags=False):
    """Return the URLs matched by ``url_re`` in a toot's HTML content.

    Args:
        toot: Object exposing ``content`` (the HTML string that is searched)
            and ``tags`` (an iterable of mappings with a ``"name"`` key).
        include_tags: When true, every match is returned. When false (the
            default), URLs ending in ``/tags/<name>`` for any of the toot's
            tags are left out.

    Returns:
        A list of URL strings in the order they appear in the content.
    """
    urls = url_re.findall(toot.content)
    if include_tags:
        return urls
    # Lowercase both sides of the comparison: previously only the URL was
    # lowercased, so a tag whose name contained uppercase never matched.
    tag_suffixes = tuple("/tags/" + tag["name"].lower() for tag in toot.tags)
    # str.endswith accepts a tuple, so each URL is checked against all tags in
    # one call; an empty tuple never matches, leaving the list untouched.
    return [url for url in urls if not url.lower().endswith(tag_suffixes)]
|
Loading…
Reference in New Issue
Block a user