mirror of
https://github.com/MCV-Software/TWBlue.git
synced 2025-04-05 11:22:30 -04:00
Improved HTML parsing for toots. Removed tags from the URL list.
This commit is contained in:
parent
f151d6554d
commit
0aad2f0ab3
@ -17,7 +17,8 @@ from mysc.thread_utils import call_threaded
|
|||||||
from pubsub import pub
|
from pubsub import pub
|
||||||
from extra import ocr
|
from extra import ocr
|
||||||
from wxUI import buffers, dialogs, commonMessageDialogs
|
from wxUI import buffers, dialogs, commonMessageDialogs
|
||||||
from wxUI.dialogs.mastodon import dialogs, menus
|
from wxUI.dialogs.mastodon import menus
|
||||||
|
from wxUI.dialogs.mastodon import dialogs as mastodon_dialogs
|
||||||
|
|
||||||
log = logging.getLogger("controller.buffers.mastodon.base")
|
log = logging.getLogger("controller.buffers.mastodon.base")
|
||||||
|
|
||||||
@ -330,7 +331,7 @@ class BaseBuffer(base.Buffer):
|
|||||||
toot = self.get_item()
|
toot = self.get_item()
|
||||||
id = toot.id
|
id = toot.id
|
||||||
if self.session.settings["general"]["boost_mode"] == "ask":
|
if self.session.settings["general"]["boost_mode"] == "ask":
|
||||||
answer = dialogs.boost_question()
|
answer = mastodon_dialogs.boost_question()
|
||||||
if answer == True:
|
if answer == True:
|
||||||
self._direct_boost(id)
|
self._direct_boost(id)
|
||||||
else:
|
else:
|
||||||
@ -378,9 +379,9 @@ class BaseBuffer(base.Buffer):
|
|||||||
if url == '':
|
if url == '':
|
||||||
toot = self.get_item()
|
toot = self.get_item()
|
||||||
if toot.reblog != None:
|
if toot.reblog != None:
|
||||||
urls = utils.find_urls(toot.reblog.content)
|
urls = utils.find_urls(toot.REBLOG)
|
||||||
else:
|
else:
|
||||||
urls = utils.find_urls(toot.reblog.content)
|
urls = utils.find_urls(toot)
|
||||||
if len(urls) == 1:
|
if len(urls) == 1:
|
||||||
url=urls[0]
|
url=urls[0]
|
||||||
elif len(urls) > 1:
|
elif len(urls) > 1:
|
||||||
@ -406,7 +407,7 @@ class BaseBuffer(base.Buffer):
|
|||||||
if item.account.id != self.session.db["user_id"] or item.reblog != None:
|
if item.account.id != self.session.db["user_id"] or item.reblog != None:
|
||||||
output.speak(_("You can delete only your own toots."))
|
output.speak(_("You can delete only your own toots."))
|
||||||
return
|
return
|
||||||
answer = dialogs.delete_toot_dialog()
|
answer = mastodon_dialogs.delete_toot_dialog()
|
||||||
if answer == True:
|
if answer == True:
|
||||||
items = self.session.db[self.name]
|
items = self.session.db[self.name]
|
||||||
try:
|
try:
|
||||||
|
@ -1,13 +1,19 @@
|
|||||||
import re
|
import re
|
||||||
from html.parser import HTMLParser
|
from html.parser import HTMLParser
|
||||||
|
|
||||||
# Matches an HTML anchor start tag and captures the value of its ``href``
# attribute.  ``href`` may be preceded by other attributes, and the value may
# be wrapped in single or double quotes.  Raw string so ``\s`` reaches the
# regex engine instead of being treated as a (deprecated) string escape.
url_re = re.compile(r'<a\s+(?:[^>]*?\s)?href=[\'"](.*?)[\'"][^>]*>')
|
||||||
|
|
||||||
class HTMLFilter(HTMLParser):
    """Accumulate the text content of an HTML fragment in ``self.text``.

    Character data is appended verbatim.  A ``<br>`` start tag contributes a
    single newline and a ``<p>`` start tag contributes a blank line, so the
    paragraph and line structure of the markup survives in the plain text.
    """

    # Class-level default.  ``+=`` on ``self.text`` rebinds the attribute on
    # the instance, so separate parser instances never share accumulated text.
    text = ""

    def handle_data(self, data):
        """Append a run of character data to the accumulated text."""
        self.text += data

    def handle_starttag(self, tag, attrs):
        """Insert newlines for the tags that carry line structure.

        ``attrs`` is accepted to satisfy the HTMLParser callback signature
        and is not inspected.
        """
        if tag == "br":
            self.text += "\n"
        elif tag == "p" and self.text:
            # Separate paragraphs with a blank line, but do not emit one
            # before the very first paragraph: that would make every
            # converted text start with two empty lines.
            self.text += "\n\n"
|
||||||
|
|
||||||
def html_filter(data):
|
def html_filter(data):
|
||||||
f = HTMLFilter()
|
f = HTMLFilter()
|
||||||
f.feed(data)
|
f.feed(data)
|
||||||
@ -45,5 +51,11 @@ def get_media_urls(toot):
|
|||||||
urls.append(media.get("url"))
|
urls.append(media.get("url"))
|
||||||
return urls
|
return urls
|
||||||
|
|
||||||
def find_urls(toot, include_tags=False):
    """Return the link targets found in a toot's HTML content.

    Links are extracted from ``toot.content`` with the module-level
    ``url_re`` pattern.  Unless ``include_tags`` is truthy, any link whose
    target ends with ``/tags/<name>`` for one of the entries in
    ``toot.tags`` is left out, so hashtag links do not clutter the list.

    :param toot: object exposing ``content`` (HTML string) and ``tags``
        (iterable of mappings with a ``"name"`` key).
    :param include_tags: when truthy, hashtag links are kept.
    :returns: list of URL strings, in the order they appear in the content.
    """
    urls = url_re.findall(toot.content)
    if include_tags:
        return urls
    # Compare case-insensitively on both sides: the URL is lower-cased, so
    # the tag name must be too, otherwise a mixed-case name never matches.
    tag_suffixes = tuple("/tags/" + tag["name"].lower() for tag in toot.tags)
    # str.endswith accepts a tuple (an empty one never matches), which
    # replaces the nested loops and the remove-while-iterating-a-copy dance.
    return [url for url in urls if not url.lower().endswith(tag_suffixes)]
|
Loading…
x
Reference in New Issue
Block a user