mirror of
				https://github.com/MCV-Software/TWBlue.git
				synced 2025-11-03 21:37:05 +00:00 
			
		
		
		
	Improved HTML parsing for toots. Removed tags from the URL list
This commit is contained in:
		@@ -17,7 +17,8 @@ from mysc.thread_utils import call_threaded
 | 
			
		||||
from pubsub import pub
 | 
			
		||||
from extra import ocr
 | 
			
		||||
from wxUI import buffers, dialogs, commonMessageDialogs
 | 
			
		||||
from wxUI.dialogs.mastodon import dialogs, menus
 | 
			
		||||
from wxUI.dialogs.mastodon import menus
 | 
			
		||||
from wxUI.dialogs.mastodon import dialogs as mastodon_dialogs
 | 
			
		||||
 | 
			
		||||
log = logging.getLogger("controller.buffers.mastodon.base")
 | 
			
		||||
 | 
			
		||||
@@ -330,7 +331,7 @@ class BaseBuffer(base.Buffer):
 | 
			
		||||
        toot = self.get_item()
 | 
			
		||||
        id = toot.id
 | 
			
		||||
        if self.session.settings["general"]["boost_mode"] == "ask":
 | 
			
		||||
            answer = dialogs.boost_question()
 | 
			
		||||
            answer = mastodon_dialogs.boost_question()
 | 
			
		||||
            if answer == True:
 | 
			
		||||
                self._direct_boost(id)
 | 
			
		||||
        else:
 | 
			
		||||
@@ -378,9 +379,9 @@ class BaseBuffer(base.Buffer):
 | 
			
		||||
        if url == '':
 | 
			
		||||
            toot = self.get_item()
 | 
			
		||||
            if toot.reblog != None:
 | 
			
		||||
                urls = utils.find_urls(toot.reblog.content)
 | 
			
		||||
                urls = utils.find_urls(toot.REBLOG)
 | 
			
		||||
            else:
 | 
			
		||||
                urls = utils.find_urls(toot.reblog.content)
 | 
			
		||||
                urls = utils.find_urls(toot)
 | 
			
		||||
            if len(urls) == 1:
 | 
			
		||||
                url=urls[0]
 | 
			
		||||
            elif len(urls) > 1:
 | 
			
		||||
@@ -406,7 +407,7 @@ class BaseBuffer(base.Buffer):
 | 
			
		||||
        if item.account.id != self.session.db["user_id"] or item.reblog != None:
 | 
			
		||||
            output.speak(_("You can delete only your own toots."))
 | 
			
		||||
            return
 | 
			
		||||
        answer = dialogs.delete_toot_dialog()
 | 
			
		||||
        answer = mastodon_dialogs.delete_toot_dialog()
 | 
			
		||||
        if answer == True:
 | 
			
		||||
            items = self.session.db[self.name]
 | 
			
		||||
            try:
 | 
			
		||||
 
 | 
			
		||||
@@ -1,13 +1,19 @@
 | 
			
		||||
import re
 | 
			
		||||
from html.parser import HTMLParser
 | 
			
		||||
 | 
			
		||||
url_re = re.compile("(?:\w+://|www\.)[^ ,.?!#%=+][^ \\n\\t]*")
 | 
			
		||||
# Captures the href value of each anchor tag whose first attribute is href.
# Written as a raw string so "\s" reaches the regex engine untouched, requires
# whitespace after "<a", and accepts only a real quote character (' or ") as
# the opening delimiter (the previous class also accepted "|").
url_re = re.compile(r'<a\s+href=[\'"](.*?)[\'"].*?>')
 | 
			
		||||
 | 
			
		||||
class HTMLFilter(HTMLParser):
    """Collect the plain text of an HTML fragment fed to the parser.

    Character data is accumulated in ``text``. A ``<br>`` tag becomes a single
    newline and a ``<p>`` tag becomes a blank line separating it from the text
    collected so far. All other tags are dropped.
    """

    # Class-level default; the first "+=" rebinds the name on the instance,
    # so separate parser instances never share collected text.
    text = ""

    def handle_data(self, data):
        """Append a run of character data to the collected text."""
        self.text += data

    def handle_starttag(self, tag, attrs):
        """Turn line-breaking tags into newlines; ignore every other tag."""
        if tag == "br":
            self.text += "\n"
        elif tag == "p" and self.text:
            # Only separate paragraphs from preceding text: emitting the
            # separator for the very first <p> would start the result with
            # two stray newlines.
            self.text += "\n\n"
 | 
			
		||||
 | 
			
		||||
def html_filter(data):
 | 
			
		||||
    f = HTMLFilter()
 | 
			
		||||
    f.feed(data)
 | 
			
		||||
@@ -45,5 +51,11 @@ def get_media_urls(toot):
 | 
			
		||||
            urls.append(media.get("url"))
 | 
			
		||||
    return urls
 | 
			
		||||
 | 
			
		||||
def find_urls(text):
 | 
			
		||||
    return  url_re.findall(html_filter(text))
 | 
			
		||||
def find_urls(toot, include_tags=False):
    """Return the link targets found in a toot's HTML content.

    Args:
        toot: Object exposing ``content`` (the HTML string scanned with the
            module-level ``url_re`` pattern) and ``tags`` (an iterable of
            mappings that each provide a ``"name"`` key).
        include_tags: When false (the default), every link whose address
            ends with ``/tags/<name>`` for a tag listed in ``toot.tags`` is
            left out of the result.

    Returns:
        list: The matches produced by ``url_re.findall`` in document order,
        without the hashtag links unless ``include_tags`` is true.
    """
    urls = url_re.findall(toot.content)
    if include_tags:
        return urls
    # Lower-case both sides of the comparison: the URL was already being
    # lower-cased, so a tag name containing capitals could never match.
    tag_suffixes = tuple("/tags/" + tag["name"].lower() for tag in toot.tags)
    # str.endswith accepts a tuple (an empty one never matches), so a single
    # pass replaces the nested copy-and-remove loops.
    return [url for url in urls if not url.lower().endswith(tag_suffixes)]
 | 
			
		||||
		Reference in New Issue
	
	Block a user