import re
from html.parser import HTMLParser
# Compiled pattern that find_urls() applies to post content via findall().
# NOTE(review): the pattern string is empty here, so findall() can only yield
# empty matches -- presumably the real URL regex was lost; confirm and restore.
url_re = re.compile('')
class HTMLFilter(HTMLParser):
    """HTML parser that builds a plain-text rendering in ``text``.

    Character data is appended verbatim, every ``<br>`` start tag adds a
    newline, and every ``<p>`` start tag after the first adds a blank line.
    All other tags are ignored.
    """

    # Plain text collected so far (rebound per instance on first write).
    text = ""
    # True until the first <p> start tag has been seen.
    first_paragraph = True

    def handle_data(self, data):
        """Append raw character data to the collected text."""
        self.text = self.text + data

    def handle_starttag(self, tag, attrs):
        """Translate ``<br>`` and ``<p>`` start tags into newlines."""
        if tag == "br":
            self.text += "\n"
            return
        if tag != "p":
            return
        if self.first_paragraph:
            # The first paragraph gets no separator in front of it.
            self.first_paragraph = False
            return
        self.text += "\n\n"
def html_filter(data):
    """Convert an HTML string to plain text using HTMLFilter.

    Args:
        data: HTML markup to convert.

    Returns:
        The text collected by HTMLFilter after parsing ``data``.
    """
    parser = HTMLFilter()
    parser.feed(data)
    # feed() may hold back trailing input (for example text ending in an
    # unterminated "&...") while waiting for more data; close() forces the
    # parser to process everything that is still buffered.
    parser.close()
    return parser.text
def find_item(item, listItems):
    """Return the index of ``item`` (or of its reblog) in ``listItems``.

    An entry matches when its ``id`` equals ``item.id`` or, if ``item`` has a
    non-None ``reblog`` attribute, when its ``id`` equals ``item.reblog.id``.

    Args:
        item: Object exposing ``id`` and optionally ``reblog``.
        listItems: Sequence of objects exposing ``id``.

    Returns:
        The index of the first matching entry, or None when nothing matches.
    """
    # The reblog does not change while scanning, so look it up only once.
    reblog = getattr(item, "reblog", None)
    for index, candidate in enumerate(listItems):
        if candidate.id == item.id:
            return index
        if reblog is not None and reblog.id == candidate.id:
            return index
    return None
def is_audio_or_video(post):
    """Tell whether a post carries an audio or video attachment.

    When the post has a non-None ``reblog``, the reblogged post is inspected
    instead of the wrapper.

    Args:
        post: Object exposing ``media_attachments`` (an iterable of mappings
            with a ``"type"`` key) and optionally ``reblog``.

    Returns:
        True if any attachment has type ``"video"`` or ``"audio"``,
        otherwise False.
    """
    reblog = getattr(post, "reblog", None)
    if reblog is not None:
        return is_audio_or_video(reblog)
    # Always return a real boolean instead of falling through to None.
    return any(
        media["type"] in ("video", "audio")
        for media in post.media_attachments
    )
def is_image(post):
    """Tell whether a post carries an image or gifv attachment.

    When the post has a non-None ``reblog``, the reblogged post is inspected
    instead of the wrapper.

    Args:
        post: Object exposing ``media_attachments`` (an iterable of mappings
            with a ``"type"`` key) and optionally ``reblog``.

    Returns:
        True if any attachment has type ``"gifv"`` or ``"image"``,
        otherwise False.
    """
    reblog = getattr(post, "reblog", None)
    if reblog is not None:
        return is_image(reblog)
    # Always return a real boolean instead of falling through to None.
    return any(
        media["type"] in ("gifv", "image")
        for media in post.media_attachments
    )
def get_media_urls(post):
    """Collect the ``url`` of every audio or video attachment of a post.

    If the post has a non-None ``reblog``, the attachments of the reblogged
    post are used instead. An attachment without a ``url`` key contributes
    ``None`` to the result.
    """
    reblogged = getattr(post, "reblog", None)
    if reblogged is not None:
        return get_media_urls(reblogged)
    return [
        attachment.get("url")
        for attachment in post.media_attachments
        if attachment.get("type") in ("audio", "video")
    ]
def find_urls(post, include_tags=False):
    """Return the URLs that ``url_re`` finds in ``post.content``.

    Args:
        post: Object exposing ``content`` (text to scan) and ``tags`` (an
            iterable of mappings with a ``"name"`` key).
        include_tags: When false (the default), URLs ending in
            ``/tags/<name>`` for any of the post's tags are left out.

    Returns:
        A list with the matching URLs, in the order they were found.
    """
    urls = url_re.findall(post.content)
    if include_tags:
        return urls
    # URLs are lowercased before comparing, so the tag names must be
    # lowercased as well or mixed-case names would never match.
    tag_suffixes = tuple("/tags/" + tag["name"].lower() for tag in post.tags)
    if not tag_suffixes:
        return urls
    # str.endswith accepts a tuple, so one pass covers every tag.
    return [url for url in urls if not url.lower().endswith(tag_suffixes)]