diff --git a/src/sessions/mastodon/utils.py b/src/sessions/mastodon/utils.py
index 12a8c8fa..05a6303f 100644
--- a/src/sessions/mastodon/utils.py
+++ b/src/sessions/mastodon/utils.py
@@ -3,23 +3,56 @@ import demoji
 from html.parser import HTMLParser
 from datetime import datetime, timezone
-url_re = re.compile('')
+url_re = re.compile(r'')
 class HTMLFilter(HTMLParser):
+    # Classes to ignore when parsing HTML (compared against whole class tokens)
+    IGNORED_CLASSES = ["quote-inline"]
+    # Void elements never get an end tag, so they must not change skip_depth
+    VOID_TAGS = frozenset((
+        "area", "base", "br", "col", "embed", "hr", "img",
+        "input", "link", "meta", "source", "track", "wbr",
+    ))
+
     text = ""
     first_paragraph = True
+    skip_depth = 0  # Number of currently open tags inside an ignored element
     def handle_data(self, data):
-        self.text += data
+        # Only add data if we're not inside an ignored element
+        if self.skip_depth == 0:
+            self.text += data
     def handle_starttag(self, tag, attrs):
+        # Void tags (<br>, <img>, ...) have no end tag to balance an increment
+        opens_scope = tag not in self.VOID_TAGS
+
+        if self.skip_depth > 0:
+            # Inside an ignored element: only track nesting, emit nothing
+            if opens_scope:
+                self.skip_depth += 1
+            return
+
+        # A valueless `class` attribute is reported as None by HTMLParser
+        tag_classes = (dict(attrs).get("class") or "").split()
+        if any(ignored in tag_classes for ignored in self.IGNORED_CLASSES):
+            # Start skipping; an ignored void tag is simply dropped
+            if opens_scope:
+                self.skip_depth += 1
+            return
+
         if tag == "br":
             self.text = self.text+"\n"
         elif tag == "p":
             if self.first_paragraph:
                 self.first_paragraph = False
             else:
                 self.text = self.text+"\n\n"
+
+    def handle_endtag(self, tag):
+        # Only non-void tags were counted on the way in, so only they close a level
+        if self.skip_depth > 0 and tag not in self.VOID_TAGS:
+            self.skip_depth -= 1
def html_filter(data):
f = HTMLFilter()