Retrieve all tweets from a thread properly. closes #417

2026-07-14 11:21:19 +02:00 · 2021-11-01 00:11:46 -06:00
parent b39b732a93
commit ff0a2b5692
2 changed files with 51 additions and 17 deletions
@@ -5,6 +5,7 @@
 ## changes in this version

 * fixed a bug when clearing the direct messages buffer. ([#418](https://github.com/manuelcortez/TWBlue/issues/418))
+* TWBlue now retrieves tweets from threads and conversations more reliably. Tweets in the same thread (posted by the same author) are sorted correctly, although replies to the thread (posted by other people) may not appear in the same order as they do in the Twitter apps. ([#417](https://github.com/manuelcortez/TWBlue/issues/417))

 ## Changes in Version 2021.10.30

@@ -94,24 +94,57 @@ class ConversationBuffer(SearchBuffer):

    def get_replies(self, tweet):
        """ Try to retrieve the whole conversation for the passed object by using a mix between calls to API V1.1 and V2 """
-        results = []
+        # First we try to retrieve the whole thread, then we get the replies.
+        # This costs us two search API calls, but unfortunately there's no better way to retrieve the whole thread including replies.
+        thread_results = []
+        reply_results = []
+        # try to fetch conversation_id of the tweet initiating the buffer.
        try:
-            tweet = self.session.twitter_v2.get_tweet(id=self.tweet.id, user_auth=True, tweet_fields=["conversation_id"])
-            results.append(tweet.data)
-            original_tweet = self.session.twitter_v2.get_tweet(id=tweet.data.conversation_id, user_auth=True, tweet_fields=["conversation_id"])
-            results.insert(0, original_tweet.data)
-            term = "conversation_id:{}".format(tweet.data.conversation_id)
-            tweets = self.session.twitter_v2.search_recent_tweets(term, user_auth=True, max_results=98, tweet_fields=["in_reply_to_user_id", "author_id", "conversation_id"])
-            if tweets.data != None:
-                results.extend(tweets.data)
+            tweet = self.session.twitter_v2.get_tweet(id=self.tweet.id, user_auth=True, tweet_fields=["conversation_id", "author_id"])
+            thread_results.append(tweet.data)
+        except TweepyException as e:
+            log.exception("Error attempting to retrieve tweet conversation ID")
+            thread_results.append(self.tweet)
+            # Return early because we can't do anything if we cannot fetch the object from Twitter.
+            return thread_results
+        # If tweet contains a conversation_id param, let's retrieve the original tweet which started the conversation so we will have the whole reference for later.
+        if hasattr(tweet.data, "conversation_id") and tweet.data.conversation_id != None:
+            conversation_id = tweet.data.conversation_id
+            original_tweet = self.session.twitter_v2.get_tweet(id=tweet.data.conversation_id, user_auth=True, tweet_fields=["conversation_id", "author_id"])
+            thread_results.insert(0, original_tweet.data)
+        else:
+            conversation_id = tweet.data.id
+        # find all tweets replying to the original thread only. Those tweets are sent by the same author who originally posted the first tweet.
+        try:
+            term = "conversation_id:{} from:{} to:{}".format(conversation_id, original_tweet.data.author_id, original_tweet.data.author_id)
+            thread_tweets = self.session.twitter_v2.search_recent_tweets(term, user_auth=True, max_results=98, tweet_fields=["in_reply_to_user_id", "author_id", "conversation_id"])
+            if thread_tweets.data != None:
+                thread_results.extend(thread_tweets.data)
+            # Search only replies to conversation_id.
+            term = "conversation_id:{}".format(conversation_id, original_tweet.data.author_id)
+            reply_tweets = self.session.twitter_v2.search_recent_tweets(term, user_auth=True, max_results=50, tweet_fields=["in_reply_to_user_id", "author_id", "conversation_id"])
+            if reply_tweets.data != None:
+                reply_results.extend(reply_tweets.data)
        except TweepyException as e:
            log.exception("There was an error when attempting to retrieve the whole conversation for buffer {}".format(self.buffer.name))
-        new_results = []
-        ids = [tweet.id for tweet in results]
-        try:
-            results = self.session.twitter.lookup_statuses(ids, include_ext_alt_text=True, tweet_mode="extended")
-            results.sort(key=lambda x: x.id)
-        except TweepyException as e:
-            log.exception("There was an error attempting to retrieve tweets for Twitter API V1.1, in conversation buffer {}".format(self.name))
-            return []
+        # Convert V2 tweets into regular V1.1 tweets so we don't have to deal with those kinds of objects in our infrastructure.
+        # ToDo: Remove this last step once we support natively all objects fetched via Twitter API V2.
+        results = []
+        ids = [tweet.id for tweet in thread_results]
+        if len(ids) > 0:
+            try:
+                thread_results = self.session.twitter.lookup_statuses(ids, include_ext_alt_text=True, tweet_mode="extended")
+                thread_results.sort(key=lambda x: x.id)
+                results.extend(thread_results)
+            except TweepyException as e:
+                log.exception("There was an error attempting to retrieve tweets for Twitter API V1.1, in conversation buffer {}".format(self.name))
+                return []
+        ids = [tweet.id for tweet in reply_results]
+        if len(ids) > 0:
+            try:
+                reply_results = self.session.twitter.lookup_statuses(ids, include_ext_alt_text=True, tweet_mode="extended")
+                reply_results.sort(key=lambda x: x.id)
+                results.extend(reply_results)
+            except TweepyException as e:
+                log.exception("There was an error attempting to retrieve tweets for Twitter API V1.1, in conversation buffer {}".format(self.name))
        return results