Retrieve all tweets from a thread properly. closes #417

This commit is contained in:
Manuel Cortez 2021-11-01 00:11:46 -06:00
parent b39b732a93
commit ff0a2b5692
No known key found for this signature in database
GPG Key ID: 262CC30FA01B5CBF
2 changed files with 51 additions and 17 deletions

View File

@ -5,6 +5,7 @@
## changes in this version ## changes in this version
* fixed a bug when clearing the direct messages buffer. ([#418](https://github.com/manuelcortez/TWBlue/issues/418)) * fixed a bug when clearing the direct messages buffer. ([#418](https://github.com/manuelcortez/TWBlue/issues/418))
* TWBlue should retrieve tweets from threads and conversations in a more reliable way. Tweets in the same thread (made by the same author) will be sorted correctly, although replies to the thread (made by different people) may not be ordered in the same way they are displayed in Twitter apps. ([#417](https://github.com/manuelcortez/TWBlue/issues/417))
## Changes in Version 2021.10.30 ## Changes in Version 2021.10.30

View File

@ -94,24 +94,57 @@ class ConversationBuffer(SearchBuffer):
def get_replies(self, tweet): def get_replies(self, tweet):
""" Try to retrieve the whole conversation for the passed object by using a mix between calls to API V1.1 and V2 """ """ Try to retrieve the whole conversation for the passed object by using a mix between calls to API V1.1 and V2 """
results = [] # firstly we would try to retrieve the whole thread, then we will get replies.
# this makes us to waste two search API calls, but there's no better option to retrieve the whole thread including replies, unfortunately.
thread_results = []
reply_results = []
# try to fetch conversation_id of the tweet initiating the buffer.
try: try:
tweet = self.session.twitter_v2.get_tweet(id=self.tweet.id, user_auth=True, tweet_fields=["conversation_id"]) tweet = self.session.twitter_v2.get_tweet(id=self.tweet.id, user_auth=True, tweet_fields=["conversation_id", "author_id"])
results.append(tweet.data) thread_results.append(tweet.data)
original_tweet = self.session.twitter_v2.get_tweet(id=tweet.data.conversation_id, user_auth=True, tweet_fields=["conversation_id"]) except TweepyException as e:
results.insert(0, original_tweet.data) log.exception("Error attempting to retrieve tweet conversation ID")
term = "conversation_id:{}".format(tweet.data.conversation_id) thread_results.append(self.tweet)
tweets = self.session.twitter_v2.search_recent_tweets(term, user_auth=True, max_results=98, tweet_fields=["in_reply_to_user_id", "author_id", "conversation_id"]) # Return earlier cause we can't do anything if we cannot fetch the object from twitter.
if tweets.data != None: return thread_results
results.extend(tweets.data) # If tweet contains a conversation_id param, let's retrieve the original tweet which started the conversation so we will have the whole reference for later.
if hasattr(tweet.data, "conversation_id") and tweet.data.conversation_id != None:
conversation_id = tweet.data.conversation_id
original_tweet = self.session.twitter_v2.get_tweet(id=tweet.data.conversation_id, user_auth=True, tweet_fields=["conversation_id", "author_id"])
thread_results.insert(0, original_tweet.data)
else:
conversation_id = tweet.data.id
# find all tweets replying to the original thread only. Those tweets are sent by the same author who originally posted the first tweet.
try:
term = "conversation_id:{} from:{} to:{}".format(conversation_id, original_tweet.data.author_id, original_tweet.data.author_id)
thread_tweets = self.session.twitter_v2.search_recent_tweets(term, user_auth=True, max_results=98, tweet_fields=["in_reply_to_user_id", "author_id", "conversation_id"])
if thread_tweets.data != None:
thread_results.extend(thread_tweets.data)
# Search only replies to conversation_id.
term = "conversation_id:{}".format(conversation_id, original_tweet.data.author_id)
reply_tweets = self.session.twitter_v2.search_recent_tweets(term, user_auth=True, max_results=50, tweet_fields=["in_reply_to_user_id", "author_id", "conversation_id"])
if reply_tweets.data != None:
reply_results.extend(reply_tweets.data)
except TweepyException as e: except TweepyException as e:
log.exception("There was an error when attempting to retrieve the whole conversation for buffer {}".format(self.buffer.name)) log.exception("There was an error when attempting to retrieve the whole conversation for buffer {}".format(self.buffer.name))
new_results = [] # convert v2 tweets in normal, V1.1 tweets so we don't have to deal with those kind of objects in our infrastructure.
ids = [tweet.id for tweet in results] # ToDo: Remove this last step once we support natively all objects fetched via Twitter API V2.
results = []
ids = [tweet.id for tweet in thread_results]
if len(ids) > 0:
try: try:
results = self.session.twitter.lookup_statuses(ids, include_ext_alt_text=True, tweet_mode="extended") thread_results = self.session.twitter.lookup_statuses(ids, include_ext_alt_text=True, tweet_mode="extended")
results.sort(key=lambda x: x.id) thread_results.sort(key=lambda x: x.id)
results.extend(thread_results)
except TweepyException as e: except TweepyException as e:
log.exception("There was an error attempting to retrieve tweets for Twitter API V1.1, in conversation buffer {}".format(self.name)) log.exception("There was an error attempting to retrieve tweets for Twitter API V1.1, in conversation buffer {}".format(self.name))
return [] return []
ids = [tweet.id for tweet in reply_results]
if len(ids) > 0:
try:
reply_results = self.session.twitter.lookup_statuses(ids, include_ext_alt_text=True, tweet_mode="extended")
reply_results.sort(key=lambda x: x.id)
results.extend(reply_results)
except TweepyException as e:
log.exception("There was an error attempting to retrieve tweets for Twitter API V1.1, in conversation buffer {}".format(self.name))
return results return results