Retrieve all tweets from a thread properly. closes #417

This commit is contained in:
2021-11-01 00:11:46 -06:00
parent b39b732a93
commit ff0a2b5692
2 changed files with 51 additions and 17 deletions

View File

@@ -94,24 +94,57 @@ class ConversationBuffer(SearchBuffer):
def get_replies(self, tweet):
    """ Try to retrieve the whole conversation for the buffer's tweet by using a mix between calls to API V1.1 and V2.

    The ``tweet`` argument is kept for interface compatibility only: the
    conversation is always resolved from ``self.tweet``.

    Steps:
      1. Fetch the buffer's tweet through API V2 to learn its conversation ID.
      2. Fetch the tweet that started the conversation, unless the buffer's
         tweet is that tweet already.
      3. Search the thread itself (tweets the original author sent to
         themselves within the conversation), then search the conversation
         as a whole for replies.
      4. Convert every V2 object found into a V1.1 status.

    Returns a list with the thread statuses first and the remaining replies
    afterwards, each group sorted by ID. If the buffer's tweet cannot be
    fetched, ``[self.tweet]`` is returned; if the V1.1 lookup of the thread
    fails, an empty list is returned.
    """
    lookup_fields = ["conversation_id", "author_id"]
    search_fields = ["in_reply_to_user_id", "author_id", "conversation_id"]
    thread_results = []
    reply_results = []
    # Try to fetch conversation_id of the tweet initiating the buffer.
    try:
        response = self.session.twitter_v2.get_tweet(id=self.tweet.id, user_auth=True, tweet_fields=lookup_fields)
    except TweepyException:
        log.exception("Error attempting to retrieve tweet conversation ID")
        # Return early because we can't do anything if we cannot fetch the object from twitter.
        return [self.tweet]
    focused = response.data
    if focused is None:
        # The API answered but sent no tweet object, so there is nothing to build on.
        return [self.tweet]
    thread_results.append(focused)
    # Work out which tweet started the conversation. Its author is needed to
    # tell the thread apart from other people's replies.
    conversation_id = getattr(focused, "conversation_id", None)
    root = None
    if conversation_id is None:
        # No conversation data: treat the buffer's tweet as the start of the conversation.
        conversation_id = focused.id
        root = focused
    elif str(conversation_id) == str(focused.id):
        # The buffer's tweet already is the first tweet, no need to fetch it twice.
        root = focused
    else:
        try:
            original_tweet = self.session.twitter_v2.get_tweet(id=conversation_id, user_auth=True, tweet_fields=lookup_fields)
        except TweepyException:
            log.exception("Error attempting to retrieve the tweet that started the conversation")
        else:
            if original_tweet.data is not None:
                root = original_tweet.data
                thread_results.insert(0, root)
    author_id = getattr(root, "author_id", None)
    # This wastes two search API calls, but there's no better option to
    # retrieve the whole thread including replies, unfortunately.
    try:
        if author_id is not None:
            # Find all tweets belonging to the thread only: those sent by the
            # author of the first tweet in reply to themselves.
            term = "conversation_id:{} from:{} to:{}".format(conversation_id, author_id, author_id)
            thread_tweets = self.session.twitter_v2.search_recent_tweets(term, user_auth=True, max_results=98, tweet_fields=search_fields)
            if thread_tweets.data is not None:
                thread_results.extend(thread_tweets.data)
        # Search everything posted in the conversation; thread tweets are filtered out below.
        term = "conversation_id:{}".format(conversation_id)
        reply_tweets = self.session.twitter_v2.search_recent_tweets(term, user_auth=True, max_results=50, tweet_fields=search_fields)
        if reply_tweets.data is not None:
            reply_results.extend(reply_tweets.data)
    except TweepyException:
        log.exception("There was an error when attempting to retrieve the whole conversation for buffer {}".format(self.buffer.name))
    # Convert V2 tweets into normal, V1.1 tweets so we don't have to deal with those kind of objects in our infrastructure.
    # ToDo: Remove this last step once we support natively all objects fetched via Twitter API V2.
    results = []
    ids = [item.id for item in thread_results]
    if ids:
        try:
            thread_results = self.session.twitter.lookup_statuses(ids, include_ext_alt_text=True, tweet_mode="extended")
            thread_results.sort(key=lambda x: x.id)
            results.extend(thread_results)
        except TweepyException:
            log.exception("There was an error attempting to retrieve tweets for Twitter API V1.1, in conversation buffer {}".format(self.name))
            return []
    # The conversation search also returns the thread's own tweets; skip the
    # ones already collected so no tweet is listed twice.
    known_ids = set(ids)
    ids = [item.id for item in reply_results if item.id not in known_ids]
    if ids:
        try:
            reply_results = self.session.twitter.lookup_statuses(ids, include_ext_alt_text=True, tweet_mode="extended")
            reply_results.sort(key=lambda x: x.id)
            results.extend(reply_results)
        except TweepyException:
            # Replies are best effort: keep whatever was retrieved for the thread.
            log.exception("There was an error attempting to retrieve tweets for Twitter API V1.1, in conversation buffer {}".format(self.name))
    return results