2021-06-25 16:25:51 -05:00
# -*- coding: utf-8 -*-
2021-06-25 22:47:10 -05:00
""" Strips unneeded tweet information in order to store tweet objects by using less memory. This is especially useful when buffers start to contain more than a certain amount of items. """
2021-06-25 16:25:51 -05:00
from tweepy . models import Status
def reduce_tweet ( tweet ) :
2021-06-25 22:47:10 -05:00
""" generates a new Tweet model with the fields we currently need, excluding everything else including null values and empty collections. """
2021-06-28 00:47:23 -05:00
allowed_values = [ " created_at " , " id " , " full_text " , " text " , " message " , " in_reply_to_status_id " , " in_reply_to_user_id " , " is_quote_status " , " lang " , " source " , " coordinates " , " quoted_status_id " , " extended_entities " ]
2021-06-25 16:25:51 -05:00
allowed_entities = [ " hashtags " , " media " , " urls " , " user_mentions " , " polls " ]
status_dict = { }
for key in allowed_values :
if tweet . _json . get ( key ) :
status_dict [ key ] = tweet . _json [ key ]
entities = dict ( )
for key in allowed_entities :
if tweet . _json [ " entities " ] . get ( key ) and tweet . _json [ " entities " ] . get ( key ) != None :
entities [ key ] = tweet . _json [ " entities " ] [ key ]
status_dict [ " entities " ] = entities
2021-06-25 22:47:10 -05:00
# If tweet comes from the cached database, it does not include an API, so we can pass None here as we do not use that reference to tweepy's API.
2021-06-25 16:48:44 -05:00
if hasattr ( tweet , " _api " ) :
api = tweet . _api
else :
api = None
status = Status ( ) . parse ( api = api , json = status_dict )
2021-06-25 22:47:10 -05:00
# Quotes and retweets are different objects. So we parse a new tweet when we have a quoted or retweeted status here.
2021-06-25 16:25:51 -05:00
if tweet . _json . get ( " quoted_status " ) :
quoted_tweet = reduce_tweet ( tweet . quoted_status )
status . quoted_status = quoted_tweet
if tweet . _json . get ( " retweeted_status " ) :
retweeted_tweet = reduce_tweet ( tweet . retweeted_status )
status . retweeted_status = retweeted_tweet
# Adds user ID to here so we can reference it later.
# Sometimes, the conversations buffer would send an already reduced tweet here so we will need to return it as is.
if isinstance ( tweet . user , str ) == False :
status . user = tweet . user . id_str
else :
return tweet
return status