Reduce the size of all tweets so it might make easier to handle those in a realtime database

This commit is contained in:
Manuel Cortez 2021-06-25 16:25:51 -05:00
parent 49505fabcd
commit c761230566
6 changed files with 114 additions and 46 deletions

View File

@ -418,7 +418,7 @@ class baseBufferController(baseBuffers.buffer):
id = tweet.id
twishort_enabled = hasattr(tweet, "twishort")
users = utils.get_all_mentioned(tweet, self.session.db, field="screen_name")
ids = utils.get_all_mentioned(tweet, self.session.db, field="id_str")
ids = utils.get_all_mentioned(tweet, self.session.db, field="id")
# Build the window title
if len(users) < 1:
title=_("Reply to {arg0}").format(arg0=screen_name)

View File

@ -923,8 +923,8 @@ class Controller(object):
def open_conversation(self, *args, **kwargs):
buffer = self.get_current_buffer()
id = buffer.get_right_tweet().id_str
user = buff.session.get_user(buffer.get_right_tweet().user).screen_name
id = buffer.get_right_tweet().id
user = buffer.session.get_user(buffer.get_right_tweet().user).screen_name
search = twitterBuffers.conversationBufferController(self.view.nb, "search", "%s-searchterm" % (id,), buffer.session, buffer.session.db["user_name"], bufferType="searchPanel", sound="search_updated.ogg", since_id=id, q="@{0}".format(user,))
search.tweet = buffer.get_right_tweet()
search.start_stream(start=True)

View File

@ -49,23 +49,24 @@ def compose_tweet(tweet, db, relative_times, show_screen_names=False, session=No
else:
text = StripChars(getattr(tweet, value))
if show_screen_names:
user = tweet.user.screen_name
user = session.get_user(tweet.user).screen_name
else:
user = tweet.user.name
user = session.get_user(tweet.user).name
source = re.sub(r"(?s)<.*?>", "", tweet.source)
if hasattr(tweet, "retweeted_status"):
if (hasattr(tweet, "message")) == False and tweet.retweeted_status.is_quote_status == False:
text = "RT @%s: %s" % (tweet.retweeted_status.user.screen_name, text)
elif tweet.retweeted_status.is_quote_status:
if hasattr(tweet, "message") == False and hasattr(tweet.retweeted_status, "is_quote_status") == False:
text = "RT @%s: %s" % (session.get_user(tweet.retweeted_status.user).screen_name, text)
elif hasattr(tweet.retweeted_status, "is_quote_status"):
text = "%s" % (text)
else:
text = "RT @%s: %s" % (tweet.retweeted_status.user.screen_name, text)
text = "RT @%s: %s" % (session.get_user(tweet.retweeted_status.user).screen_name, text)
if not hasattr(tweet, "message"):
if hasattr(tweet, "retweeted_status"):
text = utils.expand_urls(text, tweet.retweeted_status.entities)
if hasattr(tweet.retweeted_status, "entities"):
text = utils.expand_urls(text, tweet.retweeted_status.entities)
else:
text = utils.expand_urls(text, tweet.entities)
if hasattr(tweet, "entities"):
text = utils.expand_urls(text, tweet.entities)
if config.app['app-settings']['handle_longtweets']: pass
return [user+", ", text, ts+", ", source]
@ -112,14 +113,14 @@ def compose_quoted_tweet(quoted_tweet, original_tweet, show_screen_names=False,
value = "text"
text = StripChars(getattr(quoted_tweet, value))
if show_screen_names:
quoting_user = quoted_tweet.user.screen_name
quoting_user = session.get_user(quoted_tweet.user).screen_name
else:
quoting_user = quoted_tweet.user.name
quoting_user = session.get_user(quoted_tweet.user).name
source = quoted_tweet.source
if hasattr(quoted_tweet, "retweeted_status"):
text = "rt @%s: %s" % (quoted_tweet.retweeted_status.user.screen_name, text)
text = "rt @%s: %s" % (session.get_user(quoted_tweet.retweeted_status.user).screen_name, text)
if text[-1] in chars: text=text+"."
original_user = original_tweet.user.screen_name
original_user = session.get_user(original_tweet.user).screen_name
if hasattr(original_tweet, "message"):
original_text = original_tweet.message
elif hasattr(original_tweet, "full_text"):
@ -128,7 +129,12 @@ def compose_quoted_tweet(quoted_tweet, original_tweet, show_screen_names=False,
original_text = StripChars(original_tweet.text)
quoted_tweet.message = _(u"{0}. Quoted tweet from @{1}: {2}").format( text, original_user, original_text)
quoted_tweet = tweets.clear_url(quoted_tweet)
quoted_tweet.entities["urls"].extend(original_tweet.entities["urls"])
if hasattr(original_tweet, "entities") and original_tweet.entities.get("urls"):
if hasattr(quoted_tweet, "entities") == False:
quoted_tweet.entities = {}
if quoted_tweet.entities.get("urls") == None:
quoted_tweet.entities["urls"] = []
quoted_tweet.entities["urls"].extend(original_tweet.entities["urls"])
return quoted_tweet
def compose_followers_list(tweet, db, relative_times=True, show_screen_names=False, session=None):

View File

@ -0,0 +1,32 @@
# -*- coding: utf-8 -*-
""" Strips unneeded tweet information in order to store tweet objects by using less memory. """
from tweepy.models import Status
def reduce_tweet(tweet):
allowed_values = ["created_at", "id", "full_text", "text", "message", "in_reply_to_status_id", "in_reply_to_user_id", "is_quote_status", "lang", "source", "coordinates", "quoted_status_id", ]
allowed_entities = ["hashtags", "media", "urls", "user_mentions", "polls"]
status_dict = {}
for key in allowed_values:
if tweet._json.get(key):
status_dict[key] = tweet._json[key]
entities = dict()
for key in allowed_entities:
if tweet._json["entities"].get(key) and tweet._json["entities"].get(key) != None:
entities[key] = tweet._json["entities"][key]
status_dict["entities"] = entities
# Quotes and retweets are different objects.
status = Status().parse(api=tweet._api, json=status_dict)
if tweet._json.get("quoted_status"):
quoted_tweet = reduce_tweet(tweet.quoted_status)
# print(quoted_tweet)
status.quoted_status = quoted_tweet
if tweet._json.get("retweeted_status"):
retweeted_tweet = reduce_tweet(tweet.retweeted_status)
status.retweeted_status = retweeted_tweet
# Adds user ID to here so we can reference it later.
# Sometimes, the conversations buffer would send an already reduced tweet here so we will need to return it as is.
if isinstance(tweet.user, str) == False:
status.user = tweet.user.id_str
else:
return tweet
return status

View File

@ -17,6 +17,7 @@ from keys import keyring
from sessions import base
from sessions.twitter import utils, compose
from sessions.twitter.long_tweets import tweets, twishort
from . import reduce
from .wxUI import authorisationDialog
log = logging.getLogger("sessions.twitterSession")
@ -44,21 +45,20 @@ class Session(base.baseSession):
last_id = self.db[name][0].id
else:
last_id = self.db[name][-1].id
self.add_users_from_results(data)
for i in data:
if ignore_older and last_id != None:
if i.id < last_id:
log.error("Ignoring an older tweet... Last id: {0}, tweet id: {1}".format(last_id, i.id))
continue
if utils.find_item(i.id, self.db[name]) == None and utils.is_allowed(i, self.settings, name) == True:
i = self.check_quoted_status(i)
i = self.check_long_tweet(i)
if i == False: continue
if self.settings["general"]["reverse_timelines"] == False: objects.append(i)
else: objects.insert(0, i)
reduced_object = reduce.reduce_tweet(i)
reduced_object = self.check_quoted_status(reduced_object)
reduced_object = self.check_long_tweet(reduced_object)
if self.settings["general"]["reverse_timelines"] == False: objects.append(reduced_object)
else: objects.insert(0, reduced_object)
num = num+1
if hasattr(i, "user"):
if (i.user.id in self.db["users"]) == False:
self.db["users"][i.user.id] = i.user
self.db[name] = objects
return num
@ -338,10 +338,11 @@ class Session(base.baseSession):
value = "full_text"
else:
value = "text"
setattr(quoted_tweet, value, utils.expand_urls(getattr(quoted_tweet, value), quoted_tweet.entities))
if quoted_tweet.is_quote_status == True and hasattr(quoted_tweet, "quoted_status"):
if hasattr(quoted_tweet, "entities"):
setattr(quoted_tweet, value, utils.expand_urls(getattr(quoted_tweet, value), quoted_tweet.entities))
if hasattr(quoted_tweet, "is_quote_status") == True and hasattr(quoted_tweet, "quoted_status"):
original_tweet = quoted_tweet.quoted_status
elif hasattr(quoted_tweet, "retweeted_status") and quoted_tweet.retweeted_status.is_quote_status == True and hasattr(quoted_tweet.retweeted_status, "quoted_status"):
elif hasattr(quoted_tweet, "retweeted_status") and hasattr(quoted_tweet.retweeted_status, "is_quote_status") == True and hasattr(quoted_tweet.retweeted_status, "quoted_status"):
original_tweet = quoted_tweet.retweeted_status.quoted_status
else:
return quoted_tweet
@ -352,33 +353,40 @@ class Session(base.baseSession):
value = "message"
else:
value = "text"
setattr(original_tweet, value, utils.expand_urls(getattr(original_tweet, value), original_tweet.entities))
return compose.compose_quoted_tweet(quoted_tweet, original_tweet)
if hasattr(original_tweet, "entities"):
setattr(original_tweet, value, utils.expand_urls(getattr(original_tweet, value), original_tweet.entities))
# ToDo: Shall we check whether we should add show_screen_names here?
return compose.compose_quoted_tweet(quoted_tweet, original_tweet, session=self)
def check_long_tweet(self, tweet):
""" Process a tweet and add extra info if it's a long tweet made with Twyshort.
tweet dict: a tweet object.
returns a tweet with a new argument message, or original tweet if it's not a long tweet."""
long = twishort.is_long(tweet)
long = False
if hasattr(tweet, "entities") and tweet.entities.get("urls"):
long = twishort.is_long(tweet)
if long != False and config.app["app-settings"]["handle_longtweets"]:
message = twishort.get_full_text(long)
if hasattr(tweet, "quoted_status"):
tweet.quoted_status.message = message
if tweet.quoted_status.message == False: return False
tweet.quoted_status.twishort = True
for i in tweet.quoted_status.entities["user_mentions"]:
if "@%s" % (i["screen_name"]) not in tweet.quoted_status.message and i["screen_name"] != tweet.user.screen_name:
if hasattr(tweet.quoted_status, "retweeted_status") and tweet.retweeted_status.user.screen_name == i["screen_name"]:
continue
tweet.quoted_status.message = u"@%s %s" % (i["screen_name"], tweet.message)
if hasattr(tweet.quoted_status, "entities") and tweet.quoted_status.entities.get("user_mentions"):
for i in tweet.quoted_status.entities["user_mentions"]:
if "@%s" % (i["screen_name"]) not in tweet.quoted_status.message and i["screen_name"] != self.get_user(tweet.user).screen_name:
if hasattr(tweet.quoted_status, "retweeted_status") and self.get_user(tweet.retweeted_status.user).screen_name == i["screen_name"]:
continue
tweet.quoted_status.message = u"@%s %s" % (i["screen_name"], tweet.message)
else:
tweet.message = message
if tweet.message == False: return False
tweet.twishort = True
for i in tweet.entities["user_mentions"]:
if "@%s" % (i["screen_name"]) not in tweet.message and i["screen_name"] != tweet.user.screen_name:
if hasattr(tweet, "retweeted_status") and tweet.retweeted_status.user.screen_name == i["screen_name"]:
continue
if hasattr(tweet, "entities") and tweet.entities.get("user_mentions"):
for i in tweet.entities["user_mentions"]:
if "@%s" % (i["screen_name"]) not in tweet.message and i["screen_name"] != self.get_user(tweet.user).screen_name:
if hasattr(tweet, "retweeted_status") and self.get_user(tweet.retweeted_status.user).screen_name == i["screen_name"]:
continue
tweet.message = u"@%s %s" % (i["screen_name"], tweet.message)
return tweet
def get_user(self, id):
@ -386,20 +394,21 @@ class Session(base.baseSession):
id str: User identifier, provided by Twitter.
returns a tweepy user object."""
if ("users" in self.db) == False or (str(id) in self.db["users"]) == False:
log.debug("Requesting user id {} as it is not present in the users database.".format(id))
try:
user = self.twitter.get_user(id=id)
except TweepError as err:
except ValueError as err:
user = UserModel(None)
user.screen_name = "deleted_user"
user.id = id
user.name = _("Deleted account")
return user
users = self.db["users"]
users[user.id] = user
users[user.id_str] = user
self.db["users"] = users
return user
else:
return self.db["users"][id]
return self.db["users"][str(id)]
def get_user_by_screen_name(self, screen_name):
""" Returns an user identifier associated with a screen_name.
@ -437,4 +446,20 @@ class Session(base.baseSession):
for user in users:
users_db[user.id_str] = user
log.debug("Added %d new users" % (len(users)))
self.db["users"] = users_db
self.db["users"] = users_db
def add_users_from_results(self, data):
users = self.db["users"]
for i in data:
if hasattr(i, "user"):
if isinstance(i.user, str):
log.warning("A String was passed to be added as an user. This is normal only if TWBlue tried to load a conversation.")
continue
if (i.user.id_str in self.db["users"]) == False:
users[i.user.id_str] = i.user
if hasattr(i, "quoted_status") and (i.quoted_status.user.id_str in self.db["users"]) == False:
users[i.quoted_status.user.id_str] = i.quoted_status.user
if hasattr(i, "retweeted_status") and (i.retweeted_status.user.id_str in self.db["users"]) == False:
users[i.retweeted_status.user.id_str] = i.retweeted_status.user
self.db["users"] = users

View File

@ -69,13 +69,14 @@ def is_audio(tweet):
if hasattr(tweet, "message_create"):
entities = tweet.message_create["message_data"]["entities"]
else:
if hasattr(tweet, "entities") == False or tweet.entities.get("hashtags") == None:
return False
entities = tweet.entities
if len(entities["hashtags"]) > 0:
for i in entities["hashtags"]:
if i["text"] == "audio":
return True
except IndexError:
print(tweet.entities["hashtags"])
log.exception("Exception while executing is_audio hashtag algorithm")
def is_geocoded(tweet):
@ -86,6 +87,8 @@ def is_media(tweet):
if hasattr(tweet, "message_create"):
entities = tweet.message_create["message_data"]["entities"]
else:
if hasattr(tweet, "entities") == False or tweet.entities.get("hashtags") == None:
return False
entities = tweet.entities
if entities.get("media") == None:
return False
@ -112,7 +115,7 @@ def get_all_users(tweet, session):
else:
if user.screen_name != session.db["user_name"]:
string.append(user.screen_name)
if tweet.get("entities") != None and tweet["entities"].get("user_mentions") != None:
if hasattr(tweet, "entities") and tweet.entities.get("user_mentions"):
for i in tweet.entities["user_mentions"]:
if i["screen_name"] != session.db["user_name"] and i["screen_name"] != user.screen_name:
if i["screen_name"] not in string:
@ -138,7 +141,7 @@ def is_allowed(tweet, settings, buffer_name):
tweet_data = {}
if hasattr(tweet, "retweeted_status"):
tweet_data["retweet"] = True
if tweet.in_reply_to_status_id_str != None:
if tweet.in_reply_to_status_id != None:
tweet_data["reply"] = True
if hasattr(tweet, "quoted_status"):
tweet_data["quote"] = True
@ -203,6 +206,8 @@ def twitter_error(error):
def expand_urls(text, entities):
""" Expand all URLS present in text with information found in entities"""
if entities.get("urls") == None:
return text
urls = find_urls_in_text(text)
for url in entities["urls"]:
if url["url"] in text: