Merge pull request #380 from manuelcortez/better_memory_management

Replace the cache database for a SQLite backed implementation
This commit is contained in:
Manuel Cortez 2021-06-27 19:10:50 -05:00 committed by GitHub
commit 969a75e9f3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 594 additions and 214 deletions

View File

@ -2,6 +2,11 @@
## changes in this version
* We just implemented some changes in the way TWBlue handles tweets in order to reduce its RAM usage [#380](https://github.com/manuelcortez/TWBlue/pull/380):
* We reduced the size of tweets by storing only the tweet fields we currently use. This should reduce the in-memory size of every tweet object by up to 75%.
* When using the cache database to store your tweets, there is a new setting present in the account settings dialog, in the general tab. This setting allows you to control whether TWBlue will load the whole database into memory (which is the current behaviour) or not.
* Loading the whole database into memory has the advantage of being extremely fast to access any element (for example when moving through tweets in a buffer), but it requires more memory as the tweet buffers grow. This should, however, use less memory than before thanks to the optimizations performed in tweet objects. If you have a machine with enough memory, this should be a good option for your case.
* If you uncheck this setting, TWBlue will read the database from disk instead of loading it into memory. This is significantly slower, but the advantage of this setting is that it will consume almost no extra memory, no matter how big the tweets dataset is. Beware, though, that TWBlue might start to feel slower when accessing elements (for example when reading tweets) as the buffers grow. This setting is suggested for computers with low memory or for those people not wanting to keep a really big amount of tweets stored.
* Changed the label in the direct message's text control so it will indicate that the user needs to write the text there, without referring to any username in particular. ([#366,](https://github.com/manuelcortez/TWBlue/issues/366))
* TWBlue will take Shift+F10 again as the contextual menu key in the list of items in a buffer. This stopped working after we migrated to WX 4.1. ([#353,](https://github.com/manuelcortez/TWBlue/issues/353))
* TWBlue should render correctly retweets of quoted tweets. ([#365,](https://github.com/manuelcortez/TWBlue/issues/365))

View File

@ -31,6 +31,7 @@ cx_freeze
tweepy
twitter-text-parser
pyenchant
sqlitedict
git+https://github.com/accessibleapps/libloader
git+https://github.com/accessibleapps/platform_utils
git+https://github.com/accessibleapps/accessible_output2

View File

@ -12,6 +12,7 @@ reverse_timelines = boolean(default=False)
announce_stream_status = boolean(default=True)
retweet_mode = string(default="ask")
persist_size = integer(default=0)
load_cache_in_memory=boolean(default=True)
show_screen_names = boolean(default=False)
buffer_order = list(default=list('home','mentions', 'dm', 'sent_dm', 'sent_tweets','favorites','followers','friends','blocks','muted','events'))

View File

@ -19,7 +19,7 @@ import languageHandler
import logging
from audio_services import youtube_utils
from controller.buffers import baseBuffers
from sessions.twitter import compose, utils
from sessions.twitter import compose, utils, reduce
from mysc.thread_utils import call_threaded
from tweepy.error import TweepError
from tweepy.cursor import Cursor
@ -178,7 +178,9 @@ class baseBufferController(baseBuffers.buffer):
val, cursor = val
if type(cursor) == tuple:
cursor = cursor[1]
self.session.db["cursors"][self.name] = cursor
cursors = self.session.db["cursors"]
cursors[self.name] = cursor
self.session.db["cursors"] = cursors
results = [i for i in val]
val = results
val.reverse()
@ -190,7 +192,6 @@ class baseBufferController(baseBuffers.buffer):
return
number_of_items = self.session.order_buffer(self.name, val)
log.debug("Number of items retrieved: %d" % (number_of_items,))
self.put_items_on_list(number_of_items)
if hasattr(self, "finished_timeline") and self.finished_timeline == False:
if "-timeline" in self.name:
@ -229,15 +230,19 @@ class baseBufferController(baseBuffers.buffer):
return
if items == None:
return
items_db = self.session.db[self.name]
self.session.add_users_from_results(items)
for i in items:
if utils.is_allowed(i, self.session.settings, self.name) == True and utils.find_item(i.id, self.session.db[self.name]) == None:
i = reduce.reduce_tweet(i)
i = self.session.check_quoted_status(i)
i = self.session.check_long_tweet(i)
elements.append(i)
if self.session.settings["general"]["reverse_timelines"] == False:
self.session.db[self.name].insert(0, i)
items_db.insert(0, i)
else:
self.session.db[self.name].append(i)
items_db.append(i)
self.session.db[self.name] = items_db
selection = self.buffer.list.get_selected()
log.debug("Retrieved %d items from cursored search in function %s." % (len(elements), self.function))
if self.session.settings["general"]["reverse_timelines"] == False:
@ -286,10 +291,12 @@ class baseBufferController(baseBuffers.buffer):
def remove_tweet(self, id):
if type(self.session.db[self.name]) == dict: return
for i in range(0, len(self.session.db[self.name])):
if self.session.db[self.name][i].id == id:
self.session.db[self.name].pop(i)
items = self.session.db[self.name]
for i in range(0, len(items)):
if items[i].id == id:
items.pop(i)
self.remove_item(i)
self.session.db[self.name] = items
def put_items_on_list(self, number_of_items):
list_to_use = self.session.db[self.name]
@ -408,11 +415,12 @@ class baseBufferController(baseBuffers.buffer):
@_tweets_exist
def reply(self, *args, **kwargs):
tweet = self.get_right_tweet()
screen_name = tweet.user.screen_name
user = self.session.get_user(tweet.user)
screen_name = user.screen_name
id = tweet.id
twishort_enabled = hasattr(tweet, "twishort")
users = utils.get_all_mentioned(tweet, self.session.db, field="screen_name")
ids = utils.get_all_mentioned(tweet, self.session.db, field="id_str")
ids = utils.get_all_mentioned(tweet, self.session.db, field="id")
# Build the window title
if len(users) < 1:
title=_("Reply to {arg0}").format(arg0=screen_name)
@ -461,8 +469,8 @@ class baseBufferController(baseBuffers.buffer):
screen_name = tweet.screen_name
users = [screen_name]
else:
screen_name = tweet.user.screen_name
users = utils.get_all_users(tweet, self.session.db)
screen_name = self.session.get_user(tweet.user).screen_name
users = utils.get_all_users(tweet, self.session)
dm = messages.dm(self.session, _(u"Direct message to %s") % (screen_name,), _(u"New direct message"), users)
if dm.message.get_response() == widgetUtils.OK:
screen_name = dm.message.get("cb")
@ -471,10 +479,12 @@ class baseBufferController(baseBuffers.buffer):
text = dm.message.get_text()
val = self.session.api_call(call_name="send_direct_message", recipient_id=recipient_id, text=text)
if val != None:
sent_dms = self.session.db["sent_direct_messages"]
if self.session.settings["general"]["reverse_timelines"] == False:
self.session.db["sent_direct_messages"].append(val)
sent_dms.append(val)
else:
self.session.db["sent_direct_messages"].insert(0, val)
sent_dms.insert(0, val)
self.session.db["sent_direct_messages"] = sent_dms
pub.sendMessage("sent-dm", data=val, user=self.session.db["user_name"])
if hasattr(dm.message, "destroy"): dm.message.destroy()
@ -501,12 +511,12 @@ class baseBufferController(baseBuffers.buffer):
comments = tweet.full_text
else:
comments = tweet.text
retweet = messages.tweet(self.session, _(u"Quote"), _(u"Add your comment to the tweet"), u"“@%s: %s" % (tweet.user.screen_name, comments), max=256, messageType="retweet")
retweet = messages.tweet(self.session, _(u"Quote"), _(u"Add your comment to the tweet"), u"“@%s: %s" % (self.session.get_user(tweet.user).screen_name, comments), max=256, messageType="retweet")
if comment != '':
retweet.message.set_text(comment)
if retweet.message.get_response() == widgetUtils.OK:
text = retweet.message.get_text()
text = text+" https://twitter.com/{0}/status/{1}".format(tweet.user.screen_name, id)
text = text+" https://twitter.com/{0}/status/{1}".format(self.session.get_user(tweet.user).screen_name, id)
if retweet.image == None:
item = self.session.api_call(call_name="update_status", _sound="retweet_send.ogg", status=text, in_reply_to_status_id=id, tweet_mode="extended")
if item != None:
@ -588,16 +598,18 @@ class baseBufferController(baseBuffers.buffer):
if self.type == "events" or self.type == "people" or self.type == "empty" or self.type == "account": return
answer = commonMessageDialogs.delete_tweet_dialog(None)
if answer == widgetUtils.YES:
items = self.session.db[self.name]
try:
if self.name == "direct_messages" or self.name == "sent_direct_messages":
self.session.twitter.destroy_direct_message(id=self.get_right_tweet().id)
self.session.db[self.name].pop(index)
items.pop(index)
else:
self.session.twitter.destroy_status(id=self.get_right_tweet().id)
self.session.db[self.name].pop(index)
items.pop(index)
self.buffer.list.remove_item(index)
except TweepError:
self.session.sound.play("error.ogg")
self.session.db[self.name] = items
@_tweets_exist
def user_details(self):
@ -607,7 +619,7 @@ class baseBufferController(baseBuffers.buffer):
elif self.type == "people":
users = [tweet.screen_name]
else:
users = utils.get_all_users(tweet, self.session.db)
users = utils.get_all_users(tweet, self.session)
dlg = dialogs.utils.selectUserDialog(title=_(u"User details"), users=users)
if dlg.get_response() == widgetUtils.OK:
user.profileController(session=self.session, user=dlg.get_user())
@ -625,7 +637,7 @@ class baseBufferController(baseBuffers.buffer):
def open_in_browser(self, *args, **kwargs):
tweet = self.get_tweet()
output.speak(_(u"Opening item in web browser..."))
url = "https://twitter.com/{screen_name}/status/{tweet_id}".format(screen_name=tweet.user.screen_name, tweet_id=tweet.id)
url = "https://twitter.com/{screen_name}/status/{tweet_id}".format(screen_name=self.session.get_user(tweet.user).screen_name, tweet_id=tweet.id)
webbrowser.open(url)
class directMessagesController(baseBufferController):
@ -646,7 +658,9 @@ class directMessagesController(baseBufferController):
items, cursor = items
if type(cursor) == tuple:
cursor = cursor[1]
self.session.db["cursors"][self.name] = cursor
cursors = self.session.db["cursors"]
cursors[self.name] = cursor
self.session.db["cursors"] = cursors
results = [i for i in items]
items = results
log.debug("Retrieved %d items for cursored search in function %s" % (len(items), self.function))
@ -657,22 +671,26 @@ class directMessagesController(baseBufferController):
return
sent = []
received = []
sent_dms = self.session.db["sent_direct_messages"]
received_dms = self.session.db["direct_messages"]
for i in items:
if int(i.message_create["sender_id"]) == self.session.db["user_id"]:
if self.session.settings["general"]["reverse_timelines"] == False:
self.session.db["sent_direct_messages"].insert(0, i)
sent_dms.insert(0, i)
sent.append(i)
else:
self.session.db["sent_direct_messages"].append(i)
sent_dms.append(i)
sent.insert(0, i)
else:
if self.session.settings["general"]["reverse_timelines"] == False:
self.session.db[self.name].insert(0, i)
received_dms.insert(0, i)
received.append(i)
else:
self.session.db[self.name].append(i)
received_dms.append(i)
received.insert(0, i)
total = total+1
self.session.db["direct_messages"] = received_dms
self.session.db["sent_direct_messages"] = sent_dms
user_ids = [item.message_create["sender_id"] for item in items]
self.session.save_users(user_ids)
pub.sendMessage("more-sent-dms", data=sent, account=self.session.db["user_name"])
@ -885,7 +903,9 @@ class peopleBufferController(baseBufferController):
val, cursor = val
if type(cursor) == tuple:
cursor = cursor[1]
self.session.db["cursors"][self.name] = cursor
cursors = self.session.db["cursors"]
cursors[self.name] = cursor
self.session.db["cursors"] = cursors
results = [i for i in val]
val = results
val.reverse()
@ -914,7 +934,9 @@ class peopleBufferController(baseBufferController):
items, cursor = items
if type(cursor) == tuple:
cursor = cursor[1]
self.session.db["cursors"][self.name] = cursor
cursors = self.session.db["cursors"]
cursors[self.name] = cursor
self.session.db["cursors"] = cursors
results = [i for i in items]
items = results
log.debug("Retrieved %d items from cursored search in function %s" % (len(items), self.function))
@ -923,11 +945,13 @@ class peopleBufferController(baseBufferController):
return
if items == None:
return
items_db = self.session.db[self.name]
for i in items:
if self.session.settings["general"]["reverse_timelines"] == False:
self.session.db[self.name].insert(0, i)
items_db.insert(0, i)
else:
self.session.db[self.name].append(i)
items_db.append(i)
self.session.db[self.name] = items_db
selected = self.buffer.list.get_selected()
if self.session.settings["general"]["reverse_timelines"] == True:
for i in items:

View File

@ -254,8 +254,8 @@ class Controller(object):
# Connection checker executed each minute.
self.checker_function = RepeatingTimer(60, self.check_connection)
# self.checker_function.start()
self.save_db = RepeatingTimer(300, self.save_data_in_db)
self.save_db.start()
# self.save_db = RepeatingTimer(300, self.save_data_in_db)
# self.save_db.start()
log.debug("Setting updates to buffers every %d seconds..." % (60*config.app["app-settings"]["update_period"],))
self.update_buffers_function = RepeatingTimer(60*config.app["app-settings"]["update_period"], self.update_buffers)
self.update_buffers_function.start()
@ -530,7 +530,7 @@ class Controller(object):
elif buff.type == "dm":
users = [buff.session.get_user(tweet.message_create["sender_id"]).screen_name]
else:
users = utils.get_all_users(tweet, buff.session.db)
users = utils.get_all_users(tweet, buff.session)
dlg = dialogs.utils.selectUserDialog(_(u"Select the user"), users)
if dlg.get_response() == widgetUtils.OK:
user = dlg.get_user()
@ -547,7 +547,7 @@ class Controller(object):
elif buff.type == "dm":
users = [buff.session.get_user(tweet.message_create["sender_id"]).screen_name]
else:
users = utils.get_all_users(tweet, buff.session.db)
users = utils.get_all_users(tweet, buff.session)
dlg = dialogs.utils.selectUserDialog(_(u"Select the user"), users)
if dlg.get_response() == widgetUtils.OK:
user = dlg.get_user()
@ -575,7 +575,7 @@ class Controller(object):
elif buff.type == "dm":
users = [buff.session.get_user(tweet.message_create["sender_id"]).screen_name]
else:
users = utils.get_all_users(tweet, buff.session.db)
users = utils.get_all_users(tweet, buff.session)
dlg = dialogs.utils.selectUserDialog(_(u"Select the user"), users)
if dlg.get_response() == widgetUtils.OK:
user = dlg.get_user()
@ -617,6 +617,7 @@ class Controller(object):
if d.needs_restart == True:
commonMessageDialogs.needs_restart()
buff.session.settings.write()
buff.session.save_persistent_data()
restart.restart_program()
def report_error(self, *args, **kwargs):
@ -651,8 +652,8 @@ class Controller(object):
if sessions.sessions[item].logged == False: continue
log.debug("Disconnecting streams for %s session" % (sessions.sessions[item].session_id,))
sessions.sessions[item].sound.cleaner.cancel()
log.debug("Shelving database for " + sessions.sessions[item].session_id)
sessions.sessions[item].shelve()
log.debug("Saving database for " + sessions.sessions[item].session_id)
sessions.sessions[item].save_persistent_data()
if system == "Windows":
self.systrayIcon.RemoveIcon()
pidpath = os.path.join(os.getenv("temp"), "{}.pid".format(application.name))
@ -669,7 +670,7 @@ class Controller(object):
elif buff.type == "dm":
users = [buff.session.get_user(tweet.message_create["sender_id"]).screen_name]
else:
users = utils.get_all_users(tweet, buff.session.db)
users = utils.get_all_users(tweet, buff.session)
u = userActionsController.userActionsController(buff, users)
def unfollow(self, *args, **kwargs):
@ -681,7 +682,7 @@ class Controller(object):
elif buff.type == "dm":
users = [buff.session.get_user(tweet.message_create["sender_id"]).screen_name]
else:
users = utils.get_all_users(tweet, buff.session.db)
users = utils.get_all_users(tweet, buff.session)
u = userActionsController.userActionsController(buff, users, "unfollow")
def mute(self, *args, **kwargs):
@ -693,7 +694,7 @@ class Controller(object):
elif buff.type == "dm":
users = [buff.session.get_user(tweet.message_create["sender_id"]).screen_name]
else:
users = utils.get_all_users(tweet, buff.session.db)
users = utils.get_all_users(tweet, buff.session)
u = userActionsController.userActionsController(buff, users, "mute")
def unmute(self, *args, **kwargs):
@ -705,7 +706,7 @@ class Controller(object):
elif buff.type == "dm":
users = [buff.session.get_user(tweet.message_create["sender_id"]).screen_name]
else:
users = utils.get_all_users(tweet, buff.session.db)
users = utils.get_all_users(tweet, buff.session)
u = userActionsController.userActionsController(buff, users, "unmute")
def block(self, *args, **kwargs):
@ -717,7 +718,7 @@ class Controller(object):
elif buff.type == "dm":
users = [buff.session.get_user(tweet.message_create["sender_id"]).screen_name]
else:
users = utils.get_all_users(tweet, buff.session.db)
users = utils.get_all_users(tweet, buff.session)
u = userActionsController.userActionsController(buff, users, "block")
def unblock(self, *args, **kwargs):
@ -729,7 +730,7 @@ class Controller(object):
elif buff.type == "dm":
users = [buff.session.get_user(tweet.message_create["sender_id"]).screen_name]
else:
users = utils.get_all_users(tweet, buff.session.db)
users = utils.get_all_users(tweet, buff.session)
u = userActionsController.userActionsController(buff, users, "unblock")
def report(self, *args, **kwargs):
@ -741,7 +742,7 @@ class Controller(object):
elif buff.type == "dm":
users = [buff.session.get_user(tweet.message_create["sender_id"]).screen_name]
else:
users = utils.get_all_users(tweet, buff.session.db)
users = utils.get_all_users(tweet, buff.session)
u = userActionsController.userActionsController(buff, users, "report")
def post_tweet(self, event=None):
@ -828,7 +829,7 @@ class Controller(object):
elif buff.type == "dm":
users = [buff.session.get_user(tweet.message_create["sender_id"]).screen_name]
else:
users = utils.get_all_users(tweet, buff.session.db)
users = utils.get_all_users(tweet, buff.session)
dlg = dialogs.userSelection.selectUserDialog(users=users, default=default)
if dlg.get_response() == widgetUtils.OK:
usr = utils.if_user_exists(buff.session.twitter, dlg.get_user())
@ -923,8 +924,8 @@ class Controller(object):
def open_conversation(self, *args, **kwargs):
buffer = self.get_current_buffer()
id = buffer.get_right_tweet().id_str
user = buffer.get_right_tweet().user.screen_name
id = buffer.get_right_tweet().id
user = buffer.session.get_user(buffer.get_right_tweet().user).screen_name
search = twitterBuffers.conversationBufferController(self.view.nb, "search", "%s-searchterm" % (id,), buffer.session, buffer.session.db["user_name"], bufferType="searchPanel", sound="search_updated.ogg", since_id=id, q="@{0}".format(user,))
search.tweet = buffer.get_right_tweet()
search.start_stream(start=True)
@ -1274,10 +1275,12 @@ class Controller(object):
data = buffer.session.check_long_tweet(data)
if data == False: # Long tweet deleted from twishort.
return
items = buffer.session.db[buffer.name]
if buffer.session.settings["general"]["reverse_timelines"] == False:
buffer.session.db[buffer.name].append(data)
items.append(data)
else:
buffer.session.db[buffer.name].insert(0, data)
items.insert(0, data)
buffer.session.db[buffer.name] = items
buffer.add_new_item(data)
def manage_friend(self, data, user):
@ -1623,4 +1626,4 @@ class Controller(object):
def save_data_in_db(self):
for i in sessions.sessions:
sessions.sessions[i].shelve()
sessions.sessions[i].save_persistent_data()

View File

@ -1,7 +1,4 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from builtins import str
from builtins import object
import os
import webbrowser
import sound_lib
@ -151,6 +148,7 @@ class accountSettingsController(globalSettingsController):
else:
self.dialog.set_value("general", "retweet_mode", _(u"Retweet with comments"))
self.dialog.set_value("general", "persist_size", str(self.config["general"]["persist_size"]))
self.dialog.set_value("general", "load_cache_in_memory", self.config["general"]["load_cache_in_memory"])
self.dialog.create_reporting()
self.dialog.set_value("reporting", "speech_reporting", self.config["reporting"]["speech_reporting"])
self.dialog.set_value("reporting", "braille_reporting", self.config["reporting"]["braille_reporting"])
@ -193,6 +191,9 @@ class accountSettingsController(globalSettingsController):
self.config["general"]["relative_times"] = self.dialog.get_value("general", "relative_time")
self.config["general"]["show_screen_names"] = self.dialog.get_value("general", "show_screen_names")
self.config["general"]["max_tweets_per_call"] = self.dialog.get_value("general", "itemsPerApiCall")
if self.config["general"]["load_cache_in_memory"] != self.dialog.get_value("general", "load_cache_in_memory"):
self.config["general"]["load_cache_in_memory"] = self.dialog.get_value("general", "load_cache_in_memory")
self.needs_restart = True
if self.config["general"]["persist_size"] != self.dialog.get_value("general", "persist_size"):
if self.dialog.get_value("general", "persist_size") == '':
self.config["general"]["persist_size"] =-1

18
src/run_tests.py Normal file
View File

@ -0,0 +1,18 @@
# -*- coding: utf-8 -*-
import unittest
# Dotted names of the modules whose tests are collected and run.
testmodules = ["test.test_cache"]
suite = unittest.TestSuite()
loader = unittest.defaultTestLoader
for module_name in testmodules:
    try:
        # Prefer the module's own suite() factory when it provides one.
        module = __import__(module_name, globals(), locals(), ['suite'])
        suite.addTest(module.suite())
    except (ImportError, AttributeError):
        # No usable suite() factory: fall back to loading every test case
        # found in the module by name.
        suite.addTest(loader.loadTestsFromName(module_name))
unittest.TextTestRunner(verbosity=2).run(suite)

View File

@ -1,9 +1,5 @@
# -*- coding: utf-8 -*-
""" A base class to be derived in possible new sessions for TWBlue and services."""
from __future__ import absolute_import
from __future__ import unicode_literals
from builtins import str
from builtins import object
import os
import paths
import output
@ -11,9 +7,8 @@ import time
import sound
import logging
import config_utils
import shelve
import sqlitedict
import application
import os
from . import session_exceptions as Exceptions
log = logging.getLogger("sessionmanager.session")
@ -59,7 +54,7 @@ class baseSession(object):
log.debug("Creating config file %s" % (file_,))
self.settings = config_utils.load_config(os.path.join(paths.config_path(), file_), os.path.join(paths.app_path(), "Conf.defaults"))
self.init_sound()
self.deshelve()
self.load_persistent_data()
def init_sound(self):
try: self.sound = sound.soundSystem(self.settings["sound"])
@ -73,48 +68,88 @@ class baseSession(object):
def authorise(self):
pass
def shelve(self):
"""Shelve the database to allow for persistance."""
shelfname=os.path.join(paths.config_path(), str(self.session_id), "cache")
if self.settings["general"]["persist_size"] == 0:
if os.path.exists(shelfname+".dat"):
os.remove(shelfname+".dat")
return
try:
if not os.path.exists(shelfname+".dat"):
output.speak("Generating database, this might take a while.",True)
shelf=shelve.open(os.path.join(paths.config_path(), shelfname),'c')
for key, value in list(self.db.items()):
if type(key) != str and type(key) != str:
output.speak("Uh oh, while shelving the database, a key of type " + str(type(key)) + " has been found. It will be converted to type str, but this will cause all sorts of problems on deshelve. Please bring this to the attention of the " + application.name + " developers immediately. More information about the error will be written to the error log.",True)
log.error("Uh oh, " + str(key) + " is of type " + str(type(key)) + "!")
if type(value) == list and self.settings["general"]["persist_size"] != -1 and len(value) > self.settings["general"]["persist_size"]:
shelf[key]=value[self.settings["general"]["persist_size"]:]
def get_sized_buffer(self, buffer, size, reversed=False):
""" Returns a list with the amount of items specified by size."""
if isinstance(buffer, list) and size != -1 and len(buffer) > size:
log.debug("Requesting {} items from a list of {} items. Reversed mode: {}".format(size, len(buffer), reversed))
if reversed == True:
return buffer[:size]
else:
shelf[key]=value
shelf.close()
except:
output.speak("An exception occurred while shelving the " + application.name + " database. It will be deleted and rebuilt automatically. If this error persists, send the error log to the " + application.name + " developers.",True)
log.exception("Exception while shelving" + shelfname)
os.remove(shelfname)
return buffer[len(buffer)-size:]
else:
return buffer
def deshelve(self):
"""Import a shelved database."""
shelfname=os.path.join(paths.config_path(), str(self.session_id)+"/cache")
def save_persistent_data(self):
""" Save the data to a persistent sqlite backed file. ."""
dbname=os.path.join(paths.config_path(), str(self.session_id), "cache.db")
log.debug("Saving storage information...")
# persist_size set to 0 means not saving data actually.
if self.settings["general"]["persist_size"] == 0:
if os.path.exists(shelfname+".dat"):
os.remove(shelfname+".dat")
if os.path.exists(dbname):
os.remove(dbname)
return
# Let's check whether we need to create a new SqliteDict object (when the db is loaded in memory) or we just need to call commit on self.db (when reading from disk).
# If we read from disk, we cannot modify the buffer size here as we could damage the app's integrity.
# We will modify buffer's size (managed by persist_size) upon loading the db into memory in app startup.
if self.settings["general"]["load_cache_in_memory"] and isinstance(self.db, dict):
log.debug("Opening database to dump memory contents...")
db=sqlitedict.SqliteDict(dbname, 'c')
for k in self.db.keys():
sized_buff = self.get_sized_buffer(self.db[k], self.settings["general"]["persist_size"], self.settings["general"]["reverse_timelines"])
db[k] = sized_buff
db.commit(blocking=True)
db.close()
log.debug("Data has been saved in the database.")
else:
try:
shelf=shelve.open(os.path.join(paths.config_path(), shelfname),'c')
for key,value in list(shelf.items()):
self.db[key]=value
shelf.close()
log.debug("Syncing new data to disk...")
if hasattr(self.db, "commit"):
self.db.commit()
except:
output.speak("An exception occurred while deshelving the " + application.name + " database. It will be deleted and rebuilt automatically. If this error persists, send the error log to the " + application.name + " developers.",True)
log.exception("Exception while deshelving" + shelfname)
output.speak(_("An exception occurred while saving the {app} database. It will be deleted and rebuilt automatically. If this error persists, send the error log to the {app} developers.").format(app=application.name),True)
log.exception("Exception while saving {}".format(dbname))
os.remove(dbname)
def load_persistent_data(self):
"""Import data from a database file from user config."""
log.debug("Loading storage data...")
dbname=os.path.join(paths.config_path(), str(self.session_id), "cache.db")
# If persist_size is set to 0, we should remove the db file as we are no longer going to save anything.
if self.settings["general"]["persist_size"] == 0:
if os.path.exists(dbname):
os.remove(dbname)
# Let's return from here, as we are not loading anything.
return
# try to load the db file.
try:
os.remove(shelfname)
log.debug("Opening database...")
db=sqlitedict.SqliteDict(os.path.join(paths.config_path(), dbname), 'c')
# If load_cache_in_memory is set to true, we will load the whole database into memory for faster access.
# This is going to be faster when retrieving specific objects, at the cost of more memory.
# Setting this to False will read the objects from database as they are needed, which might be slower for bigger datasets.
if self.settings["general"]["load_cache_in_memory"]:
log.debug("Loading database contents into memory...")
for k in db.keys():
self.db[k] = db[k]
db.commit(blocking=True)
db.close()
log.debug("Contents were loaded successfully.")
else:
log.debug("Instantiating database from disk.")
self.db = db
# We must make sure we won't load more than the amount of buffer specified.
log.debug("Checking if we will load all content...")
for k in self.db.keys():
sized_buffer = self.get_sized_buffer(self.db[k], self.settings["general"]["persist_size"], self.settings["general"]["reverse_timelines"])
self.db[k] = sized_buffer
if self.db.get("cursors") == None:
cursors = dict(direct_messages=-1)
self.db["cursors"] = cursors
except:
output.speak(_("An exception occurred while loading the {app} database. It will be deleted and rebuilt automatically. If this error persists, send the error log to the {app} developers.").format(app=application.name), True)
log.exception("Exception while loading {}".format(dbname))
try:
os.remove(dbname)
except:
pass

View File

@ -49,22 +49,23 @@ def compose_tweet(tweet, db, relative_times, show_screen_names=False, session=No
else:
text = StripChars(getattr(tweet, value))
if show_screen_names:
user = tweet.user.screen_name
user = session.get_user(tweet.user).screen_name
else:
user = tweet.user.name
user = session.get_user(tweet.user).name
source = re.sub(r"(?s)<.*?>", "", tweet.source)
if hasattr(tweet, "retweeted_status"):
if (hasattr(tweet, "message")) == False and tweet.retweeted_status.is_quote_status == False:
text = "RT @%s: %s" % (tweet.retweeted_status.user.screen_name, text)
elif tweet.retweeted_status.is_quote_status:
if hasattr(tweet, "message") == False and hasattr(tweet.retweeted_status, "is_quote_status") == False:
text = "RT @%s: %s" % (session.get_user(tweet.retweeted_status.user).screen_name, text)
elif hasattr(tweet.retweeted_status, "is_quote_status"):
text = "%s" % (text)
else:
text = "RT @%s: %s" % (tweet.retweeted_status.user.screen_name, text)
text = "RT @%s: %s" % (session.get_user(tweet.retweeted_status.user).screen_name, text)
if not hasattr(tweet, "message"):
if hasattr(tweet, "retweeted_status"):
if hasattr(tweet.retweeted_status, "entities"):
text = utils.expand_urls(text, tweet.retweeted_status.entities)
else:
if hasattr(tweet, "entities"):
text = utils.expand_urls(text, tweet.entities)
if config.app['app-settings']['handle_longtweets']: pass
return [user+", ", text, ts+", ", source]
@ -112,14 +113,14 @@ def compose_quoted_tweet(quoted_tweet, original_tweet, show_screen_names=False,
value = "text"
text = StripChars(getattr(quoted_tweet, value))
if show_screen_names:
quoting_user = quoted_tweet.user.screen_name
quoting_user = session.get_user(quoted_tweet.user).screen_name
else:
quoting_user = quoted_tweet.user.name
quoting_user = session.get_user(quoted_tweet.user).name
source = quoted_tweet.source
if hasattr(quoted_tweet, "retweeted_status"):
text = "rt @%s: %s" % (quoted_tweet.retweeted_status.user.screen_name, text)
text = "rt @%s: %s" % (session.get_user(quoted_tweet.retweeted_status.user).screen_name, text)
if text[-1] in chars: text=text+"."
original_user = original_tweet.user.screen_name
original_user = session.get_user(original_tweet.user).screen_name
if hasattr(original_tweet, "message"):
original_text = original_tweet.message
elif hasattr(original_tweet, "full_text"):
@ -128,6 +129,11 @@ def compose_quoted_tweet(quoted_tweet, original_tweet, show_screen_names=False,
original_text = StripChars(original_tweet.text)
quoted_tweet.message = _(u"{0}. Quoted tweet from @{1}: {2}").format( text, original_user, original_text)
quoted_tweet = tweets.clear_url(quoted_tweet)
if hasattr(original_tweet, "entities") and original_tweet.entities.get("urls"):
if hasattr(quoted_tweet, "entities") == False:
quoted_tweet.entities = {}
if quoted_tweet.entities.get("urls") == None:
quoted_tweet.entities["urls"] = []
quoted_tweet.entities["urls"].extend(original_tweet.entities["urls"])
return quoted_tweet

View File

@ -0,0 +1,37 @@
# -*- coding: utf-8 -*-
""" Strips unneeded tweet information in order to store tweet objects by using less memory. This is especially useful when buffers start to contain more than a certain amount of items. """
from tweepy.models import Status
def reduce_tweet(tweet):
    """ Generates a new Tweet model holding only the fields we currently need.

    Everything outside the allow-lists below is discarded, and so is any
    allowed field whose value is falsy (None, empty collections, False, 0).
    Quoted and retweeted statuses are reduced recursively, and the user object
    is replaced by its ID string so it can be looked up later.

    tweet: a tweepy Status-like object exposing _json, user and, when present,
    quoted_status / retweeted_status.
    returns the reduced Status, or the very same object when it had already
    been reduced (its user attribute is a string).
    """
    # Sometimes the conversations buffer sends an already reduced tweet here.
    # Check this before doing any work so nothing is rebuilt just to be thrown away.
    if isinstance(tweet.user, str):
        return tweet
    allowed_values = ["created_at", "id", "full_text", "text", "message", "in_reply_to_status_id", "in_reply_to_user_id", "is_quote_status", "lang", "source", "coordinates", "quoted_status_id", ]
    allowed_entities = ["hashtags", "media", "urls", "user_mentions", "polls"]
    source_json = tweet._json
    status_dict = {key: source_json[key] for key in allowed_values if source_json.get(key)}
    # A payload without an "entities" key is treated as having no entities at all.
    source_entities = source_json.get("entities") or {}
    status_dict["entities"] = {key: source_entities[key] for key in allowed_entities if source_entities.get(key)}
    # If tweet comes from the cached database, it does not include an API, so we can pass None here as we do not use that reference to tweepy's API.
    api = getattr(tweet, "_api", None)
    status = Status().parse(api=api, json=status_dict)
    # Quotes and retweets are different objects. So we parse a new tweet when we have a quoted or retweeted status here.
    if source_json.get("quoted_status"):
        status.quoted_status = reduce_tweet(tweet.quoted_status)
    if source_json.get("retweeted_status"):
        status.retweeted_status = reduce_tweet(tweet.retweeted_status)
    # Adds user ID to here so we can reference it later.
    status.user = tweet.user.id_str
    return status

View File

@ -17,6 +17,7 @@ from keys import keyring
from sessions import base
from sessions.twitter import utils, compose
from sessions.twitter.long_tweets import tweets, twishort
from . import reduce
from .wxUI import authorisationDialog
log = logging.getLogger("sessions.twitterSession")
@ -38,26 +39,27 @@ class Session(base.baseSession):
self.db[name] = []
if ("users" in self.db) == False:
self.db["users"] = {}
objects = self.db[name]
if ignore_older and len(self.db[name]) > 0:
if self.settings["general"]["reverse_timelines"] == False:
last_id = self.db[name][0].id
else:
last_id = self.db[name][-1].id
self.add_users_from_results(data)
for i in data:
if ignore_older and last_id != None:
if i.id < last_id:
log.error("Ignoring an older tweet... Last id: {0}, tweet id: {1}".format(last_id, i.id))
continue
if utils.find_item(i.id, self.db[name]) == None and utils.is_allowed(i, self.settings, name) == True:
i = self.check_quoted_status(i)
i = self.check_long_tweet(i)
if i == False: continue
if self.settings["general"]["reverse_timelines"] == False: self.db[name].append(i)
else: self.db[name].insert(0, i)
reduced_object = reduce.reduce_tweet(i)
reduced_object = self.check_quoted_status(reduced_object)
reduced_object = self.check_long_tweet(reduced_object)
if self.settings["general"]["reverse_timelines"] == False: objects.append(reduced_object)
else: objects.insert(0, reduced_object)
num = num+1
if hasattr(i, "user"):
if (i.user.id in self.db["users"]) == False:
self.db["users"][i.user.id] = i.user
self.db[name] = objects
return num
def order_people(self, name, data):
@ -68,11 +70,13 @@ class Session(base.baseSession):
num = 0
if (name in self.db) == False:
self.db[name] = []
objects = self.db[name]
for i in data:
if utils.find_item(i.id, self.db[name]) == None:
if self.settings["general"]["reverse_timelines"] == False: self.db[name].append(i)
else: self.db[name].insert(0, i)
if self.settings["general"]["reverse_timelines"] == False: objects.append(i)
else: objects.insert(0, i)
num = num+1
self.db[name] = objects
return num
def order_direct_messages(self, data):
@ -83,19 +87,28 @@ class Session(base.baseSession):
sent = 0
if ("direct_messages" in self.db) == False:
self.db["direct_messages"] = []
if ("sent_direct_messages" in self.db) == False:
self.db["sent_direct_messages"] = []
objects = self.db["direct_messages"]
sent_objects = self.db["sent_direct_messages"]
for i in data:
# Twitter returns sender_id as str, which must be converted to int in order to match to our user_id object.
if int(i.message_create["sender_id"]) == self.db["user_id"]:
if "sent_direct_messages" in self.db and utils.find_item(i.id, self.db["sent_direct_messages"]) == None:
if self.settings["general"]["reverse_timelines"] == False: self.db["sent_direct_messages"].append(i)
else: self.db["sent_direct_messages"].insert(0, i)
if self.settings["general"]["reverse_timelines"] == False: sent_objects.append(i)
else: sent_objects.insert(0, i)
sent = sent+1
else:
if utils.find_item(i.id, self.db["direct_messages"]) == None:
if self.settings["general"]["reverse_timelines"] == False: self.db["direct_messages"].append(i)
else: self.db["direct_messages"].insert(0, i)
if self.settings["general"]["reverse_timelines"] == False: objects.append(i)
else: objects.insert(0, i)
incoming = incoming+1
self.db["direct_messages"] = objects
self.db["sent_direct_messages"] = sent_objects
pub.sendMessage("sent-dms-updated", total=sent, account=self.db["user_name"])
return incoming
def __init__(self, *args, **kwargs):
@ -106,6 +119,12 @@ class Session(base.baseSession):
self.reconnection_function_active = False
self.counter = 0
self.lists = []
# As users are cached for accessing them with not too many twitter calls,
# there could be a weird situation where a deleted user who sent direct messages to the current account will not be able to be retrieved at twitter.
# So we need to store an "user deleted" object in the cache, but have the ID of the deleted user in a local reference.
# This will be especially useful because if the user reactivates their account later, TWblue will try to retrieve such user again at startup.
# If we wouldn't implement this approach, TWBlue would save permanently the "deleted user" object.
self.deleted_users = {}
# @_require_configuration
def login(self, verify_credentials=True):
@ -161,35 +180,6 @@ class Session(base.baseSession):
self.verify_authorisation(pincode)
self.authorisation_dialog.Destroy()
def get_more_items(self, update_function, users=False, dm=False, name=None, *args, **kwargs):
    """ Get more items for twitter objects.
    update_function str: function to call for getting more items. Must be member of self.twitter.
    users, dm bool: If any of these is set to True, the function will treat items as users or dm (they need different handling).
    name str: name of the database item to put new element in.
    Extra positional and keyword arguments are forwarded to the update function.
    returns a list with the new items, or None when the cursor passed in kwargs is 0 (nothing left to retrieve)."""
    if kwargs.get("cursor") == 0:
        output.speak(_(u"There are no more items to retrieve in this buffer."))
        return
    data = getattr(self.twitter, update_function)(*args, **kwargs)
    results = []
    if users:
        if isinstance(data, dict):
            # Store the next cursor, or 0 when there is none, which means no more items available.
            # The dm branch below already behaves this way.
            self.db[name]["cursor"] = data.get("next_cursor", 0)
            results.extend(data.get("users", []))
        elif isinstance(data, list):
            # The first element is skipped, exactly as for regular timelines below.
            results.extend(data[1:])
    elif dm:
        # Set cursor to 0 when there is no next cursor, which means no more items available.
        self.db[name]["cursor"] = data["next_cursor"] if "next_cursor" in data else 0
        results.extend(data["events"])
    else:
        results.extend(data[1:])
    return results
def api_call(self, call_name, action="", _sound=None, report_success=False, report_failure=True, preexec_message="", *args, **kwargs):
""" Make a call to the Twitter API. If there is a connectionError or another exception not related to Twitter, It will call the method again at least 25 times, waiting a while between calls. Useful for post methods.
If twitter returns an error, it will not call the method anymore.
@ -359,10 +349,11 @@ class Session(base.baseSession):
value = "full_text"
else:
value = "text"
if hasattr(quoted_tweet, "entities"):
setattr(quoted_tweet, value, utils.expand_urls(getattr(quoted_tweet, value), quoted_tweet.entities))
if quoted_tweet.is_quote_status == True and hasattr(quoted_tweet, "quoted_status"):
if hasattr(quoted_tweet, "is_quote_status") == True and hasattr(quoted_tweet, "quoted_status"):
original_tweet = quoted_tweet.quoted_status
elif hasattr(quoted_tweet, "retweeted_status") and quoted_tweet.retweeted_status.is_quote_status == True and hasattr(quoted_tweet.retweeted_status, "quoted_status"):
elif hasattr(quoted_tweet, "retweeted_status") and hasattr(quoted_tweet.retweeted_status, "is_quote_status") == True and hasattr(quoted_tweet.retweeted_status, "quoted_status"):
original_tweet = quoted_tweet.retweeted_status.quoted_status
else:
return quoted_tweet
@ -373,13 +364,17 @@ class Session(base.baseSession):
value = "message"
else:
value = "text"
if hasattr(original_tweet, "entities"):
setattr(original_tweet, value, utils.expand_urls(getattr(original_tweet, value), original_tweet.entities))
return compose.compose_quoted_tweet(quoted_tweet, original_tweet)
# ToDo: Shall we check whether we should add show_screen_names here?
return compose.compose_quoted_tweet(quoted_tweet, original_tweet, session=self)
def check_long_tweet(self, tweet):
""" Process a tweet and add extra info if it's a long tweet made with Twyshort.
tweet dict: a tweet object.
returns a tweet with a new argument message, or original tweet if it's not a long tweet."""
long = False
if hasattr(tweet, "entities") and tweet.entities.get("urls"):
long = twishort.is_long(tweet)
if long != False and config.app["app-settings"]["handle_longtweets"]:
message = twishort.get_full_text(long)
@ -387,26 +382,37 @@ class Session(base.baseSession):
tweet.quoted_status.message = message
if tweet.quoted_status.message == False: return False
tweet.quoted_status.twishort = True
if hasattr(tweet.quoted_status, "entities") and tweet.quoted_status.entities.get("user_mentions"):
for i in tweet.quoted_status.entities["user_mentions"]:
if "@%s" % (i["screen_name"]) not in tweet.quoted_status.message and i["screen_name"] != tweet.user.screen_name:
if hasattr(tweet.quoted_status, "retweeted_status") and tweet.retweeted_status.user.screen_name == i["screen_name"]:
if "@%s" % (i["screen_name"]) not in tweet.quoted_status.message and i["screen_name"] != self.get_user(tweet.user).screen_name:
if hasattr(tweet.quoted_status, "retweeted_status") and self.get_user(tweet.retweeted_status.user).screen_name == i["screen_name"]:
continue
tweet.quoted_status.message = u"@%s %s" % (i["screen_name"], tweet.message)
else:
tweet.message = message
if tweet.message == False: return False
tweet.twishort = True
if hasattr(tweet, "entities") and tweet.entities.get("user_mentions"):
for i in tweet.entities["user_mentions"]:
if "@%s" % (i["screen_name"]) not in tweet.message and i["screen_name"] != tweet.user.screen_name:
if hasattr(tweet, "retweeted_status") and tweet.retweeted_status.user.screen_name == i["screen_name"]:
if "@%s" % (i["screen_name"]) not in tweet.message and i["screen_name"] != self.get_user(tweet.user).screen_name:
if hasattr(tweet, "retweeted_status") and self.get_user(tweet.retweeted_status.user).screen_name == i["screen_name"]:
continue
tweet.message = u"@%s %s" % (i["screen_name"], tweet.message)
return tweet
def get_user(self, id):
""" Returns an user object associated with an ID.
id str: User identifier, provided by Twitter.
returns a tweepy user object."""
if ("users" in self.db) == False or (id in self.db["users"]) == False:
if hasattr(id, "id_str"):
log.error("Called get_user function by passing a full user id as a parameter.")
id = id.id_str
# Check if the user has been added to the list of deleted users previously.
if id in self.deleted_users:
log.debug("Returning user {} from the list of deleted users.".format(id))
return self.deleted_users[id]
if ("users" in self.db) == False or (str(id) in self.db["users"]) == False:
log.debug("Requesting user id {} as it is not present in the users database.".format(id))
try:
user = self.twitter.get_user(id=id)
except TweepError as err:
@ -414,11 +420,18 @@ class Session(base.baseSession):
user.screen_name = "deleted_user"
user.id = id
user.name = _("Deleted account")
user.id_str = id
self.db["users"][user.id_str] = user
if hasattr(err, "api_code") and err.api_code == 50:
self.deleted_users[id] = user
return user
else:
return self.db["users"][id]
log.exception("Error when attempting to retrieve an user from Twitter.")
return user
users = self.db["users"]
users[user.id_str] = user
self.db["users"] = users
return user
else:
return self.db["users"][str(id)]
def get_user_by_screen_name(self, screen_name):
""" Returns an user identifier associated with a screen_name.
@ -426,28 +439,65 @@ class Session(base.baseSession):
returns an user ID."""
if ("users" in self.db) == False:
user = utils.if_user_exists(self.twitter, screen_name)
self.db["users"][user["id_str"]] = user
return user["id_str"]
users = self.db["users"]
users[user["id"]] = user
self.db["users"] = users
return user["id"]
else:
for i in list(self.db["users"].keys()):
if self.db["users"][i].screen_name == screen_name:
return self.db["users"][i].id_str
return self.db["users"][i].id
user = utils.if_user_exists(self.twitter, screen_name)
self.db["users"][user.id_str] = user
return user.id_str
users = self.db["users"]
users[user.id] = user
self.db["users"] = users
return user.id
def save_users(self, user_ids):
""" Adds all new users to the users database. """
if len(user_ids) == 0:
return
log.debug("Received %d user IDS to be added in the database." % (len(user_ids)))
users_to_retrieve = [user_id for user_id in user_ids if user_id not in self.db["users"]]
users_to_retrieve = [user_id for user_id in user_ids if (user_id not in self.db["users"] and user_id not in self.deleted_users)]
# Remove duplicates
users_to_retrieve = list(dict.fromkeys(users_to_retrieve))
if len(users_to_retrieve) == 0:
return
log.debug("TWBlue will get %d new users from Twitter." % (len(users_to_retrieve)))
try:
users = self.twitter.lookup_users(user_ids=users_to_retrieve, tweet_mode="extended")
users_db = self.db["users"]
for user in users:
self.db["users"][user.id_str] = user
users_db[user.id_str] = user
log.debug("Added %d new users" % (len(users)))
self.db["users"] = users_db
except TweepError as err:
if hasattr(err, "api_code") and err.api_code == 17: # Users not found.
log.error("The specified users {} were not found in twitter.".format(user_ids))
# Creates a deleted user object for every user_id not found here.
# This will make TWBlue to not waste Twitter API calls when attempting to retrieve those users again.
# As deleted_users is not saved across restarts, when restarting TWBlue, it will retrieve the correct users if they enabled their accounts.
for id in users_to_retrieve:
user = UserModel(None)
user.screen_name = "deleted_user"
user.id = id
user.name = _("Deleted account")
self.deleted_users[id] = user
else:
log.exception("An exception happened while attempting to retrieve a list of users from direct messages in Twitter.")
def add_users_from_results(self, data):
    """ Adds to the users database every user found in a list of results.
    data list: items (normally tweets) that may carry a user attribute and, optionally, quoted_status / retweeted_status with their own users.
    Users are stored under their id_str. Items without a user attribute are ignored, and items whose user is already a string (an already reduced tweet) are skipped with a warning."""
    # Read the users table once and work on that copy: every self.db["users"] access
    # may be a full load when the database is read from disk.
    users = self.db["users"]
    for item in data:
        if not hasattr(item, "user"):
            continue
        if isinstance(item.user, str):
            log.warning("A String was passed to be added as an user. This is normal only if TWBlue tried to load a conversation.")
            continue
        if item.user.id_str not in users:
            users[item.user.id_str] = item.user
        if hasattr(item, "quoted_status") and item.quoted_status.user.id_str not in users:
            users[item.quoted_status.user.id_str] = item.quoted_status.user
        if hasattr(item, "retweeted_status") and item.retweeted_status.user.id_str not in users:
            users[item.retweeted_status.user.id_str] = item.retweeted_status.user
    # Assign back so a disk-backed database persists the updated table.
    self.db["users"] = users

View File

@ -27,22 +27,16 @@ def find_urls (tweet):
if hasattr(tweet, "message_create"):
entities = tweet.message_create["message_data"]["entities"]
else:
if hasattr(tweet, "entities") == True:
entities = tweet.entities
if entities.get("urls") != None:
for i in entities["urls"]:
if i["expanded_url"] not in urls:
urls.append(i["expanded_url"])
if hasattr(tweet, "quoted_status"):
for i in tweet.quoted_status.entities["urls"]:
if i["expanded_url"] not in urls:
urls.append(i["expanded_url"])
urls.extend(find_urls(tweet.quoted_status))
if hasattr(tweet, "retweeted_status"):
for i in tweet.retweeted_status.entities["urls"]:
if i["expanded_url"] not in urls:
urls.append(i["expanded_url"])
if hasattr(tweet["retweeted_status"], "quoted_status"):
for i in tweet.retweeted_status.quoted_status.entities["urls"]:
if i["expanded_url"] not in urls:
urls.append(i["expanded_url"])
urls.extend(find_urls(tweet.retweeted_status))
if hasattr(tweet, "message"):
i = "message"
elif hasattr(tweet, "full_text"):
@ -75,13 +69,14 @@ def is_audio(tweet):
if hasattr(tweet, "message_create"):
entities = tweet.message_create["message_data"]["entities"]
else:
if hasattr(tweet, "entities") == False or tweet.entities.get("hashtags") == None:
return False
entities = tweet.entities
if len(entities["hashtags"]) > 0:
for i in entities["hashtags"]:
if i["text"] == "audio":
return True
except IndexError:
print(tweet.entities["hashtags"])
log.exception("Exception while executing is_audio hashtag algorithm")
def is_geocoded(tweet):
@ -92,6 +87,8 @@ def is_media(tweet):
if hasattr(tweet, "message_create"):
entities = tweet.message_create["message_data"]["entities"]
else:
if hasattr(tweet, "entities") == False or tweet.entities.get("hashtags") == None:
return False
entities = tweet.entities
if entities.get("media") == None:
return False
@ -103,28 +100,29 @@ def is_media(tweet):
def get_all_mentioned(tweet, conf, field="screen_name"):
""" Gets all users that have been mentioned."""
results = []
if hasattr(tweet, "entities") and tweet.entities.get("user_mentions"):
for i in tweet.entities["user_mentions"]:
if i["screen_name"] != conf["user_name"] and i["screen_name"] != tweet.user.screen_name:
if i["screen_name"] != conf["user_name"] and i["id_str"] != tweet.user:
if i.get(field) not in results:
results.append(i.get(field))
return results
def get_all_users(tweet, conf):
def get_all_users(tweet, session):
string = []
user = session.get_user(tweet.user)
if hasattr(tweet, "retweeted_status"):
string.append(tweet.user.screen_name)
string.append(user.screen_name)
tweet = tweet.retweeted_status
if hasattr(tweet, "sender"):
string.append(tweet.sender.screen_name)
else:
if tweet.user.screen_name != conf["user_name"]:
string.append(tweet.user.screen_name)
if user.screen_name != session.db["user_name"]:
string.append(user.screen_name)
if hasattr(tweet, "entities") and tweet.entities.get("user_mentions"):
for i in tweet.entities["user_mentions"]:
if i["screen_name"] != conf["user_name"] and i["screen_name"] != tweet.user.screen_name:
if i["screen_name"] != session.db["user_name"] and i["screen_name"] != user.screen_name:
if i["screen_name"] not in string:
string.append(i["screen_name"])
if len(string) == 0:
string.append(tweet.user.screen_name)
string.append(user.screen_name)
return string
def if_user_exists(twitter, user):
@ -144,7 +142,7 @@ def is_allowed(tweet, settings, buffer_name):
tweet_data = {}
if hasattr(tweet, "retweeted_status"):
tweet_data["retweet"] = True
if tweet.in_reply_to_status_id_str != None:
if tweet.in_reply_to_status_id != None:
tweet_data["reply"] = True
if hasattr(tweet, "quoted_status"):
tweet_data["quote"] = True
@ -209,6 +207,8 @@ def twitter_error(error):
def expand_urls(text, entities):
""" Expand all URLS present in text with information found in entities"""
if entities.get("urls") == None:
return text
urls = find_urls_in_text(text)
for url in entities["urls"]:
if url["url"] in text:

1
src/test/__init__.py Normal file
View File

@ -0,0 +1 @@
# -*- coding: utf-8 -*-

200
src/test/test_cache.py Normal file
View File

@ -0,0 +1,200 @@
# -*- coding: utf-8 -*-
""" Test case to check some of the scenarios we might face when storing tweets in cache, either loading into memory or reading from disk. """
import unittest
import os
import paths
import sqlitedict
import shutil
# The base session module requires sound as a dependency, and this needs libVLC to be locatable.
os.environ['PYTHON_VLC_MODULE_PATH']=os.path.abspath(os.path.join(paths.app_path(), "..", "windows-dependencies", "x86"))
os.environ['PYTHON_VLC_LIB_PATH']=os.path.abspath(os.path.join(paths.app_path(), "..", "windows-dependencies", "x86", "libvlc.dll"))
from sessions import base
class cacheTestCase(unittest.TestCase):
    """ Checks how a session persists its buffers, both when the cache database is read from disk and when it is loaded into memory. """

    def setUp(self):
        """ Configures a fake session to check caching objects here. """
        self.session = base.baseSession("testing")
        os.makedirs(os.path.join(paths.config_path(), "testing"), exist_ok=True)
        self.session.get_configuration()

    def tearDown(self):
        """ Closes the database if it is still open and removes the previously configured session. """
        # Close here rather than at the end of each test, so the database is released
        # even when an assertion fails before the test body finishes.
        db = getattr(self.session, "db", None)
        if hasattr(db, "close"):
            db.close()
        session_folder = os.path.join(paths.config_path(), "testing")
        if os.path.exists(session_folder):
            shutil.rmtree(session_folder)

    def generate_dataset(self):
        """ Generates a sample dataset"""
        return dict(home_timeline=["message"] * 10000, mentions_timeline=["mention"] * 20000)

    ### Helpers shared by all tests.

    def _configure(self, in_memory, persist_size, reverse=None):
        """ Sets the cache related settings; reverse_timelines is left untouched unless a value is given. """
        self.session.settings["general"]["load_cache_in_memory"] = in_memory
        self.session.settings["general"]["persist_size"] = persist_size
        if reverse is not None:
            self.session.settings["general"]["reverse_timelines"] = reverse

    def _store(self, dataset):
        """ Loads the database, places both buffers in it and saves it. """
        self.session.load_persistent_data()
        self.session.db["home_timeline"] = dataset["home_timeline"]
        self.session.db["mentions_timeline"] = dataset["mentions_timeline"]
        self.session.save_persistent_data()

    def _reload(self):
        """ Discards the current db object and loads persisted data again. """
        # We need to save and load the db again because we cannot modify buffers' size while the database is opened.
        # As TWBlue reads directly from db when reading from disk, an attempt to modify buffers size while TWBlue is reading the db
        # might cause an out of sync error between the GUI lists and the database.
        # So we perform the changes to buffer size when loading data during app startup if the DB is read from disk.
        self.session.db = dict()
        self.session.load_persistent_data()

    def _assert_buffers_present(self, db_type):
        """ Checks the db type and that both buffers exist. """
        self.assertIsInstance(self.session.db, db_type)
        self.assertIsNotNone(self.session.db.get("home_timeline"))
        self.assertIsNotNone(self.session.db.get("mentions_timeline"))

    def _assert_lengths(self, home, mentions):
        """ Checks the amount of items stored in each buffer. """
        self.assertEqual(len(self.session.db.get("home_timeline")), home)
        self.assertEqual(len(self.session.db.get("mentions_timeline")), mentions)

    def _assert_edges(self, first, last):
        """ Checks the first and last item of both buffers. """
        for buffer_name in ("home_timeline", "mentions_timeline"):
            self.assertEqual(self.session.db.get(buffer_name)[0], first)
        for buffer_name in ("home_timeline", "mentions_timeline"):
            self.assertEqual(self.session.db.get(buffer_name)[-1], last)

    ### Testing database being read from disk.

    def test_cache_in_disk_unlimited_size(self):
        """ Tests cache database being read from disk, storing the whole datasets. """
        self._configure(in_memory=False, persist_size=-1)
        self._store(self.generate_dataset())
        self._assert_buffers_present(sqlitedict.SqliteDict)
        self._assert_lengths(10000, 20000)

    def test_cache_in_disk_limited_dataset(self):
        """ Tests whether the cache stores only the amount of items we ask it to store. """
        self._configure(in_memory=False, persist_size=100)
        self._store(self.generate_dataset())
        self._reload()
        self._assert_buffers_present(sqlitedict.SqliteDict)
        self._assert_lengths(100, 100)

    def test_cache_in_disk_limited_dataset_unreversed(self):
        """Test if the cache is saved properly in unreversed buffers, when newest items are at the end of the list. """
        self._configure(in_memory=False, persist_size=10)
        self._store(dict(home_timeline=list(range(20)), mentions_timeline=list(range(20))))
        self._reload()
        self._assert_buffers_present(sqlitedict.SqliteDict)
        self._assert_edges(first=10, last=19)

    def test_cache_in_disk_limited_dataset_reversed(self):
        """Test if the cache is saved properly in reversed buffers, when newest items are at the start of the list. """
        self._configure(in_memory=False, persist_size=10, reverse=True)
        self._store(dict(home_timeline=list(range(19, -1, -1)), mentions_timeline=list(range(19, -1, -1))))
        self._reload()
        self._assert_buffers_present(sqlitedict.SqliteDict)
        self._assert_edges(first=19, last=10)

    ### Testing database being loaded into memory. Those tests should give the same results as before
    ### but as we have different code depending on whether we load db into memory or read it from disk,
    ### we need to test this anyway.

    def test_cache_in_memory_unlimited_size(self):
        """ Tests cache database being loaded in memory, storing the whole datasets. """
        self._configure(in_memory=True, persist_size=-1)
        self._store(self.generate_dataset())
        self._reload()
        self._assert_buffers_present(dict)
        self._assert_lengths(10000, 20000)

    def test_cache_in_memory_limited_dataset(self):
        """ Tests whether the cache stores only the amount of items we ask it to store, when loaded in memory. """
        self._configure(in_memory=True, persist_size=100)
        self._store(self.generate_dataset())
        self._reload()
        self._assert_buffers_present(dict)
        self._assert_lengths(100, 100)

    def test_cache_in_memory_limited_dataset_unreversed(self):
        """Test if the cache is saved properly when loaded in memory in unreversed buffers, when newest items are at the end of the list. """
        self._configure(in_memory=True, persist_size=10)
        # This test also checks the size of a freshly loaded database, so it loads explicitly before storing.
        self.session.load_persistent_data()
        self.assertEqual(len(self.session.db), 1)
        self.session.db["home_timeline"] = list(range(20))
        self.session.db["mentions_timeline"] = list(range(20))
        self.session.save_persistent_data()
        self._reload()
        self._assert_buffers_present(dict)
        self._assert_edges(first=10, last=19)

    def test_cache_in_memory_limited_dataset_reversed(self):
        """Test if the cache is saved properly in reversed buffers, when newest items are at the start of the list. This test is for db read into memory. """
        self._configure(in_memory=True, persist_size=10, reverse=True)
        self._store(dict(home_timeline=list(range(19, -1, -1)), mentions_timeline=list(range(19, -1, -1))))
        self._reload()
        self._assert_buffers_present(dict)
        self._assert_edges(first=19, last=10)
if __name__ == "__main__":
unittest.main()

View File

@ -1,7 +1,4 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import unicode_literals
from builtins import range
import logging as original_logger
import wx
import application
@ -127,6 +124,7 @@ class generalAccount(wx.Panel, baseDialog.BaseWXDialog):
self.persist_size = wx.TextCtrl(self, -1)
sizer.Add(PersistSizeLabel, 0, wx.ALL, 5)
sizer.Add(self.persist_size, 0, wx.ALL, 5)
self.load_cache_in_memory = wx.CheckBox(self, wx.NewId(), _("Load cache for tweets in memory (much faster in big datasets but requires more RAM)"))
self.SetSizer(sizer)
class reporting(wx.Panel, baseDialog.BaseWXDialog):