diff --git a/doc/changelog.md b/doc/changelog.md index e4369880..66ef6a13 100644 --- a/doc/changelog.md +++ b/doc/changelog.md @@ -2,6 +2,11 @@ ## changes in this version +* We just implemented some changes in the way TWBlue handles tweets in order to reduce its memory (RAM) usage [#380](https://github.com/manuelcortez/TWBlue/pull/380): + * We reduced the size of tweets by storing only the tweet fields we currently use. This should reduce the in-memory size of every tweet object by up to 75%. + * When using the cache database to store your tweets, there is a new setting present in the account settings dialog, in the general tab. This setting allows you to control whether TWBlue will load the whole database into memory (which is the current behaviour) or not. + * Loading the whole database into memory has the advantage of being extremely fast to access any element (for example when moving through tweets in a buffer), but it requires more memory as the tweet buffers grow. This should, however, use less memory than before thanks to the optimizations performed in tweet objects. If you have a machine with enough memory, this should be a good option for your case. + * If you uncheck this setting, TWBlue will read items from the database on disk as they are needed, instead of loading everything into memory. This is significantly slower, but the advantage of this setting is that it will consume almost no extra memory, no matter how big the tweets dataset is. Beware, though, that TWBlue might start to feel slower when accessing elements (for example when reading tweets) as the buffers grow. This setting is suggested for computers with low memory or for those people not wanting to keep a really big amount of tweets stored. * Changed the label in the direct message's text control so it will indicate that the user needs to write the text there, without referring to any username in particular. 
([#366,](https://github.com/manuelcortez/TWBlue/issues/366)) * TWBlue will take Shift+F10 again as the contextual menu key in the list of items in a buffer. This stopped working after we have migrated to WX 4.1. ([#353,](https://github.com/manuelcortez/TWBlue/issues/353)) * TWBlue should render correctly retweets of quoted tweets. ([#365,](https://github.com/manuelcortez/TWBlue/issues/365)) diff --git a/requirements.txt b/requirements.txt index ae1296b8..9943eb0f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -31,6 +31,7 @@ cx_freeze tweepy twitter-text-parser pyenchant +sqlitedict git+https://github.com/accessibleapps/libloader git+https://github.com/accessibleapps/platform_utils git+https://github.com/accessibleapps/accessible_output2 diff --git a/src/Conf.defaults b/src/Conf.defaults index 4396e120..80b28db6 100644 --- a/src/Conf.defaults +++ b/src/Conf.defaults @@ -12,6 +12,7 @@ reverse_timelines = boolean(default=False) announce_stream_status = boolean(default=True) retweet_mode = string(default="ask") persist_size = integer(default=0) +load_cache_in_memory=boolean(default=True) show_screen_names = boolean(default=False) buffer_order = list(default=list('home','mentions', 'dm', 'sent_dm', 'sent_tweets','favorites','followers','friends','blocks','muted','events')) diff --git a/src/controller/buffers/twitterBuffers.py b/src/controller/buffers/twitterBuffers.py index 0fdacac4..03657e7c 100644 --- a/src/controller/buffers/twitterBuffers.py +++ b/src/controller/buffers/twitterBuffers.py @@ -19,7 +19,7 @@ import languageHandler import logging from audio_services import youtube_utils from controller.buffers import baseBuffers -from sessions.twitter import compose, utils +from sessions.twitter import compose, utils, reduce from mysc.thread_utils import call_threaded from tweepy.error import TweepError from tweepy.cursor import Cursor @@ -178,7 +178,9 @@ class baseBufferController(baseBuffers.buffer): val, cursor = val if type(cursor) == tuple: cursor = 
cursor[1] - self.session.db["cursors"][self.name] = cursor + cursors = self.session.db["cursors"] + cursors[self.name] = cursor + self.session.db["cursors"] = cursors results = [i for i in val] val = results val.reverse() @@ -190,7 +192,6 @@ class baseBufferController(baseBuffers.buffer): return number_of_items = self.session.order_buffer(self.name, val) log.debug("Number of items retrieved: %d" % (number_of_items,)) - self.put_items_on_list(number_of_items) if hasattr(self, "finished_timeline") and self.finished_timeline == False: if "-timeline" in self.name: @@ -229,15 +230,19 @@ class baseBufferController(baseBuffers.buffer): return if items == None: return + items_db = self.session.db[self.name] + self.session.add_users_from_results(items) for i in items: if utils.is_allowed(i, self.session.settings, self.name) == True and utils.find_item(i.id, self.session.db[self.name]) == None: + i = reduce.reduce_tweet(i) i = self.session.check_quoted_status(i) i = self.session.check_long_tweet(i) elements.append(i) if self.session.settings["general"]["reverse_timelines"] == False: - self.session.db[self.name].insert(0, i) + items_db.insert(0, i) else: - self.session.db[self.name].append(i) + items_db.append(i) + self.session.db[self.name] = items_db selection = self.buffer.list.get_selected() log.debug("Retrieved %d items from cursored search in function %s." 
% (len(elements), self.function)) if self.session.settings["general"]["reverse_timelines"] == False: @@ -286,10 +291,12 @@ class baseBufferController(baseBuffers.buffer): def remove_tweet(self, id): if type(self.session.db[self.name]) == dict: return - for i in range(0, len(self.session.db[self.name])): - if self.session.db[self.name][i].id == id: - self.session.db[self.name].pop(i) + items = self.session.db[self.name] + for i in range(0, len(items)): + if items[i].id == id: + items.pop(i) self.remove_item(i) + self.session.db[self.name] = items def put_items_on_list(self, number_of_items): list_to_use = self.session.db[self.name] @@ -408,11 +415,12 @@ class baseBufferController(baseBuffers.buffer): @_tweets_exist def reply(self, *args, **kwargs): tweet = self.get_right_tweet() - screen_name = tweet.user.screen_name + user = self.session.get_user(tweet.user) + screen_name = user.screen_name id = tweet.id twishort_enabled = hasattr(tweet, "twishort") users = utils.get_all_mentioned(tweet, self.session.db, field="screen_name") - ids = utils.get_all_mentioned(tweet, self.session.db, field="id_str") + ids = utils.get_all_mentioned(tweet, self.session.db, field="id") # Build the window title if len(users) < 1: title=_("Reply to {arg0}").format(arg0=screen_name) @@ -461,8 +469,8 @@ class baseBufferController(baseBuffers.buffer): screen_name = tweet.screen_name users = [screen_name] else: - screen_name = tweet.user.screen_name - users = utils.get_all_users(tweet, self.session.db) + screen_name = self.session.get_user(tweet.user).screen_name + users = utils.get_all_users(tweet, self.session) dm = messages.dm(self.session, _(u"Direct message to %s") % (screen_name,), _(u"New direct message"), users) if dm.message.get_response() == widgetUtils.OK: screen_name = dm.message.get("cb") @@ -471,10 +479,12 @@ class baseBufferController(baseBuffers.buffer): text = dm.message.get_text() val = self.session.api_call(call_name="send_direct_message", recipient_id=recipient_id, 
text=text) if val != None: + sent_dms = self.session.db["sent_direct_messages"] if self.session.settings["general"]["reverse_timelines"] == False: - self.session.db["sent_direct_messages"].append(val) + sent_dms.append(val) else: - self.session.db["sent_direct_messages"].insert(0, val) + sent_dms.insert(0, val) + self.session.db["sent_direct_messages"] = sent_dms pub.sendMessage("sent-dm", data=val, user=self.session.db["user_name"]) if hasattr(dm.message, "destroy"): dm.message.destroy() @@ -501,12 +511,12 @@ class baseBufferController(baseBuffers.buffer): comments = tweet.full_text else: comments = tweet.text - retweet = messages.tweet(self.session, _(u"Quote"), _(u"Add your comment to the tweet"), u"“@%s: %s ”" % (tweet.user.screen_name, comments), max=256, messageType="retweet") + retweet = messages.tweet(self.session, _(u"Quote"), _(u"Add your comment to the tweet"), u"“@%s: %s ”" % (self.session.get_user(tweet.user).screen_name, comments), max=256, messageType="retweet") if comment != '': retweet.message.set_text(comment) if retweet.message.get_response() == widgetUtils.OK: text = retweet.message.get_text() - text = text+" https://twitter.com/{0}/status/{1}".format(tweet.user.screen_name, id) + text = text+" https://twitter.com/{0}/status/{1}".format(self.session.get_user(tweet.user).screen_name, id) if retweet.image == None: item = self.session.api_call(call_name="update_status", _sound="retweet_send.ogg", status=text, in_reply_to_status_id=id, tweet_mode="extended") if item != None: @@ -588,16 +598,18 @@ class baseBufferController(baseBuffers.buffer): if self.type == "events" or self.type == "people" or self.type == "empty" or self.type == "account": return answer = commonMessageDialogs.delete_tweet_dialog(None) if answer == widgetUtils.YES: + items = self.session.db[self.name] try: if self.name == "direct_messages" or self.name == "sent_direct_messages": self.session.twitter.destroy_direct_message(id=self.get_right_tweet().id) - 
self.session.db[self.name].pop(index) + items.pop(index) else: self.session.twitter.destroy_status(id=self.get_right_tweet().id) - self.session.db[self.name].pop(index) + items.pop(index) self.buffer.list.remove_item(index) except TweepError: self.session.sound.play("error.ogg") + self.session.db[self.name] = items @_tweets_exist def user_details(self): @@ -607,7 +619,7 @@ class baseBufferController(baseBuffers.buffer): elif self.type == "people": users = [tweet.screen_name] else: - users = utils.get_all_users(tweet, self.session.db) + users = utils.get_all_users(tweet, self.session) dlg = dialogs.utils.selectUserDialog(title=_(u"User details"), users=users) if dlg.get_response() == widgetUtils.OK: user.profileController(session=self.session, user=dlg.get_user()) @@ -625,7 +637,7 @@ class baseBufferController(baseBuffers.buffer): def open_in_browser(self, *args, **kwargs): tweet = self.get_tweet() output.speak(_(u"Opening item in web browser...")) - url = "https://twitter.com/{screen_name}/status/{tweet_id}".format(screen_name=tweet.user.screen_name, tweet_id=tweet.id) + url = "https://twitter.com/{screen_name}/status/{tweet_id}".format(screen_name=self.session.get_user(tweet.user).screen_name, tweet_id=tweet.id) webbrowser.open(url) class directMessagesController(baseBufferController): @@ -646,7 +658,9 @@ class directMessagesController(baseBufferController): items, cursor = items if type(cursor) == tuple: cursor = cursor[1] - self.session.db["cursors"][self.name] = cursor + cursors = self.session.db["cursors"] + cursors[self.name] = cursor + self.session.db["cursors"] = cursors results = [i for i in items] items = results log.debug("Retrieved %d items for cursored search in function %s" % (len(items), self.function)) @@ -657,22 +671,26 @@ class directMessagesController(baseBufferController): return sent = [] received = [] + sent_dms = self.session.db["sent_direct_messages"] + received_dms = self.session.db["direct_messages"] for i in items: if 
int(i.message_create["sender_id"]) == self.session.db["user_id"]: if self.session.settings["general"]["reverse_timelines"] == False: - self.session.db["sent_direct_messages"].insert(0, i) + sent_dms.insert(0, i) sent.append(i) else: - self.session.db["sent_direct_messages"].append(i) + sent_dms.append(i) sent.insert(0, i) else: if self.session.settings["general"]["reverse_timelines"] == False: - self.session.db[self.name].insert(0, i) + received_dms.insert(0, i) received.append(i) else: - self.session.db[self.name].append(i) + received_dms.append(i) received.insert(0, i) total = total+1 + self.session.db["direct_messages"] = received_dms + self.session.db["sent_direct_messages"] = sent_dms user_ids = [item.message_create["sender_id"] for item in items] self.session.save_users(user_ids) pub.sendMessage("more-sent-dms", data=sent, account=self.session.db["user_name"]) @@ -885,7 +903,9 @@ class peopleBufferController(baseBufferController): val, cursor = val if type(cursor) == tuple: cursor = cursor[1] - self.session.db["cursors"][self.name] = cursor + cursors = self.session.db["cursors"] + cursors[self.name] = cursor + self.session.db["cursors"] = cursors results = [i for i in val] val = results val.reverse() @@ -914,7 +934,9 @@ class peopleBufferController(baseBufferController): items, cursor = items if type(cursor) == tuple: cursor = cursor[1] - self.session.db["cursors"][self.name] = cursor + cursors = self.session.db["cursors"] + cursors[self.name] = cursor + self.session.db["cursors"] = cursors results = [i for i in items] items = results log.debug("Retrieved %d items from cursored search in function %s" % (len(items), self.function)) @@ -923,11 +945,13 @@ class peopleBufferController(baseBufferController): return if items == None: return + items_db = self.session.db[self.name] for i in items: if self.session.settings["general"]["reverse_timelines"] == False: - self.session.db[self.name].insert(0, i) + items_db.insert(0, i) else: - 
self.session.db[self.name].append(i) + items_db.append(i) + self.session.db[self.name] = items_db selected = self.buffer.list.get_selected() if self.session.settings["general"]["reverse_timelines"] == True: for i in items: diff --git a/src/controller/mainController.py b/src/controller/mainController.py index ed8bf128..b405b3b2 100644 --- a/src/controller/mainController.py +++ b/src/controller/mainController.py @@ -254,8 +254,8 @@ class Controller(object): # Connection checker executed each minute. self.checker_function = RepeatingTimer(60, self.check_connection) # self.checker_function.start() - self.save_db = RepeatingTimer(300, self.save_data_in_db) - self.save_db.start() +# self.save_db = RepeatingTimer(300, self.save_data_in_db) +# self.save_db.start() log.debug("Setting updates to buffers every %d seconds..." % (60*config.app["app-settings"]["update_period"],)) self.update_buffers_function = RepeatingTimer(60*config.app["app-settings"]["update_period"], self.update_buffers) self.update_buffers_function.start() @@ -530,7 +530,7 @@ class Controller(object): elif buff.type == "dm": users = [buff.session.get_user(tweet.message_create["sender_id"]).screen_name] else: - users = utils.get_all_users(tweet, buff.session.db) + users = utils.get_all_users(tweet, buff.session) dlg = dialogs.utils.selectUserDialog(_(u"Select the user"), users) if dlg.get_response() == widgetUtils.OK: user = dlg.get_user() @@ -547,7 +547,7 @@ class Controller(object): elif buff.type == "dm": users = [buff.session.get_user(tweet.message_create["sender_id"]).screen_name] else: - users = utils.get_all_users(tweet, buff.session.db) + users = utils.get_all_users(tweet, buff.session) dlg = dialogs.utils.selectUserDialog(_(u"Select the user"), users) if dlg.get_response() == widgetUtils.OK: user = dlg.get_user() @@ -575,7 +575,7 @@ class Controller(object): elif buff.type == "dm": users = [buff.session.get_user(tweet.message_create["sender_id"]).screen_name] else: - users = 
utils.get_all_users(tweet, buff.session.db) + users = utils.get_all_users(tweet, buff.session) dlg = dialogs.utils.selectUserDialog(_(u"Select the user"), users) if dlg.get_response() == widgetUtils.OK: user = dlg.get_user() @@ -617,6 +617,7 @@ class Controller(object): if d.needs_restart == True: commonMessageDialogs.needs_restart() buff.session.settings.write() + buff.session.save_persistent_data() restart.restart_program() def report_error(self, *args, **kwargs): @@ -651,8 +652,8 @@ class Controller(object): if sessions.sessions[item].logged == False: continue log.debug("Disconnecting streams for %s session" % (sessions.sessions[item].session_id,)) sessions.sessions[item].sound.cleaner.cancel() - log.debug("Shelving database for " + sessions.sessions[item].session_id) - sessions.sessions[item].shelve() + log.debug("Saving database for " + sessions.sessions[item].session_id) + sessions.sessions[item].save_persistent_data() if system == "Windows": self.systrayIcon.RemoveIcon() pidpath = os.path.join(os.getenv("temp"), "{}.pid".format(application.name)) @@ -669,7 +670,7 @@ class Controller(object): elif buff.type == "dm": users = [buff.session.get_user(tweet.message_create["sender_id"]).screen_name] else: - users = utils.get_all_users(tweet, buff.session.db) + users = utils.get_all_users(tweet, buff.session) u = userActionsController.userActionsController(buff, users) def unfollow(self, *args, **kwargs): @@ -681,7 +682,7 @@ class Controller(object): elif buff.type == "dm": users = [buff.session.get_user(tweet.message_create["sender_id"]).screen_name] else: - users = utils.get_all_users(tweet, buff.session.db) + users = utils.get_all_users(tweet, buff.session) u = userActionsController.userActionsController(buff, users, "unfollow") def mute(self, *args, **kwargs): @@ -693,7 +694,7 @@ class Controller(object): elif buff.type == "dm": users = [buff.session.get_user(tweet.message_create["sender_id"]).screen_name] else: - users = utils.get_all_users(tweet, 
buff.session.db) + users = utils.get_all_users(tweet, buff.session) u = userActionsController.userActionsController(buff, users, "mute") def unmute(self, *args, **kwargs): @@ -705,7 +706,7 @@ class Controller(object): elif buff.type == "dm": users = [buff.session.get_user(tweet.message_create["sender_id"]).screen_name] else: - users = utils.get_all_users(tweet, buff.session.db) + users = utils.get_all_users(tweet, buff.session) u = userActionsController.userActionsController(buff, users, "unmute") def block(self, *args, **kwargs): @@ -717,7 +718,7 @@ class Controller(object): elif buff.type == "dm": users = [buff.session.get_user(tweet.message_create["sender_id"]).screen_name] else: - users = utils.get_all_users(tweet, buff.session.db) + users = utils.get_all_users(tweet, buff.session) u = userActionsController.userActionsController(buff, users, "block") def unblock(self, *args, **kwargs): @@ -729,7 +730,7 @@ class Controller(object): elif buff.type == "dm": users = [buff.session.get_user(tweet.message_create["sender_id"]).screen_name] else: - users = utils.get_all_users(tweet, buff.session.db) + users = utils.get_all_users(tweet, buff.session) u = userActionsController.userActionsController(buff, users, "unblock") def report(self, *args, **kwargs): @@ -741,7 +742,7 @@ class Controller(object): elif buff.type == "dm": users = [buff.session.get_user(tweet.message_create["sender_id"]).screen_name] else: - users = utils.get_all_users(tweet, buff.session.db) + users = utils.get_all_users(tweet, buff.session) u = userActionsController.userActionsController(buff, users, "report") def post_tweet(self, event=None): @@ -828,7 +829,7 @@ class Controller(object): elif buff.type == "dm": users = [buff.session.get_user(tweet.message_create["sender_id"]).screen_name] else: - users = utils.get_all_users(tweet, buff.session.db) + users = utils.get_all_users(tweet, buff.session) dlg = dialogs.userSelection.selectUserDialog(users=users, default=default) if dlg.get_response() == 
widgetUtils.OK: usr = utils.if_user_exists(buff.session.twitter, dlg.get_user()) @@ -923,8 +924,8 @@ class Controller(object): def open_conversation(self, *args, **kwargs): buffer = self.get_current_buffer() - id = buffer.get_right_tweet().id_str - user = buffer.get_right_tweet().user.screen_name + id = buffer.get_right_tweet().id + user = buffer.session.get_user(buffer.get_right_tweet().user).screen_name search = twitterBuffers.conversationBufferController(self.view.nb, "search", "%s-searchterm" % (id,), buffer.session, buffer.session.db["user_name"], bufferType="searchPanel", sound="search_updated.ogg", since_id=id, q="@{0}".format(user,)) search.tweet = buffer.get_right_tweet() search.start_stream(start=True) @@ -1274,10 +1275,12 @@ class Controller(object): data = buffer.session.check_long_tweet(data) if data == False: # Long tweet deleted from twishort. return + items = buffer.session.db[buffer.name] if buffer.session.settings["general"]["reverse_timelines"] == False: - buffer.session.db[buffer.name].append(data) + items.append(data) else: - buffer.session.db[buffer.name].insert(0, data) + items.insert(0, data) + buffer.session.db[buffer.name] = items buffer.add_new_item(data) def manage_friend(self, data, user): @@ -1623,4 +1626,4 @@ class Controller(object): def save_data_in_db(self): for i in sessions.sessions: - sessions.sessions[i].shelve() + sessions.sessions[i].save_persistent_data() diff --git a/src/controller/settings.py b/src/controller/settings.py index b0c1efd3..84040d2e 100644 --- a/src/controller/settings.py +++ b/src/controller/settings.py @@ -1,7 +1,4 @@ # -*- coding: utf-8 -*- -from __future__ import unicode_literals -from builtins import str -from builtins import object import os import webbrowser import sound_lib @@ -151,6 +148,7 @@ class accountSettingsController(globalSettingsController): else: self.dialog.set_value("general", "retweet_mode", _(u"Retweet with comments")) self.dialog.set_value("general", "persist_size", 
str(self.config["general"]["persist_size"])) + self.dialog.set_value("general", "load_cache_in_memory", self.config["general"]["load_cache_in_memory"]) self.dialog.create_reporting() self.dialog.set_value("reporting", "speech_reporting", self.config["reporting"]["speech_reporting"]) self.dialog.set_value("reporting", "braille_reporting", self.config["reporting"]["braille_reporting"]) @@ -193,6 +191,9 @@ class accountSettingsController(globalSettingsController): self.config["general"]["relative_times"] = self.dialog.get_value("general", "relative_time") self.config["general"]["show_screen_names"] = self.dialog.get_value("general", "show_screen_names") self.config["general"]["max_tweets_per_call"] = self.dialog.get_value("general", "itemsPerApiCall") + if self.config["general"]["load_cache_in_memory"] != self.dialog.get_value("general", "load_cache_in_memory"): + self.config["general"]["load_cache_in_memory"] = self.dialog.get_value("general", "load_cache_in_memory") + self.needs_restart = True if self.config["general"]["persist_size"] != self.dialog.get_value("general", "persist_size"): if self.dialog.get_value("general", "persist_size") == '': self.config["general"]["persist_size"] =-1 diff --git a/src/run_tests.py b/src/run_tests.py new file mode 100644 index 00000000..41842347 --- /dev/null +++ b/src/run_tests.py @@ -0,0 +1,18 @@ +# -*- coding: utf-8 -*- +import unittest + +testmodules = ["test.test_cache"] + +suite = unittest.TestSuite() + +for t in testmodules: + try: + # If the module defines a suite() function, call it to get the suite. + mod = __import__(t, globals(), locals(), ['suite']) + suitefn = getattr(mod, 'suite') + suite.addTest(suitefn()) + except (ImportError, AttributeError): + # else, just load all the test cases from the module. 
+ suite.addTest(unittest.defaultTestLoader.loadTestsFromName(t)) + +unittest.TextTestRunner(verbosity=2).run(suite) \ No newline at end of file diff --git a/src/sessions/base.py b/src/sessions/base.py index 597cacfc..63ee5142 100644 --- a/src/sessions/base.py +++ b/src/sessions/base.py @@ -1,9 +1,5 @@ # -*- coding: utf-8 -*- """ A base class to be derived in possible new sessions for TWBlue and services.""" -from __future__ import absolute_import -from __future__ import unicode_literals -from builtins import str -from builtins import object import os import paths import output @@ -11,9 +7,8 @@ import time import sound import logging import config_utils -import shelve +import sqlitedict import application -import os from . import session_exceptions as Exceptions log = logging.getLogger("sessionmanager.session") @@ -59,7 +54,7 @@ class baseSession(object): log.debug("Creating config file %s" % (file_,)) self.settings = config_utils.load_config(os.path.join(paths.config_path(), file_), os.path.join(paths.app_path(), "Conf.defaults")) self.init_sound() - self.deshelve() + self.load_persistent_data() def init_sound(self): try: self.sound = sound.soundSystem(self.settings["sound"]) @@ -73,48 +68,88 @@ class baseSession(object): def authorise(self): pass - def shelve(self): - """Shelve the database to allow for persistance.""" - shelfname=os.path.join(paths.config_path(), str(self.session_id), "cache") - if self.settings["general"]["persist_size"] == 0: - if os.path.exists(shelfname+".dat"): - os.remove(shelfname+".dat") - return - try: - if not os.path.exists(shelfname+".dat"): - output.speak("Generating database, this might take a while.",True) - shelf=shelve.open(os.path.join(paths.config_path(), shelfname),'c') - for key, value in list(self.db.items()): - if type(key) != str and type(key) != str: - output.speak("Uh oh, while shelving the database, a key of type " + str(type(key)) + " has been found. 
It will be converted to type str, but this will cause all sorts of problems on deshelve. Please bring this to the attention of the " + application.name + " developers immediately. More information about the error will be written to the error log.",True) - log.error("Uh oh, " + str(key) + " is of type " + str(type(key)) + "!") - if type(value) == list and self.settings["general"]["persist_size"] != -1 and len(value) > self.settings["general"]["persist_size"]: - shelf[key]=value[self.settings["general"]["persist_size"]:] - else: - shelf[key]=value - shelf.close() - except: - output.speak("An exception occurred while shelving the " + application.name + " database. It will be deleted and rebuilt automatically. If this error persists, send the error log to the " + application.name + " developers.",True) - log.exception("Exception while shelving" + shelfname) - os.remove(shelfname) + def get_sized_buffer(self, buffer, size, reversed=False): + """ Returns a list with the amount of items specified by size.""" + if isinstance(buffer, list) and size != -1 and len(buffer) > size: + log.debug("Requesting {} items from a list of {} items. Reversed mode: {}".format(size, len(buffer), reversed)) + if reversed == True: + return buffer[:size] + else: + return buffer[len(buffer)-size:] + else: + return buffer - def deshelve(self): - """Import a shelved database.""" - shelfname=os.path.join(paths.config_path(), str(self.session_id)+"/cache") + def save_persistent_data(self): + """ Save the data to a persistent sqlite backed file. .""" + dbname=os.path.join(paths.config_path(), str(self.session_id), "cache.db") + log.debug("Saving storage information...") + # persist_size set to 0 means not saving data actually. 
if self.settings["general"]["persist_size"] == 0: - if os.path.exists(shelfname+".dat"): - os.remove(shelfname+".dat") + if os.path.exists(dbname): + os.remove(dbname) return + # Let's check if we need to create a new SqliteDict object (when loading db in memory) or we just need to call to commit in self (if reading from disk).db. + # If we read from disk, we cannot modify the buffer size here as we could damage the app's integrity. + # We will modify buffer's size (managed by persist_size) upon loading the db into memory in app startup. + if self.settings["general"]["load_cache_in_memory"] and isinstance(self.db, dict): + log.debug("Opening database to dump memory contents...") + db=sqlitedict.SqliteDict(dbname, 'c') + for k in self.db.keys(): + sized_buff = self.get_sized_buffer(self.db[k], self.settings["general"]["persist_size"], self.settings["general"]["reverse_timelines"]) + db[k] = sized_buff + db.commit(blocking=True) + db.close() + log.debug("Data has been saved in the database.") + else: + try: + log.debug("Syncing new data to disk...") + if hasattr(self.db, "commit"): + self.db.commit() + except: + output.speak(_("An exception occurred while saving the {app} database. It will be deleted and rebuilt automatically. If this error persists, send the error log to the {app} developers.").format(app=application.name),True) + log.exception("Exception while saving {}".format(dbname)) + os.remove(dbname) + + def load_persistent_data(self): + """Import data from a database file from user config.""" + log.debug("Loading storage data...") + dbname=os.path.join(paths.config_path(), str(self.session_id), "cache.db") + # If persist_size is set to 0, we should remove the db file as we are no longer going to save anything. + if self.settings["general"]["persist_size"] == 0: + if os.path.exists(dbname): + os.remove(dbname) + # Let's return from here, as we are not loading anything. + return + # try to load the db file. 
try: - shelf=shelve.open(os.path.join(paths.config_path(), shelfname),'c') - for key,value in list(shelf.items()): - self.db[key]=value - shelf.close() + log.debug("Opening database...") + db=sqlitedict.SqliteDict(os.path.join(paths.config_path(), dbname), 'c') + # If load_cache_in_memory is set to true, we will load the whole database into memory for faster access. + # This is going to be faster when retrieving specific objects, at the cost of more memory. + # Setting this to False will read the objects from database as they are needed, which might be slower for bigger datasets. + if self.settings["general"]["load_cache_in_memory"]: + log.debug("Loading database contents into memory...") + for k in db.keys(): + self.db[k] = db[k] + db.commit(blocking=True) + db.close() + log.debug("Contents were loaded successfully.") + else: + log.debug("Instantiating database from disk.") + self.db = db + # We must make sure we won't load more than the amount of buffer specified. + log.debug("Checking if we will load all content...") + for k in self.db.keys(): + sized_buffer = self.get_sized_buffer(self.db[k], self.settings["general"]["persist_size"], self.settings["general"]["reverse_timelines"]) + self.db[k] = sized_buffer + if self.db.get("cursors") == None: + cursors = dict(direct_messages=-1) + self.db["cursors"] = cursors except: - output.speak("An exception occurred while deshelving the " + application.name + " database. It will be deleted and rebuilt automatically. If this error persists, send the error log to the " + application.name + " developers.",True) - log.exception("Exception while deshelving" + shelfname) + output.speak(_("An exception occurred while loading the {app} database. It will be deleted and rebuilt automatically. 
If this error persists, send the error log to the {app} developers.").format(app=application.name), True) + log.exception("Exception while loading {}".format(dbname)) try: - os.remove(shelfname) + os.remove(dbname) except: pass diff --git a/src/sessions/twitter/compose.py b/src/sessions/twitter/compose.py index e76a7034..b6ceb4ae 100644 --- a/src/sessions/twitter/compose.py +++ b/src/sessions/twitter/compose.py @@ -49,23 +49,24 @@ def compose_tweet(tweet, db, relative_times, show_screen_names=False, session=No else: text = StripChars(getattr(tweet, value)) if show_screen_names: - user = tweet.user.screen_name + user = session.get_user(tweet.user).screen_name else: - user = tweet.user.name + user = session.get_user(tweet.user).name source = re.sub(r"(?s)<.*?>", "", tweet.source) if hasattr(tweet, "retweeted_status"): - if (hasattr(tweet, "message")) == False and tweet.retweeted_status.is_quote_status == False: - text = "RT @%s: %s" % (tweet.retweeted_status.user.screen_name, text) - elif tweet.retweeted_status.is_quote_status: + if hasattr(tweet, "message") == False and hasattr(tweet.retweeted_status, "is_quote_status") == False: + text = "RT @%s: %s" % (session.get_user(tweet.retweeted_status.user).screen_name, text) + elif hasattr(tweet.retweeted_status, "is_quote_status"): text = "%s" % (text) else: - text = "RT @%s: %s" % (tweet.retweeted_status.user.screen_name, text) + text = "RT @%s: %s" % (session.get_user(tweet.retweeted_status.user).screen_name, text) if not hasattr(tweet, "message"): - if hasattr(tweet, "retweeted_status"): - text = utils.expand_urls(text, tweet.retweeted_status.entities) + if hasattr(tweet.retweeted_status, "entities"): + text = utils.expand_urls(text, tweet.retweeted_status.entities) else: - text = utils.expand_urls(text, tweet.entities) + if hasattr(tweet, "entities"): + text = utils.expand_urls(text, tweet.entities) if config.app['app-settings']['handle_longtweets']: pass return [user+", ", text, ts+", ", source] @@ -112,14 +113,14 @@ 
def compose_quoted_tweet(quoted_tweet, original_tweet, show_screen_names=False, value = "text" text = StripChars(getattr(quoted_tweet, value)) if show_screen_names: - quoting_user = quoted_tweet.user.screen_name + quoting_user = session.get_user(quoted_tweet.user).screen_name else: - quoting_user = quoted_tweet.user.name + quoting_user = session.get_user(quoted_tweet.user).name source = quoted_tweet.source if hasattr(quoted_tweet, "retweeted_status"): - text = "rt @%s: %s" % (quoted_tweet.retweeted_status.user.screen_name, text) + text = "rt @%s: %s" % (session.get_user(quoted_tweet.retweeted_status.user).screen_name, text) if text[-1] in chars: text=text+"." - original_user = original_tweet.user.screen_name + original_user = session.get_user(original_tweet.user).screen_name if hasattr(original_tweet, "message"): original_text = original_tweet.message elif hasattr(original_tweet, "full_text"): @@ -128,7 +129,12 @@ def compose_quoted_tweet(quoted_tweet, original_tweet, show_screen_names=False, original_text = StripChars(original_tweet.text) quoted_tweet.message = _(u"{0}. 
Quoted tweet from @{1}: {2}").format( text, original_user, original_text) quoted_tweet = tweets.clear_url(quoted_tweet) - quoted_tweet.entities["urls"].extend(original_tweet.entities["urls"]) + if hasattr(original_tweet, "entities") and original_tweet.entities.get("urls"): + if hasattr(quoted_tweet, "entities") == False: + quoted_tweet.entities = {} + if quoted_tweet.entities.get("urls") == None: + quoted_tweet.entities["urls"] = [] + quoted_tweet.entities["urls"].extend(original_tweet.entities["urls"]) return quoted_tweet def compose_followers_list(tweet, db, relative_times=True, show_screen_names=False, session=None): diff --git a/src/sessions/twitter/reduce.py b/src/sessions/twitter/reduce.py new file mode 100644 index 00000000..eae64a70 --- /dev/null +++ b/src/sessions/twitter/reduce.py @@ -0,0 +1,37 @@ +# -*- coding: utf-8 -*- +""" Strips unneeded tweet information in order to store tweet objects by using less memory. This is especially useful when buffers start to contain more than a certain amount of items. """ +from tweepy.models import Status + +def reduce_tweet(tweet): + """ generates a new Tweet model with the fields we currently need, excluding everything else including null values and empty collections. """ + allowed_values = ["created_at", "id", "full_text", "text", "message", "in_reply_to_status_id", "in_reply_to_user_id", "is_quote_status", "lang", "source", "coordinates", "quoted_status_id", ] + allowed_entities = ["hashtags", "media", "urls", "user_mentions", "polls"] + status_dict = {} + for key in allowed_values: + if tweet._json.get(key): + status_dict[key] = tweet._json[key] + entities = dict() + for key in allowed_entities: + if tweet._json["entities"].get(key) and tweet._json["entities"].get(key) != None: + entities[key] = tweet._json["entities"][key] + status_dict["entities"] = entities + # If tweet comes from the cached database, it does not include an API, so we can pass None here as we do not use that reference to tweepy's API. 
+ if hasattr(tweet, "_api"): + api = tweet._api + else: + api = None + status = Status().parse(api=api, json=status_dict) + # Quotes and retweets are different objects. So we parse a new tweet when we have a quoted or retweeted status here. + if tweet._json.get("quoted_status"): + quoted_tweet = reduce_tweet(tweet.quoted_status) + status.quoted_status = quoted_tweet + if tweet._json.get("retweeted_status"): + retweeted_tweet = reduce_tweet(tweet.retweeted_status) + status.retweeted_status = retweeted_tweet + # Adds user ID to here so we can reference it later. + # Sometimes, the conversations buffer would send an already reduced tweet here so we will need to return it as is. + if isinstance(tweet.user, str) == False: + status.user = tweet.user.id_str + else: + return tweet + return status \ No newline at end of file diff --git a/src/sessions/twitter/session.py b/src/sessions/twitter/session.py index 1cdbece0..e33d1019 100644 --- a/src/sessions/twitter/session.py +++ b/src/sessions/twitter/session.py @@ -17,6 +17,7 @@ from keys import keyring from sessions import base from sessions.twitter import utils, compose from sessions.twitter.long_tweets import tweets, twishort +from . import reduce from .wxUI import authorisationDialog log = logging.getLogger("sessions.twitterSession") @@ -38,26 +39,27 @@ class Session(base.baseSession): self.db[name] = [] if ("users" in self.db) == False: self.db["users"] = {} + objects = self.db[name] if ignore_older and len(self.db[name]) > 0: if self.settings["general"]["reverse_timelines"] == False: last_id = self.db[name][0].id else: last_id = self.db[name][-1].id + self.add_users_from_results(data) for i in data: if ignore_older and last_id != None: if i.id < last_id: log.error("Ignoring an older tweet... 
Last id: {0}, tweet id: {1}".format(last_id, i.id)) continue if utils.find_item(i.id, self.db[name]) == None and utils.is_allowed(i, self.settings, name) == True: - i = self.check_quoted_status(i) - i = self.check_long_tweet(i) if i == False: continue - if self.settings["general"]["reverse_timelines"] == False: self.db[name].append(i) - else: self.db[name].insert(0, i) + reduced_object = reduce.reduce_tweet(i) + reduced_object = self.check_quoted_status(reduced_object) + reduced_object = self.check_long_tweet(reduced_object) + if self.settings["general"]["reverse_timelines"] == False: objects.append(reduced_object) + else: objects.insert(0, reduced_object) num = num+1 - if hasattr(i, "user"): - if (i.user.id in self.db["users"]) == False: - self.db["users"][i.user.id] = i.user + self.db[name] = objects return num def order_people(self, name, data): @@ -68,11 +70,13 @@ class Session(base.baseSession): num = 0 if (name in self.db) == False: self.db[name] = [] + objects = self.db[name] for i in data: if utils.find_item(i.id, self.db[name]) == None: - if self.settings["general"]["reverse_timelines"] == False: self.db[name].append(i) - else: self.db[name].insert(0, i) + if self.settings["general"]["reverse_timelines"] == False: objects.append(i) + else: objects.insert(0, i) num = num+1 + self.db[name] = objects return num def order_direct_messages(self, data): @@ -83,19 +87,28 @@ class Session(base.baseSession): sent = 0 if ("direct_messages" in self.db) == False: self.db["direct_messages"] = [] + if ("sent_direct_messages" in self.db) == False: + self.db["sent_direct_messages"] = [] + objects = self.db["direct_messages"] + sent_objects = self.db["sent_direct_messages"] for i in data: # Twitter returns sender_id as str, which must be converted to int in order to match to our user_id object. 
if int(i.message_create["sender_id"]) == self.db["user_id"]: if "sent_direct_messages" in self.db and utils.find_item(i.id, self.db["sent_direct_messages"]) == None: - if self.settings["general"]["reverse_timelines"] == False: self.db["sent_direct_messages"].append(i) - else: self.db["sent_direct_messages"].insert(0, i) + if self.settings["general"]["reverse_timelines"] == False: sent_objects.append(i) + else: sent_objects.insert(0, i) sent = sent+1 else: if utils.find_item(i.id, self.db["direct_messages"]) == None: - if self.settings["general"]["reverse_timelines"] == False: self.db["direct_messages"].append(i) - else: self.db["direct_messages"].insert(0, i) + if self.settings["general"]["reverse_timelines"] == False: objects.append(i) + else: objects.insert(0, i) incoming = incoming+1 + self.db["direct_messages"] = objects + + self.db["sent_direct_messages"] = sent_objects pub.sendMessage("sent-dms-updated", total=sent, account=self.db["user_name"]) + + return incoming def __init__(self, *args, **kwargs): @@ -106,6 +119,12 @@ class Session(base.baseSession): self.reconnection_function_active = False self.counter = 0 self.lists = [] + # As users are cached for accessing them with not too many twitter calls, + # there could be a weird situation where a deleted user who sent direct messages to the current account will not be able to be retrieved at twitter. + # So we need to store an "user deleted" object in the cache, but have the ID of the deleted user in a local reference. + # This will be especially useful because if the user reactivates their account later, TWblue will try to retrieve such user again at startup. + # If we wouldn't implement this approach, TWBlue would save permanently the "deleted user" object. 
+ self.deleted_users = {} # @_require_configuration def login(self, verify_credentials=True): @@ -161,35 +180,6 @@ class Session(base.baseSession): self.verify_authorisation(pincode) self.authorisation_dialog.Destroy() - def get_more_items(self, update_function, users=False, dm=False, name=None, *args, **kwargs): - """ Get more items for twitter objects. - update_function str: function to call for getting more items. Must be member of self.twitter. - users, dm bool: If any of these is set to True, the function will treat items as users or dm (they need different handling). - name str: name of the database item to put new element in.""" - results = [] - if "cursor" in kwargs and kwargs["cursor"] == 0: - output.speak(_(u"There are no more items to retrieve in this buffer.")) - return - data = getattr(self.twitter, update_function)(*args, **kwargs) - if users == True: - if type(data) == dict and "next_cursor" in data: - if "next_cursor" in data: # There are more objects to retrieve. - self.db[name]["cursor"] = data["next_cursor"] - else: # Set cursor to 0, wich means no more items available. - self.db[name]["cursor"] = 0 - for i in data["users"]: results.append(i) - elif type(data) == list: - results.extend(data[1:]) - elif dm == True: - if "next_cursor" in data: # There are more objects to retrieve. - self.db[name]["cursor"] = data["next_cursor"] - else: # Set cursor to 0, wich means no more items available. - self.db[name]["cursor"] = 0 - for i in data["events"]: results.append(i) - else: - results.extend(data[1:]) - return results - def api_call(self, call_name, action="", _sound=None, report_success=False, report_failure=True, preexec_message="", *args, **kwargs): """ Make a call to the Twitter API. If there is a connectionError or another exception not related to Twitter, It will call the method again at least 25 times, waiting a while between calls. Useful for post methods. If twitter returns an error, it will not call the method anymore. 
@@ -359,10 +349,11 @@ class Session(base.baseSession): value = "full_text" else: value = "text" - setattr(quoted_tweet, value, utils.expand_urls(getattr(quoted_tweet, value), quoted_tweet.entities)) - if quoted_tweet.is_quote_status == True and hasattr(quoted_tweet, "quoted_status"): + if hasattr(quoted_tweet, "entities"): + setattr(quoted_tweet, value, utils.expand_urls(getattr(quoted_tweet, value), quoted_tweet.entities)) + if hasattr(quoted_tweet, "is_quote_status") == True and hasattr(quoted_tweet, "quoted_status"): original_tweet = quoted_tweet.quoted_status - elif hasattr(quoted_tweet, "retweeted_status") and quoted_tweet.retweeted_status.is_quote_status == True and hasattr(quoted_tweet.retweeted_status, "quoted_status"): + elif hasattr(quoted_tweet, "retweeted_status") and hasattr(quoted_tweet.retweeted_status, "is_quote_status") == True and hasattr(quoted_tweet.retweeted_status, "quoted_status"): original_tweet = quoted_tweet.retweeted_status.quoted_status else: return quoted_tweet @@ -373,40 +364,55 @@ class Session(base.baseSession): value = "message" else: value = "text" - setattr(original_tweet, value, utils.expand_urls(getattr(original_tweet, value), original_tweet.entities)) - return compose.compose_quoted_tweet(quoted_tweet, original_tweet) + if hasattr(original_tweet, "entities"): + setattr(original_tweet, value, utils.expand_urls(getattr(original_tweet, value), original_tweet.entities)) + # ToDo: Shall we check whether we should add show_screen_names here? + return compose.compose_quoted_tweet(quoted_tweet, original_tweet, session=self) def check_long_tweet(self, tweet): """ Process a tweet and add extra info if it's a long tweet made with Twyshort. tweet dict: a tweet object. 
returns a tweet with a new argument message, or original tweet if it's not a long tweet.""" - long = twishort.is_long(tweet) + long = False + if hasattr(tweet, "entities") and tweet.entities.get("urls"): + long = twishort.is_long(tweet) if long != False and config.app["app-settings"]["handle_longtweets"]: message = twishort.get_full_text(long) if hasattr(tweet, "quoted_status"): tweet.quoted_status.message = message if tweet.quoted_status.message == False: return False tweet.quoted_status.twishort = True - for i in tweet.quoted_status.entities["user_mentions"]: - if "@%s" % (i["screen_name"]) not in tweet.quoted_status.message and i["screen_name"] != tweet.user.screen_name: - if hasattr(tweet.quoted_status, "retweeted_status") and tweet.retweeted_status.user.screen_name == i["screen_name"]: - continue - tweet.quoted_status.message = u"@%s %s" % (i["screen_name"], tweet.message) + if hasattr(tweet.quoted_status, "entities") and tweet.quoted_status.entities.get("user_mentions"): + for i in tweet.quoted_status.entities["user_mentions"]: + if "@%s" % (i["screen_name"]) not in tweet.quoted_status.message and i["screen_name"] != self.get_user(tweet.user).screen_name: + if hasattr(tweet.quoted_status, "retweeted_status") and self.get_user(tweet.retweeted_status.user).screen_name == i["screen_name"]: + continue + tweet.quoted_status.message = u"@%s %s" % (i["screen_name"], tweet.message) else: tweet.message = message if tweet.message == False: return False tweet.twishort = True - for i in tweet.entities["user_mentions"]: - if "@%s" % (i["screen_name"]) not in tweet.message and i["screen_name"] != tweet.user.screen_name: - if hasattr(tweet, "retweeted_status") and tweet.retweeted_status.user.screen_name == i["screen_name"]: - continue + if hasattr(tweet, "entities") and tweet.entities.get("user_mentions"): + for i in tweet.entities["user_mentions"]: + if "@%s" % (i["screen_name"]) not in tweet.message and i["screen_name"] != self.get_user(tweet.user).screen_name: + if 
hasattr(tweet, "retweeted_status") and self.get_user(tweet.retweeted_status.user).screen_name == i["screen_name"]: + continue + tweet.message = u"@%s %s" % (i["screen_name"], tweet.message) return tweet def get_user(self, id): """ Returns an user object associated with an ID. id str: User identifier, provided by Twitter. returns a tweepy user object.""" - if ("users" in self.db) == False or (id in self.db["users"]) == False: + if hasattr(id, "id_str"): + log.error("Called get_user function by passing a full user id as a parameter.") + id = id.id_str + # Check if the user has been added to the list of deleted users previously. + if id in self.deleted_users: + log.debug("Returning user {} from the list of deleted users.".format(id)) + return self.deleted_users[id] + if ("users" in self.db) == False or (str(id) in self.db["users"]) == False: + log.debug("Requesting user id {} as it is not present in the users database.".format(id)) try: user = self.twitter.get_user(id=id) except TweepError as err: @@ -414,11 +420,18 @@ class Session(base.baseSession): user.screen_name = "deleted_user" user.id = id user.name = _("Deleted account") - user.id_str = id - self.db["users"][user.id_str] = user + if hasattr(err, "api_code") and err.api_code == 50: + self.deleted_users[id] = user + return user + else: + log.exception("Error when attempting to retrieve an user from Twitter.") + return user + users = self.db["users"] + users[user.id_str] = user + self.db["users"] = users return user else: - return self.db["users"][id] + return self.db["users"][str(id)] def get_user_by_screen_name(self, screen_name): """ Returns an user identifier associated with a screen_name. 
@@ -426,28 +439,65 @@ class Session(base.baseSession): returns an user ID.""" if ("users" in self.db) == False: user = utils.if_user_exists(self.twitter, screen_name) - self.db["users"][user["id_str"]] = user - return user["id_str"] + users = self.db["users"] + users[user["id"]] = user + self.db["users"] = users + return user["id"] else: for i in list(self.db["users"].keys()): if self.db["users"][i].screen_name == screen_name: - return self.db["users"][i].id_str + return self.db["users"][i].id user = utils.if_user_exists(self.twitter, screen_name) - self.db["users"][user.id_str] = user - return user.id_str + users = self.db["users"] + users[user.id] = user + self.db["users"] = users + return user.id def save_users(self, user_ids): """ Adds all new users to the users database. """ if len(user_ids) == 0: return log.debug("Received %d user IDS to be added in the database." % (len(user_ids))) - users_to_retrieve = [user_id for user_id in user_ids if user_id not in self.db["users"]] + users_to_retrieve = [user_id for user_id in user_ids if (user_id not in self.db["users"] and user_id not in self.deleted_users)] # Remove duplicates users_to_retrieve = list(dict.fromkeys(users_to_retrieve)) if len(users_to_retrieve) == 0: return log.debug("TWBlue will get %d new users from Twitter." % (len(users_to_retrieve))) - users = self.twitter.lookup_users(user_ids=users_to_retrieve, tweet_mode="extended") - for user in users: - self.db["users"][user.id_str] = user - log.debug("Added %d new users" % (len(users))) + try: + users = self.twitter.lookup_users(user_ids=users_to_retrieve, tweet_mode="extended") + users_db = self.db["users"] + for user in users: + users_db[user.id_str] = user + log.debug("Added %d new users" % (len(users))) + self.db["users"] = users_db + except TweepError as err: + if hasattr(err, "api_code") and err.api_code == 17: # Users not found. 
+ log.error("The specified users {} were not found in twitter.".format(user_ids)) + # Creates a deleted user object for every user_id not found here. + # This will make TWBlue to not waste Twitter API calls when attempting to retrieve those users again. + # As deleted_users is not saved across restarts, when restarting TWBlue, it will retrieve the correct users if they enabled their accounts. + for id in users_to_retrieve: + user = UserModel(None) + user.screen_name = "deleted_user" + user.id = id + user.name = _("Deleted account") + self.deleted_users[id] = user + else: + log.exception("An exception happened while attempting to retrieve a list of users from direct messages in Twitter.") + + def add_users_from_results(self, data): + users = self.db["users"] + for i in data: + if hasattr(i, "user"): + if isinstance(i.user, str): + log.warning("A String was passed to be added as an user. This is normal only if TWBlue tried to load a conversation.") + continue + if (i.user.id_str in self.db["users"]) == False: + users[i.user.id_str] = i.user + if hasattr(i, "quoted_status") and (i.quoted_status.user.id_str in self.db["users"]) == False: + users[i.quoted_status.user.id_str] = i.quoted_status.user + + if hasattr(i, "retweeted_status") and (i.retweeted_status.user.id_str in self.db["users"]) == False: + users[i.retweeted_status.user.id_str] = i.retweeted_status.user + self.db["users"] = users diff --git a/src/sessions/twitter/utils.py b/src/sessions/twitter/utils.py index 2e44f21b..10ba9c37 100644 --- a/src/sessions/twitter/utils.py +++ b/src/sessions/twitter/utils.py @@ -27,22 +27,16 @@ def find_urls (tweet): if hasattr(tweet, "message_create"): entities = tweet.message_create["message_data"]["entities"] else: - entities = tweet.entities - for i in entities["urls"]: - if i["expanded_url"] not in urls: - urls.append(i["expanded_url"]) + if hasattr(tweet, "entities") == True: + entities = tweet.entities + if entities.get("urls") != None: + for i in entities["urls"]: + if 
i["expanded_url"] not in urls: + urls.append(i["expanded_url"]) if hasattr(tweet, "quoted_status"): - for i in tweet.quoted_status.entities["urls"]: - if i["expanded_url"] not in urls: - urls.append(i["expanded_url"]) + urls.extend(find_urls(tweet.quoted_status)) if hasattr(tweet, "retweeted_status"): - for i in tweet.retweeted_status.entities["urls"]: - if i["expanded_url"] not in urls: - urls.append(i["expanded_url"]) - if hasattr(tweet["retweeted_status"], "quoted_status"): - for i in tweet.retweeted_status.quoted_status.entities["urls"]: - if i["expanded_url"] not in urls: - urls.append(i["expanded_url"]) + urls.extend(find_urls(tweet.retweeted_status)) if hasattr(tweet, "message"): i = "message" elif hasattr(tweet, "full_text"): @@ -75,13 +69,14 @@ def is_audio(tweet): if hasattr(tweet, "message_create"): entities = tweet.message_create["message_data"]["entities"] else: + if hasattr(tweet, "entities") == False or tweet.entities.get("hashtags") == None: + return False entities = tweet.entities if len(entities["hashtags"]) > 0: for i in entities["hashtags"]: if i["text"] == "audio": return True except IndexError: - print(tweet.entities["hashtags"]) log.exception("Exception while executing is_audio hashtag algorithm") def is_geocoded(tweet): @@ -92,6 +87,8 @@ def is_media(tweet): if hasattr(tweet, "message_create"): entities = tweet.message_create["message_data"]["entities"] else: + if hasattr(tweet, "entities") == False or tweet.entities.get("hashtags") == None: + return False entities = tweet.entities if entities.get("media") == None: return False @@ -103,28 +100,29 @@ def is_media(tweet): def get_all_mentioned(tweet, conf, field="screen_name"): """ Gets all users that have been mentioned.""" results = [] - for i in tweet.entities["user_mentions"]: - if i["screen_name"] != conf["user_name"] and i["screen_name"] != tweet.user.screen_name: - if i.get(field) not in results: - results.append(i.get(field)) + if hasattr(tweet, "entities") and 
tweet.entities.get("user_mentions"): + for i in tweet.entities["user_mentions"]: + if i["screen_name"] != conf["user_name"] and i["id_str"] != tweet.user: + if i.get(field) not in results: + results.append(i.get(field)) return results -def get_all_users(tweet, conf): +def get_all_users(tweet, session): string = [] + user = session.get_user(tweet.user) if hasattr(tweet, "retweeted_status"): - string.append(tweet.user.screen_name) + string.append(user.screen_name) tweet = tweet.retweeted_status - if hasattr(tweet, "sender"): - string.append(tweet.sender.screen_name) else: - if tweet.user.screen_name != conf["user_name"]: - string.append(tweet.user.screen_name) - for i in tweet.entities["user_mentions"]: - if i["screen_name"] != conf["user_name"] and i["screen_name"] != tweet.user.screen_name: - if i["screen_name"] not in string: - string.append(i["screen_name"]) + if user.screen_name != session.db["user_name"]: + string.append(user.screen_name) + if hasattr(tweet, "entities") and tweet.entities.get("user_mentions"): + for i in tweet.entities["user_mentions"]: + if i["screen_name"] != session.db["user_name"] and i["screen_name"] != user.screen_name: + if i["screen_name"] not in string: + string.append(i["screen_name"]) if len(string) == 0: - string.append(tweet.user.screen_name) + string.append(user.screen_name) return string def if_user_exists(twitter, user): @@ -144,7 +142,7 @@ def is_allowed(tweet, settings, buffer_name): tweet_data = {} if hasattr(tweet, "retweeted_status"): tweet_data["retweet"] = True - if tweet.in_reply_to_status_id_str != None: + if tweet.in_reply_to_status_id != None: tweet_data["reply"] = True if hasattr(tweet, "quoted_status"): tweet_data["quote"] = True @@ -209,6 +207,8 @@ def twitter_error(error): def expand_urls(text, entities): """ Expand all URLS present in text with information found in entities""" + if entities.get("urls") == None: + return text urls = find_urls_in_text(text) for url in entities["urls"]: if url["url"] in text: diff 
--git a/src/test/__init__.py b/src/test/__init__.py new file mode 100644 index 00000000..40a96afc --- /dev/null +++ b/src/test/__init__.py @@ -0,0 +1 @@ +# -*- coding: utf-8 -*- diff --git a/src/test/test_cache.py b/src/test/test_cache.py new file mode 100644 index 00000000..6337c3b2 --- /dev/null +++ b/src/test/test_cache.py @@ -0,0 +1,200 @@ +# -*- coding: utf-8 -*- +""" Test case to check some of the scenarios we might face when storing tweets in cache, both loading into memory or reading from disk. """ +import unittest +import os +import paths +import sqlitedict +import shutil +# The base session module requires sound as a dependency, and this needs libVLC to be locatable. +os.environ['PYTHON_VLC_MODULE_PATH']=os.path.abspath(os.path.join(paths.app_path(), "..", "windows-dependencies", "x86")) +os.environ['PYTHON_VLC_LIB_PATH']=os.path.abspath(os.path.join(paths.app_path(), "..", "windows-dependencies", "x86", "libvlc.dll")) +from sessions import base + +class cacheTestCase(unittest.TestCase): + + def setUp(self): + """ Configures a fake session to check caching objects here. """ + self.session = base.baseSession("testing") + if os.path.exists(os.path.join(paths.config_path(), "testing")) == False: + os.mkdir(os.path.join(paths.config_path(), "testing")) + self.session.get_configuration() + + def tearDown(self): + """ Removes the previously configured session. """ + session_folder = os.path.join(paths.config_path(), "testing") + if os.path.exists(session_folder): + shutil.rmtree(session_folder) + + def generate_dataset(self): + """ Generates a sample dataset""" + dataset = dict(home_timeline=["message" for i in range(10000)], mentions_timeline=["mention" for i in range(20000)]) + return dataset + + ### Testing database being read from disk. + + def test_cache_in_disk_unlimited_size(self): + """ Tests cache database being read from disk, storing the whole datasets. 
""" + dataset = self.generate_dataset() + self.session.settings["general"]["load_cache_in_memory"] = False + self.session.settings["general"]["persist_size"] = -1 + self.session.load_persistent_data() + self.session.db["home_timeline"] = dataset["home_timeline"] + self.session.db["mentions_timeline"] = dataset["mentions_timeline"] + self.session.save_persistent_data() + self.assertIsInstance(self.session.db, sqlitedict.SqliteDict) + self.assertTrue(self.session.db.get("home_timeline") != None) + self.assertTrue(self.session.db.get("mentions_timeline") != None) + self.assertEquals(len(self.session.db.get("home_timeline")), 10000) + self.assertEquals(len(self.session.db.get("mentions_timeline")), 20000) + self.session.db.close() + + def test_cache_in_disk_limited_dataset(self): + """ Tests whether the cache stores only the amount of items we ask it to store. """ + dataset = self.generate_dataset() + self.session.settings["general"]["load_cache_in_memory"] = False + self.session.settings["general"]["persist_size"] = 100 + self.session.load_persistent_data() + self.session.db["home_timeline"] = dataset["home_timeline"] + self.session.db["mentions_timeline"] = dataset["mentions_timeline"] + # We need to save and load the db again because we cannot modify buffers' size while the database is opened. + # As TWBlue reads directly from db when reading from disk, an attempt to modify buffers size while Blue is reading the db + # Might cause an out of sync error between the GUI lists and the database. + # So we perform the changes to buffer size when loading data during app startup if the DB is read from disk. 
+ self.session.save_persistent_data() + self.session.db = dict() + self.session.load_persistent_data() + self.assertIsInstance(self.session.db, sqlitedict.SqliteDict) + self.assertTrue(self.session.db.get("home_timeline") != None) + self.assertTrue(self.session.db.get("mentions_timeline") != None) + self.assertEquals(len(self.session.db.get("home_timeline")), 100) + self.assertEquals(len(self.session.db.get("mentions_timeline")), 100) + self.session.db.close() + + def test_cache_in_disk_limited_dataset_unreversed(self): + """Test if the cache is saved properly in unreversed buffers, when newest items are at the end of the list. """ + dataset = dict(home_timeline=[i for i in range(20)], mentions_timeline=[i for i in range(20)]) + self.session.settings["general"]["load_cache_in_memory"] = False + self.session.settings["general"]["persist_size"] = 10 + self.session.load_persistent_data() + self.session.db["home_timeline"] = dataset["home_timeline"] + self.session.db["mentions_timeline"] = dataset["mentions_timeline"] + # We need to save and load the db again because we cannot modify buffers' size while the database is opened. + # As TWBlue reads directly from db when reading from disk, an attempt to modify buffers size while Blue is reading the db + # Might cause an out of sync error between the GUI lists and the database. + # So we perform the changes to buffer size when loading data during app startup if the DB is read from disk. 
+ self.session.save_persistent_data() + self.session.db = dict() + self.session.load_persistent_data() + self.assertIsInstance(self.session.db, sqlitedict.SqliteDict) + self.assertTrue(self.session.db.get("home_timeline") != None) + self.assertTrue(self.session.db.get("mentions_timeline") != None) + self.assertEquals(self.session.db.get("home_timeline")[0], 10) + self.assertEquals(self.session.db.get("mentions_timeline")[0], 10) + self.assertEquals(self.session.db.get("home_timeline")[-1], 19) + self.assertEquals(self.session.db.get("mentions_timeline")[-1], 19) + self.session.db.close() + + def test_cache_in_disk_limited_dataset_reversed(self): + """Test if the cache is saved properly in reversed buffers, when newest items are at the start of the list. """ + dataset = dict(home_timeline=[i for i in range(19, -1, -1)], mentions_timeline=[i for i in range(19, -1, -1)]) + self.session.settings["general"]["load_cache_in_memory"] = False + self.session.settings["general"]["persist_size"] = 10 + self.session.settings["general"]["reverse_timelines"] = True + self.session.load_persistent_data() + self.session.db["home_timeline"] = dataset["home_timeline"] + self.session.db["mentions_timeline"] = dataset["mentions_timeline"] + # We need to save and load the db again because we cannot modify buffers' size while the database is opened. + # As TWBlue reads directly from db when reading from disk, an attempt to modify buffers size while Blue is reading the db + # Might cause an out of sync error between the GUI lists and the database. + # So we perform the changes to buffer size when loading data during app startup if the DB is read from disk. 
+ self.session.save_persistent_data() + self.session.db = dict() + self.session.load_persistent_data() + self.assertIsInstance(self.session.db, sqlitedict.SqliteDict) + self.assertTrue(self.session.db.get("home_timeline") != None) + self.assertTrue(self.session.db.get("mentions_timeline") != None) + self.assertEquals(self.session.db.get("home_timeline")[0], 19) + self.assertEquals(self.session.db.get("mentions_timeline")[0], 19) + self.assertEquals(self.session.db.get("home_timeline")[-1], 10) + self.assertEquals(self.session.db.get("mentions_timeline")[-1], 10) + self.session.db.close() + + ### Testing database being loaded into memory. Those tests should give the same results as before + ### but as we have different code depending whether we load db into memory or read it from disk, + ### we need to test this anyway. + def test_cache_in_memory_unlimited_size(self): + """ Tests cache database being loaded in memory, storing the whole datasets. """ + dataset = self.generate_dataset() + self.session.settings["general"]["load_cache_in_memory"] = True + self.session.settings["general"]["persist_size"] = -1 + self.session.load_persistent_data() + self.session.db["home_timeline"] = dataset["home_timeline"] + self.session.db["mentions_timeline"] = dataset["mentions_timeline"] + self.session.save_persistent_data() + self.session.db = dict() + self.session.load_persistent_data() + self.assertIsInstance(self.session.db, dict) + self.assertTrue(self.session.db.get("home_timeline") != None) + self.assertTrue(self.session.db.get("mentions_timeline") != None) + self.assertEquals(len(self.session.db.get("home_timeline")), 10000) + self.assertEquals(len(self.session.db.get("mentions_timeline")), 20000) + + def test_cache_in_memory_limited_dataset(self): + """ Tests whether the cache stores only the amount of items we ask it to store, when loaded in memory. 
""" + dataset = self.generate_dataset() + self.session.settings["general"]["load_cache_in_memory"] = True + self.session.settings["general"]["persist_size"] = 100 + self.session.load_persistent_data() + self.session.db["home_timeline"] = dataset["home_timeline"] + self.session.db["mentions_timeline"] = dataset["mentions_timeline"] + self.session.save_persistent_data() + self.session.db = dict() + self.session.load_persistent_data() + self.assertIsInstance(self.session.db, dict) + self.assertTrue(self.session.db.get("home_timeline") != None) + self.assertTrue(self.session.db.get("mentions_timeline") != None) + self.assertEquals(len(self.session.db.get("home_timeline")), 100) + self.assertEquals(len(self.session.db.get("mentions_timeline")), 100) + + def test_cache_in_memory_limited_dataset_unreversed(self): + """Test if the cache is saved properly when loaded in memory in unreversed buffers, when newest items are at the end of the list. """ + dataset = dict(home_timeline=[i for i in range(20)], mentions_timeline=[i for i in range(20)]) + self.session.settings["general"]["load_cache_in_memory"] = True + self.session.settings["general"]["persist_size"] = 10 + self.session.load_persistent_data() + self.assertTrue(len(self.session.db)==1) + self.session.db["home_timeline"] = dataset["home_timeline"] + self.session.db["mentions_timeline"] = dataset["mentions_timeline"] + self.session.save_persistent_data() + self.session.db = dict() + self.session.load_persistent_data() + self.assertIsInstance(self.session.db, dict) + self.assertTrue(self.session.db.get("home_timeline") != None) + self.assertTrue(self.session.db.get("mentions_timeline") != None) + self.assertEquals(self.session.db.get("home_timeline")[0], 10) + self.assertEquals(self.session.db.get("mentions_timeline")[0], 10) + self.assertEquals(self.session.db.get("home_timeline")[-1], 19) + self.assertEquals(self.session.db.get("mentions_timeline")[-1], 19) + + def test_cache_in_memory_limited_dataset_reversed(self): 
+ """Test if the cache is saved properly in reversed buffers, when newest items are at the start of the list. This test is for db read into memory. """ + dataset = dict(home_timeline=[i for i in range(19, -1, -1)], mentions_timeline=[i for i in range(19, -1, -1)]) + self.session.settings["general"]["load_cache_in_memory"] = True + self.session.settings["general"]["persist_size"] = 10 + self.session.settings["general"]["reverse_timelines"] = True + self.session.load_persistent_data() + self.session.db["home_timeline"] = dataset["home_timeline"] + self.session.db["mentions_timeline"] = dataset["mentions_timeline"] + self.session.save_persistent_data() + self.session.db = dict() + self.session.load_persistent_data() + self.assertIsInstance(self.session.db, dict) + self.assertIsNotNone(self.session.db.get("home_timeline")) + self.assertIsNotNone(self.session.db.get("mentions_timeline")) + self.assertEqual(self.session.db.get("home_timeline")[0], 19) + self.assertEqual(self.session.db.get("mentions_timeline")[0], 19) + self.assertEqual(self.session.db.get("home_timeline")[-1], 10) + self.assertEqual(self.session.db.get("mentions_timeline")[-1], 10) + +if __name__ == "__main__": + unittest.main() \ No newline at end of file diff --git a/src/wxUI/dialogs/configuration.py b/src/wxUI/dialogs/configuration.py index 147bba1d..1bc5de9c 100644 --- a/src/wxUI/dialogs/configuration.py +++ b/src/wxUI/dialogs/configuration.py @@ -1,7 +1,4 @@ # -*- coding: utf-8 -*- -from __future__ import absolute_import -from __future__ import unicode_literals -from builtins import range import logging as original_logger import wx import application @@ -127,6 +124,7 @@ class generalAccount(wx.Panel, baseDialog.BaseWXDialog): self.persist_size = wx.TextCtrl(self, -1) sizer.Add(PersistSizeLabel, 0, wx.ALL, 5) sizer.Add(self.persist_size, 0, wx.ALL, 5) + self.load_cache_in_memory = wx.CheckBox(self, wx.NewId(), _("Load cache for tweets in memory (much faster in big datasets but requires
more RAM)")) self.SetSizer(sizer) class reporting(wx.Panel, baseDialog.BaseWXDialog):