From 4d1732b3aaef037b59c4fcccf5a9885404505ce7 Mon Sep 17 00:00:00 2001 From: Manuel Cortez Date: Wed, 29 Nov 2017 10:55:42 -0600 Subject: [PATCH] Improvements in quoted tweets handling. Closes #190 --- src/long_tweets/tweets.py | 8 ++++++-- src/sessionmanager/session.py | 16 +++++++++------- src/twitter/utils.py | 26 +++++++++++++++++++++----- 3 files changed, 36 insertions(+), 14 deletions(-) diff --git a/src/long_tweets/tweets.py b/src/long_tweets/tweets.py index e84095a5..1ed12835 100644 --- a/src/long_tweets/tweets.py +++ b/src/long_tweets/tweets.py @@ -19,8 +19,10 @@ from twitter import utils def is_long(tweet): - if tweet.has_key("quoted_status_id") and tweet["quoted_status_id"] != None: + if tweet.has_key("quoted_status_id") and tweet.has_key("quoted_status"): return tweet["quoted_status_id"] + elif tweet.has_key("retweeted_status") and tweet["retweeted_status"].has_key("quoted_status_id") and tweet["retweeted_status"].has_key("quoted_status"): + return tweet["retweeted_status"]["quoted_status_id"] return False def clear_url(tweet): @@ -33,6 +35,8 @@ def clear_url(tweet): except IndexError: pass try: tweet["entities"]["urls"].remove(tweet["entities"]["urls"][-1]) - except ValueError: + except IndexError: + tweet["retweeted_status"]["entities"]["urls"].remove(tweet["retweeted_status"]["entities"]["urls"][-1]) + else: pass return tweet \ No newline at end of file diff --git a/src/sessionmanager/session.py b/src/sessionmanager/session.py index 688e0d1a..13568dab 100644 --- a/src/sessionmanager/session.py +++ b/src/sessionmanager/session.py @@ -75,8 +75,7 @@ class Session(object): log.error("Ignoring an older tweet... Last id: {0}, tweet id: {1}".format(last_id, i["id"])) continue if utils.find_item(i["id"], self.db[name]) == None and utils.is_allowed(i, self.settings, name) == True: - try: i = self.check_quoted_status(i) - except: pass + i = self.check_quoted_status(i) i = self.check_long_tweet(i) if i == False: continue if self.settings["general"]["reverse_timelines"] == False: self.db[name].append(i) @@ -367,7 +366,6 @@ class Session(object): def add_friends(self): try: -# print "setting friends" self.timelinesStream.set_friends(self.main_stream.friends) except AttributeError: pass @@ -463,7 +461,8 @@ class Session(object): def check_quoted_status(self, tweet): status = tweets.is_long(tweet) if status != False and config.app["app-settings"]["handle_longtweets"]: - tweet = self.get_quoted_tweet(tweet) + quoted_tweet = self.get_quoted_tweet(tweet) + return quoted_tweet return tweet def get_quoted_tweet(self, tweet): @@ -476,9 +475,12 @@ class Session(object): for url in range(0, len(urls)): try: quoted_tweet[value] = quoted_tweet[value].replace(urls[url], quoted_tweet["entities"]["urls"][url]["expanded_url"]) except IndexError: pass - id = tweets.is_long(quoted_tweet) - try: original_tweet = self.twitter.twitter.show_status(id=id, tweet_mode="extended") - except: return quoted_tweet + if quoted_tweet.has_key("quoted_status"): + original_tweet = quoted_tweet["quoted_status"] + elif quoted_tweet.has_key("retweeted_status") and quoted_tweet["retweeted_status"].has_key("quoted_status"): + original_tweet = quoted_tweet["retweeted_status"]["quoted_status"] + else: + return quoted_tweet original_tweet = self.check_long_tweet(original_tweet) urls = utils.find_urls_in_text(original_tweet["full_text"]) for url in range(0, len(urls)): diff --git a/src/twitter/utils.py b/src/twitter/utils.py index be3f1f73..b447df76 100644 --- a/src/twitter/utils.py +++ b/src/twitter/utils.py @@ -23,17 +23,33 @@ def find_urls_in_text(text): def find_urls (tweet): urls = [] + # Let's add URLS from tweet entities. + for i in tweet["entities"]["urls"]: + if i["expanded_url"] not in urls: + urls.append(i["expanded_url"]) + if tweet.has_key("quoted_status"): + for i in tweet["quoted_status"]["entities"]["urls"]: + if i["expanded_url"] not in urls: + urls.append(i["expanded_url"]) + if tweet.has_key("retweeted_status"): + for i in tweet["retweeted_status"]["entities"]["urls"]: + if i["expanded_url"] not in urls: + urls.append(i["expanded_url"]) + if tweet["retweeted_status"].has_key("quoted_status"): + for i in tweet["retweeted_status"]["quoted_status"]["entities"]["urls"]: + if i["expanded_url"] not in urls: + urls.append(i["expanded_url"]) if tweet.has_key("message"): i = "message" elif tweet.has_key("full_text"): i = "full_text" else: i = "text" - shorten_urls = find_urls_in_text(tweet[i]) - for url in range(0, len(shorten_urls)): - try: - urls.append(tweet["entities"]["urls"][url]["expanded_url"]) - except: pass + extracted_urls = find_urls_in_text(tweet[i]) + # Don't include t.co links (mostly they are photos or shortened versions of already added URLS). + for i in extracted_urls: + if i not in urls and "https://t.co" not in i: + urls.append(i) return urls def find_item(id, listItem):