# -*- coding: utf-8 -*-
""" Some utilities for the twitter interface."""
import url_shortener
import re
import output
import config
import logging
import requests
import time
import sound
from tweepy.error import TweepError

log = logging.getLogger("twitter.utils")

__version__ = 0.1
__doc__ = "Find urls in tweets and #audio hashtag."

url_re = re.compile(r"(?i)\b((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'\".,<>?«»“”‘’]))")
url_re2 = re.compile(r"(?:\w+://|www\.)[^ ,.?!#%=+][^ \n\t]*")
bad_chars = '\'\\\n.,[](){}:;"'

def find_urls_in_text(text):
    return url_re2.findall(text)

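# Illustrative example of what url_re2 matches (the sample text and URL are
# hypothetical):
#
#     find_urls_in_text("new episode: https://example.com/episode.mp3 #audio")
#     # -> ["https://example.com/episode.mp3"]
#
# Note that trailing punctuation glued to a URL (for example a final period)
# is kept, since the pattern only stops at whitespace.
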
def find_urls(tweet, twitter_media=False):
    urls = []
    # If requested, add direct links to native Twitter videos.
    if twitter_media and hasattr(tweet, "extended_entities"):
        for mediaItem in tweet.extended_entities["media"]:
            if mediaItem["type"] == "video":
                for variant in mediaItem["video_info"]["variants"]:
                    if variant["content_type"] == "video/mp4":
                        urls.append(variant["url"])
                        break
    # Let's add URLs from tweet entities.
    if hasattr(tweet, "message_create"):
        entities = tweet.message_create["message_data"]["entities"]
    elif hasattr(tweet, "entities"):
        entities = tweet.entities
    else:
        entities = {}
    if entities.get("urls") is not None:
        for i in entities["urls"]:
            if i["expanded_url"] not in urls:
                urls.append(i["expanded_url"])
    if hasattr(tweet, "quoted_status"):
        urls.extend(find_urls(tweet.quoted_status, twitter_media))
    if hasattr(tweet, "retweeted_status"):
        urls.extend(find_urls(tweet.retweeted_status, twitter_media))
    if hasattr(tweet, "message"):
        i = "message"
    elif hasattr(tweet, "full_text"):
        i = "full_text"
    else:
        i = "text"
    if hasattr(tweet, "message_create"):
        extracted_urls = find_urls_in_text(tweet.message_create["message_data"]["text"])
    else:
        extracted_urls = find_urls_in_text(getattr(tweet, i))
    # Don't include t.co links (mostly they are photos or shortened versions of already added URLs).
    for i in extracted_urls:
        if i not in urls and "https://t.co" not in i:
            urls.append(i)
    return urls

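# For reference, a minimal sketch of the entity shapes find_urls() reads; the
# keys follow the Twitter API v1.1 format, the concrete values are hypothetical:
#
#     tweet.entities == {"urls": [{"url": "https://t.co/abc",
#                                  "expanded_url": "https://example.com/page"}]}
#     tweet.extended_entities == {"media": [{"type": "video",
#                                            "video_info": {"variants": [
#                                                {"content_type": "video/mp4",
#                                                 "url": "https://video.example/clip.mp4"}]}}]}
#
# With twitter_media=True the mp4 variant URL above would be included as well.
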
def find_item(item, listItems):
    for i in range(0, len(listItems)):
        if listItems[i].id == item.id:
            return i
        # Check also retweets.
        if hasattr(item, "retweeted_status") and item.retweeted_status.id == listItems[i].id:
            return i
    return None

def find_list(name, lists):
    for i in range(0, len(lists)):
        if lists[i].name == name:
            return lists[i].id

def is_audio(tweet):
    if hasattr(tweet, "quoted_status") and hasattr(tweet.quoted_status, "extended_entities"):
        result = is_audio(tweet.quoted_status)
        if result is not None:
            return result
    if hasattr(tweet, "retweeted_status") and hasattr(tweet.retweeted_status, "extended_entities"):
        result = is_audio(tweet.retweeted_status)
        if result:
            return result
    # Check for native Twitter videos (and audio) first.
    if hasattr(tweet, "extended_entities"):
        for mediaItem in tweet.extended_entities["media"]:
            if mediaItem["type"] == "video":
                return True
    try:
        if len(find_urls(tweet)) < 1:
            return False
        if hasattr(tweet, "message_create"):
            entities = tweet.message_create["message_data"]["entities"]
        else:
            if not hasattr(tweet, "entities") or tweet.entities.get("hashtags") is None:
                return False
            entities = tweet.entities
        if len(entities["hashtags"]) > 0:
            for i in entities["hashtags"]:
                if i["text"] == "audio":
                    return True
    except IndexError:
        log.exception("Exception while executing is_audio hashtag algorithm")

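# is_audio() treats native Twitter videos as playable, and otherwise looks for
# at least one URL plus the #audio hashtag. Sketch of the hashtag entity this
# relies on (hypothetical values):
#
#     tweet.entities == {"hashtags": [{"text": "audio"}], "urls": [...]}
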
def is_geocoded(tweet):
    if hasattr(tweet, "coordinates") and tweet.coordinates is not None:
        return True

def is_media(tweet):
    if hasattr(tweet, "message_create"):
        entities = tweet.message_create["message_data"]["entities"]
    else:
        if not hasattr(tweet, "entities") or tweet.entities.get("hashtags") is None:
            return False
        entities = tweet.entities
    if entities.get("media") is None:
        return False
    for i in entities["media"]:
        if i.get("type") == "photo":
            return True
    return False

def get_all_mentioned(tweet, conf, field="screen_name"):
    """ Gets all users that have been mentioned."""
    results = []
    if hasattr(tweet, "retweeted_status"):
        results.extend(get_all_mentioned(tweet.retweeted_status, conf, field))
    if hasattr(tweet, "quoted_status"):
        results.extend(get_all_mentioned(tweet.quoted_status, conf, field))
    if hasattr(tweet, "entities") and tweet.entities.get("user_mentions"):
        for i in tweet.entities["user_mentions"]:
            if i["screen_name"] != conf["user_name"] and i["id_str"] != tweet.user:
                if i.get(field) not in results:
                    results.append(i.get(field))
    return results

def get_all_users(tweet, session):
    string = []
    user = session.get_user(tweet.user)
    if user.screen_name != session.db["user_name"]:
        string.append(user.screen_name)
    if hasattr(tweet, "retweeted_status"):
        string.extend(get_all_users(tweet.retweeted_status, session))
    if hasattr(tweet, "quoted_status"):
        string.extend(get_all_users(tweet.quoted_status, session))
    if hasattr(tweet, "entities") and tweet.entities.get("user_mentions"):
        for i in tweet.entities["user_mentions"]:
            if i["screen_name"] != session.db["user_name"] and i["screen_name"] != user.screen_name:
                if i["screen_name"] not in string:
                    string.append(i["screen_name"])
    # Attempt to remove duplicates, typically caused by nested tweets.
    string = list(dict.fromkeys(string))
    if len(string) == 0:
        string.append(user.screen_name)
    return string

def if_user_exists(twitter, user):
    try:
        data = twitter.get_user(screen_name=user)
        return data
    except TweepError as err:
        if err.api_code == 50:
            return None
        else:
            return user

def is_allowed(tweet, settings, buffer_name):
    clients = settings["twitter"]["ignored_clients"]
    if hasattr(tweet, "sender"):
        return True
    allowed = True
    tweet_data = {}
    if hasattr(tweet, "retweeted_status"):
        tweet_data["retweet"] = True
    if hasattr(tweet, "in_reply_to_status_id"):
        tweet_data["reply"] = True
    if hasattr(tweet, "quoted_status"):
        tweet_data["quote"] = True
    if hasattr(tweet, "retweeted_status"):
        tweet = tweet.retweeted_status
    source = tweet.source
    for i in clients:
        if i.lower() == source.lower():
            return False
    return filter_tweet(tweet, tweet_data, settings, buffer_name)

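# Assumed shape of the ignored clients list checked in is_allowed(); the client
# name shown is hypothetical and the comparison is case-insensitive:
#
#     settings["twitter"]["ignored_clients"] = ["Example Bot Client"]
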
def filter_tweet(tweet, tweet_data, settings, buffer_name):
    if hasattr(tweet, "full_text"):
        value = "full_text"
    else:
        value = "text"
    for i in settings["filters"]:
        if settings["filters"][i]["in_buffer"] == buffer_name:
            regexp = settings["filters"][i]["regexp"]
            word = settings["filters"][i]["word"]
            # The if/else blocks below keep compatibility with filters created before these keys existed.
            if "allow_rts" in settings["filters"][i]:
                allow_rts = settings["filters"][i]["allow_rts"]
            else:
                allow_rts = "True"
            if "allow_quotes" in settings["filters"][i]:
                allow_quotes = settings["filters"][i]["allow_quotes"]
            else:
                allow_quotes = "True"
            if "allow_replies" in settings["filters"][i]:
                allow_replies = settings["filters"][i]["allow_replies"]
            else:
                allow_replies = "True"
            if allow_rts == "False" and "retweet" in tweet_data:
                return False
            if allow_quotes == "False" and "quote" in tweet_data:
                return False
            if allow_replies == "False" and "reply" in tweet_data:
                return False
            if word != "" and settings["filters"][i]["if_word_exists"]:
                if word in getattr(tweet, value):
                    return False
            elif word != "" and settings["filters"][i]["if_word_exists"] == False:
                if word not in getattr(tweet, value):
                    return False
            if settings["filters"][i]["in_lang"] == "True":
                if tweet.lang not in settings["filters"][i]["languages"]:
                    return False
            elif settings["filters"][i]["in_lang"] == "False":
                if tweet.lang in settings["filters"][i]["languages"]:
                    return False
    return True

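# Assumed shape of a single filter entry read by filter_tweet(); the key names
# come from the lookups above, the values shown are hypothetical:
#
#     settings["filters"]["my_filter"] = {
#         "in_buffer": "home_timeline",
#         "word": "spoiler", "if_word_exists": True,
#         "regexp": "",
#         "allow_rts": "True", "allow_quotes": "True", "allow_replies": "False",
#         "in_lang": "True", "languages": ["en", "es"],
#     }
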
def twitter_error(error):
    if error.api_code == 179:
        msg = _(u"Sorry, you are not authorised to see this status.")
    elif error.api_code == 144:
        msg = _(u"No status found with that ID")
    else:
        msg = _(u"Error code {0}").format(error.api_code)
    output.speak(msg)

def expand_urls(text, entities):
    """ Expand all URLs present in text with information found in entities."""
    if entities.get("urls") is None:
        return text
    urls = find_urls_in_text(text)
    for url in entities["urls"]:
        if url["url"] in text:
            text = text.replace(url["url"], url["expanded_url"])
    return text
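
# Illustrative usage of expand_urls(); the t.co token and target URL are
# hypothetical:
#
#     entities = {"urls": [{"url": "https://t.co/abc",
#                           "expanded_url": "https://example.com/article"}]}
#     expand_urls("read this: https://t.co/abc", entities)
#     # -> "read this: https://example.com/article"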