is_audio, find_urls and is_media should work with DMs #215

This commit is contained in:
Manuel Cortez 2018-07-20 10:04:53 -05:00
parent 6b43ff1c0e
commit c7f4fd2926

View File

@ -22,11 +22,13 @@ def find_urls_in_text(text):
return [s.strip(bad_chars) for s in url_re2.findall(text)] return [s.strip(bad_chars) for s in url_re2.findall(text)]
def find_urls (tweet): def find_urls (tweet):
if tweet.has_key("entities") == False:
return []
urls = [] urls = []
# Let's add URLS from tweet entities. # Let's add URLS from tweet entities.
for i in tweet["entities"]["urls"]: if tweet.has_key("message_create"):
entities = tweet["message_create"]["message_data"]["entities"]
else:
entities = tweet["entities"]
for i in entities["urls"]:
if i["expanded_url"] not in urls: if i["expanded_url"] not in urls:
urls.append(i["expanded_url"]) urls.append(i["expanded_url"])
if tweet.has_key("quoted_status"): if tweet.has_key("quoted_status"):
@ -47,7 +49,10 @@ def find_urls (tweet):
i = "full_text" i = "full_text"
else: else:
i = "text" i = "text"
extracted_urls = find_urls_in_text(tweet[i]) if tweet.has_key("message_create"):
extracted_urls = find_urls_in_text(tweet["message_create"]["message_data"]["text"])
else:
extracted_urls = find_urls_in_text(tweet[i])
# Don't include t.co links (mostly they are photos or shortened versions of already added URLS). # Don't include t.co links (mostly they are photos or shortened versions of already added URLS).
for i in extracted_urls: for i in extracted_urls:
if i not in urls and "https://t.co" not in i: if i not in urls and "https://t.co" not in i:
@ -74,15 +79,18 @@ def find_next_reply(id, listItem):
return None return None
def is_audio(tweet):
    """Tell whether an item (tweet or direct message) is tagged as audio.

    An item counts as audio when find_urls() returns at least one URL for it
    and one of its hashtags has the text "audio".

    tweet: dict-like item. If it has a "message_create" key, the entities are
        read from tweet["message_create"]["message_data"]["entities"];
        otherwise from tweet["entities"].

    Returns True for audio items and False in every other case, including
    items without entities and items whose data is malformed.
    """
    try:
        if "message_create" in tweet:
            entities = tweet["message_create"]["message_data"].get("entities")
        else:
            entities = tweet.get("entities")
        # Items without entities cannot carry URLs or hashtags, and
        # find_urls() expects the entities to be present.
        if not entities:
            return False
        if len(find_urls(tweet)) < 1:
            return False
        for hashtag in entities.get("hashtags", []):
            if hashtag["text"] == "audio":
                return True
    except (IndexError, KeyError):
        # The handler must not index into the item again: direct messages
        # have no top-level "entities", so doing that here would raise a new
        # KeyError and hide the original error. log.exception already
        # records the traceback.
        log.exception("Exception while executing is_audio hashtag algorithm")
    return False
@ -91,10 +99,13 @@ def is_geocoded(tweet):
return True return True
def is_media(tweet):
    """Tell whether an item (tweet or direct message) has a photo attached.

    tweet: dict-like item. If it has a "message_create" key, the entities are
        read from tweet["message_create"]["message_data"]["entities"];
        otherwise from tweet["entities"].

    Returns True if any entry of entities["media"] has "type" equal to
    "photo". Returns False otherwise, including when the item has no
    entities or no media list.
    """
    if "message_create" in tweet:
        entities = tweet["message_create"]["message_data"].get("entities")
    else:
        entities = tweet.get("entities")
    # An item without entities simply has no media; answer False instead of
    # raising KeyError.
    if not entities:
        return False
    # NOTE(review): direct message payloads may carry attached media outside
    # "entities" -- confirm against the Twitter DM event format.
    for item in entities.get("media", []):
        if item.get("type") == "photo":
            return True
    return False