2016-02-13 17:06:36 -06:00
|
|
|
# -*- coding: utf-8 -*-
|
2016-02-22 08:49:51 -06:00
|
|
|
""" Some utilities. I no have idea how I should put these, so..."""
|
2016-02-18 17:16:43 -06:00
|
|
|
import os
|
2016-02-14 19:24:45 -06:00
|
|
|
import re
|
2019-02-03 20:56:32 -06:00
|
|
|
import html
|
2016-05-14 20:47:10 -05:00
|
|
|
import logging
|
2019-02-03 20:56:32 -06:00
|
|
|
import requests
|
2019-01-01 19:42:53 -06:00
|
|
|
|
2016-05-14 20:47:10 -05:00
|
|
|
# Module-level logger, registered under the name "utils".
log = logging.getLogger("utils")

# Matches a scheme prefix ("word://") or "www.", followed by one character that
# is neither a space nor one of ,.?!#%=+ and then everything up to the next space.
# Written as a raw string so "\w" and "\." reach the regex engine untouched
# instead of being parsed (and warned about) as Python string escapes.
url_re = re.compile(r"(?:\w+://|www\.)[^ ,.?!#%=+][^ ]*")

# Punctuation characters that are not considered part of an URL when they
# appear at its beginning or end.
bad_chars = '\'\\.,[](){}:;"'
|
2016-02-13 17:06:36 -06:00
|
|
|
|
|
|
|
def seconds_to_string(seconds, precision=0):
    """ Convert a number of seconds into a human readable string.

    seconds: amount of seconds to convert (int or float).
    precision: number of decimal places used to display the seconds component.

    Returns a string such as "1 day, 2 hours, 3 minutes, 4 seconds".
    Days, hours and minutes are left out when they are zero; the seconds
    component is always present. Every fragment is passed through _()
    (presumably the gettext function installed by the application).
    """
    # ToDo: Improve it to handle properly Russian plurals.
    days = seconds // 86400
    # Hours and minutes are reduced modulo their parent unit; otherwise the
    # hours already reported as days would be counted a second time.
    hours = (seconds // 3600) % 24
    minutes = (seconds // 60) % 60
    secs = seconds % 60
    sec_string = format(secs, "." + str(precision) + "f")
    parts = []
    if days == 1:
        parts.append(_("%d day, ") % days)
    elif days >= 2:
        parts.append(_("%d days, ") % days)
    if hours == 1:
        parts.append(_("%d hour, ") % hours)
    elif hours >= 2:
        parts.append(_("%d hours, ") % hours)
    if minutes == 1:
        parts.append(_("%d minute, ") % minutes)
    elif minutes >= 2:
        parts.append(_("%d minutes, ") % minutes)
    # Choose singular/plural from the value as it is displayed, so that the
    # singular form is used only when the user actually reads "1" (or "1.0"...).
    if float(sec_string) == 1:
        parts.append(_("%s second") % sec_string)
    else:
        parts.append(_("%s seconds") % sec_string)
    return "".join(parts)
|
|
|
|
|
|
|
|
def find_urls_in_text(text):
    """ Return a list with all URLs found in the given text.

    Candidates are located with the module-level url_re pattern; every
    candidate then gets the characters listed in bad_chars removed from
    both of its ends.
    """
    urls = []
    for candidate in url_re.findall(text):
        urls.append(candidate.strip(bad_chars))
    return urls
|
|
|
|
|
|
|
|
def download_file(url, local_filename, window):
    """ Download a file in small chunks, reporting progress to a window.

    url: address of the resource, fetched with requests in streaming mode.
    local_filename: path where the received data is written (opened in 'wb' mode).
    window: object exposing change_status(text); it receives the progress messages.

    Returns local_filename once all the data has been written.
    """
    # The response is used as a context manager so the streamed connection is
    # released even if writing to disk fails midway.
    with requests.get(url, stream=True) as r:
        window.change_status(_("Downloading {0}").format(local_filename,))
        # Servers may omit Content-Length (or send something unparsable). In
        # that case a percentage cannot be computed, so only the initial
        # "Downloading" status is shown while the data is still saved.
        try:
            total_length = int(r.headers.get("content-length"))
        except (TypeError, ValueError):
            total_length = 0
        dl = 0
        with open(local_filename, 'wb') as f:
            for chunk in r.iter_content(chunk_size=64):
                if not chunk:  # filter out keep-alive new chunks
                    continue
                dl += len(chunk)
                f.write(chunk)
                if total_length > 0:
                    done = int(100 * dl/total_length)
                    msg = _("Downloading {0} ({1}%)").format(os.path.basename(local_filename), done)
                    window.change_status(msg)
    window.change_status(_("Ready"))
    return local_filename
|
|
|
|
|
2019-02-03 20:56:32 -06:00
|
|
|
def detect_users(text):
    """ Detect all users and communities mentioned in any text posted in VK.

    Mentions written as [id123|Name] or [club123|Name] (and the topic comment
    variant [id123:bp-1_2|Name]) are replaced by "Name, ".

    text: string to process.
    Returns the text with every mention replaced.
    """
    def _name_only(match):
        # Group 4 is the display name. A function is used as replacement so
        # the name is inserted literally: passed as a replacement string, a
        # backslash inside a name would be interpreted as a template escape.
        return match.group(4) + ", "

    # This regexp gets groups and users mentioned in topic comments.
    text = re.sub(r"(\[)(id|club)(\d+:bp-\d+_\d+\|)(\D+)(\])", _name_only, text)
    # This is for users and communities just mentioned in wall comments or posts.
    text = re.sub(r"(\[)(id|club)(\d+\|)(\D+)(\])", _name_only, text)
    return text
|
|
|
|
|
|
|
|
def clean_text(text):
    """ Prepare a text received from VK to be displayed.

    Mentions of users and communities are first replaced by detect_users,
    then HTML character references are converted to their unicode counterparts.
    """
    return html.unescape(detect_users(text))
|
|
|
|
|
|
|
|
def transform_audio_url(url):
    """ Turn the audio URL offered by VK into the unencrypted stream URL, so it can still be played.

    This function is meant to be updated every time VK changes something in their audio APIs.

    Changelog:
    16/04/2019: Implemented this function. It replaced /index.m3u8 by .mp3 and also removed the path component before "/audios" if the URL contained /audios, or the last path component before the filename otherwise.
    17/04/2019: Updated function. Stripping path components is no longer required; replacing /index.m3u8 with .mp3 should be enough.
    """
    from_vk_host = "vkuseraudio.net" in url
    is_playlist = "index.m3u8" in url
    # URLs showing neither marker are handed back untouched.
    if not (from_vk_host or is_playlist):
        return url
    return url.replace("/index.m3u8", ".mp3")
|
|
|
|
### The following code was useful for VK audio methods prior to 17/04/2019.
|
|
|
|
# I just left this here because they may enable such change any time soon.
|
|
|
|
### basically this method was requiring us to strip a part of the full URL.
|
|
|
|
# parts = url.split("/")
|
|
|
|
# if "/audio" not in url:
|
|
|
|
# url = url.replace("/"+parts[-2], "")
|
|
|
|
# else:
|
|
|
|
# url = url.replace("/"+parts[-3], "")
|
|
|
|
# return url
|