2016-02-13 17:06:36 -06:00
# -*- coding: utf-8 -*-
2016-02-22 08:49:51 -06:00
""" Some utilities. I no have idea how I should put these, so..."""
2016-02-18 17:16:43 -06:00
import os
2016-02-14 19:24:45 -06:00
import re
2019-02-03 20:56:32 -06:00
import html
2016-05-14 20:47:10 -05:00
import logging
2019-02-03 20:56:32 -06:00
import requests
2019-12-04 17:48:59 -06:00
from pubsub import pub
2019-01-02 04:42:53 +03:00
2016-05-14 20:47:10 -05:00
# Module-level logger for this utilities module.
log = logging.getLogger("utils")

# Matches anything that looks like a URL: either "<scheme>://" or "www."
# followed by one character that is not whitespace/punctuation and then
# everything up to the next space. A raw string is used so that the regex
# escapes (\w, \.) are not interpreted as (invalid) string escapes.
url_re = re.compile(r"(?:\w+://|www\.)[^ ,.?!#%=+][^ ]*")

# Characters stripped from both ends of every URL candidate found by url_re.
bad_chars = '\'\\.,[](){}:;"'
2016-02-13 17:06:36 -06:00
def seconds_to_string(seconds, precision=0):
    """Convert a number of seconds into a human readable string.

    The result lists days, hours, minutes and seconds, for example
    ``"1 hour, 2 minutes, 5 seconds"``. Day, hour and minute parts are
    omitted when they are zero; the seconds part is always present.

    NOTE(review): ``_`` is not defined in this block; presumably it is the
    gettext translation function installed globally by the application.

    :param seconds: duration in seconds (int or float, expected to be >= 0).
    :param precision: number of decimal places shown for the seconds part.
    :returns: the formatted, translated string.
    """
    # ToDo: Improve it to handle properly Russian plurals.
    days = seconds // 86400
    # Hours wrap at 24 so they are not counted a second time once a "day"
    # part has been emitted.
    hours = (seconds // 3600) % 24
    minutes = (seconds // 60) % 60
    secs = seconds % 60
    sec_string = format(secs, "." + str(precision) + "f")
    string = ""
    if days == 1:
        string += _("%d day, ") % days
    elif days >= 2:
        string += _("%d days, ") % days
    if hours == 1:
        string += _("%d hour, ") % hours
    elif hours >= 2:
        string += _("%d hours, ") % hours
    if minutes == 1:
        string += _("%d minute, ") % minutes
    elif minutes >= 2:
        string += _("%d minutes, ") % minutes
    # Exactly one of the two forms is appended; the singular is only used
    # when the value that is actually displayed equals 1.
    if float(sec_string) == 1:
        string += _("%s second") % sec_string
    else:
        string += _("%s seconds") % sec_string
    return string
2016-02-14 19:24:45 -06:00
def find_urls_in_text(text):
    """Return the list of URLs found in ``text``.

    Every candidate matched by the module-level ``url_re`` pattern has the
    characters listed in ``bad_chars`` stripped from both of its ends.
    """
    found = []
    for candidate in url_re.findall(text):
        found.append(candidate.strip(bad_chars))
    return found
2016-02-15 16:49:09 -06:00
2019-12-04 17:48:59 -06:00
def download_file(url, local_filename):
    """Download ``url`` into ``local_filename``, reporting progress via pubsub.

    Progress is published on the "change_status" topic: first a
    "Downloading <file>" message, then one percentage message per received
    chunk (only when the server announced a usable content length), and
    finally "Ready".

    :param url: address of the file to download.
    :param local_filename: path of the file to create or overwrite.
    :returns: ``local_filename``.
    """
    # The context manager makes sure the streamed connection is released
    # even if writing the file fails.
    with requests.get(url, stream=True) as response:
        pub.sendMessage("change_status", status=_("Downloading {0}").format(local_filename,))
        # The content-length header may be absent or malformed; in that
        # case the percentage cannot be computed and is simply not reported.
        try:
            total_length = int(response.headers.get("content-length"))
        except (TypeError, ValueError):
            total_length = 0
        downloaded = 0
        display_name = os.path.basename(local_filename)
        with open(local_filename, 'wb') as f:
            for chunk in response.iter_content(chunk_size=512*1024):
                if not chunk:  # filter out keep-alive new chunks
                    continue
                f.write(chunk)
                downloaded += len(chunk)
                if total_length > 0:
                    done = int(100 * downloaded / total_length)
                    msg = _("Downloading {0} ({1}%)").format(display_name, done)
                    pub.sendMessage("change_status", status=msg)
    pub.sendMessage("change_status", status=_("Ready"))
    return local_filename
2016-02-22 08:49:51 -06:00
2019-12-04 17:48:59 -06:00
def download_files(downloads):
    """Download every entry of ``downloads``, one after another.

    Each entry is indexed as ``entry[0]`` (the URL) and ``entry[1]`` (the
    local filename); both are handed to ``download_file``.
    """
    for entry in downloads:
        source_url, destination = entry[0], entry[1]
        download_file(source_url, destination)
2019-12-04 12:45:47 -06:00
2019-02-03 20:56:32 -06:00
# Mentions found in topic comments: [id123:bp-1_2|Name] or [club123:bp-1_2|Name].
_TOPIC_MENTION_RE = re.compile(r"\[(?:id|club)\d+:bp-\d+_\d+\|(\D+)\]")
# Mentions found in wall posts and comments: [id123|Name] or [club123|Name].
_WALL_MENTION_RE = re.compile(r"\[(?:id|club)\d+\|(\D+)\]")


def _mention_to_name(match):
    """Return the display name captured by a mention match, followed by ", "."""
    return match.group(1) + ", "


def detect_users(text):
    """Replace VK user and community mentions in ``text`` by their display names.

    Markup such as ``[id123|John Doe]`` or ``[club5:bp-1_2|My Group]`` is
    replaced by the name followed by ``", "``.

    :param text: text as posted in VK.
    :returns: the text with every mention replaced.
    """
    # A callable replacement is used so that the captured name is inserted
    # literally: backslashes in a name must not be read as regex template
    # escapes, and every mention is replaced by its own name in one pass.
    text = _TOPIC_MENTION_RE.sub(_mention_to_name, text)
    text = _WALL_MENTION_RE.sub(_mention_to_name, text)
    return text
2019-02-03 20:56:32 -06:00
def clean_text(text):
    """Prepare a text for display.

    Mentions are first replaced through ``detect_users``; HTML character
    references are then converted to their unicode counterparts.
    """
    return html.unescape(detect_users(text))
2019-04-16 15:45:25 -05:00
def transform_audio_url(url):
    """Transform the URL offered by VK to the unencrypted stream so we can still play it.

    This function will be updated every time VK decides to change something in their audio APIs.

    Changelog:
    30/04/2019: Re-enabled old methods as VK changed everything back to how it was working on 16.04.2019.
    17.04.2019: Updated function. Now it is not required to strip anything, just replacing /index.m3u8 with .mp3 should be enough.
    16.04.2019: Implemented this function. For now it replaces /index.m3u8 by .mp3, also removes the path component before "/audios" if the URL contains the word /audios, or the last path component before the filename if it doesn't.

    :param url: audio URL as returned by VK.
    :returns: the transformed URL, or ``url`` unchanged when it contains neither "vkuseraudio.net" nor "index.m3u8".
    """
    if "vkuseraudio.net" not in url and "index.m3u8" not in url:
        return url
    url = url.replace("/index.m3u8", ".mp3")
    parts = url.split("/")
    if "/audios" not in url:
        # Drop the last path component before the filename.
        url = url.replace("/"+parts[-2], "")
    else:
        # Drop the path component that precedes "/audios".
        url = url.replace("/"+parts[-3], "")
    # Discard the query string, keeping only the address of the mp3 file.
    url = url.split(".mp3?")[0]+".mp3"
    return url
2019-10-14 17:25:06 -05:00
def safe_filename(filename):
    """Return ``filename`` reduced to characters that are safe in a file name.

    Letters, digits, spaces and the symbols ``_ . , - ( )`` are kept; every
    other character is dropped and trailing whitespace is removed.
    """
    extra_allowed = " _.,-()"
    kept = []
    for char in filename:
        if char.isalpha() or char.isdigit() or char in extra_allowed:
            kept.append(char)
    return "".join(kept).rstrip()