socializer/src/sessionmanager/utils.py

# -*- coding: utf-8 -*-
""" Some utilities. I have no idea where I should put these, so..."""
import os
import re
import html
import logging
import requests
from pubsub import pub

# Logger used by this module.
log = logging.getLogger("utils")
# Matches anything that looks like a URL: a "scheme://" prefix or a leading "www.",
# then one character that is neither a space nor one of ,.?!#%=+ and finally everything up to the next space.
# Written as a raw string so the regex escapes are not parsed as (invalid) string escapes.
url_re = re.compile(r"(?:\w+://|www\.)[^ ,.?!#%=+][^ ]*")
# Characters stripped from both ends of the URLS matched by url_re.
bad_chars = '\'\\.,[](){}:;"'

def seconds_to_string(seconds, precision=0):
	""" Convert a number of seconds in a string representation, such as "1 hour, 2 minutes, 5 seconds".

	seconds: amount of time to represent. Integers and floats are both accepted.
	precision: number of decimal places used when formatting the seconds component.
	Returns a string built from messages passed through _(). Days, hours and minutes are only included when they are at least 1; the seconds component is always included.
	"""
	# ToDo: Improve it to handle properly Russian plurals.
	# Break the total down so every unit only carries its own remainder.
	# Hours used to be left unwrapped, so 25 hours were reported as "1 day, 25 hours".
	total_minutes, sec = divmod(seconds, 60)
	total_hours, minutes = divmod(total_minutes, 60)
	day, hour = divmod(total_hours, 24)
	sec_string = format(sec, "." + str(precision) + "f")
	string = ""
	if day == 1:
		string += _("%d day, ") % day
	elif day >= 2:
		string += _("%d days, ") % day
	if hour == 1:
		string += _("%d hour, ") % hour
	elif hour >= 2:
		string += _("%d hours, ") % hour
	if minutes == 1:
		string += _("%d minute, ") % minutes
	elif minutes >= 2:
		string += _("%d minutes, ") % minutes
	# NOTE(review): every value from 0 to 2 takes the singular message here. Kept as it was; confirm whether only 1 should.
	if 0 <= sec <= 2:
		string += _("%s second") % sec_string
	else:
		string += _("%s seconds") % sec_string
	return string

def find_urls_in_text(text):
	""" Return a list with every URL matched by url_re in text, with any leading and trailing bad_chars characters removed."""
	urls = []
	for match in url_re.finditer(text):
		urls.append(match.group(0).strip(bad_chars))
	return urls

def download_file(url, local_filename):
	""" Download url into local_filename, reporting progress in the "change_status" pubsub topic.

	url: address of the file to retrieve. It is requested in streaming mode.
	local_filename: path where the downloaded data is written, in binary mode.
	Returns local_filename once the download has finished.
	A "Ready" status is sent at the end. The percentage is only reported when the server sends a usable content-length header.
	"""
	r = requests.get(url, stream=True)
	try:
		pub.sendMessage("change_status", status=_("Downloading {0}").format(local_filename,))
		# content-length is optional (chunked responses do not have it) and may be malformed.
		# Fall back to 0, meaning "unknown size", instead of failing on int(None).
		try:
			total_length = int(r.headers.get("content-length"))
		except (TypeError, ValueError):
			total_length = 0
		dl = 0
		with open(local_filename, 'wb') as f:
			for chunk in r.iter_content(chunk_size=512*1024):
				if not chunk: # filter out keep-alive new chunks
					continue
				dl += len(chunk)
				f.write(chunk)
				if total_length > 0:
					done = int(100 * dl/total_length)
					msg = _("Downloading {0} ({1}%)").format(os.path.basename(local_filename), done)
				else:
					# Size unknown: there is no percentage to show, and dividing by zero must be avoided.
					msg = _("Downloading {0}").format(os.path.basename(local_filename))
				pub.sendMessage("change_status", status=msg)
	finally:
		# A streamed response keeps its connection open until it is consumed or closed explicitly.
		r.close()
	pub.sendMessage("change_status", status=_("Ready"))
	return local_filename

def download_files(downloads):
	""" Download all items in downloads, one after another.
	Every item is indexed as (url, local_filename) and handed to download_file."""
	for item in downloads:
		source, destination = item[0], item[1]
		download_file(source, destination)

# Matches the markup VK uses for mentions. Group 1 is the displayed name.
# [id123|Name] or [club123|Name]: users and communities mentioned in wall posts or comments.
# [id123:bp-1_2|Name] or [club123:bp-1_2|Name]: users and communities mentioned in topic comments.
# The name is everything up to the closing bracket, so names containing digits are handled too.
_mention_re = re.compile(r"\[(?:id|club)\d+(?::bp-\d+_\d+)?\|([^\]]+)\]")

def detect_users(text):
	""" Detect all users and communities mentioned in any text posted in VK.

	text: string that may contain mention markup.
	Returns the text with every mention replaced by its displayed name followed by ", ".
	Text without mentions is returned unchanged.
	"""
	# A callable is used as replacement so the name is inserted literally.
	# A replacement string would be parsed as a template and fail on names containing backslashes.
	return _mention_re.sub(lambda mention: mention.group(1) + ", ", text)

def clean_text(text):
	""" Prepare a text for being displayed: the result of detect_users(text) is passed through html.unescape, so HTML character references are converted in their unicode counterparts."""
	return html.unescape(detect_users(text))

def transform_audio_url(url):
	""" Rewrite the stream URL offered by VK so it points to a plain .mp3 file.
		URLS containing neither "vkuseraudio.net" nor "index.m3u8" are returned untouched.
		For any other URL, "/index.m3u8" is replaced with ".mp3" and one path component is removed:
		the third from the end (dropping also everything after ".mp3?") if the URL contains "/audios", or the second from the end if it doesn't.
		This function will be updated every time VK decides to change something in their Audio API'S.
		Changelog:
		30/04/2019: Re-enabled old methods as VK changed everything as how it was working on 16.04.2019.
		17.04.2019: Updated function. Now it is not required to strip anything, just replacing /index.m3u8 with .mp3 should be enough.
		16.04.2019: Implemented this function. For now it replaces /index.m3u8 by .mp3, also removes the path component before "/audios" if the URL contains the word /audios, or the last path component before the filename if doesn't.
	"""
	is_vk_stream = "vkuseraudio.net" in url or "index.m3u8" in url
	if not is_vk_stream:
		return url
	direct = url.replace("/index.m3u8", ".mp3")
	segments = direct.split("/")
	if "/audios" in direct:
		# Drop the component placed before "audios", then cut the query string.
		shortened = direct.replace("/" + segments[-3], "")
		return shortened.split(".mp3?")[0] + ".mp3"
	# Drop the component placed right before the filename.
	return direct.replace("/" + segments[-2], "")

def safe_filename(filename):
	""" Return filename keeping only letters, digits, spaces and the symbols _ . , - ( ). Whitespace at the end of the result is removed."""
	extra_symbols = ("_", ".", ",", "-", "(", ")")
	def is_safe(char):
		return char.isalpha() or char.isdigit() or char == " " or char in extra_symbols
	return "".join(filter(is_safe, filename)).rstrip()
Added some code for starting 2016-02-13 17:06:36 -06:00			`# -- coding: utf-8 --`
Clean message before displaying it in the buffers or the post dialogue 2016-02-22 08:49:51 -06:00			`""" Some utilities. I no have idea how I should put these, so..."""`
Added a FileDialog for choosing the filename in audio downloads 2016-02-18 17:16:43 -06:00			`import os`
The first URL will be posted as attachment 2016-02-14 19:24:45 -06:00			`import re`
Text should be cleaned better in posts, coments and topic comments. Cleaned texts will render properly usernames, group names and render properly certain unicode characters 2019-02-03 20:56:32 -06:00			`import html`
Posts from twitter are displayed properly 2016-05-14 20:47:10 -05:00			`import logging`
Text should be cleaned better in posts, coments and topic comments. Cleaned texts will render properly usernames, group names and render properly certain unicode characters 2019-02-03 20:56:32 -06:00			`import requests`
Added download of multiple audios from any audio buffer 2019-12-04 17:48:59 -06:00			`from pubsub import pub`
Port socializer to Python 3. #16 2019-01-01 19:42:53 -06:00
Posts from twitter are displayed properly 2016-05-14 20:47:10 -05:00			`log = logging.getLogger("utils")`
The first URL will be posted as attachment 2016-02-14 19:24:45 -06:00			`url_re = re.compile("(?:\w+://\|www\.)[^ ,.?!#%=+][^ ]*")`
			`bad_chars = '\'\\.,[](){}:;"'`
Added some code for starting 2016-02-13 17:06:36 -06:00
			`def seconds_to_string(seconds, precision=0):`
Group mentions are displayed properly when mentioned in comments 2019-02-03 18:54:31 -06:00			`""" convert a number of seconds in a string representation."""`
			`# ToDo: Improve it to handle properly Russian plurals.`
Added some code for starting 2016-02-13 17:06:36 -06:00			`day = seconds // 86400`
			`hour = seconds // 3600`
			`min = (seconds // 60) % 60`
			`sec = seconds - (hour * 3600) - (min * 60)`
			`sec_spec = "." + str(precision) + "f"`
			`sec_string = sec.__format__(sec_spec)`
			`string = ""`
			`if day == 1:`
Port socializer to Python 3. #16 2019-01-01 19:42:53 -06:00			`string += _("%d day, ") % day`
Added some code for starting 2016-02-13 17:06:36 -06:00			`elif day >= 2:`
Port socializer to Python 3. #16 2019-01-01 19:42:53 -06:00			`string += _("%d days, ") % day`
Added some code for starting 2016-02-13 17:06:36 -06:00			`if (hour == 1):`
Port socializer to Python 3. #16 2019-01-01 19:42:53 -06:00			`string += _("%d hour, ") % hour`
Added some code for starting 2016-02-13 17:06:36 -06:00			`elif (hour >= 2):`
			`string += _("%d hours, ") % hour`
			`if (min == 1):`
Port socializer to Python 3. #16 2019-01-01 19:42:53 -06:00			`string += _("%d minute, ") % min`
Added some code for starting 2016-02-13 17:06:36 -06:00			`elif (min >= 2):`
Port socializer to Python 3. #16 2019-01-01 19:42:53 -06:00			`string += _("%d minutes, ") % min`
Added some code for starting 2016-02-13 17:06:36 -06:00			`if sec >= 0 and sec <= 2:`
Port socializer to Python 3. #16 2019-01-01 19:42:53 -06:00			`string += _("%s second") % sec_string`
Added some code for starting 2016-02-13 17:06:36 -06:00			`else:`
Port socializer to Python 3. #16 2019-01-01 19:42:53 -06:00			`string += _("%s seconds") % sec_string`
The first URL will be posted as attachment 2016-02-14 19:24:45 -06:00			`return string`

			`def find_urls_in_text(text):`
Download audio files support from the details dialogue for songs 2016-02-15 16:49:09 -06:00			`return [s.strip(bad_chars) for s in url_re.findall(text)]`

Added download of multiple audios from any audio buffer 2019-12-04 17:48:59 -06:00			`def download_file(url, local_filename):`
Download audio files support from the details dialogue for songs 2016-02-15 16:49:09 -06:00			`r = requests.get(url, stream=True)`
Added download of multiple audios from any audio buffer 2019-12-04 17:48:59 -06:00			`pub.sendMessage("change_status", status=_("Downloading {0}").format(local_filename,))`
Download audio files support from the details dialogue for songs 2016-02-15 16:49:09 -06:00			`total_length = r.headers.get("content-length")`
			`dl = 0`
			`total_length = int(total_length)`
			`with open(local_filename, 'wb') as f:`
Added download of multiple audios from any audio buffer 2019-12-04 17:48:59 -06:00			`for chunk in r.iter_content(chunk_size=512*1024):`
Download audio files support from the details dialogue for songs 2016-02-15 16:49:09 -06:00			`if chunk: # filter out keep-alive new chunks`
			`dl += len(chunk)`
			`f.write(chunk)`
Port socializer to Python 3. #16 2019-01-01 19:42:53 -06:00			`done = int(100 * dl/total_length)`
			`msg = _("Downloading {0} ({1}%)").format(os.path.basename(local_filename), done)`
Added download of multiple audios from any audio buffer 2019-12-04 17:48:59 -06:00			`# print(msg)`
			`pub.sendMessage("change_status", status=msg)`
			`pub.sendMessage("change_status", status=_("Ready"))`
Clean message before displaying it in the buffers or the post dialogue 2016-02-22 08:49:51 -06:00			`return local_filename`

Added download of multiple audios from any audio buffer 2019-12-04 17:48:59 -06:00			`def download_files(downloads):`
Multiupload attempt #1 2019-12-04 12:45:47 -06:00			`for download in downloads:`
Added download of multiple audios from any audio buffer 2019-12-04 17:48:59 -06:00			`download_file(download[0], download[1])`
Multiupload attempt #1 2019-12-04 12:45:47 -06:00
Text should be cleaned better in posts, coments and topic comments. Cleaned texts will render properly usernames, group names and render properly certain unicode characters 2019-02-03 20:56:32 -06:00			`def detect_users(text):`
			`""" Detect all users and communities mentionned in any text posted in VK."""`
			`# This regexp gets group and users mentionned in topic comments.`
			`for matched_data in re.finditer("(\[)(id\|club)(\d+:bp-\d+_\d+\\|)(\D+)(\])", text):`
			`text = re.sub("\[(id\|club)\d+:bp-\d+_\d+\\|\D+\]", matched_data.groups()[3]+", ", text, count=1)`
			`# This is for users and communities just mentionned in wall comments or posts.`
			`for matched_data in re.finditer("(\[)(id\|club)(\d+\\|)(\D+)(\])", text):`
			`text = re.sub("\[(id\|club)\d+\\|\D+\]", matched_data.groups()[3]+", ", text, count=1)`
			`return text`

			`def clean_text(text):`
			`""" Clean text, removing all unneeded HTMl and converting HTML represented characters in their unicode counterparts."""`
			`text = detect_users(text)`
			`text = html.unescape(text)`
Fixed all audio methods due to latest VK changes 2019-04-16 15:45:25 -05:00			`return text`

			`def transform_audio_url(url):`
Updated audio methods for changes introduced in 17/04 2019-04-17 11:50:15 -05:00			`""" Transforms the URL offered by VK to the unencrypted stream so we can still play it.`
			`This function will be updated every time VK decides to change something in their Audio API'S.`
			`Changelog:`
Fixed audio methods 2019-04-30 15:32:38 -05:00			`30/04/2019: Re-enabled old methods as VK changed everything as how it was working on 16.04.2019.`
			`17.04.2019: Updated function. Now it is not required to strip anything, just replacing /index.m3u8 with .mp3 should be enough.`
			`16.04.2019: Implemented this function. For now it replaces /index.m3u8 by .mp3, also removes the path component before "/audios" if the URL contains the word /audios, or the last path component before the filename if doesn't.`
Improved the audio transformation URl function 2021-01-19 11:40:17 -06:00			`"""`
Fixed all audio methods due to latest VK changes 2019-04-16 15:45:25 -05:00			`if "vkuseraudio.net" not in url and "index.m3u8" not in url:`
			`return url`
			`url = url.replace("/index.m3u8", ".mp3")`
Fixed audio methods 2019-04-30 15:32:38 -05:00			`parts = url.split("/")`
			`if "/audios" not in url:`
			`url = url.replace("/"+parts[-2], "")`
			`else:`
			`url = url.replace("/"+parts[-3], "")`
Improved the audio transformation URl function 2021-01-19 11:40:17 -06:00			`url = url.split(".mp3?")[0]+".mp3"`
added a safe-filename function so we will be sure the suggested filename won't break anything in the operating system 2019-10-14 17:25:06 -05:00			`return url`

			`def safe_filename(filename):`
			`allowed_symbols = ["_", ".", ",", "-", "(", ")"]`
			`return "".join([c for c in filename if c.isalpha() or c.isdigit() or c==' ' or c in allowed_symbols]).rstrip()`