socializer/src/sessionmanager/utils.py

# -*- coding: utf-8 -*-
""" Some utilities. I no have idea how I should put these, so..."""
import os
import re
import html
import logging
import requests

# Logger used by the helpers in this module.
log = logging.getLogger("utils")
# Matches URL-like text: either "scheme://" or "www." followed by a run of non-space characters.
# Written as a raw string so that \w and \. reach the regex engine without relying on Python
# passing unknown string escapes through (which emits a warning on modern interpreters).
url_re = re.compile(r"(?:\w+://|www\.)[^ ,.?!#%=+][^ ]*")
# Characters stripped from both ends of every match returned by find_urls_in_text.
bad_chars = '\'\\.,[](){}:;"'

def seconds_to_string(seconds, precision=0):
	""" Convert a number of seconds into a human readable string.

	seconds: amount of time to describe; an int or a float.
	precision: number of decimal places shown for the seconds component.
	Returns a string such as "1 day, 2 hours, 5 minutes, 30 seconds". Day, hour and minute components equal to zero are omitted; the seconds component is always present.
	Every fragment is passed through _(), which is not defined in this module (presumably installed globally by gettext -- verify against the application start-up code).
	"""
	# ToDo: Improve it to handle properly Russian plurals.
	days, remainder = divmod(seconds, 86400)
	# Hours are taken from what is left after removing whole days; otherwise 90000 seconds would read "1 day, 25 hours".
	hours, remainder = divmod(remainder, 3600)
	minutes, secs = divmod(remainder, 60)
	sec_string = format(secs, "." + str(precision) + "f")
	string = ""
	if days == 1:
		string += _("%d day, ") % days
	elif days >= 2:
		string += _("%d days, ") % days
	if hours == 1:
		string += _("%d hour, ") % hours
	elif hours >= 2:
		string += _("%d hours, ") % hours
	if minutes == 1:
		string += _("%d minute, ") % minutes
	elif minutes >= 2:
		string += _("%d minutes, ") % minutes
	# Singular or plural is decided from the value as it is displayed, so that only "1" (or "1.0", "1.00"...) reads "second".
	if float(sec_string) == 1:
		string += _("%s second") % sec_string
	else:
		string += _("%s seconds") % sec_string
	return string

def find_urls_in_text(text):
	""" Return a list with every substring of text matched by url_re, after stripping the characters in bad_chars from both ends of each match."""
	urls = []
	for candidate in url_re.findall(text):
		urls.append(candidate.strip(bad_chars))
	return urls

def download_file(url, local_filename, window, chunk_size=8192):
	""" Download url into local_filename, reporting progress through window.

	url: address of the file to fetch.
	local_filename: path where the downloaded data is written.
	window: object exposing change_status(text); it receives the progress messages.
	chunk_size: number of bytes requested from the response per iteration.
	Returns local_filename once the download has finished.
	"""
	# The response is used as a context manager so the connection is released even if writing to disk fails.
	with requests.get(url, stream=True) as r:
		window.change_status(_("Downloading {0}").format(local_filename,))
		# The content-length header may be missing or malformed; in that case no percentage can be computed.
		try:
			total_length = int(r.headers.get("content-length"))
		except (TypeError, ValueError):
			total_length = 0
		dl = 0
		last_done = None
		with open(local_filename, 'wb') as f:
			for chunk in r.iter_content(chunk_size=chunk_size):
				if not chunk: # filter out keep-alive new chunks
					continue
				dl += len(chunk)
				f.write(chunk)
				if total_length <= 0:
					# Unknown size: keep the initial "Downloading ..." message instead of dividing by zero.
					continue
				done = int(100 * dl/total_length)
				# Only update the status when the displayed percentage actually changes.
				if done != last_done:
					last_done = done
					msg = _("Downloading {0} ({1}%)").format(os.path.basename(local_filename), done)
					window.change_status(msg)
	window.change_status(_("Ready"))
	return local_filename

# Matches VK mentions of users and communities: [id123|Name] or [club123|Name] as found in wall
# posts and comments, and [id123:bp-1_2|Name] as found in topic comments.
# The single capturing group is the visible name; it may contain anything except square brackets.
_mention_re = re.compile(r"\[(?:id|club)\d+(?::bp-\d+_\d+)?\|([^\[\]]+)\]")

def detect_users(text):
	""" Detect all users and communities mentioned in any text posted in VK.

	text: string that may contain mention markup.
	Returns text with every mention replaced by its visible name followed by ", ".
	"""
	# A function is used as replacement so the name is inserted literally: given as a plain string,
	# re.sub would interpret backslashes in the name as escapes or group references.
	return _mention_re.sub(lambda mention: mention.group(1) + ", ", text)

def clean_text(text):
	""" Prepare text for display: mentions are first rewritten by detect_users, then HTML character references are converted to the unicode characters they represent."""
	without_mentions = detect_users(text)
	return html.unescape(without_mentions)

def transform_audio_url(url):
	""" Rewrite an audio URL offered by VK so it points to a playable .mp3 stream.

	URLs containing neither "vkuseraudio.net" nor "index.m3u8" are returned untouched.
	Otherwise "/index.m3u8" is replaced by ".mp3" and one path component is removed: the third from the end when the URL contains "/audio", the second from the end when it does not.
	"""
	if not ("vkuseraudio.net" in url or "index.m3u8" in url):
		return url
	mp3_url = url.replace("/index.m3u8", ".mp3")
	segments = mp3_url.split("/")
	# Position, counted from the end, of the path component to drop.
	position = -3 if "/audio" in mp3_url else -2
	return mp3_url.replace("/" + segments[position], "")
Added some code for starting 2016-02-13 17:06:36 -06:00			`# -- coding: utf-8 --`
Clean message before displaying it in the buffers or the post dialogue 2016-02-22 08:49:51 -06:00			`""" Some utilities. I no have idea how I should put these, so..."""`
Added a FileDialog for choosing the filename in audio downloads 2016-02-18 17:16:43 -06:00			`import os`
The first URL will be posted as attachment 2016-02-14 19:24:45 -06:00			`import re`
Text should be cleaned better in posts, coments and topic comments. Cleaned texts will render properly usernames, group names and render properly certain unicode characters 2019-02-03 20:56:32 -06:00			`import html`
Posts from twitter are displayed properly 2016-05-14 20:47:10 -05:00			`import logging`
Text should be cleaned better in posts, coments and topic comments. Cleaned texts will render properly usernames, group names and render properly certain unicode characters 2019-02-03 20:56:32 -06:00			`import requests`
Port socializer to Python 3. #16 2019-01-01 19:42:53 -06:00
Posts from twitter are displayed properly 2016-05-14 20:47:10 -05:00			`log = logging.getLogger("utils")`
The first URL will be posted as attachment 2016-02-14 19:24:45 -06:00			`url_re = re.compile("(?:\w+://\|www\.)[^ ,.?!#%=+][^ ]*")`
			`bad_chars = '\'\\.,[](){}:;"'`
Added some code for starting 2016-02-13 17:06:36 -06:00
			`def seconds_to_string(seconds, precision=0):`
Group mentions are displayed properly when mentioned in comments 2019-02-03 18:54:31 -06:00			`""" convert a number of seconds in a string representation."""`
			`# ToDo: Improve it to handle properly Russian plurals.`
Added some code for starting 2016-02-13 17:06:36 -06:00			`day = seconds // 86400`
			`hour = seconds // 3600`
			`min = (seconds // 60) % 60`
			`sec = seconds - (hour * 3600) - (min * 60)`
			`sec_spec = "." + str(precision) + "f"`
			`sec_string = sec.__format__(sec_spec)`
			`string = ""`
			`if day == 1:`
Port socializer to Python 3. #16 2019-01-01 19:42:53 -06:00			`string += _("%d day, ") % day`
Added some code for starting 2016-02-13 17:06:36 -06:00			`elif day >= 2:`
Port socializer to Python 3. #16 2019-01-01 19:42:53 -06:00			`string += _("%d days, ") % day`
Added some code for starting 2016-02-13 17:06:36 -06:00			`if (hour == 1):`
Port socializer to Python 3. #16 2019-01-01 19:42:53 -06:00			`string += _("%d hour, ") % hour`
Added some code for starting 2016-02-13 17:06:36 -06:00			`elif (hour >= 2):`
			`string += _("%d hours, ") % hour`
			`if (min == 1):`
Port socializer to Python 3. #16 2019-01-01 19:42:53 -06:00			`string += _("%d minute, ") % min`
Added some code for starting 2016-02-13 17:06:36 -06:00			`elif (min >= 2):`
Port socializer to Python 3. #16 2019-01-01 19:42:53 -06:00			`string += _("%d minutes, ") % min`
Added some code for starting 2016-02-13 17:06:36 -06:00			`if sec >= 0 and sec <= 2:`
Port socializer to Python 3. #16 2019-01-01 19:42:53 -06:00			`string += _("%s second") % sec_string`
Added some code for starting 2016-02-13 17:06:36 -06:00			`else:`
Port socializer to Python 3. #16 2019-01-01 19:42:53 -06:00			`string += _("%s seconds") % sec_string`
The first URL will be posted as attachment 2016-02-14 19:24:45 -06:00			`return string`

			`def find_urls_in_text(text):`
Download audio files support from the details dialogue for songs 2016-02-15 16:49:09 -06:00			`return [s.strip(bad_chars) for s in url_re.findall(text)]`

			`def download_file(url, local_filename, window):`
			`r = requests.get(url, stream=True)`
Port socializer to Python 3. #16 2019-01-01 19:42:53 -06:00			`window.change_status(_("Downloading {0}").format(local_filename,))`
Download audio files support from the details dialogue for songs 2016-02-15 16:49:09 -06:00			`total_length = r.headers.get("content-length")`
			`dl = 0`
			`total_length = int(total_length)`
			`with open(local_filename, 'wb') as f:`
			`for chunk in r.iter_content(chunk_size=64):`
			`if chunk: # filter out keep-alive new chunks`
			`dl += len(chunk)`
			`f.write(chunk)`
Port socializer to Python 3. #16 2019-01-01 19:42:53 -06:00			`done = int(100 * dl/total_length)`
			`msg = _("Downloading {0} ({1}%)").format(os.path.basename(local_filename), done)`
Download audio files support from the details dialogue for songs 2016-02-15 16:49:09 -06:00			`window.change_status(msg)`
Port socializer to Python 3. #16 2019-01-01 19:42:53 -06:00			`window.change_status(_("Ready"))`
Clean message before displaying it in the buffers or the post dialogue 2016-02-22 08:49:51 -06:00			`return local_filename`

Text should be cleaned better in posts, coments and topic comments. Cleaned texts will render properly usernames, group names and render properly certain unicode characters 2019-02-03 20:56:32 -06:00			`def detect_users(text):`
			`""" Detect all users and communities mentionned in any text posted in VK."""`
			`# This regexp gets group and users mentionned in topic comments.`
			`for matched_data in re.finditer("(\[)(id\|club)(\d+:bp-\d+_\d+\\|)(\D+)(\])", text):`
			`text = re.sub("\[(id\|club)\d+:bp-\d+_\d+\\|\D+\]", matched_data.groups()[3]+", ", text, count=1)`
			`# This is for users and communities just mentionned in wall comments or posts.`
			`for matched_data in re.finditer("(\[)(id\|club)(\d+\\|)(\D+)(\])", text):`
			`text = re.sub("\[(id\|club)\d+\\|\D+\]", matched_data.groups()[3]+", ", text, count=1)`
			`return text`

			`def clean_text(text):`
			`""" Clean text, removing all unneeded HTMl and converting HTML represented characters in their unicode counterparts."""`
			`text = detect_users(text)`
			`text = html.unescape(text)`
Fixed all audio methods due to latest VK changes 2019-04-16 15:45:25 -05:00			`return text`

			`def transform_audio_url(url):`
			`""" Transforms the URL offered by VK to the unencrypted stream so we can still play it. """`
			`if "vkuseraudio.net" not in url and "index.m3u8" not in url:`
			`return url`
			`url = url.replace("/index.m3u8", ".mp3")`
			`parts = url.split("/")`
			`if "/audio" not in url:`
			`url = url.replace("/"+parts[-2], "")`
			`else:`
			`url = url.replace("/"+parts[-3], "")`
			`return url`