Text should be cleaned better in posts, comments and topic comments. Cleaned text now renders usernames and group names properly, and also renders certain Unicode characters properly
This commit is contained in:
@@ -1,12 +1,10 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
""" this module contains everything used to render different kind of posts (posts in the home buffer,
|
||||
Chat messages, audios, videos, photos, comments in posts, etc)"""
|
||||
from __future__ import unicode_literals
|
||||
from builtins import range
|
||||
import arrow
|
||||
import languageHandler
|
||||
import logging
|
||||
from . utils import seconds_to_string
|
||||
from . utils import seconds_to_string, clean_text
|
||||
|
||||
log = logging.getLogger(__file__)
|
||||
|
||||
@@ -50,12 +48,6 @@ def clean_audio(audio):
|
||||
audio["count"] = audio["count"] -1
|
||||
return audio
|
||||
|
||||
def clean_text(text):
|
||||
""" Replace HTML "<br>" tags and escaped newlines (a backslash followed by the letter n) with real line breaks and return the result. No other markup or HTML entity is touched."""
|
||||
text = text.replace("<br>", "\n")
|
||||
text = text.replace("\\n", "\n")
|
||||
return text
|
||||
|
||||
def add_attachment(attachment):
|
||||
msg = ""
|
||||
tpe = ""
|
||||
|
@@ -1,9 +1,10 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
""" Some utilities. I no have idea how I should put these, so..."""
|
||||
import os
|
||||
import requests
|
||||
import re
|
||||
import html
|
||||
import logging
|
||||
import requests
|
||||
|
||||
log = logging.getLogger("utils")
|
||||
# Matches URLs that start with a scheme ("word://") or with "www.".
# Raw string so that \w and \. reach the regex engine untouched instead of
# being treated as (invalid) string escape sequences.
url_re = re.compile(r"(?:\w+://|www\.)[^ ,.?!#%=+][^ ]*")
|
||||
@@ -57,3 +58,18 @@ def download_file(url, local_filename, window):
|
||||
window.change_status(_("Ready"))
|
||||
return local_filename
|
||||
|
||||
def detect_users(text):
    """Replace VK user/community mention markup in *text* with the display name.

    Two mention forms are recognised:

    * ``[id123|Name]`` / ``[club123|Name]`` (wall posts and comments), and
    * ``[id123:bp-1_2|Name]`` / ``[club123:bp-1_2|Name]`` (topic comments).

    Each mention is replaced by ``Name`` followed by ``", "``; everything
    else is returned unchanged.

    :param text: the text to process.
    :returns: the text with every recognised mention replaced.
    """
    # One pattern covers both forms: the ":bp-<n>_<n>" part is optional.
    # The name is "everything up to the first closing bracket", so it may
    # contain digits and cannot run past the end of its own mention.
    mention = r"\[(?:id|club)\d+(?::bp-\d+_\d+)?\|([^\]]+)\]"
    # A callable replacement inserts the name literally; passing the name as
    # a replacement template would make re.sub interpret any backslash in it.
    return re.sub(mention, lambda match: match.group(1) + ", ", text)
|
||||
|
||||
def clean_text(text):
    """Return *text* with mentions replaced by plain names and HTML character references decoded.

    Mention markup is handled first by detect_users(); the result is then
    passed through html.unescape().
    """
    without_mentions = detect_users(text)
    return html.unescape(without_mentions)
|
Reference in New Issue
Block a user