Text should be cleaned better in posts, comments and topic comments. Cleaned texts will properly render usernames, group names and certain unicode characters
This commit is contained in:
parent
2496f19bee
commit
38b0eec741
@ -1,6 +1,4 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
from __future__ import unicode_literals
|
|
||||||
import re
|
|
||||||
import os
|
import os
|
||||||
import six
|
import six
|
||||||
import threading
|
import threading
|
||||||
@ -25,7 +23,7 @@ log = logging.getLogger(__file__)
|
|||||||
def get_message(status):
|
def get_message(status):
|
||||||
message = ""
|
message = ""
|
||||||
if "text" in status:
|
if "text" in status:
|
||||||
message = renderers.clean_text(status["text"])
|
message = utils.clean_text(status["text"])
|
||||||
return message
|
return message
|
||||||
|
|
||||||
class displayPostPresenter(base.basePresenter):
|
class displayPostPresenter(base.basePresenter):
|
||||||
@ -83,7 +81,7 @@ class displayPostPresenter(base.basePresenter):
|
|||||||
extra_info = self.session.get_user(i["reply_to_user"])["user1_nom"]
|
extra_info = self.session.get_user(i["reply_to_user"])["user1_nom"]
|
||||||
from_ = _("{0} > {1}").format(from_, extra_info)
|
from_ = _("{0} > {1}").format(from_, extra_info)
|
||||||
# As we set the comment reply properly in the from_ field, let's remove the first username from here if it exists.
|
# As we set the comment reply properly in the from_ field, let's remove the first username from here if it exists.
|
||||||
fixed_text = re.sub("^\[id\d+\|\D+\], ", "", i["text"])
|
fixed_text = utils.clean_text(i["text"])
|
||||||
if len(fixed_text) > 140:
|
if len(fixed_text) > 140:
|
||||||
text = fixed_text[:141]
|
text = fixed_text[:141]
|
||||||
else:
|
else:
|
||||||
@ -297,7 +295,7 @@ class displayPostPresenter(base.basePresenter):
|
|||||||
else:
|
else:
|
||||||
from_ = from_["user1_nom"]
|
from_ = from_["user1_nom"]
|
||||||
# As we set the comment reply properly in the from_ field, let's remove the first username from here if it exists.
|
# As we set the comment reply properly in the from_ field, let's remove the first username from here if it exists.
|
||||||
fixed_text = re.sub("^\[id\d+\|\D+\], ", "", comment_object["text"])
|
fixed_text = utils.clean_text(comment_object["text"])
|
||||||
if len(fixed_text) > 140:
|
if len(fixed_text) > 140:
|
||||||
text = fixed_text[:141]
|
text = fixed_text[:141]
|
||||||
else:
|
else:
|
||||||
@ -484,7 +482,7 @@ class displayCommentPresenter(displayPostPresenter):
|
|||||||
else:
|
else:
|
||||||
from_ = from_["user1_nom"]
|
from_ = from_["user1_nom"]
|
||||||
# As we set the comment reply properly in the from_ field, let's remove the first username from here if it exists.
|
# As we set the comment reply properly in the from_ field, let's remove the first username from here if it exists.
|
||||||
fixed_text = re.sub("^\[id\d+\|\D+\], ", "", i["text"])
|
fixed_text = utils.clean_text(i["text"])
|
||||||
if len(fixed_text) > 140:
|
if len(fixed_text) > 140:
|
||||||
text = fixed_text[:141]
|
text = fixed_text[:141]
|
||||||
else:
|
else:
|
||||||
@ -548,17 +546,10 @@ class displayTopicPresenter(displayPostPresenter):
|
|||||||
continue
|
continue
|
||||||
from_ = self.session.get_user(i["from_id"])["user1_nom"]
|
from_ = self.session.get_user(i["from_id"])["user1_nom"]
|
||||||
# match user mentions inside text comment.
|
# match user mentions inside text comment.
|
||||||
matched_data = re.match(".*(\[)(id|club)(\d+:bp-\d+_\d+\|)(\D+)(\])", i["text"])
|
|
||||||
# If matched data exists we should modify the title.
|
|
||||||
# if len(matched_data.groups()) > 2:
|
|
||||||
# from_ = "{from_} > {to_}".format(from_=from_, to_=matched_data.groups()[1])
|
|
||||||
original_date = arrow.get(i["date"])
|
original_date = arrow.get(i["date"])
|
||||||
created_at = original_date.humanize(locale=languageHandler.curLang[:2])
|
created_at = original_date.humanize(locale=languageHandler.curLang[:2])
|
||||||
likes = str(i["likes"]["count"])
|
likes = str(i["likes"]["count"])
|
||||||
if matched_data != None:
|
text = utils.clean_text(text=i["text"])
|
||||||
text = re.sub("\[(id|club)\d+:bp-\d+_\d+\|\D+\]", matched_data.groups()[3]+", ", i["text"])
|
|
||||||
else:
|
|
||||||
text = i["text"]
|
|
||||||
comments_.append((from_, text, created_at, likes))
|
comments_.append((from_, text, created_at, likes))
|
||||||
self.send_message("add_items", control="comments", items=comments_)
|
self.send_message("add_items", control="comments", items=comments_)
|
||||||
|
|
||||||
|
@ -1,12 +1,10 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
""" this module contains everything used to render different kind of posts (posts in the home buffer,
|
""" this module contains everything used to render different kind of posts (posts in the home buffer,
|
||||||
Chat messages, audios, videos, photos, comments in posts, etc)"""
|
Chat messages, audios, videos, photos, comments in posts, etc)"""
|
||||||
from __future__ import unicode_literals
|
|
||||||
from builtins import range
|
|
||||||
import arrow
|
import arrow
|
||||||
import languageHandler
|
import languageHandler
|
||||||
import logging
|
import logging
|
||||||
from . utils import seconds_to_string
|
from . utils import seconds_to_string, clean_text
|
||||||
|
|
||||||
log = logging.getLogger(__file__)
|
log = logging.getLogger(__file__)
|
||||||
|
|
||||||
@ -50,12 +48,6 @@ def clean_audio(audio):
|
|||||||
audio["count"] = audio["count"] -1
|
audio["count"] = audio["count"] -1
|
||||||
return audio
|
return audio
|
||||||
|
|
||||||
def clean_text(text):
|
|
||||||
""" Replaces all HTML entities and put the plain text equivalent if it's possible."""
|
|
||||||
text = text.replace("<br>", "\n")
|
|
||||||
text = text.replace("\\n", "\n")
|
|
||||||
return text
|
|
||||||
|
|
||||||
def add_attachment(attachment):
|
def add_attachment(attachment):
|
||||||
msg = ""
|
msg = ""
|
||||||
tpe = ""
|
tpe = ""
|
||||||
|
@ -1,9 +1,10 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
""" Some utilities. I no have idea how I should put these, so..."""
|
""" Some utilities. I no have idea how I should put these, so..."""
|
||||||
import os
|
import os
|
||||||
import requests
|
|
||||||
import re
|
import re
|
||||||
|
import html
|
||||||
import logging
|
import logging
|
||||||
|
import requests
|
||||||
|
|
||||||
log = logging.getLogger("utils")
|
log = logging.getLogger("utils")
|
||||||
url_re = re.compile("(?:\w+://|www\.)[^ ,.?!#%=+][^ ]*")
|
url_re = re.compile("(?:\w+://|www\.)[^ ,.?!#%=+][^ ]*")
|
||||||
@ -57,3 +58,18 @@ def download_file(url, local_filename, window):
|
|||||||
window.change_status(_("Ready"))
|
window.change_status(_("Ready"))
|
||||||
return local_filename
|
return local_filename
|
||||||
|
|
||||||
|
# Matches a VK mention of a user or community in either of its two forms:
#   wall posts and comments:  [id123|Name]  or  [club123|Name]
#   topic comments:           [id123:bp-45_67|Name]
# The display name is captured as "everything up to the closing bracket" so
# that it can contain digits and never runs into a later, unrelated bracket.
_MENTION_RE = re.compile(r"\[(?:id|club)\d+(?::bp-\d+_\d+)?\|([^\]]+)\]")


def detect_users(text):
    """Replace VK user/community mention markup with the mentioned name.

    Every ``[idN|Name]``, ``[clubN|Name]`` or topic-style
    ``[idN:bp-X_Y|Name]`` occurrence in *text* is replaced by ``Name``
    followed by a comma and a space.  Text without mentions is returned
    unchanged.

    :param text: the raw text of a post, comment or topic comment.
    :returns: the text with all mention markup replaced.
    """
    # A callable replacement is used on purpose: the display name is
    # user-controlled, and passing it as a replacement *template* would make
    # re.sub interpret any backslash in it (raising re.error or inserting a
    # group reference).
    return _MENTION_RE.sub(lambda match: match.group(1) + ", ", text)
|
||||||
|
|
||||||
|
def clean_text(text):
    """Return *text* prepared for display.

    Mention markup is first replaced through detect_users(), then HTML
    character references (such as ``&amp;`` or ``&#39;``) are converted to
    the characters they stand for with html.unescape().
    """
    return html.unescape(detect_users(text))
|
Loading…
Reference in New Issue
Block a user