Added extractors package

This commit is contained in:
Manuel Cortez 2018-01-23 13:39:49 -06:00
parent 303b8c1bab
commit eda8b950e4
5 changed files with 64 additions and 0 deletions

View File

@ -0,0 +1,2 @@
#!/usr/bin/env python
# -*- coding: UTF-8 -*-

View File

@ -0,0 +1,13 @@
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
class song(object):
    """Service-independent description of a single song.

    Every field starts out zeroed or empty; the service that produced the
    song is expected to fill in the real values afterwards.
    """

    def __init__(self):
        # (attribute name, initial value) pairs, applied in this order.
        initial_fields = (
            ("bitrate", 0),
            ("title", ""),
            ("artist", ""),
            ("duration", ""),
            ("size", 0),
            ("url", ""),
        )
        for field_name, initial_value in initial_fields:
            setattr(self, field_name, initial_value)

36
src/extractors/zaycev.py Normal file
View File

@ -0,0 +1,36 @@
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
import baseFile
import re
import json
import requests
from bs4 import BeautifulSoup
class interface(object):
    """Search zaycev.net songs through the go.mail.ru front end.

    After :meth:`search` has run, three attributes are available:

    * ``results`` -- list of ``baseFile.song`` objects, one per hit.
    * ``dh`` -- list of ``[title, href]`` pairs scraped from the result links.
    * ``hd`` -- list of dictionaries with the keys ``duration``, ``size`` and
      ``bitrate`` scraped from the result snippets.
    """

    # Seconds to wait for the remote server, so a stalled connection cannot
    # block the caller forever.
    REQUEST_TIMEOUT = 30

    SEARCH_URL = 'http://go.mail.ru/zaycev'

    # The snippet labels are Russian: "длительность" = duration,
    # "размер" = size, "битрейт" = bitrate. Capture group 1 of each pattern
    # holds the value that follows the label and its one-character separator.
    _DURATION = re.compile(r'длительность.(\d+\:\d+\:\d+)')
    _SIZE = re.compile(r'размер.((\d+|\d+.\d+) \w+)')
    _BITRATE = re.compile(r'битрейт.(\d+ \w+)')

    # Caption of the "Read more" links, which share the CSS class of the
    # result links but are not songs.
    _READ_MORE = "Читать далее"

    def __init__(self):
        self.results = []

    @staticmethod
    def _field(pattern, snippet):
        """Return capture group 1 of ``pattern`` in ``snippet``, or None."""
        match = pattern.search(snippet)
        return match.group(1) if match else None

    @classmethod
    def _parse_snippet(cls, snippet):
        """Extract duration, size and bitrate from one result snippet.

        A field that is absent from the snippet is reported as ``None``
        instead of raising, so one malformed hit cannot abort a search.
        """
        return {
            'duration': cls._field(cls._DURATION, snippet),
            'size': cls._field(cls._SIZE, snippet),
            'bitrate': cls._field(cls._BITRATE, snippet),
        }

    def search(self, text, page=1):
        """Run a search and store the hits in ``self.results``.

        Args:
            text: Raw (not URL-encoded) search terms.
            page: Number of the result page to request.

        Returns:
            None. ``self.results``, ``self.dh`` and ``self.hd`` are replaced.
        """
        # Let requests build the query string so that characters such as
        # "&", "#" or "+" in the search terms are escaped correctly.
        response = requests.get(
            self.SEARCH_URL,
            params={'q': text, 'page': page},
            timeout=self.REQUEST_TIMEOUT,
        )
        soup = BeautifulSoup(response.text, 'html.parser')
        self.dh = [
            [link.get_text(), link.get('href')]
            for link in soup.find_all('a', {'class': "light-link"})
            if link.get_text() != self._READ_MORE
        ]
        self.hd = [
            self._parse_snippet(str(snippet))
            for snippet in soup.find_all('div', {'class': "result__snp"})
        ]
        self.results = []
        # zip() stops at the shorter list, so a page with fewer links than
        # snippets (or the reverse) no longer raises IndexError.
        for (title, href), details in zip(self.dh, self.hd):
            s = baseFile.song()
            s.title = title
            s.url = href
            for name, value in details.items():
                # Keep the song's own default for fields the snippet lacks.
                if value is not None:
                    setattr(s, name, value)
            self.results.append(s)

    def get_download_url(self, url):
        """Resolve the page of a search hit to a direct download URL.

        Args:
            url: Address of a track page, as stored in ``song.url``.

        Returns:
            The value of the ``"url"`` key in the JSON document referenced
            by the page's ``musicset-track`` element.

        Raises:
            AttributeError: If the page has no ``div`` with the class
                ``musicset-track``.
            TypeError: If that element has no ``data-url`` attribute.
        """
        page = requests.get(url, timeout=self.REQUEST_TIMEOUT)
        soup = BeautifulSoup(page.text, 'html.parser')
        track = soup.find('div', {'class': "musicset-track"})
        # data-url is a site-relative path to a JSON description of the track.
        info = requests.get(
            'http://zaycev.net' + track.get('data-url'),
            timeout=self.REQUEST_TIMEOUT,
        )
        data = json.loads(info.text)
        return data["url"]

View File

@ -0,0 +1,2 @@
#!/usr/bin/env python
# -*- coding: UTF-8 -*-

View File

@ -0,0 +1,11 @@
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
class song(object):
    """Plain record for one song; every field starts zeroed or empty."""

    def __init__(self):
        # Assigned left to right, so the attribute order is bitrate, title,
        # artist, duration, size.
        self.bitrate, self.title, self.artist = 0, "", ""
        self.duration, self.size = "", 0