Fixed zaycev.net extractor
This commit is contained in:
parent
2f4416f6bb
commit
130fb40d78
@ -19,23 +19,23 @@ class interface(object):
|
||||
log.debug("Started extraction service for zaycev.net")
|
||||
|
||||
def search(self, text, page=1):
    """Search zaycev.net for ``text`` and store the matches in ``self.results``.

    The search page is downloaded and parsed with BeautifulSoup. Every
    ``div`` with the classes ``musicset-track__title track-geo__title`` is
    treated as one result and converted into a ``baseFile.song`` whose
    ``title``, ``artist`` and ``url`` are filled in.

    Args:
        text: Search terms typed by the user.
        page: Kept for interface compatibility; the request built here
            does not send a page number.

    Side effects:
        Replaces ``self.results`` with the list of songs found (possibly
        empty).
    """
    site = "http://zaycev.net/search.html"
    log.debug("Retrieving data from {0}...".format(site))
    # Let requests build the query string so that spaces, "&", "#" and
    # non-ASCII characters in the search text are percent-encoded instead
    # of being spliced raw into the URL.
    r = requests.get(site, params={"query_search": text})
    soup = BeautifulSoup(r.text, 'html.parser')
    search_results = soup.find_all("div", {"class": "musicset-track__title track-geo__title"})
    self.results = []
    for entry in search_results:
        # The easiest way to get artist and song names is to fetch the
        # links: links[0] holds the artist, links[1] holds the song.
        links = entry.find_all("a")
        if len(links) < 2:
            # Unexpected markup for this entry; skip it rather than abort
            # the whole search with an IndexError.
            continue
        s = baseFile.song(self)
        s.title = links[1].text
        s.artist = links[0].text
        s.url = "http://zaycev.net%s" % (links[1].attrs["href"])
        # NOTE: duration, size and bitrate are not set here; this code only
        # reads the artist and song links from each result block.
        self.results.append(s)
    log.debug("{0} results found.".format(len(self.results)))
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user