diff --git a/src/extractors/zaycev.py b/src/extractors/zaycev.py
index 37335df..74b37da 100644
--- a/src/extractors/zaycev.py
+++ b/src/extractors/zaycev.py
@@ -19,22 +19,34 @@ class interface(object):
         log.debug("Started extraction service for zaycev.net")
 
     def search(self, text, page=1):
-        site = 'http://go.mail.ru/zaycev?q=%s&page=%s' % (text, page)
+        """Search zaycev.net for ``text`` and store the matches in ``self.results``.
+
+        Each match becomes a ``baseFile.song`` with its title, artist and
+        absolute URL filled in from the two links of the result block.
+        NOTE(review): ``page`` is kept for interface compatibility but is not
+        sent with this query -- confirm whether the site supports paging.
+        """
+        site = "http://zaycev.net/search.html"
         log.debug("Retrieving data from {0}...".format(site,))
-        r = requests.get(site)
+        # Let requests build the query string so that spaces, '&', '#' and
+        # non-ASCII characters in the search text are URL-encoded correctly.
+        r = requests.get(site, params={"query_search": text})
         soup = BeautifulSoup(r.text, 'html.parser')
-        D = r'длительность.(\d+\:\d+\:\d+)'
-        R = r'размер.((\d+|\d+.\d+) \w+)'
-        B = r'битрейт.(\d+ \w+)'
-        self.dh = [[x.get_text(), x.get('href')]for x in soup.find_all('a', {'class': "light-link"}) if x.get_text() != "Читать далее"]
-        self.hd = [{'duration': re.search(D, str(x)).group()[13:], 'size': re.search(R, str(x)).group()[7:], 'bitrate': re.search(B, str(x)).group()[8:]} for x in soup.find_all('div', {'class': "result__snp"})]
+        search_results = soup.find_all("div", {"class": "musicset-track__title track-geo__title"})
         self.results = []
-        for i in range(len(self.hd)):
+        for result in search_results:
+            # Each result is expected to hold two links: the artist first,
+            # then the song itself.
+            links = result.find_all("a")
+            if len(links) < 2:
+                # Unexpected markup: skip the entry instead of raising IndexError.
+                continue
+            artist_link, song_link = links[0], links[1]
             s = baseFile.song(self)
-            s.title = self.dh[i][0]
-            s.url = self.dh[i][1]
-            s.duration = self.hd[i]["duration"]
-            s.size = self.hd[i]["size"]
-            s.bitrate = self.hd[i]["bitrate"]
+            s.title = song_link.text
+            s.artist = artist_link.text
+            s.url = "http://zaycev.net%s" % (song_link.attrs["href"],)
+            # Duration, size and bitrate are not populated here: this result
+            # markup is not parsed for them.
             self.results.append(s)
         log.debug("{0} results found.".format(len(self.results)))