From 4b2caf8924f404d15d8cdc475912eac3a6e24339 Mon Sep 17 00:00:00 2001 From: Valentin Berlier Date: Fri, 9 Oct 2020 14:23:21 +0000 Subject: [PATCH 8/8] [bandcamp] fix album downloading Adapted from an upstream fix, with the missing json import added. --- youtube_dl/extractor/bandcamp.py | 27 ++++++++------------------- 1 file changed, 8 insertions(+), 19 deletions(-) diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index 04b8aa80f..3fba35b83 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -1,5 +1,6 @@ from __future__ import unicode_literals +import json import random import re import time @@ -305,28 +306,16 @@ class BandcampAlbumIE(InfoExtractor): album_id = mobj.group('album_id') playlist_id = album_id or uploader_id webpage = self._download_webpage(url, playlist_id) - track_elements = re.findall( - r'(?s)]*>(.*?]+href="([^"]+?)"[^>]+itemprop="url"[^>]*>.*?)', webpage) - if not track_elements: - raise ExtractorError('The page doesn\'t contain any tracks') - # Only tracks with duration info have songs + ld = re.findall(r']+type="application/ld\+json"[^>]*>(.*?)', webpage, re.DOTALL) + album = json.loads(ld[0]) entries = [ self.url_result( - compat_urlparse.urljoin(url, t_path), + record['item']['@id'], ie=BandcampIE.ie_key(), - video_title=self._search_regex( - r']+\bitemprop=["\']name["\'][^>]*>([^<]+)', - elem_content, 'track title', fatal=False)) - for elem_content, t_path in track_elements - if self._html_search_meta('duration', elem_content, default=None)] - - title = self._html_search_regex( - r'album_title\s*(?:"|["\']):\s*("|["\'])(?P(?:\\\1|((?!\1).))+)\1', - webpage, 'title', fatal=False, group='album') - - if title: - title = title.replace(r'\"', '"') - + video_title=record['item']['name']) + for record in album['track']['itemListElement'] + ] + title = album['name'] return { '_type': 'playlist', 'uploader_id': uploader_id, -- 2.27.0