1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
|
From 4b2caf8924f404d15d8cdc475912eac3a6e24339 Mon Sep 17 00:00:00 2001
From: Valentin Berlier <berlier.v@gmail.com>
Date: Fri, 9 Oct 2020 14:23:21 +0000
Subject: [PATCH 8/8] [bandcamp] fix album downloading
Taken from
<https://github.com/ytdl-org/youtube-dl/pull/26684#issuecomment-705253522>,
with the missing `json` import added.
---
youtube_dl/extractor/bandcamp.py | 27 ++++++++-------------------
1 file changed, 8 insertions(+), 19 deletions(-)
diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py
index 04b8aa80f..3fba35b83 100644
--- a/youtube_dl/extractor/bandcamp.py
+++ b/youtube_dl/extractor/bandcamp.py
@@ -1,5 +1,6 @@
from __future__ import unicode_literals
+import json
import random
import re
import time
@@ -305,28 +306,16 @@ class BandcampAlbumIE(InfoExtractor):
album_id = mobj.group('album_id')
playlist_id = album_id or uploader_id
webpage = self._download_webpage(url, playlist_id)
- track_elements = re.findall(
- r'(?s)<div[^>]*>(.*?<a[^>]+href="([^"]+?)"[^>]+itemprop="url"[^>]*>.*?)</div>', webpage)
- if not track_elements:
- raise ExtractorError('The page doesn\'t contain any tracks')
- # Only tracks with duration info have songs
+ ld = re.findall(r'<script[^>]+type="application/ld\+json"[^>]*>(.*?)</script>', webpage, re.DOTALL)
+ album = json.loads(ld[0])
entries = [
self.url_result(
- compat_urlparse.urljoin(url, t_path),
+ record['item']['@id'],
ie=BandcampIE.ie_key(),
- video_title=self._search_regex(
- r'<span\b[^>]+\bitemprop=["\']name["\'][^>]*>([^<]+)',
- elem_content, 'track title', fatal=False))
- for elem_content, t_path in track_elements
- if self._html_search_meta('duration', elem_content, default=None)]
-
- title = self._html_search_regex(
- r'album_title\s*(?:"|["\']):\s*("|["\'])(?P<album>(?:\\\1|((?!\1).))+)\1',
- webpage, 'title', fatal=False, group='album')
-
- if title:
- title = title.replace(r'\"', '"')
-
+ video_title=record['item']['name'])
+ for record in album['track']['itemListElement']
+ ]
+ title = album['name']
return {
'_type': 'playlist',
'uploader_id': uploader_id,
--
2.27.0
|