From 435d62abe9c4b4de7634d6028812ebd28b31c882 Mon Sep 17 00:00:00 2001 From: Gilles Pietri Date: Wed, 23 Sep 2020 23:09:00 +0200 Subject: [PATCH 1/8] [bandcamp] fix regexp for JSON matching on bandcamp From https://github.com/ytdl-org/youtube-dl/pull/26684. --- youtube_dl/extractor/bandcamp.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index f14b407dc..ad1812320 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -91,10 +91,11 @@ class BandcampIE(InfoExtractor): duration = None formats = [] - track_info = self._parse_json( - self._search_regex( - r'trackinfo\s*:\s*\[\s*({.+?})\s*\]\s*,\s*?\n', - webpage, 'track info', default='{}'), title) + trackinfo_block = self._search_regex( + r'trackinfo&quot;:\[\s*({.+?})\s*\],&quot;', + webpage, 'track info', default='{}') + quoted_json = trackinfo_block.replace('&quot;', '"') + track_info = self._parse_json(quoted_json, title) if track_info: file_ = track_info.get('file') if isinstance(file_, dict): @@ -117,7 +118,7 @@ class BandcampIE(InfoExtractor): def extract(key): return self._search_regex( - r'\b%s\s*["\']?\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1' % key, + r',&quot;%s&quot;:(&quot;)(?P<value>(?:(?!&quot;).)+)&quot;' % key, webpage, key, default=None, group='value') artist = extract('artist') -- 2.27.0