From 6aadce819b42e2c0daabc7625464994010e40e51 Mon Sep 17 00:00:00 2001 From: Alyssa Ross Date: Fri, 9 Oct 2020 14:41:45 +0000 Subject: patches/youtube-dl: fix bandcamp --- ...e-unescapeHTML-instead-of-a-simple-replac.patch | 41 ++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 overlays/patches/youtube-dl/0002-bandcamp-use-unescapeHTML-instead-of-a-simple-replac.patch (limited to 'overlays/patches/youtube-dl/0002-bandcamp-use-unescapeHTML-instead-of-a-simple-replac.patch') diff --git a/overlays/patches/youtube-dl/0002-bandcamp-use-unescapeHTML-instead-of-a-simple-replac.patch b/overlays/patches/youtube-dl/0002-bandcamp-use-unescapeHTML-instead-of-a-simple-replac.patch new file mode 100644 index 000000000000..649ca6477951 --- /dev/null +++ b/overlays/patches/youtube-dl/0002-bandcamp-use-unescapeHTML-instead-of-a-simple-replac.patch @@ -0,0 +1,41 @@ +From 61b0809be0f6acb7b88ebb0146cb2a85bc41b805 Mon Sep 17 00:00:00 2001 +From: Gilles Pietri +Date: Sat, 26 Sep 2020 17:34:35 +0200 +Subject: [PATCH 2/8] [bandcamp] use unescapeHTML instead of a simple replace + of quotes + +From https://github.com/ytdl-org/youtube-dl/pull/26684. 
+--- + youtube_dl/extractor/bandcamp.py | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py +index ad1812320..55d110e28 100644 +--- a/youtube_dl/extractor/bandcamp.py ++++ b/youtube_dl/extractor/bandcamp.py +@@ -92,10 +92,10 @@ class BandcampIE(InfoExtractor): + + formats = [] + trackinfo_block = self._search_regex( +- r'trackinfo&quot;:\[\s*({.+?})\s*\],&quot;', ++ r'trackinfo(?:["\']|&quot;):\[\s*({.+?})\s*\],(?:["\']|&quot;)', + webpage, 'track info', default='{}') +- quoted_json = trackinfo_block.replace('&quot;', '"') +- track_info = self._parse_json(quoted_json, title) ++ unescaped_json = unescapeHTML(trackinfo_block) ++ track_info = self._parse_json(unescaped_json, title) + if track_info: + file_ = track_info.get('file') + if isinstance(file_, dict): +@@ -118,7 +118,7 @@ class BandcampIE(InfoExtractor): + + def extract(key): + return self._search_regex( +- r',&quot;%s&quot;:(&quot;)(?P<value>(?:(?!&quot;).)+)&quot;' % key, ++ r',(["\']|&quot;)%s\1:\1(?P<value>(?:(?!\1).)+)\1' % key, + webpage, key, default=None, group='value') + + artist = extract('artist') +-- +2.27.0 + -- cgit 1.4.1