diff options
author | leot <leot@pkgsrc.org> | 2019-12-09 11:45:59 +0000 |
---|---|---|
committer | leot <leot@pkgsrc.org> | 2019-12-09 11:45:59 +0000 |
commit | 8038f237bfecff17b40aa914f253fda5bf4df39e (patch) | |
tree | a270414e4e34fbfcca24584010dd94955a2b24a9 /net | |
parent | c679c0e5d5594180ab4e26a70067b8078ec17606 (diff) | |
download | pkgsrc-8038f237bfecff17b40aa914f253fda5bf4df39e.tar.gz |
youtube-dl: Apply a patch to fix rai extractor
Shared upstream via:
<https://github.com/ytdl-org/youtube-dl/pull/23040/>
Diffstat (limited to 'net')
-rw-r--r-- | net/youtube-dl/Makefile | 3 | ||||
-rw-r--r-- | net/youtube-dl/distinfo | 3 | ||||
-rw-r--r-- | net/youtube-dl/patches/patch-youtube__dl_extractor_rai.py | 212 |
3 files changed, 216 insertions, 2 deletions
diff --git a/net/youtube-dl/Makefile b/net/youtube-dl/Makefile index 4d55b68bfb6..9fa9fde63ca 100644 --- a/net/youtube-dl/Makefile +++ b/net/youtube-dl/Makefile @@ -1,10 +1,11 @@ -# $NetBSD: Makefile,v 1.193 2019/11/28 21:22:07 leot Exp $ +# $NetBSD: Makefile,v 1.194 2019/12/09 11:45:59 leot Exp $ # XXX: VERSION_DATE can contains also an optional part that indicates # XXX: possible same day revisions. PKGNAME preserves that dotted part as is. VERSION_DATE= 2019.11.28 DISTNAME= youtube-dl-${VERSION_DATE} PKGNAME= ${DISTNAME:S/.//:S/.//} +PKGREVISION= 1 CATEGORIES= net MASTER_SITES= https://youtube-dl.org/downloads/${VERSION_DATE}/ diff --git a/net/youtube-dl/distinfo b/net/youtube-dl/distinfo index c2274b04846..1f40ecdee07 100644 --- a/net/youtube-dl/distinfo +++ b/net/youtube-dl/distinfo @@ -1,8 +1,9 @@ -$NetBSD: distinfo,v 1.176 2019/11/28 21:22:07 leot Exp $ +$NetBSD: distinfo,v 1.177 2019/12/09 11:45:59 leot Exp $ SHA1 (youtube-dl-2019.11.28.tar.gz) = c8537c966bda1e68d3b7b8f7e392ae488dbed063 RMD160 (youtube-dl-2019.11.28.tar.gz) = 3958b538feb2d23968a96051f52487b61980b215 SHA512 (youtube-dl-2019.11.28.tar.gz) = fd3fc9658428ecf96d681dd699db49e02f94343e5c49e370cb68a5764e8ecebc14b3ce5bc44db0a829b0d3c4fbffc96a1f8288fb24cbd21add6e9c7852b0915d Size (youtube-dl-2019.11.28.tar.gz) = 3148365 bytes SHA1 (patch-setup.py) = a67074ae7cfe5e77847c2f610337ea553eddb69b +SHA1 (patch-youtube__dl_extractor_rai.py) = ae67a6fb599c90491fd68b72bf71821659a2eca4 SHA1 (patch-youtube__dl_postprocessor_ffmpeg.py) = f96676170a448d9205d542a7def4beca615a1490 diff --git a/net/youtube-dl/patches/patch-youtube__dl_extractor_rai.py b/net/youtube-dl/patches/patch-youtube__dl_extractor_rai.py new file mode 100644 index 00000000000..6d0298d9e12 --- /dev/null +++ b/net/youtube-dl/patches/patch-youtube__dl_extractor_rai.py @@ -0,0 +1,212 @@ +$NetBSD: patch-youtube__dl_extractor_rai.py,v 1.1 2019/12/09 11:45:59 leot Exp $ + +[rai] Fix extraction for recent raiplay.it updates + +- Introduce _BASE_URL in RaiBaseIE class so it could be reused as base for the + several subextractors. +- Remove first test of RaiPlayIE, it is no longer available +- Adjust RaiPlayIE to recent raiplay.it updates, make it extension agnostic + (passing possible `.json' URLs is now supported too) and update test + info_dict. +- Adjust RaiPlayLiveIE to recent raiplay.it updates. Passing it as + `url_transparent' is no longer supported (there is no longer an accessible + ContentItem) +- Adjust RaiPlayPlaylistIE to recent raiplay.it updates and instruct it about + ContentSet-s. + +This fix issue #22923, #22906 and supersedes #23006. + +Shared upstream via: + + https://github.com/ytdl-org/youtube-dl/pull/23040 + +--- youtube_dl/extractor/rai.py.orig ++++ youtube_dl/extractor/rai.py +@@ -17,7 +17,6 @@ + parse_duration, + strip_or_none, + try_get, +- unescapeHTML, + unified_strdate, + unified_timestamp, + update_url_query, +@@ -30,6 +29,7 @@ class RaiBaseIE(InfoExtractor): + _UUID_RE = r'[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}' + _GEO_COUNTRIES = ['IT'] + _GEO_BYPASS = False ++ _BASE_URL = 'https://www.raiplay.it' + + def _extract_relinker_info(self, relinker_url, video_id): + if not re.match(r'https?://', relinker_url): +@@ -122,41 +122,19 @@ def _extract_subtitles(url, subtitle_url): + + + class RaiPlayIE(RaiBaseIE): +- _VALID_URL = r'(?P<url>https?://(?:www\.)?raiplay\.it/.+?-(?P<id>%s)\.html)' % RaiBaseIE._UUID_RE ++ _VALID_URL = r'(?P<url>(?P<base>https?://(?:www\.)?raiplay\.it/.+?-)(?P<id>%s)(?P<ext>\.(?:html|json)))' % RaiBaseIE._UUID_RE + _TESTS = [{ +- 'url': 'http://www.raiplay.it/video/2016/10/La-Casa-Bianca-e06118bb-59a9-4636-b914-498e4cfd2c66.html?source=twitter', +- 'md5': '340aa3b7afb54bfd14a8c11786450d76', +- 'info_dict': { +- 'id': 'e06118bb-59a9-4636-b914-498e4cfd2c66', +- 'ext': 'mp4', +- 'title': 'La Casa Bianca', +- 'alt_title': 'S2016 - Puntata del 23/10/2016', +- 'description': 'md5:a09d45890850458077d1f68bb036e0a5', +- 'thumbnail': r're:^https?://.*\.jpg$', +- 'uploader': 'Rai 3', +- 'creator': 'Rai 3', +- 'duration': 3278, +- 'timestamp': 1477764300, +- 'upload_date': '20161029', +- 'series': 'La Casa Bianca', +- 'season': '2016', +- }, +- }, { + 'url': 'http://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html', + 'md5': '8970abf8caf8aef4696e7b1f2adfc696', + 'info_dict': { + 'id': 'cb27157f-9dd0-4aee-b788-b1f67643a391', + 'ext': 'mp4', + 'title': 'Report del 07/04/2014', +- 'alt_title': 'S2013/14 - Puntata del 07/04/2014', +- 'description': 'md5:f27c544694cacb46a078db84ec35d2d9', ++ 'alt_title': 'St 2013/14 - Espresso nel caffè - 07/04/2014 ', ++ 'description': 'md5:d730c168a58f4bb35600fc2f881ec04e', + 'thumbnail': r're:^https?://.*\.jpg$', +- 'uploader': 'Rai 5', +- 'creator': 'Rai 5', ++ 'uploader': 'Rai Gulp', + 'duration': 6160, +- 'series': 'Report', +- 'season_number': 5, +- 'season': '2013/14', + }, + 'params': { + 'skip_download': True, +@@ -168,16 +146,15 @@ class RaiPlayIE(RaiBaseIE): + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) +- url, video_id = mobj.group('url', 'id') ++ url, base, video_id, ext = mobj.group('url', 'base', 'id', 'ext') + + media = self._download_json( +- '%s?json' % url, video_id, 'Downloading video JSON') ++ '%s%s.json' % (base, video_id), video_id, 'Downloading video JSON') + + title = media['name'] +- + video = media['video'] + +- relinker_info = self._extract_relinker_info(video['contentUrl'], video_id) ++ relinker_info = self._extract_relinker_info(video['content_url'], video_id) + self._sort_formats(relinker_info['formats']) + + thumbnails = [] +@@ -185,7 +162,7 @@ def _real_extract(self, url): + for _, value in media.get('images').items(): + if value: + thumbnails.append({ +- 'url': value.replace('[RESOLUTION]', '600x400') ++ 'url': urljoin(RaiBaseIE._BASE_URL, value.replace('[RESOLUTION]', '600x400')) + }) + + timestamp = unified_timestamp(try_get( +@@ -225,7 +202,7 @@ class RaiPlayLiveIE(RaiBaseIE): + 'display_id': 'rainews24', + 'ext': 'mp4', + 'title': 're:^Diretta di Rai News 24 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', +- 'description': 'md5:6eca31500550f9376819f174e5644754', ++ 'description': 'md5:4d00bcf6dc98b27c6ec480de329d1497', + 'uploader': 'Rai News 24', + 'creator': 'Rai News 24', + 'is_live': True, +@@ -238,20 +215,32 @@ class RaiPlayLiveIE(RaiBaseIE): + def _real_extract(self, url): + display_id = self._match_id(url) + +- webpage = self._download_webpage(url, display_id) ++ media = self._download_json( ++ '%s.json' % urljoin(RaiBaseIE._BASE_URL, 'dirette/' + display_id), ++ display_id, 'Downloading channel JSON') + +- video_id = self._search_regex( +- r'data-uniquename=["\']ContentItem-(%s)' % RaiBaseIE._UUID_RE, +- webpage, 'content id') ++ title = media['name'] ++ video = media['video'] ++ video_id = media['id'].replace('ContentItem-', '') ++ ++ relinker_info = self._extract_relinker_info(video['content_url'], video_id) ++ self._sort_formats(relinker_info['formats']) + +- return { +- '_type': 'url_transparent', +- 'ie_key': RaiPlayIE.ie_key(), +- 'url': 'http://www.raiplay.it/dirette/ContentItem-%s.html' % video_id, ++ info = { + 'id': video_id, + 'display_id': display_id, ++ 'title': self._live_title(title) if relinker_info.get( ++ 'is_live') else title, ++ 'alt_title': media.get('subtitle'), ++ 'description': media.get('description'), ++ 'uploader': strip_or_none(media.get('channel')), ++ 'creator': strip_or_none(media.get('editor')), ++ 'duration': parse_duration(video.get('duration')), + } + ++ info.update(relinker_info) ++ return info ++ + + class RaiPlayPlaylistIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?raiplay\.it/programmi/(?P<id>[^/?#&]+)' +@@ -260,7 +249,7 @@ class RaiPlayPlaylistIE(InfoExtractor): + 'info_dict': { + 'id': 'nondirloalmiocapo', + 'title': 'Non dirlo al mio capo', +- 'description': 'md5:9f3d603b2947c1c7abb098f3b14fac86', ++ 'description': 'md5:98ab6b98f7f44c2843fd7d6f045f153b', + }, + 'playlist_mincount': 12, + }] +@@ -268,21 +257,25 @@ class RaiPlayPlaylistIE(InfoExtractor): + def _real_extract(self, url): + playlist_id = self._match_id(url) + +- webpage = self._download_webpage(url, playlist_id) ++ media = self._download_json( ++ '%s.json' % urljoin(RaiBaseIE._BASE_URL, 'programmi/' + playlist_id), ++ playlist_id, 'Downloading program JSON') ++ ++ title = media['name'] ++ description = media['program_info']['description'] + +- title = self._html_search_meta( +- ('programma', 'nomeProgramma'), webpage, 'title') +- description = unescapeHTML(self._html_search_meta( +- ('description', 'og:description'), webpage, 'description')) ++ content_sets = [s['id'] for b in media['blocks'] for s in b['sets']] + + entries = [] +- for mobj in re.finditer( +- r'<a\b[^>]+\bhref=(["\'])(?P<path>/raiplay/video/.+?)\1', +- webpage): +- video_url = urljoin(url, mobj.group('path')) +- entries.append(self.url_result( +- video_url, ie=RaiPlayIE.ie_key(), +- video_id=RaiPlayIE._match_id(video_url))) ++ for cs in content_sets: ++ medias = self._download_json( ++ '%s/%s.json' % (urljoin(RaiBaseIE._BASE_URL, 'programmi/' + playlist_id), cs), ++ cs, 'Downloading content set JSON') ++ for m in medias['items']: ++ video_url = urljoin(url, m['path_id']) ++ entries.append(self.url_result( ++ video_url, ie=RaiPlayIE.ie_key(), ++ video_id=RaiPlayIE._match_id(video_url))) + + return self.playlist_result(entries, playlist_id, title, description) + |