From 68f5867cf0516f152fc772e96c1931797102694f Mon Sep 17 00:00:00 2001 From: LE Date: Sun, 1 Aug 2021 22:16:12 -0400 Subject: [PATCH] [CBS] Add fallback (#579) Related: https://github.com/ytdl-org/youtube-dl/issues/29564 Authored-by: llacb47, pukkandan --- yt_dlp/extractor/cbs.py | 95 ++++++++++++++++++++++++++++++++------ yt_dlp/extractor/common.py | 2 + 2 files changed, 84 insertions(+), 13 deletions(-) diff --git a/yt_dlp/extractor/cbs.py b/yt_dlp/extractor/cbs.py index 716e945197..fbbbe5545b 100644 --- a/yt_dlp/extractor/cbs.py +++ b/yt_dlp/extractor/cbs.py @@ -53,6 +53,54 @@ class CBSIE(CBSBaseIE): 'skip_download': True, }, '_skip': 'Blocked outside the US', + }, { + 'url': 'https://www.paramountplus.com/shows/catdog/video/Oe44g5_NrlgiZE3aQVONleD6vXc8kP0k/catdog-climb-every-catdog-the-canine-mutiny/', + 'info_dict': { + 'id': 'Oe44g5_NrlgiZE3aQVONleD6vXc8kP0k', + 'ext': 'mp4', + 'title': 'CatDog - Climb Every CatDog/The Canine Mutiny', + 'description': 'md5:7ac835000645a69933df226940e3c859', + 'duration': 1418, + 'timestamp': 920264400, + 'upload_date': '19990301', + 'uploader': 'CBSI-NEW', + }, + 'params': { + 'skip_download': 'm3u8', + }, + '_skip': 'Blocked outside the US', + }, { + 'url': 'https://www.paramountplus.com/shows/tooning-out-the-news/video/6hSWYWRrR9EUTz7IEe5fJKBhYvSUfexd/7-23-21-week-in-review-rep-jahana-hayes-howard-fineman-sen-michael-bennet-sheera-frenkel-cecilia-kang-/', + 'info_dict': { + 'id': '6hSWYWRrR9EUTz7IEe5fJKBhYvSUfexd', + 'ext': 'mp4', + 'title': '7/23/21 WEEK IN REVIEW (Rep. Jahana Hayes/Howard Fineman/Sen. Michael Bennet/Sheera Frenkel & Cecilia Kang)', + 'description': 'md5:f4adcea3e8b106192022e121f1565bae', + 'duration': 2506, + 'timestamp': 1627063200, + 'upload_date': '20210723', + 'uploader': 'CBSI-NEW', + }, + 'params': { + 'skip_download': 'm3u8', + }, + '_skip': 'Blocked outside the US', + }, { + 'url': 'https://www.cbs.com/shows/the-late-show-with-stephen-colbert/video/60icOhMb9NcjbcWnF_gub9XXHdeBcNk2/the-late-show-6-23-21-christine-baranski-joy-oladokun-', + 'info_dict': { + 'id': '60icOhMb9NcjbcWnF_gub9XXHdeBcNk2', + 'title': 'The Late Show - 6/23/21 (Christine Baranski, Joy Oladokun)', + 'timestamp': 1624507140, + 'description': 'md5:e01af24e95c74d55e8775aef86117b95', + 'uploader': 'CBSI-NEW', + 'upload_date': '20210624', + }, + 'params': { + 'ignore_no_formats_error': True, + 'skip_download': True, + }, + 'expected_warnings': [ + 'This content expired on', 'No video formats found', 'Requested format is not available'], }, { 'url': 'http://colbertlateshow.com/video/8GmB0oY0McANFvp2aEffk9jZZZ2YyXxy/the-colbeard/', 'only_matching': True, @@ -79,17 +127,26 @@ def _extract_video_info(self, content_id, site='cbs', mpx_acc=2198311517): asset_types = [] subtitles = {} formats = [] + useXMLmetadata = True last_e = None for item in items_data.findall('.//item'): asset_type = xpath_text(item, 'assetType') - if not asset_type or asset_type in asset_types or 'HLS_FPS' in asset_type or 'DASH_CENC' in asset_type: - continue - asset_types.append(asset_type) query = { 'mbr': 'true', 'assetTypes': asset_type, } - if asset_type.startswith('HLS') or asset_type in ('OnceURL', 'StreamPack'): + if not asset_type: + # fallback for content_ids that videoPlayerService doesn't return anything for + useXMLmetadata = False + asset_type = 'fallback' + query['formats'] = 'M3U+none,MPEG4,M3U+appleHlsEncryption,MP3' + del query['assetTypes'] + elif asset_type in asset_types: + continue + elif any(excluded in asset_type for excluded in ('HLS_FPS', 'DASH_CENC', 'OnceURL')): + continue + asset_types.append(asset_type) + if asset_type.startswith('HLS') or 'StreamPack' in asset_type: query['formats'] = 'MPEG4,M3U' elif asset_type in ('RTMP', 'WIFI', '3G'): query['formats'] = 'MPEG4,FLV' @@ -99,25 +156,37 @@ def _extract_video_info(self, content_id, site='cbs', mpx_acc=2198311517): 'Downloading %s SMIL data' % asset_type) except ExtractorError as e: last_e = e - continue + if useXMLmetadata: + continue + query['formats'] = '' # blank query to check if expired + try: + tp_formats, tp_subtitles = self._extract_theplatform_smil( + update_url_query(tp_release_url, query), content_id, + 'Downloading %s SMIL data, trying again with another format' % asset_type) + except ExtractorError as e: + last_e = e + continue formats.extend(tp_formats) subtitles = self._merge_subtitles(subtitles, tp_subtitles) if last_e and not formats: - raise last_e + self.raise_no_formats(last_e, True, content_id) self._sort_formats(formats) info = self._extract_theplatform_metadata(tp_path, content_id) info.update({ - 'id': content_id, - 'title': title, - 'series': xpath_text(video_data, 'seriesTitle'), - 'season_number': int_or_none(xpath_text(video_data, 'seasonNumber')), - 'episode_number': int_or_none(xpath_text(video_data, 'episodeNumber')), - 'duration': int_or_none(xpath_text(video_data, 'videoLength'), 1000), - 'thumbnail': xpath_text(video_data, 'previewImageURL'), 'formats': formats, 'subtitles': subtitles, + 'id': content_id }) + if useXMLmetadata: + info.update({ + 'title': title, + 'series': xpath_text(video_data, 'seriesTitle'), + 'season_number': int_or_none(xpath_text(video_data, 'seasonNumber')), + 'episode_number': int_or_none(xpath_text(video_data, 'episodeNumber')), + 'duration': int_or_none(xpath_text(video_data, 'videoLength'), 1000), + 'thumbnail': xpath_text(video_data, 'previewImageURL') + }) return info def _real_extract(self, url): diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index a3ac9dfb7d..a4a5b37aaf 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -1052,6 +1052,8 @@ def raise_geo_restricted( def raise_no_formats(self, msg, expected=False, video_id=None): if expected and self.get_param('ignore_no_formats_error'): self.report_warning(msg, video_id) + elif isinstance(msg, ExtractorError): + raise msg else: raise ExtractorError(msg, expected=expected, video_id=video_id)