Merge branch 'yt-dlp:master' into ie-jiosaavn

2024-11-20 05:47:24 +01:00 · 2024-08-27 15:11:21 +05:30 · 2024-08-27 15:11:21 +05:30 · cc974e7b83
commit cc974e7b83
parent bfcb22aca1 41be32e78c
3 changed files with 176 additions and 1 deletions
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@ -2312,6 +2312,7 @@
    VideomoreVideoIE,
 )
 from .videopress import VideoPressIE
 from .vidflex import VidflexIE
 from .vidio import (
    VidioIE,
    VidioLiveIE,
--- a/yt_dlp/extractor/rutube.py
+++ b/yt_dlp/extractor/rutube.py
@ -6,6 +6,7 @@
    determine_ext,
    int_or_none,
    parse_qs,
    traverse_obj,
    try_get,
    unified_timestamp,
    url_or_none,
@ -80,6 +81,8 @@ def _extract_formats(self, options, video_id):
                    'url': format_url,
                    'format_id': format_id,
                })
        for hls_url in traverse_obj(options, ('live_streams', 'hls', ..., 'url', {url_or_none})):
            formats.extend(self._extract_m3u8_formats(hls_url, video_id, ext='mp4', fatal=False))
        return formats
    def _download_and_extract_formats(self, video_id, query=None):
@ -90,7 +93,7 @@ def _download_and_extract_formats(self, video_id, query=None):
 class RutubeIE(RutubeBaseIE):
    IE_NAME = 'rutube'
    IE_DESC = 'Rutube videos'
-    _VALID_URL = r'https?://rutube\.ru/(?:video(?:/private)?|(?:play/)?embed)/(?P<id>[\da-z]{32})'
+    _VALID_URL = r'https?://rutube\.ru/(?:(?:live/)?video(?:/private)?|(?:play/)?embed)/(?P<id>[\da-z]{32})'
    _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//rutube\.ru/(?:play/)?embed/[\da-z]{32}.*?)\1']
    _TESTS = [{
@ -164,6 +167,29 @@ class RutubeIE(RutubeBaseIE):
            'uploader': 'Стас Быков',
        },
        'expected_warnings': ['Unable to download f4m'],
    }, {
        'url': 'https://rutube.ru/live/video/c58f502c7bb34a8fcdd976b221fca292/',
        'info_dict': {
            'id': 'c58f502c7bb34a8fcdd976b221fca292',
            'ext': 'mp4',
            'categories': ['Телепередачи'],
            'description': '',
            'thumbnail': 'http://pic.rutubelist.ru/video/14/19/14190807c0c48b40361aca93ad0867c7.jpg',
            'live_status': 'is_live',
            'age_limit': 0,
            'uploader_id': '23460655',
            'timestamp': 1652972968,
            'view_count': int,
            'upload_date': '20220519',
            'title': r're:Первый канал. Прямой эфир \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
            'uploader': 'Первый канал',
        },
    }, {
        'url': 'https://rutube.ru/video/5ab908fccfac5bb43ef2b1e4182256b0/',
        'only_matching': True,
    }, {
        'url': 'https://rutube.ru/live/video/private/c58f502c7bb34a8fcdd976b221fca292/',
        'only_matching': True,
    }]
    @classmethod
--- a/yt_dlp/extractor/vidflex.py
+++ b/yt_dlp/extractor/vidflex.py
@ -0,0 +1,148 @@
 import base64
 import json
 from .common import InfoExtractor
 from ..utils import (
    int_or_none,
    join_nonempty,
    mimetype2ext,
    url_or_none,
 )
 from ..utils.traversal import traverse_obj
 class VidflexIE(InfoExtractor):
    _DOMAINS_RE = [
        r'[^.]+\.vidflex\.tv',
        r'(?:www\.)?acactv\.ca',
        r'(?:www\.)?albertalacrossetv\.com',
        r'(?:www\.)?cjfltv\.com',
        r'(?:www\.)?figureitoutbaseball\.com',
        r'(?:www\.)?ocaalive\.com',
        r'(?:www\.)?pegasussports\.tv',
        r'(?:www\.)?praxisseries\.ca',
        r'(?:www\.)?silenticetv\.com',
        r'(?:www\.)?tuffhedemantv\.com',
        r'(?:www\.)?watchfuntv\.com',
        r'live\.ofsaa\.on\.ca',
        r'tv\.procoro\.ca',
        r'tv\.realcastmedia\.net',
        r'tv\.fringetheatre\.ca',
        r'video\.haisla\.ca',
        r'video\.hockeycanada\.ca',
        r'video\.huuayaht\.org',
        r'video\.turningpointensemble\.ca',
        r'videos\.livingworks\.net',
        r'videos\.telusworldofscienceedmonton\.ca',
        r'watch\.binghamtonbulldogs\.com',
        r'watch\.rekindle\.tv',
        r'watch\.wpca\.com',
    ]
    _VALID_URL = rf'https?://(?:{"|".join(_DOMAINS_RE)})/[a-z]{{2}}(?:-[a-z]{{2}})?/c/[\w-]+\.(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://video.hockeycanada.ca/en/c/nwt-micd-up-with-jamie-lee-rattray.107486',
        'only_matching': True,
    }, {
        # m3u8 + https
        'url': 'https://video.hockeycanada.ca/en-us/c/nwt-micd-up-with-jamie-lee-rattray.107486',
        'info_dict': {
            'id': '107486',
            'title': 'NWT: Mic’d up with Jamie Lee Rattray',
            'ext': 'mp4',
            'duration': 115,
            'timestamp': 1634310409,
            'upload_date': '20211015',
            'tags': ['English', '2021', "National Women's Team"],
            'description': 'md5:efb1cf6165b48cc3f5555c4262dd5b23',
            'thumbnail': r're:^https?://wpmedia01-a\.akamaihd\.net/en/asset/public/image/.+',
        },
        'params': {'skip_download': True},
    }, {
        'url': 'https://video.hockeycanada.ca/en/c/mwc-remembering-the-wild-ride-in-riga.112307',
        'info_dict': {
            'id': '112307',
            'title': 'MWC: Remembering the wild ride in Riga',
            'ext': 'mp4',
            'duration': 322,
            'timestamp': 1716235607,
            'upload_date': '20240520',
            'tags': ['English', '2024', "National Men's Team", 'IIHF World Championship', 'Fan'],
            'description': r're:.+Canada’s National Men’s Team.+',
            'thumbnail': r're:^https?://wpmedia01-a\.akamaihd\.net/en/asset/public/image/.+',
        },
        'params': {'skip_download': True},
    }, {
        # the same video in French
        'url': 'https://video.hockeycanada.ca/fr/c/cmm-retour-sur-un-parcours-endiable-a-riga.112304',
        'info_dict': {
            'id': '112304',
            'title': 'CMM : Retour sur un parcours endiablé à Riga',
            'ext': 'mp4',
            'duration': 322,
            'timestamp': 1716235545,
            'upload_date': '20240520',
            'tags': ['French', '2024', "National Men's Team", 'IIHF World Championship', 'Fan'],
            'description': 'md5:cf825222882a3dab1cd62cffcf3b4d1f',
            'thumbnail': r're:^https?://wpmedia01-a\.akamaihd\.net/en/asset/public/image/.+',
        },
        'params': {'skip_download': True},
    }, {
        'url': 'https://myfbcgreenville.vidflex.tv/en/c/may-12th-2024.658',
        'only_matching': True,
    }, {
        'url': 'https://www.figureitoutbaseball.com/en/c/fiob-podcast-14-dan-bertolini-ncaa-d1-head-coach-recorded-11-29-2018.1367',
        'only_matching': True,
    }, {
        'url': 'https://videos.telusworldofscienceedmonton.ca/en/c/the-aurora-project-timelapse-4.577',
        'only_matching': True,
    }, {
        'url': 'https://www.tuffhedemantv.com/en/c/2022-tuff-hedeman-tour-hobbs-nm-january-22.227',
        'only_matching': True,
    }, {
        'url': 'https://www.albertalacrossetv.com/en/c/up-floor-ground-balls-one-more.3449',
        'only_matching': True,
    }, {
        'url': 'https://www.silenticetv.com/en/c/jp-unlocked-day-in-the-life-of-langley-ha-15u.5197',
        'only_matching': True,
    }, {
        'url': 'https://jphl.vidflex.tv/en/c/jp-unlocked-day-in-the-life-of-langley-ha-15u.5197',
        'only_matching': True,
    }]
    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        data_url = self._html_search_regex(
            r'content_api:\s*(["\'])(?P<url>https?://(?:(?!\1).)+)\1', webpage, 'content api url', group='url')
        media_config = traverse_obj(
            self._download_json(data_url, video_id),
            ('config', {base64.b64decode}, {bytes.decode}, {json.loads}, {dict}))
        return {
            'id': video_id,
            'formats': list(self._yield_formats(media_config, video_id)),
            **self._search_json_ld(
                webpage.replace('/*<![CDATA[*/', '').replace('/*]]>*/', ''), video_id),
        }
    def _yield_formats(self, media_config, video_id):
        for media_source in traverse_obj(media_config, ('media', 'source', lambda _, v: url_or_none(v['src']))):
            media_url = media_source['src']
            media_type = mimetype2ext(media_source.get('type'))
            if media_type == 'm3u8':
                yield from self._extract_m3u8_formats(media_url, video_id, fatal=False, m3u8_id='hls')
            elif media_type == 'mp4':
                bitrate = self._search_regex(r'_(\d+)k\.mp4', media_url, 'bitrate', default=None)
                yield {
                    'format_id': join_nonempty('http', bitrate),
                    'url': media_url,
                    'ext': 'mp4',
                    'tbr': int_or_none(bitrate),
                }
            else:
                yield {
                    'url': media_url,
                    'ext': media_type,
                }