Patch summary (text before the first "diff --git" header is ignored by patch tools):
  * import smuggle_url / unsmuggle_url and keep the utils import list sorted
  * capture the optional "embed" URL component as the named group "embed"
  * refresh test expectations; add a videoembed test and a _WEBPAGE_TESTS entry
  * _extract_embed_urls: smuggle the embedding page URL as 'referrer'
  * _extract_desktop: request /video/ or /videoembed/ as matched, send the
    smuggled referrer as the Referer header, and retry a direct videoembed
    link once with a boosty.to referrer when the stub error text is returned
  * rank two additional format quality ids ('6', '7')

diff --git a/yt_dlp/extractor/odnoklassniki.py b/yt_dlp/extractor/odnoklassniki.py
index 4faec914e..195563bbb 100644
--- a/yt_dlp/extractor/odnoklassniki.py
+++ b/yt_dlp/extractor/odnoklassniki.py
@@ -8,10 +8,12 @@
 from ..utils import (
     ExtractorError,
     float_or_none,
-    unified_strdate,
     int_or_none,
     qualities,
+    smuggle_url,
     unescapeHTML,
+    unified_strdate,
+    unsmuggle_url,
     urlencode_postdata,
 )
 
@@ -22,7 +24,7 @@ class OdnoklassnikiIE(InfoExtractor):
                     (?:(?:www|m|mobile)\.)?
                     (?:odnoklassniki|ok)\.ru/
                     (?:
-                        video(?:embed)?/|
+                        video(?P<embed>embed)?/|
                         web-api/video/moviePlayer/|
                         live/|
                         dk\?.*?st\.mvId=
@@ -38,7 +40,7 @@ class OdnoklassnikiIE(InfoExtractor):
             'ext': 'mp4',
             'timestamp': 1545580896,
             'view_count': int,
-            'thumbnail': 'https://coub-anubis-a.akamaized.net/coub_storage/coub/simple/cw_image/c5ac87553bd/608e806a1239c210ab692/1545580913_00026.jpg',
+            'thumbnail': 'https://coub-attachments.akamaized.net/coub_storage/coub/simple/cw_image/c5ac87553bd/608e806a1239c210ab692/1545580913_00026.jpg',
             'title': 'Народная забава',
             'uploader': 'Nevata',
             'upload_date': '20181223',
@@ -65,11 +67,12 @@ class OdnoklassnikiIE(InfoExtractor):
     }, {
         # metadata in JSON
         'url': 'http://ok.ru/video/20079905452',
-        'md5': '0b62089b479e06681abaaca9d204f152',
+        'md5': '5d2b64756e2af296e3b383a0bc02a6aa',
         'info_dict': {
             'id': '20079905452',
             'ext': 'mp4',
             'title': 'Культура меняет нас (прекрасный ролик!))',
+            'thumbnail': str,
             'duration': 100,
             'upload_date': '20141207',
             'uploader_id': '330537914540',
@@ -80,11 +83,12 @@ class OdnoklassnikiIE(InfoExtractor):
     }, {
         # metadataUrl
         'url': 'http://ok.ru/video/63567059965189-0?fromTime=5',
-        'md5': '6ff470ea2dd51d5d18c295a355b0b6bc',
+        'md5': 'f8c951122516af72e6e6ffdd3c41103b',
         'info_dict': {
             'id': '63567059965189-0',
             'ext': 'mp4',
             'title': 'Девушка без комплексов ...',
+            'thumbnail': str,
             'duration': 191,
             'upload_date': '20150518',
             'uploader_id': '534380003155',
@@ -95,18 +99,32 @@ class OdnoklassnikiIE(InfoExtractor):
         },
     }, {
         # YouTube embed (metadataUrl, provider == USER_YOUTUBE)
-        'url': 'http://ok.ru/video/64211978996595-1',
-        'md5': '2f206894ffb5dbfcce2c5a14b909eea5',
+        'url': 'https://ok.ru/video/3952212382174',
+        'md5': '91749d0bd20763a28d083fa335bbd37a',
         'info_dict': {
-            'id': 'V_VztHT5BzY',
+            'id': '5axVgHHDBvU',
             'ext': 'mp4',
-            'title': 'Космическая среда от 26 августа 2015',
-            'description': 'md5:848eb8b85e5e3471a3a803dae1343ed0',
-            'duration': 440,
-            'upload_date': '20150826',
-            'uploader_id': 'tvroscosmos',
-            'uploader': 'Телестудия Роскосмоса',
+            'title': 'Youtube-dl 101: What is it and HOW to use it! Full Download Walkthrough and Guide',
+            'description': 'md5:b57209eeb9d5c2f20c984dfb58862097',
+            'uploader': 'Lod Mer',
+            'uploader_id': '575186401502',
+            'duration': 1529,
             'age_limit': 0,
+            'upload_date': '20210405',
+            'comment_count': int,
+            'live_status': 'not_live',
+            'view_count': int,
+            'thumbnail': 'https://i.mycdn.me/i?r=AEHujHvw2RjEbemUCNEorZbxYpb_p_9AcN2FmGik64Krkcmz37YtlY093oAM5-HIEAt7Zi9s0CiBOSDmbngC-I-k&fn=external_8',
+            'uploader_url': 'http://www.youtube.com/user/MrKewlkid94',
+            'channel_follower_count': int,
+            'tags': ['youtube-dl', 'youtube playlists', 'download videos', 'download audio'],
+            'channel_id': 'UCVGtvURtEURYHtJFUegdSug',
+            'like_count': int,
+            'availability': 'public',
+            'channel_url': 'https://www.youtube.com/channel/UCVGtvURtEURYHtJFUegdSug',
+            'categories': ['Education'],
+            'playable_in_embed': True,
+            'channel': 'BornToReact',
         },
     }, {
         # YouTube embed (metadata, provider == USER_YOUTUBE, no metadata.movie.title field)
@@ -126,10 +144,12 @@ class OdnoklassnikiIE(InfoExtractor):
         },
         'skip': 'Video has not been found',
     }, {
+        # TODO: HTTP Error 400: Bad Request, it only works if there's no cookies when downloading
         'note': 'Only available in mobile webpage',
         'url': 'https://m.ok.ru/video/2361249957145',
         'info_dict': {
             'id': '2361249957145',
+            'ext': 'mp4',
             'title': 'Быковское крещение',
             'duration': 3038.181,
         },
@@ -158,8 +178,37 @@ class OdnoklassnikiIE(InfoExtractor):
         # Paid video
         'url': 'https://ok.ru/video/954886983203',
         'only_matching': True,
+    }, {
+        'url': 'https://ok.ru/videoembed/2932705602075',
+        'info_dict': {
+            'id': '2932705602075',
+            'ext': 'mp4',
+            'thumbnail': 'https://i.mycdn.me/videoPreview?id=1369902483995&type=37&idx=2&tkn=fqlnoQD_xwq5ovIlKfgNyU08qmM&fn=external_8',
+            'title': 'Boosty для тебя!',
+            'uploader_id': '597811038747',
+            'like_count': 0,
+            'duration': 35,
+        },
     }]
 
+    _WEBPAGE_TESTS = [{
+        'url': 'https://boosty.to/ikakprosto/posts/56cedaca-b56a-4dfd-b3ed-98c79cfa0167',
+        'info_dict': {
+            'id': '3950343629563',
+            'ext': 'mp4',
+            'thumbnail': 'https://i.mycdn.me/videoPreview?id=2776238394107&type=37&idx=11&tkn=F3ejkUFcpuI4DnMRxrDGcH5YcmM&fn=external_8',
+            'title': 'Заяц Бусти.mp4',
+            'uploader_id': '571368965883',
+            'like_count': 0,
+            'duration': 10444,
+        },
+    }]
+
+    @classmethod
+    def _extract_embed_urls(cls, url, webpage):
+        for x in super()._extract_embed_urls(url, webpage):
+            yield smuggle_url(x, {'referrer': url})
+
     def _real_extract(self, url):
         try:
             return self._extract_desktop(url)
@@ -174,16 +223,23 @@ def _extract_desktop(self, url):
         start_time = int_or_none(compat_parse_qs(
             compat_urllib_parse_urlparse(url).query).get('fromTime', [None])[0])
 
-        video_id = self._match_id(url)
+        url, smuggled = unsmuggle_url(url, {})
+        video_id, is_embed = self._match_valid_url(url).group('id', 'embed')
+        mode = 'videoembed' if is_embed else 'video'
 
         webpage = self._download_webpage(
-            'http://ok.ru/video/%s' % video_id, video_id,
-            note='Downloading desktop webpage')
+            f'https://ok.ru/{mode}/{video_id}', video_id,
+            note='Downloading desktop webpage',
+            headers={'Referer': smuggled['referrer']} if smuggled.get('referrer') else {})
 
         error = self._search_regex(
             r'[^>]+class="vp_video_stub_txt"[^>]*>([^<]+)<',
             webpage, 'error', default=None)
-        if error:
+        # Direct link from boosty
+        if (error == 'The author of this video has not been found or is blocked'
+                and not smuggled.get('referrer') and mode == 'videoembed'):
+            return self._extract_desktop(smuggle_url(url, {'referrer': 'https://boosty.to'}))
+        elif error:
             raise ExtractorError(error, expected=True)
 
         player = self._parse_json(
@@ -270,7 +326,7 @@ def _extract_desktop(self, url):
         if provider == 'LIVE_TV_APP':
             info['title'] = title
 
-        quality = qualities(('4', '0', '1', '2', '3', '5'))
+        quality = qualities(('4', '0', '1', '2', '3', '5', '6', '7'))
 
         formats = [{
             'url': f['url'],