diff --git a/yt_dlp/extractor/eroprofile.py b/yt_dlp/extractor/eroprofile.py index c08643a17c..c460dc7f98 100644 --- a/yt_dlp/extractor/eroprofile.py +++ b/yt_dlp/extractor/eroprofile.py @@ -6,7 +6,7 @@ from ..compat import compat_urllib_parse_urlencode from ..utils import ( ExtractorError, - unescapeHTML + merge_dicts, ) @@ -24,7 +24,8 @@ class EroProfileIE(InfoExtractor): 'title': 'sexy babe softcore', 'thumbnail': r're:https?://.*\.jpg', 'age_limit': 18, - } + }, + 'skip': 'Video not found', }, { 'url': 'http://www.eroprofile.com/m/videos/view/Try-It-On-Pee_cut_2-wmv-4shared-com-file-sharing-download-movie-file', 'md5': '1baa9602ede46ce904c431f5418d8916', @@ -77,19 +78,15 @@ def _real_extract(self, url): [r"glbUpdViews\s*\('\d*','(\d+)'", r'p/report/video/(\d+)'], webpage, 'video id', default=None) - video_url = unescapeHTML(self._search_regex( - r'([^<]+)', webpage, 'title') - thumbnail = self._search_regex( - r'onclick="showVideoPlayer\(\)">([^<]+)', r']*>(.+?)'), + webpage, 'title') - return { + info = self._parse_html5_media_entries(url, webpage, video_id)[0] + + return merge_dicts(info, { 'id': video_id, 'display_id': display_id, - 'url': video_url, 'title': title, - 'thumbnail': thumbnail, 'age_limit': 18, - } + }) diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index c869c7b831..f4362aa474 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -985,6 +985,7 @@ from .playfm import PlayFMIE from .playplustv import PlayPlusTVIE from .plays import PlaysTVIE +from .playstuff import PlayStuffIE from .playtvak import PlaytvakIE from .playvid import PlayvidIE from .playwire import PlaywireIE diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index 79025fd0ea..cd9efea162 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -126,6 +126,7 @@ from .expressen import ExpressenIE from .zype import ZypeIE from .odnoklassniki import OdnoklassnikiIE +from .vk import VKIE from .kinja import KinjaEmbedIE from .gedidigital import GediDigitalIE from .rcs import RCSEmbedsIE @@ -2252,6 +2253,10 @@ class GenericIE(InfoExtractor): 'playlist_mincount': 52, }, { + # Sibnet embed (https://help.sibnet.ru/?sibnet_video_embed) + 'url': 'https://phpbb3.x-tk.ru/bbcode-video-sibnet-t24.html', + 'only_matching': True, + }, { # WimTv embed player 'url': 'http://www.msmotor.tv/wearefmi-pt-2-2021/', 'info_dict': { @@ -2803,6 +2808,11 @@ def _real_extract(self, url): if odnoklassniki_url: return self.url_result(odnoklassniki_url, OdnoklassnikiIE.ie_key()) + # Look for sibnet embedded player + sibnet_urls = VKIE._extract_sibnet_urls(webpage) + if sibnet_urls: + return self.playlist_from_matches(sibnet_urls, video_id, video_title) + # Look for embedded ivi player mobj = re.search(r']+?src=(["\'])(?Phttps?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage) if mobj is not None: @@ -3454,6 +3464,9 @@ def _real_extract(self, url): 'url': src, 'ext': (mimetype2ext(src_type) or ext if ext in KNOWN_EXTENSIONS else 'mp4'), + 'http_headers': { + 'Referer': full_response.geturl(), + }, }) if formats: self._sort_formats(formats) @@ -3522,7 +3535,7 @@ def filter_video(urls): m_video_type = re.findall(r'[^/?#&]+)' + _TESTS = [{ + 'url': 'https://play.stuff.co.nz/details/608778ac1de1c4001a3fa09a', + 'md5': 'c82d3669e5247c64bc382577843e5bd0', + 'info_dict': { + 'id': '6250584958001', + 'ext': 'mp4', + 'title': 'Episode 1: Rotorua/Mt Maunganui/Tauranga', + 'description': 'md5:c154bafb9f0dd02d01fd4100fb1c1913', + 'uploader_id': '6005208634001', + 'timestamp': 1619491027, + 'upload_date': '20210427', + }, + 'add_ie': ['BrightcoveNew'], + }, { + # geo restricted, bypassable + 'url': 'https://play.stuff.co.nz/details/_6155660351001', + 'only_matching': True, + }] + BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s' + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + state = self._parse_json( + self._search_regex( + r'__INITIAL_STATE__\s*=\s*({.+?})\s*;', webpage, 'state'), + video_id) + + account_id = try_get( + state, lambda x: x['configurations']['accountId'], + compat_str) or '6005208634001' + player_id = try_get( + state, lambda x: x['configurations']['playerId'], + compat_str) or 'default' + + entries = [] + for item_id, video in state['items'].items(): + if not isinstance(video, dict): + continue + asset_id = try_get( + video, lambda x: x['content']['attributes']['assetId'], + compat_str) + if not asset_id: + continue + entries.append(self.url_result( + smuggle_url( + self.BRIGHTCOVE_URL_TEMPLATE % (account_id, player_id, asset_id), + {'geo_countries': ['NZ']}), + 'BrightcoveNew', video_id)) + + return self.playlist_result(entries, video_id) diff --git a/yt_dlp/extractor/redbulltv.py b/yt_dlp/extractor/redbulltv.py index 3aae79f5da..6d000b3729 100644 --- a/yt_dlp/extractor/redbulltv.py +++ b/yt_dlp/extractor/redbulltv.py @@ -133,8 +133,10 @@ def _real_extract(self, url): rrn_id = self._match_id(url) asset_id = self._download_json( 'https://edge-graphql.crepo-production.redbullaws.com/v1/graphql', - rrn_id, headers={'API-KEY': 'e90a1ff11335423998b100c929ecc866'}, - query={ + rrn_id, headers={ + 'Accept': 'application/json', + 'API-KEY': 'e90a1ff11335423998b100c929ecc866', + }, query={ 'query': '''{ resource(id: "%s", enforceGeoBlocking: false) { %s diff --git a/yt_dlp/extractor/shahid.py b/yt_dlp/extractor/shahid.py index 8cbb620ed9..7a08686fa5 100644 --- a/yt_dlp/extractor/shahid.py +++ b/yt_dlp/extractor/shahid.py @@ -21,6 +21,7 @@ class ShahidBaseIE(AWSIE): _AWS_PROXY_HOST = 'api2.shahid.net' _AWS_API_KEY = '2RRtuMHx95aNI1Kvtn2rChEuwsCogUd4samGPjLh' + _VALID_URL_BASE = r'https?://shahid\.mbc\.net/[a-z]{2}/' def _handle_error(self, e): fail_data = self._parse_json( @@ -49,7 +50,7 @@ def _call_api(self, path, video_id, request=None): class ShahidIE(ShahidBaseIE): _NETRC_MACHINE = 'shahid' - _VALID_URL = r'https?://shahid\.mbc\.net/ar/(?:serie|show|movie)s/[^/]+/(?Pepisode|clip|movie)-(?P\d+)' + _VALID_URL = ShahidBaseIE._VALID_URL_BASE + r'(?:serie|show|movie)s/[^/]+/(?Pepisode|clip|movie)-(?P\d+)' _TESTS = [{ 'url': 'https://shahid.mbc.net/ar/shows/%D9%85%D8%AA%D8%AD%D9%81-%D8%A7%D9%84%D8%AF%D8%AD%D9%8A%D8%AD-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D9%83%D9%84%D9%8A%D8%A8-1/clip-816924', 'info_dict': { @@ -73,6 +74,9 @@ class ShahidIE(ShahidBaseIE): # shahid plus subscriber only 'url': 'https://shahid.mbc.net/ar/series/%D9%85%D8%B1%D8%A7%D9%8A%D8%A7-2011-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1/episode-90511', 'only_matching': True + }, { + 'url': 'https://shahid.mbc.net/en/shows/Ramez-Fi-Al-Shallal-season-1-episode-1/episode-359319', + 'only_matching': True }] def _real_initialize(self): @@ -168,7 +172,7 @@ def _real_extract(self, url): class ShahidShowIE(ShahidBaseIE): - _VALID_URL = r'https?://shahid\.mbc\.net/ar/(?:show|serie)s/[^/]+/(?:show|series)-(?P\d+)' + _VALID_URL = ShahidBaseIE._VALID_URL_BASE + r'(?:show|serie)s/[^/]+/(?:show|series)-(?P\d+)' _TESTS = [{ 'url': 'https://shahid.mbc.net/ar/shows/%D8%B1%D8%A7%D9%85%D8%B2-%D9%82%D8%B1%D8%B4-%D8%A7%D9%84%D8%A8%D8%AD%D8%B1/show-79187', 'info_dict': { diff --git a/yt_dlp/extractor/shared.py b/yt_dlp/extractor/shared.py index 02295d1a4c..93ab2a1676 100644 --- a/yt_dlp/extractor/shared.py +++ b/yt_dlp/extractor/shared.py @@ -86,10 +86,10 @@ def _extract_video_url(self, webpage, video_id, url): class VivoIE(SharedBaseIE): IE_DESC = 'vivo.sx' - _VALID_URL = r'https?://vivo\.sx/(?P[\da-z]{10})' + _VALID_URL = r'https?://vivo\.s[xt]/(?P[\da-z]{10})' _FILE_NOT_FOUND = '>The file you have requested does not exists or has been removed' - _TEST = { + _TESTS = [{ 'url': 'http://vivo.sx/d7ddda0e78', 'md5': '15b3af41be0b4fe01f4df075c2678b2c', 'info_dict': { @@ -98,7 +98,10 @@ class VivoIE(SharedBaseIE): 'title': 'Chicken', 'filesize': 515659, }, - } + }, { + 'url': 'http://vivo.st/d7ddda0e78', + 'only_matching': True, + }] def _extract_title(self, webpage): title = self._html_search_regex( diff --git a/yt_dlp/extractor/vk.py b/yt_dlp/extractor/vk.py index 00ec006c46..6b3513ee0f 100644 --- a/yt_dlp/extractor/vk.py +++ b/yt_dlp/extractor/vk.py @@ -300,6 +300,13 @@ class VKIE(VKBaseIE): 'only_matching': True, }] + @staticmethod + def _extract_sibnet_urls(webpage): + # https://help.sibnet.ru/?sibnet_video_embed + return [unescapeHTML(mobj.group('url')) for mobj in re.finditer( + r']+\bsrc=(["\'])(?P(?:https?:)?//video\.sibnet\.ru/shell\.php\?.*?\bvideoid=\d+.*?)\1', + webpage)] + def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('videoid') @@ -408,6 +415,10 @@ def _real_extract(self, url): if odnoklassniki_url: return self.url_result(odnoklassniki_url, OdnoklassnikiIE.ie_key()) + sibnet_urls = self._extract_sibnet_urls(info_page) + if sibnet_urls: + return self.url_result(sibnet_urls[0]) + m_opts = re.search(r'(?s)var\s+opts\s*=\s*({.+?});', info_page) if m_opts: m_opts_url = re.search(r"url\s*:\s*'((?!/\b)[^']+)", m_opts.group(1))