Merge branch 'yt-dlp:master' into patch-1

This commit is contained in:
joaquinito2070 2022-12-15 18:41:40 +01:00 committed by GitHub
commit 0806b5a266
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 825 additions and 187 deletions

View File

@ -342,7 +342,6 @@ def can_merge_formats(cls, info_dict, params):
and cls.can_download(info_dict)) and cls.can_download(info_dict))
def _call_downloader(self, tmpfilename, info_dict): def _call_downloader(self, tmpfilename, info_dict):
urls = [f['url'] for f in info_dict.get('requested_formats', [])] or [info_dict['url']]
ffpp = FFmpegPostProcessor(downloader=self) ffpp = FFmpegPostProcessor(downloader=self)
if not ffpp.available: if not ffpp.available:
self.report_error('m3u8 download detected but ffmpeg could not be found. Please install') self.report_error('m3u8 download detected but ffmpeg could not be found. Please install')
@ -372,16 +371,6 @@ def _call_downloader(self, tmpfilename, info_dict):
# http://trac.ffmpeg.org/ticket/6125#comment:10 # http://trac.ffmpeg.org/ticket/6125#comment:10
args += ['-seekable', '1' if seekable else '0'] args += ['-seekable', '1' if seekable else '0']
http_headers = None
if info_dict.get('http_headers'):
youtubedl_headers = handle_youtubedl_headers(info_dict['http_headers'])
http_headers = [
# Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv:
# [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header.
'-headers',
''.join(f'{key}: {val}\r\n' for key, val in youtubedl_headers.items())
]
env = None env = None
proxy = self.params.get('proxy') proxy = self.params.get('proxy')
if proxy: if proxy:
@ -434,21 +423,26 @@ def _call_downloader(self, tmpfilename, info_dict):
start_time, end_time = info_dict.get('section_start') or 0, info_dict.get('section_end') start_time, end_time = info_dict.get('section_start') or 0, info_dict.get('section_end')
for i, url in enumerate(urls): selected_formats = info_dict.get('requested_formats') or [info_dict]
if http_headers is not None and re.match(r'^https?://', url): for i, fmt in enumerate(selected_formats):
args += http_headers if fmt.get('http_headers') and re.match(r'^https?://', fmt['url']):
headers_dict = handle_youtubedl_headers(fmt['http_headers'])
# Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv:
# [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header.
args.extend(['-headers', ''.join(f'{key}: {val}\r\n' for key, val in headers_dict.items())])
if start_time: if start_time:
args += ['-ss', str(start_time)] args += ['-ss', str(start_time)]
if end_time: if end_time:
args += ['-t', str(end_time - start_time)] args += ['-t', str(end_time - start_time)]
args += self._configuration_args((f'_i{i + 1}', '_i')) + ['-i', url] args += self._configuration_args((f'_i{i + 1}', '_i')) + ['-i', fmt['url']]
if not (start_time or end_time) or not self.params.get('force_keyframes_at_cuts'): if not (start_time or end_time) or not self.params.get('force_keyframes_at_cuts'):
args += ['-c', 'copy'] args += ['-c', 'copy']
if info_dict.get('requested_formats') or protocol == 'http_dash_segments': if info_dict.get('requested_formats') or protocol == 'http_dash_segments':
for (i, fmt) in enumerate(info_dict.get('requested_formats') or [info_dict]): for i, fmt in enumerate(selected_formats):
stream_number = fmt.get('manifest_stream_number', 0) stream_number = fmt.get('manifest_stream_number', 0)
args.extend(['-map', f'{i}:{stream_number}']) args.extend(['-map', f'{i}:{stream_number}'])
@ -488,8 +482,9 @@ def _call_downloader(self, tmpfilename, info_dict):
args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True)) args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True))
self._debug_cmd(args) self._debug_cmd(args)
piped = any(fmt['url'] in ('-', 'pipe:') for fmt in selected_formats)
with Popen(args, stdin=subprocess.PIPE, env=env) as proc: with Popen(args, stdin=subprocess.PIPE, env=env) as proc:
if url in ('-', 'pipe:'): if piped:
self.on_process_started(proc, proc.stdin) self.on_process_started(proc, proc.stdin)
try: try:
retval = proc.wait() retval = proc.wait()
@ -499,7 +494,7 @@ def _call_downloader(self, tmpfilename, info_dict):
# produces a file that is playable (this is mostly useful for live # produces a file that is playable (this is mostly useful for live
# streams). Note that Windows is not affected and produces playable # streams). Note that Windows is not affected and produces playable
# files (see https://github.com/ytdl-org/youtube-dl/issues/8300). # files (see https://github.com/ytdl-org/youtube-dl/issues/8300).
if isinstance(e, KeyboardInterrupt) and sys.platform != 'win32' and url not in ('-', 'pipe:'): if isinstance(e, KeyboardInterrupt) and sys.platform != 'win32' and not piped:
proc.communicate_or_kill(b'q') proc.communicate_or_kill(b'q')
else: else:
proc.kill(timeout=None) proc.kill(timeout=None)

View File

@ -78,6 +78,7 @@
WyborczaVideoIE, WyborczaVideoIE,
) )
from .airmozilla import AirMozillaIE from .airmozilla import AirMozillaIE
from .airtv import AirTVIE
from .aljazeera import AlJazeeraIE from .aljazeera import AlJazeeraIE
from .alphaporno import AlphaPornoIE from .alphaporno import AlphaPornoIE
from .amara import AmaraIE from .amara import AmaraIE
@ -536,7 +537,7 @@
ESPNCricInfoIE, ESPNCricInfoIE,
) )
from .esri import EsriVideoIE from .esri import EsriVideoIE
from .europa import EuropaIE from .europa import EuropaIE, EuroParlWebstreamIE
from .europeantour import EuropeanTourIE from .europeantour import EuropeanTourIE
from .eurosport import EurosportIE from .eurosport import EurosportIE
from .euscreen import EUScreenIE from .euscreen import EUScreenIE
@ -1281,6 +1282,7 @@
from .ondemandkorea import OnDemandKoreaIE from .ondemandkorea import OnDemandKoreaIE
from .onefootball import OneFootballIE from .onefootball import OneFootballIE
from .onenewsnz import OneNewsNZIE from .onenewsnz import OneNewsNZIE
from .oneplace import OnePlacePodcastIE
from .onet import ( from .onet import (
OnetIE, OnetIE,
OnetChannelIE, OnetChannelIE,

96
yt_dlp/extractor/airtv.py Normal file
View File

@ -0,0 +1,96 @@
from .common import InfoExtractor
from .youtube import YoutubeIE
from ..utils import (
determine_ext,
int_or_none,
mimetype2ext,
parse_iso8601,
traverse_obj
)
class AirTVIE(InfoExtractor):
    """Extractor for ``https://www.air.tv/watch?v=<id>`` pages.

    The video metadata is read from the page's Next.js data. Entries that
    carry a ``youtube_id`` are delegated to the YouTube extractor; all other
    entries are built from the ``sources`` / ``sources_desktop`` lists.
    """
    _VALID_URL = r'https?://www\.air\.tv/watch\?v=(?P<id>\w+)'
    _TESTS = [{
        # without youtube_id
        'url': 'https://www.air.tv/watch?v=W87jcWleSn2hXZN47zJZsQ',
        'info_dict': {
            'id': 'W87jcWleSn2hXZN47zJZsQ',
            'ext': 'mp4',
            'release_date': '20221003',
            'release_timestamp': 1664792603,
            'channel_id': 'vgfManQlRQKgoFQ8i8peFQ',
            'title': 'md5:c12d49ed367c3dadaa67659aff43494c',
            'upload_date': '20221003',
            'duration': 151,
            'view_count': int,
            'thumbnail': 'https://cdn-sp-gcs.air.tv/videos/W/8/W87jcWleSn2hXZN47zJZsQ/b13fc56464f47d9d62a36d110b9b5a72-4096x2160_9.jpg',
            'timestamp': 1664792603,
        }
    }, {
        # with youtube_id
        'url': 'https://www.air.tv/watch?v=sv57EC8tRXG6h8dNXFUU1Q',
        'info_dict': {
            'id': '2ZTqmpee-bQ',
            'ext': 'mp4',
            'comment_count': int,
            'tags': 'count:11',
            'channel_follower_count': int,
            'like_count': int,
            'uploader': 'Newsflare',
            'thumbnail': 'https://i.ytimg.com/vi_webp/2ZTqmpee-bQ/maxresdefault.webp',
            'availability': 'public',
            'title': 'Geese Chase Alligator Across Golf Course',
            'uploader_id': 'NewsflareBreaking',
            'channel_url': 'https://www.youtube.com/channel/UCzSSoloGEz10HALUAbYhngQ',
            'description': 'md5:99b21d9cea59330149efbd9706e208f5',
            'age_limit': 0,
            'channel_id': 'UCzSSoloGEz10HALUAbYhngQ',
            'uploader_url': 'http://www.youtube.com/user/NewsflareBreaking',
            'view_count': int,
            'categories': ['News & Politics'],
            'live_status': 'not_live',
            'playable_in_embed': True,
            'channel': 'Newsflare',
            'duration': 37,
            'upload_date': '20180511',
        }
    }]

    def _get_formats_and_subtitle(self, json_data, video_id):
        """Build the format list and subtitle map from the video JSON.

        Returns a ``(formats, subtitles)`` tuple. HLS sources are expanded
        through the m3u8 helper; every other source becomes a single format.
        """
        formats, subtitles = [], {}
        # Take the first of 'sources' / 'sources_desktop' that holds a list.
        # The previous trailing `...` alternative made the lookup fall back
        # to *every* value of the video JSON when both keys were missing,
        # which then failed on `.get` for non-dict values.
        sources = traverse_obj(
            json_data, 'sources', 'sources_desktop', expected_type=list) or []
        for source in sources:
            if not isinstance(source, dict):
                continue
            src = source.get('src')
            if not src:
                # A source without a URL cannot be downloaded; skip it
                # instead of emitting a format with `url: None`.
                continue
            ext = determine_ext(src, mimetype2ext(source.get('type')))
            if ext == 'm3u8':
                fmts, subs = self._extract_m3u8_formats_and_subtitles(src, video_id)
                formats.extend(fmts)
                self._merge_subtitles(subs, target=subtitles)
            else:
                formats.append({'url': src, 'ext': ext})
        return formats, subtitles

    def _real_extract(self, url):
        """Extract a single air.tv video, or hand off to YouTube."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        nextjs_json = self._search_nextjs_data(webpage, display_id)['props']['pageProps']['initialState']['videos'][display_id]
        if nextjs_json.get('youtube_id'):
            # The video is only a pointer to a YouTube upload.
            return self.url_result(
                f'https://www.youtube.com/watch?v={nextjs_json.get("youtube_id")}', YoutubeIE)

        formats, subtitles = self._get_formats_and_subtitle(nextjs_json, display_id)
        return {
            'id': display_id,
            'title': nextjs_json.get('title') or self._html_search_meta('og:title', webpage),
            'formats': formats,
            'subtitles': subtitles,
            'description': nextjs_json.get('description') or None,
            'duration': int_or_none(nextjs_json.get('duration')),
            'thumbnails': [
                {'url': thumbnail}
                for thumbnail in traverse_obj(nextjs_json, ('default_thumbnails', ...))],
            'channel_id': traverse_obj(nextjs_json, 'channel', 'channel_slug'),
            'timestamp': parse_iso8601(nextjs_json.get('created')),
            'release_timestamp': parse_iso8601(nextjs_json.get('published')),
            'view_count': int_or_none(nextjs_json.get('views')),
        }

View File

@ -3,6 +3,7 @@
int_or_none, int_or_none,
orderedSet, orderedSet,
parse_duration, parse_duration,
parse_iso8601,
parse_qs, parse_qs,
qualities, qualities,
unified_strdate, unified_strdate,
@ -87,3 +88,86 @@ def get_item(type_, preference):
'view_count': view_count, 'view_count': view_count,
'formats': formats 'formats': formats
} }
class EuroParlWebstreamIE(InfoExtractor):
    """Extractor for European Parliament webstreams.

    Matches both the multimedia webstreaming pages and the embed player,
    resolves the event through the vuplay "external event" API and collects
    DASH and HLS formats from the returned streaming URL.
    """
    _VALID_URL = r'''(?x)
        https?://(?:multimedia|webstreaming)\.europarl\.europa\.eu/[^/#?]+/
        (?:embed/embed\.html\?event=|(?!video)[^/#?]+/[\w-]+_)(?P<id>[\w-]+)
    '''
    _TESTS = [{
        'url': 'https://multimedia.europarl.europa.eu/pl/webstreaming/plenary-session_20220914-0900-PLENARY',
        'info_dict': {
            'id': 'bcaa1db4-76ef-7e06-8da7-839bd0ad1dbe',
            'ext': 'mp4',
            'release_timestamp': 1663137900,
            'title': 'Plenary session',
            'release_date': '20220914',
        },
        'params': {
            'skip_download': True,
        }
    }, {
        'url': 'https://multimedia.europarl.europa.eu/pl/webstreaming/eu-cop27-un-climate-change-conference-in-sharm-el-sheikh-egypt-ep-delegation-meets-with-ngo-represen_20221114-1600-SPECIAL-OTHER',
        'info_dict': {
            'id': 'a8428de8-b9cd-6a2e-11e4-3805d9c9ff5c',
            'ext': 'mp4',
            'release_timestamp': 1668434400,
            'release_date': '20221114',
            'title': 'md5:d3550280c33cc70e0678652e3d52c028',
        },
        'params': {
            'skip_download': True,
        }
    }, {
        # embed webpage
        'url': 'https://webstreaming.europarl.europa.eu/ep/embed/embed.html?event=20220914-0900-PLENARY&language=en&autoplay=true&logo=true',
        'info_dict': {
            'id': 'bcaa1db4-76ef-7e06-8da7-839bd0ad1dbe',
            'ext': 'mp4',
            'title': 'Plenary session',
            'release_date': '20220914',
            'release_timestamp': 1663137900,
        },
        'params': {
            'skip_download': True,
        }
    }, {
        # live webstream
        'url': 'https://multimedia.europarl.europa.eu/en/webstreaming/euroscola_20221115-1000-SPECIAL-EUROSCOLA',
        'info_dict': {
            'ext': 'mp4',
            'id': '510eda7f-ba72-161b-7ee7-0e836cd2e715',
            'release_timestamp': 1668502800,
            'title': 'Euroscola 2022-11-15 19:21',
            'release_date': '20221115',
            'live_status': 'is_live',
        },
        'skip': 'not live anymore'
    }]

    def _real_extract(self, url):
        """Resolve the event via the API and return its info dict."""
        display_id = self._match_id(url)

        json_info = self._download_json(
            'https://vis-api.vuplay.co.uk/event/external', display_id,
            query={
                'player_key': 'europarl|718f822c-a48c-4841-9947-c9cb9bb1743c',
                'external_id': display_id,
            })

        formats, subtitles = self._extract_mpd_formats_and_subtitles(json_info['streaming_url'], display_id)
        # NOTE(review): assumes the HLS manifest lives next to the DASH one
        # with only the extension swapped - confirm against the API.
        fmts, subs = self._extract_m3u8_formats_and_subtitles(
            json_info['streaming_url'].replace('.mpd', '.m3u8'), display_id)

        formats.extend(fmts)
        self._merge_subtitles(subs, target=subtitles)

        return {
            'id': json_info['id'],
            'title': json_info.get('title'),
            'formats': formats,
            'subtitles': subtitles,
            'release_timestamp': parse_iso8601(json_info.get('published_start')),
            # `or ''` also covers an explicit JSON null, for which
            # `.get('state', '')` would return None and make `in` raise.
            'is_live': 'LIVE' in (json_info.get('state') or ''),
        }

View File

@ -1,31 +1,51 @@
from .common import InfoExtractor from .common import InfoExtractor
from .uplynk import UplynkPreplayIE
from ..utils import HEADRequest, float_or_none, make_archive_id, smuggle_url
class FoxSportsIE(InfoExtractor): class FoxSportsIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?foxsports\.com/(?:[^/]+/)*video/(?P<id>\d+)' _VALID_URL = r'https?://(?:www\.)?foxsports\.com/watch/(?P<id>[\w-]+)'
_TESTS = [{
_TEST = { 'url': 'https://www.foxsports.com/watch/play-612168c6700004b',
'url': 'http://www.foxsports.com/tennessee/video/432609859715',
'md5': 'b49050e955bebe32c301972e4012ac17',
'info_dict': { 'info_dict': {
'id': '432609859715', 'id': 'b72f5bd8658140baa5791bb676433733',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Courtney Lee on going up 2-0 in series vs. Blazers', 'display_id': 'play-612168c6700004b',
'description': 'Courtney Lee talks about Memphis being focused.', 'title': 'md5:e0c4ecac3a1f25295b4fae22fb5c126a',
# TODO: fix timestamp 'description': 'md5:371bc43609708ae2b9e1a939229762af',
'upload_date': '19700101', # '20150423', 'uploader_id': '06b4a36349624051a9ba52ac3a91d268',
# 'timestamp': 1429761109, 'upload_date': '20221205',
'uploader': 'NEWA-FNG-FOXSPORTS', 'timestamp': 1670262586,
'duration': 31.7317,
'thumbnail': r're:^https?://.*\.jpg$',
'extra_param_to_segment_url': str,
}, },
'params': { 'params': {
# m3u8 download 'skip_download': 'm3u8',
'skip_download': True,
}, },
'add_ie': ['ThePlatform'], }]
}
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
json_ld = self._search_json_ld(webpage, video_id, expected_type='VideoObject', default={})
data = self._download_json(
f'https://api3.fox.com/v2.0/vodplayer/sportsclip/{video_id}',
video_id, note='Downloading API JSON', headers={
'x-api-key': 'cf289e299efdfa39fb6316f259d1de93',
})
preplay_url = self._request_webpage(
HEADRequest(data['url']), video_id, 'Fetching preplay URL').geturl()
return self.url_result( return {
'https://feed.theplatform.com/f/BKQ29B/foxsports-all?byId=' + video_id, 'ThePlatformFeed') '_type': 'url_transparent',
'ie_key': UplynkPreplayIE.ie_key(),
'url': smuggle_url(preplay_url, {'Origin': 'https://www.foxsports.com'}),
'display_id': video_id,
'title': data.get('name') or json_ld.get('title'),
'description': data.get('description') or json_ld.get('description'),
'duration': float_or_none(data.get('durationInSeconds')),
'timestamp': json_ld.get('timestamp'),
'thumbnails': json_ld.get('thumbnails'),
'_old_archive_ids': [make_archive_id(self, video_id)],
}

View File

@ -2356,7 +2356,7 @@ def _real_extract(self, url):
info_dict.update({ info_dict.update({
'formats': formats, 'formats': formats,
'subtitles': subtitles, 'subtitles': subtitles,
'http_headers': headers, 'http_headers': headers or None,
}) })
return info_dict return info_dict

View File

@ -1,3 +1,5 @@
import itertools
from .common import InfoExtractor from .common import InfoExtractor
from .dailymotion import DailymotionIE from .dailymotion import DailymotionIE
from ..utils import smuggle_url, traverse_obj from ..utils import smuggle_url, traverse_obj
@ -16,6 +18,26 @@ def _call_api(self, slug, endpoint, query={}, season_id='', display_id=None):
f'https://api.netverse.id/medias/api/v2/{self._ENDPOINTS[endpoint]}/{slug}/{season_id}', f'https://api.netverse.id/medias/api/v2/{self._ENDPOINTS[endpoint]}/{slug}/{season_id}',
display_id or slug, query=query) display_id or slug, query=query)
def _get_comments(self, video_id):
    """Yield comment dicts for ``video_id``, fetching one API page at a time.

    Paging stops once the current page number reaches the ``last_page``
    value reported by the API, or right after the first page when no such
    value is available (including when a page fails to download).
    """
    comments_url = f'https://api.netverse.id/mediadetails/api/v3/videos/comments/{video_id}'
    # Maps output comment fields to their location inside each API entry.
    comment_fields = {
        'id': '_id',
        'text': 'comment',
        'author_id': 'customer_id',
        'author': ('customer', 'name'),
        'author_thumbnail': ('customer', 'profile_picture'),
    }

    total_pages = None
    page = 0
    while True:
        page += 1
        # Non-fatal download: a failed page is treated as an empty response.
        # NOTE(review): data=b'' presumably forces a POST request - confirm.
        payload = self._download_json(
            comments_url, video_id, data=b'', fatal=False, query={'page': page},
            note=f'Downloading JSON comment metadata page {page}') or {}

        yield from traverse_obj(
            payload, ('response', 'comments', 'data', ..., comment_fields))

        # Keep the first truthy page count seen; retry the lookup otherwise.
        total_pages = total_pages or traverse_obj(
            payload, ('response', 'comments', 'last_page'))
        if page >= (total_pages or 0):
            return
class NetverseIE(NetverseBaseIE): class NetverseIE(NetverseBaseIE):
_VALID_URL = r'https?://(?:\w+\.)?netverse\.id/(?P<type>watch|video)/(?P<display_id>[^/?#&]+)' _VALID_URL = r'https?://(?:\w+\.)?netverse\.id/(?P<type>watch|video)/(?P<display_id>[^/?#&]+)'
@ -28,7 +50,7 @@ class NetverseIE(NetverseBaseIE):
'ext': 'mp4', 'ext': 'mp4',
'season': 'Season 2016', 'season': 'Season 2016',
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e', 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
'thumbnail': r're:https?://s\d+\.dmcdn\.net/v/T7aV31Y0eGRWBbwkK/x1080', 'thumbnail': r're:https?://s\d+\.dmcdn\.net/v/[^/]+/x1080',
'episode_number': 22, 'episode_number': 22,
'episode': 'Episode 22', 'episode': 'Episode 22',
'uploader_id': 'x2ir3vq', 'uploader_id': 'x2ir3vq',
@ -51,7 +73,7 @@ class NetverseIE(NetverseBaseIE):
'ext': 'mp4', 'ext': 'mp4',
'season': 'Season 2', 'season': 'Season 2',
'description': 'md5:8a74f70812cca267e19ee0635f0af835', 'description': 'md5:8a74f70812cca267e19ee0635f0af835',
'thumbnail': r're:https?://s\d+\.dmcdn\.net/v/Thwuy1YURicFmGu0v/x1080', 'thumbnail': r're:https?://s\d+\.dmcdn\.net/v/[^/]+/x1080',
'episode_number': 2, 'episode_number': 2,
'episode': 'Episode 2', 'episode': 'Episode 2',
'view_count': int, 'view_count': int,
@ -75,7 +97,7 @@ class NetverseIE(NetverseBaseIE):
'title': 'Tetangga Baru', 'title': 'Tetangga Baru',
'season': 'Season 1', 'season': 'Season 1',
'description': 'md5:23fcf70e97d461d3029d25d59b2ccfb9', 'description': 'md5:23fcf70e97d461d3029d25d59b2ccfb9',
'thumbnail': r're:https?://s\d+\.dmcdn\.net/v/T3Ogm1YEnnyjVKAFF/x1080', 'thumbnail': r're:https?://s\d+\.dmcdn\.net/v/[^/]+/x1080',
'episode_number': 1, 'episode_number': 1,
'episode': 'Episode 1', 'episode': 'Episode 1',
'timestamp': 1624538169, 'timestamp': 1624538169,
@ -96,7 +118,7 @@ class NetverseIE(NetverseBaseIE):
'info_dict': { 'info_dict': {
'id': 'x887jzz', 'id': 'x887jzz',
'ext': 'mp4', 'ext': 'mp4',
'thumbnail': r're:https?://s\d+\.dmcdn\.net/v/TfuZ_1Y6PboJ5An_s/x1080', 'thumbnail': r're:https?://s\d+\.dmcdn\.net/v/[^/]+/x1080',
'season': 'Season 1', 'season': 'Season 1',
'episode_number': 1, 'episode_number': 1,
'description': 'md5:d4f627b3e7a3f9acdc55f6cdd5ea41d5', 'description': 'md5:d4f627b3e7a3f9acdc55f6cdd5ea41d5',
@ -114,6 +136,60 @@ class NetverseIE(NetverseBaseIE):
'upload_date': '20220225', 'upload_date': '20220225',
}, },
'skip': 'This video get Geo-blocked for some country' 'skip': 'This video get Geo-blocked for some country'
}, {
# video with comments
'url': 'https://netverse.id/video/episode-1-season-2016-ok-food',
'info_dict': {
'id': 'k6hetBPiQMljSxxvAy7',
'ext': 'mp4',
'thumbnail': r're:https?://s\d+\.dmcdn\.net/v/[^/]+/x1080',
'display_id': 'episode-1-season-2016-ok-food',
'like_count': int,
'description': '',
'duration': 1471,
'age_limit': 0,
'timestamp': 1642405848,
'episode_number': 1,
'season': 'Season 2016',
'uploader_id': 'x2ir3vq',
'title': 'Episode 1 - Season 2016 - Ok Food',
'upload_date': '20220117',
'tags': [],
'view_count': int,
'episode': 'Episode 1',
'uploader': 'Net Prime',
'comment_count': int,
},
'params':{
'getcomments': True
}
}, {
# video with multiple page comment
'url': 'https://netverse.id/video/match-island-eps-1-fix',
'info_dict': {
'id': 'x8aznjc',
'ext': 'mp4',
'like_count': int,
'tags': ['Match-Island', 'Pd00111'],
'display_id': 'match-island-eps-1-fix',
'view_count': int,
'episode': 'Episode 1',
'uploader': 'Net Prime',
'duration': 4070,
'timestamp': 1653068165,
'description': 'md5:e9cf3b480ad18e9c33b999e3494f223f',
'age_limit': 0,
'title': 'Welcome To Match Island',
'upload_date': '20220520',
'episode_number': 1,
'thumbnail': r're:https?://s\d+\.dmcdn\.net/v/[^/]+/x1080',
'uploader_id': 'x2ir3vq',
'season': 'Season 1',
'comment_count': int,
},
'params':{
'getcomments': True
}
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -131,6 +207,7 @@ def _real_extract(self, url):
'thumbnail': traverse_obj(videos, ('program_detail', 'thumbnail_image')), 'thumbnail': traverse_obj(videos, ('program_detail', 'thumbnail_image')),
'description': traverse_obj(videos, ('program_detail', 'description')), 'description': traverse_obj(videos, ('program_detail', 'description')),
'episode_number': videos.get('episode_order'), 'episode_number': videos.get('episode_order'),
'__post_extractor': self.extract_comments(display_id),
} }

View File

@ -3,7 +3,7 @@
class NOSNLArticleIE(InfoExtractor): class NOSNLArticleIE(InfoExtractor):
_VALID_URL = r'https?://nos\.nl/((?!video)(\w+/)?\w+/)\d+-(?P<display_id>[\w-]+)' _VALID_URL = r'https?://nos\.nl/(?P<type>video|(\w+/)?\w+)/?\d+-(?P<display_id>[\w-]+)'
_TESTS = [ _TESTS = [
{ {
# only 1 video # only 1 video
@ -22,13 +22,14 @@ class NOSNLArticleIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': '2440409', 'id': '2440409',
'title': 'Vannacht sliepen weer enkele honderden asielzoekers in Ter Apel buiten', 'title': 'Vannacht sliepen weer enkele honderden asielzoekers in Ter Apel buiten',
'description': 'Er werd wel geprobeerd om kwetsbare migranten onderdak te bieden, zegt het COA.', 'description': 'md5:72b1e1674d798460e79d78fa37e9f56d',
'tags': ['aanmeldcentrum', 'Centraal Orgaan opvang asielzoekers', 'COA', 'asielzoekers', 'Ter Apel'], 'tags': ['aanmeldcentrum', 'Centraal Orgaan opvang asielzoekers', 'COA', 'asielzoekers', 'Ter Apel'],
'modified_timestamp': 1660452773, 'modified_timestamp': 1660452773,
'modified_date': '20220814', 'modified_date': '20220814',
'upload_date': '20220813', 'upload_date': '20220813',
'thumbnail': 'https://cdn.nos.nl/image/2022/07/18/880346/1024x576a.jpg', 'thumbnail': 'https://cdn.nos.nl/image/2022/07/18/880346/1024x576a.jpg',
'timestamp': 1660401384, 'timestamp': 1660401384,
'categories': ['Regionaal nieuws', 'Binnenland'],
}, },
'playlist_count': 2, 'playlist_count': 2,
}, { }, {
@ -37,20 +38,37 @@ class NOSNLArticleIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': '2440789', 'id': '2440789',
'title': 'Wekdienst 16/8: Groningse acties tien jaar na zware aardbeving • Femke Bol in actie op EK atletiek ', 'title': 'Wekdienst 16/8: Groningse acties tien jaar na zware aardbeving • Femke Bol in actie op EK atletiek ',
'description': 'Nieuws, weer, verkeer: met dit overzicht begin je geïnformeerd aan de dag.', 'description': 'md5:0bd277ed7a44fc15cb12a9d27d8f6641',
'tags': ['wekdienst'], 'tags': ['wekdienst'],
'modified_date': '20220816', 'modified_date': '20220816',
'modified_timestamp': 1660625449, 'modified_timestamp': 1660625449,
'timestamp': 1660625449, 'timestamp': 1660625449,
'upload_date': '20220816', 'upload_date': '20220816',
'thumbnail': 'https://cdn.nos.nl/image/2022/08/16/888178/1024x576a.jpg', 'thumbnail': 'https://cdn.nos.nl/image/2022/08/16/888178/1024x576a.jpg',
'categories': ['Binnenland', 'Buitenland'],
}, },
'playlist_count': 2, 'playlist_count': 2,
}, {
# video url
'url': 'https://nos.nl/video/2452718-xi-en-trudeau-botsen-voor-de-camera-op-g20-top-je-hebt-gelekt',
'info_dict': {
'id': '2452718',
'title': 'Xi en Trudeau botsen voor de camera op G20-top: \'Je hebt gelekt\'',
'modified_date': '20221117',
'description': 'md5:61907dac576f75c11bf8ffffd4a3cc0f',
'tags': ['Xi', 'Trudeau', 'G20', 'indonesié'],
'upload_date': '20221117',
'thumbnail': 'https://cdn.nos.nl/image/2022/11/17/916155/1024x576a.jpg',
'modified_timestamp': 1668663388,
'timestamp': 1668663388,
'categories': ['Buitenland'],
},
'playlist_mincount': 1,
} }
] ]
def _entries(self, nextjs_json, display_id): def _entries(self, nextjs_json, display_id):
for item in nextjs_json['items']: for item in nextjs_json:
if item.get('type') == 'video': if item.get('type') == 'video':
formats, subtitle = self._extract_m3u8_formats_and_subtitles( formats, subtitle = self._extract_m3u8_formats_and_subtitles(
traverse_obj(item, ('source', 'url')), display_id, ext='mp4') traverse_obj(item, ('source', 'url')), display_id, ext='mp4')
@ -77,13 +95,14 @@ def _entries(self, nextjs_json, display_id):
} }
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_valid_url(url).group('display_id') site_type, display_id = self._match_valid_url(url).group('type', 'display_id')
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
nextjs_json = self._search_nextjs_data(webpage, display_id)['props']['pageProps']['data'] nextjs_json = self._search_nextjs_data(webpage, display_id)['props']['pageProps']['data']
return { return {
'_type': 'playlist', '_type': 'playlist',
'entries': self._entries(nextjs_json, display_id), 'entries': self._entries(
[nextjs_json['video']] if site_type == 'video' else nextjs_json['items'], display_id),
'id': str(nextjs_json['id']), 'id': str(nextjs_json['id']),
'title': nextjs_json.get('title') or self._html_search_meta(['title', 'og:title', 'twitter:title'], webpage), 'title': nextjs_json.get('title') or self._html_search_meta(['title', 'og:title', 'twitter:title'], webpage),
'description': (nextjs_json.get('description') 'description': (nextjs_json.get('description')
@ -91,5 +110,6 @@ def _real_extract(self, url):
'tags': nextjs_json.get('keywords'), 'tags': nextjs_json.get('keywords'),
'modified_timestamp': parse_iso8601(nextjs_json.get('modifiedAt')), 'modified_timestamp': parse_iso8601(nextjs_json.get('modifiedAt')),
'thumbnail': nextjs_json.get('shareImageSrc') or self._html_search_meta(['og:image', 'twitter:image'], webpage), 'thumbnail': nextjs_json.get('shareImageSrc') or self._html_search_meta(['og:image', 'twitter:image'], webpage),
'timestamp': parse_iso8601(nextjs_json.get('publishedAt')) 'timestamp': parse_iso8601(nextjs_json.get('publishedAt')),
'categories': traverse_obj(nextjs_json, ('categories', ..., 'label')),
} }

View File

@ -0,0 +1,43 @@
from .common import InfoExtractor
class OnePlacePodcastIE(InfoExtractor):
    """Extractor for single podcast episodes on oneplace.com.

    The episode page is scraped directly: the MP3 URL, title and description
    are all pulled out of the HTML with regular expressions.
    """
    _VALID_URL = r'https?://www\.oneplace\.com/[\w]+/[^/]+/listen/[\w-]+-(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.oneplace.com/ministries/a-daily-walk/listen/living-in-the-last-days-part-2-958461.html',
        'info_dict': {
            'id': '958461',
            'ext': 'mp3',
            'title': 'Living in the Last Days Part 2 | A Daily Walk with John Randall',
            'description': 'md5:fbb8f1cf21447ac54ecaa2887fc20c6e',
        }
    }, {
        'url': 'https://www.oneplace.com/ministries/ankerberg-show/listen/ep-3-relying-on-the-constant-companionship-of-the-holy-spirit-part-2-922513.html',
        'info_dict': {
            'id': '922513',
            'ext': 'mp3',
            'description': 'md5:8b810b4349aa40a5d033b4536fe428e1',
            'title': 'md5:ce10f7d8d5ddcf485ed8905ef109659d',
        }
    }]

    def _real_extract(self, url):
        """Download the episode page and return an audio-only info dict."""
        episode_id = self._match_id(url)
        page = self._download_webpage(url, episode_id)

        # The media URL is looked up without a default, unlike the title
        # and description lookups below which fall back to None.
        media_url = self._search_regex((
            r'mp3-url\s*=\s*"([^"]+)',
            r'<div[^>]+id\s*=\s*"player"[^>]+data-media-url\s*=\s*"(?P<media_url>[^"]+)',
        ), page, 'media url')

        # NOTE(review): the first pattern captures up to the next '>' after
        # the <h2> opening tag, so a match would include the start of the
        # closing tag - confirm this is intended.
        title_patterns = (
            r'<div[^>]class\s*=\s*"details"[^>]+>[^<]<h2[^>]+>(?P<content>[^>]+)>',
            self._meta_regex('og:title'), self._meta_regex('title'),
        )
        title = self._html_search_regex(
            title_patterns, page, 'title', group='content', default=None)

        description = self._html_search_regex(
            r'<div[^>]+class="[^"]+epDesc"[^>]*>\s*(?P<desc>.+?)\s*</div>',
            page, 'description', default=None)

        return {
            'id': episode_id,
            'url': media_url,
            'ext': 'mp3',
            'vcodec': 'none',
            'title': title,
            'description': description,
        }

View File

@ -1,19 +1,24 @@
import json import json
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str
from ..utils import ( from ..utils import (
determine_ext, determine_ext,
float_or_none, float_or_none,
int_or_none, int_or_none,
try_get, str_or_none,
strip_or_none,
traverse_obj,
unified_timestamp, unified_timestamp,
url_or_none, url_or_none,
) )
class PinterestBaseIE(InfoExtractor): class PinterestBaseIE(InfoExtractor):
_VALID_URL_BASE = r'https?://(?:[^/]+\.)?pinterest\.(?:com|fr|de|ch|jp|cl|ca|it|co\.uk|nz|ru|com\.au|at|pt|co\.kr|es|com\.mx|dk|ph|th|com\.uy|co|nl|info|kr|ie|vn|com\.vn|ec|mx|in|pe|co\.at|hu|co\.in|co\.nz|id|com\.ec|com\.py|tw|be|uk|com\.bo|com\.pe)' _VALID_URL_BASE = r'''(?x)
https?://(?:[^/]+\.)?pinterest\.(?:
com|fr|de|ch|jp|cl|ca|it|co\.uk|nz|ru|com\.au|at|pt|co\.kr|es|com\.mx|
dk|ph|th|com\.uy|co|nl|info|kr|ie|vn|com\.vn|ec|mx|in|pe|co\.at|hu|
co\.in|co\.nz|id|com\.ec|com\.py|tw|be|uk|com\.bo|com\.pe)'''
def _call_api(self, resource, video_id, options): def _call_api(self, resource, video_id, options):
return self._download_json( return self._download_json(
@ -24,14 +29,53 @@ def _call_api(self, resource, video_id, options):
def _extract_video(self, data, extract_formats=True): def _extract_video(self, data, extract_formats=True):
video_id = data['id'] video_id = data['id']
thumbnails = []
images = data.get('images')
if isinstance(images, dict):
for thumbnail_id, thumbnail in images.items():
if not isinstance(thumbnail, dict):
continue
thumbnail_url = url_or_none(thumbnail.get('url'))
if not thumbnail_url:
continue
thumbnails.append({
'url': thumbnail_url,
'width': int_or_none(thumbnail.get('width')),
'height': int_or_none(thumbnail.get('height')),
})
title = (data.get('title') or data.get('grid_title') or video_id).strip() info = {
'title': strip_or_none(traverse_obj(data, 'title', 'grid_title', default='')),
'description': traverse_obj(data, 'seo_description', 'description'),
'timestamp': unified_timestamp(data.get('created_at')),
'thumbnails': thumbnails,
'uploader': traverse_obj(data, ('closeup_attribution', 'full_name')),
'uploader_id': str_or_none(traverse_obj(data, ('closeup_attribution', 'id'))),
'repost_count': int_or_none(data.get('repin_count')),
'comment_count': int_or_none(data.get('comment_count')),
'categories': traverse_obj(data, ('pin_join', 'visual_annotation'), expected_type=list),
'tags': traverse_obj(data, 'hashtags', expected_type=list),
}
urls = [] urls = []
formats = [] formats = []
duration = None duration = None
if extract_formats: domain = data.get('domain', '')
for format_id, format_dict in data['videos']['video_list'].items(): if domain.lower() != 'uploaded by user' and traverse_obj(data, ('embed', 'src')):
if not info['title']:
info['title'] = None
return {
'_type': 'url_transparent',
'url': data['embed']['src'],
**info,
}
elif extract_formats:
video_list = traverse_obj(
data, ('videos', 'video_list'),
('story_pin_data', 'pages', ..., 'blocks', ..., 'video', 'video_list'),
expected_type=dict, get_all=False, default={})
for format_id, format_dict in video_list.items():
if not isinstance(format_dict, dict): if not isinstance(format_dict, dict):
continue continue
format_url = url_or_none(format_dict.get('url')) format_url = url_or_none(format_dict.get('url'))
@ -53,72 +97,79 @@ def _extract_video(self, data, extract_formats=True):
'duration': duration, 'duration': duration,
}) })
description = data.get('description') or data.get('description_html') or data.get('seo_description')
timestamp = unified_timestamp(data.get('created_at'))
def _u(field):
return try_get(data, lambda x: x['closeup_attribution'][field], compat_str)
uploader = _u('full_name')
uploader_id = _u('id')
repost_count = int_or_none(data.get('repin_count'))
comment_count = int_or_none(data.get('comment_count'))
categories = try_get(data, lambda x: x['pin_join']['visual_annotation'], list)
tags = data.get('hashtags')
thumbnails = []
images = data.get('images')
if isinstance(images, dict):
for thumbnail_id, thumbnail in images.items():
if not isinstance(thumbnail, dict):
continue
thumbnail_url = url_or_none(thumbnail.get('url'))
if not thumbnail_url:
continue
thumbnails.append({
'url': thumbnail_url,
'width': int_or_none(thumbnail.get('width')),
'height': int_or_none(thumbnail.get('height')),
})
return { return {
'id': video_id, 'id': video_id,
'title': title,
'description': description,
'duration': duration,
'timestamp': timestamp,
'thumbnails': thumbnails,
'uploader': uploader,
'uploader_id': uploader_id,
'repost_count': repost_count,
'comment_count': comment_count,
'categories': categories,
'tags': tags,
'formats': formats, 'formats': formats,
'duration': duration,
'webpage_url': f'https://www.pinterest.com/pin/{video_id}/',
'extractor_key': PinterestIE.ie_key(), 'extractor_key': PinterestIE.ie_key(),
'extractor': PinterestIE.IE_NAME,
**info,
} }
class PinterestIE(PinterestBaseIE): class PinterestIE(PinterestBaseIE):
_VALID_URL = r'%s/pin/(?P<id>\d+)' % PinterestBaseIE._VALID_URL_BASE _VALID_URL = r'%s/pin/(?P<id>\d+)' % PinterestBaseIE._VALID_URL_BASE
_TESTS = [{ _TESTS = [{
# formats found in data['videos']
'url': 'https://www.pinterest.com/pin/664281013778109217/', 'url': 'https://www.pinterest.com/pin/664281013778109217/',
'md5': '6550c2af85d6d9f3fe3b88954d1577fc', 'md5': '6550c2af85d6d9f3fe3b88954d1577fc',
'info_dict': { 'info_dict': {
'id': '664281013778109217', 'id': '664281013778109217',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Origami', 'title': 'Origami',
'description': 'md5:b9d90ddf7848e897882de9e73344f7dd', 'description': 'md5:e29801cab7d741ea8c741bc50c8d00ab',
'duration': 57.7, 'duration': 57.7,
'timestamp': 1593073622, 'timestamp': 1593073622,
'upload_date': '20200625', 'upload_date': '20200625',
'uploader': 'Love origami -I am Dafei', 'repost_count': int,
'uploader_id': '586523688879454212', 'comment_count': int,
'repost_count': 50,
'comment_count': 0,
'categories': list, 'categories': list,
'tags': list, 'tags': list,
'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
},
}, {
# formats found in data['story_pin_data']
'url': 'https://www.pinterest.com/pin/1084663891475263837/',
'md5': '069ac19919ab9e1e13fa60de46290b03',
'info_dict': {
'id': '1084663891475263837',
'ext': 'mp4',
'title': 'Gadget, Cool products, Amazon product, technology, Kitchen gadgets',
'description': 'md5:d0a4b6ae996ff0c6eed83bc869598d13',
'uploader': 'CoolCrazyGadgets',
'uploader_id': '1084664028912989237',
'upload_date': '20211003',
'timestamp': 1633246654.0,
'duration': 14.9,
'comment_count': int,
'repost_count': int,
'categories': 'count:9',
'tags': list,
'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
},
}, {
# vimeo.com embed
'url': 'https://www.pinterest.ca/pin/441282463481903715/',
'info_dict': {
'id': '111691128',
'ext': 'mp4',
'title': 'Tonite Let\'s All Make Love In London (1967)',
'description': 'md5:8190f37b3926807809ec57ec21aa77b2',
'uploader': 'Vimeo',
'uploader_id': '473792960706651251',
'upload_date': '20180120',
'timestamp': 1516409040,
'duration': 3404,
'comment_count': int,
'repost_count': int,
'categories': 'count:9',
'tags': [],
'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
'uploader_url': 'https://vimeo.com/willardandrade',
},
'params': {
'skip_download': 'm3u8',
}, },
}, { }, {
'url': 'https://co.pinterest.com/pin/824721750502199491/', 'url': 'https://co.pinterest.com/pin/824721750502199491/',

View File

@ -91,12 +91,12 @@ def _download_and_extract_formats(self, video_id, query=None):
class RutubeIE(RutubeBaseIE): class RutubeIE(RutubeBaseIE):
IE_NAME = 'rutube' IE_NAME = 'rutube'
IE_DESC = 'Rutube videos' IE_DESC = 'Rutube videos'
_VALID_URL = r'https?://rutube\.ru/(?:video|(?:play/)?embed)/(?P<id>[\da-z]{32})' _VALID_URL = r'https?://rutube\.ru/(?:video(?:/private)?|(?:play/)?embed)/(?P<id>[\da-z]{32})'
_EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//rutube\.ru/(?:play/)?embed/[\da-z]{32}.*?)\1'] _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//rutube\.ru/(?:play/)?embed/[\da-z]{32}.*?)\1']
_TESTS = [{ _TESTS = [{
'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/', 'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/',
'md5': '1d24f180fac7a02f3900712e5a5764d6', 'md5': 'e33ac625efca66aba86cbec9851f2692',
'info_dict': { 'info_dict': {
'id': '3eac3b4561676c17df9132a9a1e62e3e', 'id': '3eac3b4561676c17df9132a9a1e62e3e',
'ext': 'mp4', 'ext': 'mp4',
@ -108,6 +108,10 @@ class RutubeIE(RutubeBaseIE):
'timestamp': 1381943602, 'timestamp': 1381943602,
'upload_date': '20131016', 'upload_date': '20131016',
'age_limit': 0, 'age_limit': 0,
'view_count': int,
'thumbnail': 'http://pic.rutubelist.ru/video/d2/a0/d2a0aec998494a396deafc7ba2c82add.jpg',
'category': ['Новости и СМИ'],
}, },
}, { }, {
'url': 'http://rutube.ru/play/embed/a10e53b86e8f349080f718582ce4c661', 'url': 'http://rutube.ru/play/embed/a10e53b86e8f349080f718582ce4c661',
@ -121,6 +125,24 @@ class RutubeIE(RutubeBaseIE):
}, { }, {
'url': 'https://rutube.ru/video/10b3a03fc01d5bbcc632a2f3514e8aab/?pl_type=source', 'url': 'https://rutube.ru/video/10b3a03fc01d5bbcc632a2f3514e8aab/?pl_type=source',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://rutube.ru/video/private/884fb55f07a97ab673c7d654553e0f48/?p=x2QojCumHTS3rsKHWXN8Lg',
'md5': 'd106225f15d625538fe22971158e896f',
'info_dict': {
'id': '884fb55f07a97ab673c7d654553e0f48',
'ext': 'mp4',
'title': 'Яцуноками, Nioh2',
'description': 'Nioh2: финал сражения с боссом Яцуноками',
'duration': 15,
'uploader': 'mexus',
'uploader_id': '24222106',
'timestamp': 1670646232,
'upload_date': '20221210',
'age_limit': 0,
'view_count': int,
'thumbnail': 'http://pic.rutubelist.ru/video/f2/d4/f2d42b54be0a6e69c1c22539e3152156.jpg',
'category': ['Видеоигры'],
},
}] }]
@classmethod @classmethod
@ -129,8 +151,9 @@ def suitable(cls, url):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
info = self._download_and_extract_info(video_id) query = parse_qs(url)
info['formats'] = self._download_and_extract_formats(video_id) info = self._download_and_extract_info(video_id, query)
info['formats'] = self._download_and_extract_formats(video_id, query)
return info return info

View File

@ -1,92 +1,176 @@
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
bool_or_none,
smuggle_url, smuggle_url,
try_get, traverse_obj,
unified_timestamp,
url_or_none, url_or_none,
) )
class SlidesLiveIE(InfoExtractor): class SlidesLiveIE(InfoExtractor):
_VALID_URL = r'https?://slideslive\.com/(?P<id>[0-9]+)' _VALID_URL = r'https?://slideslive\.com/(?P<id>[0-9]+)'
_WORKING = False
_TESTS = [{ _TESTS = [{
# video_service_name = YOUTUBE # service_name = yoda
'url': 'https://slideslive.com/38902413/gcc-ia16-backend', 'url': 'https://slideslive.com/38902413/gcc-ia16-backend',
'md5': 'b29fcd6c6952d0c79c5079b0e7a07e6f',
'info_dict': { 'info_dict': {
'id': 'LMtgR8ba0b0', 'id': '38902413',
'ext': 'mp4', 'ext': 'mp4',
'title': 'GCC IA16 backend', 'title': 'GCC IA16 backend',
'description': 'Watch full version of this video at https://slideslive.com/38902413.', 'timestamp': 1648189972,
'uploader': 'SlidesLive Videos - A', 'upload_date': '20220325',
'uploader_id': 'UC62SdArr41t_-_fX40QCLRw', 'thumbnail': r're:^https?://.*\.jpg',
'timestamp': 1597615266, },
'upload_date': '20170925', 'params': {
} 'skip_download': 'm3u8',
}, {
# video_service_name = yoda
'url': 'https://slideslive.com/38935785',
'md5': '575cd7a6c0acc6e28422fe76dd4bcb1a',
'info_dict': {
'id': 'RMraDYN5ozA_',
'ext': 'mp4',
'title': 'Offline Reinforcement Learning: From Algorithms to Practical Challenges',
}, },
}, { }, {
# video_service_name = youtube # service_name = yoda
'url': 'https://slideslive.com/38935785',
'info_dict': {
'id': '38935785',
'ext': 'mp4',
'title': 'Offline Reinforcement Learning: From Algorithms to Practical Challenges',
'upload_date': '20211115',
'timestamp': 1636996003,
'thumbnail': r're:^https?://.*\.jpg',
},
'params': {
'skip_download': 'm3u8',
},
}, {
# service_name = yoda
'url': 'https://slideslive.com/38973182/how-should-a-machine-learning-researcher-think-about-ai-ethics',
'info_dict': {
'id': '38973182',
'ext': 'mp4',
'title': 'How Should a Machine Learning Researcher Think About AI Ethics?',
'upload_date': '20220201',
'thumbnail': r're:^https?://.*\.jpg',
'timestamp': 1643728135,
},
'params': {
'skip_download': 'm3u8',
},
}, {
# service_name = youtube
'url': 'https://slideslive.com/38897546/special-metaprednaska-petra-ludwiga-hodnoty-pro-lepsi-spolecnost',
'md5': '8a79b5e3d700837f40bd2afca3c8fa01',
'info_dict': {
'id': 'jmg02wCJD5M',
'display_id': '38897546',
'ext': 'mp4',
'title': 'SPECIÁL: Meta-přednáška Petra Ludwiga - Hodnoty pro lepší společnost',
'description': 'Watch full version of this video at https://slideslive.com/38897546.',
'channel_url': 'https://www.youtube.com/channel/UCZWdAkNYFncuX0khyvhqnxw',
'channel': 'SlidesLive Videos - G1',
'channel_id': 'UCZWdAkNYFncuX0khyvhqnxw',
'uploader_id': 'UCZWdAkNYFncuX0khyvhqnxw',
'uploader': 'SlidesLive Videos - G1',
'uploader_url': 'http://www.youtube.com/channel/UCZWdAkNYFncuX0khyvhqnxw',
'live_status': 'not_live',
'upload_date': '20160710',
'timestamp': 1618786715,
'duration': 6827,
'like_count': int,
'view_count': int,
'comment_count': int,
'channel_follower_count': int,
'age_limit': 0,
'thumbnail': r're:^https?://.*\.jpg',
'playable_in_embed': True,
'availability': 'unlisted',
'tags': [],
'categories': ['People & Blogs'],
},
}, {
# service_name = youtube
'url': 'https://slideslive.com/38903721/magic-a-scientific-resurrection-of-an-esoteric-legend', 'url': 'https://slideslive.com/38903721/magic-a-scientific-resurrection-of-an-esoteric-legend',
'only_matching': True, 'only_matching': True,
}, { }, {
# video_service_name = url # service_name = url
'url': 'https://slideslive.com/38922070/learning-transferable-skills-1', 'url': 'https://slideslive.com/38922070/learning-transferable-skills-1',
'only_matching': True, 'only_matching': True,
}, { }, {
# video_service_name = vimeo # service_name = vimeo
'url': 'https://slideslive.com/38921896/retrospectives-a-venue-for-selfreflection-in-ml-research-3', 'url': 'https://slideslive.com/38921896/retrospectives-a-venue-for-selfreflection-in-ml-research-3',
'only_matching': True, 'only_matching': True,
}] }]
def _extract_custom_m3u8_info(self, m3u8_data):
    """Collect the custom ``#EXT-SL-*`` tags found in ``m3u8_data``.

    Every line that starts with ``#EXT-SL-`` is split at its first ``:``
    into a tag and a value. Tags present in the lookup table below are
    stored under their mapped key; all other lines and tags are ignored.
    The ``video_servers`` and ``subtitles`` values are then replaced by the
    result of ``self._parse_json`` (or ``[]`` when that result is falsy).

    @param m3u8_data  Manifest text (str)
    @returns          dict mapping the keys of the lookup table's values
                      to the extracted tag values
    """
    prefix = '#EXT-SL-'
    lookup = {
        'PRESENTATION-TITLE': 'title',
        'PRESENTATION-UPDATED-AT': 'timestamp',
        'PRESENTATION-THUMBNAIL': 'thumbnail',
        'PLAYLIST-TYPE': 'playlist_type',
        'VOD-VIDEO-SERVICE-NAME': 'service_name',
        'VOD-VIDEO-ID': 'service_id',
        'VOD-VIDEO-SERVERS': 'video_servers',
        'VOD-SUBTITLES': 'subtitles',
    }

    m3u8_dict = {}
    for line in m3u8_data.splitlines():
        if not line.startswith(prefix):
            continue
        tag, _, value = line.partition(':')
        # Slice the exact prefix off. str.lstrip() treats its argument as a
        # set of characters and would also eat leading '#EXTSL-' characters
        # of the tag name itself, turning unknown tags into known ones.
        key = lookup.get(tag[len(prefix):])
        if not key:
            continue
        m3u8_dict[key] = value

    # Some values are stringified JSON arrays
    for key in ('video_servers', 'subtitles'):
        if key in m3u8_dict:
            m3u8_dict[key] = self._parse_json(m3u8_dict[key], None, fatal=False) or []

    return m3u8_dict
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
video_data = self._download_json( webpage = self._download_webpage(url, video_id)
'https://ben.slideslive.com/player/' + video_id, video_id) player_token = self._search_regex(r'data-player-token="([^"]+)"', webpage, 'player token')
service_name = video_data['video_service_name'].lower() player_data = self._download_webpage(
f'https://ben.slideslive.com/player/{video_id}', video_id,
note='Downloading player info', query={'player_token': player_token})
player_info = self._extract_custom_m3u8_info(player_data)
service_name = player_info['service_name'].lower()
assert service_name in ('url', 'yoda', 'vimeo', 'youtube') assert service_name in ('url', 'yoda', 'vimeo', 'youtube')
service_id = video_data['video_service_id'] service_id = player_info['service_id']
subtitles = {} subtitles = {}
for sub in try_get(video_data, lambda x: x['subtitles'], list) or []: for sub in traverse_obj(player_info, ('subtitles', ...), expected_type=dict):
if not isinstance(sub, dict):
continue
webvtt_url = url_or_none(sub.get('webvtt_url')) webvtt_url = url_or_none(sub.get('webvtt_url'))
if not webvtt_url: if not webvtt_url:
continue continue
lang = sub.get('language') or 'en' subtitles.setdefault(sub.get('language') or 'en', []).append({
subtitles.setdefault(lang, []).append({
'url': webvtt_url, 'url': webvtt_url,
'ext': 'vtt',
}) })
info = { info = {
'id': video_id, 'id': video_id,
'thumbnail': video_data.get('thumbnail'), 'title': player_info.get('title') or self._html_search_meta('title', webpage, default=''),
'is_live': bool_or_none(video_data.get('is_live')), 'timestamp': unified_timestamp(player_info.get('timestamp')),
'is_live': player_info.get('playlist_type') != 'vod',
'thumbnail': url_or_none(player_info.get('thumbnail')),
'subtitles': subtitles, 'subtitles': subtitles,
} }
if service_name in ('url', 'yoda'): if service_name in ('url', 'yoda'):
info['title'] = video_data['title']
if service_name == 'url': if service_name == 'url':
info['url'] = service_id info['url'] = service_id
else: else:
cdn_hostname = player_info['video_servers'][0]
formats = [] formats = []
_MANIFEST_PATTERN = 'https://01.cdn.yoda.slideslive.com/%s/master.%s'
# use `m3u8` entry_protocol until EXT-X-MAP is properly supported by `m3u8_native` entry_protocol
formats.extend(self._extract_m3u8_formats( formats.extend(self._extract_m3u8_formats(
_MANIFEST_PATTERN % (service_id, 'm3u8'), f'https://{cdn_hostname}/{service_id}/master.m3u8',
service_id, 'mp4', m3u8_id='hls', fatal=False)) video_id, 'mp4', m3u8_id='hls', fatal=False, live=True))
formats.extend(self._extract_mpd_formats( formats.extend(self._extract_mpd_formats(
_MANIFEST_PATTERN % (service_id, 'mpd'), service_id, f'https://{cdn_hostname}/{service_id}/master.mpd',
mpd_id='dash', fatal=False)) video_id, mpd_id='dash', fatal=False))
info.update({ info.update({
'id': service_id,
'formats': formats, 'formats': formats,
}) })
else: else:
@ -94,10 +178,11 @@ def _real_extract(self, url):
'_type': 'url_transparent', '_type': 'url_transparent',
'url': service_id, 'url': service_id,
'ie_key': service_name.capitalize(), 'ie_key': service_name.capitalize(),
'title': video_data.get('title'), 'display_id': video_id,
}) })
if service_name == 'vimeo': if service_name == 'vimeo':
info['url'] = smuggle_url( info['url'] = smuggle_url(
'https://player.vimeo.com/video/' + service_id, f'https://player.vimeo.com/video/{service_id}',
{'http_headers': {'Referer': url}}) {'http_headers': {'Referer': url}})
return info return info

View File

@ -293,7 +293,7 @@ def _real_extract(self, url):
class TwitterIE(TwitterBaseIE): class TwitterIE(TwitterBaseIE):
IE_NAME = 'twitter' IE_NAME = 'twitter'
_VALID_URL = TwitterBaseIE._BASE_REGEX + r'(?:(?:i/web|[^/]+)/status|statuses)/(?P<id>\d+)' _VALID_URL = TwitterBaseIE._BASE_REGEX + r'(?:(?:i/web|[^/]+)/status|statuses)/(?P<id>\d+)(?:/video/(?P<index>\d+))?'
_TESTS = [{ _TESTS = [{
'url': 'https://twitter.com/freethenipple/status/643211948184596480', 'url': 'https://twitter.com/freethenipple/status/643211948184596480',
@ -336,7 +336,7 @@ class TwitterIE(TwitterBaseIE):
'id': '665052190608723968', 'id': '665052190608723968',
'display_id': '665052190608723968', 'display_id': '665052190608723968',
'ext': 'mp4', 'ext': 'mp4',
'title': 'md5:55fef1d5b811944f1550e91b44abb82e', 'title': 'md5:e99588f17b3dd0503814ffb560e64731',
'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ', 'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ',
'uploader_id': 'starwars', 'uploader_id': 'starwars',
'uploader': r're:Star Wars.*', 'uploader': r're:Star Wars.*',
@ -648,7 +648,7 @@ class TwitterIE(TwitterBaseIE):
'uploader_url': 'https://twitter.com/Rizdraws', 'uploader_url': 'https://twitter.com/Rizdraws',
'upload_date': '20220928', 'upload_date': '20220928',
'timestamp': 1664391723, 'timestamp': 1664391723,
'thumbnail': 're:^https?://.*\\.jpg', 'thumbnail': r're:^https?://.+\.jpg',
'like_count': int, 'like_count': int,
'repost_count': int, 'repost_count': int,
'comment_count': int, 'comment_count': int,
@ -727,6 +727,48 @@ class TwitterIE(TwitterBaseIE):
}, },
'add_ie': ['TwitterSpaces'], 'add_ie': ['TwitterSpaces'],
'params': {'skip_download': 'm3u8'}, 'params': {'skip_download': 'm3u8'},
}, {
# URL specifies video number but --yes-playlist
'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/1',
'playlist_mincount': 2,
'info_dict': {
'id': '1600649710662213632',
'title': 'md5:be05989b0722e114103ed3851a0ffae2',
'timestamp': 1670459604.0,
'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
'comment_count': int,
'uploader_id': 'CTVJLaidlaw',
'repost_count': int,
'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
'upload_date': '20221208',
'age_limit': 0,
'uploader': 'Jocelyn Laidlaw',
'uploader_url': 'https://twitter.com/CTVJLaidlaw',
'like_count': int,
},
}, {
# URL specifies video number and --no-playlist
'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/2',
'info_dict': {
'id': '1600649511827013632',
'ext': 'mp4',
'title': 'md5:be05989b0722e114103ed3851a0ffae2',
'thumbnail': r're:^https?://.+\.jpg',
'timestamp': 1670459604.0,
'uploader_id': 'CTVJLaidlaw',
'uploader': 'Jocelyn Laidlaw',
'repost_count': int,
'comment_count': int,
'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
'duration': 102.226,
'uploader_url': 'https://twitter.com/CTVJLaidlaw',
'display_id': '1600649710662213632',
'like_count': int,
'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
'upload_date': '20221208',
'age_limit': 0,
},
'params': {'noplaylist': True},
}, { }, {
# onion route # onion route
'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273', 'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
@ -828,7 +870,7 @@ def _build_graphql_query(self, media_id):
} }
def _real_extract(self, url): def _real_extract(self, url):
twid = self._match_id(url) twid, selected_index = self._match_valid_url(url).group('id', 'index')
if self.is_logged_in or self._configuration_arg('force_graphql'): if self.is_logged_in or self._configuration_arg('force_graphql'):
self.write_debug(f'Using GraphQL API (Auth = {self.is_logged_in})') self.write_debug(f'Using GraphQL API (Auth = {self.is_logged_in})')
result = self._call_graphql_api('zZXycP0V6H7m-2r0mOnFcA/TweetDetail', twid) result = self._call_graphql_api('zZXycP0V6H7m-2r0mOnFcA/TweetDetail', twid)
@ -998,6 +1040,13 @@ def get_binding_value(k):
entries[0]['_old_archive_ids'] = [make_archive_id(self, twid)] entries[0]['_old_archive_ids'] = [make_archive_id(self, twid)]
if not self._yes_playlist(twid, selected_index, video_label='URL-specified video number'):
index = int(selected_index) - 1
if index >= len(entries):
raise ExtractorError(f'Video #{selected_index} is unavailable', expected=True)
return entries[index]
if len(entries) == 1: if len(entries) == 1:
return entries[0] return entries[0]

View File

@ -2,40 +2,42 @@
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
float_or_none,
ExtractorError, ExtractorError,
float_or_none,
smuggle_url,
traverse_obj,
unsmuggle_url,
update_url_query,
) )
class UplynkIE(InfoExtractor): class UplynkBaseIE(InfoExtractor):
IE_NAME = 'uplynk' _UPLYNK_URL_RE = r'''(?x)
_VALID_URL = r'https?://.*?\.uplynk\.com/(?P<path>ext/[0-9a-f]{32}/(?P<external_id>[^/?&]+)|(?P<id>[0-9a-f]{32}))\.(?:m3u8|json)(?:.*?\bpbs=(?P<session_id>[^&]+))?' https?://[\w-]+\.uplynk\.com/(?P<path>
_TEST = { ext/[0-9a-f]{32}/(?P<external_id>[^/?&]+)|
'url': 'http://content.uplynk.com/e89eaf2ce9054aa89d92ddb2d817a52e.m3u8', (?P<id>[0-9a-f]{32})
'info_dict': { )\.(?:m3u8|json)
'id': 'e89eaf2ce9054aa89d92ddb2d817a52e', (?:.*?\bpbs=(?P<session_id>[^&]+))?'''
'ext': 'mp4',
'title': '030816-kgo-530pm-solar-eclipse-vid_web.mp4',
'uploader_id': '4413701bf5a1488db55b767f8ae9d4fa',
},
'params': {
# m3u8 download
'skip_download': True,
},
}
def _extract_uplynk_info(self, uplynk_content_url): def _extract_uplynk_info(self, url):
path, external_id, video_id, session_id = re.match(UplynkIE._VALID_URL, uplynk_content_url).groups() uplynk_content_url, smuggled_data = unsmuggle_url(url, {})
mobj = re.match(self._UPLYNK_URL_RE, uplynk_content_url)
if not mobj:
raise ExtractorError('Necessary parameters not found in Uplynk URL')
path, external_id, video_id, session_id = mobj.group('path', 'external_id', 'id', 'session_id')
display_id = video_id or external_id display_id = video_id or external_id
headers = traverse_obj(
smuggled_data, {'Referer': 'Referer', 'Origin': 'Origin'}, casesense=False)
formats, subtitles = self._extract_m3u8_formats_and_subtitles( formats, subtitles = self._extract_m3u8_formats_and_subtitles(
'http://content.uplynk.com/%s.m3u8' % path, f'http://content.uplynk.com/{path}.m3u8', display_id, 'mp4', headers=headers)
display_id, 'mp4', 'm3u8_native')
if session_id: if session_id:
for f in formats: for f in formats:
f['extra_param_to_segment_url'] = 'pbs=' + session_id f['extra_param_to_segment_url'] = f'pbs={session_id}'
asset = self._download_json('http://content.uplynk.com/player/assetinfo/%s.json' % path, display_id) asset = self._download_json(
f'http://content.uplynk.com/player/assetinfo/{path}.json', display_id)
if asset.get('error') == 1: if asset.get('error') == 1:
raise ExtractorError('% said: %s' % (self.IE_NAME, asset['msg']), expected=True) msg = asset.get('msg') or 'unknown error'
raise ExtractorError(f'{self.IE_NAME} said: {msg}', expected=True)
return { return {
'id': asset['asset'], 'id': asset['asset'],
@ -47,20 +49,40 @@ def _extract_uplynk_info(self, uplynk_content_url):
'subtitles': subtitles, 'subtitles': subtitles,
} }
class UplynkIE(UplynkBaseIE):
IE_NAME = 'uplynk'
_VALID_URL = UplynkBaseIE._UPLYNK_URL_RE
_TEST = {
'url': 'http://content.uplynk.com/e89eaf2ce9054aa89d92ddb2d817a52e.m3u8',
'info_dict': {
'id': 'e89eaf2ce9054aa89d92ddb2d817a52e',
'ext': 'mp4',
'title': '030816-kgo-530pm-solar-eclipse-vid_web.mp4',
'uploader_id': '4413701bf5a1488db55b767f8ae9d4fa',
'duration': 530.2739166666679,
'thumbnail': r're:^https?://.*\.jpg$',
},
'params': {
'skip_download': 'm3u8',
},
}
def _real_extract(self, url): def _real_extract(self, url):
return self._extract_uplynk_info(url) return self._extract_uplynk_info(url)
class UplynkPreplayIE(UplynkIE): # XXX: Do not subclass from concrete IE class UplynkPreplayIE(UplynkBaseIE):
IE_NAME = 'uplynk:preplay' IE_NAME = 'uplynk:preplay'
_VALID_URL = r'https?://.*?\.uplynk\.com/preplay2?/(?P<path>ext/[0-9a-f]{32}/(?P<external_id>[^/?&]+)|(?P<id>[0-9a-f]{32}))\.json' _VALID_URL = r'https?://[\w-]+\.uplynk\.com/preplay2?/(?P<path>ext/[0-9a-f]{32}/(?P<external_id>[^/?&]+)|(?P<id>[0-9a-f]{32}))\.json'
def _real_extract(self, url): def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
path, external_id, video_id = self._match_valid_url(url).groups() path, external_id, video_id = self._match_valid_url(url).groups()
display_id = video_id or external_id display_id = video_id or external_id
preplay = self._download_json(url, display_id) preplay = self._download_json(url, display_id)
content_url = 'http://content.uplynk.com/%s.m3u8' % path content_url = f'http://content.uplynk.com/{path}.m3u8'
session_id = preplay.get('sid') session_id = preplay.get('sid')
if session_id: if session_id:
content_url += '?pbs=' + session_id content_url = update_url_query(content_url, {'pbs': session_id})
return self._extract_uplynk_info(content_url) return self._extract_uplynk_info(smuggle_url(content_url, smuggled_data))

View File

@ -4382,6 +4382,25 @@ def _extract_basic_item_renderer(item):
elif key.startswith('grid') and key.endswith('Renderer'): elif key.startswith('grid') and key.endswith('Renderer'):
return renderer return renderer
def _extract_channel_renderer(self, renderer):
    """Build a ``url``-type result for the channel described by ``renderer``.

    ``renderer['channelId']`` is read with a plain subscript, so it must be
    present. The remaining fields are obtained through the ``_get_text``,
    ``_get_count`` and ``_extract_thumbnails`` helpers and are passed
    through as returned by them.
    """
    cid = renderer['channelId']
    name = self._get_text(renderer, 'title')
    page_url = f'https://www.youtube.com/channel/{cid}'

    # Routing information: which URL to hand over and to which extractor
    entry = {
        '_type': 'url',
        'url': page_url,
        'id': cid,
        'ie_key': YoutubeTabIE.ie_key(),
    }

    # Channel identity; the channel name doubles as the entry title
    entry['channel'] = name
    entry['channel_id'] = cid
    entry['channel_url'] = page_url
    entry['title'] = name

    # Counters, artwork and description snippet taken from the renderer
    entry['channel_follower_count'] = self._get_count(renderer, 'subscriberCountText')
    entry['thumbnails'] = self._extract_thumbnails(renderer, 'thumbnail')
    entry['playlist_count'] = self._get_count(renderer, 'videoCountText')
    entry['description'] = self._get_text(renderer, 'descriptionSnippet')
    return entry
def _grid_entries(self, grid_renderer): def _grid_entries(self, grid_renderer):
for item in grid_renderer['items']: for item in grid_renderer['items']:
if not isinstance(item, dict): if not isinstance(item, dict):
@ -4407,9 +4426,7 @@ def _grid_entries(self, grid_renderer):
# channel # channel
channel_id = renderer.get('channelId') channel_id = renderer.get('channelId')
if channel_id: if channel_id:
yield self.url_result( yield self._extract_channel_renderer(renderer)
'https://www.youtube.com/channel/%s' % channel_id,
ie=YoutubeTabIE.ie_key(), video_title=title)
continue continue
# generic endpoint URL support # generic endpoint URL support
ep_url = urljoin('https://www.youtube.com/', try_get( ep_url = urljoin('https://www.youtube.com/', try_get(
@ -5762,7 +5779,6 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'uploader': 'cole-dlp-test-acc', 'uploader': 'cole-dlp-test-acc',
'channel_id': 'UCiu-3thuViMebBjw_5nWYrA', 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
'channel': 'cole-dlp-test-acc', 'channel': 'cole-dlp-test-acc',
'channel_follower_count': int,
}, },
'playlist_mincount': 1, 'playlist_mincount': 1,
'params': {'extractor_args': {'youtube': {'lang': ['ja']}}}, 'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},
@ -5930,7 +5946,6 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'title': 'cole-dlp-test-acc - Shorts', 'title': 'cole-dlp-test-acc - Shorts',
'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA', 'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
'channel': 'cole-dlp-test-acc', 'channel': 'cole-dlp-test-acc',
'channel_follower_count': int,
'description': 'test description', 'description': 'test description',
'channel_id': 'UCiu-3thuViMebBjw_5nWYrA', 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA', 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
@ -5976,8 +5991,40 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'channel': str, 'channel': str,
} }
}], }],
'params': {'extract_flat': True}, 'params': {'extract_flat': True, 'playlist_items': '1'},
'playlist_mincount': 1 'playlist_mincount': 1
}, {
# Channel renderer metadata. Contains number of videos on the channel
'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/channels',
'info_dict': {
'id': 'UCiu-3thuViMebBjw_5nWYrA',
'title': 'cole-dlp-test-acc - Channels',
'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
'channel': 'cole-dlp-test-acc',
'description': 'test description',
'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
'tags': [],
'uploader': 'cole-dlp-test-acc',
'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
},
'playlist': [{
'info_dict': {
'_type': 'url',
'ie_key': 'YoutubeTab',
'url': 'https://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw',
'id': 'UC-lHJZR3Gqxm24_Vd_AJ5Yw',
'channel_id': 'UC-lHJZR3Gqxm24_Vd_AJ5Yw',
'title': 'PewDiePie',
'channel': 'PewDiePie',
'channel_url': 'https://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw',
'thumbnails': list,
'channel_follower_count': int,
'playlist_count': int
}
}],
'params': {'extract_flat': True},
}] }]
@classmethod @classmethod
@ -6531,6 +6578,30 @@ class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
# 'title': '#cats', # 'title': '#cats',
# }], # }],
}, },
}, {
# Channel results
'url': 'https://www.youtube.com/results?search_query=kurzgesagt&sp=EgIQAg%253D%253D',
'info_dict': {
'id': 'kurzgesagt',
'title': 'kurzgesagt',
},
'playlist': [{
'info_dict': {
'_type': 'url',
'id': 'UCsXVk37bltHxD1rDPwtNM8Q',
'url': 'https://www.youtube.com/channel/UCsXVk37bltHxD1rDPwtNM8Q',
'ie_key': 'YoutubeTab',
'channel': 'Kurzgesagt In a Nutshell',
'description': 'md5:4ae48dfa9505ffc307dad26342d06bfc',
'title': 'Kurzgesagt In a Nutshell',
'channel_id': 'UCsXVk37bltHxD1rDPwtNM8Q',
'playlist_count': int, # XXX: should have a way of saying > 1
'channel_url': 'https://www.youtube.com/channel/UCsXVk37bltHxD1rDPwtNM8Q',
'thumbnails': list
}
}],
'params': {'extract_flat': True, 'playlist_items': '1'},
'playlist_mincount': 1,
}, { }, {
'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB', 'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
'only_matching': True, 'only_matching': True,