Merge branch 'yt-dlp:master' into patch-1

This commit is contained in:
joaquinito2070 2022-12-15 18:41:40 +01:00 committed by GitHub
commit 0806b5a266
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 825 additions and 187 deletions

View File

@ -342,7 +342,6 @@ def can_merge_formats(cls, info_dict, params):
and cls.can_download(info_dict))
def _call_downloader(self, tmpfilename, info_dict):
urls = [f['url'] for f in info_dict.get('requested_formats', [])] or [info_dict['url']]
ffpp = FFmpegPostProcessor(downloader=self)
if not ffpp.available:
self.report_error('m3u8 download detected but ffmpeg could not be found. Please install')
@ -372,16 +371,6 @@ def _call_downloader(self, tmpfilename, info_dict):
# http://trac.ffmpeg.org/ticket/6125#comment:10
args += ['-seekable', '1' if seekable else '0']
http_headers = None
if info_dict.get('http_headers'):
youtubedl_headers = handle_youtubedl_headers(info_dict['http_headers'])
http_headers = [
# Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv:
# [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header.
'-headers',
''.join(f'{key}: {val}\r\n' for key, val in youtubedl_headers.items())
]
env = None
proxy = self.params.get('proxy')
if proxy:
@ -434,21 +423,26 @@ def _call_downloader(self, tmpfilename, info_dict):
start_time, end_time = info_dict.get('section_start') or 0, info_dict.get('section_end')
for i, url in enumerate(urls):
if http_headers is not None and re.match(r'^https?://', url):
args += http_headers
selected_formats = info_dict.get('requested_formats') or [info_dict]
for i, fmt in enumerate(selected_formats):
if fmt.get('http_headers') and re.match(r'^https?://', fmt['url']):
headers_dict = handle_youtubedl_headers(fmt['http_headers'])
# Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv:
# [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header.
args.extend(['-headers', ''.join(f'{key}: {val}\r\n' for key, val in headers_dict.items())])
if start_time:
args += ['-ss', str(start_time)]
if end_time:
args += ['-t', str(end_time - start_time)]
args += self._configuration_args((f'_i{i + 1}', '_i')) + ['-i', url]
args += self._configuration_args((f'_i{i + 1}', '_i')) + ['-i', fmt['url']]
if not (start_time or end_time) or not self.params.get('force_keyframes_at_cuts'):
args += ['-c', 'copy']
if info_dict.get('requested_formats') or protocol == 'http_dash_segments':
for (i, fmt) in enumerate(info_dict.get('requested_formats') or [info_dict]):
for i, fmt in enumerate(selected_formats):
stream_number = fmt.get('manifest_stream_number', 0)
args.extend(['-map', f'{i}:{stream_number}'])
@ -488,8 +482,9 @@ def _call_downloader(self, tmpfilename, info_dict):
args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True))
self._debug_cmd(args)
piped = any(fmt['url'] in ('-', 'pipe:') for fmt in selected_formats)
with Popen(args, stdin=subprocess.PIPE, env=env) as proc:
if url in ('-', 'pipe:'):
if piped:
self.on_process_started(proc, proc.stdin)
try:
retval = proc.wait()
@ -499,7 +494,7 @@ def _call_downloader(self, tmpfilename, info_dict):
# produces a file that is playable (this is mostly useful for live
# streams). Note that Windows is not affected and produces playable
# files (see https://github.com/ytdl-org/youtube-dl/issues/8300).
if isinstance(e, KeyboardInterrupt) and sys.platform != 'win32' and url not in ('-', 'pipe:'):
if isinstance(e, KeyboardInterrupt) and sys.platform != 'win32' and not piped:
proc.communicate_or_kill(b'q')
else:
proc.kill(timeout=None)

View File

@ -78,6 +78,7 @@
WyborczaVideoIE,
)
from .airmozilla import AirMozillaIE
from .airtv import AirTVIE
from .aljazeera import AlJazeeraIE
from .alphaporno import AlphaPornoIE
from .amara import AmaraIE
@ -536,7 +537,7 @@
ESPNCricInfoIE,
)
from .esri import EsriVideoIE
from .europa import EuropaIE
from .europa import EuropaIE, EuroParlWebstreamIE
from .europeantour import EuropeanTourIE
from .eurosport import EurosportIE
from .euscreen import EUScreenIE
@ -1281,6 +1282,7 @@
from .ondemandkorea import OnDemandKoreaIE
from .onefootball import OneFootballIE
from .onenewsnz import OneNewsNZIE
from .oneplace import OnePlacePodcastIE
from .onet import (
OnetIE,
OnetChannelIE,

96
yt_dlp/extractor/airtv.py Normal file
View File

@ -0,0 +1,96 @@
from .common import InfoExtractor
from .youtube import YoutubeIE
from ..utils import (
determine_ext,
int_or_none,
mimetype2ext,
parse_iso8601,
traverse_obj
)
class AirTVIE(InfoExtractor):
    """Extractor for ``https://www.air.tv/watch?v=<id>`` pages.

    Pages whose metadata carries a ``youtube_id`` are handed over to
    ``YoutubeIE``; every other page is extracted from the source entries
    found in the page's Next.js data.
    """

    _VALID_URL = r'https?://www\.air\.tv/watch\?v=(?P<id>\w+)'
    _TESTS = [{
        # without youtube_id
        'url': 'https://www.air.tv/watch?v=W87jcWleSn2hXZN47zJZsQ',
        'info_dict': {
            'id': 'W87jcWleSn2hXZN47zJZsQ',
            'ext': 'mp4',
            'release_date': '20221003',
            'release_timestamp': 1664792603,
            'channel_id': 'vgfManQlRQKgoFQ8i8peFQ',
            'title': 'md5:c12d49ed367c3dadaa67659aff43494c',
            'upload_date': '20221003',
            'duration': 151,
            'view_count': int,
            'thumbnail': 'https://cdn-sp-gcs.air.tv/videos/W/8/W87jcWleSn2hXZN47zJZsQ/b13fc56464f47d9d62a36d110b9b5a72-4096x2160_9.jpg',
            'timestamp': 1664792603,
        }
    }, {
        # with youtube_id
        'url': 'https://www.air.tv/watch?v=sv57EC8tRXG6h8dNXFUU1Q',
        'info_dict': {
            'id': '2ZTqmpee-bQ',
            'ext': 'mp4',
            'comment_count': int,
            'tags': 'count:11',
            'channel_follower_count': int,
            'like_count': int,
            'uploader': 'Newsflare',
            'thumbnail': 'https://i.ytimg.com/vi_webp/2ZTqmpee-bQ/maxresdefault.webp',
            'availability': 'public',
            'title': 'Geese Chase Alligator Across Golf Course',
            'uploader_id': 'NewsflareBreaking',
            'channel_url': 'https://www.youtube.com/channel/UCzSSoloGEz10HALUAbYhngQ',
            'description': 'md5:99b21d9cea59330149efbd9706e208f5',
            'age_limit': 0,
            'channel_id': 'UCzSSoloGEz10HALUAbYhngQ',
            'uploader_url': 'http://www.youtube.com/user/NewsflareBreaking',
            'view_count': int,
            'categories': ['News & Politics'],
            'live_status': 'not_live',
            'playable_in_embed': True,
            'channel': 'Newsflare',
            'duration': 37,
            'upload_date': '20180511',
        }
    }]

    def _get_formats_and_subtitle(self, json_data, video_id):
        """Build the format list and subtitle map from the video's sources.

        Entries whose extension resolves to ``m3u8`` are expanded through
        ``_extract_m3u8_formats_and_subtitles``; any other entry becomes a
        single direct format. Entries that are not dicts or have no ``src``
        are skipped so no format with an empty URL is produced.

        Returns a ``(formats, subtitles)`` tuple.
        """
        formats, subtitles = [], {}
        # NOTE(review): 'sources', 'sources_desktop' and `...` are passed as
        # separate path arguments; presumably they are tried in that order as
        # alternatives - confirm against traverse_obj's documentation.
        sources = traverse_obj(json_data, 'sources', 'sources_desktop', ...) or []
        for source in sources:
            if not isinstance(source, dict):
                continue
            src = source.get('src')
            if not src:
                continue
            ext = determine_ext(src, mimetype2ext(source.get('type')))
            if ext == 'm3u8':
                fmts, subs = self._extract_m3u8_formats_and_subtitles(src, video_id)
                formats.extend(fmts)
                self._merge_subtitles(subs, target=subtitles)
            else:
                formats.append({'url': src, 'ext': ext})
        return formats, subtitles

    def _real_extract(self, url):
        """Extract the video described by *url*, or delegate it to YouTube."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        nextjs_json = self._search_nextjs_data(webpage, display_id)['props']['pageProps']['initialState']['videos'][display_id]

        youtube_id = nextjs_json.get('youtube_id')
        if youtube_id:
            return self.url_result(
                f'https://www.youtube.com/watch?v={youtube_id}', YoutubeIE)

        formats, subtitles = self._get_formats_and_subtitle(nextjs_json, display_id)
        return {
            'id': display_id,
            'title': nextjs_json.get('title') or self._html_search_meta('og:title', webpage),
            'formats': formats,
            'subtitles': subtitles,
            # Normalise an empty description to None.
            'description': nextjs_json.get('description') or None,
            'duration': int_or_none(nextjs_json.get('duration')),
            'thumbnails': [
                {'url': thumbnail}
                # `or []` keeps the comprehension safe if nothing is found.
                for thumbnail in traverse_obj(nextjs_json, ('default_thumbnails', ...)) or []],
            'channel_id': traverse_obj(nextjs_json, 'channel', 'channel_slug'),
            'timestamp': parse_iso8601(nextjs_json.get('created')),
            'release_timestamp': parse_iso8601(nextjs_json.get('published')),
            'view_count': int_or_none(nextjs_json.get('views')),
        }

View File

@ -3,6 +3,7 @@
int_or_none,
orderedSet,
parse_duration,
parse_iso8601,
parse_qs,
qualities,
unified_strdate,
@ -87,3 +88,86 @@ def get_item(type_, preference):
'view_count': view_count,
'formats': formats
}
class EuroParlWebstreamIE(InfoExtractor):
    """Extractor for European Parliament webstreaming pages and embeds.

    The event identifier is taken from the URL and resolved through the
    ``vis-api.vuplay.co.uk`` event endpoint, whose response supplies the
    streaming URL and the event metadata.
    """

    _VALID_URL = r'''(?x)
        https?://(?:multimedia|webstreaming)\.europarl\.europa\.eu/[^/#?]+/
        (?:embed/embed\.html\?event=|(?!video)[^/#?]+/[\w-]+_)(?P<id>[\w-]+)
    '''
    _TESTS = [{
        'url': 'https://multimedia.europarl.europa.eu/pl/webstreaming/plenary-session_20220914-0900-PLENARY',
        'info_dict': {
            'id': 'bcaa1db4-76ef-7e06-8da7-839bd0ad1dbe',
            'ext': 'mp4',
            'release_timestamp': 1663137900,
            'title': 'Plenary session',
            'release_date': '20220914',
        },
        'params': {
            'skip_download': True,
        }
    }, {
        'url': 'https://multimedia.europarl.europa.eu/pl/webstreaming/eu-cop27-un-climate-change-conference-in-sharm-el-sheikh-egypt-ep-delegation-meets-with-ngo-represen_20221114-1600-SPECIAL-OTHER',
        'info_dict': {
            'id': 'a8428de8-b9cd-6a2e-11e4-3805d9c9ff5c',
            'ext': 'mp4',
            'release_timestamp': 1668434400,
            'release_date': '20221114',
            'title': 'md5:d3550280c33cc70e0678652e3d52c028',
        },
        'params': {
            'skip_download': True,
        }
    }, {
        # embed webpage
        'url': 'https://webstreaming.europarl.europa.eu/ep/embed/embed.html?event=20220914-0900-PLENARY&language=en&autoplay=true&logo=true',
        'info_dict': {
            'id': 'bcaa1db4-76ef-7e06-8da7-839bd0ad1dbe',
            'ext': 'mp4',
            'title': 'Plenary session',
            'release_date': '20220914',
            'release_timestamp': 1663137900,
        },
        'params': {
            'skip_download': True,
        }
    }, {
        # live webstream
        'url': 'https://multimedia.europarl.europa.eu/en/webstreaming/euroscola_20221115-1000-SPECIAL-EUROSCOLA',
        'info_dict': {
            'ext': 'mp4',
            'id': '510eda7f-ba72-161b-7ee7-0e836cd2e715',
            'release_timestamp': 1668502800,
            'title': 'Euroscola 2022-11-15 19:21',
            'release_date': '20221115',
            'live_status': 'is_live',
        },
        'skip': 'not live anymore'
    }]

    def _real_extract(self, url):
        """Resolve the event in *url* and return its info dict.

        The streaming URL from the API response is parsed as a DASH manifest
        and, after replacing ``.mpd`` with ``.m3u8``, as an HLS manifest; the
        formats and subtitles of both are merged.
        """
        display_id = self._match_id(url)

        json_info = self._download_json(
            'https://vis-api.vuplay.co.uk/event/external', display_id,
            query={
                'player_key': 'europarl|718f822c-a48c-4841-9947-c9cb9bb1743c',
                'external_id': display_id,
            })

        streaming_url = json_info['streaming_url']
        formats, subtitles = self._extract_mpd_formats_and_subtitles(streaming_url, display_id)
        fmts, subs = self._extract_m3u8_formats_and_subtitles(
            streaming_url.replace('.mpd', '.m3u8'), display_id)

        formats.extend(fmts)
        self._merge_subtitles(subs, target=subtitles)

        return {
            'id': json_info['id'],
            'title': json_info.get('title'),
            'formats': formats,
            'subtitles': subtitles,
            'release_timestamp': parse_iso8601(json_info.get('published_start')),
            # `.get('state', '')` only covers a missing key; an explicit null
            # would make the membership test raise TypeError, hence `or ''`.
            'is_live': 'LIVE' in (json_info.get('state') or ''),
        }

View File

@ -1,31 +1,51 @@
from .common import InfoExtractor
from .uplynk import UplynkPreplayIE
from ..utils import HEADRequest, float_or_none, make_archive_id, smuggle_url
class FoxSportsIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?foxsports\.com/(?:[^/]+/)*video/(?P<id>\d+)'
_TEST = {
'url': 'http://www.foxsports.com/tennessee/video/432609859715',
'md5': 'b49050e955bebe32c301972e4012ac17',
_VALID_URL = r'https?://(?:www\.)?foxsports\.com/watch/(?P<id>[\w-]+)'
_TESTS = [{
'url': 'https://www.foxsports.com/watch/play-612168c6700004b',
'info_dict': {
'id': '432609859715',
'id': 'b72f5bd8658140baa5791bb676433733',
'ext': 'mp4',
'title': 'Courtney Lee on going up 2-0 in series vs. Blazers',
'description': 'Courtney Lee talks about Memphis being focused.',
# TODO: fix timestamp
'upload_date': '19700101', # '20150423',
# 'timestamp': 1429761109,
'uploader': 'NEWA-FNG-FOXSPORTS',
'display_id': 'play-612168c6700004b',
'title': 'md5:e0c4ecac3a1f25295b4fae22fb5c126a',
'description': 'md5:371bc43609708ae2b9e1a939229762af',
'uploader_id': '06b4a36349624051a9ba52ac3a91d268',
'upload_date': '20221205',
'timestamp': 1670262586,
'duration': 31.7317,
'thumbnail': r're:^https?://.*\.jpg$',
'extra_param_to_segment_url': str,
},
'params': {
# m3u8 download
'skip_download': True,
'skip_download': 'm3u8',
},
'add_ie': ['ThePlatform'],
}
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
json_ld = self._search_json_ld(webpage, video_id, expected_type='VideoObject', default={})
data = self._download_json(
f'https://api3.fox.com/v2.0/vodplayer/sportsclip/{video_id}',
video_id, note='Downloading API JSON', headers={
'x-api-key': 'cf289e299efdfa39fb6316f259d1de93',
})
preplay_url = self._request_webpage(
HEADRequest(data['url']), video_id, 'Fetching preplay URL').geturl()
return self.url_result(
'https://feed.theplatform.com/f/BKQ29B/foxsports-all?byId=' + video_id, 'ThePlatformFeed')
return {
'_type': 'url_transparent',
'ie_key': UplynkPreplayIE.ie_key(),
'url': smuggle_url(preplay_url, {'Origin': 'https://www.foxsports.com'}),
'display_id': video_id,
'title': data.get('name') or json_ld.get('title'),
'description': data.get('description') or json_ld.get('description'),
'duration': float_or_none(data.get('durationInSeconds')),
'timestamp': json_ld.get('timestamp'),
'thumbnails': json_ld.get('thumbnails'),
'_old_archive_ids': [make_archive_id(self, video_id)],
}

View File

@ -2356,7 +2356,7 @@ def _real_extract(self, url):
info_dict.update({
'formats': formats,
'subtitles': subtitles,
'http_headers': headers,
'http_headers': headers or None,
})
return info_dict

View File

@ -1,3 +1,5 @@
import itertools
from .common import InfoExtractor
from .dailymotion import DailymotionIE
from ..utils import smuggle_url, traverse_obj
@ -16,6 +18,26 @@ def _call_api(self, slug, endpoint, query={}, season_id='', display_id=None):
f'https://api.netverse.id/medias/api/v2/{self._ENDPOINTS[endpoint]}/{slug}/{season_id}',
display_id or slug, query=query)
def _get_comments(self, video_id):
    """Yield one dict per comment of *video_id*, page by page.

    Pages are requested starting at 1. The page count is read from
    ``response.comments.last_page`` of the first response that provides it,
    and iteration stops once the current page number reaches that count
    (or immediately after the first page if no count is ever found).
    """
    # Output key -> location of the value inside each raw comment entry.
    comment_fields = {
        'id': '_id',
        'text': 'comment',
        'author_id': 'customer_id',
        'author': ('customer', 'name'),
        'author_thumbnail': ('customer', 'profile_picture'),
    }
    total_pages = None

    for page in itertools.count(1):
        # fatal=False: a failed download is treated as an empty page.
        # NOTE(review): data=b'' presumably turns this into a POST - confirm.
        page_json = self._download_json(
            f'https://api.netverse.id/mediadetails/api/v3/videos/comments/{video_id}',
            video_id, data=b'', fatal=False, query={'page': page},
            note=f'Downloading JSON comment metadata page {page}') or {}

        yield from traverse_obj(
            page_json, ('response', 'comments', 'data', ..., comment_fields))

        # Keep the first page count found; only look again while it is unknown.
        total_pages = total_pages or traverse_obj(
            page_json, ('response', 'comments', 'last_page'))
        if page >= (total_pages or 0):
            break
class NetverseIE(NetverseBaseIE):
_VALID_URL = r'https?://(?:\w+\.)?netverse\.id/(?P<type>watch|video)/(?P<display_id>[^/?#&]+)'
@ -28,7 +50,7 @@ class NetverseIE(NetverseBaseIE):
'ext': 'mp4',
'season': 'Season 2016',
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
'thumbnail': r're:https?://s\d+\.dmcdn\.net/v/T7aV31Y0eGRWBbwkK/x1080',
'thumbnail': r're:https?://s\d+\.dmcdn\.net/v/[^/]+/x1080',
'episode_number': 22,
'episode': 'Episode 22',
'uploader_id': 'x2ir3vq',
@ -51,7 +73,7 @@ class NetverseIE(NetverseBaseIE):
'ext': 'mp4',
'season': 'Season 2',
'description': 'md5:8a74f70812cca267e19ee0635f0af835',
'thumbnail': r're:https?://s\d+\.dmcdn\.net/v/Thwuy1YURicFmGu0v/x1080',
'thumbnail': r're:https?://s\d+\.dmcdn\.net/v/[^/]+/x1080',
'episode_number': 2,
'episode': 'Episode 2',
'view_count': int,
@ -75,7 +97,7 @@ class NetverseIE(NetverseBaseIE):
'title': 'Tetangga Baru',
'season': 'Season 1',
'description': 'md5:23fcf70e97d461d3029d25d59b2ccfb9',
'thumbnail': r're:https?://s\d+\.dmcdn\.net/v/T3Ogm1YEnnyjVKAFF/x1080',
'thumbnail': r're:https?://s\d+\.dmcdn\.net/v/[^/]+/x1080',
'episode_number': 1,
'episode': 'Episode 1',
'timestamp': 1624538169,
@ -96,7 +118,7 @@ class NetverseIE(NetverseBaseIE):
'info_dict': {
'id': 'x887jzz',
'ext': 'mp4',
'thumbnail': r're:https?://s\d+\.dmcdn\.net/v/TfuZ_1Y6PboJ5An_s/x1080',
'thumbnail': r're:https?://s\d+\.dmcdn\.net/v/[^/]+/x1080',
'season': 'Season 1',
'episode_number': 1,
'description': 'md5:d4f627b3e7a3f9acdc55f6cdd5ea41d5',
@ -114,6 +136,60 @@ class NetverseIE(NetverseBaseIE):
'upload_date': '20220225',
},
'skip': 'This video get Geo-blocked for some country'
}, {
# video with comments
'url': 'https://netverse.id/video/episode-1-season-2016-ok-food',
'info_dict': {
'id': 'k6hetBPiQMljSxxvAy7',
'ext': 'mp4',
'thumbnail': r're:https?://s\d+\.dmcdn\.net/v/[^/]+/x1080',
'display_id': 'episode-1-season-2016-ok-food',
'like_count': int,
'description': '',
'duration': 1471,
'age_limit': 0,
'timestamp': 1642405848,
'episode_number': 1,
'season': 'Season 2016',
'uploader_id': 'x2ir3vq',
'title': 'Episode 1 - Season 2016 - Ok Food',
'upload_date': '20220117',
'tags': [],
'view_count': int,
'episode': 'Episode 1',
'uploader': 'Net Prime',
'comment_count': int,
},
'params':{
'getcomments': True
}
}, {
# video with multiple page comment
'url': 'https://netverse.id/video/match-island-eps-1-fix',
'info_dict': {
'id': 'x8aznjc',
'ext': 'mp4',
'like_count': int,
'tags': ['Match-Island', 'Pd00111'],
'display_id': 'match-island-eps-1-fix',
'view_count': int,
'episode': 'Episode 1',
'uploader': 'Net Prime',
'duration': 4070,
'timestamp': 1653068165,
'description': 'md5:e9cf3b480ad18e9c33b999e3494f223f',
'age_limit': 0,
'title': 'Welcome To Match Island',
'upload_date': '20220520',
'episode_number': 1,
'thumbnail': r're:https?://s\d+\.dmcdn\.net/v/[^/]+/x1080',
'uploader_id': 'x2ir3vq',
'season': 'Season 1',
'comment_count': int,
},
'params':{
'getcomments': True
}
}]
def _real_extract(self, url):
@ -131,6 +207,7 @@ def _real_extract(self, url):
'thumbnail': traverse_obj(videos, ('program_detail', 'thumbnail_image')),
'description': traverse_obj(videos, ('program_detail', 'description')),
'episode_number': videos.get('episode_order'),
'__post_extractor': self.extract_comments(display_id),
}

View File

@ -3,7 +3,7 @@
class NOSNLArticleIE(InfoExtractor):
_VALID_URL = r'https?://nos\.nl/((?!video)(\w+/)?\w+/)\d+-(?P<display_id>[\w-]+)'
_VALID_URL = r'https?://nos\.nl/(?P<type>video|(\w+/)?\w+)/?\d+-(?P<display_id>[\w-]+)'
_TESTS = [
{
# only 1 video
@ -22,13 +22,14 @@ class NOSNLArticleIE(InfoExtractor):
'info_dict': {
'id': '2440409',
'title': 'Vannacht sliepen weer enkele honderden asielzoekers in Ter Apel buiten',
'description': 'Er werd wel geprobeerd om kwetsbare migranten onderdak te bieden, zegt het COA.',
'description': 'md5:72b1e1674d798460e79d78fa37e9f56d',
'tags': ['aanmeldcentrum', 'Centraal Orgaan opvang asielzoekers', 'COA', 'asielzoekers', 'Ter Apel'],
'modified_timestamp': 1660452773,
'modified_date': '20220814',
'upload_date': '20220813',
'thumbnail': 'https://cdn.nos.nl/image/2022/07/18/880346/1024x576a.jpg',
'timestamp': 1660401384,
'categories': ['Regionaal nieuws', 'Binnenland'],
},
'playlist_count': 2,
}, {
@ -37,20 +38,37 @@ class NOSNLArticleIE(InfoExtractor):
'info_dict': {
'id': '2440789',
'title': 'Wekdienst 16/8: Groningse acties tien jaar na zware aardbeving • Femke Bol in actie op EK atletiek ',
'description': 'Nieuws, weer, verkeer: met dit overzicht begin je geïnformeerd aan de dag.',
'description': 'md5:0bd277ed7a44fc15cb12a9d27d8f6641',
'tags': ['wekdienst'],
'modified_date': '20220816',
'modified_timestamp': 1660625449,
'timestamp': 1660625449,
'upload_date': '20220816',
'thumbnail': 'https://cdn.nos.nl/image/2022/08/16/888178/1024x576a.jpg',
'categories': ['Binnenland', 'Buitenland'],
},
'playlist_count': 2,
}, {
# video url
'url': 'https://nos.nl/video/2452718-xi-en-trudeau-botsen-voor-de-camera-op-g20-top-je-hebt-gelekt',
'info_dict': {
'id': '2452718',
'title': 'Xi en Trudeau botsen voor de camera op G20-top: \'Je hebt gelekt\'',
'modified_date': '20221117',
'description': 'md5:61907dac576f75c11bf8ffffd4a3cc0f',
'tags': ['Xi', 'Trudeau', 'G20', 'indonesié'],
'upload_date': '20221117',
'thumbnail': 'https://cdn.nos.nl/image/2022/11/17/916155/1024x576a.jpg',
'modified_timestamp': 1668663388,
'timestamp': 1668663388,
'categories': ['Buitenland'],
},
'playlist_mincount': 1,
}
]
def _entries(self, nextjs_json, display_id):
for item in nextjs_json['items']:
for item in nextjs_json:
if item.get('type') == 'video':
formats, subtitle = self._extract_m3u8_formats_and_subtitles(
traverse_obj(item, ('source', 'url')), display_id, ext='mp4')
@ -77,13 +95,14 @@ def _entries(self, nextjs_json, display_id):
}
def _real_extract(self, url):
display_id = self._match_valid_url(url).group('display_id')
site_type, display_id = self._match_valid_url(url).group('type', 'display_id')
webpage = self._download_webpage(url, display_id)
nextjs_json = self._search_nextjs_data(webpage, display_id)['props']['pageProps']['data']
return {
'_type': 'playlist',
'entries': self._entries(nextjs_json, display_id),
'entries': self._entries(
[nextjs_json['video']] if site_type == 'video' else nextjs_json['items'], display_id),
'id': str(nextjs_json['id']),
'title': nextjs_json.get('title') or self._html_search_meta(['title', 'og:title', 'twitter:title'], webpage),
'description': (nextjs_json.get('description')
@ -91,5 +110,6 @@ def _real_extract(self, url):
'tags': nextjs_json.get('keywords'),
'modified_timestamp': parse_iso8601(nextjs_json.get('modifiedAt')),
'thumbnail': nextjs_json.get('shareImageSrc') or self._html_search_meta(['og:image', 'twitter:image'], webpage),
'timestamp': parse_iso8601(nextjs_json.get('publishedAt'))
'timestamp': parse_iso8601(nextjs_json.get('publishedAt')),
'categories': traverse_obj(nextjs_json, ('categories', ..., 'label')),
}

View File

@ -0,0 +1,43 @@
from .common import InfoExtractor
class OnePlacePodcastIE(InfoExtractor):
    """Extractor for oneplace.com podcast "listen" pages (audio only)."""

    _VALID_URL = r'https?://www\.oneplace\.com/[\w]+/[^/]+/listen/[\w-]+-(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.oneplace.com/ministries/a-daily-walk/listen/living-in-the-last-days-part-2-958461.html',
        'info_dict': {
            'id': '958461',
            'ext': 'mp3',
            'title': 'Living in the Last Days Part 2 | A Daily Walk with John Randall',
            'description': 'md5:fbb8f1cf21447ac54ecaa2887fc20c6e',
        }
    }, {
        'url': 'https://www.oneplace.com/ministries/ankerberg-show/listen/ep-3-relying-on-the-constant-companionship-of-the-holy-spirit-part-2-922513.html',
        'info_dict': {
            'id': '922513',
            'ext': 'mp3',
            'description': 'md5:8b810b4349aa40a5d033b4536fe428e1',
            'title': 'md5:ce10f7d8d5ddcf485ed8905ef109659d',
        }
    }]

    def _real_extract(self, url):
        """Scrape the media URL, title and description from the episode page.

        The media URL is searched without a default; title and description
        fall back to ``None`` when no pattern matches.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Either an `mp3-url="..."` attribute or the player div's data-media-url.
        media_url_patterns = (
            r'mp3-url\s*=\s*"([^"]+)',
            r'<div[^>]+id\s*=\s*"player"[^>]+data-media-url\s*=\s*"(?P<media_url>[^"]+)',
        )
        media_url = self._search_regex(media_url_patterns, webpage, 'media url')

        # group='content' is requested for every pattern, so the _meta_regex
        # patterns are presumably expected to expose a group of that name.
        # NOTE(review): the first pattern looks overly strict (single-character
        # `[^>]`/`[^<]` gaps, capture running to the next `>`) - confirm it can
        # match the live markup before relying on it.
        title_patterns = (
            r'<div[^>]class\s*=\s*"details"[^>]+>[^<]<h2[^>]+>(?P<content>[^>]+)>',
            self._meta_regex('og:title'),
            self._meta_regex('title'),
        )
        title = self._html_search_regex(
            title_patterns, webpage, 'title', group='content', default=None)

        description = self._html_search_regex(
            r'<div[^>]+class="[^"]+epDesc"[^>]*>\s*(?P<desc>.+?)\s*</div>',
            webpage, 'description', default=None)

        return {
            'id': video_id,
            'url': media_url,
            'ext': 'mp3',
            'vcodec': 'none',
            'title': title,
            'description': description,
        }

View File

@ -1,19 +1,24 @@
import json
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
determine_ext,
float_or_none,
int_or_none,
try_get,
str_or_none,
strip_or_none,
traverse_obj,
unified_timestamp,
url_or_none,
)
class PinterestBaseIE(InfoExtractor):
_VALID_URL_BASE = r'https?://(?:[^/]+\.)?pinterest\.(?:com|fr|de|ch|jp|cl|ca|it|co\.uk|nz|ru|com\.au|at|pt|co\.kr|es|com\.mx|dk|ph|th|com\.uy|co|nl|info|kr|ie|vn|com\.vn|ec|mx|in|pe|co\.at|hu|co\.in|co\.nz|id|com\.ec|com\.py|tw|be|uk|com\.bo|com\.pe)'
_VALID_URL_BASE = r'''(?x)
https?://(?:[^/]+\.)?pinterest\.(?:
com|fr|de|ch|jp|cl|ca|it|co\.uk|nz|ru|com\.au|at|pt|co\.kr|es|com\.mx|
dk|ph|th|com\.uy|co|nl|info|kr|ie|vn|com\.vn|ec|mx|in|pe|co\.at|hu|
co\.in|co\.nz|id|com\.ec|com\.py|tw|be|uk|com\.bo|com\.pe)'''
def _call_api(self, resource, video_id, options):
return self._download_json(
@ -24,14 +29,53 @@ def _call_api(self, resource, video_id, options):
def _extract_video(self, data, extract_formats=True):
video_id = data['id']
thumbnails = []
images = data.get('images')
if isinstance(images, dict):
for thumbnail_id, thumbnail in images.items():
if not isinstance(thumbnail, dict):
continue
thumbnail_url = url_or_none(thumbnail.get('url'))
if not thumbnail_url:
continue
thumbnails.append({
'url': thumbnail_url,
'width': int_or_none(thumbnail.get('width')),
'height': int_or_none(thumbnail.get('height')),
})
title = (data.get('title') or data.get('grid_title') or video_id).strip()
info = {
'title': strip_or_none(traverse_obj(data, 'title', 'grid_title', default='')),
'description': traverse_obj(data, 'seo_description', 'description'),
'timestamp': unified_timestamp(data.get('created_at')),
'thumbnails': thumbnails,
'uploader': traverse_obj(data, ('closeup_attribution', 'full_name')),
'uploader_id': str_or_none(traverse_obj(data, ('closeup_attribution', 'id'))),
'repost_count': int_or_none(data.get('repin_count')),
'comment_count': int_or_none(data.get('comment_count')),
'categories': traverse_obj(data, ('pin_join', 'visual_annotation'), expected_type=list),
'tags': traverse_obj(data, 'hashtags', expected_type=list),
}
urls = []
formats = []
duration = None
if extract_formats:
for format_id, format_dict in data['videos']['video_list'].items():
domain = data.get('domain', '')
if domain.lower() != 'uploaded by user' and traverse_obj(data, ('embed', 'src')):
if not info['title']:
info['title'] = None
return {
'_type': 'url_transparent',
'url': data['embed']['src'],
**info,
}
elif extract_formats:
video_list = traverse_obj(
data, ('videos', 'video_list'),
('story_pin_data', 'pages', ..., 'blocks', ..., 'video', 'video_list'),
expected_type=dict, get_all=False, default={})
for format_id, format_dict in video_list.items():
if not isinstance(format_dict, dict):
continue
format_url = url_or_none(format_dict.get('url'))
@ -53,72 +97,79 @@ def _extract_video(self, data, extract_formats=True):
'duration': duration,
})
description = data.get('description') or data.get('description_html') or data.get('seo_description')
timestamp = unified_timestamp(data.get('created_at'))
def _u(field):
return try_get(data, lambda x: x['closeup_attribution'][field], compat_str)
uploader = _u('full_name')
uploader_id = _u('id')
repost_count = int_or_none(data.get('repin_count'))
comment_count = int_or_none(data.get('comment_count'))
categories = try_get(data, lambda x: x['pin_join']['visual_annotation'], list)
tags = data.get('hashtags')
thumbnails = []
images = data.get('images')
if isinstance(images, dict):
for thumbnail_id, thumbnail in images.items():
if not isinstance(thumbnail, dict):
continue
thumbnail_url = url_or_none(thumbnail.get('url'))
if not thumbnail_url:
continue
thumbnails.append({
'url': thumbnail_url,
'width': int_or_none(thumbnail.get('width')),
'height': int_or_none(thumbnail.get('height')),
})
return {
'id': video_id,
'title': title,
'description': description,
'duration': duration,
'timestamp': timestamp,
'thumbnails': thumbnails,
'uploader': uploader,
'uploader_id': uploader_id,
'repost_count': repost_count,
'comment_count': comment_count,
'categories': categories,
'tags': tags,
'formats': formats,
'duration': duration,
'webpage_url': f'https://www.pinterest.com/pin/{video_id}/',
'extractor_key': PinterestIE.ie_key(),
'extractor': PinterestIE.IE_NAME,
**info,
}
class PinterestIE(PinterestBaseIE):
_VALID_URL = r'%s/pin/(?P<id>\d+)' % PinterestBaseIE._VALID_URL_BASE
_TESTS = [{
# formats found in data['videos']
'url': 'https://www.pinterest.com/pin/664281013778109217/',
'md5': '6550c2af85d6d9f3fe3b88954d1577fc',
'info_dict': {
'id': '664281013778109217',
'ext': 'mp4',
'title': 'Origami',
'description': 'md5:b9d90ddf7848e897882de9e73344f7dd',
'description': 'md5:e29801cab7d741ea8c741bc50c8d00ab',
'duration': 57.7,
'timestamp': 1593073622,
'upload_date': '20200625',
'uploader': 'Love origami -I am Dafei',
'uploader_id': '586523688879454212',
'repost_count': 50,
'comment_count': 0,
'repost_count': int,
'comment_count': int,
'categories': list,
'tags': list,
'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
},
}, {
# formats found in data['story_pin_data']
'url': 'https://www.pinterest.com/pin/1084663891475263837/',
'md5': '069ac19919ab9e1e13fa60de46290b03',
'info_dict': {
'id': '1084663891475263837',
'ext': 'mp4',
'title': 'Gadget, Cool products, Amazon product, technology, Kitchen gadgets',
'description': 'md5:d0a4b6ae996ff0c6eed83bc869598d13',
'uploader': 'CoolCrazyGadgets',
'uploader_id': '1084664028912989237',
'upload_date': '20211003',
'timestamp': 1633246654.0,
'duration': 14.9,
'comment_count': int,
'repost_count': int,
'categories': 'count:9',
'tags': list,
'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
},
}, {
# vimeo.com embed
'url': 'https://www.pinterest.ca/pin/441282463481903715/',
'info_dict': {
'id': '111691128',
'ext': 'mp4',
'title': 'Tonite Let\'s All Make Love In London (1967)',
'description': 'md5:8190f37b3926807809ec57ec21aa77b2',
'uploader': 'Vimeo',
'uploader_id': '473792960706651251',
'upload_date': '20180120',
'timestamp': 1516409040,
'duration': 3404,
'comment_count': int,
'repost_count': int,
'categories': 'count:9',
'tags': [],
'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
'uploader_url': 'https://vimeo.com/willardandrade',
},
'params': {
'skip_download': 'm3u8',
},
}, {
'url': 'https://co.pinterest.com/pin/824721750502199491/',

View File

@ -91,12 +91,12 @@ def _download_and_extract_formats(self, video_id, query=None):
class RutubeIE(RutubeBaseIE):
IE_NAME = 'rutube'
IE_DESC = 'Rutube videos'
_VALID_URL = r'https?://rutube\.ru/(?:video|(?:play/)?embed)/(?P<id>[\da-z]{32})'
_VALID_URL = r'https?://rutube\.ru/(?:video(?:/private)?|(?:play/)?embed)/(?P<id>[\da-z]{32})'
_EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//rutube\.ru/(?:play/)?embed/[\da-z]{32}.*?)\1']
_TESTS = [{
'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/',
'md5': '1d24f180fac7a02f3900712e5a5764d6',
'md5': 'e33ac625efca66aba86cbec9851f2692',
'info_dict': {
'id': '3eac3b4561676c17df9132a9a1e62e3e',
'ext': 'mp4',
@ -108,6 +108,10 @@ class RutubeIE(RutubeBaseIE):
'timestamp': 1381943602,
'upload_date': '20131016',
'age_limit': 0,
'view_count': int,
'thumbnail': 'http://pic.rutubelist.ru/video/d2/a0/d2a0aec998494a396deafc7ba2c82add.jpg',
'category': ['Новости и СМИ'],
},
}, {
'url': 'http://rutube.ru/play/embed/a10e53b86e8f349080f718582ce4c661',
@ -121,6 +125,24 @@ class RutubeIE(RutubeBaseIE):
}, {
'url': 'https://rutube.ru/video/10b3a03fc01d5bbcc632a2f3514e8aab/?pl_type=source',
'only_matching': True,
}, {
'url': 'https://rutube.ru/video/private/884fb55f07a97ab673c7d654553e0f48/?p=x2QojCumHTS3rsKHWXN8Lg',
'md5': 'd106225f15d625538fe22971158e896f',
'info_dict': {
'id': '884fb55f07a97ab673c7d654553e0f48',
'ext': 'mp4',
'title': 'Яцуноками, Nioh2',
'description': 'Nioh2: финал сражения с боссом Яцуноками',
'duration': 15,
'uploader': 'mexus',
'uploader_id': '24222106',
'timestamp': 1670646232,
'upload_date': '20221210',
'age_limit': 0,
'view_count': int,
'thumbnail': 'http://pic.rutubelist.ru/video/f2/d4/f2d42b54be0a6e69c1c22539e3152156.jpg',
'category': ['Видеоигры'],
},
}]
@classmethod
@ -129,8 +151,9 @@ def suitable(cls, url):
def _real_extract(self, url):
video_id = self._match_id(url)
info = self._download_and_extract_info(video_id)
info['formats'] = self._download_and_extract_formats(video_id)
query = parse_qs(url)
info = self._download_and_extract_info(video_id, query)
info['formats'] = self._download_and_extract_formats(video_id, query)
return info

View File

@ -1,92 +1,176 @@
from .common import InfoExtractor
from ..utils import (
bool_or_none,
smuggle_url,
try_get,
traverse_obj,
unified_timestamp,
url_or_none,
)
class SlidesLiveIE(InfoExtractor):
_VALID_URL = r'https?://slideslive\.com/(?P<id>[0-9]+)'
_WORKING = False
_TESTS = [{
# video_service_name = YOUTUBE
# service_name = yoda
'url': 'https://slideslive.com/38902413/gcc-ia16-backend',
'md5': 'b29fcd6c6952d0c79c5079b0e7a07e6f',
'info_dict': {
'id': 'LMtgR8ba0b0',
'id': '38902413',
'ext': 'mp4',
'title': 'GCC IA16 backend',
'description': 'Watch full version of this video at https://slideslive.com/38902413.',
'uploader': 'SlidesLive Videos - A',
'uploader_id': 'UC62SdArr41t_-_fX40QCLRw',
'timestamp': 1597615266,
'upload_date': '20170925',
}
}, {
# video_service_name = yoda
'url': 'https://slideslive.com/38935785',
'md5': '575cd7a6c0acc6e28422fe76dd4bcb1a',
'info_dict': {
'id': 'RMraDYN5ozA_',
'ext': 'mp4',
'title': 'Offline Reinforcement Learning: From Algorithms to Practical Challenges',
'timestamp': 1648189972,
'upload_date': '20220325',
'thumbnail': r're:^https?://.*\.jpg',
},
'params': {
'skip_download': 'm3u8',
},
}, {
# video_service_name = youtube
# service_name = yoda
'url': 'https://slideslive.com/38935785',
'info_dict': {
'id': '38935785',
'ext': 'mp4',
'title': 'Offline Reinforcement Learning: From Algorithms to Practical Challenges',
'upload_date': '20211115',
'timestamp': 1636996003,
'thumbnail': r're:^https?://.*\.jpg',
},
'params': {
'skip_download': 'm3u8',
},
}, {
# service_name = yoda
'url': 'https://slideslive.com/38973182/how-should-a-machine-learning-researcher-think-about-ai-ethics',
'info_dict': {
'id': '38973182',
'ext': 'mp4',
'title': 'How Should a Machine Learning Researcher Think About AI Ethics?',
'upload_date': '20220201',
'thumbnail': r're:^https?://.*\.jpg',
'timestamp': 1643728135,
},
'params': {
'skip_download': 'm3u8',
},
}, {
# service_name = youtube
'url': 'https://slideslive.com/38897546/special-metaprednaska-petra-ludwiga-hodnoty-pro-lepsi-spolecnost',
'md5': '8a79b5e3d700837f40bd2afca3c8fa01',
'info_dict': {
'id': 'jmg02wCJD5M',
'display_id': '38897546',
'ext': 'mp4',
'title': 'SPECIÁL: Meta-přednáška Petra Ludwiga - Hodnoty pro lepší společnost',
'description': 'Watch full version of this video at https://slideslive.com/38897546.',
'channel_url': 'https://www.youtube.com/channel/UCZWdAkNYFncuX0khyvhqnxw',
'channel': 'SlidesLive Videos - G1',
'channel_id': 'UCZWdAkNYFncuX0khyvhqnxw',
'uploader_id': 'UCZWdAkNYFncuX0khyvhqnxw',
'uploader': 'SlidesLive Videos - G1',
'uploader_url': 'http://www.youtube.com/channel/UCZWdAkNYFncuX0khyvhqnxw',
'live_status': 'not_live',
'upload_date': '20160710',
'timestamp': 1618786715,
'duration': 6827,
'like_count': int,
'view_count': int,
'comment_count': int,
'channel_follower_count': int,
'age_limit': 0,
'thumbnail': r're:^https?://.*\.jpg',
'playable_in_embed': True,
'availability': 'unlisted',
'tags': [],
'categories': ['People & Blogs'],
},
}, {
# service_name = youtube
'url': 'https://slideslive.com/38903721/magic-a-scientific-resurrection-of-an-esoteric-legend',
'only_matching': True,
}, {
# video_service_name = url
# service_name = url
'url': 'https://slideslive.com/38922070/learning-transferable-skills-1',
'only_matching': True,
}, {
# video_service_name = vimeo
# service_name = vimeo
'url': 'https://slideslive.com/38921896/retrospectives-a-venue-for-selfreflection-in-ml-research-3',
'only_matching': True,
}]
def _extract_custom_m3u8_info(self, m3u8_data):
m3u8_dict = {}
lookup = {
'PRESENTATION-TITLE': 'title',
'PRESENTATION-UPDATED-AT': 'timestamp',
'PRESENTATION-THUMBNAIL': 'thumbnail',
'PLAYLIST-TYPE': 'playlist_type',
'VOD-VIDEO-SERVICE-NAME': 'service_name',
'VOD-VIDEO-ID': 'service_id',
'VOD-VIDEO-SERVERS': 'video_servers',
'VOD-SUBTITLES': 'subtitles',
}
for line in m3u8_data.splitlines():
if not line.startswith('#EXT-SL-'):
continue
tag, _, value = line.partition(':')
key = lookup.get(tag.lstrip('#EXT-SL-'))
if not key:
continue
m3u8_dict[key] = value
# Some values are stringified JSON arrays
for key in ('video_servers', 'subtitles'):
if key in m3u8_dict:
m3u8_dict[key] = self._parse_json(m3u8_dict[key], None, fatal=False) or []
return m3u8_dict
def _real_extract(self, url):
video_id = self._match_id(url)
video_data = self._download_json(
'https://ben.slideslive.com/player/' + video_id, video_id)
service_name = video_data['video_service_name'].lower()
webpage = self._download_webpage(url, video_id)
player_token = self._search_regex(r'data-player-token="([^"]+)"', webpage, 'player token')
player_data = self._download_webpage(
f'https://ben.slideslive.com/player/{video_id}', video_id,
note='Downloading player info', query={'player_token': player_token})
player_info = self._extract_custom_m3u8_info(player_data)
service_name = player_info['service_name'].lower()
assert service_name in ('url', 'yoda', 'vimeo', 'youtube')
service_id = video_data['video_service_id']
service_id = player_info['service_id']
subtitles = {}
for sub in try_get(video_data, lambda x: x['subtitles'], list) or []:
if not isinstance(sub, dict):
continue
for sub in traverse_obj(player_info, ('subtitles', ...), expected_type=dict):
webvtt_url = url_or_none(sub.get('webvtt_url'))
if not webvtt_url:
continue
lang = sub.get('language') or 'en'
subtitles.setdefault(lang, []).append({
subtitles.setdefault(sub.get('language') or 'en', []).append({
'url': webvtt_url,
'ext': 'vtt',
})
info = {
'id': video_id,
'thumbnail': video_data.get('thumbnail'),
'is_live': bool_or_none(video_data.get('is_live')),
'title': player_info.get('title') or self._html_search_meta('title', webpage, default=''),
'timestamp': unified_timestamp(player_info.get('timestamp')),
'is_live': player_info.get('playlist_type') != 'vod',
'thumbnail': url_or_none(player_info.get('thumbnail')),
'subtitles': subtitles,
}
if service_name in ('url', 'yoda'):
info['title'] = video_data['title']
if service_name == 'url':
info['url'] = service_id
else:
cdn_hostname = player_info['video_servers'][0]
formats = []
_MANIFEST_PATTERN = 'https://01.cdn.yoda.slideslive.com/%s/master.%s'
# use `m3u8` entry_protocol until EXT-X-MAP is properly supported by `m3u8_native` entry_protocol
formats.extend(self._extract_m3u8_formats(
_MANIFEST_PATTERN % (service_id, 'm3u8'),
service_id, 'mp4', m3u8_id='hls', fatal=False))
f'https://{cdn_hostname}/{service_id}/master.m3u8',
video_id, 'mp4', m3u8_id='hls', fatal=False, live=True))
formats.extend(self._extract_mpd_formats(
_MANIFEST_PATTERN % (service_id, 'mpd'), service_id,
mpd_id='dash', fatal=False))
f'https://{cdn_hostname}/{service_id}/master.mpd',
video_id, mpd_id='dash', fatal=False))
info.update({
'id': service_id,
'formats': formats,
})
else:
@ -94,10 +178,11 @@ def _real_extract(self, url):
'_type': 'url_transparent',
'url': service_id,
'ie_key': service_name.capitalize(),
'title': video_data.get('title'),
'display_id': video_id,
})
if service_name == 'vimeo':
info['url'] = smuggle_url(
'https://player.vimeo.com/video/' + service_id,
f'https://player.vimeo.com/video/{service_id}',
{'http_headers': {'Referer': url}})
return info

View File

@ -293,7 +293,7 @@ def _real_extract(self, url):
class TwitterIE(TwitterBaseIE):
IE_NAME = 'twitter'
_VALID_URL = TwitterBaseIE._BASE_REGEX + r'(?:(?:i/web|[^/]+)/status|statuses)/(?P<id>\d+)'
_VALID_URL = TwitterBaseIE._BASE_REGEX + r'(?:(?:i/web|[^/]+)/status|statuses)/(?P<id>\d+)(?:/video/(?P<index>\d+))?'
_TESTS = [{
'url': 'https://twitter.com/freethenipple/status/643211948184596480',
@ -336,7 +336,7 @@ class TwitterIE(TwitterBaseIE):
'id': '665052190608723968',
'display_id': '665052190608723968',
'ext': 'mp4',
'title': 'md5:55fef1d5b811944f1550e91b44abb82e',
'title': 'md5:e99588f17b3dd0503814ffb560e64731',
'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ',
'uploader_id': 'starwars',
'uploader': r're:Star Wars.*',
@ -648,7 +648,7 @@ class TwitterIE(TwitterBaseIE):
'uploader_url': 'https://twitter.com/Rizdraws',
'upload_date': '20220928',
'timestamp': 1664391723,
'thumbnail': 're:^https?://.*\\.jpg',
'thumbnail': r're:^https?://.+\.jpg',
'like_count': int,
'repost_count': int,
'comment_count': int,
@ -727,6 +727,48 @@ class TwitterIE(TwitterBaseIE):
},
'add_ie': ['TwitterSpaces'],
'params': {'skip_download': 'm3u8'},
}, {
# URL specifies video number but --yes-playlist
'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/1',
'playlist_mincount': 2,
'info_dict': {
'id': '1600649710662213632',
'title': 'md5:be05989b0722e114103ed3851a0ffae2',
'timestamp': 1670459604.0,
'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
'comment_count': int,
'uploader_id': 'CTVJLaidlaw',
'repost_count': int,
'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
'upload_date': '20221208',
'age_limit': 0,
'uploader': 'Jocelyn Laidlaw',
'uploader_url': 'https://twitter.com/CTVJLaidlaw',
'like_count': int,
},
}, {
# URL specifies video number and --no-playlist
'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/2',
'info_dict': {
'id': '1600649511827013632',
'ext': 'mp4',
'title': 'md5:be05989b0722e114103ed3851a0ffae2',
'thumbnail': r're:^https?://.+\.jpg',
'timestamp': 1670459604.0,
'uploader_id': 'CTVJLaidlaw',
'uploader': 'Jocelyn Laidlaw',
'repost_count': int,
'comment_count': int,
'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
'duration': 102.226,
'uploader_url': 'https://twitter.com/CTVJLaidlaw',
'display_id': '1600649710662213632',
'like_count': int,
'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
'upload_date': '20221208',
'age_limit': 0,
},
'params': {'noplaylist': True},
}, {
# onion route
'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
@ -828,7 +870,7 @@ def _build_graphql_query(self, media_id):
}
def _real_extract(self, url):
twid = self._match_id(url)
twid, selected_index = self._match_valid_url(url).group('id', 'index')
if self.is_logged_in or self._configuration_arg('force_graphql'):
self.write_debug(f'Using GraphQL API (Auth = {self.is_logged_in})')
result = self._call_graphql_api('zZXycP0V6H7m-2r0mOnFcA/TweetDetail', twid)
@ -998,6 +1040,13 @@ def get_binding_value(k):
entries[0]['_old_archive_ids'] = [make_archive_id(self, twid)]
if not self._yes_playlist(twid, selected_index, video_label='URL-specified video number'):
index = int(selected_index) - 1
if index >= len(entries):
raise ExtractorError(f'Video #{selected_index} is unavailable', expected=True)
return entries[index]
if len(entries) == 1:
return entries[0]

View File

@ -2,40 +2,42 @@
from .common import InfoExtractor
from ..utils import (
float_or_none,
ExtractorError,
float_or_none,
smuggle_url,
traverse_obj,
unsmuggle_url,
update_url_query,
)
class UplynkIE(InfoExtractor):
IE_NAME = 'uplynk'
_VALID_URL = r'https?://.*?\.uplynk\.com/(?P<path>ext/[0-9a-f]{32}/(?P<external_id>[^/?&]+)|(?P<id>[0-9a-f]{32}))\.(?:m3u8|json)(?:.*?\bpbs=(?P<session_id>[^&]+))?'
_TEST = {
'url': 'http://content.uplynk.com/e89eaf2ce9054aa89d92ddb2d817a52e.m3u8',
'info_dict': {
'id': 'e89eaf2ce9054aa89d92ddb2d817a52e',
'ext': 'mp4',
'title': '030816-kgo-530pm-solar-eclipse-vid_web.mp4',
'uploader_id': '4413701bf5a1488db55b767f8ae9d4fa',
},
'params': {
# m3u8 download
'skip_download': True,
},
}
class UplynkBaseIE(InfoExtractor):
_UPLYNK_URL_RE = r'''(?x)
https?://[\w-]+\.uplynk\.com/(?P<path>
ext/[0-9a-f]{32}/(?P<external_id>[^/?&]+)|
(?P<id>[0-9a-f]{32})
)\.(?:m3u8|json)
(?:.*?\bpbs=(?P<session_id>[^&]+))?'''
def _extract_uplynk_info(self, uplynk_content_url):
path, external_id, video_id, session_id = re.match(UplynkIE._VALID_URL, uplynk_content_url).groups()
def _extract_uplynk_info(self, url):
uplynk_content_url, smuggled_data = unsmuggle_url(url, {})
mobj = re.match(self._UPLYNK_URL_RE, uplynk_content_url)
if not mobj:
raise ExtractorError('Necessary parameters not found in Uplynk URL')
path, external_id, video_id, session_id = mobj.group('path', 'external_id', 'id', 'session_id')
display_id = video_id or external_id
headers = traverse_obj(
smuggled_data, {'Referer': 'Referer', 'Origin': 'Origin'}, casesense=False)
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
'http://content.uplynk.com/%s.m3u8' % path,
display_id, 'mp4', 'm3u8_native')
f'http://content.uplynk.com/{path}.m3u8', display_id, 'mp4', headers=headers)
if session_id:
for f in formats:
f['extra_param_to_segment_url'] = 'pbs=' + session_id
asset = self._download_json('http://content.uplynk.com/player/assetinfo/%s.json' % path, display_id)
f['extra_param_to_segment_url'] = f'pbs={session_id}'
asset = self._download_json(
f'http://content.uplynk.com/player/assetinfo/{path}.json', display_id)
if asset.get('error') == 1:
raise ExtractorError('% said: %s' % (self.IE_NAME, asset['msg']), expected=True)
msg = asset.get('msg') or 'unknown error'
raise ExtractorError(f'{self.IE_NAME} said: {msg}', expected=True)
return {
'id': asset['asset'],
@ -47,20 +49,40 @@ def _extract_uplynk_info(self, uplynk_content_url):
'subtitles': subtitles,
}
class UplynkIE(UplynkBaseIE):
    """Extractor for Uplynk content URLs matched by the shared base pattern."""

    IE_NAME = 'uplynk'
    # Reuse the base class pattern so this extractor accepts exactly the URLs
    # that the shared `_extract_uplynk_info` helper is able to parse
    _VALID_URL = UplynkBaseIE._UPLYNK_URL_RE
    _TEST = {
        'url': 'http://content.uplynk.com/e89eaf2ce9054aa89d92ddb2d817a52e.m3u8',
        'info_dict': {
            'id': 'e89eaf2ce9054aa89d92ddb2d817a52e',
            'ext': 'mp4',
            'title': '030816-kgo-530pm-solar-eclipse-vid_web.mp4',
            'uploader_id': '4413701bf5a1488db55b767f8ae9d4fa',
            'duration': 530.2739166666679,
            'thumbnail': r're:^https?://.*\.jpg$',
        },
        'params': {
            'skip_download': 'm3u8',
        },
    }

    def _real_extract(self, url):
        """Hand ``url`` to the shared Uplynk helper and return its info dict."""
        info = self._extract_uplynk_info(url)
        return info
class UplynkPreplayIE(UplynkIE): # XXX: Do not subclass from concrete IE
class UplynkPreplayIE(UplynkBaseIE):
IE_NAME = 'uplynk:preplay'
_VALID_URL = r'https?://.*?\.uplynk\.com/preplay2?/(?P<path>ext/[0-9a-f]{32}/(?P<external_id>[^/?&]+)|(?P<id>[0-9a-f]{32}))\.json'
_VALID_URL = r'https?://[\w-]+\.uplynk\.com/preplay2?/(?P<path>ext/[0-9a-f]{32}/(?P<external_id>[^/?&]+)|(?P<id>[0-9a-f]{32}))\.json'
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
path, external_id, video_id = self._match_valid_url(url).groups()
display_id = video_id or external_id
preplay = self._download_json(url, display_id)
content_url = 'http://content.uplynk.com/%s.m3u8' % path
content_url = f'http://content.uplynk.com/{path}.m3u8'
session_id = preplay.get('sid')
if session_id:
content_url += '?pbs=' + session_id
return self._extract_uplynk_info(content_url)
content_url = update_url_query(content_url, {'pbs': session_id})
return self._extract_uplynk_info(smuggle_url(content_url, smuggled_data))

View File

@ -4382,6 +4382,25 @@ def _extract_basic_item_renderer(item):
elif key.startswith('grid') and key.endswith('Renderer'):
return renderer
def _extract_channel_renderer(self, renderer):
    """Turn a channel renderer into a ``url``-type result for the channel page.

    @param renderer     dict for one channel item; must contain ``channelId``
    @returns            dict of ``_type`` ``url`` pointing at
                        ``https://www.youtube.com/channel/<channelId>``, to be
                        handled by ``YoutubeTabIE``, carrying the metadata read
                        from the renderer (title, counts, thumbnails,
                        description snippet)
    """
    cid = renderer['channelId']
    name = self._get_text(renderer, 'title')
    link = f'https://www.youtube.com/channel/{cid}'

    # Identification of the result and of the extractor that should process it
    entry = {
        '_type': 'url',
        'url': link,
        'id': cid,
        'ie_key': YoutubeTabIE.ie_key(),
    }
    # The channel name doubles as the title of the entry
    entry.update({
        'channel': name,
        'channel_id': cid,
        'channel_url': link,
        'title': name,
    })
    # Remaining metadata is read from the renderer through the shared helpers
    entry['channel_follower_count'] = self._get_count(renderer, 'subscriberCountText')
    entry['thumbnails'] = self._extract_thumbnails(renderer, 'thumbnail')
    entry['playlist_count'] = self._get_count(renderer, 'videoCountText')
    entry['description'] = self._get_text(renderer, 'descriptionSnippet')
    return entry
def _grid_entries(self, grid_renderer):
for item in grid_renderer['items']:
if not isinstance(item, dict):
@ -4407,9 +4426,7 @@ def _grid_entries(self, grid_renderer):
# channel
channel_id = renderer.get('channelId')
if channel_id:
yield self.url_result(
'https://www.youtube.com/channel/%s' % channel_id,
ie=YoutubeTabIE.ie_key(), video_title=title)
yield self._extract_channel_renderer(renderer)
continue
# generic endpoint URL support
ep_url = urljoin('https://www.youtube.com/', try_get(
@ -5762,7 +5779,6 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'uploader': 'cole-dlp-test-acc',
'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
'channel': 'cole-dlp-test-acc',
'channel_follower_count': int,
},
'playlist_mincount': 1,
'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},
@ -5930,7 +5946,6 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'title': 'cole-dlp-test-acc - Shorts',
'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
'channel': 'cole-dlp-test-acc',
'channel_follower_count': int,
'description': 'test description',
'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
@ -5976,8 +5991,40 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'channel': str,
}
}],
'params': {'extract_flat': True},
'params': {'extract_flat': True, 'playlist_items': '1'},
'playlist_mincount': 1
}, {
# Channel renderer metadata. Contains number of videos on the channel
'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/channels',
'info_dict': {
'id': 'UCiu-3thuViMebBjw_5nWYrA',
'title': 'cole-dlp-test-acc - Channels',
'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
'channel': 'cole-dlp-test-acc',
'description': 'test description',
'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
'tags': [],
'uploader': 'cole-dlp-test-acc',
'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
},
'playlist': [{
'info_dict': {
'_type': 'url',
'ie_key': 'YoutubeTab',
'url': 'https://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw',
'id': 'UC-lHJZR3Gqxm24_Vd_AJ5Yw',
'channel_id': 'UC-lHJZR3Gqxm24_Vd_AJ5Yw',
'title': 'PewDiePie',
'channel': 'PewDiePie',
'channel_url': 'https://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw',
'thumbnails': list,
'channel_follower_count': int,
'playlist_count': int
}
}],
'params': {'extract_flat': True},
}]
@classmethod
@ -6531,6 +6578,30 @@ class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
# 'title': '#cats',
# }],
},
}, {
# Channel results
'url': 'https://www.youtube.com/results?search_query=kurzgesagt&sp=EgIQAg%253D%253D',
'info_dict': {
'id': 'kurzgesagt',
'title': 'kurzgesagt',
},
'playlist': [{
'info_dict': {
'_type': 'url',
'id': 'UCsXVk37bltHxD1rDPwtNM8Q',
'url': 'https://www.youtube.com/channel/UCsXVk37bltHxD1rDPwtNM8Q',
'ie_key': 'YoutubeTab',
'channel': 'Kurzgesagt In a Nutshell',
'description': 'md5:4ae48dfa9505ffc307dad26342d06bfc',
'title': 'Kurzgesagt In a Nutshell',
'channel_id': 'UCsXVk37bltHxD1rDPwtNM8Q',
'playlist_count': int, # XXX: should have a way of saying > 1
'channel_url': 'https://www.youtube.com/channel/UCsXVk37bltHxD1rDPwtNM8Q',
'thumbnails': list
}
}],
'params': {'extract_flat': True, 'playlist_items': '1'},
'playlist_mincount': 1,
}, {
'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
'only_matching': True,