Updated to release 2020.11.26

This commit is contained in:
pukkandan 2020-11-26 22:57:34 +05:30
parent 02ced43cbf
commit 38d7028407
11 changed files with 896 additions and 493 deletions

View File

@ -477,6 +477,7 @@ # Supported sites
- **massengeschmack.tv** - **massengeschmack.tv**
- **MatchTV** - **MatchTV**
- **MDR**: MDR.DE and KiKA - **MDR**: MDR.DE and KiKA
- **MedalTV**
- **media.ccc.de** - **media.ccc.de**
- **media.ccc.de:lists** - **media.ccc.de:lists**
- **Medialaan** - **Medialaan**
@ -846,6 +847,10 @@ # Supported sites
- **Sport5** - **Sport5**
- **SportBox** - **SportBox**
- **SportDeutschland** - **SportDeutschland**
- **Spreaker**
- **SpreakerPage**
- **SpreakerShow**
- **SpreakerShowPage**
- **SpringboardPlatform** - **SpringboardPlatform**
- **Sprout** - **Sprout**
- **sr:mediathek**: Saarländischer Rundfunk - **sr:mediathek**: Saarländischer Rundfunk
@ -1064,7 +1069,7 @@ # Supported sites
- **vk:wallpost** - **vk:wallpost**
- **vlive** - **vlive**
- **vlive:channel** - **vlive:channel**
- **vlive:playlist** - **vlive:post**
- **Vodlocker** - **Vodlocker**
- **VODPl** - **VODPl**
- **VODPlatform** - **VODPlatform**

View File

@ -97,12 +97,15 @@ def _write_ytdl_file(self, ctx):
def _download_fragment(self, ctx, frag_url, info_dict, headers=None): def _download_fragment(self, ctx, frag_url, info_dict, headers=None):
fragment_filename = '%s-Frag%d' % (ctx['tmpfilename'], ctx['fragment_index']) fragment_filename = '%s-Frag%d' % (ctx['tmpfilename'], ctx['fragment_index'])
success = ctx['dl'].download(fragment_filename, { fragment_info_dict = {
'url': frag_url, 'url': frag_url,
'http_headers': headers or info_dict.get('http_headers'), 'http_headers': headers or info_dict.get('http_headers'),
}) }
success = ctx['dl'].download(fragment_filename, fragment_info_dict)
if not success: if not success:
return False, None return False, None
if fragment_info_dict.get('filetime'):
ctx['fragment_filetime'] = fragment_info_dict.get('filetime')
down, frag_sanitized = sanitize_open(fragment_filename, 'rb') down, frag_sanitized = sanitize_open(fragment_filename, 'rb')
ctx['fragment_filename_sanitized'] = frag_sanitized ctx['fragment_filename_sanitized'] = frag_sanitized
frag_content = down.read() frag_content = down.read()
@ -258,6 +261,13 @@ def _finish_frag_download(self, ctx):
downloaded_bytes = ctx['complete_frags_downloaded_bytes'] downloaded_bytes = ctx['complete_frags_downloaded_bytes']
else: else:
self.try_rename(ctx['tmpfilename'], ctx['filename']) self.try_rename(ctx['tmpfilename'], ctx['filename'])
if self.params.get('updatetime', True):
filetime = ctx.get('fragment_filetime')
if filetime:
try:
os.utime(ctx['filename'], (time.time(), filetime))
except Exception:
pass
downloaded_bytes = os.path.getsize(encodeFilename(ctx['filename'])) downloaded_bytes = os.path.getsize(encodeFilename(ctx['filename']))
self._hook_progress({ self._hook_progress({

View File

@ -981,7 +981,7 @@ def _real_extract(self, url):
group_id = self._search_regex( group_id = self._search_regex(
r'<div[^>]+\bclass=["\']video["\'][^>]+\bdata-pid=["\'](%s)' % self._ID_REGEX, r'<div[^>]+\bclass=["\']video["\'][^>]+\bdata-pid=["\'](%s)' % self._ID_REGEX,
webpage, 'group id', default=None) webpage, 'group id', default=None)
if playlist_id: if group_id:
return self.url_result( return self.url_result(
'https://www.bbc.co.uk/programmes/%s' % group_id, 'https://www.bbc.co.uk/programmes/%s' % group_id,
ie=BBCCoUkIE.ie_key()) ie=BBCCoUkIE.ie_key())
@ -1092,10 +1092,26 @@ def _real_extract(self, url):
self._search_regex( self._search_regex(
r'(?s)bbcthreeConfig\s*=\s*({.+?})\s*;\s*<', webpage, r'(?s)bbcthreeConfig\s*=\s*({.+?})\s*;\s*<', webpage,
'bbcthree config', default='{}'), 'bbcthree config', default='{}'),
playlist_id, transform_source=js_to_json, fatal=False) playlist_id, transform_source=js_to_json, fatal=False) or {}
if bbc3_config: payload = bbc3_config.get('payload') or {}
if payload:
clip = payload.get('currentClip') or {}
clip_vpid = clip.get('vpid')
clip_title = clip.get('title')
if clip_vpid and clip_title:
formats, subtitles = self._download_media_selector(clip_vpid)
self._sort_formats(formats)
return {
'id': clip_vpid,
'title': clip_title,
'thumbnail': dict_get(clip, ('poster', 'imageUrl')),
'description': clip.get('description'),
'duration': parse_duration(clip.get('duration')),
'formats': formats,
'subtitles': subtitles,
}
bbc3_playlist = try_get( bbc3_playlist = try_get(
bbc3_config, lambda x: x['payload']['content']['bbcMedia']['playlist'], payload, lambda x: x['content']['bbcMedia']['playlist'],
dict) dict)
if bbc3_playlist: if bbc3_playlist:
playlist_title = bbc3_playlist.get('title') or playlist_title playlist_title = bbc3_playlist.get('title') or playlist_title
@ -1118,6 +1134,39 @@ def _real_extract(self, url):
return self.playlist_result( return self.playlist_result(
entries, playlist_id, playlist_title, playlist_description) entries, playlist_id, playlist_title, playlist_description)
initial_data = self._parse_json(self._search_regex(
r'window\.__INITIAL_DATA__\s*=\s*({.+?});', webpage,
'preload state', default='{}'), playlist_id, fatal=False)
if initial_data:
def parse_media(media):
if not media:
return
for item in (try_get(media, lambda x: x['media']['items'], list) or []):
item_id = item.get('id')
item_title = item.get('title')
if not (item_id and item_title):
continue
formats, subtitles = self._download_media_selector(item_id)
self._sort_formats(formats)
entries.append({
'id': item_id,
'title': item_title,
'thumbnail': item.get('holdingImageUrl'),
'formats': formats,
'subtitles': subtitles,
})
for resp in (initial_data.get('data') or {}).values():
name = resp.get('name')
if name == 'media-experience':
parse_media(try_get(resp, lambda x: x['data']['initialItem']['mediaItem'], dict))
elif name == 'article':
for block in (try_get(resp, lambda x: x['data']['blocks'], list) or []):
if block.get('type') != 'media':
continue
parse_media(block.get('model'))
return self.playlist_result(
entries, playlist_id, playlist_title, playlist_description)
def extract_all(pattern): def extract_all(pattern):
return list(filter(None, map( return list(filter(None, map(
lambda s: self._parse_json(s, playlist_id, fatal=False), lambda s: self._parse_json(s, playlist_id, fatal=False),

View File

@ -5,10 +5,16 @@
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import (
compat_chr,
compat_ord,
compat_urllib_parse_unquote,
)
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
float_or_none, float_or_none,
int_or_none, int_or_none,
merge_dicts,
multipart_encode, multipart_encode,
parse_duration, parse_duration,
random_birthday, random_birthday,
@ -107,8 +113,9 @@ def _real_extract(self, url):
r'Odsłony:(?:\s|&nbsp;)*([0-9]+)', webpage, r'Odsłony:(?:\s|&nbsp;)*([0-9]+)', webpage,
'view_count', default=None) 'view_count', default=None)
average_rating = self._search_regex( average_rating = self._search_regex(
r'<(?:span|meta)[^>]+itemprop=(["\'])ratingValue\1[^>]*>(?P<rating_value>[0-9.]+)', (r'<(?:span|meta)[^>]+itemprop=(["\'])ratingValue\1[^>]*>(?P<rating_value>[0-9.]+)',
webpage, 'rating', fatal=False, group='rating_value') r'<span[^>]+\bclass=["\']rating["\'][^>]*>(?P<rating_value>[0-9.]+)'), webpage, 'rating', fatal=False,
group='rating_value')
info_dict = { info_dict = {
'id': video_id, 'id': video_id,
@ -123,6 +130,24 @@ def _real_extract(self, url):
'age_limit': 18 if need_confirm_age else 0, 'age_limit': 18 if need_confirm_age else 0,
} }
# Source: https://www.cda.pl/js/player.js?t=1606154898
def decrypt_file(a):
for p in ('_XDDD', '_CDA', '_ADC', '_CXD', '_QWE', '_Q5', '_IKSDE'):
a = a.replace(p, '')
a = compat_urllib_parse_unquote(a)
b = []
for c in a:
f = compat_ord(c)
b.append(compat_chr(33 + (f + 14) % 94) if 33 <= f and 126 >= f else compat_chr(f))
a = ''.join(b)
a = a.replace('.cda.mp4', '')
for p in ('.2cda.pl', '.3cda.pl'):
a = a.replace(p, '.cda.pl')
if '/upstream' in a:
a = a.replace('/upstream', '.mp4/upstream')
return 'https://' + a
return 'https://' + a + '.mp4'
def extract_format(page, version): def extract_format(page, version):
json_str = self._html_search_regex( json_str = self._html_search_regex(
r'player_data=(\\?["\'])(?P<player_data>.+?)\1', page, r'player_data=(\\?["\'])(?P<player_data>.+?)\1', page,
@ -141,6 +166,8 @@ def extract_format(page, version):
video['file'] = codecs.decode(video['file'], 'rot_13') video['file'] = codecs.decode(video['file'], 'rot_13')
if video['file'].endswith('adc.mp4'): if video['file'].endswith('adc.mp4'):
video['file'] = video['file'].replace('adc.mp4', '.mp4') video['file'] = video['file'].replace('adc.mp4', '.mp4')
elif not video['file'].startswith('http'):
video['file'] = decrypt_file(video['file'])
f = { f = {
'url': video['file'], 'url': video['file'],
} }
@ -179,4 +206,6 @@ def extract_format(page, version):
self._sort_formats(formats) self._sort_formats(formats)
return info_dict info = self._search_json_ld(webpage, video_id, default={})
return merge_dicts(info_dict, info)

View File

@ -620,6 +620,7 @@
from .massengeschmacktv import MassengeschmackTVIE from .massengeschmacktv import MassengeschmackTVIE
from .matchtv import MatchTVIE from .matchtv import MatchTVIE
from .mdr import MDRIE from .mdr import MDRIE
from .medaltv import MedalTVIE
from .mediaset import MediasetIE from .mediaset import MediasetIE
from .mediasite import ( from .mediasite import (
MediasiteIE, MediasiteIE,
@ -1102,6 +1103,12 @@
from .sport5 import Sport5IE from .sport5 import Sport5IE
from .sportbox import SportBoxIE from .sportbox import SportBoxIE
from .sportdeutschland import SportDeutschlandIE from .sportdeutschland import SportDeutschlandIE
from .spreaker import (
SpreakerIE,
SpreakerPageIE,
SpreakerShowIE,
SpreakerShowPageIE,
)
from .springboardplatform import SpringboardPlatformIE from .springboardplatform import SpringboardPlatformIE
from .sprout import SproutIE from .sprout import SproutIE
from .srgssr import ( from .srgssr import (
@ -1395,8 +1402,8 @@
) )
from .vlive import ( from .vlive import (
VLiveIE, VLiveIE,
VLivePostIE,
VLiveChannelIE, VLiveChannelIE,
VLivePlaylistIE
) )
from .vodlocker import VodlockerIE from .vodlocker import VodlockerIE
from .vodpl import VODPlIE from .vodpl import VODPlIE

View File

@ -0,0 +1,131 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
ExtractorError,
float_or_none,
int_or_none,
str_or_none,
try_get,
)
class MedalTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?medal\.tv/clips/(?P<id>[0-9]+)'
_TESTS = [{
'url': 'https://medal.tv/clips/34934644/3Is9zyGMoBMr',
'md5': '7b07b064331b1cf9e8e5c52a06ae68fa',
'info_dict': {
'id': '34934644',
'ext': 'mp4',
'title': 'Quad Cold',
'description': 'Medal,https://medal.tv/desktop/',
'uploader': 'MowgliSB',
'timestamp': 1603165266,
'upload_date': '20201020',
'uploader_id': 10619174,
}
}, {
'url': 'https://medal.tv/clips/36787208',
'md5': 'b6dc76b78195fff0b4f8bf4a33ec2148',
'info_dict': {
'id': '36787208',
'ext': 'mp4',
'title': 'u tk me i tk u bigger',
'description': 'Medal,https://medal.tv/desktop/',
'uploader': 'Mimicc',
'timestamp': 1605580939,
'upload_date': '20201117',
'uploader_id': 5156321,
}
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
hydration_data = self._parse_json(self._search_regex(
r'<script[^>]*>\s*(?:var\s*)?hydrationData\s*=\s*({.+?})\s*</script>',
webpage, 'hydration data', default='{}'), video_id)
clip = try_get(
hydration_data, lambda x: x['clips'][video_id], dict) or {}
if not clip:
raise ExtractorError(
'Could not find video information.', video_id=video_id)
title = clip['contentTitle']
source_width = int_or_none(clip.get('sourceWidth'))
source_height = int_or_none(clip.get('sourceHeight'))
aspect_ratio = source_width / source_height if source_width and source_height else 16 / 9
def add_item(container, item_url, height, id_key='format_id', item_id=None):
item_id = item_id or '%dp' % height
if item_id not in item_url:
return
width = int(round(aspect_ratio * height))
container.append({
'url': item_url,
id_key: item_id,
'width': width,
'height': height
})
formats = []
thumbnails = []
for k, v in clip.items():
if not (v and isinstance(v, compat_str)):
continue
mobj = re.match(r'(contentUrl|thumbnail)(?:(\d+)p)?$', k)
if not mobj:
continue
prefix = mobj.group(1)
height = int_or_none(mobj.group(2))
if prefix == 'contentUrl':
add_item(
formats, v, height or source_height,
item_id=None if height else 'source')
elif prefix == 'thumbnail':
add_item(thumbnails, v, height, 'id')
error = clip.get('error')
if not formats and error:
if error == 404:
raise ExtractorError(
'That clip does not exist.',
expected=True, video_id=video_id)
else:
raise ExtractorError(
'An unknown error occurred ({0}).'.format(error),
video_id=video_id)
self._sort_formats(formats)
# Necessary because the id of the author is not known in advance.
# Won't raise an issue if no profile can be found as this is optional.
author = try_get(
hydration_data, lambda x: list(x['profiles'].values())[0], dict) or {}
author_id = str_or_none(author.get('id'))
author_url = 'https://medal.tv/users/{0}'.format(author_id) if author_id else None
return {
'id': video_id,
'title': title,
'formats': formats,
'thumbnails': thumbnails,
'description': clip.get('contentDescription'),
'uploader': author.get('displayName'),
'timestamp': float_or_none(clip.get('created'), 1000),
'uploader_id': author_id,
'uploader_url': author_url,
'duration': int_or_none(clip.get('videoLengthSeconds')),
'view_count': int_or_none(clip.get('views')),
'like_count': int_or_none(clip.get('likes')),
'comment_count': int_or_none(clip.get('comments')),
}

View File

@ -9,6 +9,7 @@
compat_urllib_parse_unquote, compat_urllib_parse_unquote,
) )
from ..utils import ( from ..utils import (
determine_ext,
ExtractorError, ExtractorError,
int_or_none, int_or_none,
js_to_json, js_to_json,
@ -16,17 +17,269 @@
parse_age_limit, parse_age_limit,
parse_duration, parse_duration,
try_get, try_get,
url_or_none,
) )
class NRKBaseIE(InfoExtractor): class NRKBaseIE(InfoExtractor):
_GEO_COUNTRIES = ['NO'] _GEO_COUNTRIES = ['NO']
_api_host = None
class NRKIE(NRKBaseIE):
_VALID_URL = r'''(?x)
(?:
nrk:|
https?://
(?:
(?:www\.)?nrk\.no/video/(?:PS\*|[^_]+_)|
v8[-.]psapi\.nrk\.no/mediaelement/
)
)
(?P<id>[^?\#&]+)
'''
_TESTS = [{
# video
'url': 'http://www.nrk.no/video/PS*150533',
'md5': '706f34cdf1322577589e369e522b50ef',
'info_dict': {
'id': '150533',
'ext': 'mp4',
'title': 'Dompap og andre fugler i Piip-Show',
'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f',
'duration': 262,
}
}, {
# audio
'url': 'http://www.nrk.no/video/PS*154915',
# MD5 is unstable
'info_dict': {
'id': '154915',
'ext': 'flv',
'title': 'Slik høres internett ut når du er blind',
'description': 'md5:a621f5cc1bd75c8d5104cb048c6b8568',
'duration': 20,
}
}, {
'url': 'nrk:ecc1b952-96dc-4a98-81b9-5296dc7a98d9',
'only_matching': True,
}, {
'url': 'nrk:clip/7707d5a3-ebe7-434a-87d5-a3ebe7a34a70',
'only_matching': True,
}, {
'url': 'https://v8-psapi.nrk.no/mediaelement/ecc1b952-96dc-4a98-81b9-5296dc7a98d9',
'only_matching': True,
}, {
'url': 'https://www.nrk.no/video/dompap-og-andre-fugler-i-piip-show_150533',
'only_matching': True,
}, {
'url': 'https://www.nrk.no/video/humor/kommentatorboksen-reiser-til-sjos_d1fda11f-a4ad-437a-a374-0398bc84e999',
'only_matching': True,
}]
def _extract_from_playback(self, video_id):
manifest = self._download_json(
'http://psapi.nrk.no/playback/manifest/%s' % video_id,
video_id, 'Downloading manifest JSON')
playable = manifest['playable']
formats = []
for asset in playable['assets']:
if not isinstance(asset, dict):
continue
if asset.get('encrypted'):
continue
format_url = url_or_none(asset.get('url'))
if not format_url:
continue
if asset.get('format') == 'HLS' or determine_ext(format_url) == 'm3u8':
formats.extend(self._extract_m3u8_formats(
format_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False))
self._sort_formats(formats)
data = self._download_json(
'http://psapi.nrk.no/playback/metadata/%s' % video_id,
video_id, 'Downloading metadata JSON')
preplay = data['preplay']
titles = preplay['titles']
title = titles['title']
alt_title = titles.get('subtitle')
description = preplay.get('description')
duration = parse_duration(playable.get('duration')) or parse_duration(data.get('duration'))
thumbnails = []
for image in try_get(
preplay, lambda x: x['poster']['images'], list) or []:
if not isinstance(image, dict):
continue
image_url = url_or_none(image.get('url'))
if not image_url:
continue
thumbnails.append({
'url': image_url,
'width': int_or_none(image.get('pixelWidth')),
'height': int_or_none(image.get('pixelHeight')),
})
return {
'id': video_id,
'title': title,
'alt_title': alt_title,
'description': description,
'duration': duration,
'thumbnails': thumbnails,
'formats': formats,
}
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
return self._extract_from_playback(video_id)
class NRKTVIE(NRKBaseIE):
IE_DESC = 'NRK TV and NRK Radio'
_EPISODE_RE = r'(?P<id>[a-zA-Z]{4}\d{8})'
_VALID_URL = r'''(?x)
https?://
(?:tv|radio)\.nrk(?:super)?\.no/
(?:serie(?:/[^/]+){1,2}|program)/
(?![Ee]pisodes)%s
(?:/\d{2}-\d{2}-\d{4})?
(?:\#del=(?P<part_id>\d+))?
''' % _EPISODE_RE
_API_HOSTS = ('psapi-ne.nrk.no', 'psapi-we.nrk.no')
_TESTS = [{
'url': 'https://tv.nrk.no/program/MDDP12000117',
'md5': '8270824df46ec629b66aeaa5796b36fb',
'info_dict': {
'id': 'MDDP12000117AA',
'ext': 'mp4',
'title': 'Alarm Trolltunga',
'description': 'md5:46923a6e6510eefcce23d5ef2a58f2ce',
'duration': 2223,
'age_limit': 6,
},
}, {
'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',
'md5': '9a167e54d04671eb6317a37b7bc8a280',
'info_dict': {
'id': 'MUHH48000314AA',
'ext': 'mp4',
'title': '20 spørsmål 23.05.2014',
'description': 'md5:bdea103bc35494c143c6a9acdd84887a',
'duration': 1741,
'series': '20 spørsmål',
'episode': '23.05.2014',
},
'skip': 'NoProgramRights',
}, {
'url': 'https://tv.nrk.no/program/mdfp15000514',
'info_dict': {
'id': 'MDFP15000514CA',
'ext': 'mp4',
'title': 'Grunnlovsjubiléet - Stor ståhei for ingenting 24.05.2014',
'description': 'md5:89290c5ccde1b3a24bb8050ab67fe1db',
'duration': 4605,
'series': 'Kunnskapskanalen',
'episode': '24.05.2014',
},
'params': {
'skip_download': True,
},
}, {
# single playlist video
'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015#del=2',
'info_dict': {
'id': 'MSPO40010515-part2',
'ext': 'flv',
'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)',
'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
},
'params': {
'skip_download': True,
},
'expected_warnings': ['Video is geo restricted'],
'skip': 'particular part is not supported currently',
}, {
'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015',
'playlist': [{
'info_dict': {
'id': 'MSPO40010515AH',
'ext': 'mp4',
'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015 (Part 1)',
'description': 'md5:1f97a41f05a9486ee00c56f35f82993d',
'duration': 772,
'series': 'Tour de Ski',
'episode': '06.01.2015',
},
'params': {
'skip_download': True,
},
}, {
'info_dict': {
'id': 'MSPO40010515BH',
'ext': 'mp4',
'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015 (Part 2)',
'description': 'md5:1f97a41f05a9486ee00c56f35f82993d',
'duration': 6175,
'series': 'Tour de Ski',
'episode': '06.01.2015',
},
'params': {
'skip_download': True,
},
}],
'info_dict': {
'id': 'MSPO40010515',
'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015',
'description': 'md5:1f97a41f05a9486ee00c56f35f82993d',
},
'expected_warnings': ['Video is geo restricted'],
}, {
'url': 'https://tv.nrk.no/serie/anno/KMTE50001317/sesong-3/episode-13',
'info_dict': {
'id': 'KMTE50001317AA',
'ext': 'mp4',
'title': 'Anno 13:30',
'description': 'md5:11d9613661a8dbe6f9bef54e3a4cbbfa',
'duration': 2340,
'series': 'Anno',
'episode': '13:30',
'season_number': 3,
'episode_number': 13,
},
'params': {
'skip_download': True,
},
}, {
'url': 'https://tv.nrk.no/serie/nytt-paa-nytt/MUHH46000317/27-01-2017',
'info_dict': {
'id': 'MUHH46000317AA',
'ext': 'mp4',
'title': 'Nytt på Nytt 27.01.2017',
'description': 'md5:5358d6388fba0ea6f0b6d11c48b9eb4b',
'duration': 1796,
'series': 'Nytt på nytt',
'episode': '27.01.2017',
},
'params': {
'skip_download': True,
},
}, {
'url': 'https://radio.nrk.no/serie/dagsnytt/NPUB21019315/12-07-2015#',
'only_matching': True,
}, {
'url': 'https://tv.nrk.no/serie/lindmo/2018/MUHU11006318/avspiller',
'only_matching': True,
}]
_api_host = None
def _extract_from_mediaelement(self, video_id):
api_hosts = (self._api_host, ) if self._api_host else self._API_HOSTS api_hosts = (self._api_host, ) if self._api_host else self._API_HOSTS
for api_host in api_hosts: for api_host in api_hosts:
@ -195,190 +448,9 @@ def video_id_and_title(idx):
return self.playlist_result(entries, video_id, title, description) return self.playlist_result(entries, video_id, title, description)
def _real_extract(self, url):
class NRKIE(NRKBaseIE): video_id = self._match_id(url)
_VALID_URL = r'''(?x) return self._extract_from_mediaelement(video_id)
(?:
nrk:|
https?://
(?:
(?:www\.)?nrk\.no/video/PS\*|
v8[-.]psapi\.nrk\.no/mediaelement/
)
)
(?P<id>[^?#&]+)
'''
_API_HOSTS = ('psapi.nrk.no', 'v8-psapi.nrk.no')
_TESTS = [{
# video
'url': 'http://www.nrk.no/video/PS*150533',
'md5': '706f34cdf1322577589e369e522b50ef',
'info_dict': {
'id': '150533',
'ext': 'mp4',
'title': 'Dompap og andre fugler i Piip-Show',
'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f',
'duration': 262,
}
}, {
# audio
'url': 'http://www.nrk.no/video/PS*154915',
# MD5 is unstable
'info_dict': {
'id': '154915',
'ext': 'flv',
'title': 'Slik høres internett ut når du er blind',
'description': 'md5:a621f5cc1bd75c8d5104cb048c6b8568',
'duration': 20,
}
}, {
'url': 'nrk:ecc1b952-96dc-4a98-81b9-5296dc7a98d9',
'only_matching': True,
}, {
'url': 'nrk:clip/7707d5a3-ebe7-434a-87d5-a3ebe7a34a70',
'only_matching': True,
}, {
'url': 'https://v8-psapi.nrk.no/mediaelement/ecc1b952-96dc-4a98-81b9-5296dc7a98d9',
'only_matching': True,
}]
class NRKTVIE(NRKBaseIE):
IE_DESC = 'NRK TV and NRK Radio'
_EPISODE_RE = r'(?P<id>[a-zA-Z]{4}\d{8})'
_VALID_URL = r'''(?x)
https?://
(?:tv|radio)\.nrk(?:super)?\.no/
(?:serie(?:/[^/]+){1,2}|program)/
(?![Ee]pisodes)%s
(?:/\d{2}-\d{2}-\d{4})?
(?:\#del=(?P<part_id>\d+))?
''' % _EPISODE_RE
_API_HOSTS = ('psapi-ne.nrk.no', 'psapi-we.nrk.no')
_TESTS = [{
'url': 'https://tv.nrk.no/program/MDDP12000117',
'md5': '8270824df46ec629b66aeaa5796b36fb',
'info_dict': {
'id': 'MDDP12000117AA',
'ext': 'mp4',
'title': 'Alarm Trolltunga',
'description': 'md5:46923a6e6510eefcce23d5ef2a58f2ce',
'duration': 2223,
'age_limit': 6,
},
}, {
'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',
'md5': '9a167e54d04671eb6317a37b7bc8a280',
'info_dict': {
'id': 'MUHH48000314AA',
'ext': 'mp4',
'title': '20 spørsmål 23.05.2014',
'description': 'md5:bdea103bc35494c143c6a9acdd84887a',
'duration': 1741,
'series': '20 spørsmål',
'episode': '23.05.2014',
},
'skip': 'NoProgramRights',
}, {
'url': 'https://tv.nrk.no/program/mdfp15000514',
'info_dict': {
'id': 'MDFP15000514CA',
'ext': 'mp4',
'title': 'Grunnlovsjubiléet - Stor ståhei for ingenting 24.05.2014',
'description': 'md5:89290c5ccde1b3a24bb8050ab67fe1db',
'duration': 4605,
'series': 'Kunnskapskanalen',
'episode': '24.05.2014',
},
'params': {
'skip_download': True,
},
}, {
# single playlist video
'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015#del=2',
'info_dict': {
'id': 'MSPO40010515-part2',
'ext': 'flv',
'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)',
'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
},
'params': {
'skip_download': True,
},
'expected_warnings': ['Video is geo restricted'],
'skip': 'particular part is not supported currently',
}, {
'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015',
'playlist': [{
'info_dict': {
'id': 'MSPO40010515AH',
'ext': 'mp4',
'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015 (Part 1)',
'description': 'md5:1f97a41f05a9486ee00c56f35f82993d',
'duration': 772,
'series': 'Tour de Ski',
'episode': '06.01.2015',
},
'params': {
'skip_download': True,
},
}, {
'info_dict': {
'id': 'MSPO40010515BH',
'ext': 'mp4',
'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015 (Part 2)',
'description': 'md5:1f97a41f05a9486ee00c56f35f82993d',
'duration': 6175,
'series': 'Tour de Ski',
'episode': '06.01.2015',
},
'params': {
'skip_download': True,
},
}],
'info_dict': {
'id': 'MSPO40010515',
'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015',
'description': 'md5:1f97a41f05a9486ee00c56f35f82993d',
},
'expected_warnings': ['Video is geo restricted'],
}, {
'url': 'https://tv.nrk.no/serie/anno/KMTE50001317/sesong-3/episode-13',
'info_dict': {
'id': 'KMTE50001317AA',
'ext': 'mp4',
'title': 'Anno 13:30',
'description': 'md5:11d9613661a8dbe6f9bef54e3a4cbbfa',
'duration': 2340,
'series': 'Anno',
'episode': '13:30',
'season_number': 3,
'episode_number': 13,
},
'params': {
'skip_download': True,
},
}, {
'url': 'https://tv.nrk.no/serie/nytt-paa-nytt/MUHH46000317/27-01-2017',
'info_dict': {
'id': 'MUHH46000317AA',
'ext': 'mp4',
'title': 'Nytt på Nytt 27.01.2017',
'description': 'md5:5358d6388fba0ea6f0b6d11c48b9eb4b',
'duration': 1796,
'series': 'Nytt på nytt',
'episode': '27.01.2017',
},
'params': {
'skip_download': True,
},
}, {
'url': 'https://radio.nrk.no/serie/dagsnytt/NPUB21019315/12-07-2015#',
'only_matching': True,
}, {
'url': 'https://tv.nrk.no/serie/lindmo/2018/MUHU11006318/avspiller',
'only_matching': True,
}]
class NRKTVEpisodeIE(InfoExtractor): class NRKTVEpisodeIE(InfoExtractor):

View File

@ -0,0 +1,176 @@
# coding: utf-8
from __future__ import unicode_literals
import itertools
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
float_or_none,
int_or_none,
str_or_none,
try_get,
unified_timestamp,
url_or_none,
)
def _extract_episode(data, episode_id=None):
title = data['title']
download_url = data['download_url']
series = try_get(data, lambda x: x['show']['title'], compat_str)
uploader = try_get(data, lambda x: x['author']['fullname'], compat_str)
thumbnails = []
for image in ('image_original', 'image_medium', 'image'):
image_url = url_or_none(data.get('%s_url' % image))
if image_url:
thumbnails.append({'url': image_url})
def stats(key):
return int_or_none(try_get(
data,
(lambda x: x['%ss_count' % key],
lambda x: x['stats']['%ss' % key])))
def duration(key):
return float_or_none(data.get(key), scale=1000)
return {
'id': compat_str(episode_id or data['episode_id']),
'url': download_url,
'display_id': data.get('permalink'),
'title': title,
'description': data.get('description'),
'timestamp': unified_timestamp(data.get('published_at')),
'uploader': uploader,
'uploader_id': str_or_none(data.get('author_id')),
'creator': uploader,
'duration': duration('duration') or duration('length'),
'view_count': stats('play'),
'like_count': stats('like'),
'comment_count': stats('message'),
'format': 'MPEG Layer 3',
'format_id': 'mp3',
'container': 'mp3',
'ext': 'mp3',
'thumbnails': thumbnails,
'series': series,
'extractor_key': SpreakerIE.ie_key(),
}
class SpreakerIE(InfoExtractor):
_VALID_URL = r'''(?x)
https?://
api\.spreaker\.com/
(?:
(?:download/)?episode|
v2/episodes
)/
(?P<id>\d+)
'''
_TESTS = [{
'url': 'https://api.spreaker.com/episode/12534508',
'info_dict': {
'id': '12534508',
'display_id': 'swm-ep15-how-to-market-your-music-part-2',
'ext': 'mp3',
'title': 'EP:15 | Music Marketing (Likes) - Part 2',
'description': 'md5:0588c43e27be46423e183076fa071177',
'timestamp': 1502250336,
'upload_date': '20170809',
'uploader': 'SWM',
'uploader_id': '9780658',
'duration': 1063.42,
'view_count': int,
'like_count': int,
'comment_count': int,
'series': 'Success With Music (SWM)',
},
}, {
'url': 'https://api.spreaker.com/download/episode/12534508/swm_ep15_how_to_market_your_music_part_2.mp3',
'only_matching': True,
}, {
'url': 'https://api.spreaker.com/v2/episodes/12534508?export=episode_segments',
'only_matching': True,
}]
def _real_extract(self, url):
episode_id = self._match_id(url)
data = self._download_json(
'https://api.spreaker.com/v2/episodes/%s' % episode_id,
episode_id)['response']['episode']
return _extract_episode(data, episode_id)
class SpreakerPageIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?spreaker\.com/user/[^/]+/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://www.spreaker.com/user/9780658/swm-ep15-how-to-market-your-music-part-2',
'only_matching': True,
}]
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
episode_id = self._search_regex(
(r'data-episode_id=["\'](?P<id>\d+)',
r'episode_id\s*:\s*(?P<id>\d+)'), webpage, 'episode id')
return self.url_result(
'https://api.spreaker.com/episode/%s' % episode_id,
ie=SpreakerIE.ie_key(), video_id=episode_id)
class SpreakerShowIE(InfoExtractor):
_VALID_URL = r'https?://api\.spreaker\.com/show/(?P<id>\d+)'
_TESTS = [{
'url': 'https://www.spreaker.com/show/3-ninjas-podcast',
'info_dict': {
'id': '4652058',
},
'playlist_mincount': 118,
}]
def _entries(self, show_id):
for page_num in itertools.count(1):
episodes = self._download_json(
'https://api.spreaker.com/show/%s/episodes' % show_id,
show_id, note='Downloading JSON page %d' % page_num, query={
'page': page_num,
'max_per_page': 100,
})
pager = try_get(episodes, lambda x: x['response']['pager'], dict)
if not pager:
break
results = pager.get('results')
if not results or not isinstance(results, list):
break
for result in results:
if not isinstance(result, dict):
continue
yield _extract_episode(result)
if page_num == pager.get('last_page'):
break
def _real_extract(self, url):
show_id = self._match_id(url)
return self.playlist_result(self._entries(show_id), playlist_id=show_id)
class SpreakerShowPageIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?spreaker\.com/show/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://www.spreaker.com/show/success-with-music',
'only_matching': True,
}]
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
show_id = self._search_regex(
r'show_id\s*:\s*(?P<id>\d+)', webpage, 'show id')
return self.url_result(
'https://api.spreaker.com/show/%s' % show_id,
ie=SpreakerShowIE.ie_key(), video_id=show_id)

View File

@ -21,6 +21,7 @@
parse_age_limit, parse_age_limit,
parse_iso8601, parse_iso8601,
sanitized_Request, sanitized_Request,
std_headers,
) )
@ -227,8 +228,10 @@ def _real_extract(self, url):
resp = self._download_json( resp = self._download_json(
'https://www.viki.com/api/videos/' + video_id, 'https://www.viki.com/api/videos/' + video_id,
video_id, 'Downloading video JSON', video_id, 'Downloading video JSON', headers={
headers={'x-viki-app-ver': '4.0.57'}) 'x-client-user-agent': std_headers['User-Agent'],
'x-viki-app-ver': '4.0.57',
})
video = resp['video'] video = resp['video']
self._check_errors(video) self._check_errors(video)

View File

@ -1,55 +1,50 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
import time
import itertools import itertools
import json
from .common import InfoExtractor
from .naver import NaverBaseIE from .naver import NaverBaseIE
from ..compat import compat_str from ..compat import (
compat_HTTPError,
compat_str,
)
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
int_or_none,
merge_dicts, merge_dicts,
str_or_none,
strip_or_none,
try_get, try_get,
urlencode_postdata, urlencode_postdata,
) )
class VLiveIE(NaverBaseIE): class VLiveBaseIE(NaverBaseIE):
_APP_ID = '8c6cc7b45d2568fb668be6e05b6e5a3b'
class VLiveIE(VLiveBaseIE):
IE_NAME = 'vlive' IE_NAME = 'vlive'
_VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/(?:video|post)/(?P<id>(?:\d-)?[0-9]+)' _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/(?:video|embed)/(?P<id>[0-9]+)'
_NETRC_MACHINE = 'vlive' _NETRC_MACHINE = 'vlive'
_TESTS = [{ _TESTS = [{
'url': 'https://www.vlive.tv/video/1326', 'url': 'http://www.vlive.tv/video/1326',
'md5': 'cc7314812855ce56de70a06a27314983', 'md5': 'cc7314812855ce56de70a06a27314983',
'info_dict': { 'info_dict': {
'id': '1326', 'id': '1326',
'ext': 'mp4', 'ext': 'mp4',
'title': "[V LIVE] Girl's Day's Broadcast", 'title': "Girl's Day's Broadcast",
'creator': "Girl's Day", 'creator': "Girl's Day",
'view_count': int, 'view_count': int,
'uploader_id': 'muploader_a', 'uploader_id': 'muploader_a',
}, },
}, }, {
{ 'url': 'http://www.vlive.tv/video/16937',
'url': 'https://vlive.tv/post/1-18244258',
'md5': 'cc7314812855ce56de70a06a27314983',
'info_dict': {
'id': '1326',
'ext': 'mp4',
'title': "[V LIVE] Girl's Day's Broadcast",
'creator': "Girl's Day",
'view_count': int,
'uploader_id': 'muploader_a',
},
},
{
'url': 'https://www.vlive.tv/video/16937',
'info_dict': { 'info_dict': {
'id': '16937', 'id': '16937',
'ext': 'mp4', 'ext': 'mp4',
'title': '[V LIVE] 첸백시 걍방', 'title': '첸백시 걍방',
'creator': 'EXO', 'creator': 'EXO',
'view_count': int, 'view_count': int,
'subtitles': 'mincount:12', 'subtitles': 'mincount:12',
@ -70,12 +65,15 @@ class VLiveIE(NaverBaseIE):
'subtitles': 'mincount:10', 'subtitles': 'mincount:10',
}, },
'skip': 'This video is only available for CH+ subscribers', 'skip': 'This video is only available for CH+ subscribers',
}, {
'url': 'https://www.vlive.tv/embed/1326',
'only_matching': True,
}, {
# works only with gcc=KR
'url': 'https://www.vlive.tv/video/225019',
'only_matching': True,
}] }]
@classmethod
def suitable(cls, url):
return False if VLivePlaylistIE.suitable(url) else super(VLiveIE, cls).suitable(url)
def _real_initialize(self): def _real_initialize(self):
self._login() self._login()
@ -107,118 +105,159 @@ def is_logged_in():
if not is_logged_in(): if not is_logged_in():
raise ExtractorError('Unable to log in', expected=True) raise ExtractorError('Unable to log in', expected=True)
def _call_api(self, path_template, video_id, fields=None):
query = {'appId': self._APP_ID, 'gcc': 'KR'}
if fields:
query['fields'] = fields
try:
return self._download_json(
'https://www.vlive.tv/globalv-web/vam-web/' + path_template % video_id, video_id,
'Downloading %s JSON metadata' % path_template.split('/')[-1].split('-')[0],
headers={'Referer': 'https://www.vlive.tv/'}, query=query)
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
self.raise_login_required(json.loads(e.cause.read().decode())['message'])
raise
def _real_extract(self, url): def _real_extract(self, url):
# url may match on a post or a video url with a post_id potentially matching a video_id video_id = self._match_id(url)
working_id = self._match_id(url)
webpage = self._download_webpage(url, working_id)
PARAMS_RE = r'window\.__PRELOADED_STATE__\s*=\s*({.*});?\s*</script>' post = self._call_api(
PARAMS_FIELD = 'params' 'post/v1.0/officialVideoPost-%s', video_id,
'author{nickname},channel{channelCode,channelName},officialVideo{commentCount,exposeStatus,likeCount,playCount,playTime,status,title,type,vodId}')
params = self._search_regex( video = post['officialVideo']
PARAMS_RE, webpage, PARAMS_FIELD, default='', flags=re.DOTALL)
params = self._parse_json(params, working_id, fatal=False)
video_params = try_get(params, lambda x: x["postDetail"]["post"]["officialVideo"], dict) def get_common_fields():
channel = post.get('channel') or {}
return {
'title': video.get('title'),
'creator': post.get('author', {}).get('nickname'),
'channel': channel.get('channelName'),
'channel_id': channel.get('channelCode'),
'duration': int_or_none(video.get('playTime')),
'view_count': int_or_none(video.get('playCount')),
'like_count': int_or_none(video.get('likeCount')),
'comment_count': int_or_none(video.get('commentCount')),
}
if video_params is None: video_type = video.get('type')
error = try_get(params, lambda x: x["postDetail"]["error"], dict) if video_type == 'VOD':
error_data = try_get(error, lambda x: x["data"], dict) inkey = self._call_api('video/v1.0/vod/%s/inkey', video_id)['inkey']
error_video = try_get(error_data, lambda x: x["officialVideo"], dict) vod_id = video['vodId']
error_msg = try_get(error, lambda x: x["message"], compat_str) return merge_dicts(
product_type = try_get(error_data, get_common_fields(),
[lambda x: x["officialVideo"]["productType"], self._extract_video_info(video_id, vod_id, inkey))
lambda x: x["board"]["boardType"]], elif video_type == 'LIVE':
compat_str) status = video.get('status')
if status == 'ON_AIR':
if error_video is not None: stream_url = self._call_api(
if product_type in ('VLIVE_PLUS', 'VLIVE+'): 'old/v3/live/%s/playInfo',
self.raise_login_required('This video is only available with V LIVE+.') video_id)['result']['adaptiveStreamUrl']
elif error_msg is not None: formats = self._extract_m3u8_formats(stream_url, video_id, 'mp4')
raise ExtractorError('V LIVE reported the following error: %s' % error_msg) info = get_common_fields()
else: info.update({
raise ExtractorError('Failed to extract video parameters.') 'title': self._live_title(video['title']),
elif 'post' in url: 'id': video_id,
raise ExtractorError('Url does not appear to be a video post.', expected=True) 'formats': formats,
else: 'is_live': True,
raise ExtractorError('Failed to extract video parameters.') })
return info
video_id = working_id if 'video' in url else str(video_params["videoSeq"]) elif status == 'ENDED':
raise ExtractorError(
video_type = video_params["type"] 'Uploading for replay. Please wait...', expected=True)
if video_type in ('VOD'): elif status == 'RESERVED':
encoding_status = video_params["encodingStatus"]
if encoding_status == 'COMPLETE':
return self._replay(video_id, webpage, params, video_params)
else:
raise ExtractorError('VOD encoding not yet complete. Please try again later.',
expected=True)
elif video_type in ('LIVE'):
video_status = video_params["status"]
if video_status in ('RESERVED'):
raise ExtractorError('Coming soon!', expected=True) raise ExtractorError('Coming soon!', expected=True)
elif video_status in ('ENDED', 'END'): elif video.get('exposeStatus') == 'CANCEL':
raise ExtractorError('Uploading for replay. Please wait...', expected=True) raise ExtractorError(
'We are sorry, but the live broadcast has been canceled.',
expected=True)
else: else:
return self._live(video_id, webpage, params) raise ExtractorError('Unknown status ' + status)
else:
raise ExtractorError('Unknown video type %s' % video_type)
def _get_common_fields(self, webpage, params):
title = self._og_search_title(webpage)
description = self._html_search_meta(
['og:description', 'description', 'twitter:description'],
webpage, 'description', default=None)
creator = (try_get(params, lambda x: x["channel"]["channel"]["channelName"], compat_str)
or self._search_regex(r'on (.*) channel', description or '', 'creator', fatal=False))
thumbnail = self._og_search_thumbnail(webpage)
return {
'title': title,
'creator': creator,
'thumbnail': thumbnail,
}
def _live(self, video_id, webpage, params):
LIVE_INFO_ENDPOINT = 'https://www.vlive.tv/globalv-web/vam-web/old/v3/live/%s/playInfo' % video_id
play_info = self._download_json(LIVE_INFO_ENDPOINT, video_id,
headers={"referer": "https://www.vlive.tv"})
streams = try_get(play_info, lambda x: x["result"]["streamList"], list) or []
formats = []
for stream in streams:
formats.extend(self._extract_m3u8_formats(
stream['serviceUrl'], video_id, 'mp4',
fatal=False, live=True))
self._sort_formats(formats)
info = self._get_common_fields(webpage, params)
info.update({
'title': self._live_title(info['title']),
'id': video_id,
'formats': formats,
'is_live': True,
})
return info
def _replay(self, video_id, webpage, params, video_params):
long_video_id = video_params["vodId"]
VOD_KEY_ENDPOINT = 'https://www.vlive.tv/globalv-web/vam-web/video/v1.0/vod/%s/inkey' % video_id
key_json = self._download_json(VOD_KEY_ENDPOINT, video_id,
headers={"referer": "https://www.vlive.tv"})
key = key_json["inkey"]
return merge_dicts(
self._get_common_fields(webpage, params),
self._extract_video_info(video_id, long_video_id, key))
class VLiveChannelIE(InfoExtractor): class VLivePostIE(VLiveIE):
IE_NAME = 'vlive:channel' IE_NAME = 'vlive:post'
_VALID_URL = r'https?://(?:(?:www|m)\.)?(?:channels\.vlive\.tv/|vlive\.tv/channels?/)(?P<id>[0-9A-Z]+)' _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/post/(?P<id>\d-\d+)'
_TESTS = [{ _TESTS = [{
'url': 'https://channels.vlive.tv/FCD4B', # uploadType = SOS
'url': 'https://www.vlive.tv/post/1-20088044',
'info_dict': {
'id': '1-20088044',
'title': 'Hola estrellitas la tierra les dice hola (si era así no?) Ha...',
'description': 'md5:fab8a1e50e6e51608907f46c7fa4b407',
},
'playlist_count': 3,
}, {
# uploadType = V
'url': 'https://www.vlive.tv/post/1-20087926',
'info_dict': {
'id': '1-20087926',
'title': 'James Corden: And so, the baby becamos the Papa💜😭💪😭',
},
'playlist_count': 1,
}]
_FVIDEO_TMPL = 'fvideo/v1.0/fvideo-%%s/%s'
_SOS_TMPL = _FVIDEO_TMPL % 'sosPlayInfo'
_INKEY_TMPL = _FVIDEO_TMPL % 'inKey'
def _real_extract(self, url):
post_id = self._match_id(url)
post = self._call_api(
'post/v1.0/post-%s', post_id,
'attachments{video},officialVideo{videoSeq},plainBody,title')
video_seq = str_or_none(try_get(
post, lambda x: x['officialVideo']['videoSeq']))
if video_seq:
return self.url_result(
'http://www.vlive.tv/video/' + video_seq,
VLiveIE.ie_key(), video_seq)
title = post['title']
entries = []
for idx, video in enumerate(post['attachments']['video'].values()):
video_id = video.get('videoId')
if not video_id:
continue
upload_type = video.get('uploadType')
upload_info = video.get('uploadInfo') or {}
entry = None
if upload_type == 'SOS':
download = self._call_api(
self._SOS_TMPL, video_id)['videoUrl']['download']
formats = []
for f_id, f_url in download.items():
formats.append({
'format_id': f_id,
'url': f_url,
'height': int_or_none(f_id[:-1]),
})
self._sort_formats(formats)
entry = {
'formats': formats,
'id': video_id,
'thumbnail': upload_info.get('imageUrl'),
}
elif upload_type == 'V':
vod_id = upload_info.get('videoId')
if not vod_id:
continue
inkey = self._call_api(self._INKEY_TMPL, video_id)['inKey']
entry = self._extract_video_info(video_id, vod_id, inkey)
if entry:
entry['title'] = '%s_part%s' % (title, idx)
entries.append(entry)
return self.playlist_result(
entries, post_id, title, strip_or_none(post.get('plainBody')))
class VLiveChannelIE(VLiveBaseIE):
IE_NAME = 'vlive:channel'
_VALID_URL = r'https?://(?:channels\.vlive\.tv|(?:(?:www|m)\.)?vlive\.tv/channel)/(?P<id>[0-9A-Z]+)'
_TESTS = [{
'url': 'http://channels.vlive.tv/FCD4B',
'info_dict': { 'info_dict': {
'id': 'FCD4B', 'id': 'FCD4B',
'title': 'MAMAMOO', 'title': 'MAMAMOO',
@ -226,63 +265,39 @@ class VLiveChannelIE(InfoExtractor):
'playlist_mincount': 110 'playlist_mincount': 110
}, { }, {
'url': 'https://www.vlive.tv/channel/FCD4B', 'url': 'https://www.vlive.tv/channel/FCD4B',
'info_dict': { 'only_matching': True,
'id': 'FCD4B',
'title': 'MAMAMOO',
},
'playlist_mincount': 110
}] }]
_APP_ID = '8c6cc7b45d2568fb668be6e05b6e5a3b'
def _call_api(self, path, channel_key_suffix, channel_value, note, query):
q = {
'app_id': self._APP_ID,
'channel' + channel_key_suffix: channel_value,
}
q.update(query)
return self._download_json(
'http://api.vfan.vlive.tv/vproxy/channelplus/' + path,
channel_value, note='Downloading ' + note, query=q)['result']
def _real_extract(self, url): def _real_extract(self, url):
channel_code = self._match_id(url) channel_code = self._match_id(url)
webpage = self._download_webpage( channel_seq = self._call_api(
'http://channels.vlive.tv/%s/video' % channel_code, channel_code) 'decodeChannelCode', 'Code', channel_code,
'decode channel code', {})['channelSeq']
app_id = None
app_js_url = self._search_regex(
r'<script[^>]+src=(["\'])(?P<url>http.+?/app\.js.*?)\1',
webpage, 'app js', default=None, group='url')
if app_js_url:
app_js = self._download_webpage(
app_js_url, channel_code, 'Downloading app JS', fatal=False)
if app_js:
app_id = self._search_regex(
r'Global\.VFAN_APP_ID\s*=\s*[\'"]([^\'"]+)[\'"]',
app_js, 'app id', default=None)
app_id = app_id or self._APP_ID
channel_info = self._download_json(
'http://api.vfan.vlive.tv/vproxy/channelplus/decodeChannelCode',
channel_code, note='Downloading decode channel code',
query={
'app_id': app_id,
'channelCode': channel_code,
'_': int(time.time())
})
channel_seq = channel_info['result']['channelSeq']
channel_name = None channel_name = None
entries = [] entries = []
for page_num in itertools.count(1): for page_num in itertools.count(1):
video_list = self._download_json( video_list = self._call_api(
'http://api.vfan.vlive.tv/vproxy/channelplus/getChannelVideoList', 'getChannelVideoList', 'Seq', channel_seq,
channel_code, note='Downloading channel list page #%d' % page_num, 'channel list page #%d' % page_num, {
query={
'app_id': app_id,
'channelSeq': channel_seq,
# Large values of maxNumOfRows (~300 or above) may cause # Large values of maxNumOfRows (~300 or above) may cause
# empty responses (see [1]), e.g. this happens for [2] that # empty responses (see [1]), e.g. this happens for [2] that
# has more than 300 videos. # has more than 300 videos.
# 1. https://github.com/ytdl-org/youtube-dl/issues/13830 # 1. https://github.com/ytdl-org/youtube-dl/issues/13830
# 2. http://channels.vlive.tv/EDBF. # 2. http://channels.vlive.tv/EDBF.
'maxNumOfRows': 100, 'maxNumOfRows': 100,
'_': int(time.time()),
'pageNo': page_num 'pageNo': page_num
} }
) )
@ -290,11 +305,11 @@ def _real_extract(self, url):
if not channel_name: if not channel_name:
channel_name = try_get( channel_name = try_get(
video_list, video_list,
lambda x: x['result']['channelInfo']['channelName'], lambda x: x['channelInfo']['channelName'],
compat_str) compat_str)
videos = try_get( videos = try_get(
video_list, lambda x: x['result']['videoList'], list) video_list, lambda x: x['videoList'], list)
if not videos: if not videos:
break break
@ -310,79 +325,3 @@ def _real_extract(self, url):
return self.playlist_result( return self.playlist_result(
entries, channel_code, channel_name) entries, channel_code, channel_name)
class VLivePlaylistIE(InfoExtractor):
IE_NAME = 'vlive:playlist'
_VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/video/(?P<video_id>[0-9]+)/playlist/(?P<id>[0-9]+)'
_VIDEO_URL_TEMPLATE = 'http://www.vlive.tv/video/%s'
_TESTS = [{
# regular working playlist
'url': 'https://www.vlive.tv/video/117956/playlist/117963',
'info_dict': {
'id': '117963',
'title': '아이돌룸(IDOL ROOM) 41회 - (여자)아이들'
},
'playlist_mincount': 10
}, {
# playlist with no playlistVideoSeqs
'url': 'http://www.vlive.tv/video/22867/playlist/22912',
'info_dict': {
'id': '22867',
'ext': 'mp4',
'title': '[V LIVE] Valentine Day Message from MINA',
'creator': 'TWICE',
'view_count': int
},
'params': {
'skip_download': True,
}
}]
def _build_video_result(self, video_id, message):
self.to_screen(message)
return self.url_result(
self._VIDEO_URL_TEMPLATE % video_id,
ie=VLiveIE.ie_key(), video_id=video_id)
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id, playlist_id = mobj.group('video_id', 'id')
if self._downloader.params.get('noplaylist'):
return self._build_video_result(
video_id,
'Downloading just video %s because of --no-playlist'
% video_id)
self.to_screen(
'Downloading playlist %s - add --no-playlist to just download video'
% playlist_id)
webpage = self._download_webpage(
'http://www.vlive.tv/video/%s/playlist/%s'
% (video_id, playlist_id), playlist_id)
raw_item_ids = self._search_regex(
r'playlistVideoSeqs\s*=\s*(\[[^]]+\])', webpage,
'playlist video seqs', default=None, fatal=False)
if not raw_item_ids:
return self._build_video_result(
video_id,
'Downloading just video %s because no playlist was found'
% video_id)
item_ids = self._parse_json(raw_item_ids, playlist_id)
entries = [
self.url_result(
self._VIDEO_URL_TEMPLATE % item_id, ie=VLiveIE.ie_key(),
video_id=compat_str(item_id))
for item_id in item_ids]
playlist_name = self._html_search_regex(
r'<div[^>]+class="[^"]*multicam_playlist[^>]*>\s*<h3[^>]+>([^<]+)',
webpage, 'playlist title', fatal=False)
return self.playlist_result(entries, playlist_id, playlist_name)

View File

@ -1335,44 +1335,6 @@ def _get_ytplayer_config(self, video_id, webpage):
return self._parse_json( return self._parse_json(
uppercase_escape(config), video_id, fatal=False) uppercase_escape(config), video_id, fatal=False)
def _get_music_metadata_from_yt_initial(self, yt_initial):
music_metadata = []
key_map = {
'Album': 'album',
'Artist': 'artist',
'Song': 'track'
}
contents = try_get(yt_initial, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'])
if type(contents) is list:
for content in contents:
music_track = {}
if type(content) is not dict:
continue
videoSecondaryInfoRenderer = try_get(content, lambda x: x['videoSecondaryInfoRenderer'])
if type(videoSecondaryInfoRenderer) is not dict:
continue
rows = try_get(videoSecondaryInfoRenderer, lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'])
if type(rows) is not list:
continue
for row in rows:
metadataRowRenderer = try_get(row, lambda x: x['metadataRowRenderer'])
if type(metadataRowRenderer) is not dict:
continue
key = try_get(metadataRowRenderer, lambda x: x['title']['simpleText'])
value = try_get(metadataRowRenderer, lambda x: x['contents'][0]['simpleText']) or \
try_get(metadataRowRenderer, lambda x: x['contents'][0]['runs'][0]['text'])
if type(key) is not str or type(value) is not str:
continue
if key in key_map:
if key_map[key] in music_track:
# we've started on a new track
music_metadata.append(music_track)
music_track = {}
music_track[key_map[key]] = value
if len(music_track.keys()):
music_metadata.append(music_track)
return music_metadata
def _get_automatic_captions(self, video_id, webpage): def _get_automatic_captions(self, video_id, webpage):
"""We need the webpage for getting the captions url, pass it as an """We need the webpage for getting the captions url, pass it as an
argument to speed up the process.""" argument to speed up the process."""
@ -2295,7 +2257,7 @@ def extract_meta(field):
# Youtube Music Auto-generated description # Youtube Music Auto-generated description
release_date = release_year = None release_date = release_year = None
if video_description: if video_description:
mobj = re.search(r'(?s)Provided to YouTube by [^\n]+\n+(?P<track>[^·]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?', video_description) mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
if mobj: if mobj:
if not track: if not track:
track = mobj.group('track').strip() track = mobj.group('track').strip()
@ -2312,13 +2274,33 @@ def extract_meta(field):
if release_year: if release_year:
release_year = int(release_year) release_year = int(release_year)
yt_initial = self._get_yt_initial_data(video_id, video_webpage) yt_initial_data = self._extract_yt_initial_data(video_id, video_webpage)
if yt_initial: contents = try_get(yt_initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
music_metadata = self._get_music_metadata_from_yt_initial(yt_initial) for content in contents:
if len(music_metadata): rows = try_get(content, lambda x: x['videoSecondaryInfoRenderer']['metadataRowContainer']['metadataRowContainerRenderer']['rows'], list) or []
album = music_metadata[0].get('album') multiple_songs = False
artist = music_metadata[0].get('artist') for row in rows:
track = music_metadata[0].get('track') if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
multiple_songs = True
break
for row in rows:
mrr = row.get('metadataRowRenderer') or {}
mrr_title = try_get(
mrr, lambda x: x['title']['simpleText'], compat_str)
mrr_contents = try_get(
mrr, lambda x: x['contents'][0], dict) or {}
mrr_contents_text = try_get(mrr_contents, [lambda x: x['simpleText'], lambda x: x['runs'][0]['text']], compat_str)
if not (mrr_title and mrr_contents_text):
continue
if mrr_title == 'License':
video_license = mrr_contents_text
elif not multiple_songs:
if mrr_title == 'Album':
album = mrr_contents_text
elif mrr_title == 'Artist':
artist = mrr_contents_text
elif mrr_title == 'Song':
track = mrr_contents_text
m_episode = re.search( m_episode = re.search(
r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>', r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',