2016-07-06 20:09:05 +08:00
# coding: utf-8
from __future__ import unicode_literals
import re
from . common import InfoExtractor
from . . utils import (
determine_ext ,
ExtractorError ,
float_or_none ,
get_element_by_class ,
int_or_none ,
js_to_json ,
2017-06-24 18:33:31 +07:00
NO_DEFAULT ,
2016-07-06 20:09:05 +08:00
parse_iso8601 ,
remove_start ,
strip_or_none ,
url_basename ,
)
class OnetBaseIE(InfoExtractor):
    """Shared helpers for the Onet extractors.

    Provides the common URL prefix, the lookup of the ``mvp:`` id embedded in
    a page, and the extraction of an info dict from that id.
    """

    # URL prefix shared by the page-based extractors; each one appends its
    # own tail pattern to build ``_VALID_URL``.
    _URL_BASE_RE = r'https?://(?:(?:www\.)?onet\.tv|onet100\.vod\.pl)/[a-z]/'

    def _search_mvp_id(self, webpage):
        """Return the id found in an ``id="mvp:<id>"`` attribute of *webpage*."""
        return self._search_regex(
            r'id=(["\'])mvp:(?P<id>.+?)\1', webpage, 'mvp id', group='id')

    def _extract_from_id(self, video_id, webpage=None):
        """Query the asset-detail endpoint for *video_id* and build an info dict.

        :param video_id: id sent to the endpoint (as both the request id and
            the ``ID_Publikacji`` parameter).
        :param webpage: optional page HTML; when given, its OpenGraph title
            and description take precedence over the API metadata.
        :returns: dict with ``id``, ``title``, ``description``, ``duration``,
            ``timestamp`` and ``formats``.
        :raises ExtractorError: when the response carries an ``error`` entry
            or contains no asset under ``result['0']``.
        """
        response = self._download_json(
            'http://qi.ckm.onetapi.pl/', video_id,
            query={
                'body[id]': video_id,
                'body[jsonrpc]': '2.0',
                'body[method]': 'get_asset_detail',
                'body[params][ID_Publikacji]': video_id,
                'body[params][Service]': 'www.onet.pl',
                'content-type': 'application/jsonp',
                'x-onet-app': 'player.front.onetapi.pl',
            })

        error = response.get('error')
        if error:
            raise ExtractorError(
                '%s said: %s' % (self.IE_NAME, error['message']), expected=True)

        video = response['result'].get('0')
        if not video:
            # Without this guard the lookup below fails with an opaque
            # TypeError/KeyError instead of a readable extractor error.
            raise ExtractorError(
                '%s returned no asset details for %s' % (self.IE_NAME, video_id))

        formats = []
        for format_type, formats_dict in video['formats'].items():
            # Entries that do not have the expected dict -> list shape are skipped.
            if not isinstance(formats_dict, dict):
                continue
            for format_id, format_list in formats_dict.items():
                if not isinstance(format_list, list):
                    continue
                for f in format_list:
                    video_url = f.get('url')
                    if not video_url:
                        continue
                    ext = determine_ext(video_url)
                    # Manifest-based entries are expanded by the matching
                    # helper; fatal=False keeps one broken manifest from
                    # aborting the whole extraction.
                    if format_id.startswith('ism'):
                        formats.extend(self._extract_ism_formats(
                            video_url, video_id, 'mss', fatal=False))
                    elif ext == 'mpd':
                        formats.extend(self._extract_mpd_formats(
                            video_url, video_id, mpd_id='dash', fatal=False))
                    elif format_id.startswith('hls'):
                        formats.extend(self._extract_m3u8_formats(
                            video_url, video_id, 'mp4', 'm3u8_native',
                            m3u8_id='hls', fatal=False))
                    else:
                        # Anything else is treated as a direct media URL.
                        http_f = {
                            'url': video_url,
                            'format_id': format_id,
                            'abr': float_or_none(f.get('audio_bitrate')),
                        }
                        if format_type == 'audio':
                            http_f['vcodec'] = 'none'
                        else:
                            http_f.update({
                                'height': int_or_none(f.get('vertical_resolution')),
                                'width': int_or_none(f.get('horizontal_resolution')),
                                'vbr': float_or_none(f.get('video_bitrate')),
                            })
                        formats.append(http_f)
        self._sort_formats(formats)

        meta = video.get('meta', {})

        # Page metadata wins when a webpage was supplied; the title falls back
        # to the mandatory API field, the description to the optional one.
        title = (self._og_search_title(
            webpage, default=None) if webpage else None) or meta['title']
        description = (self._og_search_description(
            webpage, default=None) if webpage else None) or meta.get('description')
        # 'lenght' (sic) is read as a fallback key for the duration.
        duration = meta.get('length') or meta.get('lenght')
        # ' ' is passed as the date/time delimiter for 'addDate'.
        timestamp = parse_iso8601(meta.get('addDate'), ' ')

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'duration': duration,
            'timestamp': timestamp,
            'formats': formats,
        }
2017-02-15 00:58:18 +07:00
class OnetMVPIE(OnetBaseIE):
    """Extractor for ``onetmvp:<id>`` pseudo-URLs that carry the id directly."""

    _VALID_URL = r'onetmvp:(?P<id>\d+\.\d+)'

    _TEST = {
        'url': 'onetmvp:381027.1509591944',
        'only_matching': True,
    }

    def _real_extract(self, url):
        """Extract using the id captured from *url*; no webpage is passed on."""
        mvp_id = self._match_id(url)
        return self._extract_from_id(mvp_id)
2016-07-06 20:09:05 +08:00
class OnetIE(OnetBaseIE):
    """Extractor for single video pages under the shared Onet URL prefix."""

    IE_NAME = 'onet.tv'

    _VALID_URL = OnetBaseIE._URL_BASE_RE + r'[a-z]+/(?P<display_id>[0-9a-z-]+)/(?P<id>[0-9a-z]+)'

    _TESTS = [{
        'url': 'http://onet.tv/k/openerfestival/open-er-festival-2016-najdziwniejsze-wymagania-gwiazd/qbpyqc',
        'md5': '436102770fb095c75b8bb0392d3da9ff',
        'info_dict': {
            'id': 'qbpyqc',
            'display_id': 'open-er-festival-2016-najdziwniejsze-wymagania-gwiazd',
            'ext': 'mp4',
            'title': 'Open\'er Festival 2016: najdziwniejsze wymagania gwiazd',
            'description': 'Trzy samochody, których nigdy nie użyto, prywatne spa, hotel dekorowany czarnym suknem czy nielegalne używki. Organizatorzy koncertów i festiwali muszą stawać przed nie lada wyzwaniem zapraszając gwia...',
            'upload_date': '20160705',
            'timestamp': 1467721580,
        },
    }, {
        'url': 'https://onet100.vod.pl/k/openerfestival/open-er-festival-2016-najdziwniejsze-wymagania-gwiazd/qbpyqc',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the page, resolve its mvp id and return the info dict.

        The ``id`` and ``display_id`` of the result come from the URL, not
        from the mvp id used for the lookup.
        """
        match = self._match_valid_url(url)
        display_id = match.group('display_id')
        video_id = match.group('id')

        webpage = self._download_webpage(url, display_id)
        extracted = self._extract_from_id(
            self._search_mvp_id(webpage), webpage)

        # Overlay the URL-derived ids on top of the extracted fields.
        return dict(extracted, id=video_id, display_id=display_id)
class OnetChannelIE ( OnetBaseIE ) :
2019-10-29 09:44:07 +01:00
_VALID_URL = OnetBaseIE . _URL_BASE_RE + r ' (?P<id>[a-z]+)(?:[?#]|$) '
2016-07-06 20:09:05 +08:00
IE_NAME = ' onet.tv:channel '
2019-10-29 09:44:07 +01:00
_TESTS = [ {
2016-07-06 20:09:05 +08:00
' url ' : ' http://onet.tv/k/openerfestival ' ,
' info_dict ' : {
' id ' : ' openerfestival ' ,
2019-10-29 09:44:07 +01:00
' title ' : " Open ' er Festival " ,
' description ' : " Tak było na Open ' er Festival 2016! Oglądaj nasze reportaże i wywiady z artystami. " ,
2016-07-06 20:09:05 +08:00
} ,
2019-10-29 09:44:07 +01:00
' playlist_mincount ' : 35 ,
} , {
' url ' : ' https://onet100.vod.pl/k/openerfestival ' ,
' only_matching ' : True ,
} ]
2016-07-06 20:09:05 +08:00
def _real_extract ( self , url ) :
channel_id = self . _match_id ( url )
webpage = self . _download_webpage ( url , channel_id )
current_clip_info = self . _parse_json ( self . _search_regex (
r ' var \ s+currentClip \ s*= \ s*( { [^}]+}) ' , webpage , ' video info ' ) , channel_id ,
transform_source = lambda s : js_to_json ( re . sub ( r ' \' \ s* \ + \ s* \' ' , ' ' , s ) ) )
video_id = remove_start ( current_clip_info [ ' ckmId ' ] , ' mvp: ' )
video_name = url_basename ( current_clip_info [ ' url ' ] )