# 2016-05-21 13:39:42 +08:00
# coding: utf-8
from __future__ import unicode_literals
import re
# 2017-02-16 23:42:36 +08:00
from . common import InfoExtractor
# 2016-05-21 13:39:42 +08:00
from . . utils import (
# 2016-08-15 13:31:08 +08:00
float_or_none ,
parse_iso8601 ,
update_url_query ,
# 2017-01-06 11:23:43 +01:00
int_or_none ,
determine_protocol ,
unescapeHTML ,
# 2016-05-21 13:39:42 +08:00
)
# 2017-02-16 23:42:36 +08:00
class SendtoNewsIE(InfoExtractor):
    """Extractor for SendtoNews ``embedplayer.php`` playlist embeds.

    The playlist id is the value of the ``SC`` query parameter of the embed
    URL; the video metadata is read from the sibling ``data_read.php``
    endpoint of the same URL.
    """

    _VALID_URL = r'https?://embed\.sendtonews\.com/player2/embedplayer\.php\?.*\bSC=(?P<id>[0-9A-Za-z-]+)'

    _TEST = {
        # From http://cleveland.cbslocal.com/2016/05/16/indians-score-season-high-15-runs-in-blowout-win-over-reds-rapid-reaction/
        'url': 'http://embed.sendtonews.com/player2/embedplayer.php?SC=GxfCe0Zo7D-175909-5588&type=single&autoplay=on&sound=YES',
        'info_dict': {
            'id': 'GxfCe0Zo7D-175909-5588'
        },
        'playlist_count': 8,
        # test the first video only to prevent lengthy tests
        'playlist': [{
            'info_dict': {
                'id': '240385',
                'ext': 'mp4',
                'title': 'Indians introduce Encarnacion',
                'description': 'Indians president of baseball operations Chris Antonetti and Edwin Encarnacion discuss the slugger\'s three-year contract with Cleveland',
                'duration': 137.898,
                'thumbnail': r're:https?://.*\.jpg$',
                'upload_date': '20170105',
                'timestamp': 1483649762,
            },
        }],
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    # Protocol-relative embed-player URL; ``%s`` receives the SC code.
    _URL_TEMPLATE = '//embed.sendtonews.com/player2/embedplayer.php?SC=%s'

    @classmethod
    def _extract_url(cls, webpage):
        """Find a SendtoNews responsive-embed ``<script>`` tag in *webpage*.

        Returns ``_URL_TEMPLATE`` filled with the tag's ``SC`` code, or
        ``None`` when no such tag is present.
        """
        # Verbose-mode pattern: the quote that opens the src attribute is
        # captured in group 1 and must be the one that closes it (\1).
        mobj = re.search(r'''(?x)<script[^>]+src=([\'"])
            (?:https?:)?//embed\.sendtonews\.com/player/responsiveembed\.php\?
                .*\bSC=(?P<SC>[0-9a-zA-Z-]+).*
            \1>''', webpage)
        if not mobj:
            return None
        return cls._URL_TEMPLATE % mobj.group('SC')

    def _real_extract(self, url):
        """Download the playlist JSON for *url* and return a playlist result.

        One entry is produced for every item of the first element of the
        ``playlistData`` list in the ``data_read.php`` response.
        """
        playlist_id = self._match_id(url)

        # The metadata endpoint lives next to the player page and is
        # selected by the ``cmd=loadInitial`` query parameter.
        data_url = update_url_query(
            url.replace('embedplayer.php', 'data_read.php'),
            {'cmd': 'loadInitial'})
        playlist_data = self._download_json(data_url, playlist_id)

        entries = []
        for video in playlist_data['playlistData'][0]:
            info_dict = self._parse_jwplayer_data(
                video['jwconfiguration'],
                require_title=False, m3u8_id='hls', rtmp_params={'no_resume': True})

            for f in info_dict['formats']:
                # Formats that already carry a bitrate are left untouched.
                if f.get('tbr'):
                    continue
                # Otherwise take the bitrate from a "/<digits>k/" segment
                # of the format URL, when there is one.
                tbr = int_or_none(self._search_regex(
                    r'/(\d+)k/', f['url'], 'bitrate', default=None))
                if not tbr:
                    continue
                f.update({
                    'format_id': '%s-%d' % (determine_protocol(f), tbr),
                    'tbr': tbr,
                })
            self._sort_formats(info_dict['formats'], ('tbr', 'height', 'width', 'format_id'))

            thumbnails = []
            if video.get('thumbnailUrl'):
                thumbnails.append({
                    'id': 'normal',
                    'url': video['thumbnailUrl'],
                })
            if video.get('smThumbnailUrl'):
                thumbnails.append({
                    'id': 'small',
                    'url': video['smThumbnailUrl'],
                })

            info_dict.update({
                'title': video['S_headLine'].strip(),
                'description': unescapeHTML(video.get('S_fullStory')),
                'thumbnails': thumbnails,
                'duration': float_or_none(video.get('SM_length')),
                # S_sysDate is parsed with a space between date and time.
                'timestamp': parse_iso8601(video.get('S_sysDate'), delimiter=' '),
            })
            entries.append(info_dict)

        return self.playlist_result(entries, playlist_id)