2014-01-07 10:27:20 +01:00
from __future__ import unicode_literals
2013-09-06 18:08:07 +02:00
import re
from . common import InfoExtractor
2013-12-10 21:03:53 +01:00
from . . utils import (
2014-01-20 22:11:34 +01:00
fix_xml_ampersands ,
2013-12-10 21:03:53 +01:00
)
2013-09-06 18:08:07 +02:00
class MetacriticIE(InfoExtractor):
    """Extractor for trailer pages on metacritic.com.

    The numeric id at the end of a ``.../trailers/<id>`` URL is used to
    query the site's ``video_data`` XML endpoint, whose ``playList/clip``
    entries describe the downloadable files for each clip.
    """

    _VALID_URL = r'https?://(?:www\.)?metacritic\.com/.+?/trailers/(?P<id>\d+)'

    _TESTS = [{
        'url': 'http://www.metacritic.com/game/playstation-4/infamous-second-son/trailers/3698222',
        'info_dict': {
            'id': '3698222',
            'ext': 'mp4',
            'title': 'inFamous: Second Son - inSide Sucker Punch: Smoke & Mirrors',
            'description': 'Take a peak behind-the-scenes to see how Sucker Punch brings smoke into the universe of inFAMOUS Second Son on the PS4.',
            'duration': 221,
        },
        'skip': 'Not providing trailers anymore',
    }, {
        'url': 'http://www.metacritic.com/game/playstation-4/tales-from-the-borderlands-a-telltale-game-series/trailers/5740315',
        'info_dict': {
            'id': '5740315',
            'ext': 'mp4',
            'title': 'Tales from the Borderlands - Finale: The Vault of the Traveler',
            'description': 'In the final episode of the season, all hell breaks loose. Jack is now in control of Helios\' systems, and he\'s ready to reclaim his rightful place as king of Hyperion (with or without you).',
            'duration': 114,
        },
    }]

    def _real_extract(self, url):
        """Build the info dict for the trailer at *url*.

        Downloads the trailer web page (for the description) and the
        ``video_data`` XML (for title, duration and file list), then returns
        a dict with ``id``, ``title``, ``formats``, ``description`` and
        ``duration``.

        Raises ExtractorError if the XML contains no clip whose ``<id>``
        equals the id taken from the URL.
        """
        # Imported locally so this block does not depend on edits to the
        # module-level import list.
        from ..utils import ExtractorError

        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)
        # The xml is not well formatted, there are raw '&'
        info = self._download_xml(
            'http://www.metacritic.com/video_data?video=' + video_id,
            video_id, 'Downloading info xml',
            transform_source=fix_xml_ampersands)

        # The playlist may hold several clips; pick the one for this page.
        # findtext() yields None for a clip without an <id> child instead of
        # raising AttributeError, and the explicit default avoids leaking a
        # bare StopIteration when nothing matches.
        clip = next(
            (c for c in info.findall('playList/clip')
             if c.findtext('id') == video_id),
            None)
        if clip is None:
            raise ExtractorError(
                'Unable to find clip %s in the video data' % video_id)

        formats = []
        for video_file in clip.findall('httpURI/videoFile'):
            # The <rate> text doubles as the format id and, as an integer,
            # as the total bitrate.
            rate_str = video_file.find('rate').text
            video_url = video_file.find('filePath').text
            formats.append({
                'url': video_url,
                'ext': 'mp4',
                'format_id': rate_str,
                'tbr': int(rate_str),
            })
        self._sort_formats(formats)

        # The description only appears in the HTML page, not in the XML.
        description = self._html_search_regex(
            r'<b>Description:</b>(.*?)</p>',
            webpage, 'description', flags=re.DOTALL)

        return {
            'id': video_id,
            'title': clip.find('title').text,
            'formats': formats,
            'description': description,
            'duration': int(clip.find('duration').text),
        }