From 605ec701b7b4cd120a9acb33bfcc4306719b59b4 Mon Sep 17 00:00:00 2001 From: PeterDing Date: Fri, 29 May 2015 23:32:04 +0800 Subject: [PATCH 01/14] [iqiyi] Add new extractor for iqiyi.com --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/iqiyi.py | 214 +++++++++++++++++++++++++++++++ 2 files changed, 215 insertions(+) create mode 100644 youtube_dl/extractor/iqiyi.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 80c9cb107..85c1b1a3a 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -229,6 +229,7 @@ from .instagram import InstagramIE, InstagramUserIE from .internetvideoarchive import InternetVideoArchiveIE from .iprima import IPrimaIE +from .iqiyi import IqiyiIE from .ivi import ( IviIE, IviCompilationIE diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py new file mode 100644 index 000000000..d96d13225 --- /dev/null +++ b/youtube_dl/extractor/iqiyi.py @@ -0,0 +1,214 @@ +# coding: utf-8 + +from __future__ import unicode_literals + +from .common import InfoExtractor + +from ..compat import ( + compat_chr, + compat_parse_qs, + compat_urllib_parse, + compat_urllib_request, + compat_urlparse, + compat_str, +) + +from ..utils import ExtractorError + +import re +import time +import json +import uuid +import math +import random +import zlib +import hashlib + +class IqiyiIE(InfoExtractor): + IE_NAME = 'iqiyi' + + _VALID_URL = r'http://(?:www\.)iqiyi.com/.+?\.html' + + _TEST = { + 'url': 'http://www.iqiyi.com/v_19rrojlavg.html', + 'md5': '260f0f59686e65e886995d0ba791ab83', + 'info_dict': { + 'id': '9c1fb1b99d192b21c559e5a1a2cb3c73', + 'title': '美国德州空中惊现奇异云团 酷似UFO', + 'ext': 'f4v' + } + } + + def construct_video_urls(self, data, video_id, _uuid): + def do_xor(x, y): + a = y % 3 + if a == 1: + return x ^ 121 + if a == 2: + return x ^ 72 + return x ^ 103 + + def get_encode_code(l): + a = 0 + b = l.split('-') + c = len(b) + s = '' + for i in range(c - 1, -1, -1): + a = do_xor(int(b[c-i-1], 16), i) + s += chr(a) + return s[::-1] + + def get_path_key(x): + mg = ')(*&^flash@#$%a' + tm = self._download_json( + 'http://data.video.qiyi.com/t?tn=' + str(random.random()), video_id)['t'] + t = str(int(math.floor(int(tm)/(600.0)))) + return hashlib.md5( + (t+mg+x).encode('utf8')).hexdigest() + + video_urls_dict = {} + for i in data['vp']['tkl'][0]['vs']: + if 0 < int(i['bid']) <= 10: + format_id = self.get_format(i['bid']) + + video_urls_info = i['fs'] + if not i['fs'][0]['l'].startswith('/'): + t = get_encode_code(i['fs'][0]['l']) + if t.endswith('mp4'): + video_urls_info = i['flvs'] + + video_urls = [] + for ii in video_urls_info: + vl = ii['l'] + if not vl.startswith('/'): + vl = get_encode_code(vl) + key = get_path_key( + vl.split('/')[-1].split('.')[0]) + filesize = ii['b'] + base_url = data['vp']['du'].split('/') + base_url.insert(-1, key) + base_url = '/'.join(base_url) + param = { + 'su': _uuid, + 'qyid': uuid.uuid4().hex, + 'client': '', + 'z': '', + 'bt': '', + 'ct': '', + 'tn': str(int(time.time())) + } + api_video_url = base_url + vl + '?' + \ + compat_urllib_parse.urlencode(param) + js = self._download_json(api_video_url, video_id) + video_url = js['l'] + video_urls.append( + (video_url, filesize)) + + video_urls_dict[format_id] = video_urls + return video_urls_dict + + def get_format(self, bid): + bid_dict = { + '1': 'standard', + '2': 'high', + '3': 'super', + '4': 'suprt-high', + '5': 'fullhd', + '10': '4k' + } + return bid_dict[str(bid)] + + def get_raw_data(self, tvid, video_id, enc_key, _uuid): + tm = str(int(time.time())) + param = { + 'key': 'fvip', + 'src': hashlib.md5(b'youtube-dl').hexdigest(), + 'tvId': tvid, + 'vid': video_id, + 'vinfo': 1, + 'tm': tm, + 'enc': hashlib.md5( + (enc_key + tm + tvid).encode('utf8')).hexdigest(), + 'qyid': _uuid, + 'tn': random.random(), + 'um': 0, + 'authkey': hashlib.md5( + (tm + tvid).encode('utf8')).hexdigest() + } + + api_url = 'http://cache.video.qiyi.com/vms' + '?' + \ + compat_urllib_parse.urlencode(param) + raw_data = self._download_json(api_url, video_id) + return raw_data + + def get_enc_key(self, swf_url, video_id): + req = self._request_webpage( + swf_url, video_id, note='download swf content') + cn = req.read() + cn = zlib.decompress(cn[8:]) + pt = re.compile(b'MixerRemote\x08(?P.+?)\$&vv') + enc_key = self._search_regex(pt, cn, 'enc_key').decode('utf8') + return enc_key + + def _real_extract(self, url): + webpage = self._download_webpage( + url, 'temp_id', note='download video page') + tvid = self._search_regex( + r'tvId ?= ?(\'|\")(?P\d+)', webpage, 'tvid', flags=re.I, group='tvid') + video_id = self._search_regex( + r'videoId ?= ?(\'|\")(?P[a-z\d]+)', + webpage, 'video_id', flags=re.I, group='video_id') + swf_url = self._search_regex( + r'(?Phttp://.+?MainPlayer.+?\.swf)', webpage, 'swf') + _uuid = uuid.uuid4().hex + + enc_key = self.get_enc_key(swf_url, video_id) + + raw_data = self.get_raw_data(tvid, video_id, enc_key, _uuid) + assert raw_data['code'] == 'A000000' + if not raw_data['data']['vp']['tkl']: + raise ExtractorError('No support iQiqy VIP video') + + data = raw_data['data'] + + title = data['vi']['vn'] + + # generate video_urls_dict + video_urls_dict = self.construct_video_urls(data, video_id, _uuid) + + # construct info + entries = [] + for format_id in video_urls_dict: + video_urls = video_urls_dict[format_id] + for i, video_url_info in enumerate(video_urls): + if len(entries) < i+1: + entries.append({'formats': []}) + entries[i]['formats'].append( + { + 'url': video_url_info[0], + 'filesize': video_url_info[-1], + 'format_id': format_id, + } + ) + + for i in range(len(entries)): + entries[i].update( + { + 'id': '_part%d' % (i+1), + 'title': title, + } + ) + + if len(entries) > 1: + info = { + '_type': 'multi_video', + 'id': video_id, + 'title': title, + 'entries': entries, + } + else: + info = entries[0] + info['id'] = video_id + info['title'] = title + + return info From 670861bd206ab4063baeb6b80d06a054ce4e1d62 Mon Sep 17 00:00:00 2001 From: PeterDing Date: Sat, 30 May 2015 10:37:54 +0800 Subject: [PATCH 02/14] [iqiyi] Do not request for unneeded formats --- youtube_dl/extractor/iqiyi.py | 72 ++++++++++++++++++++++++----------- 1 file changed, 50 insertions(+), 22 deletions(-) diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py index d96d13225..747f3f902 100644 --- a/youtube_dl/extractor/iqiyi.py +++ b/youtube_dl/extractor/iqiyi.py @@ -4,20 +4,12 @@ from .common import InfoExtractor -from ..compat import ( - compat_chr, - compat_parse_qs, - compat_urllib_parse, - compat_urllib_request, - compat_urlparse, - compat_str, -) +from ..compat import compat_urllib_parse from ..utils import ExtractorError import re import time -import json import uuid import math import random @@ -31,15 +23,15 @@ class IqiyiIE(InfoExtractor): _TEST = { 'url': 'http://www.iqiyi.com/v_19rrojlavg.html', - 'md5': '260f0f59686e65e886995d0ba791ab83', + 'md5': '2cb594dc2781e6c941a110d8f358118b', 'info_dict': { 'id': '9c1fb1b99d192b21c559e5a1a2cb3c73', 'title': '美国德州空中惊现奇异云团 酷似UFO', - 'ext': 'f4v' + 'ext': 'f4v', } } - def construct_video_urls(self, data, video_id, _uuid): + def construct_video_urls(self, data, video_id, _uuid, bid): def do_xor(x, y): a = y % 3 if a == 1: @@ -66,10 +58,21 @@ def get_path_key(x): return hashlib.md5( (t+mg+x).encode('utf8')).hexdigest() + # get accept format + # getting all format will spend minutes for a big video. + if bid == 'best': + bids = [int(i['bid']) for i in data['vp']['tkl'][0]['vs'] \ + if 0 < int(i['bid']) <= 10] + bid = str(max(bids)) + video_urls_dict = {} for i in data['vp']['tkl'][0]['vs']: if 0 < int(i['bid']) <= 10: format_id = self.get_format(i['bid']) + else: + continue + + video_urls = [] video_urls_info = i['fs'] if not i['fs'][0]['l'].startswith('/'): @@ -77,7 +80,12 @@ def get_path_key(x): if t.endswith('mp4'): video_urls_info = i['flvs'] - video_urls = [] + if int(i['bid']) != int(bid): # ignore missing match format + video_urls.extend( + [('http://example.com/v.flv', ii['b']) for ii in video_urls_info]) + video_urls_dict[format_id] = video_urls + continue + for ii in video_urls_info: vl = ii['l'] if not vl.startswith('/'): @@ -108,15 +116,27 @@ def get_path_key(x): return video_urls_dict def get_format(self, bid): - bid_dict = { - '1': 'standard', - '2': 'high', - '3': 'super', - '4': 'suprt-high', - '5': 'fullhd', - '10': '4k' + _dict = { + '1' : 'h6', + '2' : 'h5', + '3' : 'h4', + '4' : 'h3', + '5' : 'h2', + '10' : 'h1' } - return bid_dict[str(bid)] + return _dict.get(str(bid), None) + + def get_bid(self, format_id): + _dict = { + 'h6' : '1', + 'h5' : '2', + 'h4' : '3', + 'h3' : '4', + 'h2' : '5', + 'h1' : '10', + 'best' : 'best' + } + return _dict.get(format_id, None) def get_raw_data(self, tvid, video_id, enc_key, _uuid): tm = str(int(time.time())) @@ -173,8 +193,14 @@ def _real_extract(self, url): title = data['vi']['vn'] + format = self._downloader.params.get('format', None) + bid = self.get_bid(format) if format else 'best' + if not bid: + raise ExtractorError('Can\'t get format.') + # generate video_urls_dict - video_urls_dict = self.construct_video_urls(data, video_id, _uuid) + video_urls_dict = self.construct_video_urls( + data, video_id, _uuid, bid) # construct info entries = [] @@ -188,10 +214,12 @@ def _real_extract(self, url): 'url': video_url_info[0], 'filesize': video_url_info[-1], 'format_id': format_id, + 'preference': int(self.get_bid(format_id)) } ) for i in range(len(entries)): + self._sort_formats(entries[i]['formats']) entries[i].update( { 'id': '_part%d' % (i+1), From f1da861018924e6f442ffedd9a5682055c79aea6 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 7 Jun 2015 00:37:29 +0800 Subject: [PATCH 03/14] [iqiyi] PEP8 --- youtube_dl/extractor/iqiyi.py | 56 +++++++++++++++++------------------ 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py index 747f3f902..597441baf 100644 --- a/youtube_dl/extractor/iqiyi.py +++ b/youtube_dl/extractor/iqiyi.py @@ -16,19 +16,20 @@ import zlib import hashlib + class IqiyiIE(InfoExtractor): IE_NAME = 'iqiyi' _VALID_URL = r'http://(?:www\.)iqiyi.com/.+?\.html' _TEST = { - 'url': 'http://www.iqiyi.com/v_19rrojlavg.html', - 'md5': '2cb594dc2781e6c941a110d8f358118b', - 'info_dict': { - 'id': '9c1fb1b99d192b21c559e5a1a2cb3c73', - 'title': '美国德州空中惊现奇异云团 酷似UFO', - 'ext': 'f4v', - } + 'url': 'http://www.iqiyi.com/v_19rrojlavg.html', + 'md5': '2cb594dc2781e6c941a110d8f358118b', + 'info_dict': { + 'id': '9c1fb1b99d192b21c559e5a1a2cb3c73', + 'title': '美国德州空中惊现奇异云团 酷似UFO', + 'ext': 'f4v', + } } def construct_video_urls(self, data, video_id, _uuid, bid): @@ -46,7 +47,7 @@ def get_encode_code(l): c = len(b) s = '' for i in range(c - 1, -1, -1): - a = do_xor(int(b[c-i-1], 16), i) + a = do_xor(int(b[c - i - 1], 16), i) s += chr(a) return s[::-1] @@ -54,15 +55,14 @@ def get_path_key(x): mg = ')(*&^flash@#$%a' tm = self._download_json( 'http://data.video.qiyi.com/t?tn=' + str(random.random()), video_id)['t'] - t = str(int(math.floor(int(tm)/(600.0)))) - return hashlib.md5( - (t+mg+x).encode('utf8')).hexdigest() + t = str(int(math.floor(int(tm) / (600.0)))) + return hashlib.md5((t + mg + x).encode('utf8')).hexdigest() # get accept format # getting all format will spend minutes for a big video. if bid == 'best': - bids = [int(i['bid']) for i in data['vp']['tkl'][0]['vs'] \ - if 0 < int(i['bid']) <= 10] + bids = [int(i['bid']) for i in data['vp']['tkl'][0]['vs'] + if 0 < int(i['bid']) <= 10] bid = str(max(bids)) video_urls_dict = {} @@ -117,24 +117,24 @@ def get_path_key(x): def get_format(self, bid): _dict = { - '1' : 'h6', - '2' : 'h5', - '3' : 'h4', - '4' : 'h3', - '5' : 'h2', - '10' : 'h1' + '1': 'h6', + '2': 'h5', + '3': 'h4', + '4': 'h3', + '5': 'h2', + '10': 'h1' } return _dict.get(str(bid), None) def get_bid(self, format_id): _dict = { - 'h6' : '1', - 'h5' : '2', - 'h4' : '3', - 'h3' : '4', - 'h2' : '5', - 'h1' : '10', - 'best' : 'best' + 'h6': '1', + 'h5': '2', + 'h4': '3', + 'h3': '4', + 'h2': '5', + 'h1': '10', + 'best': 'best' } return _dict.get(format_id, None) @@ -207,7 +207,7 @@ def _real_extract(self, url): for format_id in video_urls_dict: video_urls = video_urls_dict[format_id] for i, video_url_info in enumerate(video_urls): - if len(entries) < i+1: + if len(entries) < i + 1: entries.append({'formats': []}) entries[i]['formats'].append( { @@ -222,7 +222,7 @@ def _real_extract(self, url): self._sort_formats(entries[i]['formats']) entries[i].update( { - 'id': '_part%d' % (i+1), + 'id': '_part%d' % (i + 1), 'title': title, } ) From 7012620e2b9355d25ddfc855fc5990af938f04d8 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 7 Jun 2015 00:44:54 +0800 Subject: [PATCH 04/14] [iqiyi] Remove format selection codes --- youtube_dl/extractor/iqiyi.py | 22 ++-------------------- 1 file changed, 2 insertions(+), 20 deletions(-) diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py index 597441baf..5645fb6ee 100644 --- a/youtube_dl/extractor/iqiyi.py +++ b/youtube_dl/extractor/iqiyi.py @@ -32,7 +32,7 @@ class IqiyiIE(InfoExtractor): } } - def construct_video_urls(self, data, video_id, _uuid, bid): + def construct_video_urls(self, data, video_id, _uuid): def do_xor(x, y): a = y % 3 if a == 1: @@ -58,13 +58,6 @@ def get_path_key(x): t = str(int(math.floor(int(tm) / (600.0)))) return hashlib.md5((t + mg + x).encode('utf8')).hexdigest() - # get accept format - # getting all format will spend minutes for a big video. - if bid == 'best': - bids = [int(i['bid']) for i in data['vp']['tkl'][0]['vs'] - if 0 < int(i['bid']) <= 10] - bid = str(max(bids)) - video_urls_dict = {} for i in data['vp']['tkl'][0]['vs']: if 0 < int(i['bid']) <= 10: @@ -80,12 +73,6 @@ def get_path_key(x): if t.endswith('mp4'): video_urls_info = i['flvs'] - if int(i['bid']) != int(bid): # ignore missing match format - video_urls.extend( - [('http://example.com/v.flv', ii['b']) for ii in video_urls_info]) - video_urls_dict[format_id] = video_urls - continue - for ii in video_urls_info: vl = ii['l'] if not vl.startswith('/'): @@ -193,14 +180,9 @@ def _real_extract(self, url): title = data['vi']['vn'] - format = self._downloader.params.get('format', None) - bid = self.get_bid(format) if format else 'best' - if not bid: - raise ExtractorError('Can\'t get format.') - # generate video_urls_dict video_urls_dict = self.construct_video_urls( - data, video_id, _uuid, bid) + data, video_id, _uuid) # construct info entries = [] From 29e7e0781b1b8e276c28a079bc5b18e1b0db2d5e Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 7 Jun 2015 00:56:08 +0800 Subject: [PATCH 05/14] [iqiyi] Simplify and improve regex patterns See the comments in #5849 --- youtube_dl/extractor/iqiyi.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py index 5645fb6ee..18a7587a2 100644 --- a/youtube_dl/extractor/iqiyi.py +++ b/youtube_dl/extractor/iqiyi.py @@ -161,12 +161,11 @@ def _real_extract(self, url): webpage = self._download_webpage( url, 'temp_id', note='download video page') tvid = self._search_regex( - r'tvId ?= ?(\'|\")(?P\d+)', webpage, 'tvid', flags=re.I, group='tvid') + r'data-player-tvid\s*=\s*[\'"](\d+)', webpage, 'tvid') video_id = self._search_regex( - r'videoId ?= ?(\'|\")(?P[a-z\d]+)', - webpage, 'video_id', flags=re.I, group='video_id') + r'data-player-videoid\s*=\s*[\'"]([a-f\d]+)', webpage, 'video_id') swf_url = self._search_regex( - r'(?Phttp://.+?MainPlayer.+?\.swf)', webpage, 'swf') + r'(http://.+?MainPlayer.+?\.swf)', webpage, 'swf player URL') _uuid = uuid.uuid4().hex enc_key = self.get_enc_key(swf_url, video_id) From aacda28b28c1804866d634c5c5086b3d53cb2b2f Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 7 Jun 2015 01:32:03 +0800 Subject: [PATCH 06/14] [iqiyi] Give error message for assertion failures --- youtube_dl/extractor/iqiyi.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py index 18a7587a2..dc35c3380 100644 --- a/youtube_dl/extractor/iqiyi.py +++ b/youtube_dl/extractor/iqiyi.py @@ -171,7 +171,10 @@ def _real_extract(self, url): enc_key = self.get_enc_key(swf_url, video_id) raw_data = self.get_raw_data(tvid, video_id, enc_key, _uuid) - assert raw_data['code'] == 'A000000' + + if raw_data['code'] != 'A000000': + raise ExtractorError('Unable to load data. Error code: ' + raw_data['code']) + if not raw_data['data']['vp']['tkl']: raise ExtractorError('No support iQiqy VIP video') From 958d0b659b80d4493d045d4da82074ed68ed6c4e Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 7 Jun 2015 01:35:09 +0800 Subject: [PATCH 07/14] [iqiyi] Reorder imports --- youtube_dl/extractor/iqiyi.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py index dc35c3380..36029361a 100644 --- a/youtube_dl/extractor/iqiyi.py +++ b/youtube_dl/extractor/iqiyi.py @@ -1,20 +1,17 @@ # coding: utf-8 - from __future__ import unicode_literals -from .common import InfoExtractor - -from ..compat import compat_urllib_parse - -from ..utils import ExtractorError - +import hashlib +import math +import random import re import time import uuid -import math -import random import zlib -import hashlib + +from .common import InfoExtractor +from ..compat import compat_urllib_parse +from ..utils import ExtractorError class IqiyiIE(InfoExtractor): From ffba4edb067238b593b98c71f4293e9b60ba95ce Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 7 Jun 2015 01:52:51 +0800 Subject: [PATCH 08/14] [iqiyi] Improve some variable names and add download notes --- youtube_dl/extractor/iqiyi.py | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py index 36029361a..c17e1fde4 100644 --- a/youtube_dl/extractor/iqiyi.py +++ b/youtube_dl/extractor/iqiyi.py @@ -48,35 +48,37 @@ def get_encode_code(l): s += chr(a) return s[::-1] - def get_path_key(x): + def get_path_key(x, format_id, segment_index): mg = ')(*&^flash@#$%a' tm = self._download_json( - 'http://data.video.qiyi.com/t?tn=' + str(random.random()), video_id)['t'] + 'http://data.video.qiyi.com/t?tn=' + str(random.random()), video_id, + note='Download path key of segment %d for format %s' % (segment_index + 1, format_id) + )['t'] t = str(int(math.floor(int(tm) / (600.0)))) return hashlib.md5((t + mg + x).encode('utf8')).hexdigest() video_urls_dict = {} - for i in data['vp']['tkl'][0]['vs']: - if 0 < int(i['bid']) <= 10: - format_id = self.get_format(i['bid']) + for format_item in data['vp']['tkl'][0]['vs']: + if 0 < int(format_item['bid']) <= 10: + format_id = self.get_format(format_item['bid']) else: continue video_urls = [] - video_urls_info = i['fs'] - if not i['fs'][0]['l'].startswith('/'): - t = get_encode_code(i['fs'][0]['l']) + video_urls_info = format_item['fs'] + if not format_item['fs'][0]['l'].startswith('/'): + t = get_encode_code(format_item['fs'][0]['l']) if t.endswith('mp4'): - video_urls_info = i['flvs'] + video_urls_info = format_item['flvs'] - for ii in video_urls_info: - vl = ii['l'] + for segment_index, segment in enumerate(video_urls_info): + vl = segment['l'] if not vl.startswith('/'): vl = get_encode_code(vl) key = get_path_key( - vl.split('/')[-1].split('.')[0]) - filesize = ii['b'] + vl.split('/')[-1].split('.')[0], format_id, segment_index) + filesize = segment['b'] base_url = data['vp']['du'].split('/') base_url.insert(-1, key) base_url = '/'.join(base_url) @@ -91,7 +93,9 @@ def get_path_key(x): } api_video_url = base_url + vl + '?' + \ compat_urllib_parse.urlencode(param) - js = self._download_json(api_video_url, video_id) + js = self._download_json( + api_video_url, video_id, + note='Download video info of segment %d for format %s' % (segment_index + 1, format_id)) video_url = js['l'] video_urls.append( (video_url, filesize)) From c4ee87022bd18863fc3f22f80064453e272d956f Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 7 Jun 2015 01:57:05 +0800 Subject: [PATCH 09/14] [iqiyi] Change id for multipart videos --- youtube_dl/extractor/iqiyi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py index c17e1fde4..840cc9a4d 100644 --- a/youtube_dl/extractor/iqiyi.py +++ b/youtube_dl/extractor/iqiyi.py @@ -207,7 +207,7 @@ def _real_extract(self, url): self._sort_formats(entries[i]['formats']) entries[i].update( { - 'id': '_part%d' % (i + 1), + 'id': '%s_part%d' % (video_id, i + 1), 'title': title, } ) From 99481135907b5fa3558d4f176fd02acbdafccdb6 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 7 Jun 2015 02:09:33 +0800 Subject: [PATCH 10/14] [iqiyi] Add a multipart test case --- youtube_dl/extractor/iqiyi.py | 67 +++++++++++++++++++++++++++++++++-- 1 file changed, 65 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py index 840cc9a4d..d73687d88 100644 --- a/youtube_dl/extractor/iqiyi.py +++ b/youtube_dl/extractor/iqiyi.py @@ -19,7 +19,7 @@ class IqiyiIE(InfoExtractor): _VALID_URL = r'http://(?:www\.)iqiyi.com/.+?\.html' - _TEST = { + _TESTS = [{ 'url': 'http://www.iqiyi.com/v_19rrojlavg.html', 'md5': '2cb594dc2781e6c941a110d8f358118b', 'info_dict': { @@ -27,7 +27,70 @@ class IqiyiIE(InfoExtractor): 'title': '美国德州空中惊现奇异云团 酷似UFO', 'ext': 'f4v', } - } + }, { + 'url': 'http://www.iqiyi.com/v_19rrhnnclk.html', + 'info_dict': { + 'id': 'e3f585b550a280af23c98b6cb2be19fb', + 'title': '名侦探柯南第752集', + }, + 'playlist': [{ + 'md5': '7e49376fecaffa115d951634917fe105', + 'info_dict': { + 'id': 'e3f585b550a280af23c98b6cb2be19fb_part1', + 'ext': 'f4v', + 'title': '名侦探柯南第752集', + }, + }, { + 'md5': '41b75ba13bb7ac0e411131f92bc4f6ca', + 'info_dict': { + 'id': 'e3f585b550a280af23c98b6cb2be19fb_part2', + 'ext': 'f4v', + 'title': '名侦探柯南第752集', + }, + }, { + 'md5': '0cee1dd0a3d46a83e71e2badeae2aab0', + 'info_dict': { + 'id': 'e3f585b550a280af23c98b6cb2be19fb_part3', + 'ext': 'f4v', + 'title': '名侦探柯南第752集', + }, + }, { + 'md5': '4f8ad72373b0c491b582e7c196b0b1f9', + 'info_dict': { + 'id': 'e3f585b550a280af23c98b6cb2be19fb_part4', + 'ext': 'f4v', + 'title': '名侦探柯南第752集', + }, + }, { + 'md5': 'd89ad028bcfad282918e8098e811711d', + 'info_dict': { + 'id': 'e3f585b550a280af23c98b6cb2be19fb_part5', + 'ext': 'f4v', + 'title': '名侦探柯南第752集', + }, + }, { + 'md5': '9cb1e5c95da25dff0660c32ae50903b7', + 'info_dict': { + 'id': 'e3f585b550a280af23c98b6cb2be19fb_part6', + 'ext': 'f4v', + 'title': '名侦探柯南第752集', + }, + }, { + 'md5': '155116e0ff1867bbc9b98df294faabc9', + 'info_dict': { + 'id': 'e3f585b550a280af23c98b6cb2be19fb_part7', + 'ext': 'f4v', + 'title': '名侦探柯南第752集', + }, + }, { + 'md5': '53f5db77622ae14fa493ed2a278a082b', + 'info_dict': { + 'id': 'e3f585b550a280af23c98b6cb2be19fb_part8', + 'ext': 'f4v', + 'title': '名侦探柯南第752集', + }, + }], + }] def construct_video_urls(self, data, video_id, _uuid): def do_xor(x, y): From 865ab62f43eb94a9f4f757a464df147e983cb439 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 7 Jun 2015 02:13:22 +0800 Subject: [PATCH 11/14] [iqiyi] Make _VALID_URL more accurate v_* urls are individual videos, while a_* urls are playlists, which are not supported yet. --- youtube_dl/extractor/iqiyi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py index d73687d88..f0d423331 100644 --- a/youtube_dl/extractor/iqiyi.py +++ b/youtube_dl/extractor/iqiyi.py @@ -17,7 +17,7 @@ class IqiyiIE(InfoExtractor): IE_NAME = 'iqiyi' - _VALID_URL = r'http://(?:www\.)iqiyi.com/.+?\.html' + _VALID_URL = r'http://(?:www\.)iqiyi.com/v_.+?\.html' _TESTS = [{ 'url': 'http://www.iqiyi.com/v_19rrojlavg.html', From 08bb8ef2011d795948d8e89478bf3afe4b99405f Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 7 Jun 2015 02:25:00 +0800 Subject: [PATCH 12/14] [iqiyi] Unify get_format() and get_bid() --- youtube_dl/extractor/iqiyi.py | 32 +++++++++++++------------------- 1 file changed, 13 insertions(+), 19 deletions(-) diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py index f0d423331..122f33692 100644 --- a/youtube_dl/extractor/iqiyi.py +++ b/youtube_dl/extractor/iqiyi.py @@ -92,6 +92,15 @@ class IqiyiIE(InfoExtractor): }], }] + _FORMATS_MAP = [ + ('1', 'h6'), + ('2', 'h5'), + ('3', 'h4'), + ('4', 'h3'), + ('5', 'h2'), + ('10', 'h1'), + ] + def construct_video_urls(self, data, video_id, _uuid): def do_xor(x, y): a = y % 3 @@ -167,27 +176,12 @@ def get_path_key(x, format_id, segment_index): return video_urls_dict def get_format(self, bid): - _dict = { - '1': 'h6', - '2': 'h5', - '3': 'h4', - '4': 'h3', - '5': 'h2', - '10': 'h1' - } - return _dict.get(str(bid), None) + matched_format_ids = [_format_id for _bid, _format_id in self._FORMATS_MAP if _bid == str(bid)] + return matched_format_ids[0] if len(matched_format_ids) else None def get_bid(self, format_id): - _dict = { - 'h6': '1', - 'h5': '2', - 'h4': '3', - 'h3': '4', - 'h2': '5', - 'h1': '10', - 'best': 'best' - } - return _dict.get(format_id, None) + matched_bids = [_bid for _bid, _format_id in self._FORMATS_MAP if _format_id == format_id] + return matched_bids[0] if len(matched_bids) else None def get_raw_data(self, tvid, video_id, enc_key, _uuid): tm = str(int(time.time())) From 9c5f685ef14a8b44d17b897ba8ae2da051011c35 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 7 Jun 2015 02:39:03 +0800 Subject: [PATCH 13/14] [iqiyi] Improve regex pattern again --- youtube_dl/extractor/iqiyi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py index 122f33692..15481b84b 100644 --- a/youtube_dl/extractor/iqiyi.py +++ b/youtube_dl/extractor/iqiyi.py @@ -223,7 +223,7 @@ def _real_extract(self, url): video_id = self._search_regex( r'data-player-videoid\s*=\s*[\'"]([a-f\d]+)', webpage, 'video_id') swf_url = self._search_regex( - r'(http://.+?MainPlayer.+?\.swf)', webpage, 'swf player URL') + r'(http://[^\'"]+MainPlayer[^.]+\.swf)', webpage, 'swf player URL') _uuid = uuid.uuid4().hex enc_key = self.get_enc_key(swf_url, video_id) From b5a3c7f10927c9d55f6fdad5f5c002e02338642e Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 7 Jun 2015 02:47:36 +0800 Subject: [PATCH 14/14] [iqiyi] Cache encryption keys --- youtube_dl/extractor/iqiyi.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py index 15481b84b..9106dd074 100644 --- a/youtube_dl/extractor/iqiyi.py +++ b/youtube_dl/extractor/iqiyi.py @@ -3,6 +3,7 @@ import hashlib import math +import os.path import random import re import time @@ -11,7 +12,10 @@ from .common import InfoExtractor from ..compat import compat_urllib_parse -from ..utils import ExtractorError +from ..utils import ( + ExtractorError, + url_basename, +) class IqiyiIE(InfoExtractor): @@ -207,12 +211,20 @@ def get_raw_data(self, tvid, video_id, enc_key, _uuid): return raw_data def get_enc_key(self, swf_url, video_id): + filename, _ = os.path.splitext(url_basename(swf_url)) + enc_key_json = self._downloader.cache.load('iqiyi-enc-key', filename) + if enc_key_json is not None: + return enc_key_json[0] + req = self._request_webpage( swf_url, video_id, note='download swf content') cn = req.read() cn = zlib.decompress(cn[8:]) pt = re.compile(b'MixerRemote\x08(?P.+?)\$&vv') enc_key = self._search_regex(pt, cn, 'enc_key').decode('utf8') + + self._downloader.cache.store('iqiyi-enc-key', filename, [enc_key]) + return enc_key def _real_extract(self, url):