From 03cc7c20c1baae0e3642b85e8b0f240ff2105c38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Fri, 2 Aug 2013 12:21:28 +0200 Subject: [PATCH 01/33] [youtube] show which formats are in 3D with "-F" and in the format field --- youtube_dl/extractor/youtube.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index c08387c273..31c5bebdc7 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -221,6 +221,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): '132': '240p', '151': '72p', } + _3d_itags = ['85', '84', '102', '83', '101', '82', '100'] IE_NAME = u'youtube' _TESTS = [ { @@ -467,7 +468,9 @@ def _extract_all_subtitles(self, video_id): def _print_formats(self, formats): print('Available formats:') for x in formats: - print('%s\t:\t%s\t[%s]' %(x, self._video_extensions.get(x, 'flv'), self._video_dimensions.get(x, '???'))) + print('%s\t:\t%s\t[%s]%s' %(x, self._video_extensions.get(x, 'flv'), + self._video_dimensions.get(x, '???'), + ' (3D)' if x in self._3d_itags else '')) def _extract_id(self, url): mobj = re.match(self._VALID_URL, url, re.VERBOSE) @@ -751,8 +754,9 @@ def _real_extract(self, url): # Extension video_extension = self._video_extensions.get(format_param, 'flv') - video_format = '{0} - {1}'.format(format_param if format_param else video_extension, - self._video_dimensions.get(format_param, '???')) + video_format = '{0} - {1}{2}'.format(format_param if format_param else video_extension, + self._video_dimensions.get(format_param, '???'), + ' (3D)' if format_param in self._3d_itags else '') results.append({ 'id': video_id, From 04bca64bded9311bdd9e4e83c289e15981911b0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Fri, 2 Aug 2013 12:38:17 +0200 Subject: [PATCH 02/33] [youtube]: new algo for length 83 (fixes #1164) --- devscripts/youtube_genalgo.py | 4 ++-- test/test_youtube_sig.py | 2 +- youtube_dl/extractor/youtube.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/devscripts/youtube_genalgo.py b/devscripts/youtube_genalgo.py index fd01206506..31d6ec9529 100644 --- a/devscripts/youtube_genalgo.py +++ b/devscripts/youtube_genalgo.py @@ -26,9 +26,9 @@ # 84 ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<", "<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWe098765432rmnbvcxzasdfghjklpoiuyt1"), - # 83 - vflcaqGO8 2013/07/11 + # 83 - vflTWC9KW 2013/08/01 ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<", - "urty8ioplkjhgfdsazxcvbqm1234567S90QWERTYUIOPLKJHGFDnAZXCVBNM!#$%^&*()_+={[};?/>.<"), + "qwertyuioplkjhg>dsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/f"), # 82 ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.<", "Q>/?;}[{=+-(*<^%$#@!MNBVCXZASDFGHKLPOIUY8REWT0q&7654321mnbvcxzasdfghjklpoiuytrew9"), diff --git a/test/test_youtube_sig.py b/test/test_youtube_sig.py index d06f3c8aa5..d645c08fb7 100644 --- a/test/test_youtube_sig.py +++ b/test/test_youtube_sig.py @@ -52,7 +52,7 @@ def test_84(self): def test_83(self): wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<" - right = "urty8ioplkjhgfdsazxcvbqm1234567S90QWERTYUIOPLKJHGFDnAZXCVBNM!#$%^&*()_+={[};?/>.<" + right = "qwertyuioplkjhg>dsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/f" self.assertEqual(sig(wrong), right) def test_82(self): diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 31c5bebdc7..bc89a14ffc 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -346,7 +346,7 @@ def _decrypt_signature(self, s): elif len(s) == 84: return s[83:36:-1] + s[2] + s[35:26:-1] + s[3] + s[25:3:-1] + s[26] elif len(s) == 83: - return s[6] + s[3:6] + s[33] + s[7:24] + s[0] + s[25:33] + s[53] + s[34:53] + s[24] + s[54:] + return s[:15] + s[80] + s[16:80] + s[15] elif len(s) == 82: return s[36] + s[79:67:-1] + s[81] + s[66:40:-1] + s[33] + s[39:36:-1] + s[40] + s[35] + s[0] + s[67] + s[32:0:-1] + s[34] elif len(s) == 81: From fd5539eb41ede741ae6ccacd17eaf293b7a0ed8a Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 2 Aug 2013 13:35:13 +0200 Subject: [PATCH 03/33] release 2013.08.02 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 0be025ed8a..4fc85fac1f 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2013.07.31' +__version__ = '2013.08.02' From ed27d35674b015728f4a9ec9d5d6d5733b597db0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Fri, 2 Aug 2013 14:17:01 +0200 Subject: [PATCH 04/33] [youtube] don't crash in verbose mode if 'ad3_module' is not defined in age protected videos (fixes #1159) --- youtube_dl/extractor/youtube.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index bc89a14ffc..4d3bce879f 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -718,8 +718,8 @@ def _real_extract(self, url): s = url_data['s'][0] if age_gate: player_version = self._search_regex(r'ad3-(.+?)\.swf', - video_info['ad3_module'][0], 'flash player', - fatal=False) + video_info['ad3_module'][0] if 'ad3_module' in video_info else 'NOT FOUND', + 'flash player', fatal=False) player = 'flash player %s' % player_version else: player = u'html5 player %s' % self._search_regex(r'html5player-(.+?)\.js', video_webpage, From 09825cb5c0a5f1d53e66e91efe3db49da7c8ae4a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sat, 27 Jul 2013 14:53:14 +0200 Subject: [PATCH 05/33] Add an extractor for Ooyala (closes #833) Only works for some sites, it doesn't work for videos that use a f4m manifest --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/ooyala.py | 52 ++++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+) create mode 100644 youtube_dl/extractor/ooyala.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index c20172a53a..f15caf4b40 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -48,6 +48,7 @@ from .myspass import MySpassIE from .myvideo import MyVideoIE from .nba import NBAIE +from .ooyala import OoyalaIE from .photobucket import PhotobucketIE from .pornotube import PornotubeIE from .rbmaradio import RBMARadioIE diff --git a/youtube_dl/extractor/ooyala.py b/youtube_dl/extractor/ooyala.py new file mode 100644 index 0000000000..b734722d08 --- /dev/null +++ b/youtube_dl/extractor/ooyala.py @@ -0,0 +1,52 @@ +import re +import json + +from .common import InfoExtractor +from ..utils import unescapeHTML + +class OoyalaIE(InfoExtractor): + _VALID_URL = r'https?://.+?\.ooyala\.com/.*?embedCode=(?P.+?)(&|$)' + + _TEST = { + # From http://it.slashdot.org/story/13/04/25/178216/recovering-data-from-broken-hard-drives-and-ssds-video + u'url': u'http://player.ooyala.com/player.js?embedCode=pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8', + u'file': u'pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8.mp4', + u'md5': u'3f5cceb3a7bf461d6c29dc466cf8033c', + u'info_dict': { + u'title': u'Explaining Data Recovery from Hard Drives and SSDs', + u'description': u'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.', + }, + } + + def _extract_result(self, info, more_info): + return {'id': info['embedCode'], + 'ext': 'mp4', + 'title': unescapeHTML(info['title']), + 'url': info['url'], + 'description': unescapeHTML(more_info['description']), + 'thumbnail': more_info['promo'], + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + embedCode = mobj.group('id') + player_url = 'http://player.ooyala.com/player.js?embedCode=%s' % embedCode + player = self._download_webpage(player_url, embedCode) + mobile_url = self._search_regex(r'mobile_player_url="(.+?)&device="', + player, u'mobile player url') + mobile_player = self._download_webpage(mobile_url, embedCode) + videos_info = self._search_regex(r'eval\("\((\[{.*?stream_redirect.*?}\])\)"\);', mobile_player, u'info').replace('\\"','"') + videos_more_info = self._search_regex(r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_player, u'more info').replace('\\"','"') + videos_info = json.loads(videos_info) + videos_more_info =json.loads(videos_more_info) + + if videos_more_info.get('lineup'): + videos = [self._extract_result(info, more_info) for (info, more_info) in zip(videos_info, videos_more_info['lineup'])] + return {'_type': 'playlist', + 'id': embedCode, + 'title': unescapeHTML(videos_more_info['title']), + 'entries': videos, + } + else: + return self._extract_result(videos_info[0], videos_more_info) + From fbf189a6eedb1a42a5beeea475f1d072db5c2322 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Fri, 2 Aug 2013 21:09:17 +0200 Subject: [PATCH 06/33] [myvideo] add support for videos that place the video info inside www.myvideo.de/service/data/video/{id}/config (fixes #616) --- youtube_dl/extractor/myvideo.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/myvideo.py b/youtube_dl/extractor/myvideo.py index b2a7b1df04..0404e6e43f 100644 --- a/youtube_dl/extractor/myvideo.py +++ b/youtube_dl/extractor/myvideo.py @@ -2,11 +2,13 @@ import base64 import hashlib import re +import json from .common import InfoExtractor from ..utils import ( compat_ord, compat_urllib_parse, + compat_urllib_request, ExtractorError, ) @@ -16,7 +18,7 @@ class MyVideoIE(InfoExtractor): """Information Extractor for myvideo.de.""" - _VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/watch/([0-9]+)/([^?/]+).*' + _VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/(?:[^/]+/)?watch/([0-9]+)/([^?/]+).*' IE_NAME = u'myvideo' _TEST = { u'url': u'http://www.myvideo.de/watch/8229274/bowling_fail_or_win', @@ -85,6 +87,20 @@ def _real_extract(self,url): 'ext': video_ext, }] + mobj = re.search(r'data-video-service="/service/data/video/%s/config' % video_id, webpage) + if mobj is not None: + request = compat_urllib_request.Request('http://www.myvideo.de/service/data/video/%s/config' % video_id, '') + response = self._download_webpage(request, video_id, + u'Downloading video info') + info = json.loads(base64.b64decode(response).decode('utf-8')) + return {'id': video_id, + 'title': info['title'], + 'url': info['streaming_url'].replace('rtmpe', 'rtmpt'), + 'play_path': info['filename'], + 'ext': 'flv', + 'thumbnail': info['thumbnail'][0]['url'], + } + # try encxml mobj = re.search('var flashvars={(.+?)}', webpage) if mobj is None: From f5791ed136d75efe8b163899ac4239d87a0a278e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sat, 3 Aug 2013 17:32:29 +0200 Subject: [PATCH 07/33] =?UTF-8?q?[arte]=20Prefer=20v=C3=ADdeos=20without?= =?UTF-8?q?=20subtitles=20in=20the=20same=20language=20(fixes=20#1173)=20a?= =?UTF-8?q?nd=20fix=20crash=20when=20there's=20no=20description?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- youtube_dl/extractor/arte.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index 18d5916589..959ce787fd 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -85,7 +85,7 @@ def _extract_emission(self, url, video_id, lang): info_dict = {'id': player_info['VID'], 'title': player_info['VTI'], - 'description': player_info['VDE'], + 'description': player_info.get('VDE'), 'upload_date': unified_strdate(player_info['VDA'].split(' ')[0]), 'thumbnail': player_info['programImage'], 'ext': 'flv', @@ -104,6 +104,8 @@ def _match_lang(f): formats = filter(_match_lang, formats) # We order the formats by quality formats = sorted(formats, key=lambda f: int(f['height'])) + # Prefer videos without subtitles in the same language + formats = sorted(formats, key=lambda f: re.match(r'VO(F|A)-STM\1', f['versionCode']) is None) # Pick the best quality format_info = formats[-1] if format_info['mediaType'] == u'rtmp': From 70c4c03cb8991a08f62699f3982112e3a8e010ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sat, 3 Aug 2013 19:07:04 +0200 Subject: [PATCH 08/33] [arte] add support for downloading from http://liveweb.arte.tv (fixes #1014) --- youtube_dl/extractor/arte.py | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index 959ce787fd..69b3b0ad78 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -17,13 +17,14 @@ class ArteTvIE(InfoExtractor): """ _EMISSION_URL = r'(?:http://)?www\.arte.tv/guide/(?Pfr|de)/(?:(?:sendungen|emissions)/)?(?P.*?)/(?P.*?)(\?.*)?' _VIDEOS_URL = r'(?:http://)?videos.arte.tv/(?Pfr|de)/.*-(?P.*?).html' + _LIVEWEB_URL = r'(?:http://)?liveweb.arte.tv/(?Pfr|de)/(?P.+?)/(?P.+)' _LIVE_URL = r'index-[0-9]+\.html$' IE_NAME = u'arte.tv' @classmethod def suitable(cls, url): - return any(re.match(regex, url) for regex in (cls._EMISSION_URL, cls._VIDEOS_URL)) + return any(re.match(regex, url) for regex in (cls._EMISSION_URL, cls._VIDEOS_URL, cls._LIVEWEB_URL)) # TODO implement Live Stream # from ..utils import compat_urllib_parse @@ -68,6 +69,12 @@ def _real_extract(self, url): lang = mobj.group('lang') return self._extract_video(url, id, lang) + mobj = re.match(self._LIVEWEB_URL, url) + if mobj is not None: + name = mobj.group('name') + lang = mobj.group('lang') + return self._extract_liveweb(url, name, lang) + if re.search(self._LIVE_URL, video_id) is not None: raise ExtractorError(u'Arte live streams are not yet supported, sorry') # self.extractLiveStream(url) @@ -146,3 +153,22 @@ def _key(m): 'url': video_url, 'ext': 'flv', } + + def _extract_liveweb(self, url, name, lang): + """Extract form http://liveweb.arte.tv/""" + webpage = self._download_webpage(url, name) + video_id = self._search_regex(r'eventId=(\d+?)("|&)', webpage, u'event id') + config_xml = self._download_webpage('http://download.liveweb.arte.tv/o21/liveweb/events/event-%s.xml' % video_id, + video_id, u'Downloading information') + config_doc = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8')) + event_doc = config_doc.find('event') + url_node = event_doc.find('video').find('urlHd') + if url_node is None: + url_node = video_doc.find('urlSd') + + return {'id': video_id, + 'title': event_doc.find('name%s' % lang.capitalize()).text, + 'url': url_node.text.replace('MP4', 'mp4'), + 'ext': 'flv', + 'thumbnail': self._og_search_thumbnail(webpage), + } From bba12cec89397ee76cb939b80741314eb99023ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sat, 3 Aug 2013 22:50:27 +0200 Subject: [PATCH 09/33] Add an extractor for videofy.me (closes #1171) Also modify find_xpath_attr to accept values with spaces like for id="HQ on" --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/videofyme.py | 49 +++++++++++++++++++++++++++++++ youtube_dl/utils.py | 2 +- 3 files changed, 51 insertions(+), 1 deletion(-) create mode 100644 youtube_dl/extractor/videofyme.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index f15caf4b40..c58ef260ae 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -73,6 +73,7 @@ from .vbox7 import Vbox7IE from .veoh import VeohIE from .vevo import VevoIE +from .videofyme import VideofyMeIE from .vimeo import VimeoIE, VimeoChannelIE from .vine import VineIE from .c56 import C56IE diff --git a/youtube_dl/extractor/videofyme.py b/youtube_dl/extractor/videofyme.py new file mode 100644 index 0000000000..04106672b5 --- /dev/null +++ b/youtube_dl/extractor/videofyme.py @@ -0,0 +1,49 @@ +import re +import xml.etree.ElementTree + +from .common import InfoExtractor +from ..utils import ( + find_xpath_attr, + determine_ext, +) + +class VideofyMeIE(InfoExtractor): + _VALID_URL = r'https?://(www.videofy.me/.+?|p.videofy.me/v)/(?P\d+)(&|#|$)' + IE_NAME = u'videofy.me' + + _TEST = { + u'url': u'http://www.videofy.me/thisisvideofyme/1100701', + u'file': u'1100701.mp4', + u'md5': u'2046dd5758541d630bfa93e741e2fd79', + u'info_dict': { + u'title': u'This is VideofyMe', + u'description': None, + u'uploader': u'VideofyMe', + u'uploader_id': u'thisisvideofyme', + }, + + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + config_xml = self._download_webpage('http://sunshine.videofy.me/?videoId=%s' % video_id, + video_id) + config = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8')) + video = config.find('video') + sources = video.find('sources') + url_node = find_xpath_attr(sources, 'source', 'id', 'HQ on') + if url_node is None: + url_node = find_xpath_attr(sources, 'source', 'id', 'HQ off') + video_url = url_node.find('url').text + + return {'id': video_id, + 'title': video.find('title').text, + 'url': video_url, + 'ext': determine_ext(video_url), + 'thumbnail': video.find('thumb').text, + 'description': video.find('description').text, + 'uploader': config.find('blog/name').text, + 'uploader_id': video.find('identifier').text, + 'view_count': re.search(r'\d+', video.find('views').text).group(), + } diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index cf2ea654e8..59eeaf4a89 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -207,7 +207,7 @@ def write_json_file(obj, fn): def find_xpath_attr(node, xpath, key, val): """ Find the xpath xpath[@key=val] """ assert re.match(r'^[a-zA-Z]+$', key) - assert re.match(r'^[a-zA-Z@]*$', val) + assert re.match(r'^[a-zA-Z@\s]*$', val) expr = xpath + u"[@%s='%s']" % (key, val) return node.find(expr) else: From 577664c8e8fd80f0155ef13cbb050139ffbe7a40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sun, 4 Aug 2013 11:10:57 +0200 Subject: [PATCH 10/33] Add an extractor from muzu.tv (closes #1177) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/muzu.py | 64 ++++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+) create mode 100644 youtube_dl/extractor/muzu.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index c58ef260ae..84c02c2ed9 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -45,6 +45,7 @@ from .metacafe import MetacafeIE from .mixcloud import MixcloudIE from .mtv import MTVIE +from .muzu import MuzuTVIE from .myspass import MySpassIE from .myvideo import MyVideoIE from .nba import NBAIE diff --git a/youtube_dl/extractor/muzu.py b/youtube_dl/extractor/muzu.py new file mode 100644 index 0000000000..03e31ea1c9 --- /dev/null +++ b/youtube_dl/extractor/muzu.py @@ -0,0 +1,64 @@ +import re +import json + +from .common import InfoExtractor +from ..utils import ( + compat_urllib_parse, + determine_ext, +) + + +class MuzuTVIE(InfoExtractor): + _VALID_URL = r'https?://www.muzu.tv/(.+?)/(.+?)/(?P\d+)' + IE_NAME = u'muzu.tv' + + _TEST = { + u'url': u'http://www.muzu.tv/defected/marcashken-featuring-sos-cat-walk-original-mix-music-video/1981454/', + u'file': u'1981454.mp4', + u'md5': u'98f8b2c7bc50578d6a0364fff2bfb000', + u'info_dict': { + u'title': u'Cat Walk (Original Mix)', + u'description': u'md5:90e868994de201b2570e4e5854e19420', + u'uploader': u'MarcAshken featuring SOS', + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + info_data = compat_urllib_parse.urlencode({'format': 'json', + 'url': url, + }) + video_info_page = self._download_webpage('http://www.muzu.tv/api/oembed/?%s' % info_data, + video_id, u'Downloading video info') + info = json.loads(video_info_page) + + player_info_page = self._download_webpage('http://player.muzu.tv/player/playerInit?ai=%s' % video_id, + video_id, u'Downloading player info') + video_info = json.loads(player_info_page)['videos'][0] + for quality in ['1080' , '720', '480', '360']: + if video_info.get('v%s' % quality): + break + + data = compat_urllib_parse.urlencode({'ai': video_id, + # Even if each time you watch a video the hash changes, + # it seems to work for different videos, and it will work + # even if you use any non empty string as a hash + 'viewhash': 'VBNff6djeV4HV5TRPW5kOHub2k', + 'device': 'web', + 'qv': quality, + }) + video_url_page = self._download_webpage('http://player.muzu.tv/player/requestVideo?%s' % data, + video_id, u'Downloading video url') + video_url_info = json.loads(video_url_page) + video_url = video_url_info['url'] + + return {'id': video_id, + 'title': info['title'], + 'url': video_url, + 'ext': determine_ext(video_url), + 'thumbnail': info['thumbnail_url'], + 'description': info['description'], + 'uploader': info['author_name'], + } From 39b782b390f4a57514466aa6e1793910a12b1e4b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sun, 4 Aug 2013 16:36:48 +0200 Subject: [PATCH 11/33] [collegehumor] support urls in the format www.collegehumor.com/e/{video_id} (fixes #1179) --- youtube_dl/extractor/collegehumor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/collegehumor.py b/youtube_dl/extractor/collegehumor.py index 5badde03a0..30b9c7549f 100644 --- a/youtube_dl/extractor/collegehumor.py +++ b/youtube_dl/extractor/collegehumor.py @@ -10,7 +10,7 @@ class CollegeHumorIE(InfoExtractor): - _VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/(video|embed|e)/(?P[0-9]+)/(?P.*)$' + _VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/(video|embed|e)/(?P[0-9]+)/?(?P.*)$' _TEST = { u'url': u'http://www.collegehumor.com/video/6902724/comic-con-cosplay-catastrophe', From 7edcb8f39cac73bb048a9543a3ac5cc82749a470 Mon Sep 17 00:00:00 2001 From: user Date: Mon, 5 Aug 2013 19:43:09 -0700 Subject: [PATCH 12/33] More informative error --- youtube_dl/YoutubeDL.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 4968669002..b510831542 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -547,7 +547,7 @@ def process_info(self, info_dict): try: success = self.fd._do_download(filename, info_dict) except (OSError, IOError) as err: - raise UnavailableVideoError() + raise UnavailableVideoError(err) except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: self.report_error(u'unable to download video data: %s' % str(err)) return From 7a4c6cc92f9ffec9135652a49153caffa5520c29 Mon Sep 17 00:00:00 2001 From: AndreiArba Date: Tue, 6 Aug 2013 15:41:13 +0300 Subject: [PATCH 13/33] Updated the 84 length signature decryption Updated the right 84 length signature decryption 06.08.2013 --- youtube_dl/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 580f39ee84..7577c12f5a 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -188,7 +188,7 @@ def _decrypt_signature(self, s): elif len(s) == 85: return s[76] + s[82:76:-1] + s[83] + s[75:60:-1] + s[0] + s[59:50:-1] + s[1] + s[49:2:-1] elif len(s) == 84: - return s[83:36:-1] + s[2] + s[35:26:-1] + s[3] + s[25:3:-1] + s[26] + return s[83:27:-1] + s[0] + s[26:5:-1] + s[2:0:-1] + s[27] elif len(s) == 83: return s[6] + s[3:6] + s[33] + s[7:24] + s[0] + s[25:33] + s[53] + s[34:53] + s[24] + s[54:] elif len(s) == 82: From 36cb11f068b8bfc2822bdeca59f6dc2e42f896f9 Mon Sep 17 00:00:00 2001 From: patrickslin Date: Tue, 6 Aug 2013 21:35:37 -0700 Subject: [PATCH 14/33] Encrypted sig 87 broken again (fixes #1200) --- youtube_dl/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 4d3bce879f..ba3b12b0ec 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -338,7 +338,7 @@ def _decrypt_signature(self, s): elif len(s) == 88: return s[48] + s[81:67:-1] + s[82] + s[66:62:-1] + s[85] + s[61:48:-1] + s[67] + s[47:12:-1] + s[3] + s[11:3:-1] + s[2] + s[12] elif len(s) == 87: - return s[4:23] + s[86] + s[24:85] + return s[83:53:-1] + s[3] + s[52:40:-1] + s[86] + s[39:10:-1] + s[0] + s[9:3:-1] + s[53] elif len(s) == 86: return s[83:85] + s[26] + s[79:46:-1] + s[85] + s[45:36:-1] + s[30] + s[35:30:-1] + s[46] + s[29:26:-1] + s[82] + s[25:1:-1] elif len(s) == 85: From e2f48f96436f24af3001b4fc2600cc3d3c1bf0ac Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 7 Aug 2013 20:11:38 +0200 Subject: [PATCH 15/33] Remove youtube sig tests The signature algo changes too often for the static test to make sense. --- test/test_youtube_sig.py | 79 ---------------------------------------- 1 file changed, 79 deletions(-) delete mode 100644 test/test_youtube_sig.py diff --git a/test/test_youtube_sig.py b/test/test_youtube_sig.py deleted file mode 100644 index d645c08fb7..0000000000 --- a/test/test_youtube_sig.py +++ /dev/null @@ -1,79 +0,0 @@ -#!/usr/bin/env python - -import unittest -import sys - -# Allow direct execution -import os -sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) - -from youtube_dl.extractor.youtube import YoutubeIE -from helper import FakeYDL - -ie = YoutubeIE(FakeYDL()) -sig = ie._decrypt_signature -sig_age_gate = ie._decrypt_signature_age_gate - -class TestYoutubeSig(unittest.TestCase): - def test_92(self): - wrong = "F9F9B6E6FD47029957AB911A964CC20D95A181A5D37A2DBEFD67D403DB0E8BE4F4910053E4E8A79.0B70B.0B80B8" - right = "69B6E6FD47029957AB911A9F4CC20D95A181A5D3.A2DBEFD67D403DB0E8BE4F4910053E4E8A7980B7" - self.assertEqual(sig(wrong), right) - - def test_90(self): - wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`" - right = "mrtyuioplkjhgfdsazxcvbne1234567890QWER[YUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={`]}|" - self.assertEqual(sig(wrong), right) - - def test_88(self): - wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<" - right = "J:|}][{=+-_)(*&;%$#@>MNBVCXZASDFGH^KLPOIUYTREWQ0987654321mnbvcxzasdfghrklpoiuytej" - self.assertEqual(sig(wrong), right) - - def test_87(self): - wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<" - right = "tyuioplkjhgfdsazxcv" - self.assertEqual(sig(wrong), right) - - def test_86(self): - wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<" - right = ">.1}|[{=+-_)(*&^%$#@!MNBVCXZASDFGHJK Date: Thu, 8 Aug 2013 00:39:23 +0200 Subject: [PATCH 16/33] release 2013.08.08 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 4fc85fac1f..82c24646c2 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2013.08.02' +__version__ = '2013.08.08' From 4efba05c56cda1cfe036b6ad8a4e8fc55cc21d4d Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Thu, 8 Aug 2013 08:55:26 +0200 Subject: [PATCH 17/33] Clarify template error message (#1209) --- youtube_dl/YoutubeDL.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 4968669002..3df653c5d0 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -264,7 +264,7 @@ def prepare_filename(self, info_dict): self.report_error(u'Erroneous output template') return None except ValueError as err: - self.report_error(u'Insufficient system charset ' + repr(preferredencoding())) + self.report_error(u'Error in output template: ' + str(err) + u' (encoding: ' + repr(preferredencoding()) + ')') return None def _match_entry(self, info_dict): From 02cf62e2403a83196c51762016a2c1ba29f35f90 Mon Sep 17 00:00:00 2001 From: patrickslin Date: Thu, 8 Aug 2013 11:28:50 -0700 Subject: [PATCH 18/33] Invalid signature again (fixes #1215) --- youtube_dl/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index b191021db5..ba2da4c514 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -340,7 +340,7 @@ def _decrypt_signature(self, s): elif len(s) == 87: return s[83:53:-1] + s[3] + s[52:40:-1] + s[86] + s[39:10:-1] + s[0] + s[9:3:-1] + s[53] elif len(s) == 86: - return s[83:85] + s[26] + s[79:46:-1] + s[85] + s[45:36:-1] + s[30] + s[35:30:-1] + s[46] + s[29:26:-1] + s[82] + s[25:1:-1] + return s[5:20] + s[2] + s[21:] elif len(s) == 85: return s[2:8] + s[0] + s[9:21] + s[65] + s[22:65] + s[84] + s[66:82] + s[21] elif len(s) == 84: From d468a09789cc5d931dd1a2e95dcf95a6988c83cb Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Thu, 8 Aug 2013 20:45:16 +0200 Subject: [PATCH 19/33] release 2013.08.08.1 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 82c24646c2..aa14c64fee 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2013.08.08' +__version__ = '2013.08.08.1' From 8a9d86a2a79e78bd7bda941c48fb50dba29d1d66 Mon Sep 17 00:00:00 2001 From: patrickslin Date: Thu, 8 Aug 2013 21:48:12 -0700 Subject: [PATCH 20/33] New sig len 89 algo Fixes new YT encrypted sig len 89. --- youtube_dl/extractor/youtube.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index ba2da4c514..3a8edbdc24 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -335,6 +335,8 @@ def _decrypt_signature(self, s): return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83] elif len(s) == 90: return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81] + elif len(s) == 89: + return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1] elif len(s) == 88: return s[48] + s[81:67:-1] + s[82] + s[66:62:-1] + s[85] + s[61:48:-1] + s[67] + s[47:12:-1] + s[3] + s[11:3:-1] + s[2] + s[12] elif len(s) == 87: From 0f399e6e5e621acff75d1a9dd9195f163bbcfe20 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 9 Aug 2013 15:49:09 +0200 Subject: [PATCH 21/33] release 2013.08.09 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index aa14c64fee..8c9fc201b1 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2013.08.08.1' +__version__ = '2013.08.09' From 298f833b161682ce50250895061285251933351d Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 11 Aug 2013 06:46:24 +0200 Subject: [PATCH 22/33] Note update possibility on errors (thanks @chbrown, #1229) --- youtube_dl/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 59eeaf4a89..5dd5b2923d 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -497,7 +497,7 @@ def __init__(self, msg, tb=None, expected=False): if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError): expected = True if not expected: - msg = msg + u'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output.' + msg = msg + u'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output. Make sure you are using the latest version; type youtube-dl -U to update.' super(ExtractorError, self).__init__(msg) self.traceback = tb From 0577177e3e504b3c893b03e5683382ad64863ca8 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 11 Aug 2013 07:12:38 +0200 Subject: [PATCH 23/33] [vevo] fix testcase --- youtube_dl/extractor/vevo.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index 67537eae5a..14abd58e83 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -8,7 +8,7 @@ class VevoIE(InfoExtractor): """ - Accecps urls from vevo.com or in the format 'vevo:{id}' + Accepts urls from vevo.com or in the format 'vevo:{id}' (currently used by MTVIE) """ _VALID_URL = r'((http://www.vevo.com/watch/.*?/.*?/)|(vevo:))(?P.*)$' @@ -19,7 +19,7 @@ class VevoIE(InfoExtractor): u'info_dict': { u"upload_date": u"20130624", u"uploader": u"Hurts", - u"title": u"Somebody To Die For" + u"title": u"Somebody to Die For" } } From e1842025d0403784d5a9af82e4fbd4b71fb31b11 Mon Sep 17 00:00:00 2001 From: patrickslin Date: Tue, 13 Aug 2013 17:57:35 -0700 Subject: [PATCH 24/33] Updated Vevo Signature Length (fixes #1237) --- youtube_dl/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 3a8edbdc24..64a3e39c19 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -348,7 +348,7 @@ def _decrypt_signature(self, s): elif len(s) == 84: return s[83:27:-1] + s[0] + s[26:5:-1] + s[2:0:-1] + s[27] elif len(s) == 83: - return s[:15] + s[80] + s[16:80] + s[15] + return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0] elif len(s) == 82: return s[36] + s[79:67:-1] + s[81] + s[66:40:-1] + s[33] + s[39:36:-1] + s[40] + s[35] + s[0] + s[67] + s[32:0:-1] + s[34] elif len(s) == 81: From d1ba998274e96467184575e6bc0c33e698e714bf Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 14 Aug 2013 10:19:53 +0200 Subject: [PATCH 25/33] release 2013.08.14 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 8c9fc201b1..9b213e4fb8 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2013.08.09' +__version__ = '2013.08.14' From 3a7256697e7c97a79f51d6ef6b9a1108bf8d7d3a Mon Sep 17 00:00:00 2001 From: patrickslin Date: Thu, 15 Aug 2013 13:00:20 -0700 Subject: [PATCH 26/33] Unable to Download Video (fixes #1247) --- youtube_dl/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 64a3e39c19..bf064c1819 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -340,7 +340,7 @@ def _decrypt_signature(self, s): elif len(s) == 88: return s[48] + s[81:67:-1] + s[82] + s[66:62:-1] + s[85] + s[61:48:-1] + s[67] + s[47:12:-1] + s[3] + s[11:3:-1] + s[2] + s[12] elif len(s) == 87: - return s[83:53:-1] + s[3] + s[52:40:-1] + s[86] + s[39:10:-1] + s[0] + s[9:3:-1] + s[53] + return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:] elif len(s) == 86: return s[5:20] + s[2] + s[21:] elif len(s) == 85: From 6daccbe3172dbddb75cbd55871b283e3c33a51e2 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Thu, 15 Aug 2013 22:40:00 +0200 Subject: [PATCH 27/33] release 2013.08.15 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 9b213e4fb8..72693e0fe5 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2013.08.14' +__version__ = '2013.08.15' From f9c3c90ca8bee20f99d6172371b749ed7223588e Mon Sep 17 00:00:00 2001 From: patrickslin Date: Fri, 16 Aug 2013 08:54:01 -0700 Subject: [PATCH 28/33] Length 85 changed again? (fixes #1254) --- youtube_dl/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index bf064c1819..f747189505 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -344,7 +344,7 @@ def _decrypt_signature(self, s): elif len(s) == 86: return s[5:20] + s[2] + s[21:] elif len(s) == 85: - return s[2:8] + s[0] + s[9:21] + s[65] + s[22:65] + s[84] + s[66:82] + s[21] + return s[83:34:-1] + s[0] + s[33:27:-1] + s[3] + s[26:19:-1] + s[34] + s[18:3:-1] + s[27] elif len(s) == 84: return s[83:27:-1] + s[0] + s[26:5:-1] + s[2:0:-1] + s[27] elif len(s) == 83: From ddf3bd328b0077f3a88ff96e7f17bcd89baaeeac Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sat, 17 Aug 2013 08:33:36 +0200 Subject: [PATCH 29/33] release 2013.08.17 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 72693e0fe5..8c93a275c1 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2013.08.15' +__version__ = '2013.08.17' From dbda1b51473ddc452d75bc1e98b3edabf4a7f5e8 Mon Sep 17 00:00:00 2001 From: rzhxeo Date: Sun, 18 Aug 2013 08:15:18 +0200 Subject: [PATCH 30/33] Add RTLnow extractor Supports http://rtl2now.rtl2.de and http://rtl-now.rtl.de --- youtube_dl/extractor/rtlnow.py | 88 ++++++++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) create mode 100644 youtube_dl/extractor/rtlnow.py diff --git a/youtube_dl/extractor/rtlnow.py b/youtube_dl/extractor/rtlnow.py new file mode 100644 index 0000000000..15f01a2e2b --- /dev/null +++ b/youtube_dl/extractor/rtlnow.py @@ -0,0 +1,88 @@ +# encoding: utf-8 +import re + +from .common import InfoExtractor +from ..utils import ExtractorError + +class RTLnowIE(InfoExtractor): + """Information Extractor for RTL(2)now""" + _VALID_URL = r'(?:http://)?(?P(?Prtl(?:(?P2)|-)now\.rtl(?(is_rtl2)2|)\.de/)[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)' + _TESTS = [{ + u'url': u'http://rtl-now.rtl.de/ahornallee/folge-1.php?film_id=90419&player=1&season=1', + u'file': u'90419.flv', + u'info_dict': { + u'upload_date': u'20070416', + u'title': u'Ahornallee - Folge 1 - Der Einzug', + u'description': u'Folge 1 - Der Einzug', + }, + u'params': { + u'skip_download': True, + }, + }, + { + u'url': u'http://rtl2now.rtl2.de/aerger-im-revier/episode-15-teil-1.php?film_id=69756&player=1&season=2&index=5', + u'file': u'69756.flv', + u'info_dict': { + u'upload_date': u'20120519', + u'title': u'Ärger im Revier - Ein junger Ladendieb, ein handfester Streit...', + u'description': u'Ärger im Revier - Ein junger Ladendieb, ein handfester Streit u.a.', + u'thumbnail': u'http://autoimg.static-fra.de/rtl2now/219850/1500x1500/image2.jpg', + }, + u'params': { + u'skip_download': True, + }, + },] + + def _real_extract(self,url): + mobj = re.match(self._VALID_URL, url) + + webpage_url = u'http://' + mobj.group('url') + video_page_url = u'http://' + mobj.group('base_url') + video_id = mobj.group(u'video_id') + + webpage = self._download_webpage(webpage_url, video_id) + video_title = self._html_search_regex(r'(?P<title>[^<]+)', + webpage, u'title') + playerdata_url = self._html_search_regex(r'\'playerdata\': \'(?P[^\']+)\'', + webpage, u'playerdata_url') + + playerdata = self._download_webpage(playerdata_url, video_id) + mobj = re.search(r'<!\[CDATA\[(?P<description>.+?)\s+- (?:Sendung )?vom (?P<upload_date_d>[0-9]{2})\.(?P<upload_date_m>[0-9]{2})\.(?:(?P<upload_date_Y>[0-9]{4})|(?P<upload_date_y>[0-9]{2})) [0-9]{2}:[0-9]{2} Uhr\]\]>', playerdata) + if mobj: + video_description = mobj.group(u'description') + if mobj.group('upload_date_Y'): + video_upload_date = mobj.group('upload_date_Y') + else: + video_upload_date = u'20' + mobj.group('upload_date_y') + video_upload_date += mobj.group('upload_date_m')+mobj.group('upload_date_d') + else: + video_description = None + video_upload_date = None + self._downloader.report_warning(u'Unable to extract description and upload date') + + # Thumbnail: not every video has an thumbnail + mobj = re.search(r'', webpage) + if mobj: + video_thumbnail = mobj.group(u'thumbnail') + else: + video_thumbnail = None + + mobj = re.search(r']+>rtmpe://(?:[^/]+/){2})(?P[^\]]+)\]\]>', playerdata) + if mobj is None: + raise ExtractorError(u'Unable to extract media URL') + video_url = mobj.group(u'url') + video_play_path = u'mp4:' + mobj.group(u'play_path') + video_player_url = video_page_url + u'includes/vodplayer.swf' + + return [{ + 'id': video_id, + 'url': video_url, + 'play_path': video_play_path, + 'page_url': video_page_url, + 'player_url': video_player_url, + 'ext': 'flv', + 'title': video_title, + 'description': video_description, + 'upload_date': video_upload_date, + 'thumbnail': video_thumbnail, + }] From 01b32990da992a5f271532e7408f4c6d546e162c Mon Sep 17 00:00:00 2001 From: rzhxeo Date: Sun, 18 Aug 2013 08:16:53 +0200 Subject: [PATCH 31/33] Add RTLnow extractor --- youtube_dl/extractor/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 84c02c2ed9..5bb44e7649 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -56,6 +56,7 @@ from .redtube import RedTubeIE from .ringtv import RingTVIE from .roxwel import RoxwelIE +from .rtlnow import RTLnowIE from .sina import SinaIE from .soundcloud import SoundcloudIE, SoundcloudSetIE from .spiegel import SpiegelIE From ea55b2a4cac1d56c578380b6bcb21b5fbc496a57 Mon Sep 17 00:00:00 2001 From: rzhxeo Date: Mon, 19 Aug 2013 08:57:36 +0200 Subject: [PATCH 32/33] Add VOXnow to RTLnow extractor --- youtube_dl/extractor/rtlnow.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/rtlnow.py b/youtube_dl/extractor/rtlnow.py index 15f01a2e2b..d993a990ad 100644 --- a/youtube_dl/extractor/rtlnow.py +++ b/youtube_dl/extractor/rtlnow.py @@ -5,8 +5,8 @@ from ..utils import ExtractorError class RTLnowIE(InfoExtractor): - """Information Extractor for RTL(2)now""" - _VALID_URL = r'(?:http://)?(?P(?Prtl(?:(?P2)|-)now\.rtl(?(is_rtl2)2|)\.de/)[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)' + """Information Extractor for RTLnow, RTL2now and VOXnow""" + _VALID_URL = r'(?:http://)?(?P(?Prtl(?:(?P2)|-)now\.rtl(?(is_rtl2)2|)\.de/|(?:www\.)?voxnow\.de/)[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)' _TESTS = [{ u'url': u'http://rtl-now.rtl.de/ahornallee/folge-1.php?film_id=90419&player=1&season=1', u'file': u'90419.flv', @@ -31,7 +31,19 @@ class RTLnowIE(InfoExtractor): u'params': { u'skip_download': True, }, - },] + }, + { + u'url': u'www.voxnow.de/voxtours/suedafrika-reporter-ii.php?film_id=13883&player=1&season=17', + u'file': u'13883.flv', + u'info_dict': { + u'upload_date': u'20090627', + u'title': u'Voxtours - Südafrika-Reporter II', + u'description': u'Südafrika-Reporter II', + }, + u'params': { + u'skip_download': True, + }, + }] def _real_extract(self,url): mobj = re.match(self._VALID_URL, url) From d741e55a423a09c40b3c5e19551f432a050353d7 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Mon, 19 Aug 2013 10:27:42 +0200 Subject: [PATCH 33/33] [youtube] Support watch_popup URLs (Fixes #1275) --- test/test_all_urls.py | 1 + youtube_dl/extractor/youtube.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/test/test_all_urls.py b/test/test_all_urls.py index c73d0e4679..c54faa380e 100644 --- a/test/test_all_urls.py +++ b/test/test_all_urls.py @@ -50,6 +50,7 @@ def test_youtube_extract(self): self.assertEqual(YoutubeIE()._extract_id('http://www.youtube.com/watch?&v=BaW_jenozKc'), 'BaW_jenozKc') self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch?&v=BaW_jenozKc'), 'BaW_jenozKc') self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc'), 'BaW_jenozKc') + self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch_popup?v=BaW_jenozKc'), 'BaW_jenozKc') def test_no_duplicates(self): ies = gen_extractors() diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index f747189505..843a973ca7 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -141,7 +141,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): (?: # the various things that can precede the ID: (?:(?:v|embed|e)/) # v/ or embed/ or e/ |(?: # or the v= param in all its forms - (?:watch|movie(?:_popup)?(?:\.php)?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx) + (?:(?:watch|movie)(?:_popup)?(?:\.php)?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx) (?:\?|\#!?) # the params delimiter ? or # or #! (?:.*?&)? # any other preceding param (like /?s=tuff&v=xxxx) v=