From 21a9c6aaac074b3ad85adc55818573e7971f8043 Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister
Date: Thu, 20 Dec 2012 21:28:27 +0100
Subject: [PATCH] FunnyOrDie IE (Fixes #599)

---
 test/tests.json              |  6 ++++
 youtube_dl/InfoExtractors.py | 56 ++++++++++++++++++++++++++++++++++++
 youtube_dl/__init__.py       |  1 +
 3 files changed, 63 insertions(+)

diff --git a/test/tests.json b/test/tests.json
index 5c4cf51bf5..9bf56082e3 100644
--- a/test/tests.json
+++ b/test/tests.json
@@ -87,5 +87,11 @@
     "name": "GooglePlus",
     "url": "https://plus.google.com/u/0/108897254135232129896/posts/ZButuJc6CtH",
     "file": "ZButuJc6CtH.flv"
+  },
+  {
+    "name": "FunnyOrDie",
+    "url": "http://www.funnyordie.com/videos/0732f586d7/heart-shaped-box-literal-video-version",
+    "file": "0732f586d7.mp4",
+    "md5": "f647e9e90064b53b6e046e75d0241fbd"
   }
 ]
diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py
index cf5b51bd8c..d94ebde34d 100755
--- a/youtube_dl/InfoExtractors.py
+++ b/youtube_dl/InfoExtractors.py
@@ -3630,3 +3630,59 @@ def _real_extract(self, url):
                 break
             offset += limit
         return info
+
+class FunnyOrDieIE(InfoExtractor):
+    """Information extractor for funnyordie.com video pages."""
+
+    _VALID_URL = r'^(?:https?://)?(?:www\.)?funnyordie\.com/videos/(?P<id>[0-9a-f]+)/.*$'
+    IE_NAME = u'FunnyOrDie'
+
+    def report_extraction(self, video_id):
+        """Report that information extraction has started for video_id."""
+        self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id))
+
+    def _real_extract(self, url):
+        """Return a one-element list with the info dict for url, or None on failure."""
+        mobj = re.match(self._VALID_URL, url)
+        if mobj is None:
+            self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
+            return
+
+        video_id = mobj.group('id')
+        self.report_extraction(video_id)
+        try:
+            urlh = compat_urllib_request.urlopen(url)
+            webpage_bytes = urlh.read()
+            webpage = webpage_bytes.decode('utf-8', 'ignore')
+        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+            self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err))
+            return
+
+        # NOTE(review): the video and title patterns were restored from a markup-stripped copy; verify on a live page.
+        m = re.search(r'<video[^>]*>\s*<source[^>]*>\s*<source src="(?P<url>[^"]+)"', webpage, re.DOTALL)
+        if not m:
+            # Return after trouble(), like the error paths above, so m is never dereferenced when it is None.
+            self._downloader.trouble(u'ERROR: unable to find video information')
+            return
+        video_url = unescapeHTML(m.group('url'))
+
+        m = re.search(r"class='player_page_h1'>\s+<a.*?>(?P<title>.*?)</a>", webpage)
+        if not m:
+            self._downloader.trouble(u'Cannot find video title')
+            return
+        title = unescapeHTML(m.group('title'))
+
+        m = re.search(r'<meta property="og:description" content="(?P<desc>.*?)"', webpage)
+        if m:
+            desc = unescapeHTML(m.group('desc'))
+        else:
+            desc = None
+
+        info = {
+            'id': video_id,
+            'url': video_url,
+            'ext': 'mp4',
+            'title': title,
+            'description': desc,
+        }
+        return [info]
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index efa8b813f8..f94e0dcdb5 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -400,6 +400,7 @@ def gen_extractors():
         ArteTvIE(),
         NBAIE(),
         JustinTVIE(),
+        FunnyOrDieIE(),
         GenericIE()
     ]
 