From d882161d5a25b658c3be7f6c6c86f4c707d58bb3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Wed, 29 Jan 2014 15:34:35 +0100 Subject: [PATCH] [infoq] Simplify and use unicode_literals --- youtube_dl/extractor/infoq.py | 39 ++++++++++++++--------------------- 1 file changed, 16 insertions(+), 23 deletions(-) diff --git a/youtube_dl/extractor/infoq.py b/youtube_dl/extractor/infoq.py index c79c589c7..7c208b85d 100644 --- a/youtube_dl/extractor/infoq.py +++ b/youtube_dl/extractor/infoq.py @@ -1,27 +1,27 @@ +from __future__ import unicode_literals + import base64 import re from .common import InfoExtractor from ..utils import ( compat_urllib_parse, - - ExtractorError, ) class InfoQIE(InfoExtractor): _VALID_URL = r'^(?:https?://)?(?:www\.)?infoq\.com/[^/]+/[^/]+$' _TEST = { - u"name": u"InfoQ", - u"url": u"http://www.infoq.com/presentations/A-Few-of-My-Favorite-Python-Things", - u"file": u"12-jan-pythonthings.mp4", - u"info_dict": { - u"description": u"Mike Pirnat presents some tips and tricks, standard libraries and third party packages that make programming in Python a richer experience.", - u"title": u"A Few of My Favorite [Python] Things" + "name": "InfoQ", + "url": "http://www.infoq.com/presentations/A-Few-of-My-Favorite-Python-Things", + "file": "12-jan-pythonthings.mp4", + "info_dict": { + "description": "Mike Pirnat presents some tips and tricks, standard libraries and third party packages that make programming in Python a richer experience.", + "title": "A Few of My Favorite [Python] Things", + }, + "params": { + "skip_download": True, }, - u"params": { - u"skip_download": True - } } def _real_extract(self, url): @@ -31,32 +31,25 @@ def _real_extract(self, url): self.report_extraction(url) # Extract video URL - mobj = re.search(r"jsclassref ?= ?'([^']*)'", webpage) - if mobj is None: - raise ExtractorError(u'Unable to extract video url') - real_id = compat_urllib_parse.unquote(base64.b64decode(mobj.group(1).encode('ascii')).decode('utf-8')) + encoded_id = self._search_regex(r"jsclassref ?= ?'([^']*)'", webpage, 'encoded id') + real_id = compat_urllib_parse.unquote(base64.b64decode(encoded_id.encode('ascii')).decode('utf-8')) video_url = 'rtmpe://video.infoq.com/cfx/st/' + real_id # Extract title video_title = self._search_regex(r'contentTitle = "(.*?)";', - webpage, u'title') + webpage, 'title') # Extract description video_description = self._html_search_regex(r'', - webpage, u'description', fatal=False) + webpage, 'description', fatal=False) video_filename = video_url.split('/')[-1] video_id, extension = video_filename.split('.') - info = { + return { 'id': video_id, 'url': video_url, - 'uploader': None, - 'upload_date': None, 'title': video_title, 'ext': extension, # Extension is always(?) mp4, but seems to be flv - 'thumbnail': None, 'description': video_description, } - - return [info] \ No newline at end of file