Merge branch 'duncankl-voicerepublic'

This commit is contained in:
Sergey M․ 2015-05-10 18:29:36 +06:00
commit 3da8038918
2 changed files with 100 additions and 0 deletions

View File

@ -637,6 +637,7 @@
VKUserVideosIE, VKUserVideosIE,
) )
from .vodlocker import VodlockerIE from .vodlocker import VodlockerIE
from .voicerepublic import VoiceRepublicIE
from .vporn import VpornIE from .vporn import VpornIE
from .vrt import VRTIE from .vrt import VRTIE
from .vube import VubeIE from .vube import VubeIE

View File

@ -0,0 +1,99 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
compat_urlparse,
)
from ..utils import (
ExtractorError,
determine_ext,
int_or_none,
)
class VoiceRepublicIE(InfoExtractor):
    """Information extractor for talks on voicerepublic.com.

    Accepts both ``/talks/<slug>`` and ``/embed/<slug>`` URLs; the slug is
    used as the display id.
    """

    _VALID_URL = r'https?://voicerepublic\.com/(?:talks|embed)/(?P<id>[0-9a-z-]+)'
    _TESTS = [{
        'url': 'http://voicerepublic.com/talks/watching-the-watchers-building-a-sousveillance-state',
        'md5': '0554a24d1657915aa8e8f84e15dc9353',
        'info_dict': {
            'id': '2296',
            'display_id': 'watching-the-watchers-building-a-sousveillance-state',
            'ext': 'm4a',
            'title': 'Watching the Watchers: Building a Sousveillance State',
            'description': 'md5:715ba964958afa2398df615809cfecb1',
            # Raw string: '\.' is not a valid escape in a normal literal.
            'thumbnail': r're:^https?://.*\.(?:png|jpg)$',
            'duration': 1800,
            'view_count': int,
        }
    }, {
        'url': 'http://voicerepublic.com/embed/watching-the-watchers-building-a-sousveillance-state',
        'only_matching': True,
    }]

    def _build_formats(self, url, links):
        """Turn ``(format_id, talk_url)`` pairs into audio-only format dicts.

        ``talk_url`` may be relative; it is resolved against ``url``.
        """
        return [{
            'url': compat_urlparse.urljoin(url, talk_url),
            'format_id': format_id,
            # Fall back to the format id when the URL carries no extension.
            'ext': determine_ext(talk_url) or format_id,
            'vcodec': 'none',
        } for format_id, talk_url in links]

    def _real_extract(self, url):
        """Download the talk page and return its info dict.

        Metadata is taken from the JSON object embedded in the page when it
        can be found and parsed; otherwise it is scraped from the HTML.

        Raises ExtractorError (expected) when the page reports that the
        audio is still queued for processing.
        """
        display_id = self._match_id(url)

        # Always request the /talks/ page on the same host, even when the
        # input was an /embed/ URL.
        req = compat_urllib_request.Request(
            compat_urlparse.urljoin(url, '/talks/%s' % display_id))
        # Older versions of Firefox get redirected to an "upgrade browser" page
        req.add_header('User-Agent', 'youtube-dl')
        webpage = self._download_webpage(req, display_id)

        if '>Queued for processing, please stand by...<' in webpage:
            raise ExtractorError(
                'Audio is still queued for processing', expected=True)

        config = self._search_regex(
            r'(?s)return ({.+?});\s*\n', webpage,
            'data', default=None)
        # Parse only when the regex matched. With default=None a miss yields
        # None, and handing None to a JSON parser raises TypeError rather
        # than failing softly, which would prevent the HTML fallback below.
        data = (
            self._parse_json(config, display_id, fatal=False)
            if config else None)

        if data:
            title = data['title']
            description = data.get('teaser')
            talk_id = data.get('talk_id') or display_id
            talk = data['talk']
            duration = int_or_none(talk.get('duration'))
            formats = self._build_formats(url, talk['links'].items())
        else:
            title = self._og_search_title(webpage)
            description = self._html_search_regex(
                r"(?s)<div class='talk-teaser'[^>]*>(.+?)</div>",
                webpage, 'description', fatal=False)
            talk_id = self._search_regex(
                [r"id='jc-(\d+)'", r"data-shareable-id='(\d+)'"],
                webpage, 'talk id', default=None) or display_id
            duration = None
            # NOTE(review): this pattern matches every single-quoted data-*
            # attribute in the page, not only media links; consider scoping
            # it to the player element once the markup is confirmed.
            formats = self._build_formats(
                url, re.findall(r"data-([^=]+)='([^']+)'", webpage))
        self._sort_formats(formats)

        thumbnail = self._og_search_thumbnail(webpage)
        view_count = int_or_none(self._search_regex(
            r"class='play-count[^']*'>\s*(\d+) plays",
            webpage, 'play count', fatal=False))

        return {
            'id': talk_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'view_count': view_count,
            'formats': formats,
        }