youtube-dl/youtube_dl/extractor/rbmaradio.py

from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
    clean_html,
    int_or_none,
    unified_timestamp,
    update_url_query,
)


class RBMARadioIE(InfoExtractor):
    """Extractor for single show episodes on rbmaradio.com / redbullradio.com.

    Episode metadata is read from the ``__INITIAL_STATE__`` JSON object that
    the episode page embeds in a ``<script>`` tag; the audio formats are
    derived from the episode's ``audioURL`` by varying the ``cbr`` query
    parameter.
    """

    _VALID_URL = r'https?://(?:www\.)?(?:rbmaradio|redbullradio)\.com/shows/(?P<show_id>[^/]+)/episodes/(?P<id>[^/?#&]+)'
    _TEST = {
        'url': 'https://www.rbmaradio.com/shows/main-stage/episodes/ford-lopatin-live-at-primavera-sound-2011',
        'md5': '6bc6f9bcb18994b4c983bc3bf4384d95',
        'info_dict': {
            'id': 'ford-lopatin-live-at-primavera-sound-2011',
            'ext': 'mp3',
            'title': 'Main Stage - Ford & Lopatin at Primavera Sound',
            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
            'thumbnail': r're:^https?://.*\.jpg',
            'duration': 2452,
            'timestamp': 1307103164,
            'upload_date': '20110603',
        },
    }

    def _real_extract(self, url):
        """Build the info dict for the episode addressed by *url*.

        The show id and episode id are taken from the URL and used to look
        up the episode inside the page's ``__INITIAL_STATE__`` JSON
        (``state['episodes'][show_id][episode_id]``).

        ``title`` and ``audioURL`` are mandatory: a ``KeyError`` is raised if
        either is missing, or if the episode cannot be located in the state
        object. All other fields are optional and come back as ``None`` when
        unavailable.
        """
        mobj = re.match(self._VALID_URL, url)
        show_id = mobj.group('show_id')
        episode_id = mobj.group('id')

        webpage = self._download_webpage(url, episode_id)

        episode = self._parse_json(
            self._search_regex(
                r'__INITIAL_STATE__\s*=\s*({.+?})\s*</script>',
                webpage, 'json data'),
            episode_id)['episodes'][show_id][episode_id]

        title = episode['title']

        # Prefix the episode title with the show title when one is provided.
        show_title = episode.get('showTitle')
        if show_title:
            title = '%s - %s' % (show_title, title)

        # One audio-only format per bitrate, requested through the `cbr`
        # query parameter of the episode's audio URL.
        # NOTE(review): presumably the server transcodes to the requested
        # constant bitrate; the available values are hard-coded here.
        formats = [{
            'url': update_url_query(episode['audioURL'], query={'cbr': abr}),
            'format_id': compat_str(abr),
            'abr': abr,
            'vcodec': 'none',
        } for abr in (96, 128, 256)]

        description = clean_html(episode.get('longTeaser'))

        # `dict.get(key, {})` only falls back to `{}` when the key is absent;
        # a present-but-null (or otherwise non-dict) 'imageURL' would make
        # the chained `.get` fail and abort the whole extraction over an
        # optional field, so normalise it to an empty dict first.
        image_urls = episode.get('imageURL')
        if not isinstance(image_urls, dict):
            image_urls = {}
        thumbnail = self._proto_relative_url(image_urls.get('landscape'))

        duration = int_or_none(episode.get('duration'))
        timestamp = unified_timestamp(episode.get('publishedAt'))

        return {
            'id': episode_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'timestamp': timestamp,
            'formats': formats,
        }
[rbmaradio] Simplify and use unicode_literals 2014-01-29 16:37:10 +01:00			`from __future__ import unicode_literals`

[rbmaradio] Improve, simplify and extract all formats (Closes #10242) 2016-08-08 21:46:29 +02:00			`import re`

[RBMARadio] move into own file 2013-06-23 22:09:32 +02:00			`from .common import InfoExtractor`
[rbmaradio] Improve, simplify and extract all formats (Closes #10242) 2016-08-08 21:46:29 +02:00			`from ..compat import compat_str`
[RBMARadio] move into own file 2013-06-23 22:09:32 +02:00			`from ..utils import (`
[rbmaradio] Improve, simplify and extract all formats (Closes #10242) 2016-08-08 21:46:29 +02:00			`clean_html,`
			`int_or_none,`
			`unified_timestamp,`
			`update_url_query,`
[RBMARadio] move into own file 2013-06-23 22:09:32 +02:00			`)`


			`class RBMARadioIE(InfoExtractor):`
[rbmaradio] Add support for redbullradio.com URLs 2017-04-08 16:39:07 +02:00			`_VALID_URL = r'https?://(?:www\.)?(?:rbmaradio|redbullradio)\.com/shows/(?P<show_id>[^/]+)/episodes/(?P<id>[^/?#&]+)'`
Move tests to the IE definitions 2013-06-27 20:46:46 +02:00			`_TEST = {`
[rbmaradio] Fixed extractor 2016-08-06 15:26:48 +02:00			`'url': 'https://www.rbmaradio.com/shows/main-stage/episodes/ford-lopatin-live-at-primavera-sound-2011',`
[rbmaradio] Simplify and use unicode_literals 2014-01-29 16:37:10 +01:00			`'md5': '6bc6f9bcb18994b4c983bc3bf4384d95',`
			`'info_dict': {`
Remove unused imports and simplify 2014-02-02 12:03:36 +01:00			`'id': 'ford-lopatin-live-at-primavera-sound-2011',`
			`'ext': 'mp3',`
[rbmaradio] Add support for redbullradio.com URLs 2017-04-08 16:39:07 +02:00			`'title': 'Main Stage - Ford & Lopatin at Primavera Sound',`
			`'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',`
Fix "invalid escape sequences" error on Python 3.6 2017-01-02 13:08:07 +01:00			`'thumbnail': r're:^https?://.*\.jpg',`
[rbmaradio] Improve, simplify and extract all formats (Closes #10242) 2016-08-08 21:46:29 +02:00			`'duration': 2452,`
			`'timestamp': 1307103164,`
			`'upload_date': '20110603',`
[rbmaradio] Simplify and use unicode_literals 2014-01-29 16:37:10 +01:00			`},`
Move tests to the IE definitions 2013-06-27 20:46:46 +02:00			`}`
[RBMARadio] move into own file 2013-06-23 22:09:32 +02:00
			`def _real_extract(self, url):`
[rbmaradio] Improve, simplify and extract all formats (Closes #10242) 2016-08-08 21:46:29 +02:00			`mobj = re.match(self._VALID_URL, url)`
			`show_id = mobj.group('show_id')`
			`episode_id = mobj.group('id')`

			`webpage = self._download_webpage(url, episode_id)`

			`episode = self._parse_json(`
			`self._search_regex(`
			`r'__INITIAL_STATE__\s*=\s*({.+?})\s*</script>',`
			`webpage, 'json data'),`
			`episode_id)['episodes'][show_id][episode_id]`

			`title = episode['title']`
[RBMARadio] move into own file 2013-06-23 22:09:32 +02:00
[rbmaradio] Improve, simplify and extract all formats (Closes #10242) 2016-08-08 21:46:29 +02:00			`show_title = episode.get('showTitle')`
			`if show_title:`
			`title = '%s - %s' % (show_title, title)`
[RBMARadio] move into own file 2013-06-23 22:09:32 +02:00
[rbmaradio] Improve, simplify and extract all formats (Closes #10242) 2016-08-08 21:46:29 +02:00			`formats = [{`
			`'url': update_url_query(episode['audioURL'], query={'cbr': abr}),`
			`'format_id': compat_str(abr),`
			`'abr': abr,`
			`'vcodec': 'none',`
			`} for abr in (96, 128, 256)]`
[RBMARadio] move into own file 2013-06-23 22:09:32 +02:00
[rbmaradio] Improve, simplify and extract all formats (Closes #10242) 2016-08-08 21:46:29 +02:00			`description = clean_html(episode.get('longTeaser'))`
			`thumbnail = self._proto_relative_url(episode.get('imageURL', {}).get('landscape'))`
			`duration = int_or_none(episode.get('duration'))`
			`timestamp = unified_timestamp(episode.get('publishedAt'))`
[rbmaradio] Simplify and use unicode_literals 2014-01-29 16:37:10 +01:00
			`return {`
[rbmaradio] Improve, simplify and extract all formats (Closes #10242) 2016-08-08 21:46:29 +02:00			`'id': episode_id,`
			`'title': title,`
			`'description': description,`
			`'thumbnail': thumbnail,`
			`'duration': duration,`
			`'timestamp': timestamp,`
			`'formats': formats,`
[RBMARadio] move into own file 2013-06-23 22:09:32 +02:00			`}`