From a9bad429b309e614b4b8905c085ef425350ceeb2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Wed, 29 Oct 2014 11:04:48 +0100 Subject: [PATCH] [niconico] Add extractor for playlists (closes #4043) --- youtube_dl/extractor/__init__.py | 2 +- youtube_dl/extractor/niconico.py | 34 ++++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 615018c09..32236f0fa 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -252,7 +252,7 @@ from .nfb import NFBIE from .nfl import NFLIE from .nhl import NHLIE, NHLVideocenterIE -from .niconico import NiconicoIE +from .niconico import NiconicoIE, NiconicoPlaylistIE from .ninegag import NineGagIE from .noco import NocoIE from .normalboots import NormalbootsIE diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py index 7b85589b7..62d5707fe 100644 --- a/youtube_dl/extractor/niconico.py +++ b/youtube_dl/extractor/niconico.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals import re +import json from .common import InfoExtractor from ..utils import ( @@ -146,3 +147,36 @@ def _real_extract(self, url): 'duration': duration, 'webpage_url': webpage_url, } + + +class NiconicoPlaylistIE(InfoExtractor): + _VALID_URL = r'https?://www\.nicovideo\.jp/mylist/(?P\d+)' + + _TEST = { + 'url': 'http://www.nicovideo.jp/mylist/27411728', + 'info_dict': { + 'id': '27411728', + 'title': 'AKB48のオールナイトニッポン', + }, + 'playlist_mincount': 225, + } + + def _real_extract(self, url): + list_id = self._match_id(url) + webpage = self._download_webpage(url, list_id) + + entries_json = self._search_regex(r'Mylist\.preload\(\d+, (\[.*\])\);', + webpage, 'entries') + entries = json.loads(entries_json) + entries = [{ + '_type': 'url', + 'ie_key': NiconicoIE.ie_key(), + 'url': 'http://www.nicovideo.jp/watch/%s' % entry['item_id'], + } for entry in entries] + + return { + '_type': 'playlist', + 'title': self._search_regex(r'\s+name: "(.*?)"', webpage, 'title'), + 'id': list_id, + 'entries': entries, + }