From aa6a10c44a8e2e86f709c5301f9ea6ac3f01f002 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Fri, 23 Aug 2013 18:34:57 +0200 Subject: [PATCH] Allow to specify multiple subtitles languages separated by commas (closes #518) --- test/test_youtube_subtitles.py | 13 +++++++++++-- youtube_dl/YoutubeDL.py | 2 +- youtube_dl/__init__.py | 12 ++++++++---- youtube_dl/extractor/youtube.py | 29 ++++++++++++++++------------- 4 files changed, 36 insertions(+), 20 deletions(-) diff --git a/test/test_youtube_subtitles.py b/test/test_youtube_subtitles.py index fe0eac6804..641206277b 100644 --- a/test/test_youtube_subtitles.py +++ b/test/test_youtube_subtitles.py @@ -40,7 +40,7 @@ def test_youtube_subtitles(self): def test_youtube_subtitles_it(self): DL = FakeYDL() DL.params['writesubtitles'] = True - DL.params['subtitleslang'] = 'it' + DL.params['subtitleslangs'] = ['it'] IE = YoutubeIE(DL) info_dict = IE.extract('QRS8MkLhQmM') sub = info_dict[0]['subtitles']['it'] @@ -85,11 +85,20 @@ def test_youtube_list_subtitles(self): def test_youtube_automatic_captions(self): DL = FakeYDL() DL.params['writeautomaticsub'] = True - DL.params['subtitleslang'] = 'it' + DL.params['subtitleslangs'] = ['it'] IE = YoutubeIE(DL) info_dict = IE.extract('8YoUxe5ncPo') sub = info_dict[0]['subtitles']['it'] self.assertTrue(sub is not None) + def test_youtube_multiple_langs(self): + DL = FakeYDL() + DL.params['writesubtitles'] = True + langs = ['it', 'fr', 'de'] + DL.params['subtitleslangs'] = langs + IE = YoutubeIE(DL) + subtitles = IE.extract('QRS8MkLhQmM')[0]['subtitles'] + for lang in langs: + self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang) if __name__ == '__main__': unittest.main() diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 1fd610a6e7..3fc4ec378d 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -76,7 +76,7 @@ class YoutubeDL(object): allsubtitles: Downloads all the subtitles of the video listsubtitles: Lists all available subtitles for the video subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt) - subtitleslang: Language of the subtitles to download + subtitleslangs: List of languages of the subtitles to download keepvideo: Keep the video file after post-processing daterange: A DateRange object, download only if the upload_date is in the range. skip_download: Skip the actual download of the video file diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 441ca6b6a7..6144290735 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -83,6 +83,9 @@ def _format_option_string(option): return "".join(opts) + def _comma_separated_values_options_callback(option, opt_str, value, parser): + setattr(parser.values, option.dest, value.split(',')) + def _find_term_columns(): columns = os.environ.get('COLUMNS', None) if columns: @@ -206,9 +209,10 @@ def _find_term_columns(): subtitles.add_option('--sub-format', action='store', dest='subtitlesformat', metavar='FORMAT', help='subtitle format (default=srt) ([sbv/vtt] youtube only)', default='srt') - subtitles.add_option('--sub-lang', '--srt-lang', - action='store', dest='subtitleslang', metavar='LANG', - help='language of the subtitles to download (optional) use IETF language tags like \'en\'') + subtitles.add_option('--sub-lang', '--sub-langs', '--srt-lang', + action='callback', dest='subtitleslang', metavar='LANGS', type='str', + default=[], callback=_comma_separated_values_options_callback, + help='languages of the subtitles to download (optional) separated by commas, use IETF language tags like \'en,pt\'') downloader.add_option('-r', '--rate-limit', dest='ratelimit', metavar='LIMIT', help='maximum download rate (e.g. 50k or 44.6m)') @@ -573,7 +577,7 @@ def _real_main(argv=None): 'allsubtitles': opts.allsubtitles, 'listsubtitles': opts.listsubtitles, 'subtitlesformat': opts.subtitlesformat, - 'subtitleslang': opts.subtitleslang, + 'subtitleslangs': opts.subtitleslang, 'matchtitle': decodeOption(opts.matchtitle), 'rejecttitle': decodeOption(opts.rejecttitle), 'max_downloads': opts.max_downloads, diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 446d53f644..5f843a8713 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -496,7 +496,7 @@ def _request_subtitle(self, sub_lang, sub_name, video_id, format): def _request_automatic_caption(self, video_id, webpage): """We need the webpage for getting the captions url, pass it as an argument to speed up the process.""" - sub_lang = self._downloader.params.get('subtitleslang') or 'en' + sub_lang = (self._downloader.params.get('subtitleslangs') or ['en'])[0] sub_format = self._downloader.params.get('subtitlesformat') self.to_screen(u'%s: Looking for automatic captions' % video_id) mobj = re.search(r';ytplayer.config = ({.*?});', webpage) @@ -530,23 +530,26 @@ def _extract_subtitles(self, video_id): Return a dictionary: {language: subtitles} or {} if the subtitles couldn't be found """ - sub_lang_list = self._get_available_subtitles(video_id) + available_subs_list = self._get_available_subtitles(video_id) sub_format = self._downloader.params.get('subtitlesformat') - if not sub_lang_list: #There was some error, it didn't get the available subtitles + if not available_subs_list: #There was some error, it didn't get the available subtitles return {} if self._downloader.params.get('allsubtitles', False): - pass + sub_lang_list = available_subs_list else: - if self._downloader.params.get('subtitleslang', False): - sub_lang = self._downloader.params.get('subtitleslang') - elif 'en' in sub_lang_list: - sub_lang = 'en' + if self._downloader.params.get('subtitleslangs', False): + reqested_langs = self._downloader.params.get('subtitleslangs') + elif 'en' in available_subs_list: + reqested_langs = ['en'] else: - sub_lang = list(sub_lang_list.keys())[0] - if not sub_lang in sub_lang_list: - self._downloader.report_warning(u'no closed captions found in the specified language "%s"' % sub_lang) - return {} - sub_lang_list = {sub_lang: sub_lang_list[sub_lang]} + reqested_langs = [list(available_subs_list.keys())[0]] + + sub_lang_list = {} + for sub_lang in reqested_langs: + if not sub_lang in available_subs_list: + self._downloader.report_warning(u'no closed captions found in the specified language "%s"' % sub_lang) + continue + sub_lang_list[sub_lang] = available_subs_list[sub_lang] subtitles = {} for sub_lang in sub_lang_list: subtitle = self._request_subtitle(sub_lang, sub_lang_list[sub_lang].encode('utf-8'), video_id, sub_format)