From 7fc875195fd74c235d6ca86afc642f683a485a4b Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 2 May 2017 00:06:19 +0100 Subject: [PATCH] [amp] improve thumbnail and subtitle extraction --- youtube_dl/extractor/amp.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/amp.py b/youtube_dl/extractor/amp.py index 98f8e69cdc..fde1a8ff74 100644 --- a/youtube_dl/extractor/amp.py +++ b/youtube_dl/extractor/amp.py @@ -34,9 +34,12 @@ def get_media_node(name, default=None): if isinstance(media_thumbnail, dict): media_thumbnail = [media_thumbnail] for thumbnail_data in media_thumbnail: - thumbnail = thumbnail_data['@attributes'] + thumbnail = thumbnail_data.get('@attributes', {}) + thumbnail_url = thumbnail.get('url') + if not thumbnail_url: + continue thumbnails.append({ - 'url': self._proto_relative_url(thumbnail['url'], 'http:'), + 'url': self._proto_relative_url(thumbnail_url, 'http:'), 'width': int_or_none(thumbnail.get('width')), 'height': int_or_none(thumbnail.get('height')), }) @@ -47,9 +50,14 @@ def get_media_node(name, default=None): if isinstance(media_subtitle, dict): media_subtitle = [media_subtitle] for subtitle_data in media_subtitle: - subtitle = subtitle_data['@attributes'] - lang = subtitle.get('lang') or 'en' - subtitles[lang] = [{'url': subtitle['href']}] + subtitle = subtitle_data.get('@attributes', {}) + subtitle_href = subtitle.get('href') + if not subtitle_href: + continue + subtitles.setdefault(subtitle.get('lang') or 'en', []).append({ + 'url': subtitle_href, + 'ext': mimetype2ext(subtitle.get('type')) or determine_ext(subtitle_href), + }) formats = [] media_content = get_media_node('content')