From 1f7659dbe90b28ecd5257aba872aba35515e6127 Mon Sep 17 00:00:00 2001 From: Sergey M Date: Mon, 24 Feb 2014 23:44:31 +0700 Subject: [PATCH 1/3] [generic] Unescape webpage contents --- youtube_dl/extractor/generic.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 6e6324779..a3de0ba60 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -291,6 +291,8 @@ def _real_extract(self, url): except compat_xml_parse_error: pass + webpage = compat_urllib_parse.unquote(webpage) + # it's tempting to parse this further, but you would # have to take into account all the variations like # Video Title - Site Name From 143907304908ee34bfcfe4914cda596d235291f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 15 Mar 2014 04:38:49 +0700 Subject: [PATCH 2/3] [generic] Add comment for unescaping webpage contents --- youtube_dl/extractor/generic.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index a3de0ba60..300485c7f 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -291,6 +291,9 @@ def _real_extract(self, url): except compat_xml_parse_error: pass + # Sometimes an embedded video player is hidden behind percent encoding + # (e.g. 
https://github.com/rg3/youtube-dl/issues/2448) + # Unescaping the whole page allows handling those cases in a generic way webpage = compat_urllib_parse.unquote(webpage) # it's tempting to parse this further, but you would From d95e35d659ea0dd2aa1dba3a4ee9925c25505fda Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 15 Mar 2014 04:39:53 +0700 Subject: [PATCH 3/3] [generic] Add nowvideo test hidden behind percent encoding --- youtube_dl/extractor/generic.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 300485c7f..0d02f836e 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -145,6 +145,17 @@ class GenericIE(InfoExtractor): 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.', } }, + # nowvideo embed hidden behind percent encoding + { + 'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/', + 'md5': '2baf4ddd70f697d94b1c18cf796d5107', + 'info_dict': { + 'id': '06e53103ca9aa', + 'ext': 'flv', + 'title': 'Macross Episode 001 Watch Macross Episode 001 onl', + 'description': 'No description', + }, + } ] def report_download_webpage(self, video_id):