From c19f7764a5499b0f1e1914dd5101619b8d57d7cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sun, 27 Oct 2013 14:40:25 +0100 Subject: [PATCH] [generic] Detect bandcamp pages that use custom domains (closes #1662) They embed the original url in the 'og:url' property. --- youtube_dl/extractor/generic.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index ab4a5b7de5..2c8fcf5ae5 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -41,7 +41,17 @@ class GenericIE(InfoExtractor): u"uploader_id": u"skillsmatter", u"uploader": u"Skills Matter", } - } + }, + # bandcamp page with custom domain + { + u'url': u'http://bronyrock.com/track/the-pony-mash', + u'file': u'3235767654.mp3', + u'info_dict': { + u'title': u'The Pony Mash', + u'uploader': u'M_Pallante', + }, + u'skip': u'There is a limit of 200 free downloads / month for the test song', + }, ] def report_download_webpage(self, video_id): @@ -155,6 +165,12 @@ def _real_extract(self, url): surl = unescapeHTML(mobj.group(1)) return self.url_result(surl, 'Youtube') + # Look for Bandcamp pages with custom domain + mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage) + if mobj is not None: + burl = unescapeHTML(mobj.group(1)) + return self.url_result(burl, 'Bandcamp') + # Start with something easy: JW Player in SWFObject mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage) if mobj is None: