From 187da2c093ad1013ea714a464e615de9aa773482 Mon Sep 17 00:00:00 2001 From: Jeff Crouse Date: Sun, 16 Dec 2012 00:26:27 -0500 Subject: [PATCH] added YouJizz extractor --- README.md | 14 +++--- youtube_dl/InfoExtractors.py | 86 +++++++++++++++++++++++++++++++++++- youtube_dl/__init__.py | 2 +- 3 files changed, 93 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 1a96bd98d8..0ac75f5d7f 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,7 @@ # OPTIONS --list-extractors List all supported extractors and the URLs they would handle -## Video Selection: + Video Selection: --playlist-start NUMBER playlist video to start at (default is 1) --playlist-end NUMBER playlist video to end at (default is last) --match-title REGEX download only matching titles (regex or caseless @@ -34,7 +34,7 @@ ## Video Selection: caseless sub-string) --max-downloads NUMBER Abort after downloading NUMBER files -## Filesystem Options: + Filesystem Options: -t, --title use title in file name --id use video ID in file name -l, --literal use literal title in file name @@ -59,7 +59,7 @@ ## Filesystem Options: --write-description write video description to a .description file --write-info-json write video metadata to a .info.json file -## Verbosity / Simulation Options: + Verbosity / Simulation Options: -q, --quiet activates quiet mode -s, --simulate do not download the video and do not write anything to disk @@ -74,7 +74,7 @@ ## Verbosity / Simulation Options: --console-title display progress in console titlebar -v, --verbose print various debugging information -## Video Format Options: + Video Format Options: -f, --format FORMAT video format code --all-formats download all available video formats --prefer-free-formats prefer free video formats unless a specific one is @@ -86,12 +86,12 @@ ## Video Format Options: --srt-lang LANG language of the closed captions to download (optional) use IETF language tags like 'en' -## Authentication Options: + Authentication Options: -u, 
class YouJizzIE(InfoExtractor):
    """Information extractor for youjizz.com.

    Extraction takes two HTTP requests: the video page supplies the title
    and a link to an embed page, and the embed page carries the media URL
    inside an ``so.addVariable("file", encodeURIComponent("..."))`` call.
    """

    # The dot before "html" is escaped so that only a literal ".html"
    # suffix is accepted.
    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?youjizz\.com/videos/([^.]+)\.html$'
    IE_NAME = u'youjizz'
    # Named groups below are read by _real_extract via group('title'),
    # group('videoid') and group('source'); the names must stay in sync.
    VIDEO_TITLE_RE = r'<title>(?P<title>.*)</title>'
    EMBED_PAGE_RE = r'http://www\.youjizz\.com/videos/embed/(?P<videoid>[0-9]+)'
    SOURCE_RE = r'so\.addVariable\("file",encodeURIComponent\("(?P<source>[^"]+)"\)\);'

    def __init__(self, downloader=None):
        InfoExtractor.__init__(self, downloader)

    @staticmethod
    def _to_unicode(text):
        """Return text as unicode, decoding UTF-8 byte strings if needed.

        Values that are already unicode are returned untouched: calling
        .decode() on them would make Python 2 encode to ASCII first and
        fail on any non-ASCII character.
        """
        if isinstance(text, bytes):
            return text.decode('utf-8')
        return text

    def report_extract_entry(self, url):
        """Report the extracted video URL."""
        self._downloader.to_screen(u'[youjizz] Downloading entry: %s' % self._to_unicode(url))

    def report_webpage(self, url):
        """Report that the video page was downloaded."""
        self._downloader.to_screen(u'[youjizz] Downloaded page: %s' % self._to_unicode(url))

    def report_title(self, video_title):
        """Report the extracted video title."""
        self._downloader.to_screen(u'[youjizz] Title: %s' % self._to_unicode(video_title))

    def report_embed_page(self, embed_page):
        """Report the URL of the embed page."""
        self._downloader.to_screen(u'[youjizz] Embed Page: %s' % self._to_unicode(embed_page))

    def _download_page(self, url, what):
        """Fetch url and return its body, or None after reporting a failure.

        `what` names the page in the error message (e.g. u'video webpage').
        """
        try:
            handle = urllib2.urlopen(url)
            try:
                return handle.read()
            finally:
                # Release the connection even if read() fails.
                handle.close()
        except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
            self._downloader.trouble(u'ERROR: unable to download %s: %s' % (what, err))
            return None

    def _real_extract(self, url):
        """Extract video information for url.

        Returns a one-element list holding the info dictionary, or None
        after reporting the problem through the downloader when a download
        or an extraction step fails.
        """
        # Get webpage content
        webpage = self._download_page(url, u'video webpage')
        if webpage is None:
            return
        self.report_webpage(url)

        # Get the video title
        result = re.search(self.VIDEO_TITLE_RE, webpage)
        if result is None:
            self._downloader.trouble(u'ERROR: unable to extract video title')
            return
        video_title = result.group('title').decode('utf-8').strip()
        self.report_title(video_title)

        # Get the embed page
        result = re.search(self.EMBED_PAGE_RE, webpage)
        if result is None:
            self._downloader.trouble(u'ERROR: unable to extract embed page')
            return

        # group(0) is the whole match, i.e. the complete embed page URL.
        embed_page_url = result.group(0).decode('utf-8').strip()
        video_id = result.group('videoid').decode('utf-8')
        self.report_embed_page(embed_page_url)

        webpage = self._download_page(embed_page_url, u'video embed page')
        if webpage is None:
            return

        # Get the video URL
        result = re.search(self.SOURCE_RE, webpage)
        if result is None:
            self._downloader.trouble(u'ERROR: unable to extract video url')
            return
        video_url = result.group('source').decode('utf-8')
        self.report_extract_entry(video_url)

        info = {'id': video_id,
                'url': video_url,
                'uploader': None,
                'upload_date': None,
                'title': video_title,
                'ext': 'flv',
                'format': 'flv',
                'thumbnail': None,
                'description': None,
                'player_url': embed_page_url}

        return [info]
b/youtube_dl/__init__.py @@ -363,7 +363,7 @@ def gen_extractors(): GooglePlusIE(), PornotubeIE(), YouPornIE(), - + YouJizzIE(), GenericIE() ]