Add support for http://www.tube8.com

2024-11-15 03:27:46 +01:00 · 2013-10-26 23:27:30 +02:00 · 2013-10-26 23:27:30 +02:00 · 1d45a23b74
commit 1d45a23b74
parent 49a25557b0
2 changed files with 64 additions and 0 deletions
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@ -121,6 +121,7 @@ from .tf1 import TF1IE
 from .thisav import ThisAVIE
 from .traileraddict import TrailerAddictIE
 from .trilulilu import TriluliluIE
 from .tube8 import Tube8IE
 from .tudou import TudouIE
 from .tumblr import TumblrIE
 from .tutv import TutvIE
--- a/youtube_dl/extractor/tube8.py
+++ b/youtube_dl/extractor/tube8.py
@ -0,0 +1,63 @@
 import os
 import re
 from .common import InfoExtractor
 from ..utils import (
    compat_urllib_parse_urlparse,
    compat_urllib_request,
    compat_urllib_parse,
    unescapeHTML,
 )
 from ..aes import (
    aes_decrypt_text
 )
 class Tube8IE(InfoExtractor):
    """Information extractor for video pages on tube8.com.

    Matches URLs of the form ``tube8.com/<category>/<slug>/<numeric id>/``
    (scheme and ``www.`` prefix optional) and scrapes the title,
    description, uploader, thumbnail and media URL from the page source.
    """

    # The dot in the host name is escaped so that only the literal
    # "tube8.com" matches (an unescaped "." would accept any character).
    _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>tube8\.com/[^/]+/[^/]+/(?P<videoid>[0-9]+)/?)'
    _TEST = {
        u'url': u'http://www.tube8.com/teen/kasia-music-video/229795/',
        u'file': u'229795.mp4',
        u'md5': u'e9e0b0c86734e5e3766e653509475db0',
        u'info_dict': {
            u"description": u"hot teen Kasia grinding", 
            u"uploader": u"unknown", 
            u"title": u"Kasia music video",
        }
    }

    def _real_extract(self, url):
        """Download the video page and build the info dictionary.

        Args:
            url: A page URL that matches ``_VALID_URL``.

        Returns:
            A dict with the keys ``id``, ``uploader``, ``title``,
            ``thumbnail``, ``description``, ``url``, ``ext``, ``format``
            and ``format_id``.  ``uploader``, ``thumbnail`` and
            ``description`` are looked up non-fatally; ``format`` and
            ``format_id`` are ``None`` when the media URL path is too
            short to contain the expected format segment.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('videoid')
        # Rebuild the address from the matched part so the request always
        # goes to http://www.tube8.com/..., whatever prefix the user typed.
        url = 'http://www.' + mobj.group('url')

        req = compat_urllib_request.Request(url)
        # Send the age-verification cookie along with the page request.
        req.add_header('Cookie', 'age_verified=1')
        webpage = self._download_webpage(req, video_id)

        # "\s*" instead of a literal TAB character: it still matches the
        # tab-separated form and survives editors that convert tabs.
        video_title = self._html_search_regex(r'videotitle\s*=\s*"([^"]+)', webpage, u'title')
        video_description = self._html_search_regex(r'>Description:</strong>(.+?)<', webpage, u'description', fatal=False)
        video_uploader = self._html_search_regex(r'>Submitted by:</strong>(?:\w|<[^>]*>)*(.+?)<', webpage, u'uploader', fatal=False)
        thumbnail = self._html_search_regex(r'"image_url":"([^"]+)', webpage, u'thumbnail', fatal=False)
        if thumbnail:
            # The value is embedded with "/" escaped as "\/"; undo that.
            thumbnail = thumbnail.replace('\\/', '/')

        video_url = self._html_search_regex(r'"video_url":"([^"]+)', webpage, u'video_url')
        if '"encrypted":true' in webpage:
            # The text captured from "video_title" is used as the password
            # for decrypting the media URL.
            password = self._html_search_regex(r'"video_title":"([^"]+)', webpage, u'password')
            video_url = aes_decrypt_text(video_url, password, 32).decode('utf-8')
        else:
            # A plain URL is taken from the same kind of "key":"value"
            # field as the thumbnail, so strip the "\/" escaping as well.
            # No-op if the page does not escape it.
            video_url = video_url.replace('\\/', '/')

        path = compat_urllib_parse_urlparse(video_url).path
        extension = os.path.splitext(path)[1][1:]

        # The format label is built from the first two "_"-separated tokens
        # of the fifth "/"-separated piece of the path.  Guard the index so
        # an unexpected path layout does not abort the whole extraction.
        path_parts = path.split('/')
        if len(path_parts) > 4:
            format_id = "-".join(path_parts[4].split('_')[:2])
        else:
            format_id = None

        return {
            'id': video_id,
            'uploader': video_uploader,
            'title': video_title,
            'thumbnail': thumbnail,
            'description': video_description,
            'url': video_url,
            'ext': extension,
            'format': format_id,
            'format_id': format_id,
        }