youtube-dl/youtube_dl/extractor/xhamster.py

from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
    clean_html,
    determine_ext,
    dict_get,
    ExtractorError,
    int_or_none,
    parse_duration,
    try_get,
    unified_strdate,
    url_or_none,
)


class XHamsterIE(InfoExtractor):
    """Extractor for xHamster video pages.

    Matches both the ``movies/<id>/<slug>.html`` and the
    ``videos/<slug>-<id>`` URL schemas on xhamster.com / xhamster.one and
    any of their subdomains. Metadata is taken from the
    ``window.initials`` JSON blob when the page contains one; otherwise the
    page markup is scraped with regular expressions (old layout).
    """

    _VALID_URL = r'''(?x)
                    https?://
                        (?:.+?\.)?xhamster\.(?:com|one)/
                        (?:
                            movies/(?P<id>\d+)/(?P<display_id>[^/]*)\.html|
                            videos/(?P<display_id_2>[^/]*)-(?P<id_2>\d+)
                        )
                    '''

    _TESTS = [{
        'url': 'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html',
        'md5': '8281348b8d3c53d39fffb377d24eac4e',
        'info_dict': {
            'id': '1509445',
            'display_id': 'femaleagent_shy_beauty_takes_the_bait',
            'ext': 'mp4',
            'title': 'FemaleAgent Shy beauty takes the bait',
            'timestamp': 1350194821,
            'upload_date': '20121014',
            'uploader': 'Ruseful2011',
            'duration': 893,
            'age_limit': 18,
            'categories': ['Fake Hub', 'Amateur', 'MILFs', 'POV', 'Beauti', 'Beauties', 'Beautiful', 'Boss', 'Office', 'Oral', 'Reality', 'Sexy', 'Taking'],
        },
    }, {
        'url': 'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd',
        'info_dict': {
            'id': '2221348',
            'display_id': 'britney_spears_sexy_booty',
            'ext': 'mp4',
            'title': 'Britney Spears  Sexy Booty',
            'timestamp': 1379123460,
            'upload_date': '20130914',
            'uploader': 'jojo747400',
            'duration': 200,
            'age_limit': 18,
            'categories': ['Britney Spears', 'Celebrities', 'HD Videos', 'Sexy', 'Sexy Booty'],
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # empty seo
        'url': 'http://xhamster.com/movies/5667973/.html',
        'info_dict': {
            'id': '5667973',
            'ext': 'mp4',
            'title': '....',
            'timestamp': 1454948101,
            'upload_date': '20160208',
            'uploader': 'parejafree',
            'duration': 72,
            'age_limit': 18,
            'categories': ['Amateur', 'Blowjobs'],
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # mobile site
        'url': 'https://m.xhamster.com/videos/cute-teen-jacqueline-solo-masturbation-8559111',
        'only_matching': True,
    }, {
        'url': 'https://xhamster.com/movies/2272726/amber_slayed_by_the_knight.html',
        'only_matching': True,
    }, {
        # This video is visible for marcoalfa123456's friends only
        'url': 'https://it.xhamster.com/movies/7263980/la_mia_vicina.html',
        'only_matching': True,
    }, {
        # new URL schema
        'url': 'https://pt.xhamster.com/videos/euro-pedal-pumping-7937821',
        'only_matching': True,
    }, {
        'url': 'https://xhamster.one/videos/femaleagent-shy-beauty-takes-the-bait-1509445',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the video page for *url* and build its info dict.

        Returns a dict with ``id``, ``display_id``, ``title``, ``formats``
        and whatever optional metadata could be found.

        Raises ExtractorError (marked as expected) when the page contains a
        ``videoClosed`` element, using that element's text as the message.
        """
        mobj = re.match(self._VALID_URL, url)
        # Exactly one of the two URL schemas matched, so only one pair of
        # groups is populated.
        video_id = mobj.group('id') or mobj.group('id_2')
        display_id = mobj.group('display_id') or mobj.group('display_id_2')

        # Drop the mobile "m." host label so the non-mobile URL is fetched.
        desktop_url = re.sub(r'^(https?://(?:.+?\.)?)m\.', r'\1', url)
        webpage = self._download_webpage(desktop_url, video_id)

        error = self._html_search_regex(
            r'<div[^>]+id=["\']videoClosed["\'][^>]*>(.+?)</div>',
            webpage, 'error', default=None)
        if error:
            raise ExtractorError(error, expected=True)

        age_limit = self._rta_search(webpage)

        def get_height(s):
            # Quality labels look like "720p"; anything else yields None.
            return int_or_none(self._search_regex(
                r'^(\d+)[pP]', s, 'height', default=None))

        initials = self._parse_json(
            self._search_regex(
                r'window\.initials\s*=\s*({.+?})\s*;\s*\n', webpage, 'initials',
                default='{}'),
            video_id, fatal=False)
        if initials:
            video = initials['videoModel']
            title = video['title']
            formats = []
            for format_id, formats_dict in video['sources'].items():
                if format_id == 'download':
                    # Download link takes some time to be generated,
                    # skipping for now
                    continue
                if not isinstance(formats_dict, dict):
                    continue
                for quality, format_item in formats_dict.items():
                    format_url = url_or_none(format_item)
                    if not format_url:
                        continue
                    formats.append({
                        'format_id': '%s-%s' % (format_id, quality),
                        'url': format_url,
                        'ext': determine_ext(format_url, 'mp4'),
                        'height': get_height(quality),
                    })
            self._sort_formats(formats)

            categories_list = video.get('categories')
            if isinstance(categories_list, list):
                # Keep only well-formed entries that carry a string name.
                categories = [
                    c['name'] for c in categories_list
                    if isinstance(c, dict)
                    and isinstance(c.get('name'), compat_str)]
            else:
                categories = None

            return {
                'id': video_id,
                'display_id': display_id,
                'title': title,
                'description': video.get('description'),
                'timestamp': int_or_none(video.get('created')),
                'uploader': try_get(
                    video, lambda x: x['author']['name'], compat_str),
                'thumbnail': video.get('thumbURL'),
                'duration': int_or_none(video.get('duration')),
                'view_count': int_or_none(video.get('views')),
                'like_count': int_or_none(try_get(
                    video, lambda x: x['rating']['likes'], int)),
                'dislike_count': int_or_none(try_get(
                    video, lambda x: x['rating']['dislikes'], int)),
                # Previously this read 'views', which just duplicated
                # view_count.
                # NOTE(review): the 'comments' key name is assumed - confirm
                # against a live videoModel payload.
                'comment_count': int_or_none(video.get('comments')),
                'age_limit': age_limit,
                'categories': categories,
                'formats': formats,
            }

        # Old layout fallback

        title = self._html_search_regex(
            [r'<h1[^>]*>([^<]+)</h1>',
             r'<meta[^>]+itemprop=".*?caption.*?"[^>]+content="(.+?)"',
             r'<title[^>]*>(.+?)(?:,\s*[^,]*?\s*Porn\s*[^,]*?:\s*xHamster[^<]*| - xHamster\.com)</title>'],
            webpage, 'title')

        formats = []
        format_urls = set()

        # _parse_json is called with fatal=False, so fall back to an empty
        # dict instead of calling .items() on a failed (None) result.
        sources = self._parse_json(
            self._search_regex(
                r'sources\s*:\s*({.+?})\s*,?\s*\n', webpage, 'sources',
                default='{}'),
            video_id, fatal=False) or {}
        for format_id, format_url in sources.items():
            format_url = url_or_none(format_url)
            if not format_url:
                continue
            if format_url in format_urls:
                continue
            format_urls.add(format_url)
            formats.append({
                'format_id': format_id,
                'url': format_url,
                'height': get_height(format_id),
            })

        video_url = self._search_regex(
            [r'''file\s*:\s*(?P<q>["'])(?P<mp4>.+?)(?P=q)''',
             r'''<a\s+href=(?P<q>["'])(?P<mp4>.+?)(?P=q)\s+class=["']mp4Thumb''',
             r'''<video[^>]+file=(?P<q>["'])(?P<mp4>.+?)(?P=q)[^>]*>'''],
            webpage, 'video url', group='mp4', default=None)
        # Only add the single-file URL if it was not already listed above.
        if video_url and video_url not in format_urls:
            formats.append({
                'url': video_url,
            })

        self._sort_formats(formats)

        # Only a few videos have a description
        mobj = re.search(r'<span>Description: </span>([^<]+)', webpage)
        description = mobj.group(1) if mobj else None

        upload_date = unified_strdate(self._search_regex(
            r'hint=["\'](\d{4}-\d{2}-\d{2}) \d{2}:\d{2}:\d{2} [A-Z]{3,4}',
            webpage, 'upload date', fatal=False))

        uploader = self._html_search_regex(
            r'<span[^>]+itemprop=["\']author[^>]+><a[^>]+><span[^>]+>([^<]+)',
            webpage, 'uploader', default='anonymous')

        thumbnail = self._search_regex(
            [r'''["']thumbUrl["']\s*:\s*(?P<q>["'])(?P<thumbnail>.+?)(?P=q)''',
             r'''<video[^>]+"poster"=(?P<q>["'])(?P<thumbnail>.+?)(?P=q)[^>]*>'''],
            webpage, 'thumbnail', fatal=False, group='thumbnail')

        duration = parse_duration(self._search_regex(
            [r'<[^<]+\bitemprop=["\']duration["\'][^<]+\bcontent=["\'](.+?)["\']',
             r'Runtime:\s*</span>\s*([\d:]+)'], webpage,
            'duration', fatal=False))

        view_count = int_or_none(self._search_regex(
            r'content=["\']User(?:View|Play)s:(\d+)',
            webpage, 'view count', fatal=False))

        mobj = re.search(r'hint=[\'"](?P<likecount>\d+) Likes / (?P<dislikecount>\d+) Dislikes', webpage)
        (like_count, dislike_count) = (mobj.group('likecount'), mobj.group('dislikecount')) if mobj else (None, None)

        mobj = re.search(r'</label>Comments \((?P<commentcount>\d+)\)</div>', webpage)
        comment_count = mobj.group('commentcount') if mobj else 0

        categories_html = self._search_regex(
            r'(?s)<table.+?(<span>Categories:.+?)</table>', webpage,
            'categories', default=None)
        categories = [clean_html(category) for category in re.findall(
            r'<a[^>]+>(.+?)</a>', categories_html)] if categories_html else None

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'upload_date': upload_date,
            'uploader': uploader,
            'thumbnail': thumbnail,
            'duration': duration,
            'view_count': view_count,
            'like_count': int_or_none(like_count),
            'dislike_count': int_or_none(dislike_count),
            'comment_count': int_or_none(comment_count),
            'age_limit': age_limit,
            'categories': categories,
            'formats': formats,
        }


class XHamsterEmbedIE(InfoExtractor):
    """Extractor for xHamster ``xembed.php`` player URLs.

    Resolves the embed page to a regular video page URL and hands it over
    to the 'XHamster' extractor.
    """

    _VALID_URL = r'https?://(?:.+?\.)?xhamster\.com/xembed\.php\?video=(?P<id>\d+)'
    _TEST = {
        'url': 'http://xhamster.com/xembed.php?video=3328539',
        'info_dict': {
            'id': '3328539',
            'ext': 'mp4',
            'title': 'Pen Masturbation',
            'timestamp': 1406581861,
            'upload_date': '20140728',
            'uploader': 'ManyakisArt',
            'duration': 5,
            'age_limit': 18,
        }
    }

    @staticmethod
    def _extract_urls(webpage):
        """Return every xHamster embed URL used as an iframe ``src`` in *webpage*."""
        iframe_re = r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?xhamster\.com/xembed\.php\?video=\d+)\1'
        return [match.group('url') for match in re.finditer(iframe_re, webpage)]

    def _real_extract(self, url):
        """Resolve the embed *url* to a video page and delegate to 'XHamster'."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # First choice: a direct link to the video page inside the embed page.
        page_link_re = r'href="(https?://xhamster\.com/(?:movies/{0}/[^"]*\.html|videos/[^/]*-{0})[^"]*)"'.format(video_id)
        page_link = self._search_regex(
            page_link_re, webpage, 'xhamster url', default=None)
        if page_link:
            return self.url_result(page_link, 'XHamster')

        # Otherwise take the first usable link from the player's "vars" JSON.
        raw_vars = self._search_regex(
            r'vars\s*:\s*({.+?})\s*,\s*\n', webpage, 'vars')
        player_vars = self._parse_json(raw_vars, video_id)
        link_keys = ('downloadLink', 'homepageLink', 'commentsLink', 'shareUrl')
        return self.url_result(dict_get(player_vars, link_keys), 'XHamster')
[xhamster] Use unicode_literals 2014-01-23 03:52:59 +01:00			`from __future__ import unicode_literals`

[xhamster] Move into own file 2013-06-23 22:32:44 +02:00			`import re`

			`from .common import InfoExtractor`
[xhamster] Extract all formats and fix duration extraction (#13593) 2017-07-07 17:49:11 +02:00			`from ..compat import compat_str`
[xhamster] Move into own file 2013-06-23 22:32:44 +02:00			`from ..utils import (`
[xhamster] Extract categories (closes #11728) 2017-05-28 02:50:15 +02:00			`clean_html,`
[xhamster] Fix extraction (closes #14884) 2017-12-02 13:03:24 +01:00			`determine_ext,`
[xhamster:embed] Extract vars (Closes #8912) 2016-03-21 17:07:34 +01:00			`dict_get,`
[xhamster] Capture and output videoClosed error (#12263) 2017-02-25 14:38:21 +01:00			`ExtractorError,`
[xhamster] Fix and improve 2014-02-19 19:42:15 +01:00			`int_or_none,`
[xhamster] Fix duration extraction (closes #11549) 2016-12-28 17:01:52 +01:00			`parse_duration,`
[xhamster] Fix extraction (closes #14884) 2017-12-02 13:03:24 +01:00			`try_get,`
[xhamster] Fix duration extraction 2016-01-08 19:26:37 +01:00			`unified_strdate,`
Improve URL extraction 2018-07-21 14:08:28 +02:00			`url_or_none,`
[xhamster] Move into own file 2013-06-23 22:32:44 +02:00			`)`


			`class XHamsterIE(InfoExtractor):`
[xhamster] Add support for new URL schema (closes #13593) 2017-07-07 17:22:29 +02:00			`_VALID_URL = r'''(?x)`
			`https?://`
[xhamster] Add support for xhamster.one (closes #20508) 2019-03-30 19:27:45 +01:00			`(?:.+?\.)?xhamster\.(?:com\|one)/`
[xhamster] Add support for new URL schema (closes #13593) 2017-07-07 17:22:29 +02:00			`(?:`
			`movies/(?P<id>\d+)/(?P<display_id>[^/]*)\.html\|`
			`videos/(?P<display_id_2>[^/]*)-(?P<id_2>\d+)`
			`)`
			`'''`

[xhamster] Update tests 2016-05-24 17:38:27 +02:00			`_TESTS = [{`
			`'url': 'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html',`
			`'md5': '8281348b8d3c53d39fffb377d24eac4e',`
			`'info_dict': {`
			`'id': '1509445',`
[xhamster] Extract all formats and fix duration extraction (#13593) 2017-07-07 17:49:11 +02:00			`'display_id': 'femaleagent_shy_beauty_takes_the_bait',`
[xhamster] Update tests 2016-05-24 17:38:27 +02:00			`'ext': 'mp4',`
			`'title': 'FemaleAgent Shy beauty takes the bait',`
[xhamster] Fix extraction (closes #14884) 2017-12-02 13:03:24 +01:00			`'timestamp': 1350194821,`
[xhamster] Update tests 2016-05-24 17:38:27 +02:00			`'upload_date': '20121014',`
			`'uploader': 'Ruseful2011',`
[xhamster] Fix duration extraction (closes #11549) 2016-12-28 17:01:52 +01:00			`'duration': 893,`
[xhamster] Update tests 2016-05-24 17:38:27 +02:00			`'age_limit': 18,`
[xhamster] Update test 2018-02-20 16:18:50 +01:00			`'categories': ['Fake Hub', 'Amateur', 'MILFs', 'POV', 'Beauti', 'Beauties', 'Beautiful', 'Boss', 'Office', 'Oral', 'Reality', 'Sexy', 'Taking'],`
[xhamster] Fix and improve 2014-02-19 19:42:15 +01:00			`},`
[xhamster] Update tests 2016-05-24 17:38:27 +02:00			`}, {`
			`'url': 'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd',`
			`'info_dict': {`
			`'id': '2221348',`
[xhamster] Extract all formats and fix duration extraction (#13593) 2017-07-07 17:49:11 +02:00			`'display_id': 'britney_spears_sexy_booty',`
[xhamster] Update tests 2016-05-24 17:38:27 +02:00			`'ext': 'mp4',`
			`'title': 'Britney Spears  Sexy Booty',`
[xhamster] Fix extraction (closes #14884) 2017-12-02 13:03:24 +01:00			`'timestamp': 1379123460,`
[xhamster] Update tests 2016-05-24 17:38:27 +02:00			`'upload_date': '20130914',`
			`'uploader': 'jojo747400',`
[xhamster] Fix duration extraction (closes #11549) 2016-12-28 17:01:52 +01:00			`'duration': 200,`
[xhamster] Update tests 2016-05-24 17:38:27 +02:00			`'age_limit': 18,`
[xhamster] Extract categories (closes #11728) 2017-05-28 02:50:15 +02:00			`'categories': ['Britney Spears', 'Celebrities', 'HD Videos', 'Sexy', 'Sexy Booty'],`
[xhamster] Add HTTPS support 2015-01-02 12:52:48 +01:00			`},`
[xhamster] Update tests 2016-05-24 17:38:27 +02:00			`'params': {`
			`'skip_download': True,`
[xhamster] url regex fix for videos with empty title. 2016-05-23 23:32:39 +02:00			`},`
[xhamster] Update tests 2016-05-24 17:38:27 +02:00			`}, {`
			`# empty seo`
			`'url': 'http://xhamster.com/movies/5667973/.html',`
			`'info_dict': {`
			`'id': '5667973',`
			`'ext': 'mp4',`
			`'title': '....',`
[xhamster] Fix extraction (closes #14884) 2017-12-02 13:03:24 +01:00			`'timestamp': 1454948101,`
[xhamster] Update tests 2016-05-24 17:38:27 +02:00			`'upload_date': '20160208',`
			`'uploader': 'parejafree',`
[xhamster] Fix duration extraction (closes #11549) 2016-12-28 17:01:52 +01:00			`'duration': 72,`
[xhamster] Update tests 2016-05-24 17:38:27 +02:00			`'age_limit': 18,`
[xhamster] Extract categories (closes #11728) 2017-05-28 02:50:15 +02:00			`'categories': ['Amateur', 'Blowjobs'],`
[xhamster] Add HTTPS support 2015-01-02 12:52:48 +01:00			`},`
[xhamster] Update tests 2016-05-24 17:38:27 +02:00			`'params': {`
			`'skip_download': True,`
			`},`
[xhamster] Add support for mobile URLs and fix thumbnail extraction 2017-12-05 18:08:31 +01:00			`}, {`
			`# mobile site`
			`'url': 'https://m.xhamster.com/videos/cute-teen-jacqueline-solo-masturbation-8559111',`
			`'only_matching': True,`
[xhamster] Update tests 2016-05-24 17:38:27 +02:00			`}, {`
			`'url': 'https://xhamster.com/movies/2272726/amber_slayed_by_the_knight.html',`
			`'only_matching': True,`
[xhamster] Capture and output videoClosed error (#12263) 2017-02-25 14:38:21 +01:00			`}, {`
			`# This video is visible for marcoalfa123456's friends only`
			`'url': 'https://it.xhamster.com/movies/7263980/la_mia_vicina.html',`
			`'only_matching': True,`
[xhamster] Add support for new URL schema (closes #13593) 2017-07-07 17:22:29 +02:00			`}, {`
			`# new URL schema`
			`'url': 'https://pt.xhamster.com/videos/euro-pedal-pumping-7937821',`
			`'only_matching': True,`
[xhamster] Add support for xhamster.one (closes #20508) 2019-03-30 19:27:45 +01:00			`}, {`
			`'url': 'https://xhamster.one/videos/femaleagent-shy-beauty-takes-the-bait-1509445',`
			`'only_matching': True,`
[xhamster] Update tests 2016-05-24 17:38:27 +02:00			`}]`
[xhamster] Move into own file 2013-06-23 22:32:44 +02:00
PEP8 applied 2014-11-23 20:41:03 +01:00			`def _real_extract(self, url):`
[xhamster] Move into own file 2013-06-23 22:32:44 +02:00			`mobj = re.match(self._VALID_URL, url)`
[xhamster] Add support for new URL schema (closes #13593) 2017-07-07 17:22:29 +02:00			`video_id = mobj.group('id') or mobj.group('id_2')`
			`display_id = mobj.group('display_id') or mobj.group('display_id_2')`

[xhamster] Add support for mobile URLs and fix thumbnail extraction 2017-12-05 18:08:31 +01:00			`desktop_url = re.sub(r'^(https?://(?:.+?\.)?)m\.', r'\1', url)`
			`webpage = self._download_webpage(desktop_url, video_id)`
[xhamster] Move into own file 2013-06-23 22:32:44 +02:00
[xhamster] Capture and output videoClosed error (#12263) 2017-02-25 14:38:21 +01:00			`error = self._html_search_regex(`
			`r'<div[^>]+id=["\']videoClosed["\'][^>]*>(.+?)</div>',`
			`webpage, 'error', default=None)`
			`if error:`
			`raise ExtractorError(error, expected=True)`

[xhamster] Fix extraction (closes #14884) 2017-12-02 13:03:24 +01:00			`age_limit = self._rta_search(webpage)`

			`def get_height(s):`
			`return int_or_none(self._search_regex(`
			`r'^(\d+)[pP]', s, 'height', default=None))`

			`initials = self._parse_json(`
			`self._search_regex(`
			`r'window\.initials\s*=\s*({.+?})\s*;\s*\n', webpage, 'initials',`
			`default='{}'),`
			`video_id, fatal=False)`
			`if initials:`
			`video = initials['videoModel']`
			`title = video['title']`
			`formats = []`
			`for format_id, formats_dict in video['sources'].items():`
			`if not isinstance(formats_dict, dict):`
			`continue`
			`for quality, format_item in formats_dict.items():`
			`if format_id == 'download':`
			`# Download link takes some time to be generated,`
			`# skipping for now`
			`continue`
			`if not isinstance(format_item, dict):`
			`continue`
			`format_url = format_item.get('link')`
			`filesize = int_or_none(`
			`format_item.get('size'), invscale=1000000)`
			`else:`
			`format_url = format_item`
			`filesize = None`
Improve URL extraction 2018-07-21 14:08:28 +02:00			`format_url = url_or_none(format_url)`
			`if not format_url:`
[xhamster] Fix extraction (closes #14884) 2017-12-02 13:03:24 +01:00			`continue`
			`formats.append({`
			`'format_id': '%s-%s' % (format_id, quality),`
			`'url': format_url,`
			`'ext': determine_ext(format_url, 'mp4'),`
			`'height': get_height(quality),`
			`'filesize': filesize,`
			`})`
			`self._sort_formats(formats)`

			`categories_list = video.get('categories')`
			`if isinstance(categories_list, list):`
			`categories = []`
			`for c in categories_list:`
			`if not isinstance(c, dict):`
			`continue`
			`c_name = c.get('name')`
			`if isinstance(c_name, compat_str):`
			`categories.append(c_name)`
			`else:`
			`categories = None`

			`return {`
			`'id': video_id,`
			`'display_id': display_id,`
			`'title': title,`
			`'description': video.get('description'),`
			`'timestamp': int_or_none(video.get('created')),`
			`'uploader': try_get(`
			`video, lambda x: x['author']['name'], compat_str),`
			`'thumbnail': video.get('thumbURL'),`
			`'duration': int_or_none(video.get('duration')),`
			`'view_count': int_or_none(video.get('views')),`
			`'like_count': int_or_none(try_get(`
			`video, lambda x: x['rating']['likes'], int)),`
			`'dislike_count': int_or_none(try_get(`
			`video, lambda x: x['rating']['dislikes'], int)),`
			`'comment_count': int_or_none(video.get('views')),`
			`'age_limit': age_limit,`
			`'categories': categories,`
			`'formats': formats,`
			`}`

			`# Old layout fallback`

[xhamster] Fix title extraction (Closes #6944) 2015-09-24 15:56:54 +02:00			`title = self._html_search_regex(`
[xhamster] Change title regex precedence 2016-01-08 19:31:24 +01:00			`[r'<h1[^>]*>([^<]+)</h1>',`
			`r'<meta[^>]+itemprop=".*?caption.*?"[^>]+content="(.+?)"',`
			`r'<title[^>]*>(.+?)(?:,\s*[^,]*?\s*Porn\s*[^,]*?:\s*xHamster[^<]*\| - xHamster\.com)</title>'],`
			`webpage, 'title')`
[xhamster] Move into own file 2013-06-23 22:32:44 +02:00
[xhamster] Extract all formats and fix duration extraction (#13593) 2017-07-07 17:49:11 +02:00			`formats = []`
			`format_urls = set()`

			`sources = self._parse_json(`
			`self._search_regex(`
			`r'sources\s*:\s*({.+?})\s*,?\s*\n', webpage, 'sources',`
			`default='{}'),`
			`video_id, fatal=False)`
			`for format_id, format_url in sources.items():`
Improve URL extraction 2018-07-21 14:08:28 +02:00			`format_url = url_or_none(format_url)`
			`if not format_url:`
[xhamster] Extract all formats and fix duration extraction (#13593) 2017-07-07 17:49:11 +02:00			`continue`
			`if format_url in format_urls:`
			`continue`
			`format_urls.add(format_url)`
			`formats.append({`
			`'format_id': format_id,`
			`'url': format_url,`
[xhamster] Fix extraction (closes #14884) 2017-12-02 13:03:24 +01:00			`'height': get_height(format_id),`
[xhamster] Extract all formats and fix duration extraction (#13593) 2017-07-07 17:49:11 +02:00			`})`

			`video_url = self._search_regex(`
			`[r'''file\s*:\s*(?P<q>["'])(?P<mp4>.+?)(?P=q)''',`
			`r'''<a\s+href=(?P<q>["'])(?P<mp4>.+?)(?P=q)\s+class=["']mp4Thumb''',`
			`r'''<video[^>]+file=(?P<q>["'])(?P<mp4>.+?)(?P=q)[^>]*>'''],`
			`webpage, 'video url', group='mp4', default=None)`
			`if video_url and video_url not in format_urls:`
			`formats.append({`
			`'url': video_url,`
			`})`

			`self._sort_formats(formats)`

XHamsterIE: Add video description 2013-08-23 16:40:20 +02:00			`# Only a few videos have an description`
[xhamster] Futher simplification 2014-01-23 04:04:35 +01:00			`mobj = re.search(r'<span>Description: </span>([^<]+)', webpage)`
[xhamster] Fix and improve 2014-02-19 19:42:15 +01:00			`description = mobj.group(1) if mobj else None`
[xhamster] Move into own file 2013-06-23 22:32:44 +02:00
[xhamster] Fix upload date extraction 2016-01-08 19:21:57 +01:00			`upload_date = unified_strdate(self._search_regex(`
			`r'hint=["\'](\d{4}-\d{2}-\d{2}) \d{2}:\d{2}:\d{2} [A-Z]{3,4}',`
			`webpage, 'upload date', fatal=False))`
[xhamster] Move into own file 2013-06-23 22:32:44 +02:00
[xhamster] Fix uploader extraction 2015-08-05 16:41:40 +02:00			`uploader = self._html_search_regex(`
[xhamster] Simplify (closes #13216) 2017-05-28 02:55:56 +02:00			`r'<span[^>]+itemprop=["\']author[^>]+><a[^>]+><span[^>]+>([^<]+)',`
[xhamster] Fix uploader extraction 2015-08-05 16:41:40 +02:00			`webpage, 'uploader', default='anonymous')`
[xhamster] Move into own file 2013-06-23 22:32:44 +02:00
[xhamster] Fix thumbnail extraction 2015-08-05 16:36:37 +02:00			`thumbnail = self._search_regex(`
[xhamster] Add support for mobile URLs and fix thumbnail extraction 2017-12-05 18:08:31 +01:00			`[r'''["']thumbUrl["']\s*:\s*(?P<q>["'])(?P<thumbnail>.+?)(?P=q)''',`
			`r'''<video[^>]+"poster"=(?P<q>["'])(?P<thumbnail>.+?)(?P=q)[^>]*>'''],`
[xhamster] flake8 2015-08-05 19:08:55 +02:00			`webpage, 'thumbnail', fatal=False, group='thumbnail')`
[xhamster] Fix and improve 2014-02-19 19:42:15 +01:00
[xhamster] Fix duration extraction (closes #11549) 2016-12-28 17:01:52 +01:00			`duration = parse_duration(self._search_regex(`
[xhamster] Extract all formats and fix duration extraction (#13593) 2017-07-07 17:49:11 +02:00			`[r'<[^<]+\bitemprop=["\']duration["\'][^<]+\bcontent=["\'](.+?)["\']',`
			`r'Runtime:\s*</span>\s*([\d:]+)'], webpage,`
[xhamster] Fix duration extraction (closes #11549) 2016-12-28 17:01:52 +01:00			`'duration', fatal=False))`
[xhamster] Fix and improve 2014-02-19 19:42:15 +01:00
[xhamster] Fix view count extraction 2016-01-08 19:29:10 +01:00			`view_count = int_or_none(self._search_regex(`
			`r'content=["\']User(?:View\|Play)s:(\d+)',`
			`webpage, 'view count', fatal=False))`
[xhamster] Fix and improve 2014-02-19 19:42:15 +01:00
[xhamster] Simplify (closes #13216) 2017-05-28 02:55:56 +02:00			`mobj = re.search(r'hint=[\'"](?P<likecount>\d+) Likes / (?P<dislikecount>\d+) Dislikes', webpage)`
[xhamster] Fix and improve 2014-02-19 19:42:15 +01:00			`(like_count, dislike_count) = (mobj.group('likecount'), mobj.group('dislikecount')) if mobj else (None, None)`

			`mobj = re.search(r'</label>Comments \((?P<commentcount>\d+)\)</div>', webpage)`
			`comment_count = mobj.group('commentcount') if mobj else 0`
[xhamster] Move into own file 2013-06-23 22:32:44 +02:00
[xhamster] Extract categories (closes #11728) 2017-05-28 02:50:15 +02:00			`categories_html = self._search_regex(`
			`r'(?s)<table.+?(<span>Categories:.+?)</table>', webpage,`
			`'categories', default=None)`
			`categories = [clean_html(category) for category in re.findall(`
			`r'<a[^>]+>(.+?)</a>', categories_html)] if categories_html else None`

[XHamsterIE] Extract SD and HD video 2013-10-26 20:38:54 +02:00			`return {`
			`'id': video_id,`
[xhamster] Extract all formats and fix duration extraction (#13593) 2017-07-07 17:49:11 +02:00			`'display_id': display_id,`
[xhamster] Fix and improve 2014-02-19 19:42:15 +01:00			`'title': title,`
			`'description': description,`
			`'upload_date': upload_date,`
[xhamster] Fix uploader extraction 2015-08-05 16:41:40 +02:00			`'uploader': uploader,`
[xhamster] Fix and improve 2014-02-19 19:42:15 +01:00			`'thumbnail': thumbnail,`
			`'duration': duration,`
			`'view_count': view_count,`
			`'like_count': int_or_none(like_count),`
			`'dislike_count': int_or_none(dislike_count),`
			`'comment_count': int_or_none(comment_count),`
[xhamster] Add support for age_limit (Instead of #1627) 2013-10-19 21:09:48 +02:00			`'age_limit': age_limit,`
[xhamster] Extract categories (closes #11728) 2017-05-28 02:50:15 +02:00			`'categories': categories,`
[xhamster] Fix and improve 2014-02-19 19:42:15 +01:00			`'formats': formats,`
[XHamsterIE] Extract SD and HD video 2013-10-26 20:38:54 +02:00			`}`
[xhamster:embed] Add extractor (Closes #6032) 2015-06-21 19:10:38 +02:00

			`class XHamsterEmbedIE(InfoExtractor):`
[xhamster] Add support for mobile URLs and fix thumbnail extraction 2017-12-05 18:08:31 +01:00			`_VALID_URL = r'https?://(?:.+?\.)?xhamster\.com/xembed\.php\?video=(?P<id>\d+)'`
[xhamster:embed] Add extractor (Closes #6032) 2015-06-21 19:10:38 +02:00			`_TEST = {`
			`'url': 'http://xhamster.com/xembed.php?video=3328539',`
			`'info_dict': {`
			`'id': '3328539',`
			`'ext': 'mp4',`
			`'title': 'Pen Masturbation',`
[xhamster] Add support for mobile URLs and fix thumbnail extraction 2017-12-05 18:08:31 +01:00			`'timestamp': 1406581861,`
[xhamster:embed] Add extractor (Closes #6032) 2015-06-21 19:10:38 +02:00			`'upload_date': '20140728',`
[xhamster] Add support for mobile URLs and fix thumbnail extraction 2017-12-05 18:08:31 +01:00			`'uploader': 'ManyakisArt',`
[xhamster:embed] Add extractor (Closes #6032) 2015-06-21 19:10:38 +02:00			`'duration': 5,`
			`'age_limit': 18,`
			`}`
			`}`

[generic] Add support for xhamster embeds 2015-06-21 19:11:25 +02:00			`@staticmethod`
			`def _extract_urls(webpage):`
			`return [url for _, url in re.findall(`
			`r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?xhamster\.com/xembed\.php\?video=\d+)\1',`
			`webpage)]`

[xhamster:embed] Add extractor (Closes #6032) 2015-06-21 19:10:38 +02:00			`def _real_extract(self, url):`
			`video_id = self._match_id(url)`

			`webpage = self._download_webpage(url, video_id)`

			`video_url = self._search_regex(`
[xhamsterembed] Fix extraction (closes #14308) 2017-09-24 14:23:08 +02:00			`r'href="(https?://xhamster\.com/(?:movies/{0}/[^"]*\.html\|videos/[^/]*-{0})[^"]*)"'.format(video_id),`
[xhamster:embed] Extract vars (Closes #8912) 2016-03-21 17:07:34 +01:00			`webpage, 'xhamster url', default=None)`

			`if not video_url:`
			`vars = self._parse_json(`
			`self._search_regex(r'vars\s*:\s*({.+?})\s*,\s*\n', webpage, 'vars'),`
			`video_id)`
			`video_url = dict_get(vars, ('downloadLink', 'homepageLink', 'commentsLink', 'shareUrl'))`
[xhamster:embed] Add extractor (Closes #6032) 2015-06-21 19:10:38 +02:00
[xhamster] pep8: remove trailing ';' 2015-06-22 11:18:52 +02:00			`return self.url_result(video_url, 'XHamster')`