1
0
mirror of https://github.com/ytdl-org/youtube-dl.git synced 2025-01-18 01:16:42 +01:00

Merge branch 'gebn-moviefap'

This commit is contained in:
Sergey M․ 2015-06-28 23:05:49 +06:00
commit e8b9ee5e08
6 changed files with 246 additions and 90 deletions

View File

@ -1008,7 +1008,7 @@ class YoutubeDL(object):
t.get('preference'), t.get('width'), t.get('height'), t.get('preference'), t.get('width'), t.get('height'),
t.get('id'), t.get('url'))) t.get('id'), t.get('url')))
for i, t in enumerate(thumbnails): for i, t in enumerate(thumbnails):
if 'width' in t and 'height' in t: if t.get('width') and t.get('height'):
t['resolution'] = '%dx%d' % (t['width'], t['height']) t['resolution'] = '%dx%d' % (t['width'], t['height'])
if t.get('id') is None: if t.get('id') is None:
t['id'] = '%d' % i t['id'] = '%d' % i

View File

@ -144,7 +144,6 @@ from .ellentv import (
) )
from .elpais import ElPaisIE from .elpais import ElPaisIE
from .embedly import EmbedlyIE from .embedly import EmbedlyIE
from .empflix import EMPFlixIE
from .engadget import EngadgetIE from .engadget import EngadgetIE
from .eporner import EpornerIE from .eporner import EpornerIE
from .eroprofile import EroProfileIE from .eroprofile import EroProfileIE
@ -577,7 +576,11 @@ from .tmz import (
TMZIE, TMZIE,
TMZArticleIE, TMZArticleIE,
) )
from .tnaflix import TNAFlixIE from .tnaflix import (
TNAFlixIE,
EMPFlixIE,
MovieFapIE,
)
from .thvideo import ( from .thvideo import (
THVideoIE, THVideoIE,
THVideoPlaylistIE THVideoPlaylistIE

View File

@ -22,6 +22,7 @@ from ..compat import (
compat_str, compat_str,
) )
from ..utils import ( from ..utils import (
NO_DEFAULT,
age_restricted, age_restricted,
bug_reports_message, bug_reports_message,
clean_html, clean_html,
@ -33,7 +34,7 @@ from ..utils import (
sanitize_filename, sanitize_filename,
unescapeHTML, unescapeHTML,
) )
_NO_DEFAULT = object()
class InfoExtractor(object): class InfoExtractor(object):
@ -523,7 +524,7 @@ class InfoExtractor(object):
video_info['description'] = playlist_description video_info['description'] = playlist_description
return video_info return video_info
def _search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0, group=None): def _search_regex(self, pattern, string, name, default=NO_DEFAULT, fatal=True, flags=0, group=None):
""" """
Perform a regex search on the given string, using a single or a list of Perform a regex search on the given string, using a single or a list of
patterns returning the first matching group. patterns returning the first matching group.
@ -549,7 +550,7 @@ class InfoExtractor(object):
return next(g for g in mobj.groups() if g is not None) return next(g for g in mobj.groups() if g is not None)
else: else:
return mobj.group(group) return mobj.group(group)
elif default is not _NO_DEFAULT: elif default is not NO_DEFAULT:
return default return default
elif fatal: elif fatal:
raise RegexNotFoundError('Unable to extract %s' % _name) raise RegexNotFoundError('Unable to extract %s' % _name)
@ -557,7 +558,7 @@ class InfoExtractor(object):
self._downloader.report_warning('unable to extract %s' % _name + bug_reports_message()) self._downloader.report_warning('unable to extract %s' % _name + bug_reports_message())
return None return None
def _html_search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0, group=None): def _html_search_regex(self, pattern, string, name, default=NO_DEFAULT, fatal=True, flags=0, group=None):
""" """
Like _search_regex, but strips HTML tags and unescapes entities. Like _search_regex, but strips HTML tags and unescapes entities.
""" """

View File

@ -1,31 +0,0 @@
from __future__ import unicode_literals
from .tnaflix import TNAFlixIE
class EMPFlixIE(TNAFlixIE):
    """Extractor for empflix.com video pages.

    Only overrides the URL pattern and the page regexes declared below;
    all remaining behaviour is inherited from TNAFlixIE.
    """
    _VALID_URL = r'https?://(?:www\.)?empflix\.com/videos/(?P<display_id>.+?)-(?P<id>[0-9]+)\.html'

    _TITLE_REGEX = r'name="title" value="(?P<title>[^"]*)"'
    _DESCRIPTION_REGEX = r'name="description" value="([^"]*)"'
    _CONFIG_REGEX = r'flashvars\.config\s*=\s*escape\("([^"]+)"'

    _TESTS = [
        {
            'url': 'http://www.empflix.com/videos/Amateur-Finger-Fuck-33051.html',
            'md5': 'b1bc15b6412d33902d6e5952035fcabc',
            'info_dict': {
                'id': '33051',
                'display_id': 'Amateur-Finger-Fuck',
                'ext': 'mp4',
                'title': 'Amateur Finger Fuck',
                'description': 'Amateur solo finger fucking.',
                # Raw string: the value contains the regex escape `\.`, which
                # is not a valid string escape in a normal literal.
                'thumbnail': r're:https?://.*\.jpg$',
                'age_limit': 18,
            }
        },
        {
            'url': 'http://www.empflix.com/videos/[AROMA][ARMD-718]-Aoi-Yoshino-Sawa-25826.html',
            'only_matching': True,
        }
    ]

View File

@ -3,39 +3,70 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str
from ..utils import ( from ..utils import (
parse_duration,
fix_xml_ampersands, fix_xml_ampersands,
float_or_none,
int_or_none,
parse_duration,
str_to_int,
xpath_text,
) )
class TNAFlixIE(InfoExtractor): class TNAFlixNetworkBaseIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?tnaflix\.com/[^/]+/(?P<display_id>[^/]+)/video(?P<id>\d+)' # May be overridden in descendants if necessary
_CONFIG_REGEX = [
_TITLE_REGEX = r'<title>(.+?) - TNAFlix Porn Videos</title>' r'flashvars\.config\s*=\s*escape\("([^"]+)"',
_DESCRIPTION_REGEX = r'<h3 itemprop="description">([^<]+)</h3>' r'<input[^>]+name="config\d?" value="([^"]+)"',
_CONFIG_REGEX = r'flashvars\.config\s*=\s*escape\("([^"]+)"'
_TESTS = [
{
'url': 'http://www.tnaflix.com/porn-stars/Carmella-Decesare-striptease/video553878',
'md5': 'ecf3498417d09216374fc5907f9c6ec0',
'info_dict': {
'id': '553878',
'display_id': 'Carmella-Decesare-striptease',
'ext': 'mp4',
'title': 'Carmella Decesare - striptease',
'description': '',
'thumbnail': 're:https?://.*\.jpg$',
'duration': 91,
'age_limit': 18,
}
},
{
'url': 'https://www.tnaflix.com/amateur-porn/bunzHD-Ms.Donk/video358632',
'only_matching': True,
}
] ]
_TITLE_REGEX = r'<input[^>]+name="title" value="([^"]+)"'
_DESCRIPTION_REGEX = r'<input[^>]+name="description" value="([^"]+)"'
_UPLOADER_REGEX = r'<input[^>]+name="username" value="([^"]+)"'
_VIEW_COUNT_REGEX = None
_COMMENT_COUNT_REGEX = None
_AVERAGE_RATING_REGEX = None
_CATEGORIES_REGEX = r'<li[^>]*>\s*<span[^>]+class="infoTitle"[^>]*>Categories:</span>\s*<span[^>]+class="listView"[^>]*>(.+?)</span>\s*</li>'
def _extract_thumbnails(self, flix_xml):
def get_child(elem, names):
for name in names:
child = elem.find(name)
if child is not None:
return child
timeline = get_child(flix_xml, ['timeline', 'rolloverBarImage'])
if timeline is None:
return
pattern_el = get_child(timeline, ['imagePattern', 'pattern'])
if pattern_el is None or not pattern_el.text:
return
first_el = get_child(timeline, ['imageFirst', 'first'])
last_el = get_child(timeline, ['imageLast', 'last'])
if first_el is None or last_el is None:
return
first_text = first_el.text
last_text = last_el.text
if not first_text.isdigit() or not last_text.isdigit():
return
first = int(first_text)
last = int(last_text)
if first > last:
return
width = int_or_none(xpath_text(timeline, './imageWidth', 'thumbnail width'))
height = int_or_none(xpath_text(timeline, './imageHeight', 'thumbnail height'))
return [{
'url': self._proto_relative_url(pattern_el.text.replace('#', compat_str(i)), 'http:'),
'width': width,
'height': height,
} for i in range(first, last + 1)]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
@ -44,39 +75,64 @@ class TNAFlixIE(InfoExtractor):
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
cfg_url = self._proto_relative_url(self._html_search_regex(
self._CONFIG_REGEX, webpage, 'flashvars.config'), 'http:')
cfg_xml = self._download_xml(
cfg_url, display_id, 'Downloading metadata',
transform_source=fix_xml_ampersands)
formats = []
def extract_video_url(vl):
return re.sub('speed=\d+', 'speed=', vl.text)
video_link = cfg_xml.find('./videoLink')
if video_link is not None:
formats.append({
'url': extract_video_url(video_link),
'ext': xpath_text(cfg_xml, './videoConfig/type', 'type', default='flv'),
})
for item in cfg_xml.findall('./quality/item'):
video_link = item.find('./videoLink')
if video_link is None:
continue
res = item.find('res')
format_id = None if res is None else res.text
height = int_or_none(self._search_regex(
r'^(\d+)[pP]', format_id, 'height', default=None))
formats.append({
'url': self._proto_relative_url(extract_video_url(video_link), 'http:'),
'format_id': format_id,
'height': height,
})
self._sort_formats(formats)
thumbnail = self._proto_relative_url(
xpath_text(cfg_xml, './startThumb', 'thumbnail'), 'http:')
thumbnails = self._extract_thumbnails(cfg_xml)
title = self._html_search_regex( title = self._html_search_regex(
self._TITLE_REGEX, webpage, 'title') if self._TITLE_REGEX else self._og_search_title(webpage) self._TITLE_REGEX, webpage, 'title') if self._TITLE_REGEX else self._og_search_title(webpage)
description = self._html_search_regex(
self._DESCRIPTION_REGEX, webpage, 'description', fatal=False, default='')
age_limit = self._rta_search(webpage) age_limit = self._rta_search(webpage)
duration = parse_duration(self._html_search_meta( duration = parse_duration(self._html_search_meta(
'duration', webpage, 'duration', default=None)) 'duration', webpage, 'duration', default=None))
cfg_url = self._proto_relative_url(self._html_search_regex( def extract_field(pattern, name):
self._CONFIG_REGEX, webpage, 'flashvars.config'), 'http:') return self._html_search_regex(pattern, webpage, name, default=None) if pattern else None
cfg_xml = self._download_xml( description = extract_field(self._DESCRIPTION_REGEX, 'description')
cfg_url, display_id, note='Downloading metadata', uploader = extract_field(self._UPLOADER_REGEX, 'uploader')
transform_source=fix_xml_ampersands) view_count = str_to_int(extract_field(self._VIEW_COUNT_REGEX, 'view count'))
comment_count = str_to_int(extract_field(self._COMMENT_COUNT_REGEX, 'comment count'))
average_rating = float_or_none(extract_field(self._AVERAGE_RATING_REGEX, 'average rating'))
thumbnail = self._proto_relative_url( categories_str = extract_field(self._CATEGORIES_REGEX, 'categories')
cfg_xml.find('./startThumb').text, 'http:') categories = categories_str.split(', ') if categories_str is not None else []
formats = []
for item in cfg_xml.findall('./quality/item'):
video_url = re.sub('speed=\d+', 'speed=', item.find('videoLink').text)
format_id = item.find('res').text
fmt = {
'url': self._proto_relative_url(video_url, 'http:'),
'format_id': format_id,
}
m = re.search(r'^(\d+)', format_id)
if m:
fmt['height'] = int(m.group(1))
formats.append(fmt)
self._sort_formats(formats)
return { return {
'id': video_id, 'id': video_id,
@ -84,7 +140,130 @@ class TNAFlixIE(InfoExtractor):
'title': title, 'title': title,
'description': description, 'description': description,
'thumbnail': thumbnail, 'thumbnail': thumbnail,
'thumbnails': thumbnails,
'duration': duration, 'duration': duration,
'age_limit': age_limit, 'age_limit': age_limit,
'uploader': uploader,
'view_count': view_count,
'comment_count': comment_count,
'average_rating': average_rating,
'categories': categories,
'formats': formats, 'formats': formats,
} }
class TNAFlixIE(TNAFlixNetworkBaseIE):
    """Extractor for tnaflix.com video pages.

    Supplies the site-specific URL pattern and the title, description and
    uploader regexes; everything else is inherited from
    TNAFlixNetworkBaseIE.
    """
    _VALID_URL = r'https?://(?:www\.)?tnaflix\.com/[^/]+/(?P<display_id>[^/]+)/video(?P<id>\d+)'

    _TITLE_REGEX = r'<title>(.+?) - TNAFlix Porn Videos</title>'
    _DESCRIPTION_REGEX = r'<h3 itemprop="description">([^<]+)</h3>'
    _UPLOADER_REGEX = r'(?s)<span[^>]+class="infoTitle"[^>]*>Uploaded By:</span>(.+?)<div'

    # Thumbnail expectations are raw strings: they contain the regex escape
    # `\.`, which is not a valid string escape in a normal literal.
    _TESTS = [{
        # anonymous uploader, no categories
        'url': 'http://www.tnaflix.com/porn-stars/Carmella-Decesare-striptease/video553878',
        'md5': 'ecf3498417d09216374fc5907f9c6ec0',
        'info_dict': {
            'id': '553878',
            'display_id': 'Carmella-Decesare-striptease',
            'ext': 'mp4',
            'title': 'Carmella Decesare - striptease',
            'thumbnail': r're:https?://.*\.jpg$',
            'duration': 91,
            'age_limit': 18,
            'uploader': 'Anonymous',
            'categories': [],
        }
    }, {
        # non-anonymous uploader, categories
        'url': 'https://www.tnaflix.com/teen-porn/Educational-xxx-video/video6538',
        'md5': '0f5d4d490dbfd117b8607054248a07c0',
        'info_dict': {
            'id': '6538',
            'display_id': 'Educational-xxx-video',
            'ext': 'mp4',
            'title': 'Educational xxx video',
            'description': 'md5:b4fab8f88a8621c8fabd361a173fe5b8',
            'thumbnail': r're:https?://.*\.jpg$',
            'duration': 164,
            'age_limit': 18,
            'uploader': 'bobwhite39',
            'categories': ['Amateur Porn', 'Squirting Videos', 'Teen Girls 18+'],
        }
    }, {
        'url': 'https://www.tnaflix.com/amateur-porn/bunzHD-Ms.Donk/video358632',
        'only_matching': True,
    }]
class EMPFlixIE(TNAFlixNetworkBaseIE):
    """Extractor for empflix.com video pages.

    Overrides only the URL pattern and the uploader regex; the remaining
    regexes and behaviour are inherited from TNAFlixNetworkBaseIE.
    """
    _VALID_URL = r'https?://(?:www\.)?empflix\.com/videos/(?P<display_id>.+?)-(?P<id>[0-9]+)\.html'

    _UPLOADER_REGEX = r'<span[^>]+class="infoTitle"[^>]*>Uploaded By:</span>(.+?)</li>'

    _TESTS = [{
        'url': 'http://www.empflix.com/videos/Amateur-Finger-Fuck-33051.html',
        'md5': 'b1bc15b6412d33902d6e5952035fcabc',
        'info_dict': {
            'id': '33051',
            'display_id': 'Amateur-Finger-Fuck',
            'ext': 'mp4',
            'title': 'Amateur Finger Fuck',
            'description': 'Amateur solo finger fucking.',
            # Raw string: the value contains the regex escape `\.`, which
            # is not a valid string escape in a normal literal.
            'thumbnail': r're:https?://.*\.jpg$',
            'duration': 83,
            'age_limit': 18,
            'uploader': 'cwbike',
            'categories': ['Amateur', 'Anal', 'Fisting', 'Home made', 'Solo'],
        }
    }, {
        'url': 'http://www.empflix.com/videos/[AROMA][ARMD-718]-Aoi-Yoshino-Sawa-25826.html',
        'only_matching': True,
    }]
class MovieFapIE(TNAFlixNetworkBaseIE):
    """Extractor for moviefap.com video pages.

    Supplies the URL pattern plus the view count, comment count, average
    rating and categories regexes; the remaining regexes and behaviour are
    inherited from TNAFlixNetworkBaseIE.
    """
    _VALID_URL = r'https?://(?:www\.)?moviefap\.com/videos/(?P<id>[0-9a-f]+)/(?P<display_id>[^/]+)\.html'

    _VIEW_COUNT_REGEX = r'<br>Views\s*<strong>([\d,.]+)</strong>'
    _COMMENT_COUNT_REGEX = r'<span[^>]+id="comCount"[^>]*>([\d,.]+)</span>'
    _AVERAGE_RATING_REGEX = r'Current Rating\s*<br>\s*<strong>([\d.]+)</strong>'
    _CATEGORIES_REGEX = r'(?s)<div[^>]+id="vid_info"[^>]*>\s*<div[^>]*>.+?</div>(.*?)<br>'

    # Thumbnail expectations are raw strings: they contain the regex escape
    # `\.`, which is not a valid string escape in a normal literal.
    _TESTS = [{
        # normal, multi-format video
        'url': 'http://www.moviefap.com/videos/be9867c9416c19f54a4a/experienced-milf-amazing-handjob.html',
        'md5': '26624b4e2523051b550067d547615906',
        'info_dict': {
            'id': 'be9867c9416c19f54a4a',
            'display_id': 'experienced-milf-amazing-handjob',
            'ext': 'mp4',
            'title': 'Experienced MILF Amazing Handjob',
            'description': 'Experienced MILF giving an Amazing Handjob',
            'thumbnail': r're:https?://.*\.jpg$',
            'age_limit': 18,
            'uploader': 'darvinfred06',
            'view_count': int,
            'comment_count': int,
            'average_rating': float,
            'categories': ['Amateur', 'Masturbation', 'Mature', 'Flashing'],
        }
    }, {
        # quirky single-format case where the extension is given as fid, but the video is really an flv
        'url': 'http://www.moviefap.com/videos/e5da0d3edce5404418f5/jeune-couple-russe.html',
        'md5': 'fa56683e291fc80635907168a743c9ad',
        'info_dict': {
            'id': 'e5da0d3edce5404418f5',
            'display_id': 'jeune-couple-russe',
            'ext': 'flv',
            'title': 'Jeune Couple Russe',
            'description': 'Amateur',
            'thumbnail': r're:https?://.*\.jpg$',
            'age_limit': 18,
            'uploader': 'whiskeyjar',
            'view_count': int,
            'comment_count': int,
            'average_rating': float,
            'categories': ['Amateur', 'Teen'],
        }
    }]

View File

@ -62,6 +62,8 @@ std_headers = {
} }
NO_DEFAULT = object()
ENGLISH_MONTH_NAMES = [ ENGLISH_MONTH_NAMES = [
'January', 'February', 'March', 'April', 'May', 'June', 'January', 'February', 'March', 'April', 'May', 'June',
'July', 'August', 'September', 'October', 'November', 'December'] 'July', 'August', 'September', 'October', 'November', 'December']
@ -171,13 +173,15 @@ def xpath_with_ns(path, ns_map):
return '/'.join(replaced) return '/'.join(replaced)
def xpath_text(node, xpath, name=None, fatal=False): def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
if sys.version_info < (2, 7): # Crazy 2.6 if sys.version_info < (2, 7): # Crazy 2.6
xpath = xpath.encode('ascii') xpath = xpath.encode('ascii')
n = node.find(xpath) n = node.find(xpath)
if n is None or n.text is None: if n is None or n.text is None:
if fatal: if default is not NO_DEFAULT:
return default
elif fatal:
name = xpath if name is None else name name = xpath if name is None else name
raise ExtractorError('Could not find XML element %s' % name) raise ExtractorError('Could not find XML element %s' % name)
else: else: