1
0
mirror of https://github.com/ytdl-org/youtube-dl.git synced 2024-11-15 03:27:46 +01:00

[cammodels] Improve and simplify (closes #14499)

This commit is contained in:
Sergey M․ 2018-05-26 21:25:01 +07:00
parent 2a49d01992
commit 8b1da46e8f
No known key found for this signature in database
GPG Key ID: 2C393E0F18A9236D

View File

@ -1,93 +1,94 @@
# coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from .common import ExtractorError from ..compat import compat_str
import json from ..utils import (
import re ExtractorError,
from ..utils import int_or_none int_or_none,
)
class CamModelsIE(InfoExtractor): class CamModelsIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?cammodels\.com/cam/(?P<id>\w+)' _VALID_URL = r'https?://(?:www\.)?cammodels\.com/cam/(?P<id>[^/?#&]+)'
_HEADERS = { _TESTS = [{
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36' 'url': 'https://www.cammodels.com/cam/AutumnKnight/',
# Needed because server doesn't return links to video URLs if a browser-like User-Agent is not used 'only_matching': True,
} }]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) user_id = self._match_id(url)
webpage = self._download_webpage(
url, webpage = self._download_webpage(url, user_id)
video_id,
headers=self._HEADERS) manifest_root = self._html_search_regex(
manifest_url_root = self._html_search_regex( r'manifestUrlRoot=([^&\']+)', webpage, 'manifest', default=None)
r'manifestUrlRoot=(?P<id>https?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9@:%_\+.~#?&//=]*))',
webpage, if not manifest_root:
'manifest', ERRORS = (
None, ("I'm offline, but let's stay connected", 'This user is currently offline'),
False) ('in a private show', 'This user is in a private show'),
if not manifest_url_root:
offline = self._html_search_regex(
r'(?P<id>I\'m offline, but let\'s stay connected!)',
webpage,
'offline indicator',
None,
False)
private = self._html_search_regex(
r'(?P<id>Im in a private show right now)',
webpage,
'private show indicator',
None,
False)
err = 'This user is currently offline, so nothing can be downloaded.' if offline \
else 'This user is doing a private show, which requires payment. This extractor currently does not support private streams.' if private \
else 'Unable to find link to stream info on webpage. Room is not offline, so something else is wrong.'
raise ExtractorError(
err,
expected=True if offline or private else False,
video_id=video_id
) )
manifest_url = manifest_url_root + video_id + '.json' for pattern, message in ERRORS:
if pattern in webpage:
error = message
expected = True
break
else:
error = 'Unable to find manifest URL root'
expected = False
raise ExtractorError(error, expected=expected)
manifest = self._download_json( manifest = self._download_json(
manifest_url, '%s%s.json' % (manifest_root, user_id), user_id)
video_id,
'Downloading links to streams.', formats = []
'Link to stream URLs was found, but we couldn\'t access it.', for format_id, format_dict in manifest['formats'].items():
headers=self._HEADERS) if not isinstance(format_dict, dict):
try: continue
formats = [] encodings = format_dict.get('encodings')
for fmtName in ['mp4-rtmp', 'mp4-hls']: if not isinstance(encodings, list):
for encoding in manifest['formats'][fmtName]['encodings']: continue
formats.append({ vcodec = format_dict.get('videoCodec')
acodec = format_dict.get('audioCodec')
for media in encodings:
if not isinstance(media, dict):
continue
media_url = media.get('location')
if not media_url or not isinstance(media_url, compat_str):
continue
format_id_list = [format_id]
height = int_or_none(media.get('videoHeight'))
if height is not None:
format_id_list.append('%dp' % height)
f = {
'url': media_url,
'format_id': '-'.join(format_id_list),
'width': int_or_none(media.get('videoWidth')),
'height': height,
'vbr': int_or_none(media.get('videoKbps')),
'abr': int_or_none(media.get('audioKbps')),
'fps': int_or_none(media.get('fps')),
'vcodec': vcodec,
'acodec': acodec,
}
if 'rtmp' in format_id:
f['ext'] = 'flv'
elif 'hls' in format_id:
f.update({
'ext': 'mp4', 'ext': 'mp4',
'url': encoding['location'], # hls skips fragments, preferring rtmp
'width': int_or_none(encoding.get('videoWidth')), 'preference': -1,
'height': int_or_none(encoding.get('videoHeight')),
'vbr': int_or_none(encoding.get('videoKbps')),
'abr': int_or_none(encoding.get('audioKbps')),
'format_id': fmtName + str(encoding.get('videoWidth'))
}) })
# If they change the JSON format, then fallback to parsing out RTMP links via regex. else:
except KeyError: continue
manifest_json = json.dumps(manifest) formats.append(f)
manifest_links = re.finditer(
r'(?P<id>rtmp?:\/\/[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9@:%_\+.~#&//=]*))',
manifest_json)
if not manifest_links:
raise ExtractorError(
'Link to stream info was found, but we couldn\'t read the response. This is probably a bug.',
expected=False,
video_id=video_id)
formats = []
for manifest_link in manifest_links:
url = manifest_link.group('id')
formats.append({
'ext': 'mp4',
'url': url,
'format_id': url.split(sep='/')[-1]
})
self._sort_formats(formats) self._sort_formats(formats)
return { return {
'id': video_id, 'id': user_id,
'title': self._live_title(video_id), 'title': self._live_title(user_id),
'formats': formats 'is_live': True,
'formats': formats,
} }