1
0
mirror of https://github.com/ytdl-org/youtube-dl.git synced 2025-01-18 01:16:42 +01:00

Merge remote-tracking branch 'jtwaleson/master'

This commit is contained in:
Philipp Hagemeister 2014-11-23 22:10:26 +01:00
commit 27f8b0994e
77 changed files with 226 additions and 223 deletions

View File

@ -142,7 +142,7 @@ def win_service_set_status(handle, status_code):
def win_service_main(service_name, real_main, argc, argv_raw): def win_service_main(service_name, real_main, argc, argv_raw):
try: try:
#args = [argv_raw[i].value for i in range(argc)] # args = [argv_raw[i].value for i in range(argc)]
stop_event = threading.Event() stop_event = threading.Event()
handler = HandlerEx(functools.partial(stop_event, win_service_handler)) handler = HandlerEx(functools.partial(stop_event, win_service_handler))
h = advapi32.RegisterServiceCtrlHandlerExW(service_name, handler, None) h = advapi32.RegisterServiceCtrlHandlerExW(service_name, handler, None)

View File

@ -30,7 +30,6 @@ def build_completion(opt_parser):
for group in opt_parser.option_groups: for group in opt_parser.option_groups:
for option in group.option_list: for option in group.option_list:
long_option = option.get_opt_string().strip('-') long_option = option.get_opt_string().strip('-')
help_msg = shell_quote([option.help])
complete_cmd = ['complete', '--command', 'youtube-dl', '--long-option', long_option] complete_cmd = ['complete', '--command', 'youtube-dl', '--long-option', long_option]
if option._short_opts: if option._short_opts:
complete_cmd += ['--short-option', option._short_opts[0].strip('-')] complete_cmd += ['--short-option', option._short_opts[0].strip('-')]

View File

@ -4,7 +4,6 @@
from __future__ import print_function from __future__ import print_function
import os.path import os.path
import pkg_resources
import warnings import warnings
import sys import sys

View File

@ -116,14 +116,14 @@ def expect_info_dict(self, expected_dict, got_dict):
elif isinstance(expected, type): elif isinstance(expected, type):
got = got_dict.get(info_field) got = got_dict.get(info_field)
self.assertTrue(isinstance(got, expected), self.assertTrue(isinstance(got, expected),
'Expected type %r for field %s, but got value %r of type %r' % (expected, info_field, got, type(got))) 'Expected type %r for field %s, but got value %r of type %r' % (expected, info_field, got, type(got)))
else: else:
if isinstance(expected, compat_str) and expected.startswith('md5:'): if isinstance(expected, compat_str) and expected.startswith('md5:'):
got = 'md5:' + md5(got_dict.get(info_field)) got = 'md5:' + md5(got_dict.get(info_field))
else: else:
got = got_dict.get(info_field) got = got_dict.get(info_field)
self.assertEqual(expected, got, self.assertEqual(expected, got,
'invalid value for field %s, expected %r, got %r' % (info_field, expected, got)) 'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))
# Check for the presence of mandatory fields # Check for the presence of mandatory fields
if got_dict.get('_type') != 'playlist': if got_dict.get('_type') != 'playlist':
@ -135,8 +135,8 @@ def expect_info_dict(self, expected_dict, got_dict):
# Are checkable fields missing from the test case definition? # Are checkable fields missing from the test case definition?
test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value)) test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value))
for key, value in got_dict.items() for key, value in got_dict.items()
if value and key in ('title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location')) if value and key in ('title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location'))
missing_keys = set(test_info_dict.keys()) - set(expected_dict.keys()) missing_keys = set(test_info_dict.keys()) - set(expected_dict.keys())
if missing_keys: if missing_keys:
def _repr(v): def _repr(v):

View File

@ -314,7 +314,7 @@ class YoutubeDL(object):
self._output_process.stdin.write((message + '\n').encode('utf-8')) self._output_process.stdin.write((message + '\n').encode('utf-8'))
self._output_process.stdin.flush() self._output_process.stdin.flush()
res = ''.join(self._output_channel.readline().decode('utf-8') res = ''.join(self._output_channel.readline().decode('utf-8')
for _ in range(line_count)) for _ in range(line_count))
return res[:-len('\n')] return res[:-len('\n')]
def to_screen(self, message, skip_eol=False): def to_screen(self, message, skip_eol=False):
@ -701,13 +701,15 @@ class YoutubeDL(object):
'It needs to be updated.' % ie_result.get('extractor')) 'It needs to be updated.' % ie_result.get('extractor'))
def _fixup(r): def _fixup(r):
self.add_extra_info(r, self.add_extra_info(
r,
{ {
'extractor': ie_result['extractor'], 'extractor': ie_result['extractor'],
'webpage_url': ie_result['webpage_url'], 'webpage_url': ie_result['webpage_url'],
'webpage_url_basename': url_basename(ie_result['webpage_url']), 'webpage_url_basename': url_basename(ie_result['webpage_url']),
'extractor_key': ie_result['extractor_key'], 'extractor_key': ie_result['extractor_key'],
}) }
)
return r return r
ie_result['entries'] = [ ie_result['entries'] = [
self.process_ie_result(_fixup(r), download, extra_info) self.process_ie_result(_fixup(r), download, extra_info)
@ -857,14 +859,14 @@ class YoutubeDL(object):
# Two formats have been requested like '137+139' # Two formats have been requested like '137+139'
format_1, format_2 = rf.split('+') format_1, format_2 = rf.split('+')
formats_info = (self.select_format(format_1, formats), formats_info = (self.select_format(format_1, formats),
self.select_format(format_2, formats)) self.select_format(format_2, formats))
if all(formats_info): if all(formats_info):
# The first format must contain the video and the # The first format must contain the video and the
# second the audio # second the audio
if formats_info[0].get('vcodec') == 'none': if formats_info[0].get('vcodec') == 'none':
self.report_error('The first format must ' self.report_error('The first format must '
'contain the video, try using ' 'contain the video, try using '
'"-f %s+%s"' % (format_2, format_1)) '"-f %s+%s"' % (format_2, format_1))
return return
selected_format = { selected_format = {
'requested_formats': formats_info, 'requested_formats': formats_info,
@ -1042,10 +1044,10 @@ class YoutubeDL(object):
with open(thumb_filename, 'wb') as thumbf: with open(thumb_filename, 'wb') as thumbf:
shutil.copyfileobj(uf, thumbf) shutil.copyfileobj(uf, thumbf)
self.to_screen('[%s] %s: Writing thumbnail to: %s' % self.to_screen('[%s] %s: Writing thumbnail to: %s' %
(info_dict['extractor'], info_dict['id'], thumb_filename)) (info_dict['extractor'], info_dict['id'], thumb_filename))
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self.report_warning('Unable to download thumbnail "%s": %s' % self.report_warning('Unable to download thumbnail "%s": %s' %
(info_dict['thumbnail'], compat_str(err))) (info_dict['thumbnail'], compat_str(err)))
if not self.params.get('skip_download', False): if not self.params.get('skip_download', False):
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)): if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
@ -1066,8 +1068,8 @@ class YoutubeDL(object):
if not merger._executable: if not merger._executable:
postprocessors = [] postprocessors = []
self.report_warning('You have requested multiple ' self.report_warning('You have requested multiple '
'formats but ffmpeg or avconv are not installed.' 'formats but ffmpeg or avconv are not installed.'
' The formats won\'t be merged') ' The formats won\'t be merged')
else: else:
postprocessors = [merger] postprocessors = [merger]
for f in info_dict['requested_formats']: for f in info_dict['requested_formats']:

View File

@ -116,7 +116,7 @@ except ImportError: # Python 2
# Python 2's version is apparently totally broken # Python 2's version is apparently totally broken
def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False, def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
encoding='utf-8', errors='replace'): encoding='utf-8', errors='replace'):
qs, _coerce_result = qs, unicode qs, _coerce_result = qs, unicode
pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')] pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
r = [] r = []
@ -145,10 +145,10 @@ except ImportError: # Python 2
return r return r
def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False, def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
encoding='utf-8', errors='replace'): encoding='utf-8', errors='replace'):
parsed_result = {} parsed_result = {}
pairs = _parse_qsl(qs, keep_blank_values, strict_parsing, pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
encoding=encoding, errors=errors) encoding=encoding, errors=errors)
for name, value in pairs: for name, value in pairs:
if name in parsed_result: if name in parsed_result:
parsed_result[name].append(value) parsed_result[name].append(value)

View File

@ -225,13 +225,15 @@ class F4mFD(FileDownloader):
self.to_screen('[download] Downloading f4m manifest') self.to_screen('[download] Downloading f4m manifest')
manifest = self.ydl.urlopen(man_url).read() manifest = self.ydl.urlopen(man_url).read()
self.report_destination(filename) self.report_destination(filename)
http_dl = HttpQuietDownloader(self.ydl, http_dl = HttpQuietDownloader(
self.ydl,
{ {
'continuedl': True, 'continuedl': True,
'quiet': True, 'quiet': True,
'noprogress': True, 'noprogress': True,
'test': self.params.get('test', False), 'test': self.params.get('test', False),
}) }
)
doc = etree.fromstring(manifest) doc = etree.fromstring(manifest)
formats = [(int(f.attrib.get('bitrate', -1)), f) for f in doc.findall(_add_ns('media'))] formats = [(int(f.attrib.get('bitrate', -1)), f) for f in doc.findall(_add_ns('media'))]
@ -277,7 +279,7 @@ class F4mFD(FileDownloader):
def frag_progress_hook(status): def frag_progress_hook(status):
frag_total_bytes = status.get('total_bytes', 0) frag_total_bytes = status.get('total_bytes', 0)
estimated_size = (state['downloaded_bytes'] + estimated_size = (state['downloaded_bytes'] +
(total_frags - state['frag_counter']) * frag_total_bytes) (total_frags - state['frag_counter']) * frag_total_bytes)
if status['status'] == 'finished': if status['status'] == 'finished':
state['downloaded_bytes'] += frag_total_bytes state['downloaded_bytes'] += frag_total_bytes
state['frag_counter'] += 1 state['frag_counter'] += 1
@ -287,13 +289,13 @@ class F4mFD(FileDownloader):
frag_downloaded_bytes = status['downloaded_bytes'] frag_downloaded_bytes = status['downloaded_bytes']
byte_counter = state['downloaded_bytes'] + frag_downloaded_bytes byte_counter = state['downloaded_bytes'] + frag_downloaded_bytes
frag_progress = self.calc_percent(frag_downloaded_bytes, frag_progress = self.calc_percent(frag_downloaded_bytes,
frag_total_bytes) frag_total_bytes)
progress = self.calc_percent(state['frag_counter'], total_frags) progress = self.calc_percent(state['frag_counter'], total_frags)
progress += frag_progress / float(total_frags) progress += frag_progress / float(total_frags)
eta = self.calc_eta(start, time.time(), estimated_size, byte_counter) eta = self.calc_eta(start, time.time(), estimated_size, byte_counter)
self.report_progress(progress, format_bytes(estimated_size), self.report_progress(progress, format_bytes(estimated_size),
status.get('speed'), eta) status.get('speed'), eta)
http_dl.add_progress_hook(frag_progress_hook) http_dl.add_progress_hook(frag_progress_hook)
frags_filenames = [] frags_filenames = []

View File

@ -88,7 +88,7 @@ class AppleTrailersIE(InfoExtractor):
for li in doc.findall('./div/ul/li'): for li in doc.findall('./div/ul/li'):
on_click = li.find('.//a').attrib['onClick'] on_click = li.find('.//a').attrib['onClick']
trailer_info_json = self._search_regex(self._JSON_RE, trailer_info_json = self._search_regex(self._JSON_RE,
on_click, 'trailer info') on_click, 'trailer info')
trailer_info = json.loads(trailer_info_json) trailer_info = json.loads(trailer_info_json)
title = trailer_info['title'] title = trailer_info['title']
video_id = movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', title).lower() video_id = movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', title).lower()

View File

@ -38,7 +38,7 @@ class BambuserIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id') video_id = mobj.group('id')
info_url = ('http://player-c.api.bambuser.com/getVideo.json?' info_url = ('http://player-c.api.bambuser.com/getVideo.json?'
'&api_key=%s&vid=%s' % (self._API_KEY, video_id)) '&api_key=%s&vid=%s' % (self._API_KEY, video_id))
info_json = self._download_webpage(info_url, video_id) info_json = self._download_webpage(info_url, video_id)
info = json.loads(info_json)['result'] info = json.loads(info_json)['result']
@ -74,8 +74,8 @@ class BambuserChannelIE(InfoExtractor):
last_id = '' last_id = ''
for i in itertools.count(1): for i in itertools.count(1):
req_url = ('http://bambuser.com/xhr-api/index.php?username={user}' req_url = ('http://bambuser.com/xhr-api/index.php?username={user}'
'&sort=created&access_mode=0%2C1%2C2&limit={count}' '&sort=created&access_mode=0%2C1%2C2&limit={count}'
'&method=broadcast&format=json&vid_older_than={last}' '&method=broadcast&format=json&vid_older_than={last}'
).format(user=user, count=self._STEP, last=last_id) ).format(user=user, count=self._STEP, last=last_id)
req = compat_urllib_request.Request(req_url) req = compat_urllib_request.Request(req_url)
# Without setting this header, we wouldn't get any result # Without setting this header, we wouldn't get any result

View File

@ -165,10 +165,10 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
webpage = self._download_webpage(url, group_id, 'Downloading video page') webpage = self._download_webpage(url, group_id, 'Downloading video page')
if re.search(r'id="emp-error" class="notinuk">', webpage): if re.search(r'id="emp-error" class="notinuk">', webpage):
raise ExtractorError('Currently BBC iPlayer TV programmes are available to play in the UK only', raise ExtractorError('Currently BBC iPlayer TV programmes are available to play in the UK only',
expected=True) expected=True)
playlist = self._download_xml('http://www.bbc.co.uk/iplayer/playlist/%s' % group_id, group_id, playlist = self._download_xml('http://www.bbc.co.uk/iplayer/playlist/%s' % group_id, group_id,
'Downloading playlist XML') 'Downloading playlist XML')
no_items = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}noItems') no_items = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}noItems')
if no_items is not None: if no_items is not None:

View File

@ -25,8 +25,7 @@ class CNNIE(InfoExtractor):
'duration': 135, 'duration': 135,
'upload_date': '20130609', 'upload_date': '20130609',
}, },
}, }, {
{
"url": "http://edition.cnn.com/video/?/video/us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fcnn_topstories+%28RSS%3A+Top+Stories%29", "url": "http://edition.cnn.com/video/?/video/us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fcnn_topstories+%28RSS%3A+Top+Stories%29",
"md5": "b5cc60c60a3477d185af8f19a2a26f4e", "md5": "b5cc60c60a3477d185af8f19a2a26f4e",
"info_dict": { "info_dict": {

View File

@ -10,47 +10,46 @@ from ..utils import int_or_none
class CollegeHumorIE(InfoExtractor): class CollegeHumorIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/(video|embed|e)/(?P<videoid>[0-9]+)/?(?P<shorttitle>.*)$' _VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/(video|embed|e)/(?P<videoid>[0-9]+)/?(?P<shorttitle>.*)$'
_TESTS = [{ _TESTS = [
'url': 'http://www.collegehumor.com/video/6902724/comic-con-cosplay-catastrophe', {
'md5': 'dcc0f5c1c8be98dc33889a191f4c26bd', 'url': 'http://www.collegehumor.com/video/6902724/comic-con-cosplay-catastrophe',
'info_dict': { 'md5': 'dcc0f5c1c8be98dc33889a191f4c26bd',
'id': '6902724', 'info_dict': {
'ext': 'mp4', 'id': '6902724',
'title': 'Comic-Con Cosplay Catastrophe', 'ext': 'mp4',
'description': "Fans get creative this year at San Diego. Too creative. And yes, that's really Joss Whedon.", 'title': 'Comic-Con Cosplay Catastrophe',
'age_limit': 13, 'description': "Fans get creative this year at San Diego. Too creative. And yes, that's really Joss Whedon.",
'duration': 187, 'age_limit': 13,
'duration': 187,
},
}, {
'url': 'http://www.collegehumor.com/video/3505939/font-conference',
'md5': '72fa701d8ef38664a4dbb9e2ab721816',
'info_dict': {
'id': '3505939',
'ext': 'mp4',
'title': 'Font Conference',
'description': "This video wasn't long enough, so we made it double-spaced.",
'age_limit': 10,
'duration': 179,
},
}, {
# embedded youtube video
'url': 'http://www.collegehumor.com/embed/6950306',
'info_dict': {
'id': 'Z-bao9fg6Yc',
'ext': 'mp4',
'title': 'Young Americans Think President John F. Kennedy Died THIS MORNING IN A CAR ACCIDENT!!!',
'uploader': 'Mark Dice',
'uploader_id': 'MarkDice',
'description': 'md5:62c3dab9351fac7bb44b53b69511d87f',
'upload_date': '20140127',
},
'params': {
'skip_download': True,
},
'add_ie': ['Youtube'],
}, },
},
{
'url': 'http://www.collegehumor.com/video/3505939/font-conference',
'md5': '72fa701d8ef38664a4dbb9e2ab721816',
'info_dict': {
'id': '3505939',
'ext': 'mp4',
'title': 'Font Conference',
'description': "This video wasn't long enough, so we made it double-spaced.",
'age_limit': 10,
'duration': 179,
},
},
# embedded youtube video
{
'url': 'http://www.collegehumor.com/embed/6950306',
'info_dict': {
'id': 'Z-bao9fg6Yc',
'ext': 'mp4',
'title': 'Young Americans Think President John F. Kennedy Died THIS MORNING IN A CAR ACCIDENT!!!',
'uploader': 'Mark Dice',
'uploader_id': 'MarkDice',
'description': 'md5:62c3dab9351fac7bb44b53b69511d87f',
'upload_date': '20140127',
},
'params': {
'skip_download': True,
},
'add_ie': ['Youtube'],
},
] ]
def _real_extract(self, url): def _real_extract(self, url):

View File

@ -478,7 +478,7 @@ class InfoExtractor(object):
raise RegexNotFoundError('Unable to extract %s' % _name) raise RegexNotFoundError('Unable to extract %s' % _name)
else: else:
self._downloader.report_warning('unable to extract %s; ' self._downloader.report_warning('unable to extract %s; '
'please report this issue on http://yt-dl.org/bug' % _name) 'please report this issue on http://yt-dl.org/bug' % _name)
return None return None
def _html_search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0, group=None): def _html_search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0, group=None):
@ -612,7 +612,7 @@ class InfoExtractor(object):
def _twitter_search_player(self, html): def _twitter_search_player(self, html):
return self._html_search_meta('twitter:player', html, return self._html_search_meta('twitter:player', html,
'twitter card player') 'twitter card player')
def _sort_formats(self, formats): def _sort_formats(self, formats):
if not formats: if not formats:

View File

@ -114,7 +114,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
embed_page = self._download_webpage(embed_url, video_id, embed_page = self._download_webpage(embed_url, video_id,
'Downloading embed page') 'Downloading embed page')
info = self._search_regex(r'var info = ({.*?}),$', embed_page, info = self._search_regex(r'var info = ({.*?}),$', embed_page,
'video info', flags=re.MULTILINE) 'video info', flags=re.MULTILINE)
info = json.loads(info) info = json.loads(info)
if info.get('error') is not None: if info.get('error') is not None:
msg = 'Couldn\'t get video, Dailymotion says: %s' % info['error']['title'] msg = 'Couldn\'t get video, Dailymotion says: %s' % info['error']['title']
@ -208,7 +208,7 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
if re.search(self._MORE_PAGES_INDICATOR, webpage) is None: if re.search(self._MORE_PAGES_INDICATOR, webpage) is None:
break break
return [self.url_result('http://www.dailymotion.com/video/%s' % video_id, 'Dailymotion') return [self.url_result('http://www.dailymotion.com/video/%s' % video_id, 'Dailymotion')
for video_id in orderedSet(video_ids)] for video_id in orderedSet(video_ids)]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)

View File

@ -9,7 +9,7 @@ from .common import InfoExtractor
class DefenseGouvFrIE(InfoExtractor): class DefenseGouvFrIE(InfoExtractor):
IE_NAME = 'defense.gouv.fr' IE_NAME = 'defense.gouv.fr'
_VALID_URL = (r'http://.*?\.defense\.gouv\.fr/layout/set/' _VALID_URL = (r'http://.*?\.defense\.gouv\.fr/layout/set/'
r'ligthboxvideo/base-de-medias/webtv/(.*)') r'ligthboxvideo/base-de-medias/webtv/(.*)')
_TEST = { _TEST = {
'url': 'http://www.defense.gouv.fr/layout/set/ligthboxvideo/base-de-medias/webtv/attaque-chimique-syrienne-du-21-aout-2013-1', 'url': 'http://www.defense.gouv.fr/layout/set/ligthboxvideo/base-de-medias/webtv/attaque-chimique-syrienne-du-21-aout-2013-1',
@ -28,9 +28,9 @@ class DefenseGouvFrIE(InfoExtractor):
webpage, 'ID') webpage, 'ID')
json_url = ('http://static.videos.gouv.fr/brightcovehub/export/json/' json_url = ('http://static.videos.gouv.fr/brightcovehub/export/json/'
+ video_id) + video_id)
info = self._download_webpage(json_url, title, info = self._download_webpage(json_url, title,
'Downloading JSON config') 'Downloading JSON config')
video_url = json.loads(info)['renditions'][0]['url'] video_url = json.loads(info)['renditions'][0]['url']
return {'id': video_id, return {'id': video_id,

View File

@ -16,9 +16,9 @@ class DiscoveryIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'MythBusters: Mission Impossible Outtakes', 'title': 'MythBusters: Mission Impossible Outtakes',
'description': ('Watch Jamie Hyneman and Adam Savage practice being' 'description': ('Watch Jamie Hyneman and Adam Savage practice being'
' each other -- to the point of confusing Jamie\'s dog -- and ' ' each other -- to the point of confusing Jamie\'s dog -- and '
'don\'t miss Adam moon-walking as Jamie ... behind Jamie\'s' 'don\'t miss Adam moon-walking as Jamie ... behind Jamie\'s'
' back.'), ' back.'),
'duration': 156, 'duration': 156,
}, },
} }
@ -29,7 +29,7 @@ class DiscoveryIE(InfoExtractor):
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
video_list_json = self._search_regex(r'var videoListJSON = ({.*?});', video_list_json = self._search_regex(r'var videoListJSON = ({.*?});',
webpage, 'video list', flags=re.DOTALL) webpage, 'video list', flags=re.DOTALL)
video_list = json.loads(video_list_json) video_list = json.loads(video_list_json)
info = video_list['clips'][0] info = video_list['clips'][0]
formats = [] formats = []

View File

@ -11,18 +11,18 @@ from ..utils import url_basename
class DropboxIE(InfoExtractor): class DropboxIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?dropbox[.]com/sh?/(?P<id>[a-zA-Z0-9]{15})/.*' _VALID_URL = r'https?://(?:www\.)?dropbox[.]com/sh?/(?P<id>[a-zA-Z0-9]{15})/.*'
_TESTS = [{ _TESTS = [
'url': 'https://www.dropbox.com/s/nelirfsxnmcfbfh/youtube-dl%20test%20video%20%27%C3%A4%22BaW_jenozKc.mp4?dl=0', {
'info_dict': { 'url': 'https://www.dropbox.com/s/nelirfsxnmcfbfh/youtube-dl%20test%20video%20%27%C3%A4%22BaW_jenozKc.mp4?dl=0',
'id': 'nelirfsxnmcfbfh', 'info_dict': {
'ext': 'mp4', 'id': 'nelirfsxnmcfbfh',
'title': 'youtube-dl test video \'ä"BaW_jenozKc' 'ext': 'mp4',
} 'title': 'youtube-dl test video \'ä"BaW_jenozKc'
}, }
{ }, {
'url': 'https://www.dropbox.com/sh/662glsejgzoj9sr/AAByil3FGH9KFNZ13e08eSa1a/Pregame%20Ceremony%20Program%20PA%2020140518.m4v', 'url': 'https://www.dropbox.com/sh/662glsejgzoj9sr/AAByil3FGH9KFNZ13e08eSa1a/Pregame%20Ceremony%20Program%20PA%2020140518.m4v',
'only_matching': True, 'only_matching': True,
}, },
] ]
def _real_extract(self, url): def _real_extract(self, url):

View File

@ -28,7 +28,7 @@ class EHowIE(InfoExtractor):
video_id = mobj.group('id') video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
video_url = self._search_regex(r'(?:file|source)=(http[^\'"&]*)', video_url = self._search_regex(r'(?:file|source)=(http[^\'"&]*)',
webpage, 'video URL') webpage, 'video URL')
final_url = compat_urllib_parse.unquote(video_url) final_url = compat_urllib_parse.unquote(video_url)
uploader = self._html_search_meta('uploader', webpage) uploader = self._html_search_meta('uploader', webpage)
title = self._og_search_title(webpage).replace(' | eHow', '') title = self._og_search_title(webpage).replace(' | eHow', '')

View File

@ -60,8 +60,8 @@ class FacebookIE(InfoExtractor):
login_page_req = compat_urllib_request.Request(self._LOGIN_URL) login_page_req = compat_urllib_request.Request(self._LOGIN_URL)
login_page_req.add_header('Cookie', 'locale=en_US') login_page_req.add_header('Cookie', 'locale=en_US')
login_page = self._download_webpage(login_page_req, None, login_page = self._download_webpage(login_page_req, None,
note='Downloading login page', note='Downloading login page',
errnote='Unable to download login page') errnote='Unable to download login page')
lsd = self._search_regex( lsd = self._search_regex(
r'<input type="hidden" name="lsd" value="([^"]*)"', r'<input type="hidden" name="lsd" value="([^"]*)"',
login_page, 'lsd') login_page, 'lsd')
@ -82,7 +82,7 @@ class FacebookIE(InfoExtractor):
request.add_header('Content-Type', 'application/x-www-form-urlencoded') request.add_header('Content-Type', 'application/x-www-form-urlencoded')
try: try:
login_results = self._download_webpage(request, None, login_results = self._download_webpage(request, None,
note='Logging in', errnote='unable to fetch login page') note='Logging in', errnote='unable to fetch login page')
if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None: if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None:
self._downloader.report_warning('unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.') self._downloader.report_warning('unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.')
return return
@ -96,7 +96,7 @@ class FacebookIE(InfoExtractor):
check_req = compat_urllib_request.Request(self._CHECKPOINT_URL, urlencode_postdata(check_form)) check_req = compat_urllib_request.Request(self._CHECKPOINT_URL, urlencode_postdata(check_form))
check_req.add_header('Content-Type', 'application/x-www-form-urlencoded') check_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
check_response = self._download_webpage(check_req, None, check_response = self._download_webpage(check_req, None,
note='Confirming login') note='Confirming login')
if re.search(r'id="checkpointSubmitButton"', check_response) is not None: if re.search(r'id="checkpointSubmitButton"', check_response) is not None:
self._downloader.report_warning('Unable to confirm login, you have to login in your brower and authorize the login.') self._downloader.report_warning('Unable to confirm login, you have to login in your brower and authorize the login.')
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:

View File

@ -44,9 +44,9 @@ class FirstTVIE(InfoExtractor):
duration = self._og_search_property('video:duration', webpage, 'video duration', fatal=False) duration = self._og_search_property('video:duration', webpage, 'video duration', fatal=False)
like_count = self._html_search_regex(r'title="Понравилось".*?/></label> \[(\d+)\]', like_count = self._html_search_regex(r'title="Понравилось".*?/></label> \[(\d+)\]',
webpage, 'like count', fatal=False) webpage, 'like count', fatal=False)
dislike_count = self._html_search_regex(r'title="Не понравилось".*?/></label> \[(\d+)\]', dislike_count = self._html_search_regex(r'title="Не понравилось".*?/></label> \[(\d+)\]',
webpage, 'dislike count', fatal=False) webpage, 'dislike count', fatal=False)
return { return {
'id': video_id, 'id': video_id,

View File

@ -50,7 +50,7 @@ class FiveMinIE(InfoExtractor):
video_id = mobj.group('id') video_id = mobj.group('id')
embed_url = 'https://embed.5min.com/playerseed/?playList=%s' % video_id embed_url = 'https://embed.5min.com/playerseed/?playList=%s' % video_id
embed_page = self._download_webpage(embed_url, video_id, embed_page = self._download_webpage(embed_url, video_id,
'Downloading embed page') 'Downloading embed page')
sid = self._search_regex(r'sid=(\d+)', embed_page, 'sid') sid = self._search_regex(r'sid=(\d+)', embed_page, 'sid')
query = compat_urllib_parse.urlencode({ query = compat_urllib_parse.urlencode({
'func': 'GetResults', 'func': 'GetResults',

View File

@ -32,9 +32,9 @@ class FKTVIE(InfoExtractor):
server = random.randint(2, 4) server = random.randint(2, 4)
video_thumbnail = 'http://fernsehkritik.tv/images/magazin/folge%d.jpg' % episode video_thumbnail = 'http://fernsehkritik.tv/images/magazin/folge%d.jpg' % episode
start_webpage = self._download_webpage('http://fernsehkritik.tv/folge-%d/Start' % episode, start_webpage = self._download_webpage('http://fernsehkritik.tv/folge-%d/Start' % episode,
episode) episode)
playlist = self._search_regex(r'playlist = (\[.*?\]);', start_webpage, playlist = self._search_regex(r'playlist = (\[.*?\]);', start_webpage,
'playlist', flags=re.DOTALL) 'playlist', flags=re.DOTALL)
files = json.loads(re.sub('{[^{}]*?}', '{}', playlist)) files = json.loads(re.sub('{[^{}]*?}', '{}', playlist))
# TODO: return a single multipart video # TODO: return a single multipart video
videos = [] videos = []

View File

@ -37,7 +37,7 @@ class FlickrIE(InfoExtractor):
first_xml = self._download_webpage(first_url, video_id, 'Downloading first data webpage') first_xml = self._download_webpage(first_url, video_id, 'Downloading first data webpage')
node_id = self._html_search_regex(r'<Item id="id">(\d+-\d+)</Item>', node_id = self._html_search_regex(r'<Item id="id">(\d+-\d+)</Item>',
first_xml, 'node_id') first_xml, 'node_id')
second_url = 'https://secure.flickr.com/video_playlist.gne?node_id=' + node_id + '&tech=flash&mode=playlist&bitrate=700&secret=' + secret + '&rd=video.yahoo.com&noad=1' second_url = 'https://secure.flickr.com/video_playlist.gne?node_id=' + node_id + '&tech=flash&mode=playlist&bitrate=700&secret=' + secret + '&rd=video.yahoo.com&noad=1'
second_xml = self._download_webpage(second_url, video_id, 'Downloading second data webpage') second_xml = self._download_webpage(second_url, video_id, 'Downloading second data webpage')

View File

@ -55,7 +55,7 @@ class FourTubeIE(InfoExtractor):
description = self._html_search_meta('description', webpage, 'description') description = self._html_search_meta('description', webpage, 'description')
if description: if description:
upload_date = self._search_regex(r'Published Date: (\d{2} [a-zA-Z]{3} \d{4})', description, 'upload date', upload_date = self._search_regex(r'Published Date: (\d{2} [a-zA-Z]{3} \d{4})', description, 'upload date',
fatal=False) fatal=False)
if upload_date: if upload_date:
upload_date = unified_strdate(upload_date) upload_date = unified_strdate(upload_date)
view_count = self._search_regex(r'Views: ([\d,\.]+)', description, 'view count', fatal=False) view_count = self._search_regex(r'Views: ([\d,\.]+)', description, 'view count', fatal=False)

View File

@ -234,7 +234,7 @@ class GenerationQuoiIE(InfoExtractor):
info_json = self._download_webpage(info_url, name) info_json = self._download_webpage(info_url, name)
info = json.loads(info_json) info = json.loads(info_json)
return self.url_result('http://www.dailymotion.com/video/%s' % info['id'], return self.url_result('http://www.dailymotion.com/video/%s' % info['id'],
ie='Dailymotion') ie='Dailymotion')
class CultureboxIE(FranceTVBaseInfoExtractor): class CultureboxIE(FranceTVBaseInfoExtractor):

View File

@ -784,7 +784,7 @@ class GenericIE(InfoExtractor):
# Look for Ooyala videos # Look for Ooyala videos
mobj = (re.search(r'player.ooyala.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or mobj = (re.search(r'player.ooyala.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
re.search(r'OO.Player.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage)) re.search(r'OO.Player.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage))
if mobj is not None: if mobj is not None:
return OoyalaIE._build_url_result(mobj.group('ec')) return OoyalaIE._build_url_result(mobj.group('ec'))

View File

@ -27,10 +27,10 @@ class HowcastIE(InfoExtractor):
self.report_extraction(video_id) self.report_extraction(video_id)
video_url = self._search_regex(r'\'?file\'?: "(http://mobile-media\.howcast\.com/[0-9]+\.mp4)', video_url = self._search_regex(r'\'?file\'?: "(http://mobile-media\.howcast\.com/[0-9]+\.mp4)',
webpage, 'video URL') webpage, 'video URL')
video_description = self._html_search_regex(r'<meta content=(?:"([^"]+)"|\'([^\']+)\') name=\'description\'', video_description = self._html_search_regex(r'<meta content=(?:"([^"]+)"|\'([^\']+)\') name=\'description\'',
webpage, 'description', fatal=False) webpage, 'description', fatal=False)
return { return {
'id': video_id, 'id': video_id,

View File

@ -99,7 +99,7 @@ class IGNIE(InfoExtractor):
video_id = self._find_video_id(webpage) video_id = self._find_video_id(webpage)
result = self._get_video_info(video_id) result = self._get_video_info(video_id)
description = self._html_search_regex(self._DESCRIPTION_RE, description = self._html_search_regex(self._DESCRIPTION_RE,
webpage, 'video description', flags=re.DOTALL) webpage, 'video description', flags=re.DOTALL)
result['description'] = description result['description'] = description
return result return result

View File

@ -27,9 +27,9 @@ class InstagramIE(InfoExtractor):
video_id = mobj.group('id') video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
uploader_id = self._search_regex(r'"owner":{"username":"(.+?)"', uploader_id = self._search_regex(r'"owner":{"username":"(.+?)"',
webpage, 'uploader id', fatal=False) webpage, 'uploader id', fatal=False)
desc = self._search_regex(r'"caption":"(.*?)"', webpage, 'description', desc = self._search_regex(r'"caption":"(.*?)"', webpage, 'description',
fatal=False) fatal=False)
return { return {
'id': video_id, 'id': video_id,

View File

@ -45,22 +45,26 @@ class InternetVideoArchiveIE(InfoExtractor):
url = self._build_url(query) url = self._build_url(query)
flashconfiguration = self._download_xml(url, video_id, flashconfiguration = self._download_xml(url, video_id,
'Downloading flash configuration') 'Downloading flash configuration')
file_url = flashconfiguration.find('file').text file_url = flashconfiguration.find('file').text
file_url = file_url.replace('/playlist.aspx', '/mrssplaylist.aspx') file_url = file_url.replace('/playlist.aspx', '/mrssplaylist.aspx')
# Replace some of the parameters in the query to get the best quality # Replace some of the parameters in the query to get the best quality
# and http links (no m3u8 manifests) # and http links (no m3u8 manifests)
file_url = re.sub(r'(?<=\?)(.+)$', file_url = re.sub(r'(?<=\?)(.+)$',
lambda m: self._clean_query(m.group()), lambda m: self._clean_query(m.group()),
file_url) file_url)
info = self._download_xml(file_url, video_id, info = self._download_xml(file_url, video_id,
'Downloading video info') 'Downloading video info')
item = info.find('channel/item') item = info.find('channel/item')
def _bp(p): def _bp(p):
return xpath_with_ns(p, return xpath_with_ns(
{'media': 'http://search.yahoo.com/mrss/', p,
'jwplayer': 'http://developer.longtailvideo.com/trac/wiki/FlashFormats'}) {
'media': 'http://search.yahoo.com/mrss/',
'jwplayer': 'http://developer.longtailvideo.com/trac/wiki/FlashFormats',
}
)
formats = [] formats = []
for content in item.findall(_bp('media:group/media:content')): for content in item.findall(_bp('media:group/media:content')):
attr = content.attrib attr = content.attrib

View File

@ -36,7 +36,7 @@ class JukeboxIE(InfoExtractor):
try: try:
video_url = self._search_regex(r'"config":{"file":"(?P<video_url>http:[^"]+\?mdtk=[0-9]+)"', video_url = self._search_regex(r'"config":{"file":"(?P<video_url>http:[^"]+\?mdtk=[0-9]+)"',
iframe_html, 'video url') iframe_html, 'video url')
video_url = unescapeHTML(video_url).replace('\/', '/') video_url = unescapeHTML(video_url).replace('\/', '/')
except RegexNotFoundError: except RegexNotFoundError:
youtube_url = self._search_regex( youtube_url = self._search_regex(
@ -47,9 +47,9 @@ class JukeboxIE(InfoExtractor):
return self.url_result(youtube_url, ie='Youtube') return self.url_result(youtube_url, ie='Youtube')
title = self._html_search_regex(r'<h1 class="inline">([^<]+)</h1>', title = self._html_search_regex(r'<h1 class="inline">([^<]+)</h1>',
html, 'title') html, 'title')
artist = self._html_search_regex(r'<span id="infos_article_artist">([^<]+)</span>', artist = self._html_search_regex(r'<span id="infos_article_artist">([^<]+)</span>',
html, 'artist') html, 'artist')
return { return {
'id': video_id, 'id': video_id,

View File

@ -13,8 +13,10 @@ class KickStarterIE(InfoExtractor):
'id': '1404461844', 'id': '1404461844',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Intersection: The Story of Josh Grant by Kyle Cowling', 'title': 'Intersection: The Story of Josh Grant by Kyle Cowling',
'description': 'A unique motocross documentary that examines the ' 'description': (
'life and mind of one of sports most elite athletes: Josh Grant.', 'A unique motocross documentary that examines the '
'life and mind of one of sports most elite athletes: Josh Grant.'
),
}, },
}, { }, {
'note': 'Embedded video (not using the native kickstarter video service)', 'note': 'Embedded video (not using the native kickstarter video service)',

View File

@ -45,7 +45,7 @@ class LyndaIE(SubtitlesInfoExtractor):
video_id = mobj.group(1) video_id = mobj.group(1)
page = self._download_webpage('http://www.lynda.com/ajax/player?videoId=%s&type=video' % video_id, video_id, page = self._download_webpage('http://www.lynda.com/ajax/player?videoId=%s&type=video' % video_id, video_id,
'Downloading video JSON') 'Downloading video JSON')
video_json = json.loads(page) video_json = json.loads(page)
if 'Status' in video_json: if 'Status' in video_json:

View File

@ -27,7 +27,7 @@ class M6IE(InfoExtractor):
video_id = mobj.group('id') video_id = mobj.group('id')
rss = self._download_xml('http://ws.m6.fr/v1/video/info/m6/bonus/%s' % video_id, video_id, rss = self._download_xml('http://ws.m6.fr/v1/video/info/m6/bonus/%s' % video_id, video_id,
'Downloading video RSS') 'Downloading video RSS')
title = rss.find('./channel/item/title').text title = rss.find('./channel/item/title').text
description = rss.find('./channel/item/description').text description = rss.find('./channel/item/description').text

View File

@ -219,8 +219,8 @@ class MetacafeIE(InfoExtractor):
description = self._og_search_description(webpage) description = self._og_search_description(webpage)
thumbnail = self._og_search_thumbnail(webpage) thumbnail = self._og_search_thumbnail(webpage)
video_uploader = self._html_search_regex( video_uploader = self._html_search_regex(
r'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("(?:channel|submiter)","([^"]+)"\);', r'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("(?:channel|submiter)","([^"]+)"\);',
webpage, 'uploader nickname', fatal=False) webpage, 'uploader nickname', fatal=False)
duration = int_or_none( duration = int_or_none(
self._html_search_meta('video:duration', webpage)) self._html_search_meta('video:duration', webpage))

View File

@ -28,7 +28,7 @@ class MetacriticIE(InfoExtractor):
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
# The xml is not well formatted, there are raw '&' # The xml is not well formatted, there are raw '&'
info = self._download_xml('http://www.metacritic.com/video_data?video=' + video_id, info = self._download_xml('http://www.metacritic.com/video_data?video=' + video_id,
video_id, 'Downloading info xml', transform_source=fix_xml_ampersands) video_id, 'Downloading info xml', transform_source=fix_xml_ampersands)
clip = next(c for c in info.findall('playList/clip') if c.find('id').text == video_id) clip = next(c for c in info.findall('playList/clip') if c.find('id').text == video_id)
formats = [] formats = []
@ -44,7 +44,7 @@ class MetacriticIE(InfoExtractor):
self._sort_formats(formats) self._sort_formats(formats)
description = self._html_search_regex(r'<b>Description:</b>(.*?)</p>', description = self._html_search_regex(r'<b>Description:</b>(.*?)</p>',
webpage, 'description', flags=re.DOTALL) webpage, 'description', flags=re.DOTALL)
return { return {
'id': video_id, 'id': video_id,

View File

@ -53,7 +53,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
# Otherwise we get a webpage that would execute some javascript # Otherwise we get a webpage that would execute some javascript
req.add_header('Youtubedl-user-agent', 'curl/7') req.add_header('Youtubedl-user-agent', 'curl/7')
webpage = self._download_webpage(req, mtvn_id, webpage = self._download_webpage(req, mtvn_id,
'Downloading mobile page') 'Downloading mobile page')
metrics_url = unescapeHTML(self._search_regex(r'<a href="(http://metrics.+?)"', webpage, 'url')) metrics_url = unescapeHTML(self._search_regex(r'<a href="(http://metrics.+?)"', webpage, 'url'))
req = HEADRequest(metrics_url) req = HEADRequest(metrics_url)
response = self._request_webpage(req, mtvn_id, 'Resolving url') response = self._request_webpage(req, mtvn_id, 'Resolving url')
@ -66,10 +66,10 @@ class MTVServicesInfoExtractor(InfoExtractor):
if re.match(r'.*/(error_country_block\.swf|geoblock\.mp4)$', mdoc.find('.//src').text) is not None: if re.match(r'.*/(error_country_block\.swf|geoblock\.mp4)$', mdoc.find('.//src').text) is not None:
if mtvn_id is not None and self._MOBILE_TEMPLATE is not None: if mtvn_id is not None and self._MOBILE_TEMPLATE is not None:
self.to_screen('The normal version is not available from your ' self.to_screen('The normal version is not available from your '
'country, trying with the mobile version') 'country, trying with the mobile version')
return self._extract_mobile_video_formats(mtvn_id) return self._extract_mobile_video_formats(mtvn_id)
raise ExtractorError('This video is not available from your country.', raise ExtractorError('This video is not available from your country.',
expected=True) expected=True)
formats = [] formats = []
for rendition in mdoc.findall('.//rendition'): for rendition in mdoc.findall('.//rendition'):
@ -98,7 +98,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
mediagen_url += '&acceptMethods=fms' mediagen_url += '&acceptMethods=fms'
mediagen_doc = self._download_xml(mediagen_url, video_id, mediagen_doc = self._download_xml(mediagen_url, video_id,
'Downloading video urls') 'Downloading video urls')
description_node = itemdoc.find('description') description_node = itemdoc.find('description')
if description_node is not None: if description_node is not None:
@ -126,7 +126,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
# This a short id that's used in the webpage urls # This a short id that's used in the webpage urls
mtvn_id = None mtvn_id = None
mtvn_id_node = find_xpath_attr(itemdoc, './/{http://search.yahoo.com/mrss/}category', mtvn_id_node = find_xpath_attr(itemdoc, './/{http://search.yahoo.com/mrss/}category',
'scheme', 'urn:mtvn:id') 'scheme', 'urn:mtvn:id')
if mtvn_id_node is not None: if mtvn_id_node is not None:
mtvn_id = mtvn_id_node.text mtvn_id = mtvn_id_node.text
@ -188,7 +188,7 @@ class MTVServicesEmbeddedIE(MTVServicesInfoExtractor):
video_id = self._id_from_uri(uri) video_id = self._id_from_uri(uri)
site_id = uri.replace(video_id, '') site_id = uri.replace(video_id, '')
config_url = ('http://media.mtvnservices.com/pmt/e1/players/{0}/' config_url = ('http://media.mtvnservices.com/pmt/e1/players/{0}/'
'context4/context5/config.xml'.format(site_id)) 'context4/context5/config.xml'.format(site_id))
config_doc = self._download_xml(config_url, video_id) config_doc = self._download_xml(config_url, video_id)
feed_node = config_doc.find('.//feed') feed_node = config_doc.find('.//feed')
feed_url = feed_node.text.strip().split('?')[0] feed_url = feed_node.text.strip().split('?')[0]

View File

@ -53,7 +53,7 @@ class MySpaceIE(InfoExtractor):
# songs don't store any useful info in the 'context' variable # songs don't store any useful info in the 'context' variable
def search_data(name): def search_data(name):
return self._search_regex(r'data-%s="(.*?)"' % name, webpage, return self._search_regex(r'data-%s="(.*?)"' % name, webpage,
name) name)
streamUrl = search_data('stream-url') streamUrl = search_data('stream-url')
info = { info = {
'id': video_id, 'id': video_id,
@ -63,7 +63,7 @@ class MySpaceIE(InfoExtractor):
} }
else: else:
context = json.loads(self._search_regex(r'context = ({.*?});', webpage, context = json.loads(self._search_regex(r'context = ({.*?});', webpage,
u'context')) u'context'))
video = context['video'] video = context['video']
streamUrl = video['streamUrl'] streamUrl = video['streamUrl']
info = { info = {

View File

@ -72,7 +72,7 @@ class MyVideoIE(InfoExtractor):
video_url = mobj.group(1) + '.flv' video_url = mobj.group(1) + '.flv'
video_title = self._html_search_regex('<title>([^<]+)</title>', video_title = self._html_search_regex('<title>([^<]+)</title>',
webpage, 'title') webpage, 'title')
return { return {
'id': video_id, 'id': video_id,
@ -162,7 +162,7 @@ class MyVideoIE(InfoExtractor):
video_swfobj = compat_urllib_parse.unquote(video_swfobj) video_swfobj = compat_urllib_parse.unquote(video_swfobj)
video_title = self._html_search_regex("<h1(?: class='globalHd')?>(.*?)</h1>", video_title = self._html_search_regex("<h1(?: class='globalHd')?>(.*?)</h1>",
webpage, 'title') webpage, 'title')
return { return {
'id': video_id, 'id': video_id,

View File

@ -30,7 +30,7 @@ class NaverIE(InfoExtractor):
video_id = mobj.group(1) video_id = mobj.group(1)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
m_id = re.search(r'var rmcPlayer = new nhn.rmcnmv.RMCVideoPlayer\("(.+?)", "(.+?)"', m_id = re.search(r'var rmcPlayer = new nhn.rmcnmv.RMCVideoPlayer\("(.+?)", "(.+?)"',
webpage) webpage)
if m_id is None: if m_id is None:
m_error = re.search( m_error = re.search(
r'(?s)<div class="nation_error">\s*(?:<!--.*?-->)?\s*<p class="[^"]+">(?P<msg>.+?)</p>\s*</div>', r'(?s)<div class="nation_error">\s*(?:<!--.*?-->)?\s*<p class="[^"]+">(?P<msg>.+?)</p>\s*</div>',

View File

@ -38,12 +38,12 @@ class NFBIE(InfoExtractor):
page = self._download_webpage('https://www.nfb.ca/film/%s' % video_id, video_id, 'Downloading film page') page = self._download_webpage('https://www.nfb.ca/film/%s' % video_id, video_id, 'Downloading film page')
uploader_id = self._html_search_regex(r'<a class="director-link" href="/explore-all-directors/([^/]+)/"', uploader_id = self._html_search_regex(r'<a class="director-link" href="/explore-all-directors/([^/]+)/"',
page, 'director id', fatal=False) page, 'director id', fatal=False)
uploader = self._html_search_regex(r'<em class="director-name" itemprop="name">([^<]+)</em>', uploader = self._html_search_regex(r'<em class="director-name" itemprop="name">([^<]+)</em>',
page, 'director name', fatal=False) page, 'director name', fatal=False)
request = compat_urllib_request.Request('https://www.nfb.ca/film/%s/player_config' % video_id, request = compat_urllib_request.Request('https://www.nfb.ca/film/%s/player_config' % video_id,
compat_urllib_parse.urlencode({'getConfig': 'true'}).encode('ascii')) compat_urllib_parse.urlencode({'getConfig': 'true'}).encode('ascii'))
request.add_header('Content-Type', 'application/x-www-form-urlencoded') request.add_header('Content-Type', 'application/x-www-form-urlencoded')
request.add_header('X-NFB-Referer', 'http://www.nfb.ca/medias/flash/NFBVideoPlayer.swf') request.add_header('X-NFB-Referer', 'http://www.nfb.ca/medias/flash/NFBVideoPlayer.swf')

View File

@ -125,7 +125,7 @@ class NHLVideocenterIE(NHLBaseInfoExtractor):
self._downloader.report_warning(u'Got an empty reponse, trying ' self._downloader.report_warning(u'Got an empty reponse, trying '
'adding the "newvideos" parameter') 'adding the "newvideos" parameter')
response = self._download_webpage(request_url + '&newvideos=true', response = self._download_webpage(request_url + '&newvideos=true',
playlist_title) playlist_title)
response = self._fix_json(response) response = self._fix_json(response)
videos = json.loads(response) videos = json.loads(response)

View File

@ -111,7 +111,7 @@ class NiconicoIE(InfoExtractor):
if 'deleted=' in flv_info_webpage: if 'deleted=' in flv_info_webpage:
raise ExtractorError('The video has been deleted.', raise ExtractorError('The video has been deleted.',
expected=True) expected=True)
video_real_url = compat_urlparse.parse_qs(flv_info_webpage)['url'][0] video_real_url = compat_urlparse.parse_qs(flv_info_webpage)['url'][0]
# Start extracting information # Start extracting information
@ -170,13 +170,13 @@ class NiconicoPlaylistIE(InfoExtractor):
webpage = self._download_webpage(url, list_id) webpage = self._download_webpage(url, list_id)
entries_json = self._search_regex(r'Mylist\.preload\(\d+, (\[.*\])\);', entries_json = self._search_regex(r'Mylist\.preload\(\d+, (\[.*\])\);',
webpage, 'entries') webpage, 'entries')
entries = json.loads(entries_json) entries = json.loads(entries_json)
entries = [{ entries = [{
'_type': 'url', '_type': 'url',
'ie_key': NiconicoIE.ie_key(), 'ie_key': NiconicoIE.ie_key(),
'url': ('http://www.nicovideo.jp/watch/%s' % 'url': ('http://www.nicovideo.jp/watch/%s' %
entry['item_data']['video_id']), entry['item_data']['video_id']),
} for entry in entries] } for entry in entries]
return { return {

View File

@ -27,8 +27,7 @@ class NineGagIE(InfoExtractor):
"thumbnail": "re:^https?://", "thumbnail": "re:^https?://",
}, },
'add_ie': ['Youtube'] 'add_ie': ['Youtube']
}, }, {
{
'url': 'http://9gag.tv/p/KklwM/alternate-banned-opening-scene-of-gravity?ref=fsidebar', 'url': 'http://9gag.tv/p/KklwM/alternate-banned-opening-scene-of-gravity?ref=fsidebar',
'info_dict': { 'info_dict': {
'id': 'KklwM', 'id': 'KklwM',

View File

@ -31,9 +31,9 @@ class NormalbootsIE(InfoExtractor):
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
video_uploader = self._html_search_regex(r'Posted\sby\s<a\shref="[A-Za-z0-9/]*">(?P<uploader>[A-Za-z]*)\s</a>', video_uploader = self._html_search_regex(r'Posted\sby\s<a\shref="[A-Za-z0-9/]*">(?P<uploader>[A-Za-z]*)\s</a>',
webpage, 'uploader') webpage, 'uploader')
raw_upload_date = self._html_search_regex('<span style="text-transform:uppercase; font-size:inherit;">[A-Za-z]+, (?P<date>.*)</span>', raw_upload_date = self._html_search_regex('<span style="text-transform:uppercase; font-size:inherit;">[A-Za-z]+, (?P<date>.*)</span>',
webpage, 'date') webpage, 'date')
video_upload_date = unified_strdate(raw_upload_date) video_upload_date = unified_strdate(raw_upload_date)
player_url = self._html_search_regex(r'<iframe\swidth="[0-9]+"\sheight="[0-9]+"\ssrc="(?P<url>[\S]+)"', webpage, 'url') player_url = self._html_search_regex(r'<iframe\swidth="[0-9]+"\sheight="[0-9]+"\ssrc="(?P<url>[\S]+)"', webpage, 'url')

View File

@ -43,7 +43,7 @@ class OoyalaIE(InfoExtractor):
@classmethod @classmethod
def _build_url_result(cls, embed_code): def _build_url_result(cls, embed_code):
return cls.url_result(cls._url_for_embed_code(embed_code), return cls.url_result(cls._url_for_embed_code(embed_code),
ie=cls.ie_key()) ie=cls.ie_key())
def _extract_result(self, info, more_info): def _extract_result(self, info, more_info):
return { return {

View File

@ -31,7 +31,7 @@ class PhotobucketIE(InfoExtractor):
# Extract URL, uploader, and title from webpage # Extract URL, uploader, and title from webpage
self.report_extraction(video_id) self.report_extraction(video_id)
info_json = self._search_regex(r'Pb\.Data\.Shared\.put\(Pb\.Data\.Shared\.MEDIA, (.*?)\);', info_json = self._search_regex(r'Pb\.Data\.Shared\.put\(Pb\.Data\.Shared\.MEDIA, (.*?)\);',
webpage, 'info json') webpage, 'info json')
info = json.loads(info_json) info = json.loads(info_json)
url = compat_urllib_parse.unquote(self._html_search_regex(r'file=(.+\.mp4)', info['linkcodes']['html'], 'url')) url = compat_urllib_parse.unquote(self._html_search_regex(r'file=(.+\.mp4)', info['linkcodes']['html'], 'url'))
return { return {

View File

@ -33,7 +33,7 @@ class RBMARadioIE(InfoExtractor):
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
json_data = self._search_regex(r'window\.gon.*?gon\.show=(.+?);$', json_data = self._search_regex(r'window\.gon.*?gon\.show=(.+?);$',
webpage, 'json data', flags=re.MULTILINE) webpage, 'json data', flags=re.MULTILINE)
try: try:
data = json.loads(json_data) data = json.loads(json_data)

View File

@ -27,8 +27,7 @@ class SBSIE(InfoExtractor):
'thumbnail': 're:http://.*\.jpg', 'thumbnail': 're:http://.*\.jpg',
}, },
'add_ies': ['generic'], 'add_ies': ['generic'],
}, }, {
{
'url': 'http://www.sbs.com.au/ondemand/video/320403011771/Dingo-Conservation-The-Feed', 'url': 'http://www.sbs.com.au/ondemand/video/320403011771/Dingo-Conservation-The-Feed',
'only_matching': True, 'only_matching': True,
}] }]

View File

@ -96,7 +96,7 @@ class ScreencastIE(InfoExtractor):
if title is None: if title is None:
title = self._html_search_regex( title = self._html_search_regex(
[r'<b>Title:</b> ([^<]*)</div>', [r'<b>Title:</b> ([^<]*)</div>',
r'class="tabSeperator">></span><span class="tabText">(.*?)<'], r'class="tabSeperator">></span><span class="tabText">(.*?)<'],
webpage, 'title') webpage, 'title')
thumbnail = self._og_search_thumbnail(webpage) thumbnail = self._og_search_thumbnail(webpage)
description = self._og_search_description(webpage, default=None) description = self._og_search_description(webpage, default=None)

View File

@ -46,7 +46,7 @@ class SinaIE(InfoExtractor):
def _extract_video(self, video_id): def _extract_video(self, video_id):
data = compat_urllib_parse.urlencode({'vid': video_id}) data = compat_urllib_parse.urlencode({'vid': video_id})
url_doc = self._download_xml('http://v.iask.com/v_play.php?%s' % data, url_doc = self._download_xml('http://v.iask.com/v_play.php?%s' % data,
video_id, 'Downloading video url') video_id, 'Downloading video url')
image_page = self._download_webpage( image_page = self._download_webpage(
'http://interface.video.sina.com.cn/interface/common/getVideoImage.php?%s' % data, 'http://interface.video.sina.com.cn/interface/common/getVideoImage.php?%s' % data,
video_id, 'Downloading thumbnail info') video_id, 'Downloading thumbnail info')

View File

@ -26,7 +26,7 @@ class SlutloadIE(InfoExtractor):
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
video_title = self._html_search_regex(r'<h1><strong>([^<]+)</strong>', video_title = self._html_search_regex(r'<h1><strong>([^<]+)</strong>',
webpage, 'title').strip() webpage, 'title').strip()
video_url = self._html_search_regex( video_url = self._html_search_regex(
r'(?s)<div id="vidPlayer"\s+data-url="([^"]+)"', r'(?s)<div id="vidPlayer"\s+data-url="([^"]+)"',

View File

@ -282,7 +282,7 @@ class SmotriBroadcastIE(InfoExtractor):
(username, password) = self._get_login_info() (username, password) = self._get_login_info()
if username is None: if username is None:
raise ExtractorError('Erotic broadcasts allowed only for registered users, ' raise ExtractorError('Erotic broadcasts allowed only for registered users, '
'use --username and --password options to provide account credentials.', expected=True) 'use --username and --password options to provide account credentials.', expected=True)
login_form = { login_form = {
'login-hint53': '1', 'login-hint53': '1',

View File

@ -159,7 +159,7 @@ class SoundcloudIE(InfoExtractor):
# We have to retrieve the url # We have to retrieve the url
streams_url = ('http://api.soundcloud.com/i1/tracks/{0}/streams?' streams_url = ('http://api.soundcloud.com/i1/tracks/{0}/streams?'
'client_id={1}&secret_token={2}'.format(track_id, self._IPHONE_CLIENT_ID, secret_token)) 'client_id={1}&secret_token={2}'.format(track_id, self._IPHONE_CLIENT_ID, secret_token))
format_dict = self._download_json( format_dict = self._download_json(
streams_url, streams_url,
track_id, 'Downloading track url') track_id, 'Downloading track url')

View File

@ -82,7 +82,7 @@ class StanfordOpenClassroomIE(InfoExtractor):
rootURL = 'http://openclassroom.stanford.edu/MainFolder/HomePage.php' rootURL = 'http://openclassroom.stanford.edu/MainFolder/HomePage.php'
rootpage = self._download_webpage(rootURL, info['id'], rootpage = self._download_webpage(rootURL, info['id'],
errnote='Unable to download course info page') errnote='Unable to download course info page')
links = orderedSet(re.findall('<a href="(CoursePage.php\?[^"]+)">', rootpage)) links = orderedSet(re.findall('<a href="(CoursePage.php\?[^"]+)">', rootpage))
info['entries'] = [self.url_result( info['entries'] = [self.url_result(

View File

@ -8,24 +8,23 @@ from .common import InfoExtractor
class TeamcocoIE(InfoExtractor): class TeamcocoIE(InfoExtractor):
_VALID_URL = r'http://teamcoco\.com/video/(?P<video_id>[0-9]+)?/?(?P<display_id>.*)' _VALID_URL = r'http://teamcoco\.com/video/(?P<video_id>[0-9]+)?/?(?P<display_id>.*)'
_TESTS = [ _TESTS = [
{ {
'url': 'http://teamcoco.com/video/80187/conan-becomes-a-mary-kay-beauty-consultant', 'url': 'http://teamcoco.com/video/80187/conan-becomes-a-mary-kay-beauty-consultant',
'file': '80187.mp4', 'file': '80187.mp4',
'md5': '3f7746aa0dc86de18df7539903d399ea', 'md5': '3f7746aa0dc86de18df7539903d399ea',
'info_dict': { 'info_dict': {
'title': 'Conan Becomes A Mary Kay Beauty Consultant', 'title': 'Conan Becomes A Mary Kay Beauty Consultant',
'description': 'Mary Kay is perhaps the most trusted name in female beauty, so of course Conan is a natural choice to sell their products.' 'description': 'Mary Kay is perhaps the most trusted name in female beauty, so of course Conan is a natural choice to sell their products.'
}
}, {
'url': 'http://teamcoco.com/video/louis-ck-interview-george-w-bush',
'file': '19705.mp4',
'md5': 'cde9ba0fa3506f5f017ce11ead928f9a',
'info_dict': {
"description": "Louis C.K. got starstruck by George W. Bush, so what? Part one.",
"title": "Louis C.K. Interview Pt. 1 11/3/11"
}
} }
},
{
'url': 'http://teamcoco.com/video/louis-ck-interview-george-w-bush',
'file': '19705.mp4',
'md5': 'cde9ba0fa3506f5f017ce11ead928f9a',
'info_dict': {
"description": "Louis C.K. got starstruck by George W. Bush, so what? Part one.",
"title": "Louis C.K. Interview Pt. 1 11/3/11"
}
}
] ]
def _real_extract(self, url): def _real_extract(self, url):

View File

@ -33,9 +33,9 @@ class TEDIE(SubtitlesInfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'The illusion of consciousness', 'title': 'The illusion of consciousness',
'description': ('Philosopher Dan Dennett makes a compelling ' 'description': ('Philosopher Dan Dennett makes a compelling '
'argument that not only don\'t we understand our own ' 'argument that not only don\'t we understand our own '
'consciousness, but that half the time our brains are ' 'consciousness, but that half the time our brains are '
'actively fooling us.'), 'actively fooling us.'),
'uploader': 'Dan Dennett', 'uploader': 'Dan Dennett',
'width': 854, 'width': 854,
'duration': 1308, 'duration': 1308,
@ -93,7 +93,7 @@ class TEDIE(SubtitlesInfoExtractor):
def _extract_info(self, webpage): def _extract_info(self, webpage):
info_json = self._search_regex(r'q\("\w+.init",({.+})\)</script>', info_json = self._search_regex(r'q\("\w+.init",({.+})\)</script>',
webpage, 'info json') webpage, 'info json')
return json.loads(info_json) return json.loads(info_json)
def _real_extract(self, url): def _real_extract(self, url):
@ -113,7 +113,7 @@ class TEDIE(SubtitlesInfoExtractor):
'''Returns the videos of the playlist''' '''Returns the videos of the playlist'''
webpage = self._download_webpage(url, name, webpage = self._download_webpage(url, name,
'Downloading playlist webpage') 'Downloading playlist webpage')
info = self._extract_info(webpage) info = self._extract_info(webpage)
playlist_info = info['playlist'] playlist_info = info['playlist']

View File

@ -30,7 +30,7 @@ class TF1IE(InfoExtractor):
embed_url = self._html_search_regex( embed_url = self._html_search_regex(
r'"(https://www.wat.tv/embedframe/.*?)"', webpage, 'embed url') r'"(https://www.wat.tv/embedframe/.*?)"', webpage, 'embed url')
embed_page = self._download_webpage(embed_url, video_id, embed_page = self._download_webpage(embed_url, video_id,
'Downloading embed player page') 'Downloading embed player page')
wat_id = self._search_regex(r'UVID=(.*?)&', embed_page, 'wat id') wat_id = self._search_regex(r'UVID=(.*?)&', embed_page, 'wat id')
wat_info = self._download_json( wat_info = self._download_json(
'http://www.wat.tv/interface/contentv3/%s' % wat_id, video_id) 'http://www.wat.tv/interface/contentv3/%s' % wat_id, video_id)

View File

@ -47,7 +47,7 @@ class ThePlatformIE(InfoExtractor):
smil_url = config['releaseUrl'] + '&format=SMIL&formats=MPEG4&manifest=f4m' smil_url = config['releaseUrl'] + '&format=SMIL&formats=MPEG4&manifest=f4m'
else: else:
smil_url = ('http://link.theplatform.com/s/dJ5BDC/{0}/meta.smil?' smil_url = ('http://link.theplatform.com/s/dJ5BDC/{0}/meta.smil?'
'format=smil&mbr=true'.format(video_id)) 'format=smil&mbr=true'.format(video_id))
meta = self._download_xml(smil_url, video_id) meta = self._download_xml(smil_url, video_id)
try: try:

View File

@ -28,7 +28,7 @@ class TinyPicIE(InfoExtractor):
webpage = self._download_webpage(url, video_id, 'Downloading page') webpage = self._download_webpage(url, video_id, 'Downloading page')
mobj = re.search(r'(?m)fo\.addVariable\("file",\s"(?P<fileid>[\da-z]+)"\);\n' mobj = re.search(r'(?m)fo\.addVariable\("file",\s"(?P<fileid>[\da-z]+)"\);\n'
'\s+fo\.addVariable\("s",\s"(?P<serverid>\d+)"\);', webpage) '\s+fo\.addVariable\("s",\s"(?P<serverid>\d+)"\);', webpage)
if mobj is None: if mobj is None:
raise ExtractorError('Video %s does not exist' % video_id, expected=True) raise ExtractorError('Video %s does not exist' % video_id, expected=True)

View File

@ -25,7 +25,7 @@ class TrailerAddictIE(InfoExtractor):
webpage = self._download_webpage(url, name) webpage = self._download_webpage(url, name)
title = self._search_regex(r'<title>(.+?)</title>', title = self._search_regex(r'<title>(.+?)</title>',
webpage, 'video title').replace(' - Trailer Addict', '') webpage, 'video title').replace(' - Trailer Addict', '')
view_count_str = self._search_regex( view_count_str = self._search_regex(
r'<span class="views_n">([0-9,.]+)</span>', r'<span class="views_n">([0-9,.]+)</span>',
webpage, 'view count', fatal=False) webpage, 'view count', fatal=False)
@ -46,9 +46,9 @@ class TrailerAddictIE(InfoExtractor):
info_webpage = self._download_webpage(info_url, video_id, "Downloading the info webpage") info_webpage = self._download_webpage(info_url, video_id, "Downloading the info webpage")
final_url = self._search_regex(r'&fileurl=(.+)', final_url = self._search_regex(r'&fileurl=(.+)',
info_webpage, 'Download url').replace('%3F', '?') info_webpage, 'Download url').replace('%3F', '?')
thumbnail_url = self._search_regex(r'&image=(.+?)&', thumbnail_url = self._search_regex(r'&image=(.+?)&',
info_webpage, 'thumbnail url') info_webpage, 'thumbnail url')
description = self._html_search_regex( description = self._html_search_regex(
r'(?s)<div class="synopsis">.*?<div class="movie_label_info"[^>]*>(.*?)</div>', r'(?s)<div class="synopsis">.*?<div class="movie_label_info"[^>]*>(.*?)</div>',

View File

@ -43,7 +43,7 @@ class TumblrIE(InfoExtractor):
webpage, 'iframe url') webpage, 'iframe url')
iframe = self._download_webpage(iframe_url, video_id) iframe = self._download_webpage(iframe_url, video_id)
video_url = self._search_regex(r'<source src="([^"]+)"', video_url = self._search_regex(r'<source src="([^"]+)"',
iframe, 'video url') iframe, 'video url')
# The only place where you can get a title, it's not complete, # The only place where you can get a title, it's not complete,
# but searching in other places doesn't work for all videos # but searching in other places doesn't work for all videos

View File

@ -154,7 +154,7 @@ class UdemyCourseIE(UdemyIE):
self.to_screen('%s: Already enrolled in' % course_id) self.to_screen('%s: Already enrolled in' % course_id)
response = self._download_json('https://www.udemy.com/api-1.1/courses/%s/curriculum' % course_id, response = self._download_json('https://www.udemy.com/api-1.1/courses/%s/curriculum' % course_id,
course_id, 'Downloading course curriculum') course_id, 'Downloading course curriculum')
entries = [ entries = [
self.url_result('https://www.udemy.com/%s/#/lecture/%s' % (course_path, asset['id']), 'Udemy') self.url_result('https://www.udemy.com/%s/#/lecture/%s' % (course_path, asset['id']), 'Udemy')

View File

@ -45,13 +45,13 @@ class UstreamIE(InfoExtractor):
self.report_extraction(video_id) self.report_extraction(video_id)
video_title = self._html_search_regex(r'data-title="(?P<title>.+)"', video_title = self._html_search_regex(r'data-title="(?P<title>.+)"',
webpage, 'title') webpage, 'title')
uploader = self._html_search_regex(r'data-content-type="channel".*?>(?P<uploader>.*?)</a>', uploader = self._html_search_regex(r'data-content-type="channel".*?>(?P<uploader>.*?)</a>',
webpage, 'uploader', fatal=False, flags=re.DOTALL) webpage, 'uploader', fatal=False, flags=re.DOTALL)
thumbnail = self._html_search_regex(r'<link rel="image_src" href="(?P<thumb>.*?)"', thumbnail = self._html_search_regex(r'<link rel="image_src" href="(?P<thumb>.*?)"',
webpage, 'thumbnail', fatal=False) webpage, 'thumbnail', fatal=False)
return { return {
'id': video_id, 'id': video_id,

View File

@ -30,13 +30,13 @@ class Vbox7IE(InfoExtractor):
redirect_page, urlh = self._download_webpage_handle(url, video_id) redirect_page, urlh = self._download_webpage_handle(url, video_id)
new_location = self._search_regex(r'window\.location = \'(.*)\';', new_location = self._search_regex(r'window\.location = \'(.*)\';',
redirect_page, 'redirect location') redirect_page, 'redirect location')
redirect_url = urlh.geturl() + new_location redirect_url = urlh.geturl() + new_location
webpage = self._download_webpage(redirect_url, video_id, webpage = self._download_webpage(redirect_url, video_id,
'Downloading redirect page') 'Downloading redirect page')
title = self._html_search_regex(r'<title>(.*)</title>', title = self._html_search_regex(r'<title>(.*)</title>',
webpage, 'title').split('/')[0].strip() webpage, 'title').split('/')[0].strip()
info_url = "http://vbox7.com/play/magare.do" info_url = "http://vbox7.com/play/magare.do"
data = compat_urllib_parse.urlencode({'as3': '1', 'vid': video_id}) data = compat_urllib_parse.urlencode({'as3': '1', 'vid': video_id})

View File

@ -48,11 +48,11 @@ class VeeHDIE(InfoExtractor):
video_url = compat_urlparse.unquote(config['clip']['url']) video_url = compat_urlparse.unquote(config['clip']['url'])
title = clean_html(get_element_by_id('videoName', webpage).rpartition('|')[0]) title = clean_html(get_element_by_id('videoName', webpage).rpartition('|')[0])
uploader_id = self._html_search_regex(r'<a href="/profile/\d+">(.+?)</a>', uploader_id = self._html_search_regex(r'<a href="/profile/\d+">(.+?)</a>',
webpage, 'uploader') webpage, 'uploader')
thumbnail = self._search_regex(r'<img id="veehdpreview" src="(.+?)"', thumbnail = self._search_regex(r'<img id="veehdpreview" src="(.+?)"',
webpage, 'thumbnail') webpage, 'thumbnail')
description = self._html_search_regex(r'<td class="infodropdown".*?<div>(.*?)<ul', description = self._html_search_regex(r'<td class="infodropdown".*?<div>(.*?)<ul',
webpage, 'description', flags=re.DOTALL) webpage, 'description', flags=re.DOTALL)
return { return {
'_type': 'video', '_type': 'video',

View File

@ -112,7 +112,7 @@ class VestiIE(InfoExtractor):
if mobj: if mobj:
video_id = mobj.group('id') video_id = mobj.group('id')
page = self._download_webpage('http://www.vesti.ru/only_video.html?vid=%s' % video_id, video_id, page = self._download_webpage('http://www.vesti.ru/only_video.html?vid=%s' % video_id, video_id,
'Downloading video page') 'Downloading video page')
rutv_url = RUTVIE._extract_url(page) rutv_url = RUTVIE._extract_url(page)
if rutv_url: if rutv_url:

View File

@ -28,11 +28,11 @@ class VideofyMeIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id') video_id = mobj.group('id')
config = self._download_xml('http://sunshine.videofy.me/?videoId=%s' % video_id, config = self._download_xml('http://sunshine.videofy.me/?videoId=%s' % video_id,
video_id) video_id)
video = config.find('video') video = config.find('video')
sources = video.find('sources') sources = video.find('sources')
url_node = next(node for node in [find_xpath_attr(sources, 'source', 'id', 'HQ %s' % key) url_node = next(node for node in [find_xpath_attr(sources, 'source', 'id', 'HQ %s' % key)
for key in ['on', 'av', 'off']] if node is not None) for key in ['on', 'av', 'off']] if node is not None)
video_url = url_node.find('url').text video_url = url_node.find('url').text
return {'id': video_id, return {'id': video_id,

View File

@ -260,7 +260,7 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
else: else:
config_re = [r' = {config:({.+?}),assets:', r'(?:[abc])=({.+?});'] config_re = [r' = {config:({.+?}),assets:', r'(?:[abc])=({.+?});']
config = self._search_regex(config_re, webpage, 'info section', config = self._search_regex(config_re, webpage, 'info section',
flags=re.DOTALL) flags=re.DOTALL)
config = json.loads(config) config = json.loads(config)
except Exception as e: except Exception as e:
if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage): if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage):

View File

@ -121,7 +121,7 @@ class VKIE(InfoExtractor):
} }
request = compat_urllib_request.Request('https://login.vk.com/?act=login', request = compat_urllib_request.Request('https://login.vk.com/?act=login',
compat_urllib_parse.urlencode(login_form).encode('utf-8')) compat_urllib_parse.urlencode(login_form).encode('utf-8'))
login_page = self._download_webpage(request, None, note='Logging in as %s' % username) login_page = self._download_webpage(request, None, note='Logging in as %s' % username)
if re.search(r'onLoginFailed', login_page): if re.search(r'onLoginFailed', login_page):
@ -175,7 +175,7 @@ class VKIE(InfoExtractor):
upload_date = None upload_date = None
mobj = re.search(r'id="mv_date_wrap".*?Added ([a-zA-Z]+ [0-9]+), ([0-9]+) at', info_page) mobj = re.search(r'id="mv_date_wrap".*?Added ([a-zA-Z]+ [0-9]+), ([0-9]+) at', info_page)
if mobj is not None: if mobj is not None:
x = mobj.group(1) + ' ' + mobj.group(2) mobj.group(1) + ' ' + mobj.group(2)
upload_date = unified_strdate(mobj.group(1) + ' ' + mobj.group(2)) upload_date = unified_strdate(mobj.group(1) + ' ' + mobj.group(2))
formats = [{ formats = [{

View File

@ -41,7 +41,7 @@ class WeiboIE(InfoExtractor):
videos_urls = sorted(videos_urls, key=lambda u: 'video.sina.com' in u) videos_urls = sorted(videos_urls, key=lambda u: 'video.sina.com' in u)
player_url = videos_urls[-1] player_url = videos_urls[-1]
m_sina = re.match(r'https?://video\.sina\.com\.cn/v/b/(\d+)-\d+\.html', m_sina = re.match(r'https?://video\.sina\.com\.cn/v/b/(\d+)-\d+\.html',
player_url) player_url)
if m_sina is not None: if m_sina is not None:
self.to_screen('Sina video detected') self.to_screen('Sina video detected')
sina_id = m_sina.group(1) sina_id = m_sina.group(1)

View File

@ -67,17 +67,17 @@ class XHamsterIE(InfoExtractor):
description = mobj.group(1) if mobj else None description = mobj.group(1) if mobj else None
upload_date = self._html_search_regex(r'hint=\'(\d{4}-\d{2}-\d{2}) \d{2}:\d{2}:\d{2} [A-Z]{3,4}\'', upload_date = self._html_search_regex(r'hint=\'(\d{4}-\d{2}-\d{2}) \d{2}:\d{2}:\d{2} [A-Z]{3,4}\'',
webpage, 'upload date', fatal=False) webpage, 'upload date', fatal=False)
if upload_date: if upload_date:
upload_date = unified_strdate(upload_date) upload_date = unified_strdate(upload_date)
uploader_id = self._html_search_regex(r'<a href=\'/user/[^>]+>(?P<uploader_id>[^<]+)', uploader_id = self._html_search_regex(r'<a href=\'/user/[^>]+>(?P<uploader_id>[^<]+)',
webpage, 'uploader id', default='anonymous') webpage, 'uploader id', default='anonymous')
thumbnail = self._html_search_regex(r'<video\s+.*?poster="([^"]+)".*?>', webpage, 'thumbnail', fatal=False) thumbnail = self._html_search_regex(r'<video\s+.*?poster="([^"]+)".*?>', webpage, 'thumbnail', fatal=False)
duration = parse_duration(self._html_search_regex(r'<span>Runtime:</span> (\d+:\d+)</div>', duration = parse_duration(self._html_search_regex(r'<span>Runtime:</span> (\d+:\d+)</div>',
webpage, 'duration', fatal=False)) webpage, 'duration', fatal=False))
view_count = self._html_search_regex(r'<span>Views:</span> ([^<]+)</div>', webpage, 'view count', fatal=False) view_count = self._html_search_regex(r'<span>Views:</span> ([^<]+)</div>', webpage, 'view count', fatal=False)
if view_count: if view_count:

View File

@ -30,14 +30,14 @@ class XNXXIE(InfoExtractor):
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
video_url = self._search_regex(r'flv_url=(.*?)&amp;', video_url = self._search_regex(r'flv_url=(.*?)&amp;',
webpage, 'video URL') webpage, 'video URL')
video_url = compat_urllib_parse.unquote(video_url) video_url = compat_urllib_parse.unquote(video_url)
video_title = self._html_search_regex(r'<title>(.*?)\s+-\s+XNXX.COM', video_title = self._html_search_regex(r'<title>(.*?)\s+-\s+XNXX.COM',
webpage, 'title') webpage, 'title')
video_thumbnail = self._search_regex(r'url_bigthumb=(.*?)&amp;', video_thumbnail = self._search_regex(r'url_bigthumb=(.*?)&amp;',
webpage, 'thumbnail', fatal=False) webpage, 'thumbnail', fatal=False)
return { return {
'id': video_id, 'id': video_id,

View File

@ -229,7 +229,7 @@ class YahooSearchIE(SearchInfoExtractor):
for pagenum in itertools.count(0): for pagenum in itertools.count(0):
result_url = 'http://video.search.yahoo.com/search/?p=%s&fr=screen&o=js&gs=0&b=%d' % (compat_urllib_parse.quote_plus(query), pagenum * 30) result_url = 'http://video.search.yahoo.com/search/?p=%s&fr=screen&o=js&gs=0&b=%d' % (compat_urllib_parse.quote_plus(query), pagenum * 30)
info = self._download_json(result_url, query, info = self._download_json(result_url, query,
note='Downloading results page ' + str(pagenum + 1)) note='Downloading results page ' + str(pagenum + 1))
m = info['m'] m = info['m']
results = info['results'] results = info['results']

View File

@ -74,7 +74,7 @@ class YoukuIE(InfoExtractor):
# -8 means blocked outside China. # -8 means blocked outside China.
error = config['data'][0].get('error') # Chinese and English, separated by newline. error = config['data'][0].get('error') # Chinese and English, separated by newline.
raise ExtractorError(error or 'Server reported error %i' % error_code, raise ExtractorError(error or 'Server reported error %i' % error_code,
expected=True) expected=True)
video_title = config['data'][0]['title'] video_title = config['data'][0]['title']
seed = config['data'][0]['seed'] seed = config['data'][0]['seed']

View File

@ -64,7 +64,7 @@ class YouPornIE(InfoExtractor):
# Get all of the links from the page # Get all of the links from the page
DOWNLOAD_LIST_RE = r'(?s)<ul class="downloadList">(?P<download_list>.*?)</ul>' DOWNLOAD_LIST_RE = r'(?s)<ul class="downloadList">(?P<download_list>.*?)</ul>'
download_list_html = self._search_regex(DOWNLOAD_LIST_RE, download_list_html = self._search_regex(DOWNLOAD_LIST_RE,
webpage, 'download list').strip() webpage, 'download list').strip()
LINK_RE = r'<a href="([^"]+)">' LINK_RE = r'<a href="([^"]+)">'
links = re.findall(LINK_RE, download_list_html) links = re.findall(LINK_RE, download_list_html)

View File

@ -950,7 +950,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
parts_sizes = self._signature_cache_id(encrypted_sig) parts_sizes = self._signature_cache_id(encrypted_sig)
self.to_screen('{%s} signature length %s, %s' % self.to_screen('{%s} signature length %s, %s' %
(format_id, parts_sizes, player_desc)) (format_id, parts_sizes, player_desc))
signature = self._decrypt_signature( signature = self._decrypt_signature(
encrypted_sig, video_id, player_url, age_gate) encrypted_sig, video_id, player_url, age_gate)
@ -1214,7 +1214,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
class YoutubeTopListIE(YoutubePlaylistIE): class YoutubeTopListIE(YoutubePlaylistIE):
IE_NAME = 'youtube:toplist' IE_NAME = 'youtube:toplist'
IE_DESC = ('YouTube.com top lists, "yttoplist:{channel}:{list title}"' IE_DESC = ('YouTube.com top lists, "yttoplist:{channel}:{list title}"'
' (Example: "yttoplist:music:Top Tracks")') ' (Example: "yttoplist:music:Top Tracks")')
_VALID_URL = r'yttoplist:(?P<chann>.*?):(?P<title>.*?)$' _VALID_URL = r'yttoplist:(?P<chann>.*?):(?P<title>.*?)$'
_TESTS = [{ _TESTS = [{
'url': 'yttoplist:music:Trending', 'url': 'yttoplist:music:Trending',