mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-01-18 01:06:49 +01:00
[generic] Handle audio streams that do not implement HEAD (Fixes #4032)
This commit is contained in:
parent
488447455d
commit
23be51d8ce
@ -242,7 +242,6 @@ class InfoExtractor(object):
|
|||||||
|
|
||||||
def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True):
|
def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True):
|
||||||
""" Returns a tuple (page content as string, URL handle) """
|
""" Returns a tuple (page content as string, URL handle) """
|
||||||
|
|
||||||
# Strip hashes from the URL (#1038)
|
# Strip hashes from the URL (#1038)
|
||||||
if isinstance(url_or_request, (compat_str, str)):
|
if isinstance(url_or_request, (compat_str, str)):
|
||||||
url_or_request = url_or_request.partition('#')[0]
|
url_or_request = url_or_request.partition('#')[0]
|
||||||
@ -251,6 +250,10 @@ class InfoExtractor(object):
|
|||||||
if urlh is False:
|
if urlh is False:
|
||||||
assert not fatal
|
assert not fatal
|
||||||
return False
|
return False
|
||||||
|
content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal)
|
||||||
|
return (content, urlh)
|
||||||
|
|
||||||
|
def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True):
|
||||||
content_type = urlh.headers.get('Content-Type', '')
|
content_type = urlh.headers.get('Content-Type', '')
|
||||||
webpage_bytes = urlh.read()
|
webpage_bytes = urlh.read()
|
||||||
m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type)
|
m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type)
|
||||||
@ -309,7 +312,7 @@ class InfoExtractor(object):
|
|||||||
msg += ' Visit %s for more details' % blocked_iframe
|
msg += ' Visit %s for more details' % blocked_iframe
|
||||||
raise ExtractorError(msg, expected=True)
|
raise ExtractorError(msg, expected=True)
|
||||||
|
|
||||||
return (content, urlh)
|
return content
|
||||||
|
|
||||||
def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True):
|
def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True):
|
||||||
""" Returns the data of the page as a string """
|
""" Returns the data of the page as a string """
|
||||||
|
@ -503,14 +503,14 @@ class GenericIE(InfoExtractor):
|
|||||||
self.to_screen('%s: Requesting header' % video_id)
|
self.to_screen('%s: Requesting header' % video_id)
|
||||||
|
|
||||||
head_req = HEADRequest(url)
|
head_req = HEADRequest(url)
|
||||||
response = self._request_webpage(
|
head_response = self._request_webpage(
|
||||||
head_req, video_id,
|
head_req, video_id,
|
||||||
note=False, errnote='Could not send HEAD request to %s' % url,
|
note=False, errnote='Could not send HEAD request to %s' % url,
|
||||||
fatal=False)
|
fatal=False)
|
||||||
|
|
||||||
if response is not False:
|
if head_response is not False:
|
||||||
# Check for redirect
|
# Check for redirect
|
||||||
new_url = response.geturl()
|
new_url = head_response.geturl()
|
||||||
if url != new_url:
|
if url != new_url:
|
||||||
self.report_following_redirect(new_url)
|
self.report_following_redirect(new_url)
|
||||||
if force_videoid:
|
if force_videoid:
|
||||||
@ -518,13 +518,17 @@ class GenericIE(InfoExtractor):
|
|||||||
new_url, {'force_videoid': force_videoid})
|
new_url, {'force_videoid': force_videoid})
|
||||||
return self.url_result(new_url)
|
return self.url_result(new_url)
|
||||||
|
|
||||||
|
full_response = None
|
||||||
|
if head_response is False:
|
||||||
|
full_response = self._request_webpage(url, video_id)
|
||||||
|
head_response = full_response
|
||||||
|
|
||||||
# Check for direct link to a video
|
# Check for direct link to a video
|
||||||
content_type = response.headers.get('Content-Type', '')
|
content_type = head_response.headers.get('Content-Type', '')
|
||||||
m = re.match(r'^(?P<type>audio|video|application(?=/ogg$))/(?P<format_id>.+)$', content_type)
|
m = re.match(r'^(?P<type>audio|video|application(?=/ogg$))/(?P<format_id>.+)$', content_type)
|
||||||
if m:
|
if m:
|
||||||
upload_date = response.headers.get('Last-Modified')
|
upload_date = unified_strdate(
|
||||||
if upload_date:
|
head_response.headers.get('Last-Modified'))
|
||||||
upload_date = unified_strdate(upload_date)
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': os.path.splitext(url_basename(url))[0],
|
'title': os.path.splitext(url_basename(url))[0],
|
||||||
@ -539,13 +543,10 @@ class GenericIE(InfoExtractor):
|
|||||||
if not self._downloader.params.get('test', False) and not is_intentional:
|
if not self._downloader.params.get('test', False) and not is_intentional:
|
||||||
self._downloader.report_warning('Falling back on generic information extractor.')
|
self._downloader.report_warning('Falling back on generic information extractor.')
|
||||||
|
|
||||||
try:
|
if full_response:
|
||||||
|
webpage = _webpage_read_content(url, video_id)
|
||||||
|
else:
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
except ValueError:
|
|
||||||
# since this is the last-resort InfoExtractor, if
|
|
||||||
# this error is thrown, it'll be thrown here
|
|
||||||
raise ExtractorError('Failed to download URL: %s' % url)
|
|
||||||
|
|
||||||
self.report_extraction(video_id)
|
self.report_extraction(video_id)
|
||||||
|
|
||||||
# Is it an RSS feed?
|
# Is it an RSS feed?
|
||||||
|
Loading…
x
Reference in New Issue
Block a user