mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-02-22 03:09:30 +01:00
[fd/hls] Support --write-pages
for m3u8 media playlists (#12333)
Authored by: bashonly
This commit is contained in:
parent
5271ef48c6
commit
be69468752
@ -16,6 +16,7 @@ from ..utils import (
|
|||||||
update_url_query,
|
update_url_query,
|
||||||
urljoin,
|
urljoin,
|
||||||
)
|
)
|
||||||
|
from ..utils._utils import _request_dump_filename
|
||||||
|
|
||||||
|
|
||||||
class HlsFD(FragmentFD):
|
class HlsFD(FragmentFD):
|
||||||
@ -80,7 +81,15 @@ class HlsFD(FragmentFD):
|
|||||||
self.to_screen(f'[{self.FD_NAME}] Downloading m3u8 manifest')
|
self.to_screen(f'[{self.FD_NAME}] Downloading m3u8 manifest')
|
||||||
urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url))
|
urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url))
|
||||||
man_url = urlh.url
|
man_url = urlh.url
|
||||||
s = urlh.read().decode('utf-8', 'ignore')
|
s_bytes = urlh.read()
|
||||||
|
if self.params.get('write_pages'):
|
||||||
|
dump_filename = _request_dump_filename(
|
||||||
|
man_url, info_dict['id'], None,
|
||||||
|
trim_length=self.params.get('trim_file_name'))
|
||||||
|
self.to_screen(f'[{self.FD_NAME}] Saving request to {dump_filename}')
|
||||||
|
with open(dump_filename, 'wb') as outf:
|
||||||
|
outf.write(s_bytes)
|
||||||
|
s = s_bytes.decode('utf-8', 'ignore')
|
||||||
|
|
||||||
can_download, message = self.can_download(s, info_dict, self.params.get('allow_unplayable_formats')), None
|
can_download, message = self.can_download(s, info_dict, self.params.get('allow_unplayable_formats')), None
|
||||||
if can_download:
|
if can_download:
|
||||||
|
@ -2,7 +2,6 @@ import base64
|
|||||||
import collections
|
import collections
|
||||||
import functools
|
import functools
|
||||||
import getpass
|
import getpass
|
||||||
import hashlib
|
|
||||||
import http.client
|
import http.client
|
||||||
import http.cookiejar
|
import http.cookiejar
|
||||||
import http.cookies
|
import http.cookies
|
||||||
@ -78,7 +77,6 @@ from ..utils import (
|
|||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
parse_m3u8_attributes,
|
parse_m3u8_attributes,
|
||||||
parse_resolution,
|
parse_resolution,
|
||||||
sanitize_filename,
|
|
||||||
sanitize_url,
|
sanitize_url,
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
str_or_none,
|
str_or_none,
|
||||||
@ -100,6 +98,7 @@ from ..utils import (
|
|||||||
xpath_text,
|
xpath_text,
|
||||||
xpath_with_ns,
|
xpath_with_ns,
|
||||||
)
|
)
|
||||||
|
from ..utils._utils import _request_dump_filename
|
||||||
|
|
||||||
|
|
||||||
class InfoExtractor:
|
class InfoExtractor:
|
||||||
@ -1022,23 +1021,6 @@ class InfoExtractor:
|
|||||||
'Visit http://blocklist.rkn.gov.ru/ for a block reason.',
|
'Visit http://blocklist.rkn.gov.ru/ for a block reason.',
|
||||||
expected=True)
|
expected=True)
|
||||||
|
|
||||||
def _request_dump_filename(self, url, video_id, data=None):
|
|
||||||
if data is not None:
|
|
||||||
data = hashlib.md5(data).hexdigest()
|
|
||||||
basen = join_nonempty(video_id, data, url, delim='_')
|
|
||||||
trim_length = self.get_param('trim_file_name') or 240
|
|
||||||
if len(basen) > trim_length:
|
|
||||||
h = '___' + hashlib.md5(basen.encode()).hexdigest()
|
|
||||||
basen = basen[:trim_length - len(h)] + h
|
|
||||||
filename = sanitize_filename(f'{basen}.dump', restricted=True)
|
|
||||||
# Working around MAX_PATH limitation on Windows (see
|
|
||||||
# http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx)
|
|
||||||
if os.name == 'nt':
|
|
||||||
absfilepath = os.path.abspath(filename)
|
|
||||||
if len(absfilepath) > 259:
|
|
||||||
filename = fR'\\?\{absfilepath}'
|
|
||||||
return filename
|
|
||||||
|
|
||||||
def __decode_webpage(self, webpage_bytes, encoding, headers):
|
def __decode_webpage(self, webpage_bytes, encoding, headers):
|
||||||
if not encoding:
|
if not encoding:
|
||||||
encoding = self._guess_encoding_from_content(headers.get('Content-Type', ''), webpage_bytes)
|
encoding = self._guess_encoding_from_content(headers.get('Content-Type', ''), webpage_bytes)
|
||||||
@ -1067,7 +1049,9 @@ class InfoExtractor:
|
|||||||
if self.get_param('write_pages'):
|
if self.get_param('write_pages'):
|
||||||
if isinstance(url_or_request, Request):
|
if isinstance(url_or_request, Request):
|
||||||
data = self._create_request(url_or_request, data).data
|
data = self._create_request(url_or_request, data).data
|
||||||
filename = self._request_dump_filename(urlh.url, video_id, data)
|
filename = _request_dump_filename(
|
||||||
|
urlh.url, video_id, data,
|
||||||
|
trim_length=self.get_param('trim_file_name'))
|
||||||
self.to_screen(f'Saving request to {filename}')
|
self.to_screen(f'Saving request to {filename}')
|
||||||
with open(filename, 'wb') as outf:
|
with open(filename, 'wb') as outf:
|
||||||
outf.write(webpage_bytes)
|
outf.write(webpage_bytes)
|
||||||
@ -1128,7 +1112,9 @@ class InfoExtractor:
|
|||||||
impersonate=None, require_impersonation=False):
|
impersonate=None, require_impersonation=False):
|
||||||
if self.get_param('load_pages'):
|
if self.get_param('load_pages'):
|
||||||
url_or_request = self._create_request(url_or_request, data, headers, query)
|
url_or_request = self._create_request(url_or_request, data, headers, query)
|
||||||
filename = self._request_dump_filename(url_or_request.url, video_id, url_or_request.data)
|
filename = _request_dump_filename(
|
||||||
|
url_or_request.url, video_id, url_or_request.data,
|
||||||
|
trim_length=self.get_param('trim_file_name'))
|
||||||
self.to_screen(f'Loading request from {filename}')
|
self.to_screen(f'Loading request from {filename}')
|
||||||
try:
|
try:
|
||||||
with open(filename, 'rb') as dumpf:
|
with open(filename, 'rb') as dumpf:
|
||||||
|
@ -5631,6 +5631,24 @@ def filesize_from_tbr(tbr, duration):
|
|||||||
return int(duration * tbr * (1000 / 8))
|
return int(duration * tbr * (1000 / 8))
|
||||||
|
|
||||||
|
|
||||||
|
def _request_dump_filename(url, video_id, data=None, trim_length=None):
|
||||||
|
if data is not None:
|
||||||
|
data = hashlib.md5(data).hexdigest()
|
||||||
|
basen = join_nonempty(video_id, data, url, delim='_')
|
||||||
|
trim_length = trim_length or 240
|
||||||
|
if len(basen) > trim_length:
|
||||||
|
h = '___' + hashlib.md5(basen.encode()).hexdigest()
|
||||||
|
basen = basen[:trim_length - len(h)] + h
|
||||||
|
filename = sanitize_filename(f'{basen}.dump', restricted=True)
|
||||||
|
# Working around MAX_PATH limitation on Windows (see
|
||||||
|
# http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx)
|
||||||
|
if os.name == 'nt':
|
||||||
|
absfilepath = os.path.abspath(filename)
|
||||||
|
if len(absfilepath) > 259:
|
||||||
|
filename = fR'\\?\{absfilepath}'
|
||||||
|
return filename
|
||||||
|
|
||||||
|
|
||||||
# XXX: Temporary
|
# XXX: Temporary
|
||||||
class _YDLLogger:
|
class _YDLLogger:
|
||||||
def __init__(self, ydl=None):
|
def __init__(self, ydl=None):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user