[downloader/fragment] Fix bugs around resuming with Range (#2901)

Authored by: Lesmiscore
This commit is contained in:
Lesmiscore (Naoya Ozaki) 2022-02-28 13:10:54 +09:00 committed by GitHub
parent 195c22840c
commit 93c8410d33
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 41 additions and 19 deletions

View File

@ -178,7 +178,7 @@ def _prepare_frag_download(self, ctx):
dl = HttpQuietDownloader( dl = HttpQuietDownloader(
self.ydl, self.ydl,
{ {
'continuedl': True, 'continuedl': self.params.get('continuedl', True),
'quiet': self.params.get('quiet'), 'quiet': self.params.get('quiet'),
'noprogress': True, 'noprogress': True,
'ratelimit': self.params.get('ratelimit'), 'ratelimit': self.params.get('ratelimit'),

View File

@ -5,7 +5,6 @@
import socket import socket
import time import time
import random import random
import re
from .common import FileDownloader from .common import FileDownloader
from ..compat import ( from ..compat import (
@ -16,6 +15,7 @@
ContentTooShortError, ContentTooShortError,
encodeFilename, encodeFilename,
int_or_none, int_or_none,
parse_http_range,
sanitized_Request, sanitized_Request,
ThrottledDownload, ThrottledDownload,
write_xattr, write_xattr,
@ -59,6 +59,9 @@ class DownloadContext(dict):
ctx.chunk_size = None ctx.chunk_size = None
throttle_start = None throttle_start = None
# parse given Range
req_start, req_end, _ = parse_http_range(headers.get('Range'))
if self.params.get('continuedl', True): if self.params.get('continuedl', True):
# Establish possible resume length # Establish possible resume length
if os.path.isfile(encodeFilename(ctx.tmpfilename)): if os.path.isfile(encodeFilename(ctx.tmpfilename)):
@ -91,6 +94,9 @@ def establish_connection():
if not is_test and chunk_size else chunk_size) if not is_test and chunk_size else chunk_size)
if ctx.resume_len > 0: if ctx.resume_len > 0:
range_start = ctx.resume_len range_start = ctx.resume_len
if req_start is not None:
# offset the beginning of Range to be within request
range_start += req_start
if ctx.is_resume: if ctx.is_resume:
self.report_resuming_byte(ctx.resume_len) self.report_resuming_byte(ctx.resume_len)
ctx.open_mode = 'ab' ctx.open_mode = 'ab'
@ -99,7 +105,17 @@ def establish_connection():
else: else:
range_start = None range_start = None
ctx.is_resume = False ctx.is_resume = False
range_end = range_start + ctx.chunk_size - 1 if ctx.chunk_size else None
if ctx.chunk_size:
chunk_aware_end = range_start + ctx.chunk_size - 1
# we're not allowed to download outside Range
range_end = chunk_aware_end if req_end is None else min(chunk_aware_end, req_end)
elif req_end is not None:
# there's no need for chunked downloads, so download until the end of Range
range_end = req_end
else:
range_end = None
if range_end and ctx.data_len is not None and range_end >= ctx.data_len: if range_end and ctx.data_len is not None and range_end >= ctx.data_len:
range_end = ctx.data_len - 1 range_end = ctx.data_len - 1
has_range = range_start is not None has_range = range_start is not None
@ -124,23 +140,19 @@ def establish_connection():
# https://github.com/ytdl-org/youtube-dl/issues/6057#issuecomment-126129799) # https://github.com/ytdl-org/youtube-dl/issues/6057#issuecomment-126129799)
if has_range: if has_range:
content_range = ctx.data.headers.get('Content-Range') content_range = ctx.data.headers.get('Content-Range')
if content_range: content_range_start, content_range_end, content_len = parse_http_range(content_range)
content_range_m = re.search(r'bytes (\d+)-(\d+)?(?:/(\d+))?', content_range) if content_range_start is not None and range_start == content_range_start:
# Content-Range is present and matches requested Range, resume is possible # Content-Range is present and matches requested Range, resume is possible
if content_range_m: accept_content_len = (
if range_start == int(content_range_m.group(1)): # Non-chunked download
content_range_end = int_or_none(content_range_m.group(2)) not ctx.chunk_size
content_len = int_or_none(content_range_m.group(3)) # Chunked download and requested piece or
accept_content_len = ( # its part is promised to be served
# Non-chunked download or content_range_end == range_end
not ctx.chunk_size or content_len < range_end)
# Chunked download and requested piece or if accept_content_len:
# its part is promised to be served ctx.data_len = content_len
or content_range_end == range_end return
or content_len < range_end)
if accept_content_len:
ctx.data_len = content_len
return
# Content-Range is either not present or invalid. Assuming remote webserver is # Content-Range is either not present or invalid. Assuming remote webserver is
# trying to send the whole file, resume is not possible, so wiping the local file # trying to send the whole file, resume is not possible, so wiping the local file
# and performing entire redownload # and performing entire redownload

View File

@ -5252,6 +5252,16 @@ def join_nonempty(*values, delim='-', from_dict=None):
return delim.join(map(str, filter(None, values))) return delim.join(map(str, filter(None, values)))
def parse_http_range(range):
    """Parse the value of a "Range" or "Content-Range" HTTP header into a tuple.

    Both the request form ("bytes=0-499") and the response form
    ("bytes 0-499/1234") are accepted. The "bytes" unit is matched
    case-insensitively, because HTTP range unit names are case-insensitive.

    @param range    Header value; may be None or an empty string.
    @returns        A (start, end, total) tuple. `start` is an int; `end` and
                    `total` are ints, or None when the header omits them
                    (e.g. "bytes=500-" or "bytes 0-499/*").
                    (None, None, None) is returned when the header is missing
                    or contains no "<start>-" byte range; suffix ranges such
                    as "bytes=-500" are therefore not recognised.
    """
    if not range:
        return None, None, None
    crg = re.search(r'bytes[ =](\d+)-(\d+)?(?:/(\d+))?', range, re.IGNORECASE)
    if not crg:
        return None, None, None
    start, end, total = crg.groups()
    # The optional groups are either None (absent) or a run of digits,
    # so a plain int() conversion cannot fail here.
    return (
        int(start),
        int(end) if end is not None else None,
        int(total) if total is not None else None,
    )
class Config: class Config:
own_args = None own_args = None
filename = None filename = None