[core] Fix the byte string-format going over the specified byte limit

The byte string-format should be applied after sanitization is done, because sanitization may replace a single-byte character with a multi-byte one (e.g. '/' with '⧸'), making the resulting string exceed the desired byte limit. Fixes #10060
2024-11-20 05:47:24 +01:00 · 2024-05-30 11:48:20 -04:00 · 2024-05-30 11:48:20 -04:00 · ec2ee10f34
commit ec2ee10f34
parent cc767e9490
2 changed files with 9 additions and 7 deletions
--- a/test/test_YoutubeDL.py
+++ b/test/test_YoutubeDL.py
@ -656,13 +656,13 @@ def test_add_extra_info(self):
    }
    def test_prepare_outtmpl_and_filename(self):
-        def test(tmpl, expected, *, info=None, **params):
+        def test(tmpl, expected, *, info=None, sanitize=False, **params):
            params['outtmpl'] = tmpl
            ydl = FakeYDL(params)
            ydl._num_downloads = 1
            self.assertEqual(ydl.validate_outtmpl(tmpl), None)
-            out = ydl.evaluate_outtmpl(tmpl, info or self.outtmpl_info)
+            out = ydl.evaluate_outtmpl(tmpl, info or self.outtmpl_info, sanitize=sanitize)
            fname = ydl.prepare_filename(info or self.outtmpl_info)
            if not isinstance(expected, (list, tuple)):
@ -861,6 +861,7 @@ def gen():
        test('Hello %(title2)s', 'Hello %PATH%')
        test('%(title3)s', ('foo/bar\\test', 'foo⧸bar⧹test'))
        test('folder/%(title3)s', ('folder/foo/bar\\test', f'folder{os.path.sep}foo⧸bar⧹test'))
        test('%(title3).7B', 'foo⧸b', sanitize=True)
    def test_format_note(self):
        ydl = YoutubeDL()
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@ -1371,9 +1371,6 @@ def create_key(outer_mobj):
            elif fmt[-1] == 'q':  # quoted
                value = map(str, variadic(value) if '#' in flags else [value])
                value, fmt = shell_quote(value, shell=True), str_fmt
            elif fmt[-1] == 'B':  # bytes
                value = f'%{str_fmt}'.encode() % str(value).encode()
                value, fmt = value.decode('utf-8', 'ignore'), 's'
            elif fmt[-1] == 'U':  # unicode normalized
                value, fmt = unicodedata.normalize(
                    # "+" = compatibility equivalence, "#" = NFD
@ -1390,7 +1387,7 @@ def create_key(outer_mobj):
                    value = str(value)[0]
                else:
                    fmt = str_fmt
-            elif fmt[-1] not in 'rsa':  # numeric
+            elif fmt[-1] not in 'rsaB':  # numeric
                value = float_or_none(value)
                if value is None:
                    value, fmt = default, 's'
@ -1402,9 +1399,13 @@ def create_key(outer_mobj):
                    value, fmt = repr(value), str_fmt
                elif fmt[-1] == 'a':
                    value, fmt = ascii(value), str_fmt
-                if fmt[-1] in 'csra':
+                if fmt[-1] in 'csraB':
                    value = sanitizer(last_field, value)
            if fmt[-1] == 'B':  # bytes
                value = f'%{str_fmt}'.encode() % str(value).encode()
                value, fmt = value.decode('utf-8', 'ignore'), 's'
            key = '{}\0{}'.format(key.replace('%', '%\0'), outer_mobj.group('format'))
            TMPL_DICT[key] = value
            return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))