From 2b8a2973bde415fc227790275dfd3e55e43babae Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sat, 7 Aug 2021 05:12:54 +0530 Subject: [PATCH] Allow entire infodict to be printed using `%()s` Makes `--dump-json` redundant --- README.md | 3 ++- test/test_YoutubeDL.py | 27 +++++++++++++++++---------- yt_dlp/YoutubeDL.py | 18 +++++++++++------- 3 files changed, 30 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index d4f436d926..40a3bb8735 100644 --- a/README.md +++ b/README.md @@ -919,7 +919,7 @@ # OUTPUT TEMPLATE It may however also contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [python string formatting operations](https://docs.python.org/2/library/stdtypes.html#string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by formatting operations. The field names themselves (the part inside the parenthesis) can also have some special formatting: -1. **Object traversal**: The dictionaries and lists available in metadata can be traversed by using a `.` (dot) separator. You can also do python slicing using `:`. Eg: `%(tags.0)s`, `%(subtitles.en.-1.ext)`, `%(id.3:7:-1)s`, `%(formats.:.format_id)s`. Note that all the fields that become available using this method are not listed below. Use `-j` to see such fields +1. **Object traversal**: The dictionaries and lists available in metadata can be traversed by using a `.` (dot) separator. You can also do python slicing using `:`. Eg: `%(tags.0)s`, `%(subtitles.en.-1.ext)s`, `%(id.3:7:-1)s`, `%(formats.:.format_id)s`. `%()s` refers to the entire infodict. Note that all the fields that become available using this method are not listed below. Use `-j` to see such fields 1. **Addition**: Addition and subtraction of numeric fields can be done using `+` and `-` respectively. Eg: `%(playlist_index+10)03d`, `%(n_entries+1-playlist_index)d` 1. **Date/time Formatting**: Date/time fields can be formatted according to [strftime formatting](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes) by specifying it separated from the field name using a `>`. Eg: `%(duration>%H-%M-%S)s`, `%(upload_date>%Y-%m-%d)s`, `%(epoch-3600>%H-%M-%S)s` 1. **Default**: A default value can be specified for when the field is empty using a `|` seperator. This overrides `--output-na-template`. Eg: `%(uploader|Unknown)s` @@ -1417,6 +1417,7 @@ #### Redundant options --get-thumbnail --print thumbnail -e, --get-title --print title -g, --get-url --print urls + -j, --dump-json --print "%()j" #### Not recommended diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 1e08651022..7e01330273 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -668,15 +668,13 @@ def test(tmpl, expected, *, info=None, **params): out = ydl.escape_outtmpl(outtmpl) % tmpl_dict fname = ydl.prepare_filename(info or self.outtmpl_info) - if callable(expected): - self.assertTrue(expected(out)) - self.assertTrue(expected(fname)) - elif isinstance(expected, str): - self.assertEqual(out, expected) - self.assertEqual(fname, expected) - else: - self.assertEqual(out, expected[0]) - self.assertEqual(fname, expected[1]) + if not isinstance(expected, (list, tuple)): + expected = (expected, expected) + for (name, got), expect in zip((('outtmpl', out), ('filename', fname)), expected): + if callable(expect): + self.assertTrue(expect(got), f'Wrong {name} from {tmpl}') + else: + self.assertEqual(got, expect, f'Wrong {name} from {tmpl}') # Side-effects original_infodict = dict(self.outtmpl_info) @@ -721,7 +719,16 @@ def test(tmpl, expected, *, info=None, **params): # Invalid templates self.assertTrue(isinstance(YoutubeDL.validate_outtmpl('%(title)'), ValueError)) test('%(invalid@tmpl|def)s', 'none', outtmpl_na_placeholder='none') - test('%()s', 'NA') + test('%(..)s', 'NA') + + # Entire info_dict + def expect_same_infodict(out): + got_dict = json.loads(out) + for info_field, expected in self.outtmpl_info.items(): + self.assertEqual(got_dict.get(info_field), expected, info_field) + return True + + test('%()j', (expect_same_infodict, str)) # NA placeholder NA_TEST_OUTTMPL = '%(uploader_date)s-%(width)d-%(x|def)s-%(id)s.%(ext)s' diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index acd85af05e..ac99dd45b5 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -917,7 +917,7 @@ def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None): } # Field is of the form key1.key2... # where keys (except first) can be string, int or slice - FIELD_RE = r'\w+(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)') + FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)') MATH_FIELD_RE = r'''{field}|{num}'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?') MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys())) INTERNAL_FORMAT_RE = re.compile(r'''(?x) @@ -928,12 +928,15 @@ def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None): (?:\|(?P.*?))? $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE)) - get_key = lambda k: traverse_obj( - info_dict, k.split('.'), is_user_input=True, traverse_string=True) + def _traverse_infodict(k): + k = k.split('.') + if k[0] == '': + k.pop(0) + return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True) def get_value(mdict): # Object traversal - value = get_key(mdict['fields']) + value = _traverse_infodict(mdict['fields']) # Negative if mdict['negate']: value = float_or_none(value) @@ -955,7 +958,7 @@ def get_value(mdict): item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1) offset = float_or_none(item) if offset is None: - offset = float_or_none(get_key(item)) + offset = float_or_none(_traverse_infodict(item)) try: value = operator(value, multiplier * offset) except (TypeError, ZeroDivisionError): @@ -2378,6 +2381,8 @@ def print_optional(field): elif 'url' in info_dict: info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '') + if self.params.get('forceprint') or self.params.get('forcejson'): + self.post_extract(info_dict) for tmpl in self.params.get('forceprint', []): if re.match(r'\w+$', tmpl): tmpl = '%({})s'.format(tmpl) @@ -2394,8 +2399,7 @@ def print_optional(field): self.to_stdout(formatSeconds(info_dict['duration'])) print_mandatory('format') - if self.params.get('forcejson', False): - self.post_extract(info_dict) + if self.params.get('forcejson'): self.to_stdout(json.dumps(self.sanitize_info(info_dict))) def dl(self, name, info, subtitle=False, test=False):