[ie] Add new fields with proper support for multiple values

This commit is contained in:
Lev Plyusnin 2024-01-03 08:35:28 +07:00
parent 85b33f5c16
commit 071326c0cc
No known key found for this signature in database
GPG Key ID: 21C6C2C9C0A4460D
6 changed files with 87 additions and 13 deletions

View File

@ -24,6 +24,7 @@
import unicodedata
from .cache import Cache
from .compat import functools, urllib # isort: split
from .compat import compat_os_name, compat_shlex_quote, urllib_req_to_req
from .cookies import LenientSimpleCookie, load_cookies
@ -1735,6 +1736,7 @@ def __extract_info(self, url, ie, download, extra_info, process):
'_type': 'compat_list',
'entries': ie_result,
}
self.fix_deprecated_fields(ie_result)
if extra_info.get('original_url'):
ie_result.setdefault('original_url', extra_info['original_url'])
self.add_default_extra_info(ie_result, ie, url)
@ -1744,6 +1746,19 @@ def __extract_info(self, url, ie, download, extra_info, process):
else:
return ie_result
def fix_deprecated_fields(self, ie_result):
    """Fill the multi-value "*_list" fields from their deprecated counterparts.

    For each deprecated field ("artist", "composer", "album_artist", "genre")
    that carries a non-empty value in ``ie_result``, a deprecation warning is
    emitted and the matching "*_list" key is populated, unless the result
    already provides a value for that key. ``ie_result`` is modified in place;
    the deprecated keys themselves are left untouched.

    @param ie_result    The info dict returned by an extractor
    """
    deprecated_multivalue_fields = {
        'artist': 'artist_list',
        'composer': 'composer_list',
        'album_artist': 'album_artist_list',
        'genre': 'genre_list',
    }
    for deprecated_field, new_field in deprecated_multivalue_fields.items():
        value = ie_result.get(deprecated_field)
        # A key that is present but None/empty carries no data; splitting it
        # would either raise TypeError (None) or produce [''] (empty string)
        if not value:
            continue
        self.deprecation_warning(f'"{deprecated_field}" field is deprecated. Use "{new_field}" instead')
        # Never clobber a list that was already supplied explicitly
        if ie_result.get(new_field) is not None:
            continue
        if isinstance(value, str):
            ie_result[new_field] = re.split(r', ?', value)
        elif isinstance(value, (list, tuple)):
            # The deprecated field was already given as a sequence
            ie_result[new_field] = [str(item) for item in value]
        else:
            ie_result[new_field] = [str(value)]
def add_default_extra_info(self, ie_result, ie, url):
if url is not None:
self.add_extra_info(ie_result, {
@ -3918,10 +3933,9 @@ def print_debug_header(self):
# These imports can be slow. So import them only as needed
from .extractor.extractors import _LAZY_LOADER
from .extractor.extractors import (
_PLUGIN_CLASSES as plugin_ies,
from .extractor.extractors import _PLUGIN_CLASSES as plugin_ies
from .extractor.extractors import \
_PLUGIN_OVERRIDES as plugin_ie_overrides
)
def get_encoding(stream):
ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))

View File

@ -670,6 +670,11 @@ def get_postprocessors(opts):
'add_metadata': opts.addmetadata,
'add_infojson': opts.embed_infojson,
}
# MutagenMetadata must run after FFmpegMetadata
if opts.addmetadata:
yield {
'key': 'MutagenMetadata',
}
# Deprecated
# This should be above EmbedThumbnail since sponskrub removes the thumbnail attachment
# but must be below EmbedSubtitle and FFmpegMetadata

View File

@ -422,16 +422,23 @@ class InfoExtractor:
track_number: Number of the track within an album or a disc, as an integer.
track_id: Id of the track (useful in case of custom indexing, e.g. 6.iii),
as a unicode string.
artist: Artist(s) of the track.
genre: Genre(s) of the track.
artist_list: Artist(s) of the track, as a list of unicode strings.
composer_list: Composer(s) of the piece, as a list of unicode strings.
genre_list: Genre(s) of the track, as a list of unicode strings.
album: Title of the album the track belongs to.
album_type: Type of the album (e.g. "Demo", "Full-length", "Split", "Compilation", etc).
album_artist: List of all artists appeared on the album (e.g.
"Ash Borer / Fell Voices" or "Various Artists", useful for splits
and compilations).
album_artist_list: All artists that appeared on the album, as a list of unicode strings
(e.g. ["Ash Borer", "Fell Voices"] or ["Various Artists"],
useful for splits and compilations).
disc_number: Number of the disc or other physical medium the track belongs to,
as an integer.
composer: Composer of the piece
composer: Deprecated, use "composer_list" instead. Composer(s) of the piece,
comma-separated.
artist: Deprecated, use "artist_list" instead. Artist(s) of the track, comma-separated.
genre: Deprecated, use "genre_list" instead. Genre(s) of the track, comma-separated.
album_artist: Deprecated, use "album_artist_list" instead. All artists that appeared on the
album, comma-separated.
The following fields should only be set for clips that should be cut from the original video:

View File

@ -30,6 +30,7 @@
)
from .modify_chapters import ModifyChaptersPP
from .movefilesafterdownload import MoveFilesAfterDownloadPP
from .mutagenmetadata import MutagenMetadataPP
from .sponskrub import SponSkrubPP
from .sponsorblock import SponsorBlockPP
from .xattrpp import XAttrMetadataPP

View File

@ -23,6 +23,7 @@
encodeFilename,
filter_dict,
float_or_none,
is_iterable_like,
is_outdated_version,
orderedSet,
prepend_extension,
@ -738,9 +739,12 @@ def _get_metadata_opts(self, info):
def add(meta_list, info_list=None):
value = next((
str(info[key]) for key in [f'{meta_prefix}_'] + list(variadic(info_list or meta_list))
info[key] for key in [f'{meta_prefix}_'] + list(variadic(info_list or meta_list))
if info.get(key) is not None), None)
if value not in ('', None):
if is_iterable_like(value):
value = ', '.join(value)
value = str(value)
value = value.replace('\0', '') # nul character cannot be passed in command line
metadata['common'].update({meta_f: value for meta_f in variadic(meta_list)})
@ -754,10 +758,11 @@ def add(meta_list, info_list=None):
add(('description', 'synopsis'), 'description')
add(('purl', 'comment'), 'webpage_url')
add('track', 'track_number')
add('artist', ('artist', 'creator', 'uploader', 'uploader_id'))
add('genre')
add('artist', ('artist_list', 'creator', 'uploader', 'uploader_id'))
add('composer', 'composer_list')
add('genre', 'genre_list')
add('album')
add('album_artist')
add('album_artist', 'album_artist_list')
add('disc', 'disc_number')
add('show', 'series')
add('season_number')

View File

@ -0,0 +1,42 @@
from .common import PostProcessor
from ..dependencies import mutagen
if mutagen:
from mutagen.easymp4 import EasyMP4
from mutagen.flac import FLAC
from mutagen.mp3 import EasyMP3
from mutagen.musepack import Musepack
from mutagen.oggopus import OggOpus
from mutagen.oggvorbis import OggVorbis
class MutagenMetadataPP(PostProcessor):
    """Write multi-value tags (artist, album artist, genre, composer) with mutagen.

    Each "*_list" field of the info dict is stored as a list under the
    corresponding mutagen tag key. When mutagen is not available the
    postprocessor only warns (for the audio extensions listed below) and
    leaves the file untouched.
    """

    def __init__(self, downloader):
        PostProcessor.__init__(self, downloader)

    @PostProcessor._restrict_to(images=False)
    def run(self, information):
        """Tag the file at information['filepath'].

        Returns ``([], information)``: an empty list as the first element
        and the info dict as the second.
        """
        extension = information['ext']
        ret = [], information
        if not mutagen:
            # Entries are bare extensions; the original '.mpc' entry carried a
            # leading dot unlike its siblings and therefore could never match
            if extension in ['mp3', 'm4a', 'ogg', 'opus', 'flac', 'mpc']:
                self.report_warning('module mutagen was not found. Tags with multiple values (e.g. artist, album artist and genre) may be set incorrectly. Please install using `python -m pip install mutagen`')
            return ret

        # mutagen tag key -> info dict field holding the list of values
        tag_mapping = {
            'artist': 'artist_list',
            'albumartist': 'album_artist_list',
            'genre': 'genre_list',
            'composer': 'composer_list'
        }
        supported_formats = [EasyMP3, EasyMP4, OggVorbis, OggOpus, FLAC, Musepack]
        audio = mutagen.File(information['filepath'], supported_formats)
        if not audio:
            # mutagen.File gave back nothing usable for this file; nothing to tag
            return ret

        if isinstance(audio, EasyMP4):
            # 'composer' is registered here against the '\251wrt' ('©wrt') atom
            audio.RegisterTextKey('composer', '\251wrt')

        modified = False
        for tag_key, info_key in tag_mapping.items():
            value = information.get(info_key)
            if value:
                audio[tag_key] = value
                modified = True

        # Avoid rewriting the file when no tag was actually changed
        if modified:
            audio.save()
        return ret