[extractor/hotstar] Improve format metadata

This commit is contained in:
pukkandan 2022-12-09 15:17:51 +05:30
parent 7108221662
commit e74a3c6dcc
No known key found for this signature in database
GPG Key ID: 7EEE9E1E817D0A39

View File

@ -148,6 +148,12 @@ class HotStarIE(HotStarBaseIE):
'dr': 'dynamic_range', 'dr': 'dynamic_range',
} }
# Maps a format-dict field name (key) to the tag name (value) looked up in
# the `tag_dict` that `_real_extract` builds by splitting `tags` on ';' and ':'.
# For every extracted format, a field listed here that is missing or falsy is
# filled with `tag_dict.get(<tag name>)` (which may itself be None).
_TAG_FIELDS = {
'language': 'language',
'acodec': 'audio_codec',
'vcodec': 'video_codec',
}
@classmethod @classmethod
def _video_url(cls, video_id, video_type=None, *, slug='ignore_me', root=None): def _video_url(cls, video_id, video_type=None, *, slug='ignore_me', root=None):
assert None in (video_type, root) assert None in (video_type, root)
@ -182,24 +188,22 @@ def _real_extract(self, url):
for key, prefix in self._IGNORE_MAP.items() for key, prefix in self._IGNORE_MAP.items()
for ignore in self._configuration_arg(key)): for ignore in self._configuration_arg(key)):
continue continue
tag_dict = dict((t.split(':', 1) + [None])[:2] for t in tags.split(';'))
format_url = url_or_none(playback_set.get('playbackUrl')) format_url = url_or_none(playback_set.get('playbackUrl'))
if not format_url: if not format_url:
continue continue
format_url = re.sub(r'(?<=//staragvod)(\d)', r'web\1', format_url) format_url = re.sub(r'(?<=//staragvod)(\d)', r'web\1', format_url)
dr = re.search(r'dynamic_range:(?P<dr>[a-z]+)', playback_set.get('tagsCombination')).group('dr')
ext = determine_ext(format_url) ext = determine_ext(format_url)
current_formats, current_subs = [], {} current_formats, current_subs = [], {}
try: try:
if 'package:hls' in tags or ext == 'm3u8': if 'package:hls' in tags or ext == 'm3u8':
current_formats, current_subs = self._extract_m3u8_formats_and_subtitles( current_formats, current_subs = self._extract_m3u8_formats_and_subtitles(
format_url, video_id, 'mp4', format_url, video_id, ext='mp4', headers=headers)
entry_protocol='m3u8_native',
m3u8_id=f'{dr}-hls', headers=headers)
elif 'package:dash' in tags or ext == 'mpd': elif 'package:dash' in tags or ext == 'mpd':
current_formats, current_subs = self._extract_mpd_formats_and_subtitles( current_formats, current_subs = self._extract_mpd_formats_and_subtitles(
format_url, video_id, mpd_id=f'{dr}-dash', headers=headers) format_url, video_id, headers=headers)
elif ext == 'f4m': elif ext == 'f4m':
pass # XXX: produce broken files pass # XXX: produce broken files
else: else:
@ -213,20 +217,32 @@ def _real_extract(self, url):
geo_restricted = True geo_restricted = True
continue continue
if tags and 'encryption:plain' not in tags: if tag_dict.get('encryption') not in ('plain', None):
for f in current_formats: for f in current_formats:
f['has_drm'] = True f['has_drm'] = True
if tags and 'language' in tags: for f in current_formats:
lang = re.search(r'language:(?P<lang>[a-z]+)', tags).group('lang') for k, v in self._TAG_FIELDS.items():
for f in current_formats: if not f.get(k):
if not f.get('langauge'): f[k] = tag_dict.get(v)
f['language'] = lang if f.get('vcodec') != 'none' and not f.get('dynamic_range'):
f['dynamic_range'] = tag_dict.get('dynamic_range')
if f.get('acodec') != 'none' and not f.get('audio_channels'):
f['audio_channels'] = {
'stereo': 2,
'dolby51': 6,
}.get(tag_dict.get('audio_channel'))
f['format_note'] = join_nonempty(
tag_dict.get('ladder'),
tag_dict.get('audio_channel') if f.get('acodec') != 'none' else None,
f.get('format_note'),
delim=', ')
formats.extend(current_formats) formats.extend(current_formats)
subs = self._merge_subtitles(subs, current_subs) subs = self._merge_subtitles(subs, current_subs)
if not formats and geo_restricted: if not formats and geo_restricted:
self.raise_geo_restricted(countries=['IN'], metadata_available=True) self.raise_geo_restricted(countries=['IN'], metadata_available=True)
self._remove_duplicate_formats(formats)
for f in formats: for f in formats:
f.setdefault('http_headers', {}).update(headers) f.setdefault('http_headers', {}).update(headers)
@ -235,7 +251,7 @@ def _real_extract(self, url):
'title': video_data.get('title'), 'title': video_data.get('title'),
'description': video_data.get('description'), 'description': video_data.get('description'),
'duration': int_or_none(video_data.get('duration')), 'duration': int_or_none(video_data.get('duration')),
'timestamp': int_or_none(video_data.get('broadcastDate') or video_data.get('startDate')), 'timestamp': int_or_none(traverse_obj(video_data, 'broadcastDate', 'startDate')),
'formats': formats, 'formats': formats,
'subtitles': subs, 'subtitles': subs,
'channel': video_data.get('channelName'), 'channel': video_data.get('channelName'),