[extractor/youtube] Extract concurrent view count for livestreams (#5152)

Adds new field `concurrent_view_count`

Closes https://github.com/yt-dlp/yt-dlp/issues/4843
Authored by: coletdjnz
2024-11-24 07:40:30 +01:00 · 2022-10-07 20:00:40 +13:00 · 2022-10-07 20:00:40 +13:00 · 867c66ff97
commit 867c66ff97
parent f03940963e
3 changed files with 21 additions and 8 deletions
--- a/README.md
+++ b/README.md
@ -1226,6 +1226,7 @@ # OUTPUT TEMPLATE
 - `duration` (numeric): Length of the video in seconds
 - `duration_string` (string): Length of the video (HH:mm:ss)
 - `view_count` (numeric): How many users have watched the video on the platform
+ - `concurrent_view_count` (numeric): How many users are currently watching the video on the platform.
 - `like_count` (numeric): Number of positive ratings of the video
 - `dislike_count` (numeric): Number of negative ratings of the video
 - `repost_count` (numeric): Number of reposts of the video
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@ -284,6 +284,7 @@ class InfoExtractor:
                    captions instead of normal subtitles
    duration:       Length of the video in seconds, as an integer or float.
    view_count:     How many users have watched the video on the platform.
+    concurrent_view_count: How many users are currently watching the video on the platform.
    like_count:     Number of positive ratings of the video
    dislike_count:  Number of negative ratings of the video
    repost_count:   Number of reposts of the video
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@ -912,8 +912,7 @@ def _extract_video(self, renderer):
                traverse_obj(renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str),
                video_id, default=None, group='duration'))

-        view_count = self._get_count(renderer, 'viewCountText')
-
+        view_count = self._get_count(renderer, 'viewCountText', 'shortViewCountText')
        uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
        channel_id = traverse_obj(
            renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
@ -932,6 +931,12 @@ def _extract_video(self, renderer):
        if overlay_style == 'SHORTS' or '/shorts/' in navigation_url:
            url = f'https://www.youtube.com/shorts/{video_id}'

+        live_status = (
+            'is_upcoming' if scheduled_timestamp is not None
+            else 'was_live' if 'streamed' in time_text.lower()
+            else 'is_live' if overlay_style == 'LIVE' or self._has_badge(badges, BadgeType.LIVE_NOW)
+            else None)
+
        return {
            '_type': 'url',
            'ie_key': YoutubeIE.ie_key(),
@ -940,17 +945,12 @@ def _extract_video(self, renderer):
            'title': title,
            'description': description,
            'duration': duration,
-            'view_count': view_count,
            'uploader': uploader,
            'channel_id': channel_id,
            'thumbnails': thumbnails,
            'upload_date': (strftime_or_none(self._parse_time_text(time_text), '%Y%m%d')
                            if self._configuration_arg('approximate_date', ie_key='youtubetab')
                            else None),
-            'live_status': ('is_upcoming' if scheduled_timestamp is not None
-                            else 'was_live' if 'streamed' in time_text.lower()
-                            else 'is_live' if overlay_style == 'LIVE' or self._has_badge(badges, BadgeType.LIVE_NOW)
-                            else None),
            'release_timestamp': scheduled_timestamp,
            'availability':
                'public' if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
@ -958,7 +958,8 @@ def _extract_video(self, renderer):
                    is_private=self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE) or None,
                    needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
                    needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
-                    is_unlisted=self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED) or None)
+                    is_unlisted=self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED) or None),
+            'concurrent_view_count' if live_status in ('is_live', 'is_upcoming') else 'view_count': view_count,
        }


@ -2328,6 +2329,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                'view_count': int,
                'playable_in_embed': True,
                'description': 'md5:2ef1d002cad520f65825346e2084e49d',
+                'concurrent_view_count': int,
            },
            'params': {'skip_download': True}
        }, {
@ -4115,6 +4117,15 @@ def process_language(container, base_url, lang_code, sub_name, query):
                    'like_count': str_to_int(like_count),
                    'dislike_count': str_to_int(dislike_count),
                })
+            vcr = traverse_obj(vpir, ('viewCount', 'videoViewCountRenderer'))
+            if vcr:
+                vc = self._get_count(vcr, 'viewCount')
+                # Upcoming premieres with waiting count are treated as live here
+                if vcr.get('isLive'):
+                    info['concurrent_view_count'] = vc
+                elif info.get('view_count') is None:
+                    info['view_count'] = vc
+
        vsir = get_first(contents, 'videoSecondaryInfoRenderer')
        if vsir:
            vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))