From 1d253b0a27110d174c40faf8fb1c999d099e0cde Mon Sep 17 00:00:00 2001
From: Jackson Humphrey <jackson.s.humphrey@gmail.com>
Date: Sat, 16 Nov 2024 14:02:14 -0600
Subject: [PATCH] [ie/patreon] Fix comments extraction (#11530)

Closes #11483
Authored by: jshumphrey, bashonly

Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>
---
 yt_dlp/extractor/patreon.py | 51 +++++++++++++++++++++++++------------
 1 file changed, 35 insertions(+), 16 deletions(-)

diff --git a/yt_dlp/extractor/patreon.py b/yt_dlp/extractor/patreon.py
index 4d668cd37d..6bdeaf1571 100644
--- a/yt_dlp/extractor/patreon.py
+++ b/yt_dlp/extractor/patreon.py
@@ -16,10 +16,10 @@
     parse_iso8601,
     smuggle_url,
     str_or_none,
-    traverse_obj,
     url_or_none,
     urljoin,
 )
+from ..utils.traversal import traverse_obj, value
 
 
 class PatreonBaseIE(InfoExtractor):
@@ -252,6 +252,27 @@ class PatreonIE(PatreonBaseIE):
             'thumbnail': r're:^https?://.+',
         },
         'skip': 'Patron-only content',
+    }, {
+        # Contains a comment reply in the 'included' section
+        'url': 'https://www.patreon.com/posts/114721679',
+        'info_dict': {
+            'id': '114721679',
+            'ext': 'mp4',
+            'upload_date': '20241025',
+            'uploader': 'Japanalysis',
+            'like_count': int,
+            'thumbnail': r're:^https?://.+',
+            'comment_count': int,
+            'title': 'Karasawa Part 2',
+            'description': 'Part 2 of this video https://www.youtube.com/watch?v=Azms2-VTASk',
+            'uploader_url': 'https://www.patreon.com/japanalysis',
+            'uploader_id': '80504268',
+            'channel_url': 'https://www.patreon.com/japanalysis',
+            'channel_follower_count': int,
+            'timestamp': 1729897015,
+            'channel_id': '9346307',
+        },
+        'params': {'getcomments': True},
     }]
     _RETURN_TYPE = 'video'
 
@@ -404,26 +425,24 @@ def _get_comments(self, post_id):
                 f'posts/{post_id}/comments', post_id, query=params, note=f'Downloading comments page {page}')
 
             cursor = None
-            for comment in traverse_obj(response, (('data', ('included', lambda _, v: v['type'] == 'comment')), ...)):
+            for comment in traverse_obj(response, (('data', 'included'), lambda _, v: v['type'] == 'comment' and v['id'])):
                 count += 1
-                comment_id = comment.get('id')
-                attributes = comment.get('attributes') or {}
-                if comment_id is None:
-                    continue
                 author_id = traverse_obj(comment, ('relationships', 'commenter', 'data', 'id'))
-                author_info = traverse_obj(
-                    response, ('included', lambda _, v: v['id'] == author_id and v['type'] == 'user', 'attributes'),
-                    get_all=False, expected_type=dict, default={})
 
                 yield {
-                    'id': comment_id,
-                    'text': attributes.get('body'),
-                    'timestamp': parse_iso8601(attributes.get('created')),
-                    'parent': traverse_obj(comment, ('relationships', 'parent', 'data', 'id'), default='root'),
-                    'author_is_uploader': attributes.get('is_by_creator'),
+                    **traverse_obj(comment, {
+                        'id': ('id', {str_or_none}),
+                        'text': ('attributes', 'body', {str}),
+                        'timestamp': ('attributes', 'created', {parse_iso8601}),
+                        'parent': ('relationships', 'parent', 'data', ('id', {value('root')}), {str}, any),
+                        'author_is_uploader': ('attributes', 'is_by_creator', {bool}),
+                    }),
+                    **traverse_obj(response, (
+                        'included', lambda _, v: v['id'] == author_id and v['type'] == 'user', 'attributes', {
+                            'author': ('full_name', {str}),
+                            'author_thumbnail': ('image_url', {url_or_none}),
+                        }), get_all=False),
                     'author_id': author_id,
-                    'author': author_info.get('full_name'),
-                    'author_thumbnail': author_info.get('image_url'),
                 }
 
             if count < traverse_obj(response, ('meta', 'count')):