From 273762c8d045ace16143a6614c8d258f02a8094b Mon Sep 17 00:00:00 2001 From: siikamiika Date: Mon, 15 Feb 2021 11:57:21 +0200 Subject: [PATCH] #86 [youtube_live_chat] Use POST API (Closes #82) YouTube has removed support for the old GET based live chat API, and it's now returning 404 Authored by siikamiika --- youtube_dlc/downloader/fragment.py | 3 +- youtube_dlc/downloader/http.py | 5 +- youtube_dlc/downloader/youtube_live_chat.py | 75 +++++++++++---------- 3 files changed, 45 insertions(+), 38 deletions(-) diff --git a/youtube_dlc/downloader/fragment.py b/youtube_dlc/downloader/fragment.py index f4104c713..5bc7f50f6 100644 --- a/youtube_dlc/downloader/fragment.py +++ b/youtube_dlc/downloader/fragment.py @@ -95,11 +95,12 @@ def _write_ytdl_file(self, ctx): frag_index_stream.write(json.dumps({'downloader': downloader})) frag_index_stream.close() - def _download_fragment(self, ctx, frag_url, info_dict, headers=None): + def _download_fragment(self, ctx, frag_url, info_dict, headers=None, request_data=None): fragment_filename = '%s-Frag%d' % (ctx['tmpfilename'], ctx['fragment_index']) fragment_info_dict = { 'url': frag_url, 'http_headers': headers or info_dict.get('http_headers'), + 'request_data': request_data, } success = ctx['dl'].download(fragment_filename, fragment_info_dict) if not success: diff --git a/youtube_dlc/downloader/http.py b/youtube_dlc/downloader/http.py index d8ac41dcc..bf77f4427 100644 --- a/youtube_dlc/downloader/http.py +++ b/youtube_dlc/downloader/http.py @@ -27,6 +27,7 @@ class HttpFD(FileDownloader): def real_download(self, filename, info_dict): url = info_dict['url'] + request_data = info_dict.get('request_data', None) class DownloadContext(dict): __getattr__ = dict.get @@ -101,7 +102,7 @@ def establish_connection(): range_end = ctx.data_len - 1 has_range = range_start is not None ctx.has_range = has_range - request = sanitized_Request(url, None, headers) + request = sanitized_Request(url, request_data, headers) if has_range: set_range(request, range_start, range_end) # Establish connection @@ -152,7 +153,7 @@ def establish_connection(): try: # Open the connection again without the range header ctx.data = self.ydl.urlopen( - sanitized_Request(url, None, headers)) + sanitized_Request(url, request_data, headers)) content_length = ctx.data.info()['Content-Length'] except (compat_urllib_error.HTTPError, ) as err: if err.code < 500 or err.code >= 600: diff --git a/youtube_dlc/downloader/youtube_live_chat.py b/youtube_dlc/downloader/youtube_live_chat.py index 5ac24c020..8e173d8b5 100644 --- a/youtube_dlc/downloader/youtube_live_chat.py +++ b/youtube_dlc/downloader/youtube_live_chat.py @@ -1,11 +1,13 @@ from __future__ import division, unicode_literals -import re import json from .fragment import FragmentFD from ..compat import compat_urllib_error -from ..utils import try_get +from ..utils import ( + try_get, + RegexNotFoundError, +) from ..extractor.youtube import YoutubeBaseInfoExtractor as YT_BaseIE @@ -27,40 +29,28 @@ def real_download(self, filename, info_dict): 'total_frags': None, } - def dl_fragment(url): - headers = info_dict.get('http_headers', {}) - return self._download_fragment(ctx, url, info_dict, headers) + ie = YT_BaseIE(self.ydl) - def parse_yt_initial_data(data): - patterns = ( - r'%s\\s*%s' % (YT_BaseIE._YT_INITIAL_DATA_RE, YT_BaseIE._YT_INITIAL_BOUNDARY_RE), - r'%s' % YT_BaseIE._YT_INITIAL_DATA_RE) - data = data.decode('utf-8', 'replace') - for patt in patterns: - try: - raw_json = re.search(patt, data).group(1) - return json.loads(raw_json) - except AttributeError: - continue + def dl_fragment(url, data=None, headers=None): + http_headers = info_dict.get('http_headers', {}) + if headers: + http_headers = http_headers.copy() + http_headers.update(headers) + return self._download_fragment(ctx, url, info_dict, http_headers, data) - def download_and_parse_fragment(url, frag_index): + def download_and_parse_fragment(url, frag_index, request_data): count = 0 while count <= fragment_retries: try: - success, raw_fragment = dl_fragment(url) + success, raw_fragment = dl_fragment(url, request_data, {'content-type': 'application/json'}) if not success: return False, None, None - data = parse_yt_initial_data(raw_fragment) + try: + data = ie._extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace')) + except RegexNotFoundError: + data = None if not data: - raw_data = json.loads(raw_fragment) - # sometimes youtube replies with a list - if not isinstance(raw_data, list): - raw_data = [raw_data] - try: - data = next(item['response'] for item in raw_data if 'response' in item) - except StopIteration: - data = {} - + data = json.loads(raw_fragment) live_chat_continuation = try_get( data, lambda x: x['continuationContents']['liveChatContinuation'], dict) or {} @@ -93,22 +83,37 @@ def download_and_parse_fragment(url, frag_index): 'https://www.youtube.com/watch?v={}'.format(video_id)) if not success: return False - data = parse_yt_initial_data(raw_fragment) + try: + data = ie._extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace')) + except RegexNotFoundError: + return False continuation_id = try_get( data, lambda x: x['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']) # no data yet but required to call _append_fragment self._append_fragment(ctx, b'') + ytcfg = ie._extract_ytcfg(video_id, raw_fragment.decode('utf-8', 'replace')) + + if not ytcfg: + return False + api_key = try_get(ytcfg, lambda x: x['INNERTUBE_API_KEY']) + innertube_context = try_get(ytcfg, lambda x: x['INNERTUBE_CONTEXT']) + if not api_key or not innertube_context: + return False + url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat_replay?key=' + api_key + frag_index = offset = 0 while continuation_id is not None: frag_index += 1 - url = ''.join(( - 'https://www.youtube.com/live_chat_replay', - '/get_live_chat_replay' if frag_index > 1 else '', - '?continuation=%s' % continuation_id, - '&playerOffsetMs=%d&hidden=false&pbj=1' % max(offset - 5000, 0) if frag_index > 1 else '')) - success, continuation_id, offset = download_and_parse_fragment(url, frag_index) + request_data = { + 'context': innertube_context, + 'continuation': continuation_id, + } + if frag_index > 1: + request_data['currentPlayerState'] = {'playerOffsetMs': str(max(offset - 5000, 0))} + success, continuation_id, offset = download_and_parse_fragment( + url, frag_index, json.dumps(request_data, ensure_ascii=False).encode('utf-8') + b'\n') if not success: return False if test: