Merge branch 'master' into GoogleDriveFolderFix

2024-11-20 05:47:24 +01:00 · 2024-10-02 17:47:09 +13:00 · 2024-10-02 17:47:09 +13:00 · 3068d9897d
commit 3068d9897d
parent b3534df159 e59c82a74c
7 changed files with 59 additions and 30 deletions
--- a/.github/workflows/core.yml
+++ b/.github/workflows/core.yml
@ -59,4 +59,4 @@ jobs:
      continue-on-error: False
      run: |
        python3 -m yt_dlp -v || true  # Print debug head
-        python3 ./devscripts/run_tests.py core
+        python3 ./devscripts/run_tests.py --pytest-args '--reruns 2 --reruns-delay 3.0' core
--- a/.github/workflows/quick-test.yml
+++ b/.github/workflows/quick-test.yml
@ -20,7 +20,7 @@ jobs:
      timeout-minutes: 15
      run: |
        python3 -m yt_dlp -v || true
-        python3 ./devscripts/run_tests.py core
+        python3 ./devscripts/run_tests.py --pytest-args '--reruns 2 --reruns-delay 3.0' core
  check:
    name: Code check
    if: "!contains(github.event.head_commit.message, 'ci skip all')"
--- a/pyproject.toml
+++ b/pyproject.toml
@ -80,6 +80,7 @@ static-analysis = [
 ]
 test = [
    "pytest~=8.1",
+    "pytest-rerunfailures~=14.0",
 ]
 pyinstaller = [
    "pyinstaller>=6.10.0",  # Windows temp cleanup fixed in 6.10.0
@ -162,7 +163,6 @@ lint-fix = "ruff check --fix {args:.}"
 features = ["test"]
 dependencies = [
    "pytest-randomly~=3.15",
-    "pytest-rerunfailures~=14.0",
    "pytest-xdist[psutil]~=3.5",
 ]

--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@ -27,7 +27,7 @@
 from .cache import Cache
 from .compat import urllib  # isort: split
 from .compat import compat_os_name, urllib_req_to_req
-from .cookies import LenientSimpleCookie, load_cookies
+from .cookies import CookieLoadError, LenientSimpleCookie, load_cookies
 from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
 from .downloader.rtmp import rtmpdump_version
 from .extractor import gen_extractor_classes, get_info_extractor
@ -1624,7 +1624,7 @@ def wrapper(self, *args, **kwargs):
            while True:
                try:
                    return func(self, *args, **kwargs)
-                except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
+                except (CookieLoadError, DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
                    raise
                except ReExtractInfo as e:
                    if e.expected:
@ -3580,6 +3580,8 @@ def __download_wrapper(self, func):
        def wrapper(*args, **kwargs):
            try:
                res = func(*args, **kwargs)
+            except CookieLoadError:
+                raise
            except UnavailableVideoError as e:
                self.report_error(e)
            except DownloadCancelled as e:
@ -4113,8 +4115,15 @@ def proxies(self):
    @functools.cached_property
    def cookiejar(self):
        """Global cookiejar instance"""
-        return load_cookies(
-            self.params.get('cookiefile'), self.params.get('cookiesfrombrowser'), self)
+        try:
+            return load_cookies(
+                self.params.get('cookiefile'), self.params.get('cookiesfrombrowser'), self)
+        except CookieLoadError as error:
+            # load_cookies raises CookieLoadError from inside its own `except`, so __context__ is the real failure
+            cause = error.__context__
+            # compat: the single-argument form of `traceback.format_exception` only exists on Python 3.10+
+            self.report_error(str(cause), tb=''.join(traceback.format_exception(None, cause, cause.__traceback__)))
+            raise

    @property
    def _opener(self):
--- a/yt_dlp/__init__.py
+++ b/yt_dlp/__init__.py
@ -15,7 +15,7 @@
 import traceback

 from .compat import compat_os_name
-from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS
+from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS, CookieLoadError
 from .downloader.external import get_external_downloader
 from .extractor import list_extractor_classes
 from .extractor.adobepass import MSO_INFO
@ -1084,7 +1084,7 @@ def main(argv=None):
    _IN_CLI = True
    try:
        _exit(*variadic(_real_main(argv)))
-    except DownloadError:
+    except (CookieLoadError, DownloadError):
        _exit(1)
    except SameFileError as e:
        _exit(f'ERROR: {e}')
--- a/yt_dlp/cookies.py
+++ b/yt_dlp/cookies.py
@ -34,6 +34,7 @@
 from .minicurses import MultilinePrinter, QuietMultilinePrinter
 from .utils import (
    DownloadError,
+    YoutubeDLError,
    Popen,
    error_to_str,
    expand_path,
@ -86,24 +87,31 @@ def _create_progress_bar(logger):
    return printer


+class CookieLoadError(YoutubeDLError):
+    pass
+
+
 def load_cookies(cookie_file, browser_specification, ydl):
-    cookie_jars = []
-    if browser_specification is not None:
-        browser_name, profile, keyring, container = _parse_browser_specification(*browser_specification)
-        cookie_jars.append(
-            extract_cookies_from_browser(browser_name, profile, YDLLogger(ydl), keyring=keyring, container=container))
+    try:
+        cookie_jars = []
+        if browser_specification is not None:
+            browser_name, profile, keyring, container = _parse_browser_specification(*browser_specification)
+            cookie_jars.append(
+                extract_cookies_from_browser(browser_name, profile, YDLLogger(ydl), keyring=keyring, container=container))

-    if cookie_file is not None:
-        is_filename = is_path_like(cookie_file)
-        if is_filename:
-            cookie_file = expand_path(cookie_file)
+        if cookie_file is not None:
+            is_filename = is_path_like(cookie_file)
+            if is_filename:
+                cookie_file = expand_path(cookie_file)

-        jar = YoutubeDLCookieJar(cookie_file)
-        if not is_filename or os.access(cookie_file, os.R_OK):
-            jar.load()
-        cookie_jars.append(jar)
+            jar = YoutubeDLCookieJar(cookie_file)
+            if not is_filename or os.access(cookie_file, os.R_OK):
+                jar.load()
+            cookie_jars.append(jar)

-    return _merge_cookie_jars(cookie_jars)
+        return _merge_cookie_jars(cookie_jars)
+    except Exception:
+        raise CookieLoadError('failed to load cookies')


 def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger(), *, keyring=None, container=None):
--- a/yt_dlp/extractor/patreon.py
+++ b/yt_dlp/extractor/patreon.py
@ -1,3 +1,4 @@
+import functools
 import itertools
 import urllib.parse

@ -22,13 +23,19 @@


 class PatreonBaseIE(InfoExtractor):
-    USER_AGENT = 'Patreon/7.6.28 (Android; Android 11; Scale/2.10)'
+    @functools.cached_property
+    def patreon_user_agent(self):
+        # Patreon mobile UA is needed to avoid triggering Cloudflare anti-bot protection.
+        # Newer UA yields higher res m3u8 formats for locked posts, but gives 401 if not logged-in
+        if self._get_cookies('https://www.patreon.com/').get('session_id'):
+            return 'Patreon/72.2.28 (Android; Android 14; Scale/2.10)'
+        return 'Patreon/7.6.28 (Android; Android 11; Scale/2.10)'

    def _call_api(self, ep, item_id, query=None, headers=None, fatal=True, note=None):
        if headers is None:
            headers = {}
        if 'User-Agent' not in headers:
-            headers['User-Agent'] = self.USER_AGENT
+            headers['User-Agent'] = self.patreon_user_agent
        if query:
            query.update({'json-api-version': 1.0})

@ -111,6 +118,7 @@ class PatreonIE(PatreonBaseIE):
            'comment_count': int,
            'channel_is_verified': True,
            'chapters': 'count:4',
+            'timestamp': 1423689666,
        },
        'params': {
            'noplaylist': True,
@ -221,6 +229,7 @@ class PatreonIE(PatreonBaseIE):
            'thumbnail': r're:^https?://.+',
        },
        'params': {'skip_download': 'm3u8'},
+        'expected_warnings': ['Failed to parse XML: not well-formed'],
    }, {
        # multiple attachments/embeds
        'url': 'https://www.patreon.com/posts/holy-wars-solos-100601977',
@ -326,8 +335,13 @@ def _real_extract(self, url):
        if embed_url and (urlh := self._request_webpage(
                embed_url, video_id, 'Checking embed URL', headers=headers,
                fatal=False, errnote=False, expected_status=403)):
+            # Vimeo's Cloudflare anti-bot protection will return HTTP status 200 for 404, so we need
+            # to check for "Sorry, we couldn&amp;rsquo;t find that page" in the meta description tag
+            meta_description = clean_html(self._html_search_meta(
+                'description', self._webpage_read_content(urlh, embed_url, video_id, fatal=False), default=None))
            # Password-protected vids.io embeds return 403 errors w/o --video-password or session cookie
-            if urlh.status != 403 or VidsIoIE.suitable(embed_url):
+            if ((urlh.status != 403 and meta_description != 'Sorry, we couldn’t find that page')
+                    or VidsIoIE.suitable(embed_url)):
                entries.append(self.url_result(smuggle_url(embed_url, headers)))

        post_file = traverse_obj(attributes, ('post_file', {dict}))
@ -427,7 +441,7 @@ class PatreonCampaignIE(PatreonBaseIE):
            'title': 'Cognitive Dissonance Podcast',
            'channel_url': 'https://www.patreon.com/dissonancepod',
            'id': '80642',
-            'description': 'md5:eb2fa8b83da7ab887adeac34da6b7af7',
+            'description': r're:(?s).*We produce a weekly news podcast focusing on stories that deal with skepticism and religion.*',
            'channel_id': '80642',
            'channel': 'Cognitive Dissonance Podcast',
            'age_limit': 0,
@ -445,7 +459,7 @@ class PatreonCampaignIE(PatreonBaseIE):
            'id': '4767637',
            'channel_id': '4767637',
            'channel_url': 'https://www.patreon.com/notjustbikes',
-            'description': 'md5:9f4b70051216c4d5c58afe580ffc8d0f',
+            'description': r're:(?s).*Not Just Bikes started as a way to explain why we chose to live in the Netherlands.*',
            'age_limit': 0,
            'channel': 'Not Just Bikes',
            'uploader_url': 'https://www.patreon.com/notjustbikes',
@ -462,7 +476,7 @@ class PatreonCampaignIE(PatreonBaseIE):
            'id': '4243769',
            'channel_id': '4243769',
            'channel_url': 'https://www.patreon.com/secondthought',
-            'description': 'md5:69c89a3aba43efdb76e85eb023e8de8b',
+            'description': r're:(?s).*Second Thought is an educational YouTube channel.*',
            'age_limit': 0,
            'channel': 'Second Thought',
            'uploader_url': 'https://www.patreon.com/secondthought',
@ -512,7 +526,7 @@ def _real_extract(self, url):

        campaign_id, vanity = self._match_valid_url(url).group('campaign_id', 'vanity')
        if campaign_id is None:
-            webpage = self._download_webpage(url, vanity, headers={'User-Agent': self.USER_AGENT})
+            webpage = self._download_webpage(url, vanity, headers={'User-Agent': self.patreon_user_agent})
            campaign_id = self._search_nextjs_data(
                webpage, vanity)['props']['pageProps']['bootstrapEnvelope']['pageBootstrap']['campaign']['data']['id']