From 652827d5a076c9483c36654ad2cf3fe46219baf4 Mon Sep 17 00:00:00 2001 From: Ben Faerber Date: Sun, 23 Feb 2025 02:11:58 -0700 Subject: [PATCH] [ie/softwhiteunderbelly] Add extractor (#12281) Authored by: benfaerber --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/softwhiteunderbelly.py | 87 +++++++++++++++++++++++++ 2 files changed, 88 insertions(+) create mode 100644 yt_dlp/extractor/softwhiteunderbelly.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 9a49bcb309..403e1f1f65 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1895,6 +1895,7 @@ from .slutload import SlutloadIE from .smotrim import SmotrimIE from .snapchat import SnapchatSpotlightIE from .snotr import SnotrIE +from .softwhiteunderbelly import SoftWhiteUnderbellyIE from .sohu import ( SohuIE, SohuVIE, diff --git a/yt_dlp/extractor/softwhiteunderbelly.py b/yt_dlp/extractor/softwhiteunderbelly.py new file mode 100644 index 0000000000..ce1b214054 --- /dev/null +++ b/yt_dlp/extractor/softwhiteunderbelly.py @@ -0,0 +1,87 @@ +from .common import InfoExtractor +from .vimeo import VHXEmbedIE +from ..utils import ( + ExtractorError, + clean_html, + update_url, + urlencode_postdata, +) +from ..utils.traversal import find_element, traverse_obj + + +class SoftWhiteUnderbellyIE(InfoExtractor): + _LOGIN_URL = 'https://www.softwhiteunderbelly.com/login' + _NETRC_MACHINE = 'softwhiteunderbelly' + _VALID_URL = r'https?://(?:www\.)?softwhiteunderbelly\.com/videos/(?P<id>[\w-]+)' + _TESTS = [{ + 'url': 'https://www.softwhiteunderbelly.com/videos/kenneth-final1', + 'note': 'A single Soft White Underbelly Episode', + 'md5': '8e79f29ec1f1bda6da2e0b998fcbebb8', + 'info_dict': { + 'id': '3201266', + 'ext': 'mp4', + 'display_id': 'kenneth-final1', + 'title': 'Appalachian Man interview-Kenneth', + 'description': 'Soft White Underbelly interview and portrait of Kenneth, an Appalachian man in Clay County, Kentucky.', + 'thumbnail': 
'https://vhx.imgix.net/softwhiteunderbelly/assets/249f6db0-2b39-49a4-979b-f8dad4681825.jpg', + 'uploader_url': 'https://vimeo.com/user80538407', + 'uploader': 'OTT Videos', + 'uploader_id': 'user80538407', + 'duration': 512, + }, + 'expected_warnings': ['Failed to parse XML: not well-formed'], + }, { + 'url': 'https://www.softwhiteunderbelly.com/videos/tj-2-final-2160p', + 'note': 'A single Soft White Underbelly Episode', + 'md5': '286bd8851b4824c62afb369e6f307036', + 'info_dict': { + 'id': '3506029', + 'ext': 'mp4', + 'display_id': 'tj-2-final-2160p', + 'title': 'Fentanyl Addict interview-TJ (follow up)', + 'description': 'Soft White Underbelly follow up interview and portrait of TJ, a fentanyl addict on Skid Row.', + 'thumbnail': 'https://vhx.imgix.net/softwhiteunderbelly/assets/c883d531-5da0-4faf-a2e2-8eba97e5adfc.jpg', + 'duration': 817, + 'uploader': 'OTT Videos', + 'uploader_url': 'https://vimeo.com/user80538407', + 'uploader_id': 'user80538407', + }, + 'expected_warnings': ['Failed to parse XML: not well-formed'], + }] + + def _perform_login(self, username, password): + signin_page = self._download_webpage(self._LOGIN_URL, None, 'Fetching authenticity token') + self._download_webpage( + self._LOGIN_URL, None, 'Logging in', + data=urlencode_postdata({ + 'email': username, + 'password': password, + 'authenticity_token': self._html_search_regex( + r'name=["\']authenticity_token["\']\s+value=["\']([^"\']+)', signin_page, 'authenticity_token'), + 'utf8': True, + }), + ) + + def _real_extract(self, url): + display_id = self._match_id(url) + + webpage = self._download_webpage(url, display_id) + if '
https?://embed\.vhx\.tv/videos/(?P<id>\d+)[^"\']*)', + webpage, 'embed url', group=('url', 'id')) + + return { + '_type': 'url_transparent', + 'ie_key': VHXEmbedIE.ie_key(), + 'url': VHXEmbedIE._smuggle_referrer(embed_url, 'https://www.softwhiteunderbelly.com'), + 'id': embed_id, + 'display_id': display_id, + 'title': traverse_obj(webpage, ({find_element(id='watch-info')}, {find_element(cls='video-title')}, {clean_html})), + 'description': self._html_search_meta('description', webpage, default=None), + 'thumbnail': update_url(self._og_search_thumbnail(webpage) or '', query=None) or None, + }