1
0
mirror of https://github.com/ytdl-org/youtube-dl.git synced 2024-11-28 01:20:57 +01:00

[patreon:user] Add new extractor

This commit is contained in:
Kanu Gaba 2021-04-23 17:07:14 -04:00
parent 7e8b3f9439
commit 3630cd5423
3 changed files with 86 additions and 1 deletions

View File

@ -691,6 +691,7 @@
- **ParamountNetwork**
- **parliamentlive.tv**: UK parliament videos
- **Patreon**
- **Patreon:user**: Audio posts by user
- **pbs**: Public Broadcasting Service (PBS) and member stations: PBS: Public Broadcasting Service, APT - Alabama Public Television (WBIQ), GPB/Georgia Public Broadcasting (WGTV), Mississippi Public Broadcasting (WMPN), Nashville Public Television (WNPT), WFSU-TV (WFSU), WSRE (WSRE), WTCI (WTCI), WPBA/Channel 30 (WPBA), Alaska Public Media (KAKM), Arizona PBS (KAET), KNME-TV/Channel 5 (KNME), Vegas PBS (KLVX), AETN/ARKANSAS ETV NETWORK (KETS), KET (WKLE), WKNO/Channel 10 (WKNO), LPB/LOUISIANA PUBLIC BROADCASTING (WLPB), OETA (KETA), Ozarks Public Television (KOZK), WSIU Public Broadcasting (WSIU), KEET TV (KEET), KIXE/Channel 9 (KIXE), KPBS San Diego (KPBS), KQED (KQED), KVIE Public Television (KVIE), PBS SoCal/KOCE (KOCE), ValleyPBS (KVPT), CONNECTICUT PUBLIC TELEVISION (WEDH), KNPB Channel 5 (KNPB), SOPTV (KSYS), Rocky Mountain PBS (KRMA), KENW-TV3 (KENW), KUED Channel 7 (KUED), Wyoming PBS (KCWC), Colorado Public Television / KBDI 12 (KBDI), KBYU-TV (KBYU), Thirteen/WNET New York (WNET), WGBH/Channel 2 (WGBH), WGBY (WGBY), NJTV Public Media NJ (WNJT), WLIW21 (WLIW), mpt/Maryland Public Television (WMPB), WETA Television and Radio (WETA), WHYY (WHYY), PBS 39 (WLVT), WVPT - Your Source for PBS and More! 
(WVPT), Howard University Television (WHUT), WEDU PBS (WEDU), WGCU Public Media (WGCU), WPBT2 (WPBT), WUCF TV (WUCF), WUFT/Channel 5 (WUFT), WXEL/Channel 42 (WXEL), WLRN/Channel 17 (WLRN), WUSF Public Broadcasting (WUSF), ETV (WRLK), UNC-TV (WUNC), PBS Hawaii - Oceanic Cable Channel 10 (KHET), Idaho Public Television (KAID), KSPS (KSPS), OPB (KOPB), KWSU/Channel 10 & KTNW/Channel 31 (KWSU), WILL-TV (WILL), Network Knowledge - WSEC/Springfield (WSEC), WTTW11 (WTTW), Iowa Public Television/IPTV (KDIN), Nine Network (KETC), PBS39 Fort Wayne (WFWA), WFYI Indianapolis (WFYI), Milwaukee Public Television (WMVS), WNIN (WNIN), WNIT Public Television (WNIT), WPT (WPNE), WVUT/Channel 22 (WVUT), WEIU/Channel 51 (WEIU), WQPT-TV (WQPT), WYCC PBS Chicago (WYCC), WIPB-TV (WIPB), WTIU (WTIU), CET (WCET), ThinkTVNetwork (WPTD), WBGU-TV (WBGU), WGVU TV (WGVU), NET1 (KUON), Pioneer Public Television (KWCM), SDPB Television (KUSD), TPT (KTCA), KSMQ (KSMQ), KPTS/Channel 8 (KPTS), KTWU/Channel 11 (KTWU), East Tennessee PBS (WSJK), WCTE-TV (WCTE), WLJT, Channel 11 (WLJT), WOSU TV (WOSU), WOUB/WOUC (WOUB), WVPB (WVPB), WKYU-PBS (WKYU), KERA 13 (KERA), MPBN (WCBB), Mountain Lake PBS (WCFE), NHPTV (WENH), Vermont PBS (WETK), witf (WITF), WQED Multimedia (WQED), WMHT Educational Telecommunications (WMHT), Q-TV (WDCQ), WTVS Detroit Public TV (WTVS), CMU Public Television (WCMU), WKAR-TV (WKAR), WNMU-TV Public TV 13 (WNMU), WDSE - WRPT (WDSE), WGTE TV (WGTE), Lakeland Public Television (KAWE), KMOS-TV - Channels 6.1, 6.2 and 6.3 (KMOS), MontanaPBS (KUSM), KRWG/Channel 22 (KRWG), KACV (KACV), KCOS/Channel 13 (KCOS), WCNY/Channel 24 (WCNY), WNED (WNED), WPBS (WPBS), WSKG Public TV (WSKG), WXXI (WXXI), WPSU (WPSU), WVIA Public Media Studios (WVIA), WTVI (WTVI), Western Reserve PBS (WNEO), WVIZ/PBS ideastream (WVIZ), KCTS 9 (KCTS), Basin PBS (KPBT), KUHT / Channel 8 (KUHT), KLRN (KLRN), KLRU (KLRU), WTJX Channel 12 (WTJX), WCVE PBS (WCVE), KBTC Public Television (KBTC)
- **PearVideo**
- **PeerTube**

View File

@ -895,7 +895,10 @@ from .palcomp3 import (
)
from .pandoratv import PandoraTVIE
from .parliamentliveuk import ParliamentLiveUKIE
from .patreon import PatreonIE
from .patreon import (
PatreonIE,
PatreonUserIE,
)
from .pbs import PBSIE
from .pearvideo import PearVideoIE
from .peertube import PeerTubeIE

View File

@ -13,6 +13,14 @@ from ..utils import (
try_get,
)
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException
import re
class PatreonIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?patreon\.com/(?:creation\?hid=|posts/(?:[\w-]+-)?)(?P<id>\d+)'
@ -154,3 +162,76 @@ class PatreonIE(InfoExtractor):
})
return info
class PatreonUserIE(PatreonIE):
    """Playlist extractor for the audio posts shown on a Patreon creator page.

    The creator page is rendered in a headless Chrome instance driven by
    Selenium, the resulting HTML is scanned for post links, and every linked
    post whose API record reports ``post_type == 'audio_file'`` is delegated
    to PatreonIE as a playlist entry.
    """
    IE_NAME = 'Patreon:user'
    IE_DESC = 'Audio posts by user'
    # The trailing (?!.) only succeeds when no further character follows the
    # user name, so URLs with a path or query after it are not matched here.
    _VALID_URL = r'https?://(?:www\.)?patreon\.com/(?P<id>\w+)(?!.)'
    _TESTS = [
        # Standard
        {
            'url': 'https://www.patreon.com/joshuacitarella',
            'info_dict': {
                'id': 'joshuacitarella',
                'title': "joshuacitarella's audio posts",
            },
            'playlist_mincount': 4,
        },
        # All Private
        {
            'url': 'https://www.patreon.com/juicysoup',
            'info_dict': {
                'id': 'juicysoup',
                'title': "juicysoup's audio posts",
            },
            'playlist_mincount': 0,
        }
    ]

    # Post links as they appear in the rendered page: ``href`` is the full
    # post URL, ``vid`` is the numeric post id.
    _POST_URL_RE = r'(?P<href>https?://(?:www\.)?patreon\.com/(?:creation\?hid=|posts/(?:[\w-]+-)?)(?P<vid>\d+))'
    # Seconds to wait for an <audio> element to show up in the rendered page.
    _PAGE_LOAD_TIMEOUT = 10

    def _render_user_page(self, url):
        """Return the rendered HTML of ``url``, or None on timeout.

        None is returned when no <audio> element appears within
        ``_PAGE_LOAD_TIMEOUT`` seconds.  The browser is always shut down
        before returning, including when an exception propagates.
        """
        options = webdriver.chrome.options.Options()
        options.add_argument('headless')
        driver = webdriver.Chrome(options=options)
        try:
            driver.get(url)
            try:
                # Wait for audio elements to load
                WebDriverWait(driver, self._PAGE_LOAD_TIMEOUT).until(
                    EC.presence_of_element_located((By.TAG_NAME, 'audio')))
            except TimeoutException:
                return None
            return driver.page_source
        finally:
            # Never leave a stray Chrome process behind, whatever happened.
            driver.quit()

    def _real_extract(self, url):
        """Build a playlist of the audio posts linked from the user page.

        Returns an empty playlist when the page shows no audio element
        within the timeout.
        """
        user_id = self._match_id(url)
        playlist_title = user_id + "'s audio posts"

        # The browser is only needed to obtain the rendered HTML; it is
        # already closed by the time the per-post API requests start.
        webpage = self._render_user_page(url)
        if webpage is None:
            self.to_screen('Loading took too much time or no audio files found!')
            return self.playlist_result([], user_id, playlist_title)

        entries = []
        seen_ids = set()
        for mobj in re.finditer(self._POST_URL_RE, webpage):
            post_url, post_id = mobj.group('href', 'vid')
            # Keep the first link to each post, in page order, so the
            # playlist order is stable and a post linked under several URL
            # forms is only listed once.
            if post_id in seen_ids:
                continue
            seen_ids.add(post_id)

            post = self._download_json(
                'https://www.patreon.com/api/posts/' + post_id, post_id)
            # try_get tolerates responses lacking the data/attributes nesting.
            post_type = try_get(
                post, lambda x: x['data']['attributes']['post_type'])
            if post_type != 'audio_file':
                self.to_screen('Not an audio post: %s' % post_url)
                continue
            entries.append(self.url_result(post_url, PatreonIE.ie_key()))

        return self.playlist_result(entries, user_id, playlist_title)