mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2024-11-28 01:20:57 +01:00
[patreon:user] Add new extractor
This commit is contained in:
parent
7e8b3f9439
commit
3630cd5423
@ -691,6 +691,7 @@
|
||||
- **ParamountNetwork**
|
||||
- **parliamentlive.tv**: UK parliament videos
|
||||
- **Patreon**
|
||||
- **Patreon:user**: Audio posts by user
|
||||
- **pbs**: Public Broadcasting Service (PBS) and member stations: PBS: Public Broadcasting Service, APT - Alabama Public Television (WBIQ), GPB/Georgia Public Broadcasting (WGTV), Mississippi Public Broadcasting (WMPN), Nashville Public Television (WNPT), WFSU-TV (WFSU), WSRE (WSRE), WTCI (WTCI), WPBA/Channel 30 (WPBA), Alaska Public Media (KAKM), Arizona PBS (KAET), KNME-TV/Channel 5 (KNME), Vegas PBS (KLVX), AETN/ARKANSAS ETV NETWORK (KETS), KET (WKLE), WKNO/Channel 10 (WKNO), LPB/LOUISIANA PUBLIC BROADCASTING (WLPB), OETA (KETA), Ozarks Public Television (KOZK), WSIU Public Broadcasting (WSIU), KEET TV (KEET), KIXE/Channel 9 (KIXE), KPBS San Diego (KPBS), KQED (KQED), KVIE Public Television (KVIE), PBS SoCal/KOCE (KOCE), ValleyPBS (KVPT), CONNECTICUT PUBLIC TELEVISION (WEDH), KNPB Channel 5 (KNPB), SOPTV (KSYS), Rocky Mountain PBS (KRMA), KENW-TV3 (KENW), KUED Channel 7 (KUED), Wyoming PBS (KCWC), Colorado Public Television / KBDI 12 (KBDI), KBYU-TV (KBYU), Thirteen/WNET New York (WNET), WGBH/Channel 2 (WGBH), WGBY (WGBY), NJTV Public Media NJ (WNJT), WLIW21 (WLIW), mpt/Maryland Public Television (WMPB), WETA Television and Radio (WETA), WHYY (WHYY), PBS 39 (WLVT), WVPT - Your Source for PBS and More! 
(WVPT), Howard University Television (WHUT), WEDU PBS (WEDU), WGCU Public Media (WGCU), WPBT2 (WPBT), WUCF TV (WUCF), WUFT/Channel 5 (WUFT), WXEL/Channel 42 (WXEL), WLRN/Channel 17 (WLRN), WUSF Public Broadcasting (WUSF), ETV (WRLK), UNC-TV (WUNC), PBS Hawaii - Oceanic Cable Channel 10 (KHET), Idaho Public Television (KAID), KSPS (KSPS), OPB (KOPB), KWSU/Channel 10 & KTNW/Channel 31 (KWSU), WILL-TV (WILL), Network Knowledge - WSEC/Springfield (WSEC), WTTW11 (WTTW), Iowa Public Television/IPTV (KDIN), Nine Network (KETC), PBS39 Fort Wayne (WFWA), WFYI Indianapolis (WFYI), Milwaukee Public Television (WMVS), WNIN (WNIN), WNIT Public Television (WNIT), WPT (WPNE), WVUT/Channel 22 (WVUT), WEIU/Channel 51 (WEIU), WQPT-TV (WQPT), WYCC PBS Chicago (WYCC), WIPB-TV (WIPB), WTIU (WTIU), CET (WCET), ThinkTVNetwork (WPTD), WBGU-TV (WBGU), WGVU TV (WGVU), NET1 (KUON), Pioneer Public Television (KWCM), SDPB Television (KUSD), TPT (KTCA), KSMQ (KSMQ), KPTS/Channel 8 (KPTS), KTWU/Channel 11 (KTWU), East Tennessee PBS (WSJK), WCTE-TV (WCTE), WLJT, Channel 11 (WLJT), WOSU TV (WOSU), WOUB/WOUC (WOUB), WVPB (WVPB), WKYU-PBS (WKYU), KERA 13 (KERA), MPBN (WCBB), Mountain Lake PBS (WCFE), NHPTV (WENH), Vermont PBS (WETK), witf (WITF), WQED Multimedia (WQED), WMHT Educational Telecommunications (WMHT), Q-TV (WDCQ), WTVS Detroit Public TV (WTVS), CMU Public Television (WCMU), WKAR-TV (WKAR), WNMU-TV Public TV 13 (WNMU), WDSE - WRPT (WDSE), WGTE TV (WGTE), Lakeland Public Television (KAWE), KMOS-TV - Channels 6.1, 6.2 and 6.3 (KMOS), MontanaPBS (KUSM), KRWG/Channel 22 (KRWG), KACV (KACV), KCOS/Channel 13 (KCOS), WCNY/Channel 24 (WCNY), WNED (WNED), WPBS (WPBS), WSKG Public TV (WSKG), WXXI (WXXI), WPSU (WPSU), WVIA Public Media Studios (WVIA), WTVI (WTVI), Western Reserve PBS (WNEO), WVIZ/PBS ideastream (WVIZ), KCTS 9 (KCTS), Basin PBS (KPBT), KUHT / Channel 8 (KUHT), KLRN (KLRN), KLRU (KLRU), WTJX Channel 12 (WTJX), WCVE PBS (WCVE), KBTC Public Television (KBTC)
|
||||
- **PearVideo**
|
||||
- **PeerTube**
|
||||
|
@ -895,7 +895,10 @@ from .palcomp3 import (
|
||||
)
|
||||
from .pandoratv import PandoraTVIE
|
||||
from .parliamentliveuk import ParliamentLiveUKIE
|
||||
from .patreon import PatreonIE
|
||||
from .patreon import (
|
||||
PatreonIE,
|
||||
PatreonUserIE,
|
||||
)
|
||||
from .pbs import PBSIE
|
||||
from .pearvideo import PearVideoIE
|
||||
from .peertube import PeerTubeIE
|
||||
|
@ -13,6 +13,14 @@ from ..utils import (
|
||||
try_get,
|
||||
)
|
||||
|
||||
from selenium import webdriver
|
||||
from selenium.webdriver.support.ui import WebDriverWait
|
||||
from selenium.webdriver.support import expected_conditions as EC
|
||||
from selenium.webdriver.common.by import By
|
||||
from selenium.common.exceptions import TimeoutException
|
||||
|
||||
import re
|
||||
|
||||
|
||||
class PatreonIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?patreon\.com/(?:creation\?hid=|posts/(?:[\w-]+-)?)(?P<id>\d+)'
|
||||
@ -154,3 +162,76 @@ class PatreonIE(InfoExtractor):
|
||||
})
|
||||
|
||||
return info
|
||||
|
||||
|
||||
class PatreonUserIE(PatreonIE):
    """Playlist extractor for the audio posts linked from a Patreon user page.

    The user page is rendered in a headless Chrome instance driven by
    Selenium, the rendered markup is scanned for post links, and every linked
    post is looked up through the Patreon posts API.  Only posts whose
    ``post_type`` is ``audio_file`` end up in the playlist; each entry is
    delegated to ``PatreonIE``.
    """

    IE_NAME = 'Patreon:user'
    IE_DESC = 'Audio posts by user'
    # The trailing ``(?!.)`` only lets the bare ``patreon.com/<name>`` form
    # through, so ``creation?hid=...`` and ``posts/...`` URLs are not matched
    # by this pattern.
    _VALID_URL = r'https?://(?:www\.)?patreon\.com/(?P<id>\w+)(?!.)'
    _TESTS = [
        # Standard
        {
            'url': 'https://www.patreon.com/joshuacitarella',
            'info_dict': {
                'id': 'joshuacitarella',
                'title': "joshuacitarella's audio posts",
            },
            'playlist_mincount': 4,
        },
        # All Private
        {
            'url': 'https://www.patreon.com/juicysoup',
            'info_dict': {
                'id': 'juicysoup',
                'title': "juicysoup's audio posts",
            },
            'playlist_mincount': 0,
        }
    ]

    # Post links searched for in the rendered page: group 1 is the full link,
    # group 2 the numeric post id.
    _POST_URL_RE = r'(?P<href>https?://(?:www\.)?patreon\.com/(?:creation\?hid=|posts/(?:[\w-]+-)?)(?P<vid>\d+))'
    # Seconds to wait for an <audio> element to show up in the rendered page.
    _RENDER_TIMEOUT = 10

    def _real_extract(self, url):
        """Return a playlist of the audio posts linked from the user page.

        An empty playlist (plus a warning) is returned when no ``<audio>``
        element appears within ``_RENDER_TIMEOUT`` seconds.
        """
        video_id = self._match_id(url)
        playlist_title = video_id + "'s audio posts"

        webpage = self._render_page(url)
        if webpage is None:
            self.report_warning(
                'Loading took too much time or no audio files found!')
            return self.playlist_result([], video_id, playlist_title)

        entries = []
        for href, post_id in self._find_posts(webpage):
            post = self._download_json(
                'https://www.patreon.com/api/posts/' + post_id, post_id)
            if self._post_type(post) == 'audio_file':
                entries.append(self.url_result(href, PatreonIE.ie_key()))
            else:
                self.to_screen('Not an audio post: %s' % href)

        return self.playlist_result(entries, video_id, playlist_title)

    def _render_page(self, url):
        """Render ``url`` in headless Chrome and return the page source.

        Returns ``None`` when waiting for an ``<audio>`` element times out.
        The browser is shut down on every exit path, including errors.
        """
        options = webdriver.chrome.options.Options()
        options.add_argument("headless")
        driver = webdriver.Chrome(options=options)
        try:
            driver.get(url)
            try:
                # Wait for audio elements to load
                WebDriverWait(driver, self._RENDER_TIMEOUT).until(
                    EC.presence_of_element_located((By.TAG_NAME, 'audio')))
            except TimeoutException:
                return None
            return driver.page_source
        finally:
            # Always release the browser process, whatever happened above.
            driver.quit()

    @classmethod
    def _find_posts(cls, webpage):
        """Return ``(href, post_id)`` pairs for the post links in ``webpage``.

        Pairs are returned in order of first appearance, one per post id, so
        a post linked several times (or under several URL spellings) is only
        listed once.
        """
        seen_ids = set()
        posts = []
        for href, post_id in re.findall(cls._POST_URL_RE, webpage):
            if post_id in seen_ids:
                continue
            seen_ids.add(post_id)
            posts.append((href, post_id))
        return posts

    @staticmethod
    def _post_type(post):
        """Return ``post['data']['attributes']['post_type']`` or ``None``.

        Any missing or non-dict level yields ``None`` instead of raising.
        """
        node = post
        for key in ('data', 'attributes'):
            if not isinstance(node, dict):
                return None
            node = node.get(key)
        if not isinstance(node, dict):
            return None
        return node.get('post_type')
|
||||
|
Loading…
Reference in New Issue
Block a user