diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index 5dafef283..baff0280f 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -137,6 +137,7 @@ from .wimtv import WimTVIE from .tvp import TVPEmbedIE from .blogger import BloggerIE +from .gfycat import GfycatIE class GenericIE(InfoExtractor): @@ -2382,6 +2383,33 @@ class GenericIE(InfoExtractor): 'timestamp': 1636788683.0, 'upload_date': '20211113' } + }, + { + # Multiple gfycat iframe embeds + 'url': 'https://www.gezip.net/bbs/board.php?bo_table=entertaine&wr_id=613422', + 'info_dict': { + 'title': '재이, 윤, 세은 황금 드레스를 입고 빛난다', + 'id': 'board' + }, + 'playlist_count': 8, + }, + { + # Multiple gfycat gifs (direct links) + 'url': 'https://www.gezip.net/bbs/board.php?bo_table=entertaine&wr_id=612199', + 'info_dict': { + 'title': '옳게 된 크롭 니트 스테이씨 아이사', + 'id': 'board' + }, + 'playlist_count': 6 + }, + { + # Multiple gfycat embeds, with uppercase "IFR" in urls + 'url': 'https://kkzz.kr/?vid=2295', + 'info_dict': { + 'title': '지방시 앰버서더 에스파 카리나 움짤', + 'id': '?vid=2295' + }, + 'playlist_count': 9 } # ] @@ -3572,6 +3600,10 @@ def _real_extract(self, url): if tvp_urls: return self.playlist_from_matches(tvp_urls, video_id, video_title, ie=TVPEmbedIE.ie_key()) + # Look for Gfycat Embeds + gfycat_urls = GfycatIE._extract_urls(webpage) + if gfycat_urls: + return self.playlist_from_matches(gfycat_urls, video_id, video_title, ie=GfycatIE.ie_key()) # Look for HTML5 media entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls') if entries: diff --git a/yt_dlp/extractor/gfycat.py b/yt_dlp/extractor/gfycat.py index 56a6dc03d..2ad03e2b2 100644 --- a/yt_dlp/extractor/gfycat.py +++ b/yt_dlp/extractor/gfycat.py @@ -1,6 +1,8 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..utils import ( int_or_none, @@ -11,7 +13,7 @@ class GfycatIE(InfoExtractor): - _VALID_URL = 
r'https?://(?:(?:www|giant|thumbs)\.)?gfycat\.com/(?:ru/|ifr/|gifs/detail/)?(?P<id>[^-/?#\.]+)' + _VALID_URL = r'(?i)https?://(?:(?:www|giant|thumbs)\.)?gfycat\.com/(?:ru/|ifr/|gifs/detail/)?(?P<id>[^-/?#\."\']+)' _TESTS = [{ 'url': 'http://gfycat.com/DeadlyDecisiveGermanpinscher', 'info_dict': { @@ -78,8 +80,19 @@ class GfycatIE(InfoExtractor): }, { 'url': 'https://giant.gfycat.com/acceptablehappygoluckyharborporpoise.mp4', 'only_matching': True + }, { + 'url': 'http://gfycat.com/IFR/JauntyTimelyAmazontreeboa', + 'only_matching': True }] + @staticmethod + def _extract_urls(webpage): + return [ + mobj.group('url') + for mobj in re.finditer( + r'<(?:iframe|source)[^>]+\bsrc=["\'](?P<url>%s)' % GfycatIE._VALID_URL, + webpage)] + def _real_extract(self, url): video_id = self._match_id(url)