Fix fptplay extractor

2024-11-20 13:57:25 +01:00 · 2024-09-10 08:54:16 +05:30 · 2024-09-10 08:54:16 +05:30 · 308e713d9e
commit 308e713d9e
parent d1c4d88b2d
1 changed file with 89 additions and 54 deletions
--- a/yt_dlp/extractor/fptplay.py
+++ b/yt_dlp/extractor/fptplay.py
@@ -2,78 +2,114 @@
 import time
 import urllib.parse

-from .common import InfoExtractor
 from ..utils import (
-    clean_html,
-    join_nonempty,
-    strip_or_none,
+    ExtractorError,
+    int_or_none,
 )

+from .common import InfoExtractor
+
        
 class FptplayIE(InfoExtractor):
-    _VALID_URL = r'https?://fptplay\.vn/xem-video/[^/]+\-(?P<id>\w+)(?:/tap-(?P<episode>\d+)?/?(?:[?#]|$)|)'
+    _VALID_URL = r'https?://fptplay\.vn/xem-video/[^/]+\-(?P<id>[a-f0-9]+)'
    _GEO_COUNTRIES = ['VN']
    IE_NAME = 'fptplay'
    IE_DESC = 'fptplay.vn'
    _TESTS = [{
-        'url': 'https://fptplay.vn/xem-video/nhan-duyen-dai-nhan-xin-dung-buoc-621a123016f369ebbde55945',
-        'md5': 'ca0ee9bc63446c0c3e9a90186f7d6b33',
+        'url': 'https://fptplay.vn/xem-video/jumanji-tro-choi-ky-ao-615c9b232089bd0509bfbf42',
        'info_dict': {
-            'id': '621a123016f369ebbde55945',
+            'id': '615c9b232089bd0509bfbf42',
            'ext': 'mp4',
-            'title': 'Nhân Duyên Đại Nhân Xin Dừng Bước - Tập 1A',
-            'description': 'md5:23cf7d1ce0ade8e21e76ae482e6a8c6c',
+            'title': 'Jumanji: Welcome To The Jungle',
+            'description': 'Phim theo chân một nhóm bốn học sinh phổ thông bị phạt dọn dẹp tầng hầm trường học. Tại đó, họ phát hiện ra trò chơi cổ mang tên Jumanji.',
+            'thumbnail': 'https://images.fptplay.net/media/OTT/VOD/2023/03/13/jumanji-tro-choi-ky-ao-fpt-play-1678685776013_Background_1920x1080_over.jpg',
+            'release_year': '2017',
        },
    }, {
-        'url': 'https://fptplay.vn/xem-video/ma-toi-la-dai-gia-61f3aa8a6b3b1d2e73c60eb5/tap-3',
-        'md5': 'b35be968c909b3e4e1e20ca45dd261b1',
+        'url': 'https://fptplay.vn/xem-video/sang-nhu-trang-trong-may-6156d8292089bd2184e26238',
        'info_dict': {
-            'id': '61f3aa8a6b3b1d2e73c60eb5',
+            'id': '346034',
            'ext': 'mp4',
-            'title': 'Má Tôi Là Đại Gia - Tập 3',
-            'description': 'md5:ff8ba62fb6e98ef8875c42edff641d1c',
+            'title': 'Bright As The Moon',
+            'description': '',
+            'release_year': '2021',
+            'season_number': '1',
+            'episode': 'Tập 1',
+            'episode_number': '1',
+            'duration': '2665'
        },
-    }, {
-        'url': 'https://fptplay.vn/xem-video/lap-toi-do-giam-under-the-skin-6222d9684ec7230fa6e627a2/tap-4',
-        'md5': 'bcb06c55ec14786d7d4eda07fa1ccbb9',
-        'info_dict': {
-            'id': '6222d9684ec7230fa6e627a2',
-            'ext': 'mp4',
-            'title': 'Lạp Tội Đồ Giám - Tập 2B',
-            'description': 'md5:e5a47e9d35fbf7e9479ca8a77204908b',
-        },
-    }, {
-        'url': 'https://fptplay.vn/xem-video/nha-co-chuyen-hi-alls-well-ends-well-1997-6218995f6af792ee370459f0',
-        'only_matching': True,
-    }]
+    }, ]
+

    def _real_extract(self, url):
-        video_id, slug_episode = self._match_valid_url(url).group('id', 'episode')
-        webpage = self._download_webpage(url, video_id=video_id, fatal=False) or ''
-        title = self._search_regex(
-            r'(?s)<h4\s+class="mb-1 text-2xl text-white"[^>]*>(.+)</h4>', webpage, 'title', fatal=False)
-        real_episode = slug_episode if not title else self._search_regex(
-            r'<p.+title="(?P<episode>[^">]+)"\s+class="epi-title active"', webpage, 'episode', fatal=False)
-        title = strip_or_none(title) or self._html_search_meta(('og:title', 'twitter:title'), webpage)
+        contentId = self._match_id(url)

-        info = self._download_json(
-            self.get_api_with_st_token(video_id, int(slug_episode) - 1 if slug_episode else 0), video_id)
-        formats, subtitles = self._extract_m3u8_formats_and_subtitles(info['data']['url'], video_id, 'mp4')
-        return {
-            'id': video_id,
-            'title': join_nonempty(title, real_episode, delim=' - '),
-            'description': (
-                clean_html(self._search_regex(r'<p\s+class="overflow-hidden"[^>]*>(.+)</p>', webpage, 'description'))
-                or self._html_search_meta(('og:description', 'twitter:description'), webpage)),
-            'formats': formats,
-            'subtitles': subtitles,
-        }
+        # Need valid cookie with Bearer token, else it won't work
+        token = self._get_cookies(url).get("token")
    
-    def get_api_with_st_token(self, video_id, episode):
-        path = f'/api/v6.2_w/stream/vod/{video_id}/{episode}/auto_vip'
+        res = self._download_json(self.get_api_with_st_token(contentId), contentId, expected_status=406)
+
+        if res["result"]["episode_type"] == 0:
+            # movie or single video
+            manifest = self._download_json(self.get_api_with_st_token(contentId, 0), contentId, headers={'authorization': f'Bearer {token.value}'}, expected_status=406)
+            
+            if manifest.get("msg") != "success":
+                raise ExtractorError(f' - Got an error, response: {manifest.get("msg")}', expected=True)
+
+            formats, subtitles = self._extract_m3u8_formats_and_subtitles(manifest["data"]["url"], contentId)
+            return {
+                'id': contentId,
+                'title': res["result"]["title_origin"] if res["result"]["title_origin"] else res["result"]["title_vie"],
+                'description': res["result"]["description"],
+                'thumbnail': res["result"]["thumb"],
+                'release_year': int_or_none(res["result"]["movie_release_date"]),
+                'duration': int_or_none(res["result"]["duration"]),
+                'formats': formats,
+                'subtitles': subtitles
+            }
+        else:
+            # playlist
+            entries = []
+            for episode in res["result"]["episodes"]:
+                
+                if episode["is_trailer"] == 1:
+                    continue
+                
+                manifest = self._download_json(self.get_api_with_st_token(contentId, episode["_id"]), episode["_id"], headers={'authorization': f'Bearer {token.value}'}, expected_status=406)
+                if manifest.get("msg") != "success":
+                    raise ExtractorError(f' - Got an error, response: {manifest.get("msg")}', expected=True)
+                    
+                formats, subtitles = self._extract_m3u8_formats_and_subtitles(manifest["data"]["url"], episode["_id"])
+                
+                entry = {
+                    'id': episode["ref_episode_id"],
+                    'title': res["result"]["title_origin"] if res["result"]["title_origin"] else res["result"]["title_vie"],
+                    'description': episode["description"],
+                    'thumbnail': episode["thumb"],
+                    'release_year': int_or_none(res["result"]["movie_release_date"]),
+                    'season_number': 1,  # Assuming season 1 for simplicity
+                    'episode': episode["title"],
+                    'episode_number': episode["_id"] + 1,
+                    'duration': int_or_none(episode["duration"]),
+                    'formats': formats,
+                    'subtitles': subtitles
+                }
+                entries.append(entry)
+            
+            return {
+                '_type': 'playlist',
+                'id': contentId,
+                'title': res["result"]["title_origin"] if res["result"]["title_origin"] else res["result"]["title_vie"],
+                'entries': entries
+            }
+
+    def get_api_with_st_token(self, video_id, episode=None):
+        if episode is not None:
+            path = f'/api/v7.1_w/stream/vod/{video_id}/{0 if episode is None else episode}/adaptive_bitrate'
+        else:
+            path = f'/api/v7.1_w/vod/detail/{video_id}'
        timestamp = int(time.time()) + 10800
-
-        t = hashlib.md5(f'WEBv6Dkdsad90dasdjlALDDDS{timestamp}{path}'.encode()).hexdigest().upper()
+        t = hashlib.md5(f'6ea6d2a4e2d3a4bd5e275401aa086d{timestamp}{path}'.encode()).hexdigest().upper()
        r = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/'
        n = [int(f'0x{t[2 * o: 2 * o + 2]}', 16) for o in range(len(t) // 2)]

@@ -89,7 +125,7 @@ def convert(e):
                    i[n] = e[c]
                n += 1
                c += 1
-                if 3 == n:
+                if n == 3:
                    a[0] = (252 & i[0]) >> 2
                    a[1] = ((3 & i[0]) << 4) + ((240 & i[1]) >> 4)
                    a[2] = ((15 & i[1]) << 2) + ((192 & i[2]) >> 6)
@@ -100,14 +136,13 @@ def convert(e):
            if n:
                for o in range(n, 3):
                    i[o] = 0
-
                for o in range(n + 1):
                    a[0] = (252 & i[0]) >> 2
                    a[1] = ((3 & i[0]) << 4) + ((240 & i[1]) >> 4)
                    a[2] = ((15 & i[1]) << 2) + ((192 & i[2]) >> 6)
                    a[3] = (63 & i[2])
                    t += r[a[o]]
-                n += 1
+                    n += 1
                while n < 3:
                    t += ''
                    n += 1