Fix fptplay extractor

2024-11-20 13:57:25 +01:00 · 2024-09-10 08:54:16 +05:30 · 2024-09-10 08:54:16 +05:30 · 308e713d9e
commit 308e713d9e
parent d1c4d88b2d
1 changed file with 89 additions and 54 deletions
--- a/yt_dlp/extractor/fptplay.py
+++ b/yt_dlp/extractor/fptplay.py
@@ -2,78 +2,114 @@
 import time
 import urllib.parse
 from .common import InfoExtractor
 from ..utils import (
-    clean_html,
+    ExtractorError,
-    join_nonempty,
+    int_or_none,
    strip_or_none,
 )
 from .common import InfoExtractor
 class FptplayIE(InfoExtractor):
-    _VALID_URL = r'https?://fptplay\.vn/xem-video/[^/]+\-(?P<id>\w+)(?:/tap-(?P<episode>\d+)?/?(?:[?#]|$)|)'
+    _VALID_URL = r'https?://fptplay\.vn/xem-video/[^/]+\-(?P<id>[a-f0-9]+)'
    _GEO_COUNTRIES = ['VN']
    IE_NAME = 'fptplay'
    IE_DESC = 'fptplay.vn'
    _TESTS = [{
-        'url': 'https://fptplay.vn/xem-video/nhan-duyen-dai-nhan-xin-dung-buoc-621a123016f369ebbde55945',
+        'url': 'https://fptplay.vn/xem-video/jumanji-tro-choi-ky-ao-615c9b232089bd0509bfbf42',
        'md5': 'ca0ee9bc63446c0c3e9a90186f7d6b33',
        'info_dict': {
-            'id': '621a123016f369ebbde55945',
+            'id': '615c9b232089bd0509bfbf42',
            'ext': 'mp4',
-            'title': 'Nhân Duyên Đại Nhân Xin Dừng Bước - Tập 1A',
+            'title': 'Jumanji: Welcome To The Jungle',
-            'description': 'md5:23cf7d1ce0ade8e21e76ae482e6a8c6c',
+            'description': 'Phim theo chân một nhóm bốn học sinh phổ thông bị phạt dọn dẹp tầng hầm trường học. Tại đó, họ phát hiện ra trò chơi cổ mang tên Jumanji.',
            'thumbnail': 'https://images.fptplay.net/media/OTT/VOD/2023/03/13/jumanji-tro-choi-ky-ao-fpt-play-1678685776013_Background_1920x1080_over.jpg',
            'release_year': '2017',
        },
    }, {
-        'url': 'https://fptplay.vn/xem-video/ma-toi-la-dai-gia-61f3aa8a6b3b1d2e73c60eb5/tap-3',
+        'url': 'https://fptplay.vn/xem-video/sang-nhu-trang-trong-may-6156d8292089bd2184e26238',
        'md5': 'b35be968c909b3e4e1e20ca45dd261b1',
        'info_dict': {
-            'id': '61f3aa8a6b3b1d2e73c60eb5',
+            'id': '346034',
            'ext': 'mp4',
-            'title': 'Má Tôi Là Đại Gia - Tập 3',
+            'title': 'Bright As The Moon',
-            'description': 'md5:ff8ba62fb6e98ef8875c42edff641d1c',
+            'description': '',
            'release_year': '2021',
            'season_number': '1',
            'episode': 'Tập 1',
            'episode_number': '1',
            'duration': '2665'
        },
-    }, {
+    }, ]
-        'url': 'https://fptplay.vn/xem-video/lap-toi-do-giam-under-the-skin-6222d9684ec7230fa6e627a2/tap-4',
+
        'md5': 'bcb06c55ec14786d7d4eda07fa1ccbb9',
        'info_dict': {
            'id': '6222d9684ec7230fa6e627a2',
            'ext': 'mp4',
            'title': 'Lạp Tội Đồ Giám - Tập 2B',
            'description': 'md5:e5a47e9d35fbf7e9479ca8a77204908b',
        },
    }, {
        'url': 'https://fptplay.vn/xem-video/nha-co-chuyen-hi-alls-well-ends-well-1997-6218995f6af792ee370459f0',
        'only_matching': True,
    }]
    def _real_extract(self, url):
-        video_id, slug_episode = self._match_valid_url(url).group('id', 'episode')
+        contentId = self._match_id(url)
        webpage = self._download_webpage(url, video_id=video_id, fatal=False) or ''
        title = self._search_regex(
            r'(?s)<h4\s+class="mb-1 text-2xl text-white"[^>]*>(.+)</h4>', webpage, 'title', fatal=False)
        real_episode = slug_episode if not title else self._search_regex(
            r'<p.+title="(?P<episode>[^">]+)"\s+class="epi-title active"', webpage, 'episode', fatal=False)
        title = strip_or_none(title) or self._html_search_meta(('og:title', 'twitter:title'), webpage)
-        info = self._download_json(
+        # Requires a valid `token` cookie; its value is sent as the Bearer token, and the stream request fails without it
-            self.get_api_with_st_token(video_id, int(slug_episode) - 1 if slug_episode else 0), video_id)
+        token = self._get_cookies(url).get("token")
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(info['data']['url'], video_id, 'mp4')
        return {
            'id': video_id,
            'title': join_nonempty(title, real_episode, delim=' - '),
            'description': (
                clean_html(self._search_regex(r'<p\s+class="overflow-hidden"[^>]*>(.+)</p>', webpage, 'description'))
                or self._html_search_meta(('og:description', 'twitter:description'), webpage)),
            'formats': formats,
            'subtitles': subtitles,
        }
-    def get_api_with_st_token(self, video_id, episode):
+        res = self._download_json(self.get_api_with_st_token(contentId), contentId, expected_status=406)
-        path = f'/api/v6.2_w/stream/vod/{video_id}/{episode}/auto_vip'
+
        if res["result"]["episode_type"] == 0:
            # movie or single video
            manifest = self._download_json(self.get_api_with_st_token(contentId, 0), contentId, headers={'authorization': f'Bearer {token.value}'}, expected_status=406)
            if manifest.get("msg") != "success":
                raise ExtractorError(f' - Got an error, response: {manifest.get("msg")}', expected=True)
            formats, subtitles = self._extract_m3u8_formats_and_subtitles(manifest["data"]["url"], contentId)
            return {
                'id': contentId,
                'title': res["result"]["title_origin"] if res["result"]["title_origin"] else res["result"]["title_vie"],
                'description': res["result"]["description"],
                'thumbnail': res["result"]["thumb"],
                'release_year': int_or_none(res["result"]["movie_release_date"]),
                'duration': int_or_none(res["result"]["duration"]),
                'formats': formats,
                'subtitles': subtitles
            }
        else:
            # playlist
            entries = []
            for episode in res["result"]["episodes"]:
                if episode["is_trailer"] == 1:
                    continue
                manifest = self._download_json(self.get_api_with_st_token(contentId, episode["_id"]), episode["_id"], headers={'authorization': f'Bearer {token.value}'}, expected_status=406)
                if manifest.get("msg") != "success":
                    raise ExtractorError(f' - Got an error, response: {manifest.get("msg")}', expected=True)
                formats, subtitles = self._extract_m3u8_formats_and_subtitles(manifest["data"]["url"], episode["_id"])
                entry = {
                    'id': episode["ref_episode_id"],
                    'title': res["result"]["title_origin"] if res["result"]["title_origin"] else res["result"]["title_vie"],
                    'description': episode["description"],
                    'thumbnail': episode["thumb"],
                    'release_year': int_or_none(res["result"]["movie_release_date"]),
                    'season_number': 1,  # Assuming season 1 for simplicity
                    'episode': episode["title"],
                    'episode_number': episode["_id"] + 1,
                    'duration': int_or_none(episode["duration"]),
                    'formats': formats,
                    'subtitles': subtitles
                }
                entries.append(entry)
            return {
                '_type': 'playlist',
                'id': contentId,
                'title': res["result"]["title_origin"] if res["result"]["title_origin"] else res["result"]["title_vie"],
                'entries': entries
            }
    def get_api_with_st_token(self, video_id, episode=None):
        if episode is not None:
            path = f'/api/v7.1_w/stream/vod/{video_id}/{0 if episode is None else episode}/adaptive_bitrate'
        else:
            path = f'/api/v7.1_w/vod/detail/{video_id}'
        timestamp = int(time.time()) + 10800
-
+        t = hashlib.md5(f'6ea6d2a4e2d3a4bd5e275401aa086d{timestamp}{path}'.encode()).hexdigest().upper()
        t = hashlib.md5(f'WEBv6Dkdsad90dasdjlALDDDS{timestamp}{path}'.encode()).hexdigest().upper()
        r = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/'
        n = [int(f'0x{t[2 * o: 2 * o + 2]}', 16) for o in range(len(t) // 2)]
@@ -89,7 +125,7 @@ def convert(e):
                    i[n] = e[c]
                n += 1
                c += 1
-                if 3 == n:
+                if n == 3:
                    a[0] = (252 & i[0]) >> 2
                    a[1] = ((3 & i[0]) << 4) + ((240 & i[1]) >> 4)
                    a[2] = ((15 & i[1]) << 2) + ((192 & i[2]) >> 6)
@@ -100,14 +136,13 @@ def convert(e):
            if n:
                for o in range(n, 3):
                    i[o] = 0
                for o in range(n + 1):
                    a[0] = (252 & i[0]) >> 2
                    a[1] = ((3 & i[0]) << 4) + ((240 & i[1]) >> 4)
                    a[2] = ((15 & i[1]) << 2) + ((192 & i[2]) >> 6)
                    a[3] = (63 & i[2])
                    t += r[a[o]]
-                n += 1
+                    n += 1
                while n < 3:
                    t += ''
                    n += 1