[ie/orf:podcast] Add extractor (#8486)

Closes #5265
Authored by: Esokrates
2024-11-12 02:01:05 +01:00 · 2023-11-11 20:06:25 +00:00 · 2023-11-11 20:06:25 +00:00 · 6ba3085616
commit 6ba3085616
parent f6e97090d2
2 changed files with 44 additions and 2 deletions
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -1420,6 +1420,7 @@
    ORFTVthekIE,
    ORFFM4StoryIE,
    ORFRadioIE,
+    ORFPodcastIE,
    ORFIPTVIE,
 )
 from .outsidetv import OutsideTVIE
--- a/yt_dlp/extractor/orf.py
+++ b/yt_dlp/extractor/orf.py
@@ -4,15 +4,16 @@
 from .common import InfoExtractor
 from ..networking import HEADRequest
 from ..utils import (
+    InAdvancePagedList,
    clean_html,
    determine_ext,
    float_or_none,
-    InAdvancePagedList,
    int_or_none,
    join_nonempty,
+    make_archive_id,
+    mimetype2ext,
    orderedSet,
    remove_end,
-    make_archive_id,
    smuggle_url,
    strip_jsonp,
    try_call,
@@ -21,6 +22,7 @@
    unsmuggle_url,
    url_or_none,
 )
+from ..utils.traversal import traverse_obj


 class ORFTVthekIE(InfoExtractor):
@@ -334,6 +336,45 @@ def _real_extract(self, url):
            self._entries(data, station or station2), show_id, data.get('title'), clean_html(data.get('subtitle')))


+class ORFPodcastIE(InfoExtractor):
+    IE_NAME = 'orf:podcast'
+    _STATION_RE = '|'.join(map(re.escape, (
+        'bgl', 'fm4', 'ktn', 'noe', 'oe1', 'oe3',
+        'ooe', 'sbg', 'stm', 'tir', 'tv', 'vbg', 'wie')))
+    _VALID_URL = rf'https?://sound\.orf\.at/podcast/(?P<station>{_STATION_RE})/(?P<show>[\w-]+)/(?P<id>[\w-]+)'
+    _TESTS = [{
+        'url': 'https://sound.orf.at/podcast/oe3/fruehstueck-bei-mir/nicolas-stockhammer-15102023',
+        'md5': '526a5700e03d271a1505386a8721ab9b',
+        'info_dict': {
+            'id': 'nicolas-stockhammer-15102023',
+            'ext': 'mp3',
+            'title': 'Nicolas Stockhammer (15.10.2023)',
+            'duration': 3396.0,
+            'series': 'Frühstück bei mir',
+        },
+        'skip': 'ORF podcasts are only available for a limited time'
+    }]
+
+    def _real_extract(self, url):
+        station, show, show_id = self._match_valid_url(url).group('station', 'show', 'id')
+        data = self._download_json(
+            f'https://audioapi.orf.at/radiothek/api/2.0/podcast/{station}/{show}/{show_id}', show_id)
+
+        return {
+            'id': show_id,
+            'ext': 'mp3',
+            'vcodec': 'none',
+            **traverse_obj(data, ('payload', {
+                'url': ('enclosures', 0, 'url'),
+                'ext': ('enclosures', 0, 'type', {mimetype2ext}),
+                'title': 'title',
+                'description': ('description', {clean_html}),
+                'duration': ('duration', {functools.partial(float_or_none, scale=1000)}),
+                'series': ('podcast', 'title'),
+            })),
+        }
+
+
 class ORFIPTVIE(InfoExtractor):
    IE_NAME = 'orf:iptv'
    IE_DESC = 'iptv.ORF.at'