[nrktv:season,series] Fix extraction and update tests (closes #17159, closes #17258)

This commit is contained in:
Sergey M․ 2018-12-07 00:49:24 +07:00
parent 33cc1ea586
commit 15699ec8b0
No known key found for this signature in database
GPG Key ID: 2C393E0F18A9236D

View File

@ -211,13 +211,13 @@ class NRKIE(NRKBaseIE):
_TESTS = [{ _TESTS = [{
# video # video
'url': 'http://www.nrk.no/video/PS*150533', 'url': 'http://www.nrk.no/video/PS*150533',
'md5': '2f7f6eeb2aacdd99885f355428715cfa', 'md5': '706f34cdf1322577589e369e522b50ef',
'info_dict': { 'info_dict': {
'id': '150533', 'id': '150533',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Dompap og andre fugler i Piip-Show', 'title': 'Dompap og andre fugler i Piip-Show',
'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f', 'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f',
'duration': 263, 'duration': 262,
} }
}, { }, {
# audio # audio
@ -256,14 +256,14 @@ class NRKTVIE(NRKBaseIE):
_API_HOSTS = ('psapi-ne.nrk.no', 'psapi-we.nrk.no') _API_HOSTS = ('psapi-ne.nrk.no', 'psapi-we.nrk.no')
_TESTS = [{ _TESTS = [{
'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014', 'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',
'md5': '4e9ca6629f09e588ed240fb11619922a', 'md5': '9a167e54d04671eb6317a37b7bc8a280',
'info_dict': { 'info_dict': {
'id': 'MUHH48000314AA', 'id': 'MUHH48000314AA',
'ext': 'mp4', 'ext': 'mp4',
'title': '20 spørsmål 23.05.2014', 'title': '20 spørsmål 23.05.2014',
'description': 'md5:bdea103bc35494c143c6a9acdd84887a', 'description': 'md5:bdea103bc35494c143c6a9acdd84887a',
'duration': 1741, 'duration': 1741,
'series': '20 spørsmål - TV', 'series': '20 spørsmål',
'episode': '23.05.2014', 'episode': '23.05.2014',
}, },
}, { }, {
@ -301,7 +301,7 @@ class NRKTVIE(NRKBaseIE):
'id': 'MSPO40010515AH', 'id': 'MSPO40010515AH',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015 (Part 1)', 'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015 (Part 1)',
'description': 'md5:c03aba1e917561eface5214020551b7a', 'description': 'md5:1f97a41f05a9486ee00c56f35f82993d',
'duration': 772, 'duration': 772,
'series': 'Tour de Ski', 'series': 'Tour de Ski',
'episode': '06.01.2015', 'episode': '06.01.2015',
@ -314,7 +314,7 @@ class NRKTVIE(NRKBaseIE):
'id': 'MSPO40010515BH', 'id': 'MSPO40010515BH',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015 (Part 2)', 'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015 (Part 2)',
'description': 'md5:c03aba1e917561eface5214020551b7a', 'description': 'md5:1f97a41f05a9486ee00c56f35f82993d',
'duration': 6175, 'duration': 6175,
'series': 'Tour de Ski', 'series': 'Tour de Ski',
'episode': '06.01.2015', 'episode': '06.01.2015',
@ -326,7 +326,7 @@ class NRKTVIE(NRKBaseIE):
'info_dict': { 'info_dict': {
'id': 'MSPO40010515', 'id': 'MSPO40010515',
'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015', 'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015',
'description': 'md5:c03aba1e917561eface5214020551b7a', 'description': 'md5:1f97a41f05a9486ee00c56f35f82993d',
}, },
'expected_warnings': ['Video is geo restricted'], 'expected_warnings': ['Video is geo restricted'],
}, { }, {
@ -406,21 +406,35 @@ class NRKTVSerieBaseIE(InfoExtractor):
def _extract_series(self, webpage, display_id, fatal=True): def _extract_series(self, webpage, display_id, fatal=True):
config = self._parse_json( config = self._parse_json(
self._search_regex( self._search_regex(
r'({.+?})\s*,\s*"[^"]+"\s*\)\s*</script>', webpage, 'config', (r'INITIAL_DATA_*\s*=\s*({.+?})\s*;',
default='{}' if not fatal else NO_DEFAULT), r'({.+?})\s*,\s*"[^"]+"\s*\)\s*</script>'),
webpage, 'config', default='{}' if not fatal else NO_DEFAULT),
display_id, fatal=False) display_id, fatal=False)
if not config: if not config:
return return
return try_get(config, lambda x: x['series'], dict) return try_get(
config,
(lambda x: x['initialState']['series'], lambda x: x['series']),
dict)
def _extract_seasons(self, seasons):
if not isinstance(seasons, list):
return []
entries = []
for season in seasons:
entries.extend(self._extract_episodes(season))
return entries
def _extract_episodes(self, season): def _extract_episodes(self, season):
entries = []
if not isinstance(season, dict): if not isinstance(season, dict):
return entries return []
episodes = season.get('episodes') return self._extract_entries(season.get('episodes'))
if not isinstance(episodes, list):
return entries def _extract_entries(self, entry_list):
for episode in episodes: if not isinstance(entry_list, list):
return []
entries = []
for episode in entry_list:
nrk_id = episode.get('prfId') nrk_id = episode.get('prfId')
if not nrk_id or not isinstance(nrk_id, compat_str): if not nrk_id or not isinstance(nrk_id, compat_str):
continue continue
@ -465,7 +479,7 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE):
_VALID_URL = r'https?://(?:tv|radio)\.nrk(?:super)?\.no/serie/(?P<id>[^/]+)' _VALID_URL = r'https?://(?:tv|radio)\.nrk(?:super)?\.no/serie/(?P<id>[^/]+)'
_ITEM_RE = r'(?:data-season=["\']|id=["\']season-)(?P<id>\d+)' _ITEM_RE = r'(?:data-season=["\']|id=["\']season-)(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
# new layout # new layout, seasons
'url': 'https://tv.nrk.no/serie/backstage', 'url': 'https://tv.nrk.no/serie/backstage',
'info_dict': { 'info_dict': {
'id': 'backstage', 'id': 'backstage',
@ -474,20 +488,21 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE):
}, },
'playlist_mincount': 60, 'playlist_mincount': 60,
}, { }, {
# old layout # new layout, instalments
'url': 'https://tv.nrk.no/serie/groenn-glede', 'url': 'https://tv.nrk.no/serie/groenn-glede',
'info_dict': { 'info_dict': {
'id': 'groenn-glede', 'id': 'groenn-glede',
'title': 'Grønn glede', 'title': 'Grønn glede',
'description': 'md5:7576e92ae7f65da6993cf90ee29e4608', 'description': 'md5:7576e92ae7f65da6993cf90ee29e4608',
}, },
'playlist_mincount': 9, 'playlist_mincount': 10,
}, { }, {
'url': 'http://tv.nrksuper.no/serie/labyrint', # old layout
'url': 'https://tv.nrksuper.no/serie/labyrint',
'info_dict': { 'info_dict': {
'id': 'labyrint', 'id': 'labyrint',
'title': 'Labyrint', 'title': 'Labyrint',
'description': 'md5:58afd450974c89e27d5a19212eee7115', 'description': 'md5:318b597330fdac5959247c9b69fdb1ec',
}, },
'playlist_mincount': 3, 'playlist_mincount': 3,
}, { }, {
@ -520,11 +535,11 @@ def _real_extract(self, url):
description = try_get( description = try_get(
series, lambda x: x['titles']['subtitle'], compat_str) series, lambda x: x['titles']['subtitle'], compat_str)
entries = [] entries = []
for season in series['seasons']: entries.extend(self._extract_seasons(series.get('seasons')))
entries.extend(self._extract_episodes(season)) entries.extend(self._extract_entries(series.get('instalments')))
return self.playlist_result(entries, series_id, title, description) return self.playlist_result(entries, series_id, title, description)
# Old layout (e.g. https://tv.nrk.no/serie/groenn-glede) # Old layout (e.g. https://tv.nrksuper.no/serie/labyrint)
entries = [ entries = [
self.url_result( self.url_result(
'https://tv.nrk.no/program/Episodes/{series}/{season}'.format( 'https://tv.nrk.no/program/Episodes/{series}/{season}'.format(
@ -536,6 +551,9 @@ def _real_extract(self, url):
'seriestitle', webpage, 'seriestitle', webpage,
'title', default=None) or self._og_search_title( 'title', default=None) or self._og_search_title(
webpage, fatal=False) webpage, fatal=False)
if title:
title = self._search_regex(
r'NRK (?:Super )?TV\s*[-]\s*(.+)', title, 'title', default=title)
description = self._html_search_meta( description = self._html_search_meta(
'series_description', webpage, 'series_description', webpage,
@ -596,7 +614,7 @@ class NRKPlaylistIE(NRKPlaylistBaseIE):
'title': 'Rivertonprisen til Karin Fossum', 'title': 'Rivertonprisen til Karin Fossum',
'description': 'Første kvinne på 15 år til å vinne krimlitteraturprisen.', 'description': 'Første kvinne på 15 år til å vinne krimlitteraturprisen.',
}, },
'playlist_count': 5, 'playlist_count': 2,
}] }]
def _extract_title(self, webpage): def _extract_title(self, webpage):