[dvtv] Fix live streams extraction

(also fixes the test that broke when the site changed its og meta tags)
This commit is contained in:
Petr Novak 2018-01-28 13:28:58 +01:00 committed by Sergey M․
parent c2b3bd0451
commit 6f4ad0db34
No known key found for this signature in database
GPG Key ID: 2C393E0F18A9236D

View File

@ -32,7 +32,7 @@ class DVTVIE(InfoExtractor):
}, { }, {
'url': 'http://video.aktualne.cz/dvtv/dvtv-16-12-2014-utok-talibanu-boj-o-kliniku-uprchlici/r~973eb3bc854e11e498be002590604f2e/', 'url': 'http://video.aktualne.cz/dvtv/dvtv-16-12-2014-utok-talibanu-boj-o-kliniku-uprchlici/r~973eb3bc854e11e498be002590604f2e/',
'info_dict': { 'info_dict': {
'title': 'DVTV 16. 12. 2014: útok Talibanu, boj o kliniku, uprchlíci', 'title': r're:^DVTV 16\. 12\. 2014: útok Talibanu, boj o kliniku, uprchlíci',
'id': '973eb3bc854e11e498be002590604f2e', 'id': '973eb3bc854e11e498be002590604f2e',
}, },
'playlist': [{ 'playlist': [{
@ -91,10 +91,21 @@ class DVTVIE(InfoExtractor):
}, { }, {
'url': 'http://video.aktualne.cz/v-cechach-poprve-zazni-zelenkova-zrestaurovana-mse/r~45b4b00483ec11e4883b002590604f2e/', 'url': 'http://video.aktualne.cz/v-cechach-poprve-zazni-zelenkova-zrestaurovana-mse/r~45b4b00483ec11e4883b002590604f2e/',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://video.aktualne.cz/dvtv/babis-a-zeman-nesou-vinu-za-to-ze-nemame-jasno-v-tom-kdo-bud/r~026afb54fad711e79704ac1f6b220ee8/',
'md5': '87defe16681b1429c91f7a74809823c6',
'info_dict': {
'id': 'f5ae72f6fad611e794dbac1f6b220ee8',
'ext': 'mp4',
'title': 'Babiš a Zeman nesou vinu za to, že nemáme jasno v tom, kdo bude vládnout, říká Pekarová Adamová',
},
}] }]
def _parse_video_metadata(self, js, video_id): def _parse_video_metadata(self, js, video_id, live_js=None):
data = self._parse_json(js, video_id, transform_source=js_to_json) data = self._parse_json(js, video_id, transform_source=js_to_json)
if live_js:
data.update(
self._parse_json(live_js, video_id, transform_source=js_to_json))
title = unescapeHTML(data['title']) title = unescapeHTML(data['title'])
@ -142,13 +153,18 @@ def _real_extract(self, url):
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
# live content
live_item = self._search_regex(
r'(?s)embedData[0-9a-f]{32}\.asset\.liveStarter\s*=\s*(\{.+?\});',
webpage, 'video', default=None, fatal=False)
# single video # single video
item = self._search_regex( item = self._search_regex(
r'(?s)embedData[0-9a-f]{32}\[["\']asset["\']\]\s*=\s*(\{.+?\});', r'(?s)embedData[0-9a-f]{32}\[["\']asset["\']\]\s*=\s*(\{.+?\});',
webpage, 'video', default=None, fatal=False) webpage, 'video', default=None, fatal=False)
if item: if item:
return self._parse_video_metadata(item, video_id) return self._parse_video_metadata(item, video_id, live_item)
# playlist # playlist
items = re.findall( items = re.findall(