From da3f7fb7f84e47de6aa0b29b16f78cb5bdf7d746 Mon Sep 17 00:00:00 2001 From: t0mm0 Date: Sun, 28 Dec 2014 17:07:32 +0000 Subject: [PATCH 1/3] [hitbox] add extractor for hitbox vods --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/hitbox.py | 104 +++++++++++++++++++++++++++++++ 2 files changed, 105 insertions(+) create mode 100644 youtube_dl/extractor/hitbox.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index ea4faf2a62..3300dfeb48 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -172,6 +172,7 @@ from .hellporno import HellPornoIE from .helsinki import HelsinkiIE from .hentaistigma import HentaiStigmaIE +from .hitbox import HitboxIE from .hornbunny import HornBunnyIE from .hostingbulk import HostingBulkIE from .hotnewhiphop import HotNewHipHopIE diff --git a/youtube_dl/extractor/hitbox.py b/youtube_dl/extractor/hitbox.py new file mode 100644 index 0000000000..239da3cd47 --- /dev/null +++ b/youtube_dl/extractor/hitbox.py @@ -0,0 +1,104 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + unified_strdate, +) + + +class HitboxIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?hitbox\.tv/video/(?P[0-9]+)' + _TESTS = [{ + 'url': 'http://www.hitbox.tv/video/358062', + 'info_dict': { + 'id': '358062', + 'title': 'Megaman', + 'alt_title': 'Megaman', + 'description': '', + 'ext': 'mp4', + 'thumbnail': 're:^https?://.*\.jpg$', + 'duration': 3834, + 'resolution': 'SD 480p', + 'uploader_id': 'supergreatfriend', + 'view_count': int, + 'upload_date': '20141225', + 'categories': [None], + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, { + 'url': 'http://www.hitbox.tv/video/203213', + 'info_dict': { + 'id': '203213', + 'title': 'hitbox @ gamescom, Sub Button Hype extended, Giveaway - hitbox News Update with Oxy', + 'alt_title': 'hitboxlive - Aug 9th #6', + 'description': '', + 'ext': 'mp4', + 'thumbnail': 're:^https?://.*\.jpg$', + 'duration': 215, + 'resolution': 'HD 720p', + 'uploader_id': 'hitboxlive', + 'view_count': int, + 'upload_date': '20140809', + 'categories': ['Live Show'], + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + thumb_base = 'https://edge.sf.hitbox.tv' + metadata = self._download_json( + 'https://www.hitbox.tv/api/media/video/%s' % (video_id), video_id + ) + + video_meta = metadata.get('video', [])[0] + title = video_meta.get('media_status') + alt_title = video_meta.get('media_title') + description = video_meta.get('media_description') + duration = int(float(video_meta.get('media_duration'))) + uploader = video_meta.get('media_user_name') + views = int(video_meta.get('media_views')) + upload_date = unified_strdate(video_meta.get('media_date_added')) + categories = [video_meta.get('category_name')] + thumbs = [ + {'url': thumb_base + video_meta.get('media_thumbnail'), + 'width': 320, + 'height': 180}, + {'url': thumb_base + video_meta.get('media_thumbnail_large'), + 'width': 768, + 'height': 432}, + ] + + player_config = self._download_json( + 'https://www.hitbox.tv/api/player/config/video/%s' % (video_id), + video_id + ) + + clip = player_config.get('clip') + video_url = clip.get('url') + res = clip.get('bitrates', [])[0].get('label') + + return { + 'id': video_id, + 'title': title, + 'alt_title': alt_title, + 'description': description, + 'url': video_url, + 'ext': 'mp4', + 'thumbnails': thumbs, + 'duration': duration, + 'resolution': res, + 'uploader_id': uploader, + 'view_count': views, + 'upload_date': upload_date, + 'categories': categories, + 'protocol': 'm3u8', + } From e3947e2b7fe1cd81f841daa1c4dc2ca72af8aefe Mon Sep 17 00:00:00 2001 From: t0mm0 Date: Mon, 29 Dec 2014 20:10:59 +0000 Subject: [PATCH 2/3] [hitbox] add support for live streams --- youtube_dl/extractor/__init__.py | 2 +- youtube_dl/extractor/hitbox.py | 146 +++++++++++++++++++++---------- 2 files changed, 102 insertions(+), 46 deletions(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 3300dfeb48..4f5a1ce18f 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -172,7 +172,7 @@ from .hellporno import HellPornoIE from .helsinki import HelsinkiIE from .hentaistigma import HentaiStigmaIE -from .hitbox import HitboxIE +from .hitbox import HitboxIE, HitboxLiveIE from .hornbunny import HornBunnyIE from .hostingbulk import HostingBulkIE from .hotnewhiphop import HotNewHipHopIE diff --git a/youtube_dl/extractor/hitbox.py b/youtube_dl/extractor/hitbox.py index 239da3cd47..eab2749ecd 100644 --- a/youtube_dl/extractor/hitbox.py +++ b/youtube_dl/extractor/hitbox.py @@ -1,5 +1,6 @@ # coding: utf-8 from __future__ import unicode_literals +import re from .common import InfoExtractor from ..utils import ( @@ -9,33 +10,13 @@ class HitboxIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?hitbox\.tv/video/(?P[0-9]+)' - _TESTS = [{ - 'url': 'http://www.hitbox.tv/video/358062', - 'info_dict': { - 'id': '358062', - 'title': 'Megaman', - 'alt_title': 'Megaman', - 'description': '', - 'ext': 'mp4', - 'thumbnail': 're:^https?://.*\.jpg$', - 'duration': 3834, - 'resolution': 'SD 480p', - 'uploader_id': 'supergreatfriend', - 'view_count': int, - 'upload_date': '20141225', - 'categories': [None], - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - }, { + _TEST = { 'url': 'http://www.hitbox.tv/video/203213', 'info_dict': { 'id': '203213', 'title': 'hitbox @ gamescom, Sub Button Hype extended, Giveaway - hitbox News Update with Oxy', 'alt_title': 'hitboxlive - Aug 9th #6', - 'description': '', + 'description': '\n', 'ext': 'mp4', 'thumbnail': 're:^https?://.*\.jpg$', 'duration': 215, @@ -49,24 +30,28 @@ class HitboxIE(InfoExtractor): # m3u8 download 'skip_download': True, }, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) + } + def _extract_metadata(self, url, video_id): thumb_base = 'https://edge.sf.hitbox.tv' metadata = self._download_json( - 'https://www.hitbox.tv/api/media/video/%s' % (video_id), video_id + '%s/%s' % (url, video_id), video_id ) - video_meta = metadata.get('video', [])[0] + date = 'media_live_since' + media_type = 'livestream' + if metadata.get('media_type') == 'video': + media_type = 'video' + date = 'media_date_added' + + video_meta = metadata.get(media_type, [])[0] title = video_meta.get('media_status') alt_title = video_meta.get('media_title') - description = video_meta.get('media_description') + description = video_meta.get('media_description_md') duration = int(float(video_meta.get('media_duration'))) uploader = video_meta.get('media_user_name') views = int(video_meta.get('media_views')) - upload_date = unified_strdate(video_meta.get('media_date_added')) + upload_date = unified_strdate(video_meta.get(date)) categories = [video_meta.get('category_name')] thumbs = [ {'url': thumb_base + video_meta.get('media_thumbnail'), @@ -77,6 +62,28 @@ def _real_extract(self, url): 'height': 432}, ] + return { + 'id': video_id, + 'title': title, + 'alt_title': alt_title, + 'description': description, + 'ext': 'mp4', + 'thumbnails': thumbs, + 'duration': duration, + 'uploader_id': uploader, + 'view_count': views, + 'upload_date': upload_date, + 'categories': categories, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + metadata = self._extract_metadata( + 'https://www.hitbox.tv/api/media/video', + video_id + ) + player_config = self._download_json( 'https://www.hitbox.tv/api/player/config/video/%s' % (video_id), video_id @@ -86,19 +93,68 @@ def _real_extract(self, url): video_url = clip.get('url') res = clip.get('bitrates', [])[0].get('label') - return { - 'id': video_id, - 'title': title, - 'alt_title': alt_title, - 'description': description, - 'url': video_url, + metadata['resolution'] = res + metadata['url'] = video_url + metadata['protocol'] = 'm3u8' + + return metadata + + +class HitboxLiveIE(HitboxIE): + _VALID_URL = r'https?://(?:www\.)?hitbox\.tv/(?!video)(?P.+)' + _TEST = { + 'url': 'http://www.hitbox.tv/dimak', + 'info_dict': { + 'id': 'dimak', 'ext': 'mp4', - 'thumbnails': thumbs, - 'duration': duration, - 'resolution': res, - 'uploader_id': uploader, - 'view_count': views, - 'upload_date': upload_date, - 'categories': categories, - 'protocol': 'm3u8', - } + 'description': str, + 'upload_date': str, + 'title': str, + 'uploader_id': 'Dimak', + }, + 'params': { + # live + 'skip_download': True, + }, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + metadata = self._extract_metadata( + 'https://www.hitbox.tv/api/media/live', + video_id + ) + + player_config = self._download_json( + 'https://www.hitbox.tv/api/player/config/live/%s' % (video_id), + video_id + ) + + formats = [] + cdns = player_config.get('cdns') + servers = [] + for cdn in cdns: + base_url = cdn.get('netConnectionUrl') + host = re.search('.+\.([^\.]+\.[^\./]+)/.+', base_url).group(1) + if base_url not in servers: + servers.append(base_url) + for stream in cdn.get('bitrates'): + label = stream.get('label') + if label != 'Auto': + formats.append({ + 'url': '%s/%s' % (base_url, stream.get('url')), + 'ext': 'mp4', + 'vbr': stream.get('bitrate'), + 'resolution': label, + 'rtmp_live': True, + 'format_note': host, + 'page_url': url, + 'player_url': 'http://www.hitbox.tv/static/player/flowplayer/flowplayer.commercial-3.2.16.swf', + }) + + self._sort_formats(formats) + metadata['formats'] = formats + metadata['is_live'] = True + metadata['title'] = self._live_title(metadata.get('title')) + return metadata From 0c0a70f4c6839903b326d7d9074e93235defaa5b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 30 Dec 2014 03:22:07 +0600 Subject: [PATCH 3/3] [hitbox] Minor changes --- youtube_dl/extractor/hitbox.py | 58 +++++++++++++++++++--------------- 1 file changed, 32 insertions(+), 26 deletions(-) diff --git a/youtube_dl/extractor/hitbox.py b/youtube_dl/extractor/hitbox.py index eab2749ecd..84bd7c0804 100644 --- a/youtube_dl/extractor/hitbox.py +++ b/youtube_dl/extractor/hitbox.py @@ -1,14 +1,20 @@ # coding: utf-8 from __future__ import unicode_literals + import re from .common import InfoExtractor from ..utils import ( - unified_strdate, + clean_html, + parse_iso8601, + float_or_none, + int_or_none, + compat_str, ) class HitboxIE(InfoExtractor): + IE_NAME = 'hitbox' _VALID_URL = r'https?://(?:www\.)?hitbox\.tv/video/(?P[0-9]+)' _TEST = { 'url': 'http://www.hitbox.tv/video/203213', @@ -16,13 +22,14 @@ class HitboxIE(InfoExtractor): 'id': '203213', 'title': 'hitbox @ gamescom, Sub Button Hype extended, Giveaway - hitbox News Update with Oxy', 'alt_title': 'hitboxlive - Aug 9th #6', - 'description': '\n', + 'description': '', 'ext': 'mp4', 'thumbnail': 're:^https?://.*\.jpg$', - 'duration': 215, + 'duration': 215.1666, 'resolution': 'HD 720p', - 'uploader_id': 'hitboxlive', + 'uploader': 'hitboxlive', 'view_count': int, + 'timestamp': 1407576133, 'upload_date': '20140809', 'categories': ['Live Show'], }, @@ -35,8 +42,7 @@ class HitboxIE(InfoExtractor): def _extract_metadata(self, url, video_id): thumb_base = 'https://edge.sf.hitbox.tv' metadata = self._download_json( - '%s/%s' % (url, video_id), video_id - ) + '%s/%s' % (url, video_id), video_id) date = 'media_live_since' media_type = 'livestream' @@ -47,11 +53,13 @@ def _extract_metadata(self, url, video_id): video_meta = metadata.get(media_type, [])[0] title = video_meta.get('media_status') alt_title = video_meta.get('media_title') - description = video_meta.get('media_description_md') - duration = int(float(video_meta.get('media_duration'))) + description = clean_html( + video_meta.get('media_description') or + video_meta.get('media_description_md')) + duration = float_or_none(video_meta.get('media_duration')) uploader = video_meta.get('media_user_name') - views = int(video_meta.get('media_views')) - upload_date = unified_strdate(video_meta.get(date)) + views = int_or_none(video_meta.get('media_views')) + timestamp = parse_iso8601(video_meta.get(date), ' ') categories = [video_meta.get('category_name')] thumbs = [ {'url': thumb_base + video_meta.get('media_thumbnail'), @@ -70,9 +78,9 @@ def _extract_metadata(self, url, video_id): 'ext': 'mp4', 'thumbnails': thumbs, 'duration': duration, - 'uploader_id': uploader, + 'uploader': uploader, 'view_count': views, - 'upload_date': upload_date, + 'timestamp': timestamp, 'categories': categories, } @@ -81,13 +89,11 @@ def _real_extract(self, url): metadata = self._extract_metadata( 'https://www.hitbox.tv/api/media/video', - video_id - ) + video_id) player_config = self._download_json( - 'https://www.hitbox.tv/api/player/config/video/%s' % (video_id), - video_id - ) + 'https://www.hitbox.tv/api/player/config/video/%s' % video_id, + video_id) clip = player_config.get('clip') video_url = clip.get('url') @@ -101,16 +107,18 @@ def _real_extract(self, url): class HitboxLiveIE(HitboxIE): + IE_NAME = 'hitbox:live' _VALID_URL = r'https?://(?:www\.)?hitbox\.tv/(?!video)(?P.+)' _TEST = { 'url': 'http://www.hitbox.tv/dimak', 'info_dict': { 'id': 'dimak', 'ext': 'mp4', - 'description': str, - 'upload_date': str, - 'title': str, - 'uploader_id': 'Dimak', + 'description': 'md5:c9f80fa4410bc588d7faa40003fc7d0e', + 'timestamp': int, + 'upload_date': compat_str, + 'title': compat_str, + 'uploader': 'Dimak', }, 'params': { # live @@ -123,13 +131,11 @@ def _real_extract(self, url): metadata = self._extract_metadata( 'https://www.hitbox.tv/api/media/live', - video_id - ) + video_id) player_config = self._download_json( - 'https://www.hitbox.tv/api/player/config/live/%s' % (video_id), - video_id - ) + 'https://www.hitbox.tv/api/player/config/live/%s' % video_id, + video_id) formats = [] cdns = player_config.get('cdns')