1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-02-17 18:29:16 +01:00

[firedrive] Add new extractor. Addresses

This commit is contained in:
Naglis Jonaitis 2014-07-12 00:42:42 +03:00
parent c961a0e63e
commit 678f58de4b
2 changed files with 82 additions and 0 deletions
youtube_dl/extractor

@ -83,6 +83,7 @@ from .extremetube import ExtremeTubeIE
from .facebook import FacebookIE
from .faz import FazIE
from .fc2 import FC2IE
from .firedrive import FiredriveIE
from .firstpost import FirstpostIE
from .firsttv import FirstTVIE
from .fivemin import FiveMinIE

@ -0,0 +1,81 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
compat_urllib_parse,
compat_urllib_request,
determine_ext,
)
class FiredriveIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?firedrive\.com/' + \
'(?:file|embed)/(?P<id>[0-9a-zA-Z]+)'
_FILE_DELETED_REGEX = r'<div class="removed_file_image">'
_TESTS = [{
'url': 'https://www.firedrive.com/file/FEB892FA160EBD01',
'md5': 'd5d4252f80ebeab4dc2d5ceaed1b7970',
'info_dict': {
'id': 'FEB892FA160EBD01',
'ext': 'flv',
'title': 'bbb_theora_486kbit.flv',
'thumbnail': 're:http://.*\.jpg',
},
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
url = 'http://firedrive.com/file/%s' % video_id
webpage = self._download_webpage(url, video_id)
if re.search(self._FILE_DELETED_REGEX, webpage) is not None:
raise ExtractorError(u'Video %s does not exist' % video_id,
expected=True)
fields = dict(re.findall(r'''(?x)<input\s+
type="hidden"\s+
name="([^"]+)"\s+
(?:id="[^"]+"\s+)?
value="([^"]*)"
''', webpage))
post = compat_urllib_parse.urlencode(fields)
req = compat_urllib_request.Request(url, post)
req.add_header('Content-type', 'application/x-www-form-urlencoded')
# Apparently, this header is required for confirmation to work.
req.add_header('Host', 'www.firedrive.com')
webpage = self._download_webpage(req, video_id,
'Downloading video page')
title = self._search_regex(r'class="external_title_left">(.+)</div>',
webpage, 'title')
thumbnail = self._search_regex(r'image:\s?"(//[^\"]+)', webpage,
'thumbnail', fatal=False, default="")
url = self._search_regex(r'file:\s?\'(http[^\']+)\',',
webpage, 'file url')
ext = self._search_regex(r'type:\s?\'([^\']+)\',',
webpage, 'extension', fatal=False)
formats = [{
'format_id': 'sd',
'url': url,
'ext': ext or determine_ext(url),
'quality': 1,
}]
return {
'id': video_id,
'title': title,
'thumbnail': "http:" + thumbnail,
'formats': formats,
}