From 92743d423a7dfaf0f803deab14475e6343091f20 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rog=C3=A9rio=20Brito?= <rbrito@ime.usp.br>
Date: Thu, 25 Nov 2010 04:24:45 -0200
Subject: [PATCH 1/9] Preliminary downloading from vimeo

---
 youtube-dl | 114 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 114 insertions(+)
diff --git a/youtube-dl b/youtube-dl
index 8dd03daf3c..edd1d3f29e 100755
--- a/youtube-dl
+++ b/youtube-dl
@@ -1718,6 +1718,118 @@ class YahooIE(InfoExtractor):
 			self._downloader.trouble(u'\nERROR: unable to download video')
 
 
+class VimeoIE(InfoExtractor):
+	"""Information extractor for vimeo.com."""
+
+	# _VALID_URL matches Vimeo URLs
+	_VALID_URL = r'(?:http://)?vimeo\.com/([0-9]+)'
+
+	def __init__(self, downloader=None):
+		InfoExtractor.__init__(self, downloader)
+
+	@staticmethod
+	def suitable(url):
+		return (re.match(VimeoIE._VALID_URL, url) is not None)
+
+	def report_download_webpage(self, video_id):
+		"""Report webpage download."""
+		self._downloader.to_screen(u'[video.vimeo] %s: Downloading webpage' % video_id)
+
+	def report_extraction(self, video_id):
+		"""Report information extraction."""
+		self._downloader.to_screen(u'[video.vimeo] %s: Extracting information' % video_id)
+
+	def _real_initialize(self):
+		return
+
+	def _real_extract(self, url, new_video=True):
+		# Extract ID from URL
+		mobj = re.match(self._VALID_URL, url)
+		if mobj is None:
+			self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
+			return
+
+		# At this point we have a new video
+		self._downloader.increment_downloads()
+		video_id = mobj.group(1)
+		video_extension = 'flv' # FIXME
+
+		# Retrieve video webpage to extract further information
+		request = urllib2.Request("http://vimeo.com/moogaloop/load/clip:%s" % video_id, None, std_headers)
+		try:
+			self.report_download_webpage(video_id)
+			webpage = urllib2.urlopen(request).read()
+		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+			self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
+			return
+
+		# Extract uploader and title from webpage
+		self.report_extraction(video_id)
+		mobj = re.search(r'<caption>(.*)</caption>', webpage)
+		if mobj is None:
+			self._downloader.trouble(u'ERROR: unable to extract video title')
+			return
+		video_title = mobj.group(1).decode('utf-8')
+		simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
+
+		mobj = re.search(r'<uploader_url>http://vimeo.com/(.*)</uploader_url>', webpage)
+		if mobj is None:
+			self._downloader.trouble(u'ERROR: unable to extract video uploader')
+			return
+		video_uploader = mobj.group(1).decode('utf-8')
+
+		# Extract video thumbnail
+		mobj = re.search(r'<thumbnail>(.*)</thumbnail>', webpage)
+		if mobj is None:
+			self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
+			return
+		video_thumbnail = mobj.group(1).decode('utf-8')
+
+		# # Extract video description
+		# mobj = re.search(r'<meta property="og:description" content="(.*)" />', webpage)
+		# if mobj is None:
+		# 	self._downloader.trouble(u'ERROR: unable to extract video description')
+		# 	return
+		# video_description = mobj.group(1).decode('utf-8')
+		# if not video_description: video_description = 'No description available.'
+		video_description = 'Foo.'
+
+		# Extract request signature
+		mobj = re.search(r'<request_signature>(.*)</request_signature>', webpage)
+		if mobj is None:
+			self._downloader.trouble(u'ERROR: unable to extract request signature')
+			return
+		sig = mobj.group(1).decode('utf-8')
+
+		# Extract request signature expiration
+		mobj = re.search(r'<request_signature_expires>(.*)</request_signature_expires>', webpage)
+		if mobj is None:
+			self._downloader.trouble(u'ERROR: unable to extract request signature expiration')
+			return
+		sig_exp = mobj.group(1).decode('utf-8')
+
+		video_url = "http://vimeo.com/moogaloop/play/clip:%s/%s/%s" % (video_id, sig, sig_exp)
+
+		try:
+			# Process video information
+			self._downloader.process_info({
+				'id':		video_id.decode('utf-8'),
+				'url':		video_url,
+				'uploader':	video_uploader,
+				'upload_date':	u'NA',
+				'title':	video_title,
+				'stitle':	simple_title,
+				'ext':		video_extension.decode('utf-8'),
+				'thumbnail':	video_thumbnail.decode('utf-8'),
+				'description':	video_description,
+				'thumbnail':	video_thumbnail,
+				'description':	video_description,
+				'player_url':	None,
+			})
+		except UnavailableVideoError:
+			self._downloader.trouble(u'ERROR: unable to download video')
+
+
 class GenericIE(InfoExtractor):
 	"""Generic last-resort information extractor."""
 
@@ -2537,6 +2649,7 @@ if __name__ == '__main__':
 			parser.error(u'invalid playlist end number specified')
 
 		# Information extractors
+		vimeo_ie = VimeoIE()
 		youtube_ie = YoutubeIE()
 		metacafe_ie = MetacafeIE(youtube_ie)
 		dailymotion_ie = DailymotionIE()
@@ -2588,6 +2701,7 @@ if __name__ == '__main__':
 			'nopart': opts.nopart,
 			'updatetime': opts.updatetime,
 			})
+		fd.add_info_extractor(vimeo_ie)
 		fd.add_info_extractor(youtube_search_ie)
 		fd.add_info_extractor(youtube_pl_ie)
 		fd.add_info_extractor(youtube_user_ie)

From c5a088d341e3aeaf65fbca02523c02ff3bccee6e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rog=C3=A9rio=20Brito?= <rbrito@ime.usp.br>
Date: Sat, 29 Jan 2011 04:13:54 -0200
Subject: [PATCH 2/9] Use non-greedy regexps, for safety.

Since I was very lazy when I first coded this, I took the fastest route and
used greedy regexps, which can match past the intended closing tag.  Luckily,
Vasyl' Vavrychuk pointed this out, and (after many months) I went back and
made the patterns non-greedy.
---
 youtube-dl | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/youtube-dl b/youtube-dl
index edd1d3f29e..e7459062df 100755
--- a/youtube-dl
+++ b/youtube-dl
@@ -1765,21 +1765,21 @@ class VimeoIE(InfoExtractor):
 
 		# Extract uploader and title from webpage
 		self.report_extraction(video_id)
-		mobj = re.search(r'<caption>(.*)</caption>', webpage)
+		mobj = re.search(r'<caption>(.*?)</caption>', webpage)
 		if mobj is None:
 			self._downloader.trouble(u'ERROR: unable to extract video title')
 			return
 		video_title = mobj.group(1).decode('utf-8')
 		simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
 
-		mobj = re.search(r'<uploader_url>http://vimeo.com/(.*)</uploader_url>', webpage)
+		mobj = re.search(r'<uploader_url>http://vimeo.com/(.*?)</uploader_url>', webpage)
 		if mobj is None:
 			self._downloader.trouble(u'ERROR: unable to extract video uploader')
 			return
 		video_uploader = mobj.group(1).decode('utf-8')
 
 		# Extract video thumbnail
-		mobj = re.search(r'<thumbnail>(.*)</thumbnail>', webpage)
+		mobj = re.search(r'<thumbnail>(.*?)</thumbnail>', webpage)
 		if mobj is None:
 			self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
 			return
@@ -1795,14 +1795,14 @@ class VimeoIE(InfoExtractor):
 		video_description = 'Foo.'
 
 		# Extract request signature
-		mobj = re.search(r'<request_signature>(.*)</request_signature>', webpage)
+		mobj = re.search(r'<request_signature>(.*?)</request_signature>', webpage)
 		if mobj is None:
 			self._downloader.trouble(u'ERROR: unable to extract request signature')
 			return
 		sig = mobj.group(1).decode('utf-8')
 
 		# Extract request signature expiration
-		mobj = re.search(r'<request_signature_expires>(.*)</request_signature_expires>', webpage)
+		mobj = re.search(r'<request_signature_expires>(.*?)</request_signature_expires>', webpage)
 		if mobj is None:
 			self._downloader.trouble(u'ERROR: unable to extract request signature expiration')
 			return

From f24c674b048003d878a1d6436c1b2af47693f2ac Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rog=C3=A9rio=20Brito?= <rbrito@ime.usp.br>
Date: Fri, 4 Feb 2011 04:02:29 -0200
Subject: [PATCH 3/9] Make some of the comments more descriptive.

---
 youtube-dl | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/youtube-dl b/youtube-dl
index b96156be7f..a925c97836 100755
--- a/youtube-dl
+++ b/youtube-dl
@@ -1764,8 +1764,12 @@ class VimeoIE(InfoExtractor):
 			self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
 			return
 
-		# Extract uploader and title from webpage
+		# Now we begin extracting as much information as we can from what we
+		# retrieved. First we extract the information common to all extractors,
+		# and later we extract those that are Vimeo specific.
 		self.report_extraction(video_id)
+
+		# Extract title
 		mobj = re.search(r'<caption>(.*?)</caption>', webpage)
 		if mobj is None:
 			self._downloader.trouble(u'ERROR: unable to extract video title')
@@ -1773,6 +1777,7 @@ class VimeoIE(InfoExtractor):
 		video_title = mobj.group(1).decode('utf-8')
 		simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
 
+		# Extract uploader
 		mobj = re.search(r'<uploader_url>http://vimeo.com/(.*?)</uploader_url>', webpage)
 		if mobj is None:
 			self._downloader.trouble(u'ERROR: unable to extract video uploader')
@@ -1795,14 +1800,14 @@ class VimeoIE(InfoExtractor):
 		# if not video_description: video_description = 'No description available.'
 		video_description = 'Foo.'
 
-		# Extract request signature
+		# Vimeo specific: extract request signature
 		mobj = re.search(r'<request_signature>(.*?)</request_signature>', webpage)
 		if mobj is None:
 			self._downloader.trouble(u'ERROR: unable to extract request signature')
 			return
 		sig = mobj.group(1).decode('utf-8')
 
-		# Extract request signature expiration
+		# Vimeo specific: Extract request signature expiration
 		mobj = re.search(r'<request_signature_expires>(.*?)</request_signature_expires>', webpage)
 		if mobj is None:
 			self._downloader.trouble(u'ERROR: unable to extract request signature expiration')

From 8cc98b2358fb4554c7af9dcd38fd4c96262e5ac3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rog=C3=A9rio=20Brito?= <rbrito@ime.usp.br>
Date: Fri, 4 Feb 2011 06:15:27 -0200
Subject: [PATCH 4/9] vimeo: Also accept URLs prefixed by www.

I hope that this doesn't break anything. `:)`
---
 youtube-dl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube-dl b/youtube-dl
index a925c97836..16d234ebf0 100755
--- a/youtube-dl
+++ b/youtube-dl
@@ -1723,7 +1723,7 @@ class VimeoIE(InfoExtractor):
 	"""Information extractor for vimeo.com."""
 
 	# _VALID_URL matches Vimeo URLs
-	_VALID_URL = r'(?:http://)?vimeo\.com/([0-9]+)'
+	_VALID_URL = r'(?:http://)?(?:www.)?vimeo\.com/([0-9]+)'
 
 	def __init__(self, downloader=None):
 		InfoExtractor.__init__(self, downloader)

From a7e5259c33851725243b13f01929e75bb40e0ea2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rog=C3=A9rio=20Brito?= <rbrito@ime.usp.br>
Date: Thu, 17 Feb 2011 08:25:45 -0200
Subject: [PATCH 5/9] vimeo: Make regexp more robust.

This change makes the VimeoIE work with http://player.vimeo.com/video/19267888
---
 youtube-dl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube-dl b/youtube-dl
index 16d234ebf0..780a6d9a25 100755
--- a/youtube-dl
+++ b/youtube-dl
@@ -1723,7 +1723,7 @@ class VimeoIE(InfoExtractor):
 	"""Information extractor for vimeo.com."""
 
 	# _VALID_URL matches Vimeo URLs
-	_VALID_URL = r'(?:http://)?(?:www.)?vimeo\.com/([0-9]+)'
+	_VALID_URL = r'(?:http://)?(?:(?:www|player).)?vimeo\.com/(?:video/)?([0-9]+)'
 
 	def __init__(self, downloader=None):
 		InfoExtractor.__init__(self, downloader)

From 0ecedbdb036120849c2a7eb992ec8a993221e5f1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rog=C3=A9rio=20Brito?= <rbrito@ime.usp.br>
Date: Wed, 20 Apr 2011 21:07:57 -0300
Subject: [PATCH 6/9] vimeo: Remove clutter in some messages.

We should make a unified way of printing messages, but let's follow suit and
do what the main YoutubeIE does here.
---
 youtube-dl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/youtube-dl b/youtube-dl
index 240b2bc7b1..080490ded6 100755
--- a/youtube-dl
+++ b/youtube-dl
@@ -1735,11 +1735,11 @@ class VimeoIE(InfoExtractor):
 
 	def report_download_webpage(self, video_id):
 		"""Report webpage download."""
-		self._downloader.to_screen(u'[video.vimeo] %s: Downloading webpage' % video_id)
+		self._downloader.to_screen(u'[vimeo] %s: Downloading webpage' % video_id)
 
 	def report_extraction(self, video_id):
 		"""Report information extraction."""
-		self._downloader.to_screen(u'[video.vimeo] %s: Extracting information' % video_id)
+		self._downloader.to_screen(u'[vimeo] %s: Extracting information' % video_id)
 
 	def _real_initialize(self):
 		return

From 1e055db69ccffbacad5765887f14879bbe350ce2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rog=C3=A9rio=20Brito?= <rbrito@ime.usp.br>
Date: Wed, 20 Apr 2011 21:15:57 -0300
Subject: [PATCH 7/9] vimeo: Ignore if we are using HTTP/S or not.

---
 youtube-dl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube-dl b/youtube-dl
index 080490ded6..17fb82da70 100755
--- a/youtube-dl
+++ b/youtube-dl
@@ -1724,7 +1724,7 @@ class VimeoIE(InfoExtractor):
 	"""Information extractor for vimeo.com."""
 
 	# _VALID_URL matches Vimeo URLs
-	_VALID_URL = r'(?:http://)?(?:(?:www|player).)?vimeo\.com/(?:video/)?([0-9]+)'
+	_VALID_URL = r'(?:https?://)?(?:(?:www|player).)?vimeo\.com/(?:video/)?([0-9]+)'
 
 	def __init__(self, downloader=None):
 		InfoExtractor.__init__(self, downloader)

From 44c636df8966a1ace617b276f19b5887aa66d612 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rog=C3=A9rio=20Brito?= <rbrito@ime.usp.br>
Date: Wed, 20 Apr 2011 21:20:55 -0300
Subject: [PATCH 8/9] vimeo: Tweak the regexp to allow some extended URLs from
 vimeo.

This, in particular, lets me grab the videos from the beginners channel with
URLs like:

    http://vimeo.com/groups/fivebyfive/videos/22648611

Note that the regexp *will* fail to match other URL forms that we don't know
about, and that's on purpose: we don't want to accidentally grab videos that
would be passed on to other information extractors.
---
 youtube-dl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube-dl b/youtube-dl
index 17fb82da70..f3d7a3f61f 100755
--- a/youtube-dl
+++ b/youtube-dl
@@ -1724,7 +1724,7 @@ class VimeoIE(InfoExtractor):
 	"""Information extractor for vimeo.com."""
 
 	# _VALID_URL matches Vimeo URLs
-	_VALID_URL = r'(?:https?://)?(?:(?:www|player).)?vimeo\.com/(?:video/)?([0-9]+)'
+	_VALID_URL = r'(?:https?://)?(?:(?:www|player).)?vimeo\.com/(?:groups/[^/]+/)?(?:videos?/)?([0-9]+)'
 
 	def __init__(self, downloader=None):
 		InfoExtractor.__init__(self, downloader)

From 2fc31a48723fd4f84c20cf97f810f0171419bcf1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rog=C3=A9rio=20Brito?= <rbrito@ime.usp.br>
Date: Wed, 20 Apr 2011 21:29:29 -0300
Subject: [PATCH 9/9] vimeo: Apparently, all videos in vimeo are served in ISO
 containers.

---
 youtube-dl | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/youtube-dl b/youtube-dl
index f3d7a3f61f..b734c997c8 100755
--- a/youtube-dl
+++ b/youtube-dl
@@ -1754,7 +1754,6 @@ class VimeoIE(InfoExtractor):
 		# At this point we have a new video
 		self._downloader.increment_downloads()
 		video_id = mobj.group(1)
-		video_extension = 'flv' # FIXME
 
 		# Retrieve video webpage to extract further information
 		request = urllib2.Request("http://vimeo.com/moogaloop/load/clip:%s" % video_id, None, std_headers)
@@ -1826,7 +1825,7 @@ class VimeoIE(InfoExtractor):
 				'upload_date':	u'NA',
 				'title':	video_title,
 				'stitle':	simple_title,
-				'ext':		video_extension.decode('utf-8'),
+				'ext':		u'mp4',
 				'thumbnail':	video_thumbnail.decode('utf-8'),
 				'description':	video_description,
 				'thumbnail':	video_thumbnail,