2013-10-23 22:31:53 +08:00
# encoding: utf-8
2014-01-07 06:38:16 +08:00
from __future__ import unicode_literals
2013-06-24 02:18:21 +08:00
import json
import re
2013-07-29 19:12:09 +08:00
import itertools
2013-06-24 02:18:21 +08:00
from . common import InfoExtractor
2014-11-02 18:23:40 +08:00
from . . compat import (
2014-03-08 19:24:43 +08:00
compat_HTTPError ,
2014-09-29 06:36:06 +08:00
compat_urlparse ,
2014-11-02 18:23:40 +08:00
)
from . . utils import (
2016-01-09 05:06:09 +08:00
determine_ext ,
2015-11-03 22:53:17 +08:00
encode_dict ,
2013-06-24 02:18:21 +08:00
ExtractorError ,
2014-09-29 06:36:06 +08:00
InAdvancePagedList ,
int_or_none ,
2013-10-23 20:38:03 +08:00
RegexNotFoundError ,
2015-11-20 22:33:49 +08:00
sanitized_Request ,
2015-02-18 07:27:57 +08:00
smuggle_url ,
2013-06-24 02:18:21 +08:00
std_headers ,
2015-03-07 01:16:56 +08:00
unified_strdate ,
2013-10-15 18:05:13 +08:00
unsmuggle_url ,
2014-04-24 20:44:27 +08:00
urlencode_postdata ,
2015-06-21 18:23:58 +08:00
unescapeHTML ,
2015-12-16 16:43:53 +08:00
parse_filesize ,
2013-06-24 02:18:21 +08:00
)
2013-12-22 10:17:56 +08:00
2014-04-25 03:51:20 +08:00
class VimeoBaseInfoExtractor(InfoExtractor):
    """Shared login, token and cookie helpers for the Vimeo extractors."""

    _NETRC_MACHINE = 'vimeo'
    _LOGIN_REQUIRED = False
    _LOGIN_URL = 'https://vimeo.com/log_in'

    def _login(self):
        """Log in with the configured credentials, if any are available.

        Returns without doing anything when no username is configured,
        unless ``_LOGIN_REQUIRED`` is set, in which case an
        ``ExtractorError`` is raised instead.
        """
        username, password = self._get_login_info()
        if username is None:
            if not self._LOGIN_REQUIRED:
                return
            raise ExtractorError(
                'No login info available, needed for using %s.' % self.IE_NAME,
                expected=True)

        self.report_login()
        login_page = self._download_webpage(self._LOGIN_URL, None, False)
        token, vuid = self._extract_xsrft_and_vuid(login_page)

        form = {
            'action': 'login',
            'email': username,
            'password': password,
            'service': 'vimeo',
            'token': token,
        }
        request = sanitized_Request(
            self._LOGIN_URL, urlencode_postdata(encode_dict(form)))
        request.add_header(
            'Content-Type', 'application/x-www-form-urlencoded')
        request.add_header('Referer', self._LOGIN_URL)

        # The vuid scraped from the login page is sent back as a cookie
        # together with the token carried in the form.
        self._set_vimeo_cookie('vuid', vuid)
        self._download_webpage(request, None, False, 'Wrong login info')

    def _extract_xsrft_and_vuid(self, webpage):
        """Return the ``(xsrft, vuid)`` pair scraped from *webpage*."""
        xsrft_re = (
            r'(?:(?P<q1>["\'])xsrft(?P=q1)\s*:|xsrft\s*[=:])'
            r'\s*(?P<q>["\'])(?P<xsrft>.+?)(?P=q)')
        vuid_re = r'["\']vuid["\']\s*:\s*(["\'])(?P<vuid>.+?)\1'

        login_token = self._search_regex(
            xsrft_re, webpage, 'login token', group='xsrft')
        visitor_id = self._search_regex(
            vuid_re, webpage, 'vuid', group='vuid')
        return login_token, visitor_id

    def _set_vimeo_cookie(self, name, value):
        """Set cookie *name* to *value* for the ``vimeo.com`` domain."""
        self._set_cookie('vimeo.com', name, value)
2014-04-25 03:51:20 +08:00
2015-02-16 01:52:07 +08:00
class VimeoIE(VimeoBaseInfoExtractor):
    """Information extractor for vimeo.com."""

    # _VALID_URL matches Vimeo URLs
    _VALID_URL = r'''(?x)
        https?://
        (?:(?:www|(?P<player>player))\.)?
        vimeo(?P<pro>pro)?\.com/
        (?!channels/[^/?#]+/?(?:$|[?#])|album/)
        (?:.*?/)?
        (?:(?:play_redirect_hls|moogaloop\.swf)\?clip_id=)?
        (?:videos?/)?
        (?P<id>[0-9]+)
        /?(?:[?&].*)?(?:[#].*)?$'''
    IE_NAME = 'vimeo'
    _TESTS = [
        {
            'url': 'http://vimeo.com/56015672#at=0',
            'md5': '8879b6cc097e987f02484baf890129e5',
            'info_dict': {
                'id': '56015672',
                'ext': 'mp4',
                'title': "youtube-dl test video - \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550",
                'description': 'md5:2d3305bad981a06ff79f027f19865021',
                'upload_date': '20121220',
                'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user7108434',
                'uploader_id': 'user7108434',
                'uploader': 'Filippo Valsorda',
                'duration': 10,
            },
        },
        {
            'url': 'http://vimeopro.com/openstreetmapus/state-of-the-map-us-2013/video/68093876',
            'md5': '3b5ca6aa22b60dfeeadf50b72e44ed82',
            'note': 'Vimeo Pro video (#1197)',
            'info_dict': {
                'id': '68093876',
                'ext': 'mp4',
                'uploader_url': r're:https?://(?:www\.)?vimeo\.com/openstreetmapus',
                'uploader_id': 'openstreetmapus',
                'uploader': 'OpenStreetMap US',
                'title': 'Andy Allan - Putting the Carto into OpenStreetMap Cartography',
                'description': 'md5:fd69a7b8d8c34a4e1d2ec2e4afd6ec30',
                'duration': 1595,
            },
        },
        {
            'url': 'http://player.vimeo.com/video/54469442',
            'md5': '619b811a4417aa4abe78dc653becf511',
            'note': 'Videos that embed the url in the player page',
            'info_dict': {
                'id': '54469442',
                'ext': 'mp4',
                'title': 'Kathy Sierra: Building the minimum Badass User, Business of Software 2012',
                'uploader': 'The BLN & Business of Software',
                'uploader_url': r're:https?://(?:www\.)?vimeo\.com/theblnbusinessofsoftware',
                'uploader_id': 'theblnbusinessofsoftware',
                'duration': 3610,
                'description': None,
            },
        },
        {
            'url': 'http://vimeo.com/68375962',
            'md5': 'aaf896bdb7ddd6476df50007a0ac0ae7',
            'note': 'Video protected with password',
            'info_dict': {
                'id': '68375962',
                'ext': 'mp4',
                'title': 'youtube-dl password protected test video',
                'upload_date': '20130614',
                'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user18948128',
                'uploader_id': 'user18948128',
                'uploader': 'Jaime Marquínez Ferrándiz',
                'duration': 10,
                'description': 'This is "youtube-dl password protected test video" by Jaime Marquínez Ferrándiz on Vimeo, the home for high quality videos and the people\u2026',
            },
            'params': {
                'videopassword': 'youtube-dl',
            },
        },
        {
            'url': 'http://vimeo.com/channels/keypeele/75629013',
            'md5': '2f86a05afe9d7abc0b9126d229bbe15d',
            'note': 'Video is freely available via original URL '
                    'and protected with password when accessed via http://vimeo.com/75629013',
            'info_dict': {
                'id': '75629013',
                'ext': 'mp4',
                'title': 'Key & Peele: Terrorist Interrogation',
                'description': 'md5:8678b246399b070816b12313e8b4eb5c',
                'uploader_url': r're:https?://(?:www\.)?vimeo\.com/atencio',
                'uploader_id': 'atencio',
                'uploader': 'Peter Atencio',
                'upload_date': '20130927',
                'duration': 187,
            },
        },
        {
            'url': 'http://vimeo.com/76979871',
            'note': 'Video with subtitles',
            'info_dict': {
                'id': '76979871',
                'ext': 'mp4',
                'title': 'The New Vimeo Player (You Know, For Videos)',
                'description': 'md5:2ec900bf97c3f389378a96aee11260ea',
                'upload_date': '20131015',
                'uploader_url': r're:https?://(?:www\.)?vimeo\.com/staff',
                'uploader_id': 'staff',
                'uploader': 'Vimeo Staff',
                'duration': 62,
            }
        },
        {
            # from https://www.ouya.tv/game/Pier-Solar-and-the-Great-Architects/
            'url': 'https://player.vimeo.com/video/98044508',
            'note': 'The js code contains assignments to the same variable as the config',
            'info_dict': {
                'id': '98044508',
                'ext': 'mp4',
                'title': 'Pier Solar OUYA Official Trailer',
                'uploader': 'Tulio Gonçalves',
                'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user28849593',
                'uploader_id': 'user28849593',
            },
        },
        {
            # contains original format
            'url': 'https://vimeo.com/33951933',
            'md5': '53c688fa95a55bf4b7293d37a89c5c53',
            'info_dict': {
                'id': '33951933',
                'ext': 'mp4',
                'title': 'FOX CLASSICS - Forever Classic ID - A Full Minute',
                'uploader': 'The DMCI',
                'uploader_url': r're:https?://(?:www\.)?vimeo\.com/dmci',
                'uploader_id': 'dmci',
                'upload_date': '20111220',
                'description': 'md5:ae23671e82d05415868f7ad1aec21147',
            },
        },
        {
            'url': 'https://vimeo.com/109815029',
            'note': 'Video not completely processed, "failed" seed status',
            'only_matching': True,
        },
        {
            'url': 'https://vimeo.com/groups/travelhd/videos/22439234',
            'only_matching': True,
        },
        {
            # source file returns 403: Forbidden
            'url': 'https://vimeo.com/7809605',
            'only_matching': True,
        },
    ]

    @staticmethod
    def _extract_vimeo_url(url, webpage):
        """Return the URL of a Vimeo player embedded in *webpage*, or None.

        For an iframe embed the player URL is smuggled together with a
        ``Referer`` header set to *url* (the embedding page). For a swf
        embed the matched URL is returned as is.
        """
        # Look for embedded (iframe) Vimeo player
        mobj = re.search(
            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage)
        if mobj:
            player_url = unescapeHTML(mobj.group('url'))
            surl = smuggle_url(player_url, {'http_headers': {'Referer': url}})
            return surl
        # Look for embedded (swf embed) Vimeo player
        mobj = re.search(
            r'<embed[^>]+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
        if mobj:
            return mobj.group(1)

    def _verify_video_password(self, url, video_id, webpage):
        """Submit the ``videopassword`` param to ``<url>/password``.

        Raises ``ExtractorError`` (expected) when no password was supplied.
        Returns the page downloaded in response to the password request.
        """
        password = self._downloader.params.get('videopassword')
        if password is None:
            raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True)
        token, vuid = self._extract_xsrft_and_vuid(webpage)
        data = urlencode_postdata(encode_dict({
            'password': password,
            'token': token,
        }))
        if url.startswith('http://'):
            # vimeo only supports https now, but the user can give an http url
            url = url.replace('http://', 'https://')
        password_request = sanitized_Request(url + '/password', data)
        password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
        password_request.add_header('Referer', url)
        self._set_vimeo_cookie('vuid', vuid)
        return self._download_webpage(
            password_request, video_id,
            'Verifying the password', 'Wrong password')

    def _verify_player_video_password(self, url, video_id):
        """Submit the ``videopassword`` param to ``<url>/check-password``.

        Raises ``ExtractorError`` (expected) when no password was supplied.
        Returns the JSON document downloaded in response.
        """
        password = self._downloader.params.get('videopassword')
        if password is None:
            # expected=True: a missing password is a user error, reported the
            # same way as in _verify_video_password.
            raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True)
        data = urlencode_postdata(encode_dict({'password': password}))
        pass_url = url + '/check-password'
        password_request = sanitized_Request(pass_url, data)
        password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
        return self._download_json(
            password_request, video_id,
            'Verifying the password',
            'Wrong password')

    def _real_initialize(self):
        self._login()

    def _real_extract(self, url):
        """Extract metadata, formats and subtitles for a single video.

        *url* may carry smuggled data: ``http_headers`` (extra request
        headers), ``_video_password_verified`` (set on the retry after a
        password was submitted) and ``force_feature_id``.
        """
        url, data = unsmuggle_url(url, {})
        headers = std_headers.copy()
        if 'http_headers' in data:
            headers.update(data['http_headers'])
        if 'Referer' not in headers:
            headers['Referer'] = url

        # Extract ID from URL
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        orig_url = url
        if mobj.group('pro') or mobj.group('player'):
            url = 'https://player.vimeo.com/video/' + video_id
        else:
            url = 'https://vimeo.com/' + video_id

        # Retrieve video webpage to extract further information
        request = sanitized_Request(url, headers=headers)
        try:
            webpage = self._download_webpage(request, video_id)
        except ExtractorError as ee:
            if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
                errmsg = ee.cause.read()
                if b'Because of its privacy settings, this video cannot be played here' in errmsg:
                    raise ExtractorError(
                        'Cannot download embed-only video without embedding '
                        'URL. Please call youtube-dl with the URL of the page '
                        'that embeds this video.',
                        expected=True)
            raise

        # Now we begin extracting as much information as we can from what we
        # retrieved. First we extract the information common to all extractors,
        # and later we extract those that are Vimeo specific.
        self.report_extraction(video_id)

        vimeo_config = self._search_regex(
            r'vimeo\.config\s*=\s*(?:({.+?})|_extend\([^,]+,\s+({.+?})\));', webpage,
            'vimeo config', default=None)
        if vimeo_config:
            seed_status = self._parse_json(vimeo_config, video_id).get('seed_status', {})
            if seed_status.get('state') == 'failed':
                raise ExtractorError(
                    '%s said: %s' % (self.IE_NAME, seed_status['title']),
                    expected=True)

        # Extract the config JSON
        try:
            try:
                # NOTE(review): whitespace at the start of this pattern could
                # not be recovered from the source dump - confirm.
                config_url = self._html_search_regex(
                    r'data-config-url="(.+?)"', webpage,
                    'config URL', default=None)
                if not config_url:
                    # Sometimes new react-based page is served instead of old one that require
                    # different config URL extraction approach (see
                    # https://github.com/rg3/youtube-dl/pull/7209)
                    vimeo_clip_page_config = self._search_regex(
                        r'vimeo\.clip_page_config\s*=\s*({.+?});', webpage,
                        'vimeo clip page config')
                    config_url = self._parse_json(
                        vimeo_clip_page_config, video_id)['player']['config_url']
                config_json = self._download_webpage(config_url, video_id)
                config = json.loads(config_json)
            except RegexNotFoundError:
                # For pro videos or player.vimeo.com urls
                # We try to find out to which variable is assigned the config dic
                m_variable_name = re.search(r'(\w)\.video\.id', webpage)
                if m_variable_name is not None:
                    config_re = r'%s=({[^}].+?});' % re.escape(m_variable_name.group(1))
                else:
                    # NOTE(review): the spaces around "=" in the first
                    # pattern were reconstructed - confirm.
                    config_re = [r' = {config:({.+?}),assets:', r'(?:[abc])=({.+?});']
                config = self._search_regex(config_re, webpage, 'info section',
                                            flags=re.DOTALL)
                config = json.loads(config)
        except Exception as e:
            if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage):
                raise ExtractorError('The author has restricted the access to this video, try with the "--referer" option')

            if re.search(r'<form[^>]+?id="pw_form"', webpage) is not None:
                # A second password form after a submitted password means the
                # password was rejected; stop instead of recursing forever.
                if '_video_password_verified' in data:
                    raise ExtractorError('video password verification failed!')
                self._verify_video_password(url, video_id, webpage)
                return self._real_extract(
                    smuggle_url(url, {'_video_password_verified': 'verified'}))
            else:
                raise ExtractorError('Unable to extract info section',
                                     cause=e)
        else:
            if config.get('view') == 4:
                config = self._verify_player_video_password(url, video_id)

        if '>You rented this title.<' in webpage:
            feature_id = config.get('video', {}).get('vod', {}).get('feature_id')
            if feature_id and not data.get('force_feature_id', False):
                return self.url_result(smuggle_url(
                    'https://player.vimeo.com/player/%s' % feature_id,
                    {'force_feature_id': True}), 'Vimeo')

        # Extract title
        video_title = config['video']['title']

        # Extract uploader, uploader_url and uploader_id
        video_uploader = config['video'].get('owner', {}).get('name')
        video_uploader_url = config['video'].get('owner', {}).get('url')
        video_uploader_id = video_uploader_url.split('/')[-1] if video_uploader_url else None

        # Extract video thumbnail
        video_thumbnail = config['video'].get('thumbnail')
        if video_thumbnail is None:
            video_thumbs = config['video'].get('thumbs')
            if video_thumbs and isinstance(video_thumbs, dict):
                # Pick the entry with the largest numeric key; non-numeric
                # keys count as 0.
                _, video_thumbnail = max(
                    (int(width if width.isdigit() else 0), t_url)
                    for (width, t_url) in video_thumbs.items())

        # Extract video description
        video_description = self._html_search_regex(
            r'(?s)<div\s+class="[^"]*description[^"]*"[^>]*>(.*?)</div>',
            webpage, 'description', default=None)
        if not video_description:
            video_description = self._html_search_meta(
                'description', webpage, default=None)
        if not video_description and mobj.group('pro'):
            orig_webpage = self._download_webpage(
                orig_url, video_id,
                note='Downloading webpage for description',
                fatal=False)
            if orig_webpage:
                video_description = self._html_search_meta(
                    'description', orig_webpage, default=None)
        if not video_description and not mobj.group('player'):
            self._downloader.report_warning('Cannot find video description')

        # Extract video duration
        video_duration = int_or_none(config['video'].get('duration'))

        # Extract upload date
        video_upload_date = None
        mobj = re.search(r'<time[^>]+datetime="([^"]+)"', webpage)
        if mobj is not None:
            video_upload_date = unified_strdate(mobj.group(1))

        try:
            view_count = int(self._search_regex(r'UserPlays:(\d+)', webpage, 'view count'))
            like_count = int(self._search_regex(r'UserLikes:(\d+)', webpage, 'like count'))
            comment_count = int(self._search_regex(r'UserComments:(\d+)', webpage, 'comment count'))
        except RegexNotFoundError:
            # This info is only available in vimeo.com/{id} urls
            view_count = None
            like_count = None
            comment_count = None

        formats = []
        download_request = sanitized_Request('https://vimeo.com/%s?action=load_download_config' % video_id, headers={
            'X-Requested-With': 'XMLHttpRequest'})
        download_data = self._download_json(download_request, video_id, fatal=False)
        if download_data:
            source_file = download_data.get('source_file')
            if isinstance(source_file, dict):
                download_url = source_file.get('download_url')
                if download_url and not source_file.get('is_cold') and not source_file.get('is_defrosting'):
                    source_name = source_file.get('public_name', 'Original')
                    if self._is_valid_url(download_url, video_id, '%s video' % source_name):
                        # `or` (not a .get default) so that an 'extension'
                        # key holding None or '' still falls back to the URL.
                        ext = (source_file.get('extension') or determine_ext(download_url)).lower()
                        formats.append({
                            'url': download_url,
                            'ext': ext,
                            'width': int_or_none(source_file.get('width')),
                            'height': int_or_none(source_file.get('height')),
                            'filesize': parse_filesize(source_file.get('size')),
                            'format_id': source_name,
                            'preference': 1,
                        })
        config_files = config['video'].get('files') or config['request'].get('files', {})
        for f in config_files.get('progressive', []):
            video_url = f.get('url')
            if not video_url:
                continue
            formats.append({
                'url': video_url,
                'format_id': 'http-%s' % f.get('quality'),
                'width': int_or_none(f.get('width')),
                'height': int_or_none(f.get('height')),
                'fps': int_or_none(f.get('fps')),
                'tbr': int_or_none(f.get('bitrate')),
            })
        m3u8_url = config_files.get('hls', {}).get('url')
        if m3u8_url:
            formats.extend(self._extract_m3u8_formats(
                m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
        # Bitrates are completely broken. Single m3u8 may contain entries in kbps and bps
        # at the same time without actual units specified. This lead to wrong sorting.
        self._sort_formats(formats, field_preference=('preference', 'height', 'width', 'fps', 'format_id'))

        subtitles = {}
        text_tracks = config['request'].get('text_tracks')
        if text_tracks:
            for tt in text_tracks:
                subtitles[tt['lang']] = [{
                    'ext': 'vtt',
                    'url': 'https://vimeo.com' + tt['url'],
                }]

        return {
            'id': video_id,
            'uploader': video_uploader,
            'uploader_url': video_uploader_url,
            'uploader_id': video_uploader_id,
            'upload_date': video_upload_date,
            'title': video_title,
            'thumbnail': video_thumbnail,
            'description': video_description,
            'duration': video_duration,
            'formats': formats,
            'webpage_url': url,
            'view_count': view_count,
            'like_count': like_count,
            'comment_count': comment_count,
            'subtitles': subtitles,
        }
2013-07-29 19:12:09 +08:00
2015-08-11 01:35:08 +08:00
class VimeoChannelIE(VimeoBaseInfoExtractor):
    """Playlist extractor for Vimeo channels; base for the other list pages."""

    IE_NAME = 'vimeo:channel'
    _VALID_URL = r'https://vimeo\.com/channels/(?P<id>[^/?#]+)/?(?:$|[?#])'
    _MORE_PAGES_INDICATOR = r'<a.+?rel="next"'
    _TITLE = None
    _TITLE_RE = r'<link rel="alternate"[^>]+?title="(.*?)"'
    _TESTS = [{
        'url': 'https://vimeo.com/channels/tributes',
        'info_dict': {
            'id': 'tributes',
            'title': 'Vimeo Tributes',
        },
        'playlist_mincount': 25,
    }]

    def _page_url(self, base_url, pagenum):
        """Return the URL of page number *pagenum* of the list at *base_url*."""
        return '%s/videos/page:%d/' % (base_url, pagenum)

    def _extract_list_title(self, webpage):
        """Return ``_TITLE`` when set, otherwise the title found in *webpage*."""
        if self._TITLE:
            return self._TITLE
        return self._html_search_regex(self._TITLE_RE, webpage, 'list title')

    def _login_list_password(self, page_url, list_id, webpage):
        """Submit the list password if *webpage* contains a password form.

        Returns *webpage* unchanged when there is no such form, otherwise the
        page downloaded in response to the password request. Raises
        ``ExtractorError`` (expected) when a password is needed but the
        ``videopassword`` param is not set.
        """
        form_html = self._search_regex(
            r'(?s)<form[^>]+?id="pw_form"(.*?)</form>',
            webpage, 'login form', default=None)
        if not form_html:
            return webpage

        password = self._downloader.params.get('videopassword')
        if password is None:
            raise ExtractorError(
                'This album is protected by a password, use the --video-password option',
                expected=True)

        form_fields = self._hidden_inputs(form_html)
        token, vuid = self._extract_xsrft_and_vuid(webpage)
        form_fields['token'] = token
        form_fields['password'] = password
        payload = urlencode_postdata(encode_dict(form_fields))

        action = self._search_regex(
            r'action="([^"]+)"', form_html, 'password URL')
        request = sanitized_Request(
            compat_urlparse.urljoin(page_url, action), payload)
        request.add_header(
            'Content-type', 'application/x-www-form-urlencoded')

        # Both values scraped from the page go back as cookies.
        self._set_vimeo_cookie('vuid', vuid)
        self._set_vimeo_cookie('xsrft', token)

        return self._download_webpage(
            request, list_id, 'Verifying the password', 'Wrong password')

    def _title_and_entries(self, list_id, base_url):
        """Yield the list title first, then one url_result per video found.

        Pages are fetched one after another until a page no longer matches
        ``_MORE_PAGES_INDICATOR``.
        """
        for page_number in itertools.count(1):
            current_url = self._page_url(base_url, page_number)
            page = self._download_webpage(
                current_url, list_id, 'Downloading page %s' % page_number)

            if page_number == 1:
                # Only the first page can show the password form, and it is
                # also where the title is taken from.
                page = self._login_list_password(current_url, list_id, page)
                yield self._extract_list_title(page)

            for clip_id in re.findall(r'id="clip_(\d+?)"', page):
                yield self.url_result(
                    'https://vimeo.com/%s' % clip_id, 'Vimeo')

            if not re.search(self._MORE_PAGES_INDICATOR, page, re.DOTALL):
                break

    def _extract_videos(self, list_id, base_url):
        """Return a playlist result for the list located at *base_url*."""
        stream = self._title_and_entries(list_id, base_url)
        # The generator's first item is the title; the rest are the entries.
        title = next(stream)
        return self.playlist_result(stream, list_id, title)

    def _real_extract(self, url):
        channel_id = re.match(self._VALID_URL, url).group('id')
        channel_url = 'https://vimeo.com/channels/%s' % channel_id
        return self._extract_videos(channel_id, channel_url)
2013-12-02 05:36:18 +08:00
class VimeoUserIE(VimeoChannelIE):
    """Playlist extractor for the video list of a Vimeo user."""

    IE_NAME = 'vimeo:user'
    _VALID_URL = r'https://vimeo\.com/(?!(?:[0-9]+|watchlater)(?:$|[?#/]))(?P<name>[^/]+)(?:/videos|[#?]|$)'
    _TITLE_RE = r'<a[^>]+?class="user">([^<>]+?)</a>'
    _TESTS = [{
        'url': 'https://vimeo.com/nkistudio/videos',
        'info_dict': {
            'title': 'Nki',
            'id': 'nkistudio',
        },
        'playlist_mincount': 66,
    }]

    def _real_extract(self, url):
        # The user name serves both as the playlist id and as the path
        # component of the listing URL.
        user_name = re.match(self._VALID_URL, url).group('name')
        listing_url = 'https://vimeo.com/%s' % user_name
        return self._extract_videos(user_name, listing_url)
2013-12-07 04:47:32 +08:00
class VimeoAlbumIE(VimeoChannelIE):
    """Playlist extractor for Vimeo albums."""

    IE_NAME = 'vimeo:album'
    _VALID_URL = r'https://vimeo\.com/album/(?P<id>\d+)'
    _TITLE_RE = r'<header id="page_header">\n\s*<h1>(.*?)</h1>'
    _TESTS = [{
        'url': 'https://vimeo.com/album/2632481',
        'info_dict': {
            'id': '2632481',
            'title': 'Staff Favorites: November 2013',
        },
        'playlist_mincount': 13,
    }, {
        'note': 'Password-protected album',
        'url': 'https://vimeo.com/album/3253534',
        'info_dict': {
            'title': 'test',
            'id': '3253534',
        },
        'playlist_count': 1,
        'params': {
            'videopassword': 'youtube-dl',
        }
    }]

    def _page_url(self, base_url, pagenum):
        """Return the URL of page *pagenum*; here there is no ``/videos`` part."""
        return '%s/page:%d/' % (base_url, pagenum)

    def _real_extract(self, url):
        playlist_id = self._match_id(url)
        album_url = 'https://vimeo.com/album/%s' % playlist_id
        return self._extract_videos(playlist_id, album_url)
2013-12-07 05:01:41 +08:00
class VimeoGroupsIE(VimeoAlbumIE):
    """Playlist extractor for Vimeo groups."""

    IE_NAME = 'vimeo:group'
    _VALID_URL = r'https://vimeo\.com/groups/(?P<name>[^/]+)(?:/(?!videos?/\d+)|$)'
    _TESTS = [{
        'url': 'https://vimeo.com/groups/rolexawards',
        'info_dict': {
            'id': 'rolexawards',
            'title': 'Rolex Awards for Enterprise',
        },
        'playlist_mincount': 73,
    }]

    def _extract_list_title(self, webpage):
        """Return the list title via ``_og_search_title``."""
        return self._og_search_title(webpage)

    def _real_extract(self, url):
        # The group name is both the playlist id and the URL path component.
        group_name = re.match(self._VALID_URL, url).group('name')
        group_url = 'https://vimeo.com/groups/%s' % group_name
        return self._extract_videos(group_name, group_url)
2014-01-07 00:31:47 +08:00
class VimeoReviewIE(InfoExtractor):
    """Extractor for ``https://vimeo.com/<user>/review/<id>/...`` pages.

    It performs no download itself: the review id is turned into a
    ``player.vimeo.com`` URL and handed over via ``url_result`` with the
    ``'Vimeo'`` extractor key.
    """

    IE_NAME = 'vimeo:review'
    IE_DESC = 'Review pages on vimeo'
    _VALID_URL = r'https://vimeo\.com/[^/]+/review/(?P<id>[^/]+)'
    _TESTS = [
        {
            'url': 'https://vimeo.com/user21297594/review/75524534/3c257a1b5d',
            'md5': 'c507a72f780cacc12b2248bb4006d253',
            'info_dict': {
                'id': '75524534',
                'ext': 'mp4',
                'title': "DICK HARDWICK 'Comedian'",
                'uploader': 'Richard Hardwick',
            }
        },
        {
            'note': 'video player needs Referer',
            'url': 'https://vimeo.com/user22258446/review/91613211/13f927e053',
            'md5': '6295fdab8f4bf6a002d058b2c6dce276',
            'info_dict': {
                'id': '91613211',
                'ext': 'mp4',
                'title': 're:(?i)^Death by dogma versus assembling agile . Sander Hoogendoorn',
                'uploader': 'DevWeek Events',
                'duration': 2773,
                'thumbnail': r're:^https?://.*\.jpg$',
            }
        },
    ]

    def _real_extract(self, url):
        """Redirect the review page at *url* to the matching player URL."""
        video_id = re.match(self._VALID_URL, url).group('id')
        return self.url_result(
            'https://player.vimeo.com/player/' + video_id,
            'Vimeo',
            video_id)
2014-04-25 03:51:20 +08:00
2015-08-11 01:35:08 +08:00
class VimeoWatchLaterIE(VimeoChannelIE):
    """Playlist extractor for the logged-in user's "Watch Later" list.

    Matches the watchlater URLs as well as the ``:vimeowatchlater`` keyword.
    ``_real_initialize`` calls ``_login`` and ``_LOGIN_REQUIRED`` is set, so
    credentials are expected to be available.
    """

    IE_NAME = 'vimeo:watchlater'
    IE_DESC = 'Vimeo watch later list, "vimeowatchlater" keyword (requires authentication)'
    _VALID_URL = r'https://vimeo\.com/(?:home/)?watchlater|:vimeowatchlater'
    _TITLE = 'Watch Later'
    _LOGIN_REQUIRED = True
    _TESTS = [
        {
            'url': 'https://vimeo.com/watchlater',
            'only_matching': True,
        },
    ]

    def _real_initialize(self):
        """Log in before any extraction takes place."""
        self._login()

    def _page_url(self, base_url, pagenum):
        """Return a request object for page *pagenum* below *base_url*."""
        page_request = sanitized_Request('%s/page:%d/' % (base_url, pagenum))
        # Ask for the XHR variant of the page: it is a partial html page
        # that carries the video ids, which the normal page doesn't contain.
        page_request.add_header('X-Requested-With', 'XMLHttpRequest')
        return page_request

    def _real_extract(self, url):
        """List the watch-later videos; *url* itself is not inspected."""
        list_id = 'watchlater'
        list_url = 'https://vimeo.com/watchlater'
        return self._extract_videos(list_id, list_url)
2014-09-28 18:14:16 +08:00
class VimeoLikesIE(InfoExtractor):
    """Playlist extractor for ``https://vimeo.com/user<id>/likes`` pages.

    The number of pages is read from the pagination markup of the first
    page; the individual pages are then fetched on demand through an
    ``InAdvancePagedList``.
    """

    IE_NAME = 'vimeo:likes'
    IE_DESC = 'Vimeo user likes'
    _VALID_URL = r'https://(?:www\.)?vimeo\.com/user(?P<id>[0-9]+)/likes/?(?:$|[?#]|sort:)'
    _TEST = {
        'url': 'https://vimeo.com/user755559/likes/',
        'playlist_mincount': 293,
        'info_dict': {
            'id': 'user755559_likes',
            'description': 'See all the videos urza likes',
            'title': 'Videos urza likes',
        },
    }

    def _real_extract(self, url):
        """Return a playlist result describing the videos liked by a user.

        The playlist carries the id ``user<id>_likes`` plus the title and
        description scraped from the first page; its entries are plain
        ``url`` results, one per link found in each page's browse list.
        """
        user_id = self._match_id(url)
        webpage = self._download_webpage(url, user_id)

        # The page link that directly precedes the "next" button holds the
        # highest page number.
        raw_page_count = self._search_regex(
            r'''(?x)<li><a\s+href="[^"]+"\s+data-page="([0-9]+)">
                .*?</a></li>\s*<li\s+class="pagination_next">
            ''', webpage, 'page count')
        page_count = self._int(raw_page_count, 'page count', fatal=True)
        PAGE_SIZE = 12

        title = self._html_search_regex(
            r'(?s)<h1>(.+?)</h1>', webpage, 'title', fatal=False)
        description = self._html_search_meta('description', webpage)

        def _get_page(idx):
            # *idx* is zero-based, the page numbers in the URL start at 1.
            page_no = idx + 1
            page_url = 'https://vimeo.com/user%s/likes/page:%d/sort:date' % (
                user_id, page_no)
            page_html = self._download_webpage(
                page_url, user_id,
                note='Downloading page %d/%d' % (page_no, page_count))
            browse_list = self._search_regex(
                r'(?s)<ol class="js-browse_list[^"]+"[^>]*>(.*?)</ol>',
                page_html, 'video content')
            link_re = r'<li[^>]*>\s*<a\s+href="([^"]+)"'
            for href in re.findall(link_re, browse_list):
                # Links may be relative; resolve them against the page URL.
                yield {
                    '_type': 'url',
                    'url': compat_urlparse.urljoin(page_url, href),
                }

        entries = InAdvancePagedList(_get_page, page_count, PAGE_SIZE)

        return {
            '_type': 'playlist',
            'id': 'user%s_likes' % user_id,
            'title': title,
            'description': description,
            'entries': entries,
        }