2014-01-17 10:07:01 +08:00
from __future__ import unicode_literals
2013-06-24 04:31:12 +08:00
import re
from . common import InfoExtractor
from . . utils import (
ExtractorError ,
2015-04-23 01:58:39 +08:00
find_xpath_attr ,
2015-11-22 00:18:17 +08:00
sanitized_Request ,
2013-06-24 04:31:12 +08:00
)
class FlickrIE(InfoExtractor):
    """Extractor for videos embedded in flickr.com photo pages."""

    # Captures the uploader id and the numeric photo/video id from the URL.
    _VALID_URL = r'https?://(?:www\.|secure\.)?flickr\.com/photos/(?P<uploader_id>[\w\-_@]+)/(?P<id>\d+).*'
    _TEST = {
        'url': 'http://www.flickr.com/photos/forestwander-nature-pictures/5645318632/in/photostream/',
        'md5': '6fdc01adbc89d72fc9c4f15b4a4ba87b',
        'info_dict': {
            'id': '5645318632',
            'ext': 'mp4',
            "description": "Waterfalls in the Springtime at Dark Hollow Waterfalls. These are located just off of Skyline Drive in Virginia. They are only about 6/10 of a mile hike but it is a pretty steep hill and a good climb back up.",
            "uploader_id": "forestwander-nature-pictures",
            "title": "Dark Hollow Waterfalls"
        }
    }

    def _real_extract(self, url):
        """Resolve a Flickr photo page URL to a downloadable video.

        The photo page is fetched to obtain the per-video ``secret``; that
        secret is used to request a first XML document that yields a node
        id, which in turn is used to request the playlist XML holding the
        actual stream location.

        Returns a dict with ``id``, ``url``, ``ext``, ``title``,
        ``description``, ``thumbnail`` and ``uploader_id``.

        Raises ExtractorError when the node id or the stream information
        cannot be found in the downloaded XML documents.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        video_uploader_id = mobj.group('uploader_id')

        # Request the canonical photo page rather than whatever variant
        # (photostream, sets, ...) the user supplied.
        webpage_url = 'http://www.flickr.com/photos/' + video_uploader_id + '/' + video_id
        req = sanitized_Request(webpage_url)
        req.add_header(
            'User-Agent',
            # it needs a more recent version
            'Mozilla/5.0 (X11; Linux x86_64; rv:38.0) Gecko/20150101 Firefox/38.0 (Chrome)')
        webpage = self._download_webpage(req, video_id)

        secret = self._search_regex(r'secret"\s*:\s*"(\w+)"', webpage, 'secret')

        first_url = 'https://secure.flickr.com/apps/video/video_mtl_xml.gne?v=x&photo_id=' + video_id + '&secret=' + secret + '&bitrate=700&target=_self'
        first_xml = self._download_xml(first_url, video_id, 'Downloading first data webpage')

        node = find_xpath_attr(
            first_xml, './/{http://video.yahoo.com/YEP/1.0/}Item', 'id',
            'id')
        # Fail with an extractor-level error instead of an AttributeError /
        # TypeError when the expected item is absent or empty.
        if node is None or not node.text:
            raise ExtractorError('Unable to extract node id')
        node_id = node.text

        second_url = 'https://secure.flickr.com/video_playlist.gne?node_id=' + node_id + '&tech=flash&mode=playlist&bitrate=700&secret=' + secret + '&rd=video.yahoo.com&noad=1'
        second_xml = self._download_xml(second_url, video_id, 'Downloading second data webpage')

        self.report_extraction(video_id)

        stream = second_xml.find('.//STREAM')
        if stream is None:
            raise ExtractorError('Unable to extract video url')
        # The final URL is the concatenation of the APP and FULLPATH
        # attributes; both must be present.
        app = stream.attrib.get('APP')
        full_path = stream.attrib.get('FULLPATH')
        if app is None or full_path is None:
            raise ExtractorError('Unable to extract video url')
        video_url = app + full_path

        return {
            'id': video_id,
            'url': video_url,
            'ext': 'mp4',
            'title': self._og_search_title(webpage),
            'description': self._og_search_description(webpage),
            'thumbnail': self._og_search_thumbnail(webpage),
            'uploader_id': video_uploader_id,
        }