2013-07-11 17:25:14 +08:00
import re
2013-07-11 18:11:00 +08:00
from . . utils import (
compat_urllib_parse ,
determine_ext
)
2013-07-11 17:25:14 +08:00
from . common import InfoExtractor
2013-07-11 18:11:00 +08:00
class EHowIE(InfoExtractor):
    """Information extractor for video pages on ehow.com.

    The numeric video id is taken from the page URL; the media URL and the
    metadata are scraped from the downloaded page HTML.
    """

    IE_NAME = u'eHow'
    # The id is the run of digits following the first underscore of the
    # last path component, e.g. ".../video_12245069_some-title.html".
    _VALID_URL = r'(?:https?://)?(?:www\.)?ehow\.com/[^/_?]*_(?P<id>[0-9]+)'
    _TEST = {
        u'url': u'http://www.ehow.com/video_12245069_hardwood-flooring-basics.html',
        u'file': u'12245069.flv',
        u'md5': u'9809b4e3f115ae2088440bcb4efbf371',
        u'info_dict': {
            u"title": u"Hardwood Flooring Basics",
            u"description": u"Hardwood flooring may be time consuming, but its ultimately a pretty straightforward concept. Learn about hardwood flooring basics with help from a hardware flooring business owner in this free video...",
            u"uploader": u"Erick Nathan"
        }
    }

    def _real_extract(self, url):
        """Build the info dictionary for a single eHow video page.

        Args:
            url: Page URL; it is expected to match ``_VALID_URL``.

        Returns:
            A dict with the keys ``_type``, ``id``, ``url``, ``ext``,
            ``title``, ``thumbnail``, ``description`` and ``uploader``.
            ``thumbnail``, ``description`` and ``uploader`` are ``None``
            when the corresponding ``<meta>`` tag is not found.

        The video URL and the title are mandatory: their lookups keep the
        default (fatal) behaviour of ``_search_regex``.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)

        # The media location is embedded percent-encoded as a "file=" or
        # "source=" parameter; the value ends at the first quote or '&'.
        video_url = self._search_regex(
            r'(?:file|source)=(http[^\'"&]*)', webpage, u'video URL')
        final_url = compat_urllib_parse.unquote(video_url)

        # The " | eHow" suffix is site branding, not part of the title.
        title = self._search_regex(
            r'<meta property="og:title" content="(.+?)" />',
            webpage, u'Video title').replace(' | eHow', '')

        # Optional metadata: a missing tag must not abort an otherwise
        # successful extraction, so these lookups are non-fatal.
        thumbnail_url = self._search_regex(
            r'<meta property="og:image" content="(.+?)" />',
            webpage, u'thumbnail URL', fatal=False)
        uploader = self._search_regex(
            r'<meta name="uploader" content="(.+?)" />',
            webpage, u'uploader', fatal=False)
        description = self._search_regex(
            r'<meta property="og:description" content="(.+?)" />',
            webpage, u'video description', fatal=False)

        ext = determine_ext(final_url)

        return {
            '_type': 'video',
            'id': video_id,
            'url': final_url,
            'ext': ext,
            'title': title,
            'thumbnail': thumbnail_url,
            'description': description,
            'uploader': uploader,
        }
2013-07-11 17:25:14 +08:00