2014-01-29 01:55:06 +08:00
from __future__ import unicode_literals
2013-12-05 21:29:08 +08:00
import re
from . common import InfoExtractor
class NineGagIE(InfoExtractor):
    """Extractor for 9gag.tv video pages (``http(s)://[www.]9gag.tv/v/<id>``).

    The page itself is only scraped for metadata; the returned result points
    at the embedded external video and names the ``Youtube`` extractor as the
    one that should handle it.
    """

    IE_NAME = '9gag'
    # The numeric post id is captured in the named group ``id``.
    _VALID_URL = r'^https?://(?:www\.)?9gag\.tv/v/(?P<id>[0-9]+)'

    _TEST = {
        "url": "http://9gag.tv/v/1912",
        "info_dict": {
            "id": "1912",
            "ext": "mp4",
            "description": "This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. (Thanks, Dino!)",
            "title": "\"People Are Awesome 2013\" Is Absolutely Awesome",
            "view_count": int,
            "thumbnail": "re:^https?://",
        },
        'add_ie': ['Youtube']
    }

    def _real_extract(self, url):
        """Scrape a 9gag.tv post page and return a ``url_transparent`` result.

        ``url`` is matched against ``_VALID_URL`` to obtain the post id, the
        page is downloaded, and the external video id, description, view
        count, title and thumbnail are read from its HTML.

        Returns a dict with ``_type`` set to ``'url_transparent'`` whose
        ``url`` is the external video id and whose ``ie_key`` is ``'Youtube'``.
        """
        # NOTE(review): assumes the caller only passes URLs that already match
        # _VALID_URL; a non-matching URL would make ``mobj`` None here.
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)

        # Mandatory: the id stored in the post container's data-external-id
        # attribute is what gets handed to the Youtube extractor.
        youtube_id = self._html_search_regex(
            r'(?s)id="jsid-video-post-container".*?data-external-id="([^"]+)"',
            webpage, 'video ID')

        # Optional metadata: looked up with fatal=False so a missing field
        # does not abort the extraction.
        description = self._html_search_regex(
            r'(?s)<div class="video-caption">.*?<p>(.*?)</p>', webpage,
            'description', fatal=False)
        view_count_str = self._html_search_regex(
            r'<p><b>([0-9][0-9,]*)</b> views</p>', webpage, 'view count',
            fatal=False)
        # The page prints the count with thousands separators ("1,234,567");
        # strip the commas before converting. None means it was not found.
        view_count = (
            None if view_count_str is None
            else int(view_count_str.replace(',', '')))

        return {
            '_type': 'url_transparent',
            'url': youtube_id,
            'ie_key': 'Youtube',
            'id': video_id,
            'title': self._og_search_title(webpage),
            'description': description,
            'view_count': view_count,
            'thumbnail': self._og_search_thumbnail(webpage),
        }