2014-02-24 01:28:22 +08:00
# encoding: utf-8
from __future__ import unicode_literals
2014-01-31 01:26:50 +08:00
import re
from . common import InfoExtractor
from . . utils import (
unified_strdate ,
)
2014-02-24 01:28:22 +08:00
2014-01-31 01:26:50 +08:00
class NormalbootsIE(InfoExtractor):
    """Extractor for video pages hosted on normalboots.com.

    A video page embeds an ``<iframe>`` player; the actual ``.mp4`` location
    is read from that player page, while the descriptive metadata is scraped
    from the video page itself.
    """

    # The last path component of the page URL doubles as the video id.
    _VALID_URL = r'http://(?:www\.)?normalboots\.com/video/(?P<videoid>[0-9a-z-]*)/?$'

    _TEST = {
        'url': 'http://normalboots.com/video/home-alone-games-jontron/',
        'md5': '8bf6de238915dd501105b44ef5f1e0f6',
        'info_dict': {
            'id': 'home-alone-games-jontron',
            'ext': 'mp4',
            'title': 'Home Alone Games - JonTron - NormalBoots',
            'description': 'Jon is late for Christmas. Typical. Thanks to: Paul Ritchey for Co-Writing/Filming: http://www.youtube.com/user/ContinueShow Michael Azzi for Christmas Intro Animation: http://michafrar.tumblr.com/ Jerrod Waters for Christmas Intro Music: http://www.youtube.com/user/xXJerryTerryXx Casey Ormond for ‘Tense Battle Theme’:\xa0http://www.youtube.com/Kiamet/',
            'uploader': 'JonTron',
            'upload_date': '20140125',
        }
    }

    def _real_extract(self, url):
        """Build the info dict for a single normalboots.com video page.

        :param url: page URL; expected to match ``_VALID_URL``.
        :returns: dict with ``id``, ``url``, ``title``, ``description``,
            ``thumbnail``, ``uploader`` and ``upload_date``.  The last two
            are ``None`` when they cannot be located on the page.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('videoid')

        webpage = self._download_webpage(url, video_id)

        # Uploader and upload date are optional metadata: a layout change on
        # the site must not prevent the download, hence fatal=False.
        video_uploader = self._html_search_regex(
            r'Posted\sby\s<a\shref="[A-Za-z0-9/]*">(?P<uploader>[A-Za-z]*)\s</a>',
            webpage, 'uploader', fatal=False)
        raw_upload_date = self._html_search_regex(
            r'<span style="text-transform:uppercase; font-size:inherit;">[A-Za-z]+, (?P<date>.*)</span>',
            webpage, 'date', fatal=False)
        # Only normalise the date when one was actually found.
        video_upload_date = (
            unified_strdate(raw_upload_date) if raw_upload_date else None)

        # The media file is not referenced on the video page itself; it is
        # declared in the embedded player page as  file: '<...>.mp4'.
        # These two lookups stay fatal: without them there is nothing to
        # download.
        player_url = self._html_search_regex(
            r'<iframe\swidth="[0-9]+"\sheight="[0-9]+"\ssrc="(?P<url>[\S]+)"',
            webpage, 'url')
        player_page = self._download_webpage(player_url, video_id)
        video_url = self._html_search_regex(
            r"file:\s'(?P<file>[^']+\.mp4)'", player_page, 'file')

        return {
            'id': video_id,
            'url': video_url,
            'title': self._og_search_title(webpage),
            'description': self._og_search_description(webpage),
            'thumbnail': self._og_search_thumbnail(webpage),
            'uploader': video_uploader,
            'upload_date': video_upload_date,
        }