2014-07-26 00:05:37 +02:00
from __future__ import unicode_literals
2017-11-18 05:28:58 +10:30
import hashlib
import hmac
2014-07-26 00:05:37 +02:00
import re
2017-11-18 05:28:58 +10:30
import time
2014-07-26 00:05:37 +02:00
from . common import InfoExtractor
2017-07-09 17:55:40 +07:00
from . . compat import compat_str
2015-08-30 01:08:55 +08:00
from . . utils import (
ExtractorError ,
js_to_json ,
2015-08-30 01:11:56 +08:00
int_or_none ,
2016-08-22 00:06:39 +01:00
parse_iso8601 ,
2017-07-09 17:55:40 +07:00
try_get ,
2018-03-27 15:08:40 +00:00
unescapeHTML ,
2017-12-31 01:22:15 +07:00
update_url_query ,
2015-08-30 01:08:55 +08:00
)
2014-07-26 00:05:37 +02:00
class ABCIE(InfoExtractor):
    """Extractor for media embedded in abc.net.au news articles.

    The article page is scanned for an
    ``inline(Video|Audio|YouTube)Data.push(...)`` JavaScript call; its
    argument is parsed as JSON and turned into formats (Video/Audio) or
    into a playlist of delegated URLs (YouTube).
    """
    IE_NAME = 'abc.net.au'
    _VALID_URL = r'https?://(?:www\.)?abc\.net\.au/news/(?:[^/]+/){1,2}(?P<id>\d+)'

    _TESTS = [{
        'url': 'http://www.abc.net.au/news/2014-11-05/australia-to-staff-ebola-treatment-centre-in-sierra-leone/5868334',
        'md5': 'cb3dd03b18455a661071ee1e28344d9f',
        'info_dict': {
            'id': '5868334',
            'ext': 'mp4',
            'title': 'Australia to help staff Ebola treatment centre in Sierra Leone',
            'description': 'md5:809ad29c67a05f54eb41f2a105693a67',
        },
        'skip': 'this video has expired',
    }, {
        'url': 'http://www.abc.net.au/news/2015-08-17/warren-entsch-introduces-same-sex-marriage-bill/6702326',
        'md5': 'db2a5369238b51f9811ad815b69dc086',
        'info_dict': {
            'id': 'NvqvPeNZsHU',
            'ext': 'mp4',
            'upload_date': '20150816',
            'uploader': 'ABC News (Australia)',
            'description': 'Government backbencher Warren Entsch introduces a cross-party sponsored bill to legalise same-sex marriage, saying the bill is designed to promote "an inclusive Australia, not a divided one.". Read more here: http://ab.co/1Mwc6ef',
            'uploader_id': 'NewsOnABC',
            'title': 'Marriage Equality: Warren Entsch introduces same sex marriage bill',
        },
        'add_ie': ['Youtube'],
        'skip': 'Not accessible from Travis CI server',
    }, {
        'url': 'http://www.abc.net.au/news/2015-10-23/nab-lifts-interest-rates-following-westpac-and-cba/6880080',
        'md5': 'b96eee7c9edf4fc5a358a0252881cc1f',
        'info_dict': {
            'id': '6880080',
            'ext': 'mp3',
            'title': 'NAB lifts interest rates, following Westpac and CBA',
            'description': 'md5:f13d8edc81e462fce4a0437c7dc04728',
        },
    }, {
        'url': 'http://www.abc.net.au/news/2015-10-19/6866214',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the info dict (or a YouTube playlist result) for *url*.

        Raises ExtractorError when the page carries no inline media data;
        if the page shows an "expired" notice, that notice is reported as
        an expected error instead.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        mobj = re.search(
            r'inline(?P<type>Video|Audio|YouTube)Data\.push\((?P<json_data>[^)]+)\);',
            webpage)
        if mobj is None:
            # Prefer the site's own expiry message over a generic failure.
            expired = self._html_search_regex(
                r'(?s)class="expired-(?:video|audio)".+?<span>(.+?)</span>',
                webpage, 'expired', None)
            if expired:
                raise ExtractorError(
                    '%s said: %s' % (self.IE_NAME, expired), expected=True)
            raise ExtractorError('Unable to extract video urls')

        media_type = mobj.group('type')
        urls_info = self._parse_json(
            mobj.group('json_data'), video_id, transform_source=js_to_json)

        # The pushed value may be a single object or a list of them.
        if not isinstance(urls_info, list):
            urls_info = [urls_info]

        if media_type == 'YouTube':
            return self.playlist_result([
                self.url_result(url_info['url']) for url_info in urls_info])

        formats = [{
            'url': url_info['url'],
            # Audio entries are marked as having no video codec.
            'vcodec': url_info.get('codec') if media_type == 'Video' else 'none',
            'width': int_or_none(url_info.get('width')),
            'height': int_or_none(url_info.get('height')),
            'tbr': int_or_none(url_info.get('bitrate')),
            'filesize': int_or_none(url_info.get('filesize')),
        } for url_info in urls_info]

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': self._og_search_title(webpage),
            'formats': formats,
            'description': self._og_search_description(webpage),
            'thumbnail': self._og_search_thumbnail(webpage),
        }
2016-08-22 00:06:39 +01:00
class ABCIViewIE(InfoExtractor):
    """Extractor for a single ABC iview program or livestream.

    Program metadata is fetched from the iview ``/api/programs/`` endpoint
    and the HLS stream URL is signed with a token obtained from
    ``/auth/hls/sign``.
    """
    IE_NAME = 'abc.net.au:iview'
    _VALID_URL = r'https?://iview\.abc\.net\.au/(?:(?:[^/]+/)*video/|programs/(?:[^/]+/)*)(?P<id>[A-Z0-9]+)'
    _GEO_COUNTRIES = ['AU']

    # ABC iview programs are normally available for 14 days only.
    _TESTS = [{
        'url': 'https://iview.abc.net.au/show/ben-and-holly-s-little-kingdom/series/0/video/ZX9371A011S00',
        'info_dict': {
            'id': 'ZX9371A011S00',
            'ext': 'mp4',
            'title': "Dolly Plum",
            'series': "Ben And Holly's Little Kingdom",
            'description': 'md5:7d5d426d28814ac97b7c98e33f37193c',
            'upload_date': '20190301',
            'uploader_id': 'abc4kids',
            'timestamp': 1551466585,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://iview.abc.net.au/programs/CK1752H001S00',
        'info_dict': {
            'id': 'CK1752H001S00',
            'ext': 'mp4',
            'title': "Emma Alberici: Guess How Much I Love You (Italian)",
            'series': "Play School Story Time: Languages",
            'description': 'md5:b61ce34edc946e109e76c7432db5d91f',
            'upload_date': '20171204',
            'uploader_id': 'abc4kids',
            'timestamp': 1512414300,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://iview.abc.net.au/programs/play-school-story-time-languages/CK1752H001S00',
        'info_dict': {
            'id': 'CK1752H001S00',
            'ext': 'mp4',
            'title': "Emma Alberici: Guess How Much I Love You (Italian)",
            'series': "Play School Story Time: Languages",
            'description': 'md5:b61ce34edc946e109e76c7432db5d91f',
            'upload_date': '20171204',
            'uploader_id': 'abc4kids',
            'timestamp': 1512414300,
        },
        'params': {
            'skip_download': True,
        },
    }]

    def _make_result(self, video_id, title, house_number, video_params, **kwargs):
        """Build an info dict from an iview program/episode record.

        video_id, title and house_number are used as given; every other
        field is read from the *video_params* mapping. Any extra keyword
        arguments are merged into the result last, so they override the
        fields computed here.
        """
        res = {
            'id': video_id,
            'title': title,
            'description': video_params.get('description'),
            'thumbnail': video_params.get('thumbnail'),
            'duration': int_or_none(video_params.get('duration')),
            'timestamp': parse_iso8601(video_params.get('pubDate'), ' '),
            'series': unescapeHTML(video_params.get('seriesTitle')),
            # Fall back to the first 7 characters of the id; guard against
            # callers that have no id at all instead of slicing None.
            'series_id': video_params.get('seriesHouseNumber') or (
                video_id[:7] if video_id else None),
            'season_number': int_or_none(self._search_regex(
                r'\bSeries\s+(\d+)\b', title, 'season number', default=None)),
            'episode_number': int_or_none(self._search_regex(
                r'\bEp\s+(\d+)\b', title, 'episode number', default=None)),
            'episode_id': house_number,
            'uploader_id': video_params.get('channel'),
        }
        res.update(**kwargs)
        return res

    def _real_extract(self, url):
        """Return the info dict for the program identified by *url*.

        Raises ExtractorError when the program's playlist has no
        'program'/'livestream' entry, or (via _sort_formats) when no
        playable formats are found.
        """
        video_id = self._match_id(url)
        video_params = self._download_json(
            'https://iview.abc.net.au/api/programs/' + video_id, video_id)
        title = unescapeHTML(
            video_params.get('title') or video_params['seriesTitle'])

        # Pick the first playlist entry that is the actual program; report
        # a proper extractor error rather than leaking StopIteration.
        stream = next(
            (s for s in video_params['playlist']
             if s.get('type') in ('program', 'livestream')),
            None)
        if stream is None:
            raise ExtractorError('Unable to find a program stream')

        house_number = video_params.get('episodeHouseNumber') or video_id

        # The sign request path is HMAC-SHA256'd with a fixed key and the
        # digest is appended as `sig`; the response body is the token.
        path = '/auth/hls/sign?ts={0}&hn={1}&d=android-tablet'.format(
            int(time.time()), house_number)
        sig = hmac.new(
            b'android.content.res.Resources',
            path.encode('utf-8'), hashlib.sha256).hexdigest()
        token = self._download_webpage(
            'http://iview.abc.net.au{0}&sig={1}'.format(path, sig), video_id)

        def tokenize_url(url, token):
            return update_url_query(url, {
                'hdnea': token,
            })

        # Start from an empty list so that a stream without any usable
        # 'sd'/'sd-low' URL fails cleanly in _sort_formats instead of
        # raising UnboundLocalError.
        formats = []
        for sd in ('sd', 'sd-low'):
            sd_url = try_get(
                stream, lambda x: x['streams']['hls'][sd], compat_str)
            if not sd_url:
                continue
            formats = self._extract_m3u8_formats(
                tokenize_url(sd_url, token), video_id, 'mp4',
                entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
            if formats:
                break
        self._sort_formats(formats)

        subtitles = {}
        # `captions` may be present but null, so normalise before .get().
        src_vtt = (stream.get('captions') or {}).get('src-vtt')
        if src_vtt:
            subtitles['en'] = [{
                'url': src_vtt,
                'ext': 'vtt',
            }]

        is_live = video_params.get('livestream') == '1'
        if is_live:
            title = self._live_title(title)

        return self._make_result(
            video_id, title, house_number, video_params,
            formats=formats,
            subtitles=subtitles,
            is_live=is_live)
2019-02-27 00:03:24 +11:00
2019-04-25 23:38:04 +10:00
class ABCIViewShowIE(ABCIViewIE):
    """Playlist extractor for an ABC iview show (series) page.

    Series data is fetched from the iview ``/api/series/`` endpoint and
    each listed episode becomes a URL entry delegated to ABCIViewIE.
    """
    IE_NAME = 'abc.net.au:iview:show'
    _VALID_URL = r'https?://iview\.abc\.net\.au/(?:show|programs)/(?P<id>[a-z0-9\-]+)/?'

    _TESTS = [{
        'url': 'https://iview.abc.net.au/programs/play-school-celebrity-covers',
        'info_dict': {
            'title': "Play School Celebrity Covers",
            'description': 'md5:5cf7b4e466b72ee1b930fc95b2a80ed7',
            'uploader_id': 'abc4kids',
        },
        'playlist_count': 31,
    }, {
        'url': 'https://iview.abc.net.au/show/play-school-story-time',
        'info_dict': {
            'title': "Play School Story Time",
            'description': 'md5:2763b35f418d334d72e3d7f7fc7afb82',
            'uploader_id': 'abc4kids',
        },
        'playlist_count': 24,
    }]

    @classmethod
    def suitable(cls, url):
        """Match show URLs only when ABCIViewIE does not claim them."""
        if ABCIViewIE.suitable(url):
            return False
        return super(ABCIViewShowIE, cls).suitable(url)

    def _real_extract(self, url):
        """Return a playlist result with one URL entry per episode."""
        show_id = self._match_id(url)
        show_data = self._download_json(
            'https://iview.abc.net.au/api/series/' + show_id, show_id)

        description = show_data.get('seriesDescription')
        # The title used to be read from 'seriesDescription', duplicating
        # the description. Prefer 'seriesTitle' and keep the previous value
        # as a fallback.
        # NOTE(review): 'seriesTitle' is the key the episode records use in
        # this file; confirm the series endpoint exposes it as well.
        title = (
            unescapeHTML(show_data.get('seriesTitle'))
            or description
            or show_id)

        # Sometimes the episodes are listed in reverse order, with the most
        # recently uploaded ones first. This is normally for time-limited
        # series. Others appear oldest to newest.
        entries = []
        for episode in show_data.get('episodes') or []:
            href = episode.get('href')
            if not href:
                # Without a page path there is no URL to hand to ABCIViewIE.
                continue
            house_number = episode.get('episodeHouseNumber')
            episode_title = unescapeHTML(
                episode.get('title') or episode['seriesTitle'])
            entries.append(self._make_result(
                house_number, episode_title, house_number, episode,
                _type='url',
                # Strip a leading slash so the joined URL never contains '//'.
                url='https://iview.abc.net.au/' + href.lstrip('/'),
                ie_key=ABCIViewIE.ie_key()))

        return {
            '_type': 'playlist',
            'id': show_id,
            'title': title,
            'description': description,
            'uploader_id': entries[0].get('uploader_id') if entries else None,
            'entries': entries,
        }