2015-02-20 14:13:41 +02:00
# coding: utf-8
from __future__ import unicode_literals
2018-07-22 20:20:19 +02:00
from io import StringIO
2018-06-06 00:41:08 +01:00
import re
2015-02-20 14:13:41 +02:00
from . common import InfoExtractor
from . . utils import (
2016-09-16 00:54:34 +07:00
int_or_none ,
2015-02-20 14:13:41 +02:00
parse_iso8601 ,
)
class TV4IE(InfoExtractor):
    """Extractor for tv4.se clips and tv4play.se videos.

    Besides the regular formats, entries whose ``format_id`` contains
    ``textstream`` are treated as segmented WebVTT subtitle playlists: their
    fragments are downloaded, re-timed and merged into one ``vtt`` document.
    """

    IE_DESC = 'tv4.se and tv4play.se'
    _VALID_URL = r'''(?x)https?://(?:www\.)?
        (?:
            tv4\.se/(?:[^/]+)/klipp/(?:.*)-|
            tv4play\.se/
            (?:
                (?:program|barn)/(?:[^/]+/|(?:[^\?]+)\?video_id=)|
                iframe/video/|
                film/|
                sport/|
            )
        )(?P<id>[0-9]+)'''
    _GEO_COUNTRIES = ['SE']
    _TESTS = [
        {
            'url': 'http://www.tv4.se/kalla-fakta/klipp/kalla-fakta-5-english-subtitles-2491650',
            'md5': 'cb837212f342d77cec06e6dad190e96d',
            'info_dict': {
                'id': '2491650',
                'ext': 'mp4',
                'title': 'Kalla Fakta 5 (english subtitles)',
                'thumbnail': r're:^https?://.*\.jpg$',
                'timestamp': int,
                'upload_date': '20131125',
            },
        },
        {
            'url': 'http://www.tv4play.se/iframe/video/3054113',
            'md5': 'cb837212f342d77cec06e6dad190e96d',
            'info_dict': {
                'id': '3054113',
                'ext': 'mp4',
                'title': 'Så här jobbar ficktjuvarna - se avslöjande bilder',
                'thumbnail': r're:^https?://.*\.jpg$',
                'description': 'Unika bilder avslöjar hur turisternas fickor vittjas mitt på Stockholms central. Två experter på ficktjuvarna avslöjar knepen du ska se upp för.',
                'timestamp': int,
                'upload_date': '20150130',
            },
        },
        {
            'url': 'http://www.tv4play.se/sport/3060959',
            'only_matching': True,
        },
        {
            'url': 'http://www.tv4play.se/film/2378136',
            'only_matching': True,
        },
        {
            'url': 'http://www.tv4play.se/barn/looney-tunes?video_id=3062412',
            'only_matching': True,
        },
        {
            'url': 'http://www.tv4play.se/program/farang/3922081',
            'only_matching': True,
        }
    ]

    # Encoding used to decode subtitle playlists and WebVTT fragments.
    _ENCODING = 'UTF-8'
    # Tick rate used to convert the MPEGTS value of X-TIMESTAMP-MAP to seconds.
    _MPEG2_PTS_RATE_HZ = 90000
    # Cue timing line: "HH:MM:SS.mmm --> HH:MM:SS.mmm" (8 numeric groups).
    _REGEX_SUB_ENTRY_TIMELINE = re.compile(
        r'^(\d+):(\d+):(\d+)\.(\d+)[^0-9]+(\d+):(\d+):(\d+)\.(\d+)$'
    )
    # Fragment header: group 1 is the MPEGTS value, groups 2-5 the LOCAL time.
    _REGEX_X_TIMESTAMP = re.compile(
        r'^X-TIMESTAMP-MAP=MPEGTS:(\d+),LOCAL:(\d+):(\d+):(\d+)\.(\d+)$'
    )
    # Playlist line naming a fragment: group 1 is the relative URL,
    # group 2 the numeric fragment index.
    _REGEX_WEBVTT = re.compile(r'^(.+-(\d+)\.webvtt.*)$')

    def _real_extract(self, url):
        """Extract metadata, formats and subtitles for the video at *url*.

        :param url: page URL matching ``_VALID_URL``
        :return: info dict with id, title, formats, subtitles and metadata
        """
        video_id = self._match_id(url)

        info = self._download_json(
            'http://www.tv4play.se/player/assets/%s.json' % video_id,
            video_id, 'Downloading video info JSON')

        title = info['title']

        manifest_url = self._download_json(
            'https://playback-api.b17g.net/media/' + video_id,
            video_id, query={
                'service': 'tv4',
                'device': 'browser',
                'protocol': 'hls',
            })['playbackItem']['manifestUrl']

        # The HLS manifest URL is rewritten to probe the sibling
        # DASH / HDS / MSS manifests; every probe is non-fatal.
        all_formats = self._extract_m3u8_formats(
            manifest_url, video_id, 'mp4',
            'm3u8_native', m3u8_id='hls', fatal=False)
        all_formats.extend(self._extract_mpd_formats(
            manifest_url.replace('.m3u8', '.mpd'),
            video_id, mpd_id='dash', fatal=False))
        all_formats.extend(self._extract_f4m_formats(
            manifest_url.replace('.m3u8', '.f4m'),
            video_id, f4m_id='hds', fatal=False))
        all_formats.extend(self._extract_ism_formats(
            re.sub(r'\.ism/.+?\.m3u8', r'.ism/Manifest', manifest_url),
            video_id, ism_id='mss', fatal=False))

        if not all_formats and info.get('is_geo_restricted'):
            self.raise_geo_restricted(countries=self._GEO_COUNTRIES)

        # Split subtitle playlists ("textstream" entries) from real formats.
        subtitle_formats = []
        other_formats = []
        for fmt in all_formats:
            if 'textstream' in (fmt.get('format_id') or ''):
                subtitle_formats.append(fmt)
            else:
                other_formats.append(fmt)

        self._sort_formats(other_formats)

        subtitles = self._webvtt_download_all_subtitle_data(
            video_id, subtitle_formats)

        return {
            'id': video_id,
            'title': title,
            'formats': other_formats,
            'subtitles': subtitles,
            'description': info.get('description'),
            'timestamp': parse_iso8601(info.get('broadcast_date_time')),
            'duration': int_or_none(info.get('duration')),
            'thumbnail': info.get('image'),
            'is_live': info.get('is_live') is True,
        }

    @staticmethod
    def _webvtt_adjust_time(reference_sec, ahead_sec, actual_sec):
        """Shift a fragment-local cue time onto the reference timeline.

        :param reference_sec: reference time derived from the MPEGTS value
        :type reference_sec: float
        :param ahead_sec: LOCAL time of the fragment's timestamp map
        :type ahead_sec: float
        :param actual_sec: cue time as written in the fragment
        :type actual_sec: float
        :return: ``reference_sec - ahead_sec + actual_sec``
        :rtype: float
        """
        return reference_sec - ahead_sec + actual_sec

    def _webvtt_download_all_subtitle_data(self, video_id, subtitle_formats):
        """Download every subtitle playlist and group the results by language.

        :param video_id: id used for download log messages
        :param subtitle_formats: format dicts describing subtitle playlists
        :return: mapping of language tag to a list of subtitle dicts
        :rtype: dict
        """
        subtitles = {}
        for subtitle_format in subtitle_formats:
            subtitle = self._webvtt_download_subtitle_data(
                video_id, subtitle_format)
            if subtitle is None:
                continue
            # 'und' (undetermined) keeps tracks without a language usable
            # instead of failing on a missing key or using None as the tag.
            tag = subtitle_format.get('language') or 'und'
            subtitles.setdefault(tag, []).append(subtitle)
        return subtitles

    def _webvtt_download_subtitle_data(self, video_id, subtitle_format):
        """Download one segmented WebVTT playlist and merge its fragments.

        :param video_id: id used for download log messages
        :param subtitle_format: format dict; reads ``url`` (the subtitle
            playlist) and ``manifest_url`` (fragment URLs are resolved
            against its directory)
        :return: ``{'ext': 'vtt', 'data': <text>}``, or ``None`` when the
            playlist or all of its fragments could not be downloaded
        :rtype: dict or None
        """
        playlist_handle = self._request_webpage(
            subtitle_format['url'], video_id, fatal=False)
        if not playlist_handle:
            return None
        playlist_data = playlist_handle.read()
        if not playlist_data:
            return None

        base_match = re.search(
            r'^(.+)/[^/]+', subtitle_format.get('manifest_url') or '')
        if not base_match:
            return None
        base_url = base_match.group(1)

        with StringIO() as merged:
            first_fragment = True
            for playlist_line in playlist_data.decode(
                    self._ENCODING).splitlines():
                # strip() keeps a trailing '\r' out of the fragment URL
                match = self._REGEX_WEBVTT.match(playlist_line.strip())
                if not match:
                    continue
                fragment_handle = self._request_webpage(
                    '/'.join((base_url, match.group(1))),
                    '%s-%s' % (video_id, match.group(2)),
                    fatal=False)
                if not fragment_handle:
                    continue
                fragment_data = fragment_handle.read()
                if not fragment_data:
                    continue
                self._webvtt_write_fragment(
                    fragment_data, merged, first_fragment)
                # Only the first successfully fetched fragment contributes
                # the "WEBVTT" header.
                first_fragment = False
            merged_text = merged.getvalue()

        if not merged_text:
            return None
        return {'ext': 'vtt', 'data': merged_text}

    def _webvtt_handle_one_fragment(
            self, webvtt_bytes, vtt_file, first_fragment=False):
        """Re-time one WebVTT fragment and append it to *vtt_file*.

        The first line must be ``WEBVTT``, otherwise the fragment is ignored.
        The second line is consumed as the ``X-TIMESTAMP-MAP`` header; when it
        does not match, cue times are written unshifted.

        :param webvtt_bytes: raw fragment content
        :type webvtt_bytes: bytes
        :param vtt_file: text stream receiving the merged subtitles
        :type vtt_file: TextIO
        :param first_fragment: whether to emit the ``WEBVTT`` header line
        :type first_fragment: bool
        """
        mpeg_ref_sec = 0.0
        local_ref_sec = 0.0
        rate = float(self._MPEG2_PTS_RATE_HZ)
        to_seconds = self._webvtt_time_parts_to_float

        fragment_lines = webvtt_bytes.decode(self._ENCODING).split('\n')
        for line_index, line in enumerate(fragment_lines):
            line = line.strip()
            if line_index == 0:
                # A UTF-8 byte order mark may precede the signature.
                if line.lstrip('\ufeff') != 'WEBVTT':
                    break
                if first_fragment:
                    vtt_file.write('WEBVTT\n')
            elif line_index == 1:
                match = self._REGEX_X_TIMESTAMP.match(line)
                if match:
                    # NOTE(review): the 10 s subtraction presumably removes
                    # the stream's initial PTS offset -- confirm.
                    mpeg_ref_sec = (
                        int(match.group(1)) - 10 * self._MPEG2_PTS_RATE_HZ
                    ) / rate
                    local_ref_sec = to_seconds(
                        *(int(part) for part in match.groups()[1:5]))
            elif line:
                match = self._REGEX_SUB_ENTRY_TIMELINE.match(line)
                if not match:
                    # cue text, identifiers, notes: copied through unchanged
                    vtt_file.write(line + '\n')
                    continue
                parts = [int(part) for part in match.groups()]
                start_sec = self._webvtt_adjust_time(
                    mpeg_ref_sec, local_ref_sec, to_seconds(*parts[:4]))
                stop_sec = self._webvtt_adjust_time(
                    mpeg_ref_sec, local_ref_sec, to_seconds(*parts[4:]))
                # blank line separates this cue from the previous block
                vtt_file.write('\n')
                vtt_file.write(
                    self._webvtt_make_timeline(start_sec, stop_sec) + '\n')

    def _webvtt_make_timeline(self, start_sec=0.0, stop_sec=0.0):
        """Format a WebVTT cue timing line.

        :param start_sec: cue start in seconds
        :type start_sec: float
        :param stop_sec: cue end in seconds
        :type stop_sec: float
        :return: ``HH:MM:SS.mmm --> HH:MM:SS.mmm``
        :rtype: str
        """
        parts = (
            self._webvtt_time_float_to_parts(start_sec)
            + self._webvtt_time_float_to_parts(stop_sec)
        )
        return '%02d:%02d:%02d.%03d --> %02d:%02d:%02d.%03d' % parts

    @staticmethod
    def _webvtt_time_parts_to_float(
            hours=0, minutes=0, seconds=0, milli_seconds=0):
        """Combine clock components into seconds.

        :param hours:
        :type hours: int
        :param minutes:
        :type minutes: int
        :param seconds:
        :type seconds: int
        :param milli_seconds:
        :type milli_seconds: int
        :return: total time in seconds
        :rtype: float
        """
        # 1000.0 forces true division on Python 2 as well
        return seconds + 60 * (minutes + 60 * hours) + milli_seconds / 1000.0

    @staticmethod
    def _webvtt_time_float_to_parts(input_sec=0.0):
        """Split seconds into ``(hours, minutes, seconds, milli_seconds)``.

        The value is rounded to whole milliseconds first so float error does
        not drop a millisecond (e.g. 1.001 -> 1 ms, not 0). Negative input is
        clamped to zero because a cue time cannot be negative.

        :param input_sec: time in seconds
        :type input_sec: float
        :return: hours, minutes, seconds, milliseconds
        :rtype: Tuple[int, int, int, int]
        """
        total_ms = max(0, int(round(input_sec * 1000)))
        total_sec, milli_seconds = divmod(total_ms, 1000)
        total_min, seconds = divmod(total_sec, 60)
        hours, minutes = divmod(total_min, 60)
        return hours, minutes, seconds, milli_seconds

    def _webvtt_write_fragment(
            self, webvtt_data, output_stream, first_fragment=False):
        """Append one downloaded fragment to *output_stream*.

        Thin wrapper around :meth:`_webvtt_handle_one_fragment`.

        :param webvtt_data: raw fragment content
        :type webvtt_data: bytes
        :param output_stream: text stream receiving the merged subtitles
        :type output_stream: TextIO
        :param first_fragment: whether to emit the ``WEBVTT`` header line
        :type first_fragment: bool
        """
        self._webvtt_handle_one_fragment(
            webvtt_data, output_stream, first_fragment)