2019-07-13 22:44:10 +02:00
# coding: utf-8
from __future__ import unicode_literals
import json
2019-07-18 11:43:41 +02:00
import re
2019-07-13 22:44:10 +02:00
from . common import InfoExtractor
2019-07-17 21:30:14 +02:00
from . . utils import urljoin
2019-07-13 22:44:10 +02:00
class TelevizeSeznamIE(InfoExtractor):
    """Extractor for episode pages on televizeseznam.cz.

    The numeric id at the end of the page URL is sent to the site's GraphQL
    endpoint, which answers with episode metadata and the base URL of an
    "spl" playlist.  That playlist is a JSON document listing the MP4
    renditions and, optionally, subtitle tracks.
    """

    # Page URLs end in "<display_id>-<numeric id>".
    _VALID_URL = r'https?://(?:www\.)?televizeseznam\.cz/.+/(?P<display_id>.+)-(?P<id>[0-9]+)'

    _GRAPHQL_URL = 'https://www.televizeseznam.cz/api/graphql'
    # Layout inside the query is free-form: GraphQL ignores insignificant
    # whitespace and line breaks.
    _GRAPHQL_QUERY = '''query LoadEpisode($urlName : String) { episode(urlName: $urlName) { ...VideoDetailFragmentOnEpisode } }
        fragment VideoDetailFragmentOnEpisode on Episode {
            id
            spl
            urlName
            name
            perex
        }
    '''

    _TEST = {
        'url': 'https://www.televizeseznam.cz/video/lajna/buh-57953890',
        'md5': '40c41ade1464a390a0b447e333df4239',
        'info_dict': {
            'id': '57953890',
            'display_id': 'buh',
            'title': 'Bůh',
            'description': 'Trenér Hrouzek je plný rozporů. Na pomoc si povolá i toho nejvyššího. Kdo to ale je? Pomůže mu vyřešit několik dilemat, která se mu v poslední době v životě nahromadila?',
            'ext': 'mp4',
        }
    }

    def extract_subtitles(self, spl_url, play_list):
        """Build a subtitle mapping from the playlist's subtitle section.

        :param spl_url: URL the playlist was fetched from; relative subtitle
            paths are resolved against it.
        :param play_list: the ``subtitles`` object of the playlist (a dict of
            track descriptors), or a falsy value when there is none.
        :return: ``None`` when ``play_list`` is falsy, otherwise a dict that
            maps each track's language to ``{'ext': 'srt', 'url': ...}``.
        """
        if not play_list:
            return None

        subtitles = {}
        for track in play_list.values():
            language = track.get('language')
            srt_path = (track.get('urls') or {}).get('srt')
            # A track without a language tag or an SRT rendition cannot be
            # represented here; skip it instead of failing the extraction.
            if not language or not srt_path:
                continue
            subtitles[language] = {
                'ext': 'srt',
                'url': urljoin(spl_url, srt_path),
            }
        return subtitles

    def extract_formats(self, spl_url, play_list, subtitles):
        """Turn the playlist's MP4 renditions into format dictionaries.

        :param spl_url: URL the playlist was fetched from; relative media
            paths are resolved against it.
        :param play_list: dict mapping a rendition name to its descriptor
            (``url`` and, optionally, a ``resolution`` pair).
        :param subtitles: value stored under ``subtitles`` in every format.
        :return: list of format dicts, one per rendition that has a URL.
        """
        formats = []
        for format_id, stream in play_list.items():
            media_path = stream.get('url')
            # Without a media URL there is nothing to download.
            if not media_path:
                continue
            fmt = {
                'format_id': format_id,
                'url': urljoin(spl_url, media_path),
                'protocol': 'https',
                'ext': 'mp4',
                # NOTE(review): subtitles are attached per format and as one
                # dict per language; confirm this is the shape the framework
                # expects before relying on it.
                'subtitles': subtitles,
            }
            resolution = stream.get('resolution')
            # resolution is read as (width, height); ignore truncated values.
            if resolution and len(resolution) >= 2:
                fmt.update({'width': resolution[0], 'height': resolution[1]})
            formats.append(fmt)
        return formats

    def _real_extract(self, url):
        """Resolve a page URL to its metadata and downloadable formats.

        :param url: page URL matching ``_VALID_URL``.
        :return: info dict with ``id``, ``display_id``, ``title``,
            ``description`` and ``formats``.
        """
        mobj = re.match(self._VALID_URL, url)
        display_id, video_id = mobj.group('display_id', 'id')

        data = self._download_json(
            self._GRAPHQL_URL, video_id, 'Downloading GraphQL result',
            data=json.dumps({
                'query': self._GRAPHQL_QUERY,
                'variables': {'urlName': video_id}
            }).encode('utf-8'),
            headers={'Content-Type': 'application/json;charset=UTF-8'}
        )['data']
        episode = data['episode']

        # The playlist lives under the episode's "spl" base URL.
        spl_url = episode['spl'] + 'spl2,3'
        metadata = self._download_json(spl_url, video_id, 'Downloading playlist')
        if 'Location' in metadata and 'data' not in metadata:
            # The server sometimes answers with a JSON body that points to
            # another playlist location instead of the playlist itself.
            spl_url = metadata['Location']
            metadata = self._download_json(
                spl_url, video_id, 'Redirected -> Downloading playlist')
        play_list = metadata['data']

        subtitles = self.extract_subtitles(spl_url, play_list.get('subtitles'))
        formats = self.extract_formats(spl_url, play_list['mp4'], subtitles)

        return {
            'id': video_id,
            'display_id': display_id,
            'title': episode.get('name'),
            'description': episode.get('perex'),
            'formats': formats
        }