From 8091303b7b5fc5485c03acc2b990244b35b62e1a Mon Sep 17 00:00:00 2001 From: "Michael Hsin (ytd.l)" Date: Sun, 4 Dec 2016 07:29:26 +0800 Subject: [PATCH 1/6] First version. Handles archived videos only. --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/linelive.py | 129 +++++++++++++++++++++++++++++ 2 files changed, 130 insertions(+) create mode 100644 youtube_dl/extractor/linelive.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 46d007b7d..e23fa5026 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -472,6 +472,7 @@ from .limelight import ( LimelightChannelIE, LimelightChannelListIE, ) +from .linelive import LineLiveIE from .litv import LiTVIE from .liveleak import LiveLeakIE from .livestream import ( diff --git a/youtube_dl/extractor/linelive.py b/youtube_dl/extractor/linelive.py new file mode 100644 index 000000000..729854552 --- /dev/null +++ b/youtube_dl/extractor/linelive.py @@ -0,0 +1,129 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re +import json +import itertools +from pprint import pformat + +from .common import InfoExtractor + +from ..compat import compat_str + +from ..utils import ( + determine_ext, + error_to_compat_str, + ExtractorError, + int_or_none, + parse_iso8601, + sanitized_Request, + str_to_int, + unescapeHTML, + mimetype2ext, +) + + +class LineLiveBaseInfoExtractor(InfoExtractor): + @classmethod + def _match_channel(cls, url): + if '_VALID_URL_RE' not in cls.__dict__: + cls._VALID_URL_RE = re.compile(cls._VALID_URL) + m = cls._VALID_URL_RE.match(url) + assert m + return m.group('channel') + + def _extract_formats(self, orig_urls): + formats = [] + for key in orig_urls: + if key == "abr" or key == "aac": + """ Audio only streams, discard them """ + elif not orig_urls.get(key): + """ null url """ + else: + format_id = key + url = orig_urls.get(key) + height = format_id + ext = 'mp4' + formats.append({ + 'format_id': format_id, + 'format': format_id, + 'url': url, + 'height': height, + 'ext': ext, + }) + self._sort_formats(formats) + return formats + + +class LineLiveIE(LineLiveBaseInfoExtractor): + # https://live.line.me/r/channels/21/broadcast/51883 + _VALID_URL = r'(?i)(?:https?://)?live\.line\.me/channels/(?P\d+)/broadcast/(?P\d+)' + IE_NAME = 'linelive' + + _FORMATS = [ + ('stream_h264_ld_url', 'ld'), + ('stream_h264_url', 'standard'), + ('stream_h264_hq_url', 'hq'), + ('stream_h264_hd_url', 'hd'), + ('stream_h264_hd1080_url', 'hd180'), + ] + + _TESTS = [ + { + 'url': 'https://live.line.me/r/channels/21/broadcast/51883', + 'md5': '2137c41a8e78554bb09225b8eb322406', + 'info_dict': { + 'id': '51883', + 'channel': '21', + 'ext': 'mp4', + 'title': '', + 'description': '', + 'thumbnail': 're:^https?:.*\.(?:jpg|png)$', + 'duration': 74, + 'timestamp': 1425657362, + 'upload_date': '20150306', + 'uploader': 'IGN', + 'uploader_id': 'xijv66', + 'age_limit': 0, + 'view_count': int, + 'comment_count': int, + } + }, + { + 'url': 'https://live.line.me/r/channels/21/broadcast/51883', + 'only_matching': True, + } + ] + + def _real_extract(self, url): + video_id = self._match_id(url) + channel = self._match_channel(url) + #print("channel = %s, video_id = %s" % (channel, video_id)) + + info_url = "https://live-api.line-apps.com/app/v2/channel/%s/broadcast/%s" % (channel, video_id) + #print("info_url = %s" % (info_url)) + info = self._download_json(info_url, video_id) + #print("info = %s", json.dumps(info)) + + description = info.get("description") + item = info.get("item") + if item: + title = item.get("title") + duration = item.get("archiveDuration") + + urls = info.get("archivedHLSURLs") + formats = self._extract_formats(urls) + #print("formats = %s" % (pformat(formats))) + + print("channel = %s, video_id = %s" % (channel, video_id)) + res = { + 'id': compat_str(video_id), + 'title': title, + 'description': description, + 'duration': duration, + 'formats': formats, + } + + print("res = %s" % (pformat(res))) + return res + From 910239c2b7bab5457ccb996764e941b5b4bf1e29 Mon Sep 17 00:00:00 2001 From: "Michael Hsin (ytd.l)" Date: Sun, 4 Dec 2016 08:00:09 +0800 Subject: [PATCH 2/6] Use a smaller test case. --- youtube_dl/extractor/linelive.py | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/linelive.py b/youtube_dl/extractor/linelive.py index 729854552..740357ba7 100644 --- a/youtube_dl/extractor/linelive.py +++ b/youtube_dl/extractor/linelive.py @@ -70,29 +70,20 @@ class LineLiveIE(LineLiveBaseInfoExtractor): _TESTS = [ { - 'url': 'https://live.line.me/r/channels/21/broadcast/51883', - 'md5': '2137c41a8e78554bb09225b8eb322406', + 'url': 'https://live.line.me/channels/77/broadcast/214088', + 'md5': 'c2b16f5a530eadf57cff1b82a3eed185', 'info_dict': { - 'id': '51883', - 'channel': '21', + 'id': '214088', 'ext': 'mp4', - 'title': '', - 'description': '', - 'thumbnail': 're:^https?:.*\.(?:jpg|png)$', - 'duration': 74, - 'timestamp': 1425657362, - 'upload_date': '20150306', - 'uploader': 'IGN', - 'uploader_id': 'xijv66', - 'age_limit': 0, - 'view_count': int, - 'comment_count': int, - } + 'title': '12月3日 ウェザーナイトニュース', + 'description': '明日の各地のお天気をおやすみ前にお届け。\nコミューニケーション型お天気情報番組♪\n\n皆さんからのコメントも募集中!\nおやすみ前の素敵な時間をLINE LIVEで!\n\nお天気キャスター:眞家泉', + 'duration': 972, + }, }, { 'url': 'https://live.line.me/r/channels/21/broadcast/51883', 'only_matching': True, - } + }, ] def _real_extract(self, url): From a6e4b1121f3862e6effb328bb0ce647ee900a140 Mon Sep 17 00:00:00 2001 From: "Michael Hsin (ytd.l)" Date: Sun, 4 Dec 2016 23:14:33 +0800 Subject: [PATCH 3/6] Add TODO. Remove debug messages. --- youtube_dl/extractor/linelive.py | 29 ++++++++++------------------- 1 file changed, 10 insertions(+), 19 deletions(-) diff --git a/youtube_dl/extractor/linelive.py b/youtube_dl/extractor/linelive.py index 740357ba7..d8c6b69a3 100644 --- a/youtube_dl/extractor/linelive.py +++ b/youtube_dl/extractor/linelive.py @@ -22,6 +22,12 @@ from ..utils import ( mimetype2ext, ) +""" +TODO: + * Live streams support. + * More meta fields and comments extraction. +""" + class LineLiveBaseInfoExtractor(InfoExtractor): @classmethod @@ -42,7 +48,7 @@ class LineLiveBaseInfoExtractor(InfoExtractor): else: format_id = key url = orig_urls.get(key) - height = format_id + height = int_or_none(format_id) ext = 'mp4' formats.append({ 'format_id': format_id, @@ -60,18 +66,10 @@ class LineLiveIE(LineLiveBaseInfoExtractor): _VALID_URL = r'(?i)(?:https?://)?live\.line\.me/channels/(?P\d+)/broadcast/(?P\d+)' IE_NAME = 'linelive' - _FORMATS = [ - ('stream_h264_ld_url', 'ld'), - ('stream_h264_url', 'standard'), - ('stream_h264_hq_url', 'hq'), - ('stream_h264_hd_url', 'hd'), - ('stream_h264_hd1080_url', 'hd180'), - ] - _TESTS = [ { 'url': 'https://live.line.me/channels/77/broadcast/214088', - 'md5': 'c2b16f5a530eadf57cff1b82a3eed185', +# 'md5': 'c2b16f5a530eadf57cff1b82a3eed185', 'info_dict': { 'id': '214088', 'ext': 'mp4', @@ -89,25 +87,21 @@ class LineLiveIE(LineLiveBaseInfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) channel = self._match_channel(url) - #print("channel = %s, video_id = %s" % (channel, video_id)) info_url = "https://live-api.line-apps.com/app/v2/channel/%s/broadcast/%s" % (channel, video_id) - #print("info_url = %s" % (info_url)) info = self._download_json(info_url, video_id) - #print("info = %s", json.dumps(info)) description = info.get("description") item = info.get("item") + title = '' if item: title = item.get("title") duration = item.get("archiveDuration") urls = info.get("archivedHLSURLs") formats = self._extract_formats(urls) - #print("formats = %s" % (pformat(formats))) - print("channel = %s, video_id = %s" % (channel, video_id)) - res = { + return { 'id': compat_str(video_id), 'title': title, 'description': description, @@ -115,6 +109,3 @@ class LineLiveIE(LineLiveBaseInfoExtractor): 'formats': formats, } - print("res = %s" % (pformat(res))) - return res - From 7aeb868f58132c801b715a50e00b902f98d30697 Mon Sep 17 00:00:00 2001 From: "Michael Hsin (ytd.l)" Date: Mon, 5 Dec 2016 13:46:18 +0800 Subject: [PATCH 4/6] Use a shorter video as test case. --- youtube_dl/extractor/linelive.py | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/linelive.py b/youtube_dl/extractor/linelive.py index d8c6b69a3..d746d44e5 100644 --- a/youtube_dl/extractor/linelive.py +++ b/youtube_dl/extractor/linelive.py @@ -68,16 +68,27 @@ class LineLiveIE(LineLiveBaseInfoExtractor): _TESTS = [ { - 'url': 'https://live.line.me/channels/77/broadcast/214088', -# 'md5': 'c2b16f5a530eadf57cff1b82a3eed185', + 'url': 'https://live.line.me/channels/27121/broadcast/38687', + 'md5': 'd4f22649557d070fa1d61be2c483819b', 'info_dict': { - 'id': '214088', + 'id': '38687', 'ext': 'mp4', - 'title': '12月3日 ウェザーナイトニュース', - 'description': '明日の各地のお天気をおやすみ前にお届け。\nコミューニケーション型お天気情報番組♪\n\n皆さんからのコメントも募集中!\nおやすみ前の素敵な時間をLINE LIVEで!\n\nお天気キャスター:眞家泉', - 'duration': 972, + 'title': '短時間だよライブ', + 'description': '', + 'duration': 17, }, }, +# { +# 'url': 'https://live.line.me/channels/77/broadcast/214088', +# 'md5': 'c2b16f5a530eadf57cff1b82a3eed185', +# 'info_dict': { +# 'id': '214088', +# 'ext': 'mp4', +# 'title': '12月3日 ウェザーナイトニュース', +# 'description': '明日の各地のお天気をおやすみ前にお届け。\nコミューニケーション型お天気情報番組♪\n\n皆さんからのコメントも募集中!\nおやすみ前の素敵な時間をLINE LIVEで!\n\nお天気キャスター:眞家泉', +# 'duration': 972, +# }, +# }, { 'url': 'https://live.line.me/r/channels/21/broadcast/51883', 'only_matching': True, From 31bbd6b91b176fbbfbbc4a8f057b905430086e52 Mon Sep 17 00:00:00 2001 From: "Michael Hsin (ytd.l)" Date: Mon, 5 Dec 2016 20:00:26 +0800 Subject: [PATCH 5/6] Skip downloads for now. --- youtube_dl/extractor/linelive.py | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/linelive.py b/youtube_dl/extractor/linelive.py index d746d44e5..08f81e820 100644 --- a/youtube_dl/extractor/linelive.py +++ b/youtube_dl/extractor/linelive.py @@ -63,7 +63,7 @@ class LineLiveBaseInfoExtractor(InfoExtractor): class LineLiveIE(LineLiveBaseInfoExtractor): # https://live.line.me/r/channels/21/broadcast/51883 - _VALID_URL = r'(?i)(?:https?://)?live\.line\.me/channels/(?P\d+)/broadcast/(?P\d+)' + _VALID_URL = r'(?:https?://)?live\.line\.me/channels/(?P\d+)/broadcast/(?P\d+)' IE_NAME = 'linelive' _TESTS = [ @@ -77,18 +77,24 @@ class LineLiveIE(LineLiveBaseInfoExtractor): 'description': '', 'duration': 17, }, + 'params': { + 'skip_download': True, + }, + }, + { + 'url': 'https://live.line.me/channels/77/broadcast/214088', + 'md5': 'c2b16f5a530eadf57cff1b82a3eed185', + 'info_dict': { + 'id': '214088', + 'ext': 'mp4', + 'title': '12月3日 ウェザーナイトニュース', + 'description': '明日の各地のお天気をおやすみ前にお届け。\nコミューニケーション型お天気情報番組♪\n\n皆さんからのコメントも募集中!\nおやすみ前の素敵な時間をLINE LIVEで!\n\nお天気キャスター:眞家泉', + 'duration': 972, + }, + 'params': { + 'skip_download': True, + }, }, -# { -# 'url': 'https://live.line.me/channels/77/broadcast/214088', -# 'md5': 'c2b16f5a530eadf57cff1b82a3eed185', -# 'info_dict': { -# 'id': '214088', -# 'ext': 'mp4', -# 'title': '12月3日 ウェザーナイトニュース', -# 'description': '明日の各地のお天気をおやすみ前にお届け。\nコミューニケーション型お天気情報番組♪\n\n皆さんからのコメントも募集中!\nおやすみ前の素敵な時間をLINE LIVEで!\n\nお天気キャスター:眞家泉', -# 'duration': 972, -# }, -# }, { 'url': 'https://live.line.me/r/channels/21/broadcast/51883', 'only_matching': True, From 0df35e64c9f052c4030bc9d7963eddf8676cd7b6 Mon Sep 17 00:00:00 2001 From: "Michael Hsin (ytd.l)" Date: Wed, 14 Dec 2016 07:01:52 +0800 Subject: [PATCH 6/6] Update test cases. --- youtube_dl/extractor/linelive.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/linelive.py b/youtube_dl/extractor/linelive.py index 08f81e820..b30c04c6c 100644 --- a/youtube_dl/extractor/linelive.py +++ b/youtube_dl/extractor/linelive.py @@ -76,20 +76,21 @@ class LineLiveIE(LineLiveBaseInfoExtractor): 'title': '短時間だよライブ', 'description': '', 'duration': 17, + 'url': 'http://lss.line-cdn.net/vod/hZHSKDiQtHGd6O3M4DwwNVVVzCAddAxU2AQZCAxs0C1BCAkJlVQdKVE9lXVRKV0cyUlEZCExlWFcTUBV0U1BKBU4wXgA/720.m3u8', }, 'params': { 'skip_download': True, }, }, { - 'url': 'https://live.line.me/channels/77/broadcast/214088', - 'md5': 'c2b16f5a530eadf57cff1b82a3eed185', + 'url': 'https://live.line.me/channels/21/broadcast/51883', 'info_dict': { - 'id': '214088', + 'id': '51883', 'ext': 'mp4', - 'title': '12月3日 ウェザーナイトニュース', - 'description': '明日の各地のお天気をおやすみ前にお届け。\nコミューニケーション型お天気情報番組♪\n\n皆さんからのコメントも募集中!\nおやすみ前の素敵な時間をLINE LIVEで!\n\nお天気キャスター:眞家泉', - 'duration': 972, + 'title': '『ジョジョの奇妙な冒険 ダイヤモンドは砕けない 第一章』会見', + 'description': '2017年夏の公開に向けて、東宝株式会社とワーナー ブラザース ジャパン合同会社が初めて共同製作・配給するビッグプロジェクトが始動!\n映画『ジョジョの奇妙な冒険 ダイヤモンドは砕けない 第一章』会見模様を独占中継。\n会見終了後には、キャストからの生コメントも予定。ベールに包まれた企画内容とは・・・。そして、その企画の出演社は果たして誰だったのか?\n番組レポーター:マフィア梶田', + 'duration': 4641, + 'url': 'http://lss.line-cdn.net/vod/hw0VbqtoXMR9_El5PCiUgLVBIJjEVL25BVXlvLkcfdC9GKztOBH4yek8dISpPeTgZVnVlLk0adS5OIypIU35hfh4adg/720.m3u8', }, 'params': { 'skip_download': True,