+        if re.search(r'<div class="postcontent">[^<]*<p><iframe src=[\'"][^>]+></iframe><br />', page):
+            return self.playlist_result([self.url_result(vid) for vid in vids], video_id)
+
+        title = self._html_search_regex(
+            r'<div class="postdesc">[^<]*<h1>([^<]+)</h1>', page, 'title')
+
+        return {
+            '_type': 'url',
+            'id': video_id,
+            'url': vids[0],
+            'title': title,
+        }
+
+
+class GoGoAnimeSearchIE(InfoExtractor):
+    IE_NAME = 'gogoanime:search'
+    IE_DESC = 'GoGoAnime Search'
+
+    _VALID_URL = r'http://www\.gogoanime\.com/.*\?s=(?P<id>[^&]*)'
+    _TEST = {
+        'url': 'http://www.gogoanime.com/?s=bokusatsu',
+        'info_dict': {
+            'id': 'bokusatsu'
+        },
+        'playlist_count': 6
+    }
+
+    def _real_extract(self, url):
+        playlist_id = self._match_id(url)
+        webpage = self._download_webpage(url, playlist_id)
+
+        posts = re.findall(
+            r'<div class="postlist">[^<]*<p[^>]*>[^<]*<a href="(?P<url>[^"]+)"',
+            webpage)
+
+        return self.playlist_result(
+            [self.url_result(p) for p in posts], playlist_id)
diff --git a/youtube_dl/extractor/play44.py b/youtube_dl/extractor/play44.py
new file mode 100644
--- /dev/null
+++ b/youtube_dl/extractor/play44.py
@@ -0,0 +1,149 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urllib_parse
+)
+
+
+class Play44IE(InfoExtractor):
+    _VALID_URL = r'http://[w.]*play44\.net/embed\.php[^/]*/(?P<id>.+)'
+
+    _TESTS = [{
+        'url': 'http://play44.net/embed.php?w=600&h=438&vid=M/mahou-shoujo-madoka-magica-07.flv',
+        'md5': 'e37e99d665f503dd2db952f7c4dba9e6',
+        'info_dict': {
+            'id': 'mahou-shoujo-madoka-magica-07',
+            'ext': 'flv',
+            'title': 'mahou-shoujo-madoka-magica-07',
+        }
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        page = self._download_webpage(url, video_id)
+
+        video_url = compat_urllib_parse.unquote(self._html_search_regex(
+            r'_url = "(https?://[^"]+?)";', page, 'url'))
+        title = self._search_regex(r'.*/(?P<title>[^.]*).', video_url, 'title')
+
+        return {
+            'id': title,
+            'url': video_url,
+            'title': title,
+        }
+
+
+class ByZooIE(Play44IE):
+    _VALID_URL = r'http://[w.]*byzoo\.org/embed\.php[^/]*/(?P<id>.+)'
+
+    _TESTS = [{
+        'url': 'http://byzoo.org/embed.php?w=600&h=438&vid=at/nw/mahou_shoujo_madoka_magica_movie_3_-_part1.mp4',
+        'md5': '455c83dabe2cd9fd74a87612b01fe017',
+        'info_dict': {
+            'id': 'mahou_shoujo_madoka_magica_movie_3_-_part1',
+            'ext': 'mp4',
+            'title': 'mahou_shoujo_madoka_magica_movie_3_-_part1',
+        }
+    }]
+
+
+class Video44IE(Play44IE):
+    _VALID_URL = r'http://[w.]*video44\.net/.*file=(?P<id>[^&].).*'
+
+    _TESTS = [{
+        'url': 'http://www.video44.net/gogo/?w=600&h=438&file=chaoshead-12.flv&sv=1',
+        'md5': '43eaec6d0beb10e8d42459b9f108aff3',
+        'info_dict': {
+            'id': 'chaoshead-12',
+            'ext': 'mp4',
+            'title': 'chaoshead-12',
+        }
+    }]
+
+
+class VideoWingIE(Play44IE):
+    _VALID_URL = r'''(?x)
+        http://[w.]*videowing\.[^/]*/
+        (?:
+            .*video=/*
+            |embed/
+        )
+        (?P<id>[^&?.]+)
+    '''
+
+    _TESTS = [{
+        'url': 'http://videowing.me/embed?w=718&h=438&video=ongoing/boku_wa_tomodachi_ga_sukunai_-_05.mp4',
+        'md5': '4ed320e353ed26c742c4f12a9c210b60',
+        'info_dict': {
+            'id': 'boku_wa_tomodachi_ga_sukunai_-_05',
+            'ext': 'mp4',
+            'title': 'boku_wa_tomodachi_ga_sukunai_-_05',
+        }
+    }, {
+        'url': 'http://videowing.me/embed/a8d6a39522df066bd734a69f2334497e?w=600&h=438',
+        'md5': '33fdd71581357018c226f95c5cedcfd7',
+        'info_dict': {
+            'id': 'mahoushoujomadokamagicamovie1part1',
+            'ext': 'flv',
+            'title': 'mahoushoujomadokamagicamovie1part1',
+        }
+    }]
+
+
+class PlayPandaIE(Play44IE):
+    _VALID_URL = r'http://[w.]*playpanda\.[^/]*/.*vid=/*(?P<id>[^&].).*'
+
+    _TESTS = [{
+        'url': 'http://playpanda.net/embed.php?w=718&h=438&vid=at/nw/boku_wa_tomodachi_ga_sukunai_-_05.mp4',
+        'md5': '4ed320e353ed26c742c4f12a9c210b60',
+        'info_dict': {
+            'id': 'boku_wa_tomodachi_ga_sukunai_-_05',
+            'ext': 'mp4',
+            'title': 'boku_wa_tomodachi_ga_sukunai_-_05',
+            'description': 'boku_wa_tomodachi_ga_sukunai_-_05'
+        }
+    }]
+
+
+class VideoZooIE(Play44IE):
+    _VALID_URL = r'http://[w.]*videozoo\.[^/]*/.*vid=/*(?P<id>[^&].).*'
+
+    _TESTS = [{
+        'url': 'http://videozoo.me/embed.php?w=718&h=438&vid=at/nw/boku_wa_tomodachi_ga_sukunai_-_05.mp4',
+        'md5': '4ed320e353ed26c742c4f12a9c210b60',
+        'info_dict': {
+            'id': 'boku_wa_tomodachi_ga_sukunai_-_05',
+            'ext': 'mp4',
+            'title': 'boku_wa_tomodachi_ga_sukunai_-_05',
+        }
+    }]
+
+
+class PlayBBIE(Play44IE):
+    _VALID_URL = r'http://[w.]*playbb\.[^/]*/.*vid=/*(?P<id>[^&].).*'
+
+    _TESTS = [{
+        'url': 'http://playbb.me/embed.php?w=718&h=438&vid=at/nw/boku_wa_tomodachi_ga_sukunai_-_05.mp4',
+        'md5': '4ed320e353ed26c742c4f12a9c210b60',
+        'info_dict': {
+            'id': 'boku_wa_tomodachi_ga_sukunai_-_05',
+            'ext': 'mp4',
+            'title': 'boku_wa_tomodachi_ga_sukunai_-_05',
+        }
+    }]
+
+
+class EasyVideoIE(Play44IE):
+    _VALID_URL = r'http://[w.]*easyvideo\.[^/]*/.*file=/*(?P<id>[^&.]+)'
+
+    _TESTS = [{
+        'url':
'http://easyvideo.me/gogo/?w=718&h=438&file=bokuwatomodachigasukunai-04.flv&sv=1', + 'md5': '26178b57629b7650106d72b191137176', + 'info_dict': { + 'id': 'bokuwatomodachigasukunai-04', + 'ext': 'mp4', + 'title': 'bokuwatomodachigasukunai-04', + }, + 'skip': 'Blocked in Germany', + }] diff --git a/youtube_dl/extractor/soulanime.py b/youtube_dl/extractor/soulanime.py new file mode 100644 index 000000000..7adb10c03 --- /dev/null +++ b/youtube_dl/extractor/soulanime.py @@ -0,0 +1,74 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class SoulAnimeWatchingIE(InfoExtractor): + IE_NAME = "soulanime:watching" + IE_DESC = "SoulAnime video" + _TEST = { + 'url': 'http://www.soul-anime.net/watching/seirei-tsukai-no-blade-dance-episode-9/', + 'md5': '05fae04abf72298098b528e98abf4298', + 'info_dict': { + 'id': 'seirei-tsukai-no-blade-dance-episode-9', + 'ext': 'mp4', + 'title': 'seirei-tsukai-no-blade-dance-episode-9', + 'description': 'seirei-tsukai-no-blade-dance-episode-9' + } + } + _VALID_URL = r'http://[w.]*soul-anime\.(?P<domain>[^/]+)/watch[^/]*/(?P<id>[^/]+)' + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + domain = mobj.group('domain') + + page = self._download_webpage(url, video_id) + + video_url_encoded = self._html_search_regex( + r'<div id="download">[^<]*<a href="(?P<url>[^"]+)"', page, 'url') + video_url = "http://www.soul-anime." + domain + video_url_encoded + + vid = self._request_webpage(video_url, video_id) + ext = vid.info().gettype().split("/")[1] + + return { + 'id': video_id, + 'url': video_url, + 'ext': ext, + 'title': video_id, + 'description': video_id + } + + +class SoulAnimeSeriesIE(InfoExtractor): + IE_NAME = "soulanime:series" + IE_DESC = "SoulAnime Series" + + _VALID_URL = r'http://[w.]*soul-anime\.(?P<domain>[^/]+)/anime./(?P<id>[^/]+)' + + _EPISODE_REGEX = r'<option value="(/watch[^/]*/[^"]+)">[^<]*</option>' + + _TEST = { + 'url': 'http://www.soul-anime.net/anime1/black-rock-shooter-tv/', + 'info_dict': { + 'id': 'black-rock-shooter-tv' + }, + 'playlist_count': 8 + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + series_id = mobj.group('id') + domain = mobj.group('domain') + + pattern = re.compile(self._EPISODE_REGEX) + + page = self._download_webpage(url, series_id, "Downloading series page") + mobj = pattern.findall(page) + + entries = [self.url_result("http://www.soul-anime." 
+ domain + obj) for obj in mobj] + + return self.playlist_result(entries, series_id) diff --git a/youtube_dl/extractor/videofun.py b/youtube_dl/extractor/videofun.py new file mode 100644 index 000000000..0364b9d32 --- /dev/null +++ b/youtube_dl/extractor/videofun.py @@ -0,0 +1,36 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + compat_urllib_parse +) + + +class VideoFunIE(InfoExtractor): + _VALID_URL = r'http://[w.]*videofun\.me/embed/(?P<id>[0-9a-f]+)' + + _TEST = { + 'url': 'http://videofun.me/embed/8267659be070860af600fee7deadbcdb?w=600&h=438', + 'md5': 'e37e99d665f503dd2db952f7c4dba9e6', + 'info_dict': { + 'id': 'Mahou-Shoujo-Madoka-Magica-07', + 'ext': 'flv', + 'title': 'Mahou-Shoujo-Madoka-Magica-07', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage( + url, video_id, 'Downloading video page') + + video_url_encoded = self._html_search_regex( + r'url: "(http://gateway\.videofun\.me[^"]+)"', webpage, 'video url') + video_url = compat_urllib_parse.unquote(video_url_encoded) + title = self._html_search_regex(r'.*/([^.]*)\.', video_url, 'title') + + return { + 'id': title, + 'url': video_url, + 'title': title, + } From 95ceeec72200ed3b2c94a54650eb69dfe946e595 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sun, 4 Jan 2015 02:05:35 +0100 Subject: [PATCH 0138/1484] Remove unused import --- youtube_dl/downloader/mplayer.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/downloader/mplayer.py b/youtube_dl/downloader/mplayer.py index 34b23b5c2..72cef30ea 100644 --- a/youtube_dl/downloader/mplayer.py +++ b/youtube_dl/downloader/mplayer.py @@ -4,7 +4,6 @@ import os import subprocess from .common import FileDownloader -from ..compat import compat_subprocess_get_DEVNULL from ..utils import ( check_executable, encodeFilename, From c11125f9ed952f9b7ebd06c15eacadcc6005dd8c Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sun, 4 Jan 2015 02:06:53 +0100 Subject: [PATCH 0139/1484] [tests] Remove format 138 from tests (#4559) --- test/test_YoutubeDL.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index f8e4f930e..730f7ec26 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -218,7 +218,7 @@ class TestFormatSelection(unittest.TestCase): # 3D '85', '84', '102', '83', '101', '82', '100', # Dash video - '138', '137', '248', '136', '247', '135', '246', + '137', '248', '136', '247', '135', '246', '245', '244', '134', '243', '133', '242', '160', # Dash audio '141', '172', '140', '171', '139', From 8848314c08284f6a4b8f3c3529bf2e3f1b72610c Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sun, 4 Jan 2015 02:08:18 +0100 Subject: [PATCH 0140/1484] [Makefile] Make offline tests actually work offline --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 71470eedb..e53a367ef 100644 --- a/Makefile +++ b/Makefile @@ -46,7 +46,7 @@ test: ot: offlinetest offlinetest: codetest - nosetests --verbose test --exclude test_download --exclude test_age_restriction --exclude test_subtitles --exclude test_write_annotations + nosetests --verbose test --exclude test_download --exclude test_age_restriction --exclude test_subtitles --exclude test_write_annotations --exclude test_youtube_lists tar: youtube-dl.tar.gz From 2ccd1b10e58cc8e5173dc1aeedc2b3f0ef9b55bf Mon Sep 17 00:00:00 2001 From: 
Philipp Hagemeister <phihag@phihag.de> Date: Sun, 4 Jan 2015 02:20:45 +0100 Subject: [PATCH 0141/1484] [soulanime] Fix under Python 3 --- youtube_dl/extractor/soulanime.py | 10 ++++++++-- youtube_dl/utils.py | 11 +++++++++++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/soulanime.py b/youtube_dl/extractor/soulanime.py index 7adb10c03..feef33e27 100644 --- a/youtube_dl/extractor/soulanime.py +++ b/youtube_dl/extractor/soulanime.py @@ -3,6 +3,10 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..utils import ( + HEADRequest, + urlhandle_detect_ext, +) class SoulAnimeWatchingIE(InfoExtractor): @@ -31,8 +35,10 @@ class SoulAnimeWatchingIE(InfoExtractor): r'<div id="download">[^<]*<a href="(?P<url>[^"]+)"', page, 'url') video_url = "http://www.soul-anime." + domain + video_url_encoded - vid = self._request_webpage(video_url, video_id) - ext = vid.info().gettype().split("/")[1] + ext_req = HEADRequest(video_url) + ext_handle = self._request_webpage( + ext_req, video_id, note='Determining extension') + ext = urlhandle_detect_ext(ext_handle) return { 'id': video_id, diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index efbe64fb3..bdfe053a7 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1550,3 +1550,14 @@ def ytdl_is_updateable(): def args_to_str(args): # Get a short string representation for a subprocess command return ' '.join(shlex_quote(a) for a in args) + + +def urlhandle_detect_ext(url_handle): + try: + url_handle.headers + getheader = lambda h: url_handle.headers[h] + except AttributeError: # Python < 3 + getheader = url_handle.info().getheader + + return getheader('Content-Type').split("/")[1] + From 7a1818c99b1729796f62c341b1b3f809cd47dbf8 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sun, 4 Jan 2015 03:15:27 +0100 Subject: [PATCH 0142/1484] [vk] Add support for rutube embeds (Fixes #4514) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/rutube.py | 31 +++++++++++++++++++++++++++++++ youtube_dl/extractor/vk.py | 9 +++++++++ 3 files changed, 41 insertions(+) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index c3dc09f75..143cd5c49 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -359,6 +359,7 @@ from .ruhd import RUHDIE from .rutube import ( RutubeIE, RutubeChannelIE, + RutubeEmbedIE, RutubeMovieIE, RutubePersonIE, ) diff --git a/youtube_dl/extractor/rutube.py b/youtube_dl/extractor/rutube.py index b72b5a586..5b1c3577a 100644 --- a/youtube_dl/extractor/rutube.py +++ b/youtube_dl/extractor/rutube.py @@ -70,6 +70,37 @@ class RutubeIE(InfoExtractor): } +class RutubeEmbedIE(InfoExtractor): + IE_NAME = 'rutube:embed' + IE_DESC = 'Rutube embedded videos' + _VALID_URL = 'https?://rutube\.ru/video/embed/(?P<id>[0-9]+)' + + _TEST = { + 'url': 'http://rutube.ru/video/embed/6722881?vk_puid37=&vk_puid38=', + 'info_dict': { + 'id': 'a10e53b86e8f349080f718582ce4c661', + 'ext': 'mp4', + 'upload_date': '20131223', + 'uploader_id': '297833', + 'description': 'Видео группы ★http://vk.com/foxkidsreset★ музей Fox Kids и Jetix<br/><br/> восстановлено и сделано в шикоформате subziro89 http://vk.com/subziro89', + 'uploader': 'subziro89 ILya', + 'title': 'Мистический городок Эйри в Индиан 5 серия озвучка subziro89', + }, + 'params': { + 'skip_download': 'Requires ffmpeg', + }, + } + + def _real_extract(self, url): + embed_id = self._match_id(url) + webpage = self._download_webpage(url, embed_id) 
+ + canonical_url = self._html_search_regex( + r'<link\s+rel="canonical"\s+href="([^"]+?)"', webpage, + 'Canonical URL') + return self.url_result(canonical_url, 'Rutube') + + class RutubeChannelIE(InfoExtractor): IE_NAME = 'rutube:channel' IE_DESC = 'Rutube channels' diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index 542e9198a..129de6cf3 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -164,6 +164,15 @@ class VKIE(InfoExtractor): self.to_screen('Youtube video detected') return self.url_result(m_yt.group(1), 'Youtube') + m_rutube = re.search( + r'\ssrc="((?:https?:)?//rutube\.ru\\?/video\\?/embed(?:.*?))\\?"', info_page) + assert m_rutube + if m_rutube is not None: + self.to_screen('rutube video detected') + rutube_url = self._proto_relative_url( + m_rutube.group(1).replace('\\', '')) + return self.url_result(rutube_url) + m_opts = re.search(r'(?s)var\s+opts\s*=\s*({.*?});', info_page) if m_opts: m_opts_url = re.search(r"url\s*:\s*'([^']+)", m_opts.group(1)) From 26886e6140a684058064c30237ef096332e1510f Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sun, 4 Jan 2015 03:15:48 +0100 Subject: [PATCH 0143/1484] release 2015.01.04 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 2a54b9bbe..09813928a 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2015.01.03' +__version__ = '2015.01.04' From f4858a71035549cf82b258d01dda5060aef707b7 Mon Sep 17 00:00:00 2001 From: Christopher Krooss <c.krooss@gmail.com> Date: Sun, 4 Jan 2015 13:33:26 +0100 Subject: [PATCH 0144/1484] Add support for Radio Bremen --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/radiobremen.py | 55 +++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+) create mode 100644 youtube_dl/extractor/radiobremen.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 143cd5c49..349f4fe71 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -339,6 +339,7 @@ from .prosiebensat1 import ProSiebenSat1IE from .pyvideo import PyvideoIE from .quickvid import QuickVidIE from .radiode import RadioDeIE +from .radiobremen import RadioBremenIE from .radiofrance import RadioFranceIE from .rai import RaiIE from .rbmaradio import RBMARadioIE diff --git a/youtube_dl/extractor/radiobremen.py b/youtube_dl/extractor/radiobremen.py new file mode 100644 index 000000000..68c78c4f9 --- /dev/null +++ b/youtube_dl/extractor/radiobremen.py @@ -0,0 +1,55 @@ +# -*- coding: utf-8 -*- + +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class RadioBremenIE(InfoExtractor): + _VALID_URL = r'http?://(?:www\.)?radiobremen\.de/mediathek/(index\.html)?\?id=(?P<video_id>[0-9]+)' + IE_NAME = 'radiobremen' + + _TEST = { + 'url': 'http://www.radiobremen.de/mediathek/index.html?id=114720', + 'info_dict': { + 'id': '114720', + 'ext': 'mp4', + 'height': 288, + 'width': 512, + 'title': 'buten un binnen vom 22. 
Dezember', + 'description': 'Unter anderem mit diesen Themen: 45 Flüchtlinge sind in Worpswede angekommen +++ Freies Internet für alle: Bremer arbeiten an einem flächendeckenden W-Lan-Netzwerk +++ Aktivisten kämpfen für das Unibad +++ So war das Wetter 2014 +++', + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('video_id') + + meta_url = "http://www.radiobremen.de/apps/php/mediathek/metadaten.php?id=%s" % video_id + meta_doc = self._download_webpage(meta_url, video_id, 'Downloading metadata') + title = self._html_search_regex("<h1.*>(?P<title>.+)</h1>", meta_doc, "title") + description = self._html_search_regex("<p>(?P<description>.*)</p>", meta_doc, "description") + duration = self._html_search_regex("Länge:</td>\s+<td>(?P<duration>[0-9]+:[0-9]+)</td>", meta_doc, "duration") + + page_doc = self._download_webpage(url, video_id, 'Downloading video information') + pattern = "ardformatplayerclassic\(\'playerbereich\',\'(?P<width>[0-9]+)\',\'.*\',\'(?P<video_id>[0-9]+)\',\'(?P<secret>[0-9]+)\',\'(?P<thumbnail>.+)\',\'\'\)" + mobj = re.search(pattern, page_doc) + width, video_id, secret, thumbnail = int(mobj.group("width")), mobj.group("video_id"), mobj.group("secret"), mobj.group("thumbnail") + video_url = "http://dl-ondemand.radiobremen.de/mediabase/{:}/{:}_{:}_{:}.mp4".format(video_id, video_id, secret, width) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'duration': duration, + 'formats': [ + {'url': video_url, + 'ext': 'mp4', + 'width': width, + 'protocol': 'http' + } + ], + 'thumbnail': thumbnail, + } From 63948fc62c7f0bfcfe7b2ce102ab6e4e87de558c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sun, 4 Jan 2015 13:40:30 +0100 Subject: [PATCH 0145/1484] [downloader/hls] Respect the 'prefer_ffmpeg' option --- youtube_dl/downloader/hls.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 5bb0f3cfd..aa58b52ab 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -11,7 +11,6 @@ from ..compat import ( compat_urllib_request, ) from ..utils import ( - check_executable, encodeFilename, ) @@ -27,16 +26,13 @@ class HlsFD(FileDownloader): '-bsf:a', 'aac_adtstoasc', encodeFilename(tmpfilename, for_subprocess=True)] - for program in ['avconv', 'ffmpeg']: - if check_executable(program, ['-version']): - break - else: + ffpp = FFmpegPostProcessor(downloader=self) + program = ffpp._executable + if program is None: self.report_error('m3u8 download detected but ffmpeg or avconv could not be found. 
Please install one.') return False - cmd = [program] + args - - ffpp = FFmpegPostProcessor(downloader=self) ffpp.check_version() + cmd = [program] + args retval = subprocess.call(cmd) if retval == 0: From bc1fc5ddbcba784778cbdd98c051ff2493178515 Mon Sep 17 00:00:00 2001 From: Christopher Krooss <c.krooss@gmail.com> Date: Sun, 4 Jan 2015 14:02:07 +0100 Subject: [PATCH 0146/1484] Don't check for height as it's not provided --- youtube_dl/extractor/radiobremen.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/radiobremen.py b/youtube_dl/extractor/radiobremen.py index 68c78c4f9..6d130d3d9 100644 --- a/youtube_dl/extractor/radiobremen.py +++ b/youtube_dl/extractor/radiobremen.py @@ -16,7 +16,6 @@ class RadioBremenIE(InfoExtractor): 'info_dict': { 'id': '114720', 'ext': 'mp4', - 'height': 288, 'width': 512, 'title': 'buten un binnen vom 22. Dezember', 'description': 'Unter anderem mit diesen Themen: 45 Flüchtlinge sind in Worpswede angekommen +++ Freies Internet für alle: Bremer arbeiten an einem flächendeckenden W-Lan-Netzwerk +++ Aktivisten kämpfen für das Unibad +++ So war das Wetter 2014 +++', From bc3e582fe457f9239dc3a3386cbfd0e7db167404 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sun, 4 Jan 2015 14:02:17 +0100 Subject: [PATCH 0147/1484] Don't use '-shortest' option for merging formats (closes #4220, closes #4580) With avconv and older versions of ffmpeg the video is partially copied. The duration difference between the audio and the video seem to be really small, so it's probably not noticeable. --- youtube_dl/postprocessor/ffmpeg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index 048525efc..473536dcc 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -520,7 +520,7 @@ class FFmpegMetadataPP(FFmpegPostProcessor): class FFmpegMergerPP(FFmpegPostProcessor): def run(self, info): filename = info['filepath'] - args = ['-c', 'copy', '-map', '0:v:0', '-map', '1:a:0', '-shortest'] + args = ['-c', 'copy', '-map', '0:v:0', '-map', '1:a:0'] self._downloader.to_screen('[ffmpeg] Merging formats into "%s"' % filename) self.run_ffmpeg_multiple_files(info['__files_to_merge'], filename, args) return True, info From 9fda6ee39fa2da1949af5e9b95633e3df3c6f6b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sun, 4 Jan 2015 14:06:23 +0100 Subject: [PATCH 0148/1484] [tf1] Remove unused import --- youtube_dl/extractor/tf1.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/youtube_dl/extractor/tf1.py b/youtube_dl/extractor/tf1.py index 07cc81226..025d0877c 100644 --- a/youtube_dl/extractor/tf1.py +++ b/youtube_dl/extractor/tf1.py @@ -1,8 +1,6 @@ # coding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor From 1d2d0e3ff2b4e55810039caf267bb9ad086f3610 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sun, 4 Jan 2015 14:07:06 +0100 Subject: [PATCH 0149/1484] utils: Remove blank line at the end of file --- youtube_dl/utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index bdfe053a7..d4951c406 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1560,4 +1560,3 @@ def urlhandle_detect_ext(url_handle): getheader = 
url_handle.info().getheader return getheader('Content-Type').split("/")[1] - From 67c2bcdf4cf83f9ac32e5f1f50a8b4b38d2ac624 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sun, 4 Jan 2015 19:19:15 +0100 Subject: [PATCH 0150/1484] Remove extractors which infringe copyright (#4554) --- youtube_dl/extractor/__init__.py | 19 ---- youtube_dl/extractor/gogoanime.py | 76 --------------- youtube_dl/extractor/play44.py | 149 ------------------------------ youtube_dl/extractor/videofun.py | 36 -------- 4 files changed, 280 deletions(-) delete mode 100644 youtube_dl/extractor/gogoanime.py delete mode 100644 youtube_dl/extractor/play44.py delete mode 100644 youtube_dl/extractor/videofun.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 143cd5c49..613e8e05b 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -164,10 +164,6 @@ from .globo import GloboIE from .godtube import GodTubeIE from .goldenmoustache import GoldenMoustacheIE from .golem import GolemIE -from .gogoanime import ( - GoGoAnimeIE, - GoGoAnimeSearchIE -) from .googleplus import GooglePlusIE from .googlesearch import GoogleSearchIE from .gorillavid import GorillaVidIE @@ -317,16 +313,6 @@ from .phoenix import PhoenixIE from .photobucket import PhotobucketIE from .planetaplay import PlanetaPlayIE from .played import PlayedIE -from .play44 import ( - Play44IE, - ByZooIE, - Video44IE, - VideoWingIE, - PlayPandaIE, - VideoZooIE, - PlayBBIE, - EasyVideoIE -) from .playfm import PlayFMIE from .playvid import PlayvidIE from .podomatic import PodomaticIE @@ -388,10 +374,6 @@ from .smotri import ( from .snotr import SnotrIE from .sockshare import SockshareIE from .sohu import SohuIE -from .soulanime import ( - SoulAnimeWatchingIE, - SoulAnimeSeriesIE -) from .soundcloud import ( SoundcloudIE, SoundcloudSetIE, @@ -486,7 +468,6 @@ from .viddler import ViddlerIE from .videobam import VideoBamIE from .videodetective import VideoDetectiveIE from .videolecturesnet import VideoLecturesNetIE -from .videofun import VideoFunIE from .videofyme import VideofyMeIE from .videomega import VideoMegaIE from .videopremium import VideoPremiumIE diff --git a/youtube_dl/extractor/gogoanime.py b/youtube_dl/extractor/gogoanime.py deleted file mode 100644 index d4f4ecc58..000000000 --- a/youtube_dl/extractor/gogoanime.py +++ /dev/null @@ -1,76 +0,0 @@ -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - ExtractorError, - compat_urllib_parse, - get_element_by_attribute, - unescapeHTML -) - - -class GoGoAnimeIE(InfoExtractor): - IE_NAME = 'gogoanime' - IE_DESC = 'GoGoAnime' - _VALID_URL = r'http://www.gogoanime.com/(?P<id>[A-Za-z0-9-]+)' - - _TEST = { - 'url': 'http://www.gogoanime.com/mahou-shoujo-madoka-magica-movie-1', - 'info_dict': { - 'id': 'mahou-shoujo-madoka-magica-movie-1' - }, - 'playlist_count': 3 - } - - def _real_extract(self, url): - video_id = self._match_id(url) - page = self._download_webpage(url, video_id) - - if 'Oops! 
Page Not Found</font>' in page: - raise ExtractorError('Video does not exist', expected=True) - - content = get_element_by_attribute("class", "postcontent", page) - vids = re.findall(r'<iframe[^>]*?src=[\'"](h[^\'"]+)[\'"]', content) - vids = [ - unescapeHTML(compat_urllib_parse.unquote(x)) - for x in vids if not re.search(r".*videofun.*", x)] - - if re.search(r'<div class="postcontent">[^<]*<p><iframe src=[\'"][^>]+></iframe><br />', page): - return self.playlist_result([self.url_result(vid) for vid in vids], video_id) - - title = self._html_search_regex( - r'<div class="postdesc">[^<]*<h1>([^<]+)</h1>', page, 'title') - - return { - '_type': 'url', - 'id': video_id, - 'url': vids[0], - 'title': title, - } - - -class GoGoAnimeSearchIE(InfoExtractor): - IE_NAME = 'gogoanime:search' - IE_DESC = 'GoGoAnime Search' - - _VALID_URL = r'http://www\.gogoanime\.com/.*\?s=(?P<id>[^&]*)' - _TEST = { - 'url': 'http://www.gogoanime.com/?s=bokusatsu', - 'info_dict': { - 'id': 'bokusatsu' - }, - 'playlist_count': 6 - } - - def _real_extract(self, url): - playlist_id = self._match_id(url) - webpage = self._download_webpage(url, playlist_id) - - posts = re.findall( - r'<div class="postlist">[^<]*<p[^>]*>[^<]*<a href="(?P<url>[^"]+)"', - webpage) - - return self.playlist_result( - [self.url_result(p) for p in posts], playlist_id) diff --git a/youtube_dl/extractor/play44.py b/youtube_dl/extractor/play44.py deleted file mode 100644 index b8696e516..000000000 --- a/youtube_dl/extractor/play44.py +++ /dev/null @@ -1,149 +0,0 @@ -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import ( - compat_urllib_parse -) - - -class Play44IE(InfoExtractor): - _VALID_URL = r'http://[w.]*play44\.net/embed\.php[^/]*/(?P<id>.+)' - - _TESTS = [{ - 'url': 'http://play44.net/embed.php?w=600&h=438&vid=M/mahou-shoujo-madoka-magica-07.flv', - 'md5': 'e37e99d665f503dd2db952f7c4dba9e6', - 'info_dict': { - 'id': 'mahou-shoujo-madoka-magica-07', - 'ext': 'flv', - 'title': 'mahou-shoujo-madoka-magica-07', - } - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - page = self._download_webpage(url, video_id) - - video_url = compat_urllib_parse.unquote(self._html_search_regex( - r'_url = "(https?://[^"]+?)";', page, 'url')) - title = self._search_regex(r'.*/(?P<title>[^.]*).', video_url, 'title') - - return { - 'id': title, - 'url': video_url, - 'title': title, - } - - -class ByZooIE(Play44IE): - _VALID_URL = r'http://[w.]*byzoo\.org/embed\.php[^/]*/(?P<id>.+)' - - _TESTS = [{ - 'url': 'http://byzoo.org/embed.php?w=600&h=438&vid=at/nw/mahou_shoujo_madoka_magica_movie_3_-_part1.mp4', - 'md5': '455c83dabe2cd9fd74a87612b01fe017', - 'info_dict': { - 'id': 'mahou_shoujo_madoka_magica_movie_3_-_part1', - 'ext': 'mp4', - 'title': 'mahou_shoujo_madoka_magica_movie_3_-_part1', - } - }] - - -class Video44IE(Play44IE): - _VALID_URL = r'http://[w.]*video44\.net/.*file=(?P<id>[^&].).*' - - _TESTS = [{ - 'url': 'http://www.video44.net/gogo/?w=600&h=438&file=chaoshead-12.flv&sv=1', - 'md5': '43eaec6d0beb10e8d42459b9f108aff3', - 'info_dict': { - 'id': 'chaoshead-12', - 'ext': 'mp4', - 'title': 'chaoshead-12', - } - }] - - -class VideoWingIE(Play44IE): - _VALID_URL = r'''(?x) - http://[w.]*videowing\.[^/]*/ - (?: - .*video=/* - |embed/ - ) - (?P<id>[^&?.]+) - ''' - - _TESTS = [{ - 'url': 'http://videowing.me/embed?w=718&h=438&video=ongoing/boku_wa_tomodachi_ga_sukunai_-_05.mp4', - 'md5': '4ed320e353ed26c742c4f12a9c210b60', - 'info_dict': { - 'id': 'boku_wa_tomodachi_ga_sukunai_-_05', - 'ext': 
'mp4', - 'title': 'boku_wa_tomodachi_ga_sukunai_-_05', - } - }, { - 'url': 'http://videowing.me/embed/a8d6a39522df066bd734a69f2334497e?w=600&h=438', - 'md5': '33fdd71581357018c226f95c5cedcfd7', - 'info_dict': { - 'id': 'mahoushoujomadokamagicamovie1part1', - 'ext': 'flv', - 'title': 'mahoushoujomadokamagicamovie1part1', - } - }] - - -class PlayPandaIE(Play44IE): - _VALID_URL = r'http://[w.]*playpanda\.[^/]*/.*vid=/*(?P<id>[^&].).*' - - _TESTS = [{ - 'url': 'http://playpanda.net/embed.php?w=718&h=438&vid=at/nw/boku_wa_tomodachi_ga_sukunai_-_05.mp4', - 'md5': '4ed320e353ed26c742c4f12a9c210b60', - 'info_dict': { - 'id': 'boku_wa_tomodachi_ga_sukunai_-_05', - 'ext': 'mp4', - 'title': 'boku_wa_tomodachi_ga_sukunai_-_05', - 'description': 'boku_wa_tomodachi_ga_sukunai_-_05' - } - }] - - -class VideoZooIE(Play44IE): - _VALID_URL = r'http://[w.]*videozoo\.[^/]*/.*vid=/*(?P<id>[^&].).*' - - _TESTS = [{ - 'url': 'http://videozoo.me/embed.php?w=718&h=438&vid=at/nw/boku_wa_tomodachi_ga_sukunai_-_05.mp4', - 'md5': '4ed320e353ed26c742c4f12a9c210b60', - 'info_dict': { - 'id': 'boku_wa_tomodachi_ga_sukunai_-_05', - 'ext': 'mp4', - 'title': 'boku_wa_tomodachi_ga_sukunai_-_05', - } - }] - - -class PlayBBIE(Play44IE): - _VALID_URL = r'http://[w.]*playbb\.[^/]*/.*vid=/*(?P<id>[^&].).*' - - _TESTS = [{ - 'url': 'http://playbb.me/embed.php?w=718&h=438&vid=at/nw/boku_wa_tomodachi_ga_sukunai_-_05.mp4', - 'md5': '4ed320e353ed26c742c4f12a9c210b60', - 'info_dict': { - 'id': 'boku_wa_tomodachi_ga_sukunai_-_05', - 'ext': 'mp4', - 'title': 'boku_wa_tomodachi_ga_sukunai_-_05', - } - }] - - -class EasyVideoIE(Play44IE): - _VALID_URL = r'http://[w.]*easyvideo\.[^/]*/.*file=/*(?P<id>[^&.]+)' - - _TESTS = [{ - 'url': 'http://easyvideo.me/gogo/?w=718&h=438&file=bokuwatomodachigasukunai-04.flv&sv=1', - 'md5': '26178b57629b7650106d72b191137176', - 'info_dict': { - 'id': 'bokuwatomodachigasukunai-04', - 'ext': 'mp4', - 'title': 'bokuwatomodachigasukunai-04', - }, - 'skip': 'Blocked in Germany', - }] diff --git a/youtube_dl/extractor/videofun.py b/youtube_dl/extractor/videofun.py deleted file mode 100644 index 0364b9d32..000000000 --- a/youtube_dl/extractor/videofun.py +++ /dev/null @@ -1,36 +0,0 @@ -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import ( - compat_urllib_parse -) - - -class VideoFunIE(InfoExtractor): - _VALID_URL = r'http://[w.]*videofun\.me/embed/(?P<id>[0-9a-f]+)' - - _TEST = { - 'url': 'http://videofun.me/embed/8267659be070860af600fee7deadbcdb?w=600&h=438', - 'md5': 'e37e99d665f503dd2db952f7c4dba9e6', - 'info_dict': { - 'id': 'Mahou-Shoujo-Madoka-Magica-07', - 'ext': 'flv', - 'title': 'Mahou-Shoujo-Madoka-Magica-07', - } - } - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage( - url, video_id, 'Downloading video page') - - video_url_encoded = self._html_search_regex( - r'url: "(http://gateway\.videofun\.me[^"]+)"', webpage, 'video url') - video_url = compat_urllib_parse.unquote(video_url_encoded) - title = self._html_search_regex(r'.*/([^.]*)\.', video_url, 'title') - - return { - 'id': title, - 'url': video_url, - 'title': title, - } From 2f985f4bb4938ee13356bda0436fde18f8c0e434 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Mon, 5 Jan 2015 00:18:43 +0100 Subject: [PATCH 0151/1484] [youtube:toplist] Remove extractor They use now normal playlists (their id is PL*). 
--- youtube_dl/extractor/__init__.py | 1 - youtube_dl/extractor/youtube.py | 46 -------------------------------- 2 files changed, 47 deletions(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 613e8e05b..79e6bba45 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -546,7 +546,6 @@ from .youtube import ( YoutubeSearchURLIE, YoutubeShowIE, YoutubeSubscriptionsIE, - YoutubeTopListIE, YoutubeTruncatedIDIE, YoutubeTruncatedURLIE, YoutubeUserIE, diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index e9bf39a00..d1bbf0b01 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1206,9 +1206,6 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): if playlist_id.startswith('RD'): # Mixes require a custom extraction process return self._extract_mix(playlist_id) - if playlist_id.startswith('TL'): - raise ExtractorError('For downloading YouTube.com top lists, use ' - 'the "yttoplist" keyword, for example "youtube-dl \'yttoplist:music:Top Tracks\'"', expected=True) url = self._TEMPLATE_URL % playlist_id page = self._download_webpage(url, playlist_id) @@ -1254,49 +1251,6 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): return self.playlist_result(url_results, playlist_id, playlist_title) -class YoutubeTopListIE(YoutubePlaylistIE): - IE_NAME = 'youtube:toplist' - IE_DESC = ('YouTube.com top lists, "yttoplist:{channel}:{list title}"' - ' (Example: "yttoplist:music:Top Tracks")') - _VALID_URL = r'yttoplist:(?P<chann>.*?):(?P<title>.*?)$' - _TESTS = [{ - 'url': 'yttoplist:music:Trending', - 'playlist_mincount': 5, - 'skip': 'Only works for logged-in users', - }] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - channel = mobj.group('chann') - title = mobj.group('title') - query = compat_urllib_parse.urlencode({'title': title}) - channel_page = self._download_webpage( - 'https://www.youtube.com/%s' % channel, title) - link = self._html_search_regex( - r'''(?x) - <a\s+href="([^"]+)".*?>\s* - <span\s+class="branded-page-module-title-text">\s* - <span[^>]*>.*?%s.*?</span>''' % re.escape(query), - channel_page, 'list') - url = compat_urlparse.urljoin('https://www.youtube.com/', link) - - video_re = r'data-index="\d+".*?data-video-id="([0-9A-Za-z_-]{11})"' - ids = [] - # sometimes the webpage doesn't contain the videos - # retry until we get them - for i in itertools.count(0): - msg = 'Downloading Youtube mix' - if i > 0: - msg += ', retry #%d' % i - - webpage = self._download_webpage(url, title, msg) - ids = orderedSet(re.findall(video_re, webpage)) - if ids: - break - url_results = self._ids_to_results(ids) - return self.playlist_result(url_results, playlist_title=title) - - class YoutubeChannelIE(InfoExtractor): IE_DESC = 'YouTube.com channels' _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)' From caf90bfaa5434d9ff7035d8575b842b076178ca3 Mon Sep 17 00:00:00 2001 From: Naglis Jonaitis <njonaitis@gmail.com> Date: Mon, 5 Jan 2015 02:22:01 +0200 Subject: [PATCH 0152/1484] [webofstories] Add new extractor (Closes #4585) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/webofstories.py | 102 +++++++++++++++++++++++++++ 2 files changed, 103 insertions(+) create mode 100644 youtube_dl/extractor/webofstories.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 79e6bba45..0c8729384 100644 --- a/youtube_dl/extractor/__init__.py +++ 
b/youtube_dl/extractor/__init__.py @@ -511,6 +511,7 @@ from .wdr import ( WDRMobileIE, WDRMausIE, ) +from .webofstories import WebOfStoriesIE from .weibo import WeiboIE from .wimp import WimpIE from .wistia import WistiaIE diff --git a/youtube_dl/extractor/webofstories.py b/youtube_dl/extractor/webofstories.py new file mode 100644 index 000000000..396cf4e83 --- /dev/null +++ b/youtube_dl/extractor/webofstories.py @@ -0,0 +1,102 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import int_or_none + + +class WebOfStoriesIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?webofstories\.com/play/(?:[^/]+/)?(?P<id>[0-9]+)' + _VIDEO_DOMAIN = 'http://eu-mobile.webofstories.com/' + _GREAT_LIFE_STREAMER = 'rtmp://eu-cdn1.webofstories.com/cfx/st/' + _USER_STREAMER = 'rtmp://eu-users.webofstories.com/cfx/st/' + _TESTS = [ + { + 'url': 'http://www.webofstories.com/play/hans.bethe/71', + 'md5': '373e4dd915f60cfe3116322642ddf364', + 'info_dict': { + 'id': '4536', + 'ext': 'mp4', + 'title': 'The temperature of the sun', + 'thumbnail': 're:^https?://.*\.jpg$', + 'description': 'Hans Bethe talks about calculating the temperature of the sun', + 'duration': 238, + } + }, + { + 'url': 'http://www.webofstories.com/play/55908', + 'md5': '2985a698e1fe3211022422c4b5ed962c', + 'info_dict': { + 'id': '55908', + 'ext': 'mp4', + 'title': 'The story of Gemmata obscuriglobus', + 'thumbnail': 're:^https?://.*\.jpg$', + 'description': 'Planctomycete talks about The story of Gemmata obscuriglobus', + 'duration': 169, + } + }, + ] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + title = self._og_search_title(webpage) + description = self._html_search_meta('description', webpage) + thumbnail = self._og_search_thumbnail(webpage) + + story_filename = self._search_regex( + r'\.storyFileName\("([^"]+)"\)', webpage, 'story filename') + speaker_id = self._search_regex( + r'\.speakerId\("([^"]+)"\)', webpage, 'speaker ID') + story_id = self._search_regex( + r'\.storyId\((\d+)\)', webpage, 'story ID') + speaker_type = self._search_regex( + r'\.speakerType\("([^"]+)"\)', webpage, 'speaker type') + great_life = self._search_regex( + r'isGreatLifeStory\s*=\s*(true|false)', webpage, 'great life story') + is_great_life_series = great_life == 'true' + duration = int_or_none(self._search_regex( + r'\.duration\((\d+)\)', webpage, 'duration', fatal=False)) + + # URL building, see: http://www.webofstories.com/scripts/player.js + ms_prefix = '' + if speaker_type.lower() == 'ms': + ms_prefix = 'mini_sites/' + + if is_great_life_series: + mp4_url = '{0:}lives/{1:}/{2:}.mp4'.format( + self._VIDEO_DOMAIN, speaker_id, story_filename) + rtmp_ext = 'flv' + streamer = self._GREAT_LIFE_STREAMER + play_path = 'stories/{0:}/{1:}'.format( + speaker_id, story_filename) + else: + mp4_url = '{0:}{1:}{2:}/{3:}.mp4'.format( + self._VIDEO_DOMAIN, ms_prefix, speaker_id, story_filename) + rtmp_ext = 'mp4' + streamer = self._USER_STREAMER + play_path = 'mp4:{0:}{1:}/{2}.mp4'.format( + ms_prefix, speaker_id, story_filename) + + formats = [{ + 'format_id': 'mp4_sd', + 'url': mp4_url, + }, { + 'format_id': 'rtmp_sd', + 'page_url': url, + 'url': streamer, + 'ext': rtmp_ext, + 'play_path': play_path, + }] + + self._sort_formats(formats) + + return { + 'id': story_id, + 'title': title, + 'formats': formats, + 'thumbnail': thumbnail, + 'description': description, + 'duration': duration, + } From adf3c58ad31e7376f085271a02fdfe56b1e75989 
Mon Sep 17 00:00:00 2001 From: Naglis Jonaitis <njonaitis@gmail.com> Date: Mon, 5 Jan 2015 02:55:12 +0200 Subject: [PATCH 0153/1484] [lrt] Fix missing provider key Also, modernize a bit. --- youtube_dl/extractor/lrt.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/lrt.py b/youtube_dl/extractor/lrt.py index d72d470aa..9c2fbdd96 100644 --- a/youtube_dl/extractor/lrt.py +++ b/youtube_dl/extractor/lrt.py @@ -2,7 +2,6 @@ from __future__ import unicode_literals import re -import json from .common import InfoExtractor from ..utils import ( @@ -28,7 +27,6 @@ class LRTIE(InfoExtractor): 'params': { 'skip_download': True, # HLS download }, - } def _real_extract(self, url): @@ -44,7 +42,9 @@ class LRTIE(InfoExtractor): formats = [] for js in re.findall(r'(?s)config:\s*(\{.*?\})', webpage): - data = json.loads(js_to_json(js)) + data = self._parse_json(js, video_id, transform_source=js_to_json) + if 'provider' not in data: + continue if data['provider'] == 'rtmp': formats.append({ 'format_id': 'rtmp', From bdf80aa542da15437545ae9c17cd5c80e17e171f Mon Sep 17 00:00:00 2001 From: Bart Kappenburg <bartkappenburg@gmail.com> Date: Mon, 5 Jan 2015 11:51:24 +0100 Subject: [PATCH 0154/1484] Update rtlnl.py Added support for the non-www version of rtlxl.nl by making "www." optional. --- youtube_dl/extractor/rtlnl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/rtlnl.py b/youtube_dl/extractor/rtlnl.py index d029b0ec5..a3ca79f2c 100644 --- a/youtube_dl/extractor/rtlnl.py +++ b/youtube_dl/extractor/rtlnl.py @@ -8,7 +8,7 @@ from ..utils import parse_duration class RtlXlIE(InfoExtractor): IE_NAME = 'rtlxl.nl' - _VALID_URL = r'https?://www\.rtlxl\.nl/#!/[^/]+/(?P<uuid>[^/?]+)' + _VALID_URL = r'https?://(www\.)?rtlxl\.nl/#!/[^/]+/(?P<uuid>[^/?]+)' _TEST = { 'url': 'http://www.rtlxl.nl/#!/rtl-nieuws-132237/6e4203a6-0a5e-3596-8424-c599a59e0677', From a4c3f486394ae8ead64e8e634433670639e3080f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Mon, 5 Jan 2015 11:46:40 +0100 Subject: [PATCH 0155/1484] [vimple] Replace tests The first one seems to be no longer available and the second was an episode from a tv show. 
--- youtube_dl/extractor/vimple.py | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/vimple.py b/youtube_dl/extractor/vimple.py index 33d370e1c..ee3d86117 100644 --- a/youtube_dl/extractor/vimple.py +++ b/youtube_dl/extractor/vimple.py @@ -14,28 +14,17 @@ class VimpleIE(InfoExtractor): IE_DESC = 'Vimple.ru' _VALID_URL = r'https?://(player.vimple.ru/iframe|vimple.ru)/(?P<id>[a-f0-9]{10,})' _TESTS = [ - # Quality: Large, from iframe { - 'url': 'http://player.vimple.ru/iframe/b132bdfd71b546d3972f9ab9a25f201c', + 'url': 'http://vimple.ru/c0f6b1687dcd4000a97ebe70068039cf', + 'md5': '2e750a330ed211d3fd41821c6ad9a279', 'info_dict': { - 'id': 'b132bdfd71b546d3972f9ab9a25f201c', - 'title': 'great-escape-minecraft.flv', + 'id': 'c0f6b1687dcd4000a97ebe70068039cf', 'ext': 'mp4', - 'duration': 352, - 'webpage_url': 'http://vimple.ru/b132bdfd71b546d3972f9ab9a25f201c', + 'title': 'Sunset', + 'duration': 20, + 'thumbnail': 're:https?://.*?\.jpg', }, }, - # Quality: Medium, from mainpage - { - 'url': 'http://vimple.ru/a15950562888453b8e6f9572dc8600cd', - 'info_dict': { - 'id': 'a15950562888453b8e6f9572dc8600cd', - 'title': 'DB 01', - 'ext': 'flv', - 'duration': 1484, - 'webpage_url': 'http://vimple.ru/a15950562888453b8e6f9572dc8600cd', - } - }, ] def _real_extract(self, url): From 628bc4d1e73ddef2b67eb6aba7b642c2e0ea894e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Mon, 5 Jan 2015 12:28:35 +0100 Subject: [PATCH 0156/1484] [khanacademy] Update test --- youtube_dl/extractor/khanacademy.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/khanacademy.py b/youtube_dl/extractor/khanacademy.py index 408d00944..08a671fa8 100644 --- a/youtube_dl/extractor/khanacademy.py +++ b/youtube_dl/extractor/khanacademy.py @@ -22,8 +22,10 @@ class KhanAcademyIE(InfoExtractor): 'description': 'The perfect cipher', 'duration': 176, 'uploader': 'Brit Cruise', + 'uploader_id': 'khanacademy', 'upload_date': '20120411', - } + }, + 'add_ie': ['Youtube'], }, { 'url': 'https://www.khanacademy.org/math/applied-math/cryptography', 'info_dict': { From 75311a7e160912550e3c07642a5635f85f72cb0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Mon, 5 Jan 2015 12:29:32 +0100 Subject: [PATCH 0157/1484] .travis.yml: Remove my email from the list --- .travis.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index c6cc7a994..f14014414 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,7 +9,6 @@ notifications: email: - filippo.valsorda@gmail.com - phihag@phihag.de - - jaime.marquinez.ferrandiz+travis@gmail.com - yasoob.khld@gmail.com # irc: # channels: From 87830900a95f95308dac565f9da12387edea65e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Mon, 5 Jan 2015 13:07:24 +0100 Subject: [PATCH 0158/1484] [generic] Update some tests --- youtube_dl/extractor/generic.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 493afb57d..5c41ff517 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -131,12 +131,13 @@ class GenericIE(InfoExtractor): # ooyala video { 'url': 
'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219', - 'md5': '5644c6ca5d5782c1d0d350dad9bd840c', + 'md5': '166dd577b433b4d4ebfee10b0824d8ff', 'info_dict': { 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ', 'ext': 'mp4', 'title': '2cc213299525360.mov', # that's what we get }, + 'add_ie': ['Ooyala'], }, # google redirect { @@ -146,7 +147,7 @@ class GenericIE(InfoExtractor): 'ext': 'mp4', 'upload_date': '20130224', 'uploader_id': 'TheVerge', - 'description': 'Chris Ziegler takes a look at the Alcatel OneTouch Fire and the ZTE Open; two of the first Firefox OS handsets to be officially announced.', + 'description': 're:^Chris Ziegler takes a look at the\.*', 'uploader': 'The Verge', 'title': 'First Firefox OS phones side-by-side', }, From cd791a5ea08b77dab37c15efa7e064c07144cb6a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Mon, 5 Jan 2015 13:11:13 +0100 Subject: [PATCH 0159/1484] [ted] Add support for embed-ssl.ted.com embedded videos --- youtube_dl/extractor/generic.py | 2 +- youtube_dl/extractor/ted.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 5c41ff517..2d871f8b4 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -926,7 +926,7 @@ class GenericIE(InfoExtractor): # Look for embedded TED player mobj = re.search( - r'<iframe[^>]+?src=(["\'])(?P<url>http://embed\.ted\.com/.+?)\1', webpage) + r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage) if mobj is not None: return self.url_result(mobj.group('url'), 'TED') diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py index 944177426..10b3b706a 100644 --- a/youtube_dl/extractor/ted.py +++ b/youtube_dl/extractor/ted.py @@ -13,7 +13,7 @@ from ..compat import ( class TEDIE(SubtitlesInfoExtractor): _VALID_URL = r'''(?x) (?P<proto>https?://) - (?P<type>www|embed)(?P<urlmain>\.ted\.com/ + (?P<type>www|embed(?:-ssl)?)(?P<urlmain>\.ted\.com/ ( (?P<type_playlist>playlists(?:/\d+)?) # We have a playlist | @@ -98,7 +98,7 @@ class TEDIE(SubtitlesInfoExtractor): def _real_extract(self, url): m = re.match(self._VALID_URL, url, re.VERBOSE) - if m.group('type') == 'embed': + if m.group('type').startswith('embed'): desktop_url = m.group('proto') + 'www' + m.group('urlmain') return self.url_result(desktop_url, 'TED') name = m.group('name') From a285b6377b46518ca45d6a41481bf920b353a857 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Mon, 5 Jan 2015 13:59:49 +0100 Subject: [PATCH 0160/1484] [normalboots] Skip download in test, it uses rtmp --- youtube_dl/extractor/normalboots.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/normalboots.py b/youtube_dl/extractor/normalboots.py index 3d35b11ac..c13ff0d65 100644 --- a/youtube_dl/extractor/normalboots.py +++ b/youtube_dl/extractor/normalboots.py @@ -22,7 +22,11 @@ class NormalbootsIE(InfoExtractor): 'description': 'Jon is late for Christmas. Typical. 
Thanks to: Paul Ritchey for Co-Writing/Filming: http://www.youtube.com/user/ContinueShow Michael Azzi for Christmas Intro Animation: http://michafrar.tumblr.com/ Jerrod Waters for Christmas Intro Music: http://www.youtube.com/user/xXJerryTerryXx Casey Ormond for ‘Tense Battle Theme’:\xa0http://www.youtube.com/Kiamet/', 'uploader': 'JonTron', 'upload_date': '20140125', - } + }, + 'params': { + # rtmp download + 'skip_download': True, + }, } def _real_extract(self, url): From 03ff2cc1c49c82daf2218b76e169c2d679447f03 Mon Sep 17 00:00:00 2001 From: oteng <otengkwaku@gmail.com> Date: Mon, 5 Jan 2015 16:28:24 +0000 Subject: [PATCH 0161/1484] [Auengine] corrected extractions logic The way the video download url was been extracted was not working well so i change it for it to extract the correct url --- .gitignore | 2 ++ youtube_dl/extractor/auengine.py | 16 ++++++---------- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/.gitignore b/.gitignore index 86312d4e4..0422adf44 100644 --- a/.gitignore +++ b/.gitignore @@ -31,3 +31,5 @@ updates_key.pem test/testdata .tox youtube-dl.zsh +.idea +.idea/* \ No newline at end of file diff --git a/youtube_dl/extractor/auengine.py b/youtube_dl/extractor/auengine.py index 014a21952..17c3ad2ef 100644 --- a/youtube_dl/extractor/auengine.py +++ b/youtube_dl/extractor/auengine.py @@ -29,17 +29,12 @@ class AUEngineIE(InfoExtractor): webpage = self._download_webpage(url, video_id) title = self._html_search_regex(r'<title>(?P<title>.+?)', webpage, 'title') title = title.strip() - links = re.findall(r'\s(?:file|url):\s*["\']([^\'"]+)["\']', webpage) - links = map(compat_urllib_parse.unquote, links) + video_url = re.findall(r'http://\w+.auengine.com/vod/.*[^\W]', webpage) + video_url = map(compat_urllib_parse.unquote, video_url)[0] + thumbnail = re.findall(r'http://\w+.auengine.com/thumb/.*[^\W]', webpage) + thumbnail = map(compat_urllib_parse.unquote, thumbnail)[0] - thumbnail = None - video_url = None - for link in links: - if link.endswith('.png'): - thumbnail = link - elif '/videos/' in link: - video_url = link - if not video_url: + if video_url == "" and thumbnail =="": raise ExtractorError('Could not find video URL') ext = '.' + determine_ext(video_url) if ext == title[-len(ext):]: @@ -52,3 +47,4 @@ class AUEngineIE(InfoExtractor): 'thumbnail': thumbnail, 'http_referer': 'http://www.auengine.com/flowplayer/flowplayer.commercial-3.2.14.swf', } + From 9d247bbd2d972953fbb9e8f9aee67472d3854883 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Mon, 5 Jan 2015 18:13:19 +0100 Subject: [PATCH 0162/1484] [radiobremen] Fix under Python 2.6 and fix duration --- youtube_dl/extractor/radiobremen.py | 30 +++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/radiobremen.py b/youtube_dl/extractor/radiobremen.py index 6d130d3d9..9f7e6af15 100644 --- a/youtube_dl/extractor/radiobremen.py +++ b/youtube_dl/extractor/radiobremen.py @@ -5,10 +5,11 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..utils import parse_duration class RadioBremenIE(InfoExtractor): - _VALID_URL = r'http?://(?:www\.)?radiobremen\.de/mediathek/(index\.html)?\?id=(?P[0-9]+)' + _VALID_URL = r'http?://(?:www\.)?radiobremen\.de/mediathek/(?:index\.html)?\?id=(?P[0-9]+)' IE_NAME = 'radiobremen' _TEST = { @@ -16,6 +17,7 @@ class RadioBremenIE(InfoExtractor): 'info_dict': { 'id': '114720', 'ext': 'mp4', + 'duration': 1685, 'width': 512, 'title': 'buten un binnen vom 22. 
Dezember', 'description': 'Unter anderem mit diesen Themen: 45 Flüchtlinge sind in Worpswede angekommen +++ Freies Internet für alle: Bremer arbeiten an einem flächendeckenden W-Lan-Netzwerk +++ Aktivisten kämpfen für das Unibad +++ So war das Wetter 2014 +++', @@ -23,32 +25,32 @@ class RadioBremenIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('video_id') + video_id = self._match_id(url) meta_url = "http://www.radiobremen.de/apps/php/mediathek/metadaten.php?id=%s" % video_id meta_doc = self._download_webpage(meta_url, video_id, 'Downloading metadata') title = self._html_search_regex("(?P.+)</h1>", meta_doc, "title") description = self._html_search_regex("<p>(?P<description>.*)</p>", meta_doc, "description") - duration = self._html_search_regex("Länge:</td>\s+<td>(?P<duration>[0-9]+:[0-9]+)</td>", meta_doc, "duration") + duration = parse_duration( + self._html_search_regex("Länge:</td>\s+<td>(?P<duration>[0-9]+:[0-9]+)</td>", meta_doc, "duration")) page_doc = self._download_webpage(url, video_id, 'Downloading video information') pattern = "ardformatplayerclassic\(\'playerbereich\',\'(?P<width>[0-9]+)\',\'.*\',\'(?P<video_id>[0-9]+)\',\'(?P<secret>[0-9]+)\',\'(?P<thumbnail>.+)\',\'\'\)" mobj = re.search(pattern, page_doc) - width, video_id, secret, thumbnail = int(mobj.group("width")), mobj.group("video_id"), mobj.group("secret"), mobj.group("thumbnail") - video_url = "http://dl-ondemand.radiobremen.de/mediabase/{:}/{:}_{:}_{:}.mp4".format(video_id, video_id, secret, width) + video_url = ( + "http://dl-ondemand.radiobremen.de/mediabase/%s/%s_%s_%s.mp4" % + (video_id, video_id, mobj.group("secret"), mobj.group('width'))) + formats = [{ + 'url': video_url, + 'ext': 'mp4', + 'width': int(mobj.group("width")), + }] return { 'id': video_id, 'title': title, 'description': description, 'duration': duration, - 'formats': [ - {'url': video_url, - 'ext': 'mp4', - 'width': width, - 'protocol': 'http' - } - ], - 'thumbnail': thumbnail, + 'formats': formats, + 'thumbnail': mobj.group('thumbnail'), } From aa80652f47b3df14664556913d4f14172c9ec4fb Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Mon, 5 Jan 2015 18:14:09 +0100 Subject: [PATCH 0163/1484] [radiobremen] Add test for thumbnail --- youtube_dl/extractor/radiobremen.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/radiobremen.py b/youtube_dl/extractor/radiobremen.py index 9f7e6af15..057dc15ab 100644 --- a/youtube_dl/extractor/radiobremen.py +++ b/youtube_dl/extractor/radiobremen.py @@ -20,6 +20,7 @@ class RadioBremenIE(InfoExtractor): 'duration': 1685, 'width': 512, 'title': 'buten un binnen vom 22. 
Dezember', + 'thumbnail': 're:https?://.*\.jpg$', 'description': 'Unter anderem mit diesen Themen: 45 Flüchtlinge sind in Worpswede angekommen +++ Freies Internet für alle: Bremer arbeiten an einem flächendeckenden W-Lan-Netzwerk +++ Aktivisten kämpfen für das Unibad +++ So war das Wetter 2014 +++', }, } From 5e3e1c82d828bc54f6873d2c7bdab315713e9a02 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Mon, 5 Jan 2015 18:14:39 +0100 Subject: [PATCH 0164/1484] Credit @ckrooss for radiobremen (#4632) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 9b548cf25..a63c97ae0 100644 --- a/AUTHORS +++ b/AUTHORS @@ -99,3 +99,4 @@ Max Reimann Cédric Luthi Thijs Vermeir Joel Leclerc +Christopher Krooss From d7cc31b63e1efaf5762f38897d4c717901e127e3 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Mon, 5 Jan 2015 18:16:47 +0100 Subject: [PATCH 0165/1484] [generic] PEP8 --- youtube_dl/extractor/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 2d871f8b4..7a5bf9392 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -926,7 +926,7 @@ class GenericIE(InfoExtractor): # Look for embedded TED player mobj = re.search( - r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage) + r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage) if mobj is not None: return self.url_result(mobj.group('url'), 'TED') From dda620e88c68e995afcc3cd35b9d360cb42527a0 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Mon, 5 Jan 2015 18:17:03 +0100 Subject: [PATCH 0166/1484] [radiobremen] Make code more readable and more resilient to failures --- youtube_dl/extractor/radiobremen.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/radiobremen.py b/youtube_dl/extractor/radiobremen.py index 057dc15ab..0d706312e 100644 --- a/youtube_dl/extractor/radiobremen.py +++ b/youtube_dl/extractor/radiobremen.py @@ -29,15 +29,21 @@ class RadioBremenIE(InfoExtractor): video_id = self._match_id(url) meta_url = "http://www.radiobremen.de/apps/php/mediathek/metadaten.php?id=%s" % video_id - meta_doc = self._download_webpage(meta_url, video_id, 'Downloading metadata') - title = self._html_search_regex("<h1.*>(?P<title>.+)</h1>", meta_doc, "title") - description = self._html_search_regex("<p>(?P<description>.*)</p>", meta_doc, "description") - duration = parse_duration( - self._html_search_regex("Länge:</td>\s+<td>(?P<duration>[0-9]+:[0-9]+)</td>", meta_doc, "duration")) + meta_doc = self._download_webpage( + meta_url, video_id, 'Downloading metadata') + title = self._html_search_regex( + r"<h1.*>(?P<title>.+)</h1>", meta_doc, "title") + description = self._html_search_regex( + r"<p>(?P<description>.*)</p>", meta_doc, "description", fatal=False) + duration = parse_duration(self._html_search_regex( + r"Länge:</td>\s+<td>(?P<duration>[0-9]+:[0-9]+)</td>", + meta_doc, "duration", fatal=False)) - page_doc = self._download_webpage(url, video_id, 'Downloading video information') - pattern = "ardformatplayerclassic\(\'playerbereich\',\'(?P<width>[0-9]+)\',\'.*\',\'(?P<video_id>[0-9]+)\',\'(?P<secret>[0-9]+)\',\'(?P<thumbnail>.+)\',\'\'\)" - mobj = re.search(pattern, page_doc) + page_doc = self._download_webpage( + url, video_id, 'Downloading video information') + mobj = re.search( + 
r"ardformatplayerclassic\(\'playerbereich\',\'(?P<width>[0-9]+)\',\'.*\',\'(?P<video_id>[0-9]+)\',\'(?P<secret>[0-9]+)\',\'(?P<thumbnail>.+)\',\'\'\)", + page_doc) video_url = ( "http://dl-ondemand.radiobremen.de/mediabase/%s/%s_%s_%s.mp4" % (video_id, video_id, mobj.group("secret"), mobj.group('width'))) From 6291438073e35adc94f573a43625fb54a64cf733 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Mon, 5 Jan 2015 18:21:32 +0100 Subject: [PATCH 0167/1484] [auengine] Simplify (#4643) --- youtube_dl/extractor/auengine.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/auengine.py b/youtube_dl/extractor/auengine.py index 17c3ad2ef..a1b666be0 100644 --- a/youtube_dl/extractor/auengine.py +++ b/youtube_dl/extractor/auengine.py @@ -7,6 +7,7 @@ from ..compat import compat_urllib_parse from ..utils import ( determine_ext, ExtractorError, + remove_end, ) @@ -27,18 +28,18 @@ class AUEngineIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - title = self._html_search_regex(r'<title>(?P<title>.+?)', webpage, 'title') - title = title.strip() - video_url = re.findall(r'http://\w+.auengine.com/vod/.*[^\W]', webpage) - video_url = map(compat_urllib_parse.unquote, video_url)[0] - thumbnail = re.findall(r'http://\w+.auengine.com/thumb/.*[^\W]', webpage) - thumbnail = map(compat_urllib_parse.unquote, thumbnail)[0] + title = self._html_search_regex( + r'\s*(?P<title>.+?)\s*', webpage, 'title') + video_urls = re.findall(r'http://\w+.auengine.com/vod/.*[^\W]', webpage) + video_url = compat_urllib_parse.unquote(video_urls[0]) + thumbnails = re.findall(r'http://\w+.auengine.com/thumb/.*[^\W]', webpage) + thumbnail = compat_urllib_parse.unquote(thumbnails[0]) - if video_url == "" and thumbnail =="": + if not video_url: raise ExtractorError('Could not find video URL') + ext = '.' + determine_ext(video_url) - if ext == title[-len(ext):]: - title = title[:-len(ext)] + title = remove_end(title, ext) return { 'id': video_id, @@ -47,4 +48,3 @@ class AUEngineIE(InfoExtractor): 'thumbnail': thumbnail, 'http_referer': 'http://www.auengine.com/flowplayer/flowplayer.commercial-3.2.14.swf', } - From f4bca0b348fe1f4f65c939b496973062180e0c4f Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Mon, 5 Jan 2015 18:44:29 +0100 Subject: [PATCH 0168/1484] release 2015.01.05 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 09813928a..086f0ebf0 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2015.01.04' +__version__ = '2015.01.05' From 8f9529cd0559bdbe6c568cfd765f9129666a77be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Mon, 5 Jan 2015 19:14:50 +0100 Subject: [PATCH 0169/1484] [motorsport] Fix extraction and make trailing '/' optional They directly embed a youtube video now. 
--- youtube_dl/extractor/motorsport.py | 60 ++++++++++++------------------ 1 file changed, 23 insertions(+), 37 deletions(-) diff --git a/youtube_dl/extractor/motorsport.py b/youtube_dl/extractor/motorsport.py index f5ca74e97..c1a482dba 100644 --- a/youtube_dl/extractor/motorsport.py +++ b/youtube_dl/extractor/motorsport.py @@ -1,63 +1,49 @@ # coding: utf-8 from __future__ import unicode_literals -import hashlib -import json -import time - from .common import InfoExtractor from ..compat import ( - compat_parse_qs, - compat_str, -) -from ..utils import ( - int_or_none, + compat_urlparse, ) class MotorsportIE(InfoExtractor): IE_DESC = 'motorsport.com' - _VALID_URL = r'http://www\.motorsport\.com/[^/?#]+/video/(?:[^/?#]+/)(?P[^/]+)/(?:$|[?#])' + _VALID_URL = r'http://www\.motorsport\.com/[^/?#]+/video/(?:[^/?#]+/)(?P[^/]+)/?(?:$|[?#])' _TEST = { 'url': 'http://www.motorsport.com/f1/video/main-gallery/red-bull-racing-2014-rules-explained/', - 'md5': '5592cb7c5005d9b2c163df5ac3dc04e4', 'info_dict': { - 'id': '7063', + 'id': '2-T3WuR-KMM', 'ext': 'mp4', 'title': 'Red Bull Racing: 2014 Rules Explained', - 'duration': 207, + 'duration': 208, 'description': 'A new clip from Red Bull sees Daniel Ricciardo and Sebastian Vettel explain the 2014 Formula One regulations – which are arguably the most complex the sport has ever seen.', - 'uploader': 'rainiere', - 'thumbnail': r're:^http://.*motorsport\.com/.+\.jpg$' - } + 'uploader': 'mcomstaff', + 'uploader_id': 'UC334JIYKkVnyFoNCclfZtHQ', + 'upload_date': '20140903', + 'thumbnail': r're:^https?://.+\.jpg$' + }, + 'add_ie': ['Youtube'], + 'params': { + 'skip_download': True, + }, } def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) - flashvars_code = self._html_search_regex( - r'Video by: (.*?)', webpage, - 'uploader', fatal=False) + iframe_path = self._html_search_regex( + r'', start_page, 'xml filename') xml_decription_url = xml_root + 'xml/' + xml_name - xml_description = self._download_xml(xml_decription_url, video_id) + xml_description = self._download_xml(xml_decription_url, display_id) video_title = xml_description.find('./metadata/title').text video_formats = self._parse_mp4(xml_description) @@ -172,6 +178,7 @@ class GDCVaultIE(InfoExtractor): return { 'id': video_id, + 'display_id': display_id, 'title': video_title, 'formats': video_formats, } From 5090d93f2c7e5d40cd6d7a8c9eda789f67bd1eb8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 24 Apr 2015 21:47:13 +0600 Subject: [PATCH 1352/1484] [dotsub] Fix extraction --- youtube_dl/extractor/dotsub.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/dotsub.py b/youtube_dl/extractor/dotsub.py index f51d88a98..e9ca236d4 100644 --- a/youtube_dl/extractor/dotsub.py +++ b/youtube_dl/extractor/dotsub.py @@ -36,7 +36,8 @@ class DotsubIE(InfoExtractor): if not video_url: webpage = self._download_webpage(url, video_id) video_url = self._search_regex( - r'"file"\s*:\s*\'([^\']+)', webpage, 'video url') + [r']+src="([^"]+)"', r'"file"\s*:\s*\'([^\']+)'], + webpage, 'video url') return { 'id': video_id, From 2ad978532bd0a94fb7529f429a26e9b9966b2e1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 24 Apr 2015 22:03:14 +0600 Subject: [PATCH 1353/1484] [ellentv] Fix extraction --- youtube_dl/extractor/ellentv.py | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/youtube_dl/extractor/ellentv.py 
b/youtube_dl/extractor/ellentv.py index 5154bbd7f..93affaa8f 100644 --- a/youtube_dl/extractor/ellentv.py +++ b/youtube_dl/extractor/ellentv.py @@ -39,24 +39,20 @@ class EllenTVIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + webpage = self._download_webpage( + 'http://widgets.ellentube.com/videos/%s' % video_id, + video_id) - video_url = self._html_search_meta('VideoURL', webpage, 'url', fatal=True) - title = self._og_search_title(webpage, default=None) or self._search_regex( - r'pageName\s*=\s*"([^"]+)"', webpage, 'title') - description = self._html_search_meta( - 'description', webpage, 'description') or self._og_search_description(webpage) - timestamp = parse_iso8601(self._search_regex( - r'', + webpage, 'video title') + + extension = xpath_text(video_info, './/movie_type') + if not extension: + extension = determine_ext(video_real_url) video_format = extension.upper() - thumbnail = video_info.find('.//thumbnail_url').text - description = video_info.find('.//description').text - upload_date = unified_strdate(video_info.find('.//first_retrieve').text.split('+')[0]) - view_count = int_or_none(video_info.find('.//view_counter').text) - comment_count = int_or_none(video_info.find('.//comment_num').text) - duration = parse_duration(video_info.find('.//length').text) - webpage_url = video_info.find('.//watch_url').text + thumbnail = xpath_text(video_info, './/thumbnail_url') + description = xpath_text(video_info, './/description') + timestamp = parse_iso8601(xpath_text(video_info, './/first_retrieve')) + view_count = int_or_none(xpath_text(video_info, './/view_counter')) + comment_count = int_or_none(xpath_text(video_info, './/comment_num')) + duration = parse_duration(xpath_text(video_info, './/length')) + webpage_url = xpath_text(video_info, './/watch_url') if video_info.find('.//ch_id') is not None: uploader_id = video_info.find('.//ch_id').text @@ -153,7 +184,7 @@ class NiconicoIE(InfoExtractor): else: uploader_id = uploader = None - return { + ret = { 'id': video_id, 'url': video_real_url, 'title': title, @@ -162,13 +193,14 @@ class NiconicoIE(InfoExtractor): 'thumbnail': thumbnail, 'description': description, 'uploader': uploader, - 'upload_date': upload_date, + 'timestamp': timestamp, 'uploader_id': uploader_id, 'view_count': view_count, 'comment_count': comment_count, 'duration': duration, 'webpage_url': webpage_url, } + return dict((k, v) for k, v in ret.items() if v is not None) class NiconicoPlaylistIE(InfoExtractor): From 59d814f79341341e6390392a09e628ee12a6f18d Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 30 Apr 2015 00:47:52 +0800 Subject: [PATCH 1402/1484] [niconico] Remove credentials from tests and enhance title extraction All test videos can be downloaded without username and password now. 
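
The title-extraction half of this change is a fallback chain: take the title from the getthumbinfo API response first, fall back to og:title, and only then scrape the videoHeaderTitle element on the watch page. A compact standalone sketch of that "first non-empty source wins" pattern follows; the helper name and the sample values are illustrative, not code from the patch.

# Sketch of the fallback chain used for the title. The helper name and
# the sample values below are illustrative only.
def first_truthy(*candidates):
    for value in candidates:
        if value:
            return value
    return None

api_title = None                   # e.g. title missing from the API XML
og_title = 'Big Buck Bunny'        # e.g. og:title from the watch page
header_title = 'regex fallback'    # e.g. scraped from videoHeaderTitle

print(first_truthy(api_title, og_title, header_title))  # -> Big Buck Bunny
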
--- youtube_dl/extractor/niconico.py | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py index 0ca046ac2..a9b770cb3 100644 --- a/youtube_dl/extractor/niconico.py +++ b/youtube_dl/extractor/niconico.py @@ -38,13 +38,10 @@ class NiconicoIE(InfoExtractor): 'description': '(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org', 'duration': 33, }, - 'params': { - 'username': 'ydl.niconico@gmail.com', - 'password': 'youtube-dl', - }, }, { + # File downloaded with and without credentials are different, so omit + # the md5 field 'url': 'http://www.nicovideo.jp/watch/nm14296458', - 'md5': '8db08e0158457cf852a31519fceea5bc', 'info_dict': { 'id': 'nm14296458', 'ext': 'swf', @@ -56,10 +53,6 @@ class NiconicoIE(InfoExtractor): 'timestamp': 1304065916, 'duration': 209, }, - 'params': { - 'username': 'ydl.niconico@gmail.com', - 'password': 'youtube-dl', - }, }, { # 'video exists but is marked as "deleted" 'url': 'http://www.nicovideo.jp/watch/sm10000', @@ -70,10 +63,6 @@ class NiconicoIE(InfoExtractor): 'description': 'deleted', 'title': 'ドラえもんエターナル第3話「決戦第3新東京市」<前編>', }, - 'params': { - 'username': 'ydl.niconico@gmail.com', - 'password': 'youtube-dl', - } }] _VALID_URL = r'https?://(?:www\.|secure\.)?nicovideo\.jp/watch/(?P(?:[a-z]{2})?[0-9]+)' @@ -158,6 +147,8 @@ class NiconicoIE(InfoExtractor): # Start extracting information title = xpath_text(video_info, './/title') + if not title: + title = self._og_search_title(webpage, default=None) if not title: title = self._html_search_regex( r']+class="videoHeaderTitle"[^>]*>([^<]+)', From b2e8e7dab567ed9b27817c5dd0cf173bc7fb8cfa Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 30 Apr 2015 02:24:05 +0800 Subject: [PATCH 1403/1484] [niconico] Try to extract all optional fields from various sources --- youtube_dl/extractor/niconico.py | 57 ++++++++++++++++++++++++++++---- 1 file changed, 51 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py index a9b770cb3..dd16d0042 100644 --- a/youtube_dl/extractor/niconico.py +++ b/youtube_dl/extractor/niconico.py @@ -3,6 +3,7 @@ from __future__ import unicode_literals import re import json +import datetime from .common import InfoExtractor from ..compat import ( @@ -55,13 +56,16 @@ class NiconicoIE(InfoExtractor): }, }, { # 'video exists but is marked as "deleted" + # md5 is unstable 'url': 'http://www.nicovideo.jp/watch/sm10000', - 'md5': '38e53c9aad548f3ecf01ca7680b59b08', 'info_dict': { 'id': 'sm10000', 'ext': 'unknown_video', 'description': 'deleted', 'title': 'ドラえもんエターナル第3話「決戦第3新東京市」<前編>', + 'upload_date': '20071224', + 'timestamp': 1198527840, # timestamp field has different value if logged in + 'duration': 304, }, }] @@ -154,17 +158,59 @@ class NiconicoIE(InfoExtractor): r']+class="videoHeaderTitle"[^>]*>([^<]+)', webpage, 'video title') + watch_api_data_string = self._html_search_regex( + r']+id="watchAPIDataContainer"[^>]+>([^<]+)
', + webpage, 'watch api data', default=None) + watch_api_data = self._parse_json(watch_api_data_string, video_id) if watch_api_data_string else {} + video_detail = watch_api_data.get('videoDetail', {}) + extension = xpath_text(video_info, './/movie_type') if not extension: extension = determine_ext(video_real_url) video_format = extension.upper() - thumbnail = xpath_text(video_info, './/thumbnail_url') + + thumbnail = ( + xpath_text(video_info, './/thumbnail_url') or + self._html_search_meta('image', webpage, 'thumbnail', default=None) or + video_detail.get('thumbnail')) + description = xpath_text(video_info, './/description') + timestamp = parse_iso8601(xpath_text(video_info, './/first_retrieve')) + if not timestamp: + match = self._html_search_meta('datePublished', webpage, 'date published', default=None) + if match: + timestamp = parse_iso8601(match.replace('+', ':00+')) + if not timestamp and video_detail.get('postedAt'): + timestamp = parse_iso8601( + video_detail['postedAt'].replace('/', '-'), + delimiter=' ', timezone=datetime.timedelta(hours=9)) + view_count = int_or_none(xpath_text(video_info, './/view_counter')) + if not view_count: + match = self._html_search_regex( + r'>Views: ]*>([^<]+)', + webpage, 'view count', default=None) + if match: + view_count = int_or_none(match.replace(',', '')) + view_count = view_count or video_detail.get('viewCount') + comment_count = int_or_none(xpath_text(video_info, './/comment_num')) - duration = parse_duration(xpath_text(video_info, './/length')) - webpage_url = xpath_text(video_info, './/watch_url') + if not comment_count: + match = self._html_search_regex( + r'>Comments: ]*>([^<]+)', + webpage, 'comment count', default=None) + if match: + comment_count = int_or_none(match.replace(',', '')) + comment_count = comment_count or video_detail.get('commentCount') + + duration = (parse_duration( + xpath_text(video_info, './/length') or + self._html_search_meta( + 'video:duration', webpage, 'video duration', default=None)) or + video_detail.get('length')) + + webpage_url = xpath_text(video_info, './/watch_url') or url if video_info.find('.//ch_id') is not None: uploader_id = video_info.find('.//ch_id').text @@ -175,7 +221,7 @@ class NiconicoIE(InfoExtractor): else: uploader_id = uploader = None - ret = { + return { 'id': video_id, 'url': video_real_url, 'title': title, @@ -191,7 +237,6 @@ class NiconicoIE(InfoExtractor): 'duration': duration, 'webpage_url': webpage_url, } - return dict((k, v) for k, v in ret.items() if v is not None) class NiconicoPlaylistIE(InfoExtractor): From 965cb8d530e4ced61a9bc42530f7f91b67c709e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Wed, 29 Apr 2015 22:46:19 +0200 Subject: [PATCH 1404/1484] [escapist] pep8 fixes --- youtube_dl/extractor/escapist.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/escapist.py b/youtube_dl/extractor/escapist.py index 600ebf078..8facf1185 100644 --- a/youtube_dl/extractor/escapist.py +++ b/youtube_dl/extractor/escapist.py @@ -76,7 +76,8 @@ class EscapistIE(InfoExtractor): formats = [] for q in ['lq', 'hq', 'hd']: - config_req = compat_urllib_request.Request('http://www.escapistmagazine.com/videos/' + config_req = compat_urllib_request.Request( + 'http://www.escapistmagazine.com/videos/' 'vidconfig.php?videoID=%s&hash=%s&quality=%s' % (video_id, key, 'mp4_' + q)) config_req.add_header('Referer', url) config = self._download_webpage(config_req, video_id, 'Downloading video config ' + q.upper()) @@ 
-92,8 +93,7 @@ class EscapistIE(InfoExtractor): 'url': v, 'format_id': determine_ext(v) + '_' + q + str(i), 'quality': quality(q), - }) - + }) return { 'id': video_id, From 8dd5418803a25de89d08cdb9d32f80f71c5d6c47 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Wed, 29 Apr 2015 22:53:18 +0200 Subject: [PATCH 1405/1484] Make 'best' format only match non-DASH formats (closes #5554) Otherwise it's impossible to only download non-DASH formats, for example `best[height=?480]/best` would download a DASH video if it's the only one with height=480, instead for falling back to the second format specifier. For audio only urls (soundcloud, bandcamp ...), the best audio will be downloaded as before. --- test/test_YoutubeDL.py | 4 ++-- youtube_dl/YoutubeDL.py | 9 ++++++++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index bb4a65ee1..82b827536 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -237,7 +237,7 @@ class TestFormatSelection(unittest.TestCase): f2['url'] = 'url:' + f2id info_dict = _make_result([f1, f2], extractor='youtube') - ydl = YDL() + ydl = YDL({'format': 'best/bestvideo'}) yie = YoutubeIE(ydl) yie._sort_formats(info_dict['formats']) ydl.process_ie_result(info_dict) @@ -245,7 +245,7 @@ class TestFormatSelection(unittest.TestCase): self.assertEqual(downloaded['format_id'], f1id) info_dict = _make_result([f2, f1], extractor='youtube') - ydl = YDL() + ydl = YDL({'format': 'best/bestvideo'}) yie = YoutubeIE(ydl) yie._sort_formats(info_dict['formats']) ydl.process_ie_result(info_dict) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 827c88e0d..eee9c0154 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -915,7 +915,14 @@ class YoutubeDL(object): return None if format_spec == 'best' or format_spec is None: - return available_formats[-1] + audiovideo_formats = [ + f for f in available_formats + if f.get('vcodec') != 'none' and f.get('acodec') != 'none'] + if audiovideo_formats: + return audiovideo_formats[-1] + # for audio only urls, 'best' selects the best audio format + elif all(f.get('acodec') != 'none' for f in available_formats): + return available_formats[-1] elif format_spec == 'worst': audiovideo_formats = [ f for f in available_formats From 621ffe7bf420aa1a227e823edf2f1acbc67660d0 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 30 Apr 2015 17:05:02 +0800 Subject: [PATCH 1406/1484] [niconico] Fix so* video extraction (fixes #4874) (#2087) --- youtube_dl/extractor/niconico.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py index dd16d0042..3cecebf95 100644 --- a/youtube_dl/extractor/niconico.py +++ b/youtube_dl/extractor/niconico.py @@ -67,6 +67,18 @@ class NiconicoIE(InfoExtractor): 'timestamp': 1198527840, # timestamp field has different value if logged in 'duration': 304, }, + }, { + 'url': 'http://www.nicovideo.jp/watch/so22543406', + 'info_dict': { + 'id': '1388129933', + 'ext': 'mp4', + 'title': '【第1回】RADIOアニメロミックス ラブライブ!~のぞえりRadio Garden~', + 'description': 'md5:b27d224bb0ff53d3c8269e9f8b561cf1', + 'timestamp': 1388851200, + 'upload_date': '20140104', + 'uploader': 'アニメロチャンネル', + 'uploader_id': '312', + } }] _VALID_URL = r'https?://(?:www\.|secure\.)?nicovideo\.jp/watch/(?P(?:[a-z]{2})?[0-9]+)' @@ -109,7 +121,10 @@ class NiconicoIE(InfoExtractor): # Get video webpage. 
We are not actually interested in it for normal # cases, but need the cookies in order to be able to download the # info webpage - webpage = self._download_webpage('http://www.nicovideo.jp/watch/' + video_id, video_id) + webpage, handle = self._download_webpage_handle( + 'http://www.nicovideo.jp/watch/' + video_id, video_id) + if video_id.startswith('so'): + video_id = self._match_id(handle.geturl()) video_info = self._download_xml( 'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id, From c4a21bc9db1868e8be114f496899f6786b9982ec Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 30 Apr 2015 18:23:35 +0800 Subject: [PATCH 1407/1484] [bilibili] Extract multipart videos (closes #3250) --- youtube_dl/extractor/bilibili.py | 74 +++++++++++++++++++++----------- 1 file changed, 49 insertions(+), 25 deletions(-) diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index 904d9a8b4..7ca835e31 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals import re +import itertools from .common import InfoExtractor from ..utils import ( @@ -14,18 +15,25 @@ from ..utils import ( class BiliBiliIE(InfoExtractor): _VALID_URL = r'http://www\.bilibili\.(?:tv|com)/video/av(?P[0-9]+)/' - _TEST = { + _TESTS = [{ 'url': 'http://www.bilibili.tv/video/av1074402/', 'md5': '2c301e4dab317596e837c3e7633e7d86', 'info_dict': { - 'id': '1074402', + 'id': '1074402_part1', 'ext': 'flv', 'title': '【金坷垃】金泡沫', 'duration': 308, 'upload_date': '20140420', 'thumbnail': 're:^https?://.+\.jpg', }, - } + }, { + 'url': 'http://www.bilibili.com/video/av1041170/', + 'info_dict': { + 'id': '1041170', + 'title': '【BD1080P】刀语【诸神&异域】', + }, + 'playlist_count': 9, + }] def _real_extract(self, url): video_id = self._match_id(url) @@ -57,19 +65,14 @@ class BiliBiliIE(InfoExtractor): cid = self._search_regex(r'cid=(\d+)', webpage, 'cid') + entries = [] + lq_doc = self._download_xml( 'http://interface.bilibili.com/v_cdn_play?appkey=1&cid=%s' % cid, video_id, note='Downloading LQ video info' ) - lq_durl = lq_doc.find('./durl') - formats = [{ - 'format_id': 'lq', - 'quality': 1, - 'url': lq_durl.find('./url').text, - 'filesize': int_or_none( - lq_durl.find('./size'), get_attr='text'), - }] + lq_durls = lq_doc.findall('./durl') hq_doc = self._download_xml( 'http://interface.bilibili.com/playurl?appkey=1&cid=%s' % cid, @@ -77,23 +80,44 @@ class BiliBiliIE(InfoExtractor): note='Downloading HQ video info', fatal=False, ) - if hq_doc is not False: - hq_durl = hq_doc.find('./durl') - formats.append({ - 'format_id': 'hq', - 'quality': 2, - 'ext': 'flv', - 'url': hq_durl.find('./url').text, + hq_durls = hq_doc.findall('./durl') if hq_doc is not False else itertools.repeat(None) + + assert len(lq_durls) == len(hq_durls) + + i = 1 + for lq_durl, hq_durl in zip(lq_durls, hq_durls): + formats = [{ + 'format_id': 'lq', + 'quality': 1, + 'url': lq_durl.find('./url').text, 'filesize': int_or_none( - hq_durl.find('./size'), get_attr='text'), + lq_durl.find('./size'), get_attr='text'), + }] + if hq_durl: + formats.append({ + 'format_id': 'hq', + 'quality': 2, + 'ext': 'flv', + 'url': hq_durl.find('./url').text, + 'filesize': int_or_none( + hq_durl.find('./size'), get_attr='text'), + }) + self._sort_formats(formats) + + entries.append({ + 'id': '%s_part%d' % (video_id, i), + 'title': title, + 'formats': formats, + 'duration': duration, + 'upload_date': upload_date, + 'thumbnail': thumbnail, }) - self._sort_formats(formats) + i += 1 + 
return { + '_type': 'multi_video', + 'entries': entries, 'id': video_id, - 'title': title, - 'formats': formats, - 'duration': duration, - 'upload_date': upload_date, - 'thumbnail': thumbnail, + 'title': title } From f7f1df1d82d556c0726898b9de2f7f3824c1be5a Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 30 Apr 2015 22:37:41 +0800 Subject: [PATCH 1408/1484] [VeeHD] Enhance extraction and fix tests (fixes #4965) --- youtube_dl/extractor/veehd.py | 36 +++++++++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/veehd.py b/youtube_dl/extractor/veehd.py index 96353f525..7fdeb784d 100644 --- a/youtube_dl/extractor/veehd.py +++ b/youtube_dl/extractor/veehd.py @@ -17,7 +17,9 @@ from ..utils import ( class VeeHDIE(InfoExtractor): _VALID_URL = r'https?://veehd\.com/video/(?P\d+)' - _TEST = { + # Seems VeeHD videos have multiple copies on several servers, all of + # whom have different MD5 checksums, so omit md5 field in all tests + _TESTS = [{ 'url': 'http://veehd.com/video/4639434_Solar-Sinter', 'info_dict': { 'id': '4639434', @@ -26,7 +28,26 @@ class VeeHDIE(InfoExtractor): 'uploader_id': 'VideoEyes', 'description': 'md5:46a840e8692ddbaffb5f81d9885cb457', }, - } + 'skip': 'Video deleted', + }, { + 'url': 'http://veehd.com/video/4905758_Elysian-Fields-Channeling', + 'info_dict': { + 'id': '4905758', + 'ext': 'mp4', + 'title': 'Elysian Fields - Channeling', + 'description': 'md5:360e4e95fdab58aefbea0f2a19e5604b', + 'uploader_id': 'spotted', + } + }, { + 'url': 'http://veehd.com/video/4665804_Tell-No-One-Ne-le-dis-a-personne-2006-French-EngSoftSubs-Re-Up', + 'info_dict': { + 'id': '4665804', + 'ext': 'avi', + 'title': 'Tell No One (Ne le dis a personne) 2006 French(EngSoftSubs) Re-Up', + 'description': 'md5:d660cca685549776f37165e9a10b60ba', + 'uploader_id': 'belial2549', + } + }] def _real_extract(self, url): video_id = self._match_id(url) @@ -48,13 +69,21 @@ class VeeHDIE(InfoExtractor): player_page = self._download_webpage( player_url, video_id, 'Downloading player page') + video_url = None + config_json = self._search_regex( r'value=\'config=({.+?})\'', player_page, 'config json', default=None) if config_json: config = json.loads(config_json) video_url = compat_urlparse.unquote(config['clip']['url']) - else: + + if not video_url: + video_url = self._html_search_regex( + r']+type="video/divx"[^>]+src="([^"]+)"', + player_page, 'video url', default=None) + + if not video_url: iframe_src = self._search_regex( r']+src="/?([^"]+)"', player_page, 'iframe url') iframe_url = 'http://veehd.com/%s' % iframe_src @@ -82,7 +111,6 @@ class VeeHDIE(InfoExtractor): 'id': video_id, 'title': title, 'url': video_url, - 'ext': 'mp4', 'uploader_id': uploader_id, 'thumbnail': thumbnail, 'description': description, From e01c56f9e12d7abb2e3b548818a689146092806e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 30 Apr 2015 21:06:51 +0600 Subject: [PATCH 1409/1484] [YoutubeDL] Generalize best/worst format match behavior --- youtube_dl/YoutubeDL.py | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index eee9c0154..9d4a2dce8 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -914,22 +914,16 @@ class YoutubeDL(object): if not available_formats: return None - if format_spec == 'best' or format_spec is None: + if format_spec in ['best', 'worst', None]: + format_idx = 0 if format_spec == 'worst' else -1 audiovideo_formats = [ f for f in 
available_formats if f.get('vcodec') != 'none' and f.get('acodec') != 'none'] if audiovideo_formats: - return audiovideo_formats[-1] - # for audio only urls, 'best' selects the best audio format + return audiovideo_formats[format_idx] + # for audio only urls, select the best/worst audio format elif all(f.get('acodec') != 'none' for f in available_formats): - return available_formats[-1] - elif format_spec == 'worst': - audiovideo_formats = [ - f for f in available_formats - if f.get('vcodec') != 'none' and f.get('acodec') != 'none'] - if audiovideo_formats: - return audiovideo_formats[0] - return available_formats[0] + return available_formats[format_idx] elif format_spec == 'bestaudio': audio_formats = [ f for f in available_formats From cd298882cd6f8ed2571dd372f684ec17e992fd9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 30 Apr 2015 21:25:17 +0600 Subject: [PATCH 1410/1484] [vporn] Fix metadata extraction (#5560) --- youtube_dl/extractor/vporn.py | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/vporn.py b/youtube_dl/extractor/vporn.py index 2d23effcc..04ed0b381 100644 --- a/youtube_dl/extractor/vporn.py +++ b/youtube_dl/extractor/vporn.py @@ -64,29 +64,29 @@ class VpornIE(InfoExtractor): title = self._html_search_regex( r'videoname\s*=\s*\'([^\']+)\'', webpage, 'title').strip() description = self._html_search_regex( - r'
(.*?)
', webpage, 'description', fatal=False) + r'class="(?:descr|description_txt)">(.*?)
', + webpage, 'description', fatal=False) thumbnail = self._html_search_regex( r'flashvars\.imageUrl\s*=\s*"([^"]+)"', webpage, 'description', fatal=False, default=None) if thumbnail: thumbnail = 'http://www.vporn.com' + thumbnail uploader = self._html_search_regex( - r'(?s)UPLOADED BY.*?([^<]+)', + r'(?s)Uploaded by:.*?([^<]+)', webpage, 'uploader', fatal=False) categories = re.findall(r'([^<]+)', webpage) duration = parse_duration(self._search_regex( - r'duration (\d+ min \d+ sec)', webpage, 'duration', fatal=False)) + r'Runtime:\s*\s*(\d+ min \d+ sec)', + webpage, 'duration', fatal=False)) - view_count = str_to_int(self._html_search_regex( - r'([\d,\.]+) VIEWS', webpage, 'view count', fatal=False)) - like_count = str_to_int(self._html_search_regex( - r'([\d,\.]+)', webpage, 'like count', fatal=False)) - dislike_count = str_to_int(self._html_search_regex( - r'([\d,\.]+)', webpage, 'dislike count', fatal=False)) + view_count = str_to_int(self._search_regex( + r'class="views">([\d,\.]+) [Vv]iews<', + webpage, 'view count', fatal=False)) comment_count = str_to_int(self._html_search_regex( - r'

Comments \(([\d,\.]+)\)

', webpage, 'comment count', fatal=False)) + r"'Comments \(([\d,\.]+)\)'", + webpage, 'comment count', default=None)) formats = [] @@ -117,8 +117,6 @@ class VpornIE(InfoExtractor): 'categories': categories, 'duration': duration, 'view_count': view_count, - 'like_count': like_count, - 'dislike_count': dislike_count, 'comment_count': comment_count, 'age_limit': 18, 'formats': formats, From 482a1258de6af0a15b6e7859d244f9125cadef47 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 30 Apr 2015 22:58:03 +0800 Subject: [PATCH 1411/1484] [VeeHD] Replace the third test case due to copyright issues --- youtube_dl/extractor/veehd.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/veehd.py b/youtube_dl/extractor/veehd.py index 7fdeb784d..346edf485 100644 --- a/youtube_dl/extractor/veehd.py +++ b/youtube_dl/extractor/veehd.py @@ -39,13 +39,13 @@ class VeeHDIE(InfoExtractor): 'uploader_id': 'spotted', } }, { - 'url': 'http://veehd.com/video/4665804_Tell-No-One-Ne-le-dis-a-personne-2006-French-EngSoftSubs-Re-Up', + 'url': 'http://veehd.com/video/2046729_2012-2009-DivX-Trailer', 'info_dict': { - 'id': '4665804', + 'id': '2046729', 'ext': 'avi', - 'title': 'Tell No One (Ne le dis a personne) 2006 French(EngSoftSubs) Re-Up', - 'description': 'md5:d660cca685549776f37165e9a10b60ba', - 'uploader_id': 'belial2549', + 'title': '2012 (2009) DivX Trailer', + 'description': 'md5:75435ee95255e6a9838ac6f6f3a2396b', + 'uploader_id': 'Movie_Trailers', } }] From 7a03280df4555998fc99399907062b62383db2c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 30 Apr 2015 21:31:38 +0600 Subject: [PATCH 1412/1484] [vporn] More metadata extraction fixes and tests update (#5560) --- youtube_dl/extractor/vporn.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/vporn.py b/youtube_dl/extractor/vporn.py index 04ed0b381..92c90e517 100644 --- a/youtube_dl/extractor/vporn.py +++ b/youtube_dl/extractor/vporn.py @@ -27,9 +27,6 @@ class VpornIE(InfoExtractor): 'duration': 393, 'age_limit': 18, 'view_count': int, - 'like_count': int, - 'dislike_count': int, - 'comment_count': int, } }, { @@ -47,9 +44,6 @@ class VpornIE(InfoExtractor): 'duration': 588, 'age_limit': 18, 'view_count': int, - 'like_count': int, - 'dislike_count': int, - 'comment_count': int, } }, ] @@ -72,10 +66,10 @@ class VpornIE(InfoExtractor): thumbnail = 'http://www.vporn.com' + thumbnail uploader = self._html_search_regex( - r'(?s)Uploaded by:.*?([^<]+)', + r'(?s)Uploaded by:.*?]*>(.+?)', webpage, 'uploader', fatal=False) - categories = re.findall(r'([^<]+)', webpage) + categories = re.findall(r']*>([^<]+)', webpage) duration = parse_duration(self._search_regex( r'Runtime:\s*\s*(\d+ min \d+ sec)', From 4070b458ece46a29dad9be2312a7daa48bb2f1d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 30 Apr 2015 23:55:05 +0600 Subject: [PATCH 1413/1484] [YoutubeDL] Do not write requested info in info JSON file (Closes #5562, closes #5564) --- youtube_dl/YoutubeDL.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 9d4a2dce8..e747c6892 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1337,8 +1337,11 @@ class YoutubeDL(object): self.to_screen('[info] Video description metadata is already present') else: self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn) + filtered_info_dict = dict( + (k, v) for k, v in 
info_dict.items() + if not k in ['requested_formats', 'requested_subtitles']) try: - write_json_file(info_dict, infofn) + write_json_file(filtered_info_dict, infofn) except (OSError, IOError): self.report_error('Cannot write metadata to JSON file ' + infofn) return From df8301fef55f9144f06337c10b8570b6560caa24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Thu, 30 Apr 2015 20:18:42 +0200 Subject: [PATCH 1414/1484] [YoutubeDL] pep8: use 'k not in' instead of 'not k in' --- youtube_dl/YoutubeDL.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index e747c6892..584dbf8a6 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1339,7 +1339,7 @@ class YoutubeDL(object): self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn) filtered_info_dict = dict( (k, v) for k, v in info_dict.items() - if not k in ['requested_formats', 'requested_subtitles']) + if k not in ['requested_formats', 'requested_subtitles']) try: write_json_file(filtered_info_dict, infofn) except (OSError, IOError): From 67fc8ecd53b4ffe5375a741bf0b1282f7a44587d Mon Sep 17 00:00:00 2001 From: Naglis Jonaitis Date: Thu, 30 Apr 2015 21:26:55 +0300 Subject: [PATCH 1415/1484] [dreisat] Extend _VALID_URL (Closes #5548) --- youtube_dl/extractor/dreisat.py | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/dreisat.py b/youtube_dl/extractor/dreisat.py index 05bb22ddf..8ac8587be 100644 --- a/youtube_dl/extractor/dreisat.py +++ b/youtube_dl/extractor/dreisat.py @@ -11,19 +11,25 @@ from ..utils import ( class DreiSatIE(InfoExtractor): IE_NAME = '3sat' - _VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/(?:index\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P[0-9]+)$' - _TEST = { - 'url': 'http://www.3sat.de/mediathek/index.php?mode=play&obj=45918', - 'md5': 'be37228896d30a88f315b638900a026e', - 'info_dict': { - 'id': '45918', - 'ext': 'mp4', - 'title': 'Waidmannsheil', - 'description': 'md5:cce00ca1d70e21425e72c86a98a56817', - 'uploader': '3sat', - 'upload_date': '20140913' - } - } + _VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/(?:index\.php|mediathek\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P[0-9]+)$' + _TESTS = [ + { + 'url': 'http://www.3sat.de/mediathek/index.php?mode=play&obj=45918', + 'md5': 'be37228896d30a88f315b638900a026e', + 'info_dict': { + 'id': '45918', + 'ext': 'mp4', + 'title': 'Waidmannsheil', + 'description': 'md5:cce00ca1d70e21425e72c86a98a56817', + 'uploader': '3sat', + 'upload_date': '20140913' + } + }, + { + 'url': 'http://www.3sat.de/mediathek/mediathek.php?mode=play&obj=51066', + 'only_matching': True, + }, + ] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) From cb202fd28635bf82836a025c631339665ba610af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 1 May 2015 00:44:34 +0600 Subject: [PATCH 1416/1484] [YoutubeDL] Filter requested info fields on `--load-info` as well In order to properly handle JSON info files generated by youtube-dl versions prior to 4070b458ece46a29dad9be2312a7daa48bb2f1d7 --- youtube_dl/YoutubeDL.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 584dbf8a6..55b429f31 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1337,11 +1337,8 @@ class YoutubeDL(object): self.to_screen('[info] Video description metadata is 
already present') else: self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn) - filtered_info_dict = dict( - (k, v) for k, v in info_dict.items() - if k not in ['requested_formats', 'requested_subtitles']) try: - write_json_file(filtered_info_dict, infofn) + write_json_file(self.filter_requested_info(info_dict), infofn) except (OSError, IOError): self.report_error('Cannot write metadata to JSON file ' + infofn) return @@ -1491,7 +1488,7 @@ class YoutubeDL(object): [info_filename], mode='r', openhook=fileinput.hook_encoded('utf-8'))) as f: # FileInput doesn't have a read method, we can't call json.load - info = json.loads('\n'.join(f)) + info = self.filter_requested_info(json.loads('\n'.join(f))) try: self.process_ie_result(info, download=True) except DownloadError: @@ -1503,6 +1500,12 @@ class YoutubeDL(object): raise return self._download_retcode + @staticmethod + def filter_requested_info(info_dict): + return dict( + (k, v) for k, v in info_dict.items() + if k not in ['requested_formats', 'requested_subtitles']) + def post_process(self, filename, ie_info): """Run all the postprocessors on the given file.""" info = dict(ie_info) From 6a8422b942f5140238106c43e27d869d70126446 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 1 May 2015 02:49:06 +0600 Subject: [PATCH 1417/1484] [foxsports] Add extractor (Closes #5517) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/foxsports.py | 32 +++++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+) create mode 100644 youtube_dl/extractor/foxsports.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 641c45f43..fced42bd9 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -161,6 +161,7 @@ from .footyroom import FootyRoomIE from .fourtube import FourTubeIE from .foxgay import FoxgayIE from .foxnews import FoxNewsIE +from .foxsports import FoxSportsIE from .franceculture import FranceCultureIE from .franceinter import FranceInterIE from .francetv import ( diff --git a/youtube_dl/extractor/foxsports.py b/youtube_dl/extractor/foxsports.py new file mode 100644 index 000000000..363866b64 --- /dev/null +++ b/youtube_dl/extractor/foxsports.py @@ -0,0 +1,32 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import smuggle_url + + +class FoxSportsIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?foxsports\.com/video\?vid=(?P\d+)' + + _TEST = { + 'url': 'http://www.foxsports.com/video?vid=432609859715', + 'info_dict': { + 'id': 'gA0bHB3Ladz3', + 'ext': 'flv', + 'title': 'Courtney Lee on going up 2-0 in series vs. 
Blazers', + 'description': 'Courtney Lee talks about Memphis being focused.', + }, + 'add_ie': ['ThePlatform'], + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + config = self._parse_json( + self._search_regex( + r"data-player-config='([^']+)'", webpage, 'data player config'), + video_id) + + return self.url_result(smuggle_url( + config['releaseURL'] + '&manifest=f4m', {'force_smil_url': True})) From 1dbd717eb49d075fa1efabc674e8074fd165eb0a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 1 May 2015 02:51:55 +0600 Subject: [PATCH 1418/1484] [theplaform] Fix FutureWarning --- youtube_dl/extractor/theplatform.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index 6a006b2d2..92731ad3d 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -129,7 +129,9 @@ class ThePlatformIE(InfoExtractor): head = meta.find(_x('smil:head')) body = meta.find(_x('smil:body')) - f4m_node = body.find(_x('smil:seq//smil:video')) or body.find(_x('smil:seq/smil:video')) + f4m_node = body.find(_x('smil:seq//smil:video')) + if f4m_node is None: + f4m_node = body.find(_x('smil:seq/smil:video')) if f4m_node is not None and '.f4m' in f4m_node.attrib['src']: f4m_url = f4m_node.attrib['src'] if 'manifest.f4m?' not in f4m_url: @@ -142,7 +144,9 @@ class ThePlatformIE(InfoExtractor): formats = [] switch = body.find(_x('smil:switch')) if switch is None: - switch = body.find(_x('smil:par//smil:switch')) or body.find(_x('smil:par/smil:switch')) + switch = body.find(_x('smil:par//smil:switch')) + if switch is None: + switch = body.find(_x('smil:par/smil:switch')) if switch is None: switch = body.find(_x('smil:par')) if switch is not None: @@ -163,7 +167,9 @@ class ThePlatformIE(InfoExtractor): 'vbr': vbr, }) else: - switch = body.find(_x('smil:seq//smil:switch')) or body.find(_x('smil:seq/smil:switch')) + switch = body.find(_x('smil:seq//smil:switch')) + if switch is None: + switch = body.find(_x('smil:seq/smil:switch')) for f in switch.findall(_x('smil:video')): attr = f.attrib vbr = int_or_none(attr.get('system-bitrate'), 1000) From 8683b4d8d91a7c6b72ca4a12bf6b538cbb4b2a68 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 1 May 2015 03:59:13 +0600 Subject: [PATCH 1419/1484] [bbccouk] Improve extraction (Closes #5530) --- youtube_dl/extractor/bbccouk.py | 35 ++++++++++++++++++++++++--------- 1 file changed, 26 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/bbccouk.py b/youtube_dl/extractor/bbccouk.py index abc34a576..22c2843be 100644 --- a/youtube_dl/extractor/bbccouk.py +++ b/youtube_dl/extractor/bbccouk.py @@ -3,7 +3,10 @@ from __future__ import unicode_literals import xml.etree.ElementTree from .common import InfoExtractor -from ..utils import ExtractorError +from ..utils import ( + ExtractorError, + int_or_none, +) from ..compat import compat_HTTPError @@ -326,16 +329,29 @@ class BBCCoUkIE(InfoExtractor): webpage = self._download_webpage(url, group_id, 'Downloading video page') - programme_id = self._search_regex( - r'"vpid"\s*:\s*"([\da-z]{8})"', webpage, 'vpid', fatal=False, default=None) + thumbnail = self._og_search_thumbnail(webpage) + + programme_id = None + + tviplayer = self._search_regex( + r'mediator\.bind\(({.+?})\s*,\s*document\.getElementById', + webpage, 'player', default=None) + + if tviplayer: + player = 
self._parse_json(tviplayer, group_id).get('player', {}) + duration = int_or_none(player.get('duration')) + programme_id = player.get('vpid') + + if not programme_id: + programme_id = self._search_regex( + r'"vpid"\s*:\s*"([\da-z]{8})"', webpage, 'vpid', fatal=False, default=None) + if programme_id: - player = self._download_json( - 'http://www.bbc.co.uk/iplayer/episode/%s.json' % group_id, - group_id)['jsConf']['player'] - title = player['title'] - description = player['subtitle'] - duration = player['duration'] formats, subtitles = self._download_media_selector(programme_id) + title = self._og_search_title(webpage) + description = self._search_regex( + r'

([^<]+)

', + webpage, 'description', fatal=False) else: programme_id, title, description, duration, formats, subtitles = self._download_playlist(group_id) @@ -345,6 +361,7 @@ class BBCCoUkIE(InfoExtractor): 'id': programme_id, 'title': title, 'description': description, + 'thumbnail': thumbnail, 'duration': duration, 'formats': formats, 'subtitles': subtitles, From e68ae99a417f39db269dcffb5011cfcc8341552d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 1 May 2015 04:02:56 +0600 Subject: [PATCH 1420/1484] [bbccouk] Add test for #5530 --- youtube_dl/extractor/bbccouk.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/youtube_dl/extractor/bbccouk.py b/youtube_dl/extractor/bbccouk.py index 22c2843be..dbfbbb5ca 100644 --- a/youtube_dl/extractor/bbccouk.py +++ b/youtube_dl/extractor/bbccouk.py @@ -115,6 +115,20 @@ class BBCCoUkIE(InfoExtractor): # rtmp download 'skip_download': True, } + }, { + 'url': 'http://www.bbc.co.uk/iplayer/episode/b054fn09/ad/natural-world-20152016-2-super-powered-owls', + 'info_dict': { + 'id': 'p02n76xf', + 'ext': 'flv', + 'title': 'Natural World, 2015-2016: 2. Super Powered Owls', + 'description': 'md5:e4db5c937d0e95a7c6b5e654d429183d', + 'duration': 3540, + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + 'skip': 'geolocation', }, { 'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4', 'only_matching': True, From 650cfd0cb0e330c8e6b1a5cc43a5a20d54b4714c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 1 May 2015 04:07:30 +0600 Subject: [PATCH 1421/1484] [bbccouk] Mute thumbnail --- youtube_dl/extractor/bbccouk.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/youtube_dl/extractor/bbccouk.py b/youtube_dl/extractor/bbccouk.py index dbfbbb5ca..249bc6bbd 100644 --- a/youtube_dl/extractor/bbccouk.py +++ b/youtube_dl/extractor/bbccouk.py @@ -343,8 +343,6 @@ class BBCCoUkIE(InfoExtractor): webpage = self._download_webpage(url, group_id, 'Downloading video page') - thumbnail = self._og_search_thumbnail(webpage) - programme_id = None tviplayer = self._search_regex( @@ -375,7 +373,7 @@ class BBCCoUkIE(InfoExtractor): 'id': programme_id, 'title': title, 'description': description, - 'thumbnail': thumbnail, + 'thumbnail': self._og_search_thumbnail(webpage, default=None), 'duration': duration, 'formats': formats, 'subtitles': subtitles, From 861e65eb0573c824cf82e1f31b7169df2efa74ab Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 1 May 2015 12:31:31 +0800 Subject: [PATCH 1422/1484] [yahoo] Extend _VALID_URL --- youtube_dl/extractor/yahoo.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index b777159c5..bf4e659ac 100644 --- a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -22,7 +22,7 @@ from .nbc import NBCSportsVPlayerIE class YahooIE(InfoExtractor): IE_DESC = 'Yahoo screen and movies' - _VALID_URL = r'(?P(?Phttps?://(?:[a-zA-Z]{2}\.)?[\da-zA-Z_-]+\.yahoo\.com)/(?:[^/]+/)*(?P.+?)-(?P[0-9]+)(?:-[a-z]+)?\.html)' + _VALID_URL = r'(?P(?Phttps?://(?:[a-zA-Z]{2}\.)?[\da-zA-Z_-]+\.yahoo\.com)/(?:[^/]+/)*(?P.+)?-(?P[0-9]+)(?:-[a-z]+)?\.html)' _TESTS = [ { 'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html', @@ -140,12 +140,15 @@ class YahooIE(InfoExtractor): 'description': 'md5:df390f70a9ba7c95ff1daace988f0d8d', 'title': 'Tyler Kalinoski hits buzzer-beater to lift Davidson', } + }, { + 'url': 'https://tw.news.yahoo.com/-100120367.html', + 
'only_matching': True, } ] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - display_id = mobj.group('display_id') + display_id = mobj.group('display_id') or self._match_id(url) page_id = mobj.group('id') url = mobj.group('url') host = mobj.group('host') From 083c1bb960715031aed63dfb834a5bdc5ac6ff9a Mon Sep 17 00:00:00 2001 From: Nikoli Date: Wed, 15 Apr 2015 20:27:40 +0300 Subject: [PATCH 1423/1484] Add ability to embed subtitles in mkv files (closes #5434) --- README.md | 2 +- youtube_dl/options.py | 2 +- youtube_dl/postprocessor/ffmpeg.py | 7 ++++--- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index e1f30ca47..3432546fc 100644 --- a/README.md +++ b/README.md @@ -216,7 +216,7 @@ which means you can modify it, redistribute it or use it however you like. --recode-video FORMAT Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm|mkv) -k, --keep-video Keep the video file on disk after the post-processing; the video is erased by default --no-post-overwrites Do not overwrite post-processed files; the post-processed files are overwritten by default - --embed-subs Embed subtitles in the video (only for mp4 videos) + --embed-subs Embed subtitles in the video (only for mkv and mp4 videos) --embed-thumbnail Embed thumbnail in the audio as cover art --add-metadata Write metadata to the video file --metadata-from-title FORMAT Parse additional metadata like song title / artist from the video title. The format syntax is the same as --output, the parsed diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 4c9d39d9a..d0aa8296d 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -698,7 +698,7 @@ def parseOpts(overrideArguments=None): postproc.add_option( '--embed-subs', action='store_true', dest='embedsubtitles', default=False, - help='Embed subtitles in the video (only for mp4 videos)') + help='Embed subtitles in the video (only for mkv and mp4 videos)') postproc.add_option( '--embed-thumbnail', action='store_true', dest='embedthumbnail', default=False, diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index 1765f4969..214de39f9 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -501,8 +501,8 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor): return cls._lang_map.get(code[:2]) def run(self, information): - if information['ext'] != 'mp4': - self._downloader.to_screen('[ffmpeg] Subtitles can only be embedded in mp4 files') + if information['ext'] not in ['mp4', 'mkv']: + self._downloader.to_screen('[ffmpeg] Subtitles can only be embedded in mp4 or mkv files') return [], information subtitles = information.get('requested_subtitles') if not subtitles: @@ -520,8 +520,9 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor): # Don't copy the existing subtitles, we may be running the # postprocessor a second time '-map', '-0:s', - '-c:s', 'mov_text', ] + if information['ext'] == 'mp4': + opts += ['-c:s', 'mov_text'] for (i, lang) in enumerate(sub_langs): opts.extend(['-map', '%d:0' % (i + 1)]) lang_code = self._conver_lang_code(lang) From 5890eef6b021845cb68882107364f1b04d773913 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 1 May 2015 17:43:06 +0600 Subject: [PATCH 1424/1484] [pbs] Add support for HD (Closes #3564, closes #5390) --- youtube_dl/extractor/pbs.py | 56 ++++++++++++++++++++++++++----------- 1 file changed, 40 insertions(+), 16 deletions(-) diff --git a/youtube_dl/extractor/pbs.py 
b/youtube_dl/extractor/pbs.py index afce732e1..761bd6d8d 100644 --- a/youtube_dl/extractor/pbs.py +++ b/youtube_dl/extractor/pbs.py @@ -5,6 +5,8 @@ import re from .common import InfoExtractor from ..utils import ( ExtractorError, + determine_ext, + int_or_none, unified_strdate, US_RATINGS, ) @@ -149,21 +151,44 @@ class PBSIE(InfoExtractor): for vid_id in video_id] return self.playlist_result(entries, display_id) - info_url = 'http://video.pbs.org/videoInfo/%s?format=json' % video_id - info = self._download_json(info_url, display_id) + info = self._download_json( + 'http://video.pbs.org/videoInfo/%s?format=json&type=partner' % video_id, + display_id) - redirect_url = info['alternate_encoding']['url'] - redirect_info = self._download_json( - redirect_url + '?format=json', display_id, - 'Downloading video url info') - if redirect_info['status'] == 'error': - if redirect_info['http_code'] == 403: - message = ( - 'The video is not available in your region due to ' - 'right restrictions') + formats = [] + for encoding_name in ('recommended_encoding', 'alternate_encoding'): + redirect = info.get(encoding_name) + if not redirect: + continue + redirect_url = redirect.get('url') + if not redirect_url: + continue + + redirect_info = self._download_json( + redirect_url + '?format=json', display_id, + 'Downloading %s video url info' % encoding_name) + + if redirect_info['status'] == 'error': + if redirect_info['http_code'] == 403: + message = ( + 'The video is not available in your region due to ' + 'right restrictions') + else: + message = redirect_info['message'] + raise ExtractorError(message, expected=True) + + format_url = redirect_info.get('url') + if not format_url: + continue + + if determine_ext(format_url) == 'm3u8': + formats.extend(self._extract_m3u8_formats( + format_url, display_id, 'mp4', preference=1, m3u8_id='hls')) else: - message = redirect_info['message'] - raise ExtractorError(message, expected=True) + formats.append({ + 'url': format_url, + }) + self._sort_formats(formats) rating_str = info.get('rating') if rating_str is not None: @@ -174,11 +199,10 @@ class PBSIE(InfoExtractor): 'id': video_id, 'display_id': display_id, 'title': info['title'], - 'url': redirect_info['url'], - 'ext': 'mp4', 'description': info['program'].get('description'), 'thumbnail': info.get('image_url'), - 'duration': info.get('duration'), + 'duration': int_or_none(info.get('duration')), 'age_limit': age_limit, 'upload_date': upload_date, + 'formats': formats, } From 8e3df9dfeef8503e9a8c01fcf42008d376d8d64d Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 2 May 2015 00:08:38 +0800 Subject: [PATCH 1425/1484] [viki] Fix extractor and add a global availble test case --- youtube_dl/extractor/viki.py | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py index 6816dacb6..957e3c01e 100644 --- a/youtube_dl/extractor/viki.py +++ b/youtube_dl/extractor/viki.py @@ -2,7 +2,10 @@ from __future__ import unicode_literals import re -from ..compat import compat_urlparse +from ..compat import ( + compat_urlparse, + compat_urllib_request, +) from ..utils import ( ExtractorError, unescapeHTML, @@ -15,8 +18,11 @@ from .common import InfoExtractor class VikiIE(InfoExtractor): IE_NAME = 'viki' + # iPad2 + _USER_AGENT = 'Mozilla/5.0(iPad; U; CPU OS 4_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8F191 Safari/6533.18.5' + _VALID_URL = r'^https?://(?:www\.)?viki\.com/videos/(?P[0-9]+v)' - 
_TEST = { + _TESTS = [{ 'url': 'http://www.viki.com/videos/1023585v-heirs-episode-14', 'info_dict': { 'id': '1023585v', @@ -28,7 +34,17 @@ class VikiIE(InfoExtractor): 'age_limit': 13, }, 'skip': 'Blocked in the US', - } + }, { + 'url': 'http://www.viki.com/videos/1067139v-the-avengers-age-of-ultron-press-conference', + 'md5': 'ca6493e6f0a6ec07da9aa8d6304b4b2c', + 'info_dict': { + 'id': '1067139v', + 'ext': 'mp4', + 'description': 'md5:d70b2f9428f5488321bfe1db10d612ea', + 'upload_date': '20150430', + 'title': '\'The Avengers: Age of Ultron\' Press Conference', + } + }] def _real_extract(self, url): video_id = self._match_id(url) @@ -50,9 +66,11 @@ class VikiIE(InfoExtractor): 'rating information', default='').strip() age_limit = US_RATINGS.get(rating_str) - info_url = 'http://www.viki.com/player5_fragment/%s?action=show&controller=videos' % video_id + req = compat_urllib_request.Request( + 'http://www.viki.com/player5_fragment/%s?action=show&controller=videos' % video_id) + req.add_header('User-Agent', self._USER_AGENT) info_webpage = self._download_webpage( - info_url, video_id, note='Downloading info page') + req, video_id, note='Downloading info page') if re.match(r'\s* Date: Sat, 2 May 2015 00:32:46 +0800 Subject: [PATCH 1426/1484] [viki] Enhance error message handling (#3774) --- youtube_dl/extractor/viki.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py index 957e3c01e..0fc1ceb19 100644 --- a/youtube_dl/extractor/viki.py +++ b/youtube_dl/extractor/viki.py @@ -11,6 +11,7 @@ from ..utils import ( unescapeHTML, unified_strdate, US_RATINGS, + clean_html, ) from .common import InfoExtractor @@ -71,10 +72,15 @@ class VikiIE(InfoExtractor): req.add_header('User-Agent', self._USER_AGENT) info_webpage = self._download_webpage( req, video_id, note='Downloading info page') - if re.match(r'\s*]+class="video-error[^>]+>(.+)