[^<]*


', page): + return self.playlist_result([self.url_result(vid) for vid in vids], video_id) + + title = self._html_search_regex( + r'

[^<]*

([^<]+)

', page, 'title') + + return { + '_type': 'url', + 'id': video_id, + 'url': vids[0], + 'title': title, + } + + +class GoGoAnimeSearchIE(InfoExtractor): + IE_NAME = 'gogoanime:search' + IE_DESC = 'GoGoAnime Search' + + _VALID_URL = r'http://www\.gogoanime\.com/.*\?s=(?P[^&]*)' + _TEST = { + 'url': 'http://www.gogoanime.com/?s=bokusatsu', + 'info_dict': { + 'id': 'bokusatsu' + }, + 'playlist_count': 6 + } + + def _real_extract(self, url): + playlist_id = self._match_id(url) + webpage = self._download_webpage(url, playlist_id) + + posts = re.findall( + r'
[^<]*]*>[^<]*.+)' + + _TESTS = [{ + 'url': 'http://play44.net/embed.php?w=600&h=438&vid=M/mahou-shoujo-madoka-magica-07.flv', + 'md5': 'e37e99d665f503dd2db952f7c4dba9e6', + 'info_dict': { + 'id': 'mahou-shoujo-madoka-magica-07', + 'ext': 'flv', + 'title': 'mahou-shoujo-madoka-magica-07', + } + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + page = self._download_webpage(url, video_id) + + video_url = compat_urllib_parse.unquote(self._html_search_regex( + r'_url = "(https?://[^"]+?)";', page, 'url')) + title = self._search_regex(r'.*/(?P[^.]*).', video_url, 'title') + + return { + 'id': title, + 'url': video_url, + 'title': title, + } + + +class ByZooIE(Play44IE): + _VALID_URL = r'http://[w.]*byzoo\.org/embed\.php[^/]*/(?P<id>.+)' + + _TESTS = [{ + 'url': 'http://byzoo.org/embed.php?w=600&h=438&vid=at/nw/mahou_shoujo_madoka_magica_movie_3_-_part1.mp4', + 'md5': '455c83dabe2cd9fd74a87612b01fe017', + 'info_dict': { + 'id': 'mahou_shoujo_madoka_magica_movie_3_-_part1', + 'ext': 'mp4', + 'title': 'mahou_shoujo_madoka_magica_movie_3_-_part1', + } + }] + + +class Video44IE(Play44IE): + _VALID_URL = r'http://[w.]*video44\.net/.*file=(?P<id>[^&].).*' + + _TESTS = [{ + 'url': 'http://www.video44.net/gogo/?w=600&h=438&file=chaoshead-12.flv&sv=1', + 'md5': '43eaec6d0beb10e8d42459b9f108aff3', + 'info_dict': { + 'id': 'chaoshead-12', + 'ext': 'mp4', + 'title': 'chaoshead-12', + } + }] + + +class VideoWingIE(Play44IE): + _VALID_URL = r'''(?x) + http://[w.]*videowing\.[^/]*/ + (?: + .*video=/* + |embed/ + ) + (?P<id>[^&?.]+) + ''' + + _TESTS = [{ + 'url': 'http://videowing.me/embed?w=718&h=438&video=ongoing/boku_wa_tomodachi_ga_sukunai_-_05.mp4', + 'md5': '4ed320e353ed26c742c4f12a9c210b60', + 'info_dict': { + 'id': 'boku_wa_tomodachi_ga_sukunai_-_05', + 'ext': 'mp4', + 'title': 'boku_wa_tomodachi_ga_sukunai_-_05', + } + }, { + 'url': 'http://videowing.me/embed/a8d6a39522df066bd734a69f2334497e?w=600&h=438', + 'md5': '33fdd71581357018c226f95c5cedcfd7', + 'info_dict': { + 'id': 'mahoushoujomadokamagicamovie1part1', + 'ext': 'flv', + 'title': 'mahoushoujomadokamagicamovie1part1', + } + }] + + +class PlayPandaIE(Play44IE): + _VALID_URL = r'http://[w.]*playpanda\.[^/]*/.*vid=/*(?P<id>[^&].).*' + + _TESTS = [{ + 'url': 'http://playpanda.net/embed.php?w=718&h=438&vid=at/nw/boku_wa_tomodachi_ga_sukunai_-_05.mp4', + 'md5': '4ed320e353ed26c742c4f12a9c210b60', + 'info_dict': { + 'id': 'boku_wa_tomodachi_ga_sukunai_-_05', + 'ext': 'mp4', + 'title': 'boku_wa_tomodachi_ga_sukunai_-_05', + 'description': 'boku_wa_tomodachi_ga_sukunai_-_05' + } + }] + + +class VideoZooIE(Play44IE): + _VALID_URL = r'http://[w.]*videozoo\.[^/]*/.*vid=/*(?P<id>[^&].).*' + + _TESTS = [{ + 'url': 'http://videozoo.me/embed.php?w=718&h=438&vid=at/nw/boku_wa_tomodachi_ga_sukunai_-_05.mp4', + 'md5': '4ed320e353ed26c742c4f12a9c210b60', + 'info_dict': { + 'id': 'boku_wa_tomodachi_ga_sukunai_-_05', + 'ext': 'mp4', + 'title': 'boku_wa_tomodachi_ga_sukunai_-_05', + } + }] + + +class PlayBBIE(Play44IE): + _VALID_URL = r'http://[w.]*playbb\.[^/]*/.*vid=/*(?P<id>[^&].).*' + + _TESTS = [{ + 'url': 'http://playbb.me/embed.php?w=718&h=438&vid=at/nw/boku_wa_tomodachi_ga_sukunai_-_05.mp4', + 'md5': '4ed320e353ed26c742c4f12a9c210b60', + 'info_dict': { + 'id': 'boku_wa_tomodachi_ga_sukunai_-_05', + 'ext': 'mp4', + 'title': 'boku_wa_tomodachi_ga_sukunai_-_05', + } + }] + + +class EasyVideoIE(Play44IE): + _VALID_URL = r'http://[w.]*easyvideo\.[^/]*/.*file=/*(?P<id>[^&.]+)' + + _TESTS = [{ + 'url': 'http://easyvideo.me/gogo/?w=718&h=438&file=bokuwatomodachigasukunai-04.flv&sv=1', + 'md5': '26178b57629b7650106d72b191137176', + 'info_dict': { + 'id': 'bokuwatomodachigasukunai-04', + 'ext': 'mp4', + 'title': 'bokuwatomodachigasukunai-04', + }, + 'skip': 'Blocked in Germany', + }] diff --git a/youtube_dl/extractor/soulanime.py b/youtube_dl/extractor/soulanime.py new file mode 100644 index 000000000..7adb10c03 --- /dev/null +++ b/youtube_dl/extractor/soulanime.py @@ -0,0 +1,74 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class SoulAnimeWatchingIE(InfoExtractor): + IE_NAME = "soulanime:watching" + IE_DESC = "SoulAnime video" + _TEST = { + 'url': 'http://www.soul-anime.net/watching/seirei-tsukai-no-blade-dance-episode-9/', + 'md5': '05fae04abf72298098b528e98abf4298', + 'info_dict': { + 'id': 'seirei-tsukai-no-blade-dance-episode-9', + 'ext': 'mp4', + 'title': 'seirei-tsukai-no-blade-dance-episode-9', + 'description': 'seirei-tsukai-no-blade-dance-episode-9' + } + } + _VALID_URL = r'http://[w.]*soul-anime\.(?P<domain>[^/]+)/watch[^/]*/(?P<id>[^/]+)' + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + domain = mobj.group('domain') + + page = self._download_webpage(url, video_id) + + video_url_encoded = self._html_search_regex( + r'<div id="download">[^<]*<a href="(?P<url>[^"]+)"', page, 'url') + video_url = "http://www.soul-anime." + domain + video_url_encoded + + vid = self._request_webpage(video_url, video_id) + ext = vid.info().gettype().split("/")[1] + + return { + 'id': video_id, + 'url': video_url, + 'ext': ext, + 'title': video_id, + 'description': video_id + } + + +class SoulAnimeSeriesIE(InfoExtractor): + IE_NAME = "soulanime:series" + IE_DESC = "SoulAnime Series" + + _VALID_URL = r'http://[w.]*soul-anime\.(?P<domain>[^/]+)/anime./(?P<id>[^/]+)' + + _EPISODE_REGEX = r'<option value="(/watch[^/]*/[^"]+)">[^<]*</option>' + + _TEST = { + 'url': 'http://www.soul-anime.net/anime1/black-rock-shooter-tv/', + 'info_dict': { + 'id': 'black-rock-shooter-tv' + }, + 'playlist_count': 8 + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + series_id = mobj.group('id') + domain = mobj.group('domain') + + pattern = re.compile(self._EPISODE_REGEX) + + page = self._download_webpage(url, series_id, "Downloading series page") + mobj = pattern.findall(page) + + entries = [self.url_result("http://www.soul-anime." + domain + obj) for obj in mobj] + + return self.playlist_result(entries, series_id) diff --git a/youtube_dl/extractor/videofun.py b/youtube_dl/extractor/videofun.py new file mode 100644 index 000000000..0364b9d32 --- /dev/null +++ b/youtube_dl/extractor/videofun.py @@ -0,0 +1,36 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + compat_urllib_parse +) + + +class VideoFunIE(InfoExtractor): + _VALID_URL = r'http://[w.]*videofun\.me/embed/(?P<id>[0-9a-f]+)' + + _TEST = { + 'url': 'http://videofun.me/embed/8267659be070860af600fee7deadbcdb?w=600&h=438', + 'md5': 'e37e99d665f503dd2db952f7c4dba9e6', + 'info_dict': { + 'id': 'Mahou-Shoujo-Madoka-Magica-07', + 'ext': 'flv', + 'title': 'Mahou-Shoujo-Madoka-Magica-07', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage( + url, video_id, 'Downloading video page') + + video_url_encoded = self._html_search_regex( + r'url: "(http://gateway\.videofun\.me[^"]+)"', webpage, 'video url') + video_url = compat_urllib_parse.unquote(video_url_encoded) + title = self._html_search_regex(r'.*/([^.]*)\.', video_url, 'title') + + return { + 'id': title, + 'url': video_url, + 'title': title, + } From 95ceeec72200ed3b2c94a54650eb69dfe946e595 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sun, 4 Jan 2015 02:05:35 +0100 Subject: [PATCH 383/974] Remove unused import --- youtube_dl/downloader/mplayer.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/downloader/mplayer.py b/youtube_dl/downloader/mplayer.py index 34b23b5c2..72cef30ea 100644 --- a/youtube_dl/downloader/mplayer.py +++ b/youtube_dl/downloader/mplayer.py @@ -4,7 +4,6 @@ import os import subprocess from .common import FileDownloader -from ..compat import compat_subprocess_get_DEVNULL from ..utils import ( check_executable, encodeFilename, From c11125f9ed952f9b7ebd06c15eacadcc6005dd8c Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sun, 4 Jan 2015 02:06:53 +0100 Subject: [PATCH 384/974] [tests] Remove format 138 from tests (#4559) --- test/test_YoutubeDL.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index f8e4f930e..730f7ec26 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -218,7 +218,7 @@ class TestFormatSelection(unittest.TestCase): # 3D '85', '84', '102', '83', '101', '82', '100', # Dash video - '138', '137', '248', '136', '247', '135', '246', + '137', '248', '136', '247', '135', '246', '245', '244', '134', '243', '133', '242', '160', # Dash audio '141', '172', '140', '171', '139', From 8848314c08284f6a4b8f3c3529bf2e3f1b72610c Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sun, 4 Jan 2015 02:08:18 +0100 Subject: [PATCH 385/974] [Makefile] Make offline tests actually work offline --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 71470eedb..e53a367ef 100644 --- a/Makefile +++ b/Makefile @@ -46,7 +46,7 @@ test: ot: offlinetest offlinetest: codetest - nosetests --verbose test --exclude test_download --exclude test_age_restriction --exclude test_subtitles --exclude test_write_annotations + nosetests --verbose test --exclude test_download --exclude test_age_restriction --exclude test_subtitles --exclude test_write_annotations --exclude test_youtube_lists tar: youtube-dl.tar.gz From 2ccd1b10e58cc8e5173dc1aeedc2b3f0ef9b55bf Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sun, 4 Jan 2015 02:20:45 +0100 Subject: [PATCH 386/974] [soulanime] Fix under Python 3 --- youtube_dl/extractor/soulanime.py | 10 ++++++++-- youtube_dl/utils.py | 11 +++++++++++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/soulanime.py b/youtube_dl/extractor/soulanime.py index 7adb10c03..feef33e27 100644 --- a/youtube_dl/extractor/soulanime.py +++ b/youtube_dl/extractor/soulanime.py @@ -3,6 +3,10 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..utils import ( + HEADRequest, + urlhandle_detect_ext, +) class SoulAnimeWatchingIE(InfoExtractor): @@ -31,8 +35,10 @@ class SoulAnimeWatchingIE(InfoExtractor): r'<div id="download">[^<]*<a href="(?P<url>[^"]+)"', page, 'url') video_url = "http://www.soul-anime." + domain + video_url_encoded - vid = self._request_webpage(video_url, video_id) - ext = vid.info().gettype().split("/")[1] + ext_req = HEADRequest(video_url) + ext_handle = self._request_webpage( + ext_req, video_id, note='Determining extension') + ext = urlhandle_detect_ext(ext_handle) return { 'id': video_id, diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index efbe64fb3..bdfe053a7 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1550,3 +1550,14 @@ def ytdl_is_updateable(): def args_to_str(args): # Get a short string representation for a subprocess command return ' '.join(shlex_quote(a) for a in args) + + +def urlhandle_detect_ext(url_handle): + try: + url_handle.headers + getheader = lambda h: url_handle.headers[h] + except AttributeError: # Python < 3 + getheader = url_handle.info().getheader + + return getheader('Content-Type').split("/")[1] + From 7a1818c99b1729796f62c341b1b3f809cd47dbf8 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sun, 4 Jan 2015 03:15:27 +0100 Subject: [PATCH 387/974] [vk] Add support for rutube embeds (Fixes #4514) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/rutube.py | 31 +++++++++++++++++++++++++++++++ youtube_dl/extractor/vk.py | 9 +++++++++ 3 files changed, 41 insertions(+) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index c3dc09f75..143cd5c49 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -359,6 +359,7 @@ from .ruhd import RUHDIE from .rutube import ( RutubeIE, RutubeChannelIE, + RutubeEmbedIE, RutubeMovieIE, RutubePersonIE, ) diff --git a/youtube_dl/extractor/rutube.py b/youtube_dl/extractor/rutube.py index b72b5a586..5b1c3577a 100644 --- a/youtube_dl/extractor/rutube.py +++ b/youtube_dl/extractor/rutube.py @@ -70,6 +70,37 @@ class RutubeIE(InfoExtractor): } +class RutubeEmbedIE(InfoExtractor): + IE_NAME = 'rutube:embed' + IE_DESC = 'Rutube embedded videos' + _VALID_URL = 'https?://rutube\.ru/video/embed/(?P<id>[0-9]+)' + + _TEST = { + 'url': 'http://rutube.ru/video/embed/6722881?vk_puid37=&vk_puid38=', + 'info_dict': { + 'id': 'a10e53b86e8f349080f718582ce4c661', + 'ext': 'mp4', + 'upload_date': '20131223', + 'uploader_id': '297833', + 'description': 'Видео группы ★http://vk.com/foxkidsreset★ музей Fox Kids и Jetix<br/><br/> восстановлено и сделано в шикоформате subziro89 http://vk.com/subziro89', + 'uploader': 'subziro89 ILya', + 'title': 'Мистический городок Эйри в Индиан 5 серия озвучка subziro89', + }, + 'params': { + 'skip_download': 'Requires ffmpeg', + }, + } + + def _real_extract(self, url): + embed_id = self._match_id(url) + webpage = self._download_webpage(url, embed_id) + + canonical_url = self._html_search_regex( + r'<link\s+rel="canonical"\s+href="([^"]+?)"', webpage, + 'Canonical URL') + return self.url_result(canonical_url, 'Rutube') + + class RutubeChannelIE(InfoExtractor): IE_NAME = 'rutube:channel' IE_DESC = 'Rutube channels' diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index 542e9198a..129de6cf3 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -164,6 +164,15 @@ class VKIE(InfoExtractor): self.to_screen('Youtube video detected') return self.url_result(m_yt.group(1), 'Youtube') + m_rutube = re.search( + r'\ssrc="((?:https?:)?//rutube\.ru\\?/video\\?/embed(?:.*?))\\?"', info_page) + assert m_rutube + if m_rutube is not None: + self.to_screen('rutube video detected') + rutube_url = self._proto_relative_url( + m_rutube.group(1).replace('\\', '')) + return self.url_result(rutube_url) + m_opts = re.search(r'(?s)var\s+opts\s*=\s*({.*?});', info_page) if m_opts: m_opts_url = re.search(r"url\s*:\s*'([^']+)", m_opts.group(1)) From 26886e6140a684058064c30237ef096332e1510f Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sun, 4 Jan 2015 03:15:48 +0100 Subject: [PATCH 388/974] release 2015.01.04 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 2a54b9bbe..09813928a 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2015.01.03' +__version__ = '2015.01.04' From f4858a71035549cf82b258d01dda5060aef707b7 Mon Sep 17 00:00:00 2001 From: Christopher Krooss <c.krooss@gmail.com> Date: Sun, 4 Jan 2015 13:33:26 +0100 Subject: [PATCH 389/974] Add support for Radio Bremen --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/radiobremen.py | 55 +++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+) create mode 100644 youtube_dl/extractor/radiobremen.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 143cd5c49..349f4fe71 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -339,6 +339,7 @@ from .prosiebensat1 import ProSiebenSat1IE from .pyvideo import PyvideoIE from .quickvid import QuickVidIE from .radiode import RadioDeIE +from .radiobremen import RadioBremenIE from .radiofrance import RadioFranceIE from .rai import RaiIE from .rbmaradio import RBMARadioIE diff --git a/youtube_dl/extractor/radiobremen.py b/youtube_dl/extractor/radiobremen.py new file mode 100644 index 000000000..68c78c4f9 --- /dev/null +++ b/youtube_dl/extractor/radiobremen.py @@ -0,0 +1,55 @@ +# -*- coding: utf-8 -*- + +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class RadioBremenIE(InfoExtractor): + _VALID_URL = r'http?://(?:www\.)?radiobremen\.de/mediathek/(index\.html)?\?id=(?P<video_id>[0-9]+)' + IE_NAME = 'radiobremen' + + _TEST = { + 'url': 'http://www.radiobremen.de/mediathek/index.html?id=114720', + 'info_dict': { + 'id': '114720', + 'ext': 'mp4', + 'height': 288, + 'width': 512, + 'title': 'buten un binnen vom 22. Dezember', + 'description': 'Unter anderem mit diesen Themen: 45 Flüchtlinge sind in Worpswede angekommen +++ Freies Internet für alle: Bremer arbeiten an einem flächendeckenden W-Lan-Netzwerk +++ Aktivisten kämpfen für das Unibad +++ So war das Wetter 2014 +++', + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('video_id') + + meta_url = "http://www.radiobremen.de/apps/php/mediathek/metadaten.php?id=%s" % video_id + meta_doc = self._download_webpage(meta_url, video_id, 'Downloading metadata') + title = self._html_search_regex("<h1.*>(?P<title>.+)</h1>", meta_doc, "title") + description = self._html_search_regex("<p>(?P<description>.*)</p>", meta_doc, "description") + duration = self._html_search_regex("Länge:</td>\s+<td>(?P<duration>[0-9]+:[0-9]+)</td>", meta_doc, "duration") + + page_doc = self._download_webpage(url, video_id, 'Downloading video information') + pattern = "ardformatplayerclassic\(\'playerbereich\',\'(?P<width>[0-9]+)\',\'.*\',\'(?P<video_id>[0-9]+)\',\'(?P<secret>[0-9]+)\',\'(?P<thumbnail>.+)\',\'\'\)" + mobj = re.search(pattern, page_doc) + width, video_id, secret, thumbnail = int(mobj.group("width")), mobj.group("video_id"), mobj.group("secret"), mobj.group("thumbnail") + video_url = "http://dl-ondemand.radiobremen.de/mediabase/{:}/{:}_{:}_{:}.mp4".format(video_id, video_id, secret, width) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'duration': duration, + 'formats': [ + {'url': video_url, + 'ext': 'mp4', + 'width': width, + 'protocol': 'http' + } + ], + 'thumbnail': thumbnail, + } From 63948fc62c7f0bfcfe7b2ce102ab6e4e87de558c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sun, 4 Jan 2015 13:40:30 +0100 Subject: [PATCH 390/974] [downloader/hls] Respect the 'prefer_ffmpeg' option --- youtube_dl/downloader/hls.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 5bb0f3cfd..aa58b52ab 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -11,7 +11,6 @@ from ..compat import ( compat_urllib_request, ) from ..utils import ( - check_executable, encodeFilename, ) @@ -27,16 +26,13 @@ class HlsFD(FileDownloader): '-bsf:a', 'aac_adtstoasc', encodeFilename(tmpfilename, for_subprocess=True)] - for program in ['avconv', 'ffmpeg']: - if check_executable(program, ['-version']): - break - else: + ffpp = FFmpegPostProcessor(downloader=self) + program = ffpp._executable + if program is None: self.report_error('m3u8 download detected but ffmpeg or avconv could not be found. Please install one.') return False - cmd = [program] + args - - ffpp = FFmpegPostProcessor(downloader=self) ffpp.check_version() + cmd = [program] + args retval = subprocess.call(cmd) if retval == 0: From bc1fc5ddbcba784778cbdd98c051ff2493178515 Mon Sep 17 00:00:00 2001 From: Christopher Krooss <c.krooss@gmail.com> Date: Sun, 4 Jan 2015 14:02:07 +0100 Subject: [PATCH 391/974] Don't check for height as it's not provided --- youtube_dl/extractor/radiobremen.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/radiobremen.py b/youtube_dl/extractor/radiobremen.py index 68c78c4f9..6d130d3d9 100644 --- a/youtube_dl/extractor/radiobremen.py +++ b/youtube_dl/extractor/radiobremen.py @@ -16,7 +16,6 @@ class RadioBremenIE(InfoExtractor): 'info_dict': { 'id': '114720', 'ext': 'mp4', - 'height': 288, 'width': 512, 'title': 'buten un binnen vom 22. Dezember', 'description': 'Unter anderem mit diesen Themen: 45 Flüchtlinge sind in Worpswede angekommen +++ Freies Internet für alle: Bremer arbeiten an einem flächendeckenden W-Lan-Netzwerk +++ Aktivisten kämpfen für das Unibad +++ So war das Wetter 2014 +++', From bc3e582fe457f9239dc3a3386cbfd0e7db167404 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sun, 4 Jan 2015 14:02:17 +0100 Subject: [PATCH 392/974] Don't use '-shortest' option for merging formats (closes #4220, closes #4580) With avconv and older versions of ffmpeg the video is partially copied. The duration difference between the audio and the video seem to be really small, so it's probably not noticeable. --- youtube_dl/postprocessor/ffmpeg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index 048525efc..473536dcc 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -520,7 +520,7 @@ class FFmpegMetadataPP(FFmpegPostProcessor): class FFmpegMergerPP(FFmpegPostProcessor): def run(self, info): filename = info['filepath'] - args = ['-c', 'copy', '-map', '0:v:0', '-map', '1:a:0', '-shortest'] + args = ['-c', 'copy', '-map', '0:v:0', '-map', '1:a:0'] self._downloader.to_screen('[ffmpeg] Merging formats into "%s"' % filename) self.run_ffmpeg_multiple_files(info['__files_to_merge'], filename, args) return True, info From 9fda6ee39fa2da1949af5e9b95633e3df3c6f6b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sun, 4 Jan 2015 14:06:23 +0100 Subject: [PATCH 393/974] [tf1] Remove unused import --- youtube_dl/extractor/tf1.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/youtube_dl/extractor/tf1.py b/youtube_dl/extractor/tf1.py index 07cc81226..025d0877c 100644 --- a/youtube_dl/extractor/tf1.py +++ b/youtube_dl/extractor/tf1.py @@ -1,8 +1,6 @@ # coding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor From 1d2d0e3ff2b4e55810039caf267bb9ad086f3610 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sun, 4 Jan 2015 14:07:06 +0100 Subject: [PATCH 394/974] utils: Remove blank line at the end of file --- youtube_dl/utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index bdfe053a7..d4951c406 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1560,4 +1560,3 @@ def urlhandle_detect_ext(url_handle): getheader = url_handle.info().getheader return getheader('Content-Type').split("/")[1] - From 67c2bcdf4cf83f9ac32e5f1f50a8b4b38d2ac624 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sun, 4 Jan 2015 19:19:15 +0100 Subject: [PATCH 395/974] Remove extractors which infringe copyright (#4554) --- youtube_dl/extractor/__init__.py | 19 ---- youtube_dl/extractor/gogoanime.py | 76 --------------- youtube_dl/extractor/play44.py | 149 ------------------------------ youtube_dl/extractor/videofun.py | 36 -------- 4 files changed, 280 deletions(-) delete mode 100644 youtube_dl/extractor/gogoanime.py delete mode 100644 youtube_dl/extractor/play44.py delete mode 100644 youtube_dl/extractor/videofun.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 143cd5c49..613e8e05b 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -164,10 +164,6 @@ from .globo import GloboIE from .godtube import GodTubeIE from .goldenmoustache import GoldenMoustacheIE from .golem import GolemIE -from .gogoanime import ( - GoGoAnimeIE, - GoGoAnimeSearchIE -) from .googleplus import GooglePlusIE from .googlesearch import GoogleSearchIE from .gorillavid import GorillaVidIE @@ -317,16 +313,6 @@ from .phoenix import PhoenixIE from .photobucket import PhotobucketIE from .planetaplay import PlanetaPlayIE from .played import PlayedIE -from .play44 import ( - Play44IE, - ByZooIE, - Video44IE, - VideoWingIE, - PlayPandaIE, - VideoZooIE, - PlayBBIE, - EasyVideoIE -) from .playfm import PlayFMIE from .playvid import PlayvidIE from .podomatic import PodomaticIE @@ -388,10 +374,6 @@ from .smotri import ( from .snotr import SnotrIE from .sockshare import SockshareIE from .sohu import SohuIE -from .soulanime import ( - SoulAnimeWatchingIE, - SoulAnimeSeriesIE -) from .soundcloud import ( SoundcloudIE, SoundcloudSetIE, @@ -486,7 +468,6 @@ from .viddler import ViddlerIE from .videobam import VideoBamIE from .videodetective import VideoDetectiveIE from .videolecturesnet import VideoLecturesNetIE -from .videofun import VideoFunIE from .videofyme import VideofyMeIE from .videomega import VideoMegaIE from .videopremium import VideoPremiumIE diff --git a/youtube_dl/extractor/gogoanime.py b/youtube_dl/extractor/gogoanime.py deleted file mode 100644 index d4f4ecc58..000000000 --- a/youtube_dl/extractor/gogoanime.py +++ /dev/null @@ -1,76 +0,0 @@ -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - ExtractorError, - compat_urllib_parse, - get_element_by_attribute, - unescapeHTML -) - - -class GoGoAnimeIE(InfoExtractor): - IE_NAME = 'gogoanime' - IE_DESC = 'GoGoAnime' - _VALID_URL = r'http://www.gogoanime.com/(?P<id>[A-Za-z0-9-]+)' - - _TEST = { - 'url': 'http://www.gogoanime.com/mahou-shoujo-madoka-magica-movie-1', - 'info_dict': { - 'id': 'mahou-shoujo-madoka-magica-movie-1' - }, - 'playlist_count': 3 - } - - def _real_extract(self, url): - video_id = self._match_id(url) - page = self._download_webpage(url, video_id) - - if 'Oops! Page Not Found</font>' in page: - raise ExtractorError('Video does not exist', expected=True) - - content = get_element_by_attribute("class", "postcontent", page) - vids = re.findall(r'<iframe[^>]*?src=[\'"](h[^\'"]+)[\'"]', content) - vids = [ - unescapeHTML(compat_urllib_parse.unquote(x)) - for x in vids if not re.search(r".*videofun.*", x)] - - if re.search(r'<div class="postcontent">[^<]*<p><iframe src=[\'"][^>]+></iframe><br />', page): - return self.playlist_result([self.url_result(vid) for vid in vids], video_id) - - title = self._html_search_regex( - r'<div class="postdesc">[^<]*<h1>([^<]+)</h1>', page, 'title') - - return { - '_type': 'url', - 'id': video_id, - 'url': vids[0], - 'title': title, - } - - -class GoGoAnimeSearchIE(InfoExtractor): - IE_NAME = 'gogoanime:search' - IE_DESC = 'GoGoAnime Search' - - _VALID_URL = r'http://www\.gogoanime\.com/.*\?s=(?P<id>[^&]*)' - _TEST = { - 'url': 'http://www.gogoanime.com/?s=bokusatsu', - 'info_dict': { - 'id': 'bokusatsu' - }, - 'playlist_count': 6 - } - - def _real_extract(self, url): - playlist_id = self._match_id(url) - webpage = self._download_webpage(url, playlist_id) - - posts = re.findall( - r'<div class="postlist">[^<]*<p[^>]*>[^<]*<a href="(?P<url>[^"]+)"', - webpage) - - return self.playlist_result( - [self.url_result(p) for p in posts], playlist_id) diff --git a/youtube_dl/extractor/play44.py b/youtube_dl/extractor/play44.py deleted file mode 100644 index b8696e516..000000000 --- a/youtube_dl/extractor/play44.py +++ /dev/null @@ -1,149 +0,0 @@ -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import ( - compat_urllib_parse -) - - -class Play44IE(InfoExtractor): - _VALID_URL = r'http://[w.]*play44\.net/embed\.php[^/]*/(?P<id>.+)' - - _TESTS = [{ - 'url': 'http://play44.net/embed.php?w=600&h=438&vid=M/mahou-shoujo-madoka-magica-07.flv', - 'md5': 'e37e99d665f503dd2db952f7c4dba9e6', - 'info_dict': { - 'id': 'mahou-shoujo-madoka-magica-07', - 'ext': 'flv', - 'title': 'mahou-shoujo-madoka-magica-07', - } - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - page = self._download_webpage(url, video_id) - - video_url = compat_urllib_parse.unquote(self._html_search_regex( - r'_url = "(https?://[^"]+?)";', page, 'url')) - title = self._search_regex(r'.*/(?P<title>[^.]*).', video_url, 'title') - - return { - 'id': title, - 'url': video_url, - 'title': title, - } - - -class ByZooIE(Play44IE): - _VALID_URL = r'http://[w.]*byzoo\.org/embed\.php[^/]*/(?P<id>.+)' - - _TESTS = [{ - 'url': 'http://byzoo.org/embed.php?w=600&h=438&vid=at/nw/mahou_shoujo_madoka_magica_movie_3_-_part1.mp4', - 'md5': '455c83dabe2cd9fd74a87612b01fe017', - 'info_dict': { - 'id': 'mahou_shoujo_madoka_magica_movie_3_-_part1', - 'ext': 'mp4', - 'title': 'mahou_shoujo_madoka_magica_movie_3_-_part1', - } - }] - - -class Video44IE(Play44IE): - _VALID_URL = r'http://[w.]*video44\.net/.*file=(?P<id>[^&].).*' - - _TESTS = [{ - 'url': 'http://www.video44.net/gogo/?w=600&h=438&file=chaoshead-12.flv&sv=1', - 'md5': '43eaec6d0beb10e8d42459b9f108aff3', - 'info_dict': { - 'id': 'chaoshead-12', - 'ext': 'mp4', - 'title': 'chaoshead-12', - } - }] - - -class VideoWingIE(Play44IE): - _VALID_URL = r'''(?x) - http://[w.]*videowing\.[^/]*/ - (?: - .*video=/* - |embed/ - ) - (?P<id>[^&?.]+) - ''' - - _TESTS = [{ - 'url': 'http://videowing.me/embed?w=718&h=438&video=ongoing/boku_wa_tomodachi_ga_sukunai_-_05.mp4', - 'md5': '4ed320e353ed26c742c4f12a9c210b60', - 'info_dict': { - 'id': 'boku_wa_tomodachi_ga_sukunai_-_05', - 'ext': 'mp4', - 'title': 'boku_wa_tomodachi_ga_sukunai_-_05', - } - }, { - 'url': 'http://videowing.me/embed/a8d6a39522df066bd734a69f2334497e?w=600&h=438', - 'md5': '33fdd71581357018c226f95c5cedcfd7', - 'info_dict': { - 'id': 'mahoushoujomadokamagicamovie1part1', - 'ext': 'flv', - 'title': 'mahoushoujomadokamagicamovie1part1', - } - }] - - -class PlayPandaIE(Play44IE): - _VALID_URL = r'http://[w.]*playpanda\.[^/]*/.*vid=/*(?P<id>[^&].).*' - - _TESTS = [{ - 'url': 'http://playpanda.net/embed.php?w=718&h=438&vid=at/nw/boku_wa_tomodachi_ga_sukunai_-_05.mp4', - 'md5': '4ed320e353ed26c742c4f12a9c210b60', - 'info_dict': { - 'id': 'boku_wa_tomodachi_ga_sukunai_-_05', - 'ext': 'mp4', - 'title': 'boku_wa_tomodachi_ga_sukunai_-_05', - 'description': 'boku_wa_tomodachi_ga_sukunai_-_05' - } - }] - - -class VideoZooIE(Play44IE): - _VALID_URL = r'http://[w.]*videozoo\.[^/]*/.*vid=/*(?P<id>[^&].).*' - - _TESTS = [{ - 'url': 'http://videozoo.me/embed.php?w=718&h=438&vid=at/nw/boku_wa_tomodachi_ga_sukunai_-_05.mp4', - 'md5': '4ed320e353ed26c742c4f12a9c210b60', - 'info_dict': { - 'id': 'boku_wa_tomodachi_ga_sukunai_-_05', - 'ext': 'mp4', - 'title': 'boku_wa_tomodachi_ga_sukunai_-_05', - } - }] - - -class PlayBBIE(Play44IE): - _VALID_URL = r'http://[w.]*playbb\.[^/]*/.*vid=/*(?P<id>[^&].).*' - - _TESTS = [{ - 'url': 'http://playbb.me/embed.php?w=718&h=438&vid=at/nw/boku_wa_tomodachi_ga_sukunai_-_05.mp4', - 'md5': '4ed320e353ed26c742c4f12a9c210b60', - 'info_dict': { - 'id': 'boku_wa_tomodachi_ga_sukunai_-_05', - 'ext': 'mp4', - 'title': 'boku_wa_tomodachi_ga_sukunai_-_05', - } - }] - - -class EasyVideoIE(Play44IE): - _VALID_URL = r'http://[w.]*easyvideo\.[^/]*/.*file=/*(?P<id>[^&.]+)' - - _TESTS = [{ - 'url': 'http://easyvideo.me/gogo/?w=718&h=438&file=bokuwatomodachigasukunai-04.flv&sv=1', - 'md5': '26178b57629b7650106d72b191137176', - 'info_dict': { - 'id': 'bokuwatomodachigasukunai-04', - 'ext': 'mp4', - 'title': 'bokuwatomodachigasukunai-04', - }, - 'skip': 'Blocked in Germany', - }] diff --git a/youtube_dl/extractor/videofun.py b/youtube_dl/extractor/videofun.py deleted file mode 100644 index 0364b9d32..000000000 --- a/youtube_dl/extractor/videofun.py +++ /dev/null @@ -1,36 +0,0 @@ -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import ( - compat_urllib_parse -) - - -class VideoFunIE(InfoExtractor): - _VALID_URL = r'http://[w.]*videofun\.me/embed/(?P<id>[0-9a-f]+)' - - _TEST = { - 'url': 'http://videofun.me/embed/8267659be070860af600fee7deadbcdb?w=600&h=438', - 'md5': 'e37e99d665f503dd2db952f7c4dba9e6', - 'info_dict': { - 'id': 'Mahou-Shoujo-Madoka-Magica-07', - 'ext': 'flv', - 'title': 'Mahou-Shoujo-Madoka-Magica-07', - } - } - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage( - url, video_id, 'Downloading video page') - - video_url_encoded = self._html_search_regex( - r'url: "(http://gateway\.videofun\.me[^"]+)"', webpage, 'video url') - video_url = compat_urllib_parse.unquote(video_url_encoded) - title = self._html_search_regex(r'.*/([^.]*)\.', video_url, 'title') - - return { - 'id': title, - 'url': video_url, - 'title': title, - } From 2f985f4bb4938ee13356bda0436fde18f8c0e434 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Mon, 5 Jan 2015 00:18:43 +0100 Subject: [PATCH 396/974] [youtube:toplist] Remove extractor They use now normal playlists (their id is PL*). --- youtube_dl/extractor/__init__.py | 1 - youtube_dl/extractor/youtube.py | 46 -------------------------------- 2 files changed, 47 deletions(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 613e8e05b..79e6bba45 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -546,7 +546,6 @@ from .youtube import ( YoutubeSearchURLIE, YoutubeShowIE, YoutubeSubscriptionsIE, - YoutubeTopListIE, YoutubeTruncatedIDIE, YoutubeTruncatedURLIE, YoutubeUserIE, diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index e9bf39a00..d1bbf0b01 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1206,9 +1206,6 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): if playlist_id.startswith('RD'): # Mixes require a custom extraction process return self._extract_mix(playlist_id) - if playlist_id.startswith('TL'): - raise ExtractorError('For downloading YouTube.com top lists, use ' - 'the "yttoplist" keyword, for example "youtube-dl \'yttoplist:music:Top Tracks\'"', expected=True) url = self._TEMPLATE_URL % playlist_id page = self._download_webpage(url, playlist_id) @@ -1254,49 +1251,6 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): return self.playlist_result(url_results, playlist_id, playlist_title) -class YoutubeTopListIE(YoutubePlaylistIE): - IE_NAME = 'youtube:toplist' - IE_DESC = ('YouTube.com top lists, "yttoplist:{channel}:{list title}"' - ' (Example: "yttoplist:music:Top Tracks")') - _VALID_URL = r'yttoplist:(?P<chann>.*?):(?P<title>.*?)$' - _TESTS = [{ - 'url': 'yttoplist:music:Trending', - 'playlist_mincount': 5, - 'skip': 'Only works for logged-in users', - }] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - channel = mobj.group('chann') - title = mobj.group('title') - query = compat_urllib_parse.urlencode({'title': title}) - channel_page = self._download_webpage( - 'https://www.youtube.com/%s' % channel, title) - link = self._html_search_regex( - r'''(?x) - <a\s+href="([^"]+)".*?>\s* - <span\s+class="branded-page-module-title-text">\s* - <span[^>]*>.*?%s.*?</span>''' % re.escape(query), - channel_page, 'list') - url = compat_urlparse.urljoin('https://www.youtube.com/', link) - - video_re = r'data-index="\d+".*?data-video-id="([0-9A-Za-z_-]{11})"' - ids = [] - # sometimes the webpage doesn't contain the videos - # retry until we get them - for i in itertools.count(0): - msg = 'Downloading Youtube mix' - if i > 0: - msg += ', retry #%d' % i - - webpage = self._download_webpage(url, title, msg) - ids = orderedSet(re.findall(video_re, webpage)) - if ids: - break - url_results = self._ids_to_results(ids) - return self.playlist_result(url_results, playlist_title=title) - - class YoutubeChannelIE(InfoExtractor): IE_DESC = 'YouTube.com channels' _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)' From caf90bfaa5434d9ff7035d8575b842b076178ca3 Mon Sep 17 00:00:00 2001 From: Naglis Jonaitis <njonaitis@gmail.com> Date: Mon, 5 Jan 2015 02:22:01 +0200 Subject: [PATCH 397/974] [webofstories] Add new extractor (Closes #4585) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/webofstories.py | 102 +++++++++++++++++++++++++++ 2 files changed, 103 insertions(+) create mode 100644 youtube_dl/extractor/webofstories.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 79e6bba45..0c8729384 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -511,6 +511,7 @@ from .wdr import ( WDRMobileIE, WDRMausIE, ) +from .webofstories import WebOfStoriesIE from .weibo import WeiboIE from .wimp import WimpIE from .wistia import WistiaIE diff --git a/youtube_dl/extractor/webofstories.py b/youtube_dl/extractor/webofstories.py new file mode 100644 index 000000000..396cf4e83 --- /dev/null +++ b/youtube_dl/extractor/webofstories.py @@ -0,0 +1,102 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import int_or_none + + +class WebOfStoriesIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?webofstories\.com/play/(?:[^/]+/)?(?P<id>[0-9]+)' + _VIDEO_DOMAIN = 'http://eu-mobile.webofstories.com/' + _GREAT_LIFE_STREAMER = 'rtmp://eu-cdn1.webofstories.com/cfx/st/' + _USER_STREAMER = 'rtmp://eu-users.webofstories.com/cfx/st/' + _TESTS = [ + { + 'url': 'http://www.webofstories.com/play/hans.bethe/71', + 'md5': '373e4dd915f60cfe3116322642ddf364', + 'info_dict': { + 'id': '4536', + 'ext': 'mp4', + 'title': 'The temperature of the sun', + 'thumbnail': 're:^https?://.*\.jpg$', + 'description': 'Hans Bethe talks about calculating the temperature of the sun', + 'duration': 238, + } + }, + { + 'url': 'http://www.webofstories.com/play/55908', + 'md5': '2985a698e1fe3211022422c4b5ed962c', + 'info_dict': { + 'id': '55908', + 'ext': 'mp4', + 'title': 'The story of Gemmata obscuriglobus', + 'thumbnail': 're:^https?://.*\.jpg$', + 'description': 'Planctomycete talks about The story of Gemmata obscuriglobus', + 'duration': 169, + } + }, + ] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + title = self._og_search_title(webpage) + description = self._html_search_meta('description', webpage) + thumbnail = self._og_search_thumbnail(webpage) + + story_filename = self._search_regex( + r'\.storyFileName\("([^"]+)"\)', webpage, 'story filename') + speaker_id = self._search_regex( + r'\.speakerId\("([^"]+)"\)', webpage, 'speaker ID') + story_id = self._search_regex( + r'\.storyId\((\d+)\)', webpage, 'story ID') + speaker_type = self._search_regex( + r'\.speakerType\("([^"]+)"\)', webpage, 'speaker type') + great_life = self._search_regex( + r'isGreatLifeStory\s*=\s*(true|false)', webpage, 'great life story') + is_great_life_series = great_life == 'true' + duration = int_or_none(self._search_regex( + r'\.duration\((\d+)\)', webpage, 'duration', fatal=False)) + + # URL building, see: http://www.webofstories.com/scripts/player.js + ms_prefix = '' + if speaker_type.lower() == 'ms': + ms_prefix = 'mini_sites/' + + if is_great_life_series: + mp4_url = '{0:}lives/{1:}/{2:}.mp4'.format( + self._VIDEO_DOMAIN, speaker_id, story_filename) + rtmp_ext = 'flv' + streamer = self._GREAT_LIFE_STREAMER + play_path = 'stories/{0:}/{1:}'.format( + speaker_id, story_filename) + else: + mp4_url = '{0:}{1:}{2:}/{3:}.mp4'.format( + self._VIDEO_DOMAIN, ms_prefix, speaker_id, story_filename) + rtmp_ext = 'mp4' + streamer = self._USER_STREAMER + play_path = 'mp4:{0:}{1:}/{2}.mp4'.format( + ms_prefix, speaker_id, story_filename) + + formats = [{ + 'format_id': 'mp4_sd', + 'url': mp4_url, + }, { + 'format_id': 'rtmp_sd', + 'page_url': url, + 'url': streamer, + 'ext': rtmp_ext, + 'play_path': play_path, + }] + + self._sort_formats(formats) + + return { + 'id': story_id, + 'title': title, + 'formats': formats, + 'thumbnail': thumbnail, + 'description': description, + 'duration': duration, + } From adf3c58ad31e7376f085271a02fdfe56b1e75989 Mon Sep 17 00:00:00 2001 From: Naglis Jonaitis <njonaitis@gmail.com> Date: Mon, 5 Jan 2015 02:55:12 +0200 Subject: [PATCH 398/974] [lrt] Fix missing provider key Also, modernize a bit. --- youtube_dl/extractor/lrt.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/lrt.py b/youtube_dl/extractor/lrt.py index d72d470aa..9c2fbdd96 100644 --- a/youtube_dl/extractor/lrt.py +++ b/youtube_dl/extractor/lrt.py @@ -2,7 +2,6 @@ from __future__ import unicode_literals import re -import json from .common import InfoExtractor from ..utils import ( @@ -28,7 +27,6 @@ class LRTIE(InfoExtractor): 'params': { 'skip_download': True, # HLS download }, - } def _real_extract(self, url): @@ -44,7 +42,9 @@ class LRTIE(InfoExtractor): formats = [] for js in re.findall(r'(?s)config:\s*(\{.*?\})', webpage): - data = json.loads(js_to_json(js)) + data = self._parse_json(js, video_id, transform_source=js_to_json) + if 'provider' not in data: + continue if data['provider'] == 'rtmp': formats.append({ 'format_id': 'rtmp', From bdf80aa542da15437545ae9c17cd5c80e17e171f Mon Sep 17 00:00:00 2001 From: Bart Kappenburg <bartkappenburg@gmail.com> Date: Mon, 5 Jan 2015 11:51:24 +0100 Subject: [PATCH 399/974] Update rtlnl.py Added support for the non-www version of rtlxl.nl by making "www." optional. --- youtube_dl/extractor/rtlnl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/rtlnl.py b/youtube_dl/extractor/rtlnl.py index d029b0ec5..a3ca79f2c 100644 --- a/youtube_dl/extractor/rtlnl.py +++ b/youtube_dl/extractor/rtlnl.py @@ -8,7 +8,7 @@ from ..utils import parse_duration class RtlXlIE(InfoExtractor): IE_NAME = 'rtlxl.nl' - _VALID_URL = r'https?://www\.rtlxl\.nl/#!/[^/]+/(?P<uuid>[^/?]+)' + _VALID_URL = r'https?://(www\.)?rtlxl\.nl/#!/[^/]+/(?P<uuid>[^/?]+)' _TEST = { 'url': 'http://www.rtlxl.nl/#!/rtl-nieuws-132237/6e4203a6-0a5e-3596-8424-c599a59e0677', From a4c3f486394ae8ead64e8e634433670639e3080f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Mon, 5 Jan 2015 11:46:40 +0100 Subject: [PATCH 400/974] [vimple] Replace tests The first one seems to be no longer available and the second was an episode from a tv show. --- youtube_dl/extractor/vimple.py | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/vimple.py b/youtube_dl/extractor/vimple.py index 33d370e1c..ee3d86117 100644 --- a/youtube_dl/extractor/vimple.py +++ b/youtube_dl/extractor/vimple.py @@ -14,28 +14,17 @@ class VimpleIE(InfoExtractor): IE_DESC = 'Vimple.ru' _VALID_URL = r'https?://(player.vimple.ru/iframe|vimple.ru)/(?P<id>[a-f0-9]{10,})' _TESTS = [ - # Quality: Large, from iframe { - 'url': 'http://player.vimple.ru/iframe/b132bdfd71b546d3972f9ab9a25f201c', + 'url': 'http://vimple.ru/c0f6b1687dcd4000a97ebe70068039cf', + 'md5': '2e750a330ed211d3fd41821c6ad9a279', 'info_dict': { - 'id': 'b132bdfd71b546d3972f9ab9a25f201c', - 'title': 'great-escape-minecraft.flv', + 'id': 'c0f6b1687dcd4000a97ebe70068039cf', 'ext': 'mp4', - 'duration': 352, - 'webpage_url': 'http://vimple.ru/b132bdfd71b546d3972f9ab9a25f201c', + 'title': 'Sunset', + 'duration': 20, + 'thumbnail': 're:https?://.*?\.jpg', }, }, - # Quality: Medium, from mainpage - { - 'url': 'http://vimple.ru/a15950562888453b8e6f9572dc8600cd', - 'info_dict': { - 'id': 'a15950562888453b8e6f9572dc8600cd', - 'title': 'DB 01', - 'ext': 'flv', - 'duration': 1484, - 'webpage_url': 'http://vimple.ru/a15950562888453b8e6f9572dc8600cd', - } - }, ] def _real_extract(self, url): From 628bc4d1e73ddef2b67eb6aba7b642c2e0ea894e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Mon, 5 Jan 2015 12:28:35 +0100 Subject: [PATCH 401/974] [khanacademy] Update test --- youtube_dl/extractor/khanacademy.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/khanacademy.py b/youtube_dl/extractor/khanacademy.py index 408d00944..08a671fa8 100644 --- a/youtube_dl/extractor/khanacademy.py +++ b/youtube_dl/extractor/khanacademy.py @@ -22,8 +22,10 @@ class KhanAcademyIE(InfoExtractor): 'description': 'The perfect cipher', 'duration': 176, 'uploader': 'Brit Cruise', + 'uploader_id': 'khanacademy', 'upload_date': '20120411', - } + }, + 'add_ie': ['Youtube'], }, { 'url': 'https://www.khanacademy.org/math/applied-math/cryptography', 'info_dict': { From 75311a7e160912550e3c07642a5635f85f72cb0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Mon, 5 Jan 2015 12:29:32 +0100 Subject: [PATCH 402/974] .travis.yml: Remove my email from the list --- .travis.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index c6cc7a994..f14014414 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,7 +9,6 @@ notifications: email: - filippo.valsorda@gmail.com - phihag@phihag.de - - jaime.marquinez.ferrandiz+travis@gmail.com - yasoob.khld@gmail.com # irc: # channels: From 87830900a95f95308dac565f9da12387edea65e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Mon, 5 Jan 2015 13:07:24 +0100 Subject: [PATCH 403/974] [generic] Update some tests --- youtube_dl/extractor/generic.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 493afb57d..5c41ff517 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -131,12 +131,13 @@ class GenericIE(InfoExtractor): # ooyala video { 'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219', - 'md5': '5644c6ca5d5782c1d0d350dad9bd840c', + 'md5': '166dd577b433b4d4ebfee10b0824d8ff', 'info_dict': { 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ', 'ext': 'mp4', 'title': '2cc213299525360.mov', # that's what we get }, + 'add_ie': ['Ooyala'], }, # google redirect { @@ -146,7 +147,7 @@ class GenericIE(InfoExtractor): 'ext': 'mp4', 'upload_date': '20130224', 'uploader_id': 'TheVerge', - 'description': 'Chris Ziegler takes a look at the Alcatel OneTouch Fire and the ZTE Open; two of the first Firefox OS handsets to be officially announced.', + 'description': 're:^Chris Ziegler takes a look at the\.*', 'uploader': 'The Verge', 'title': 'First Firefox OS phones side-by-side', }, From cd791a5ea08b77dab37c15efa7e064c07144cb6a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Mon, 5 Jan 2015 13:11:13 +0100 Subject: [PATCH 404/974] [ted] Add support for embed-ssl.ted.com embedded videos --- youtube_dl/extractor/generic.py | 2 +- youtube_dl/extractor/ted.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 5c41ff517..2d871f8b4 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -926,7 +926,7 @@ class GenericIE(InfoExtractor): # Look for embedded TED player mobj = re.search( - r'<iframe[^>]+?src=(["\'])(?P<url>http://embed\.ted\.com/.+?)\1', webpage) + r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage) if mobj is not None: return self.url_result(mobj.group('url'), 'TED') diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py index 944177426..10b3b706a 100644 --- a/youtube_dl/extractor/ted.py +++ b/youtube_dl/extractor/ted.py @@ -13,7 +13,7 @@ from ..compat import ( class TEDIE(SubtitlesInfoExtractor): _VALID_URL = r'''(?x) (?P<proto>https?://) - (?P<type>www|embed)(?P<urlmain>\.ted\.com/ + (?P<type>www|embed(?:-ssl)?)(?P<urlmain>\.ted\.com/ ( (?P<type_playlist>playlists(?:/\d+)?) # We have a playlist | @@ -98,7 +98,7 @@ class TEDIE(SubtitlesInfoExtractor): def _real_extract(self, url): m = re.match(self._VALID_URL, url, re.VERBOSE) - if m.group('type') == 'embed': + if m.group('type').startswith('embed'): desktop_url = m.group('proto') + 'www' + m.group('urlmain') return self.url_result(desktop_url, 'TED') name = m.group('name') From a285b6377b46518ca45d6a41481bf920b353a857 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Mon, 5 Jan 2015 13:59:49 +0100 Subject: [PATCH 405/974] [normalboots] Skip download in test, it uses rtmp --- youtube_dl/extractor/normalboots.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/normalboots.py b/youtube_dl/extractor/normalboots.py index 3d35b11ac..c13ff0d65 100644 --- a/youtube_dl/extractor/normalboots.py +++ b/youtube_dl/extractor/normalboots.py @@ -22,7 +22,11 @@ class NormalbootsIE(InfoExtractor): 'description': 'Jon is late for Christmas. Typical. Thanks to: Paul Ritchey for Co-Writing/Filming: http://www.youtube.com/user/ContinueShow Michael Azzi for Christmas Intro Animation: http://michafrar.tumblr.com/ Jerrod Waters for Christmas Intro Music: http://www.youtube.com/user/xXJerryTerryXx Casey Ormond for ‘Tense Battle Theme’:\xa0http://www.youtube.com/Kiamet/', 'uploader': 'JonTron', 'upload_date': '20140125', - } + }, + 'params': { + # rtmp download + 'skip_download': True, + }, } def _real_extract(self, url): From 03ff2cc1c49c82daf2218b76e169c2d679447f03 Mon Sep 17 00:00:00 2001 From: oteng <otengkwaku@gmail.com> Date: Mon, 5 Jan 2015 16:28:24 +0000 Subject: [PATCH 406/974] [Auengine] corrected extractions logic The way the video download url was been extracted was not working well so i change it for it to extract the correct url --- .gitignore | 2 ++ youtube_dl/extractor/auengine.py | 16 ++++++---------- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/.gitignore b/.gitignore index 86312d4e4..0422adf44 100644 --- a/.gitignore +++ b/.gitignore @@ -31,3 +31,5 @@ updates_key.pem test/testdata .tox youtube-dl.zsh +.idea +.idea/* \ No newline at end of file diff --git a/youtube_dl/extractor/auengine.py b/youtube_dl/extractor/auengine.py index 014a21952..17c3ad2ef 100644 --- a/youtube_dl/extractor/auengine.py +++ b/youtube_dl/extractor/auengine.py @@ -29,17 +29,12 @@ class AUEngineIE(InfoExtractor): webpage = self._download_webpage(url, video_id) title = self._html_search_regex(r'<title>(?P<title>.+?)', webpage, 'title') title = title.strip() - links = re.findall(r'\s(?:file|url):\s*["\']([^\'"]+)["\']', webpage) - links = map(compat_urllib_parse.unquote, links) + video_url = re.findall(r'http://\w+.auengine.com/vod/.*[^\W]', webpage) + video_url = map(compat_urllib_parse.unquote, video_url)[0] + thumbnail = re.findall(r'http://\w+.auengine.com/thumb/.*[^\W]', webpage) + thumbnail = map(compat_urllib_parse.unquote, thumbnail)[0] - thumbnail = None - video_url = None - for link in links: - if link.endswith('.png'): - thumbnail = link - elif '/videos/' in link: - video_url = link - if not video_url: + if video_url == "" and thumbnail =="": raise ExtractorError('Could not find video URL') ext = '.' + determine_ext(video_url) if ext == title[-len(ext):]: @@ -52,3 +47,4 @@ class AUEngineIE(InfoExtractor): 'thumbnail': thumbnail, 'http_referer': 'http://www.auengine.com/flowplayer/flowplayer.commercial-3.2.14.swf', } + From 9d247bbd2d972953fbb9e8f9aee67472d3854883 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Mon, 5 Jan 2015 18:13:19 +0100 Subject: [PATCH 407/974] [radiobremen] Fix under Python 2.6 and fix duration --- youtube_dl/extractor/radiobremen.py | 30 +++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/radiobremen.py b/youtube_dl/extractor/radiobremen.py index 6d130d3d9..9f7e6af15 100644 --- a/youtube_dl/extractor/radiobremen.py +++ b/youtube_dl/extractor/radiobremen.py @@ -5,10 +5,11 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..utils import parse_duration class RadioBremenIE(InfoExtractor): - _VALID_URL = r'http?://(?:www\.)?radiobremen\.de/mediathek/(index\.html)?\?id=(?P[0-9]+)' + _VALID_URL = r'http?://(?:www\.)?radiobremen\.de/mediathek/(?:index\.html)?\?id=(?P[0-9]+)' IE_NAME = 'radiobremen' _TEST = { @@ -16,6 +17,7 @@ class RadioBremenIE(InfoExtractor): 'info_dict': { 'id': '114720', 'ext': 'mp4', + 'duration': 1685, 'width': 512, 'title': 'buten un binnen vom 22. Dezember', 'description': 'Unter anderem mit diesen Themen: 45 Flüchtlinge sind in Worpswede angekommen +++ Freies Internet für alle: Bremer arbeiten an einem flächendeckenden W-Lan-Netzwerk +++ Aktivisten kämpfen für das Unibad +++ So war das Wetter 2014 +++', @@ -23,32 +25,32 @@ class RadioBremenIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('video_id') + video_id = self._match_id(url) meta_url = "http://www.radiobremen.de/apps/php/mediathek/metadaten.php?id=%s" % video_id meta_doc = self._download_webpage(meta_url, video_id, 'Downloading metadata') title = self._html_search_regex("(?P.+)</h1>", meta_doc, "title") description = self._html_search_regex("<p>(?P<description>.*)</p>", meta_doc, "description") - duration = self._html_search_regex("Länge:</td>\s+<td>(?P<duration>[0-9]+:[0-9]+)</td>", meta_doc, "duration") + duration = parse_duration( + self._html_search_regex("Länge:</td>\s+<td>(?P<duration>[0-9]+:[0-9]+)</td>", meta_doc, "duration")) page_doc = self._download_webpage(url, video_id, 'Downloading video information') pattern = "ardformatplayerclassic\(\'playerbereich\',\'(?P<width>[0-9]+)\',\'.*\',\'(?P<video_id>[0-9]+)\',\'(?P<secret>[0-9]+)\',\'(?P<thumbnail>.+)\',\'\'\)" mobj = re.search(pattern, page_doc) - width, video_id, secret, thumbnail = int(mobj.group("width")), mobj.group("video_id"), mobj.group("secret"), mobj.group("thumbnail") - video_url = "http://dl-ondemand.radiobremen.de/mediabase/{:}/{:}_{:}_{:}.mp4".format(video_id, video_id, secret, width) + video_url = ( + "http://dl-ondemand.radiobremen.de/mediabase/%s/%s_%s_%s.mp4" % + (video_id, video_id, mobj.group("secret"), mobj.group('width'))) + formats = [{ + 'url': video_url, + 'ext': 'mp4', + 'width': int(mobj.group("width")), + }] return { 'id': video_id, 'title': title, 'description': description, 'duration': duration, - 'formats': [ - {'url': video_url, - 'ext': 'mp4', - 'width': width, - 'protocol': 'http' - } - ], - 'thumbnail': thumbnail, + 'formats': formats, + 'thumbnail': mobj.group('thumbnail'), } From aa80652f47b3df14664556913d4f14172c9ec4fb Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Mon, 5 Jan 2015 18:14:09 +0100 Subject: [PATCH 408/974] [radiobremen] Add test for thumbnail --- youtube_dl/extractor/radiobremen.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/radiobremen.py b/youtube_dl/extractor/radiobremen.py index 9f7e6af15..057dc15ab 100644 --- a/youtube_dl/extractor/radiobremen.py +++ b/youtube_dl/extractor/radiobremen.py @@ -20,6 +20,7 @@ class RadioBremenIE(InfoExtractor): 'duration': 1685, 'width': 512, 'title': 'buten un binnen vom 22. Dezember', + 'thumbnail': 're:https?://.*\.jpg$', 'description': 'Unter anderem mit diesen Themen: 45 Flüchtlinge sind in Worpswede angekommen +++ Freies Internet für alle: Bremer arbeiten an einem flächendeckenden W-Lan-Netzwerk +++ Aktivisten kämpfen für das Unibad +++ So war das Wetter 2014 +++', }, } From 5e3e1c82d828bc54f6873d2c7bdab315713e9a02 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Mon, 5 Jan 2015 18:14:39 +0100 Subject: [PATCH 409/974] Credit @ckrooss for radiobremen (#4632) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 9b548cf25..a63c97ae0 100644 --- a/AUTHORS +++ b/AUTHORS @@ -99,3 +99,4 @@ Max Reimann Cédric Luthi Thijs Vermeir Joel Leclerc +Christopher Krooss From d7cc31b63e1efaf5762f38897d4c717901e127e3 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Mon, 5 Jan 2015 18:16:47 +0100 Subject: [PATCH 410/974] [generic] PEP8 --- youtube_dl/extractor/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 2d871f8b4..7a5bf9392 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -926,7 +926,7 @@ class GenericIE(InfoExtractor): # Look for embedded TED player mobj = re.search( - r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage) + r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage) if mobj is not None: return self.url_result(mobj.group('url'), 'TED') From dda620e88c68e995afcc3cd35b9d360cb42527a0 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Mon, 5 Jan 2015 18:17:03 +0100 Subject: [PATCH 411/974] [radiobremen] Make code more readable and more resilient to failures --- youtube_dl/extractor/radiobremen.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/radiobremen.py b/youtube_dl/extractor/radiobremen.py index 057dc15ab..0d706312e 100644 --- a/youtube_dl/extractor/radiobremen.py +++ b/youtube_dl/extractor/radiobremen.py @@ -29,15 +29,21 @@ class RadioBremenIE(InfoExtractor): video_id = self._match_id(url) meta_url = "http://www.radiobremen.de/apps/php/mediathek/metadaten.php?id=%s" % video_id - meta_doc = self._download_webpage(meta_url, video_id, 'Downloading metadata') - title = self._html_search_regex("<h1.*>(?P<title>.+)</h1>", meta_doc, "title") - description = self._html_search_regex("<p>(?P<description>.*)</p>", meta_doc, "description") - duration = parse_duration( - self._html_search_regex("Länge:</td>\s+<td>(?P<duration>[0-9]+:[0-9]+)</td>", meta_doc, "duration")) + meta_doc = self._download_webpage( + meta_url, video_id, 'Downloading metadata') + title = self._html_search_regex( + r"<h1.*>(?P<title>.+)</h1>", meta_doc, "title") + description = self._html_search_regex( + r"<p>(?P<description>.*)</p>", meta_doc, "description", fatal=False) + duration = parse_duration(self._html_search_regex( + r"Länge:</td>\s+<td>(?P<duration>[0-9]+:[0-9]+)</td>", + meta_doc, "duration", fatal=False)) - page_doc = self._download_webpage(url, video_id, 'Downloading video information') - pattern = "ardformatplayerclassic\(\'playerbereich\',\'(?P<width>[0-9]+)\',\'.*\',\'(?P<video_id>[0-9]+)\',\'(?P<secret>[0-9]+)\',\'(?P<thumbnail>.+)\',\'\'\)" - mobj = re.search(pattern, page_doc) + page_doc = self._download_webpage( + url, video_id, 'Downloading video information') + mobj = re.search( + r"ardformatplayerclassic\(\'playerbereich\',\'(?P<width>[0-9]+)\',\'.*\',\'(?P<video_id>[0-9]+)\',\'(?P<secret>[0-9]+)\',\'(?P<thumbnail>.+)\',\'\'\)", + page_doc) video_url = ( "http://dl-ondemand.radiobremen.de/mediabase/%s/%s_%s_%s.mp4" % (video_id, video_id, mobj.group("secret"), mobj.group('width'))) From 6291438073e35adc94f573a43625fb54a64cf733 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Mon, 5 Jan 2015 18:21:32 +0100 Subject: [PATCH 412/974] [auengine] Simplify (#4643) --- youtube_dl/extractor/auengine.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/auengine.py b/youtube_dl/extractor/auengine.py index 17c3ad2ef..a1b666be0 100644 --- a/youtube_dl/extractor/auengine.py +++ b/youtube_dl/extractor/auengine.py @@ -7,6 +7,7 @@ from ..compat import compat_urllib_parse from ..utils import ( determine_ext, ExtractorError, + remove_end, ) @@ -27,18 +28,18 @@ class AUEngineIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - title = self._html_search_regex(r'<title>(?P<title>.+?)', webpage, 'title') - title = title.strip() - video_url = re.findall(r'http://\w+.auengine.com/vod/.*[^\W]', webpage) - video_url = map(compat_urllib_parse.unquote, video_url)[0] - thumbnail = re.findall(r'http://\w+.auengine.com/thumb/.*[^\W]', webpage) - thumbnail = map(compat_urllib_parse.unquote, thumbnail)[0] + title = self._html_search_regex( + r'\s*(?P<title>.+?)\s*', webpage, 'title') + video_urls = re.findall(r'http://\w+.auengine.com/vod/.*[^\W]', webpage) + video_url = compat_urllib_parse.unquote(video_urls[0]) + thumbnails = re.findall(r'http://\w+.auengine.com/thumb/.*[^\W]', webpage) + thumbnail = compat_urllib_parse.unquote(thumbnails[0]) - if video_url == "" and thumbnail =="": + if not video_url: raise ExtractorError('Could not find video URL') + ext = '.' + determine_ext(video_url) - if ext == title[-len(ext):]: - title = title[:-len(ext)] + title = remove_end(title, ext) return { 'id': video_id, @@ -47,4 +48,3 @@ class AUEngineIE(InfoExtractor): 'thumbnail': thumbnail, 'http_referer': 'http://www.auengine.com/flowplayer/flowplayer.commercial-3.2.14.swf', } - From f4bca0b348fe1f4f65c939b496973062180e0c4f Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Mon, 5 Jan 2015 18:44:29 +0100 Subject: [PATCH 413/974] release 2015.01.05 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 09813928a..086f0ebf0 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2015.01.04' +__version__ = '2015.01.05' From 8f9529cd0559bdbe6c568cfd765f9129666a77be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Mon, 5 Jan 2015 19:14:50 +0100 Subject: [PATCH 414/974] [motorsport] Fix extraction and make trailing '/' optional They directly embed a youtube video now. --- youtube_dl/extractor/motorsport.py | 60 ++++++++++++------------------ 1 file changed, 23 insertions(+), 37 deletions(-) diff --git a/youtube_dl/extractor/motorsport.py b/youtube_dl/extractor/motorsport.py index f5ca74e97..c1a482dba 100644 --- a/youtube_dl/extractor/motorsport.py +++ b/youtube_dl/extractor/motorsport.py @@ -1,63 +1,49 @@ # coding: utf-8 from __future__ import unicode_literals -import hashlib -import json -import time - from .common import InfoExtractor from ..compat import ( - compat_parse_qs, - compat_str, -) -from ..utils import ( - int_or_none, + compat_urlparse, ) class MotorsportIE(InfoExtractor): IE_DESC = 'motorsport.com' - _VALID_URL = r'http://www\.motorsport\.com/[^/?#]+/video/(?:[^/?#]+/)(?P[^/]+)/(?:$|[?#])' + _VALID_URL = r'http://www\.motorsport\.com/[^/?#]+/video/(?:[^/?#]+/)(?P[^/]+)/?(?:$|[?#])' _TEST = { 'url': 'http://www.motorsport.com/f1/video/main-gallery/red-bull-racing-2014-rules-explained/', - 'md5': '5592cb7c5005d9b2c163df5ac3dc04e4', 'info_dict': { - 'id': '7063', + 'id': '2-T3WuR-KMM', 'ext': 'mp4', 'title': 'Red Bull Racing: 2014 Rules Explained', - 'duration': 207, + 'duration': 208, 'description': 'A new clip from Red Bull sees Daniel Ricciardo and Sebastian Vettel explain the 2014 Formula One regulations – which are arguably the most complex the sport has ever seen.', - 'uploader': 'rainiere', - 'thumbnail': r're:^http://.*motorsport\.com/.+\.jpg$' - } + 'uploader': 'mcomstaff', + 'uploader_id': 'UC334JIYKkVnyFoNCclfZtHQ', + 'upload_date': '20140903', + 'thumbnail': r're:^https?://.+\.jpg$' + }, + 'add_ie': ['Youtube'], + 'params': { + 'skip_download': True, + }, } def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) - flashvars_code = self._html_search_regex( - r'Video by: (.*?)', webpage, - 'uploader', fatal=False) + iframe_path = self._html_search_regex( + r'