From 481c5c5137baab386c1cc4a4163bae7404a25f61 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 29 Jul 2016 21:43:17 +0700 Subject: [PATCH 001/775] [tv2:article] Fix extraction (Closes #10188) --- youtube_dl/extractor/tv2.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/tv2.py b/youtube_dl/extractor/tv2.py index 86bb7915d..e4b4ac0e7 100644 --- a/youtube_dl/extractor/tv2.py +++ b/youtube_dl/extractor/tv2.py @@ -8,6 +8,7 @@ from ..utils import ( determine_ext, int_or_none, float_or_none, + js_to_json, parse_iso8601, remove_end, ) @@ -105,7 +106,7 @@ class TV2ArticleIE(InfoExtractor): 'url': 'http://www.tv2.no/2015/05/16/nyheter/alesund/krim/pingvin/6930542', 'info_dict': { 'id': '6930542', - 'title': 'Russen hetses etter pingvintyveri – innrømmer å ha åpnet luken på buret', + 'title': 'Russen hetses etter pingvintyveri - innrømmer å ha åpnet luken på buret', 'description': 'md5:339573779d3eea3542ffe12006190954', }, 'playlist_count': 2, @@ -119,9 +120,23 @@ class TV2ArticleIE(InfoExtractor): webpage = self._download_webpage(url, playlist_id) + # Old embed pattern (looks unused nowadays) + assets = re.findall(r'data-assetid=["\'](\d+)', webpage) + + if not assets: + # New embed pattern + for v in re.findall('TV2ContentboxVideo\(({.+?})\)', webpage): + video = self._parse_json( + v, playlist_id, transform_source=js_to_json, fatal=False) + if not video: + continue + asset = video.get('assetId') + if asset: + assets.append(asset) + entries = [ - self.url_result('http://www.tv2.no/v/%s' % video_id, 'TV2') - for video_id in re.findall(r'data-assetid="(\d+)"', webpage)] + self.url_result('http://www.tv2.no/v/%s' % asset_id, 'TV2') + for asset_id in assets] title = remove_end(self._og_search_title(webpage), ' - TV2.no') description = remove_end(self._og_search_description(webpage), ' - TV2.no') From dbc0b39b9158be0bdf50d031d1e993078e6cd264 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 29 Jul 2016 22:01:34 +0700 Subject: [PATCH 002/775] [tv2] Improve extraction --- youtube_dl/extractor/tv2.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/tv2.py b/youtube_dl/extractor/tv2.py index e4b4ac0e7..f225ec684 100644 --- a/youtube_dl/extractor/tv2.py +++ b/youtube_dl/extractor/tv2.py @@ -55,10 +55,11 @@ class TV2IE(InfoExtractor): ext = determine_ext(video_url) if ext == 'f4m': formats.extend(self._extract_f4m_formats( - video_url, video_id, f4m_id=format_id)) + video_url, video_id, f4m_id=format_id, fatal=False)) elif ext == 'm3u8': formats.extend(self._extract_m3u8_formats( - video_url, video_id, 'mp4', m3u8_id=format_id)) + video_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id=format_id, fatal=False)) elif ext == 'ism' or video_url.endswith('.ism/Manifest'): pass else: From bb9f3bfedf5d11d8f246654a5e67708edc01c30d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Fri, 29 Jul 2016 17:14:04 +0200 Subject: [PATCH 003/775] Revert "[rtve] Fix extraction (#10076)" This reverts commit c39b2ed990105e640456f126321ef3d771884405. Apparently outside of Spain using 'auth/resources' is required (#10097). 
--- youtube_dl/extractor/rtve.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/rtve.py b/youtube_dl/extractor/rtve.py index d33b05f5d..05becc92b 100644 --- a/youtube_dl/extractor/rtve.py +++ b/youtube_dl/extractor/rtve.py @@ -113,7 +113,9 @@ class RTVEALaCartaIE(InfoExtractor): png = self._download_webpage(png_request, video_id, 'Downloading url information') video_url = _decrypt_url(png) if not video_url.endswith('.f4m'): - video_url = video_url.replace('.net.rtve', '.multimedia.cdn.rtve') + video_url = video_url.replace( + 'resources/', 'auth/resources/' + ).replace('.net.rtve', '.multimedia.cdn.rtve') subtitles = None if info.get('sbtFile') is not None: From da0baba5c8a0ac6220cc0155044a01b97bc00a76 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Fri, 29 Jul 2016 17:20:27 +0200 Subject: [PATCH 004/775] [rtve] Fix extraction for some videos For example http://www.rtve.es/alacarta/videos/documentos-tv/documentos-tv-descredito/3574098/. --- youtube_dl/extractor/rtve.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/rtve.py b/youtube_dl/extractor/rtve.py index 05becc92b..34f9c4a99 100644 --- a/youtube_dl/extractor/rtve.py +++ b/youtube_dl/extractor/rtve.py @@ -113,9 +113,9 @@ class RTVEALaCartaIE(InfoExtractor): png = self._download_webpage(png_request, video_id, 'Downloading url information') video_url = _decrypt_url(png) if not video_url.endswith('.f4m'): - video_url = video_url.replace( - 'resources/', 'auth/resources/' - ).replace('.net.rtve', '.multimedia.cdn.rtve') + if '?' not in video_url: + video_url = video_url.replace('resources/', 'auth/resources/') + video_url = video_url.replace('.net.rtve', '.multimedia.cdn.rtve') subtitles = None if info.get('sbtFile') is not None: From 485fedf6fd801d7ae0796d661ae7624564f67df8 Mon Sep 17 00:00:00 2001 From: Dave Date: Thu, 28 Jul 2016 19:23:51 +0200 Subject: [PATCH 005/775] [dailymotion:playlist] Optimize download archive processing --- youtube_dl/extractor/dailymotion.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index 1f92823b7..98d2c82f4 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -331,7 +331,7 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor): for video_id in re.findall(r'data-xid="(.+?)"', webpage): if video_id not in video_ids: - yield self.url_result('http://www.dailymotion.com/video/%s' % video_id, 'Dailymotion') + yield self.url_result('http://www.dailymotion.com/video/%s' % video_id, 'Dailymotion', video_id) video_ids.add(video_id) if re.search(self._MORE_PAGES_INDICATOR, webpage) is None: From fa9f1d16b807e66a9c3c2dead77c44624d556408 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 29 Jul 2016 22:47:34 +0700 Subject: [PATCH 006/775] [dailymotion:playlist] Carry long line --- youtube_dl/extractor/dailymotion.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index 98d2c82f4..496883d15 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -331,7 +331,9 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor): for video_id in re.findall(r'data-xid="(.+?)"', webpage): if video_id not in video_ids: - yield self.url_result('http://www.dailymotion.com/video/%s' % video_id, 'Dailymotion', 
video_id) + yield self.url_result( + 'http://www.dailymotion.com/video/%s' % video_id, + DailymotionIE.ie_key(), video_id) video_ids.add(video_id) if re.search(self._MORE_PAGES_INDICATOR, webpage) is None: From 35aa6c538f593eb47169e6b06ef383ad12b45930 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 7 Jul 2016 13:41:25 +0800 Subject: [PATCH 007/775] Add ChangeLog --- ChangeLog | 261 ++++++++++++++++++++++++++++++++++++++++++ Makefile | 4 +- devscripts/release.sh | 5 +- 3 files changed, 267 insertions(+), 3 deletions(-) create mode 100644 ChangeLog diff --git a/ChangeLog b/ChangeLog new file mode 100644 index 000000000..de29ccf5e --- /dev/null +++ b/ChangeLog @@ -0,0 +1,261 @@ +version + +Fixed/improved extractors +- twitch +- tv2 (#10188) +- rtve (#10076) +- dailymotion (#10180) + + +version 2016.07.28 + +Fixed/improved extractors +- shared (#10170) +- soundcloud (#10179) +- twitch (#9767) + + +version 2016.07.26.2 + +Fixed/improved extractors +- smotri +- camdemy +- mtv +- comedycentral +- cmt +- cbc +- mgtv +- orf + + +version 2016.07.24 + +New extractors +- arkena (#8682) +- lcp (#8682) + +Fixed/improved extractors +- facebook (#10151) +- dailymail +- telegraaf +- dcn +- onet +- tvp + +Miscellaneous +- Support $Time$ in DASH manifests + + +version 2016.07.22 + +New extractors +- odatv (#9285) + +Fixed/improved extractors +- bbc +- youjizz (#10131) +- youtube (#10140) +- pornhub (#10138) +- eporner (#10139) + + +version 2016.07.17 + +New extractors +- nintendo (#9986) +- streamable (#9122) + +Fixed/improved extractors +- ard (#10095) +- mtv +- comedycentral (#10101) +- viki (#10098) +- spike (#10106) + +Miscellaneous +- Improved twitter player detection (#10090) + + +version 2016.07.16 + +New extractors +- ninenow (#5181) + +Fixed/improved extractors +- rtve (#10076) +- brightcove +- 3qsdn +- syfy (#9087, #3820, #2388) +- youtube (#10083) + +Miscellaneous +- Fix subtitle embedding for video-only and audio-only files (#10081) + + +version 2016.07.13 + +New extractors +- rudo + +Fixed/improved extractors +- biobiochiletv +- tvplay +- dbtv +- brightcove +- tmz +- youtube (#10059) +- shahid (#10062) +- vk +- ellentv (#10067) + + +version 2016.07.11 + +New Extractors +- roosterteeth (#9864) + +Fixed/improved extractors +- miomio (#9605) +- vuclip +- youtube +- vidzi (#10058) + + +version 2016.07.09.2 + +Fixed/improved extractors +- vimeo (#1638) +- facebook (#10048) +- lynda (#10047) +- animeondemand + +Fixed/improved features +- Embedding subtitles no longer throws an error with problematic inputs (#9063) + + +version 2016.07.09.1 + +Fixed/improved extractors +- youtube +- ard +- srmediatek (#9373) + + +version 2016.07.09 + +New extractors +- Flipagram (#9898) + +Fixed/improved extractors +- telecinco +- toutv +- radiocanada +- tweakers (#9516) +- lynda +- nick (#7542) +- polskieradio (#10028) +- le +- facebook (#9851) +- mgtv +- animeondemand (#10031) + +Fixed/improved features +- `--postprocessor-args` and `--downloader-args` now accepts non-ASCII inputs + on non-Windows systems + + +version 2016.07.07 + +New extractors +- kamcord (#10001) + +Fixed/improved extractors +- spiegel (#10018) +- metacafe (#8539, #3253) +- onet (#9950) +- francetv (#9955) +- brightcove (#9965) +- daum (#9972) + + +version 2016.07.06 + +Fixed/improved extractors +- youtube (#10007, #10009) +- xuite +- stitcher +- spiegel +- slideshare +- sandia +- rtvnh +- prosiebensat1 +- onionstudios + + +version 2016.07.05 + +Fixed/improved extractors +- brightcove +- yahoo (#9995) +- pornhub (#9997) +- iqiyi +- kaltura 
(#5557) +- la7 +- Changed features +- Rename --cn-verfication-proxy to --geo-verification-proxy +Miscellaneous +- Add script for displaying downloads statistics + + +version 2016.07.03.1 + +Fixed/improved extractors +- theplatform +- aenetworks +- nationalgeographic +- hrti (#9482) +- facebook (#5701) +- buzzfeed (#5701) +- rai (#8617, #9157, #9232, #8552, #8551) +- nationalgeographic (#9991) +- iqiyi + + +version 2016.07.03 + +New extractors +- hrti (#9482) + +Fixed/improved extractors +- vk (#9981) +- facebook (#9938) +- xtube (#9953, #9961) + + +version 2016.07.02 + +New extractors +- fusion (#9958) + +Fixed/improved extractors +- twitch (#9975) +- vine (#9970) +- periscope (#9967) +- pornhub (#8696) + + +version 2016.07.01 + +New extractors +- 9c9media +- ctvnews (#2156) +- ctv (#4077) + +Fixed/Improved extractors +- rds +- meta (#8789) +- pornhub (#9964) +- sixplay (#2183) + +New features +- Accept quoted strings across multiple lines (#9940) diff --git a/Makefile b/Makefile index 6ee4ba4eb..354052c50 100644 --- a/Makefile +++ b/Makefile @@ -94,7 +94,7 @@ _EXTRACTOR_FILES != find youtube_dl/extractor -iname '*.py' -and -not -iname 'la youtube_dl/extractor/lazy_extractors.py: devscripts/make_lazy_extractors.py devscripts/lazy_load_template.py $(_EXTRACTOR_FILES) $(PYTHON) devscripts/make_lazy_extractors.py $@ -youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish +youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish ChangeLog @tar -czf youtube-dl.tar.gz --transform "s|^|youtube-dl/|" --owner 0 --group 0 \ --exclude '*.DS_Store' \ --exclude '*.kate-swp' \ @@ -107,7 +107,7 @@ youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash- --exclude 'docs/_build' \ -- \ bin devscripts test youtube_dl docs \ - LICENSE README.md README.txt \ + ChangeLog LICENSE README.md README.txt \ Makefile MANIFEST.in youtube-dl.1 youtube-dl.bash-completion \ youtube-dl.zsh youtube-dl.fish setup.py \ youtube-dl diff --git a/devscripts/release.sh b/devscripts/release.sh index f8d466ba8..ca6ae1b49 100755 --- a/devscripts/release.sh +++ b/devscripts/release.sh @@ -71,9 +71,12 @@ fi /bin/echo -e "\n### Changing version in version.py..." sed -i "s/__version__ = '.*'/__version__ = '$version'/" youtube_dl/version.py +/bin/echo -e "\n### Changing version in ChangeLog..." +sed -i "s//$version/" ChangeLog + /bin/echo -e "\n### Committing documentation, templates and youtube_dl/version.py..." make README.md CONTRIBUTING.md .github/ISSUE_TEMPLATE.md supportedsites -git add README.md CONTRIBUTING.md .github/ISSUE_TEMPLATE.md docs/supportedsites.md youtube_dl/version.py +git add README.md CONTRIBUTING.md .github/ISSUE_TEMPLATE.md docs/supportedsites.md youtube_dl/version.py ChangeLog git commit $gpg_sign_commits -m "release $version" /bin/echo -e "\n### Now tagging, signing and pushing..." 
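The `sed` line above stamps the release version into the ChangeLog's unreleased section header before committing. A minimal Python equivalent, assuming the header carries a literal `<unreleased>` placeholder (the substitution pattern reads as empty above, so the exact token is an assumption):

```python
# Sketch of the ChangeLog version-stamping step from devscripts/release.sh.
# Assumption: the unreleased header is "version <unreleased>" and the release
# script rewrites it to "version $version" before committing.
import io

def stamp_changelog(path, version):
    with io.open(path, encoding='utf-8') as f:
        changelog = f.read()
    with io.open(path, 'w', encoding='utf-8') as f:
        f.write(changelog.replace('<unreleased>', version, 1))

# stamp_changelog('ChangeLog', '2016.07.30')
```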
From 9361f2169c61df29139aad1c500dda39c1bce4cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 30 Jul 2016 14:43:28 +0700 Subject: [PATCH 008/775] [ChangeLog] Make extractor improvements' descriptions more concrete --- ChangeLog | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/ChangeLog b/ChangeLog index de29ccf5e..1c7bc6094 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,10 +1,11 @@ version Fixed/improved extractors -- twitch -- tv2 (#10188) +- [twitch:clips] Sort formats +- [tv2] Use m3u8_native +- [tv2:article] Fix video detection (#10188) - rtve (#10076) -- dailymotion (#10180) +- [dailymotion:playlist] Optimize download archive processing (#10180) version 2016.07.28 From 2903137292721afc50c1f3a97c677cea1bc2d07b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 30 Jul 2016 14:45:07 +0700 Subject: [PATCH 009/775] release 2016.07.30 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 27257ee0a..aaf06aa05 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.07.28*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.07.28** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.07.30*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.07.30** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.07.28 +[debug] youtube-dl version 2016.07.30 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 1c7bc6094..3ee3b1237 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2016.07.30 Fixed/improved extractors - [twitch:clips] Sort formats diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 2cfa406d9..9056a883b 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.07.28' +__version__ = '2016.07.30' From cf03e34ad3cd42997da92b49412d98d06ea82681 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 31 Jul 2016 07:56:18 +0700 Subject: [PATCH 010/775] [yandexmusic:track] Fix extraction (Closes #10193) --- youtube_dl/extractor/yandexmusic.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/youtube_dl/extractor/yandexmusic.py b/youtube_dl/extractor/yandexmusic.py index b37d0eab6..fd6268ba4 100644 --- a/youtube_dl/extractor/yandexmusic.py +++ b/youtube_dl/extractor/yandexmusic.py @@ -75,6 +75,12 @@ class YandexMusicTrackIE(YandexMusicBaseIE): % storage_dir, track_id, 'Downloading track location JSON') + # Each string is now wrapped in a list, this is probably only temporarily thus + # supporting both scenarios (see https://github.com/rg3/youtube-dl/issues/10193) + for k, v in data.items(): + if v and isinstance(v, list): + data[k] = v[0] + key = hashlib.md5(('XGRlBW9FXlekgbPrRHuSiA' + data['path'][1:] + data['s']).encode('utf-8')).hexdigest() storage = storage_dir.split('.') From 116e7e0d044c5b50bf8221329bcd54d00c0dcad5 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 31 Jul 2016 14:46:54 +0800 Subject: [PATCH 011/775] [bloomberg] Support BPlayer() players (closes #10187) --- ChangeLog | 5 +++++ youtube_dl/extractor/bloomberg.py | 19 ++++++++++++++++++- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 3ee3b1237..cb2d0beb2 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +version + +Fixed/improved extractors +- [bloomberg] Support another form of player (#10187) + version 2016.07.30 Fixed/improved extractors diff --git a/youtube_dl/extractor/bloomberg.py b/youtube_dl/extractor/bloomberg.py index bd538be50..2a8cd64b9 100644 --- a/youtube_dl/extractor/bloomberg.py +++ b/youtube_dl/extractor/bloomberg.py @@ -1,3 +1,4 @@ +# coding: utf-8 from __future__ import unicode_literals import re @@ -20,6 +21,18 @@ class BloombergIE(InfoExtractor): 'params': { 'format': 'best[format_id^=hds]', }, + }, { + # video ID in BPlayer(...) 
+ 'url': 'http://www.bloomberg.com/features/2016-hello-world-new-zealand/', + 'info_dict': { + 'id': '938c7e72-3f25-4ddb-8b85-a9be731baa74', + 'ext': 'flv', + 'title': 'Meet the Real-Life Tech Wizards of Middle Earth', + 'description': 'Hello World, Episode 1: New Zealand’s freaky AI babies, robot exoskeletons, and a virtual you.', + }, + 'params': { + 'format': 'best[format_id^=hds]', + }, }, { 'url': 'http://www.bloomberg.com/news/articles/2015-11-12/five-strange-things-that-have-been-happening-in-financial-markets', 'only_matching': True, @@ -33,7 +46,11 @@ class BloombergIE(InfoExtractor): webpage = self._download_webpage(url, name) video_id = self._search_regex( r'["\']bmmrId["\']\s*:\s*(["\'])(?P.+?)\1', - webpage, 'id', group='url') + webpage, 'id', group='url', default=None) + if not video_id: + bplayer_data = self._parse_json(self._search_regex( + r'BPlayer\(null,\s*({[^;]+})\);', webpage, 'id'), name) + video_id = bplayer_data['id'] title = re.sub(': Video$', '', self._og_search_title(webpage)) embed_info = self._download_json( From e382b953f00982a2085d3e0b1b6fb4d2a0f2db7e Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 1 Aug 2016 00:33:30 +0100 Subject: [PATCH 012/775] [limelight] skip preview and drm protected videos --- youtube_dl/extractor/limelight.py | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/limelight.py b/youtube_dl/extractor/limelight.py index 5d2c3e256..0d7abbaa8 100644 --- a/youtube_dl/extractor/limelight.py +++ b/youtube_dl/extractor/limelight.py @@ -37,11 +37,12 @@ class LimelightBaseIE(InfoExtractor): for stream in streams: stream_url = stream.get('url') - if not stream_url: + if not stream_url or stream.get('previewStream') or stream.get('drmProtected'): continue - if '.f4m' in stream_url: + ext = determine_ext(stream_url) + if ext == 'f4m': formats.extend(self._extract_f4m_formats( - stream_url, video_id, fatal=False)) + stream_url, video_id, f4m_id='hds', fatal=False)) else: fmt = { 'url': stream_url, @@ -50,7 +51,7 @@ class LimelightBaseIE(InfoExtractor): 'fps': float_or_none(stream.get('videoFrameRate')), 'width': int_or_none(stream.get('videoWidthInPixels')), 'height': int_or_none(stream.get('videoHeightInPixels')), - 'ext': determine_ext(stream_url) + 'ext': ext, } rtmp = re.search(r'^(?Prtmpe?://[^/]+/(?P.+))/(?Pmp4:.+)$', stream_url) if rtmp: @@ -68,18 +69,23 @@ class LimelightBaseIE(InfoExtractor): for mobile_url in mobile_urls: media_url = mobile_url.get('mobileUrl') - if not media_url: - continue format_id = mobile_url.get('targetMediaPlatform') - if determine_ext(media_url) == 'm3u8': + if not media_url or format_id == 'Widevine': + continue + ext = determine_ext(media_url) + if ext == 'm3u8': formats.extend(self._extract_m3u8_formats( media_url, video_id, 'mp4', 'm3u8_native', m3u8_id=format_id, fatal=False)) + elif ext == 'f4m': + formats.extend(self._extract_f4m_formats( + stream_url, video_id, f4m_id=format_id, fatal=False)) else: formats.append({ 'url': media_url, 'format_id': format_id, 'preference': -1, + 'ext': ext, }) self._sort_formats(formats) @@ -145,7 +151,7 @@ class LimelightMediaIE(LimelightBaseIE): 'url': 'http://link.videoplatform.limelight.com/media/?mediaId=3ffd040b522b4485b6d84effc750cd86', 'info_dict': { 'id': '3ffd040b522b4485b6d84effc750cd86', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'HaP and the HB Prince Trailer', 'description': 'md5:8005b944181778e313d95c1237ddb640', 'thumbnail': 're:^https?://.*\.jpeg$', @@ -154,7 +160,7 @@ class 
LimelightMediaIE(LimelightBaseIE): 'upload_date': '20090604', }, 'params': { - # rtmp download + # m3u8 download 'skip_download': True, }, }, { @@ -164,7 +170,6 @@ class LimelightMediaIE(LimelightBaseIE): 'id': 'a3e00274d4564ec4a9b29b9466432335', 'ext': 'flv', 'title': '3Play Media Overview Video', - 'description': '', 'thumbnail': 're:^https?://.*\.jpeg$', 'duration': 78.101, 'timestamp': 1338929955, @@ -172,7 +177,7 @@ class LimelightMediaIE(LimelightBaseIE): 'subtitles': 'mincount:9', }, 'params': { - # rtmp download + # m3u8 download 'skip_download': True, }, }, { From 697655a7c0c1469dd7474714652025961e82bd69 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 1 Aug 2016 21:48:48 +0700 Subject: [PATCH 013/775] [safari] Relax url regexes (Closes #10202) --- youtube_dl/extractor/safari.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/safari.py b/youtube_dl/extractor/safari.py index 6ba91f202..08ddbe3c4 100644 --- a/youtube_dl/extractor/safari.py +++ b/youtube_dl/extractor/safari.py @@ -75,7 +75,7 @@ class SafariBaseIE(InfoExtractor): class SafariIE(SafariBaseIE): IE_NAME = 'safari' IE_DESC = 'safaribooksonline.com online video' - _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/library/view/[^/]+/(?P[^/]+)/(?Ppart\d+)\.html' + _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/library/view/[^/]+/(?P[^/]+)/(?P[^/?#&]+)\.html' _TESTS = [{ 'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/part00.html', @@ -92,6 +92,9 @@ class SafariIE(SafariBaseIE): # non-digits in course id 'url': 'https://www.safaribooksonline.com/library/view/create-a-nodejs/100000006A0210/part00.html', 'only_matching': True, + }, { + 'url': 'https://www.safaribooksonline.com/library/view/learning-path-red/9780134664057/RHCE_Introduction.html', + 'only_matching': True, }] def _real_extract(self, url): @@ -132,12 +135,15 @@ class SafariIE(SafariBaseIE): class SafariApiIE(SafariBaseIE): IE_NAME = 'safari:api' - _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/api/v1/book/(?P[^/]+)/chapter(?:-content)?/(?Ppart\d+)\.html' + _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/api/v1/book/(?P[^/]+)/chapter(?:-content)?/(?P[^/?#&]+)\.html' - _TEST = { + _TESTS = [{ 'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html', 'only_matching': True, - } + }, { + 'url': 'https://www.safaribooksonline.com/api/v1/book/9780134664057/chapter/RHCE_Introduction.html', + 'only_matching': True, + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) From a70e45f80a398fccbb757dd2e166d15f16ffb160 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 1 Aug 2016 16:25:41 +0100 Subject: [PATCH 014/775] [limelight] keep videos marked as previewStream https://github.com/rg3/youtube-dl/commit/e382b953f00982a2085d3e0b1b6fb4d2a0f2db7e#commitcomment-18472915 --- youtube_dl/extractor/limelight.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/limelight.py b/youtube_dl/extractor/limelight.py index 0d7abbaa8..efe1437e0 100644 --- a/youtube_dl/extractor/limelight.py +++ b/youtube_dl/extractor/limelight.py @@ -37,7 +37,7 @@ class LimelightBaseIE(InfoExtractor): for stream in streams: stream_url = stream.get('url') - if not stream_url or stream.get('previewStream') or stream.get('drmProtected'): + if not stream_url or stream.get('drmProtected'): continue ext = determine_ext(stream_url) if ext == 'f4m': @@ -177,7 
+177,7 @@ class LimelightMediaIE(LimelightBaseIE): 'subtitles': 'mincount:9', }, 'params': { - # m3u8 download + # rtmp download 'skip_download': True, }, }, { From e03d3e64533a2b64293b9ed38b62acd3a8b06d9c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 1 Aug 2016 22:51:01 +0700 Subject: [PATCH 015/775] [cwtv] Add support for cwtvpr.com (Closes #10196) --- youtube_dl/extractor/cwtv.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/cwtv.py b/youtube_dl/extractor/cwtv.py index ebd14cb16..c66c359cf 100644 --- a/youtube_dl/extractor/cwtv.py +++ b/youtube_dl/extractor/cwtv.py @@ -9,7 +9,7 @@ from ..utils import ( class CWTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?cw(?:tv|seed)\.com/(?:shows/)?(?:[^/]+/){2}\?.*\bplay=(?P[a-z0-9]{8}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{12})' + _VALID_URL = r'https?://(?:www\.)?cw(?:tv(?:pr)?|seed)\.com/(?:shows/)?(?:[^/]+/)+[^?]*\?.*\b(?:play|watch)=(?P[a-z0-9]{8}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{12})' _TESTS = [{ 'url': 'http://cwtv.com/shows/arrow/legends-of-yesterday/?play=6b15e985-9345-4f60-baf8-56e96be57c63', 'info_dict': { @@ -51,6 +51,12 @@ class CWTVIE(InfoExtractor): }, { 'url': 'http://cwtv.com/thecw/chroniclesofcisco/?play=8adebe35-f447-465f-ab52-e863506ff6d6', 'only_matching': True, + }, { + 'url': 'http://cwtvpr.com/the-cw/video?watch=9eee3f60-ef4e-440b-b3b2-49428ac9c54e', + 'only_matching': True, + }, { + 'url': 'http://cwtv.com/shows/arrow/legends-of-yesterday/?watch=6b15e985-9345-4f60-baf8-56e96be57c63', + 'only_matching': True, }] def _real_extract(self, url): From eafc66855dd591ce4112bb51cdd88eebd3fcf2ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 1 Aug 2016 22:56:01 +0700 Subject: [PATCH 016/775] [ChangeLog] Add recent changes --- ChangeLog | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ChangeLog b/ChangeLog index cb2d0beb2..56bb6cc18 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,7 +1,11 @@ version Fixed/improved extractors +- [yandexmusic:track] Adapt to changes in track location JSON (#10193) - [bloomberg] Support another form of player (#10187) +- [limelight] Skip DRM protected videos +- [safari] Relax regular expressions for URL matching (#10202) +- [cwtv] Add support for cwtvpr.com (#10196) version 2016.07.30 From 45408eb0750ac53f965756331022cbed3dbe2a38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 1 Aug 2016 22:59:23 +0700 Subject: [PATCH 017/775] release 2016.08.01 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index aaf06aa05..9d15b6a89 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.07.30*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.07.30** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.08.01*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.08.01** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.07.30 +[debug] youtube-dl version 2016.08.01 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 56bb6cc18..f3c752e66 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2016.08.01 Fixed/improved extractors - [yandexmusic:track] Adapt to changes in track location JSON (#10193) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 9056a883b..27f97b213 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.07.30' +__version__ = '2016.08.01' From 6a9b3b61ea145ac03198bc81a67ee322d42a1bc1 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 2 Aug 2016 14:02:31 +0200 Subject: [PATCH 018/775] [comedycentral] Re-add shortnames In cc99d4f826a942b18133fe4221c9de2f9197e860, the shortname feature got deleted by accident. Re-add it as a separate IE. --- youtube_dl/extractor/comedycentral.py | 20 ++++++++++++++++++++ youtube_dl/extractor/extractors.py | 1 + 2 files changed, 21 insertions(+) diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py index c76909e48..88346dde7 100644 --- a/youtube_dl/extractor/comedycentral.py +++ b/youtube_dl/extractor/comedycentral.py @@ -1,6 +1,7 @@ from __future__ import unicode_literals from .mtv import MTVServicesInfoExtractor +from .common import InfoExtractor class ComedyCentralIE(MTVServicesInfoExtractor): @@ -96,3 +97,22 @@ class ComedyCentralTVIE(MTVServicesInfoExtractor): webpage, 'mrss url', group='url') return self._get_videos_info_from_url(mrss_url, video_id) + + +class ComedyCentralShortnameIE(InfoExtractor): + _VALID_URL = r'^:(?Ptds|thedailyshow)$' + _TESTS = [{ + 'url': ':tds', + 'only_matching': True, + }, { + 'url': ':thedailyshow', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + shortcut_map = { + 'tds': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes', + 'thedailyshow': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes', + } + return self.url_result(shortcut_map[video_id]) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 53fab1a31..86c48ff54 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -159,6 +159,7 @@ from .coub import CoubIE from .collegerama import CollegeRamaIE from .comedycentral import ( ComedyCentralIE, + ComedyCentralShortnameIE, ComedyCentralTVIE, ToshIE, ) From 3aa9a73554a43b6dcb7a69a6ff8e8fd02c1e836e Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 2 Aug 2016 17:03:26 +0200 Subject: [PATCH 019/775] [options] Hide --password=secret in verbose output --- youtube_dl/options.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) 
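The diff below extends `_hide_login_info` with a regex pass so that `--password=secret` style arguments are also masked in verbose output, not just the space-separated form. A self-contained sketch of that behaviour, assuming the pattern's named group is `key` (implied by the `m.group('key')` call in the diff) and using a standalone `hide_login_info` wrapper purely for illustration:

```python
# Standalone sketch of the scrubbing logic added to _hide_login_info below.
import re

PRIVATE_OPTS = ['-p', '--password', '-u', '--username', '--video-password']
_eqre = re.compile(
    '^(?P<key>' + '|'.join(re.escape(po) for po in PRIVATE_OPTS) + ')=.+$')

def hide_login_info(opts):
    def scrub_eq(o):
        # '--password=secret' -> '--password=PRIVATE'
        m = _eqre.match(o)
        return m.group('key') + '=PRIVATE' if m else o

    opts = [scrub_eq(o) for o in opts]
    # '-p secret' / '--password secret' -> the value itself becomes PRIVATE
    for private_opt in PRIVATE_OPTS:
        try:
            i = opts.index(private_opt)
            opts[i + 1] = 'PRIVATE'
        except ValueError:
            pass
    return opts

# hide_login_info(['-u', 'me@example.com', '--password=hunter2'])
# -> ['-u', 'PRIVATE', '--password=PRIVATE']
```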
diff --git a/youtube_dl/options.py b/youtube_dl/options.py index c4a85b2c0..942d44912 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals import os.path import optparse +import re import sys from .downloader.external import list_external_downloaders @@ -93,8 +94,18 @@ def parseOpts(overrideArguments=None): setattr(parser.values, option.dest, value.split(',')) def _hide_login_info(opts): - opts = list(opts) - for private_opt in ['-p', '--password', '-u', '--username', '--video-password']: + PRIVATE_OPTS = ['-p', '--password', '-u', '--username', '--video-password'] + eqre = re.compile('^(?P' + ('|'.join(re.escape(po) for po in PRIVATE_OPTS)) + ')=.+$') + + def _scrub_eq(o): + m = eqre.match(o) + if m: + return m.group('key') + '=PRIVATE' + else: + return o + + opts = list(map(_scrub_eq, opts)) + for private_opt in PRIVATE_OPTS: try: i = opts.index(private_opt) opts[i + 1] = 'PRIVATE' From ce28252c485cea5cdc2b0f5fd9f1c8d50c26c7a2 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 2 Aug 2016 17:03:46 +0200 Subject: [PATCH 020/775] [options] Add test that checks that --password=secret is hidden in verbose output --- test/test_verbose_output.py | 70 +++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 test/test_verbose_output.py diff --git a/test/test_verbose_output.py b/test/test_verbose_output.py new file mode 100644 index 000000000..4c77df242 --- /dev/null +++ b/test/test_verbose_output.py @@ -0,0 +1,70 @@ +#!/usr/bin/env python +# coding: utf-8 + +from __future__ import unicode_literals + +import unittest + +import sys +import os +import subprocess +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + + +class TestVerboseOutput(unittest.TestCase): + def test_private_info_arg(self): + outp = subprocess.Popen( + [ + sys.executable, 'youtube_dl/__main__.py', '-v', + '--username', 'johnsmith@gmail.com', + '--password', 'secret', + ], cwd=rootDir, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + sout, serr = outp.communicate() + self.assertTrue('--username' in serr) + self.assertTrue('johnsmith' not in serr) + self.assertTrue('--password' in serr) + self.assertTrue('secret' not in serr) + + def test_private_info_shortarg(self): + outp = subprocess.Popen( + [ + sys.executable, 'youtube_dl/__main__.py', '-v', + '-u', 'johnsmith@gmail.com', + '-p', 'secret', + ], cwd=rootDir, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + sout, serr = outp.communicate() + self.assertTrue('-u' in serr) + self.assertTrue('johnsmith' not in serr) + self.assertTrue('-p' in serr) + self.assertTrue('secret' not in serr) + + def test_private_info_eq(self): + outp = subprocess.Popen( + [ + sys.executable, 'youtube_dl/__main__.py', '-v', + '--username=johnsmith@gmail.com', + '--password=secret', + ], cwd=rootDir, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + sout, serr = outp.communicate() + self.assertTrue('--username' in serr) + self.assertTrue('johnsmith' not in serr) + self.assertTrue('--password' in serr) + self.assertTrue('secret' not in serr) + + def test_private_info_shortarg_eq(self): + outp = subprocess.Popen( + [ + sys.executable, 'youtube_dl/__main__.py', '-v', + '-u=johnsmith@gmail.com', + '-p=secret', + ], cwd=rootDir, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + sout, serr = outp.communicate() + self.assertTrue('-u' in serr) + self.assertTrue('johnsmith' not in serr) + 
self.assertTrue('-p' in serr) + self.assertTrue('secret' not in serr) + +if __name__ == '__main__': + unittest.main() From b070564efb190e1740c4e9d5fdde25cc436c72c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 2 Aug 2016 22:55:14 +0700 Subject: [PATCH 021/775] [extractor/common] Support multiple properties in _og_search_property --- test/test_InfoExtractor.py | 3 +++ youtube_dl/extractor/common.py | 9 +++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index 88e8ff904..a98305c74 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -48,6 +48,9 @@ class TestInfoExtractor(unittest.TestCase): self.assertEqual(ie._og_search_property('foobar', html), 'Foo') self.assertEqual(ie._og_search_property('test1', html), 'foo > < bar') self.assertEqual(ie._og_search_property('test2', html), 'foo >//< bar') + self.assertEqual(ie._og_search_property(('test0', 'test1'), html), 'foo > < bar') + self.assertRaises(RegexNotFoundError, ie._og_search_property, 'test0', html, None, fatal=True) + self.assertRaises(RegexNotFoundError, ie._og_search_property, ('test0', 'test00'), html, None, fatal=True) def test_html_search_meta(self): ie = self.ie diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 53c28f016..3b6a5491d 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -727,9 +727,14 @@ class InfoExtractor(object): [^>]+?content=(["\'])(?P.*?)\2''' % re.escape(prop) def _og_search_property(self, prop, html, name=None, **kargs): + if not isinstance(prop, (list, tuple)): + prop = [prop] if name is None: - name = 'OpenGraph %s' % prop - escaped = self._search_regex(self._og_regexes(prop), html, name, flags=re.DOTALL, **kargs) + name = 'OpenGraph %s' % prop[0] + og_regexes = [] + for p in prop: + og_regexes.extend(self._og_regexes(p)) + escaped = self._search_regex(og_regexes, html, name, flags=re.DOTALL, **kargs) if escaped is None: return None return unescapeHTML(escaped) From 9cb0e65d7e1c87a29268d30c6159e595bcf210ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 2 Aug 2016 22:56:48 +0700 Subject: [PATCH 022/775] [ntvru] Fix extraction --- youtube_dl/extractor/ntvru.py | 122 ++++++++++++++++++---------------- 1 file changed, 63 insertions(+), 59 deletions(-) diff --git a/youtube_dl/extractor/ntvru.py b/youtube_dl/extractor/ntvru.py index 0895d7ea4..e8702ebcd 100644 --- a/youtube_dl/extractor/ntvru.py +++ b/youtube_dl/extractor/ntvru.py @@ -11,70 +11,64 @@ from ..utils import ( class NTVRuIE(InfoExtractor): IE_NAME = 'ntv.ru' - _VALID_URL = r'https?://(?:www\.)?ntv\.ru/(?P.+)' + _VALID_URL = r'https?://(?:www\.)?ntv\.ru/(?:[^/]+/)*(?P[^/?#&]+)' - _TESTS = [ - { - 'url': 'http://www.ntv.ru/novosti/863142/', - 'md5': 'ba7ea172a91cb83eb734cad18c10e723', - 'info_dict': { - 'id': '746000', - 'ext': 'mp4', - 'title': 'Командующий Черноморским флотом провел переговоры в штабе ВМС Украины', - 'description': 'Командующий Черноморским флотом провел переговоры в штабе ВМС Украины', - 'thumbnail': 're:^http://.*\.jpg', - 'duration': 136, - }, + _TESTS = [{ + 'url': 'http://www.ntv.ru/novosti/863142/', + 'md5': 'ba7ea172a91cb83eb734cad18c10e723', + 'info_dict': { + 'id': '746000', + 'ext': 'mp4', + 'title': 'Командующий Черноморским флотом провел переговоры в штабе ВМС Украины', + 'description': 'Командующий Черноморским флотом провел переговоры в штабе ВМС Украины', + 'thumbnail': 're:^http://.*\.jpg', + 'duration': 
136, }, - { - 'url': 'http://www.ntv.ru/video/novosti/750370/', - 'md5': 'adecff79691b4d71e25220a191477124', - 'info_dict': { - 'id': '750370', - 'ext': 'mp4', - 'title': 'Родные пассажиров пропавшего Boeing не верят в трагический исход', - 'description': 'Родные пассажиров пропавшего Boeing не верят в трагический исход', - 'thumbnail': 're:^http://.*\.jpg', - 'duration': 172, - }, + }, { + 'url': 'http://www.ntv.ru/video/novosti/750370/', + 'md5': 'adecff79691b4d71e25220a191477124', + 'info_dict': { + 'id': '750370', + 'ext': 'mp4', + 'title': 'Родные пассажиров пропавшего Boeing не верят в трагический исход', + 'description': 'Родные пассажиров пропавшего Boeing не верят в трагический исход', + 'thumbnail': 're:^http://.*\.jpg', + 'duration': 172, }, - { - 'url': 'http://www.ntv.ru/peredacha/segodnya/m23700/o232416', - 'md5': '82dbd49b38e3af1d00df16acbeab260c', - 'info_dict': { - 'id': '747480', - 'ext': 'mp4', - 'title': '«Сегодня». 21 марта 2014 года. 16:00', - 'description': '«Сегодня». 21 марта 2014 года. 16:00', - 'thumbnail': 're:^http://.*\.jpg', - 'duration': 1496, - }, + }, { + 'url': 'http://www.ntv.ru/peredacha/segodnya/m23700/o232416', + 'md5': '82dbd49b38e3af1d00df16acbeab260c', + 'info_dict': { + 'id': '747480', + 'ext': 'mp4', + 'title': '«Сегодня». 21 марта 2014 года. 16:00', + 'description': '«Сегодня». 21 марта 2014 года. 16:00', + 'thumbnail': 're:^http://.*\.jpg', + 'duration': 1496, }, - { - 'url': 'http://www.ntv.ru/kino/Koma_film', - 'md5': 'f825770930937aa7e5aca0dc0d29319a', - 'info_dict': { - 'id': '1007609', - 'ext': 'mp4', - 'title': 'Остросюжетный фильм «Кома»', - 'description': 'Остросюжетный фильм «Кома»', - 'thumbnail': 're:^http://.*\.jpg', - 'duration': 5592, - }, + }, { + 'url': 'http://www.ntv.ru/kino/Koma_film', + 'md5': 'f825770930937aa7e5aca0dc0d29319a', + 'info_dict': { + 'id': '1007609', + 'ext': 'mp4', + 'title': 'Остросюжетный фильм «Кома»', + 'description': 'Остросюжетный фильм «Кома»', + 'thumbnail': 're:^http://.*\.jpg', + 'duration': 5592, }, - { - 'url': 'http://www.ntv.ru/serial/Delo_vrachey/m31760/o233916/', - 'md5': '9320cd0e23f3ea59c330dc744e06ff3b', - 'info_dict': { - 'id': '751482', - 'ext': 'mp4', - 'title': '«Дело врачей»: «Деревце жизни»', - 'description': '«Дело врачей»: «Деревце жизни»', - 'thumbnail': 're:^http://.*\.jpg', - 'duration': 2590, - }, + }, { + 'url': 'http://www.ntv.ru/serial/Delo_vrachey/m31760/o233916/', + 'md5': '9320cd0e23f3ea59c330dc744e06ff3b', + 'info_dict': { + 'id': '751482', + 'ext': 'mp4', + 'title': '«Дело врачей»: «Деревце жизни»', + 'description': '«Дело врачей»: «Деревце жизни»', + 'thumbnail': 're:^http://.*\.jpg', + 'duration': 2590, }, - ] + }] _VIDEO_ID_REGEXES = [ r'\youtube-dl.conf`. Note that by default configuration file may not exist so you may need to create it yourself. +You can configure youtube-dl by placing any supported command line option to a configuration file. On Linux and OS X, the system wide configuration file is located at `/etc/youtube-dl.conf` and the user wide configuration file at `~/.config/youtube-dl/config`. On Windows, the user wide configuration file locations are `%APPDATA%\youtube-dl\config.txt` or `C:\Users\\youtube-dl.conf`. Note that by default configuration file may not exist so you may need to create it yourself. 
For example, with the following configuration file youtube-dl will always extract the audio, not copy the mtime, use a proxy and save all videos under `Movies` directory in your home directory: ``` From 8b40854529eea6b32f41df448c475b87a42038d1 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 4 Aug 2016 09:24:20 +0100 Subject: [PATCH 027/775] [common] lower proto_preference of rtsp formats Most of the time the RtspFD fail to download videos but it report success of the download with this output: [mpv] 0 bytes [download] 100% of 0.00B --- youtube_dl/extractor/common.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 3b6a5491d..2d337d614 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -916,7 +916,8 @@ class InfoExtractor(object): if f.get('ext') in ['f4f', 'f4m']: # Not yet supported preference -= 0.5 - proto_preference = 0 if determine_protocol(f) in ['http', 'https'] else -0.1 + protocol = f.get('protocol') or determine_protocol(f) + proto_preference = 0 if protocol in ['http', 'https'] else (-0.5 if protocol == 'rtsp' else -0.1) if f.get('vcodec') == 'none': # audio only preference -= 50 From 217d5ae0137943829db23d13eee425e5fd7c08ae Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 4 Aug 2016 09:37:27 +0100 Subject: [PATCH 028/775] [vodplatform] Add new extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/generic.py | 8 ++++ youtube_dl/extractor/vodplatform.py | 58 +++++++++++++++++++++++++++++ 3 files changed, 67 insertions(+) create mode 100644 youtube_dl/extractor/vodplatform.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 8b866ed57..909f7d25c 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1004,6 +1004,7 @@ from .vk import ( ) from .vlive import VLiveIE from .vodlocker import VodlockerIE +from .vodplatform import VODPlatformIE from .voicerepublic import VoiceRepublicIE from .voxmedia import VoxMediaIE from .vporn import VpornIE diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 5364f0b19..e89a03760 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2207,6 +2207,14 @@ class GenericIE(InfoExtractor): return self.url_result( self._proto_relative_url(unescapeHTML(mobj.group(1))), 'Vine') + # Look for VODPlatform embeds + mobj = re.search( + r']+src=[\'"]((?:https?:)?//(?:www\.)?vod-platform\.net/embed/[^/?#]+)', + webpage) + if mobj is not None: + return self.url_result( + self._proto_relative_url(unescapeHTML(mobj.group(1))), 'VODPlatform') + # Look for Instagram embeds instagram_embed_url = InstagramIE._extract_embed_url(webpage) if instagram_embed_url is not None: diff --git a/youtube_dl/extractor/vodplatform.py b/youtube_dl/extractor/vodplatform.py new file mode 100644 index 000000000..b49542b16 --- /dev/null +++ b/youtube_dl/extractor/vodplatform.py @@ -0,0 +1,58 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import unescapeHTML + + +class VODPlatformIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?vod-platform\.net/embed/(?P[^/?#]+)' + _TEST = { + # from http://www.lbcgroup.tv/watch/chapter/29143/52844/%D8%A7%D9%84%D9%86%D8%B5%D8%B1%D8%A9-%D9%81%D9%8A-%D8%B6%D9%8A%D8%A7%D9%81%D8%A9-%D8%A7%D9%84%D9%80-cnn/ar + 'url': 'http://vod-platform.net/embed/RufMcytHDolTH1MuKHY9Fw', + 'md5': 
'1db2b7249ce383d6be96499006e951fc', + 'info_dict': { + 'id': 'RufMcytHDolTH1MuKHY9Fw', + 'ext': 'mp4', + 'title': 'LBCi News_ النصرة في ضيافة الـ "سي.أن.أن"', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + title = unescapeHTML(self._og_search_title(webpage)) + hidden_inputs = self._hidden_inputs(webpage) + + base_url = self._search_regex( + '(.*/)(?:playlist.m3u8|manifest.mpd)', + hidden_inputs.get('HiddenmyhHlsLink') or hidden_inputs['HiddenmyDashLink'], + 'base url') + formats = self._extract_m3u8_formats( + base_url + 'playlist.m3u8', video_id, 'mp4', + 'm3u8_native', m3u8_id='hls', fatal=False) + formats.extend(self._extract_mpd_formats( + base_url + 'manifest.mpd', video_id, + mpd_id='dash', fatal=False)) + rtmp_formats = self._extract_smil_formats( + base_url + 'jwplayer.smil', video_id, fatal=False) + for rtmp_format in rtmp_formats: + rtsp_format = rtmp_format.copy() + rtsp_format['url'] = '%s/%s' % (rtmp_format['url'], rtmp_format['play_path']) + del rtsp_format['play_path'] + del rtsp_format['ext'] + rtsp_format.update({ + 'url': rtsp_format['url'].replace('rtmp://', 'rtsp://'), + 'format_id': rtmp_format['format_id'].replace('rtmp', 'rtsp'), + 'protocol': 'rtsp', + }) + formats.extend([rtmp_format, rtsp_format]) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'thumbnail': hidden_inputs.get('HiddenThumbnail') or self._og_search_thumbnail(webpage), + 'formats': formats, + } From 1094074c045140e9a91b521b0a933f394a7bba91 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 4 Aug 2016 09:38:37 +0100 Subject: [PATCH 029/775] [kaltura] extract subtitles and reduce requests --- youtube_dl/extractor/kaltura.py | 43 +++++++++++++++++++++++++++------ 1 file changed, 36 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py index 1729f5bfb..9930ea710 100644 --- a/youtube_dl/extractor/kaltura.py +++ b/youtube_dl/extractor/kaltura.py @@ -62,6 +62,11 @@ class KalturaIE(InfoExtractor): { 'url': 'https://cdnapisec.kaltura.com/html5/html5lib/v2.30.2/mwEmbedFrame.php/p/1337/uiconf_id/20540612/entry_id/1_sf5ovm7u?wid=_243342', 'only_matching': True, + }, + { + # video with subtitles + 'url': 'kaltura:111032:1_cw786r8q', + 'only_matching': True, } ] @@ -130,7 +135,6 @@ class KalturaIE(InfoExtractor): video_id, actions, service_url, note='Downloading Kaltura signature')['ks'] def _get_video_info(self, video_id, partner_id, service_url=None): - signature = self._get_kaltura_signature(video_id, partner_id, service_url) actions = [ { 'action': 'null', @@ -138,18 +142,30 @@ class KalturaIE(InfoExtractor): 'clientTag': 'kdp:v3.8.5', 'format': 1, # JSON, 2 = XML, 3 = PHP 'service': 'multirequest', - 'ks': signature, + }, + { + 'expiry': 86400, + 'service': 'session', + 'action': 'startWidgetSession', + 'widgetId': '_%s' % partner_id, }, { 'action': 'get', 'entryId': video_id, 'service': 'baseentry', - 'version': '-1', + 'ks': '{1:result:ks}', }, { 'action': 'getbyentryid', 'entryId': video_id, 'service': 'flavorAsset', + 'ks': '{1:result:ks}', + }, + { + 'action': 'list', + 'filter:entryIdEqual': video_id, + 'service': 'caption_captionasset', + 'ks': '{1:result:ks}', }, ] return self._kaltura_api_call( @@ -161,8 +177,9 @@ class KalturaIE(InfoExtractor): mobj = re.match(self._VALID_URL, url) partner_id, entry_id = mobj.group('partner_id', 'id') ks = None + captions = None if partner_id and entry_id: - info, flavor_assets = 
self._get_video_info(entry_id, partner_id, smuggled_data.get('service_url')) + _, info, flavor_assets, captions = self._get_video_info(entry_id, partner_id, smuggled_data.get('service_url')) else: path, query = mobj.group('path', 'query') if not path and not query: @@ -181,7 +198,7 @@ class KalturaIE(InfoExtractor): raise ExtractorError('Invalid URL', expected=True) if 'entry_id' in params: entry_id = params['entry_id'][0] - info, flavor_assets = self._get_video_info(entry_id, partner_id) + _, info, flavor_assets, captions = self._get_video_info(entry_id, partner_id) elif 'uiconf_id' in params and 'flashvars[referenceId]' in params: reference_id = params['flashvars[referenceId]'][0] webpage = self._download_webpage(url, reference_id) @@ -217,7 +234,7 @@ class KalturaIE(InfoExtractor): formats = [] for f in flavor_assets: # Continue if asset is not ready - if f['status'] != 2: + if f.get('status') != 2: continue video_url = sign_url( '%s/flavorId/%s' % (data_url, f['id'])) @@ -240,13 +257,25 @@ class KalturaIE(InfoExtractor): m3u8_url, entry_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) - self._check_formats(formats, entry_id) self._sort_formats(formats) + subtitles = {} + if captions: + for caption in captions.get('objects', []): + print(caption) + # Continue if caption is not ready + if f.get('status') != 2: + continue + subtitles.setdefault(caption.get('languageCode') or caption.get('language'), []).append({ + 'url': '%s/api_v3/service/caption_captionasset/action/serve/captionAssetId/%s' % (self._SERVICE_URL, caption['id']), + 'ext': caption.get('fileExt'), + }) + return { 'id': entry_id, 'title': info['name'], 'formats': formats, + 'subtitles': subtitles, 'description': clean_html(info.get('description')), 'thumbnail': info.get('thumbnailUrl'), 'duration': info.get('duration'), From 1891ea2d760a49d356a472516db40bad8309ef3c Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 4 Aug 2016 12:18:10 +0100 Subject: [PATCH 030/775] [nationalgeographic] Add support for National Geographic Episode Guide --- youtube_dl/extractor/nationalgeographic.py | 54 ++++++++++++++++++++-- 1 file changed, 49 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/nationalgeographic.py b/youtube_dl/extractor/nationalgeographic.py index e717abb9f..fe43d4bc8 100644 --- a/youtube_dl/extractor/nationalgeographic.py +++ b/youtube_dl/extractor/nationalgeographic.py @@ -1,16 +1,19 @@ from __future__ import unicode_literals +import re + from .common import InfoExtractor from .theplatform import ThePlatformIE from ..utils import ( smuggle_url, url_basename, update_url_query, + get_element_by_class, ) -class NationalGeographicIE(InfoExtractor): - IE_NAME = 'natgeo' +class NationalGeographicVideoIE(InfoExtractor): + IE_NAME = 'natgeo:video' _VALID_URL = r'https?://video\.nationalgeographic\.com/.*?' 
_TESTS = [ @@ -62,9 +65,9 @@ class NationalGeographicIE(InfoExtractor): } -class NationalGeographicChannelIE(ThePlatformIE): - IE_NAME = 'natgeo:channel' - _VALID_URL = r'https?://channel\.nationalgeographic\.com/(?:wild/)?[^/]+/videos/(?P[^/?]+)' +class NationalGeographicIE(ThePlatformIE): + IE_NAME = 'natgeo' + _VALID_URL = r'https?://channel\.nationalgeographic\.com/(?:wild/)?[^/]+/(?:videos|episodes)/(?P[^/?]+)' _TESTS = [ { @@ -95,6 +98,10 @@ class NationalGeographicChannelIE(ThePlatformIE): }, 'add_ie': ['ThePlatform'], }, + { + 'url': 'http://channel.nationalgeographic.com/the-story-of-god-with-morgan-freeman/episodes/the-power-of-miracles/', + 'only_matching': True, + } ] def _real_extract(self, url): @@ -122,3 +129,40 @@ class NationalGeographicChannelIE(ThePlatformIE): {'force_smil_url': True}), 'display_id': display_id, } + + +class NationalGeographicEpisodeGuideIE(ThePlatformIE): + IE_NAME = 'natgeo:episodeguide' + _VALID_URL = r'https?://channel\.nationalgeographic\.com/(?:wild/)?(?P[^/]+)/episode-guide' + _TESTS = [ + { + 'url': 'http://channel.nationalgeographic.com/the-story-of-god-with-morgan-freeman/episode-guide/', + 'info_dict': { + 'id': 'the-story-of-god-with-morgan-freeman-season-1', + 'title': 'The Story of God with Morgan Freeman - Season 1', + }, + 'playlist_mincount': 6, + }, + { + 'url': 'http://channel.nationalgeographic.com/underworld-inc/episode-guide/?s=2', + 'info_dict': { + 'id': 'underworld-inc-season-2', + 'title': 'Underworld, Inc. - Season 2', + }, + 'playlist_mincount': 7, + }, + ] + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + show = get_element_by_class('show', webpage) + selected_season = self._search_regex( + r']+class="select-seasons[^"]*".*?]*>(.*?)', + webpage, 'selected season') + entries = [ + self.url_result(self._proto_relative_url(url), 'NationalGeographic') + for url in re.findall('(?s)]+class="col-inner"[^>]*?>.*?]+href="([^"]+)"', webpage)] + return self.playlist_result( + entries, '%s-%s' % (display_id, selected_season.lower().replace(' ', '-')), + '%s - %s' % (show, selected_season)) From 3c2c3af059d99e6795d1944a55d6601b5959871b Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 4 Aug 2016 12:20:56 +0100 Subject: [PATCH 031/775] [extractors] change imports for national geographic extractors --- youtube_dl/extractor/extractors.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 909f7d25c..28858cfea 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -491,8 +491,9 @@ from .myvi import MyviIE from .myvideo import MyVideoIE from .myvidster import MyVidsterIE from .nationalgeographic import ( + NationalGeographicVideoIE, NationalGeographicIE, - NationalGeographicChannelIE, + NationalGeographicEpisodeGuideIE, ) from .naver import NaverIE from .nba import NBAIE From 14704aeff6eeee4357e3a26f83432ff908db64fc Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 4 Aug 2016 14:54:34 +0100 Subject: [PATCH 032/775] [kaltura] remove debugging line --- youtube_dl/extractor/kaltura.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py index 9930ea710..ddf1165ff 100644 --- a/youtube_dl/extractor/kaltura.py +++ b/youtube_dl/extractor/kaltura.py @@ -262,7 +262,6 @@ class KalturaIE(InfoExtractor): subtitles = {} if captions: for caption in captions.get('objects', []): - 
print(caption) # Continue if caption is not ready if f.get('status') != 2: continue From 2396062c747ee81420dac0eac914d531ec8df910 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 4 Aug 2016 16:21:27 +0100 Subject: [PATCH 033/775] [5min] delegate extraction to AolIE recently the 5min SenseHandler request return HTTP Error 503: Service Unavailable error --- youtube_dl/extractor/fivemin.py | 135 +++++--------------------------- 1 file changed, 19 insertions(+), 116 deletions(-) diff --git a/youtube_dl/extractor/fivemin.py b/youtube_dl/extractor/fivemin.py index 6b8345416..2f882cbcb 100644 --- a/youtube_dl/extractor/fivemin.py +++ b/youtube_dl/extractor/fivemin.py @@ -1,24 +1,11 @@ from __future__ import unicode_literals -import re - from .common import InfoExtractor -from ..compat import ( - compat_parse_qs, - compat_urllib_parse_urlencode, - compat_urllib_parse_urlparse, - compat_urlparse, -) -from ..utils import ( - ExtractorError, - parse_duration, - replace_extension, -) class FiveMinIE(InfoExtractor): IE_NAME = '5min' - _VALID_URL = r'(?:5min:(?P\d+)(?::(?P\d+))?|https?://[^/]*?5min\.com/Scripts/PlayerSeed\.js\?(?P.*))' + _VALID_URL = r'(?:5min:|(?:https?://(?:[^/]*?5min\.com|delivery\.vidible\.tv/aol)/(?:(?:Scripts/PlayerSeed\.js|playerseed/?)?\?.*?playList=)?)(?P\d+)' _TESTS = [ { @@ -29,8 +16,16 @@ class FiveMinIE(InfoExtractor): 'id': '518013791', 'ext': 'mp4', 'title': 'iPad Mini with Retina Display Review', + 'description': 'iPad mini with Retina Display review', 'duration': 177, + 'uploader': 'engadget', + 'upload_date': '20131115', + 'timestamp': 1384515288, }, + 'params': { + # m3u8 download + 'skip_download': True, + } }, { # From http://on.aol.com/video/how-to-make-a-next-level-fruit-salad-518086247 @@ -44,108 +39,16 @@ class FiveMinIE(InfoExtractor): }, 'skip': 'no longer available', }, + { + 'url': 'http://embed.5min.com/518726732/', + 'only_matching': True, + }, + { + 'url': 'http://delivery.vidible.tv/aol?playList=518013791', + 'only_matching': True, + } ] - _ERRORS = { - 'ErrorVideoNotExist': 'We\'re sorry, but the video you are trying to watch does not exist.', - 'ErrorVideoNoLongerAvailable': 'We\'re sorry, but the video you are trying to watch is no longer available.', - 'ErrorVideoRejected': 'We\'re sorry, but the video you are trying to watch has been removed.', - 'ErrorVideoUserNotGeo': 'We\'re sorry, but the video you are trying to watch cannot be viewed from your current location.', - 'ErrorVideoLibraryRestriction': 'We\'re sorry, but the video you are trying to watch is currently unavailable for viewing at this domain.', - 'ErrorExposurePermission': 'We\'re sorry, but the video you are trying to watch is currently unavailable for viewing at this domain.', - } - _QUALITIES = { - 1: { - 'width': 640, - 'height': 360, - }, - 2: { - 'width': 854, - 'height': 480, - }, - 4: { - 'width': 1280, - 'height': 720, - }, - 8: { - 'width': 1920, - 'height': 1080, - }, - 16: { - 'width': 640, - 'height': 360, - }, - 32: { - 'width': 854, - 'height': 480, - }, - 64: { - 'width': 1280, - 'height': 720, - }, - 128: { - 'width': 640, - 'height': 360, - }, - } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - sid = mobj.group('sid') - - if mobj.group('query'): - qs = compat_parse_qs(mobj.group('query')) - if not qs.get('playList'): - raise ExtractorError('Invalid URL', expected=True) - video_id = qs['playList'][0] - if qs.get('sid'): - sid = qs['sid'][0] - - embed_url = 'https://embed.5min.com/playerseed/?playList=%s' % 
video_id - if not sid: - embed_page = self._download_webpage(embed_url, video_id, - 'Downloading embed page') - sid = self._search_regex(r'sid=(\d+)', embed_page, 'sid') - - response = self._download_json( - 'https://syn.5min.com/handlers/SenseHandler.ashx?' + - compat_urllib_parse_urlencode({ - 'func': 'GetResults', - 'playlist': video_id, - 'sid': sid, - 'isPlayerSeed': 'true', - 'url': embed_url, - }), - video_id) - if not response['success']: - raise ExtractorError( - '%s said: %s' % ( - self.IE_NAME, - self._ERRORS.get(response['errorMessage'], response['errorMessage'])), - expected=True) - info = response['binding'][0] - - formats = [] - parsed_video_url = compat_urllib_parse_urlparse(compat_parse_qs( - compat_urllib_parse_urlparse(info['EmbededURL']).query)['videoUrl'][0]) - for rendition in info['Renditions']: - if rendition['RenditionType'] == 'aac' or rendition['RenditionType'] == 'm3u8': - continue - else: - rendition_url = compat_urlparse.urlunparse(parsed_video_url._replace(path=replace_extension(parsed_video_url.path.replace('//', '/%s/' % rendition['ID']), rendition['RenditionType']))) - quality = self._QUALITIES.get(rendition['ID'], {}) - formats.append({ - 'format_id': '%s-%d' % (rendition['RenditionType'], rendition['ID']), - 'url': rendition_url, - 'width': quality.get('width'), - 'height': quality.get('height'), - }) - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': info['Title'], - 'thumbnail': info.get('ThumbURL'), - 'duration': parse_duration(info.get('Duration')), - 'formats': formats, - } + video_id = self._match_id(url) + return self.url_result('aol-video:%s' % video_id) From 52e7fcfeb794ca0e50e312357d60234bcca5118c Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 4 Aug 2016 16:34:47 +0100 Subject: [PATCH 034/775] [engadget] Relax _VALID_URL --- youtube_dl/extractor/engadget.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/engadget.py b/youtube_dl/extractor/engadget.py index e5e57d485..a39e9010d 100644 --- a/youtube_dl/extractor/engadget.py +++ b/youtube_dl/extractor/engadget.py @@ -4,9 +4,10 @@ from .common import InfoExtractor class EngadgetIE(InfoExtractor): - _VALID_URL = r'https?://www.engadget.com/video/(?P\d+)' + _VALID_URL = r'https?://www.engadget.com/video/(?P[^/?#]+)' - _TEST = { + _TESTS = [{ + # video with 5min ID 'url': 'http://www.engadget.com/video/518153925/', 'md5': 'c6820d4828a5064447a4d9fc73f312c9', 'info_dict': { @@ -15,8 +16,12 @@ class EngadgetIE(InfoExtractor): 'title': 'Samsung Galaxy Tab Pro 8.4 Review', }, 'add_ie': ['FiveMin'], - } + }, { + # video with vidible ID + 'url': 'https://www.engadget.com/video/57a28462134aa15a39f0421a/', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) - return self.url_result('5min:%s' % video_id) + return self.url_result('aol-video:%s' % video_id) From 8895be01fc2d3774aa0b9b000af20c22903d3391 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 4 Aug 2016 16:55:12 +0100 Subject: [PATCH 035/775] [5min] fix _VALID_URL --- youtube_dl/extractor/fivemin.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/fivemin.py b/youtube_dl/extractor/fivemin.py index 2f882cbcb..ab82701c2 100644 --- a/youtube_dl/extractor/fivemin.py +++ b/youtube_dl/extractor/fivemin.py @@ -5,7 +5,7 @@ from .common import InfoExtractor class FiveMinIE(InfoExtractor): IE_NAME = '5min' - _VALID_URL = 
r'(?:5min:|(?:https?://(?:[^/]*?5min\.com|delivery\.vidible\.tv/aol)/(?:(?:Scripts/PlayerSeed\.js|playerseed/?)?\?.*?playList=)?)(?P\d+)' + _VALID_URL = r'(?:5min:|https?://(?:[^/]*?5min\.com|delivery\.vidible\.tv/aol)/(?:(?:Scripts/PlayerSeed\.js|playerseed/?)?\?.*?playList=)?)(?P\d+)' _TESTS = [ { From 8a00ea567b88ea240c6fe672387cc5cc482495fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 4 Aug 2016 23:21:04 +0700 Subject: [PATCH 036/775] [natgeo:episodeguide] Do not shadow url from outer scope --- youtube_dl/extractor/nationalgeographic.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/nationalgeographic.py b/youtube_dl/extractor/nationalgeographic.py index fe43d4bc8..4bffcf704 100644 --- a/youtube_dl/extractor/nationalgeographic.py +++ b/youtube_dl/extractor/nationalgeographic.py @@ -161,8 +161,8 @@ class NationalGeographicEpisodeGuideIE(ThePlatformIE): r']+class="select-seasons[^"]*".*?]*>(.*?)', webpage, 'selected season') entries = [ - self.url_result(self._proto_relative_url(url), 'NationalGeographic') - for url in re.findall('(?s)]+class="col-inner"[^>]*?>.*?]+href="([^"]+)"', webpage)] + self.url_result(self._proto_relative_url(entry_url), 'NationalGeographic') + for entry_url in re.findall('(?s)]+class="col-inner"[^>]*?>.*?]+href="([^"]+)"', webpage)] return self.playlist_result( entries, '%s-%s' % (display_id, selected_season.lower().replace(' ', '-')), '%s - %s' % (show, selected_season)) From 4f427c4be860c582ca72dd4be64d45b54499232c Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 4 Aug 2016 18:28:49 +0100 Subject: [PATCH 037/775] [condenast] improve extraction --- youtube_dl/extractor/condenast.py | 111 ++++++++++++++++++------------ 1 file changed, 66 insertions(+), 45 deletions(-) diff --git a/youtube_dl/extractor/condenast.py b/youtube_dl/extractor/condenast.py index e8f2b5a07..976a0e89d 100644 --- a/youtube_dl/extractor/condenast.py +++ b/youtube_dl/extractor/condenast.py @@ -5,13 +5,17 @@ import re from .common import InfoExtractor from ..compat import ( - compat_urllib_parse_urlencode, compat_urllib_parse_urlparse, compat_urlparse, ) from ..utils import ( orderedSet, remove_end, + extract_attributes, + mimetype2ext, + determine_ext, + int_or_none, + parse_iso8601, ) @@ -58,6 +62,9 @@ class CondeNastIE(InfoExtractor): 'ext': 'mp4', 'title': '3D Printed Speakers Lit With LED', 'description': 'Check out these beautiful 3D printed LED speakers. You can\'t actually buy them, but LumiGeek is working on a board that will let you make you\'re own.', + 'uploader': 'wired', + 'upload_date': '20130314', + 'timestamp': 1363219200, } }, { # JS embed @@ -67,70 +74,84 @@ class CondeNastIE(InfoExtractor): 'id': '55f9cf8b61646d1acf00000c', 'ext': 'mp4', 'title': '3D printed TSA Travel Sentry keys really do open TSA locks', + 'uploader': 'arstechnica', + 'upload_date': '20150916', + 'timestamp': 1442434955, } }] def _extract_series(self, url, webpage): - title = self._html_search_regex(r'
<div class="cne-series-info">.*?<h1>(.+?)</h1>',
-                                        webpage, 'series title', flags=re.DOTALL)
+        title = self._html_search_regex(
+            r'(?s)<div class="cne-series-info">.*?<h1>(.+?)</h1>',
+            webpage, 'series title')
         url_object = compat_urllib_parse_urlparse(url)
         base_url = '%s://%s' % (url_object.scheme, url_object.netloc)
-        m_paths = re.finditer(r'<p class="cne-thumb-title">.*?<a href="(/watch/.+?)["\?]',
-                              webpage, flags=re.DOTALL)
+        m_paths = re.finditer(
+            r'(?s)<p class="cne-thumb-title">.*?<a href="(/watch/.+?)["\?]', webpage)
         paths = orderedSet(m.group(1) for m in m_paths)
         build_url = lambda path: compat_urlparse.urljoin(base_url, path)
         entries = [self.url_result(build_url(p), 'CondeNast') for p in paths]
         return self.playlist_result(entries, playlist_title=title)

     def _extract_video(self, webpage, url_type):
-        if url_type != 'embed':
-            description = self._html_search_regex(
-                [
-                    r'<div class="cne-video-description">(.+?)</div>',
-                    r'<div class="video-post-content">(.+?)</div>
', - ], - webpage, 'description', fatal=False, flags=re.DOTALL) + query = {} + params = self._search_regex( + r'(?s)var params = {(.+?)}[;,]', webpage, 'player params', default=None) + if params: + query.update({ + 'videoId': self._search_regex(r'videoId: [\'"](.+?)[\'"]', params, 'video id'), + 'playerId': self._search_regex(r'playerId: [\'"](.+?)[\'"]', params, 'player id'), + 'target': self._search_regex(r'target: [\'"](.+?)[\'"]', params, 'target'), + }) else: - description = None - params = self._search_regex(r'var params = {(.+?)}[;,]', webpage, - 'player params', flags=re.DOTALL) - video_id = self._search_regex(r'videoId: [\'"](.+?)[\'"]', params, 'video id') - player_id = self._search_regex(r'playerId: [\'"](.+?)[\'"]', params, 'player id') - target = self._search_regex(r'target: [\'"](.+?)[\'"]', params, 'target') - data = compat_urllib_parse_urlencode({'videoId': video_id, - 'playerId': player_id, - 'target': target, - }) - base_info_url = self._search_regex(r'url = [\'"](.+?)[\'"][,;]', - webpage, 'base info url', - default='http://player.cnevids.com/player/loader.js?') - info_url = base_info_url + data - info_page = self._download_webpage(info_url, video_id, - 'Downloading video info') - video_info = self._search_regex(r'var\s+video\s*=\s*({.+?});', info_page, 'video info') - video_info = self._parse_json(video_info, video_id) + params = extract_attributes(self._search_regex( + r'(<[^>]+data-js="video-player"[^>]+>)', + webpage, 'player params element')) + query.update({ + 'videoId': params['data-video'], + 'playerId': params['data-player'], + 'target': params['id'], + }) + video_id = query['videoId'] + info_page = self._download_webpage( + 'http://player.cnevids.com/player/video.js', + video_id, 'Downloading video info', query=query) + video_info = self._parse_json(self._search_regex( + r'loadCallback\(({.+})\)', info_page, 'video info'), video_id)['video'] + title = video_info['title'] - formats = [{ - 'format_id': '%s-%s' % (fdata['type'].split('/')[-1], fdata['quality']), - 'url': fdata['src'], - 'ext': fdata['type'].split('/')[-1], - 'quality': 1 if fdata['quality'] == 'high' else 0, - } for fdata in video_info['sources'][0]] + formats = [] + for fdata in video_info.get('sources', [{}])[0]: + src = fdata.get('src') + if not src: + continue + ext = mimetype2ext(fdata.get('type')) or determine_ext(src) + quality = fdata.get('quality') + formats.append({ + 'format_id': ext + ('-%s' % quality if quality else ''), + 'url': src, + 'ext': ext, + 'quality': 1 if quality == 'high' else 0, + }) self._sort_formats(formats) - return { + info = self._search_json_ld(webpage, video_id) if url_type != 'embed' else {} + info.update({ 'id': video_id, 'formats': formats, - 'title': video_info['title'], - 'thumbnail': video_info['poster_frame'], - 'description': description, - } + 'title': title, + 'thumbnail': video_info.get('poster_frame'), + 'uploader': video_info.get('brand'), + 'duration': int_or_none(video_info.get('duration')), + 'tags': video_info.get('tags'), + 'series': video_info.get('series_title'), + 'season': video_info.get('season_title'), + 'timestamp': parse_iso8601(video_info.get('premiere_date')), + }) + return info def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - site = mobj.group('site') - url_type = mobj.group('type') - item_id = mobj.group('id') + site, url_type, item_id = re.match(self._VALID_URL, url).groups() # Convert JS embed to regular embed if url_type == 'embedjs': From b02b960c6bba834d9e7199ac53430c7933079dc8 Mon Sep 17 00:00:00 2001 
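A note on the Naver change below: the extractor now reads everything from the play.rmcnmv.naver.com JSON API instead of the old XML endpoints. A minimal standalone sketch of that call, with made-up vid/key values (the real extractor scrapes them from the watch page):

# Minimal sketch of the VOD metadata call the Naver patch below relies on.
# The vid/key values are invented placeholders.
import json
try:
    from urllib.parse import urlencode
    from urllib.request import urlopen
except ImportError:  # Python 2
    from urllib import urlencode
    from urllib2 import urlopen

vid, key = 'DEADBEEF1234', 'V1234abcd'  # placeholders
video_data = json.loads(urlopen(
    'http://play.rmcnmv.naver.com/vod/play/v2.0/%s?%s' % (vid, urlencode({'key': key}))
).read().decode('utf-8'))
print(video_data['meta']['subject'])  # title
for stream in video_data.get('videos', {}).get('list', []):
    # progressive H264 renditions; HLS variants live under video_data['streams']
    print(stream.get('encodingOption', {}).get('name'), stream.get('source'))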
From: Remita Amine Date: Thu, 4 Aug 2016 21:42:22 +0100 Subject: [PATCH 038/775] [naver] improve extraction(closes #8096) --- youtube_dl/extractor/naver.py | 95 ++++++++++++++++++++--------------- 1 file changed, 54 insertions(+), 41 deletions(-) diff --git a/youtube_dl/extractor/naver.py b/youtube_dl/extractor/naver.py index 6d6f69b44..60a4872be 100644 --- a/youtube_dl/extractor/naver.py +++ b/youtube_dl/extractor/naver.py @@ -4,12 +4,10 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import ( - compat_urllib_parse_urlencode, - compat_urlparse, -) from ..utils import ( ExtractorError, + int_or_none, + update_url_query, ) @@ -23,7 +21,6 @@ class NaverIE(InfoExtractor): 'ext': 'mp4', 'title': '[9월 모의고사 해설강의][수학_김상희] 수학 A형 16~20번', 'description': '합격불변의 법칙 메가스터디 | 메가스터디 수학 김상희 선생님이 9월 모의고사 수학A형 16번에서 20번까지 해설강의를 공개합니다.', - 'upload_date': '20130903', }, }, { 'url': 'http://tvcast.naver.com/v/395837', @@ -33,7 +30,6 @@ class NaverIE(InfoExtractor): 'ext': 'mp4', 'title': '9년이 지나도 아픈 기억, 전효성의 아버지', 'description': 'md5:5bf200dcbf4b66eb1b350d1eb9c753f7', - 'upload_date': '20150519', }, 'skip': 'Georestricted', }] @@ -51,48 +47,65 @@ class NaverIE(InfoExtractor): if error: raise ExtractorError(error, expected=True) raise ExtractorError('couldn\'t extract vid and key') - vid = m_id.group(1) - key = m_id.group(2) - query = compat_urllib_parse_urlencode({'vid': vid, 'inKey': key, }) - query_urls = compat_urllib_parse_urlencode({ - 'masterVid': vid, - 'protocol': 'p2p', - 'inKey': key, + video_data = self._download_json('http://play.rmcnmv.naver.com/vod/play/v2.0/' + m_id.group(1), video_id, query={ + 'key': m_id.group(2), }) - info = self._download_xml( - 'http://serviceapi.rmcnmv.naver.com/flash/videoInfo.nhn?' + query, - video_id, 'Downloading video info') - urls = self._download_xml( - 'http://serviceapi.rmcnmv.naver.com/flash/playableEncodingOption.nhn?' 
+ query_urls, - video_id, 'Downloading video formats info') - + meta = video_data['meta'] + title = meta['subject'] formats = [] - for format_el in urls.findall('EncodingOptions/EncodingOption'): - domain = format_el.find('Domain').text - uri = format_el.find('uri').text - f = { - 'url': compat_urlparse.urljoin(domain, uri), - 'ext': 'mp4', - 'width': int(format_el.find('width').text), - 'height': int(format_el.find('height').text), - } - if domain.startswith('rtmp'): - # urlparse does not support custom schemes - # https://bugs.python.org/issue18828 - f.update({ - 'url': domain + uri, - 'ext': 'flv', - 'rtmp_protocol': '1', # rtmpt + + def extract_formats(streams, stream_type, query={}): + for stream in streams: + stream_url = stream.get('source') + if not stream_url: + continue + stream_url = update_url_query(stream_url, query) + encoding_option = stream.get('encodingOption', {}) + bitrate = stream.get('bitrate', {}) + formats.append({ + 'format_id': '%s_%s' % (stream.get('type') or stream_type, encoding_option.get('id') or encoding_option.get('name')), + 'url': stream_url, + 'width': int_or_none(encoding_option.get('width')), + 'height': int_or_none(encoding_option.get('height')), + 'vbr': int_or_none(bitrate.get('video')), + 'abr': int_or_none(bitrate.get('audio')), + 'filesize': int_or_none(stream.get('size')), + 'protocol': 'm3u8_native' if stream_type == 'HLS' else None, }) - formats.append(f) + + extract_formats(video_data.get('videos', {}).get('list', []), 'H264') + for stream_set in video_data.get('streams', []): + query = {} + for param in stream_set.get('keys', []): + query[param['name']] = param['value'] + stream_type = stream_set.get('type') + videos = stream_set.get('videos') + if videos: + extract_formats(videos, stream_type, query) + elif stream_type == 'HLS': + stream_url = stream_set.get('source') + if not stream_url: + continue + formats.extend(self._extract_m3u8_formats( + update_url_query(stream_url, query), video_id, + 'mp4', 'm3u8_native', m3u8_id=stream_type, fatal=False)) self._sort_formats(formats) + subtitles = {} + for caption in video_data.get('captions', {}).get('list', []): + caption_url = caption.get('source') + if not caption_url: + continue + subtitles.setdefault(caption.get('language') or caption.get('locale'), []).append({ + 'url': caption_url, + }) + return { 'id': video_id, - 'title': info.find('Subject').text, + 'title': title, 'formats': formats, + 'subtitles': subtitles, 'description': self._og_search_description(webpage), - 'thumbnail': self._og_search_thumbnail(webpage), - 'upload_date': info.find('WriteDate').text.replace('.', ''), - 'view_count': int(info.find('PlayCount').text), + 'thumbnail': meta.get('cover', {}).get('source') or self._og_search_thumbnail(webpage), + 'view_count': int_or_none(meta.get('count')), } From 7dc2a74e0ac9cfa74cc9de6f586ffd5cc8bac0d9 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 5 Aug 2016 11:41:55 +0800 Subject: [PATCH 039/775] [utils] Fix unified_timestamp for formats parsed by parsedate_tz() --- test/test_utils.py | 1 + youtube_dl/utils.py | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 2273b5a10..5a2ae4a1e 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -308,6 +308,7 @@ class TestUtil(unittest.TestCase): self.assertEqual(unified_timestamp('25-09-2014'), 1411603200) self.assertEqual(unified_timestamp('27.02.2016 17:30'), 1456594200) self.assertEqual(unified_timestamp('UNKNOWN DATE FORMAT'), None) + 
self.assertEqual(unified_timestamp('May 16, 2016 11:15 PM'), 1463440500) def test_determine_ext(self): self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4') diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index f5cd6819b..97ddd9883 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1101,7 +1101,7 @@ def unified_timestamp(date_str, day_first=True): date_str = date_str.replace(',', ' ') - pm_delta = datetime.timedelta(hours=12 if re.search(r'(?i)PM', date_str) else 0) + pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0 timezone, date_str = extract_timezone(date_str) # Remove AM/PM + timezone @@ -1109,13 +1109,13 @@ def unified_timestamp(date_str, day_first=True): for expression in date_formats(day_first): try: - dt = datetime.datetime.strptime(date_str, expression) - timezone + pm_delta + dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta) return calendar.timegm(dt.timetuple()) except ValueError: pass timetuple = email.utils.parsedate_tz(date_str) if timetuple: - return calendar.timegm(timetuple.timetuple()) + return calendar.timegm(timetuple) + pm_delta * 3600 def determine_ext(url, default_ext='unknown_video'): From 962250f7eaf648e998e66e1d6aa5c2b4e018cd27 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 5 Aug 2016 11:44:50 +0800 Subject: [PATCH 040/775] [cbslocal] Fix timestamp parsing (closes #10213) --- ChangeLog | 7 +++++++ youtube_dl/extractor/cbslocal.py | 9 ++------- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/ChangeLog b/ChangeLog index f3c752e66..2d021ddf4 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Fixed/improved extractors +- [cbslocal] Fix timestamp parsing (#10213) + + version 2016.08.01 Fixed/improved extractors @@ -7,6 +13,7 @@ Fixed/improved extractors - [safari] Relax regular expressions for URL matching (#10202) - [cwtv] Add support for cwtvpr.com (#10196) + version 2016.07.30 Fixed/improved extractors diff --git a/youtube_dl/extractor/cbslocal.py b/youtube_dl/extractor/cbslocal.py index 74adb38a6..008c5fe32 100644 --- a/youtube_dl/extractor/cbslocal.py +++ b/youtube_dl/extractor/cbslocal.py @@ -1,12 +1,10 @@ # coding: utf-8 from __future__ import unicode_literals -import calendar -import datetime - from .anvato import AnvatoIE from .sendtonews import SendtoNewsIE from ..compat import compat_urlparse +from ..utils import unified_timestamp class CBSLocalIE(AnvatoIE): @@ -71,10 +69,7 @@ class CBSLocalIE(AnvatoIE): time_str = self._html_search_regex( r'class="entry-date">([^<]+)<', webpage, 'released date', fatal=False) - timestamp = None - if time_str: - timestamp = calendar.timegm(datetime.datetime.strptime( - time_str, '%b %d, %Y %I:%M %p').timetuple()) + timestamp = unified_timestamp(time_str) info_dict.update({ 'display_id': display_id, From f65dc41b72d3449d9faf1a2fcd0a2e32befa3607 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 5 Aug 2016 08:11:44 +0100 Subject: [PATCH 041/775] [naver] extract upload date --- youtube_dl/extractor/naver.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/naver.py b/youtube_dl/extractor/naver.py index 60a4872be..0891d2772 100644 --- a/youtube_dl/extractor/naver.py +++ b/youtube_dl/extractor/naver.py @@ -21,6 +21,7 @@ class NaverIE(InfoExtractor): 'ext': 'mp4', 'title': '[9월 모의고사 해설강의][수학_김상희] 수학 A형 16~20번', 'description': '합격불변의 법칙 메가스터디 | 메가스터디 수학 김상희 선생님이 9월 모의고사 수학A형 16번에서 20번까지 해설강의를 공개합니다.', + 'upload_date': '20130903', 
}, }, { 'url': 'http://tvcast.naver.com/v/395837', @@ -30,6 +31,7 @@ class NaverIE(InfoExtractor): 'ext': 'mp4', 'title': '9년이 지나도 아픈 기억, 전효성의 아버지', 'description': 'md5:5bf200dcbf4b66eb1b350d1eb9c753f7', + 'upload_date': '20150519', }, 'skip': 'Georestricted', }] @@ -47,9 +49,11 @@ class NaverIE(InfoExtractor): if error: raise ExtractorError(error, expected=True) raise ExtractorError('couldn\'t extract vid and key') - video_data = self._download_json('http://play.rmcnmv.naver.com/vod/play/v2.0/' + m_id.group(1), video_id, query={ - 'key': m_id.group(2), - }) + video_data = self._download_json( + 'http://play.rmcnmv.naver.com/vod/play/v2.0/' + m_id.group(1), + video_id, query={ + 'key': m_id.group(2), + }) meta = video_data['meta'] title = meta['subject'] formats = [] @@ -100,6 +104,12 @@ class NaverIE(InfoExtractor): 'url': caption_url, }) + upload_date = self._search_regex( + r']+class="date".*?(\d{4}\.\d{2}\.\d{2})', + webpage, 'upload date', fatal=False) + if upload_date: + upload_date = upload_date.replace('.', '') + return { 'id': video_id, 'title': title, @@ -108,4 +118,5 @@ class NaverIE(InfoExtractor): 'description': self._og_search_description(webpage), 'thumbnail': meta.get('cover', {}).get('source') or self._og_search_thumbnail(webpage), 'view_count': int_or_none(meta.get('count')), + 'upload_date': upload_date, } From fdd0b8f8e044adf61da95fe85bc9414724f1d835 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 5 Aug 2016 09:44:15 +0100 Subject: [PATCH 042/775] [tvp] extract video id from the webpage(fixes #7799) --- youtube_dl/extractor/tvp.py | 44 +++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/youtube_dl/extractor/tvp.py b/youtube_dl/extractor/tvp.py index e84876b54..2abfb7830 100644 --- a/youtube_dl/extractor/tvp.py +++ b/youtube_dl/extractor/tvp.py @@ -24,6 +24,7 @@ class TVPIE(InfoExtractor): 'id': '194536', 'ext': 'mp4', 'title': 'Czas honoru, I seria – odc. 
13', + 'description': 'md5:76649d2014f65c99477be17f23a4dead', }, }, { 'url': 'http://www.tvp.pl/there-can-be-anything-so-i-shortened-it/17916176', @@ -32,6 +33,16 @@ class TVPIE(InfoExtractor): 'id': '17916176', 'ext': 'mp4', 'title': 'TVP Gorzów pokaże filmy studentów z podroży dookoła świata', + 'description': 'TVP Gorzów pokaże filmy studentów z podroży dookoła świata', + }, + }, { + # page id is not the same as video id(#7799) + 'url': 'http://vod.tvp.pl/22704887/08122015-1500', + 'md5': 'cf6a4705dfd1489aef8deb168d6ba742', + 'info_dict': { + 'id': '22680786', + 'ext': 'mp4', + 'title': 'Wiadomości, 08.12.2015, 15:00', }, }, { 'url': 'http://vod.tvp.pl/seriale/obyczajowe/na-sygnale/sezon-2-27-/odc-39/17834272', @@ -53,6 +64,39 @@ class TVPIE(InfoExtractor): 'only_matching': True, }] + def _real_extract(self, url): + page_id = self._match_id(url) + webpage = self._download_webpage(url, page_id) + video_id = self._search_regex([ + r']+src="[^"]*?object_id=(\d+)', + "object_id\s*:\s*'(\d+)'"], webpage, 'video id') + return { + '_type': 'url_transparent', + 'url': 'tvp:' + video_id, + 'description': self._og_search_description(webpage, default=None), + 'thumbnail': self._og_search_thumbnail(webpage), + 'ie_key': 'TVPEmbed', + } + + +class TVPEmbedIE(InfoExtractor): + IE_NAME = 'tvp:embed' + IE_DESC = 'Telewizja Polska' + _VALID_URL = r'(?:tvp:|https?://[^/]+\.tvp\.(?:pl|info)/sess/tvplayer\.php\?.*?object_id=)(?P\d+)' + + _TESTS = [{ + 'url': 'http://www.tvp.pl/sess/tvplayer.php?object_id=22670268', + 'md5': '8c9cd59d16edabf39331f93bf8a766c7', + 'info_dict': { + 'id': '22670268', + 'ext': 'mp4', + 'title': 'Panorama, 07.12.2015, 15:40', + }, + }, { + 'url': 'tvp:22670268', + 'only_matching': True, + }] + def _real_extract(self, url): video_id = self._match_id(url) From a7d29530732574554cd20324fda0faa17979a1ae Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 5 Aug 2016 10:11:59 +0100 Subject: [PATCH 043/775] [extractors] add tvp:embed import --- youtube_dl/extractor/extractors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 28858cfea..fec560ba3 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -892,6 +892,7 @@ from .tvc import ( from .tvigle import TvigleIE from .tvland import TVLandIE from .tvp import ( + TVPEmbedIE, TVPIE, TVPSeriesIE, ) From 5a993e169262237f4990207049d4cbbc69d5fd8d Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 5 Aug 2016 10:13:11 +0100 Subject: [PATCH 044/775] [natgeo] fix tests(closes #10229) --- youtube_dl/extractor/nationalgeographic.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/nationalgeographic.py b/youtube_dl/extractor/nationalgeographic.py index 4bffcf704..0027ff1b8 100644 --- a/youtube_dl/extractor/nationalgeographic.py +++ b/youtube_dl/extractor/nationalgeographic.py @@ -74,7 +74,7 @@ class NationalGeographicIE(ThePlatformIE): 'url': 'http://channel.nationalgeographic.com/the-story-of-god-with-morgan-freeman/videos/uncovering-a-universal-knowledge/', 'md5': '518c9aa655686cf81493af5cc21e2a04', 'info_dict': { - 'id': 'nB5vIAfmyllm', + 'id': 'vKInpacll2pC', 'ext': 'mp4', 'title': 'Uncovering a Universal Knowledge', 'description': 'md5:1a89148475bf931b3661fcd6ddb2ae3a', @@ -88,7 +88,7 @@ class NationalGeographicIE(ThePlatformIE): 'url': 'http://channel.nationalgeographic.com/wild/destination-wild/videos/the-stunning-red-bird-of-paradise/', 'md5': 'c4912f656b4cbe58f3e000c489360989', 
'info_dict': { - 'id': '3TmMv9OvGwIR', + 'id': 'Pok5lWCkiEFA', 'ext': 'mp4', 'title': 'The Stunning Red Bird of Paradise', 'description': 'md5:7bc8cd1da29686be4d17ad1230f0140c', From 08c655906c6afd41ca56e784f35791cfdb727d3d Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 5 Aug 2016 10:22:33 +0100 Subject: [PATCH 045/775] [5min] fix _VALID_URL(closes #10228) --- youtube_dl/extractor/fivemin.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/fivemin.py b/youtube_dl/extractor/fivemin.py index ab82701c2..f3f876ecd 100644 --- a/youtube_dl/extractor/fivemin.py +++ b/youtube_dl/extractor/fivemin.py @@ -5,7 +5,7 @@ from .common import InfoExtractor class FiveMinIE(InfoExtractor): IE_NAME = '5min' - _VALID_URL = r'(?:5min:|https?://(?:[^/]*?5min\.com|delivery\.vidible\.tv/aol)/(?:(?:Scripts/PlayerSeed\.js|playerseed/?)?\?.*?playList=)?)(?P\d+)' + _VALID_URL = r'(?:5min:|https?://(?:[^/]*?5min\.com/|delivery\.vidible\.tv/aol)(?:(?:Scripts/PlayerSeed\.js|playerseed/?)?\?.*?playList=)?)(?P\d+)' _TESTS = [ { From f0d31c624e51322e26889df417152b833549bb75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 5 Aug 2016 22:17:32 +0700 Subject: [PATCH 046/775] [tvplay] Add support for subtitles (Closes #10194) --- youtube_dl/extractor/tvplay.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/tvplay.py b/youtube_dl/extractor/tvplay.py index 918f8f8bc..0c072a6ae 100644 --- a/youtube_dl/extractor/tvplay.py +++ b/youtube_dl/extractor/tvplay.py @@ -4,7 +4,10 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_str +from ..compat import ( + compat_str, + compat_urlparse, +) from ..utils import ( parse_iso8601, qualities, @@ -226,7 +229,8 @@ class TVPlayIE(InfoExtractor): 'ext': ext, } if video_url.startswith('rtmp'): - m = re.search(r'^(?Prtmp://[^/]+/(?P[^/]+))/(?P.+)$', video_url) + m = re.search( + r'^(?Prtmp://[^/]+/(?P[^/]+))/(?P.+)$', video_url) if not m: continue fmt.update({ @@ -242,6 +246,17 @@ class TVPlayIE(InfoExtractor): formats.append(fmt) self._sort_formats(formats) + # TODO: webvtt in m3u8 + subtitles = {} + sami_path = video.get('sami_path') + if sami_path: + lang = self._search_regex( + r'_([a-z]{2})\.xml', sami_path, 'lang', + default=compat_urlparse.urlparse(url).netloc.rsplit('.', 1)[-1]) + subtitles[lang] = [{ + 'url': sami_path, + }] + return { 'id': video_id, 'title': title, @@ -251,4 +266,5 @@ class TVPlayIE(InfoExtractor): 'view_count': int_or_none(video.get('views', {}).get('total')), 'age_limit': int_or_none(video.get('age_limit', 0)), 'formats': formats, + 'subtitles': subtitles, } From 5ca968d0a6278400be7f4215f93c026c2bf0eafb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 5 Aug 2016 22:37:38 +0700 Subject: [PATCH 047/775] [tvplay] Extract series metadata --- youtube_dl/extractor/tvplay.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/youtube_dl/extractor/tvplay.py b/youtube_dl/extractor/tvplay.py index 0c072a6ae..3d4c576c8 100644 --- a/youtube_dl/extractor/tvplay.py +++ b/youtube_dl/extractor/tvplay.py @@ -37,6 +37,9 @@ class TVPlayIE(InfoExtractor): 'ext': 'mp4', 'title': 'Kādi ir īri? 
- Viņas melo labāk', 'description': 'Baiba apsmej īrus, kādi tie ir un ko viņi dara.', + 'series': 'Viņas melo labāk', + 'season': '2.sezona', + 'season_number': 2, 'duration': 25, 'timestamp': 1406097056, 'upload_date': '20140723', @@ -49,6 +52,10 @@ class TVPlayIE(InfoExtractor): 'ext': 'flv', 'title': 'Moterys meluoja geriau', 'description': 'md5:9aec0fc68e2cbc992d2a140bd41fa89e', + 'series': 'Moterys meluoja geriau', + 'episode_number': 47, + 'season': '1 sezonas', + 'season_number': 1, 'duration': 1330, 'timestamp': 1403769181, 'upload_date': '20140626', @@ -257,10 +264,19 @@ class TVPlayIE(InfoExtractor): 'url': sami_path, }] + series = video.get('format_title') + episode_number = int_or_none(video.get('format_position', {}).get('episode')) + season = video.get('_embedded', {}).get('season', {}).get('title') + season_number = int_or_none(video.get('format_position', {}).get('season')) + return { 'id': video_id, 'title': title, 'description': video.get('description'), + 'series': series, + 'episode_number': episode_number, + 'season': season, + 'season_number': season_number, 'duration': int_or_none(video.get('duration')), 'timestamp': parse_iso8601(video.get('created_at')), 'view_count': int_or_none(video.get('views', {}).get('total')), From 0ca057b965e5699a88fc952da460b5adfb8e7644 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 5 Aug 2016 16:39:24 +0100 Subject: [PATCH 048/775] [jwplatform] add support for playlist extraction and relative urls and improve audio detection --- youtube_dl/extractor/jwplatform.py | 132 ++++++++++++++++------------- 1 file changed, 72 insertions(+), 60 deletions(-) diff --git a/youtube_dl/extractor/jwplatform.py b/youtube_dl/extractor/jwplatform.py index e44e31104..2a499bb77 100644 --- a/youtube_dl/extractor/jwplatform.py +++ b/youtube_dl/extractor/jwplatform.py @@ -4,10 +4,12 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import compat_urlparse from ..utils import ( determine_ext, float_or_none, int_or_none, + mimetype2ext, ) @@ -28,74 +30,84 @@ class JWPlatformBaseIE(InfoExtractor): return self._parse_jwplayer_data( jwplayer_data, video_id, *args, **kwargs) - def _parse_jwplayer_data(self, jwplayer_data, video_id, require_title=True, m3u8_id=None, rtmp_params=None): + def _parse_jwplayer_data(self, jwplayer_data, video_id, require_title=True, m3u8_id=None, rtmp_params=None, base_url=None): # JWPlayer backward compatibility: flattened playlists # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/api/config.js#L81-L96 if 'playlist' not in jwplayer_data: jwplayer_data = {'playlist': [jwplayer_data]} - video_data = jwplayer_data['playlist'][0] + entries = [] + for video_data in jwplayer_data['playlist']: + # JWPlayer backward compatibility: flattened sources + # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/playlist/item.js#L29-L35 + if 'sources' not in video_data: + video_data['sources'] = [video_data] - # JWPlayer backward compatibility: flattened sources - # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/playlist/item.js#L29-L35 - if 'sources' not in video_data: - video_data['sources'] = [video_data] - - formats = [] - for source in video_data['sources']: - source_url = self._proto_relative_url(source['file']) - source_type = source.get('type') or '' - if source_type in ('application/vnd.apple.mpegurl', 'hls') or determine_ext(source_url) == 'm3u8': - formats.extend(self._extract_m3u8_formats( - source_url, video_id, 'mp4', 'm3u8_native', m3u8_id=m3u8_id, 
fatal=False)) - elif source_type.startswith('audio'): - formats.append({ - 'url': source_url, - 'vcodec': 'none', - }) - else: - a_format = { - 'url': source_url, - 'width': int_or_none(source.get('width')), - 'height': int_or_none(source.get('height')), - } - if source_url.startswith('rtmp'): - a_format['ext'] = 'flv', - - # See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as - # of jwplayer.flash.swf - rtmp_url_parts = re.split( - r'((?:mp4|mp3|flv):)', source_url, 1) - if len(rtmp_url_parts) == 3: - rtmp_url, prefix, play_path = rtmp_url_parts - a_format.update({ - 'url': rtmp_url, - 'play_path': prefix + play_path, - }) - if rtmp_params: - a_format.update(rtmp_params) - formats.append(a_format) - self._sort_formats(formats) - - subtitles = {} - tracks = video_data.get('tracks') - if tracks and isinstance(tracks, list): - for track in tracks: - if track.get('file') and track.get('kind') == 'captions': - subtitles.setdefault(track.get('label') or 'en', []).append({ - 'url': self._proto_relative_url(track['file']) + formats = [] + for source in video_data['sources']: + source_url = self._proto_relative_url(source['file']) + if base_url: + source_url = compat_urlparse.urljoin(base_url, source_url) + source_type = source.get('type') or '' + ext = mimetype2ext(source_type) or determine_ext(source_url) + if source_type == 'hls' or ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + source_url, video_id, 'mp4', 'm3u8_native', m3u8_id=m3u8_id, fatal=False)) + # https://github.com/jwplayer/jwplayer/blob/master/src/js/providers/default.js#L67 + elif source_type.startswith('audio') or ext in ('oga', 'aac', 'mp3', 'mpeg', 'vorbis'): + formats.append({ + 'url': source_url, + 'vcodec': 'none', + 'ext': ext, }) + else: + a_format = { + 'url': source_url, + 'width': int_or_none(source.get('width')), + 'height': int_or_none(source.get('height')), + 'ext': ext, + } + if source_url.startswith('rtmp'): + a_format['ext'] = 'flv', - return { - 'id': video_id, - 'title': video_data['title'] if require_title else video_data.get('title'), - 'description': video_data.get('description'), - 'thumbnail': self._proto_relative_url(video_data.get('image')), - 'timestamp': int_or_none(video_data.get('pubdate')), - 'duration': float_or_none(jwplayer_data.get('duration')), - 'subtitles': subtitles, - 'formats': formats, - } + # See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as + # of jwplayer.flash.swf + rtmp_url_parts = re.split( + r'((?:mp4|mp3|flv):)', source_url, 1) + if len(rtmp_url_parts) == 3: + rtmp_url, prefix, play_path = rtmp_url_parts + a_format.update({ + 'url': rtmp_url, + 'play_path': prefix + play_path, + }) + if rtmp_params: + a_format.update(rtmp_params) + formats.append(a_format) + self._sort_formats(formats) + + subtitles = {} + tracks = video_data.get('tracks') + if tracks and isinstance(tracks, list): + for track in tracks: + if track.get('file') and track.get('kind') == 'captions': + subtitles.setdefault(track.get('label') or 'en', []).append({ + 'url': self._proto_relative_url(track['file']) + }) + + entries.append({ + 'id': video_id, + 'title': video_data['title'] if require_title else video_data.get('title'), + 'description': video_data.get('description'), + 'thumbnail': self._proto_relative_url(video_data.get('image')), + 'timestamp': int_or_none(video_data.get('pubdate')), + 'duration': float_or_none(jwplayer_data.get('duration')), + 'subtitles': subtitles, + 'formats': formats, + }) + if len(entries) == 1: + return entries[0] + else: + return 
self.playlist_result(entries) class JWPlatformIE(JWPlatformBaseIE): From d50aca41f8e3aa3af8e19ec91100283b555ac59f Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 5 Aug 2016 16:40:21 +0100 Subject: [PATCH 049/775] [archiveorg] improve format extraction(closes #10219) --- youtube_dl/extractor/archiveorg.py | 78 +++++++++++++++--------------- 1 file changed, 38 insertions(+), 40 deletions(-) diff --git a/youtube_dl/extractor/archiveorg.py b/youtube_dl/extractor/archiveorg.py index 8feb7cb74..2472e4cc6 100644 --- a/youtube_dl/extractor/archiveorg.py +++ b/youtube_dl/extractor/archiveorg.py @@ -1,67 +1,65 @@ from __future__ import unicode_literals -from .common import InfoExtractor -from ..utils import unified_strdate +from .jwplatform import JWPlatformBaseIE +from ..utils import ( + unified_strdate, + clean_html, +) -class ArchiveOrgIE(InfoExtractor): +class ArchiveOrgIE(JWPlatformBaseIE): IE_NAME = 'archive.org' IE_DESC = 'archive.org videos' - _VALID_URL = r'https?://(?:www\.)?archive\.org/details/(?P[^?/]+)(?:[?].*)?$' + _VALID_URL = r'https?://(?:www\.)?archive\.org/(?:details|embed)/(?P[^/?#]+)(?:[?].*)?$' _TESTS = [{ 'url': 'http://archive.org/details/XD300-23_68HighlightsAResearchCntAugHumanIntellect', 'md5': '8af1d4cf447933ed3c7f4871162602db', 'info_dict': { 'id': 'XD300-23_68HighlightsAResearchCntAugHumanIntellect', - 'ext': 'ogv', + 'ext': 'ogg', 'title': '1968 Demo - FJCC Conference Presentation Reel #1', - 'description': 'md5:1780b464abaca9991d8968c877bb53ed', + 'description': 'md5:da45c349df039f1cc8075268eb1b5c25', 'upload_date': '19681210', 'uploader': 'SRI International' } }, { 'url': 'https://archive.org/details/Cops1922', - 'md5': '18f2a19e6d89af8425671da1cf3d4e04', + 'md5': 'bc73c8ab3838b5a8fc6c6651fa7b58ba', 'info_dict': { 'id': 'Cops1922', - 'ext': 'ogv', + 'ext': 'mp4', 'title': 'Buster Keaton\'s "Cops" (1922)', - 'description': 'md5:70f72ee70882f713d4578725461ffcc3', + 'description': 'md5:b4544662605877edd99df22f9620d858', } + }, { + 'url': 'http://archive.org/embed/XD300-23_68HighlightsAResearchCntAugHumanIntellect', + 'only_matching': True, }] def _real_extract(self, url): video_id = self._match_id(url) + webpage = self._download_webpage( + 'http://archive.org/embed/' + video_id, video_id) + jwplayer_playlist = self._parse_json(self._search_regex( + r"(?s)Play\('[^']+'\s*,\s*(\[.+\])\s*,\s*{.*?}\);", + webpage, 'jwplayer playlist'), video_id) + info = self._parse_jwplayer_data( + {'playlist': jwplayer_playlist}, video_id, base_url=url) - json_url = url + ('&' if '?' 
in url else '?') + 'output=json' - data = self._download_json(json_url, video_id) + def get_optional(metadata, field): + return metadata.get(field, [None])[0] - def get_optional(data_dict, field): - return data_dict['metadata'].get(field, [None])[0] - - title = get_optional(data, 'title') - description = get_optional(data, 'description') - uploader = get_optional(data, 'creator') - upload_date = unified_strdate(get_optional(data, 'date')) - - formats = [ - { - 'format': fdata['format'], - 'url': 'http://' + data['server'] + data['dir'] + fn, - 'file_size': int(fdata['size']), - } - for fn, fdata in data['files'].items() - if 'Video' in fdata['format']] - - self._sort_formats(formats) - - return { - '_type': 'video', - 'id': video_id, - 'title': title, - 'formats': formats, - 'description': description, - 'uploader': uploader, - 'upload_date': upload_date, - 'thumbnail': data.get('misc', {}).get('image'), - } + metadata = self._download_json( + 'http://archive.org/details/' + video_id, video_id, query={ + 'output': 'json', + })['metadata'] + info.update({ + 'title': get_optional(metadata, 'title') or info.get('title'), + 'description': clean_html(get_optional(metadata, 'description')), + }) + if info.get('_type') != 'playlist': + info.update({ + 'uploader': get_optional(metadata, 'creator'), + 'upload_date': unified_strdate(get_optional(metadata, 'date')), + }) + return info \ No newline at end of file From 3859ebeee6d6448240176ef5e4c20f6b1d1db795 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 5 Aug 2016 22:49:19 +0700 Subject: [PATCH 050/775] [tvplay] Capture and output native error message --- youtube_dl/extractor/tvplay.py | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/tvplay.py b/youtube_dl/extractor/tvplay.py index 3d4c576c8..150bde663 100644 --- a/youtube_dl/extractor/tvplay.py +++ b/youtube_dl/extractor/tvplay.py @@ -5,15 +5,17 @@ import re from .common import InfoExtractor from ..compat import ( + compat_HTTPError, compat_str, compat_urlparse, ) from ..utils import ( + determine_ext, + ExtractorError, + int_or_none, parse_iso8601, qualities, - determine_ext, update_url_query, - int_or_none, ) @@ -206,12 +208,15 @@ class TVPlayIE(InfoExtractor): title = video['title'] - if video.get('is_geo_blocked'): - self.report_warning( - 'This content might not be available in your country due to copyright reasons') - - streams = self._download_json( - 'http://playapi.mtgx.tv/v1/videos/stream/%s' % video_id, video_id, 'Downloading streams JSON') + try: + streams = self._download_json( + 'http://playapi.mtgx.tv/v1/videos/stream/%s' % video_id, + video_id, 'Downloading streams JSON') + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: + msg = self._parse_json(e.cause.read().decode('utf-8'), video_id) + raise ExtractorError(msg['msg'], expected=True) + raise quality = qualities(['hls', 'medium', 'high']) formats = [] @@ -251,6 +256,11 @@ class TVPlayIE(InfoExtractor): 'url': video_url, }) formats.append(fmt) + + if not formats and video.get('is_geo_blocked'): + self.raise_geo_restricted( + 'This content might not be available in your country due to copyright reasons') + self._sort_formats(formats) # TODO: webvtt in m3u8 From 46933a15d69a0079e66a04475659d0cbd5e5f08d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 5 Aug 2016 23:14:32 +0700 Subject: [PATCH 051/775] [extractor/common] Support root JSON-LD lists (Closes #10203) --- 
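Worth noting for the JSON-LD change below: pages like the one in #10203 emit a root-level list of JSON-LD objects rather than a single dict, which the previous code did not handle. An invented example of the shape the reworked _json_ld() now walks:

# Invented example of root-level JSON-LD (a list, not a dict); values are made up.
json_ld = [
    {
        '@context': 'http://schema.org',
        '@type': 'VideoObject',
        'name': 'Sample clip',
        'contentUrl': 'http://example.com/clip.mp4',
        'uploadDate': '2016-08-05T12:00:00+00:00',
        'duration': 'PT1M30S',
    },
    {'@context': 'http://schema.org', '@type': 'BreadcrumbList', 'itemListElement': []},
]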
youtube_dl/extractor/common.py | 76 ++++++++++++++++++---------------- 1 file changed, 41 insertions(+), 35 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 2d337d614..70909fc1c 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -828,41 +828,47 @@ class InfoExtractor(object): if not json_ld: return {} info = {} - if json_ld.get('@context') == 'http://schema.org': - item_type = json_ld.get('@type') - if expected_type is not None and expected_type != item_type: - return info - if item_type == 'TVEpisode': - info.update({ - 'episode': unescapeHTML(json_ld.get('name')), - 'episode_number': int_or_none(json_ld.get('episodeNumber')), - 'description': unescapeHTML(json_ld.get('description')), - }) - part_of_season = json_ld.get('partOfSeason') - if isinstance(part_of_season, dict) and part_of_season.get('@type') == 'TVSeason': - info['season_number'] = int_or_none(part_of_season.get('seasonNumber')) - part_of_series = json_ld.get('partOfSeries') - if isinstance(part_of_series, dict) and part_of_series.get('@type') == 'TVSeries': - info['series'] = unescapeHTML(part_of_series.get('name')) - elif item_type == 'Article': - info.update({ - 'timestamp': parse_iso8601(json_ld.get('datePublished')), - 'title': unescapeHTML(json_ld.get('headline')), - 'description': unescapeHTML(json_ld.get('articleBody')), - }) - elif item_type == 'VideoObject': - info.update({ - 'url': json_ld.get('contentUrl'), - 'title': unescapeHTML(json_ld.get('name')), - 'description': unescapeHTML(json_ld.get('description')), - 'thumbnail': json_ld.get('thumbnailUrl'), - 'duration': parse_duration(json_ld.get('duration')), - 'timestamp': unified_timestamp(json_ld.get('uploadDate')), - 'filesize': float_or_none(json_ld.get('contentSize')), - 'tbr': int_or_none(json_ld.get('bitrate')), - 'width': int_or_none(json_ld.get('width')), - 'height': int_or_none(json_ld.get('height')), - }) + if not isinstance(json_ld, (list, tuple, dict)): + return info + if isinstance(json_ld, dict): + json_ld = [json_ld] + for e in json_ld: + if e.get('@context') == 'http://schema.org': + item_type = e.get('@type') + if expected_type is not None and expected_type != item_type: + return info + if item_type == 'TVEpisode': + info.update({ + 'episode': unescapeHTML(e.get('name')), + 'episode_number': int_or_none(e.get('episodeNumber')), + 'description': unescapeHTML(e.get('description')), + }) + part_of_season = e.get('partOfSeason') + if isinstance(part_of_season, dict) and part_of_season.get('@type') == 'TVSeason': + info['season_number'] = int_or_none(part_of_season.get('seasonNumber')) + part_of_series = e.get('partOfSeries') + if isinstance(part_of_series, dict) and part_of_series.get('@type') == 'TVSeries': + info['series'] = unescapeHTML(part_of_series.get('name')) + elif item_type == 'Article': + info.update({ + 'timestamp': parse_iso8601(e.get('datePublished')), + 'title': unescapeHTML(e.get('headline')), + 'description': unescapeHTML(e.get('articleBody')), + }) + elif item_type == 'VideoObject': + info.update({ + 'url': e.get('contentUrl'), + 'title': unescapeHTML(e.get('name')), + 'description': unescapeHTML(e.get('description')), + 'thumbnail': e.get('thumbnailUrl'), + 'duration': parse_duration(e.get('duration')), + 'timestamp': unified_timestamp(e.get('uploadDate')), + 'filesize': float_or_none(e.get('contentSize')), + 'tbr': int_or_none(e.get('bitrate')), + 'width': int_or_none(e.get('width')), + 'height': int_or_none(e.get('height')), + }) + break return 
dict((k, v) for k, v in info.items() if v is not None) @staticmethod From 84bc23b41b5d1bc51102b264c23e96202e093d14 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 5 Aug 2016 23:16:19 +0700 Subject: [PATCH 052/775] [archiveorg] PEP 8 --- youtube_dl/extractor/archiveorg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/archiveorg.py b/youtube_dl/extractor/archiveorg.py index 2472e4cc6..486dff82d 100644 --- a/youtube_dl/extractor/archiveorg.py +++ b/youtube_dl/extractor/archiveorg.py @@ -62,4 +62,4 @@ class ArchiveOrgIE(JWPlatformBaseIE): 'uploader': get_optional(metadata, 'creator'), 'upload_date': unified_strdate(get_optional(metadata, 'date')), }) - return info \ No newline at end of file + return info From 038a5e1a65401ad6bfd2b6572f321af66e7cd9bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 6 Aug 2016 00:00:05 +0700 Subject: [PATCH 053/775] [adultswim] Add support for trailers (Closes #10235) --- youtube_dl/extractor/adultswim.py | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/adultswim.py b/youtube_dl/extractor/adultswim.py index 8157da2cb..6d20229f9 100644 --- a/youtube_dl/extractor/adultswim.py +++ b/youtube_dl/extractor/adultswim.py @@ -83,6 +83,21 @@ class AdultSwimIE(InfoExtractor): # m3u8 download 'skip_download': True, } + }, { + # heroMetadata.trailer + 'url': 'http://www.adultswim.com/videos/decker/inside-decker-a-new-hero/', + 'md5': '33e9a5d8f646523ce0868ecfb0eed77d', + 'info_dict': { + 'id': 'I0LQFQkaSUaFp8PnAWHhoQ', + 'ext': 'mp4', + 'title': 'Decker - Inside Decker: A New Hero', + 'description': 'md5:c916df071d425d62d70c86d4399d3ee0', + 'duration': 249.008, + }, + 'params': { + # m3u8 download + 'skip_download': True, + } }] @staticmethod @@ -133,20 +148,26 @@ class AdultSwimIE(InfoExtractor): if video_info is None: if bootstrapped_data.get('slugged_video', {}).get('slug') == episode_path: video_info = bootstrapped_data['slugged_video'] - else: - raise ExtractorError('Unable to find video info') + if not video_info: + video_info = bootstrapped_data.get('heroMetadata', {}).get('trailer').get('video') + if not video_info: + raise ExtractorError('Unable to find video info') show = bootstrapped_data['show'] show_title = show['title'] stream = video_info.get('stream') - clips = [stream] if stream else video_info.get('clips') - if not clips: + if stream and stream.get('videoPlaybackID'): + segment_ids = [stream['videoPlaybackID']] + elif video_info.get('clips'): + segment_ids = [clip['videoPlaybackID'] for clip in video_info['clips']] + elif video_info.get('videoPlaybackID'): + segment_ids = [video_info['videoPlaybackID']] + else: raise ExtractorError( 'This video is only available via cable service provider subscription that' ' is not currently supported. You may want to use --cookies.' 
if video_info.get('auth') is True else 'Unable to find stream or clips', expected=True) - segment_ids = [clip['videoPlaybackID'] for clip in clips] episode_id = video_info['id'] episode_title = video_info['title'] From fe3ad1d45651d3bd05a3faae77bc23bd05e689f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 6 Aug 2016 00:02:05 +0700 Subject: [PATCH 054/775] [adultswim] Remove superfluous md5 from test --- youtube_dl/extractor/adultswim.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/adultswim.py b/youtube_dl/extractor/adultswim.py index 6d20229f9..3f7f8c036 100644 --- a/youtube_dl/extractor/adultswim.py +++ b/youtube_dl/extractor/adultswim.py @@ -86,7 +86,6 @@ class AdultSwimIE(InfoExtractor): }, { # heroMetadata.trailer 'url': 'http://www.adultswim.com/videos/decker/inside-decker-a-new-hero/', - 'md5': '33e9a5d8f646523ce0868ecfb0eed77d', 'info_dict': { 'id': 'I0LQFQkaSUaFp8PnAWHhoQ', 'ext': 'mp4', From 8122e79fefa8eca62c3cddbf686c8920b6d06a9a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 6 Aug 2016 00:12:37 +0700 Subject: [PATCH 055/775] [gamekings] Remove remnants --- youtube_dl/extractor/gamekings.py | 76 ------------------------------- 1 file changed, 76 deletions(-) delete mode 100644 youtube_dl/extractor/gamekings.py diff --git a/youtube_dl/extractor/gamekings.py b/youtube_dl/extractor/gamekings.py deleted file mode 100644 index cbcddcb7c..000000000 --- a/youtube_dl/extractor/gamekings.py +++ /dev/null @@ -1,76 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import ( - xpath_text, - xpath_with_ns, -) -from .youtube import YoutubeIE - - -class GamekingsIE(InfoExtractor): - _VALID_URL = r'https?://www\.gamekings\.nl/(?:videos|nieuws)/(?P[^/]+)' - _TESTS = [{ - # YouTube embed video - 'url': 'http://www.gamekings.nl/videos/phoenix-wright-ace-attorney-dual-destinies-review/', - 'md5': '5208d3a17adeaef829a7861887cb9029', - 'info_dict': { - 'id': 'HkSQKetlGOU', - 'ext': 'mp4', - 'title': 'Phoenix Wright: Ace Attorney - Dual Destinies Review', - 'description': 'md5:db88c0e7f47e9ea50df3271b9dc72e1d', - 'thumbnail': 're:^https?://.*\.jpg$', - 'uploader_id': 'UCJugRGo4STYMeFr5RoOShtQ', - 'uploader': 'Gamekings Vault', - 'upload_date': '20151123', - }, - 'add_ie': ['Youtube'], - }, { - # vimeo video - 'url': 'http://www.gamekings.nl/videos/the-legend-of-zelda-majoras-mask/', - 'md5': '12bf04dfd238e70058046937657ea68d', - 'info_dict': { - 'id': 'the-legend-of-zelda-majoras-mask', - 'ext': 'mp4', - 'title': 'The Legend of Zelda: Majora’s Mask', - 'description': 'md5:9917825fe0e9f4057601fe1e38860de3', - 'thumbnail': 're:^https?://.*\.jpg$', - }, - }, { - 'url': 'http://www.gamekings.nl/nieuws/gamekings-extra-shelly-en-david-bereiden-zich-voor-op-de-livestream/', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - - playlist_id = self._search_regex( - r'gogoVideo\([^,]+,\s*"([^"]+)', webpage, 'playlist id') - - # Check if a YouTube embed is used - if YoutubeIE.suitable(playlist_id): - return self.url_result(playlist_id, ie='Youtube') - - playlist = self._download_xml( - 'http://www.gamekings.tv/wp-content/themes/gk2010/rss_playlist.php?id=%s' % playlist_id, - video_id) - - NS_MAP = { - 'jwplayer': 'http://rss.jwpcdn.com/' - } - - item = playlist.find('./channel/item') - - thumbnail = xpath_text(item, xpath_with_ns('./jwplayer:image', NS_MAP), 'thumbnail') - 
video_url = item.find(xpath_with_ns('./jwplayer:source', NS_MAP)).get('file') - - return { - 'id': video_id, - 'url': video_url, - 'title': self._og_search_title(webpage), - 'description': self._og_search_description(webpage), - 'thumbnail': thumbnail, - } From 7f2339c61789256b3ed12e196d14eff6791f4f46 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 6 Aug 2016 01:19:47 +0700 Subject: [PATCH 056/775] [ChangeLog] Actualize --- ChangeLog | 33 +++++++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index 2d021ddf4..7fd44d2a0 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,7 +1,36 @@ version -Fixed/improved extractors -- [cbslocal] Fix timestamp parsing (#10213) +Core +* Add support for JSON-LD root list entries (#10203) +* Improve unified_timestamp +* Lower preference of RTSP formats in generic sorting ++ Add support for multiple properties in _og_search_property +* Improve password hiding from verbose output + +Extractors ++ [adultswim] Add support for trailers (#10235) +* [archiveorg] Improve extraction (#10219) ++ [jwplatform] Add support for playlists ++ [jwplatform] Add support for relative URLs +* [jwplatform] Improve audio detection ++ [tvplay] Capture and output native error message ++ [tvplay] Extract series metadata ++ [tvplay] Add support for subtitles (#10194) +* [tvp] Improve extraction (#7799) +* [cbslocal] Fix timestamp parsing (#10213) ++ [naver] Add support for subtitles (#8096) +* [naver] Improve extraction +* [condenast] Improve extraction +* [engadget] Relax URL regular expression +* [5min] Fix extraction ++ [nationalgeographic] Add support for Episode Guide ++ [kaltura] Add support for subtitles +* [kaltura] Optimize network requests ++ [vodplatform] Add extractor for vod-platform.net +- [gamekings] Remove extractor +* [limelight] Extract HTTP formats +* [ntvru] Fix extraction ++ [comedycentral] Re-add :tds and :thedailyshow shortnames version 2016.08.01 From 491c42e690bd51687e43fd5178bebf99dcc2cc0c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 6 Aug 2016 01:23:48 +0700 Subject: [PATCH 057/775] release 2016.08.06 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 7 +++++-- youtube_dl/version.py | 2 +- 4 files changed, 10 insertions(+), 7 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 9d15b6a89..7241840c5 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.08.01*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.08.01** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.08.06*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.08.06** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.08.01 +[debug] youtube-dl version 2016.08.06 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 7fd44d2a0..9fd78cdda 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2016.08.06 Core * Add support for JSON-LD root list entries (#10203) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 1f89b1c14..2c8f950df 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -142,6 +142,7 @@ - **CollegeRama** - **ComCarCoff** - **ComedyCentral** + - **ComedyCentralShortname** - **ComedyCentralTV** - **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED - **Coub** @@ -247,7 +248,6 @@ - **FunnyOrDie** - **Fusion** - **GameInformer** - - **Gamekings** - **GameOne** - **gameone:playlist** - **Gamersyde** @@ -415,7 +415,8 @@ - **MyVidster** - **n-tv.de** - **natgeo** - - **natgeo:channel** + - **natgeo:episodeguide** + - **natgeo:video** - **Naver** - **NBA** - **NBC** @@ -726,6 +727,7 @@ - **tvigle**: Интернет-телевидение Tvigle.ru - **tvland.com** - **tvp**: Telewizja Polska + - **tvp:embed**: Telewizja Polska - **tvp:series** - **TVPlay**: TV3Play and related services - **Tweakers** @@ -805,6 +807,7 @@ - **vk:wallpost** - **vlive** - **Vodlocker** + - **VODPlatform** - **VoiceRepublic** - **VoxMedia** - **Vporn** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 27f97b213..a4654fa59 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.08.01' +__version__ = '2016.08.06' From e563c0d73b778a1c91007f8abe0e6b43b1f7b608 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 5 Aug 2016 21:01:16 +0100 Subject: [PATCH 058/775] [condenast] fallback to loader.js if video.js fail --- youtube_dl/extractor/condenast.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/condenast.py b/youtube_dl/extractor/condenast.py index 976a0e89d..15fabbb1c 100644 --- a/youtube_dl/extractor/condenast.py +++ b/youtube_dl/extractor/condenast.py @@ -113,11 +113,19 @@ class CondeNastIE(InfoExtractor): 'target': params['id'], }) video_id = query['videoId'] + video_info = None info_page = self._download_webpage( 'http://player.cnevids.com/player/video.js', - video_id, 'Downloading video info', query=query) - video_info = self._parse_json(self._search_regex( - r'loadCallback\(({.+})\)', info_page, 'video info'), video_id)['video'] + video_id, 'Downloading video info', query=query, fatal=False) + if info_page: + video_info = self._parse_json(self._search_regex( + r'loadCallback\(({.+})\)', 
info_page, 'video info'), video_id)['video'] + else: + info_page = self._download_webpage( + 'http://player.cnevids.com/player/loader.js', + video_id, 'Downloading loader info', query=query) + video_info = self._parse_json(self._search_regex( + r'var\s+video\s*=\s*({.+?});', info_page, 'video info'), video_id) title = video_info['title'] formats = [] From d73ebac100c9f91acb002c4844ba67b73616322a Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 6 Aug 2016 11:18:14 +0100 Subject: [PATCH 059/775] [pokemon] Add new extractor(closes #10093) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/pokemon.py | 52 ++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+) create mode 100644 youtube_dl/extractor/pokemon.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index fec560ba3..11b64eeaa 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -636,6 +636,7 @@ from .pluralsight import ( PluralsightCourseIE, ) from .podomatic import PodomaticIE +from .pokemon import PokemonIE from .polskieradio import PolskieRadioIE from .porn91 import Porn91IE from .pornhd import PornHdIE diff --git a/youtube_dl/extractor/pokemon.py b/youtube_dl/extractor/pokemon.py new file mode 100644 index 000000000..ce27f33e1 --- /dev/null +++ b/youtube_dl/extractor/pokemon.py @@ -0,0 +1,52 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + extract_attributes, + int_or_none, +) + + +class PokemonIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?pokemon\.com/[a-z]{2}(?:.*?play=(?P[a-z0-9]{32})|/pokemon-episodes/(?P[^/?#]+))' + _TESTS = [{ + 'url': 'http://www.pokemon.com/us/pokemon-episodes/19_01-from-a-to-z/?play=true', + 'md5': '9fb209ae3a569aac25de0f5afc4ee08f', + 'info_dict': { + 'id': 'd0436c00c3ce4071ac6cee8130ac54a1', + 'ext': 'mp4', + 'title': 'From A to Z!', + 'description': 'Bonnie makes a new friend, Ash runs into an old friend, and a terrifying premonition begins to unfold!', + 'timestamp': 1460478136, + 'upload_date': '20160412', + }, + 'add_id': ['LimelightMedia'] + }, { + 'url': 'http://www.pokemon.com/uk/pokemon-episodes/?play=2e8b5c761f1d4a9286165d7748c1ece2', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id, display_id = re.match(self._VALID_URL, url).groups() + webpage = self._download_webpage(url, video_id or display_id) + video_data = extract_attributes(self._search_regex( + r'(<[^>]+data-video-id="%s"[^>]*>)' % (video_id if video_id else '[a-z0-9]{32}'), + webpage, 'video data element')) + video_id = video_data['data-video-id'] + title = video_data['data-video-title'] + return { + '_type': 'url_transparent', + 'id': video_id, + 'url': 'limelight:media:%s' % video_id, + 'title': title, + 'description': video_data.get('data-video-summary'), + 'thumbnail': video_data.get('data-video-poster'), + 'series': 'Pokémon', + 'season_number': int_or_none(video_data.get('data-video-season')), + 'episode': title, + 'episode_number': int_or_none(video_data.get('data-video-episode')), + 'ie_key': 'LimelightMedia', + } From 089a40955cdcdce1d8ea89b1402bde4c88c75546 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 6 Aug 2016 12:08:14 +0100 Subject: [PATCH 060/775] [pokemon] improve _VALID_URL --- youtube_dl/extractor/pokemon.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/pokemon.py b/youtube_dl/extractor/pokemon.py index ce27f33e1..2d87e7e70 100644 --- 
a/youtube_dl/extractor/pokemon.py +++ b/youtube_dl/extractor/pokemon.py @@ -11,7 +11,7 @@ from ..utils import ( class PokemonIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?pokemon\.com/[a-z]{2}(?:.*?play=(?P[a-z0-9]{32})|/pokemon-episodes/(?P[^/?#]+))' + _VALID_URL = r'https?://(?:www\.)?pokemon\.com/[a-z]{2}(?:.*?play=(?P[a-z0-9]{32})|/[^/]+/\d+_\d+-(?P[^/?#]+))' _TESTS = [{ 'url': 'http://www.pokemon.com/us/pokemon-episodes/19_01-from-a-to-z/?play=true', 'md5': '9fb209ae3a569aac25de0f5afc4ee08f', @@ -27,6 +27,12 @@ class PokemonIE(InfoExtractor): }, { 'url': 'http://www.pokemon.com/uk/pokemon-episodes/?play=2e8b5c761f1d4a9286165d7748c1ece2', 'only_matching': True, + }, { + 'url': 'http://www.pokemon.com/fr/episodes-pokemon/18_09-un-hiver-inattendu/', + 'only_matching': True, + }, { + 'url': 'http://www.pokemon.com/de/pokemon-folgen/01_20-bye-bye-smettbo/', + 'only_matching': True, }] def _real_extract(self, url): From a7e5f274123dcfe9afd53134a33f83d53d1e497e Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 6 Aug 2016 18:48:09 +0100 Subject: [PATCH 061/775] [bbc] improve extraction - extract f4m and dash formats - improve format sorting and listing - improve extraction of articles with `otherSettings.playlist` --- youtube_dl/extractor/bbc.py | 175 +++++++++++++++++------------------- 1 file changed, 83 insertions(+), 92 deletions(-) diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index 9cb7630a1..45c562bd8 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -229,51 +229,6 @@ class BBCCoUkIE(InfoExtractor): asx = self._download_xml(connection.get('href'), programme_id, 'Downloading ASX playlist') return [ref.get('href') for ref in asx.findall('./Entry/ref')] - def _extract_connection(self, connection, programme_id): - formats = [] - kind = connection.get('kind') - protocol = connection.get('protocol') - supplier = connection.get('supplier') - if protocol == 'http': - href = connection.get('href') - transfer_format = connection.get('transferFormat') - # ASX playlist - if supplier == 'asx': - for i, ref in enumerate(self._extract_asx_playlist(connection, programme_id)): - formats.append({ - 'url': ref, - 'format_id': 'ref%s_%s' % (i, supplier), - }) - # Skip DASH until supported - elif transfer_format == 'dash': - pass - elif transfer_format == 'hls': - formats.extend(self._extract_m3u8_formats( - href, programme_id, ext='mp4', entry_protocol='m3u8_native', - m3u8_id=supplier, fatal=False)) - # Direct link - else: - formats.append({ - 'url': href, - 'format_id': supplier or kind or protocol, - }) - elif protocol == 'rtmp': - application = connection.get('application', 'ondemand') - auth_string = connection.get('authString') - identifier = connection.get('identifier') - server = connection.get('server') - formats.append({ - 'url': '%s://%s/%s?%s' % (protocol, server, application, auth_string), - 'play_path': identifier, - 'app': '%s?%s' % (application, auth_string), - 'page_url': 'http://www.bbc.co.uk', - 'player_url': 'http://www.bbc.co.uk/emp/releases/iplayer/revisions/617463_618125_4/617463_618125_4_emp.swf', - 'rtmp_live': False, - 'ext': 'flv', - 'format_id': supplier, - }) - return formats - def _extract_items(self, playlist): return playlist.findall('./{%s}item' % self._EMP_PLAYLIST_NS) @@ -294,46 +249,6 @@ class BBCCoUkIE(InfoExtractor): def _extract_connections(self, media): return self._findall_ns(media, './{%s}connection') - def _extract_video(self, media, programme_id): - formats = [] - vbr = 
int_or_none(media.get('bitrate')) - vcodec = media.get('encoding') - service = media.get('service') - width = int_or_none(media.get('width')) - height = int_or_none(media.get('height')) - file_size = int_or_none(media.get('media_file_size')) - for connection in self._extract_connections(media): - conn_formats = self._extract_connection(connection, programme_id) - for format in conn_formats: - format.update({ - 'width': width, - 'height': height, - 'vbr': vbr, - 'vcodec': vcodec, - 'filesize': file_size, - }) - if service: - format['format_id'] = '%s_%s' % (service, format['format_id']) - formats.extend(conn_formats) - return formats - - def _extract_audio(self, media, programme_id): - formats = [] - abr = int_or_none(media.get('bitrate')) - acodec = media.get('encoding') - service = media.get('service') - for connection in self._extract_connections(media): - conn_formats = self._extract_connection(connection, programme_id) - for format in conn_formats: - format.update({ - 'format_id': '%s_%s' % (service, format['format_id']), - 'abr': abr, - 'acodec': acodec, - 'vcodec': 'none', - }) - formats.extend(conn_formats) - return formats - def _get_subtitles(self, media, programme_id): subtitles = {} for connection in self._extract_connections(media): @@ -382,10 +297,77 @@ class BBCCoUkIE(InfoExtractor): for media in self._extract_medias(media_selection): kind = media.get('kind') - if kind == 'audio': - formats.extend(self._extract_audio(media, programme_id)) - elif kind == 'video': - formats.extend(self._extract_video(media, programme_id)) + if kind in ('video', 'audio'): + bitrate = int_or_none(media.get('bitrate')) + encoding = media.get('encoding') + service = media.get('service') + width = int_or_none(media.get('width')) + height = int_or_none(media.get('height')) + file_size = int_or_none(media.get('media_file_size')) + for connection in self._extract_connections(media): + conn_kind = connection.get('kind') + protocol = connection.get('protocol') + supplier = connection.get('supplier') + href = connection.get('href') + transfer_format = connection.get('transferFormat') + format_id = supplier or conn_kind or protocol + if service: + format_id = '%s_%s' % (service, format_id) + # ASX playlist + if supplier == 'asx': + for i, ref in enumerate(self._extract_asx_playlist(connection, programme_id)): + formats.append({ + 'url': ref, + 'format_id': 'ref%s_%s' % (i, format_id), + }) + elif transfer_format == 'dash': + formats.extend(self._extract_mpd_formats( + href, programme_id, mpd_id=format_id, fatal=False)) + elif transfer_format == 'hls': + formats.extend(self._extract_m3u8_formats( + href, programme_id, ext='mp4', entry_protocol='m3u8_native', + m3u8_id=format_id, fatal=False)) + elif transfer_format == 'hds': + formats.extend(self._extract_f4m_formats( + href, programme_id, f4m_id=format_id, fatal=False)) + else: + fmt = { + 'format_id': format_id, + 'filesize': file_size, + } + if kind == 'video': + fmt.update({ + 'width': width, + 'height': height, + 'vbr': bitrate, + 'vcodec': encoding, + }) + else: + fmt.update({ + 'abr': bitrate, + 'acodec': encoding, + 'vcodec': 'none', + }) + if protocol == 'http': + # Direct link + fmt.update({ + 'url': href, + }) + elif protocol == 'rtmp': + application = connection.get('application', 'ondemand') + auth_string = connection.get('authString') + identifier = connection.get('identifier') + server = connection.get('server') + fmt.update({ + 'url': '%s://%s/%s?%s' % (protocol, server, application, auth_string), + 'play_path': identifier, + 'app': '%s?%s' 
% (application, auth_string), + 'page_url': 'http://www.bbc.co.uk', + 'player_url': 'http://www.bbc.co.uk/emp/releases/iplayer/revisions/617463_618125_4/617463_618125_4_emp.swf', + 'rtmp_live': False, + 'ext': 'flv', + }) + formats.append(fmt) elif kind == 'captions': subtitles = self.extract_subtitles(media, programme_id) return formats, subtitles @@ -820,13 +802,19 @@ class BBCIE(BBCCoUkIE): # http://www.bbc.com/turkce/multimedya/2015/10/151010_vid_ankara_patlama_ani) playlist = data_playable.get('otherSettings', {}).get('playlist', {}) if playlist: - for key in ('progressiveDownload', 'streaming'): + entry = None + for key in ('streaming', 'progressiveDownload'): playlist_url = playlist.get('%sUrl' % key) if not playlist_url: continue try: - entries.append(self._extract_from_playlist_sxml( - playlist_url, playlist_id, timestamp)) + info = self._extract_from_playlist_sxml( + playlist_url, playlist_id, timestamp) + if not entry: + entry = info + else: + entry['title'] = info['title'] + entry['formats'].extend(info['formats']) except Exception as e: # Some playlist URL may fail with 500, at the same time # the other one may work fine (e.g. @@ -834,6 +822,9 @@ class BBCIE(BBCCoUkIE): if isinstance(e.cause, compat_HTTPError) and e.cause.code == 500: continue raise + if entry: + self._sort_formats(entry['formats']) + entries.append(entry) if entries: return self.playlist_result(entries, playlist_id, playlist_title, playlist_description) From c57244cdb10cac6ff88fd4fa430c98f3909c065a Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 6 Aug 2016 18:55:05 +0100 Subject: [PATCH 062/775] [common] lower the preference of m3u8 master manifest format --- youtube_dl/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 70909fc1c..b54a36996 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1140,7 +1140,7 @@ class InfoExtractor(object): 'url': m3u8_url, 'ext': ext, 'protocol': 'm3u8', - 'preference': preference - 1 if preference else -1, + 'preference': -100, 'resolution': 'multiple', 'format_note': 'Quality selection URL', } From d16b3c6677f1f699635892876f4566962094221d Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 6 Aug 2016 18:58:38 +0100 Subject: [PATCH 063/775] [common] extract partOfTVSeries info in json-ld --- youtube_dl/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index b54a36996..67f49f51b 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -846,7 +846,7 @@ class InfoExtractor(object): part_of_season = e.get('partOfSeason') if isinstance(part_of_season, dict) and part_of_season.get('@type') == 'TVSeason': info['season_number'] = int_or_none(part_of_season.get('seasonNumber')) - part_of_series = e.get('partOfSeries') + part_of_series = e.get('partOfSeries') or e.get('partOfTVSeries') if isinstance(part_of_series, dict) and part_of_series.get('@type') == 'TVSeries': info['series'] = unescapeHTML(part_of_series.get('name')) elif item_type == 'Article': From b0af12154e4a76bf7d493c7eb75cdeea6cf8fe17 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 6 Aug 2016 19:24:59 +0100 Subject: [PATCH 064/775] [bbc] reduce requests and improve format_id --- youtube_dl/extractor/bbc.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index 
45c562bd8..35d042e4d 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -294,6 +294,7 @@ class BBCCoUkIE(InfoExtractor): def _process_media_selector(self, media_selection, programme_id): formats = [] subtitles = None + urls = [] for media in self._extract_medias(media_selection): kind = media.get('kind') @@ -305,10 +306,14 @@ class BBCCoUkIE(InfoExtractor): height = int_or_none(media.get('height')) file_size = int_or_none(media.get('media_file_size')) for connection in self._extract_connections(media): + href = connection.get('href') + if href in urls: + continue + if href: + urls.append(href) conn_kind = connection.get('kind') protocol = connection.get('protocol') supplier = connection.get('supplier') - href = connection.get('href') transfer_format = connection.get('transferFormat') format_id = supplier or conn_kind or protocol if service: @@ -331,6 +336,8 @@ class BBCCoUkIE(InfoExtractor): formats.extend(self._extract_f4m_formats( href, programme_id, f4m_id=format_id, fatal=False)) else: + if bitrate: + format_id += '-%d' % bitrate fmt = { 'format_id': format_id, 'filesize': file_size, From ad152e2d954445c1bfa974c5d4a47ea622269d82 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 6 Aug 2016 19:36:12 +0100 Subject: [PATCH 065/775] [bbc] fix test --- youtube_dl/extractor/bbc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index 35d042e4d..7bc7b2ed6 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -578,7 +578,7 @@ class BBCIE(BBCCoUkIE): 'info_dict': { 'id': '150615_telabyad_kentin_cogu', 'ext': 'mp4', - 'title': "Tel Abyad'da IŞİD bayrağı indirildi YPG bayrağı çekildi", + 'title': "YPG: Tel Abyad'ın tamamı kontrolümüzde", 'description': 'md5:33a4805a855c9baf7115fcbde57e7025', 'timestamp': 1434397334, 'upload_date': '20150615', From d3f8e038fea62fc117b7747627eb8d654863d152 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 7 Aug 2016 02:42:58 +0800 Subject: [PATCH 066/775] [utils] Add decode_png for openload (#9706) --- youtube_dl/utils.py | 108 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 108 insertions(+) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 97ddd9883..ddbfcd2f1 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -47,6 +47,7 @@ from .compat import ( compat_socket_create_connection, compat_str, compat_struct_pack, + compat_struct_unpack, compat_urllib_error, compat_urllib_parse, compat_urllib_parse_urlencode, @@ -2969,3 +2970,110 @@ def parse_m3u8_attributes(attrib): def urshift(val, n): return val >> n if val >= 0 else (val + 0x100000000) >> n + + +# Based on png2str() written by @gdkchan and improved by @yokrysty +# Originally posted at https://github.com/rg3/youtube-dl/issues/9706 +def decode_png(png_data): + # Reference: https://www.w3.org/TR/PNG/ + header = png_data[8:] + + if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR': + raise IOError('Not a valid PNG file.') + + int_map = {1: '>B', 2: '>H', 4: '>I'} + unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0] + + chunks = [] + + while header: + length = unpack_integer(header[:4]) + header = header[4:] + + chunk_type = header[:4] + header = header[4:] + + chunk_data = header[:length] + header = header[length:] + + header = header[4:] # Skip CRC + + chunks.append({ + 'type': chunk_type, + 'length': length, + 'data': chunk_data + }) + + ihdr = chunks[0]['data'] + + width = unpack_integer(ihdr[:4]) + height 
= unpack_integer(ihdr[4:8]) + + idat = b'' + + for chunk in chunks: + if chunk['type'] == b'IDAT': + idat += chunk['data'] + + if not idat: + raise IOError('Unable to read PNG data.') + + decompressed_data = bytearray(zlib.decompress(idat)) + + stride = width * 3 + pixels = [] + + def _get_pixel(idx): + x = idx % stride + y = idx // stride + return pixels[y][x] + + for y in range(height): + basePos = y * (1 + stride) + filter_type = decompressed_data[basePos] + + current_row = [] + + pixels.append(current_row) + + for x in range(stride): + color = decompressed_data[1 + basePos + x] + basex = y * stride + x + left = 0 + up = 0 + + if x > 2: + left = _get_pixel(basex - 3) + if y > 0: + up = _get_pixel(basex - stride) + + if filter_type == 1: # Sub + color = (color + left) & 0xff + elif filter_type == 2: # Up + color = (color + up) & 0xff + elif filter_type == 3: # Average + color = (color + ((left + up) >> 1)) & 0xff + elif filter_type == 4: # Paeth + a = left + b = up + c = 0 + + if x > 2 and y > 0: + c = _get_pixel(basex - stride - 3) + + p = a + b - c + + pa = abs(p - a) + pb = abs(p - b) + pc = abs(p - c) + + if pa <= pb and pa <= pc: + color = (color + a) & 0xff + elif pb <= pc: + color = (color + b) & 0xff + else: + color = (color + c) & 0xff + + current_row.append(color) + + return width, height, pixels From c1decda58c812b3d0a3d4dfa998e7d8bd8f99203 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 7 Aug 2016 02:44:15 +0800 Subject: [PATCH 067/775] [openload] Fix extraction (closes #9706) --- youtube_dl/extractor/openload.py | 133 +++++++++++++++---------------- 1 file changed, 63 insertions(+), 70 deletions(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 6415b8fdc..4e80ca9ff 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -1,15 +1,14 @@ # coding: utf-8 -from __future__ import unicode_literals +from __future__ import unicode_literals, division -import re +import math from .common import InfoExtractor from ..compat import compat_chr from ..utils import ( + decode_png, determine_ext, - encode_base_n, ExtractorError, - mimetype2ext, ) @@ -41,60 +40,6 @@ class OpenloadIE(InfoExtractor): 'only_matching': True, }] - @staticmethod - def openload_level2_debase(m): - radix, num = int(m.group(1)) + 27, int(m.group(2)) - return '"' + encode_base_n(num, radix) + '"' - - @classmethod - def openload_level2(cls, txt): - # The function name is ǃ \u01c3 - # Using escaped unicode literals does not work in Python 3.2 - return re.sub(r'ǃ\((\d+),(\d+)\)', cls.openload_level2_debase, txt, re.UNICODE).replace('"+"', '') - - # Openload uses a variant of aadecode - # openload_decode and related functions are originally written by - # vitas@matfyz.cz and released with public domain - # See https://github.com/rg3/youtube-dl/issues/8489 - @classmethod - def openload_decode(cls, txt): - symbol_table = [ - ('_', '(゚Д゚) [゚Θ゚]'), - ('a', '(゚Д゚) [゚ω゚ノ]'), - ('b', '(゚Д゚) [゚Θ゚ノ]'), - ('c', '(゚Д゚) [\'c\']'), - ('d', '(゚Д゚) [゚ー゚ノ]'), - ('e', '(゚Д゚) [゚Д゚ノ]'), - ('f', '(゚Д゚) [1]'), - - ('o', '(゚Д゚) [\'o\']'), - ('u', '(o゚ー゚o)'), - ('c', '(゚Д゚) [\'c\']'), - - ('7', '((゚ー゚) + (o^_^o))'), - ('6', '((o^_^o) +(o^_^o) +(c^_^o))'), - ('5', '((゚ー゚) + (゚Θ゚))'), - ('4', '(-~3)'), - ('3', '(-~-~1)'), - ('2', '(-~1)'), - ('1', '(-~0)'), - ('0', '((c^_^o)-(c^_^o))'), - ] - delim = '(゚Д゚)[゚ε゚]+' - ret = '' - for aachar in txt.split(delim): - for val, pat in symbol_table: - aachar = aachar.replace(pat, val) - aachar = aachar.replace('+ ', '') - m = 
re.match(r'^\d+', aachar) - if m: - ret += compat_chr(int(m.group(0), 8)) - else: - m = re.match(r'^u([\da-f]+)', aachar) - if m: - ret += compat_chr(int(m.group(1), 16)) - return cls.openload_level2(ret) - def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) @@ -102,29 +47,77 @@ class OpenloadIE(InfoExtractor): if 'File not found' in webpage: raise ExtractorError('File not found', expected=True) - code = self._search_regex( - r'\s*
\s*]+>[^>]+\s*]+>([^<]+)', - webpage, 'JS code') + # The following extraction logic is proposed by @Belderak and @gdkchan + # and declared to be used freely in youtube-dl + # See https://github.com/rg3/youtube-dl/issues/9706 - decoded = self.openload_decode(code) + numbers_js = self._download_webpage( + 'https://openload.co/assets/js/obfuscator/n.js', video_id, + note='Downloading signature numbers') + signums = self._search_regex( + r'window\.signatureNumbers\s*=\s*[\'"](?P[a-z]+)[\'"]', + numbers_js, 'signature numbers', group='data') - video_url = self._search_regex( - r'return\s+"(https?://[^"]+)"', decoded, 'video URL') + linkimg_uri = self._search_regex( + r']+id="linkimg"[^>]+src="([^"]+)"', webpage, 'link image') + linkimg = self._request_webpage( + linkimg_uri, video_id, note=False).read() + + width, height, pixels = decode_png(linkimg) + + output = '' + for y in range(height): + for x in range(width): + r, g, b = pixels[y][3 * x:3 * x + 3] + if r == 0 and g == 0 and b == 0: + break + else: + output += compat_chr(r) + output += compat_chr(g) + output += compat_chr(b) + + img_str_length = len(output) // 200 + img_str = [[0 for x in range(img_str_length)] for y in range(10)] + + sig_str_length = len(signums) // 260 + sig_str = [[0 for x in range(sig_str_length)] for y in range(10)] + + for i in range(10): + for j in range(img_str_length): + begin = i * img_str_length * 20 + j * 20 + img_str[i][j] = output[begin:begin + 20] + for j in range(sig_str_length): + begin = i * sig_str_length * 26 + j * 26 + sig_str[i][j] = signums[begin:begin + 26] + + parts = [] + # TODO: find better names for str_, chr_ and sum_ + str_ = '' + for i in [2, 3, 5, 7]: + str_ = '' + sum_ = float(99) + for j in range(len(sig_str[i])): + for chr_idx in range(len(img_str[i][j])): + if sum_ > float(122): + sum_ = float(98) + chr_ = compat_chr(int(math.floor(sum_))) + if sig_str[i][j][chr_idx] == chr_ and j >= len(str_): + sum_ += float(2.5) + str_ += img_str[i][j][chr_idx] + parts.append(str_.replace(',', '')) + + video_url = 'https://openload.co/stream/%s~%s~%s~%s' % (parts[3], parts[1], parts[2], parts[0]) title = self._og_search_title(webpage, default=None) or self._search_regex( r']+class=["\']title["\'][^>]*>([^<]+)', webpage, 'title', default=None) or self._html_search_meta( 'description', webpage, 'title', fatal=True) - ext = mimetype2ext(self._search_regex( - r'window\.vt\s*=\s*(["\'])(?P.+?)\1', decoded, - 'mimetype', default=None, group='mimetype')) or determine_ext( - video_url, 'mp4') - return { 'id': video_id, 'title': title, - 'ext': ext, 'thumbnail': self._og_search_thumbnail(webpage, default=None), 'url': video_url, + # Seems all videos have extensions in their titles + 'ext': determine_ext(title), } From e37b54b140c552ee3d751b868ecc0a71df13829f Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 6 Aug 2016 20:53:13 +0100 Subject: [PATCH 068/775] [fox] fix theplatform release url query --- youtube_dl/extractor/fox.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/fox.py b/youtube_dl/extractor/fox.py index 95c1abf94..9f406b17e 100644 --- a/youtube_dl/extractor/fox.py +++ b/youtube_dl/extractor/fox.py @@ -2,7 +2,10 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import smuggle_url +from ..utils import ( + smuggle_url, + update_url_query, +) class FOXIE(InfoExtractor): @@ -29,11 +32,12 @@ class FOXIE(InfoExtractor): release_url = self._parse_json(self._search_regex( r'"fox_pdk_player"\s*:\s*({[^}]+?})', 
webpage, 'fox_pdk_player'), - video_id)['release_url'] + '&switch=http' + video_id)['release_url'] return { '_type': 'url_transparent', 'ie_key': 'ThePlatform', - 'url': smuggle_url(release_url, {'force_smil_url': True}), + 'url': smuggle_url(update_url_query( + release_url, {'switch': 'http'}), {'force_smil_url': True}), 'id': video_id, } From b47a75017bc4b4739ccebb3e8bde0642981968b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 7 Aug 2016 15:57:42 +0700 Subject: [PATCH 069/775] [tnaflix] Fix metadata extraction (Closes #10249) --- youtube_dl/extractor/tnaflix.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/tnaflix.py b/youtube_dl/extractor/tnaflix.py index 78174178e..74a0c8f17 100644 --- a/youtube_dl/extractor/tnaflix.py +++ b/youtube_dl/extractor/tnaflix.py @@ -189,9 +189,9 @@ class TNAFlixNetworkEmbedIE(TNAFlixNetworkBaseIE): class TNAFlixIE(TNAFlixNetworkBaseIE): _VALID_URL = r'https?://(?:www\.)?tnaflix\.com/[^/]+/(?P[^/]+)/video(?P\d+)' - _TITLE_REGEX = r'(.+?) - TNAFlix Porn Videos' - _DESCRIPTION_REGEX = r']+name="description"[^>]+content="([^"]+)"' - _UPLOADER_REGEX = r'\s*Verified Member\s*\s*
<h1>(.+?)</h1>
' + _TITLE_REGEX = r'(.+?) - (?:TNAFlix Porn Videos|TNAFlix\.com)' + _DESCRIPTION_REGEX = r'(?s)>Description:]+>(.+?)<' + _UPLOADER_REGEX = r'\s*Verified Member\s*\s*(.+?)<' _CATEGORIES_REGEX = r'(?s)]*>Categories:(.+?)' _TESTS = [{ From a1aadd09a4721ffd4d782aae4ddeae2699ca17fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 7 Aug 2016 15:59:05 +0700 Subject: [PATCH 070/775] [tnaflixnetworkbase] Improve title extraction --- youtube_dl/extractor/tnaflix.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/tnaflix.py b/youtube_dl/extractor/tnaflix.py index 74a0c8f17..7ddf77767 100644 --- a/youtube_dl/extractor/tnaflix.py +++ b/youtube_dl/extractor/tnaflix.py @@ -118,8 +118,12 @@ class TNAFlixNetworkBaseIE(InfoExtractor): xpath_text(cfg_xml, './startThumb', 'thumbnail'), 'http:') thumbnails = self._extract_thumbnails(cfg_xml) - title = self._html_search_regex( - self._TITLE_REGEX, webpage, 'title') if self._TITLE_REGEX else self._og_search_title(webpage) + title = None + if self._TITLE_REGEX: + title = self._html_search_regex( + self._TITLE_REGEX, webpage, 'title', default=None) + if not title: + title = self._og_search_title(webpage) age_limit = self._rta_search(webpage) or 18 From 37768f92422c2cf61a961dbaf54d61dabd364506 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 7 Aug 2016 10:58:11 +0100 Subject: [PATCH 071/775] [common] correctly lower the preference of m3u8 master manifest format --- youtube_dl/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 67f49f51b..0891309dd 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1140,7 +1140,7 @@ class InfoExtractor(object): 'url': m3u8_url, 'ext': ext, 'protocol': 'm3u8', - 'preference': -100, + 'preference': preference - 100 if preference else -100, 'resolution': 'multiple', 'format_note': 'Quality selection URL', } From f9622868e722a1863de257c7f3cbc3076eea6e83 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 7 Aug 2016 11:14:15 +0100 Subject: [PATCH 072/775] [bbc] preserve format_id backward compatibility --- youtube_dl/extractor/bbc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index 7bc7b2ed6..b6c240832 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -336,7 +336,7 @@ class BBCCoUkIE(InfoExtractor): formats.extend(self._extract_f4m_formats( href, programme_id, f4m_id=format_id, fatal=False)) else: - if bitrate: + if not service and not supplier and bitrate: format_id += '-%d' % bitrate fmt = { 'format_id': format_id, From 9fb64c04cdbe1b58f968fa80489168173ac7e565 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 7 Aug 2016 18:01:50 +0700 Subject: [PATCH 073/775] [bbc] Add support for morph embeds (Closes #10239) --- youtube_dl/extractor/bbc.py | 64 +++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index b6c240832..0ee096dda 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -5,11 +5,13 @@ import re from .common import InfoExtractor from ..utils import ( + dict_get, ExtractorError, float_or_none, int_or_none, parse_duration, parse_iso8601, + try_get, unescapeHTML, ) from ..compat import ( @@ -643,6 +645,24 @@ class BBCIE(BBCCoUkIE): # rtmp download 'skip_download': True, } + 
}, { + # single video embedded with Morph + 'url': 'http://www.bbc.co.uk/sport/live/olympics/36895975', + 'info_dict': { + 'id': 'p041vhd0', + 'ext': 'mp4', + 'title': "Nigeria v Japan - Men's First Round", + 'description': 'Live coverage of the first round from Group B at the Amazonia Arena.', + 'duration': 7980, + 'uploader': 'BBC Sport', + 'uploader_id': 'bbc_sport', + }, + 'params': { + # m3u8 download + 'skip_download': True, + 'proxy': '5.101.173.158:8080', + }, + 'skip': 'Georestricted to UK', }, { # single video with playlist.sxml URL in playlist param 'url': 'http://www.bbc.com/sport/0/football/33653409', @@ -864,6 +884,50 @@ class BBCIE(BBCCoUkIE): 'subtitles': subtitles, } + # Morph based embed (e.g. http://www.bbc.co.uk/sport/live/olympics/36895975) + # There are several setPayload calls may be present but the video + # seems to be always related to the first one + morph_payload = self._parse_json( + self._search_regex( + r'Morph\.setPayload\([^,]+,\s*({.+?})\);', + webpage, 'morph payload', default='{}'), + playlist_id, fatal=False) + if morph_payload: + components = try_get(morph_payload, lambda x: x['body']['components'], list) or [] + for component in components: + if not isinstance(component, dict): + continue + lead_media = try_get(component, lambda x: x['props']['leadMedia'], dict) + if not lead_media: + continue + identifiers = lead_media.get('identifiers') + if not identifiers or not isinstance(identifiers, dict): + continue + programme_id = identifiers.get('vpid') or identifiers.get('playablePid') + if not programme_id: + continue + title = lead_media.get('title') or self._og_search_title(webpage) + formats, subtitles = self._download_media_selector(programme_id) + self._sort_formats(formats) + description = lead_media.get('summary') + uploader = lead_media.get('masterBrand') + uploader_id = lead_media.get('mid') + duration = None + duration_d = lead_media.get('duration') + if isinstance(duration_d, dict): + duration = parse_duration(dict_get( + duration_d, ('rawDuration', 'formattedDuration', 'spokenDuration'))) + return { + 'id': programme_id, + 'title': title, + 'description': description, + 'duration': duration, + 'uploader': uploader, + 'uploader_id': uploader_id, + 'formats': formats, + 'subtitles': subtitles, + } + def extract_all(pattern): return list(filter(None, map( lambda s: self._parse_json(s, playlist_id, fatal=False), From aaa42cf0cf3e8c09209908474998fbd3dc86b91a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 7 Aug 2016 18:05:13 +0700 Subject: [PATCH 074/775] [bbc] PEP 8 --- youtube_dl/extractor/bbc.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index 0ee096dda..d059e02a3 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -339,7 +339,7 @@ class BBCCoUkIE(InfoExtractor): href, programme_id, f4m_id=format_id, fatal=False)) else: if not service and not supplier and bitrate: - format_id += '-%d' % bitrate + format_id += '-%d' % bitrate fmt = { 'format_id': format_id, 'filesize': file_size, @@ -945,7 +945,7 @@ class BBCIE(BBCCoUkIE): r'setPlaylist\("(%s)"\)' % EMBED_URL, webpage)) if entries: return self.playlist_result( - [self.url_result(entry, 'BBCCoUk') for entry in entries], + [self.url_result(entry_, 'BBCCoUk') for entry_ in entries], playlist_id, playlist_title, playlist_description) # Multiple video article (e.g. 
http://www.bbc.com/news/world-europe-32668511) From 998f0944526658ae19f29fd4ca04391a5a1ac027 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 7 Aug 2016 18:13:05 +0700 Subject: [PATCH 075/775] [bbc] Remove proxy from test --- youtube_dl/extractor/bbc.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index d059e02a3..b29e05970 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -660,7 +660,6 @@ class BBCIE(BBCCoUkIE): 'params': { # m3u8 download 'skip_download': True, - 'proxy': '5.101.173.158:8080', }, 'skip': 'Georestricted to UK', }, { From 958849275f1c6072c712e8d611294a762fadc7f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 7 Aug 2016 19:04:22 +0700 Subject: [PATCH 076/775] [extractor/generic] Make _search_json_ld non fatal --- youtube_dl/extractor/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index e89a03760..44c6c354c 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2241,7 +2241,7 @@ class GenericIE(InfoExtractor): # Looking for http://schema.org/VideoObject json_ld = self._search_json_ld( - webpage, video_id, default=None, expected_type='VideoObject') + webpage, video_id, fatal=False, expected_type='VideoObject') if json_ld and json_ld.get('url'): info_dict.update({ 'title': video_title or info_dict['title'], From d34995a9e3f596c4dd80178d99f7bd8dbc748e2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 7 Aug 2016 19:06:55 +0700 Subject: [PATCH 077/775] [flipagram] Make _search_json_ld non fatal --- youtube_dl/extractor/flipagram.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/flipagram.py b/youtube_dl/extractor/flipagram.py index acb6133ff..634f5fe3b 100644 --- a/youtube_dl/extractor/flipagram.py +++ b/youtube_dl/extractor/flipagram.py @@ -48,7 +48,7 @@ class FlipagramIE(InfoExtractor): flipagram = video_data['flipagram'] video = flipagram['video'] - json_ld = self._search_json_ld(webpage, video_id, default=False) + json_ld = self._search_json_ld(webpage, video_id, fatal=False) title = json_ld.get('title') or flipagram['captionText'] description = json_ld.get('description') or flipagram.get('captionText') From a8795327cae2bfce299d770039db40a3ac4df2e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 7 Aug 2016 20:45:18 +0700 Subject: [PATCH 078/775] [utils] Add support TV Parental Guidelines ratings in parse_age_limit --- test/test_utils.py | 15 +++++++++++++++ youtube_dl/utils.py | 20 ++++++++++++++++++-- 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 5a2ae4a1e..724346886 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -42,6 +42,7 @@ from youtube_dl.utils import ( ohdave_rsa_encrypt, OnDemandPagedList, orderedSet, + parse_age_limit, parse_duration, parse_filesize, parse_count, @@ -432,6 +433,20 @@ class TestUtil(unittest.TestCase): url_basename('http://media.w3.org/2010/05/sintel/trailer.mp4'), 'trailer.mp4') + def test_parse_age_limit(self): + self.assertEqual(parse_age_limit(None), None) + self.assertEqual(parse_age_limit(False), None) + self.assertEqual(parse_age_limit('invalid'), None) + self.assertEqual(parse_age_limit(0), 0) + self.assertEqual(parse_age_limit(18), 18) + self.assertEqual(parse_age_limit(21), 21) + 
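# Illustrative note, not additional test lines from this patch: with the
# TV_PARENTAL_GUIDELINES table this patch adds to youtube_dl/utils.py below,
# parse_age_limit('TV-Y') and parse_age_limit('TV-G') resolve to 0 and
# parse_age_limit('TV-Y7') to 7, while integers outside the 0-21 range (such
# as the 22 checked next) and unknown rating strings still return None.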
self.assertEqual(parse_age_limit(22), None) + self.assertEqual(parse_age_limit('18'), 18) + self.assertEqual(parse_age_limit('18+'), 18) + self.assertEqual(parse_age_limit('PG-13'), 13) + self.assertEqual(parse_age_limit('TV-14'), 14) + self.assertEqual(parse_age_limit('TV-MA'), 17) + def test_parse_duration(self): self.assertEqual(parse_duration(None), None) self.assertEqual(parse_duration(False), None) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index ddbfcd2f1..c50238ba1 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1984,11 +1984,27 @@ US_RATINGS = { } +TV_PARENTAL_GUIDELINES = { + 'TV-Y': 0, + 'TV-Y7': 7, + 'TV-G': 0, + 'TV-PG': 0, + 'TV-14': 14, + 'TV-MA': 17, +} + + def parse_age_limit(s): - if s is None: + if type(s) == int: + return s if 0 <= s <= 21 else None + if not isinstance(s, compat_basestring): return None m = re.match(r'^(?P\d{1,2})\+?$', s) - return int(m.group('age')) if m else US_RATINGS.get(s) + if m: + return int(m.group('age')) + if s in US_RATINGS: + return US_RATINGS[s] + return TV_PARENTAL_GUIDELINES.get(s) def strip_jsonp(code): From d92cb463052c262a6750f3a5bfc7d564a527ee9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 7 Aug 2016 20:57:05 +0700 Subject: [PATCH 079/775] [discoverygo] Add extractor (Closes #10245) --- youtube_dl/extractor/discoverygo.py | 98 +++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 1 + 2 files changed, 99 insertions(+) create mode 100644 youtube_dl/extractor/discoverygo.py diff --git a/youtube_dl/extractor/discoverygo.py b/youtube_dl/extractor/discoverygo.py new file mode 100644 index 000000000..adb68b96c --- /dev/null +++ b/youtube_dl/extractor/discoverygo.py @@ -0,0 +1,98 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + extract_attributes, + int_or_none, + parse_age_limit, + unescapeHTML, +) + + +class DiscoveryGoIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?discoverygo\.com/(?:[^/]+/)*(?P[^/?#&]+)' + _TEST = { + 'url': 'https://www.discoverygo.com/love-at-first-kiss/kiss-first-ask-questions-later/', + 'info_dict': { + 'id': '57a33c536b66d1cd0345eeb1', + 'ext': 'mp4', + 'title': 'Kiss First, Ask Questions Later!', + 'description': 'md5:fe923ba34050eae468bffae10831cb22', + 'duration': 2579, + 'series': 'Love at First Kiss', + 'season_number': 1, + 'episode_number': 1, + 'age_limit': 14, + }, + } + + def _real_extract(self, url): + display_id = self._match_id(url) + + webpage = self._download_webpage(url, display_id) + + container = extract_attributes( + self._search_regex( + r'(]+class=["\']video-player-container[^>]+>)', + webpage, 'video container')) + + video = self._parse_json( + unescapeHTML(container.get('data-video') or container.get('data-json')), + display_id) + + title = video['name'] + + stream = video['stream'] + STREAM_URL_SUFFIX = 'streamUrl' + formats = [] + for stream_kind in ('', 'hds'): + suffix = STREAM_URL_SUFFIX.capitalize() if stream_kind else STREAM_URL_SUFFIX + stream_url = stream.get('%s%s' % (stream_kind, suffix)) + if not stream_url: + continue + if stream_kind == '': + formats.extend(self._extract_m3u8_formats( + stream_url, display_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + elif stream_kind == 'hds': + formats.extend(self._extract_f4m_formats( + stream_url, display_id, f4m_id=stream_kind, fatal=False)) + self._sort_formats(formats) + + video_id = video.get('id') or display_id + description = 
video.get('description', {}).get('detailed') + duration = int_or_none(video.get('duration')) + + series = video.get('show', {}).get('name') + season_number = int_or_none(video.get('season', {}).get('number')) + episode_number = int_or_none(video.get('episodeNumber')) + + tags = video.get('tags') + age_limit = parse_age_limit(video.get('parental', {}).get('rating')) + + subtitles = {} + captions = stream.get('captions') + if isinstance(captions, list): + for caption in captions: + subtitle_url = caption.get('fileUrl') + if (not subtitle_url or not isinstance(subtitle_url, compat_str) or + not subtitle_url.startswith('http')): + continue + lang = caption.get('fileLang', 'en') + subtitles.setdefault(lang, []).append({'url': subtitle_url}) + + return { + 'id': video_id, + 'display_id': display_id, + 'title': title, + 'description': description, + 'duration': duration, + 'series': series, + 'season_number': season_number, + 'episode_number': episode_number, + 'tags': tags, + 'age_limit': age_limit, + 'formats': formats, + 'subtitles': subtitles, + } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 11b64eeaa..c2c4617ee 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -221,6 +221,7 @@ from .dvtv import DVTVIE from .dumpert import DumpertIE from .defense import DefenseGouvFrIE from .discovery import DiscoveryIE +from .discoverygo import DiscoveryGoIE from .dispeak import DigitallySpeakingIE from .dropbox import DropboxIE from .dw import ( From 845dfcdc40c20ea43f1f2bed9ccbc34e1994a89e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 7 Aug 2016 21:10:48 +0700 Subject: [PATCH 080/775] [ChangeLog] Actualize --- ChangeLog | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/ChangeLog b/ChangeLog index 9fd78cdda..c4743df6d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,29 @@ +version + +Core ++ Add support for TV Parental Guidelines ratings in parse_age_limit ++ Add decode_png (#9706) ++ Add support for partOfTVSeries in JSON-LD +* Lower master M3U8 manifest preference for better format sorting + +Extractors ++ [discoverygo] Add extractor (#10245) +* [flipagram] Make JSON-LD extraction non fatal +* [generic] Make JSON-LD extraction non fatal ++ [bbc] Add support for morph embeds (#10239) +* [tnaflixnetworkbase] Improve title extraction +* [tnaflix] Fix metadata extraction (#10249) +* [fox] Fix theplatform release URL query +* [openload] Fix extraction (#9706) +* [bbc] Skip duplicate manifest URLs +* [bbc] Improve format code ++ [bbc] Add support for DASH and F4M +* [bbc] Improve format sorting and listing +* [bbc] Improve playlist extraction ++ [pokemon] Add extractor (#10093) ++ [condenast] Add fallback scenario for video info extraction + + version 2016.08.06 Core From 4a01befb347b35f36dda4f344b7b502e53253da7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 7 Aug 2016 21:12:41 +0700 Subject: [PATCH 081/775] release 2016.08.07 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 2 ++ youtube_dl/version.py | 2 +- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 7241840c5..2319e45df 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.08.06*. 
If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.08.06** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.08.07*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.08.07** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.08.06 +[debug] youtube-dl version 2016.08.07 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index c4743df6d..7803ec1e9 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2016.08.07 Core + Add support for TV Parental Guidelines ratings in parse_age_limit diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 2c8f950df..3608e1807 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -182,6 +182,7 @@ - **DigitallySpeaking** - **Digiteka** - **Discovery** + - **DiscoveryGo** - **Dotsub** - **DouyuTV**: 斗鱼 - **DPlay** @@ -518,6 +519,7 @@ - **plus.google**: Google Plus - **pluzz.francetv.fr** - **podomatic** + - **Pokemon** - **PolskieRadio** - **PornHd** - **PornHub**: PornHub and Thumbzilla diff --git a/youtube_dl/version.py b/youtube_dl/version.py index a4654fa59..b48552031 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.08.06' +__version__ = '2016.08.07' From b2bd968f4b6795ffe3fc2d923cc73171687c2980 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 7 Aug 2016 22:59:34 +0800 Subject: [PATCH 082/775] [kuwo:singer] Fix extraction --- ChangeLog | 5 +++++ youtube_dl/extractor/kuwo.py | 6 ++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index 7803ec1e9..32a96432b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +version + +Extractors +* [kuwo:singer] Fix extraction + version 2016.08.07 Core diff --git a/youtube_dl/extractor/kuwo.py b/youtube_dl/extractor/kuwo.py index b1d460599..0eeb9ffeb 100644 --- a/youtube_dl/extractor/kuwo.py +++ b/youtube_dl/extractor/kuwo.py @@ -4,6 +4,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import compat_urlparse from ..utils import ( get_element_by_id, clean_html, @@ -242,8 +243,9 @@ class KuwoSingerIE(InfoExtractor): query={'artistId': artist_id, 'pn': page_num, 'rn': self.PAGE_SIZE}) return [ - self.url_result(song_url, 'Kuwo') for song_url in re.findall( - r']+class="name">]+href="(http://www\.kuwo\.cn/yinyue/\d+)', + self.url_result(compat_urlparse.urljoin(url, song_url), 'Kuwo') + for song_url in re.findall( + 
r']+class="name">]+href="(/yinyue/\d+)', webpage) ] From d21a661bb4cbdb92f4db588e3801bb19d4fa96b2 Mon Sep 17 00:00:00 2001 From: Charlie Le Date: Sun, 7 Aug 2016 13:38:10 -0700 Subject: [PATCH 083/775] [README.md] Update Options Link The link references a bad anchor. The updated link now references the correct anchor. --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index a9f3001a6..b42d5c730 100644 --- a/README.md +++ b/README.md @@ -1196,7 +1196,7 @@ Make sure that someone has not already opened the issue you're trying to open. S ### Why are existing options not enough? -Before requesting a new feature, please have a quick peek at [the list of supported options](https://github.com/rg3/youtube-dl/blob/master/README.md#synopsis). Many feature requests are for features that actually exist already! Please, absolutely do show off your work in the issue report and detail how the existing similar options do *not* solve your problem. +Before requesting a new feature, please have a quick peek at [the list of supported options](https://github.com/rg3/youtube-dl/blob/master/README.md#options). Many feature requests are for features that actually exist already! Please, absolutely do show off your work in the issue report and detail how the existing similar options do *not* solve your problem. ### Is there enough context in your bug report? From e1f93a0a76f054f537c9103a8958e1c1ab631fa5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20Zvoni=CC=81c=CC=8Cek?= Date: Sun, 7 Aug 2016 17:35:54 +0200 Subject: [PATCH 084/775] [rozhlas] Add new extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/rozhlas.py | 34 ++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+) create mode 100644 youtube_dl/extractor/rozhlas.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index c2c4617ee..7ede6afda 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -696,6 +696,7 @@ from .rockstargames import RockstarGamesIE from .roosterteeth import RoosterTeethIE from .rottentomatoes import RottenTomatoesIE from .roxwel import RoxwelIE +from .rozhlas import RozhlasIE from .rtbf import RTBFIE from .rte import RteIE, RteRadioIE from .rtlnl import RtlNlIE diff --git a/youtube_dl/extractor/rozhlas.py b/youtube_dl/extractor/rozhlas.py new file mode 100644 index 000000000..ef99b5b1f --- /dev/null +++ b/youtube_dl/extractor/rozhlas.py @@ -0,0 +1,34 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class RozhlasIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?prehravac\.rozhlas\.cz/audio/(?P[0-9]+)' + _TEST = { + 'url': 'http://prehravac.rozhlas.cz/audio/3421320', + 'md5': '504c902dbc9e9a1fd50326eccf02a7e2', + 'info_dict': { + 'id': '3421320', + 'ext': 'mp3', + 'title': 'Echo Pavla Klusáka (30.06.2015 21:00)', + 'description': 'Osmdesátiny Terryho Rileyho jsou skvělou příležitostí proletět se elektronickými i akustickými díly zakladatatele minimalismu, který je aktivní už přes padesát let' + } + } + + def _real_extract(self, url): + audio_id = self._match_id(url) + webpage = self._download_webpage(url, audio_id) + + title = self._html_search_regex(r'
<h3>(.+?)</h3>', webpage, 'title') + description = self._html_search_regex(r'<p title="(.+?)">
', webpage, 'description', fatal=False) + + url = 'http://media.rozhlas.cz/_audio/' + audio_id + '.mp3' + + return { + 'id': audio_id, + 'url': url, + 'title': title, + 'description': description, + } From de02d1f4e94a50eaad0f268fffec6880e9d61d2f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 8 Aug 2016 04:58:02 +0700 Subject: [PATCH 085/775] [rozhlas] Fix regexes and improve extraction (Closes #10253) --- youtube_dl/extractor/rozhlas.py | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/rozhlas.py b/youtube_dl/extractor/rozhlas.py index ef99b5b1f..f8eda8dea 100644 --- a/youtube_dl/extractor/rozhlas.py +++ b/youtube_dl/extractor/rozhlas.py @@ -2,11 +2,15 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..utils import ( + int_or_none, + remove_start, +) class RozhlasIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?prehravac\.rozhlas\.cz/audio/(?P[0-9]+)' - _TEST = { + _TESTS = [{ 'url': 'http://prehravac.rozhlas.cz/audio/3421320', 'md5': '504c902dbc9e9a1fd50326eccf02a7e2', 'info_dict': { @@ -15,20 +19,32 @@ class RozhlasIE(InfoExtractor): 'title': 'Echo Pavla Klusáka (30.06.2015 21:00)', 'description': 'Osmdesátiny Terryho Rileyho jsou skvělou příležitostí proletět se elektronickými i akustickými díly zakladatatele minimalismu, který je aktivní už přes padesát let' } - } + }, { + 'url': 'http://prehravac.rozhlas.cz/audio/3421320/embed', + 'skip_download': True, + }] def _real_extract(self, url): audio_id = self._match_id(url) - webpage = self._download_webpage(url, audio_id) - title = self._html_search_regex(r'
<h3>(.+?)</h3>', webpage, 'title') - description = self._html_search_regex(r'<p title="(.+?)">
', webpage, 'description', fatal=False) + webpage = self._download_webpage( + 'http://prehravac.rozhlas.cz/audio/%s' % audio_id, audio_id) - url = 'http://media.rozhlas.cz/_audio/' + audio_id + '.mp3' + title = self._html_search_regex( + r'
<h3>(.+?)</h3>\s*<p[^>]*>.*?</p>\s*<div[^>]+id=["\']player-track', + webpage, 'title', default=None) or remove_start( + self._og_search_title(webpage), 'Radio Wave - ') + description = self._html_search_regex( + r'<p[^>]+title=(["\'])(?P<url>(?:(?!\1).)+)\1[^>]*>.*?</p>
\s*]+id=["\']player-track', + webpage, 'description', fatal=False, group='url') + duration = int_or_none(self._search_regex( + r'data-duration=["\'](\d+)', webpage, 'duration', default=None)) return { 'id': audio_id, - 'url': url, + 'url': 'http://media.rozhlas.cz/_audio/%s.mp3' % audio_id, 'title': title, 'description': description, + 'duration': duration, + 'vcodec': 'none', } From 6bb801cfafb08efbd5d2d371e3ff2b7336ca09fa Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 7 Aug 2016 22:56:04 +0100 Subject: [PATCH 086/775] [cwtv] extract http formats --- youtube_dl/extractor/cwtv.py | 36 ++++++++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/cwtv.py b/youtube_dl/extractor/cwtv.py index c66c359cf..1ab9333b2 100644 --- a/youtube_dl/extractor/cwtv.py +++ b/youtube_dl/extractor/cwtv.py @@ -28,7 +28,8 @@ class CWTVIE(InfoExtractor): 'params': { # m3u8 download 'skip_download': True, - } + }, + 'skip': 'redirect to http://cwtv.com/shows/arrow/', }, { 'url': 'http://www.cwseed.com/shows/whose-line-is-it-anyway/jeff-davis-4/?play=24282b12-ead2-42f2-95ad-26770c2c6088', 'info_dict': { @@ -44,10 +45,6 @@ class CWTVIE(InfoExtractor): 'upload_date': '20151006', 'timestamp': 1444107300, }, - 'params': { - # m3u8 download - 'skip_download': True, - } }, { 'url': 'http://cwtv.com/thecw/chroniclesofcisco/?play=8adebe35-f447-465f-ab52-e863506ff6d6', 'only_matching': True, @@ -61,11 +58,30 @@ class CWTVIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - video_data = self._download_json( - 'http://metaframe.digitalsmiths.tv/v2/CWtv/assets/%s/partner/132?format=json' % video_id, video_id) - - formats = self._extract_m3u8_formats( - video_data['videos']['variantplaylist']['uri'], video_id, 'mp4') + video_data = None + formats = [] + for partner in (154, 213): + vdata = self._download_json( + 'http://metaframe.digitalsmiths.tv/v2/CWtv/assets/%s/partner/%d?format=json' % (video_id, partner), video_id, fatal=False) + if not vdata: + continue + video_data = vdata + for quality, quality_data in vdata.get('videos', {}).items(): + quality_url = quality_data.get('uri') + if not quality_url: + continue + if quality == 'variantplaylist': + formats.extend(self._extract_m3u8_formats( + quality_url, video_id, 'mp4', m3u8_id='hls', fatal=False)) + else: + tbr = int_or_none(quality_data.get('bitrate')) + format_id = 'http' + ('-%d' % tbr if tbr else '') + if self._is_valid_url(quality_url, video_id, format_id): + formats.append({ + 'format_id': format_id, + 'url': quality_url, + 'tbr': tbr, + }) self._sort_formats(formats) thumbnails = [{ From f17d5f6d1446c17206df9cc277ea2666aed09fab Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 7 Aug 2016 23:57:19 +0800 Subject: [PATCH 087/775] [features.aol.com] Fix _TESTS --- youtube_dl/extractor/aol.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/extractor/aol.py b/youtube_dl/extractor/aol.py index 42c21bf41..2cdee3320 100644 --- a/youtube_dl/extractor/aol.py +++ b/youtube_dl/extractor/aol.py @@ -123,6 +123,10 @@ class AolFeaturesIE(InfoExtractor): 'title': 'What To Watch - February 17, 2016', }, 'add_ie': ['FiveMin'], + 'params': { + # encrypted m3u8 download + 'skip_download': True, + }, }] def _real_extract(self, url): From 412abb8760c493fe2e8eeedab58aa7ca667c336d Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Mon, 8 Aug 2016 12:57:17 +0800 Subject: [PATCH 088/775] [bilibili] Update _TESTS --- youtube_dl/extractor/bilibili.py | 72 
+++++++------------------------- 1 file changed, 16 insertions(+), 56 deletions(-) diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index b17047b39..d8eb71821 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -25,13 +25,13 @@ class BiliBiliIE(InfoExtractor): _TESTS = [{ 'url': 'http://www.bilibili.tv/video/av1074402/', - 'md5': '5f7d29e1a2872f3df0cf76b1f87d3788', + 'md5': '9fa226fe2b8a9a4d5a69b4c6a183417e', 'info_dict': { 'id': '1554319', - 'ext': 'flv', + 'ext': 'mp4', 'title': '【金坷垃】金泡沫', 'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923', - 'duration': 308.067, + 'duration': 308.315, 'timestamp': 1398012660, 'upload_date': '20140420', 'thumbnail': 're:^https?://.+\.jpg', @@ -41,73 +41,33 @@ class BiliBiliIE(InfoExtractor): }, { 'url': 'http://www.bilibili.com/video/av1041170/', 'info_dict': { - 'id': '1041170', + 'id': '1507019', + 'ext': 'mp4', 'title': '【BD1080P】刀语【诸神&异域】', 'description': '这是个神奇的故事~每个人不留弹幕不给走哦~切利哦!~', + 'timestamp': 1396530060, + 'upload_date': '20140403', + 'uploader': '枫叶逝去', + 'uploader_id': '520116', }, - 'playlist_count': 9, }, { 'url': 'http://www.bilibili.com/video/av4808130/', 'info_dict': { - 'id': '4808130', + 'id': '7802182', + 'ext': 'mp4', 'title': '【长篇】哆啦A梦443【钉铛】', 'description': '(2016.05.27)来组合客人的脸吧&amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;illust_id=56912929', + 'timestamp': 1464564180, + 'upload_date': '20160529', + 'uploader': '喜欢拉面', + 'uploader_id': '151066', }, - 'playlist': [{ - 'md5': '55cdadedf3254caaa0d5d27cf20a8f9c', - 'info_dict': { - 'id': '4808130_part1', - 'ext': 'flv', - 'title': '【长篇】哆啦A梦443【钉铛】', - 'description': '(2016.05.27)来组合客人的脸吧&amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;illust_id=56912929', - 'timestamp': 1464564180, - 'upload_date': '20160529', - 'uploader': '喜欢拉面', - 'uploader_id': '151066', - }, - }, { - 'md5': '926f9f67d0c482091872fbd8eca7ea3d', - 'info_dict': { - 'id': '4808130_part2', - 'ext': 'flv', - 'title': '【长篇】哆啦A梦443【钉铛】', - 'description': '(2016.05.27)来组合客人的脸吧&amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;illust_id=56912929', - 'timestamp': 1464564180, - 'upload_date': '20160529', - 'uploader': '喜欢拉面', - 'uploader_id': '151066', - }, - }, { - 'md5': '4b7b225b968402d7c32348c646f1fd83', - 'info_dict': { - 'id': '4808130_part3', - 'ext': 'flv', - 'title': '【长篇】哆啦A梦443【钉铛】', - 'description': '(2016.05.27)来组合客人的脸吧&amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;illust_id=56912929', - 'timestamp': 1464564180, - 'upload_date': '20160529', - 'uploader': '喜欢拉面', - 'uploader_id': '151066', - }, - }, { - 'md5': '7b795e214166501e9141139eea236e91', - 'info_dict': { - 'id': '4808130_part4', - 'ext': 'flv', - 'title': '【长篇】哆啦A梦443【钉铛】', - 'description': '(2016.05.27)来组合客人的脸吧&amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;illust_id=56912929', - 'timestamp': 1464564180, - 'upload_date': '20160529', - 'uploader': '喜欢拉面', - 'uploader_id': '151066', - }, - }], }, { # Missing upload time 'url': 'http://www.bilibili.com/video/av1867637/', 'info_dict': { 'id': '2880301', - 'ext': 'flv', + 'ext': 'mp4', 'title': '【HDTV】【喜剧】岳父岳母真难当 (2014)【法国票房冠军】', 'description': '一个信奉天主教的法国旧式传统资产阶级家庭中有四个女儿。三个女儿却分别找了阿拉伯、犹太、中国丈夫,老夫老妻唯独期盼剩下未嫁的小女儿能找一个信奉天主教的法国白人,结果没想到小女儿找了一位非裔黑人……【这次应该不会跳帧了】', 'uploader': '黑夜为猫', From b1c6f21c741b7f18b2b46165be0a3539735a6184 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Mon, 8 Aug 2016 
12:59:07 +0800 Subject: [PATCH 089/775] [aparat] Fix extraction --- ChangeLog | 1 + youtube_dl/extractor/aparat.py | 14 ++++++-------- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/ChangeLog b/ChangeLog index 32a96432b..657ff3e48 100644 --- a/ChangeLog +++ b/ChangeLog @@ -2,6 +2,7 @@ version Extractors * [kuwo:singer] Fix extraction +* [aparat] Fix extraction version 2016.08.07 diff --git a/youtube_dl/extractor/aparat.py b/youtube_dl/extractor/aparat.py index 63429780e..025e29aa4 100644 --- a/youtube_dl/extractor/aparat.py +++ b/youtube_dl/extractor/aparat.py @@ -1,8 +1,6 @@ # coding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor from ..utils import ( ExtractorError, @@ -15,7 +13,7 @@ class AparatIE(InfoExtractor): _TEST = { 'url': 'http://www.aparat.com/v/wP8On', - 'md5': '6714e0af7e0d875c5a39c4dc4ab46ad1', + 'md5': '131aca2e14fe7c4dcb3c4877ba300c89', 'info_dict': { 'id': 'wP8On', 'ext': 'mp4', @@ -31,13 +29,13 @@ class AparatIE(InfoExtractor): # Note: There is an easier-to-parse configuration at # http://www.aparat.com/video/video/config/videohash/%video_id # but the URL in there does not work - embed_url = ('http://www.aparat.com/video/video/embed/videohash/' + - video_id + '/vt/frame') + embed_url = 'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id webpage = self._download_webpage(embed_url, video_id) - video_urls = [video_url.replace('\\/', '/') for video_url in re.findall( - r'(?:fileList\[[0-9]+\]\s*=|"file"\s*:)\s*"([^"]+)"', webpage)] - for i, video_url in enumerate(video_urls): + file_list = self._parse_json(self._search_regex( + r'fileList\s*=\s*JSON\.parse\(\'([^\']+)\'\)', webpage, 'file list'), video_id) + for i, item in enumerate(file_list[0]): + video_url = item['file'] req = HEADRequest(video_url) res = self._request_webpage( req, video_id, note='Testing video URL %d' % i, errnote=False) From d71207121df27dc251ee15628b0742e8d7db0db7 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Mon, 8 Aug 2016 12:59:55 +0800 Subject: [PATCH 090/775] [biqle] Skip an invalid test --- youtube_dl/extractor/biqle.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/biqle.py b/youtube_dl/extractor/biqle.py index ae4579b33..beaebfd2a 100644 --- a/youtube_dl/extractor/biqle.py +++ b/youtube_dl/extractor/biqle.py @@ -24,7 +24,8 @@ class BIQLEIE(InfoExtractor): 'ext': 'mp4', 'title': 'Ребенок в шоке от автоматической мойки', 'uploader': 'Dmitry Kotov', - } + }, + 'skip': ' This video was marked as adult. 
Embedding adult videos on external sites is prohibited.', }] def _real_extract(self, url): From a41a6c5094e757eb51cbd6747d868c2f9450f324 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Mon, 8 Aug 2016 13:06:02 +0800 Subject: [PATCH 091/775] [chaturbate] Skip the invalid test --- youtube_dl/extractor/chaturbate.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/chaturbate.py b/youtube_dl/extractor/chaturbate.py index b2234549e..29a8820d5 100644 --- a/youtube_dl/extractor/chaturbate.py +++ b/youtube_dl/extractor/chaturbate.py @@ -17,7 +17,8 @@ class ChaturbateIE(InfoExtractor): }, 'params': { 'skip_download': True, - } + }, + 'skip': 'Room is offline', }, { 'url': 'https://en.chaturbate.com/siswet19/', 'only_matching': True, From 3dc240e8c659112a12e8bd1260c04db9f50f1b61 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Mon, 8 Aug 2016 18:48:21 +0800 Subject: [PATCH 092/775] [sohu] Update _TESTS (closes #10260) --- youtube_dl/extractor/sohu.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/youtube_dl/extractor/sohu.py b/youtube_dl/extractor/sohu.py index 72fe66142..48e2ba2dd 100644 --- a/youtube_dl/extractor/sohu.py +++ b/youtube_dl/extractor/sohu.py @@ -14,10 +14,10 @@ from ..utils import ExtractorError class SohuIE(InfoExtractor): _VALID_URL = r'https?://(?Pmy\.)?tv\.sohu\.com/.+?/(?(mytv)|n)(?P\d+)\.shtml.*?' + # Sohu videos give different MD5 sums on Travis CI and my machine _TESTS = [{ 'note': 'This video is available only in Mainland China', 'url': 'http://tv.sohu.com/20130724/n382479172.shtml#super', - 'md5': '29175c8cadd8b5cc4055001e85d6b372', 'info_dict': { 'id': '382479172', 'ext': 'mp4', @@ -26,7 +26,6 @@ class SohuIE(InfoExtractor): 'skip': 'On available in China', }, { 'url': 'http://tv.sohu.com/20150305/n409385080.shtml', - 'md5': '699060e75cf58858dd47fb9c03c42cfb', 'info_dict': { 'id': '409385080', 'ext': 'mp4', @@ -34,7 +33,6 @@ class SohuIE(InfoExtractor): } }, { 'url': 'http://my.tv.sohu.com/us/232799889/78693464.shtml', - 'md5': '9bf34be48f2f4dadcb226c74127e203c', 'info_dict': { 'id': '78693464', 'ext': 'mp4', @@ -48,7 +46,6 @@ class SohuIE(InfoExtractor): 'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆', }, 'playlist': [{ - 'md5': 'bdbfb8f39924725e6589c146bc1883ad', 'info_dict': { 'id': '78910339_part1', 'ext': 'mp4', @@ -56,7 +53,6 @@ class SohuIE(InfoExtractor): 'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆', } }, { - 'md5': '3e1f46aaeb95354fd10e7fca9fc1804e', 'info_dict': { 'id': '78910339_part2', 'ext': 'mp4', @@ -64,7 +60,6 @@ class SohuIE(InfoExtractor): 'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆', } }, { - 'md5': '8407e634175fdac706766481b9443450', 'info_dict': { 'id': '78910339_part3', 'ext': 'mp4', From 395c74615c58f5f1b2e462786d6f4d8bab1a313a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 8 Aug 2016 21:49:27 +0700 Subject: [PATCH 093/775] Revert "[extractor/generic] Make _search_json_ld non fatal" This reverts commit 958849275f1c6072c712e8d611294a762fadc7f0. 
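This revert and the follow-ups below converge on a single convention for optional JSON-LD metadata: callers pass default to _search_json_ld (which the [extractor/common] change further down makes imply non-fatal extraction) instead of fatal=False. A minimal sketch of the resulting caller-side pattern; the extractor class and the Open Graph fallbacks are illustrative only and not part of these patches:

from .common import InfoExtractor


class ExampleIE(InfoExtractor):
    # Hypothetical extractor, shown only to illustrate the call convention.
    _VALID_URL = r'https?://(?:www\.)?example\.com/videos/(?P<id>\d+)'

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        # default={} suppresses the "unable to extract" error and always yields
        # a dict, so the .get() fallbacks below stay safe on pages that carry
        # no JSON-LD block at all.
        json_ld = self._search_json_ld(webpage, video_id, default={})
        return {
            'id': video_id,
            'url': json_ld.get('url') or self._og_search_video_url(webpage),
            'title': json_ld.get('title') or self._og_search_title(webpage),
            'description': json_ld.get('description') or self._og_search_description(webpage),
        }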
--- youtube_dl/extractor/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 44c6c354c..e89a03760 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2241,7 +2241,7 @@ class GenericIE(InfoExtractor): # Looking for http://schema.org/VideoObject json_ld = self._search_json_ld( - webpage, video_id, fatal=False, expected_type='VideoObject') + webpage, video_id, default=None, expected_type='VideoObject') if json_ld and json_ld.get('url'): info_dict.update({ 'title': video_title or info_dict['title'], 'description': video_description, From 3711fa1eb24224a5b898ede355136c9009355a87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 8 Aug 2016 21:49:45 +0700 Subject: [PATCH 094/775] Revert "[flipagram] Make _search_json_ld non fatal" This reverts commit d34995a9e3f596c4dd80178d99f7bd8dbc748e2b. --- youtube_dl/extractor/flipagram.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/flipagram.py b/youtube_dl/extractor/flipagram.py index 634f5fe3b..acb6133ff 100644 --- a/youtube_dl/extractor/flipagram.py +++ b/youtube_dl/extractor/flipagram.py @@ -48,7 +48,7 @@ class FlipagramIE(InfoExtractor): flipagram = video_data['flipagram'] video = flipagram['video'] - json_ld = self._search_json_ld(webpage, video_id, fatal=False) + json_ld = self._search_json_ld(webpage, video_id, default=False) title = json_ld.get('title') or flipagram['captionText'] description = json_ld.get('description') or flipagram.get('captionText') From 321b5e082a7340225091ea5ad852e12b2dc499e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 8 Aug 2016 22:36:18 +0700 Subject: [PATCH 095/775] [extractor/common] Respect default in _search_json_ld --- youtube_dl/extractor/common.py | 11 +++++++++--- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 0891309dd..e47770c1d 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -816,11 +816,14 @@ class InfoExtractor(object): json_ld = self._search_regex( r'(?s)<script[^>]+type=(["\'])application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>', html, 'JSON-LD', group='json_ld', **kwargs) + default = kwargs.get('default', NO_DEFAULT) if not json_ld: - return {} - return self._json_ld( - json_ld, video_id, fatal=kwargs.get('fatal', True), - expected_type=expected_type) + return default if default is not NO_DEFAULT else {} + # JSON-LD may be malformed and thus `fatal` should be respected. + # At the same time `default` may be passed that assumes `fatal=False` + # for _search_regex. Let's simulate the same behavior here as well.
+ fatal = kwargs.get('fatal', True) if default == NO_DEFAULT else False + return self._json_ld(json_ld, video_id, fatal=fatal, expected_type=expected_type) def _json_ld(self, json_ld, video_id, fatal=True, expected_type=None): if isinstance(json_ld, compat_str): From 522f6c066da93b6baec3399b7098556e5ec55f43 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 8 Aug 2016 22:44:36 +0700 Subject: [PATCH 096/775] [bbc] Add proper default to _search_json_ld call --- youtube_dl/extractor/bbc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index b29e05970..83e6d024c 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -759,7 +759,7 @@ class BBCIE(BBCCoUkIE): webpage = self._download_webpage(url, playlist_id) - json_ld_info = self._search_json_ld(webpage, playlist_id, default=None) + json_ld_info = self._search_json_ld(webpage, playlist_id, default={}) timestamp = json_ld_info.get('timestamp') playlist_title = json_ld_info.get('title') From 1e7f602e2ac27bac266cde4c67b31f59510a91f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 8 Aug 2016 22:45:49 +0700 Subject: [PATCH 097/775] [condenast] Make _search_json_ld call non fatal --- youtube_dl/extractor/condenast.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/condenast.py b/youtube_dl/extractor/condenast.py index 15fabbb1c..8d8f60598 100644 --- a/youtube_dl/extractor/condenast.py +++ b/youtube_dl/extractor/condenast.py @@ -143,7 +143,8 @@ class CondeNastIE(InfoExtractor): }) self._sort_formats(formats) - info = self._search_json_ld(webpage, video_id) if url_type != 'embed' else {} + info = self._search_json_ld( + webpage, video_id, fatal=False) if url_type != 'embed' else {} info.update({ 'id': video_id, 'formats': formats, From e8ed7354e6fdd7bd3759e24f0aa08233316303b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 8 Aug 2016 22:46:19 +0700 Subject: [PATCH 098/775] [flipagram] Add proper default to _search_json_ld call --- youtube_dl/extractor/flipagram.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/flipagram.py b/youtube_dl/extractor/flipagram.py index acb6133ff..1902a2393 100644 --- a/youtube_dl/extractor/flipagram.py +++ b/youtube_dl/extractor/flipagram.py @@ -48,7 +48,7 @@ class FlipagramIE(InfoExtractor): flipagram = video_data['flipagram'] video = flipagram['video'] - json_ld = self._search_json_ld(webpage, video_id, default=False) + json_ld = self._search_json_ld(webpage, video_id, default={}) title = json_ld.get('title') or flipagram['captionText'] description = json_ld.get('description') or flipagram.get('captionText') From 082395d0a0f21375f63e3c9b33011b30d5de3aae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 8 Aug 2016 22:48:33 +0700 Subject: [PATCH 099/775] [extractor/generic] Add proper default to _search_json_ld call --- youtube_dl/extractor/generic.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index e89a03760..50500ce0e 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2241,8 +2241,8 @@ class GenericIE(InfoExtractor): # Looking for http://schema.org/VideoObject json_ld = self._search_json_ld( - webpage, video_id, default=None, expected_type='VideoObject') - if json_ld and json_ld.get('url'): + webpage, video_id, 
default={}, expected_type='VideoObject') + if json_ld.get('url'): info_dict.update({ 'title': video_title or info_dict['title'], 'description': video_description, From 8991844ea260a17943e935acfd5565caab27a99e Mon Sep 17 00:00:00 2001 From: Sukhbir Singh Date: Sun, 7 Aug 2016 19:11:00 -0400 Subject: [PATCH 100/775] [sonyliv] Add new extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/sonyliv.py | 28 ++++++++++++++++++++++++++++ 2 files changed, 29 insertions(+) create mode 100644 youtube_dl/extractor/sonyliv.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 7ede6afda..f1043dae6 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -756,6 +756,7 @@ from .smotri import ( ) from .snotr import SnotrIE from .sohu import SohuIE +from .sonyliv import SonyLIVIE from .soundcloud import ( SoundcloudIE, SoundcloudSetIE, diff --git a/youtube_dl/extractor/sonyliv.py b/youtube_dl/extractor/sonyliv.py new file mode 100644 index 000000000..c717b1f4d --- /dev/null +++ b/youtube_dl/extractor/sonyliv.py @@ -0,0 +1,28 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class SonyLIVIE(InfoExtractor): + _VALID_URL = r'http?://(?:www\.)?sonyliv\.com/details/episodes/(?P[0-9]+)/' + _TEST = { + 'url': 'http://www.sonyliv.com/details/episodes/5024612095001/Ep.-1---Achaari-Cheese-Toast---Bachelor\'s-Delight', + 'info_dict': { + 'title': 'Ep. 1 - Achaari Cheese Toast - Bachelor\'s Delight', + 'id': '5024612095001', + 'ext': 'mp4', + 'upload_date': '20160707', + 'description': 'Bachelor\'s Delight is a new food show from Sony LIV to satisfy the taste buds of all those bachelors looking for a quick bite.', + 'uploader_id': '4338955589001', + 'timestamp': 1467870968, + }, + 'params': { + 'skip_download': True, + }, + 'add_ie': ['BrightcoveNew'], + } + BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/4338955589001/default_default/index.html?videoId=%s' + + def _real_extract(self, url): + return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % self._match_id(url), 'BrightcoveNew') From 77426a087b6af712b69faf62a54db959f0d13288 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 9 Aug 2016 02:16:28 +0700 Subject: [PATCH 101/775] [sonyliv] Improve (Closes #10258) --- youtube_dl/extractor/sonyliv.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/sonyliv.py b/youtube_dl/extractor/sonyliv.py index c717b1f4d..accd112aa 100644 --- a/youtube_dl/extractor/sonyliv.py +++ b/youtube_dl/extractor/sonyliv.py @@ -5,15 +5,15 @@ from .common import InfoExtractor class SonyLIVIE(InfoExtractor): - _VALID_URL = r'http?://(?:www\.)?sonyliv\.com/details/episodes/(?P[0-9]+)/' - _TEST = { - 'url': 'http://www.sonyliv.com/details/episodes/5024612095001/Ep.-1---Achaari-Cheese-Toast---Bachelor\'s-Delight', + _VALID_URL = r'https?://(?:www\.)?sonyliv\.com/details/[^/]+/(?P\d+)' + _TESTS = [{ + 'url': "http://www.sonyliv.com/details/episodes/5024612095001/Ep.-1---Achaari-Cheese-Toast---Bachelor's-Delight", 'info_dict': { - 'title': 'Ep. 1 - Achaari Cheese Toast - Bachelor\'s Delight', + 'title': "Ep. 
1 - Achaari Cheese Toast - Bachelor's Delight", 'id': '5024612095001', 'ext': 'mp4', 'upload_date': '20160707', - 'description': 'Bachelor\'s Delight is a new food show from Sony LIV to satisfy the taste buds of all those bachelors looking for a quick bite.', + 'description': 'md5:7f28509a148d5be9d0782b4d5106410d', 'uploader_id': '4338955589001', 'timestamp': 1467870968, }, @@ -21,8 +21,14 @@ class SonyLIVIE(InfoExtractor): 'skip_download': True, }, 'add_ie': ['BrightcoveNew'], - } + }, { + 'url': 'http://www.sonyliv.com/details/full%20movie/4951168986001/Sei-Raat-(Bangla)', + 'only_matching': True, + }] + BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/4338955589001/default_default/index.html?videoId=%s' def _real_extract(self, url): - return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % self._match_id(url), 'BrightcoveNew') + brightcove_id = self._match_id(url) + return self.url_result( + self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id) From affaea0688780c03afe36bc81c7151e696649b5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20Zvoni=CC=81c=CC=8Cek?= Date: Sat, 6 Aug 2016 15:26:48 +0200 Subject: [PATCH 102/775] [rbmaradio] Fixed extractor --- youtube_dl/extractor/rbmaradio.py | 45 +++++++++++++------------------ 1 file changed, 18 insertions(+), 27 deletions(-) diff --git a/youtube_dl/extractor/rbmaradio.py b/youtube_dl/extractor/rbmaradio.py index 7932af6ef..92f0d8f76 100644 --- a/youtube_dl/extractor/rbmaradio.py +++ b/youtube_dl/extractor/rbmaradio.py @@ -1,55 +1,46 @@ # encoding: utf-8 from __future__ import unicode_literals -import json -import re - from .common import InfoExtractor from ..utils import ( - ExtractorError, + clean_html ) class RBMARadioIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?rbmaradio\.com/shows/(?P[^/]+)$' + _VALID_URL = r'https?://(?:www\.)?rbmaradio\.com/shows/[^/]+/episodes/(?P[^/]+)$' _TEST = { - 'url': 'http://www.rbmaradio.com/shows/ford-lopatin-live-at-primavera-sound-2011', + 'url': 'https://www.rbmaradio.com/shows/main-stage/episodes/ford-lopatin-live-at-primavera-sound-2011', 'md5': '6bc6f9bcb18994b4c983bc3bf4384d95', 'info_dict': { 'id': 'ford-lopatin-live-at-primavera-sound-2011', 'ext': 'mp3', - 'uploader_id': 'ford-lopatin', - 'location': 'Spain', 'description': 'Joel Ford and Daniel ’Oneohtrix Point Never’ Lopatin fly their midified pop extravaganza to Spain. 
Live at Primavera Sound 2011.', - 'uploader': 'Ford & Lopatin', - 'title': 'Live at Primavera Sound 2011', + 'title': 'Ford & Lopatin - Main Stage', }, } def _real_extract(self, url): - m = re.match(self._VALID_URL, url) - video_id = m.group('videoID') + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) + json_data = self._search_regex(r'', + webpage, 'json data') + data = self._parse_json(json_data, video_id) - json_data = self._search_regex(r'window\.gon.*?gon\.show=(.+?);$', - webpage, 'json data', flags=re.MULTILINE) + item = None + for episode in data['episodes']: + items = data['episodes'][episode] + if video_id in items: + item = items[video_id] - try: - data = json.loads(json_data) - except ValueError as e: - raise ExtractorError('Invalid JSON: ' + str(e)) - - video_url = data['akamai_url'] + '&cbr=256' + video_url = item['audioURL'] + '?cbr=256' return { 'id': video_id, 'url': video_url, - 'title': data['title'], - 'description': data.get('teaser_text'), - 'location': data.get('country_of_origin'), - 'uploader': data.get('host', {}).get('name'), - 'uploader_id': data.get('host', {}).get('slug'), - 'thumbnail': data.get('image', {}).get('large_url_2x'), - 'duration': data.get('duration'), + 'title': item.get('title') + ' - ' + item.get('showTitle'), + 'description': clean_html(item.get('longTeaser')), + 'thumbnail': self._proto_relative_url(item.get('imageURL', {}).get('landscape')), + 'duration': item.get('duration'), } From 3a380766d1d6abd83213319b41cdf9a18977a69c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 9 Aug 2016 02:46:29 +0700 Subject: [PATCH 103/775] [rbmaradio] Improve, simplify and extract all formats (Closes #10242) --- youtube_dl/extractor/rbmaradio.py | 69 +++++++++++++++++++++---------- 1 file changed, 47 insertions(+), 22 deletions(-) diff --git a/youtube_dl/extractor/rbmaradio.py b/youtube_dl/extractor/rbmaradio.py index 92f0d8f76..471928ef8 100644 --- a/youtube_dl/extractor/rbmaradio.py +++ b/youtube_dl/extractor/rbmaradio.py @@ -1,46 +1,71 @@ -# encoding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor +from ..compat import compat_str from ..utils import ( - clean_html + clean_html, + int_or_none, + unified_timestamp, + update_url_query, ) class RBMARadioIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?rbmaradio\.com/shows/[^/]+/episodes/(?P[^/]+)$' + _VALID_URL = r'https?://(?:www\.)?rbmaradio\.com/shows/(?P[^/]+)/episodes/(?P[^/?#&]+)' _TEST = { 'url': 'https://www.rbmaradio.com/shows/main-stage/episodes/ford-lopatin-live-at-primavera-sound-2011', 'md5': '6bc6f9bcb18994b4c983bc3bf4384d95', 'info_dict': { 'id': 'ford-lopatin-live-at-primavera-sound-2011', 'ext': 'mp3', - 'description': 'Joel Ford and Daniel ’Oneohtrix Point Never’ Lopatin fly their midified pop extravaganza to Spain. 
Live at Primavera Sound 2011.', - 'title': 'Ford & Lopatin - Main Stage', + 'title': 'Main Stage - Ford & Lopatin', + 'description': 'md5:4f340fb48426423530af5a9d87bd7b91', + 'thumbnail': 're:^https?://.*\.jpg', + 'duration': 2452, + 'timestamp': 1307103164, + 'upload_date': '20110603', }, } def _real_extract(self, url): - video_id = self._match_id(url) + mobj = re.match(self._VALID_URL, url) + show_id = mobj.group('show_id') + episode_id = mobj.group('id') - webpage = self._download_webpage(url, video_id) - json_data = self._search_regex(r'', - webpage, 'json data') - data = self._parse_json(json_data, video_id) + webpage = self._download_webpage(url, episode_id) - item = None - for episode in data['episodes']: - items = data['episodes'][episode] - if video_id in items: - item = items[video_id] + episode = self._parse_json( + self._search_regex( + r'__INITIAL_STATE__\s*=\s*({.+?})\s*', + webpage, 'json data'), + episode_id)['episodes'][show_id][episode_id] - video_url = item['audioURL'] + '?cbr=256' + title = episode['title'] + + show_title = episode.get('showTitle') + if show_title: + title = '%s - %s' % (show_title, title) + + formats = [{ + 'url': update_url_query(episode['audioURL'], query={'cbr': abr}), + 'format_id': compat_str(abr), + 'abr': abr, + 'vcodec': 'none', + } for abr in (96, 128, 256)] + + description = clean_html(episode.get('longTeaser')) + thumbnail = self._proto_relative_url(episode.get('imageURL', {}).get('landscape')) + duration = int_or_none(episode.get('duration')) + timestamp = unified_timestamp(episode.get('publishedAt')) return { - 'id': video_id, - 'url': video_url, - 'title': item.get('title') + ' - ' + item.get('showTitle'), - 'description': clean_html(item.get('longTeaser')), - 'thumbnail': self._proto_relative_url(item.get('imageURL', {}).get('landscape')), - 'duration': item.get('duration'), + 'id': episode_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'duration': duration, + 'timestamp': timestamp, + 'formats': formats, } From 065bc354894f1d35592529455d9eb685470124b9 Mon Sep 17 00:00:00 2001 From: singh-pratyush96 Date: Thu, 4 Aug 2016 15:47:22 +0530 Subject: [PATCH 104/775] Add --max-sleep-interval (Closes #9930) --- youtube_dl/YoutubeDL.py | 6 +++++- youtube_dl/__init__.py | 7 +++++++ youtube_dl/downloader/common.py | 7 +++++-- youtube_dl/options.py | 12 ++++++++++-- 4 files changed, 27 insertions(+), 5 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 6551f086f..82b77783d 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -249,7 +249,11 @@ class YoutubeDL(object): source_address: (Experimental) Client-side IP address to bind to. call_home: Boolean, true iff we are allowed to contact the youtube-dl servers for debugging. - sleep_interval: Number of seconds to sleep before each download. + sleep_interval: Minimum number of seconds to sleep before each download. + Sleep will be for a random interval if --max-sleep-interval is also passed. + max_sleep_interval:Max number of seconds to sleep before each download. + Sleep will be for a random interval if passed along with --min-sleep-interval + or --sleep-interval, otherwise ignored. listformats: Print an overview of available video formats and exit. list_thumbnails: Print a table of all thumbnails and exit. 
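In behaviour the two options describe a range: with only sleep_interval the delay before each download stays fixed, while adding max_sleep_interval makes the delay a uniformly random value from [sleep_interval, max_sleep_interval]. A standalone sketch of that selection logic; the helper name is illustrative, and the real wiring lives in downloader/common.py further down:

import random
import time


def sleep_before_download(sleep_interval, max_sleep_interval=None):
    # Nothing configured: do not sleep at all.
    if not sleep_interval:
        return
    # Only a minimum given: fixed delay. Both given: random delay in the range.
    upper_bound = max_sleep_interval or sleep_interval
    delay = random.uniform(sleep_interval, upper_bound)
    time.sleep(delay)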
match_filter: A function that gets called with the info_dict of diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 2b34bf9c2..86af18d33 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -145,6 +145,12 @@ def _real_main(argv=None): if numeric_limit is None: parser.error('invalid max_filesize specified') opts.max_filesize = numeric_limit + if opts.sleep_interval is not None: + if opts.sleep_interval < 0: + parser.error('sleep interval should not be negative') + elif opts.max_sleep_interval is not None: + if opts.max_sleep_interval < opts.sleep_interval: + parser.error('max sleep interval should not be less than sleep interval') def parse_retries(retries): if retries in ('inf', 'infinite'): @@ -370,6 +376,7 @@ def _real_main(argv=None): 'source_address': opts.source_address, 'call_home': opts.call_home, 'sleep_interval': opts.sleep_interval, + 'max_sleep_interval': opts.max_sleep_interval, 'external_downloader': opts.external_downloader, 'list_thumbnails': opts.list_thumbnails, 'playlist_items': opts.playlist_items, diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py index 1dba9f49a..8e377c72c 100644 --- a/youtube_dl/downloader/common.py +++ b/youtube_dl/downloader/common.py @@ -4,6 +4,7 @@ import os import re import sys import time +import random from ..compat import compat_os_name from ..utils import ( @@ -342,8 +343,10 @@ class FileDownloader(object): }) return True - sleep_interval = self.params.get('sleep_interval') - if sleep_interval: + sleep_lower_bound = self.params.get('sleep_interval') + if sleep_lower_bound: + sleep_upper_bound = self.params.get('max_sleep_interval', sleep_lower_bound) + sleep_interval = random.uniform(sleep_lower_bound, sleep_upper_bound) self.to_screen('[download] Sleeping %s seconds...' % sleep_interval) time.sleep(sleep_interval) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 942d44912..068e824a0 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -499,9 +499,17 @@ def parseOpts(overrideArguments=None): dest='bidi_workaround', action='store_true', help='Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH') workarounds.add_option( - '--sleep-interval', metavar='SECONDS', + '--sleep-interval', '--min-sleep-interval', metavar='SECONDS', dest='sleep_interval', type=float, - help='Number of seconds to sleep before each download.') + help='Minimum number of seconds to sleep before each download. Sleep will be for a random interval if ' + '--max-sleep-interval is also passed.' + ) + workarounds.add_option( + '--max-sleep-interval', metavar='SECONDS', + dest='max_sleep_interval', type=float, + help='Max number of seconds to sleep before each download. Sleep will be for a random interval if passed' + ' along with --min-sleep-interval or --sleep-interval, otherwise ignored.' 
+ ) verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options') verbosity.add_option( From 7aa589a5e10c983f701b48b8431cc5f54b85cf3b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 9 Aug 2016 03:46:52 +0700 Subject: [PATCH 105/775] Fix --min/max-sleep-interval wording --- youtube_dl/YoutubeDL.py | 15 ++++++++++----- youtube_dl/options.py | 15 +++++++++------ 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 82b77783d..193f8db9f 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -249,11 +249,16 @@ class YoutubeDL(object): source_address: (Experimental) Client-side IP address to bind to. call_home: Boolean, true iff we are allowed to contact the youtube-dl servers for debugging. - sleep_interval: Minimum number of seconds to sleep before each download. - Sleep will be for a random interval if --max-sleep-interval is also passed. - max_sleep_interval:Max number of seconds to sleep before each download. - Sleep will be for a random interval if passed along with --min-sleep-interval - or --sleep-interval, otherwise ignored. + sleep_interval: Number of seconds to sleep before each download when + used alone or a lower bound of a range for randomized + sleep before each download (minimum possible number + of seconds to sleep) when used along with + max_sleep_interval. + max_sleep_interval:Upper bound of a range for randomized sleep before each + download (maximum possible number of seconds to sleep). + Must only be used along with sleep_interval. + Actual sleep time will be a random float from range + [sleep_interval; max_sleep_interval]. listformats: Print an overview of available video formats and exit. list_thumbnails: Print a table of all thumbnails and exit. match_filter: A function that gets called with the info_dict of diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 068e824a0..d32a9e32c 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -501,15 +501,18 @@ def parseOpts(overrideArguments=None): workarounds.add_option( '--sleep-interval', '--min-sleep-interval', metavar='SECONDS', dest='sleep_interval', type=float, - help='Minimum number of seconds to sleep before each download. Sleep will be for a random interval if ' - '--max-sleep-interval is also passed.' - ) + help=( + 'Number of seconds to sleep before each download when used alone ' + 'or a lower bound of a range for randomized sleep before each download ' + '(minimum possible number of seconds to sleep) when used along with ' + '--max-sleep-interval.')) workarounds.add_option( '--max-sleep-interval', metavar='SECONDS', dest='max_sleep_interval', type=float, - help='Max number of seconds to sleep before each download. Sleep will be for a random interval if passed' - ' along with --min-sleep-interval or --sleep-interval, otherwise ignored.' - ) + help=( + 'Upper bound of a range for randomized sleep before each download ' + '(maximum possible number of seconds to sleep). 
Must only be used ' + 'along with --min-sleep-interval.')) verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options') verbosity.add_option( From 1ad6b891b21b45830736698a7b59c30d9605a562 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 9 Aug 2016 03:47:56 +0700 Subject: [PATCH 106/775] Add more checks for --min/max-sleep-interval arguments and use more idiomatic naming --- youtube_dl/__init__.py | 12 ++++++++---- youtube_dl/downloader/common.py | 9 +++++---- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 86af18d33..a9730292c 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -147,10 +147,14 @@ def _real_main(argv=None): opts.max_filesize = numeric_limit if opts.sleep_interval is not None: if opts.sleep_interval < 0: - parser.error('sleep interval should not be negative') - elif opts.max_sleep_interval is not None: - if opts.max_sleep_interval < opts.sleep_interval: - parser.error('max sleep interval should not be less than sleep interval') + parser.error('sleep interval must be positive or 0') + if opts.max_sleep_interval is not None: + if opts.max_sleep_interval < 0: + parser.error('max sleep interval must be positive or 0') + if opts.max_sleep_interval < opts.sleep_interval: + parser.error('max sleep interval must be greater than or equal to min sleep interval') + else: + opts.max_sleep_interval = opts.sleep_interval def parse_retries(retries): if retries in ('inf', 'infinite'): diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py index 8e377c72c..8482cbd84 100644 --- a/youtube_dl/downloader/common.py +++ b/youtube_dl/downloader/common.py @@ -343,10 +343,11 @@ class FileDownloader(object): }) return True - sleep_lower_bound = self.params.get('sleep_interval') - if sleep_lower_bound: - sleep_upper_bound = self.params.get('max_sleep_interval', sleep_lower_bound) - sleep_interval = random.uniform(sleep_lower_bound, sleep_upper_bound) + min_sleep_interval = self.params.get('sleep_interval') + if min_sleep_interval: + max_sleep_interval = self.params.get('max_sleep_interval', min_sleep_interval) + print(min_sleep_interval, max_sleep_interval) + sleep_interval = random.uniform(min_sleep_interval, max_sleep_interval) self.to_screen('[download] Sleeping %s seconds...' % sleep_interval) time.sleep(sleep_interval) From 5e42f8a0adec94156b911399558a967214a9d3aa Mon Sep 17 00:00:00 2001 From: nyorain Date: Sat, 6 Aug 2016 01:21:39 +0200 Subject: [PATCH 107/775] Make --metadata-from-title non fatal Output a warning if the metadata can't be parsed from the title (and don't write any metadata) instead of raising a critical error. 
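The post processor turns the --metadata-from-title format string into a regular expression with one named group per field and copies the matched groups into the info dict; with this change a title that does not match only triggers a warning. A rough, self-contained sketch of that parsing step, where the regex is an assumed equivalent of the format '%(artist)s - %(title)s' (the real conversion lives in MetadataFromTitlePP and is untouched here):

import re

TITLE_FORMAT = '%(artist)s - %(title)s'
# Assumed named-group equivalent of the format above.
TITLE_REGEX = r'(?P<artist>.+)\ -\ (?P<title>.+)'


def metadata_from_title(title):
    match = re.match(TITLE_REGEX, title)
    if match is None:
        # The non-fatal path this patch introduces: report and write nothing.
        print('[fromtitle] Could not interpret title of video as "%s"' % TITLE_FORMAT)
        return {}
    return match.groupdict()


# metadata_from_title('Some Artist - Some Song')
#     -> {'artist': 'Some Artist', 'title': 'Some Song'}
# metadata_from_title('Title without separator')
#     -> {} (only the warning is printed)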
--- youtube_dl/postprocessor/metadatafromtitle.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/postprocessor/metadatafromtitle.py b/youtube_dl/postprocessor/metadatafromtitle.py index 42377fa0f..f7c831c67 100644 --- a/youtube_dl/postprocessor/metadatafromtitle.py +++ b/youtube_dl/postprocessor/metadatafromtitle.py @@ -38,7 +38,8 @@ class MetadataFromTitlePP(PostProcessor): title = info['title'] match = re.match(self._titleregex, title) if match is None: - raise MetadataFromTitlePPError('Could not interpret title of video as "%s"' % self._titleformat) + self._downloader.to_screen('[fromtitle] Could not interpret title of video as "%s"' % self._titleformat) + return [], info for attribute, value in match.groupdict().items(): value = match.group(attribute) info[attribute] = value From 25dd58ca6a1326c27e57262c3a298b163fc3c1eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 9 Aug 2016 04:01:05 +0700 Subject: [PATCH 108/775] [metadatafromtitle] Remove unused exception class --- youtube_dl/postprocessor/metadatafromtitle.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/youtube_dl/postprocessor/metadatafromtitle.py b/youtube_dl/postprocessor/metadatafromtitle.py index f7c831c67..920573da9 100644 --- a/youtube_dl/postprocessor/metadatafromtitle.py +++ b/youtube_dl/postprocessor/metadatafromtitle.py @@ -3,11 +3,6 @@ from __future__ import unicode_literals import re from .common import PostProcessor -from ..utils import PostProcessingError - - -class MetadataFromTitlePPError(PostProcessingError): - pass class MetadataFromTitlePP(PostProcessor): From 9778b3e7eede2d405f0102a958ea7df7ab2c6444 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 9 Aug 2016 04:03:52 +0700 Subject: [PATCH 109/775] Credit @zvonicek for #10242 and #10253 --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 890c827a0..7e17d625b 100644 --- a/AUTHORS +++ b/AUTHORS @@ -179,3 +179,4 @@ Jakub Adam Wieczorek Aleksandar Topuzović Nehal Patel Rob van Bekkum +Petr Zvoníček From b6578166847c5435095346b0225abd758cad6bb4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 9 Aug 2016 04:04:45 +0700 Subject: [PATCH 110/775] Credit @singh-pratyush96 for #10223 --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 7e17d625b..1fd4be785 100644 --- a/AUTHORS +++ b/AUTHORS @@ -180,3 +180,4 @@ Aleksandar Topuzović Nehal Patel Rob van Bekkum Petr Zvoníček +Pratyush Singh From 4e62d26aa211281ae85e9942c01adf31fe80aee5 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 9 Aug 2016 15:09:08 +0100 Subject: [PATCH 111/775] [uol] Add new extractor(#4263) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/uol.py | 130 +++++++++++++++++++++++++++++ 2 files changed, 131 insertions(+) create mode 100644 youtube_dl/extractor/uol.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index f1043dae6..387230be0 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -929,6 +929,7 @@ from .udemy import ( from .udn import UDNEmbedIE from .digiteka import DigitekaIE from .unistra import UnistraIE +from .uol import UOLIE from .urort import UrortIE from .urplay import URPlayIE from .usatoday import USATodayIE diff --git a/youtube_dl/extractor/uol.py b/youtube_dl/extractor/uol.py new file mode 100644 index 000000000..5c6a3577b --- /dev/null +++ b/youtube_dl/extractor/uol.py @@ -0,0 +1,130 @@ +# coding: utf-8 
+from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + clean_html, + int_or_none, + parse_duration, + update_url_query, + str_or_none, +) + + +class UOLIE(InfoExtractor): + IE_NAME = 'uol.com.br' + _VALID_URL = r'https?://(?:.+?\.)?uol\.com\.br/.*?(?:(?:mediaId|v)=|view/(?:[a-z0-9]+/)?|video(?:=|/(?:\d{4}/\d{2}/\d{2}/)?))(?P\d+|[\w-]+-[A-Z0-9]+)' + _TESTS = [{ + 'url': 'http://player.mais.uol.com.br/player_video_v3.swf?mediaId=15951931', + 'md5': '25291da27dc45e0afb5718a8603d3816', + 'info_dict': { + 'id': '15951931', + 'ext': 'mp4', + 'title': 'Miss simpatia é encontrada morta', + 'description': 'md5:3f8c11a0c0556d66daf7e5b45ef823b2', + } + }, { + 'url': 'http://tvuol.uol.com.br/video/incendio-destroi-uma-das-maiores-casas-noturnas-de-londres-04024E9A3268D4C95326', + 'md5': 'e41a2fb7b7398a3a46b6af37b15c00c9', + 'info_dict': { + 'id': '15954259', + 'ext': 'mp4', + 'title': 'Incêndio destrói uma das maiores casas noturnas de Londres', + 'description': 'Em Londres, um incêndio destruiu uma das maiores boates da cidade. Não há informações sobre vítimas.', + } + }, { + 'url': 'http://mais.uol.com.br/static/uolplayer/index.html?mediaId=15951931', + 'only_matching': True, + }, { + 'url': 'http://mais.uol.com.br/view/15954259', + 'only_matching': True, + }, { + 'url': 'http://noticias.band.uol.com.br/brasilurgente/video/2016/08/05/15951931/miss-simpatia-e-encontrada-morta.html', + 'only_matching': True, + }, { + 'url': 'http://videos.band.uol.com.br/programa.asp?e=noticias&pr=brasil-urgente&v=15951931&t=Policia-desmonte-base-do-PCC-na-Cracolandia', + 'only_matching': True, + }, { + 'url': 'http://mais.uol.com.br/view/cphaa0gl2x8r/incendio-destroi-uma-das-maiores-casas-noturnas-de-londres-04024E9A3268D4C95326', + 'only_matching': True, + }, { + 'url': 'http://noticias.uol.com.br//videos/assistir.htm?video=rafaela-silva-inspira-criancas-no-judo-04024D983968D4C95326', + 'only_matching': True, + }, { + 'url': 'http://mais.uol.com.br/view/e0qbgxid79uv/15275470', + 'only_matching': True, + }] + + _FORMATS = { + '2': { + 'width': 640, + 'height': 360, + }, + '5': { + 'width': 1080, + 'height': 720, + }, + '6': { + 'width': 426, + 'height': 240, + }, + '7': { + 'width': 1920, + 'height': 1080, + }, + '8': { + 'width': 192, + 'height': 144, + }, + '9': { + 'width': 568, + 'height': 320, + }, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + if not video_id.isdigit(): + embed_page = self._download_webpage('https://jsuol.com.br/c/tv/uol/embed/?params=[embed,%s]' % video_id, video_id) + video_id = self._search_regex(r'mediaId=(\d+)', embed_page, 'media id') + video_data = self._download_json( + 'http://mais.uol.com.br/apiuol/v3/player/getMedia/%s.json' % video_id, + video_id)['item'] + title = video_data['title'] + + query = { + 'ver': video_data.get('numRevision', 2), + 'r': 'http://mais.uol.com.br', + } + formats = [] + for f in video_data.get('formats', []): + f_url = f.get('url') or f.get('secureUrl') + if not f_url: + continue + format_id = str_or_none(f.get('id')) + fmt = { + 'format_id': format_id, + 'url': update_url_query(f_url, query), + } + fmt.update(self._FORMATS.get(format_id, {})) + formats.append(fmt) + self._sort_formats(formats) + + tags = [] + for tag in video_data.get('tags', []): + tag_description = tag.get('description') + if not tag_description: + continue + tags.append(tag_description) + + return { + 'id': video_id, + 'title': title, + 'description': clean_html(video_data.get('desMedia')), + 
'thumbnail': video_data.get('thumbnail'), + 'duration': int_or_none(video_data.get('durationSeconds')) or parse_duration(video_data.get('duration')), + 'tags': tags, + 'formats': formats, + } From 20ef4123b9ddbac62f5e4cbd27979adb9b0d948c Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 9 Aug 2016 15:13:15 +0100 Subject: [PATCH 112/775] [uol] remove unused import --- youtube_dl/extractor/uol.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/youtube_dl/extractor/uol.py b/youtube_dl/extractor/uol.py index 5c6a3577b..c27c64387 100644 --- a/youtube_dl/extractor/uol.py +++ b/youtube_dl/extractor/uol.py @@ -1,8 +1,6 @@ # coding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor from ..utils import ( clean_html, From cc9c8ce5df9c1ac2f369a8060cdcca1e18f25457 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 9 Aug 2016 22:24:58 +0700 Subject: [PATCH 113/775] [devscripts/prepare_manpage] Fix description strings starting with dash (Closes #10273) --- devscripts/prepare_manpage.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/devscripts/prepare_manpage.py b/devscripts/prepare_manpage.py index e3f6339b5..ce548739f 100644 --- a/devscripts/prepare_manpage.py +++ b/devscripts/prepare_manpage.py @@ -54,17 +54,21 @@ def filter_options(readme): if in_options: if line.lstrip().startswith('-'): - option, description = re.split(r'\s{2,}', line.lstrip()) - split_option = option.split(' ') + split = re.split(r'\s{2,}', line.lstrip()) + # Description string may start with `-` as well. If there is + # only one piece then it's a description bit not an option. + if len(split) > 1: + option, description = split + split_option = option.split(' ') - if not split_option[-1].startswith('-'): # metavar - option = ' '.join(split_option[:-1] + ['*%s*' % split_option[-1]]) + if not split_option[-1].startswith('-'): # metavar + option = ' '.join(split_option[:-1] + ['*%s*' % split_option[-1]]) - # Pandoc's definition_lists. See http://pandoc.org/README.html - # for more information. - ret += '\n%s\n: %s\n' % (option, description) - else: - ret += line.lstrip() + '\n' + # Pandoc's definition_lists. See http://pandoc.org/README.html + # for more information. 
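# Illustration (not part of the patch): an option line splits into two pieces
# on runs of two or more spaces, while a wrapped description line that merely
# starts with a dash yields a single piece and now falls through to the plain
# ret += line.lstrip() branch instead of breaking the two-value unpacking:
#
#   re.split(r'\s{2,}', '--max-sleep-interval SECONDS  Upper bound of a range ...')
#       -> ['--max-sleep-interval SECONDS', 'Upper bound of a range ...']
#   re.split(r'\s{2,}', '--max-sleep-interval.')
#       -> ['--max-sleep-interval.']   # one piece: kept as description text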
+ ret += '\n%s\n: %s\n' % (option, description) + continue + ret += line.lstrip() + '\n' else: ret += line + '\n' From 5c8411e968a2e6962bc477356231483963248c83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 10 Aug 2016 00:18:28 +0700 Subject: [PATCH 114/775] [ChangeLog] Actualize --- ChangeLog | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 657ff3e48..2b6794c4a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,8 +1,20 @@ version +Core +* Make --metadata-from-title non fatal when title does not match the pattern +* Introduce options for randomized sleep before each download + --min-sleep-interval and --max-sleep-interval (#9930) +* Respect default in _search_json_ld + Extractors -* [kuwo:singer] Fix extraction ++ [uol] Add extractor for uol.com.br (#4263) +* [rbmaradio] Fix extraction and extract all formats (#10242) ++ [sonyliv] Add extractor for sonyliv.com (#10258) * [aparat] Fix extraction +* [cwtv] Extract HTTP formats ++ [rozhlas] Add extractor for prehravac.rozhlas.cz (#10253) +* [kuwo:singer] Fix extraction + version 2016.08.07 From b1ce2ba1978142c75418ead4b91ea6138bb37a16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 10 Aug 2016 00:20:44 +0700 Subject: [PATCH 115/775] release 2016.08.10 --- .github/ISSUE_TEMPLATE.md | 6 +++--- CONTRIBUTING.md | 2 +- ChangeLog | 2 +- README.md | 10 +++++++++- docs/supportedsites.md | 3 +++ youtube_dl/version.py | 2 +- 6 files changed, 18 insertions(+), 7 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 2319e45df..1c06ba36e 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.08.07*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.08.07** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.08.10*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.08.10** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.08.07 +[debug] youtube-dl version 2016.08.10 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index fbf0ab7e8..95392030e 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -46,7 +46,7 @@ Make sure that someone has not already opened the issue you're trying to open. S ### Why are existing options not enough? 
-Before requesting a new feature, please have a quick peek at [the list of supported options](https://github.com/rg3/youtube-dl/blob/master/README.md#synopsis). Many feature requests are for features that actually exist already! Please, absolutely do show off your work in the issue report and detail how the existing similar options do *not* solve your problem. +Before requesting a new feature, please have a quick peek at [the list of supported options](https://github.com/rg3/youtube-dl/blob/master/README.md#options). Many feature requests are for features that actually exist already! Please, absolutely do show off your work in the issue report and detail how the existing similar options do *not* solve your problem. ### Is there enough context in your bug report? diff --git a/ChangeLog b/ChangeLog index 2b6794c4a..ac9e78a8d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2016.08.10 Core * Make --metadata-from-title non fatal when title does not match the pattern diff --git a/README.md b/README.md index b42d5c730..cabbbef76 100644 --- a/README.md +++ b/README.md @@ -330,7 +330,15 @@ which means you can modify it, redistribute it or use it however you like. bidirectional text support. Requires bidiv or fribidi executable in PATH --sleep-interval SECONDS Number of seconds to sleep before each - download. + download when used alone or a lower bound + of a range for randomized sleep before each + download (minimum possible number of + seconds to sleep) when used along with + --max-sleep-interval. + --max-sleep-interval SECONDS Upper bound of a range for randomized sleep + before each download (maximum possible + number of seconds to sleep). Must only be + used along with --min-sleep-interval. ## Video Format Options: -f, --format FORMAT Video format code, see the "FORMAT diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 3608e1807..a44167a94 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -564,6 +564,7 @@ - **RoosterTeeth** - **RottenTomatoes** - **Roxwel** + - **Rozhlas** - **RTBF** - **rte**: Raidió Teilifís Éireann TV - **rte:radio**: Raidió Teilifís Éireann radio @@ -621,6 +622,7 @@ - **smotri:user**: Smotri.com user videos - **Snotr** - **Sohu** + - **SonyLIV** - **soundcloud** - **soundcloud:playlist** - **soundcloud:search**: Soundcloud search @@ -747,6 +749,7 @@ - **udemy:course** - **UDNEmbed**: 聯合影音 - **Unistra** + - **uol.com.br** - **Urort**: NRK P3 Urørt - **URPlay** - **USAToday** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index b48552031..f7ad846d9 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.08.07' +__version__ = '2016.08.10' From 81c13222c6418b72e0d5e11ed37700e984c4de43 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Wed, 10 Aug 2016 11:36:49 +0800 Subject: [PATCH 116/775] [utils] Recognize more formats in unified_timestamp Used in CtsNews --- ChangeLog | 6 ++++++ youtube_dl/utils.py | 1 + 2 files changed, 7 insertions(+) diff --git a/ChangeLog b/ChangeLog index ac9e78a8d..cd59a87a9 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Core ++ Recognize more formats in unified_timestamp + + version 2016.08.10 Core diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index c50238ba1..a03f7184d 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -122,6 +122,7 @@ DATE_FORMATS = ( '%Y %m %d', '%Y-%m-%d', '%Y/%m/%d', + '%Y/%m/%d %H:%M', '%Y/%m/%d %H:%M:%S', '%Y-%m-%d %H:%M:%S', '%Y-%m-%d 
%H:%M:%S.%f', From 69d8eeeec56e4b9f4f3e59086a6f0b0820fd37c9 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Wed, 10 Aug 2016 11:38:19 +0800 Subject: [PATCH 117/775] [ctsnews] Fix extraction --- ChangeLog | 3 ++ youtube_dl/extractor/ctsnews.py | 49 ++++++++++++++------------------- 2 files changed, 24 insertions(+), 28 deletions(-) diff --git a/ChangeLog b/ChangeLog index cd59a87a9..adbdc4f9b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -3,6 +3,9 @@ version Core + Recognize more formats in unified_timestamp +Extractors +* [ctsnews] Fix extraction + version 2016.08.10 diff --git a/youtube_dl/extractor/ctsnews.py b/youtube_dl/extractor/ctsnews.py index 1622fc844..83ca90c3b 100644 --- a/youtube_dl/extractor/ctsnews.py +++ b/youtube_dl/extractor/ctsnews.py @@ -1,13 +1,12 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import parse_iso8601, ExtractorError +from ..utils import unified_timestamp class CtsNewsIE(InfoExtractor): IE_DESC = '華視新聞' - # https connection failed (Connection reset) _VALID_URL = r'https?://news\.cts\.com\.tw/[a-z]+/[a-z]+/\d+/(?P\d+)\.html' _TESTS = [{ 'url': 'http://news.cts.com.tw/cts/international/201501/201501291578109.html', @@ -16,7 +15,7 @@ class CtsNewsIE(InfoExtractor): 'id': '201501291578109', 'ext': 'mp4', 'title': '以色列.真主黨交火 3人死亡', - 'description': 'md5:95e9b295c898b7ff294f09d450178d7d', + 'description': '以色列和黎巴嫩真主黨,爆發五年最嚴重衝突,雙方砲轟交火,兩名以軍死亡,還有一名西班牙籍的聯合國維和人...', 'timestamp': 1422528540, 'upload_date': '20150129', } @@ -28,7 +27,7 @@ class CtsNewsIE(InfoExtractor): 'id': '201309031304098', 'ext': 'mp4', 'title': '韓國31歲童顏男 貌如十多歲小孩', - 'description': 'md5:f183feeba3752b683827aab71adad584', + 'description': '越有年紀的人,越希望看起來年輕一點,而南韓卻有一位31歲的男子,看起來像是11、12歲的小孩,身...', 'thumbnail': 're:^https?://.*\.jpg$', 'timestamp': 1378205880, 'upload_date': '20130903', @@ -36,8 +35,7 @@ class CtsNewsIE(InfoExtractor): }, { # With Youtube embedded video 'url': 'http://news.cts.com.tw/cts/money/201501/201501291578003.html', - 'md5': '1d842c771dc94c8c3bca5af2cc1db9c5', - 'add_ie': ['Youtube'], + 'md5': 'e4726b2ccd70ba2c319865e28f0a91d1', 'info_dict': { 'id': 'OVbfO7d0_hQ', 'ext': 'mp4', @@ -47,42 +45,37 @@ class CtsNewsIE(InfoExtractor): 'upload_date': '20150128', 'uploader_id': 'TBSCTS', 'uploader': '中華電視公司', - } + }, + 'add_ie': ['Youtube'], }] def _real_extract(self, url): news_id = self._match_id(url) page = self._download_webpage(url, news_id) - if self._search_regex(r'(CTSPlayer2)', page, 'CTSPlayer2 identifier', default=None): - feed_url = self._html_search_regex( - r'(http://news\.cts\.com\.tw/action/mp4feed\.php\?news_id=\d+)', - page, 'feed url') - video_url = self._download_webpage( - feed_url, news_id, note='Fetching feed') + news_id = self._hidden_inputs(page).get('get_id') + + if news_id: + mp4_feed = self._download_json( + 'http://news.cts.com.tw/action/test_mp4feed.php', + news_id, note='Fetching feed', query={'news_id': news_id}) + video_url = mp4_feed['source_url'] else: self.to_screen('Not CTSPlayer video, trying Youtube...') youtube_url = self._search_regex( - r'src="(//www\.youtube\.com/embed/[^"]+)"', page, 'youtube url', - default=None) - if not youtube_url: - raise ExtractorError('The news includes no videos!', expected=True) + r'src="(//www\.youtube\.com/embed/[^"]+)"', page, 'youtube url') - return { - '_type': 'url', - 'url': youtube_url, - 'ie_key': 'Youtube', - } + return self.url_result(youtube_url, ie='Youtube') description = self._html_search_meta('description', page) - title 
= self._html_search_meta('title', page) + title = self._html_search_meta('title', page, fatal=True) thumbnail = self._html_search_meta('image', page) datetime_str = self._html_search_regex( - r'(\d{4}/\d{2}/\d{2} \d{2}:\d{2})', page, 'date and time') - # Transform into ISO 8601 format with timezone info - datetime_str = datetime_str.replace('/', '-') + ':00+0800' - timestamp = parse_iso8601(datetime_str, delimiter=' ') + r'(\d{4}/\d{2}/\d{2} \d{2}:\d{2})', page, 'date and time', fatal=False) + timestamp = None + if datetime_str: + timestamp = unified_timestamp(datetime_str) - 8 * 3600 return { 'id': news_id, From 57ce8a6d08a05140230864eccbc52029f1fd46c1 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 10 Aug 2016 14:17:22 +0100 Subject: [PATCH 118/775] [wat] improve extraction(#10281) add alternative method to extract http formats works even if the video is geo-restricted or removed from public access(most of the cases) --- youtube_dl/extractor/wat.py | 126 +++++++++++++++++++++++------------- 1 file changed, 81 insertions(+), 45 deletions(-) diff --git a/youtube_dl/extractor/wat.py b/youtube_dl/extractor/wat.py index 48fc438ed..9f1b8b4b5 100644 --- a/youtube_dl/extractor/wat.py +++ b/youtube_dl/extractor/wat.py @@ -9,6 +9,7 @@ from ..utils import ( ExtractorError, unified_strdate, HEADRequest, + int_or_none, ) @@ -30,48 +31,58 @@ class WatIE(InfoExtractor): }, { 'url': 'http://www.wat.tv/video/gregory-lemarchal-voix-ange-6z1v7_6ygkj_.html', - 'md5': 'fbc84e4378165278e743956d9c1bf16b', + 'md5': '34bdfa5ca9fd3c7eb88601b635b0424c', 'info_dict': { 'id': '11713075', 'ext': 'mp4', 'title': 'Grégory Lemarchal, une voix d\'ange depuis 10 ans (1/3)', - 'description': 'md5:b7a849cf16a2b733d9cd10c52906dee3', 'upload_date': '20140816', - 'duration': 2910, }, - 'skip': "Ce contenu n'est pas disponible pour l'instant.", + 'expected_warnings': ["Ce contenu n'est pas disponible pour l'instant."], }, ] + _FORMATS = ( + (200, 416, 234), + (400, 480, 270), + (600, 640, 360), + (1200, 640, 360), + (1800, 960, 540), + (2500, 1280, 720), + ) + def _real_extract(self, url): video_id = self._match_id(url) video_id = video_id if video_id.isdigit() and len(video_id) > 6 else compat_str(int(video_id, 36)) # 'contentv4' is used in the website, but it also returns the related # videos, we don't need them - video_info = self._download_json( - 'http://www.wat.tv/interface/contentv3/' + video_id, video_id)['media'] + video_data = self._download_json( + 'http://www.wat.tv/interface/contentv4s/' + video_id, video_id) + video_info = video_data['media'] error_desc = video_info.get('error_desc') if error_desc: - raise ExtractorError( - '%s returned error: %s' % (self.IE_NAME, error_desc), expected=True) + self.report_warning( + '%s returned error: %s' % (self.IE_NAME, error_desc)) chapters = video_info['chapters'] - first_chapter = chapters[0] + if chapters: + first_chapter = chapters[0] - def video_id_for_chapter(chapter): - return chapter['tc_start'].split('-')[0] + def video_id_for_chapter(chapter): + return chapter['tc_start'].split('-')[0] - if video_id_for_chapter(first_chapter) != video_id: - self.to_screen('Multipart video detected') - entries = [self.url_result('wat:%s' % video_id_for_chapter(chapter)) for chapter in chapters] - return self.playlist_result(entries, video_id, video_info['title']) - # Otherwise we can continue and extract just one part, we have to use - # the video id for getting the video url + if video_id_for_chapter(first_chapter) != video_id: + self.to_screen('Multipart video detected') + 
entries = [self.url_result('wat:%s' % video_id_for_chapter(chapter)) for chapter in chapters] + return self.playlist_result(entries, video_id, video_info['title']) + # Otherwise we can continue and extract just one part, we have to use + # the video id for getting the video url + else: + first_chapter = video_info - date_diffusion = first_chapter.get('date_diffusion') - upload_date = unified_strdate(date_diffusion) if date_diffusion else None + title = first_chapter['title'] def extract_url(path_template, url_type): req_url = 'http://www.wat.tv/get/%s' % (path_template % video_id) @@ -83,36 +94,61 @@ class WatIE(InfoExtractor): expected=True) return red_url - m3u8_url = extract_url('ipad/%s.m3u8', 'm3u8') - http_url = extract_url('android5/%s.mp4', 'http') - formats = [] - m3u8_formats = self._extract_m3u8_formats( - m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls') - formats.extend(m3u8_formats) - formats.extend(self._extract_f4m_formats( - m3u8_url.replace('ios.', 'web.').replace('.m3u8', '.f4m'), - video_id, f4m_id='hds', fatal=False)) - for m3u8_format in m3u8_formats: - vbr, abr = m3u8_format.get('vbr'), m3u8_format.get('abr') - if not vbr or not abr: - continue - f = m3u8_format.copy() - f.update({ - 'url': re.sub(r'%s-\d+00-\d+' % video_id, '%s-%d00-%d' % (video_id, round(vbr / 100), round(abr)), http_url), - 'format_id': f['format_id'].replace('hls', 'http'), - 'protocol': 'http', - }) - formats.append(f) - self._sort_formats(formats) + try: + http_url = extract_url('android5/%s.mp4', 'http') + m3u8_url = extract_url('ipad/%s.m3u8', 'm3u8') + m3u8_formats = self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls') + formats.extend(m3u8_formats) + formats.extend(self._extract_f4m_formats( + m3u8_url.replace('ios.', 'web.').replace('.m3u8', '.f4m'), + video_id, f4m_id='hds', fatal=False)) + for m3u8_format in m3u8_formats: + vbr, abr = m3u8_format.get('vbr'), m3u8_format.get('abr') + if not vbr or not abr: + continue + format_id = m3u8_format['format_id'].replace('hls', 'http') + fmt_url = re.sub(r'%s-\d+00-\d+' % video_id, '%s-%d00-%d' % (video_id, round(vbr / 100), round(abr)), http_url) + if self._is_valid_url(fmt_url, video_id, format_id): + f = m3u8_format.copy() + f.update({ + 'url': fmt_url, + 'format_id': format_id, + 'protocol': 'http', + }) + formats.append(f) + self._sort_formats(formats) + except ExtractorError: + abr = 64 + for vbr, width, height in self._FORMATS: + tbr = vbr + abr + format_id = 'http-%s' % tbr + fmt_url = 'http://dnl.adv.tf1.fr/2/USP-0x0/%s/%s/%s/ssm/%s-%s-64k.mp4' % (video_id[-4:-2], video_id[-2:], video_id, video_id, vbr) + if self._is_valid_url(fmt_url, video_id, format_id): + formats.append({ + 'format_id': format_id, + 'url': fmt_url, + 'vbr': vbr, + 'abr': abr, + 'width': width, + 'height': height, + }) + + date_diffusion = first_chapter.get('date_diffusion') or video_data.get('configv4', {}).get('estatS4') + upload_date = unified_strdate(date_diffusion) if date_diffusion else None + duration = None + files = video_info['files'] + if files: + duration = int_or_none(files[0].get('duration')) return { 'id': video_id, - 'title': first_chapter['title'], - 'thumbnail': first_chapter['preview'], - 'description': first_chapter['description'], - 'view_count': video_info['views'], + 'title': title, + 'thumbnail': first_chapter.get('preview'), + 'description': first_chapter.get('description'), + 'view_count': int_or_none(video_info.get('views')), 'upload_date': upload_date, - 'duration': 
video_info['files'][0]['duration'], + 'duration': duration, 'formats': formats, } From c3fa77bdef14643c966913c49f0400ebc1e46b10 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 10 Aug 2016 21:00:40 +0700 Subject: [PATCH 119/775] [formula1] Relax _VALID_URL (Closes #10283) --- youtube_dl/extractor/formula1.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/formula1.py b/youtube_dl/extractor/formula1.py index 322c41e5a..8c417ab65 100644 --- a/youtube_dl/extractor/formula1.py +++ b/youtube_dl/extractor/formula1.py @@ -5,8 +5,8 @@ from .common import InfoExtractor class Formula1IE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?formula1\.com/content/fom-website/en/video/\d{4}/\d{1,2}/(?P.+?)\.html' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?formula1\.com/(?:content/fom-website/)?en/video/\d{4}/\d{1,2}/(?P.+?)\.html' + _TESTS = [{ 'url': 'http://www.formula1.com/content/fom-website/en/video/2016/5/Race_highlights_-_Spain_2016.html', 'md5': '8c79e54be72078b26b89e0e111c0502b', 'info_dict': { @@ -15,7 +15,10 @@ class Formula1IE(InfoExtractor): 'title': 'Race highlights - Spain 2016', }, 'add_ie': ['Ooyala'], - } + }, { + 'url': 'http://www.formula1.com/en/video/2016/5/Race_highlights_-_Spain_2016.html', + 'only_matching': True, + }] def _real_extract(self, url): display_id = self._match_id(url) From 7f2ed4759513b153e526cd890fd5b8877f56f1c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 10 Aug 2016 21:07:43 +0700 Subject: [PATCH 120/775] [rtlnl] Relax _VALID_URL (Closes #10282) --- youtube_dl/extractor/rtlnl.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/rtlnl.py b/youtube_dl/extractor/rtlnl.py index 4d612b5e3..f0250af8a 100644 --- a/youtube_dl/extractor/rtlnl.py +++ b/youtube_dl/extractor/rtlnl.py @@ -14,7 +14,7 @@ class RtlNlIE(InfoExtractor): _VALID_URL = r'''(?x) https?://(?:www\.)? 
(?: - rtlxl\.nl/\#!/[^/]+/| + rtlxl\.nl/[^\#]*\#!/[^/]+/| rtl\.nl/system/videoplayer/(?:[^/]+/)+(?:video_)?embed\.html\b.+?\buuid= ) (?P[0-9a-f-]+)''' @@ -67,6 +67,9 @@ class RtlNlIE(InfoExtractor): }, { 'url': 'http://www.rtl.nl/system/videoplayer/derden/embed.html#!/uuid=bb0353b0-d6a4-1dad-90e9-18fe75b8d1f0', 'only_matching': True, + }, { + 'url': 'http://rtlxl.nl/?_ga=1.204735956.572365465.1466978370#!/rtl-nieuws-132237/3c487912-023b-49ac-903e-2c5d79f8410f', + 'only_matching': True, }] def _real_extract(self, url): From 7f832413d6e4aa5aae4c904c42e0ecf4ae72aaf9 Mon Sep 17 00:00:00 2001 From: lkho Date: Tue, 9 Aug 2016 15:25:23 +0800 Subject: [PATCH 121/775] Preserve line endings for downloaded subtitle files --- youtube_dl/YoutubeDL.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 193f8db9f..fd7577bb8 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1603,7 +1603,7 @@ class YoutubeDL(object): self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format)) else: self.to_screen('[info] Writing video subtitles to: ' + sub_filename) - with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile: + with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile: subfile.write(sub_data) except (OSError, IOError): self.report_error('Cannot write subtitles file ' + sub_filename) From b1927f4e8a07a7893392135a71fdb6818295bbad Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 11 Aug 2016 19:04:23 +0800 Subject: [PATCH 122/775] [YoutubeDL] Disable newline conversion when writing subtitles By default io.open() convert all '\n' occurrences to '\r\n' when writing files. If the content already contains '\r\n', it will be converted to '\r\r\n', breaking some video players. --- youtube_dl/YoutubeDL.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index fd7577bb8..e844dc98a 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1603,6 +1603,8 @@ class YoutubeDL(object): self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format)) else: self.to_screen('[info] Writing video subtitles to: ' + sub_filename) + # Use newline='' to prevent conversion of newline characters + # See https://github.com/rg3/youtube-dl/issues/10268 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile: subfile.write(sub_data) except (OSError, IOError): From e5f878c20573b258cad1974cc79a0526bcd1d46b Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 11 Aug 2016 19:13:41 +0800 Subject: [PATCH 123/775] [ChangeLog] Add change log for #10269 [skip ci] --- ChangeLog | 1 + 1 file changed, 1 insertion(+) diff --git a/ChangeLog b/ChangeLog index adbdc4f9b..b6ea39cba 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Core +* Subtitles are now written as is. Newline conversions are disabled. 
(#10268) + Recognize more formats in unified_timestamp Extractors From 30b25d382d1c2e06c19d8730ecbc0776a436d967 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 11 Aug 2016 21:42:55 +0700 Subject: [PATCH 124/775] [francetvinfo] Relax _VALID_URL --- youtube_dl/extractor/francetv.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py index 7653975e3..3233f66d5 100644 --- a/youtube_dl/extractor/francetv.py +++ b/youtube_dl/extractor/francetv.py @@ -131,7 +131,7 @@ class PluzzIE(FranceTVBaseInfoExtractor): class FranceTvInfoIE(FranceTVBaseInfoExtractor): IE_NAME = 'francetvinfo.fr' - _VALID_URL = r'https?://(?:www|mobile|france3-regions)\.francetvinfo\.fr/.*/(?P.+)\.html' + _VALID_URL = r'https?://(?:www|mobile|france3-regions)\.francetvinfo\.fr/(?:[^/]+/)*(?P<title>[^/?#&.]+)' _TESTS = [{ 'url': 'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html', @@ -206,6 +206,9 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor): 'uploader_id': 'x2q2ez', }, 'add_ie': ['Dailymotion'], + }, { + 'url': 'http://france3-regions.francetvinfo.fr/limousin/emissions/jt-1213-limousin', + 'only_matching': True, }] def _real_extract(self, url): From 0c070681c56589d44f81df8ed2165bca4333cef5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 11 Aug 2016 23:37:56 +0700 Subject: [PATCH 125/775] [chirbit] Fix extraction (Closes #10296) --- youtube_dl/extractor/chirbit.py | 54 ++++++++++++++++++--------------- 1 file changed, 30 insertions(+), 24 deletions(-) diff --git a/youtube_dl/extractor/chirbit.py b/youtube_dl/extractor/chirbit.py index b1eeaf101..b43518652 100644 --- a/youtube_dl/extractor/chirbit.py +++ b/youtube_dl/extractor/chirbit.py @@ -1,30 +1,33 @@ # coding: utf-8 from __future__ import unicode_literals +import base64 + from .common import InfoExtractor -from ..utils import ( - parse_duration, - int_or_none, -) +from ..utils import parse_duration class ChirbitIE(InfoExtractor): IE_NAME = 'chirbit' _VALID_URL = r'https?://(?:www\.)?chirb\.it/(?:(?:wp|pl)/|fb_chirbit_player\.swf\?key=)?(?P<id>[\da-zA-Z]+)' _TESTS = [{ - 'url': 'http://chirb.it/PrIPv5', - 'md5': '9847b0dad6ac3e074568bf2cfb197de8', + 'url': 'http://chirb.it/be2abG', 'info_dict': { - 'id': 'PrIPv5', + 'id': 'be2abG', 'ext': 'mp3', - 'title': 'Фасадстрой', - 'duration': 52, - 'view_count': int, - 'comment_count': int, + 'title': 'md5:f542ea253f5255240be4da375c6a5d7e', + 'description': 'md5:f24a4e22a71763e32da5fed59e47c770', + 'duration': 306, + }, + 'params': { + 'skip_download': True, } }, { 'url': 'https://chirb.it/fb_chirbit_player.swf?key=PrIPv5', 'only_matching': True, + }, { + 'url': 'https://chirb.it/wp/MN58c2', + 'only_matching': True, }] def _real_extract(self, url): @@ -33,27 +36,30 @@ class ChirbitIE(InfoExtractor): webpage = self._download_webpage( 'http://chirb.it/%s' % audio_id, audio_id) - audio_url = self._search_regex( - r'"setFile"\s*,\s*"([^"]+)"', webpage, 'audio url') + data_fd = self._search_regex( + r'data-fd=(["\'])(?P<url>(?:(?!\1).)+)\1', + webpage, 'data fd', group='url') + + # Reverse engineered from https://chirb.it/js/chirbit.player.js (look + # for soundURL) + audio_url = base64.b64decode( + data_fd[::-1].encode('ascii')).decode('utf-8') title = self._search_regex( - r'itemprop="name">([^<]+)', webpage, 'title') - duration = parse_duration(self._html_search_meta( - 'duration', webpage, 'duration', fatal=False)) - 
view_count = int_or_none(self._search_regex( - r'itemprop="playCount"\s*>(\d+)', webpage, - 'listen count', fatal=False)) - comment_count = int_or_none(self._search_regex( - r'>(\d+) Comments?:', webpage, - 'comment count', fatal=False)) + r'class=["\']chirbit-title["\'][^>]*>([^<]+)', webpage, 'title') + description = self._search_regex( + r'<h3>Description</h3>\s*<pre[^>]*>([^<]+)</pre>', + webpage, 'description', default=None) + duration = parse_duration(self._search_regex( + r'class=["\']c-length["\'][^>]*>([^<]+)', + webpage, 'duration', fatal=False)) return { 'id': audio_id, 'url': audio_url, 'title': title, + 'description': description, 'duration': duration, - 'view_count': view_count, - 'comment_count': comment_count, } From 0aef0771f8cf18d97ce1b6b9123ce76bae45f3ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 11 Aug 2016 23:47:27 +0700 Subject: [PATCH 126/775] [drtuber] Make dislike count optional (Closes #10297) --- youtube_dl/extractor/drtuber.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/drtuber.py b/youtube_dl/extractor/drtuber.py index 639f9182c..4e5557703 100644 --- a/youtube_dl/extractor/drtuber.py +++ b/youtube_dl/extractor/drtuber.py @@ -3,7 +3,10 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import str_to_int +from ..utils import ( + NO_DEFAULT, + str_to_int, +) class DrTuberIE(InfoExtractor): @@ -17,7 +20,6 @@ class DrTuberIE(InfoExtractor): 'ext': 'mp4', 'title': 'hot perky blonde naked golf', 'like_count': int, - 'dislike_count': int, 'comment_count': int, 'categories': ['Babe', 'Blonde', 'Erotic', 'Outdoor', 'Softcore', 'Solo'], 'thumbnail': 're:https?://.*\.jpg$', @@ -43,18 +45,20 @@ class DrTuberIE(InfoExtractor): r'poster="([^"]+)"', webpage, 'thumbnail', fatal=False) - def extract_count(id_, name): + def extract_count(id_, name, default=NO_DEFAULT): return str_to_int(self._html_search_regex( r'<span[^>]+(?:class|id)="%s"[^>]*>([\d,\.]+)</span>' % id_, - webpage, '%s count' % name, fatal=False)) + webpage, '%s count' % name, default=default, fatal=False)) like_count = extract_count('rate_likes', 'like') - dislike_count = extract_count('rate_dislikes', 'dislike') + dislike_count = extract_count('rate_dislikes', 'dislike', default=None) comment_count = extract_count('comments_count', 'comment') cats_str = self._search_regex( - r'<div[^>]+class="categories_list">(.+?)</div>', webpage, 'categories', fatal=False) - categories = [] if not cats_str else re.findall(r'<a title="([^"]+)"', cats_str) + r'<div[^>]+class="categories_list">(.+?)</div>', + webpage, 'categories', fatal=False) + categories = [] if not cats_str else re.findall( + r'<a title="([^"]+)"', cats_str) return { 'id': video_id, From 367976d49fba48ea44fc5bf622adcc989896f29a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 11 Aug 2016 23:47:52 +0700 Subject: [PATCH 127/775] [drtuber] Improve title extraction --- youtube_dl/extractor/drtuber.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/drtuber.py b/youtube_dl/extractor/drtuber.py index 4e5557703..e8870c460 100644 --- a/youtube_dl/extractor/drtuber.py +++ b/youtube_dl/extractor/drtuber.py @@ -38,7 +38,9 @@ class DrTuberIE(InfoExtractor): r'<source src="([^"]+)"', webpage, 'video URL') title = self._html_search_regex( - [r'<p[^>]+class="title_substrate">([^<]+)</p>', r'<title>([^<]+) - \d+'], + 
(r'class="title_watch"[^>]*><p>([^<]+)<', + r'<p[^>]+class="title_substrate">([^<]+)</p>', + r'<title>([^<]+) - \d+'), webpage, 'title') thumbnail = self._html_search_regex( From 0fd1b1624cc42412fe4701f9de09f49adfa467f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 11 Aug 2016 23:52:17 +0700 Subject: [PATCH 128/775] [goldenmoustache] Remove extractor (Closes #10298) Now uses dailymotion --- youtube_dl/extractor/extractors.py | 2 +- youtube_dl/extractor/goldenmoustache.py | 48 ------------------------- 2 files changed, 1 insertion(+), 49 deletions(-) delete mode 100644 youtube_dl/extractor/goldenmoustache.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 387230be0..c0c18393f 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -311,7 +311,6 @@ from .globo import ( ) from .godtube import GodTubeIE from .godtv import GodTVIE -from .goldenmoustache import GoldenMoustacheIE from .golem import GolemIE from .googledrive import GoogleDriveIE from .googleplus import GooglePlusIE @@ -1004,6 +1003,7 @@ from .viki import ( VikiIE, VikiChannelIE, ) +from .viu import ViuIE from .vk import ( VKIE, VKUserVideosIE, diff --git a/youtube_dl/extractor/goldenmoustache.py b/youtube_dl/extractor/goldenmoustache.py deleted file mode 100644 index 0fb509724..000000000 --- a/youtube_dl/extractor/goldenmoustache.py +++ /dev/null @@ -1,48 +0,0 @@ -from __future__ import unicode_literals - -from .common import InfoExtractor - - -class GoldenMoustacheIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?goldenmoustache\.com/(?P<display_id>[\w-]+)-(?P<id>\d+)' - _TESTS = [{ - 'url': 'http://www.goldenmoustache.com/suricate-le-poker-3700/', - 'md5': '0f904432fa07da5054d6c8beb5efb51a', - 'info_dict': { - 'id': '3700', - 'ext': 'mp4', - 'title': 'Suricate - Le Poker', - 'description': 'md5:3d1f242f44f8c8cb0a106f1fd08e5dc9', - 'thumbnail': 're:^https?://.*\.jpg$', - } - }, { - 'url': 'http://www.goldenmoustache.com/le-lab-tout-effacer-mc-fly-et-carlito-55249/', - 'md5': '27f0c50fb4dd5f01dc9082fc67cd5700', - 'info_dict': { - 'id': '55249', - 'ext': 'mp4', - 'title': 'Le LAB - Tout Effacer (Mc Fly et Carlito)', - 'description': 'md5:9b7fbf11023fb2250bd4b185e3de3b2a', - 'thumbnail': 're:^https?://.*\.(?:png|jpg)$', - } - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - video_url = self._html_search_regex( - r'data-src-type="mp4" data-src="([^"]+)"', webpage, 'video URL') - title = self._html_search_regex( - r'<title>(.*?)(?: - Golden Moustache)?', webpage, 'title') - thumbnail = self._og_search_thumbnail(webpage) - description = self._og_search_description(webpage) - - return { - 'id': video_id, - 'url': video_url, - 'ext': 'mp4', - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - } From a3be69b7f0d21c024e288a42864704f5c81d9dd0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 12 Aug 2016 00:14:51 +0700 Subject: [PATCH 129/775] [viu] Remove from extractors --- youtube_dl/extractor/extractors.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index c0c18393f..acf4e5d62 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1003,7 +1003,6 @@ from .viki import ( VikiIE, VikiChannelIE, ) -from .viu import ViuIE from .vk import ( VKIE, VKUserVideosIE, From 
fff37cfd4f09db6bb9f35da59b7d73b4e72855ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 12 Aug 2016 00:18:28 +0700 Subject: [PATCH 130/775] [ChangeLog] Actualize --- ChangeLog | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/ChangeLog b/ChangeLog index b6ea39cba..985dca7d0 100644 --- a/ChangeLog +++ b/ChangeLog @@ -5,6 +5,14 @@ Core + Recognize more formats in unified_timestamp Extractors +- [goldenmoustache] Remove extractor (#10298) +* [drtuber] Improve title extraction +* [drtuber] Make dislike count optional (#10297) +* [chirbit] Fix extraction (#10296) +* [francetvinfo] Relax URL regular expression +* [rtlnl] Relax URL regular expression (#10282) +* [formula1] Relax URL regular expression (#10283) +* [wat] Improve extraction (#10281) * [ctsnews] Fix extraction From b0081562d240fbe2ad854c53b6e098fa7e626247 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 12 Aug 2016 00:22:22 +0700 Subject: [PATCH 131/775] release 2016.08.12 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 1 - youtube_dl/version.py | 2 +- 4 files changed, 5 insertions(+), 6 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 1c06ba36e..6fdb2f77b 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.08.10*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.08.10** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.08.12*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.08.12** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.08.10 +[debug] youtube-dl version 2016.08.12 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 985dca7d0..376d96d12 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2016.08.12 Core * Subtitles are now written as is. Newline conversions are disabled. 
(#10268) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index a44167a94..8fb581d2b 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -265,7 +265,6 @@ - **GloboArticle** - **GodTube** - **GodTV** - - **GoldenMoustache** - **Golem** - **GoogleDrive** - **Goshgay** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index f7ad846d9..becf14458 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.08.10' +__version__ = '2016.08.12' From 990d533ee4a33f8c59921a4152817ff4835a974f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 12 Aug 2016 00:56:16 +0700 Subject: [PATCH 132/775] [crunchyroll] Add support for HLS (Closes #10301) --- youtube_dl/extractor/crunchyroll.py | 32 +++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 90a64303d..6d3abb52f 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -114,6 +114,21 @@ class CrunchyrollIE(CrunchyrollBaseIE): # rtmp 'skip_download': True, }, + }, { + 'url': 'http://www.crunchyroll.com/rezero-starting-life-in-another-world-/episode-5-the-morning-of-our-promise-is-still-distant-702409', + 'info_dict': { + 'id': '702409', + 'ext': 'mp4', + 'title': 'Re:ZERO -Starting Life in Another World- Episode 5 – The Morning of Our Promise Is Still Distant', + 'description': 'md5:97664de1ab24bbf77a9c01918cb7dca9', + 'thumbnail': 're:^https?://.*\.jpg$', + 'uploader': 'TV TOKYO', + 'upload_date': '20160508', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, }, { 'url': 'http://www.crunchyroll.fr/girl-friend-beta/episode-11-goodbye-la-mode-661697', 'only_matching': True, @@ -336,9 +351,18 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text if video_encode_id in video_encode_ids: continue video_encode_ids.append(video_encode_id) + + video_file = xpath_text(stream_info, './file') + if not video_file: + continue + if video_file.startswith('http'): + formats.extend(self._extract_m3u8_formats( + video_file, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + continue + video_url = xpath_text(stream_info, './host') - video_play_path = xpath_text(stream_info, './file') - if not video_url or not video_play_path: + if not video_url: continue metadata = stream_info.find('./metadata') format_info = { @@ -353,7 +377,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text parsed_video_url = compat_urlparse.urlparse(video_url) direct_video_url = compat_urlparse.urlunparse(parsed_video_url._replace( netloc='v.lvlt.crcdn.net', - path='%s/%s' % (remove_end(parsed_video_url.path, '/'), video_play_path.split(':')[-1]))) + path='%s/%s' % (remove_end(parsed_video_url.path, '/'), video_file.split(':')[-1]))) if self._is_valid_url(direct_video_url, video_id, video_format): format_info.update({ 'url': direct_video_url, @@ -363,7 +387,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text format_info.update({ 'url': video_url, - 'play_path': video_play_path, + 'play_path': video_file, 'ext': 'flv', }) formats.append(format_info) From 3cddb8d6a776b09afd7f50772fa30cb536b1149a Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 12 Aug 2016 08:38:06 +0100 Subject: [PATCH 133/775] [pbs] check all http formats and remove unnecessary request - some of the 
quality that not reported in the documentation are available(4500k, 6500k) - the videoInfo request doesn't work for a long time --- youtube_dl/extractor/pbs.py | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py index f6f423597..6e2ef0fba 100644 --- a/youtube_dl/extractor/pbs.py +++ b/youtube_dl/extractor/pbs.py @@ -448,17 +448,6 @@ class PBSIE(InfoExtractor): redirects.append(redirect) redirect_urls.add(redirect_url) - try: - video_info = self._download_json( - 'http://player.pbs.org/videoInfo/%s?format=json&type=partner' % video_id, - display_id, 'Downloading video info JSON') - extract_redirect_urls(video_info) - info = video_info - except ExtractorError as e: - # videoInfo API may not work for some videos - if not isinstance(e.cause, compat_HTTPError) or e.cause.code != 404: - raise - # Player pages may also serve different qualities for page in ('widget/partnerplayer', 'portalplayer'): player = self._download_webpage( @@ -511,12 +500,12 @@ class PBSIE(InfoExtractor): formats)) if http_url: for m3u8_format in m3u8_formats: - bitrate = self._search_regex(r'(\d+k)', m3u8_format['url'], 'bitrate', default=None) + bitrate = self._search_regex(r'(\d+)k', m3u8_format['url'], 'bitrate', default=None) # extract only the formats that we know that they will be available as http format. # https://projects.pbs.org/confluence/display/coveapi/COVE+Video+Specifications - if not bitrate or bitrate not in ('400k', '800k', '1200k', '2500k'): + if not bitrate or int(bitrate) < 400: continue - f_url = re.sub(r'\d+k|baseline', bitrate, http_url) + f_url = re.sub(r'\d+k|baseline', bitrate + 'k', http_url) # This may produce invalid links sometimes (e.g. # http://www.pbs.org/wgbh/frontline/film/suicide-plan) if not self._is_valid_url(f_url, display_id, 'http-%s video' % bitrate): From 98e698f1ff3fd467ff03e10a8f8881cd06345ca7 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 12 Aug 2016 12:30:02 +0100 Subject: [PATCH 134/775] [external/curl] respect more downloader options and display progress --- youtube_dl/downloader/external.py | 15 +++++++++++++++ youtube_dl/utils.py | 2 ++ 2 files changed, 17 insertions(+) diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index fae245024..f0c30007f 100644 --- a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -96,6 +96,12 @@ class CurlFD(ExternalFD): cmd = [self.exe, '--location', '-o', tmpfilename] for key, val in info_dict['http_headers'].items(): cmd += ['--header', '%s: %s' % (key, val)] + cmd += self._bool_option('--continue-at', 'continuedl', '-', '0') + cmd += self._valueless_option('--silent', 'noprogress') + cmd += self._valueless_option('--verbose', 'verbose') + cmd += self._option('--limit-rate', 'ratelimit') + cmd += self._option('--retry', 'retries') + cmd += self._option('--max-filesize', 'max_filesize') cmd += self._option('--interface', 'source_address') cmd += self._option('--proxy', 'proxy') cmd += self._valueless_option('--insecure', 'nocheckcertificate') @@ -103,6 +109,15 @@ class CurlFD(ExternalFD): cmd += ['--', info_dict['url']] return cmd + def _call_downloader(self, tmpfilename, info_dict): + cmd = [encodeArgument(a) for a in self._make_cmd(tmpfilename, info_dict)] + + self._debug_cmd(cmd) + + p = subprocess.Popen(cmd) + p.communicate() + return p.returncode + class AxelFD(ExternalFD): AVAILABLE_OPT = '-V' diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index a03f7184d..b3b687a31 
100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2410,6 +2410,8 @@ def dfxp2srt(dfxp_data): def cli_option(params, command_option, param): param = params.get(param) + if param: + param = compat_str(param) return [command_option, param] if param is not None else [] From f0d3669437bb7f198ada9c0fead64d50a6e7a972 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 12 Aug 2016 18:05:49 +0100 Subject: [PATCH 135/775] [hgtv] Add new extractor(closes #3999) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/hgtv.py | 48 ++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+) create mode 100644 youtube_dl/extractor/hgtv.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index acf4e5d62..6420167f2 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -325,6 +325,7 @@ from .heise import HeiseIE from .hellporno import HellPornoIE from .helsinki import HelsinkiIE from .hentaistigma import HentaiStigmaIE +from .hgtv import HGTVIE from .historicfilms import HistoricFilmsIE from .hitbox import HitboxIE, HitboxLiveIE from .hornbunny import HornBunnyIE diff --git a/youtube_dl/extractor/hgtv.py b/youtube_dl/extractor/hgtv.py new file mode 100644 index 000000000..c3f0733cf --- /dev/null +++ b/youtube_dl/extractor/hgtv.py @@ -0,0 +1,48 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + js_to_json, + smuggle_url, +) + + +class HGTVIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?hgtv\.ca/[^/]+/video/(?P[^/]+)/video.html' + _TEST = { + 'url': 'http://www.hgtv.ca/homefree/video/overnight-success/video.html?v=738081859718&p=1&s=da#video', + 'md5': '', + 'info_dict': { + 'id': 'aFH__I_5FBOX', + 'ext': 'mp4', + 'title': 'Overnight Success', + 'description': 'After weeks of hard work, high stakes, breakdowns and pep talks, the final 2 contestants compete to win the ultimate dream.', + 'uploader': 'SHWM-NEW', + 'timestamp': 1470320034, + 'upload_date': '20160804', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + } + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + embed_vars = self._parse_json(self._search_regex( + r'(?s)embed_vars\s*=\s*({.*?});', + webpage, 'embed vars'), display_id, js_to_json) + return { + '_type': 'url_transparent', + 'url': smuggle_url( + 'http://link.theplatform.com/s/dtjsEC/%s?mbr=true&manifest=m3u' % embed_vars['pid'], { + 'force_smil_url': True + }), + 'series': embed_vars.get('show'), + 'season_number': int_or_none(embed_vars.get('season')), + 'episode_number': int_or_none(embed_vars.get('episode')), + 'ie_key': 'ThePlatform', + } From 794e5dcd7e24784c05e042e7e0655c584347f5c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 13 Aug 2016 14:09:35 +0700 Subject: [PATCH 136/775] [sunporno] Fix metadata extraction (Closes #10316) --- youtube_dl/extractor/sunporno.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/sunporno.py b/youtube_dl/extractor/sunporno.py index e527aa971..4269f2a30 100644 --- a/youtube_dl/extractor/sunporno.py +++ b/youtube_dl/extractor/sunporno.py @@ -15,10 +15,10 @@ class SunPornoIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?sunporno\.com/videos/(?P\d+)' _TEST = { 'url': 'http://www.sunporno.com/videos/807778/', - 'md5': '6457d3c165fd6de062b99ef6c2ff4c86', + 'md5': 
'507887e29033502f29dba69affeebfc9', 'info_dict': { 'id': '807778', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'md5:0a400058e8105d39e35c35e7c5184164', 'description': 'md5:a31241990e1bd3a64e72ae99afb325fb', 'thumbnail': 're:^https?://.*\.jpg$', @@ -40,7 +40,8 @@ class SunPornoIE(InfoExtractor): r'poster="([^"]+)"', webpage, 'thumbnail', fatal=False) duration = parse_duration(self._search_regex( - r'itemprop="duration">\s*(\d+:\d+)\s*<', + (r'itemprop="duration"[^>]*>\s*(\d+:\d+)\s*<', + r'>Duration:\s*]+>\s*(\d+:\d+)\s*<'), webpage, 'duration', fatal=False)) view_count = int_or_none(self._html_search_regex( @@ -48,7 +49,7 @@ class SunPornoIE(InfoExtractor): webpage, 'view count', fatal=False)) comment_count = int_or_none(self._html_search_regex( r'(\d+) Comments?', - webpage, 'comment count', fatal=False)) + webpage, 'comment count', fatal=False, default=None)) formats = [] quality = qualities(['mp4', 'flv']) From b69b2ff7366cee97202eb333bf06329bfb2e974e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 13 Aug 2016 14:13:49 +0700 Subject: [PATCH 137/775] [sunporno] Add support for embed URLs --- youtube_dl/extractor/sunporno.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/sunporno.py b/youtube_dl/extractor/sunporno.py index 4269f2a30..ef9be7926 100644 --- a/youtube_dl/extractor/sunporno.py +++ b/youtube_dl/extractor/sunporno.py @@ -12,8 +12,8 @@ from ..utils import ( class SunPornoIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?sunporno\.com/videos/(?P\d+)' - _TEST = { + _VALID_URL = r'https?://(?:(?:www\.)?sunporno\.com/videos|embeds\.sunporno\.com/embed)/(?P\d+)' + _TESTS = [{ 'url': 'http://www.sunporno.com/videos/807778/', 'md5': '507887e29033502f29dba69affeebfc9', 'info_dict': { @@ -25,12 +25,16 @@ class SunPornoIE(InfoExtractor): 'duration': 302, 'age_limit': 18, } - } + }, { + 'url': 'http://embeds.sunporno.com/embed/807778', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + webpage = self._download_webpage( + 'http://www.sunporno.com/videos/%s' % video_id, video_id) title = self._html_search_regex( r'([^<]+)', webpage, 'title') From bd6fb007de2323065bface4467539b509fbdb062 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 13 Aug 2016 14:22:47 +0700 Subject: [PATCH 138/775] [24video] Fix comment count extraction --- youtube_dl/extractor/twentyfourvideo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/twentyfourvideo.py b/youtube_dl/extractor/twentyfourvideo.py index 4025edf02..8b808d6d6 100644 --- a/youtube_dl/extractor/twentyfourvideo.py +++ b/youtube_dl/extractor/twentyfourvideo.py @@ -64,7 +64,7 @@ class TwentyFourVideoIE(InfoExtractor): r'(\d+) просмотр', webpage, 'view count', fatal=False)) comment_count = int_or_none(self._html_search_regex( - r'
(\d+) комментари', + r']+href="#tab-comments"[^>]*>(\d+) комментари', webpage, 'comment count', fatal=False)) # Sets some cookies From 6a26c5f9d5d7b32648e116be4ce902802994654e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 13 Aug 2016 14:28:44 +0700 Subject: [PATCH 139/775] [muenchentv] Fix extraction (Closes #10313) --- youtube_dl/extractor/muenchentv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/muenchentv.py b/youtube_dl/extractor/muenchentv.py index b4e8ad17e..d9f176136 100644 --- a/youtube_dl/extractor/muenchentv.py +++ b/youtube_dl/extractor/muenchentv.py @@ -36,7 +36,7 @@ class MuenchenTVIE(InfoExtractor): title = self._live_title(self._og_search_title(webpage)) data_js = self._search_regex( - r'(?s)\nplaylist:\s*(\[.*?}\]),related:', + r'(?s)\nplaylist:\s*(\[.*?}\]),', webpage, 'playlist configuration') data_json = js_to_json(data_js) data = json.loads(data_json)[0] From c366f8d30a177d2d44130c9d077b15a4c960c003 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 13 Aug 2016 14:47:51 +0700 Subject: [PATCH 140/775] [24video] Add support for me and xxx TLDs --- youtube_dl/extractor/twentyfourvideo.py | 48 ++++++++++++------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/youtube_dl/extractor/twentyfourvideo.py b/youtube_dl/extractor/twentyfourvideo.py index 8b808d6d6..af92b713b 100644 --- a/youtube_dl/extractor/twentyfourvideo.py +++ b/youtube_dl/extractor/twentyfourvideo.py @@ -12,32 +12,32 @@ from ..utils import ( class TwentyFourVideoIE(InfoExtractor): IE_NAME = '24video' - _VALID_URL = r'https?://(?:www\.)?24video\.net/(?:video/(?:view|xml)/|player/new24_play\.swf\?id=)(?P\d+)' + _VALID_URL = r'https?://(?:www\.)?24video\.(?:net|me|xxx)/(?:video/(?:view|xml)/|player/new24_play\.swf\?id=)(?P\d+)' - _TESTS = [ - { - 'url': 'http://www.24video.net/video/view/1044982', - 'md5': 'e09fc0901d9eaeedac872f154931deeb', - 'info_dict': { - 'id': '1044982', - 'ext': 'mp4', - 'title': 'Эротика каменного века', - 'description': 'Как смотрели порно в каменном веке.', - 'thumbnail': 're:^https?://.*\.jpg$', - 'uploader': 'SUPERTELO', - 'duration': 31, - 'timestamp': 1275937857, - 'upload_date': '20100607', - 'age_limit': 18, - 'like_count': int, - 'dislike_count': int, - }, + _TESTS = [{ + 'url': 'http://www.24video.net/video/view/1044982', + 'md5': 'e09fc0901d9eaeedac872f154931deeb', + 'info_dict': { + 'id': '1044982', + 'ext': 'mp4', + 'title': 'Эротика каменного века', + 'description': 'Как смотрели порно в каменном веке.', + 'thumbnail': 're:^https?://.*\.jpg$', + 'uploader': 'SUPERTELO', + 'duration': 31, + 'timestamp': 1275937857, + 'upload_date': '20100607', + 'age_limit': 18, + 'like_count': int, + 'dislike_count': int, }, - { - 'url': 'http://www.24video.net/player/new24_play.swf?id=1044982', - 'only_matching': True, - } - ] + }, { + 'url': 'http://www.24video.net/player/new24_play.swf?id=1044982', + 'only_matching': True, + }, { + 'url': 'http://www.24video.me/video/view/1044982', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) From f50365e91cfccf33d0b5696c7f989944bcf748e7 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 13 Aug 2016 09:08:57 +0100 Subject: [PATCH 141/775] [pbs] add test for videos with undocumented http formats and remove unused import --- youtube_dl/extractor/pbs.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py 
index 6e2ef0fba..335e44bdc 100644 --- a/youtube_dl/extractor/pbs.py +++ b/youtube_dl/extractor/pbs.py @@ -4,7 +4,6 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_HTTPError from ..utils import ( ExtractorError, determine_ext, @@ -334,6 +333,16 @@ class PBSIE(InfoExtractor): 'formats': 'mincount:8', }, }, + { + # has undocumented http formats(4500k and 6500k) + 'url': 'http://www.pbs.org/video/2365815229/', + 'md5': '94635cd06b7133688e23f4b94e6637a5', + 'info_dict': { + 'id': '2365815229', + 'ext': 'mp4', + 'title': 'FRONTLINE - Mosquito Hunter', + }, + }, { 'url': 'http://player.pbs.org/widget/partnerplayer/2365297708/?start=0&end=0&chapterbar=false&endscreen=false&topbar=true', 'only_matching': True, @@ -501,14 +510,18 @@ class PBSIE(InfoExtractor): if http_url: for m3u8_format in m3u8_formats: bitrate = self._search_regex(r'(\d+)k', m3u8_format['url'], 'bitrate', default=None) - # extract only the formats that we know that they will be available as http format. + # lower qualities(150k and 192k) are not available as http formats + # https://github.com/rg3/youtube-dl/commit/cbc032c8b70a038a69259378c92b4ba97b42d491#commitcomment-17313656 + # we will try to extract any http format higher than than the lowest quality documented in # https://projects.pbs.org/confluence/display/coveapi/COVE+Video+Specifications + # as there also undocumented http formats formats(4500k and 6500k) + # http://www.pbs.org/video/2365815229/ if not bitrate or int(bitrate) < 400: continue f_url = re.sub(r'\d+k|baseline', bitrate + 'k', http_url) # This may produce invalid links sometimes (e.g. # http://www.pbs.org/wgbh/frontline/film/suicide-plan) - if not self._is_valid_url(f_url, display_id, 'http-%s video' % bitrate): + if not self._is_valid_url(f_url, display_id, 'http-%sk video' % bitrate): continue f = m3u8_format.copy() f.update({ From e581224843db95574ca65965f5c5a594a7ffd370 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 13 Aug 2016 16:32:07 +0800 Subject: [PATCH 142/775] [tapely] Remove extractor. It's shut down Closes #10323 --- ChangeLog | 5 ++ youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/tapely.py | 109 ----------------------------- 3 files changed, 5 insertions(+), 110 deletions(-) delete mode 100644 youtube_dl/extractor/tapely.py diff --git a/ChangeLog b/ChangeLog index 376d96d12..b1ce63d75 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +version + +Extractors +- [tapely] Remove extractor (#10323) + version 2016.08.12 Core diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 6420167f2..104d8e37e 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -812,7 +812,6 @@ from .tagesschau import ( TagesschauPlayerIE, TagesschauIE, ) -from .tapely import TapelyIE from .tass import TassIE from .tdslifeway import TDSLifewayIE from .teachertube import ( diff --git a/youtube_dl/extractor/tapely.py b/youtube_dl/extractor/tapely.py deleted file mode 100644 index ed560bd24..000000000 --- a/youtube_dl/extractor/tapely.py +++ /dev/null @@ -1,109 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - clean_html, - ExtractorError, - float_or_none, - parse_iso8601, - sanitized_Request, -) - - -class TapelyIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?(?:tape\.ly|tapely\.com)/(?P[A-Za-z0-9\-_]+)(?:/(?P\d+))?' 
- _API_URL = 'http://tape.ly/showtape?id={0:}' - _S3_SONG_URL = 'http://mytape.s3.amazonaws.com/{0:}' - _SOUNDCLOUD_SONG_URL = 'http://api.soundcloud.com{0:}' - _TESTS = [ - { - 'url': 'http://tape.ly/my-grief-as-told-by-water', - 'info_dict': { - 'id': 23952, - 'title': 'my grief as told by water', - 'thumbnail': 're:^https?://.*\.png$', - 'uploader_id': 16484, - 'timestamp': 1411848286, - 'description': 'For Robin and Ponkers, whom the tides of life have taken out to sea.', - }, - 'playlist_count': 13, - }, - { - 'url': 'http://tape.ly/my-grief-as-told-by-water/1', - 'md5': '79031f459fdec6530663b854cbc5715c', - 'info_dict': { - 'id': 258464, - 'title': 'Dreaming Awake (My Brightest Diamond)', - 'ext': 'm4a', - }, - }, - { - 'url': 'https://tapely.com/my-grief-as-told-by-water', - 'only_matching': True, - }, - ] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - display_id = mobj.group('id') - - playlist_url = self._API_URL.format(display_id) - request = sanitized_Request(playlist_url) - request.add_header('X-Requested-With', 'XMLHttpRequest') - request.add_header('Accept', 'application/json') - request.add_header('Referer', url) - - playlist = self._download_json(request, display_id) - - tape = playlist['tape'] - - entries = [] - for s in tape['songs']: - song = s['song'] - entry = { - 'id': song['id'], - 'duration': float_or_none(song.get('songduration'), 1000), - 'title': song['title'], - } - if song['source'] == 'S3': - entry.update({ - 'url': self._S3_SONG_URL.format(song['filename']), - }) - entries.append(entry) - elif song['source'] == 'YT': - self.to_screen('YouTube video detected') - yt_id = song['filename'].replace('/youtube/', '') - entry.update(self.url_result(yt_id, 'Youtube', video_id=yt_id)) - entries.append(entry) - elif song['source'] == 'SC': - self.to_screen('SoundCloud song detected') - sc_url = self._SOUNDCLOUD_SONG_URL.format(song['filename']) - entry.update(self.url_result(sc_url, 'Soundcloud')) - entries.append(entry) - else: - self.report_warning('Unknown song source: %s' % song['source']) - - if mobj.group('songnr'): - songnr = int(mobj.group('songnr')) - 1 - try: - return entries[songnr] - except IndexError: - raise ExtractorError( - 'No song with index: %s' % mobj.group('songnr'), - expected=True) - - return { - '_type': 'playlist', - 'id': tape['id'], - 'display_id': display_id, - 'title': tape['name'], - 'entries': entries, - 'thumbnail': tape.get('image_url'), - 'description': clean_html(tape.get('subtext')), - 'like_count': tape.get('likescount'), - 'uploader_id': tape.get('user_id'), - 'timestamp': parse_iso8601(tape.get('published_at')), - } From cb55908e51b80d9a51664ec76dcfe05d739dadf9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 13 Aug 2016 15:47:20 +0700 Subject: [PATCH 143/775] [vbox7] Fix extraction (Closes #10309) --- youtube_dl/extractor/vbox7.py | 57 ++++++++++++++++++----------------- 1 file changed, 30 insertions(+), 27 deletions(-) diff --git a/youtube_dl/extractor/vbox7.py b/youtube_dl/extractor/vbox7.py index dff1bb702..326440758 100644 --- a/youtube_dl/extractor/vbox7.py +++ b/youtube_dl/extractor/vbox7.py @@ -12,7 +12,15 @@ from ..utils import ( class Vbox7IE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?vbox7\.com/play:(?P[^/]+)' - _TEST = { + _TESTS = [{ + 'url': 'http://vbox7.com/play:0946fff23c', + 'md5': 'a60f9ab3a3a2f013ef9a967d5f7be5bf', + 'info_dict': { + 'id': '0946fff23c', + 'ext': 'mp4', + 'title': 'Борисов: Притеснен съм за бъдещето на България', + }, + }, { 'url': 
'http://vbox7.com/play:249bb972c2', 'md5': '99f65c0c9ef9b682b97313e052734c3f', 'info_dict': { @@ -20,43 +28,38 @@ class Vbox7IE(InfoExtractor): 'ext': 'mp4', 'title': 'Смях! Чудо - чист за секунди - Скрита камера', }, - } + 'skip': 'georestricted', + }] def _real_extract(self, url): video_id = self._match_id(url) - # need to get the page 3 times for the correct jsSecretToken cookie - # which is necessary for the correct title - def get_session_id(): - redirect_page = self._download_webpage(url, video_id) - session_id_url = self._search_regex( - r'var\s*url\s*=\s*\'([^\']+)\';', redirect_page, - 'session id url') - self._download_webpage( - compat_urlparse.urljoin(url, session_id_url), video_id, - 'Getting session id') + webpage = self._download_webpage(url, video_id) - get_session_id() - get_session_id() + title = self._html_search_regex( + r'(.*)', webpage, 'title').split('/')[0].strip() - webpage = self._download_webpage(url, video_id, - 'Downloading redirect page') + video_url = self._search_regex( + r'src\s*:\s*(["\'])(?P.+?.mp4.*?)\1', + webpage, 'video url', default=None, group='url') - title = self._html_search_regex(r'(.*)', - webpage, 'title').split('/')[0].strip() + thumbnail_url = self._og_search_thumbnail(webpage) - info_url = 'http://vbox7.com/play/magare.do' - data = urlencode_postdata({'as3': '1', 'vid': video_id}) - info_request = sanitized_Request(info_url, data) - info_request.add_header('Content-Type', 'application/x-www-form-urlencoded') - info_response = self._download_webpage(info_request, video_id, 'Downloading info webpage') - if info_response is None: - raise ExtractorError('Unable to extract the media url') - (final_url, thumbnail_url) = map(lambda x: x.split('=')[1], info_response.split('&')) + if not video_url: + info_response = self._download_webpage( + 'http://vbox7.com/play/magare.do', video_id, + 'Downloading info webpage', + data=urlencode_postdata({'as3': '1', 'vid': video_id}), + headers={'Content-Type': 'application/x-www-form-urlencoded'}) + final_url, thumbnail_url = map( + lambda x: x.split('=')[1], info_response.split('&')) + + if '/na.mp4' in video_url: + self.raise_geo_restricted() return { 'id': video_id, - 'url': final_url, + 'url': self._proto_relative_url(video_url, 'http:'), 'title': title, 'thumbnail': thumbnail_url, } From 5f2c2b7936eb092e482309a5b9aa036028dbab2c Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 13 Aug 2016 09:53:46 +0100 Subject: [PATCH 144/775] [test_utils] add test for option with not str value --- test/test_utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/test/test_utils.py b/test/test_utils.py index 724346886..74fcf91c0 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -968,6 +968,7 @@ The first line self.assertEqual(cli_option({'proxy': '127.0.0.1:3128'}, '--proxy', 'proxy'), ['--proxy', '127.0.0.1:3128']) self.assertEqual(cli_option({'proxy': None}, '--proxy', 'proxy'), []) self.assertEqual(cli_option({}, '--proxy', 'proxy'), []) + self.assertEqual(cli_option({'retries': 10}, '--retries', 'retries'), ['--retries', '10']) def test_cli_valueless_option(self): self.assertEqual(cli_valueless_option( From acfccacad5555c21d649729c5e2cb237a70f46e6 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 13 Aug 2016 10:26:02 +0100 Subject: [PATCH 145/775] [downloader/external:curl] Clarify why CurlFD should not capture stderr --- youtube_dl/downloader/external.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index 
f0c30007f..cf4556221 100644 --- a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -114,6 +114,7 @@ class CurlFD(ExternalFD): self._debug_cmd(cmd) + # curl writes the progress to stderr so don't capture it. p = subprocess.Popen(cmd) p.communicate() return p.returncode From e97c55ee6aaf5170f86bc8146a20cef56e337a3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 13 Aug 2016 16:29:05 +0700 Subject: [PATCH 146/775] [expotv] Improve extraction and update test --- youtube_dl/extractor/expotv.py | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/expotv.py b/youtube_dl/extractor/expotv.py index 1585a03bb..971c918a4 100644 --- a/youtube_dl/extractor/expotv.py +++ b/youtube_dl/extractor/expotv.py @@ -1,7 +1,5 @@ from __future__ import unicode_literals -import re - from .common import InfoExtractor from ..utils import ( int_or_none, @@ -12,23 +10,22 @@ from ..utils import ( class ExpoTVIE(InfoExtractor): _VALID_URL = r'https?://www\.expotv\.com/videos/[^?#]*/(?P[0-9]+)($|[?#])' _TEST = { - 'url': 'http://www.expotv.com/videos/reviews/1/24/LinneCardscom/17561', - 'md5': '2985e6d7a392b2f7a05e0ca350fe41d0', + 'url': 'http://www.expotv.com/videos/reviews/3/40/NYX-Butter-lipstick/667916', + 'md5': 'fe1d728c3a813ff78f595bc8b7a707a8', 'info_dict': { - 'id': '17561', + 'id': '667916', 'ext': 'mp4', - 'upload_date': '20060212', - 'title': 'My Favorite Online Scrapbook Store', - 'view_count': int, - 'description': 'You\'ll find most everything you need at this virtual store front.', - 'uploader': 'Anna T.', + 'title': 'NYX Butter Lipstick Little Susie', + 'description': 'Goes on like butter, but looks better!', 'thumbnail': 're:^https?://.*\.jpg$', + 'uploader': 'Stephanie S.', + 'upload_date': '20150520', + 'view_count': int, } } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) player_key = self._search_regex( @@ -66,7 +63,7 @@ class ExpoTVIE(InfoExtractor): fatal=False) upload_date = unified_strdate(self._search_regex( r'
Reviewed on ([0-9/.]+)
', webpage, 'upload date', - fatal=False)) + fatal=False), day_first=False) return { 'id': video_id, From 52aa7e7476415ec632053f85f9db0919f7bf75c3 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 13 Aug 2016 17:36:14 +0800 Subject: [PATCH 147/775] [test_verbose_output] Fix tests under Python 3 --- test/test_verbose_output.py | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/test/test_verbose_output.py b/test/test_verbose_output.py index 4c77df242..96a66f7a0 100644 --- a/test/test_verbose_output.py +++ b/test/test_verbose_output.py @@ -22,10 +22,10 @@ class TestVerboseOutput(unittest.TestCase): '--password', 'secret', ], cwd=rootDir, stdout=subprocess.PIPE, stderr=subprocess.PIPE) sout, serr = outp.communicate() - self.assertTrue('--username' in serr) - self.assertTrue('johnsmith' not in serr) - self.assertTrue('--password' in serr) - self.assertTrue('secret' not in serr) + self.assertTrue(b'--username' in serr) + self.assertTrue(b'johnsmith' not in serr) + self.assertTrue(b'--password' in serr) + self.assertTrue(b'secret' not in serr) def test_private_info_shortarg(self): outp = subprocess.Popen( @@ -35,10 +35,10 @@ class TestVerboseOutput(unittest.TestCase): '-p', 'secret', ], cwd=rootDir, stdout=subprocess.PIPE, stderr=subprocess.PIPE) sout, serr = outp.communicate() - self.assertTrue('-u' in serr) - self.assertTrue('johnsmith' not in serr) - self.assertTrue('-p' in serr) - self.assertTrue('secret' not in serr) + self.assertTrue(b'-u' in serr) + self.assertTrue(b'johnsmith' not in serr) + self.assertTrue(b'-p' in serr) + self.assertTrue(b'secret' not in serr) def test_private_info_eq(self): outp = subprocess.Popen( @@ -48,10 +48,10 @@ class TestVerboseOutput(unittest.TestCase): '--password=secret', ], cwd=rootDir, stdout=subprocess.PIPE, stderr=subprocess.PIPE) sout, serr = outp.communicate() - self.assertTrue('--username' in serr) - self.assertTrue('johnsmith' not in serr) - self.assertTrue('--password' in serr) - self.assertTrue('secret' not in serr) + self.assertTrue(b'--username' in serr) + self.assertTrue(b'johnsmith' not in serr) + self.assertTrue(b'--password' in serr) + self.assertTrue(b'secret' not in serr) def test_private_info_shortarg_eq(self): outp = subprocess.Popen( @@ -61,10 +61,10 @@ class TestVerboseOutput(unittest.TestCase): '-p=secret', ], cwd=rootDir, stdout=subprocess.PIPE, stderr=subprocess.PIPE) sout, serr = outp.communicate() - self.assertTrue('-u' in serr) - self.assertTrue('johnsmith' not in serr) - self.assertTrue('-p' in serr) - self.assertTrue('secret' not in serr) + self.assertTrue(b'-u' in serr) + self.assertTrue(b'johnsmith' not in serr) + self.assertTrue(b'-p' in serr) + self.assertTrue(b'secret' not in serr) if __name__ == '__main__': unittest.main() From cd29eaab955b930fc7ee595553d6351ad643569d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 13 Aug 2016 16:45:34 +0700 Subject: [PATCH 148/775] [vbox7] Remove unused imports --- youtube_dl/extractor/vbox7.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/youtube_dl/extractor/vbox7.py b/youtube_dl/extractor/vbox7.py index 326440758..fa7899e6d 100644 --- a/youtube_dl/extractor/vbox7.py +++ b/youtube_dl/extractor/vbox7.py @@ -2,12 +2,7 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import compat_urlparse -from ..utils import ( - ExtractorError, - sanitized_Request, - urlencode_postdata, -) +from ..utils import urlencode_postdata class 
Vbox7IE(InfoExtractor): From c2a453b46177787d5cc17e09cedb3eca215ab159 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 13 Aug 2016 16:46:07 +0700 Subject: [PATCH 149/775] [imgur] Fix width and height extraction (Closes #10325) --- youtube_dl/extractor/imgur.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/imgur.py b/youtube_dl/extractor/imgur.py index 85e9344aa..d23489dcf 100644 --- a/youtube_dl/extractor/imgur.py +++ b/youtube_dl/extractor/imgur.py @@ -50,12 +50,10 @@ class ImgurIE(InfoExtractor): webpage = self._download_webpage( compat_urlparse.urljoin(url, video_id), video_id) - width = int_or_none(self._search_regex( - r'(.*?)
', From db535435b30540029b292e7217fb443bcc670aab Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 13 Aug 2016 18:02:11 +0800 Subject: [PATCH 150/775] [bigflix] Remove an invalid test There's no video anymore --- youtube_dl/extractor/bigflix.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/youtube_dl/extractor/bigflix.py b/youtube_dl/extractor/bigflix.py index b19f35b5d..b4ce767af 100644 --- a/youtube_dl/extractor/bigflix.py +++ b/youtube_dl/extractor/bigflix.py @@ -11,15 +11,6 @@ from ..compat import compat_urllib_parse_unquote class BigflixIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?bigflix\.com/.+/(?P[0-9]+)' _TESTS = [{ - 'url': 'http://www.bigflix.com/Hindi-movies/Action-movies/Singham-Returns/16537', - 'md5': 'dc1b4aebb46e3a7077ecc0d9f43f61e3', - 'info_dict': { - 'id': '16537', - 'ext': 'mp4', - 'title': 'Singham Returns', - 'description': 'md5:3d2ba5815f14911d5cc6a501ae0cf65d', - } - }, { # 2 formats 'url': 'http://www.bigflix.com/Tamil-movies/Drama-movies/Madarasapatinam/16070', 'info_dict': { From 77afa008dd14efd930f8504609815a8ad2fedc7f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 13 Aug 2016 19:55:09 +0700 Subject: [PATCH 151/775] [4tube] Fix metadata extraction (Closes #10321) --- youtube_dl/extractor/fourtube.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/fourtube.py b/youtube_dl/extractor/fourtube.py index fc4a5a0fb..9776c8422 100644 --- a/youtube_dl/extractor/fourtube.py +++ b/youtube_dl/extractor/fourtube.py @@ -43,14 +43,14 @@ class FourTubeIE(InfoExtractor): 'uploadDate', webpage)) thumbnail = self._html_search_meta('thumbnailUrl', webpage) uploader_id = self._html_search_regex( - r'
', + r'', webpage, 'uploader id', fatal=False) uploader = self._html_search_regex( - r'', + r'', webpage, 'uploader', fatal=False) categories_html = self._search_regex( - r'(?s)>\s*Categories / Tags\s*.*?
    (.*?)
', + r'(?s)>\s*Categories / Tags\s*.*?
    (.*?)
', webpage, 'categories', fatal=False) categories = None if categories_html: @@ -59,10 +59,10 @@ class FourTubeIE(InfoExtractor): r'(?s)
  • (.*?)', categories_html)] view_count = str_to_int(self._search_regex( - r'', + r']+itemprop="interactionCount"[^>]+content="UserPlays:([0-9,]+)">', webpage, 'view count', fatal=False)) like_count = str_to_int(self._search_regex( - r'', + r']+itemprop="interactionCount"[^>]+content="UserLikes:([0-9,]+)">', webpage, 'like count', fatal=False)) duration = parse_duration(self._html_search_meta('duration', webpage)) From 647a7bf5e8355b34ed030827f53ea1e87ffc9131 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 13 Aug 2016 20:49:16 +0700 Subject: [PATCH 152/775] [pornotube] Fix extraction (Closes #10322) --- youtube_dl/extractor/pornotube.py | 83 ++++++++++++++----------------- 1 file changed, 38 insertions(+), 45 deletions(-) diff --git a/youtube_dl/extractor/pornotube.py b/youtube_dl/extractor/pornotube.py index 5398e708b..63816c358 100644 --- a/youtube_dl/extractor/pornotube.py +++ b/youtube_dl/extractor/pornotube.py @@ -3,10 +3,7 @@ from __future__ import unicode_literals import json from .common import InfoExtractor -from ..utils import ( - int_or_none, - sanitized_Request, -) +from ..utils import int_or_none class PornotubeIE(InfoExtractor): @@ -31,59 +28,55 @@ class PornotubeIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - # Fetch origin token - js_config = self._download_webpage( - 'http://www.pornotube.com/assets/src/app/config.js', video_id, - note='Download JS config') - originAuthenticationSpaceKey = self._search_regex( - r"constant\('originAuthenticationSpaceKey',\s*'([^']+)'", - js_config, 'originAuthenticationSpaceKey') + token = self._download_json( + 'https://api.aebn.net/auth/v2/origins/authenticate', + video_id, note='Downloading token', + data=json.dumps({'credentials': 'Clip Application'}).encode('utf-8'), + headers={ + 'Content-Type': 'application/json', + 'Origin': 'http://www.pornotube.com', + })['tokenKey'] - # Fetch actual token - token_req_data = { - 'authenticationSpaceKey': originAuthenticationSpaceKey, - 'credentials': 'Clip Application', - } - token_req = sanitized_Request( - 'https://api.aebn.net/auth/v1/token/primal', - data=json.dumps(token_req_data).encode('utf-8')) - token_req.add_header('Content-Type', 'application/json') - token_req.add_header('Origin', 'http://www.pornotube.com') - token_answer = self._download_json( - token_req, video_id, note='Requesting primal token') - token = token_answer['tokenKey'] + video_url = self._download_json( + 'https://api.aebn.net/delivery/v1/clips/%s/MP4' % video_id, + video_id, note='Downloading delivery information', + headers={'Authorization': token})['mediaUrl'] - # Get video URL - delivery_req = sanitized_Request( - 'https://api.aebn.net/delivery/v1/clips/%s/MP4' % video_id) - delivery_req.add_header('Authorization', token) - delivery_info = self._download_json( - delivery_req, video_id, note='Downloading delivery information') - video_url = delivery_info['mediaUrl'] + FIELDS = ( + 'title', 'description', 'startSecond', 'endSecond', 'publishDate', + 'studios{name}', 'categories{name}', 'movieId', 'primaryImageNumber' + ) - # Get additional info (title etc.) 
- info_req = sanitized_Request( - 'https://api.aebn.net/content/v1/clips/%s?expand=' - 'title,description,primaryImageNumber,startSecond,endSecond,' - 'movie.title,movie.MovieId,movie.boxCoverFront,movie.stars,' - 'movie.studios,stars.name,studios.name,categories.name,' - 'clipActive,movieActive,publishDate,orientations' % video_id) - info_req.add_header('Authorization', token) info = self._download_json( - info_req, video_id, note='Downloading metadata') + 'https://api.aebn.net/content/v2/clips/%s?fields=%s' + % (video_id, ','.join(FIELDS)), video_id, + note='Downloading metadata', + headers={'Authorization': token}) + + if isinstance(info, list): + info = info[0] + + title = info['title'] timestamp = int_or_none(info.get('publishDate'), scale=1000) uploader = info.get('studios', [{}])[0].get('name') - movie_id = info['movie']['movieId'] - thumbnail = 'http://pic.aebn.net/dis/t/%s/%s_%08d.jpg' % ( - movie_id, movie_id, info['primaryImageNumber']) - categories = [c['name'] for c in info.get('categories')] + movie_id = info.get('movieId') + primary_image_number = info.get('primaryImageNumber') + thumbnail = None + if movie_id and primary_image_number: + thumbnail = 'http://pic.aebn.net/dis/t/%s/%s_%08d.jpg' % ( + movie_id, movie_id, primary_image_number) + start = int_or_none(info.get('startSecond')) + end = int_or_none(info.get('endSecond')) + duration = end - start if start and end else None + categories = [c['name'] for c in info.get('categories', []) if c.get('name')] return { 'id': video_id, 'url': video_url, - 'title': info['title'], + 'title': title, 'description': info.get('description'), + 'duration': duration, 'timestamp': timestamp, 'uploader': uploader, 'thumbnail': thumbnail, From 82997dad571988aae59d85db4355f5f1695efcbe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 13 Aug 2016 21:00:34 +0700 Subject: [PATCH 153/775] [franceculture] Fix extraction (Closes #10324) --- youtube_dl/extractor/extractors.py | 5 +- youtube_dl/extractor/franceculture.py | 98 +++++++-------------------- 2 files changed, 26 insertions(+), 77 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 104d8e37e..82d4ed153 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -272,10 +272,7 @@ from .fox import FOXIE from .foxgay import FoxgayIE from .foxnews import FoxNewsIE from .foxsports import FoxSportsIE -from .franceculture import ( - FranceCultureIE, - FranceCultureEmissionIE, -) +from .franceculture import FranceCultureIE from .franceinter import FranceInterIE from .francetv import ( PluzzIE, diff --git a/youtube_dl/extractor/franceculture.py b/youtube_dl/extractor/franceculture.py index e2ca96283..186da0d3b 100644 --- a/youtube_dl/extractor/franceculture.py +++ b/youtube_dl/extractor/franceculture.py @@ -2,104 +2,56 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import ( - compat_urlparse, -) from ..utils import ( determine_ext, - int_or_none, - ExtractorError, + unified_strdate, ) class FranceCultureIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?franceculture\.fr/player/reecouter\?play=(?P[0-9]+)' + _VALID_URL = r'https?://(?:www\.)?franceculture\.fr/emissions/(?:[^/]+/)*(?P[^/?#&]+)' _TEST = { - 'url': 'http://www.franceculture.fr/player/reecouter?play=4795174', + 'url': 'http://www.franceculture.fr/emissions/carnet-nomade/rendez-vous-au-pays-des-geeks', 'info_dict': { - 'id': '4795174', + 'id': 'rendez-vous-au-pays-des-geeks', + 
'display_id': 'rendez-vous-au-pays-des-geeks', 'ext': 'mp3', 'title': 'Rendez-vous au pays des geeks', - 'alt_title': 'Carnet nomade | 13-14', - 'vcodec': 'none', + 'thumbnail': 're:^https?://.*\\.jpg$', 'upload_date': '20140301', - 'thumbnail': r're:^http://static\.franceculture\.fr/.*/images/player/Carnet-nomade\.jpg$', - 'description': 'startswith:Avec :Jean-Baptiste Péretié pour son documentaire sur Arte "La revanche', - 'timestamp': 1393700400, + 'vcodec': 'none', } } - def _extract_from_player(self, url, video_id): - webpage = self._download_webpage(url, video_id) + def _real_extract(self, url): + display_id = self._match_id(url) - video_path = self._search_regex( - r']+class="[^"]*?title-zone-diffusion[^"]*?"[^>]*>.*?]+href="([^"]+)"', + webpage, 'video path') + + title = self._og_search_title(webpage) + + upload_date = unified_strdate(self._search_regex( + '(?s)]+class="date"[^>]*>.*?]+class="inner"[^>]*>([^<]+)<', webpage, 'upload date', fatal=False)) thumbnail = self._search_regex( - r'\s+]+itemtype="https://schema.org/ImageObject"[^>]*>.*?]+data-pagespeed-(?:lazy|high-res)-src="([^"]+)"', webpage, 'thumbnail', fatal=False) - - display_id = self._search_regex( - r'emission-(.*?)', webpage, 'display_id') - - title = self._html_search_regex( - r'(.*?)', webpage, 'title') - alt_title = self._html_search_regex( - r'(.*?)', - webpage, 'alt_title', fatal=False) - description = self._html_search_regex( - r'(.*?)', - webpage, 'description', fatal=False) - uploader = self._html_search_regex( r'(?s)
    (.*?)', webpage, 'uploader', default=None) vcodec = 'none' if determine_ext(video_url.lower()) == 'mp3' else None return { - 'id': video_id, + 'id': display_id, + 'display_id': display_id, 'url': video_url, + 'title': title, + 'thumbnail': thumbnail, 'vcodec': vcodec, 'uploader': uploader, - 'timestamp': timestamp, - 'title': title, - 'alt_title': alt_title, - 'thumbnail': thumbnail, - 'description': description, - 'display_id': display_id, + 'upload_date': upload_date, } - - def _real_extract(self, url): - video_id = self._match_id(url) - return self._extract_from_player(url, video_id) - - -class FranceCultureEmissionIE(FranceCultureIE): - _VALID_URL = r'https?://(?:www\.)?franceculture\.fr/emission-(?P[^?#]+)' - _TEST = { - 'url': 'http://www.franceculture.fr/emission-les-carnets-de-la-creation-jean-gabriel-periot-cineaste-2015-10-13', - 'info_dict': { - 'title': 'Jean-Gabriel Périot, cinéaste', - 'alt_title': 'Les Carnets de la création', - 'id': '5093239', - 'display_id': 'les-carnets-de-la-creation-jean-gabriel-periot-cineaste-2015-10-13', - 'ext': 'mp3', - 'timestamp': 1444762500, - 'upload_date': '20151013', - 'description': 'startswith:Aujourd\'hui dans "Les carnets de la création", le cinéaste', - }, - } - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - video_path = self._html_search_regex( - r'[0-9]+)', video_path, 'new_id', group='id') - video_url = compat_urlparse.urljoin(url, video_path) - return self._extract_from_player(video_url, new_id) From 542130a5d914e5f02acb872c88866194c66a612d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 13 Aug 2016 21:59:29 +0700 Subject: [PATCH 154/775] [pbs] Fix description extraction and update tests --- youtube_dl/extractor/pbs.py | 47 ++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 24 deletions(-) diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py index 335e44bdc..09aef7fb9 100644 --- a/youtube_dl/extractor/pbs.py +++ b/youtube_dl/extractor/pbs.py @@ -10,6 +10,7 @@ from ..utils import ( int_or_none, js_to_json, strip_jsonp, + strip_or_none, unified_strdate, US_RATINGS, ) @@ -200,7 +201,7 @@ class PBSIE(InfoExtractor): 'id': '2365006249', 'ext': 'mp4', 'title': 'Constitution USA with Peter Sagal - A More Perfect Union', - 'description': 'md5:36f341ae62e251b8f5bd2b754b95a071', + 'description': 'md5:31b664af3c65fd07fa460d306b837d00', 'duration': 3190, }, }, @@ -211,7 +212,7 @@ class PBSIE(InfoExtractor): 'id': '2365297690', 'ext': 'mp4', 'title': 'FRONTLINE - Losing Iraq', - 'description': 'md5:4d3eaa01f94e61b3e73704735f1196d9', + 'description': 'md5:5979a4d069b157f622d02bff62fbe654', 'duration': 5050, }, }, @@ -222,7 +223,7 @@ class PBSIE(InfoExtractor): 'id': '2201174722', 'ext': 'mp4', 'title': 'PBS NewsHour - Cyber Schools Gain Popularity, but Quality Questions Persist', - 'description': 'md5:95a19f568689d09a166dff9edada3301', + 'description': 'md5:86ab9a3d04458b876147b355788b8781', 'duration': 801, }, }, @@ -267,7 +268,7 @@ class PBSIE(InfoExtractor): 'display_id': 'player', 'ext': 'mp4', 'title': 'American Experience - Death and the Civil War, Chapter 1', - 'description': 'md5:1b80a74e0380ed2a4fb335026de1600d', + 'description': 'md5:67fa89a9402e2ee7d08f53b920674c18', 'duration': 682, 'thumbnail': 're:^https?://.*\.jpg$', }, @@ -293,13 +294,13 @@ class PBSIE(InfoExtractor): # "', webpage): url = self._search_regex( @@ -432,10 +428,10 @@ class PBSIE(InfoExtractor): video_id = mobj.group('id') 
display_id = video_id - return video_id, display_id, None + return video_id, display_id, None, description def _real_extract(self, url): - video_id, display_id, upload_date = self._extract_webpage(url) + video_id, display_id, upload_date, description = self._extract_webpage(url) if isinstance(video_id, list): entries = [self.url_result( @@ -564,11 +560,14 @@ class PBSIE(InfoExtractor): if alt_title: info['title'] = alt_title + ' - ' + re.sub(r'^' + alt_title + '[\s\-:]+', '', info['title']) + description = info.get('description') or info.get( + 'program', {}).get('description') or description + return { 'id': video_id, 'display_id': display_id, 'title': info['title'], - 'description': info.get('description') or info.get('program', {}).get('description'), + 'description': description, 'thumbnail': info.get('image_url'), 'duration': int_or_none(info.get('duration')), 'age_limit': age_limit, From 5ec5461e1a805595c5fef4ae482e86d7d7872d8b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 13 Aug 2016 22:50:18 +0700 Subject: [PATCH 155/775] [pbs] Clarify comment on http formats --- youtube_dl/extractor/pbs.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py index 09aef7fb9..b490ef74c 100644 --- a/youtube_dl/extractor/pbs.py +++ b/youtube_dl/extractor/pbs.py @@ -506,12 +506,12 @@ class PBSIE(InfoExtractor): if http_url: for m3u8_format in m3u8_formats: bitrate = self._search_regex(r'(\d+)k', m3u8_format['url'], 'bitrate', default=None) - # lower qualities(150k and 192k) are not available as http formats - # https://github.com/rg3/youtube-dl/commit/cbc032c8b70a038a69259378c92b4ba97b42d491#commitcomment-17313656 - # we will try to extract any http format higher than than the lowest quality documented in - # https://projects.pbs.org/confluence/display/coveapi/COVE+Video+Specifications - # as there also undocumented http formats formats(4500k and 6500k) - # http://www.pbs.org/video/2365815229/ + # Lower qualities (150k and 192k) are not available as HTTP formats (see [1]), + # we won't try extracting them. + # Since summer 2016 higher quality formats (4500k and 6500k) are also available + # albeit they are not documented in [2]. + # 1. https://github.com/rg3/youtube-dl/commit/cbc032c8b70a038a69259378c92b4ba97b42d491#commitcomment-17313656 + # 2. 
https://projects.pbs.org/confluence/display/coveapi/COVE+Video+Specifications if not bitrate or int(bitrate) < 400: continue f_url = re.sub(r'\d+k|baseline', bitrate + 'k', http_url) From a560f28c98445e2ae2528795609d5ac718ec5b2c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 13 Aug 2016 23:01:35 +0700 Subject: [PATCH 156/775] [ChangeLog] Actualize --- ChangeLog | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/ChangeLog b/ChangeLog index b1ce63d75..5efcb2316 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,7 +1,27 @@ version +Core +* Show progress for curl external downloader +* Forward more options to curl external downloader + Extractors +* [pbs] Fix description extraction +* [franceculture] Fix extraction (#10324) +* [pornotube] Fix extraction (#10322) +* [4tube] Fix metadata extraction (#10321) +* [imgur] Fix width and height extraction (#10325) +* [expotv] Improve extraction ++ [vbox7] Fix extraction (#10309) - [tapely] Remove extractor (#10323) +* [muenchentv] Fix extraction (#10313) ++ [24video] Add support for .me and .xxx TLDs +* [24video] Fix comment count extraction +* [sunporno] Add support for embed URLs +* [sunporno] Fix metadata extraction (#10316) ++ [hgtv] Add extractor for hgtv.ca (#3999) +- [pbs] Remove request to unavailable API ++ [pbs] Add support for high quality HTTP formats ++ [crunchyroll] Add support for HLS formats (#10301) version 2016.08.12 From 73a85620eeb2d595cd86f73357bc4cb081cb3bc9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 13 Aug 2016 23:17:11 +0700 Subject: [PATCH 157/775] release 2016.08.13 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 3 +-- youtube_dl/version.py | 2 +- 4 files changed, 6 insertions(+), 7 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 6fdb2f77b..1e0d99b43 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.08.12*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.08.12** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.08.13*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.08.13** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.08.12 +[debug] youtube-dl version 2016.08.13 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 5efcb2316..fc99b9f73 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2016.08.13 Core * Show progress for curl external downloader diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 8fb581d2b..56fc41a40 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -238,7 +238,6 @@ - **FoxSports** - **france2.fr:generation-quoi** - **FranceCulture** - - **FranceCultureEmission** - **FranceInter** - **francetv**: France 2, 3, 4, 5 and Ô - **francetvinfo.fr** @@ -277,6 +276,7 @@ - **HellPorno** - **Helsinki**: helsinki.fi - **HentaiStigma** + - **HGTV** - **HistoricFilms** - **history:topic**: History.com Topic - **hitbox** @@ -664,7 +664,6 @@ - **SztvHu** - **Tagesschau** - **tagesschau:player** - - **Tapely** - **Tass** - **TDSLifeway** - **teachertube**: teachertube.com videos diff --git a/youtube_dl/version.py b/youtube_dl/version.py index becf14458..cc93d22aa 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.08.12' +__version__ = '2016.08.13' From 097eba019d0d5cab93e9ce66e1b727b782d48250 Mon Sep 17 00:00:00 2001 From: phi Date: Sun, 14 Aug 2016 02:18:59 +0800 Subject: [PATCH 158/775] bug fix for extractor xiami.py Before applying this patch, when downloading resources from xiami.com, it crashes with these: Traceback (most recent call last): File "/home/phi/.local/bin/youtube-dl", line 11, in sys.exit(main()) File "/home/phi/.local/lib/python3.5/site-packages/youtube_dl/__init__.py", line 433, in main _real_main(argv) File "/home/phi/.local/lib/python3.5/site-packages/youtube_dl/__init__.py", line 423, in _real_main retcode = ydl.download(all_urls) File "/home/phi/.local/lib/python3.5/site-packages/youtube_dl/YoutubeDL.py", line 1786, in download url, force_generic_extractor=self.params.get('force_generic_extractor', False)) File "/home/phi/.local/lib/python3.5/site-packages/youtube_dl/YoutubeDL.py", line 691, in extract_info ie_result = ie.extract(url) File "/home/phi/.local/lib/python3.5/site-packages/youtube_dl/extractor/common.py", line 347, in extract return self._real_extract(url) File "/home/phi/.local/lib/python3.5/site-packages/youtube_dl/extractor/xiami.py", line 116, in _real_extract return self._extract_tracks(self._match_id(url))[0] File "/home/phi/.local/lib/python3.5/site-packages/youtube_dl/extractor/xiami.py", line 43, in _extract_tracks '%s/%s%s' % (self._API_BASE_URL, item_id, '/type/%s' % typ if typ else ''), item_id) File "/home/phi/.local/lib/python3.5/site-packages/youtube_dl/extractor/common.py", line 562, in _download_json json_string, video_id, transform_source=transform_source, 
fatal=fatal) File "/home/phi/.local/lib/python3.5/site-packages/youtube_dl/extractor/common.py", line 568, in _parse_json return json.loads(json_string) File "/usr/lib/python3.5/json/__init__.py", line 312, in loads s.__class__.__name__)) TypeError: the JSON object must be str, not 'NoneType' This patch solves exactly this problem. --- youtube_dl/extractor/xiami.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/xiami.py b/youtube_dl/extractor/xiami.py index a6dfc4af9..86abef257 100644 --- a/youtube_dl/extractor/xiami.py +++ b/youtube_dl/extractor/xiami.py @@ -13,6 +13,7 @@ class XiamiBaseIE(InfoExtractor): webpage = super(XiamiBaseIE, self)._download_webpage(*args, **kwargs) if '>Xiami is currently not available in your country.<' in webpage: self.raise_geo_restricted('Xiami is currently not available in your country') + return webpage def _extract_track(self, track, track_id=None): title = track['title'] From fafabc0712d95e6a5b2ac56e9375fe90060738f5 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 14 Aug 2016 02:33:15 +0800 Subject: [PATCH 159/775] Update ChangeLog for #10342 [skip ci] --- ChangeLog | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/ChangeLog b/ChangeLog index fc99b9f73..d04c5fc2a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Extractors +* [xiami] Fix extraction (#10342) + + version 2016.08.13 Core @@ -23,6 +29,7 @@ Extractors + [pbs] Add support for high quality HTTP formats + [crunchyroll] Add support for HLS formats (#10301) + version 2016.08.12 Core From aaf44a2f47f013e8d864ac9f98b2833904a8be78 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 13 Aug 2016 22:53:07 +0100 Subject: [PATCH 160/775] [uplynk] Add new extractor --- youtube_dl/downloader/hls.py | 6 +++ youtube_dl/extractor/extractors.py | 4 ++ youtube_dl/extractor/uplynk.py | 64 ++++++++++++++++++++++++++++++ 3 files changed, 74 insertions(+) create mode 100644 youtube_dl/extractor/uplynk.py diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 3b7bb3508..8d7971e5d 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -20,6 +20,7 @@ from ..utils import ( encodeFilename, sanitize_open, parse_m3u8_attributes, + update_url_query, ) @@ -82,6 +83,7 @@ class HlsFD(FragmentFD): self._prepare_and_start_frag_download(ctx) + extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url') i = 0 media_sequence = 0 decrypt_info = {'METHOD': 'NONE'} @@ -95,6 +97,8 @@ class HlsFD(FragmentFD): if re.match(r'^https?://', line) else compat_urlparse.urljoin(man_url, line)) frag_filename = '%s-Frag%d' % (ctx['tmpfilename'], i) + if extra_param_to_segment_url: + frag_url = update_url_query(frag_url, extra_param_to_segment_url) success = ctx['dl'].download(frag_filename, {'url': frag_url}) if not success: return False @@ -120,6 +124,8 @@ class HlsFD(FragmentFD): if not re.match(r'^https?://', decrypt_info['URI']): decrypt_info['URI'] = compat_urlparse.urljoin( man_url, decrypt_info['URI']) + if extra_param_to_segment_url: + decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_param_to_segment_url) decrypt_info['KEY'] = self.ydl.urlopen(decrypt_info['URI']).read() elif line.startswith('#EXT-X-MEDIA-SEQUENCE'): media_sequence = int(line[22:]) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 82d4ed153..901847509 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -926,6 +926,10 @@ from .udn import UDNEmbedIE from 
.digiteka import DigitekaIE from .unistra import UnistraIE from .uol import UOLIE +from .uplynk import ( + UplynkIE, + UplynkPreplayIE, +) from .urort import UrortIE from .urplay import URPlayIE from .usatoday import USATodayIE diff --git a/youtube_dl/extractor/uplynk.py b/youtube_dl/extractor/uplynk.py new file mode 100644 index 000000000..a6a685c9d --- /dev/null +++ b/youtube_dl/extractor/uplynk.py @@ -0,0 +1,64 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + float_or_none, + ExtractorError, +) + + +class UplynkIE(InfoExtractor): + _VALID_URL = r'https?://.*?\.uplynk\.com/(?Pext/[0-9a-f]{32}/(?P[^/?&]+)|(?P[0-9a-f]{32}))\.(?:m3u8|json)(?:.*?\bpbs=(?P[^&]+))?' + _TEST = { + 'url': 'http://content.uplynk.com/e89eaf2ce9054aa89d92ddb2d817a52e.m3u8', + 'info_dict': { + 'id': 'e89eaf2ce9054aa89d92ddb2d817a52e', + 'ext': 'mp4', + 'title': '030816-kgo-530pm-solar-eclipse-vid_web.mp4', + 'uploader_id': '4413701bf5a1488db55b767f8ae9d4fa', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + } + + def _real_extract(self, url): + path, external_id, video_id, session_id = re.match(self._VALID_URL, url).groups() + display_id = video_id or external_id + formats = self._extract_m3u8_formats('http://content.uplynk.com/%s.m3u8' % path, display_id, 'mp4') + if session_id: + for f in formats: + f['extra_param_to_segment_url'] = { + 'pbs': session_id, + } + self._sort_formats(formats) + asset = self._download_json('http://content.uplynk.com/player/assetinfo/%s.json' % path, display_id) + if asset.get('error') == 1: + raise ExtractorError('% said: %s' % (self.IE_NAME, asset['msg']), expected=True) + + return { + 'id': asset['asset'], + 'title': asset['desc'], + 'thumbnail': asset.get('default_poster_url'), + 'duration': float_or_none(asset.get('duration')), + 'uploader_id': asset.get('owner'), + 'formats': formats, + } + + +class UplynkPreplayIE(InfoExtractor): + _VALID_URL = r'https?://.*?\.uplynk\.com/preplay2?/(?Pext/[0-9a-f]{32}/(?P[^/?&]+)|(?P[0-9a-f]{32}))\.json' + + def _real_extract(self, url): + path, external_id, video_id = re.match(self._VALID_URL, url).groups() + display_id = video_id or external_id + preplay = self._download_json(url, display_id) + content_url = 'http://content.uplynk.com/%s.m3u8' % path + session_id = preplay.get('sid') + if session_id: + content_url += '?pbs=' + session_id + return self.url_result(content_url, 'Uplynk') From 320d597c21e7a0981f1dc9c4167fce53473ab488 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 14 Aug 2016 16:25:14 +0700 Subject: [PATCH 161/775] [vgtv] Detect geo restricted videos (#10348) --- youtube_dl/extractor/vgtv.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/youtube_dl/extractor/vgtv.py b/youtube_dl/extractor/vgtv.py index b11cd254c..185756301 100644 --- a/youtube_dl/extractor/vgtv.py +++ b/youtube_dl/extractor/vgtv.py @@ -8,6 +8,7 @@ from .xstream import XstreamIE from ..utils import ( ExtractorError, float_or_none, + try_get, ) @@ -129,6 +130,11 @@ class VGTVIE(XstreamIE): 'url': 'http://ap.vgtv.no/webtv#!/video/111084/de-nye-bysyklene-lettere-bedre-gir-stoerre-hjul-og-feste-til-mobil', 'only_matching': True, }, + { + # geoblocked + 'url': 'http://www.vgtv.no/#!/video/127205/inside-the-mind-of-favela-funk', + 'only_matching': True, + }, ] def _real_extract(self, url): @@ -196,6 +202,12 @@ class VGTVIE(XstreamIE): info['formats'].extend(formats) + if not info['formats']: + properties = try_get( + data, 
lambda x: x['streamConfiguration']['properties'], list) + if properties and 'geoblocked' in properties: + raise self.raise_geo_restricted() + self._sort_formats(info['formats']) info.update({ From 2118fdd1a96ed7a904b53ed5aad50a203d0e0c70 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 14 Aug 2016 11:48:13 +0100 Subject: [PATCH 162/775] [common] add separate method for getting netrc ligin info --- youtube_dl/extractor/common.py | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index e47770c1d..9427ff449 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -662,6 +662,24 @@ class InfoExtractor(object): else: return res + def _get_netrc_login_info(self, netrc_machine=None): + username = None + password = None + netrc_machine = netrc_machine or self._NETRC_MACHINE + + if self._downloader.params.get('usenetrc', False): + try: + info = netrc.netrc().authenticators(netrc_machine) + if info is not None: + username = info[0] + password = info[2] + else: + raise netrc.NetrcParseError('No authenticators for %s' % netrc_machine) + except (IOError, netrc.NetrcParseError) as err: + self._downloader.report_warning('parsing .netrc: %s' % error_to_compat_str(err)) + + return (username, password) + def _get_login_info(self): """ Get the login info as (username, password) @@ -679,16 +697,8 @@ class InfoExtractor(object): if downloader_params.get('username') is not None: username = downloader_params['username'] password = downloader_params['password'] - elif downloader_params.get('usenetrc', False): - try: - info = netrc.netrc().authenticators(self._NETRC_MACHINE) - if info is not None: - username = info[0] - password = info[2] - else: - raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE) - except (IOError, netrc.NetrcParseError) as err: - self._downloader.report_warning('parsing .netrc: %s' % error_to_compat_str(err)) + else: + username, password = self._get_netrc_login_info() return (username, password) From 9771b1f901b19ad5ba6632a37fc6348e8e6e98dd Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 14 Aug 2016 11:52:48 +0100 Subject: [PATCH 163/775] [theplatform] use _get_netrc_login_info and fix session expiration check(#10345) --- youtube_dl/extractor/theplatform.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index bb3efc4ea..9ca765a5f 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -218,15 +218,16 @@ class ThePlatformIE(ThePlatformBaseIE): requestor_info = self._downloader.cache.load('mvpd', requestor_id) or {} authn_token = requestor_info.get('authn_token') if authn_token: - token_expires = unified_timestamp(xml_text(authn_token, 'simpleTokenExpires').replace('_GMT', '')) - if token_expires and token_expires >= time.time(): + token_expires = unified_timestamp(re.sub(r'[_ ]GMT', '', xml_text(authn_token, 'simpleTokenExpires'))) + if token_expires and token_expires <= int(time.time()): authn_token = None + requestor_info = {} if not authn_token: # TODO add support for other TV Providers mso_id = 'DTV' - login_info = netrc.netrc().authenticators(mso_id) - if not login_info: - return None + username, password = self._get_netrc_login_info(mso_id) + if not username or not password: + return '' def post_form(form_page, note, data={}): post_url = 
self._html_search_regex(r']+action=(["\'])(?P.+?)\1', form_page, 'post url', group='url') @@ -248,8 +249,8 @@ class ThePlatformIE(ThePlatformBaseIE): provider_login_page = post_form( provider_redirect_page, 'Downloading Provider Login Page') mvpd_confirm_page = post_form(provider_login_page, 'Logging in', { - 'username': login_info[0], - 'password': login_info[2], + 'username': username, + 'password': password, }) post_form(mvpd_confirm_page, 'Confirming Login') From 884cdb6cd9c872ea68a03341e462b58e51fba58a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 14 Aug 2016 20:49:11 +0700 Subject: [PATCH 164/775] [life:embed] Improve extraction --- youtube_dl/extractor/lifenews.py | 68 +++++++++++++++++++++++--------- 1 file changed, 49 insertions(+), 19 deletions(-) diff --git a/youtube_dl/extractor/lifenews.py b/youtube_dl/extractor/lifenews.py index c2b4490c4..87120ecd1 100644 --- a/youtube_dl/extractor/lifenews.py +++ b/youtube_dl/extractor/lifenews.py @@ -4,7 +4,10 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_urlparse +from ..compat import ( + compat_str, + compat_urlparse, +) from ..utils import ( determine_ext, ExtractorError, @@ -96,7 +99,7 @@ class LifeNewsIE(InfoExtractor): r']+>]+src=["\'](.+?)["\']', webpage) iframe_links = re.findall( - r']+src=["\']((?:https?:)?//embed\.life\.ru/embed/.+?)["\']', + r']+src=["\']((?:https?:)?//embed\.life\.ru/(?:embed|video)/.+?)["\']', webpage) if not video_urls and not iframe_links: @@ -164,9 +167,9 @@ class LifeNewsIE(InfoExtractor): class LifeEmbedIE(InfoExtractor): IE_NAME = 'life:embed' - _VALID_URL = r'https?://embed\.life\.ru/embed/(?P[\da-f]{32})' + _VALID_URL = r'https?://embed\.life\.ru/(?:embed|video)/(?P[\da-f]{32})' - _TEST = { + _TESTS = [{ 'url': 'http://embed.life.ru/embed/e50c2dec2867350528e2574c899b8291', 'md5': 'b889715c9e49cb1981281d0e5458fbbe', 'info_dict': { @@ -175,30 +178,57 @@ class LifeEmbedIE(InfoExtractor): 'title': 'e50c2dec2867350528e2574c899b8291', 'thumbnail': 're:http://.*\.jpg', } - } + }, { + # with 1080p + 'url': 'https://embed.life.ru/video/e50c2dec2867350528e2574c899b8291', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) + thumbnail = None formats = [] - for video_url in re.findall(r'"file"\s*:\s*"([^"]+)', webpage): - video_url = compat_urlparse.urljoin(url, video_url) - ext = determine_ext(video_url) - if ext == 'm3u8': - formats.extend(self._extract_m3u8_formats( - video_url, video_id, 'mp4', - entry_protocol='m3u8_native', m3u8_id='m3u8')) - else: - formats.append({ - 'url': video_url, - 'format_id': ext, - 'preference': 1, - }) + + def extract_m3u8(manifest_url): + formats.extend(self._extract_m3u8_formats( + manifest_url, video_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id='m3u8')) + + def extract_original(original_url): + formats.append({ + 'url': original_url, + 'format_id': determine_ext(original_url, None), + 'preference': 1, + }) + + playlist = self._parse_json( + self._search_regex( + r'options\s*=\s*({.+?});', webpage, 'options', default='{}'), + video_id).get('playlist', {}) + if playlist: + master = playlist.get('master') + if isinstance(master, compat_str) and determine_ext(master) == 'm3u8': + extract_m3u8(compat_urlparse.urljoin(url, master)) + original = playlist.get('original') + if isinstance(original, compat_str): + extract_original(original) + thumbnail = playlist.get('image') + + # Old rendition 
fallback + if not formats: + for video_url in re.findall(r'"file"\s*:\s*"([^"]+)', webpage): + video_url = compat_urlparse.urljoin(url, video_url) + if determine_ext(video_url) == 'm3u8': + extract_m3u8(video_url) + else: + extract_original(video_url) + self._sort_formats(formats) - thumbnail = self._search_regex( + thumbnail = thumbnail or self._search_regex( r'"image"\s*:\s*"([^"]+)', webpage, 'thumbnail', default=None) return { From 1fd6e30988f44d372c7112c2d5e44c0d5cdbc4ed Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 14 Aug 2016 17:55:56 +0100 Subject: [PATCH 165/775] [adobepass] create separate class for adobe pass authentication --- youtube_dl/extractor/adobepass.py | 124 +++++++++++++++++++++ youtube_dl/extractor/aenetworks.py | 5 +- youtube_dl/extractor/nationalgeographic.py | 2 +- youtube_dl/extractor/syfy.py | 4 +- youtube_dl/extractor/theplatform.py | 98 +--------------- 5 files changed, 134 insertions(+), 99 deletions(-) create mode 100644 youtube_dl/extractor/adobepass.py diff --git a/youtube_dl/extractor/adobepass.py b/youtube_dl/extractor/adobepass.py new file mode 100644 index 000000000..4e59302ab --- /dev/null +++ b/youtube_dl/extractor/adobepass.py @@ -0,0 +1,124 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +import re +import time +import xml.etree.ElementTree as etree + +from .common import InfoExtractor +from ..utils import ( + unescapeHTML, + urlencode_postdata, + unified_timestamp, +) + + +class AdobePass(InfoExtractor): + _SERVICE_PROVIDER_TEMPLATE = 'https://sp.auth.adobe.com/adobe-services/%s' + _USER_AGENT = 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0' + + @staticmethod + def _get_mvpd_resource(provider_id, title, guid, rating): + channel = etree.Element('channel') + channel_title = etree.SubElement(channel, 'title') + channel_title.text = provider_id + item = etree.SubElement(channel, 'item') + resource_title = etree.SubElement(item, 'title') + resource_title.text = title + resource_guid = etree.SubElement(item, 'guid') + resource_guid.text = guid + resource_rating = etree.SubElement(item, 'media:rating') + resource_rating.attrib = {'scheme': 'urn:v-chip'} + resource_rating.text = rating + return '' + etree.tostring(channel).decode() + '' + + def _extract_mvpd_auth(self, url, video_id, requestor_id, resource): + def xml_text(xml_str, tag): + return self._search_regex( + '<%s>(.+?)' % (tag, tag), xml_str, tag) + + mvpd_headers = { + 'ap_42': 'anonymous', + 'ap_11': 'Linux i686', + 'ap_z': self._USER_AGENT, + 'User-Agent': self._USER_AGENT, + } + + guid = xml_text(resource, 'guid') + requestor_info = self._downloader.cache.load('mvpd', requestor_id) or {} + authn_token = requestor_info.get('authn_token') + if authn_token: + token_expires = unified_timestamp(re.sub(r'[_ ]GMT', '', xml_text(authn_token, 'simpleTokenExpires'))) + if token_expires and token_expires <= int(time.time()): + authn_token = None + requestor_info = {} + if not authn_token: + # TODO add support for other TV Providers + mso_id = 'DTV' + username, password = self._get_netrc_login_info(mso_id) + if not username or not password: + return '' + + def post_form(form_page, note, data={}): + post_url = self._html_search_regex(r']+action=(["\'])(?P.+?)\1', form_page, 'post url', group='url') + return self._download_webpage( + post_url, video_id, note, data=urlencode_postdata(data or self._hidden_inputs(form_page)), headers={ + 'Content-Type': 'application/x-www-form-urlencoded', + }) + + provider_redirect_page = self._download_webpage( + 
self._SERVICE_PROVIDER_TEMPLATE % 'authenticate/saml', video_id, + 'Downloading Provider Redirect Page', query={ + 'noflash': 'true', + 'mso_id': mso_id, + 'requestor_id': requestor_id, + 'no_iframe': 'false', + 'domain_name': 'adobe.com', + 'redirect_url': url, + }) + provider_login_page = post_form( + provider_redirect_page, 'Downloading Provider Login Page') + mvpd_confirm_page = post_form(provider_login_page, 'Logging in', { + 'username': username, + 'password': password, + }) + post_form(mvpd_confirm_page, 'Confirming Login') + + session = self._download_webpage( + self._SERVICE_PROVIDER_TEMPLATE % 'session', video_id, + 'Retrieving Session', data=urlencode_postdata({ + '_method': 'GET', + 'requestor_id': requestor_id, + }), headers=mvpd_headers) + authn_token = unescapeHTML(xml_text(session, 'authnToken')) + requestor_info['authn_token'] = authn_token + self._downloader.cache.store('mvpd', requestor_id, requestor_info) + + authz_token = requestor_info.get(guid) + if not authz_token: + authorize = self._download_webpage( + self._SERVICE_PROVIDER_TEMPLATE % 'authorize', video_id, + 'Retrieving Authorization Token', data=urlencode_postdata({ + 'resource_id': resource, + 'requestor_id': requestor_id, + 'authentication_token': authn_token, + 'mso_id': xml_text(authn_token, 'simpleTokenMsoID'), + 'userMeta': '1', + }), headers=mvpd_headers) + authz_token = unescapeHTML(xml_text(authorize, 'authzToken')) + requestor_info[guid] = authz_token + self._downloader.cache.store('mvpd', requestor_id, requestor_info) + + mvpd_headers.update({ + 'ap_19': xml_text(authn_token, 'simpleSamlNameID'), + 'ap_23': xml_text(authn_token, 'simpleSamlSessionIndex'), + }) + + return self._download_webpage( + self._SERVICE_PROVIDER_TEMPLATE % 'shortAuthorize', + video_id, 'Retrieving Media Token', data=urlencode_postdata({ + 'authz_token': authz_token, + 'requestor_id': requestor_id, + 'session_guid': xml_text(authn_token, 'simpleTokenAuthenticationGuid'), + 'hashed_guid': 'false', + }), headers=mvpd_headers) diff --git a/youtube_dl/extractor/aenetworks.py b/youtube_dl/extractor/aenetworks.py index 8f53050c9..6adb6d824 100644 --- a/youtube_dl/extractor/aenetworks.py +++ b/youtube_dl/extractor/aenetworks.py @@ -109,7 +109,10 @@ class AENetworksIE(AENetworksBaseIE): info = self._parse_theplatform_metadata(theplatform_metadata) if theplatform_metadata.get('AETN$isBehindWall'): requestor_id = self._DOMAIN_TO_REQUESTOR_ID[domain] - resource = '%s%s%s%s' % (requestor_id, theplatform_metadata['title'], theplatform_metadata['AETN$PPL_pplProgramId'], theplatform_metadata['ratings'][0]['rating']) + resource = self._get_mvpd_resource( + requestor_id, theplatform_metadata['title'], + theplatform_metadata.get('AETN$PPL_pplProgramId') or theplatform_metadata.get('AETN$PPL_pplProgramId_OLD'), + theplatform_metadata['ratings'][0]['rating']) query['auth'] = self._extract_mvpd_auth( url, video_id, requestor_id, resource) info.update(self._search_json_ld(webpage, video_id, fatal=False)) diff --git a/youtube_dl/extractor/nationalgeographic.py b/youtube_dl/extractor/nationalgeographic.py index 0027ff1b8..890e8d5bc 100644 --- a/youtube_dl/extractor/nationalgeographic.py +++ b/youtube_dl/extractor/nationalgeographic.py @@ -119,7 +119,7 @@ class NationalGeographicIE(ThePlatformIE): auth_resource_id = self._search_regex( r"video_auth_resourceId\s*=\s*'([^']+)'", webpage, 'auth resource id') - query['auth'] = self._extract_mvpd_auth(url, display_id, 'natgeo', auth_resource_id) or '' + query['auth'] = self._extract_mvpd_auth(url, 
display_id, 'natgeo', auth_resource_id) return { '_type': 'url_transparent', diff --git a/youtube_dl/extractor/syfy.py b/youtube_dl/extractor/syfy.py index 53723b66e..764287a64 100644 --- a/youtube_dl/extractor/syfy.py +++ b/youtube_dl/extractor/syfy.py @@ -40,7 +40,9 @@ class SyfyIE(ThePlatformIE): 'manifest': 'm3u', } if syfy_mpx.get('entitlement') == 'auth': - resource = 'syfy<![CDATA[%s]]>%s%s' % (title, video_id, syfy_mpx.get('mpxRating', 'TV-14')) + resource = self._get_mvpd_resource( + 'syfy', title, video_id, + syfy_mpx.get('mpxRating', 'TV-14')) query['auth'] = self._extract_mvpd_auth( url, video_id, 'syfy', resource) diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index 9ca765a5f..108ddd3a9 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -6,10 +6,10 @@ import time import hmac import binascii import hashlib -import netrc from .once import OnceIE +from .adobepass import AdobePass from ..compat import ( compat_parse_qs, compat_urllib_parse_urlparse, @@ -25,9 +25,6 @@ from ..utils import ( xpath_with_ns, mimetype2ext, find_xpath_attr, - unescapeHTML, - urlencode_postdata, - unified_timestamp, ) default_ns = 'http://www.w3.org/2005/SMIL21/Language' @@ -96,7 +93,7 @@ class ThePlatformBaseIE(OnceIE): return self._parse_theplatform_metadata(info) -class ThePlatformIE(ThePlatformBaseIE): +class ThePlatformIE(ThePlatformBaseIE, AdobePass): _VALID_URL = r'''(?x) (?:https?://(?:link|player)\.theplatform\.com/[sp]/(?P[^/]+)/ (?:(?:(?:[^/]+/)+select/)?(?Pmedia/(?:guid/\d+/)?)|(?P(?:[^/\?]+/(?:swf|config)|onsite)/select/))? @@ -202,97 +199,6 @@ class ThePlatformIE(ThePlatformBaseIE): sig = flags + expiration_date + checksum + str_to_hex(sig_secret) return '%s&sig=%s' % (url, sig) - def _extract_mvpd_auth(self, url, video_id, requestor_id, resource): - def xml_text(xml_str, tag): - return self._search_regex( - '<%s>(.+?)' % (tag, tag), xml_str, tag) - - mvpd_headers = { - 'ap_42': 'anonymous', - 'ap_11': 'Linux i686', - 'ap_z': 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0', - 'User-Agent': 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0', - } - - guid = xml_text(resource, 'guid') - requestor_info = self._downloader.cache.load('mvpd', requestor_id) or {} - authn_token = requestor_info.get('authn_token') - if authn_token: - token_expires = unified_timestamp(re.sub(r'[_ ]GMT', '', xml_text(authn_token, 'simpleTokenExpires'))) - if token_expires and token_expires <= int(time.time()): - authn_token = None - requestor_info = {} - if not authn_token: - # TODO add support for other TV Providers - mso_id = 'DTV' - username, password = self._get_netrc_login_info(mso_id) - if not username or not password: - return '' - - def post_form(form_page, note, data={}): - post_url = self._html_search_regex(r']+action=(["\'])(?P.+?)\1', form_page, 'post url', group='url') - return self._download_webpage( - post_url, video_id, note, data=urlencode_postdata(data or self._hidden_inputs(form_page)), headers={ - 'Content-Type': 'application/x-www-form-urlencoded', - }) - - provider_redirect_page = self._download_webpage( - self._SERVICE_PROVIDER_TEMPLATE % 'authenticate/saml', video_id, - 'Downloading Provider Redirect Page', query={ - 'noflash': 'true', - 'mso_id': mso_id, - 'requestor_id': requestor_id, - 'no_iframe': 'false', - 'domain_name': 'adobe.com', - 'redirect_url': url, - }) - provider_login_page = post_form( - provider_redirect_page, 'Downloading Provider Login Page') - 
mvpd_confirm_page = post_form(provider_login_page, 'Logging in', { - 'username': username, - 'password': password, - }) - post_form(mvpd_confirm_page, 'Confirming Login') - - session = self._download_webpage( - self._SERVICE_PROVIDER_TEMPLATE % 'session', video_id, - 'Retrieving Session', data=urlencode_postdata({ - '_method': 'GET', - 'requestor_id': requestor_id, - }), headers=mvpd_headers) - authn_token = unescapeHTML(xml_text(session, 'authnToken')) - requestor_info['authn_token'] = authn_token - self._downloader.cache.store('mvpd', requestor_id, requestor_info) - - authz_token = requestor_info.get(guid) - if not authz_token: - authorize = self._download_webpage( - self._SERVICE_PROVIDER_TEMPLATE % 'authorize', video_id, - 'Retrieving Authorization Token', data=urlencode_postdata({ - 'resource_id': resource, - 'requestor_id': requestor_id, - 'authentication_token': authn_token, - 'mso_id': xml_text(authn_token, 'simpleTokenMsoID'), - 'userMeta': '1', - }), headers=mvpd_headers) - authz_token = unescapeHTML(xml_text(authorize, 'authzToken')) - requestor_info[guid] = authz_token - self._downloader.cache.store('mvpd', requestor_id, requestor_info) - - mvpd_headers.update({ - 'ap_19': xml_text(authn_token, 'simpleSamlNameID'), - 'ap_23': xml_text(authn_token, 'simpleSamlSessionIndex'), - }) - - return self._download_webpage( - self._SERVICE_PROVIDER_TEMPLATE % 'shortAuthorize', - video_id, 'Retrieving Media Token', data=urlencode_postdata({ - 'authz_token': authz_token, - 'requestor_id': requestor_id, - 'session_guid': xml_text(authn_token, 'simpleTokenAuthenticationGuid'), - 'hashed_guid': 'false', - }), headers=mvpd_headers) - def _real_extract(self, url): url, smuggled_data = unsmuggle_url(url, {}) From d2ac04674d0d9085aedec229820c1d07082e5825 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 14 Aug 2016 18:03:42 +0100 Subject: [PATCH 166/775] [viceland] Add new extractor(#8799) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/viceland.py | 100 +++++++++++++++++++++++++++++ 2 files changed, 101 insertions(+) create mode 100644 youtube_dl/extractor/viceland.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 901847509..be96e34ba 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -958,6 +958,7 @@ from .vice import ( ViceIE, ViceShowIE, ) +from .viceland import VicelandIE from .vidbit import VidbitIE from .viddler import ViddlerIE from .videodetective import VideoDetectiveIE diff --git a/youtube_dl/extractor/viceland.py b/youtube_dl/extractor/viceland.py new file mode 100644 index 000000000..c66e8eb95 --- /dev/null +++ b/youtube_dl/extractor/viceland.py @@ -0,0 +1,100 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import time +import hashlib +import json + +from .adobepass import AdobePass +from ..compat import compat_HTTPError +from ..utils import ( + int_or_none, + parse_age_limit, + str_or_none, + parse_duration, + ExtractorError, + extract_attributes, +) + + +class VicelandIE(AdobePass): + _VALID_URL = r'https?://(?:www\.)?viceland\.com/[^/]+/video/[^/]+/(?P[a-f0-9]+)' + _TEST = { + # FIXME: fill the test after fixing delegation problem + 'url': 'https://www.viceland.com/en_us/video/cyberwar-trailer/57608447973ee7705f6fbd4e', + 'info_dict': { + 'id': '57608447973ee7705f6fbd4e', + 'ext': 'mp4', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + 'add_ie': ['UplynkPreplay', 'Uplynk'], + } + + def _real_extract(self, url): + video_id = 
self._match_id(url) + + webpage = self._download_webpage(url, video_id) + watch_hub_data = extract_attributes(self._search_regex( + r'(?s)()', webpage, 'watch hub')) + video_id = watch_hub_data['vms-id'] + title = watch_hub_data['video-title'] + + query = {} + if watch_hub_data.get('video-locked') == '1': + resource = self._get_mvpd_resource( + 'VICELAND', title, video_id, + watch_hub_data.get('video-rating')) + query['tvetoken'] = self._extract_mvpd_auth(url, video_id, 'VICELAND', resource) + + # signature generation algorithm is reverse engineered from signatureGenerator in + # webpack:///../shared/~/vice-player/dist/js/vice-player.js in + # https://www.viceland.com/assets/common/js/web.vendor.bundle.js + exp = int(time.time()) + 14400 + query.update({ + 'exp': exp, + 'sign': hashlib.sha512(('%s:GET:%d' % (video_id, exp)).encode()).hexdigest(), + }) + + try: + preplay = self._download_json('https://www.viceland.com/en_us/preplay/%s' % video_id, video_id, query=query) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400: + error = json.loads(e.cause.read().decode()) + raise ExtractorError('%s said: %s' % (self.IE_NAME, error['details']), expected=True) + + video_data = preplay['video'] + base = video_data['base'] + uplynk_preplay_url = preplay['preplayURL'] + episode = video_data.get('episode', {}) + channel = video_data.get('channel', {}) + + subtitles = {} + cc_url = preplay.get('ccURL') + if cc_url: + subtitles['en'] = [{ + 'url': cc_url, + }] + + return { + '_type': 'url_transparent', + 'url': uplynk_preplay_url, + 'id': video_id, + 'title': title, + 'description': base.get('body'), + 'thumbnail': watch_hub_data.get('cover-image') or watch_hub_data.get('thumbnail'), + 'duration': parse_duration(video_data.get('video_duration') or watch_hub_data.get('video-duration')), + 'timestamp': int_or_none(video_data.get('created_at')), + 'age_limit': parse_age_limit(video_data.get('video_rating')), + 'series': video_data.get('show_title') or watch_hub_data.get('show-title'), + 'episode_number': int_or_none(episode.get('episode_number') or watch_hub_data.get('episode')), + 'episode_id': str_or_none(episode.get('id') or video_data.get('episode_id')), + 'season_number': int_or_none(watch_hub_data.get('season')), + 'season_id': str_or_none(episode.get('season_id')), + 'uploader': channel.get('base', {}).get('title') or watch_hub_data.get('channel-title'), + 'uploader_id': str_or_none(channel.get('id')), + 'subtitles': subtitles, + 'ie_key': 'UplynkPreplay', + } From 9fa57892790ce205634f6a7c83de2b9e52ab5284 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 14 Aug 2016 19:04:23 +0100 Subject: [PATCH 167/775] [viceland] fix info extraction(closes #8799) --- youtube_dl/extractor/uplynk.py | 11 +++++++---- youtube_dl/extractor/viceland.py | 7 +++++++ 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/uplynk.py b/youtube_dl/extractor/uplynk.py index a6a685c9d..4313bc9cb 100644 --- a/youtube_dl/extractor/uplynk.py +++ b/youtube_dl/extractor/uplynk.py @@ -26,8 +26,8 @@ class UplynkIE(InfoExtractor): }, } - def _real_extract(self, url): - path, external_id, video_id, session_id = re.match(self._VALID_URL, url).groups() + def _extract_uplynk_info(self, uplynk_content_url): + path, external_id, video_id, session_id = re.match(UplynkIE._VALID_URL, uplynk_content_url).groups() display_id = video_id or external_id formats = self._extract_m3u8_formats('http://content.uplynk.com/%s.m3u8' % path, display_id, 'mp4') if session_id: @@ 
-49,8 +49,11 @@ class UplynkIE(InfoExtractor): 'formats': formats, } + def _real_extract(self, url): + return self._extract_uplynk_info(url) -class UplynkPreplayIE(InfoExtractor): + +class UplynkPreplayIE(UplynkIE): _VALID_URL = r'https?://.*?\.uplynk\.com/preplay2?/(?Pext/[0-9a-f]{32}/(?P[^/?&]+)|(?P[0-9a-f]{32}))\.json' def _real_extract(self, url): @@ -61,4 +64,4 @@ class UplynkPreplayIE(InfoExtractor): session_id = preplay.get('sid') if session_id: content_url += '?pbs=' + session_id - return self.url_result(content_url, 'Uplynk') + return self._extract_uplynk_info(content_url) diff --git a/youtube_dl/extractor/viceland.py b/youtube_dl/extractor/viceland.py index c66e8eb95..f72294b51 100644 --- a/youtube_dl/extractor/viceland.py +++ b/youtube_dl/extractor/viceland.py @@ -25,6 +25,13 @@ class VicelandIE(AdobePass): 'info_dict': { 'id': '57608447973ee7705f6fbd4e', 'ext': 'mp4', + 'title': 'CYBERWAR (Trailer)', + 'description': 'Tapping into the geopolitics of hacking and surveillance, Ben Makuch travels the world to meet with hackers, government officials, and dissidents to investigate the ecosystem of cyberwarfare.', + 'age_limit': 14, + 'timestamp': 1466008539, + 'upload_date': '20160615', + 'uploader_id': '11', + 'uploader': 'Viceland', }, 'params': { # m3u8 download From 6103f59095bd1e514e43b3f84f4633e27ee09b69 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 14 Aug 2016 19:08:35 +0100 Subject: [PATCH 168/775] [viceland] remove outdated comment --- youtube_dl/extractor/viceland.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/viceland.py b/youtube_dl/extractor/viceland.py index f72294b51..0be8a792f 100644 --- a/youtube_dl/extractor/viceland.py +++ b/youtube_dl/extractor/viceland.py @@ -20,7 +20,6 @@ from ..utils import ( class VicelandIE(AdobePass): _VALID_URL = r'https?://(?:www\.)?viceland\.com/[^/]+/video/[^/]+/(?P[a-f0-9]+)' _TEST = { - # FIXME: fill the test after fixing delegation problem 'url': 'https://www.viceland.com/en_us/video/cyberwar-trailer/57608447973ee7705f6fbd4e', 'info_dict': { 'id': '57608447973ee7705f6fbd4e', From e811bcf8f820d92b6629920b7c3c5a902815e6d1 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 14 Aug 2016 20:12:53 +0100 Subject: [PATCH 169/775] [viceland] raise ExtractorError for errors other than HTTP 400 --- youtube_dl/extractor/viceland.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/viceland.py b/youtube_dl/extractor/viceland.py index 0be8a792f..814a72fa2 100644 --- a/youtube_dl/extractor/viceland.py +++ b/youtube_dl/extractor/viceland.py @@ -70,6 +70,7 @@ class VicelandIE(AdobePass): if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400: error = json.loads(e.cause.read().decode()) raise ExtractorError('%s said: %s' % (self.IE_NAME, error['details']), expected=True) + raise video_data = preplay['video'] base = video_data['base'] From 7e60ce9cf7b104c15fcc4c495166dc57b950b987 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 14 Aug 2016 21:24:33 +0100 Subject: [PATCH 170/775] [adobepass] clear cache in case of pendingLogout errors --- youtube_dl/extractor/adobepass.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/adobepass.py b/youtube_dl/extractor/adobepass.py index 4e59302ab..d315bfbc1 100644 --- a/youtube_dl/extractor/adobepass.py +++ b/youtube_dl/extractor/adobepass.py @@ -90,6 +90,9 @@ class AdobePass(InfoExtractor): '_method': 'GET', 'requestor_id': requestor_id, }), headers=mvpd_headers) + if ' Date: Sun, 14 Aug 
2016 21:25:43 +0100 Subject: [PATCH 171/775] [adobepass] fix check for pendingLogout errors --- youtube_dl/extractor/adobepass.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/adobepass.py b/youtube_dl/extractor/adobepass.py index d315bfbc1..cf3a15cbb 100644 --- a/youtube_dl/extractor/adobepass.py +++ b/youtube_dl/extractor/adobepass.py @@ -90,7 +90,7 @@ class AdobePass(InfoExtractor): '_method': 'GET', 'requestor_id': requestor_id, }), headers=mvpd_headers) - if ' Date: Sun, 14 Aug 2016 22:45:43 +0100 Subject: [PATCH 172/775] [uplynk,viceland] update tests and change uplynk extractors names --- youtube_dl/extractor/uplynk.py | 3 +++ youtube_dl/extractor/viceland.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/uplynk.py b/youtube_dl/extractor/uplynk.py index 4313bc9cb..ae529f690 100644 --- a/youtube_dl/extractor/uplynk.py +++ b/youtube_dl/extractor/uplynk.py @@ -11,6 +11,7 @@ from ..utils import ( class UplynkIE(InfoExtractor): + IE_NAME = 'uplynk' _VALID_URL = r'https?://.*?\.uplynk\.com/(?Pext/[0-9a-f]{32}/(?P[^/?&]+)|(?P[0-9a-f]{32}))\.(?:m3u8|json)(?:.*?\bpbs=(?P[^&]+))?' _TEST = { 'url': 'http://content.uplynk.com/e89eaf2ce9054aa89d92ddb2d817a52e.m3u8', @@ -54,7 +55,9 @@ class UplynkIE(InfoExtractor): class UplynkPreplayIE(UplynkIE): + IE_NAME = 'uplynk:preplay' _VALID_URL = r'https?://.*?\.uplynk\.com/preplay2?/(?Pext/[0-9a-f]{32}/(?P[^/?&]+)|(?P[0-9a-f]{32}))\.json' + _TEST = None def _real_extract(self, url): path, external_id, video_id = re.match(self._VALID_URL, url).groups() diff --git a/youtube_dl/extractor/viceland.py b/youtube_dl/extractor/viceland.py index 814a72fa2..da766d8db 100644 --- a/youtube_dl/extractor/viceland.py +++ b/youtube_dl/extractor/viceland.py @@ -36,7 +36,7 @@ class VicelandIE(AdobePass): # m3u8 download 'skip_download': True, }, - 'add_ie': ['UplynkPreplay', 'Uplynk'], + 'add_ie': ['UplynkPreplay'], } def _real_extract(self, url): From 1a57b8c18c9bdaf5e231f2178499041446b57a3a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 15 Aug 2016 08:25:24 +0700 Subject: [PATCH 173/775] [zippcast] Remove extractor (Closes #10332) ZippCast is shut down --- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/zippcast.py | 94 ------------------------------ 2 files changed, 95 deletions(-) delete mode 100644 youtube_dl/extractor/zippcast.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index be96e34ba..15bc0a675 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1109,4 +1109,3 @@ from .zingmp3 import ( ZingMp3SongIE, ZingMp3AlbumIE, ) -from .zippcast import ZippCastIE diff --git a/youtube_dl/extractor/zippcast.py b/youtube_dl/extractor/zippcast.py deleted file mode 100644 index de819376d..000000000 --- a/youtube_dl/extractor/zippcast.py +++ /dev/null @@ -1,94 +0,0 @@ -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - determine_ext, - str_to_int, -) - - -class ZippCastIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?zippcast\.com/(?:video/|videoview\.php\?.*\bvplay=)(?P[0-9a-zA-Z]+)' - _TESTS = [{ - # m3u8, hq direct link - 'url': 'http://www.zippcast.com/video/c9cfd5c7e44dbc29c81', - 'md5': '5ea0263b5606866c4d6cda0fc5e8c6b6', - 'info_dict': { - 'id': 'c9cfd5c7e44dbc29c81', - 'ext': 'mp4', - 'title': '[Vinesauce] Vinny - Digital Space Traveler', - 'description': 'Muted on youtube, but now uploaded in it\'s 
original form.', - 'thumbnail': 're:^https?://.*\.jpg$', - 'uploader': 'vinesauce', - 'view_count': int, - 'categories': ['Entertainment'], - 'tags': list, - }, - }, { - # f4m, lq ipod direct link - 'url': 'http://www.zippcast.com/video/b79c0a233e9c6581775', - 'only_matching': True, - }, { - 'url': 'http://www.zippcast.com/videoview.php?vplay=c9cfd5c7e44dbc29c81&auto=no', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage( - 'http://www.zippcast.com/video/%s' % video_id, video_id) - - formats = [] - video_url = self._search_regex( - r']+src=(["\'])(?P.+?)\1', webpage, - 'video url', default=None, group='url') - if video_url: - formats.append({ - 'url': video_url, - 'format_id': 'http', - 'preference': 0, # direct link is almost always of worse quality - }) - src_url = self._search_regex( - r'src\s*:\s*(?:escape\()?(["\'])(?Phttp://.+?)\1', - webpage, 'src', default=None, group='url') - ext = determine_ext(src_url) - if ext == 'm3u8': - formats.extend(self._extract_m3u8_formats( - src_url, video_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='hls', fatal=False)) - elif ext == 'f4m': - formats.extend(self._extract_f4m_formats( - src_url, video_id, f4m_id='hds', fatal=False)) - self._sort_formats(formats) - - title = self._og_search_title(webpage) - description = self._og_search_description(webpage) or self._html_search_meta( - 'description', webpage) - uploader = self._search_regex( - r']+href="https?://[^/]+/profile/[^>]+>([^<]+)', - webpage, 'uploader', fatal=False) - thumbnail = self._og_search_thumbnail(webpage) - view_count = str_to_int(self._search_regex( - r'>([\d,.]+) views!', webpage, 'view count', fatal=False)) - - categories = re.findall( - r']+href="https?://[^/]+/categories/[^"]+">([^<]+),?<', - webpage) - tags = re.findall( - r']+href="https?://[^/]+/search/tags/[^"]+">([^<]+),?<', - webpage) - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'uploader': uploader, - 'view_count': view_count, - 'categories': categories, - 'tags': tags, - 'formats': formats, - } From b6c4e36728e8f60ae7f4910a9b7027a2b702e8dc Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Mon, 15 Aug 2016 13:29:01 +0800 Subject: [PATCH 174/775] [jwplatform] Parse video_id from JWPlayer data And remove a mysterious comma from 115c65793af4c56c8f1986d2640105fc7e760c13 --- youtube_dl/extractor/jwplatform.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/jwplatform.py b/youtube_dl/extractor/jwplatform.py index 2a499bb77..ce3126943 100644 --- a/youtube_dl/extractor/jwplatform.py +++ b/youtube_dl/extractor/jwplatform.py @@ -30,7 +30,7 @@ class JWPlatformBaseIE(InfoExtractor): return self._parse_jwplayer_data( jwplayer_data, video_id, *args, **kwargs) - def _parse_jwplayer_data(self, jwplayer_data, video_id, require_title=True, m3u8_id=None, rtmp_params=None, base_url=None): + def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True, m3u8_id=None, rtmp_params=None, base_url=None): # JWPlayer backward compatibility: flattened playlists # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/api/config.js#L81-L96 if 'playlist' not in jwplayer_data: @@ -43,6 +43,8 @@ class JWPlatformBaseIE(InfoExtractor): if 'sources' not in video_data: video_data['sources'] = [video_data] + this_video_id = video_id or video_data['mediaid'] + formats = [] for source in video_data['sources']: source_url = 
self._proto_relative_url(source['file']) @@ -52,7 +54,7 @@ class JWPlatformBaseIE(InfoExtractor): ext = mimetype2ext(source_type) or determine_ext(source_url) if source_type == 'hls' or ext == 'm3u8': formats.extend(self._extract_m3u8_formats( - source_url, video_id, 'mp4', 'm3u8_native', m3u8_id=m3u8_id, fatal=False)) + source_url, this_video_id, 'mp4', 'm3u8_native', m3u8_id=m3u8_id, fatal=False)) # https://github.com/jwplayer/jwplayer/blob/master/src/js/providers/default.js#L67 elif source_type.startswith('audio') or ext in ('oga', 'aac', 'mp3', 'mpeg', 'vorbis'): formats.append({ @@ -68,7 +70,7 @@ class JWPlatformBaseIE(InfoExtractor): 'ext': ext, } if source_url.startswith('rtmp'): - a_format['ext'] = 'flv', + a_format['ext'] = 'flv' # See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as # of jwplayer.flash.swf @@ -95,7 +97,7 @@ class JWPlatformBaseIE(InfoExtractor): }) entries.append({ - 'id': video_id, + 'id': this_video_id, 'title': video_data['title'] if require_title else video_data.get('title'), 'description': video_data.get('description'), 'thumbnail': self._proto_relative_url(video_data.get('image')), From 5c2d08722139118d8de27d43d6210e18ab1da9d5 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Mon, 15 Aug 2016 13:31:08 +0800 Subject: [PATCH 175/775] [sendtonews] Fix extraction --- youtube_dl/extractor/sendtonews.py | 103 +++++++++++++++-------------- 1 file changed, 53 insertions(+), 50 deletions(-) diff --git a/youtube_dl/extractor/sendtonews.py b/youtube_dl/extractor/sendtonews.py index 1c636f672..2dbe490bb 100644 --- a/youtube_dl/extractor/sendtonews.py +++ b/youtube_dl/extractor/sendtonews.py @@ -4,33 +4,43 @@ from __future__ import unicode_literals import re from .jwplatform import JWPlatformBaseIE -from ..compat import compat_parse_qs from ..utils import ( - ExtractorError, - parse_duration, + float_or_none, + parse_iso8601, + update_url_query, ) class SendtoNewsIE(JWPlatformBaseIE): - _VALID_URL = r'https?://embed\.sendtonews\.com/player/embed\.php\?(?P[^#]+)' + _VALID_URL = r'https?://embed\.sendtonews\.com/player2/embedplayer\.php\?.*\bSC=(?P[0-9A-Za-z-]+)' _TEST = { # From http://cleveland.cbslocal.com/2016/05/16/indians-score-season-high-15-runs-in-blowout-win-over-reds-rapid-reaction/ - 'url': 'http://embed.sendtonews.com/player/embed.php?SK=GxfCe0Zo7D&MK=175909&PK=5588&autoplay=on&sound=yes', + 'url': 'http://embed.sendtonews.com/player2/embedplayer.php?SC=GxfCe0Zo7D-175909-5588&type=single&autoplay=on&sound=YES', 'info_dict': { - 'id': 'GxfCe0Zo7D-175909-5588', - 'ext': 'mp4', - 'title': 'Recap: CLE 15, CIN 6', - 'description': '5/16/16: Indians\' bats explode for 15 runs in a win', - 'duration': 49, + 'id': 'GxfCe0Zo7D-175909-5588' }, + 'playlist_count': 9, + # test the first video only to prevent lengthy tests + 'playlist': [{ + 'info_dict': { + 'id': '198180', + 'ext': 'mp4', + 'title': 'Recap: CLE 5, LAA 4', + 'description': '8/14/16: Naquin, Almonte lead Indians in 5-4 win', + 'duration': 57.343, + 'thumbnail': 're:https?://.*\.jpg$', + 'upload_date': '20160815', + 'timestamp': 1471221961, + }, + }], 'params': { # m3u8 download 'skip_download': True, }, } - _URL_TEMPLATE = '//embed.sendtonews.com/player/embed.php?SK=%s&MK=%s&PK=%s' + _URL_TEMPLATE = '//embed.sendtonews.com/player2/embedplayer.php?SC=%s' @classmethod def _extract_url(cls, webpage): @@ -39,48 +49,41 @@ class SendtoNewsIE(JWPlatformBaseIE): .*\bSC=(?P[0-9a-zA-Z-]+).* \1>''', webpage) if mobj: - sk, mk, pk = mobj.group('SC').split('-') - return cls._URL_TEMPLATE % (sk, mk, pk) + sc = 
mobj.group('SC') + return cls._URL_TEMPLATE % sc def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - params = compat_parse_qs(mobj.group('query')) + playlist_id = self._match_id(url) - if 'SK' not in params or 'MK' not in params or 'PK' not in params: - raise ExtractorError('Invalid URL', expected=True) + data_url = update_url_query( + url.replace('embedplayer.php', 'data_read.php'), + {'cmd': 'loadInitial'}) + playlist_data = self._download_json(data_url, playlist_id) - video_id = '-'.join([params['SK'][0], params['MK'][0], params['PK'][0]]) + entries = [] + for video in playlist_data['playlistData'][0]: + info_dict = self._parse_jwplayer_data( + video['jwconfiguration'], + require_title=False, rtmp_params={'no_resume': True}) - webpage = self._download_webpage(url, video_id) + thumbnails = [] + if video.get('thumbnailUrl'): + thumbnails.append({ + 'id': 'normal', + 'url': video['thumbnailUrl'], + }) + if video.get('smThumbnailUrl'): + thumbnails.append({ + 'id': 'small', + 'url': video['smThumbnailUrl'], + }) + info_dict.update({ + 'title': video['S_headLine'], + 'description': video.get('S_fullStory'), + 'thumbnails': thumbnails, + 'duration': float_or_none(video.get('SM_length')), + 'timestamp': parse_iso8601(video.get('S_sysDate'), delimiter=' '), + }) + entries.append(info_dict) - jwplayer_data_str = self._search_regex( - r'jwplayer\("[^"]+"\)\.setup\((.+?)\);', webpage, 'JWPlayer data') - js_vars = { - 'w': 1024, - 'h': 768, - 'modeVar': 'html5', - } - for name, val in js_vars.items(): - js_val = '%d' % val if isinstance(val, int) else '"%s"' % val - jwplayer_data_str = jwplayer_data_str.replace(':%s,' % name, ':%s,' % js_val) - - info_dict = self._parse_jwplayer_data( - self._parse_json(jwplayer_data_str, video_id), - video_id, require_title=False, rtmp_params={'no_resume': True}) - - title = self._html_search_regex( - r']+class="embedTitle">([^<]+)
    ', webpage, 'title') - description = self._html_search_regex( - r']+class="embedSubTitle">([^<]+)', webpage, - 'description', fatal=False) - duration = parse_duration(self._html_search_regex( - r']+class="embedDetails">([0-9:]+)', webpage, - 'duration', fatal=False)) - - info_dict.update({ - 'title': title, - 'description': description, - 'duration': duration, - }) - - return info_dict + return self.playlist_result(entries, playlist_id) From 760845ce9965b57484f232a162b9bb4ad3a505a7 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Mon, 15 Aug 2016 13:37:37 +0800 Subject: [PATCH 176/775] [cbslocal] Adapt to SendtoNewsIE --- youtube_dl/extractor/cbslocal.py | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/cbslocal.py b/youtube_dl/extractor/cbslocal.py index 008c5fe32..4bcd104af 100644 --- a/youtube_dl/extractor/cbslocal.py +++ b/youtube_dl/extractor/cbslocal.py @@ -41,13 +41,8 @@ class CBSLocalIE(AnvatoIE): 'url': 'http://cleveland.cbslocal.com/2016/05/16/indians-score-season-high-15-runs-in-blowout-win-over-reds-rapid-reaction/', 'info_dict': { 'id': 'GxfCe0Zo7D-175909-5588', - 'ext': 'mp4', - 'title': 'Recap: CLE 15, CIN 6', - 'description': '5/16/16: Indians\' bats explode for 15 runs in a win', - 'upload_date': '20160516', - 'timestamp': 1463433840, - 'duration': 49, }, + 'playlist_count': 9, 'params': { # m3u8 download 'skip_download': True, @@ -60,12 +55,11 @@ class CBSLocalIE(AnvatoIE): sendtonews_url = SendtoNewsIE._extract_url(webpage) if sendtonews_url: - info_dict = { - '_type': 'url_transparent', - 'url': compat_urlparse.urljoin(url, sendtonews_url), - } - else: - info_dict = self._extract_anvato_videos(webpage, display_id) + return self.url_result( + compat_urlparse.urljoin(url, sendtonews_url), + ie=SendtoNewsIE.ie_key()) + + info_dict = self._extract_anvato_videos(webpage, display_id) time_str = self._html_search_regex( r'class="entry-date">([^<]+)<', webpage, 'released date', fatal=False) From 6d8ec8c3b7381c40afd89f9c118ae770997703d0 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Mon, 15 Aug 2016 13:39:43 +0800 Subject: [PATCH 177/775] [ChangeLog] Update for CBSLocal and related changes --- ChangeLog | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ChangeLog b/ChangeLog index d04c5fc2a..32504dab5 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,9 @@ version Extractors +* [cbslocal] Fix extraction for SendtoNews-based videos +* [sendtonews] Fix extraction +* [jwplatform] Now can parse video_id from JWPlayer data * [xiami] Fix extraction (#10342) From 69eb4d699fe3f6d84acc7882e427e661040faecb Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Mon, 15 Aug 2016 20:29:22 +0800 Subject: [PATCH 178/775] [cbsnews] Remove invalid tests. CBS Live videos gets deleted soon. --- youtube_dl/extractor/cbsnews.py | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/cbsnews.py b/youtube_dl/extractor/cbsnews.py index 9328e3e20..9d3b75526 100644 --- a/youtube_dl/extractor/cbsnews.py +++ b/youtube_dl/extractor/cbsnews.py @@ -70,7 +70,8 @@ class CBSNewsLiveVideoIE(InfoExtractor): IE_DESC = 'CBS News Live Videos' _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/live/video/(?P[\da-z_-]+)' - _TESTS = [{ + # Live videos get deleted soon. 
See http://www.cbsnews.com/live/ for the latest examples + _TEST = { 'url': 'http://www.cbsnews.com/live/video/clinton-sanders-prepare-to-face-off-in-nh/', 'info_dict': { 'id': 'clinton-sanders-prepare-to-face-off-in-nh', @@ -78,15 +79,8 @@ class CBSNewsLiveVideoIE(InfoExtractor): 'title': 'Clinton, Sanders Prepare To Face Off In NH', 'duration': 334, }, - 'skip': 'Video gone, redirected to http://www.cbsnews.com/live/', - }, { - 'url': 'http://www.cbsnews.com/live/video/video-shows-intense-paragliding-accident/', - 'info_dict': { - 'id': 'video-shows-intense-paragliding-accident', - 'ext': 'flv', - 'title': 'Video Shows Intense Paragliding Accident', - }, - }] + 'skip': 'Video gone', + } def _real_extract(self, url): video_id = self._match_id(url) From bf90c46790bac92e8a61ee0514cf3c41a8c048e9 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 15 Aug 2016 16:33:35 +0100 Subject: [PATCH 179/775] [fxnetworks] Add new extractor(closes #9462) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/fxnetworks.py | 49 ++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+) create mode 100644 youtube_dl/extractor/fxnetworks.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 15bc0a675..07928c530 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -287,6 +287,7 @@ from .freevideo import FreeVideoIE from .funimation import FunimationIE from .funnyordie import FunnyOrDieIE from .fusion import FusionIE +from .fxnetworks import FXNetworksIE from .gameinformer import GameInformerIE from .gameone import ( GameOneIE, diff --git a/youtube_dl/extractor/fxnetworks.py b/youtube_dl/extractor/fxnetworks.py new file mode 100644 index 000000000..70bc186a3 --- /dev/null +++ b/youtube_dl/extractor/fxnetworks.py @@ -0,0 +1,49 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .adobepass import AdobePass +from ..utils import ( + update_url_query, + extract_attributes, + parse_age_limit, + smuggle_url, +) + + +class FXNetworksIE(AdobePass): + _VALID_URL = r'https?://(?:www\.)?fxnetworks\.com/video/(?P\d+)' + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + video_data = extract_attributes(self._search_regex( + r'()', webpage, 'video data')) + player_type = self._search_regex(r'playerType\s*=\s*[\'"]([^\'"]+)', webpage, 'player type', fatal=False) + release_url = video_data['rel'] + title = video_data['data-title'] + rating = video_data.get('data-rating') + query = { + 'mbr': 'true', + } + if player_type == 'movies': + query.update({ + 'manifest': 'm3u', + }) + else: + query.update({ + 'switch': 'http', + }) + if video_data.get('data-req-auth') == '1': + resource = self._get_mvpd_resource( + video_data['data-channel'], title, + video_data.get('data-guid'), rating) + query['auth'] = self._extract_mvpd_auth(url, video_id, 'fx', resource) + + return { + '_type': 'url_transparent', + 'id': video_id, + 'url': smuggle_url(update_url_query(release_url, query), {'force_smil_url': True}), + 'thumbnail': video_data.get('data-large-thumb'), + 'age_limit': parse_age_limit(rating), + 'ie_key': 'ThePlatform', + } From cbef4d5c9ff5013d0c10b960e1690805724120cd Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 15 Aug 2016 17:10:45 +0100 Subject: [PATCH 180/775] [fxnetworks] add test and check geo restriction --- youtube_dl/extractor/fxnetworks.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git 
a/youtube_dl/extractor/fxnetworks.py b/youtube_dl/extractor/fxnetworks.py index 70bc186a3..940e7427c 100644 --- a/youtube_dl/extractor/fxnetworks.py +++ b/youtube_dl/extractor/fxnetworks.py @@ -12,10 +12,27 @@ from ..utils import ( class FXNetworksIE(AdobePass): _VALID_URL = r'https?://(?:www\.)?fxnetworks\.com/video/(?P\d+)' + _TEST = { + 'url': 'http://www.fxnetworks.com/video/719841347694', + 'md5': '1447d4722e42ebca19e5232ab93abb22', + 'info_dict': { + 'id': '719841347694', + 'ext': 'mp4', + 'title': 'Vanpage', + 'description': 'F*ck settling down. You\'re the Worst returns for an all new season August 31st on FXX.', + 'age_limit': 14, + 'uploader': 'NEWA-FNG-FX', + 'upload_date': '20160706', + 'timestamp': 1467844741, + }, + 'add_ie': ['ThePlatform'], + } def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) + if 'The content you are trying to access is not available in your region.' in webpage: + self.raise_geo_restricted() video_data = extract_attributes(self._search_regex( r'()', webpage, 'video data')) player_type = self._search_regex(r'playerType\s*=\s*[\'"]([^\'"]+)', webpage, 'player type', fatal=False) @@ -42,6 +59,7 @@ class FXNetworksIE(AdobePass): return { '_type': 'url_transparent', 'id': video_id, + 'title': title, 'url': smuggle_url(update_url_query(release_url, query), {'force_smil_url': True}), 'thumbnail': video_data.get('data-large-thumb'), 'age_limit': parse_age_limit(rating), From 818ac213eb80e18f472ecdf2406569bafd4cccaf Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 15 Aug 2016 21:36:34 +0100 Subject: [PATCH 181/775] [adobepass] add IE suffix to the extractor and remove duplicate constant --- youtube_dl/extractor/adobepass.py | 2 +- youtube_dl/extractor/fxnetworks.py | 4 ++-- youtube_dl/extractor/nationalgeographic.py | 6 +++--- youtube_dl/extractor/syfy.py | 4 ++-- youtube_dl/extractor/theplatform.py | 5 ++--- youtube_dl/extractor/viceland.py | 4 ++-- 6 files changed, 12 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/adobepass.py b/youtube_dl/extractor/adobepass.py index cf3a15cbb..2c9f8817b 100644 --- a/youtube_dl/extractor/adobepass.py +++ b/youtube_dl/extractor/adobepass.py @@ -13,7 +13,7 @@ from ..utils import ( ) -class AdobePass(InfoExtractor): +class AdobePassIE(InfoExtractor): _SERVICE_PROVIDER_TEMPLATE = 'https://sp.auth.adobe.com/adobe-services/%s' _USER_AGENT = 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0' diff --git a/youtube_dl/extractor/fxnetworks.py b/youtube_dl/extractor/fxnetworks.py index 940e7427c..3ec3b0b46 100644 --- a/youtube_dl/extractor/fxnetworks.py +++ b/youtube_dl/extractor/fxnetworks.py @@ -1,7 +1,7 @@ # coding: utf-8 from __future__ import unicode_literals -from .adobepass import AdobePass +from .adobepass import AdobePassIE from ..utils import ( update_url_query, extract_attributes, @@ -10,7 +10,7 @@ from ..utils import ( ) -class FXNetworksIE(AdobePass): +class FXNetworksIE(AdobePassIE): _VALID_URL = r'https?://(?:www\.)?fxnetworks\.com/video/(?P\d+)' _TEST = { 'url': 'http://www.fxnetworks.com/video/719841347694', diff --git a/youtube_dl/extractor/nationalgeographic.py b/youtube_dl/extractor/nationalgeographic.py index 890e8d5bc..1dcf27afe 100644 --- a/youtube_dl/extractor/nationalgeographic.py +++ b/youtube_dl/extractor/nationalgeographic.py @@ -3,7 +3,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from .theplatform import ThePlatformIE +from .adobepass import AdobePassIE from ..utils 
import ( smuggle_url, url_basename, @@ -65,7 +65,7 @@ class NationalGeographicVideoIE(InfoExtractor): } -class NationalGeographicIE(ThePlatformIE): +class NationalGeographicIE(AdobePassIE): IE_NAME = 'natgeo' _VALID_URL = r'https?://channel\.nationalgeographic\.com/(?:wild/)?[^/]+/(?:videos|episodes)/(?P[^/?]+)' @@ -131,7 +131,7 @@ class NationalGeographicIE(ThePlatformIE): } -class NationalGeographicEpisodeGuideIE(ThePlatformIE): +class NationalGeographicEpisodeGuideIE(InfoExtractor): IE_NAME = 'natgeo:episodeguide' _VALID_URL = r'https?://channel\.nationalgeographic\.com/(?:wild/)?(?P[^/]+)/episode-guide' _TESTS = [ diff --git a/youtube_dl/extractor/syfy.py b/youtube_dl/extractor/syfy.py index 764287a64..cc81f6003 100644 --- a/youtube_dl/extractor/syfy.py +++ b/youtube_dl/extractor/syfy.py @@ -1,13 +1,13 @@ from __future__ import unicode_literals -from .theplatform import ThePlatformIE +from .adobepass import AdobePassIE from ..utils import ( update_url_query, smuggle_url, ) -class SyfyIE(ThePlatformIE): +class SyfyIE(AdobePassIE): _VALID_URL = r'https?://www\.syfy\.com/(?:[^/]+/)?videos/(?P[^/?#]+)' _TESTS = [{ 'url': 'http://www.syfy.com/theinternetruinedmylife/videos/the-internet-ruined-my-life-season-1-trailer', diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index 108ddd3a9..eda899497 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -9,7 +9,7 @@ import hashlib from .once import OnceIE -from .adobepass import AdobePass +from .adobepass import AdobePassIE from ..compat import ( compat_parse_qs, compat_urllib_parse_urlparse, @@ -93,7 +93,7 @@ class ThePlatformBaseIE(OnceIE): return self._parse_theplatform_metadata(info) -class ThePlatformIE(ThePlatformBaseIE, AdobePass): +class ThePlatformIE(ThePlatformBaseIE, AdobePassIE): _VALID_URL = r'''(?x) (?:https?://(?:link|player)\.theplatform\.com/[sp]/(?P[^/]+)/ (?:(?:(?:[^/]+/)+select/)?(?Pmedia/(?:guid/\d+/)?)|(?P(?:[^/\?]+/(?:swf|config)|onsite)/select/))? 
@@ -164,7 +164,6 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePass): 'url': 'http://player.theplatform.com/p/NnzsPC/onsite_universal/select/media/guid/2410887629/2928790?fwsitesection=nbc_the_blacklist_video_library&autoPlay=true&carouselID=137781', 'only_matching': True, }] - _SERVICE_PROVIDER_TEMPLATE = 'https://sp.auth.adobe.com/adobe-services/%s' @classmethod def _extract_urls(cls, webpage): diff --git a/youtube_dl/extractor/viceland.py b/youtube_dl/extractor/viceland.py index da766d8db..8742b607a 100644 --- a/youtube_dl/extractor/viceland.py +++ b/youtube_dl/extractor/viceland.py @@ -5,7 +5,7 @@ import time import hashlib import json -from .adobepass import AdobePass +from .adobepass import AdobePassIE from ..compat import compat_HTTPError from ..utils import ( int_or_none, @@ -17,7 +17,7 @@ from ..utils import ( ) -class VicelandIE(AdobePass): +class VicelandIE(AdobePassIE): _VALID_URL = r'https?://(?:www\.)?viceland\.com/[^/]+/video/[^/]+/(?P[a-f0-9]+)' _TEST = { 'url': 'https://www.viceland.com/en_us/video/cyberwar-trailer/57608447973ee7705f6fbd4e', From 254e64a20aa37a033cb200bc6f1aa9daf57eead8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 16 Aug 2016 04:36:23 +0700 Subject: [PATCH 182/775] [bbc:playlist] Add support for pagination (Closes #10349) --- youtube_dl/extractor/bbc.py | 48 ++++++++++++++++++++++++++++++++----- 1 file changed, 42 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index 83e6d024c..16a97a76d 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals import re +import itertools from .common import InfoExtractor from ..utils import ( @@ -17,6 +18,7 @@ from ..utils import ( from ..compat import ( compat_etree_fromstring, compat_HTTPError, + compat_urlparse, ) @@ -1056,19 +1058,35 @@ class BBCCoUkArticleIE(InfoExtractor): class BBCCoUkPlaylistBaseIE(InfoExtractor): + def _entries(self, webpage, url, playlist_id): + single_page = 'page' in compat_urlparse.parse_qs( + compat_urlparse.urlparse(url).query) + for page_num in itertools.count(2): + for video_id in re.findall( + self._VIDEO_ID_TEMPLATE % BBCCoUkIE._ID_REGEX, webpage): + yield self.url_result( + self._URL_TEMPLATE % video_id, BBCCoUkIE.ie_key()) + if single_page: + return + next_page = self._search_regex( + r']+class=(["\'])pagination_+next\1[^>]*>]+href=(["\'])(?P(?:(?!\2).)+)\2', + webpage, 'next page url', default=None, group='url') + if not next_page: + break + webpage = self._download_webpage( + compat_urlparse.urljoin(url, next_page), playlist_id, + 'Downloading page %d' % page_num, page_num) + def _real_extract(self, url): playlist_id = self._match_id(url) webpage = self._download_webpage(url, playlist_id) - entries = [ - self.url_result(self._URL_TEMPLATE % video_id, BBCCoUkIE.ie_key()) - for video_id in re.findall( - self._VIDEO_ID_TEMPLATE % BBCCoUkIE._ID_REGEX, webpage)] - title, description = self._extract_title_and_description(webpage) - return self.playlist_result(entries, playlist_id, title, description) + return self.playlist_result( + self._entries(webpage, url, playlist_id), + playlist_id, title, description) class BBCCoUkIPlayerPlaylistIE(BBCCoUkPlaylistBaseIE): @@ -1094,6 +1112,24 @@ class BBCCoUkIPlayerPlaylistIE(BBCCoUkPlaylistBaseIE): 'description': 'md5:683e901041b2fe9ba596f2ab04c4dbe7', }, 'playlist_mincount': 10, + }, { + # explicit page + 'url': 'http://www.bbc.co.uk/programmes/b00mfl7n/clips?page=1', + 'info_dict': { + 'id': 
'b00mfl7n', + 'title': 'Bohemian Icons', + 'description': 'md5:683e901041b2fe9ba596f2ab04c4dbe7', + }, + 'playlist_mincount': 24, + }, { + # all pages + 'url': 'http://www.bbc.co.uk/programmes/b00mfl7n/clips', + 'info_dict': { + 'id': 'b00mfl7n', + 'title': 'Bohemian Icons', + 'description': 'md5:683e901041b2fe9ba596f2ab04c4dbe7', + }, + 'playlist_mincount': 142, }] def _extract_title_and_description(self, webpage): From 4f640f28901be8a3ce57e77ead404d751e36d208 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 16 Aug 2016 04:43:10 +0700 Subject: [PATCH 183/775] [bbc:playlist] Fix tests --- youtube_dl/extractor/bbc.py | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index 16a97a76d..deb9cc1c0 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -1112,24 +1112,6 @@ class BBCCoUkIPlayerPlaylistIE(BBCCoUkPlaylistBaseIE): 'description': 'md5:683e901041b2fe9ba596f2ab04c4dbe7', }, 'playlist_mincount': 10, - }, { - # explicit page - 'url': 'http://www.bbc.co.uk/programmes/b00mfl7n/clips?page=1', - 'info_dict': { - 'id': 'b00mfl7n', - 'title': 'Bohemian Icons', - 'description': 'md5:683e901041b2fe9ba596f2ab04c4dbe7', - }, - 'playlist_mincount': 24, - }, { - # all pages - 'url': 'http://www.bbc.co.uk/programmes/b00mfl7n/clips', - 'info_dict': { - 'id': 'b00mfl7n', - 'title': 'Bohemian Icons', - 'description': 'md5:683e901041b2fe9ba596f2ab04c4dbe7', - }, - 'playlist_mincount': 142, }] def _extract_title_and_description(self, webpage): @@ -1153,6 +1135,24 @@ class BBCCoUkPlaylistIE(BBCCoUkPlaylistBaseIE): 'description': 'French thriller serial about a missing teenager.', }, 'playlist_mincount': 7, + }, { + # multipage playlist, explicit page + 'url': 'http://www.bbc.co.uk/programmes/b00mfl7n/clips?page=1', + 'info_dict': { + 'id': 'b00mfl7n', + 'title': 'Frozen Planet - Clips - BBC One', + 'description': 'md5:65dcbf591ae628dafe32aa6c4a4a0d8c', + }, + 'playlist_mincount': 24, + }, { + # multipage playlist, all pages + 'url': 'http://www.bbc.co.uk/programmes/b00mfl7n/clips', + 'info_dict': { + 'id': 'b00mfl7n', + 'title': 'Frozen Planet - Clips - BBC One', + 'description': 'md5:65dcbf591ae628dafe32aa6c4a4a0d8c', + }, + 'playlist_mincount': 142, }, { 'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/broadcasts/2016/06', 'only_matching': True, From fb64adcbd37a660da92687878831d08e82ae748c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 16 Aug 2016 04:45:21 +0700 Subject: [PATCH 184/775] [adobepass] PEP 8 --- youtube_dl/extractor/adobepass.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/adobepass.py b/youtube_dl/extractor/adobepass.py index 2c9f8817b..9e3a3e362 100644 --- a/youtube_dl/extractor/adobepass.py +++ b/youtube_dl/extractor/adobepass.py @@ -131,4 +131,4 @@ class AdobePassIE(InfoExtractor): if ' Date: Tue, 16 Aug 2016 13:43:33 +0100 Subject: [PATCH 185/775] [amcnetworks] Add new extractor --- youtube_dl/extractor/amcnetworks.py | 72 +++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 1 + 2 files changed, 73 insertions(+) create mode 100644 youtube_dl/extractor/amcnetworks.py diff --git a/youtube_dl/extractor/amcnetworks.py b/youtube_dl/extractor/amcnetworks.py new file mode 100644 index 000000000..be9552541 --- /dev/null +++ b/youtube_dl/extractor/amcnetworks.py @@ -0,0 +1,72 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .theplatform 
import ThePlatformIE +from ..utils import ( + update_url_query, + parse_age_limit, +) + + +class AMCNetworksIE(ThePlatformIE): + _VALID_URL = r'https?://(?:www\.)?(?:amc|bbcamerica|ifc|wetv)\.com/(?:movies/|shows/[^/]+/(?:full-episodes/)?season-\d+/episode-\d+(?:-(?:[^/]+/)?|/))(?P[^/?#]+)' + _TESTS = [{ + 'url': 'http://www.ifc.com/shows/maron/season-04/episode-01/step-1', + 'md5': '', + 'info_dict': { + 'id': 's3MX01Nl4vPH', + 'ext': 'mp4', + 'title': 'Step 1', + 'description': 'In denial about his current situation, Marc is reluctantly convinced by his friends to enter rehab. Starring Marc Maron and Constance Zimmer.', + 'age_limit': 17, + 'upload_date': '20160505', + 'timestamp': 1462468831, + 'uploader': 'AMCN', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, { + 'url': 'http://www.bbcamerica.com/shows/the-hunt/full-episodes/season-1/episode-01-the-hardest-challenge', + 'only_matching': True, + }, { + 'url': 'http://www.amc.com/shows/preacher/full-episodes/season-01/episode-00/pilot', + 'only_matching': True, + }, { + 'url': 'http://www.wetv.com/shows/million-dollar-matchmaker/season-01/episode-06-the-dumped-dj-and-shallow-hal', + 'only_matching': True, + }, { + 'url': 'http://www.ifc.com/movies/chaos', + 'only_matching': True, + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + query = { + 'mbr': 'true', + 'manifest': 'm3u', + } + media_url = self._search_regex(r'window\.platformLinkURL\s*=\s*[\'"]([^\'"]+)', webpage, 'media url') + theplatform_metadata = self._download_theplatform_metadata(self._search_regex( + r'https?://link.theplatform.com/s/([^?]+)', media_url, 'theplatform_path'), display_id) + info = self._parse_theplatform_metadata(theplatform_metadata) + video_id = theplatform_metadata['pid'] + title = theplatform_metadata['title'] + rating = theplatform_metadata['ratings'][0]['rating'] + auth_required = self._search_regex(r'window\.authRequired\s*=\s*(true|false);', webpage, 'auth required') + if auth_required == 'true': + requestor_id = self._search_regex(r'window\.requestor_id\s*=\s*[\'"]([^\'"]+)', webpage, 'requestor id') + resource = self._get_mvpd_resource(requestor_id, title, video_id, rating) + query['auth'] = self._extract_mvpd_auth(url, video_id, requestor_id, resource) + media_url = update_url_query(media_url, query) + formats, subtitles = self._extract_theplatform_smil(media_url, video_id) + self._sort_formats(formats) + info.update({ + 'id': video_id, + 'subtiles': subtitles, + 'formats': formats, + 'age_limit': parse_age_limit(parse_age_limit(rating)), + }) + return info diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 07928c530..a5e0805b2 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -29,6 +29,7 @@ from .aftonbladet import AftonbladetIE from .airmozilla import AirMozillaIE from .aljazeera import AlJazeeraIE from .alphaporno import AlphaPornoIE +from .amcnetworks import AMCNetworksIE from .animeondemand import AnimeOnDemandIE from .anitube import AnitubeIE from .anysex import AnySexIE From 837e56c8eefa725ca72feca9431050cdda571c57 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 16 Aug 2016 14:49:32 +0100 Subject: [PATCH 186/775] [amcnetworks] extract episode metadata --- youtube_dl/extractor/amcnetworks.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/amcnetworks.py b/youtube_dl/extractor/amcnetworks.py 
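The episode-metadata commit whose diff follows reads series, season and episode fields out of ThePlatform's namespaced metadata keys (ns + '$show' and friends), which is easy to misread without seeing the data shape. The standalone sketch below walks the same lookup over a toy metadata dict; the 'pl1' prefix, the namespace URI and every field value are invented for illustration and are not a real ThePlatform response.

# Toy metadata shaped like a namespaced ThePlatform response (values invented).
sample_metadata = {
    '$xmlns': {'pl1': 'http://example.com/fields'},
    'title': 'Step 1',
    'pl1$show': 'Maron',
    'pl1$season': '4',
    'pl1$episode': '1',
}

ns_keys = sample_metadata.get('$xmlns', {}).keys()
if ns_keys:
    ns = list(ns_keys)[0]                       # 'pl1'
    series = sample_metadata.get(ns + '$show')  # 'Maron'
    season_number = int(sample_metadata.get(ns + '$season'))
    title = sample_metadata['title']
    if season_number:
        title = 'Season %d - %s' % (season_number, title)
    if series:
        title = '%s - %s' % (series, title)
    print(title)  # Maron - Season 4 - Step 1

In the actual extractor the numeric fields go through int_or_none() rather than int(), so missing keys degrade to None instead of raising.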
index be9552541..26f46acb5 100644 --- a/youtube_dl/extractor/amcnetworks.py +++ b/youtube_dl/extractor/amcnetworks.py @@ -5,6 +5,7 @@ from .theplatform import ThePlatformIE from ..utils import ( update_url_query, parse_age_limit, + int_or_none, ) @@ -16,7 +17,7 @@ class AMCNetworksIE(ThePlatformIE): 'info_dict': { 'id': 's3MX01Nl4vPH', 'ext': 'mp4', - 'title': 'Step 1', + 'title': 'Maron - Season 4 - Step 1', 'description': 'In denial about his current situation, Marc is reluctantly convinced by his friends to enter rehab. Starring Marc Maron and Constance Zimmer.', 'age_limit': 17, 'upload_date': '20160505', @@ -69,4 +70,22 @@ class AMCNetworksIE(ThePlatformIE): 'formats': formats, 'age_limit': parse_age_limit(parse_age_limit(rating)), }) + ns_keys = theplatform_metadata.get('$xmlns', {}).keys() + if ns_keys: + ns = list(ns_keys)[0] + series = theplatform_metadata.get(ns + '$show') + season_number = int_or_none(theplatform_metadata.get(ns + '$season')) + episode = theplatform_metadata.get(ns + '$episodeTitle') + episode_number = int_or_none(theplatform_metadata.get(ns + '$episode')) + if season_number: + title = 'Season %d - %s' % (season_number, title) + if series: + title = '%s - %s' % (series, title) + info.update({ + 'title': title, + 'series': series, + 'season_number': season_number, + 'episode': episode, + 'episode_number': episode_number, + }) return info From 70a2829fee4203ebeb399481304d289ff92adf29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 16 Aug 2016 21:17:52 +0700 Subject: [PATCH 187/775] [xvideos] Fix HLS extraction (Closes #10356) --- youtube_dl/extractor/xvideos.py | 34 ++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/xvideos.py b/youtube_dl/extractor/xvideos.py index 1dfe031ca..b2ef15119 100644 --- a/youtube_dl/extractor/xvideos.py +++ b/youtube_dl/extractor/xvideos.py @@ -42,24 +42,24 @@ class XVideosIE(InfoExtractor): video_url = compat_urllib_parse_unquote(self._search_regex( r'flv_url=(.+?)&', webpage, 'video URL', default='')) if video_url: - formats.append({'url': video_url}) + formats.append({ + 'url': video_url, + 'format_id': 'flv', + }) - player_args = self._search_regex( - r'(?s)new\s+HTML5Player\((.+?)\)', webpage, ' html5 player', default=None) - if player_args: - for arg in player_args.split(','): - format_url = self._search_regex( - r'(["\'])(?Phttps?://.+?)\1', arg, 'url', - default=None, group='url') - if not format_url: - continue - ext = determine_ext(format_url) - if ext == 'mp4': - formats.append({'url': format_url}) - elif ext == 'm3u8': - formats.extend(self._extract_m3u8_formats( - format_url, video_id, 'mp4', - entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)) + for kind, _, format_url in re.findall( + r'setVideo([^(]+)\((["\'])(http.+?)\2\)', webpage): + format_id = kind.lower() + if format_id == 'hls': + formats.extend(self._extract_m3u8_formats( + format_url, video_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)) + elif format_id in ('urllow', 'urlhigh'): + formats.append({ + 'url': format_url, + 'format_id': '%s-%s' % (determine_ext(format_url, 'mp4'), format_id[3:]), + 'quality': -2 if format_id.endswith('low') else None, + }) self._sort_formats(formats) From 98affc1a482ab41466c76cfded41949c4db58f67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 16 Aug 2016 21:20:15 +0700 Subject: [PATCH 188/775] [xvideos] Fix test --- youtube_dl/extractor/xvideos.py | 4 ++-- 1 file changed, 2 
insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/xvideos.py b/youtube_dl/extractor/xvideos.py index b2ef15119..30825daae 100644 --- a/youtube_dl/extractor/xvideos.py +++ b/youtube_dl/extractor/xvideos.py @@ -15,10 +15,10 @@ class XVideosIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?xvideos\.com/video(?P[0-9]+)(?:.*)' _TEST = { 'url': 'http://www.xvideos.com/video4588838/biker_takes_his_girl', - 'md5': '4b46ae6ea5e6e9086e714d883313c0c9', + 'md5': '14cea69fcb84db54293b1e971466c2e1', 'info_dict': { 'id': '4588838', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Biker Takes his Girl', 'age_limit': 18, } From 11f502fac145b4592f47c025ee8317fe44020db0 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 16 Aug 2016 16:19:36 +0100 Subject: [PATCH 189/775] [theplatform] extract subtitles with multiple formats from the metadata --- youtube_dl/extractor/theplatform.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index eda899497..23067e8c6 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -73,10 +73,10 @@ class ThePlatformBaseIE(OnceIE): if isinstance(captions, list): for caption in captions: lang, src, mime = caption.get('lang', 'en'), caption.get('src'), caption.get('type') - subtitles[lang] = [{ + subtitles.setdefault(lang, []).append({ 'ext': mimetype2ext(mime), 'url': src, - }] + }) return { 'title': info['title'], From 2cabee2a7d4c94aa2f4f2e84a3c68eb97cdf9cce Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 16 Aug 2016 16:20:07 +0100 Subject: [PATCH 190/775] [amcnetworks] fix typo --- youtube_dl/extractor/amcnetworks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/amcnetworks.py b/youtube_dl/extractor/amcnetworks.py index 26f46acb5..c739d2c99 100644 --- a/youtube_dl/extractor/amcnetworks.py +++ b/youtube_dl/extractor/amcnetworks.py @@ -66,7 +66,7 @@ class AMCNetworksIE(ThePlatformIE): self._sort_formats(formats) info.update({ 'id': video_id, - 'subtiles': subtitles, + 'subtitles': subtitles, 'formats': formats, 'age_limit': parse_age_limit(parse_age_limit(rating)), }) From 53fef319f14896ce497d309f661ceb586d7b4d90 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 16 Aug 2016 16:21:04 +0100 Subject: [PATCH 191/775] [fxnetworks] extend _VALID_URL to support simpsonsworld.com --- youtube_dl/extractor/fxnetworks.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/fxnetworks.py b/youtube_dl/extractor/fxnetworks.py index 3ec3b0b46..629897317 100644 --- a/youtube_dl/extractor/fxnetworks.py +++ b/youtube_dl/extractor/fxnetworks.py @@ -11,8 +11,8 @@ from ..utils import ( class FXNetworksIE(AdobePassIE): - _VALID_URL = r'https?://(?:www\.)?fxnetworks\.com/video/(?P\d+)' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?(?:fxnetworks|simpsonsworld)\.com/video/(?P\d+)' + _TESTS = [{ 'url': 'http://www.fxnetworks.com/video/719841347694', 'md5': '1447d4722e42ebca19e5232ab93abb22', 'info_dict': { @@ -26,7 +26,10 @@ class FXNetworksIE(AdobePassIE): 'timestamp': 1467844741, }, 'add_ie': ['ThePlatform'], - } + }, { + 'url': 'http://www.simpsonsworld.com/video/716094019682', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) @@ -35,7 +38,7 @@ class FXNetworksIE(AdobePassIE): self.raise_geo_restricted() video_data = extract_attributes(self._search_regex( r'()', webpage, 'video data')) - player_type = 
self._search_regex(r'playerType\s*=\s*[\'"]([^\'"]+)', webpage, 'player type', fatal=False) + player_type = self._search_regex(r'playerType\s*=\s*[\'"]([^\'"]+)', webpage, 'player type', default=None) release_url = video_data['rel'] title = video_data['data-title'] rating = video_data.get('data-rating') From 6e7e4a6edf6c4ffd56d908ade7f0bfe2bff738b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 17 Aug 2016 00:19:43 +0700 Subject: [PATCH 192/775] [mtg] Add support for viafree URLs (#10358) --- youtube_dl/extractor/tvplay.py | 41 ++++++++++++++++++++++++---------- 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/tvplay.py b/youtube_dl/extractor/tvplay.py index 150bde663..d82bf67b4 100644 --- a/youtube_dl/extractor/tvplay.py +++ b/youtube_dl/extractor/tvplay.py @@ -20,16 +20,25 @@ from ..utils import ( class TVPlayIE(InfoExtractor): - IE_DESC = 'TV3Play and related services' - _VALID_URL = r'''(?x)https?://(?:www\.)? - (?:tvplay(?:\.skaties)?\.lv/parraides| - (?:tv3play|play\.tv3)\.lt/programos| - tv3play(?:\.tv3)?\.ee/sisu| - tv(?:3|6|8|10)play\.se/program| - (?:(?:tv3play|viasat4play|tv6play)\.no|tv3play\.dk)/programmer| - play\.novatv\.bg/programi - )/[^/]+/(?P\d+) - ''' + IE_NAME = 'mtg' + IE_DESC = 'MTG services' + _VALID_URL = r'''(?x) + (?: + mtg:| + https?:// + (?:www\.)? + (?: + tvplay(?:\.skaties)?\.lv/parraides| + (?:tv3play|play\.tv3)\.lt/programos| + tv3play(?:\.tv3)?\.ee/sisu| + (?:tv(?:3|6|8|10)play|viafree)\.se/program| + (?:(?:tv3play|viasat4play|tv6play|viafree)\.no|(?:tv3play|viafree)\.dk)/programmer| + play\.novatv\.bg/programi + ) + /(?:[^/]+/)+ + ) + (?P\d+) + ''' _TESTS = [ { 'url': 'http://www.tvplay.lv/parraides/vinas-melo-labak/418113?autostart=true', @@ -197,6 +206,14 @@ class TVPlayIE(InfoExtractor): { 'url': 'http://tv3play.tv3.ee/sisu/kodu-keset-linna/238551?autostart=true', 'only_matching': True, + }, + { + 'url': 'http://www.viafree.se/program/underhallning/i-like-radio-live/sasong-1/676869', + 'only_matching': True, + }, + { + 'url': 'mtg:418113', + 'only_matching': True, } ] @@ -204,13 +221,13 @@ class TVPlayIE(InfoExtractor): video_id = self._match_id(url) video = self._download_json( - 'http://playapi.mtgx.tv/v1/videos/%s' % video_id, video_id, 'Downloading video JSON') + 'http://playapi.mtgx.tv/v3/videos/%s' % video_id, video_id, 'Downloading video JSON') title = video['title'] try: streams = self._download_json( - 'http://playapi.mtgx.tv/v1/videos/stream/%s' % video_id, + 'http://playapi.mtgx.tv/v3/videos/stream/%s' % video_id, video_id, 'Downloading streams JSON') except ExtractorError as e: if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: From b35b0d73d853c52ca96ccf4488a4f8960a12e2ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 17 Aug 2016 00:21:30 +0700 Subject: [PATCH 193/775] [viafree] Add extractor (Closes #10358) --- youtube_dl/extractor/extractors.py | 5 ++- youtube_dl/extractor/tvplay.py | 53 ++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index a5e0805b2..55c639158 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -898,7 +898,10 @@ from .tvp import ( TVPIE, TVPSeriesIE, ) -from .tvplay import TVPlayIE +from .tvplay import ( + TVPlayIE, + ViafreeIE, +) from .tweakers import TweakersIE from .twentyfourvideo import TwentyFourVideoIE from .twentymin import TwentyMinutenIE diff --git 
a/youtube_dl/extractor/tvplay.py b/youtube_dl/extractor/tvplay.py index d82bf67b4..c8ec2465c 100644 --- a/youtube_dl/extractor/tvplay.py +++ b/youtube_dl/extractor/tvplay.py @@ -311,3 +311,56 @@ class TVPlayIE(InfoExtractor): 'formats': formats, 'subtitles': subtitles, } + + +class ViafreeIE(InfoExtractor): + _VALID_URL = r'''(?x) + https?:// + (?:www\.)? + viafree\. + (?: + (?:dk|no)/programmer| + se/program + ) + /(?:[^/]+/)+(?P[^/?#&]+) + ''' + _TESTS = [{ + 'url': 'http://www.viafree.se/program/livsstil/husraddarna/sasong-2/avsnitt-2', + 'info_dict': { + 'id': '395375', + 'ext': 'mp4', + 'title': 'Husräddarna S02E02', + 'description': 'md5:4db5c933e37db629b5a2f75dfb34829e', + 'series': 'Husräddarna', + 'season': 'Säsong 2', + 'season_number': 2, + 'duration': 2576, + 'timestamp': 1400596321, + 'upload_date': '20140520', + }, + 'params': { + 'skip_download': True, + }, + 'add_ie': [TVPlayIE.ie_key()], + }, { + 'url': 'http://www.viafree.no/programmer/underholdning/det-beste-vorspielet/sesong-2/episode-1', + 'only_matching': True, + }, { + 'url': 'http://www.viafree.dk/programmer/reality/paradise-hotel/saeson-7/episode-5', + 'only_matching': True, + }] + + @classmethod + def suitable(cls, url): + return False if TVPlayIE.suitable(url) else super(ViafreeIE, cls).suitable(url) + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + video_id = self._search_regex( + r'currentVideo["\']\s*:\s*.+?["\']id["\']\s*:\s*["\'](?P\d{6,})', + webpage, 'video id') + + return self.url_result('mtg:%s' % video_id, TVPlayIE.ie_key()) From 502d87c5464f1894a8777873b9d11b76ba5a6375 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 17 Aug 2016 00:32:28 +0700 Subject: [PATCH 194/775] [mtg] Improve view count extraction --- youtube_dl/extractor/tvplay.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/tvplay.py b/youtube_dl/extractor/tvplay.py index c8ec2465c..4186e82db 100644 --- a/youtube_dl/extractor/tvplay.py +++ b/youtube_dl/extractor/tvplay.py @@ -15,6 +15,7 @@ from ..utils import ( int_or_none, parse_iso8601, qualities, + try_get, update_url_query, ) @@ -203,6 +204,11 @@ class TVPlayIE(InfoExtractor): 'url': 'http://tvplay.skaties.lv/parraides/vinas-melo-labak/418113?autostart=true', 'only_matching': True, }, + { + # views is null + 'url': 'http://tvplay.skaties.lv/parraides/tv3-zinas/760183', + 'only_matching': True, + }, { 'url': 'http://tv3play.tv3.ee/sisu/kodu-keset-linna/238551?autostart=true', 'only_matching': True, @@ -306,7 +312,7 @@ class TVPlayIE(InfoExtractor): 'season_number': season_number, 'duration': int_or_none(video.get('duration')), 'timestamp': parse_iso8601(video.get('created_at')), - 'view_count': int_or_none(video.get('views', {}).get('total')), + 'view_count': try_get(video, lambda x: x['views']['total'], int), 'age_limit': int_or_none(video.get('age_limit', 0)), 'formats': formats, 'subtitles': subtitles, From 9c0fa60bf375959c7d8582f655b441c534865c03 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 17 Aug 2016 00:42:02 +0700 Subject: [PATCH 195/775] [vbox7] Add support for embed URLs --- youtube_dl/extractor/vbox7.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/vbox7.py b/youtube_dl/extractor/vbox7.py index fa7899e6d..8e6d7efe7 100644 --- a/youtube_dl/extractor/vbox7.py +++ b/youtube_dl/extractor/vbox7.py @@ -6,7 +6,7 @@ from ..utils import urlencode_postdata class 
Vbox7IE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?vbox7\.com/play:(?P[^/]+)' + _VALID_URL = r'https?://(?:www\.)?vbox7\.com/(?:play:|emb/external\.php\?.*?\bvid=)(?P[\da-fA-F]+)' _TESTS = [{ 'url': 'http://vbox7.com/play:0946fff23c', 'md5': 'a60f9ab3a3a2f013ef9a967d5f7be5bf', @@ -24,15 +24,19 @@ class Vbox7IE(InfoExtractor): 'title': 'Смях! Чудо - чист за секунди - Скрита камера', }, 'skip': 'georestricted', + }, { + 'url': 'http://vbox7.com/emb/external.php?vid=a240d20f9c&autoplay=1', + 'only_matching': True, }] def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + webpage = self._download_webpage( + 'http://vbox7.com/play:%s' % video_id, video_id) title = self._html_search_regex( - r'(.*)', webpage, 'title').split('/')[0].strip() + r'(.+?)', webpage, 'title').split('/')[0].strip() video_url = self._search_regex( r'src\s*:\s*(["\'])(?P.+?.mp4.*?)\1', From 2a1321a272c7b410db25654cdfdc33c3cd8bd440 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 17 Aug 2016 01:02:59 +0700 Subject: [PATCH 196/775] [vbox7:generic] Add support for vbox7 embeds --- youtube_dl/extractor/generic.py | 18 ++++++++++++++++++ youtube_dl/extractor/vbox7.py | 10 ++++++++++ 2 files changed, 28 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 50500ce0e..197ab9531 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -72,6 +72,7 @@ from .kaltura import KalturaIE from .eagleplatform import EaglePlatformIE from .facebook import FacebookIE from .soundcloud import SoundcloudIE +from .vbox7 import Vbox7IE class GenericIE(InfoExtractor): @@ -1373,6 +1374,18 @@ class GenericIE(InfoExtractor): }, 'add_ie': [ArkenaIE.ie_key()], }, + { + 'url': 'http://nova.bg/news/view/2016/08/16/156543/%D0%BD%D0%B0-%D0%BA%D0%BE%D1%81%D1%8A%D0%BC-%D0%BE%D1%82-%D0%B2%D0%B7%D1%80%D0%B8%D0%B2-%D0%BE%D1%82%D1%86%D0%B5%D0%BF%D0%B8%D1%85%D0%B0-%D1%86%D1%8F%D0%BB-%D0%BA%D0%B2%D0%B0%D1%80%D1%82%D0%B0%D0%BB-%D0%B7%D0%B0%D1%80%D0%B0%D0%B4%D0%B8-%D0%B8%D0%B7%D1%82%D0%B8%D1%87%D0%B0%D0%BD%D0%B5-%D0%BD%D0%B0-%D0%B3%D0%B0%D0%B7-%D0%B2-%D0%BF%D0%BB%D0%BE%D0%B2%D0%B4%D0%B8%D0%B2/', + 'info_dict': { + 'id': '1c7141f46c', + 'ext': 'mp4', + 'title': 'НА КОСЪМ ОТ ВЗРИВ: Изтичане на газ на бензиностанция в Пловдив', + }, + 'params': { + 'skip_download': True, + }, + 'add_ie': [Vbox7IE.ie_key()], + }, # { # # TODO: find another test # # http://schema.org/VideoObject @@ -2239,6 +2252,11 @@ class GenericIE(InfoExtractor): 'uploader': video_uploader, } + # Look for VBOX7 embeds + vbox7_url = Vbox7IE._extract_url(webpage) + if vbox7_url: + return self.url_result(vbox7_url, Vbox7IE.ie_key()) + # Looking for http://schema.org/VideoObject json_ld = self._search_json_ld( webpage, video_id, default={}, expected_type='VideoObject') diff --git a/youtube_dl/extractor/vbox7.py b/youtube_dl/extractor/vbox7.py index 8e6d7efe7..e17988573 100644 --- a/youtube_dl/extractor/vbox7.py +++ b/youtube_dl/extractor/vbox7.py @@ -1,6 +1,8 @@ # encoding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..utils import urlencode_postdata @@ -29,6 +31,14 @@ class Vbox7IE(InfoExtractor): 'only_matching': True, }] + @staticmethod + def _extract_url(webpage): + mobj = re.search( + ']+src=(?P["\'])(?P(?:https?:)?//vbox7\.com/emb/external\.php.+?)(?P=q)', + webpage) + if mobj: + return mobj.group('url') + def _real_extract(self, url): video_id = self._match_id(url) From 
8652770bd23ff5f46c5687d94f71cec08d2c5886 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 17 Aug 2016 05:44:46 +0700 Subject: [PATCH 197/775] [keezmovies] Improve and modernize --- youtube_dl/extractor/keezmovies.py | 138 +++++++++++++++++++++-------- 1 file changed, 99 insertions(+), 39 deletions(-) diff --git a/youtube_dl/extractor/keezmovies.py b/youtube_dl/extractor/keezmovies.py index 126ca13df..ad2f8a8c8 100644 --- a/youtube_dl/extractor/keezmovies.py +++ b/youtube_dl/extractor/keezmovies.py @@ -3,64 +3,124 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..aes import aes_decrypt_text +from ..compat import ( + compat_str, + compat_urllib_parse_unquote, +) from ..utils import ( - sanitized_Request, - url_basename, + determine_ext, + ExtractorError, + int_or_none, + str_to_int, + strip_or_none, ) class KeezMoviesIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?keezmovies\.com/video/.+?(?P[0-9]+)(?:[/?&]|$)' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?keezmovies\.com/video/(?:(?P[^/]+)-)?(?P\d+)' + _TESTS = [{ 'url': 'http://www.keezmovies.com/video/petite-asian-lady-mai-playing-in-bathtub-1214711', 'md5': '1c1e75d22ffa53320f45eeb07bc4cdc0', 'info_dict': { 'id': '1214711', + 'display_id': 'petite-asian-lady-mai-playing-in-bathtub', 'ext': 'mp4', 'title': 'Petite Asian Lady Mai Playing In Bathtub', - 'age_limit': 18, 'thumbnail': 're:^https?://.*\.jpg$', + 'view_count': int, + 'age_limit': 18, } - } + }, { + 'url': 'http://www.keezmovies.com/video/1214711', + 'only_matching': True, + }] - def _real_extract(self, url): - video_id = self._match_id(url) + def _extract_info(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + display_id = mobj.group('display_id') or video_id - req = sanitized_Request(url) - req.add_header('Cookie', 'age_verified=1') - webpage = self._download_webpage(req, video_id) - - # embedded video - mobj = re.search(r'href="([^"]+)">', webpage) - if mobj: - embedded_url = mobj.group(1) - return self.url_result(embedded_url) - - video_title = self._html_search_regex( - r'
<h1 [^>
    ]*>([^<]+)', webpage, 'title') - flashvars = self._parse_json(self._search_regex( - r'var\s+flashvars\s*=\s*([^;]+);', webpage, 'flashvars'), video_id) + webpage = self._download_webpage( + url, display_id, headers={'Cookie': 'age_verified=1'}) formats = [] - for height in (180, 240, 480): - if flashvars.get('quality_%dp' % height): - video_url = flashvars['quality_%dp' % height] - a_format = { - 'url': video_url, - 'height': height, - 'format_id': '%dp' % height, - } - filename_parts = url_basename(video_url).split('_') - if len(filename_parts) >= 2 and re.match(r'\d+[Kk]', filename_parts[1]): - a_format['tbr'] = int(filename_parts[1][:-1]) - formats.append(a_format) + format_urls = set() - age_limit = self._rta_search(webpage) + title = None + thumbnail = None + duration = None + encrypted = False - return { + def extract_format(format_url, height=None): + if not isinstance(format_url, compat_str) or not format_url.startswith('http'): + return + if format_url in format_urls: + return + format_urls.add(format_url) + tbr = int_or_none(self._search_regex( + r'[/_](\d+)[kK][/_]', format_url, 'tbr', default=None)) + if not height: + height = int_or_none(self._search_regex( + r'[/_](\d+)[pP][/_]', format_url, 'height', default=None)) + if encrypted: + format_url = aes_decrypt_text( + video_url, title, 32).decode('utf-8') + formats.append({ + 'url': format_url, + 'format_id': '%dp' % height if height else None, + 'height': height, + 'tbr': tbr, + }) + + flashvars = self._parse_json( + self._search_regex( + r'flashvars\s*=\s*({.+?});', webpage, + 'flashvars', default='{}'), + display_id, fatal=False) + + if flashvars: + title = flashvars.get('video_title') + thumbnail = flashvars.get('image_url') + duration = int_or_none(flashvars.get('video_duration')) + encrypted = flashvars.get('encrypted') is True + for key, value in flashvars.items(): + mobj = re.search(r'quality_(\d+)[pP]', key) + if mobj: + extract_format(value, int(mobj.group(1))) + video_url = flashvars.get('video_url') + if video_url and determine_ext(video_url, None): + extract_format(video_url) + + video_url = self._html_search_regex( + r'flashvars\.video_url\s*=\s*(["\'])(?Phttp.+?)\1', + webpage, 'video url', default=None, group='url') + if video_url: + extract_format(compat_urllib_parse_unquote(video_url)) + + if not formats: + if 'title="This video is no longer available"' in webpage: + raise ExtractorError( + 'Video %s is no longer available' % video_id, expected=True) + + self._sort_formats(formats) + + if not title: + title = self._html_search_regex( + r']*>([^<]+)', webpage, 'title') + + return webpage, { 'id': video_id, - 'title': video_title, + 'display_id': display_id, + 'title': strip_or_none(title), + 'thumbnail': thumbnail, + 'duration': duration, + 'age_limit': 18, 'formats': formats, - 'age_limit': age_limit, - 'thumbnail': flashvars.get('image_url') } + + def _real_extract(self, url): + webpage, info = self._extract_info(url) + info['view_count'] = str_to_int(self._search_regex( + r'([\d,.]+) Views?', webpage, 'view count', fatal=False)) + return info From 6be17c08703ad8ec89c6fb62f31f280956694cee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 17 Aug 2016 05:45:49 +0700 Subject: [PATCH 198/775] [mofosex] Extract all formats and modernize (Closes #10335) --- youtube_dl/extractor/mofosex.py | 81 +++++++++++++++++---------------- 1 file changed, 42 insertions(+), 39 deletions(-) diff --git a/youtube_dl/extractor/mofosex.py b/youtube_dl/extractor/mofosex.py index e47c80119..e3bbe5aa8 100644 
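The reworked KeezMovies _extract_info() above derives a format's height and bitrate straight from path segments of the format URL. The snippet below is a standalone illustration of those two patterns against an invented CDN URL; it is not youtube-dl code, only a demonstration of what they pick up.

import re

# Patterns mirror the tbr/height lookups in the KeezMovies rework; the URL is made up.
format_url = 'http://cdn.example.com/videos/000/123/456/720P_1500K_1214711.mp4'

height = re.search(r'[/_](\d+)[pP][/_]', format_url)
tbr = re.search(r'[/_](\d+)[kK][/_]', format_url)

print(int(height.group(1)) if height else None)  # 720
print(int(tbr.group(1)) if tbr else None)        # 1500

Sharing this logic in one base class is what lets the Mofosex, Tube8 and ExtremeTube rewrites in the following commits drop their own format parsing.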
--- a/youtube_dl/extractor/mofosex.py +++ b/youtube_dl/extractor/mofosex.py @@ -1,53 +1,56 @@ from __future__ import unicode_literals -import os -import re - -from .common import InfoExtractor -from ..compat import ( - compat_urllib_parse_unquote, - compat_urllib_parse_urlparse, +from ..utils import ( + int_or_none, + str_to_int, + unified_strdate, ) -from ..utils import sanitized_Request +from .keezmovies import KeezMoviesIE -class MofosexIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?(?Pmofosex\.com/videos/(?P[0-9]+)/.*?\.html)' - _TEST = { - 'url': 'http://www.mofosex.com/videos/5018/japanese-teen-music-video.html', - 'md5': '1b2eb47ac33cc75d4a80e3026b613c5a', +class MofosexIE(KeezMoviesIE): + _VALID_URL = r'https?://(?:www\.)?mofosex\.com/videos/(?P\d+)/(?P[^/?#&.]+)\.html' + _TESTS = [{ + 'url': 'http://www.mofosex.com/videos/318131/amateur-teen-playing-and-masturbating-318131.html', + 'md5': '39a15853632b7b2e5679f92f69b78e91', 'info_dict': { - 'id': '5018', + 'id': '318131', + 'display_id': 'amateur-teen-playing-and-masturbating-318131', 'ext': 'mp4', - 'title': 'Japanese Teen Music Video', + 'title': 'amateur teen playing and masturbating', + 'thumbnail': 're:^https?://.*\.jpg$', + 'upload_date': '20121114', + 'view_count': int, + 'like_count': int, + 'dislike_count': int, 'age_limit': 18, } - } + }, { + # This video is no longer available + 'url': 'http://www.mofosex.com/videos/5018/japanese-teen-music-video.html', + 'only_matching': True, + }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - url = 'http://www.' + mobj.group('url') + webpage, info = self._extract_info(url) - req = sanitized_Request(url) - req.add_header('Cookie', 'age_verified=1') - webpage = self._download_webpage(req, video_id) + view_count = str_to_int(self._search_regex( + r'VIEWS:\s*([\d,.]+)', webpage, 'view count', fatal=False)) + like_count = int_or_none(self._search_regex( + r'id=["\']amountLikes["\'][^>]*>(\d+)', webpage, + 'like count', fatal=False)) + dislike_count = int_or_none(self._search_regex( + r'id=["\']amountDislikes["\'][^>]*>(\d+)', webpage, + 'like count', fatal=False)) + upload_date = unified_strdate(self._html_search_regex( + r'Added:([^<]+)', webpage, 'upload date', fatal=False)) - video_title = self._html_search_regex(r'
<h1>
    (.+?)<', webpage, 'title') - video_url = compat_urllib_parse_unquote(self._html_search_regex(r'flashvars.video_url = \'([^\']+)', webpage, 'video_url')) - path = compat_urllib_parse_urlparse(video_url).path - extension = os.path.splitext(path)[1][1:] - format = path.split('/')[5].split('_')[:2] - format = '-'.join(format) + info.update({ + 'view_count': view_count, + 'like_count': like_count, + 'dislike_count': dislike_count, + 'upload_date': upload_date, + 'thumbnail': self._og_search_thumbnail(webpage), + }) - age_limit = self._rta_search(webpage) - - return { - 'id': video_id, - 'title': video_title, - 'url': video_url, - 'ext': extension, - 'format': format, - 'format_id': format, - 'age_limit': age_limit, - } + return info From 8804f10e6b580db38df7301a174cb48ea374f9eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 17 Aug 2016 05:46:45 +0700 Subject: [PATCH 199/775] [tube8] Modernize --- youtube_dl/extractor/tube8.py | 60 ++++++----------------------------- 1 file changed, 10 insertions(+), 50 deletions(-) diff --git a/youtube_dl/extractor/tube8.py b/youtube_dl/extractor/tube8.py index 1d9271d1e..4053f6c21 100644 --- a/youtube_dl/extractor/tube8.py +++ b/youtube_dl/extractor/tube8.py @@ -1,18 +1,13 @@ from __future__ import unicode_literals -import re - -from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( int_or_none, - sanitized_Request, str_to_int, ) -from ..aes import aes_decrypt_text +from .keezmovies import KeezMoviesIE -class Tube8IE(InfoExtractor): +class Tube8IE(KeezMoviesIE): _VALID_URL = r'https?://(?:www\.)?tube8\.com/(?:[^/]+/)+(?P[^/]+)/(?P\d+)' _TESTS = [{ 'url': 'http://www.tube8.com/teen/kasia-music-video/229795/', @@ -33,47 +28,17 @@ class Tube8IE(InfoExtractor): }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - display_id = mobj.group('display_id') + webpage, info = self._extract_info(url) - req = sanitized_Request(url) - req.add_header('Cookie', 'age_verified=1') - webpage = self._download_webpage(req, display_id) + if not info['title']: + info['title'] = self._html_search_regex( + r'videoTitle\s*=\s*"([^"]+)', webpage, 'title') - flashvars = self._parse_json( - self._search_regex( - r'flashvars\s*=\s*({.+?});\r?\n', webpage, 'flashvars'), - video_id) - - formats = [] - for key, video_url in flashvars.items(): - if not isinstance(video_url, compat_str) or not video_url.startswith('http'): - continue - height = self._search_regex( - r'quality_(\d+)[pP]', key, 'height', default=None) - if not height: - continue - if flashvars.get('encrypted') is True: - video_url = aes_decrypt_text( - video_url, flashvars['video_title'], 32).decode('utf-8') - formats.append({ - 'url': video_url, - 'format_id': '%sp' % height, - 'height': int(height), - }) - self._sort_formats(formats) - - thumbnail = flashvars.get('image_url') - - title = self._html_search_regex( - r'videoTitle\s*=\s*"([^"]+)', webpage, 'title') description = self._html_search_regex( r'>Description:\s*(.+?)\s*<', webpage, 'description', fatal=False) uploader = self._html_search_regex( r'\s*(.+?)\s*<', webpage, 'uploader', fatal=False) - duration = int_or_none(flashvars.get('video_duration')) like_count = int_or_none(self._search_regex( r'rupVar\s*=\s*"(\d+)"', webpage, 'like count', fatal=False)) @@ -86,18 +51,13 @@ class Tube8IE(InfoExtractor): r'(\d+)', webpage, 'comment count', fatal=False)) - return { - 'id': video_id, - 'display_id': display_id, - 'title': title, + info.update({ 'description': 
description, - 'thumbnail': thumbnail, 'uploader': uploader, - 'duration': duration, 'view_count': view_count, 'like_count': like_count, 'dislike_count': dislike_count, 'comment_count': comment_count, - 'age_limit': 18, - 'formats': formats, - } + }) + + return info From ab19b46b88bb54971b973176976d8d189222a6d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 17 Aug 2016 06:02:12 +0700 Subject: [PATCH 200/775] [extremetube] Modernize --- youtube_dl/extractor/extremetube.py | 73 +++++++---------------------- 1 file changed, 16 insertions(+), 57 deletions(-) diff --git a/youtube_dl/extractor/extremetube.py b/youtube_dl/extractor/extremetube.py index 3403581fd..b4fd9334a 100644 --- a/youtube_dl/extractor/extremetube.py +++ b/youtube_dl/extractor/extremetube.py @@ -1,22 +1,17 @@ from __future__ import unicode_literals -import re - -from .common import InfoExtractor -from ..utils import ( - int_or_none, - sanitized_Request, - str_to_int, -) +from ..utils import str_to_int +from .keezmovies import KeezMoviesIE -class ExtremeTubeIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?extremetube\.com/(?:[^/]+/)?video/(?P[^/#?&]+)' +class ExtremeTubeIE(KeezMoviesIE): + _VALID_URL = r'https?://(?:www\.)?extremetube\.com/(?:[^/]+/)?video/(?:(?P[^/]+)-)(?P\d+)' _TESTS = [{ 'url': 'http://www.extremetube.com/video/music-video-14-british-euro-brit-european-cumshots-swallow-652431', - 'md5': '344d0c6d50e2f16b06e49ca011d8ac69', + 'md5': '1fb9228f5e3332ec8c057d6ac36f33e0', 'info_dict': { - 'id': 'music-video-14-british-euro-brit-european-cumshots-swallow-652431', + 'id': '652431', + 'display_id': 'music-video-14-british-euro-brit-european-cumshots-swallow', 'ext': 'mp4', 'title': 'Music Video 14 british euro brit european cumshots swallow', 'uploader': 'unknown', @@ -35,58 +30,22 @@ class ExtremeTubeIE(InfoExtractor): }] def _real_extract(self, url): - video_id = self._match_id(url) + webpage, info = self._extract_info(url) - req = sanitized_Request(url) - req.add_header('Cookie', 'age_verified=1') - webpage = self._download_webpage(req, video_id) + if not info['title']: + info['title'] = self._search_regex( + r']+title="([^"]+)"[^>]*>', webpage, 'title') - video_title = self._html_search_regex( - r'
<h1 [^>
    ]*?title="([^"]+)"[^>]*>', webpage, 'title') uploader = self._html_search_regex( r'Uploaded by:\s*\s*(.+?)\s*', webpage, 'uploader', fatal=False) - view_count = str_to_int(self._html_search_regex( + view_count = str_to_int(self._search_regex( r'Views:\s*\s*([\d,\.]+)', webpage, 'view count', fatal=False)) - flash_vars = self._parse_json( - self._search_regex( - r'var\s+flashvars\s*=\s*({.+?});', webpage, 'flash vars'), - video_id) - - formats = [] - for quality_key, video_url in flash_vars.items(): - height = int_or_none(self._search_regex( - r'quality_(\d+)[pP]$', quality_key, 'height', default=None)) - if not height: - continue - f = { - 'url': video_url, - } - mobj = re.search( - r'/(?P\d{3,4})[pP]_(?P\d+)[kK]_\d+', video_url) - if mobj: - height = int(mobj.group('height')) - bitrate = int(mobj.group('bitrate')) - f.update({ - 'format_id': '%dp-%dk' % (height, bitrate), - 'height': height, - 'tbr': bitrate, - }) - else: - f.update({ - 'format_id': '%dp' % height, - 'height': height, - }) - formats.append(f) - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': video_title, - 'formats': formats, + info.update({ 'uploader': uploader, 'view_count': view_count, - 'age_limit': 18, - } + }) + + return info From a44694ab4e1ee6ac496ea09c3759923c03b9430c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 17 Aug 2016 06:19:22 +0700 Subject: [PATCH 201/775] [ChangeLog] Actualize --- ChangeLog | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 32504dab5..8f27019c1 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,9 +1,28 @@ version +Core ++ Add _get_netrc_login_info + Extractors +* [mofosex] Extract all formats (#10335) ++ [generic] Add support for vbox7 embeds ++ [vbox7] Add support for embed URLs ++ [viafree] Add extractor (#10358) ++ [mtg] Add support for viafree URLs (#10358) +* [theplatform] Extract all subtitles per language ++ [xvideos] Fix HLS extraction (#10356) ++ [amcnetworks] Add extractor ++ [bbc:playlist] Add support for pagination (#10349) ++ [fxnetworks] Add extractor (#9462) * [cbslocal] Fix extraction for SendtoNews-based videos * [sendtonews] Fix extraction -* [jwplatform] Now can parse video_id from JWPlayer data +* [jwplatform] Extract video id from JWPlayer data +- [zippcast] Remove extractor (#10332) ++ [viceland] Add extractor (#8799) ++ [adobepass] Add base extractor for Adobe Pass Authentication +* [life:embed] Improve extraction +* [vgtv] Detect geo restricted videos (#10348) ++ [uplynk] Add extractor * [xiami] Fix extraction (#10342) From b3d7dce42952cf23b8f9ea883c75736dadfee12e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 17 Aug 2016 06:21:21 +0700 Subject: [PATCH 202/775] release 2016.08.17 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 9 +++++++-- youtube_dl/version.py | 2 +- 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 1e0d99b43..ae28d83d5 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.08.13*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
-- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.08.13** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.08.17*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.08.17** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.08.13 +[debug] youtube-dl version 2016.08.17 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 8f27019c1..354306a97 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2016.08.17 Core + Add _get_netrc_login_info diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 56fc41a40..189b9301d 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -35,6 +35,7 @@ - **AlJazeera** - **Allocine** - **AlphaPorno** + - **AMCNetworks** - **AnimeOnDemand** - **anitube.se** - **AnySex** @@ -247,6 +248,7 @@ - **Funimation** - **FunnyOrDie** - **Fusion** + - **FXNetworks** - **GameInformer** - **GameOne** - **gameone:playlist** @@ -398,6 +400,7 @@ - **Moviezine** - **MPORA** - **MSN** + - **mtg**: MTG services - **MTV** - **mtv.de** - **mtvservices:embedded** @@ -731,7 +734,6 @@ - **tvp**: Telewizja Polska - **tvp:embed**: Telewizja Polska - **tvp:series** - - **TVPlay**: TV3Play and related services - **Tweakers** - **twitch:chapter** - **twitch:clips** @@ -748,6 +750,8 @@ - **UDNEmbed**: 聯合影音 - **Unistra** - **uol.com.br** + - **uplynk** + - **uplynk:preplay** - **Urort**: NRK P3 Urørt - **URPlay** - **USAToday** @@ -765,7 +769,9 @@ - **VevoPlaylist** - **VGTV**: VGTV, BTTV, FTV, Aftenposten and Aftonbladet - **vh1.com** + - **Viafree** - **Vice** + - **Viceland** - **ViceShow** - **Vidbit** - **Viddler** @@ -887,4 +893,3 @@ - **ZDFChannel** - **zingmp3:album**: mp3.zing.vn albums - **zingmp3:song**: mp3.zing.vn songs - - **ZippCast** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index cc93d22aa..cf5950117 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.08.13' +__version__ = '2016.08.17' From 92cd9fd56574f22087a8f8df52192df1d4c11a21 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 17 Aug 2016 07:01:32 +0700 Subject: [PATCH 203/775] [keezmovies] Make display_id optional --- youtube_dl/extractor/keezmovies.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/keezmovies.py b/youtube_dl/extractor/keezmovies.py index ad2f8a8c8..b002c0dd1 100644 --- a/youtube_dl/extractor/keezmovies.py +++ b/youtube_dl/extractor/keezmovies.py @@ -39,7 +39,8 @@ class KeezMoviesIE(InfoExtractor): def _extract_info(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') - 
display_id = mobj.group('display_id') or video_id + display_id = (mobj.group('display_id') if 'display_id' + in mobj.groupdict() else None) or mobj.group('id') webpage = self._download_webpage( url, display_id, headers={'Cookie': 'age_verified=1'}) From b505e98784b2c1cc07f734e9709702ee9d01287e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 17 Aug 2016 07:02:13 +0700 Subject: [PATCH 204/775] [extremetube] Revert display_id --- youtube_dl/extractor/extremetube.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/extremetube.py b/youtube_dl/extractor/extremetube.py index b4fd9334a..445f9438d 100644 --- a/youtube_dl/extractor/extremetube.py +++ b/youtube_dl/extractor/extremetube.py @@ -5,13 +5,12 @@ from .keezmovies import KeezMoviesIE class ExtremeTubeIE(KeezMoviesIE): - _VALID_URL = r'https?://(?:www\.)?extremetube\.com/(?:[^/]+/)?video/(?:(?P[^/]+)-)(?P\d+)' + _VALID_URL = r'https?://(?:www\.)?extremetube\.com/(?:[^/]+/)?video/(?P[^/#?&]+)' _TESTS = [{ 'url': 'http://www.extremetube.com/video/music-video-14-british-euro-brit-european-cumshots-swallow-652431', 'md5': '1fb9228f5e3332ec8c057d6ac36f33e0', 'info_dict': { - 'id': '652431', - 'display_id': 'music-video-14-british-euro-brit-european-cumshots-swallow', + 'id': 'music-video-14-british-euro-brit-european-cumshots-swallow-652431', 'ext': 'mp4', 'title': 'Music Video 14 british euro brit european cumshots swallow', 'uploader': 'unknown', From 7273e5849b27cb7d0f4d5f40e7801cab2da85ae3 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 17 Aug 2016 11:03:09 +0100 Subject: [PATCH 205/775] [discoverygo] extend _VALID_URL to support other networks --- youtube_dl/extractor/discoverygo.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/discoverygo.py b/youtube_dl/extractor/discoverygo.py index adb68b96c..cba709935 100644 --- a/youtube_dl/extractor/discoverygo.py +++ b/youtube_dl/extractor/discoverygo.py @@ -11,7 +11,17 @@ from ..utils import ( class DiscoveryGoIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?discoverygo\.com/(?:[^/]+/)*(?P[^/?#&]+)' + _VALID_URL = r'''(?x)https?://(?:www\.)?(?: + discovery| + investigationdiscovery| + discoverylife| + animalplanet| + ahctv| + destinationamerica| + sciencechannel| + tlc| + velocitychannel + )go\.com/(?:[^/]+/)*(?P[^/?#&]+)''' _TEST = { 'url': 'https://www.discoverygo.com/love-at-first-kiss/kiss-first-ask-questions-later/', 'info_dict': { From 4e9fee101508fe90c5b103738d1b6458e40affd6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 18 Aug 2016 04:37:14 +0700 Subject: [PATCH 206/775] [hgtvcom:show] Add extractor (Closes #10365) --- youtube_dl/extractor/extractors.py | 5 ++++- youtube_dl/extractor/hgtv.py | 31 ++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 55c639158..e61bb11c3 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -324,7 +324,10 @@ from .heise import HeiseIE from .hellporno import HellPornoIE from .helsinki import HelsinkiIE from .hentaistigma import HentaiStigmaIE -from .hgtv import HGTVIE +from .hgtv import ( + HGTVIE, + HGTVComShowIE, +) from .historicfilms import HistoricFilmsIE from .hitbox import HitboxIE, HitboxLiveIE from .hornbunny import HornBunnyIE diff --git a/youtube_dl/extractor/hgtv.py b/youtube_dl/extractor/hgtv.py index c3f0733cf..69543bff2 100644 --- 
a/youtube_dl/extractor/hgtv.py +++ b/youtube_dl/extractor/hgtv.py @@ -46,3 +46,34 @@ class HGTVIE(InfoExtractor): 'episode_number': int_or_none(embed_vars.get('episode')), 'ie_key': 'ThePlatform', } + + +class HGTVComShowIE(InfoExtractor): + IE_NAME = 'hgtv.com:show' + _VALID_URL = r'https?://(?:www\.)?hgtv\.com/shows/[^/]+/(?P[^/?#&]+)' + _TEST = { + 'url': 'http://www.hgtv.com/shows/flip-or-flop/flip-or-flop-full-episodes-videos', + 'info_dict': { + 'id': 'flip-or-flop-full-episodes-videos', + 'title': 'Flip or Flop Full Episodes', + }, + 'playlist_mincount': 15, + } + + def _real_extract(self, url): + display_id = self._match_id(url) + + webpage = self._download_webpage(url, display_id) + + config = self._parse_json( + self._search_regex( + r'(?s)data-module=["\']video["\'][^>]*>.*?]+type=["\']text/x-config["\'][^>]*>(.+?) Date: Thu, 18 Aug 2016 04:39:31 +0700 Subject: [PATCH 207/775] [keezmovies] PEP 8 --- youtube_dl/extractor/keezmovies.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/keezmovies.py b/youtube_dl/extractor/keezmovies.py index b002c0dd1..588a4d0ec 100644 --- a/youtube_dl/extractor/keezmovies.py +++ b/youtube_dl/extractor/keezmovies.py @@ -39,8 +39,9 @@ class KeezMoviesIE(InfoExtractor): def _extract_info(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') - display_id = (mobj.group('display_id') if 'display_id' - in mobj.groupdict() else None) or mobj.group('id') + display_id = (mobj.group('display_id') + if 'display_id' in mobj.groupdict() + else None) or mobj.group('id') webpage = self._download_webpage( url, display_id, headers={'Cookie': 'age_verified=1'}) From 08a42f9c741aa37a599e6fe54ec8b9660df117e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 18 Aug 2016 05:22:23 +0700 Subject: [PATCH 208/775] [vk] Fix authentication on python3 --- youtube_dl/extractor/vk.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index 3ee66e23e..634d17d91 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -52,8 +52,9 @@ class VKBaseIE(InfoExtractor): # what actually happens. # We will workaround this VK issue by resetting the remixlhk cookie to # the first one manually. 
- cookies = url_handle.headers.get('Set-Cookie') - if cookies: + for header, cookies in url_handle.headers.items(): + if header.lower() != 'set-cookie': + continue if sys.version_info[0] >= 3: cookies = cookies.encode('iso-8859-1') cookies = cookies.decode('utf-8') @@ -61,6 +62,7 @@ class VKBaseIE(InfoExtractor): if remixlhk: value, domain = remixlhk.groups() self._set_cookie(domain, 'remixlhk', value) + break login_page = self._download_webpage( 'https://login.vk.com/?act=login', None, From 51815886a98503593524ec6ffa778ff19d840e2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 18 Aug 2016 06:14:05 +0700 Subject: [PATCH 209/775] [vk:wallpost] Fix audio extraction --- youtube_dl/extractor/vk.py | 66 ++++++++++++++++++++++---------------- 1 file changed, 38 insertions(+), 28 deletions(-) diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index 634d17d91..cd22df25a 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -1,6 +1,7 @@ # encoding: utf-8 from __future__ import unicode_literals +import collections import re import json import sys @@ -16,7 +17,6 @@ from ..utils import ( get_element_by_class, int_or_none, orderedSet, - parse_duration, remove_start, str_to_int, unescapeHTML, @@ -447,6 +447,9 @@ class VKWallPostIE(VKBaseIE): 'skip_download': True, }, }], + 'params': { + 'usenetrc': True, + }, 'skip': 'Requires vk account credentials', }, { # single YouTube embed, no leading - @@ -456,6 +459,9 @@ class VKWallPostIE(VKBaseIE): 'title': 'Sergey Gorbunov - Wall post 85155021_6319', }, 'playlist_count': 1, + 'params': { + 'usenetrc': True, + }, 'skip': 'Requires vk account credentials', }, { # wall page URL @@ -483,37 +489,41 @@ class VKWallPostIE(VKBaseIE): raise ExtractorError('VK said: %s' % error, expected=True) description = clean_html(get_element_by_class('wall_post_text', webpage)) - uploader = clean_html(get_element_by_class( - 'fw_post_author', webpage)) or self._og_search_description(webpage) + uploader = clean_html(get_element_by_class('author', webpage)) thumbnail = self._og_search_thumbnail(webpage) entries = [] - for audio in re.finditer(r'''(?sx) - ]+ - id=(?P["\'])audio_info(?P\d+_\d+).*?(?P=q1)[^>]+ - value=(?P["\'])(?Phttp.+?)(?P=q2) - .+? 
- ''', webpage): - audio_html = audio.group(0) - audio_id = audio.group('id') - duration = parse_duration(get_element_by_class('duration', audio_html)) - track = self._html_search_regex( - r']+id=["\']title%s[^>]*>([^<]+)' % audio_id, - audio_html, 'title', default=None) - artist = self._html_search_regex( - r'>([^<]+)\s*&ndash', audio_html, - 'artist', default=None) - entries.append({ - 'id': audio_id, - 'url': audio.group('url'), - 'title': '%s - %s' % (artist, track) if artist and track else audio_id, - 'thumbnail': thumbnail, - 'duration': duration, - 'uploader': uploader, - 'artist': artist, - 'track': track, - }) + audio_ids = re.findall(r'data-full-id=["\'](\d+_\d+)', webpage) + if audio_ids: + al_audio = self._download_webpage( + 'https://vk.com/al_audio.php', post_id, + note='Downloading audio info', fatal=False, + data=urlencode_postdata({ + 'act': 'reload_audio', + 'al': '1', + 'ids': ','.join(audio_ids) + })) + if al_audio: + Audio = collections.namedtuple( + 'Audio', ['id', 'user_id', 'url', 'track', 'artist', 'duration']) + audios = self._parse_json( + self._search_regex( + r'(.+?)', al_audio, 'audios', default='[]'), + post_id, fatal=False, transform_source=unescapeHTML) + if isinstance(audios, list): + for audio in audios: + a = Audio._make(audio[:6]) + entries.append({ + 'id': '%s_%s' % (a.user_id, a.id), + 'url': a.url, + 'title': '%s - %s' % (a.artist, a.track) if a.artist and a.track else a.id, + 'thumbnail': thumbnail, + 'duration': a.duration, + 'uploader': uploader, + 'artist': a.artist, + 'track': a.track, + }) for video in re.finditer( r']+href=(["\'])(?P/video(?:-?[\d_]+).*?)\1', webpage): From b0c8f2e9c8946f8aab4be0d1435e504aac0d317f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A9stin=20Reed?= Date: Wed, 17 Aug 2016 12:45:24 +0200 Subject: [PATCH 210/775] [DBTV:generic] Add support for embeds --- youtube_dl/extractor/dbtv.py | 6 ++++++ youtube_dl/extractor/generic.py | 11 +++++++++++ 2 files changed, 17 insertions(+) diff --git a/youtube_dl/extractor/dbtv.py b/youtube_dl/extractor/dbtv.py index caff8842e..73dba5e2a 100644 --- a/youtube_dl/extractor/dbtv.py +++ b/youtube_dl/extractor/dbtv.py @@ -38,6 +38,12 @@ class DBTVIE(InfoExtractor): 'only_matching': True, }] + @staticmethod + def _extract_urls(webpage): + return [url for _, url in re.findall( + r']+src=(["\'])((?:https?:)?//(?:www\.)?dbtv\.no/lazyplayer/\d+.*?)\1', + webpage)] + def _real_extract(self, url): video_id, display_id = re.match(self._VALID_URL, url).groups() diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 197ab9531..1b71f7ac8 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -73,6 +73,7 @@ from .eagleplatform import EaglePlatformIE from .facebook import FacebookIE from .soundcloud import SoundcloudIE from .vbox7 import Vbox7IE +from .dbtv import DBTVIE class GenericIE(InfoExtractor): @@ -1386,6 +1387,11 @@ class GenericIE(InfoExtractor): }, 'add_ie': [Vbox7IE.ie_key()], }, + { + # DBTV embeds + 'url': 'http://www.dagbladet.no/2016/02/23/nyheter/nordlys/ski/troms/ver/43254897/', + 'playlist_mincount': 3, + }, # { # # TODO: find another test # # http://schema.org/VideoObject @@ -2257,6 +2263,11 @@ class GenericIE(InfoExtractor): if vbox7_url: return self.url_result(vbox7_url, Vbox7IE.ie_key()) + # Look for DBTV embeds + dbtv_urls = DBTVIE._extract_urls(webpage) + if dbtv_urls: + return _playlist_from_matches(dbtv_urls, ie=DBTVIE.ie_key()) + # Looking for http://schema.org/VideoObject json_ld = self._search_json_ld( webpage, 
video_id, default={}, expected_type='VideoObject') From b0d578ff7b54c521776cf8d1e050dc198bbc26e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 18 Aug 2016 21:30:55 +0700 Subject: [PATCH 211/775] [dbtv] Relax embed regex --- youtube_dl/extractor/dbtv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/dbtv.py b/youtube_dl/extractor/dbtv.py index 73dba5e2a..6d880d43d 100644 --- a/youtube_dl/extractor/dbtv.py +++ b/youtube_dl/extractor/dbtv.py @@ -41,7 +41,7 @@ class DBTVIE(InfoExtractor): @staticmethod def _extract_urls(webpage): return [url for _, url in re.findall( - r']+src=(["\'])((?:https?:)?//(?:www\.)?dbtv\.no/lazyplayer/\d+.*?)\1', + r']+src=(["\'])((?:https?:)?//(?:www\.)?dbtv\.no/(?:lazy)?player/\d+.*?)\1', webpage)] def _real_extract(self, url): From fd3ec986a4217319d0cc345c5e2eb910d90be6f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 18 Aug 2016 21:35:41 +0700 Subject: [PATCH 212/775] [generic] Fix dbtv test (Closes #10364) --- youtube_dl/extractor/generic.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 1b71f7ac8..506892b11 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1390,6 +1390,10 @@ class GenericIE(InfoExtractor): { # DBTV embeds 'url': 'http://www.dagbladet.no/2016/02/23/nyheter/nordlys/ski/troms/ver/43254897/', + 'info_dict': { + 'id': '43254897', + 'title': 'Etter ett års planlegging, klaffet endelig alt: - Jeg måtte ta en liten dans', + }, 'playlist_mincount': 3, }, # { From 13585d7682ef6351bfcd463cf1802bc8fbadaf43 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 18 Aug 2016 23:32:00 +0700 Subject: [PATCH 213/775] [utils] Recognize lowercase units in parse_filesize --- test/test_utils.py | 2 ++ youtube_dl/utils.py | 8 ++++++++ 2 files changed, 10 insertions(+) diff --git a/test/test_utils.py b/test/test_utils.py index 74fcf91c0..cb578cd53 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -817,7 +817,9 @@ class TestUtil(unittest.TestCase): self.assertEqual(parse_filesize('2 MiB'), 2097152) self.assertEqual(parse_filesize('5 GB'), 5000000000) self.assertEqual(parse_filesize('1.2Tb'), 1200000000000) + self.assertEqual(parse_filesize('1.2tb'), 1200000000000) self.assertEqual(parse_filesize('1,24 KB'), 1240) + self.assertEqual(parse_filesize('1,24 kb'), 1240) def test_parse_count(self): self.assertEqual(parse_count(None), None) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index b3b687a31..35362e767 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1508,34 +1508,42 @@ def parse_filesize(s): 'KB': 1000, 'kB': 1024, 'Kb': 1000, + 'kb': 1000, 'MiB': 1024 ** 2, 'MB': 1000 ** 2, 'mB': 1024 ** 2, 'Mb': 1000 ** 2, + 'mb': 1000 ** 2, 'GiB': 1024 ** 3, 'GB': 1000 ** 3, 'gB': 1024 ** 3, 'Gb': 1000 ** 3, + 'gb': 1000 ** 3, 'TiB': 1024 ** 4, 'TB': 1000 ** 4, 'tB': 1024 ** 4, 'Tb': 1000 ** 4, + 'tb': 1000 ** 4, 'PiB': 1024 ** 5, 'PB': 1000 ** 5, 'pB': 1024 ** 5, 'Pb': 1000 ** 5, + 'pb': 1000 ** 5, 'EiB': 1024 ** 6, 'EB': 1000 ** 6, 'eB': 1024 ** 6, 'Eb': 1000 ** 6, + 'eb': 1000 ** 6, 'ZiB': 1024 ** 7, 'ZB': 1000 ** 7, 'zB': 1024 ** 7, 'Zb': 1000 ** 7, + 'zb': 1000 ** 7, 'YiB': 1024 ** 8, 'YB': 1000 ** 8, 'yB': 1024 ** 8, 'Yb': 1000 ** 8, + 'yb': 1000 ** 8, } return lookup_unit_table(_UNIT_TABLE, s) From 850837b67ada7cf0a139117a7335aa40990cd0d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 18 
Aug 2016 23:52:41 +0700 Subject: [PATCH 214/775] [porncom] Add extractor (Closes #2251, closes #10251) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/porncom.py | 89 ++++++++++++++++++++++++++++++ 2 files changed, 90 insertions(+) create mode 100644 youtube_dl/extractor/porncom.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index e61bb11c3..6c5d46015 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -642,6 +642,7 @@ from .podomatic import PodomaticIE from .pokemon import PokemonIE from .polskieradio import PolskieRadioIE from .porn91 import Porn91IE +from .porncom import PornComIE from .pornhd import PornHdIE from .pornhub import ( PornHubIE, diff --git a/youtube_dl/extractor/porncom.py b/youtube_dl/extractor/porncom.py new file mode 100644 index 000000000..4baf79688 --- /dev/null +++ b/youtube_dl/extractor/porncom.py @@ -0,0 +1,89 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import compat_urlparse +from ..utils import ( + int_or_none, + js_to_json, + parse_filesize, + str_to_int, +) + + +class PornComIE(InfoExtractor): + _VALID_URL = r'https?://(?:[a-zA-Z]+\.)?porn\.com/videos/(?:(?P[^/]+)-)?(?P\d+)' + _TESTS = [{ + 'url': 'http://www.porn.com/videos/teen-grabs-a-dildo-and-fucks-her-pussy-live-on-1hottie-i-rec-2603339', + 'md5': '3f30ce76267533cd12ba999263156de7', + 'info_dict': { + 'id': '2603339', + 'display_id': 'teen-grabs-a-dildo-and-fucks-her-pussy-live-on-1hottie-i-rec', + 'ext': 'mp4', + 'title': 'Teen grabs a dildo and fucks her pussy live on 1hottie, I rec', + 'thumbnail': 're:^https?://.*\.jpg$', + 'duration': 551, + 'view_count': int, + 'age_limit': 18, + }, + }, { + 'url': 'http://se.porn.com/videos/marsha-may-rides-seth-on-top-of-his-thick-cock-2658067', + 'only_matching': True, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + display_id = mobj.group('display_id') or video_id + + webpage = self._download_webpage(url, display_id) + + config = self._parse_json( + self._search_regex( + r'=\s*({.+?})\s*,\s*[\da-zA-Z_]+\s*=', + webpage, 'config', default='{}'), + display_id, transform_source=js_to_json, fatal=False) + + if config: + title = config['title'] + formats = [{ + 'url': stream['url'], + 'format_id': stream.get('id'), + 'height': int_or_none(self._search_regex( + r'^(\d+)[pP]', stream.get('id') or '', 'height', default=None)) + } for stream in config['streams'] if stream.get('url')] + thumbnail = (compat_urlparse.urljoin( + config['thumbCDN'], config['poster']) + if config.get('thumbCDN') and config.get('poster') else None) + duration = int_or_none(config.get('length')) + else: + title = self._search_regex( + (r'([^<]+)', r']*>([^<]+)
</h1>
    '),
+                webpage, 'title')
+            formats = [{
+                'url': compat_urlparse.urljoin(url, format_url),
+                'format_id': '%sp' % height,
+                'height': int(height),
+                'filesize_approx': parse_filesize(filesize),
+            } for format_url, height, filesize in re.findall(
+                r'<a[^>]+href="(/download/[^"]+)">MPEG4 (\d+)p<span[^>]*>(\d+\s+[a-zA-Z]+)<',
+                webpage)]
+            thumbnail = None
+            duration = None
+
+        self._sort_formats(formats)
+
+        view_count = str_to_int(self._search_regex(
+            r'class=["\']views["\'][^>]*><p>
    ([\d,.]+)', webpage, 'view count')) + + return { + 'id': video_id, + 'display_id': display_id, + 'title': title, + 'thumbnail': thumbnail, + 'duration': duration, + 'view_count': view_count, + 'formats': formats, + 'age_limit': 18, + } From 8b2dc4c3287e5e90f339af687f3a272818c94fea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 18 Aug 2016 23:59:13 +0700 Subject: [PATCH 215/775] [options] Remove output template description from --help Same reasons as for --format --- youtube_dl/options.py | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index d32a9e32c..5d62deef4 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -628,22 +628,7 @@ def parseOpts(overrideArguments=None): filesystem.add_option( '-o', '--output', dest='outtmpl', metavar='TEMPLATE', - help=('Output filename template. Use %(title)s to get the title, ' - '%(uploader)s for the uploader name, %(uploader_id)s for the uploader nickname if different, ' - '%(autonumber)s to get an automatically incremented number, ' - '%(ext)s for the filename extension, ' - '%(format)s for the format description (like "22 - 1280x720" or "HD"), ' - '%(format_id)s for the unique id of the format (like YouTube\'s itags: "137"), ' - '%(upload_date)s for the upload date (YYYYMMDD), ' - '%(extractor)s for the provider (youtube, metacafe, etc), ' - '%(id)s for the video id, ' - '%(playlist_title)s, %(playlist_id)s, or %(playlist)s (=title if present, ID otherwise) for the playlist the video is in, ' - '%(playlist_index)s for the position in the playlist. ' - '%(height)s and %(width)s for the width and height of the video format. ' - '%(resolution)s for a textual description of the resolution of the video format. ' - '%% for a literal percent. ' - 'Use - to output to stdout. 
Can also be used to download to a different directory, ' - 'for example with -o \'/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s\' .')) + help=('Output filename template, see the "OUTPUT TEMPLATE" for all the info')) filesystem.add_option( '--autonumber-size', dest='autonumber_size', metavar='NUMBER', From 93a63b36f1c52a9981050e393d1876d6162abb49 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 19 Aug 2016 00:13:24 +0700 Subject: [PATCH 216/775] [ChangeLog] Actualize --- ChangeLog | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/ChangeLog b/ChangeLog index 354306a97..7e8bb834d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,18 @@ +version + +Core +- Remove output template description from --help +* Recognize lowercase units in parse_filesize + +Extractors ++ [porncom] Add extractor for porn.com (#2251, #10251) ++ [generic] Add support for DBTV embeds +* [vk:wallpost] Fix audio extraction for new site layout +* [vk] Fix authentication ++ [hgtvcom:show] Add extractor for hgtv.com shows (#10365) ++ [discoverygo] Add support for another GO network sites + + version 2016.08.17 Core From bd1bcd3ea079889cfd7cd44c0ea750ac9d432e41 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 19 Aug 2016 00:15:12 +0700 Subject: [PATCH 217/775] release 2016.08.19 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- README.md | 28 ++-------------------------- docs/supportedsites.md | 2 ++ youtube_dl/version.py | 2 +- 5 files changed, 9 insertions(+), 31 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index ae28d83d5..7af3c7099 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.08.17*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.08.17** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.08.19*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.08.19** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.08.17 +[debug] youtube-dl version 2016.08.19 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 7e8bb834d..e99ffcec6 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2016.08.19 Core - Remove output template description from --help diff --git a/README.md b/README.md index cabbbef76..952db7abb 100644 --- a/README.md +++ b/README.md @@ -201,32 +201,8 @@ which means you can modify it, redistribute it or use it however you like. -a, --batch-file FILE File containing URLs to download ('-' for stdin) --id Use only video ID in file name - -o, --output TEMPLATE Output filename template. Use %(title)s to - get the title, %(uploader)s for the - uploader name, %(uploader_id)s for the - uploader nickname if different, - %(autonumber)s to get an automatically - incremented number, %(ext)s for the - filename extension, %(format)s for the - format description (like "22 - 1280x720" or - "HD"), %(format_id)s for the unique id of - the format (like YouTube's itags: "137"), - %(upload_date)s for the upload date - (YYYYMMDD), %(extractor)s for the provider - (youtube, metacafe, etc), %(id)s for the - video id, %(playlist_title)s, - %(playlist_id)s, or %(playlist)s (=title if - present, ID otherwise) for the playlist the - video is in, %(playlist_index)s for the - position in the playlist. %(height)s and - %(width)s for the width and height of the - video format. %(resolution)s for a textual - description of the resolution of the video - format. %% for a literal percent. Use - to - output to stdout. Can also be used to - download to a different directory, for - example with -o '/my/downloads/%(uploader)s - /%(title)s-%(id)s.%(ext)s' . 
+ -o, --output TEMPLATE Output filename template, see the "OUTPUT + TEMPLATE" for all the info --autonumber-size NUMBER Specify the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 189b9301d..edf192138 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -279,6 +279,7 @@ - **Helsinki**: helsinki.fi - **HentaiStigma** - **HGTV** + - **hgtv.com:show** - **HistoricFilms** - **history:topic**: History.com Topic - **hitbox** @@ -523,6 +524,7 @@ - **podomatic** - **Pokemon** - **PolskieRadio** + - **PornCom** - **PornHd** - **PornHub**: PornHub and Thumbzilla - **PornHubPlaylist** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index cf5950117..691f2c591 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.08.17' +__version__ = '2016.08.19' From 9e5751b9fe72f7425e4cb3f22a56b6a95b59e41d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 19 Aug 2016 01:13:45 +0700 Subject: [PATCH 218/775] [globo:article] Relax _VALID_URL and video id regex (Closes #10379) --- youtube_dl/extractor/globo.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/globo.py b/youtube_dl/extractor/globo.py index 3de8356f6..dbacbfc61 100644 --- a/youtube_dl/extractor/globo.py +++ b/youtube_dl/extractor/globo.py @@ -396,12 +396,12 @@ class GloboIE(InfoExtractor): class GloboArticleIE(InfoExtractor): - _VALID_URL = 'https?://.+?\.globo\.com/(?:[^/]+/)*(?P[^/]+)\.html' + _VALID_URL = 'https?://.+?\.globo\.com/(?:[^/]+/)*(?P[^/]+)(?:\.html)?' _VIDEOID_REGEXES = [ r'\bdata-video-id=["\'](\d{7,})', r'\bdata-player-videosids=["\'](\d{7,})', - r'\bvideosIDs\s*:\s*["\'](\d{7,})', + r'\bvideosIDs\s*:\s*["\']?(\d{7,})', r'\bdata-id=["\'](\d{7,})', r']+\bid=["\'](\d{7,})', ] @@ -423,6 +423,9 @@ class GloboArticleIE(InfoExtractor): }, { 'url': 'http://gshow.globo.com/programas/tv-xuxa/O-Programa/noticia/2014/01/xuxa-e-junno-namoram-muuuito-em-luau-de-zeze-di-camargo-e-luciano.html', 'only_matching': True, + }, { + 'url': 'http://oglobo.globo.com/rio/a-amizade-entre-um-entregador-de-farmacia-um-piano-19946271', + 'only_matching': True, }] @classmethod From e4659b45474acb563db0ab4284abdfc80837307e Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 19 Aug 2016 20:37:17 +0800 Subject: [PATCH 219/775] [utils] Correct octal/hexadecimal number detection in js_to_json --- ChangeLog | 6 ++++++ test/test_utils.py | 3 +++ youtube_dl/utils.py | 6 +++--- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/ChangeLog b/ChangeLog index e99ffcec6..98a3dbca3 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Core +* Fix js_to_json(): correct octal or hexadecimal number detection + + version 2016.08.19 Core diff --git a/test/test_utils.py b/test/test_utils.py index cb578cd53..b83da93b4 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -712,6 +712,9 @@ class TestUtil(unittest.TestCase): inp = '''{"foo":101}''' self.assertEqual(js_to_json(inp), '''{"foo":101}''') + inp = '''{"duration": "00:01:07"}''' + self.assertEqual(js_to_json(inp), '''{"duration": "00:01:07"}''') + def test_js_to_json_edgecases(self): on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}") self.assertEqual(json.loads(on), {"abc_def": "1'\\2\\'3\"4"}) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 35362e767..0c36c1b80 100644 --- 
a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2038,14 +2038,14 @@ def js_to_json(code): }.get(m.group(0), m.group(0)), v[1:-1]) INTEGER_TABLE = ( - (r'^0[xX][0-9a-fA-F]+', 16), - (r'^0+[0-7]+', 8), + (r'^(0[xX][0-9a-fA-F]+)\s*:?$', 16), + (r'^(0+[0-7]+)\s*:?$', 8), ) for regex, base in INTEGER_TABLE: im = re.match(regex, v) if im: - i = int(im.group(0), base) + i = int(im.group(1), base) return '"%d":' % i if v.endswith(':') else '%d' % i return '"%s"' % v From b82232036a019e340b715779108c3f4caea8a78d Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 19 Aug 2016 20:39:28 +0800 Subject: [PATCH 220/775] [n-tv.de] Fix extraction (closes #10331) --- ChangeLog | 3 +++ youtube_dl/extractor/ntvde.py | 8 +++++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/ChangeLog b/ChangeLog index 98a3dbca3..6281fe325 100644 --- a/ChangeLog +++ b/ChangeLog @@ -3,6 +3,9 @@ version Core * Fix js_to_json(): correct octal or hexadecimal number detection +Extractors +* [n-tv.de] Fix extraction (#10331) + version 2016.08.19 diff --git a/youtube_dl/extractor/ntvde.py b/youtube_dl/extractor/ntvde.py index a83e85cb8..d28a81542 100644 --- a/youtube_dl/extractor/ntvde.py +++ b/youtube_dl/extractor/ntvde.py @@ -1,6 +1,8 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..compat import compat_urlparse from ..utils import ( @@ -40,8 +42,8 @@ class NTVDeIE(InfoExtractor): timestamp = int_or_none(info.get('publishedDateAsUnixTimeStamp')) vdata = self._parse_json(self._search_regex( r'(?s)\$\(\s*"\#player"\s*\)\s*\.data\(\s*"player",\s*(\{.*?\})\);', - webpage, 'player data'), - video_id, transform_source=js_to_json) + webpage, 'player data'), video_id, + transform_source=lambda s: js_to_json(re.sub(r'advertising:\s*{[^}]+},', '', s))) duration = parse_duration(vdata.get('duration')) formats = [] From 55af45fcab4295a92d56180cdbebe7b47e094bc3 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 19 Aug 2016 23:12:30 +0800 Subject: [PATCH 221/775] [radiobremen] Update _TEST (closes #10337) --- youtube_dl/extractor/radiobremen.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/radiobremen.py b/youtube_dl/extractor/radiobremen.py index 0cbb15f08..19a751da0 100644 --- a/youtube_dl/extractor/radiobremen.py +++ b/youtube_dl/extractor/radiobremen.py @@ -13,15 +13,15 @@ class RadioBremenIE(InfoExtractor): IE_NAME = 'radiobremen' _TEST = { - 'url': 'http://www.radiobremen.de/mediathek/index.html?id=114720', + 'url': 'http://www.radiobremen.de/mediathek/?id=141876', 'info_dict': { - 'id': '114720', + 'id': '141876', 'ext': 'mp4', - 'duration': 1685, + 'duration': 178, 'width': 512, - 'title': 'buten un binnen vom 22. Dezember', + 'title': 'Druck auf Patrick Öztürk', 'thumbnail': 're:https?://.*\.jpg$', - 'description': 'Unter anderem mit diesen Themen: 45 Flüchtlinge sind in Worpswede angekommen +++ Freies Internet für alle: Bremer arbeiten an einem flächendeckenden W-Lan-Netzwerk +++ Aktivisten kämpfen für das Unibad +++ So war das Wetter 2014 +++', + 'description': 'Gegen den SPD-Bürgerschaftsabgeordneten Patrick Öztürk wird wegen Beihilfe zum gewerbsmäßigen Betrug ermittelt. 
Am Donnerstagabend sollte er dem Vorstand des SPD-Unterbezirks Bremerhaven dazu Rede und Antwort stehen.', }, } From 520251c093f5e0fe6af5e57203a0452aef0682ac Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 19 Aug 2016 23:53:47 +0800 Subject: [PATCH 222/775] [extractor/common] Recognize m3u8 manifests in HTML5 multimedia tags --- ChangeLog | 1 + youtube_dl/extractor/common.py | 36 +++++++++++++++++++++++----------- 2 files changed, 26 insertions(+), 11 deletions(-) diff --git a/ChangeLog b/ChangeLog index 6281fe325..450351231 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Core +* Support m3u8 manifests in HTML5 multimedia tags * Fix js_to_json(): correct octal or hexadecimal number detection Extractors diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 9427ff449..07d58afe7 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1695,7 +1695,7 @@ class InfoExtractor(object): self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type) return formats - def _parse_html5_media_entries(self, base_url, webpage): + def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None): def absolute_url(video_url): return compat_urlparse.urljoin(base_url, video_url) @@ -1710,6 +1710,21 @@ class InfoExtractor(object): return f return {} + def _media_formats(src, cur_media_type): + full_url = absolute_url(src) + if determine_ext(full_url) == 'm3u8': + is_plain_url = False + formats = self._extract_m3u8_formats( + full_url, video_id, ext='mp4', entry_protocol='m3u8_native', + m3u8_id=m3u8_id) + else: + is_plain_url = True + formats = [{ + 'url': full_url, + 'vcodec': 'none' if cur_media_type == 'audio' else None, + }] + return is_plain_url, formats + entries = [] for media_tag, media_type, media_content in re.findall(r'(?s)(<(?Pvideo|audio)[^>]*>)(.*?)', webpage): media_info = { @@ -1719,10 +1734,8 @@ class InfoExtractor(object): media_attributes = extract_attributes(media_tag) src = media_attributes.get('src') if src: - media_info['formats'].append({ - 'url': absolute_url(src), - 'vcodec': 'none' if media_type == 'audio' else None, - }) + _, formats = _media_formats(src) + media_info['formats'].extend(formats) media_info['thumbnail'] = media_attributes.get('poster') if media_content: for source_tag in re.findall(r']+>', media_content): @@ -1730,12 +1743,13 @@ class InfoExtractor(object): src = source_attributes.get('src') if not src: continue - f = parse_content_type(source_attributes.get('type')) - f.update({ - 'url': absolute_url(src), - 'vcodec': 'none' if media_type == 'audio' else None, - }) - media_info['formats'].append(f) + is_plain_url, formats = _media_formats(src, media_type) + if is_plain_url: + f = parse_content_type(source_attributes.get('type')) + f.update(formats[0]) + media_info['formats'].append(f) + else: + media_info['formats'].extend(formats) for track_tag in re.findall(r']+>', media_content): track_attributes = extract_attributes(track_tag) kind = track_attributes.get('kind') From ecc90093f9c3793439832f4c9d279605da3489a7 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 19 Aug 2016 23:56:09 +0800 Subject: [PATCH 223/775] [vuclip] Adapt to the new API and update _TEST --- youtube_dl/extractor/vuclip.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/vuclip.py b/youtube_dl/extractor/vuclip.py index b73da5cd0..55e087bdb 100644 --- a/youtube_dl/extractor/vuclip.py +++ b/youtube_dl/extractor/vuclip.py @@ -17,12 +17,12 @@ 
class VuClipIE(InfoExtractor): _VALID_URL = r'https?://(?:m\.)?vuclip\.com/w\?.*?cid=(?P[0-9]+)' _TEST = { - 'url': 'http://m.vuclip.com/w?cid=922692425&fid=70295&z=1010&nvar&frm=index.html', + 'url': 'http://m.vuclip.com/w?cid=1129900602&bu=8589892792&frm=w&z=34801&op=0&oc=843169247§ion=recommend', 'info_dict': { - 'id': '922692425', + 'id': '1129900602', 'ext': '3gp', - 'title': 'The Toy Soldiers - Hollywood Movie Trailer', - 'duration': 177, + 'title': 'Top 10 TV Convicts', + 'duration': 733, } } @@ -54,7 +54,7 @@ class VuClipIE(InfoExtractor): 'url': video_url, }] else: - formats = self._parse_html5_media_entries(url, webpage)[0]['formats'] + formats = self._parse_html5_media_entries(url, webpage, video_id)[0]['formats'] title = remove_end(self._html_search_regex( r'(.*?)-\s*Vuclip', webpage, 'title').strip(), ' - Video') From a9a3b4a081a6793f0dd0b40be8429a2aa3c1c36d Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 20 Aug 2016 00:08:23 +0800 Subject: [PATCH 224/775] [miomio] Adapt to the new API and update _TESTS The test case is from #9680 --- youtube_dl/extractor/miomio.py | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/miomio.py b/youtube_dl/extractor/miomio.py index 937ba0f28..ec1b4c4fe 100644 --- a/youtube_dl/extractor/miomio.py +++ b/youtube_dl/extractor/miomio.py @@ -25,10 +25,7 @@ class MioMioIE(InfoExtractor): 'title': '【SKY】字幕 铠武昭和VS平成 假面骑士大战FEAT战队 魔星字幕组 字幕', 'duration': 5923, }, - 'params': { - # The server provides broken file - 'skip_download': True, - } + 'skip': 'Unable to load videos', }, { 'url': 'http://www.miomio.tv/watch/cc184024/', 'info_dict': { @@ -47,16 +44,12 @@ class MioMioIE(InfoExtractor): 'skip': 'Unable to load videos', }, { # new 'h5' player - 'url': 'http://www.miomio.tv/watch/cc273295/', - 'md5': '', + 'url': 'http://www.miomio.tv/watch/cc273997/', + 'md5': '0b27a4b4495055d826813f8c3a6b2070', 'info_dict': { - 'id': '273295', + 'id': '273997', 'ext': 'mp4', - 'title': 'アウト×デラックス 20160526', - }, - 'params': { - # intermittent HTTP 500 - 'skip_download': True, + 'title': 'マツコの知らない世界【劇的進化SP!ビニール傘&冷凍食品2016】 1_2 - 16 05 31', }, }] @@ -116,7 +109,7 @@ class MioMioIE(InfoExtractor): player_webpage = self._download_webpage( player_url, video_id, note='Downloading player webpage', headers={'Referer': url}) - entries = self._parse_html5_media_entries(player_url, player_webpage) + entries = self._parse_html5_media_entries(player_url, player_webpage, video_id) http_headers = {'Referer': player_url} else: http_headers = {'Referer': 'http://www.miomio.tv%s' % mioplayer_path} From 70852b47ca101f0b4acc76eb3213b763a14b3602 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 20 Aug 2016 00:12:32 +0800 Subject: [PATCH 225/775] [utils] Recognize units with full names in parse_filename Reference: https://en.wikipedia.org/wiki/Template:Quantities_of_bytes --- ChangeLog | 4 +++- test/test_utils.py | 1 + youtube_dl/utils.py | 17 +++++++++++++++++ 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 450351231..b36e4438c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,7 +1,9 @@ version Core -* Support m3u8 manifests in HTML5 multimedia tags ++ Recognize file size strings with full unit names (for example "8.5 + megabytes") ++ Support m3u8 manifests in HTML5 multimedia tags * Fix js_to_json(): correct octal or hexadecimal number detection Extractors diff --git a/test/test_utils.py b/test/test_utils.py index b83da93b4..d16ea7f77 100644 --- a/test/test_utils.py +++ 
b/test/test_utils.py @@ -823,6 +823,7 @@ class TestUtil(unittest.TestCase): self.assertEqual(parse_filesize('1.2tb'), 1200000000000) self.assertEqual(parse_filesize('1,24 KB'), 1240) self.assertEqual(parse_filesize('1,24 kb'), 1240) + self.assertEqual(parse_filesize('8.5 megabytes'), 8500000) def test_parse_count(self): self.assertEqual(parse_count(None), None) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 0c36c1b80..41ca562f1 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1504,46 +1504,63 @@ def parse_filesize(s): _UNIT_TABLE = { 'B': 1, 'b': 1, + 'bytes': 1, 'KiB': 1024, 'KB': 1000, 'kB': 1024, 'Kb': 1000, 'kb': 1000, + 'kilobytes': 1000, + 'kibibytes': 1024, 'MiB': 1024 ** 2, 'MB': 1000 ** 2, 'mB': 1024 ** 2, 'Mb': 1000 ** 2, 'mb': 1000 ** 2, + 'megabytes': 1000 ** 2, + 'mebibytes': 1024 ** 2, 'GiB': 1024 ** 3, 'GB': 1000 ** 3, 'gB': 1024 ** 3, 'Gb': 1000 ** 3, 'gb': 1000 ** 3, + 'gigabytes': 1000 ** 3, + 'gibibytes': 1024 ** 3, 'TiB': 1024 ** 4, 'TB': 1000 ** 4, 'tB': 1024 ** 4, 'Tb': 1000 ** 4, 'tb': 1000 ** 4, + 'terabytes': 1000 ** 4, + 'tebibytes': 1024 ** 4, 'PiB': 1024 ** 5, 'PB': 1000 ** 5, 'pB': 1024 ** 5, 'Pb': 1000 ** 5, 'pb': 1000 ** 5, + 'petabytes': 1000 ** 5, + 'pebibytes': 1024 ** 5, 'EiB': 1024 ** 6, 'EB': 1000 ** 6, 'eB': 1024 ** 6, 'Eb': 1000 ** 6, 'eb': 1000 ** 6, + 'exabytes': 1000 ** 6, + 'exbibytes': 1024 ** 6, 'ZiB': 1024 ** 7, 'ZB': 1000 ** 7, 'zB': 1024 ** 7, 'Zb': 1000 ** 7, 'zb': 1000 ** 7, + 'zettabytes': 1000 ** 7, + 'zebibytes': 1024 ** 7, 'YiB': 1024 ** 8, 'YB': 1000 ** 8, 'yB': 1024 ** 8, 'Yb': 1000 ** 8, 'yb': 1000 ** 8, + 'yottabytes': 1000 ** 8, + 'yobibytes': 1024 ** 8, } return lookup_unit_table(_UNIT_TABLE, s) From 19f35402c5296e93213d56034d85698087ce3fe1 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 20 Aug 2016 00:18:22 +0800 Subject: [PATCH 226/775] [snotr] Fix extraction (closes #10338) --- ChangeLog | 1 + youtube_dl/extractor/snotr.py | 38 +++++++++++++++++++---------------- 2 files changed, 22 insertions(+), 17 deletions(-) diff --git a/ChangeLog b/ChangeLog index b36e4438c..13c3d3ffc 100644 --- a/ChangeLog +++ b/ChangeLog @@ -7,6 +7,7 @@ Core * Fix js_to_json(): correct octal or hexadecimal number detection Extractors +* [snotr] Fix extraction (#10338) * [n-tv.de] Fix extraction (#10331) diff --git a/youtube_dl/extractor/snotr.py b/youtube_dl/extractor/snotr.py index 0d1ab07f8..3bb78cb84 100644 --- a/youtube_dl/extractor/snotr.py +++ b/youtube_dl/extractor/snotr.py @@ -5,9 +5,9 @@ import re from .common import InfoExtractor from ..utils import ( - float_or_none, - str_to_int, parse_duration, + parse_filesize, + str_to_int, ) @@ -17,21 +17,24 @@ class SnotrIE(InfoExtractor): 'url': 'http://www.snotr.com/video/13708/Drone_flying_through_fireworks', 'info_dict': { 'id': '13708', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Drone flying through fireworks!', - 'duration': 247, - 'filesize_approx': 98566144, + 'duration': 248, + 'filesize_approx': 40700000, 'description': 'A drone flying through Fourth of July Fireworks', - } + 'thumbnail': 're:^https?://.*\.jpg$', + }, + 'expected_warnings': ['description'], }, { 'url': 'http://www.snotr.com/video/530/David_Letteman_-_George_W_Bush_Top_10', 'info_dict': { 'id': '530', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'David Letteman - George W. Bush Top 10', 'duration': 126, - 'filesize_approx': 8912896, + 'filesize_approx': 8500000, 'description': 'The top 10 George W. 
Bush moments, brought to you by David Letterman!',
+            'thumbnail': 're:^https?://.*\.jpg$',
         }
     }]
 
@@ -43,26 +46,27 @@ class SnotrIE(InfoExtractor):
         title = self._og_search_title(webpage)
 
         description = self._og_search_description(webpage)
-        video_url = 'http://cdn.videos.snotr.com/%s.flv' % video_id
+        info_dict = self._parse_html5_media_entries(url, webpage, video_id)[0]
 
         view_count = str_to_int(self._html_search_regex(
-            r'<p>\n<strong>Views:</strong>\n([\d,\.]+)</p>',
+            r'<p[^>]*>\s*<strong[^>]*>Views:</strong>\s*<span[^>]*>([\d,\.]+)',
             webpage, 'view count', fatal=False))
 
         duration = parse_duration(self._html_search_regex(
-            r'<p>\n<strong>Length:</strong>\n\s*([0-9:]+).*?</p>',
+            r'<p[^>]*>\s*<strong[^>]*>Length:</strong>\s*<span[^>]*>([\d:]+)',
            webpage, 'duration', fatal=False))
 
-        filesize_approx = float_or_none(self._html_search_regex(
-            r'<p>\n<strong>Filesize:</strong>\n\s*([0-9.]+)\s*megabyte</p>
    ', - webpage, 'filesize', fatal=False), invscale=1024 * 1024) + filesize_approx = parse_filesize(self._html_search_regex( + r']*>\s*]*>Filesize:\s*]*>([^<]+)', + webpage, 'filesize', fatal=False)) - return { + info_dict.update({ 'id': video_id, 'description': description, 'title': title, - 'url': video_url, 'view_count': view_count, 'duration': duration, 'filesize_approx': filesize_approx, - } + }) + + return info_dict From 39e1c4f08c4cfca81943e73523bd66b890f5aff2 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 20 Aug 2016 00:52:37 +0800 Subject: [PATCH 227/775] [litv] Support 'promo' URLs (closes #10385) --- ChangeLog | 1 + youtube_dl/extractor/litv.py | 24 ++++++++++++++++++++---- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/ChangeLog b/ChangeLog index 13c3d3ffc..a8d8d05a3 100644 --- a/ChangeLog +++ b/ChangeLog @@ -7,6 +7,7 @@ Core * Fix js_to_json(): correct octal or hexadecimal number detection Extractors ++ [litv] Support 'promo' URLs (#10385) * [snotr] Fix extraction (#10338) * [n-tv.de] Fix extraction (#10331) diff --git a/youtube_dl/extractor/litv.py b/youtube_dl/extractor/litv.py index 3356d015d..05c6579f1 100644 --- a/youtube_dl/extractor/litv.py +++ b/youtube_dl/extractor/litv.py @@ -14,7 +14,7 @@ from ..utils import ( class LiTVIE(InfoExtractor): - _VALID_URL = r'https?://www\.litv\.tv/vod/[^/]+/content\.do\?.*?\bid=(?P[^&]+)' + _VALID_URL = r'https?://www\.litv\.tv/(?:vod|promo)/[^/]+/(?:content\.do)?\?.*?\b(?:content_)?id=(?P[^&]+)' _URL_TEMPLATE = 'https://www.litv.tv/vod/%s/content.do?id=%s' @@ -27,6 +27,7 @@ class LiTVIE(InfoExtractor): 'playlist_count': 50, }, { 'url': 'https://www.litv.tv/vod/drama/content.do?brc_id=root&id=VOD00041610&isUHEnabled=true&autoPlay=1', + 'md5': '969e343d9244778cb29acec608e53640', 'info_dict': { 'id': 'VOD00041610', 'ext': 'mp4', @@ -37,7 +38,16 @@ class LiTVIE(InfoExtractor): }, 'params': { 'noplaylist': True, - 'skip_download': True, # m3u8 download + }, + 'skip': 'Georestricted to Taiwan', + }, { + 'url': 'https://www.litv.tv/promo/miyuezhuan/?content_id=VOD00044841&', + 'md5': '88322ea132f848d6e3e18b32a832b918', + 'info_dict': { + 'id': 'VOD00044841', + 'ext': 'mp4', + 'title': '芈月傳第1集 霸星芈月降世楚國', + 'description': '楚威王二年,太史令唐昧夜觀星象,發現霸星即將現世。王后得知霸星的預言後,想盡辦法不讓孩子順利出生,幸得莒姬相護化解危機。沒想到眾人期待下出生的霸星卻是位公主,楚威王對此失望至極。楚王后命人將女嬰丟棄河中,居然奇蹟似的被少司命像攔下,楚威王認為此女非同凡響,為她取名芈月。', }, 'skip': 'Georestricted to Taiwan', }] @@ -92,13 +102,18 @@ class LiTVIE(InfoExtractor): # endpoint gives the same result as the data embedded in the webpage. 
# If georestricted, there are no embedded data, so an extra request is # necessary to get the error code + if 'assetId' not in view_data: + view_data = self._download_json( + 'https://www.litv.tv/vod/ajax/getProgramInfo', video_id, + query={'contentId': video_id}, + headers={'Accept': 'application/json'}) video_data = self._parse_json(self._search_regex( r'uiHlsUrl\s*=\s*testBackendData\(([^;]+)\);', webpage, 'video data', default='{}'), video_id) if not video_data: payload = { 'assetId': view_data['assetId'], - 'watchDevices': vod_data['watchDevices'], + 'watchDevices': view_data['watchDevices'], 'contentType': view_data['contentType'], } video_data = self._download_json( @@ -115,7 +130,8 @@ class LiTVIE(InfoExtractor): raise ExtractorError('Unexpected result from %s' % self.IE_NAME) formats = self._extract_m3u8_formats( - video_data['fullpath'], video_id, ext='mp4', m3u8_id='hls') + video_data['fullpath'], video_id, ext='mp4', + entry_protocol='m3u8_native', m3u8_id='hls') for a_format in formats: # LiTV HLS segments doesn't like compressions a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = True From 5b9d187cc6545c0c5209a4db5525b1023ca8ea41 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A9stin=20Reed?= Date: Fri, 19 Aug 2016 22:59:26 +0200 Subject: [PATCH 228/775] [imdb] Improve title extraction and make thumbnail non-fatal --- youtube_dl/extractor/imdb.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/imdb.py b/youtube_dl/extractor/imdb.py index 0acce9f4c..3a6a6f5ad 100644 --- a/youtube_dl/extractor/imdb.py +++ b/youtube_dl/extractor/imdb.py @@ -6,6 +6,7 @@ from .common import InfoExtractor from ..utils import ( mimetype2ext, qualities, + remove_end, ) @@ -19,7 +20,7 @@ class ImdbIE(InfoExtractor): 'info_dict': { 'id': '2524815897', 'ext': 'mp4', - 'title': 'Ice Age: Continental Drift Trailer (No. 2) - IMDb', + 'title': 'Ice Age: Continental Drift Trailer (No. 
2)', 'description': 'md5:9061c2219254e5d14e03c25c98e96a81', } }, { @@ -83,10 +84,10 @@ class ImdbIE(InfoExtractor): return { 'id': video_id, - 'title': self._og_search_title(webpage), + 'title': remove_end(self._og_search_title(webpage), ' - IMDb'), 'formats': formats, 'description': descr, - 'thumbnail': format_info['slate'], + 'thumbnail': format_info.get('slate'), } From 4245f55880c42e670cebd5a8a2b10929be834682 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 20 Aug 2016 06:18:20 +0700 Subject: [PATCH 229/775] [dotsub] Replace test (Closes #10386) --- youtube_dl/extractor/dotsub.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/dotsub.py b/youtube_dl/extractor/dotsub.py index e9ca236d4..fd64d1a7f 100644 --- a/youtube_dl/extractor/dotsub.py +++ b/youtube_dl/extractor/dotsub.py @@ -10,18 +10,18 @@ from ..utils import ( class DotsubIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?dotsub\.com/view/(?P[^/]+)' _TEST = { - 'url': 'http://dotsub.com/view/aed3b8b2-1889-4df5-ae63-ad85f5572f27', - 'md5': '0914d4d69605090f623b7ac329fea66e', + 'url': 'https://dotsub.com/view/9c63db2a-fa95-4838-8e6e-13deafe47f09', + 'md5': '21c7ff600f545358134fea762a6d42b6', 'info_dict': { - 'id': 'aed3b8b2-1889-4df5-ae63-ad85f5572f27', + 'id': '9c63db2a-fa95-4838-8e6e-13deafe47f09', 'ext': 'flv', - 'title': 'Pyramids of Waste (2010), AKA The Lightbulb Conspiracy - Planned obsolescence documentary', - 'description': 'md5:699a0f7f50aeec6042cb3b1db2d0d074', - 'thumbnail': 're:^https?://dotsub.com/media/aed3b8b2-1889-4df5-ae63-ad85f5572f27/p', - 'duration': 3169, - 'uploader': '4v4l0n42', - 'timestamp': 1292248482.625, - 'upload_date': '20101213', + 'title': 'MOTIVATION - "It\'s Possible" Best Inspirational Video Ever', + 'description': 'md5:41af1e273edbbdfe4e216a78b9d34ac6', + 'thumbnail': 're:^https?://dotsub.com/media/9c63db2a-fa95-4838-8e6e-13deafe47f09/p', + 'duration': 198, + 'uploader': 'liuxt', + 'timestamp': 1385778501.104, + 'upload_date': '20131130', 'view_count': int, } } From dabe15701b3c12ef7e6af1f3333e1d3e39149592 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 20 Aug 2016 13:25:32 +0100 Subject: [PATCH 230/775] [cbs, cbsnews] fix extraction(fixes #10393) --- youtube_dl/extractor/cbs.py | 47 ++++++++++++++++++++----------- youtube_dl/extractor/cbsnews.py | 9 +++--- youtube_dl/extractor/cbssports.py | 3 ++ 3 files changed, 38 insertions(+), 21 deletions(-) diff --git a/youtube_dl/extractor/cbs.py b/youtube_dl/extractor/cbs.py index a23173d6f..c72ed2dbb 100644 --- a/youtube_dl/extractor/cbs.py +++ b/youtube_dl/extractor/cbs.py @@ -4,6 +4,7 @@ from .theplatform import ThePlatformFeedIE from ..utils import ( int_or_none, find_xpath_attr, + ExtractorError, ) @@ -17,19 +18,6 @@ class CBSBaseIE(ThePlatformFeedIE): }] } if closed_caption_e is not None and closed_caption_e.attrib.get('value') else [] - def _extract_video_info(self, filter_query, video_id): - return self._extract_feed_info( - 'dJ5BDC', 'VxxJg8Ymh8sE', filter_query, video_id, lambda entry: { - 'series': entry.get('cbs$SeriesTitle'), - 'season_number': int_or_none(entry.get('cbs$SeasonNumber')), - 'episode': entry.get('cbs$EpisodeTitle'), - 'episode_number': int_or_none(entry.get('cbs$EpisodeNumber')), - }, { - 'StreamPack': { - 'manifest': 'm3u', - } - }) - class CBSIE(CBSBaseIE): _VALID_URL = r'(?:cbs:|https?://(?:www\.)?(?:cbs\.com/shows/[^/]+/video|colbertlateshow\.com/(?:video|podcasts))/)(?P[\w-]+)' @@ -38,7 +26,6 @@ class CBSIE(CBSBaseIE): 'url': 
'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/', 'info_dict': { 'id': '_u7W953k6la293J7EPTd9oHkSPs6Xn6_', - 'display_id': 'connect-chat-feat-garth-brooks', 'ext': 'mp4', 'title': 'Connect Chat feat. Garth Brooks', 'description': 'Connect with country music singer Garth Brooks, as he chats with fans on Wednesday November 27, 2013. Be sure to tune in to Garth Brooks: Live from Las Vegas, Friday November 29, at 9/8c on CBS!', @@ -47,7 +34,10 @@ class CBSIE(CBSBaseIE): 'upload_date': '20131127', 'uploader': 'CBSI-NEW', }, - 'expected_warnings': ['Failed to download m3u8 information'], + 'params': { + # m3u8 download + 'skip_download': True, + }, '_skip': 'Blocked outside the US', }, { 'url': 'http://colbertlateshow.com/video/8GmB0oY0McANFvp2aEffk9jZZZ2YyXxy/the-colbeard/', @@ -56,8 +46,31 @@ class CBSIE(CBSBaseIE): 'url': 'http://www.colbertlateshow.com/podcasts/dYSwjqPs_X1tvbV_P2FcPWRa_qT6akTC/in-the-bad-room-with-stephen/', 'only_matching': True, }] - TP_RELEASE_URL_TEMPLATE = 'http://link.theplatform.com/s/dJ5BDC/%s?mbr=true' + + def _extract_video_info(self, guid): + path = 'dJ5BDC/media/guid/2198311517/' + guid + smil_url = 'http://link.theplatform.com/s/%s?mbr=true' % path + formats, subtitles = self._extract_theplatform_smil(smil_url + '&manifest=m3u', guid) + for r in ('HLS&formats=M3U', 'RTMP', 'WIFI', '3G'): + try: + tp_formats, _ = self._extract_theplatform_smil(smil_url + '&assetTypes=' + r, guid, 'Downloading %s SMIL data' % r.split('&')[0]) + formats.extend(tp_formats) + except ExtractorError: + continue + self._sort_formats(formats) + metadata = self._download_theplatform_metadata(path, guid) + info = self._parse_theplatform_metadata(metadata) + info.update({ + 'id': guid, + 'formats': formats, + 'subtitles': subtitles, + 'series': metadata.get('cbs$SeriesTitle'), + 'season_number': int_or_none(metadata.get('cbs$SeasonNumber')), + 'episode': metadata.get('cbs$EpisodeTitle'), + 'episode_number': int_or_none(metadata.get('cbs$EpisodeNumber')), + }) + return info def _real_extract(self, url): content_id = self._match_id(url) - return self._extract_video_info('byGuid=%s' % content_id, content_id) + return self._extract_video_info(content_id) diff --git a/youtube_dl/extractor/cbsnews.py b/youtube_dl/extractor/cbsnews.py index 9d3b75526..4aa6917a0 100644 --- a/youtube_dl/extractor/cbsnews.py +++ b/youtube_dl/extractor/cbsnews.py @@ -2,13 +2,13 @@ from __future__ import unicode_literals from .common import InfoExtractor -from .cbs import CBSBaseIE +from .cbs import CBSIE from ..utils import ( parse_duration, ) -class CBSNewsIE(CBSBaseIE): +class CBSNewsIE(CBSIE): IE_DESC = 'CBS News' _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/(?:news|videos)/(?P[\da-z_-]+)' @@ -35,7 +35,8 @@ class CBSNewsIE(CBSBaseIE): 'ext': 'mp4', 'title': 'Fort Hood shooting: Army downplays mental illness as cause of attack', 'description': 'md5:4a6983e480542d8b333a947bfc64ddc7', - 'upload_date': '19700101', + 'upload_date': '20140404', + 'timestamp': 1396650660, 'uploader': 'CBSI-NEW', 'thumbnail': 're:^https?://.*\.jpg$', 'duration': 205, @@ -63,7 +64,7 @@ class CBSNewsIE(CBSBaseIE): item = video_info['item'] if 'item' in video_info else video_info guid = item['mpxRefId'] - return self._extract_video_info('byGuid=%s' % guid, guid) + return self._extract_video_info(guid) class CBSNewsLiveVideoIE(InfoExtractor): diff --git a/youtube_dl/extractor/cbssports.py b/youtube_dl/extractor/cbssports.py index 78ca44b02..bf7915626 100644 --- 
a/youtube_dl/extractor/cbssports.py +++ b/youtube_dl/extractor/cbssports.py @@ -23,6 +23,9 @@ class CBSSportsIE(CBSBaseIE): } }] + def _extract_video_info(self, filter_query, video_id): + return self._extract_feed_info('dJ5BDC', 'VxxJg8Ymh8sE', filter_query, video_id) + def _real_extract(self, url): video_id = self._match_id(url) return self._extract_video_info('byId=%s' % video_id, video_id) From 292a2301bf0b99be81640c4511d78ebc3c622dad Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 20 Aug 2016 19:00:25 +0100 Subject: [PATCH 231/775] [cnn] add support for money.cnn.com videos(closes #2797) --- youtube_dl/extractor/cnn.py | 41 ++++++++++++++++++++++++++++++------- 1 file changed, 34 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/cnn.py b/youtube_dl/extractor/cnn.py index 53489a14e..220bb55e8 100644 --- a/youtube_dl/extractor/cnn.py +++ b/youtube_dl/extractor/cnn.py @@ -11,7 +11,7 @@ from ..utils import ( class CNNIE(InfoExtractor): - _VALID_URL = r'''(?x)https?://(?:(?:edition|www)\.)?cnn\.com/video/(?:data/.+?|\?)/ + _VALID_URL = r'''(?x)https?://(?:(?Pedition|www|money)\.)?cnn\.com/(?:video/(?:data/.+?|\?)/)?videos?/ (?P.+?/(?P[^/]+?)(?:\.(?:[a-z\-]+)|(?=&)))''' _TESTS = [{ @@ -45,19 +45,46 @@ class CNNIE(InfoExtractor): 'description': 'md5:e7223a503315c9f150acac52e76de086', 'upload_date': '20141222', } + }, { + 'url': 'http://money.cnn.com/video/news/2016/08/19/netflix-stunning-stats.cnnmoney/index.html', + 'md5': '52a515dc1b0f001cd82e4ceda32be9d1', + 'info_dict': { + 'id': '/video/news/2016/08/19/netflix-stunning-stats.cnnmoney', + 'ext': 'mp4', + 'title': '5 stunning stats about Netflix', + 'description': 'Did you know that Netflix has more than 80 million members? Here are five facts about the online video distributor that you probably didn\'t know.', + 'upload_date': '20160819', + } }, { 'url': 'http://cnn.com/video/?/video/politics/2015/03/27/pkg-arizona-senator-church-attendance-mandatory.ktvk', 'only_matching': True, }, { 'url': 'http://cnn.com/video/?/video/us/2015/04/06/dnt-baker-refuses-anti-gay-order.wkmg', 'only_matching': True, + }, { + 'url': 'http://edition.cnn.com/videos/arts/2016/04/21/olympic-games-cultural-a-z-brazil.cnn', + 'only_matching': True, }] + _CONFIG = { + # http://edition.cnn.com/.element/apps/cvp/3.0/cfg/spider/cnn/expansion/config.xml + 'edition': { + 'data_src': 'http://edition.cnn.com/video/data/3.0/video/%s/index.xml', + 'media_src': 'http://pmd.cdn.turner.com/cnn/big', + }, + # http://money.cnn.com/.element/apps/cvp2/cfg/config.xml + 'money': { + 'data_src': 'http://money.cnn.com/video/data/4.0/video/%s.xml', + 'media_src': 'http://ht3.cdn.turner.com/money/big', + }, + } + def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - path = mobj.group('path') - page_title = mobj.group('title') - info_url = 'http://edition.cnn.com/video/data/3.0/%s/index.xml' % path + sub_domain, path, page_title = re.match(self._VALID_URL, url).groups() + if sub_domain not in ('money', 'edition'): + sub_domain = 'edition' + config = self._CONFIG[sub_domain] + info_url = config['data_src'] % path info = self._download_xml(info_url, page_title) formats = [] @@ -66,7 +93,7 @@ class CNNIE(InfoExtractor): (?:_(?P<bitrate>[0-9]+)k)? 
''') for f in info.findall('files/file'): - video_url = 'http://ht.cdn.turner.com/cnn/big%s' % (f.text.strip()) + video_url = config['media_src'] + f.text.strip() fdct = { 'format_id': f.attrib['bitrate'], 'url': video_url, @@ -146,7 +173,7 @@ class CNNBlogsIE(InfoExtractor): class CNNArticleIE(InfoExtractor): - _VALID_URL = r'https?://(?:(?:edition|www)\.)?cnn\.com/(?!video/)' + _VALID_URL = r'https?://(?:(?:edition|www)\.)?cnn\.com/(?!videos?/)' _TEST = { 'url': 'http://www.cnn.com/2014/12/21/politics/obama-north-koreas-hack-not-war-but-cyber-vandalism/', 'md5': '689034c2a3d9c6dc4aa72d65a81efd01', From e25586e47163c83e519ae0af9aa6d8fbc3d58ef4 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 20 Aug 2016 20:02:49 +0100 Subject: [PATCH 232/775] [cultureunplugged] fix extraction(closes #10330) --- youtube_dl/extractor/cultureunplugged.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/cultureunplugged.py b/youtube_dl/extractor/cultureunplugged.py index 9c764fe68..9f26fa587 100644 --- a/youtube_dl/extractor/cultureunplugged.py +++ b/youtube_dl/extractor/cultureunplugged.py @@ -1,9 +1,13 @@ from __future__ import unicode_literals import re +import time from .common import InfoExtractor -from ..utils import int_or_none +from ..utils import ( + int_or_none, + HEADRequest, +) class CultureUnpluggedIE(InfoExtractor): @@ -32,6 +36,9 @@ class CultureUnpluggedIE(InfoExtractor): video_id = mobj.group('id') display_id = mobj.group('display_id') or video_id + # request setClientTimezone.php to get PHPSESSID cookie which is need to get valid json data in the next request + self._request_webpage(HEADRequest( + 'http://www.cultureunplugged.com/setClientTimezone.php?timeOffset=%d' % -(time.timezone / 3600)), display_id) movie_data = self._download_json( 'http://www.cultureunplugged.com/movie-data/cu-%s.json' % video_id, display_id) From 5b1d85754ee2f1a8b94c979bc5122b4130ef8cc7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 21 Aug 2016 08:07:26 +0700 Subject: [PATCH 233/775] [YoutubeDL] Autocalculate ext when ext is None --- youtube_dl/YoutubeDL.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index e844dc98a..0b3e3da82 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1299,7 +1299,7 @@ class YoutubeDL(object): for subtitle_format in subtitle: if subtitle_format.get('url'): subtitle_format['url'] = sanitize_url(subtitle_format['url']) - if 'ext' not in subtitle_format: + if subtitle_format.get('ext') is None: subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower() if self.params.get('listsubtitles', False): @@ -1354,7 +1354,7 @@ class YoutubeDL(object): note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '', ) # Automatically determine file extension if missing - if 'ext' not in format: + if format.get('ext') is None: format['ext'] = determine_ext(format['url']).lower() # Automatically determine protocol if missing (useful for format # selection purposes) From d8f30a7e6606d2300dfffb8fc0aaf8d6a0c79b0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 21 Aug 2016 08:10:49 +0700 Subject: [PATCH 234/775] [kaltura] Remove unused code --- youtube_dl/extractor/kaltura.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py index 
ddf1165ff..66c7b36bc 100644 --- a/youtube_dl/extractor/kaltura.py +++ b/youtube_dl/extractor/kaltura.py @@ -122,18 +122,6 @@ class KalturaIE(InfoExtractor): return data - def _get_kaltura_signature(self, video_id, partner_id, service_url=None): - actions = [{ - 'apiVersion': '3.1', - 'expiry': 86400, - 'format': 1, - 'service': 'session', - 'action': 'startWidgetSession', - 'widgetId': '_%s' % partner_id, - }] - return self._kaltura_api_call( - video_id, actions, service_url, note='Downloading Kaltura signature')['ks'] - def _get_video_info(self, video_id, partner_id, service_url=None): actions = [ { From a80944675010617cc0124c57ab597f9d9004c0d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 21 Aug 2016 08:26:45 +0700 Subject: [PATCH 235/775] [kaltura] Add subtitles support when entry_id is unknown beforehand (Closes #10279) --- youtube_dl/extractor/kaltura.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py index 66c7b36bc..a8ce6dda2 100644 --- a/youtube_dl/extractor/kaltura.py +++ b/youtube_dl/extractor/kaltura.py @@ -196,6 +196,17 @@ class KalturaIE(InfoExtractor): reference_id)['entryResult'] info, flavor_assets = entry_data['meta'], entry_data['contextData']['flavorAssets'] entry_id = info['id'] + # Unfortunately, data returned in kalturaIframePackageData lacks + # captions so we will try requesting the complete data using + # regular approach since we now know the entry_id + try: + _, info, flavor_assets, captions = self._get_video_info( + entry_id, partner_id) + except ExtractorError: + # Regular scenario failed but we already have everything + # extracted apart from captions and can process at least + # with this + pass else: raise ExtractorError('Invalid URL', expected=True) ks = params.get('flashvars[ks]', [None])[0] From fddaa76a599a7df00dc94dd5663d43c881f8fee0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 21 Aug 2016 08:27:53 +0700 Subject: [PATCH 236/775] [kaltura] Assume ttml to be default subtitles' extension --- youtube_dl/extractor/kaltura.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py index a8ce6dda2..15f2fe24f 100644 --- a/youtube_dl/extractor/kaltura.py +++ b/youtube_dl/extractor/kaltura.py @@ -266,7 +266,7 @@ class KalturaIE(InfoExtractor): continue subtitles.setdefault(caption.get('languageCode') or caption.get('language'), []).append({ 'url': '%s/api_v3/service/caption_captionasset/action/serve/captionAssetId/%s' % (self._SERVICE_URL, caption['id']), - 'ext': caption.get('fileExt'), + 'ext': caption.get('fileExt', 'ttml'), }) return { From 2c6acdfd2d31b7ce9500e9efe411620c61059b98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 21 Aug 2016 08:37:01 +0700 Subject: [PATCH 237/775] [kaltura] Add test for #10279 --- youtube_dl/extractor/kaltura.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py index 15f2fe24f..e0f7366c2 100644 --- a/youtube_dl/extractor/kaltura.py +++ b/youtube_dl/extractor/kaltura.py @@ -67,6 +67,27 @@ class KalturaIE(InfoExtractor): # video with subtitles 'url': 'kaltura:111032:1_cw786r8q', 'only_matching': True, + }, + { + # video with ttml subtitles (no fileExt) + 'url': 'kaltura:1926081:0_l5ye1133', + 'info_dict': { + 'id': '0_l5ye1133', + 'ext': 'mp4', + 
'title': 'What Can You Do With Python?', + 'upload_date': '20160221', + 'uploader_id': 'stork', + 'thumbnail': 're:^https?://.*/thumbnail/.*', + 'timestamp': int, + 'subtitles': { + 'en': [{ + 'ext': 'ttml', + }], + }, + }, + 'params': { + 'skip_download': True, + }, } ] From db29af6d36b3d16614355dac70f22c4f2d8410d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A9stin=20Reed?= <trox1972@users.noreply.github.com> Date: Fri, 19 Aug 2016 12:53:34 +0200 Subject: [PATCH 238/775] [charlierose] Add new extractor --- youtube_dl/extractor/charlierose.py | 45 +++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 1 + 2 files changed, 46 insertions(+) create mode 100644 youtube_dl/extractor/charlierose.py diff --git a/youtube_dl/extractor/charlierose.py b/youtube_dl/extractor/charlierose.py new file mode 100644 index 000000000..ba1d1b833 --- /dev/null +++ b/youtube_dl/extractor/charlierose.py @@ -0,0 +1,45 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import remove_end + + +class CharlieRoseIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?charlierose\.com/video(?:s|/player)/(?P<id>\d+)' + _TEST = { + 'url': 'https://charlierose.com/videos/27996', + 'info_dict': { + 'id': '27996', + 'ext': 'mp4', + 'title': 'Remembering Zaha Hadid', + 'thumbnail': 're:^https?://.*\.jpg\?\d+', + 'description': 'We revisit past conversations with Zaha Hadid, in memory of the world renowned Iraqi architect.', + }, + 'params': { + # m3u8 download + 'skip_download': True, + } + } + + _PLAYER_BASE = 'https://charlierose.com/video/player/%s' + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(self._PLAYER_BASE % video_id, video_id) + + title = remove_end(self._og_search_title(webpage), ' - Charlie Rose') + + entries = self._parse_html5_media_entries(self._PLAYER_BASE % video_id, webpage, video_id)[0] + formats = entries['formats'] + + self._sort_formats(formats) + self._remove_duplicate_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'formats': formats, + 'thumbnail': self._og_search_thumbnail(webpage), + 'description': self._og_search_description(webpage), + 'subtitles': entries.get('subtitles'), + } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 6c5d46015..d4d90c1f8 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -134,6 +134,7 @@ from .ccc import CCCIE from .cda import CDAIE from .ceskatelevize import CeskaTelevizeIE from .channel9 import Channel9IE +from .charlierose import CharlieRoseIE from .chaturbate import ChaturbateIE from .chilloutzone import ChilloutzoneIE from .chirbit import ( From d164a0d41bdc95caa2b1458b9f51381de7d6a5a7 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 21 Aug 2016 20:00:48 +0800 Subject: [PATCH 239/775] [README.md] Add a format selection example using comma Ref: #10399 --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index 952db7abb..a10aaf35c 100644 --- a/README.md +++ b/README.md @@ -645,7 +645,11 @@ $ youtube-dl -f 'best[filesize<50M]' # Download best format available via direct link over HTTP/HTTPS protocol $ youtube-dl -f '(bestvideo+bestaudio/best)[protocol^=http]' + +# Download the best video format and the best audio format without merging them +$ youtube-dl -f 'bestvideo,bestaudio' -o '%(title)s.f%(format_id)s.%(ext)s' ``` +Note that in the last example, an output template is recommended as 
bestvideo and bestaudio may have the same file name. # VIDEO SELECTION From 3d47ee0a9eb37b2c91dfae80c7f22fda0242dd61 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sun, 21 Aug 2016 14:09:18 +0100 Subject: [PATCH 240/775] [zingmp3] fix extraction and add support for video clips(closes #10041) --- youtube_dl/extractor/extractors.py | 5 +- youtube_dl/extractor/zingmp3.py | 127 +++++++++++++++++------------ 2 files changed, 76 insertions(+), 56 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 6c5d46015..20fb23527 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1114,7 +1114,4 @@ from .youtube import ( ) from .zapiks import ZapiksIE from .zdf import ZDFIE, ZDFChannelIE -from .zingmp3 import ( - ZingMp3SongIE, - ZingMp3AlbumIE, -) +from .zingmp3 import ZingMp3IE diff --git a/youtube_dl/extractor/zingmp3.py b/youtube_dl/extractor/zingmp3.py index 437eecb67..bd708b42c 100644 --- a/youtube_dl/extractor/zingmp3.py +++ b/youtube_dl/extractor/zingmp3.py @@ -4,13 +4,17 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ExtractorError +from ..utils import ( + ExtractorError, + int_or_none, + update_url_query, +) class ZingMp3BaseInfoExtractor(InfoExtractor): - def _extract_item(self, item, fatal=True): - error_message = item.find('./errormessage').text + def _extract_item(self, item, page_type, fatal=True): + error_message = item.get('msg') if error_message: if not fatal: return @@ -18,25 +22,48 @@ class ZingMp3BaseInfoExtractor(InfoExtractor): '%s returned error: %s' % (self.IE_NAME, error_message), expected=True) - title = item.find('./title').text.strip() - source = item.find('./source').text - extension = item.attrib['type'] - thumbnail = item.find('./backimage').text + formats = [] + for quality, source_url in zip(item.get('qualities') or item.get('quality', []), item.get('source_list') or item.get('source', [])): + if not source_url or source_url == 'require vip': + continue + if not re.match(r'https?://', source_url): + source_url = '//' + source_url + source_url = self._proto_relative_url(source_url, 'http:') + quality_num = int_or_none(quality) + f = { + 'format_id': quality, + 'url': source_url, + } + if page_type == 'video': + f.update({ + 'height': quality_num, + 'ext': 'mp4', + }) + else: + f.update({ + 'abr': quality_num, + 'ext': 'mp3', + }) + formats.append(f) + + cover = item.get('cover') return { - 'title': title, - 'url': source, - 'ext': extension, - 'thumbnail': thumbnail, + 'title': (item.get('name') or item.get('title')).strip(), + 'formats': formats, + 'thumbnail': 'http:/' + cover if cover else None, + 'artist': item.get('artist'), } - def _extract_player_xml(self, player_xml_url, id, playlist_title=None): - player_xml = self._download_xml(player_xml_url, id, 'Downloading Player XML') - items = player_xml.findall('./item') + def _extract_player_json(self, player_json_url, id, page_type, playlist_title=None): + player_json = self._download_json(player_json_url, id, 'Downloading Player JSON') + items = player_json['data'] + if 'item' in items: + items = items['item'] if len(items) == 1: # one single song - data = self._extract_item(items[0]) + data = self._extract_item(items[0], page_type) data['id'] = id return data @@ -45,7 +72,7 @@ class ZingMp3BaseInfoExtractor(InfoExtractor): entries = [] for i, item in enumerate(items, 1): - entry = self._extract_item(item, fatal=False) + entry = 
self._extract_item(item, page_type, fatal=False) if not entry: continue entry['id'] = '%s-%d' % (id, i) @@ -59,8 +86,8 @@ class ZingMp3BaseInfoExtractor(InfoExtractor): } -class ZingMp3SongIE(ZingMp3BaseInfoExtractor): - _VALID_URL = r'https?://mp3\.zing\.vn/bai-hat/(?P<slug>[^/]+)/(?P<song_id>\w+)\.html' +class ZingMp3IE(ZingMp3BaseInfoExtractor): + _VALID_URL = r'https?://mp3\.zing\.vn/(?:bai-hat|album|playlist|video-clip)/[^/]+/(?P<id>\w+)\.html' _TESTS = [{ 'url': 'http://mp3.zing.vn/bai-hat/Xa-Mai-Xa-Bao-Thy/ZWZB9WAB.html', 'md5': 'ead7ae13693b3205cbc89536a077daed', @@ -70,51 +97,47 @@ class ZingMp3SongIE(ZingMp3BaseInfoExtractor): 'ext': 'mp3', 'thumbnail': 're:^https?://.*\.jpg$', }, - }] - IE_NAME = 'zingmp3:song' - IE_DESC = 'mp3.zing.vn songs' - - def _real_extract(self, url): - matched = re.match(self._VALID_URL, url) - slug = matched.group('slug') - song_id = matched.group('song_id') - - webpage = self._download_webpage( - 'http://mp3.zing.vn/bai-hat/%s/%s.html' % (slug, song_id), song_id) - - player_xml_url = self._search_regex( - r'&xmlURL=(?P<xml_url>[^&]+)&', webpage, 'player xml url') - - return self._extract_player_xml(player_xml_url, song_id) - - -class ZingMp3AlbumIE(ZingMp3BaseInfoExtractor): - _VALID_URL = r'https?://mp3\.zing\.vn/(?:album|playlist)/(?P<slug>[^/]+)/(?P<album_id>\w+)\.html' - _TESTS = [{ + }, { + 'url': 'http://mp3.zing.vn/video-clip/Let-It-Go-Frozen-OST-Sungha-Jung/ZW6BAEA0.html', + 'md5': '870295a9cd8045c0e15663565902618d', + 'info_dict': { + 'id': 'ZW6BAEA0', + 'title': 'Let It Go (Frozen OST)', + 'ext': 'mp4', + }, + }, { 'url': 'http://mp3.zing.vn/album/Lau-Dai-Tinh-Ai-Bang-Kieu-Minh-Tuyet/ZWZBWDAF.html', 'info_dict': { '_type': 'playlist', 'id': 'ZWZBWDAF', - 'title': 'Lâu Đài Tình Ái - Bằng Kiều ft. Minh Tuyết | Album 320 lossless', + 'title': 'Lâu Đài Tình Ái - Bằng Kiều,Minh Tuyết | Album 320 lossless', }, 'playlist_count': 10, + 'skip': 'removed at the request of the owner', }, { 'url': 'http://mp3.zing.vn/playlist/Duong-Hong-Loan-apollobee/IWCAACCB.html', 'only_matching': True, }] - IE_NAME = 'zingmp3:album' - IE_DESC = 'mp3.zing.vn albums' + IE_NAME = 'zingmp3' + IE_DESC = 'mp3.zing.vn' def _real_extract(self, url): - matched = re.match(self._VALID_URL, url) - slug = matched.group('slug') - album_id = matched.group('album_id') + page_id = self._match_id(url) - webpage = self._download_webpage( - 'http://mp3.zing.vn/album/%s/%s.html' % (slug, album_id), album_id) - player_xml_url = self._search_regex( - r'&xmlURL=(?P<xml_url>[^&]+)&', webpage, 'player xml url') + webpage = self._download_webpage(url, page_id) - return self._extract_player_xml( - player_xml_url, album_id, - playlist_title=self._og_search_title(webpage)) + player_json_url = self._search_regex([ + r'data-xml="([^"]+)', + r'&xmlURL=([^&]+)&' + ], webpage, 'player xml url') + + playlist_title = None + page_type = self._search_regex(r'/(?:html5)?xml/([^/-]+)', player_json_url, 'page type') + if page_type == 'video': + player_json_url = update_url_query(player_json_url, {'format': 'json'}) + else: + player_json_url = player_json_url.replace('/xml/', '/html5xml/') + if page_type == 'album': + playlist_title = self._og_search_title(webpage) + + return self._extract_player_json(player_json_url, page_id, page_type, playlist_title) From 92d4cfa358bacff0e79da30ffb0908c7096e82f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 21 Aug 2016 21:01:01 +0700 Subject: [PATCH 241/775] [kaltura] Fallback ext calculation on caption's format --- 
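A note on the diff that follows: Kaltura reports captions either with an explicit fileExt or only with a numeric format code, so the extension is now resolved as fileExt first, then the code mapped through the new _CAPTION_TYPES table, and only then the ttml default. A minimal standalone sketch of that selection order (the sample caption dicts below are hypothetical, and the format codes are plain ints here, whereas the extractor normalizes them with int_or_none):

CAPTION_TYPES = {
    1: 'srt',
    2: 'ttml',
    3: 'vtt',
}


def caption_ext(caption):
    # Prefer the extension reported by the API, then the numeric caption
    # format code, and only then fall back to ttml.
    return (caption.get('fileExt')
            or CAPTION_TYPES.get(caption.get('format'))
            or 'ttml')


# Hypothetical caption entries, for illustration only.
assert caption_ext({'fileExt': 'srt'}) == 'srt'
assert caption_ext({'format': 3}) == 'vtt'
assert caption_ext({}) == 'ttml'
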
youtube_dl/extractor/kaltura.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py index e0f7366c2..6a8464998 100644 --- a/youtube_dl/extractor/kaltura.py +++ b/youtube_dl/extractor/kaltura.py @@ -36,6 +36,12 @@ class KalturaIE(InfoExtractor): ''' _SERVICE_URL = 'http://cdnapi.kaltura.com' _SERVICE_BASE = '/api_v3/index.php' + # See https://github.com/kaltura/server/blob/master/plugins/content/caption/base/lib/model/enums/CaptionType.php + _CAPTION_TYPES = { + 1: 'srt', + 2: 'ttml', + 3: 'vtt', + } _TESTS = [ { 'url': 'kaltura:269692:1_1jc2y3e4', @@ -285,9 +291,12 @@ class KalturaIE(InfoExtractor): # Continue if caption is not ready if f.get('status') != 2: continue + if not caption.get('id'): + continue + caption_format = int_or_none(caption.get('format')) subtitles.setdefault(caption.get('languageCode') or caption.get('language'), []).append({ 'url': '%s/api_v3/service/caption_captionasset/action/serve/captionAssetId/%s' % (self._SERVICE_URL, caption['id']), - 'ext': caption.get('fileExt', 'ttml'), + 'ext': caption.get('fileExt') or self._CAPTION_TYPES.get(caption_format) or 'ttml', }) return { From b1e676fde81d33116f6739006d9aa0b68eebc072 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 21 Aug 2016 21:28:02 +0700 Subject: [PATCH 242/775] [twitch] Modernize --- youtube_dl/extractor/twitch.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index 890f55180..4b5b2030c 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -20,7 +20,6 @@ from ..utils import ( orderedSet, parse_duration, parse_iso8601, - sanitized_Request, urlencode_postdata, ) @@ -50,8 +49,8 @@ class TwitchBaseIE(InfoExtractor): for cookie in self._downloader.cookiejar: if cookie.name == 'api_token': headers['Twitch-Api-Token'] = cookie.value - request = sanitized_Request(url, headers=headers) - response = super(TwitchBaseIE, self)._download_json(request, video_id, note) + response = super(TwitchBaseIE, self)._download_json( + url, video_id, note, headers=headers) self._handle_error(response) return response @@ -82,11 +81,10 @@ class TwitchBaseIE(InfoExtractor): if not post_url.startswith('http'): post_url = compat_urlparse.urljoin(redirect_url, post_url) - request = sanitized_Request( - post_url, urlencode_postdata(login_form)) - request.add_header('Referer', redirect_url) response = self._download_webpage( - request, None, 'Logging in as %s' % username) + post_url, None, 'Logging in as %s' % username, + data=urlencode_postdata(login_form), + headers={'Referer': redirect_url}) error_message = self._search_regex( r'<div[^>]+class="subwindow_notice"[^>]*>([^<]+)</div>', From e3f6b569096ba6faa8de230333849817c8b31a2e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 21 Aug 2016 22:09:29 +0700 Subject: [PATCH 243/775] [twitch] Refactor API calls --- youtube_dl/extractor/twitch.py | 38 +++++++++++++++++----------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index 4b5b2030c..f0a9370c8 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -41,7 +41,7 @@ class TwitchBaseIE(InfoExtractor): '%s returned error: %s - %s' % (self.IE_NAME, error, response.get('message')), expected=True) - def _download_json(self, url, 
video_id, note='Downloading JSON metadata'): + def _call_api(self, path, item_id, note): headers = { 'Referer': 'http://api.twitch.tv/crossdomain/receiver.html?v=2', 'X-Requested-With': 'XMLHttpRequest', @@ -49,8 +49,8 @@ class TwitchBaseIE(InfoExtractor): for cookie in self._downloader.cookiejar: if cookie.name == 'api_token': headers['Twitch-Api-Token'] = cookie.value - response = super(TwitchBaseIE, self)._download_json( - url, video_id, note, headers=headers) + response = self._download_json( + '%s/%s' % (self._API_BASE, path), item_id, note) self._handle_error(response) return response @@ -107,14 +107,14 @@ class TwitchBaseIE(InfoExtractor): class TwitchItemBaseIE(TwitchBaseIE): def _download_info(self, item, item_id): - return self._extract_info(self._download_json( - '%s/kraken/videos/%s%s' % (self._API_BASE, item, item_id), item_id, + return self._extract_info(self._call_api( + 'kraken/videos/%s%s' % (item, item_id), item_id, 'Downloading %s info JSON' % self._ITEM_TYPE)) def _extract_media(self, item_id): info = self._download_info(self._ITEM_SHORTCUT, item_id) - response = self._download_json( - '%s/api/videos/%s%s' % (self._API_BASE, self._ITEM_SHORTCUT, item_id), item_id, + response = self._call_api( + 'api/videos/%s%s' % (self._ITEM_SHORTCUT, item_id), item_id, 'Downloading %s playlist JSON' % self._ITEM_TYPE) entries = [] chunks = response['chunks'] @@ -244,8 +244,8 @@ class TwitchVodIE(TwitchItemBaseIE): item_id = self._match_id(url) info = self._download_info(self._ITEM_SHORTCUT, item_id) - access_token = self._download_json( - '%s/api/vods/%s/access_token' % (self._API_BASE, item_id), item_id, + access_token = self._call_api( + 'api/vods/%s/access_token' % item_id, item_id, 'Downloading %s access token' % self._ITEM_TYPE) formats = self._extract_m3u8_formats( @@ -273,12 +273,12 @@ class TwitchVodIE(TwitchItemBaseIE): class TwitchPlaylistBaseIE(TwitchBaseIE): - _PLAYLIST_URL = '%s/kraken/channels/%%s/videos/?offset=%%d&limit=%%d' % TwitchBaseIE._API_BASE + _PLAYLIST_PATH = 'kraken/channels/%s/videos/?offset=%d&limit=%d' _PAGE_LIMIT = 100 def _extract_playlist(self, channel_id): - info = self._download_json( - '%s/kraken/channels/%s' % (self._API_BASE, channel_id), + info = self._call_api( + 'kraken/channels/%s' % channel_id, channel_id, 'Downloading channel info JSON') channel_name = info.get('display_name') or info.get('name') entries = [] @@ -287,8 +287,8 @@ class TwitchPlaylistBaseIE(TwitchBaseIE): broken_paging_detected = False counter_override = None for counter in itertools.count(1): - response = self._download_json( - self._PLAYLIST_URL % (channel_id, offset, limit), + response = self._call_api( + self._PLAYLIST_PATH % (channel_id, offset, limit), channel_id, 'Downloading %s videos JSON page %s' % (self._PLAYLIST_TYPE, counter_override or counter)) @@ -343,7 +343,7 @@ class TwitchProfileIE(TwitchPlaylistBaseIE): class TwitchPastBroadcastsIE(TwitchPlaylistBaseIE): IE_NAME = 'twitch:past_broadcasts' _VALID_URL = r'%s/(?P<id>[^/]+)/profile/past_broadcasts/?(?:\#.*)?$' % TwitchBaseIE._VALID_URL_BASE - _PLAYLIST_URL = TwitchPlaylistBaseIE._PLAYLIST_URL + '&broadcasts=true' + _PLAYLIST_PATH = TwitchPlaylistBaseIE._PLAYLIST_PATH + '&broadcasts=true' _PLAYLIST_TYPE = 'past broadcasts' _TEST = { @@ -387,8 +387,8 @@ class TwitchStreamIE(TwitchBaseIE): def _real_extract(self, url): channel_id = self._match_id(url) - stream = self._download_json( - '%s/kraken/streams/%s' % (self._API_BASE, channel_id), channel_id, + stream = self._call_api( + 'kraken/streams/%s' % channel_id, 
channel_id, 'Downloading stream JSON').get('stream') # Fallback on profile extraction if stream is offline @@ -403,8 +403,8 @@ class TwitchStreamIE(TwitchBaseIE): # JSON and fallback to lowercase if it's not available. channel_id = stream.get('channel', {}).get('name') or channel_id.lower() - access_token = self._download_json( - '%s/api/channels/%s/access_token' % (self._API_BASE, channel_id), channel_id, + access_token = self._call_api( + 'api/channels/%s/access_token' % channel_id, channel_id, 'Downloading channel access token') query = { From efe470e2614d8a50a5cc2d14551e9bc4fc41cc8b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 21 Aug 2016 22:45:50 +0700 Subject: [PATCH 244/775] [twitch] Renew authentication --- youtube_dl/extractor/twitch.py | 38 +++++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index f0a9370c8..359a8859c 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -7,6 +7,7 @@ import random from .common import InfoExtractor from ..compat import ( + compat_HTTPError, compat_parse_qs, compat_str, compat_urllib_parse_urlencode, @@ -14,6 +15,7 @@ from ..compat import ( compat_urlparse, ) from ..utils import ( + clean_html, ExtractorError, int_or_none, js_to_json, @@ -62,9 +64,17 @@ class TwitchBaseIE(InfoExtractor): if username is None: return + def fail(message): + raise ExtractorError( + 'Unable to login. Twitch said: %s' % message, expected=True) + login_page, handle = self._download_webpage_handle( self._LOGIN_URL, None, 'Downloading login page') + # Some TOR nodes and public proxies are blocked completely + if 'blacklist_message' in login_page: + fail(clean_html(login_page)) + login_form = self._hidden_inputs(login_page) login_form.update({ @@ -81,20 +91,24 @@ class TwitchBaseIE(InfoExtractor): if not post_url.startswith('http'): post_url = compat_urlparse.urljoin(redirect_url, post_url) - response = self._download_webpage( - post_url, None, 'Logging in as %s' % username, - data=urlencode_postdata(login_form), - headers={'Referer': redirect_url}) + headers = {'Referer': redirect_url} - error_message = self._search_regex( - r'<div[^>]+class="subwindow_notice"[^>]*>([^<]+)</div>', - response, 'error message', default=None) - if error_message: - raise ExtractorError( - 'Unable to login. 
Twitch said: %s' % error_message, expected=True) + try: + response = self._download_json( + post_url, None, 'Logging in as %s' % username, + data=urlencode_postdata(login_form), + headers=headers) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400: + response = self._parse_json( + e.cause.read().decode('utf-8'), None) + fail(response['message']) + raise - if '>Reset your password<' in response: - self.report_warning('Twitch asks you to reset your password, go to https://secure.twitch.tv/reset/submit') + if response.get('redirect'): + self._download_webpage( + response['redirect'], None, 'Downloading login redirect page', + headers=headers) def _prefer_source(self, formats): try: From 9b8c554ea70ee970009de2628bafe7fd7390bf9e Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sun, 21 Aug 2016 17:55:47 +0100 Subject: [PATCH 245/775] [firsttv] fix extraction(closes #9249) --- youtube_dl/extractor/firsttv.py | 133 ++++++++++---------------------- 1 file changed, 39 insertions(+), 94 deletions(-) diff --git a/youtube_dl/extractor/firsttv.py b/youtube_dl/extractor/firsttv.py index 88bca1007..af7de10b7 100644 --- a/youtube_dl/extractor/firsttv.py +++ b/youtube_dl/extractor/firsttv.py @@ -2,130 +2,75 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import compat_xpath +from ..compat import compat_urlparse from ..utils import ( int_or_none, qualities, unified_strdate, - xpath_attr, - xpath_element, - xpath_text, - xpath_with_ns, ) class FirstTVIE(InfoExtractor): IE_NAME = '1tv' IE_DESC = 'Первый канал' - _VALID_URL = r'https?://(?:www\.)?1tv\.ru/(?:[^/]+/)+p?(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?1tv\.ru/(?:[^/]+/)+(?P<id>[^/?#]+)' _TESTS = [{ - # single format via video_materials.json API - 'url': 'http://www.1tv.ru/prj/inprivate/vypusk/35930', - 'md5': '82a2777648acae812d58b3f5bd42882b', + 'url': 'http://www.1tv.ru/shows/naedine-so-vsemi/vypuski/gost-lyudmila-senchina-naedine-so-vsemi-vypusk-ot-12-02-2015', + 'md5': 'a1b6b60d530ebcf8daacf4565762bbaf', 'info_dict': { - 'id': '35930', + 'id': '40049', 'ext': 'mp4', 'title': 'Гость Людмила Сенчина. Наедине со всеми. Выпуск от 12.02.2015', - 'description': 'md5:357933adeede13b202c7c21f91b871b2', + 'description': 'md5:36a39c1d19618fec57d12efe212a8370', 'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$', 'upload_date': '20150212', 'duration': 2694, }, }, { - # multiple formats via video_materials.json API - 'url': 'http://www.1tv.ru/video_archive/projects/dobroeutro/p113641', - 'info_dict': { - 'id': '113641', - 'ext': 'mp4', - 'title': 'Весенняя аллергия. Доброе утро. Фрагмент выпуска от 07.04.2016', - 'description': 'md5:8dcebb3dded0ff20fade39087fd1fee2', - 'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$', - 'upload_date': '20160407', - 'duration': 179, - 'formats': 'mincount:3', - }, - 'params': { - 'skip_download': True, - }, - }, { - # single format only available via ONE_ONLINE_VIDEOS.archive_single_xml API - 'url': 'http://www.1tv.ru/video_archive/series/f7552/p47038', - 'md5': '519d306c5b5669761fd8906c39dbee23', - 'info_dict': { - 'id': '47038', - 'ext': 'mp4', - 'title': '"Побег". Второй сезон. 
3 серия', - 'description': 'md5:3abf8f6b9bce88201c33e9a3d794a00b', - 'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$', - 'upload_date': '20120516', - 'duration': 3080, - }, - }, { - 'url': 'http://www.1tv.ru/videoarchive/9967', - 'only_matching': True, + 'url': 'http://www.1tv.ru/shows/dobroe-utro/pro-zdorove/vesennyaya-allergiya-dobroe-utro-fragment-vypuska-ot-07042016', + 'only_matching': 'true', }] def _real_extract(self, url): - video_id = self._match_id(url) + display_id = self._match_id(url) - # Videos with multiple formats only available via this API - video = self._download_json( - 'http://www.1tv.ru/video_materials.json?legacy_id=%s' % video_id, - video_id, fatal=False) - - description, thumbnail, upload_date, duration = [None] * 4 - - if video: - item = video[0] - title = item['title'] - quality = qualities(('ld', 'sd', 'hd', )) - formats = [{ - 'url': f['src'], - 'format_id': f.get('name'), - 'quality': quality(f.get('name')), - } for f in item['mbr'] if f.get('src')] - thumbnail = item.get('poster') - else: - # Some videos are not available via video_materials.json - video = self._download_xml( - 'http://www.1tv.ru/owa/win/ONE_ONLINE_VIDEOS.archive_single_xml?pid=%s' % video_id, - video_id) - - NS_MAP = { - 'media': 'http://search.yahoo.com/mrss/', - } - - item = xpath_element(video, './channel/item', fatal=True) - title = xpath_text(item, './title', fatal=True) - formats = [{ - 'url': content.attrib['url'], - } for content in item.findall( - compat_xpath(xpath_with_ns('./media:content', NS_MAP))) if content.attrib.get('url')] - thumbnail = xpath_attr( - item, xpath_with_ns('./media:thumbnail', NS_MAP), 'url') + webpage = self._download_webpage(url, display_id) + playlist_url = compat_urlparse.urljoin(url, self._search_regex( + r'data-playlist-url="([^"]+)', webpage, 'playlist url')) + item = self._download_json(playlist_url, display_id)[0] + video_id = item['id'] + quality = qualities(('ld', 'sd', 'hd', )) + formats = [] + for f in item.get('mbr', []): + src = f.get('src') + if not src: + continue + fname = f.get('name') + formats.append({ + 'url': src, + 'format_id': fname, + 'quality': quality(fname), + }) self._sort_formats(formats) - webpage = self._download_webpage(url, video_id, 'Downloading page', fatal=False) - if webpage: - title = self._html_search_regex( - (r'<div class="tv_translation">\s*<h1><a href="[^"]+">([^<]*)</a>', - r"'title'\s*:\s*'([^']+)'"), - webpage, 'title', default=None) or title - description = self._html_search_regex( - r'<div class="descr">\s*<div> </div>\s*<p>([^<]*)</p></div>', - webpage, 'description', default=None) or self._html_search_meta( - 'description', webpage, 'description') - thumbnail = thumbnail or self._og_search_thumbnail(webpage) - duration = int_or_none(self._html_search_meta( - 'video:duration', webpage, 'video duration', fatal=False)) - upload_date = unified_strdate(self._html_search_meta( - 'ya:ovs:upload_date', webpage, 'upload date', fatal=False)) + title = self._html_search_regex( + (r'<div class="tv_translation">\s*<h1><a href="[^"]+">([^<]*)</a>', + r"'title'\s*:\s*'([^']+)'"), + webpage, 'title', default=None) or item['title'] + description = self._html_search_regex( + r'<div class="descr">\s*<div> </div>\s*<p>([^<]*)</p></div>', + webpage, 'description', default=None) or self._html_search_meta( + 'description', webpage, 'description') + duration = int_or_none(self._html_search_meta( + 'video:duration', webpage, 'video duration', fatal=False)) + upload_date = unified_strdate(self._html_search_meta( + 'ya:ovs:upload_date', 
webpage, 'upload date', fatal=False)) return { 'id': video_id, - 'thumbnail': thumbnail, + 'thumbnail': item.get('poster') or self._og_search_thumbnail(webpage), 'title': title, 'description': description, 'upload_date': upload_date, From 526656726b13f47a33c36e56821136b90d6decf1 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Mon, 22 Aug 2016 02:06:47 +0800 Subject: [PATCH 246/775] [charlierose] Simplify and improve --- youtube_dl/extractor/charlierose.py | 33 +++++++++++++++++------------ 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/charlierose.py b/youtube_dl/extractor/charlierose.py index ba1d1b833..817f7128f 100644 --- a/youtube_dl/extractor/charlierose.py +++ b/youtube_dl/extractor/charlierose.py @@ -6,20 +6,25 @@ from ..utils import remove_end class CharlieRoseIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?charlierose\.com/video(?:s|/player)/(?P<id>\d+)' - _TEST = { + _TESTS = [{ 'url': 'https://charlierose.com/videos/27996', + 'md5': 'fda41d49e67d4ce7c2411fd2c4702e09', 'info_dict': { 'id': '27996', 'ext': 'mp4', 'title': 'Remembering Zaha Hadid', 'thumbnail': 're:^https?://.*\.jpg\?\d+', 'description': 'We revisit past conversations with Zaha Hadid, in memory of the world renowned Iraqi architect.', + 'subtitles': { + 'en': [{ + 'ext': 'vtt', + }], + }, }, - 'params': { - # m3u8 download - 'skip_download': True, - } - } + }, { + 'url': 'https://charlierose.com/videos/27996', + 'only_matching': True, + }] _PLAYER_BASE = 'https://charlierose.com/video/player/%s' @@ -29,17 +34,17 @@ class CharlieRoseIE(InfoExtractor): title = remove_end(self._og_search_title(webpage), ' - Charlie Rose') - entries = self._parse_html5_media_entries(self._PLAYER_BASE % video_id, webpage, video_id)[0] - formats = entries['formats'] + info_dict = self._parse_html5_media_entries( + self._PLAYER_BASE % video_id, webpage, video_id)[0] - self._sort_formats(formats) - self._remove_duplicate_formats(formats) + self._sort_formats(info_dict['formats']) + self._remove_duplicate_formats(info_dict['formats']) - return { + info_dict.update({ 'id': video_id, 'title': title, - 'formats': formats, 'thumbnail': self._og_search_thumbnail(webpage), 'description': self._og_search_description(webpage), - 'subtitles': entries.get('subtitles'), - } + }) + + return info_dict From d0fa172e5fc1d676834252dcd395ec495b20b0bc Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sun, 21 Aug 2016 19:11:51 +0100 Subject: [PATCH 247/775] [firsttv] keep a test videos with multiple formats --- youtube_dl/extractor/firsttv.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/firsttv.py b/youtube_dl/extractor/firsttv.py index af7de10b7..332d12020 100644 --- a/youtube_dl/extractor/firsttv.py +++ b/youtube_dl/extractor/firsttv.py @@ -16,6 +16,7 @@ class FirstTVIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?1tv\.ru/(?:[^/]+/)+(?P<id>[^/?#]+)' _TESTS = [{ + # single format 'url': 'http://www.1tv.ru/shows/naedine-so-vsemi/vypuski/gost-lyudmila-senchina-naedine-so-vsemi-vypusk-ot-12-02-2015', 'md5': 'a1b6b60d530ebcf8daacf4565762bbaf', 'info_dict': { @@ -28,8 +29,21 @@ class FirstTVIE(InfoExtractor): 'duration': 2694, }, }, { + # multiple formats 'url': 'http://www.1tv.ru/shows/dobroe-utro/pro-zdorove/vesennyaya-allergiya-dobroe-utro-fragment-vypuska-ot-07042016', - 'only_matching': 'true', + 'info_dict': { + 'id': '364746', + 'ext': 'mp4', + 'title': 'Весенняя аллергия. Доброе утро. 
Фрагмент выпуска от 07.04.2016', + 'description': 'md5:a242eea0031fd180a4497d52640a9572', + 'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$', + 'upload_date': '20160407', + 'duration': 179, + 'formats': 'mincount:3', + }, + 'params': { + 'skip_download': True, + }, }] def _real_extract(self, url): From ad120ae1c57fe3ff0c7f5559d280cb8230a2b38c Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Mon, 22 Aug 2016 02:18:46 +0800 Subject: [PATCH 248/775] [extractor/common] Change the default m3u8 protocol in HTML5 Helper functions should have consistent default values --- ChangeLog | 1 + youtube_dl/extractor/charlierose.py | 3 ++- youtube_dl/extractor/common.py | 6 +++--- youtube_dl/extractor/snotr.py | 3 ++- 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/ChangeLog b/ChangeLog index a8d8d05a3..383ff59ea 100644 --- a/ChangeLog +++ b/ChangeLog @@ -7,6 +7,7 @@ Core * Fix js_to_json(): correct octal or hexadecimal number detection Extractors ++ [charlierose] Add new extractor (#10382) + [litv] Support 'promo' URLs (#10385) * [snotr] Fix extraction (#10338) * [n-tv.de] Fix extraction (#10331) diff --git a/youtube_dl/extractor/charlierose.py b/youtube_dl/extractor/charlierose.py index 817f7128f..4bf2cf7b0 100644 --- a/youtube_dl/extractor/charlierose.py +++ b/youtube_dl/extractor/charlierose.py @@ -35,7 +35,8 @@ class CharlieRoseIE(InfoExtractor): title = remove_end(self._og_search_title(webpage), ' - Charlie Rose') info_dict = self._parse_html5_media_entries( - self._PLAYER_BASE % video_id, webpage, video_id)[0] + self._PLAYER_BASE % video_id, webpage, video_id, + m3u8_entry_protocol='m3u8_native')[0] self._sort_formats(info_dict['formats']) self._remove_duplicate_formats(info_dict['formats']) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 07d58afe7..ba4c03d3d 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1695,7 +1695,7 @@ class InfoExtractor(object): self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type) return formats - def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None): + def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8'): def absolute_url(video_url): return compat_urlparse.urljoin(base_url, video_url) @@ -1715,8 +1715,8 @@ class InfoExtractor(object): if determine_ext(full_url) == 'm3u8': is_plain_url = False formats = self._extract_m3u8_formats( - full_url, video_id, ext='mp4', entry_protocol='m3u8_native', - m3u8_id=m3u8_id) + full_url, video_id, ext='mp4', + entry_protocol=m3u8_entry_protocol, m3u8_id=m3u8_id) else: is_plain_url = True formats = [{ diff --git a/youtube_dl/extractor/snotr.py b/youtube_dl/extractor/snotr.py index 3bb78cb84..4819fe5b4 100644 --- a/youtube_dl/extractor/snotr.py +++ b/youtube_dl/extractor/snotr.py @@ -46,7 +46,8 @@ class SnotrIE(InfoExtractor): title = self._og_search_title(webpage) description = self._og_search_description(webpage) - info_dict = self._parse_html5_media_entries(url, webpage, video_id)[0] + info_dict = self._parse_html5_media_entries( + url, webpage, video_id, m3u8_entry_protocol='m3u8_native')[0] view_count = str_to_int(self._html_search_regex( r'<p[^>]*>\s*<strong[^>]*>Views:</strong>\s*<span[^>]*>([\d,\.]+)', From cf143c4d977915c993f4aa467b491a6c284bb569 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 22 Aug 2016 03:31:33 +0700 Subject: [PATCH 249/775] [ivi] Add support for 720p and 1080p --- 
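A note on the diff that follows: ranking formats with the qualities() helper means entries whose content_format is not listed in _KNOWN_FORMATS are no longer dropped; they are kept and simply sort below every known name. A rough standalone equivalent of that helper (this mirrors youtube_dl.utils.qualities; the sample format names at the end are only for illustration):

def qualities(quality_ids):
    # Map a format name to its position in the preference list; unknown
    # names get -1 so they sort below every known quality.
    def q(qid):
        try:
            return quality_ids.index(qid)
        except ValueError:
            return -1
    return q


KNOWN_FORMATS = (
    'MP4-low-mobile', 'MP4-mobile', 'FLV-lo', 'MP4-lo', 'FLV-hi', 'MP4-hi',
    'MP4-SHQ', 'MP4-HD720', 'MP4-HD1080')

quality = qualities(KNOWN_FORMATS)
assert quality('MP4-HD1080') == 8
assert quality('MP4-low-mobile') == 0
assert quality('made-up-format') == -1
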
youtube_dl/extractor/ivi.py | 35 ++++++++++++++++++++++++++--------- 1 file changed, 26 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/ivi.py b/youtube_dl/extractor/ivi.py index 472d72b4c..f5ab5f4af 100644 --- a/youtube_dl/extractor/ivi.py +++ b/youtube_dl/extractor/ivi.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re @@ -8,7 +8,7 @@ from .common import InfoExtractor from ..utils import ( ExtractorError, int_or_none, - sanitized_Request, + qualities, ) @@ -49,11 +49,27 @@ class IviIE(InfoExtractor): 'thumbnail': 're:^https?://.*\.jpg$', }, 'skip': 'Only works from Russia', + }, + { + # with MP4-HD720 format + 'url': 'http://www.ivi.ru/watch/146500', + 'md5': 'd63d35cdbfa1ea61a5eafec7cc523e1e', + 'info_dict': { + 'id': '146500', + 'ext': 'mp4', + 'title': 'Кукла', + 'description': 'md5:ffca9372399976a2d260a407cc74cce6', + 'duration': 5599, + 'thumbnail': 're:^https?://.*\.jpg$', + }, + 'skip': 'Only works from Russia', } ] # Sorted by quality - _KNOWN_FORMATS = ['MP4-low-mobile', 'MP4-mobile', 'FLV-lo', 'MP4-lo', 'FLV-hi', 'MP4-hi', 'MP4-SHQ'] + _KNOWN_FORMATS = ( + 'MP4-low-mobile', 'MP4-mobile', 'FLV-lo', 'MP4-lo', 'FLV-hi', 'MP4-hi', + 'MP4-SHQ', 'MP4-HD720', 'MP4-HD1080') def _real_extract(self, url): video_id = self._match_id(url) @@ -69,10 +85,9 @@ class IviIE(InfoExtractor): ] } - request = sanitized_Request( - 'http://api.digitalaccess.ru/api/json/', json.dumps(data)) video_json = self._download_json( - request, video_id, 'Downloading video JSON') + 'http://api.digitalaccess.ru/api/json/', video_id, + 'Downloading video JSON', data=json.dumps(data)) if 'error' in video_json: error = video_json['error'] @@ -84,11 +99,13 @@ class IviIE(InfoExtractor): result = video_json['result'] + quality = qualities(self._KNOWN_FORMATS) + formats = [{ 'url': x['url'], - 'format_id': x['content_format'], - 'preference': self._KNOWN_FORMATS.index(x['content_format']), - } for x in result['files'] if x['content_format'] in self._KNOWN_FORMATS] + 'format_id': x.get('content_format'), + 'quality': quality(x.get('content_format')), + } for x in result['files'] if x.get('url')] self._sort_formats(formats) From 3d897cc791781430f371da98f2f3a05a0b856c5b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 22 Aug 2016 03:34:27 +0700 Subject: [PATCH 250/775] [ivi] Fix episode number extraction --- youtube_dl/extractor/ivi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ivi.py b/youtube_dl/extractor/ivi.py index f5ab5f4af..7c8cb21c2 100644 --- a/youtube_dl/extractor/ivi.py +++ b/youtube_dl/extractor/ivi.py @@ -132,7 +132,7 @@ class IviIE(InfoExtractor): webpage, 'season number', default=None)) episode_number = int_or_none(self._search_regex( - r'<meta[^>]+itemprop="episode"[^>]*>\s*<meta[^>]+itemprop="episodeNumber"[^>]+content="(\d+)', + r'[^>]+itemprop="episode"[^>]*>\s*<meta[^>]+itemprop="episodeNumber"[^>]+content="(\d+)', webpage, 'episode number', default=None)) description = self._og_search_description(webpage, default=None) or self._html_search_meta( From afbab5688e837d9b1617119b1ac26b4a4e343bed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 22 Aug 2016 04:15:46 +0700 Subject: [PATCH 251/775] [ChangeLog] Actualize --- ChangeLog | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/ChangeLog b/ChangeLog index 383ff59ea..ee9b9500f 100644 --- a/ChangeLog +++ b/ChangeLog @@ 
-1,16 +1,28 @@ version <unreleased> Core -+ Recognize file size strings with full unit names (for example "8.5 - megabytes") -+ Support m3u8 manifests in HTML5 multimedia tags -* Fix js_to_json(): correct octal or hexadecimal number detection +* Improve formats and subtitles extension auto calculation ++ Recognize full unit names in parse_filesize ++ Add support for m3u8 manifests in HTML5 multimedia tags +* Fix octal/hexadecimal number detection in js_to_json Extractors ++ [ivi] Add support for 720p and 1080p + [charlierose] Add new extractor (#10382) +* [1tv] Fix extraction (#9249) +* [twitch] Renew authentication +* [kaltura] Improve subtitles extension calculation ++ [zingmp3] Add support for video clips +* [zingmp3] Fix extraction (#10041) +* [kaltura] Improve subtitles extraction (#10279) +* [cultureunplugged] Fix extraction (#10330) ++ [cnn] Add support for money.cnn.com (#2797) +* [cbsnews] Fix extraction (#10362) +* [cbs] Fix extraction (#10393) + [litv] Support 'promo' URLs (#10385) * [snotr] Fix extraction (#10338) * [n-tv.de] Fix extraction (#10331) +* [globo:article] Relax URL and video id regular expressions (#10379) version 2016.08.19 From 6d2679ee26eb6ad0587d01e40ca7a17a6edd6e2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 22 Aug 2016 04:17:34 +0700 Subject: [PATCH 252/775] release 2016.08.22 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 4 ++-- youtube_dl/version.py | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 7af3c7099..7dcca18a1 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.08.19*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.08.19** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.08.22*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.08.22** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.08.19 +[debug] youtube-dl version 2016.08.22 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index ee9b9500f..a8202d3de 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2016.08.22 Core * Improve formats and subtitles extension auto calculation diff --git a/docs/supportedsites.md b/docs/supportedsites.md index edf192138..ca96d2b07 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -121,6 +121,7 @@ - **CDA** - **CeskaTelevize** - **channel9**: Channel 9 + - **CharlieRose** - **Chaturbate** - **Chilloutzone** - **chirbit** @@ -893,5 +894,4 @@ - **Zapiks** - **ZDF** - **ZDFChannel** - - **zingmp3:album**: mp3.zing.vn albums - - **zingmp3:song**: mp3.zing.vn songs + - **zingmp3**: mp3.zing.vn diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 691f2c591..e33d32e97 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.08.19' +__version__ = '2016.08.22' From 55d119e2a10ccbfadc12b9af30c495f46874c2a3 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Mon, 22 Aug 2016 00:06:39 +0100 Subject: [PATCH 253/775] [abc:iview] Add new extractor(closes #6148) --- youtube_dl/extractor/abc.py | 63 ++++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 5 ++- 2 files changed, 67 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/abc.py b/youtube_dl/extractor/abc.py index b584277be..879ded88d 100644 --- a/youtube_dl/extractor/abc.py +++ b/youtube_dl/extractor/abc.py @@ -7,6 +7,8 @@ from ..utils import ( ExtractorError, js_to_json, int_or_none, + update_url_query, + parse_iso8601, ) @@ -93,3 +95,64 @@ class ABCIE(InfoExtractor): 'description': self._og_search_description(webpage), 'thumbnail': self._og_search_thumbnail(webpage), } + + +class ABCIViewIE(InfoExtractor): + IE_NAME = 'abc.net.au:iview' + _VALID_URL = r'https?://iview\.abc\.net\.au/programs/[^/]+/(?P<id>[^/?#]+)' + + _TESTS = [{ + 'url': 'http://iview.abc.net.au/programs/gardening-australia/FA1505V024S00', + 'md5': '979d10b2939101f0d27a06b79edad536', + 'info_dict': { + 'id': 'FA1505V024S00', + 'ext': 'mp4', + 'title': 'Series 27 Ep 24', + 'description': 'md5:b28baeae7504d1148e1d2f0e3ed3c15d', + 'upload_date': '20160820', + 'uploader_id': 'abc1', + 'timestamp': 1471719600, + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + video_params = self._parse_json(self._search_regex( + r'videoParams\s*=\s*({.+?});', webpage, 'video params'), video_id) + title = video_params['title'] + stream = next(s for s in video_params['playlist'] if s.get('type') == 'program') + + formats = [] + f4m_url = stream.get('hds-unmetered') or 
stream['hds-metered'] + formats.extend(self._extract_f4m_formats( + update_url_query(f4m_url, {'hdcore': '3.7.0'}), + video_id, f4m_id='hds', fatal=False)) + formats.extend(self._extract_m3u8_formats(f4m_url.replace( + 'akamaihd.net/z/', 'akamaihd.net/i/').replace('/manifest.f4m', '/master.m3u8'), + video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) + self._sort_formats(formats) + + subtitles = {} + src_vtt = stream.get('captions', {}).get('src-vtt') + if src_vtt: + subtitles['en'] = [{ + 'url': src_vtt, + 'ext': 'vtt', + }] + + return { + 'id': video_id, + 'title': title, + 'description': self._html_search_meta(['og:description', 'twitter:description'], webpage), + 'thumbnail': self._html_search_meta(['og:image', 'twitter:image:src'], webpage), + 'duration': int_or_none(video_params.get('eventDuration')), + 'timestamp': parse_iso8601(video_params.get('pubDate'), ' '), + 'series': video_params.get('seriesTitle'), + 'series_id': video_params.get('seriesHouseNumber') or video_id[:7], + 'episode_number': int_or_none(self._html_search_meta('episodeNumber', webpage)), + 'episode': self._html_search_meta('episode_title', webpage), + 'uploader_id': video_params.get('channel'), + 'formats': formats, + 'subtitles': subtitles, + } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index b0644be11..8e405ad72 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1,7 +1,10 @@ # flake8: noqa from __future__ import unicode_literals -from .abc import ABCIE +from .abc import ( + ABCIE, + ABCIViewIE, +) from .abc7news import Abc7NewsIE from .abcnews import ( AbcNewsIE, From 96229e5f95a5be622a694b464085bdea59134ccf Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Mon, 22 Aug 2016 13:56:09 +0800 Subject: [PATCH 254/775] [mtvservices:embedded] Update config URL It all starts from #10363. The test case in mtvservices:embedded uses config.xml, while the video from #10363 and the test case in generic.py are broken. Both use index.html for fetching the feed URL.
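For reference, the config URL this patch updates is built from the clip's mgid URI: the last component of the mgid is the clip id, stripping it yields the site prefix, and that prefix is substituted into the template; the <feed> element of the downloaded XML (query string stripped) is then used as the feed URL. A minimal sketch of that construction, using a made-up mgid:

    # Made-up mgid, shown only to illustrate how the config URL is built.
    uri = 'mgid:arc:video:example.com:12345'
    video_id = uri.split(':')[-1]        # '12345'
    site_id = uri.replace(video_id, '')  # 'mgid:arc:video:example.com:'
    config_url = ('http://media.mtvnservices.com/pmt-arc/e1/players/{0}/'
                  'context52/config.xml'.format(site_id))
    # config_doc.find('.//feed').text, with its query string stripped,
    # is the feed URL the extractor requests next.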
--- youtube_dl/extractor/mtv.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index 2f455680e..200f340de 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -257,8 +257,8 @@ class MTVServicesEmbeddedIE(MTVServicesInfoExtractor): def _get_feed_url(self, uri): video_id = self._id_from_uri(uri) site_id = uri.replace(video_id, '') - config_url = ('http://media.mtvnservices.com/pmt/e1/players/{0}/' - 'context4/context5/config.xml'.format(site_id)) + config_url = ('http://media.mtvnservices.com/pmt-arc/e1/players/{0}/' + 'context52/config.xml'.format(site_id)) config_doc = self._download_xml(config_url, video_id) feed_node = config_doc.find('.//feed') feed_url = feed_node.text.strip().split('?')[0] From c7c43a93ba4abbd2175ab0891b63def7e25aa385 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Mon, 22 Aug 2016 07:47:25 +0100 Subject: [PATCH 255/775] [common] add helper method to extract akamai m3u8 and f4m formats --- youtube_dl/extractor/common.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index ba4c03d3d..8ed16deee 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1765,6 +1765,18 @@ class InfoExtractor(object): entries.append(media_info) return entries + def _extract_akamai_formats(self, manifest_url, video_id): + formats = [] + f4m_url = re.sub(r'(https?://.+?)/i/', r'\1/z/', manifest_url).replace('/master.m3u8', '/manifest.f4m') + formats.extend(self._extract_f4m_formats( + update_url_query(f4m_url, {'hdcore': '3.7.0'}), + video_id, f4m_id='hds', fatal=False)) + m3u8_url = re.sub(r'(https?://.+?)/z/', r'\1/i/', manifest_url).replace('/manifest.f4m', '/master.m3u8') + formats.extend(self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', 'm3u8_native', + m3u8_id='hls', fatal=False)) + return formats + def _live_title(self, name): """ Generate the title for a live video """ now = datetime.datetime.now() From ad316425840315b40405a55243635fcfbcae5f19 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Mon, 22 Aug 2016 07:48:40 +0100 Subject: [PATCH 256/775] [nrk,abc:iview] use _extract_akamai_formats --- youtube_dl/extractor/abc.py | 10 +--------- youtube_dl/extractor/nrk.py | 14 ++------------ 2 files changed, 3 insertions(+), 21 deletions(-) diff --git a/youtube_dl/extractor/abc.py b/youtube_dl/extractor/abc.py index 879ded88d..c7b6df7d0 100644 --- a/youtube_dl/extractor/abc.py +++ b/youtube_dl/extractor/abc.py @@ -7,7 +7,6 @@ from ..utils import ( ExtractorError, js_to_json, int_or_none, - update_url_query, parse_iso8601, ) @@ -123,14 +122,7 @@ class ABCIViewIE(InfoExtractor): title = video_params['title'] stream = next(s for s in video_params['playlist'] if s.get('type') == 'program') - formats = [] - f4m_url = stream.get('hds-unmetered') or stream['hds-metered'] - formats.extend(self._extract_f4m_formats( - update_url_query(f4m_url, {'hdcore': '3.7.0'}), - video_id, f4m_id='hds', fatal=False)) - formats.extend(self._extract_m3u8_formats(f4m_url.replace( - 'akamaihd.net/z/', 'akamaihd.net/i/').replace('/manifest.f4m', '/master.m3u8'), - video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) + formats = self._extract_akamai_formats(stream['hds-unmetered'], video_id) self._sort_formats(formats) subtitles = {} diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index 6ded5bd45..ed42eb301 100644 --- 
a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -14,16 +14,6 @@ from ..utils import ( class NRKBaseIE(InfoExtractor): - def _extract_formats(self, manifest_url, video_id, fatal=True): - formats = [] - formats.extend(self._extract_f4m_formats( - manifest_url + '?hdcore=3.5.0&plugin=aasp-3.5.0.151.81', - video_id, f4m_id='hds', fatal=fatal)) - formats.extend(self._extract_m3u8_formats(manifest_url.replace( - 'akamaihd.net/z/', 'akamaihd.net/i/').replace('/manifest.f4m', '/master.m3u8'), - video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=fatal)) - return formats - def _real_extract(self, url): video_id = self._match_id(url) @@ -45,7 +35,7 @@ class NRKBaseIE(InfoExtractor): asset_url = asset.get('url') if not asset_url: continue - formats = self._extract_formats(asset_url, video_id, fatal=False) + formats = self._extract_akamai_formats(asset_url, video_id) if not formats: continue self._sort_formats(formats) @@ -69,7 +59,7 @@ class NRKBaseIE(InfoExtractor): if not entries: media_url = data.get('mediaUrl') if media_url: - formats = self._extract_formats(media_url, video_id) + formats = self._extract_akamai_formats(media_url, video_id) self._sort_formats(formats) duration = parse_duration(data.get('duration')) entries = [{ From 7367bdef23a3db4691ba99f01613b7759340f05e Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Mon, 22 Aug 2016 23:10:06 +0100 Subject: [PATCH 257/775] [awaan] fix extraction, modernize, rename the extractors and add test for live stream --- youtube_dl/extractor/{dcn.py => awaan.py} | 89 +++++++++++------------ youtube_dl/extractor/extractors.py | 12 +-- 2 files changed, 50 insertions(+), 51 deletions(-) rename youtube_dl/extractor/{dcn.py => awaan.py} (75%) diff --git a/youtube_dl/extractor/dcn.py b/youtube_dl/extractor/awaan.py similarity index 75% rename from youtube_dl/extractor/dcn.py rename to youtube_dl/extractor/awaan.py index b8542820a..bdf23c6a9 100644 --- a/youtube_dl/extractor/dcn.py +++ b/youtube_dl/extractor/awaan.py @@ -12,46 +12,41 @@ from ..compat import ( from ..utils import ( int_or_none, parse_iso8601, - sanitized_Request, smuggle_url, unsmuggle_url, urlencode_postdata, ) -class DCNIE(InfoExtractor): +class AWAANIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?show/(?P<show_id>\d+)/[^/]+(?:/(?P<video_id>\d+)/(?P<season_id>\d+))?' 
def _real_extract(self, url): show_id, video_id, season_id = re.match(self._VALID_URL, url).groups() if video_id and int(video_id) > 0: return self.url_result( - 'http://www.dcndigital.ae/media/%s' % video_id, 'DCNVideo') + 'http://awaan.ae/media/%s' % video_id, 'AWAANVideo') elif season_id and int(season_id) > 0: return self.url_result(smuggle_url( - 'http://www.dcndigital.ae/program/season/%s' % season_id, - {'show_id': show_id}), 'DCNSeason') + 'http://awaan.ae/program/season/%s' % season_id, + {'show_id': show_id}), 'AWAANSeason') else: return self.url_result( - 'http://www.dcndigital.ae/program/%s' % show_id, 'DCNSeason') + 'http://awaan.ae/program/%s' % show_id, 'AWAANSeason') -class DCNBaseIE(InfoExtractor): - def _extract_video_info(self, video_data, video_id, is_live): +class AWAANBaseIE(InfoExtractor): + def _parse_video_data(self, video_data, video_id, is_live): title = video_data.get('title_en') or video_data['title_ar'] img = video_data.get('img') - thumbnail = 'http://admin.mangomolo.com/analytics/%s' % img if img else None - duration = int_or_none(video_data.get('duration')) - description = video_data.get('description_en') or video_data.get('description_ar') - timestamp = parse_iso8601(video_data.get('create_time'), ' ') return { 'id': video_id, 'title': self._live_title(title) if is_live else title, - 'description': description, - 'thumbnail': thumbnail, - 'duration': duration, - 'timestamp': timestamp, + 'description': video_data.get('description_en') or video_data.get('description_ar'), + 'thumbnail': 'http://admin.mangomolo.com/analytics/%s' % img if img else None, + 'duration': int_or_none(video_data.get('duration')), + 'timestamp': parse_iso8601(video_data.get('create_time'), ' '), 'is_live': is_live, } @@ -75,11 +70,12 @@ class DCNBaseIE(InfoExtractor): return formats -class DCNVideoIE(DCNBaseIE): - IE_NAME = 'dcn:video' +class AWAANVideoIE(AWAANBaseIE): + IE_NAME = 'awaan:video' _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?(?:video(?:/[^/]+)?|media|catchup/[^/]+/[^/]+)/(?P<id>\d+)' _TESTS = [{ 'url': 'http://www.dcndigital.ae/#/video/%D8%B1%D8%AD%D9%84%D8%A9-%D8%A7%D9%84%D8%B9%D9%85%D8%B1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1/17375', + 'md5': '5f61c33bfc7794315c671a62d43116aa', 'info_dict': { 'id': '17375', @@ -90,10 +86,6 @@ class DCNVideoIE(DCNBaseIE): 'timestamp': 1227504126, 'upload_date': '20081124', }, - 'params': { - # m3u8 download - 'skip_download': True, - }, }, { 'url': 'http://awaan.ae/video/26723981/%D8%AF%D8%A7%D8%B1-%D8%A7%D9%84%D8%B3%D9%84%D8%A7%D9%85:-%D8%AE%D9%8A%D8%B1-%D8%AF%D9%88%D8%B1-%D8%A7%D9%84%D8%A3%D9%86%D8%B5%D8%A7%D8%B1', 'only_matching': True, @@ -102,11 +94,10 @@ class DCNVideoIE(DCNBaseIE): def _real_extract(self, url): video_id = self._match_id(url) - request = sanitized_Request( + video_data = self._download_json( 'http://admin.mangomolo.com/analytics/index.php/plus/video?id=%s' % video_id, - headers={'Origin': 'http://www.dcndigital.ae'}) - video_data = self._download_json(request, video_id) - info = self._extract_video_info(video_data, video_id, False) + video_id, headers={'Origin': 'http://awaan.ae'}) + info = self._parse_video_data(video_data, video_id, False) webpage = self._download_webpage( 'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?' 
+ @@ -121,19 +112,31 @@ class DCNVideoIE(DCNBaseIE): return info -class DCNLiveIE(DCNBaseIE): - IE_NAME = 'dcn:live' +class AWAANLiveIE(AWAANBaseIE): + IE_NAME = 'awaan:live' _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?live/(?P<id>\d+)' + _TEST = { + 'url': 'http://awaan.ae/live/6/dubai-tv', + 'info_dict': { + 'id': '6', + 'ext': 'mp4', + 'title': 're:Dubai Al Oula [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + 'upload_date': '20150107', + 'timestamp': 1420588800, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + } def _real_extract(self, url): channel_id = self._match_id(url) - request = sanitized_Request( + channel_data = self._download_json( 'http://admin.mangomolo.com/analytics/index.php/plus/getchanneldetails?channel_id=%s' % channel_id, - headers={'Origin': 'http://www.dcndigital.ae'}) - - channel_data = self._download_json(request, channel_id) - info = self._extract_video_info(channel_data, channel_id, True) + channel_id, headers={'Origin': 'http://awaan.ae'}) + info = self._parse_video_data(channel_data, channel_id, True) webpage = self._download_webpage( 'http://admin.mangomolo.com/analytics/index.php/customers/embed/index?' + @@ -148,8 +151,8 @@ class DCNLiveIE(DCNBaseIE): return info -class DCNSeasonIE(InfoExtractor): - IE_NAME = 'dcn:season' +class AWAANSeasonIE(InfoExtractor): + IE_NAME = 'awaan:season' _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?program/(?:(?P<show_id>\d+)|season/(?P<season_id>\d+))' _TEST = { 'url': 'http://dcndigital.ae/#/program/205024/%D9%85%D8%AD%D8%A7%D8%B6%D8%B1%D8%A7%D8%AA-%D8%A7%D9%84%D8%B4%D9%8A%D8%AE-%D8%A7%D9%84%D8%B4%D8%B9%D8%B1%D8%A7%D9%88%D9%8A', @@ -170,21 +173,17 @@ class DCNSeasonIE(InfoExtractor): data['season'] = season_id show_id = smuggled_data.get('show_id') if show_id is None: - request = sanitized_Request( + season = self._download_json( 'http://admin.mangomolo.com/analytics/index.php/plus/season_info?id=%s' % season_id, - headers={'Origin': 'http://www.dcndigital.ae'}) - season = self._download_json(request, season_id) + season_id, headers={'Origin': 'http://awaan.ae'}) show_id = season['id'] data['show_id'] = show_id - request = sanitized_Request( + show = self._download_json( 'http://admin.mangomolo.com/analytics/index.php/plus/show', - urlencode_postdata(data), - { - 'Origin': 'http://www.dcndigital.ae', + show_id, data=urlencode_postdata(data), headers={ + 'Origin': 'http://awaan.ae', 'Content-Type': 'application/x-www-form-urlencoded' }) - - show = self._download_json(request, show_id) if not season_id: season_id = show['default_season'] for season in show['seasons']: @@ -195,6 +194,6 @@ class DCNSeasonIE(InfoExtractor): for video in show['videos']: video_id = compat_str(video['id']) entries.append(self.url_result( - 'http://www.dcndigital.ae/media/%s' % video_id, 'DCNVideo', video_id)) + 'http://awaan.ae/media/%s' % video_id, 'AWAANVideo', video_id)) return self.playlist_result(entries, season_id, title) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 8e405ad72..04cd23bdb 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -71,6 +71,12 @@ from .atttechchannel import ATTTechChannelIE from .audimedia import AudiMediaIE from .audioboom import AudioBoomIE from .audiomack import AudiomackIE, AudiomackAlbumIE +from .awaan import ( + AWAANIE, + AWAANVideoIE, + AWAANLiveIE, + AWAANSeasonIE, +) from .azubu import AzubuIE, AzubuLiveIE from .baidu import BaiduVideoIE from .bambuser import 
BambuserIE, BambuserChannelIE @@ -200,12 +206,6 @@ from .daum import ( DaumUserIE, ) from .dbtv import DBTVIE -from .dcn import ( - DCNIE, - DCNVideoIE, - DCNLiveIE, - DCNSeasonIE, -) from .dctp import DctpTvIE from .deezer import DeezerPlaylistIE from .democracynow import DemocracynowIE From 3083e4dc070d6378456f9b20ebd5cbf9ee9d92af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 23 Aug 2016 07:22:14 +0700 Subject: [PATCH 258/775] [eagleplatform] Improve detection of embedded videos (Closes #10409) --- youtube_dl/extractor/eagleplatform.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/youtube_dl/extractor/eagleplatform.py b/youtube_dl/extractor/eagleplatform.py index 12d28d3b9..d4dfda8cd 100644 --- a/youtube_dl/extractor/eagleplatform.py +++ b/youtube_dl/extractor/eagleplatform.py @@ -52,11 +52,24 @@ class EaglePlatformIE(InfoExtractor): @staticmethod def _extract_url(webpage): + # Regular iframe embedding mobj = re.search( r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//.+?\.media\.eagleplatform\.com/index/player\?.+?)\1', webpage) if mobj is not None: return mobj.group('url') + # Basic usage embedding (see http://dultonmedia.github.io/eplayer/) + mobj = re.search( + r'''(?xs) + <script[^>]+ + src=(?P<q1>["\'])(?:https?:)?//(?P<host>.+?\.media\.eagleplatform\.com)/player/player\.js(?P=q1) + .+? + <div[^>]+ + class=(?P<q2>["\'])eagleplayer(?P=q2)[^>]+ + data-id=["\'](?P<id>\d+) + ''', webpage) + if mobj is not None: + return 'eagleplatform:%(host)s:%(id)s' % mobj.groupdict() @staticmethod def _handle_error(response): From fb009b7f534e600e98b93e062198ade5826b5800 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 23 Aug 2016 10:28:28 +0100 Subject: [PATCH 259/775] [bravotv] correct clip info extraction and add support for adobe pass auth(closes #10407) --- youtube_dl/extractor/bravotv.py | 81 +++++++++++++++++++++++++-------- 1 file changed, 62 insertions(+), 19 deletions(-) diff --git a/youtube_dl/extractor/bravotv.py b/youtube_dl/extractor/bravotv.py index 541c76944..31763b4c6 100644 --- a/youtube_dl/extractor/bravotv.py +++ b/youtube_dl/extractor/bravotv.py @@ -1,31 +1,74 @@ # coding: utf-8 from __future__ import unicode_literals -from .common import InfoExtractor -from ..utils import smuggle_url +from .adobepass import AdobePassIE +from ..utils import ( + smuggle_url, + update_url_query, + int_or_none, +) -class BravoTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?bravotv\.com/(?:[^/]+/)+videos/(?P<id>[^/?]+)' - _TEST = { +class BravoTVIE(AdobePassIE): + _VALID_URL = r'https?://(?:www\.)?bravotv\.com/(?:[^/]+/)+(?P<id>[^/?#]+)' + _TESTS = [{ 'url': 'http://www.bravotv.com/last-chance-kitchen/season-5/videos/lck-ep-12-fishy-finale', - 'md5': 'd60cdf68904e854fac669bd26cccf801', + 'md5': '9086d0b7ef0ea2aabc4781d75f4e5863', 'info_dict': { - 'id': 'LitrBdX64qLn', + 'id': 'zHyk1_HU_mPy', 'ext': 'mp4', - 'title': 'Last Chance Kitchen Returns', - 'description': 'S13: Last Chance Kitchen Returns for Top Chef Season 13', - 'timestamp': 1448926740, - 'upload_date': '20151130', + 'title': 'LCK Ep 12: Fishy Finale', + 'description': 'S13/E12: Two eliminated chefs have just 12 minutes to cook up a delicious fish dish.', 'uploader': 'NBCU-BRAV', + 'upload_date': '20160302', + 'timestamp': 1456945320, } - } + }, { + 'url': 'http://www.bravotv.com/below-deck/season-3/ep-14-reunion-part-1', + 'only_matching': True, + }] def _real_extract(self, url): - video_id = self._match_id(url) - webpage = 
self._download_webpage(url, video_id) - account_pid = self._search_regex(r'"account_pid"\s*:\s*"([^"]+)"', webpage, 'account pid') - release_pid = self._search_regex(r'"release_pid"\s*:\s*"([^"]+)"', webpage, 'release pid') - return self.url_result(smuggle_url( - 'http://link.theplatform.com/s/%s/%s?mbr=true&switch=progressive' % (account_pid, release_pid), - {'force_smil_url': True}), 'ThePlatform', release_pid) + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + settings = self._parse_json(self._search_regex( + r'jQuery\.extend\([^,]+,\s*({.+})\);', webpage, 'drupal settings'), + display_id) + info = {} + query = { + 'mbr': 'true', + } + account_pid, release_pid = [None] * 2 + tve = settings.get('sharedTVE') + if tve: + query['manifest'] = 'm3u' + account_pid = 'HNK2IC' + release_pid = tve['release_pid'] + if tve.get('entitlement') == 'auth': + adobe_pass = settings.get('adobePass', {}) + resource = self._get_mvpd_resource( + adobe_pass.get('adobePassResourceId', 'bravo'), + tve['title'], release_pid, tve.get('rating')) + query['auth'] = self._extract_mvpd_auth( + url, release_pid, adobe_pass.get('adobePassRequestorId', 'bravo'), resource) + else: + shared_playlist = settings['shared_playlist'] + account_pid = shared_playlist['account_pid'] + metadata = shared_playlist['video_metadata'][shared_playlist['default_clip']] + release_pid = metadata['release_pid'] + info.update({ + 'title': metadata['title'], + 'description': metadata.get('description'), + 'season_number': int_or_none(metadata.get('season_num')), + 'episode_number': int_or_none(metadata.get('episode_num')), + }) + query['switch'] = 'progressive' + info.update({ + '_type': 'url_transparent', + 'id': release_pid, + 'url': smuggle_url(update_url_query( + 'http://link.theplatform.com/s/%s/%s' % (account_pid, release_pid), + query), {'force_smil_url': True}), + 'ie_key': 'ThePlatform', + }) + return info From 18b6216150fa39d5e3cdbf353339e1c010bcee8d Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Tue, 23 Aug 2016 21:55:58 +0800 Subject: [PATCH 260/775] [openload] Fix extraction (closes #10408) Thanks @yokrysty for the algorithm --- ChangeLog | 6 +++ youtube_dl/extractor/openload.py | 81 +++++++------------------------- 2 files changed, 24 insertions(+), 63 deletions(-) diff --git a/ChangeLog b/ChangeLog index a8202d3de..651d4d5d7 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version <unreleased> + +Extractors +* [openload] Fix extraction (#10408) + + version 2016.08.22 Core diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 4e80ca9ff..e181d0b3a 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -1,12 +1,12 @@ # coding: utf-8 from __future__ import unicode_literals, division -import math - from .common import InfoExtractor -from ..compat import compat_chr +from ..compat import ( + compat_chr, + compat_ord, +) from ..utils import ( - decode_png, determine_ext, ExtractorError, ) @@ -42,71 +42,26 @@ class OpenloadIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + webpage = self._download_webpage('https://openload.co/embed/%s/' % video_id, video_id) - if 'File not found' in webpage: + if 'File not found' in webpage or 'deleted by the owner' in webpage: raise ExtractorError('File not found', expected=True) - # The following extraction logic is proposed by @Belderak and @gdkchan - # and declared to be used freely in 
youtube-dl - # See https://github.com/rg3/youtube-dl/issues/9706 + # The following decryption algorithm is written by @yokrysty and + # declared to be freely used in youtube-dl + # See https://github.com/rg3/youtube-dl/issues/10408 + enc_data = self._html_search_regex( + r'<span[^>]+id="hiddenurl"[^>]*>([^<]+)</span>', webpage, 'encrypted data') - numbers_js = self._download_webpage( - 'https://openload.co/assets/js/obfuscator/n.js', video_id, - note='Downloading signature numbers') - signums = self._search_regex( - r'window\.signatureNumbers\s*=\s*[\'"](?P<data>[a-z]+)[\'"]', - numbers_js, 'signature numbers', group='data') + video_url_chars = [] - linkimg_uri = self._search_regex( - r'<img[^>]+id="linkimg"[^>]+src="([^"]+)"', webpage, 'link image') - linkimg = self._request_webpage( - linkimg_uri, video_id, note=False).read() + for c in enc_data: + j = compat_ord(c) + if j >= 33 and j <= 126: + j = ((j + 14) % 94) + 33 + video_url_chars += compat_chr(j) - width, height, pixels = decode_png(linkimg) - - output = '' - for y in range(height): - for x in range(width): - r, g, b = pixels[y][3 * x:3 * x + 3] - if r == 0 and g == 0 and b == 0: - break - else: - output += compat_chr(r) - output += compat_chr(g) - output += compat_chr(b) - - img_str_length = len(output) // 200 - img_str = [[0 for x in range(img_str_length)] for y in range(10)] - - sig_str_length = len(signums) // 260 - sig_str = [[0 for x in range(sig_str_length)] for y in range(10)] - - for i in range(10): - for j in range(img_str_length): - begin = i * img_str_length * 20 + j * 20 - img_str[i][j] = output[begin:begin + 20] - for j in range(sig_str_length): - begin = i * sig_str_length * 26 + j * 26 - sig_str[i][j] = signums[begin:begin + 26] - - parts = [] - # TODO: find better names for str_, chr_ and sum_ - str_ = '' - for i in [2, 3, 5, 7]: - str_ = '' - sum_ = float(99) - for j in range(len(sig_str[i])): - for chr_idx in range(len(img_str[i][j])): - if sum_ > float(122): - sum_ = float(98) - chr_ = compat_chr(int(math.floor(sum_))) - if sig_str[i][j][chr_idx] == chr_ and j >= len(str_): - sum_ += float(2.5) - str_ += img_str[i][j][chr_idx] - parts.append(str_.replace(',', '')) - - video_url = 'https://openload.co/stream/%s~%s~%s~%s' % (parts[3], parts[1], parts[2], parts[0]) + video_url = 'https://openload.co/stream/%s?mime=true' % ''.join(video_url_chars) title = self._og_search_title(webpage, default=None) or self._search_regex( r'<span[^>]+class=["\']title["\'][^>]*>([^<]+)', webpage, From ccb6570e9e625ff5e9adf88729e745acadcaff0e Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 23 Aug 2016 17:31:08 +0100 Subject: [PATCH 261/775] [syfy,bravotv] restrict drupal settings regex --- youtube_dl/extractor/bravotv.py | 2 +- youtube_dl/extractor/syfy.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/bravotv.py b/youtube_dl/extractor/bravotv.py index 31763b4c6..a25d500e4 100644 --- a/youtube_dl/extractor/bravotv.py +++ b/youtube_dl/extractor/bravotv.py @@ -32,7 +32,7 @@ class BravoTVIE(AdobePassIE): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) settings = self._parse_json(self._search_regex( - r'jQuery\.extend\([^,]+,\s*({.+})\);', webpage, 'drupal settings'), + r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);', webpage, 'drupal settings'), display_id) info = {} query = { diff --git a/youtube_dl/extractor/syfy.py b/youtube_dl/extractor/syfy.py index cc81f6003..ab8bab5cd 100644 --- a/youtube_dl/extractor/syfy.py +++ 
b/youtube_dl/extractor/syfy.py @@ -31,7 +31,7 @@ class SyfyIE(AdobePassIE): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) syfy_mpx = list(self._parse_json(self._search_regex( - r'jQuery\.extend\([^,]+,\s*({.+})\);', webpage, 'drupal settings'), + r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);', webpage, 'drupal settings'), display_id)['syfy']['syfy_mpx'].values())[0] video_id = syfy_mpx['mpxGUID'] title = syfy_mpx['episodeTitle'] From 1212e9972fce69df6bd871a5c301294427299cbb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 24 Aug 2016 00:25:21 +0700 Subject: [PATCH 262/775] [youtube] Fix authentication (#10392) --- youtube_dl/extractor/youtube.py | 27 ++++----------------------- 1 file changed, 4 insertions(+), 23 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 268080ba6..38556d86e 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -91,36 +91,17 @@ class YoutubeBaseInfoExtractor(InfoExtractor): if login_page is False: return - galx = self._search_regex(r'(?s)<input.+?name="GALX".+?value="(.+?)"', - login_page, 'Login GALX parameter') + login_form = self._hidden_inputs(login_page) - # Log in - login_form_strs = { - 'continue': 'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1', + login_form.update({ 'Email': username, - 'GALX': galx, 'Passwd': password, - - 'PersistentCookie': 'yes', - '_utf8': '霱', - 'bgresponse': 'js_disabled', - 'checkConnection': '', - 'checkedDomains': 'youtube', - 'dnConn': '', - 'pstMsg': '0', - 'rmShown': '1', - 'secTok': '', - 'signIn': 'Sign in', - 'timeStmp': '', - 'service': 'youtube', - 'uilel': '3', - 'hl': 'en_US', - } + }) login_results = self._download_webpage( self._PASSWORD_CHALLENGE_URL, None, note='Logging in', errnote='unable to log in', fatal=False, - data=urlencode_postdata(login_form_strs)) + data=urlencode_postdata(login_form)) if login_results is False: return False From 05bddcc512cd5058f1af1d5985979b70bdcf4711 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 24 Aug 2016 01:29:50 +0700 Subject: [PATCH 263/775] [youtube] Fix authentication (2) (Closes #10392) --- youtube_dl/extractor/youtube.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 38556d86e..d5d5b7334 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -94,6 +94,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): login_form = self._hidden_inputs(login_page) login_form.update({ + 'checkConnection': 'youtube', 'Email': username, 'Passwd': password, }) From 6e52bbb41320e1b6f4b7a16a5e651d945ac14611 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 24 Aug 2016 01:36:27 +0700 Subject: [PATCH 264/775] [ChangeLog] Actualize --- ChangeLog | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/ChangeLog b/ChangeLog index 651d4d5d7..07ab5867f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,7 +1,14 @@ version <unreleased> Extractors +* [youtube] Fix authentication (#10392) * [openload] Fix extraction (#10408) ++ [bravotv] Add support for Adobe Pass (#10407) +* [bravotv] Fix clip info extraction (#10407) +* [eagleplatform] Improve embedded videos detection (#10409) +* [awaan] Fix extraction +* [mtvservices:embedded] Update config URL ++ [abc:iview] Add extractor (#6148) version 2016.08.22 
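Of the entries above, the openload fix is the easiest to illustrate in isolation: every printable character of the hidden span is shifted by 47 positions within the ASCII range 33-126 (the ((j + 14) % 94) + 33 form used in the patch is equivalent to ROT-47, which is its own inverse). A self-contained sketch of just that decoding step, assuming the encrypted string has already been scraped from the embed page:

    def decode_openload_url(enc_data):
        # Rotate printable ASCII (33-126) by 47; everything else passes through.
        out = []
        for c in enc_data:
            j = ord(c)
            if 33 <= j <= 126:
                j = ((j + 14) % 94) + 33
            out.append(chr(j))
        return ''.join(out)

    # ROT-47 is an involution, so decoding twice round-trips:
    sample = 'abc/DEF-123'  # hypothetical input
    assert decode_openload_url(decode_openload_url(sample)) == sample

The extractor then plugs the decoded value into https://openload.co/stream/<decoded>?mime=true.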
From c86f51ee38b2063ad4eec2f0bb6e3d3551be0855 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 24 Aug 2016 01:38:46 +0700 Subject: [PATCH 265/775] release 2016.08.24 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 9 +++++---- youtube_dl/version.py | 2 +- 4 files changed, 10 insertions(+), 9 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 7dcca18a1..00f593783 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.08.22*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.08.22** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.08.24*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.08.24** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.08.22 +[debug] youtube-dl version 2016.08.24 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 07ab5867f..b4f6dbe08 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2016.08.24 Extractors * [youtube] Fix authentication (#10392) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index ca96d2b07..08db56fa9 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -16,6 +16,7 @@ - **9gag** - **9now.com.au** - **abc.net.au** + - **abc.net.au:iview** - **Abc7News** - **abcnews** - **abcnews:video** @@ -66,6 +67,10 @@ - **audiomack** - **audiomack:album** - **auroravid**: AuroraVid + - **AWAAN** + - **awaan:live** + - **awaan:season** + - **awaan:video** - **Azubu** - **AzubuLive** - **BaiduVideo**: 百度视频 @@ -172,10 +177,6 @@ - **daum.net:playlist** - **daum.net:user** - **DBTV** - - **DCN** - - **dcn:live** - - **dcn:season** - - **dcn:video** - **DctpTv** - **DeezerPlaylist** - **defense.gouv.fr** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index e33d32e97..c1194124e 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.08.22' +__version__ = '2016.08.24' From 8c3e35dd441ceed682da885368f5cd97afb1816e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 24 Aug 2016 08:41:52 +0700 Subject: [PATCH 266/775] [pluralsight] Add support for subtitles (Closes #9681) --- youtube_dl/extractor/pluralsight.py | 74 
++++++++++++++++++++++++----- 1 file changed, 63 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/pluralsight.py b/youtube_dl/extractor/pluralsight.py index 9aab77645..afd3217d9 100644 --- a/youtube_dl/extractor/pluralsight.py +++ b/youtube_dl/extractor/pluralsight.py @@ -1,9 +1,10 @@ from __future__ import unicode_literals -import re -import json -import random import collections +import json +import os +import random +import re from .common import InfoExtractor from ..compat import ( @@ -12,10 +13,12 @@ from ..compat import ( ) from ..utils import ( ExtractorError, + float_or_none, int_or_none, parse_duration, qualities, sanitized_Request, + srt_subtitles_timecode, urlencode_postdata, ) @@ -91,6 +94,51 @@ class PluralsightIE(PluralsightBaseIE): if all(p not in response for p in ('__INITIAL_STATE__', '"currentUser"')): raise ExtractorError('Unable to log in') + def _get_subtitles(self, author, clip_id, lang, name, duration, video_id): + captions_post = { + 'a': author, + 'cn': clip_id, + 'lc': lang, + 'm': name, + } + captions = self._download_json( + '%s/training/Player/Captions' % self._API_BASE, video_id, + 'Downloading captions JSON', 'Unable to download captions JSON', + fatal=False, data=json.dumps(captions_post).encode('utf-8'), + headers={'Content-Type': 'application/json;charset=utf-8'}) + if captions: + return { + lang: [{ + 'ext': 'json', + 'data': json.dumps(captions), + }, { + 'ext': 'srt', + 'data': self._convert_subtitles(duration, captions), + }] + } + + @staticmethod + def _convert_subtitles(duration, subs): + srt = '' + for num, current in enumerate(subs): + current = subs[num] + start, text = float_or_none( + current.get('DisplayTimeOffset')), current.get('Text') + if start is None or text is None: + continue + end = duration if num == len(subs) - 1 else float_or_none( + subs[num + 1].get('DisplayTimeOffset')) + srt += os.linesep.join( + ( + '%d' % num, + '%s --> %s' % ( + srt_subtitles_timecode(start), + srt_subtitles_timecode(end)), + text, + os.linesep, + )) + return srt + def _real_extract(self, url): qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) @@ -138,6 +186,8 @@ class PluralsightIE(PluralsightBaseIE): if not clip: raise ExtractorError('Unable to resolve clip') + title = '%s - %s' % (module['title'], clip['title']) + QUALITIES = { 'low': {'width': 640, 'height': 480}, 'medium': {'width': 848, 'height': 640}, @@ -225,18 +275,20 @@ class PluralsightIE(PluralsightBaseIE): formats.append(f) self._sort_formats(formats) - # TODO: captions - # http://www.pluralsight.com/training/Player/ViewClip + cap = true - # or - # http://www.pluralsight.com/training/Player/Captions - # { a = author, cn = clip_id, lc = end, m = name } + duration = int_or_none( + clip.get('duration')) or parse_duration(clip.get('formattedDuration')) + + # TODO: other languages? 
+ subtitles = self.extract_subtitles( + author, clip_id, 'en', name, duration, display_id) return { 'id': clip.get('clipName') or clip['name'], - 'title': '%s - %s' % (module['title'], clip['title']), - 'duration': int_or_none(clip.get('duration')) or parse_duration(clip.get('formattedDuration')), + 'title': title, + 'duration': duration, 'creator': author, - 'formats': formats + 'formats': formats, + 'subtitles': subtitles, } From 30317f4887178082809706ce8ac9cb989014c8fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 24 Aug 2016 08:52:12 +0700 Subject: [PATCH 267/775] [pluralsight] Modernize and make more robust --- youtube_dl/extractor/pluralsight.py | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/pluralsight.py b/youtube_dl/extractor/pluralsight.py index afd3217d9..ea5caefa9 100644 --- a/youtube_dl/extractor/pluralsight.py +++ b/youtube_dl/extractor/pluralsight.py @@ -17,7 +17,6 @@ from ..utils import ( int_or_none, parse_duration, qualities, - sanitized_Request, srt_subtitles_timecode, urlencode_postdata, ) @@ -78,12 +77,10 @@ class PluralsightIE(PluralsightBaseIE): if not post_url.startswith('http'): post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url) - request = sanitized_Request( - post_url, urlencode_postdata(login_form)) - request.add_header('Content-Type', 'application/x-www-form-urlencoded') - response = self._download_webpage( - request, None, 'Logging in as %s' % username) + post_url, None, 'Logging in as %s' % username, + data=urlencode_postdata(login_form), + headers={'Content-Type': 'application/x-www-form-urlencoded'}) error = self._search_regex( r'<span[^>]+class="field-validation-error"[^>]*>([^<]+)</span>', @@ -128,6 +125,8 @@ class PluralsightIE(PluralsightBaseIE): continue end = duration if num == len(subs) - 1 else float_or_none( subs[num + 1].get('DisplayTimeOffset')) + if end is None: + continue srt += os.linesep.join( ( '%d' % num, @@ -246,13 +245,12 @@ class PluralsightIE(PluralsightBaseIE): 'mt': ext, 'q': '%dx%d' % (f['width'], f['height']), } - request = sanitized_Request( - '%s/training/Player/ViewClip' % self._API_BASE, - json.dumps(clip_post).encode('utf-8')) - request.add_header('Content-Type', 'application/json;charset=utf-8') format_id = '%s-%s' % (ext, quality) clip_url = self._download_webpage( - request, display_id, 'Downloading %s URL' % format_id, fatal=False) + '%s/training/Player/ViewClip' % self._API_BASE, display_id, + 'Downloading %s URL' % format_id, fatal=False, + data=json.dumps(clip_post).encode('utf-8'), + headers={'Content-Type': 'application/json;charset=utf-8'}) # Pluralsight tracks multiple sequential calls to ViewClip API and start # to return 429 HTTP errors after some time (see From 6d94cbd2f43548575b32907724f48331df1693ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 24 Aug 2016 10:07:06 +0700 Subject: [PATCH 268/775] [ChangeLog] Actualize --- ChangeLog | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ChangeLog b/ChangeLog index b4f6dbe08..b63f49ae1 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version <unreleased> + +Extractors ++ [pluralsight] Add support for subtitles (#9681) + + version 2016.08.24 Extractors From d38b27dd9b108a7518dd291c5c231a53abd3f2df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 24 Aug 2016 10:11:04 +0700 Subject: [PATCH 269/775] release 2016.08.24.1 --- .github/ISSUE_TEMPLATE.md | 6 
+++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 00f593783..15acc025a 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.08.24*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.08.24** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.08.24.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.08.24.1** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.08.24 +[debug] youtube-dl version 2016.08.24.1 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index b63f49ae1..4f3f1265f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2016.08.24.1 Extractors + [pluralsight] Add support for subtitles (#9681) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index c1194124e..7447d3d7e 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.08.24' +__version__ = '2016.08.24.1' From 97653f81b2565c752f2c107fc44167a93c3eef42 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Wed, 24 Aug 2016 21:18:56 +0800 Subject: [PATCH 270/775] [bilibili] Mark as broken Bilibili now uses emscripten, which is very difficult for reverse engineering. I don't expect it to be fixed in near future, so I mark it as broken. Ref: #10375 --- youtube_dl/extractor/bilibili.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index d8eb71821..d87c38a02 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -21,6 +21,8 @@ from ..utils import ( class BiliBiliIE(InfoExtractor): + _WORKING = False + _VALID_URL = r'https?://www\.bilibili\.(?:tv|com)/video/av(?P<id>\d+)' _TESTS = [{ From 0c75abbb7bb9135d145805e86c87a5a43b69ac15 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Wed, 24 Aug 2016 23:58:22 +0800 Subject: [PATCH 271/775] [mtvservices:embedded] Use another endpoint to get feed URL Closes #10363 In the original mtvservices:embedded test case, config.xml is still used to get the feed URL. 
Some other examples, including test_Generic_40 (http://www.vulture.com/2016/06/new-key-peele-sketches-released.html), and the video mentioned in #10363, use another endpoint to get the feed URL. The 'index.html' approach works for the original test case, too. So I didn't keep the old approach. --- ChangeLog | 6 ++++++ youtube_dl/extractor/bet.py | 5 ++--- youtube_dl/extractor/mtv.py | 27 +++++++++++++-------------- youtube_dl/extractor/nick.py | 5 ++--- 4 files changed, 23 insertions(+), 20 deletions(-) diff --git a/ChangeLog b/ChangeLog index 4f3f1265f..c3cc8f38f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version <unreleased> + +Extractors +* [mtvservices:embedded] Fix extraction for the new 'edge' player (#10363) + + version 2016.08.24.1 Extractors diff --git a/youtube_dl/extractor/bet.py b/youtube_dl/extractor/bet.py index bd3ee2e2e..1f8ef0303 100644 --- a/youtube_dl/extractor/bet.py +++ b/youtube_dl/extractor/bet.py @@ -2,7 +2,6 @@ from __future__ import unicode_literals from .mtv import MTVServicesInfoExtractor from ..utils import unified_strdate -from ..compat import compat_urllib_parse_urlencode class BetIE(MTVServicesInfoExtractor): @@ -53,9 +52,9 @@ class BetIE(MTVServicesInfoExtractor): _FEED_URL = "http://feeds.mtvnservices.com/od/feed/bet-mrss-player" def _get_feed_query(self, uri): - return compat_urllib_parse_urlencode({ + return { 'uuid': uri, - }) + } def _extract_mgid(self, webpage): return self._search_regex(r'data-uri="([^"]+)', webpage, 'mgid') diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index 200f340de..bdda68819 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -4,7 +4,6 @@ import re from .common import InfoExtractor from ..compat import ( - compat_urllib_parse_urlencode, compat_str, compat_xpath, ) @@ -14,12 +13,13 @@ from ..utils import ( fix_xml_ampersands, float_or_none, HEADRequest, + RegexNotFoundError, sanitized_Request, strip_or_none, timeconvert, unescapeHTML, + update_url_query, url_basename, - RegexNotFoundError, xpath_text, ) @@ -36,6 +36,11 @@ class MTVServicesInfoExtractor(InfoExtractor): def _id_from_uri(uri): return uri.split(':')[-1] + @staticmethod + def _remove_template_parameter(url): + # Remove the templates, like &device={device} + return re.sub(r'&[^=]*?={.*?}(?=(&|$))', '', url) + # This was originally implemented for ComedyCentral, but it also works here @classmethod def _transform_rtmp_url(cls, rtmp_video_url): @@ -117,9 +122,7 @@ class MTVServicesInfoExtractor(InfoExtractor): video_id = self._id_from_uri(uri) self.report_extraction(video_id) content_el = itemdoc.find('%s/%s' % (_media_xml_tag('group'), _media_xml_tag('content'))) - mediagen_url = content_el.attrib['url'] - # Remove the templates, like &device={device} - mediagen_url = re.sub(r'&[^=]*?={.*?}(?=(&|$))', '', mediagen_url) + mediagen_url = self._remove_template_parameter(content_el.attrib['url']) if 'acceptMethods' not in mediagen_url: mediagen_url += '&' if '?' in mediagen_url else '?' mediagen_url += 'acceptMethods=fms' @@ -178,12 +181,12 @@ class MTVServicesInfoExtractor(InfoExtractor): data = {'uri': uri} if self._LANG: data['lang'] = self._LANG - return compat_urllib_parse_urlencode(data) + return data def _get_videos_info(self, uri): video_id = self._id_from_uri(uri) feed_url = self._get_feed_url(uri) - info_url = feed_url + '?' 
+ self._get_feed_query(uri) + info_url = update_url_query(feed_url, self._get_feed_query(uri)) return self._get_videos_info_from_url(info_url, video_id) def _get_videos_info_from_url(self, url, video_id): @@ -256,13 +259,9 @@ class MTVServicesEmbeddedIE(MTVServicesInfoExtractor): def _get_feed_url(self, uri): video_id = self._id_from_uri(uri) - site_id = uri.replace(video_id, '') - config_url = ('http://media.mtvnservices.com/pmt-arc/e1/players/{0}/' - 'context52/config.xml'.format(site_id)) - config_doc = self._download_xml(config_url, video_id) - feed_node = config_doc.find('.//feed') - feed_url = feed_node.text.strip().split('?')[0] - return feed_url + config = self._download_json( + 'http://media.mtvnservices.com/pmt/e1/access/index.html?uri=%s&configtype=edge' % uri, video_id) + return self._remove_template_parameter(config['feedWithQueryParams']) def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) diff --git a/youtube_dl/extractor/nick.py b/youtube_dl/extractor/nick.py index 9c54846e1..64730a624 100644 --- a/youtube_dl/extractor/nick.py +++ b/youtube_dl/extractor/nick.py @@ -2,7 +2,6 @@ from __future__ import unicode_literals from .mtv import MTVServicesInfoExtractor -from ..compat import compat_urllib_parse_urlencode from ..utils import update_url_query @@ -59,10 +58,10 @@ class NickIE(MTVServicesInfoExtractor): }] def _get_feed_query(self, uri): - return compat_urllib_parse_urlencode({ + return { 'feed': 'nick_arc_player_prime', 'mgid': uri, - }) + } def _extract_mgid(self, webpage): return self._search_regex(r'data-contenturi="([^"]+)', webpage, 'mgid') From 08773689f37341f8c70c3fd298f5910235b8c151 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 25 Aug 2016 01:29:32 +0800 Subject: [PATCH 272/775] [kickstarter] Silent the warning for og:description Closes #10415 --- youtube_dl/extractor/kickstarter.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/youtube_dl/extractor/kickstarter.py b/youtube_dl/extractor/kickstarter.py index 9f1ade2e4..c61e78622 100644 --- a/youtube_dl/extractor/kickstarter.py +++ b/youtube_dl/extractor/kickstarter.py @@ -37,7 +37,6 @@ class KickStarterIE(InfoExtractor): 'ext': 'mp4', 'title': 'Power Drive 2000', }, - 'expected_warnings': ['OpenGraph description'], }] def _real_extract(self, url): @@ -67,6 +66,6 @@ class KickStarterIE(InfoExtractor): 'id': video_id, 'url': video_url, 'title': title, - 'description': self._og_search_description(webpage), + 'description': self._og_search_description(webpage, default=None), 'thumbnail': thumbnail, } From 0c6422cdd649c6f39cb2d8680e29f91da18d8c57 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 25 Aug 2016 07:34:55 +0700 Subject: [PATCH 273/775] [README.md] Add FAQ entry for streaming to player --- README.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index a10aaf35c..52e53803e 100644 --- a/README.md +++ b/README.md @@ -730,7 +730,7 @@ Videos or video formats streamed via RTMP protocol can only be downloaded when [ ### I have downloaded a video but how can I play it? -Once the video is fully downloaded, use any video player, such as [mpv](https://mpv.io/), [vlc](http://www.videolan.org) or [mplayer](http://www.mplayerhq.hu/). +Once the video is fully downloaded, use any video player, such as [mpv](https://mpv.io/), [vlc](http://www.videolan.org/) or [mplayer](http://www.mplayerhq.hu/). 
### I extracted a video URL with `-g`, but it does not play on another machine / in my webbrowser. @@ -816,6 +816,12 @@ Use the `--cookies` option, for example `--cookies /path/to/cookies/file.txt`. N Passing cookies to youtube-dl is a good way to workaround login when a particular extractor does not implement it explicitly. Another use case is working around [CAPTCHA](https://en.wikipedia.org/wiki/CAPTCHA) some websites require you to solve in particular cases in order to get access (e.g. YouTube, CloudFlare). +### How do I stream directly to media player? + +You will first need to tell youtube-dl to stream media to stdout with `-o -`, and also tell your media player to read from stdin (it must be capable of this for streaming) and then pipe former to latter. For example, streaming to [vlc](http://www.videolan.org/) can be achieved with: + + youtube-dl -o - http://www.youtube.com/watch?v=BaW_jenozKcj | vlc - + ### Can you add support for this anime video site, or site which shows current movies for free? As a matter of policy (as well as legality), youtube-dl does not include support for services that specialize in infringing copyright. As a rule of thumb, if you cannot easily find a video that the service is quite obviously allowed to distribute (i.e. that has been uploaded by the creator, the creator's distributor, or is published under a free license), the service is probably unfit for inclusion to youtube-dl. From 073ac1225f6fe28905e11f29f2d23f4b4db50f9c Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 25 Aug 2016 08:33:16 +0100 Subject: [PATCH 274/775] [utils] add ac-3 to the list of audio codecs in parse_codecs --- youtube_dl/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 41ca562f1..1091f17f3 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2183,7 +2183,7 @@ def parse_codecs(codecs_str): if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v'): if not vcodec: vcodec = full_codec - elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac'): + elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3'): if not acodec: acodec = full_codec else: From 07ea9c9b05359aef14472dfa66a6578d21c88e96 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 25 Aug 2016 08:37:41 +0100 Subject: [PATCH 275/775] [downloader/hls] fill IV with zeros for IVs shorter than 16-octet --- youtube_dl/downloader/hls.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 8d7971e5d..8dd1b898e 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -120,7 +120,7 @@ class HlsFD(FragmentFD): decrypt_info = parse_m3u8_attributes(line[11:]) if decrypt_info['METHOD'] == 'AES-128': if 'IV' in decrypt_info: - decrypt_info['IV'] = binascii.unhexlify(decrypt_info['IV'][2:]) + decrypt_info['IV'] = binascii.unhexlify(decrypt_info['IV'][2:].zfill(32)) if not re.match(r'^https?://', decrypt_info['URI']): decrypt_info['URI'] = compat_urlparse.urljoin( man_url, decrypt_info['URI']) From f39ffc5877e4e9f112fa26ff21079f179b4aec46 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 25 Aug 2016 08:39:23 +0100 Subject: [PATCH 276/775] [common] extract formats from #EXT-X-MEDIA tags --- youtube_dl/extractor/common.py | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git 
a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 8ed16deee..da0af29ec 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1202,30 +1202,45 @@ class InfoExtractor(object): 'preference': preference, }] last_info = None - last_media = None for line in m3u8_doc.splitlines(): if line.startswith('#EXT-X-STREAM-INF:'): last_info = parse_m3u8_attributes(line) elif line.startswith('#EXT-X-MEDIA:'): - last_media = parse_m3u8_attributes(line) + media = parse_m3u8_attributes(line) + media_type = media.get('TYPE') + if media_type in ('VIDEO', 'AUDIO'): + media_url = media.get('URI') + if media_url: + format_id = [] + for v in (media.get('GROUP-ID'), media.get('NAME')): + if v: + format_id.append(v) + formats.append({ + 'format_id': '-'.join(format_id), + 'url': format_url(media_url), + 'language': media.get('LANGUAGE'), + 'vcodec': 'none' if media_type == 'AUDIO' else None, + 'ext': ext, + 'protocol': entry_protocol, + 'preference': preference, + }) elif line.startswith('#') or not line.strip(): continue else: if last_info is None: formats.append({'url': format_url(line)}) continue - tbr = int_or_none(last_info.get('BANDWIDTH'), scale=1000) + tbr = int_or_none(last_info.get('AVERAGE-BANDWIDTH') or last_info.get('BANDWIDTH'), scale=1000) format_id = [] if m3u8_id: format_id.append(m3u8_id) - last_media_name = last_media.get('NAME') if last_media and last_media.get('TYPE') not in ('SUBTITLES', 'CLOSED-CAPTIONS') else None - # Despite specification does not mention NAME attribute for - # EXT-X-STREAM-INF it still sometimes may be present - stream_name = last_info.get('NAME') or last_media_name # Bandwidth of live streams may differ over time thus making # format_id unpredictable. So it's better to keep provided # format_id intact. 
if not live: + # Despite specification does not mention NAME attribute for + # EXT-X-STREAM-INF it still sometimes may be present + stream_name = last_info.get('NAME') format_id.append(stream_name if stream_name else '%d' % (tbr if tbr else len(formats))) f = { 'format_id': '-'.join(format_id), @@ -1252,9 +1267,6 @@ class InfoExtractor(object): 'abr': abr, }) f.update(parse_codecs(last_info.get('CODECS'))) - if last_media is not None: - f['m3u8_media'] = last_media - last_media = None formats.append(f) last_info = {} return formats From 75fa990dc669563b51f22eeddd2f33acc41c8599 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 25 Aug 2016 08:46:54 +0100 Subject: [PATCH 277/775] [YoutubeDL] add fallback value for thumbnails values in thumbnails sorting --- youtube_dl/YoutubeDL.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 0b3e3da82..c499c1da4 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1256,8 +1256,8 @@ class YoutubeDL(object): info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}] if thumbnails: thumbnails.sort(key=lambda t: ( - t.get('preference'), t.get('width'), t.get('height'), - t.get('id'), t.get('url'))) + t.get('preference') or -1, t.get('width') or -1, t.get('height') or -1, + t.get('id') or '', t.get('url'))) for i, t in enumerate(thumbnails): t['url'] = sanitize_url(t['url']) if t.get('width') and t.get('height'): From 30afe4aeb25576225d3f3ca486983b5ad9258aa0 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 25 Aug 2016 08:49:15 +0100 Subject: [PATCH 278/775] [cbc] Add support for watch.cbc.ca --- youtube_dl/extractor/cbc.py | 172 +++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 2 + 2 files changed, 174 insertions(+) diff --git a/youtube_dl/extractor/cbc.py b/youtube_dl/extractor/cbc.py index a87e97140..d71fddf58 100644 --- a/youtube_dl/extractor/cbc.py +++ b/youtube_dl/extractor/cbc.py @@ -9,10 +9,19 @@ from ..utils import ( js_to_json, smuggle_url, try_get, + xpath_text, + xpath_element, + xpath_with_ns, + find_xpath_attr, + parse_iso8601, + parse_age_limit, + int_or_none, + ExtractorError, ) class CBCIE(InfoExtractor): + IE_NAME = 'cbc.ca' _VALID_URL = r'https?://(?:www\.)?cbc\.ca/(?!player/)(?:[^/]+/)+(?P<id>[^/?#]+)' _TESTS = [{ # with mediaId @@ -114,6 +123,7 @@ class CBCIE(InfoExtractor): class CBCPlayerIE(InfoExtractor): + IE_NAME = 'cbc.ca:player' _VALID_URL = r'(?:cbcplayer:|https?://(?:www\.)?cbc\.ca/(?:player/play/|i/caffeine/syndicate/\?mediaId=))(?P<id>\d+)' _TESTS = [{ 'url': 'http://www.cbc.ca/player/play/2683190193', @@ -167,3 +177,165 @@ class CBCPlayerIE(InfoExtractor): }), 'id': video_id, } + + +class CBCWatchBaseIE(InfoExtractor): + _device_id = None + _device_token = None + _API_BASE_URL = 'https://api-cbc.cloud.clearleap.com/cloffice/client/' + _NS_MAP = { + 'media': 'http://search.yahoo.com/mrss/', + 'clearleap': 'http://www.clearleap.com/namespace/clearleap/1.0/', + } + + def _call_api(self, path, video_id): + url = path if path.startswith('http') else self._API_BASE_URL + path + result = self._download_xml(url, video_id, headers={ + 'X-Clearleap-DeviceId': self._device_id, + 'X-Clearleap-DeviceToken': self._device_token, + }) + error_message = xpath_text(result, 'userMessage') or xpath_text(result, 'systemMessage') + if error_message: + raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message)) + return result + + def _real_initialize(self): + if not 
self._device_id or not self._device_token: + device = self._downloader.cache.load('cbcwatch', 'device') or {} + self._device_id, self._device_token = device.get('id'), device.get('token') + if not self._device_id or not self._device_token: + result = self._download_xml( + self._API_BASE_URL + 'device/register', + None, data=b'<device><type>web</type></device>') + self._device_id = xpath_text(result, 'deviceId', fatal=True) + self._device_token = xpath_text(result, 'deviceToken', fatal=True) + self._downloader.cache.store( + 'cbcwatch', 'device', { + 'id': self._device_id, + 'token': self._device_token, + }) + + def _parse_rss_feed(self, rss): + channel = xpath_element(rss, 'channel', fatal=True) + + def _add_ns(path): + return xpath_with_ns(path, self._NS_MAP) + + entries = [] + for item in channel.findall('item'): + guid = xpath_text(item, 'guid', fatal=True) + title = xpath_text(item, 'title', fatal=True) + + media_group = xpath_element(item, _add_ns('media:group'), fatal=True) + content = xpath_element(media_group, _add_ns('media:content'), fatal=True) + content_url = content.attrib['url'] + + thumbnails = [] + for thumbnail in media_group.findall(_add_ns('media:thumbnail')): + thumbnail_url = thumbnail.get('url') + if not thumbnail_url: + continue + thumbnails.append({ + 'id': thumbnail.get('profile'), + 'url': thumbnail_url, + 'width': int_or_none(thumbnail.get('width')), + 'height': int_or_none(thumbnail.get('height')), + }) + + timestamp = None + release_date = find_xpath_attr( + item, _add_ns('media:credit'), 'role', 'releaseDate') + if release_date is not None: + timestamp = parse_iso8601(release_date.text) + + entries.append({ + '_type': 'url_transparent', + 'url': content_url, + 'id': guid, + 'title': title, + 'description': xpath_text(item, 'description'), + 'timestamp': timestamp, + 'duration': int_or_none(content.get('duration')), + 'age_limit': parse_age_limit(xpath_text(item, _add_ns('media:rating'))), + 'episode': xpath_text(item, _add_ns('clearleap:episode')), + 'episode_number': int_or_none(xpath_text(item, _add_ns('clearleap:episodeInSeason'))), + 'series': xpath_text(item, _add_ns('clearleap:series')), + 'season_number': int_or_none(xpath_text(item, _add_ns('clearleap:season'))), + 'thumbnails': thumbnails, + 'ie_key': 'CBCWatchVideo', + }) + + return self.playlist_result( + entries, xpath_text(channel, 'guid'), + xpath_text(channel, 'title'), + xpath_text(channel, 'description')) + + +class CBCWatchVideoIE(CBCWatchBaseIE): + IE_NAME = 'cbc.ca:watch:video' + _VALID_URL = r'https?://api-cbc\.cloud\.clearleap\.com/cloffice/client/web/play/?\?.*?\bcontentId=(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})' + + def _real_extract(self, url): + video_id = self._match_id(url) + result = self._call_api(url, video_id) + + m3u8_url = xpath_text(result, 'url', fatal=True) + formats = self._extract_m3u8_formats(re.sub(r'/([^/]+)/[^/?]+\.m3u8', r'/\1/\1.m3u8', m3u8_url), video_id, 'mp4', fatal=False) + if len(formats) < 2: + formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4') + # Despite metadata in m3u8 all video+audio formats are + # actually video-only (no audio) + for f in formats: + if f.get('acodec') != 'none' and f.get('vcodec') != 'none': + f['acodec'] = 'none' + self._sort_formats(formats) + + info = { + 'id': video_id, + 'title': video_id, + 'formats': formats, + } + + rss = xpath_element(result, 'rss') + if rss: + info.update(self._parse_rss_feed(rss)['entries'][0]) + del info['url'] + del info['_type'] + del info['ie_key'] + return info + + 
+class CBCWatchIE(CBCWatchBaseIE): + IE_NAME = 'cbc.ca:watch' + _VALID_URL = r'https?://watch\.cbc\.ca/(?:[^/]+/)+(?P<id>[0-9a-f-]+)' + _TESTS = [{ + 'url': 'http://watch.cbc.ca/doc-zone/season-6/customer-disservice/38e815a-009e3ab12e4', + 'info_dict': { + 'id': '38e815a-009e3ab12e4', + 'ext': 'mp4', + 'title': 'Customer (Dis)Service', + 'description': 'md5:8bdd6913a0fe03d4b2a17ebe169c7c87', + 'upload_date': '20160219', + 'timestamp': 1455840000, + }, + 'params': { + # m3u8 download + 'skip_download': True, + 'format': 'bestvideo', + }, + 'skip': 'Geo-restricted to Canada', + }, { + 'url': 'http://watch.cbc.ca/arthur/all/1ed4b385-cd84-49cf-95f0-80f004680057', + 'info_dict': { + 'id': '1ed4b385-cd84-49cf-95f0-80f004680057', + 'title': 'Arthur', + 'description': 'Arthur, the sweetest 8-year-old aardvark, and his pals solve all kinds of problems with humour, kindness and teamwork.', + }, + 'playlist_mincount': 30, + 'skip': 'Geo-restricted to Canada', + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + rss = self._call_api('web/browse/' + video_id, video_id) + return self._parse_rss_feed(rss) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 04cd23bdb..a58145e3e 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -130,6 +130,8 @@ from .carambatv import ( from .cbc import ( CBCIE, CBCPlayerIE, + CBCWatchVideoIE, + CBCWatchIE, ) from .cbs import CBSIE from .cbslocal import CBSLocalIE From f70e9229e623eb041ad514605ceca484b176b850 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 25 Aug 2016 09:11:23 +0100 Subject: [PATCH 279/775] [discoverygo] detect when video needs authentication(closes #10425) --- youtube_dl/extractor/discoverygo.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/discoverygo.py b/youtube_dl/extractor/discoverygo.py index cba709935..e86d16d36 100644 --- a/youtube_dl/extractor/discoverygo.py +++ b/youtube_dl/extractor/discoverygo.py @@ -7,6 +7,7 @@ from ..utils import ( int_or_none, parse_age_limit, unescapeHTML, + ExtractorError, ) @@ -53,7 +54,13 @@ class DiscoveryGoIE(InfoExtractor): title = video['name'] - stream = video['stream'] + stream = video.get('stream') + if not stream: + raise ExtractorError( + 'This video is only available via cable service provider subscription that' + ' is not currently supported. You may want to use --cookies.' + if video.get('authenticated') is True else 'Unable to find stream', + expected=True) STREAM_URL_SUFFIX = 'streamUrl' formats = [] for stream_kind in ('', 'hds'): From 5c13c285660c2811206c5bb29acf43b114ab31e3 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 25 Aug 2016 09:55:23 +0100 Subject: [PATCH 280/775] raise unexpected error when no stream found --- youtube_dl/extractor/adultswim.py | 11 ++++++----- youtube_dl/extractor/discoverygo.py | 11 ++++++----- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/adultswim.py b/youtube_dl/extractor/adultswim.py index 3f7f8c036..96599048f 100644 --- a/youtube_dl/extractor/adultswim.py +++ b/youtube_dl/extractor/adultswim.py @@ -162,11 +162,12 @@ class AdultSwimIE(InfoExtractor): elif video_info.get('videoPlaybackID'): segment_ids = [video_info['videoPlaybackID']] else: - raise ExtractorError( - 'This video is only available via cable service provider subscription that' - ' is not currently supported. You may want to use --cookies.' 
- if video_info.get('auth') is True else 'Unable to find stream or clips', - expected=True) + if video_info.get('auth') is True: + raise ExtractorError( + 'This video is only available via cable service provider subscription that' + ' is not currently supported. You may want to use --cookies.', expected=True) + else: + raise ExtractorError('Unable to find stream or clips') episode_id = video_info['id'] episode_title = video_info['title'] diff --git a/youtube_dl/extractor/discoverygo.py b/youtube_dl/extractor/discoverygo.py index e86d16d36..c4e83b2c3 100644 --- a/youtube_dl/extractor/discoverygo.py +++ b/youtube_dl/extractor/discoverygo.py @@ -56,11 +56,12 @@ class DiscoveryGoIE(InfoExtractor): stream = video.get('stream') if not stream: - raise ExtractorError( - 'This video is only available via cable service provider subscription that' - ' is not currently supported. You may want to use --cookies.' - if video.get('authenticated') is True else 'Unable to find stream', - expected=True) + if video.get('authenticated') is True: + raise ExtractorError( + 'This video is only available via cable service provider subscription that' + ' is not currently supported. You may want to use --cookies.', expected=True) + else: + raise ExtractorError('Unable to find stream') STREAM_URL_SUFFIX = 'streamUrl' formats = [] for stream_kind in ('', 'hds'): From d37708fc861b3534c522f2892b5cd2ee716e1035 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 25 Aug 2016 11:53:47 +0100 Subject: [PATCH 281/775] [YoutubeDL] check only for None Value in thumbnails sorting --- youtube_dl/YoutubeDL.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index c499c1da4..805733fb7 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1256,8 +1256,10 @@ class YoutubeDL(object): info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}] if thumbnails: thumbnails.sort(key=lambda t: ( - t.get('preference') or -1, t.get('width') or -1, t.get('height') or -1, - t.get('id') or '', t.get('url'))) + t.get('preference') if t.get('preference') is not None else -1, + t.get('width') if t.get('width') is not None else -1, + t.get('height') if t.get('height') is not None else -1, + t.get('id') if t.get('id') is not None else '', t.get('url'))) for i, t in enumerate(thumbnails): t['url'] = sanitize_url(t['url']) if t.get('width') and t.get('height'): From 6a76b53355947eef2a534d8f2505ed683db8754f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 25 Aug 2016 18:05:01 +0700 Subject: [PATCH 282/775] [README.md] Quote URL in streaming to player FAQ entry --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 52e53803e..1aa267752 100644 --- a/README.md +++ b/README.md @@ -820,7 +820,7 @@ Passing cookies to youtube-dl is a good way to workaround login when a particula You will first need to tell youtube-dl to stream media to stdout with `-o -`, and also tell your media player to read from stdin (it must be capable of this for streaming) and then pipe former to latter. For example, streaming to [vlc](http://www.videolan.org/) can be achieved with: - youtube-dl -o - http://www.youtube.com/watch?v=BaW_jenozKcj | vlc - + youtube-dl -o - "http://www.youtube.com/watch?v=BaW_jenozKcj" | vlc - ### Can you add support for this anime video site, or site which shows current movies for free? 
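As an aside to PATCH 277 and PATCH 281 above: PATCH 277 adds `or`-based fallbacks to the thumbnail sort keys so that a missing value never leaves a `None` to be compared against an integer (which raises `TypeError` on Python 3), and PATCH 281 then tightens this to explicit `is not None` checks so that falsy-but-valid values such as a `preference` or `width` of `0` are not silently treated as missing. A minimal sketch of the difference — the thumbnail dicts and key-function names below are invented for illustration and are not part of youtube-dl:

```
def key_or_fallback(t):
    # PATCH 277 style: avoids None-vs-int comparisons, but `or` also
    # swallows falsy-but-valid values such as 0.
    return (t.get('preference') or -1, t.get('width') or -1)


def key_none_check(t):
    # PATCH 281 style: only a genuinely missing (None) value gets the
    # fallback, so a real preference/width of 0 keeps its meaning.
    return (
        t.get('preference') if t.get('preference') is not None else -1,
        t.get('width') if t.get('width') is not None else -1,
    )


thumbnails = [
    {'id': 'a', 'preference': 0, 'width': 320},  # explicit (lowest) preference
    {'id': 'b', 'width': 640},                   # no preference at all
]

# `or` fallback: preference 0 collapses to -1, so only width decides the order.
print([t['id'] for t in sorted(thumbnails, key=key_or_fallback)])  # ['a', 'b']
# Explicit None check: the entry with a real preference (even 0) sorts after
# the one that has no preference at all.
print([t['id'] for t in sorted(thumbnails, key=key_none_check)])   # ['b', 'a']
```

The same reasoning applies to the `id` key in PATCH 281, which falls back to `''` rather than `-1` so that ids keep comparing as strings.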
From ea01cdbf61c9a689e7914dd2d06371f3ef73b490 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 25 Aug 2016 18:17:45 +0700 Subject: [PATCH 283/775] [README.md] Clarify how to export cookies from browser for cookies FAQ entry --- README.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 1aa267752..0bb7b791f 100644 --- a/README.md +++ b/README.md @@ -812,7 +812,11 @@ Either prepend `http://www.youtube.com/watch?v=` or separate the ID from the opt ### How do I pass cookies to youtube-dl? -Use the `--cookies` option, for example `--cookies /path/to/cookies/file.txt`. Note that the cookies file must be in Mozilla/Netscape format and the first line of the cookies file must be either `# HTTP Cookie File` or `# Netscape HTTP Cookie File`. Make sure you have correct [newline format](https://en.wikipedia.org/wiki/Newline) in the cookies file and convert newlines if necessary to correspond with your OS, namely `CRLF` (`\r\n`) for Windows, `LF` (`\n`) for Linux and `CR` (`\r`) for Mac OS. `HTTP Error 400: Bad Request` when using `--cookies` is a good sign of invalid newline format. +Use the `--cookies` option, for example `--cookies /path/to/cookies/file.txt`. + +In order to extract cookies from browser use any conforming browser extension for exporting cookies. For example, [cookies.txt](https://chrome.google.com/webstore/detail/cookiestxt/njabckikapfpffapmjgojcnbfjonfjfg) (for Chrome) or [Export Cookies](https://addons.mozilla.org/ru/firefox/addon/export-cookies/) (for Firefox). + +Note that the cookies file must be in Mozilla/Netscape format and the first line of the cookies file must be either `# HTTP Cookie File` or `# Netscape HTTP Cookie File`. Make sure you have correct [newline format](https://en.wikipedia.org/wiki/Newline) in the cookies file and convert newlines if necessary to correspond with your OS, namely `CRLF` (`\r\n`) for Windows, `LF` (`\n`) for Linux and `CR` (`\r`) for Mac OS. `HTTP Error 400: Bad Request` when using `--cookies` is a good sign of invalid newline format. Passing cookies to youtube-dl is a good way to workaround login when a particular extractor does not implement it explicitly. Another use case is working around [CAPTCHA](https://en.wikipedia.org/wiki/CAPTCHA) some websites require you to solve in particular cases in order to get access (e.g. YouTube, CloudFlare). From f26a298247fe19bc8114d6f7a280140dfabee984 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 25 Aug 2016 18:19:41 +0700 Subject: [PATCH 284/775] [README.md] Use en-US URL in cookies FAQ entry --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 0bb7b791f..04f423c17 100644 --- a/README.md +++ b/README.md @@ -814,7 +814,7 @@ Either prepend `http://www.youtube.com/watch?v=` or separate the ID from the opt Use the `--cookies` option, for example `--cookies /path/to/cookies/file.txt`. -In order to extract cookies from browser use any conforming browser extension for exporting cookies. For example, [cookies.txt](https://chrome.google.com/webstore/detail/cookiestxt/njabckikapfpffapmjgojcnbfjonfjfg) (for Chrome) or [Export Cookies](https://addons.mozilla.org/ru/firefox/addon/export-cookies/) (for Firefox). +In order to extract cookies from browser use any conforming browser extension for exporting cookies. 
For example, [cookies.txt](https://chrome.google.com/webstore/detail/cookiestxt/njabckikapfpffapmjgojcnbfjonfjfg) (for Chrome) or [Export Cookies](https://addons.mozilla.org/en-US/firefox/addon/export-cookies/) (for Firefox). Note that the cookies file must be in Mozilla/Netscape format and the first line of the cookies file must be either `# HTTP Cookie File` or `# Netscape HTTP Cookie File`. Make sure you have correct [newline format](https://en.wikipedia.org/wiki/Newline) in the cookies file and convert newlines if necessary to correspond with your OS, namely `CRLF` (`\r\n`) for Windows, `LF` (`\n`) for Linux and `CR` (`\r`) for Mac OS. `HTTP Error 400: Bad Request` when using `--cookies` is a good sign of invalid newline format. From 4c8f9c2577da2f4ba7300d44613599e96cde5c9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 25 Aug 2016 18:27:15 +0700 Subject: [PATCH 285/775] [README.md] Add comments in sample configuration for clarity --- README.md | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 04f423c17..e01b71cff 100644 --- a/README.md +++ b/README.md @@ -412,11 +412,19 @@ You can configure youtube-dl by placing any supported command line option to a c For example, with the following configuration file youtube-dl will always extract the audio, not copy the mtime, use a proxy and save all videos under `Movies` directory in your home directory: ``` --x ---no-mtime ---proxy 127.0.0.1:3128 --o ~/Movies/%(title)s.%(ext)s # Lines starting with # are comments + +# Always extract audio +-x + +# Do not copy the mtime +--no-mtime + +# Use this proxy +--proxy 127.0.0.1:3128 + +# Save all videos under Movies directory in your home directory +-o ~/Movies/%(title)s.%(ext)s ``` Note that options in configuration file are just the same options aka switches used in regular command line calls thus there **must be no whitespace** after `-` or `--`, e.g. `-o` or `--proxy` but not `- o` or `-- proxy`. From 5a3efcd27c1262cc7132f7e1a092524b580788ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 25 Aug 2016 18:57:31 +0700 Subject: [PATCH 286/775] [README.md] Add FAQ entry for download archive --- README.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/README.md b/README.md index e01b71cff..1b9e2a989 100644 --- a/README.md +++ b/README.md @@ -834,6 +834,18 @@ You will first need to tell youtube-dl to stream media to stdout with `-o -`, an youtube-dl -o - "http://www.youtube.com/watch?v=BaW_jenozKcj" | vlc - +### How do I download only new videos from playlist? + +Use the download archive feature. With this feature you should initially download the complete playlist with `--download-archive /path/to/download/archive/file.txt` that will record identifiers of all the videos in a special *download archive file*. Each subsequent run with the same `--download-archive` will download only new videos that are not yet in download archive (if any) and also record them in download archive. Note that only successful downloads are recorded in download archive. 
+ +For example, first run will download complete `PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re` playlist and create download archive `archive.txt`: + + youtube-dl --download-archive archive.txt "https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re" + +Each subsequent run will only download new videos if any: + + youtube-dl --download-archive archive.txt "https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re" + ### Can you add support for this anime video site, or site which shows current movies for free? As a matter of policy (as well as legality), youtube-dl does not include support for services that specialize in infringing copyright. As a rule of thumb, if you cannot easily find a video that the service is quite obviously allowed to distribute (i.e. that has been uploaded by the creator, the creator's distributor, or is published under a free license), the service is probably unfit for inclusion to youtube-dl. From c1f62dd338e0965507ee0976bc88885fdb0fa780 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Thu, 25 Aug 2016 14:45:01 +0200 Subject: [PATCH 287/775] [README] Clean up grammar in --download-archive paragraph --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 1b9e2a989..20241307f 100644 --- a/README.md +++ b/README.md @@ -834,15 +834,15 @@ You will first need to tell youtube-dl to stream media to stdout with `-o -`, an youtube-dl -o - "http://www.youtube.com/watch?v=BaW_jenozKcj" | vlc - -### How do I download only new videos from playlist? +### How do I download only new videos from a playlist? -Use the download archive feature. With this feature you should initially download the complete playlist with `--download-archive /path/to/download/archive/file.txt` that will record identifiers of all the videos in a special *download archive file*. Each subsequent run with the same `--download-archive` will download only new videos that are not yet in download archive (if any) and also record them in download archive. Note that only successful downloads are recorded in download archive. +Use download-archive feature. With this feature you should initially download the complete playlist with `--download-archive /path/to/download/archive/file.txt` that will record identifiers of all the videos in a special file. Each subsequent run with the same `--download-archive` will download only new videos that and skip all videos that have been downloaded before. Note that only successful downloads are recorded in the file. -For example, first run will download complete `PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re` playlist and create download archive `archive.txt`: +For example, at first, youtube-dl --download-archive archive.txt "https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re" -Each subsequent run will only download new videos if any: +will download the complete `PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re` playlist and create a file `archive.txt`. 
Each subsequent run will only download new videos if any: youtube-dl --download-archive archive.txt "https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re" From dc2c37f3162da534281f5f3758231e4c2cb8d1b2 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 25 Aug 2016 20:45:57 +0800 Subject: [PATCH 288/775] [spankbang] Fix description and uploader (closes #10339) --- ChangeLog | 1 + youtube_dl/extractor/spankbang.py | 8 +++----- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/ChangeLog b/ChangeLog index c3cc8f38f..5fb596e33 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version <unreleased> Extractors +* [spankbang] Fix description and uploader (#10339) * [mtvservices:embedded] Fix extraction for the new 'edge' player (#10363) diff --git a/youtube_dl/extractor/spankbang.py b/youtube_dl/extractor/spankbang.py index 50433d0f6..186d22b7d 100644 --- a/youtube_dl/extractor/spankbang.py +++ b/youtube_dl/extractor/spankbang.py @@ -14,7 +14,7 @@ class SpankBangIE(InfoExtractor): 'id': '3vvn', 'ext': 'mp4', 'title': 'fantasy solo', - 'description': 'dillion harper masturbates on a bed', + 'description': 'Watch fantasy solo free HD porn video - 05 minutes - dillion harper masturbates on a bed free adult movies.', 'thumbnail': 're:^https?://.*\.jpg$', 'uploader': 'silly2587', 'age_limit': 18, @@ -44,12 +44,10 @@ class SpankBangIE(InfoExtractor): title = self._html_search_regex( r'(?s)<h1[^>]*>(.+?)</h1>', webpage, 'title') - description = self._search_regex( - r'class="desc"[^>]*>([^<]+)', - webpage, 'description', default=None) + description = self._og_search_description(webpage) thumbnail = self._og_search_thumbnail(webpage) uploader = self._search_regex( - r'class="user"[^>]*>([^<]+)', + r'class="user"[^>]*><img[^>]+>([^<]+)', webpage, 'uploader', fatal=False) age_limit = self._rta_search(webpage) From b54a2da4333556baa3b34fc595060223181320d1 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 25 Aug 2016 22:22:31 +0800 Subject: [PATCH 289/775] [crackle] Fix extraction and update _TESTS (closes #10333) --- ChangeLog | 1 + youtube_dl/extractor/crackle.py | 58 ++++++++++++++++++++++++--------- 2 files changed, 43 insertions(+), 16 deletions(-) diff --git a/ChangeLog b/ChangeLog index 5fb596e33..0789549c0 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version <unreleased> Extractors +* [crackle] Fix extraction (#10333) * [spankbang] Fix description and uploader (#10339) * [mtvservices:embedded] Fix extraction for the new 'edge' player (#10363) diff --git a/youtube_dl/extractor/crackle.py b/youtube_dl/extractor/crackle.py index 79238cce7..21f94d33c 100644 --- a/youtube_dl/extractor/crackle.py +++ b/youtube_dl/extractor/crackle.py @@ -1,5 +1,7 @@ # coding: utf-8 -from __future__ import unicode_literals +from __future__ import unicode_literals, division + +import re from .common import InfoExtractor from ..utils import int_or_none @@ -8,12 +10,22 @@ from ..utils import int_or_none class CrackleIE(InfoExtractor): _VALID_URL = r'(?:crackle:|https?://(?:www\.)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)' _TEST = { - 'url': 'http://www.crackle.com/the-art-of-more/2496419', + 'url': 'http://www.crackle.com/comedians-in-cars-getting-coffee/2498934', 'info_dict': { - 'id': '2496419', + 'id': '2498934', 'ext': 'mp4', - 'title': 'Heavy Lies the Head', - 'description': 'md5:bb56aa0708fe7b9a4861535f15c3abca', + 'title': 'Everybody Respects A Bloody Nose', + 'description': 'Jerry is kaffeeklatsching in L.A. 
with funnyman J.B. Smoove (Saturday Night Live, Real Husbands of Hollywood). They’re headed for brew at 10 Speed Coffee in a 1964 Studebaker Avanti.', + 'thumbnail': 're:^https?://.*\.jpg', + 'duration': 906, + 'series': 'Comedians In Cars Getting Coffee', + 'season_number': 8, + 'episode_number': 4, + 'subtitles': { + 'en-US': [{ + 'ext': 'ttml', + }] + }, }, 'params': { # m3u8 download @@ -21,11 +33,6 @@ class CrackleIE(InfoExtractor): } } - # extracted from http://legacyweb-us.crackle.com/flash/QueryReferrer.ashx - _SUBTITLE_SERVER = 'http://web-us-az.crackle.com' - _UPLYNK_OWNER_ID = 'e8773f7770a44dbd886eee4fca16a66b' - _THUMBNAIL_TEMPLATE = 'http://images-us-am.crackle.com/%stnl_1920x1080.jpg?ts=20140107233116?c=635333335057637614' - # extracted from http://legacyweb-us.crackle.com/flash/ReferrerRedirect.ashx _MEDIA_FILE_SLOTS = { 'c544.flv': { @@ -48,19 +55,22 @@ class CrackleIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) + + config_doc = self._download_xml( + 'http://legacyweb-us.crackle.com/flash/QueryReferrer.ashx?site=16', + video_id, 'Downloading config') + item = self._download_xml( 'http://legacyweb-us.crackle.com/app/revamp/vidwallcache.aspx?flags=-1&fm=%s' % video_id, video_id).find('i') title = item.attrib['t'] - thumbnail = None subtitles = {} formats = self._extract_m3u8_formats( - 'http://content.uplynk.com/ext/%s/%s.m3u8' % (self._UPLYNK_OWNER_ID, video_id), + 'http://content.uplynk.com/ext/%s/%s.m3u8' % (config_doc.attrib['strUplynkOwnerId'], video_id), video_id, 'mp4', m3u8_id='hls', fatal=None) path = item.attrib.get('p') if path: - thumbnail = self._THUMBNAIL_TEMPLATE % path http_base_url = 'http://ahttp.crackle.com/' + path for mfs_path, mfs_info in self._MEDIA_FILE_SLOTS.items(): formats.append({ @@ -76,20 +86,36 @@ class CrackleIE(InfoExtractor): if locale not in subtitles: subtitles[locale] = [] subtitles[locale] = [{ - 'url': '%s/%s%s_%s.xml' % (self._SUBTITLE_SERVER, path, locale, v), + 'url': '%s/%s%s_%s.xml' % (config_doc.attrib['strSubtitleServer'], path, locale, v), 'ext': 'ttml', }] self._sort_formats(formats, ('width', 'height', 'tbr', 'format_id')) + media_details = self._download_json( + 'https://web-api-us.crackle.com/Service.svc/details/media/%s/TW?format=json' % video_id, + video_id, fatal=False) + thumbnails = [] + if media_details: + for key, value in media_details.items(): + mobj = re.match('^Thumbnail_(\d+)x(\d+)$', key) + if mobj: + width, height = list(map(int, mobj.groups())) + thumbnails.append({ + 'id': '%dp' % height, + 'url': value, + 'width': width, + 'height': height, + }) + return { 'id': video_id, 'title': title, 'description': item.attrib.get('d'), - 'duration': int(item.attrib.get('r'), 16) if item.attrib.get('r') else None, + 'duration': int(item.attrib.get('r'), 16) / 1000 if item.attrib.get('r') else None, 'series': item.attrib.get('sn'), 'season_number': int_or_none(item.attrib.get('se')), 'episode_number': int_or_none(item.attrib.get('ep')), - 'thumbnail': thumbnail, + 'thumbnails': thumbnails, 'subtitles': subtitles, 'formats': formats, } From 20bad91d765284e06f8a8c600a122857d23efeea Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 25 Aug 2016 22:38:06 +0800 Subject: [PATCH 290/775] [downloader/external] Clarify that ffmpeg doesn't support SOCKS Ref: #10304 --- youtube_dl/downloader/external.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index cf4556221..17f12e970 100644 --- 
a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -220,6 +220,11 @@ class FFmpegFD(ExternalFD): if proxy: if not re.match(r'^[\da-zA-Z]+://', proxy): proxy = 'http://%s' % proxy + + if proxy.startswith('socks'): + self.report_warning( + '%s does not support SOCKS proxies. Downloading may fail.' % self.get_basename()) + # Since December 2015 ffmpeg supports -http_proxy option (see # http://git.videolan.org/?p=ffmpeg.git;a=commit;h=b4eb1f29ebddd60c41a2eb39f5af701e38e0d3fd) # We could switch to the following code if we are able to detect version properly From a0f071a50dc611a66a5fc8ceceb0b455a88f1cb0 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 25 Aug 2016 19:40:56 +0100 Subject: [PATCH 291/775] [usanetwork] Add new extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/usanetwork.py | 76 ++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+) create mode 100644 youtube_dl/extractor/usanetwork.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index a58145e3e..74d916e64 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -947,6 +947,7 @@ from .uplynk import ( ) from .urort import UrortIE from .urplay import URPlayIE +from .usanetwork import USANetworkIE from .usatoday import USATodayIE from .ustream import UstreamIE, UstreamChannelIE from .ustudio import ( diff --git a/youtube_dl/extractor/usanetwork.py b/youtube_dl/extractor/usanetwork.py new file mode 100644 index 000000000..823340776 --- /dev/null +++ b/youtube_dl/extractor/usanetwork.py @@ -0,0 +1,76 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .adobepass import AdobePassIE +from ..utils import ( + extract_attributes, + smuggle_url, + update_url_query, +) + + +class USANetworkIE(AdobePassIE): + _VALID_URL = r'https?://(?:www\.)?usanetwork\.com/(?:[^/]+/videos|movies)/(?P<id>[^/?#]+)' + _TEST = { + 'url': 'http://www.usanetwork.com/mrrobot/videos/hpe-cybersecurity', + 'md5': '33c0d2ba381571b414024440d08d57fd', + 'info_dict': { + 'id': '3086229', + 'ext': 'mp4', + 'title': 'HPE Cybersecurity', + 'description': 'The more we digitize our world, the more vulnerable we are.', + 'upload_date': '20160818', + 'timestamp': 1471535460, + 'uploader': 'NBCU-USA', + }, + } + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + + player_params = extract_attributes(self._search_regex( + r'(<div[^>]+data-usa-tve-player-container[^>]*>)', webpage, 'player params')) + video_id = player_params['data-mpx-guid'] + title = player_params['data-episode-title'] + + account_pid, path = re.search( + r'data-src="(?:https?)?//player\.theplatform\.com/p/([^/]+)/.*?/(media/guid/\d+/\d+)', + webpage).groups() + + query = { + 'mbr': 'true', + } + if player_params.get('data-is-full-episode') == '1': + query['manifest'] = 'm3u' + + if player_params.get('data-entitlement') == 'auth': + adobe_pass = {} + drupal_settings = self._search_regex( + r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);', + webpage, 'drupal settings', fatal=False) + if drupal_settings: + drupal_settings = self._parse_json(drupal_settings, video_id, fatal=False) + if drupal_settings: + adobe_pass = drupal_settings.get('adobePass', {}) + resource = self._get_mvpd_resource( + adobe_pass.get('adobePassResourceId', 'usa'), + title, video_id, player_params.get('data-episode-rating', 'TV-14')) + query['auth'] = self._extract_mvpd_auth( + 
url, video_id, adobe_pass.get('adobePassRequestorId', 'usa'), resource) + + info = self._search_json_ld(webpage, video_id, default={}) + info.update({ + '_type': 'url_transparent', + 'url': smuggle_url(update_url_query( + 'http://link.theplatform.com/s/%s/%s' % (account_pid, path), + query), {'force_smil_url': True}), + 'id': video_id, + 'title': title, + 'series': player_params.get('data-show-title'), + 'episode': title, + 'ie_key': 'ThePlatform', + }) + return info From e3faecde30d85f54c1a341350cba609d3f5b6691 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 26 Aug 2016 03:43:13 +0700 Subject: [PATCH 292/775] [trutube] Remove extractor (Closes #10438) --- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/trutube.py | 26 -------------------------- 2 files changed, 27 deletions(-) delete mode 100644 youtube_dl/extractor/trutube.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 74d916e64..717ba9375 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -873,7 +873,6 @@ from .toypics import ToypicsUserIE, ToypicsIE from .traileraddict import TrailerAddictIE from .trilulilu import TriluliluIE from .trollvids import TrollvidsIE -from .trutube import TruTubeIE from .tube8 import Tube8IE from .tubitv import TubiTvIE from .tudou import ( diff --git a/youtube_dl/extractor/trutube.py b/youtube_dl/extractor/trutube.py deleted file mode 100644 index d55e0c563..000000000 --- a/youtube_dl/extractor/trutube.py +++ /dev/null @@ -1,26 +0,0 @@ -from __future__ import unicode_literals - -from .nuevo import NuevoBaseIE - - -class TruTubeIE(NuevoBaseIE): - _VALID_URL = r'https?://(?:www\.)?trutube\.tv/(?:video/|nuevo/player/embed\.php\?v=)(?P<id>\d+)' - _TESTS = [{ - 'url': 'http://trutube.tv/video/14880/Ramses-II-Proven-To-Be-A-Red-Headed-Caucasoid-', - 'md5': 'c5b6e301b0a2040b074746cbeaa26ca1', - 'info_dict': { - 'id': '14880', - 'ext': 'flv', - 'title': 'Ramses II - Proven To Be A Red Headed Caucasoid', - 'thumbnail': 're:^http:.*\.jpg$', - } - }, { - 'url': 'https://trutube.tv/nuevo/player/embed.php?v=14880', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - return self._extract_nuevo( - 'https://trutube.tv/nuevo/player/config.php?v=%s' % video_id, - video_id) From 298a120ab76008c900e30de50dc738dd63e79fb4 Mon Sep 17 00:00:00 2001 From: Aleksander Nitecki <ixendr@itogi.re> Date: Thu, 25 Aug 2016 20:21:06 +0200 Subject: [PATCH 293/775] [nhk] Add extractor for VoD. 
--- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/nhk.py | 29 +++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+) create mode 100644 youtube_dl/extractor/nhk.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 717ba9375..8d88d6cb4 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -541,6 +541,7 @@ from .nextmedia import ( ) from .nfb import NFBIE from .nfl import NFLIE +from .nhk import NhkVodIE from .nhl import ( NHLVideocenterIE, NHLNewsIE, diff --git a/youtube_dl/extractor/nhk.py b/youtube_dl/extractor/nhk.py new file mode 100644 index 000000000..90e935351 --- /dev/null +++ b/youtube_dl/extractor/nhk.py @@ -0,0 +1,29 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class NhkVodIE(InfoExtractor): + _VALID_URL = r'http://www3\.nhk\.or\.jp/nhkworld/en/vod/(?P<id>.+)\.html' + _TESTS = [{ + 'url': 'http://www3.nhk.or.jp/nhkworld/en/vod/tokyofashion/20160815.html', + 'info_dict': { + 'id': 'A1bnNiNTE6nY3jLllS-BIISfcC_PpvF5', + 'ext': 'flv', + 'title': '[nhkworld]VOD;2009-251-2016;TOKYO FASHION EXPRESS;The Kimono as Global Fashion;en', + }, + 'params': { + 'skip_download': True # Videos available only for a limited period of time. + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + embed_code = self._search_regex( + r'''nw_vod_ooplayer\('movie-area', '([^']+)'\);''', + webpage, + 'ooyala embed code') + + return self.url_result('ooyala:' + embed_code, 'Ooyala') From f9b373afda2a936c4f8303671f3160c532ccae67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 26 Aug 2016 04:48:40 +0700 Subject: [PATCH 294/775] [nhk:vod] Improve extraction (Closes #10424) --- youtube_dl/extractor/nhk.py | 43 +++++++++++++++++++++++++++---------- 1 file changed, 32 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/nhk.py b/youtube_dl/extractor/nhk.py index 90e935351..691bdfa4e 100644 --- a/youtube_dl/extractor/nhk.py +++ b/youtube_dl/extractor/nhk.py @@ -4,26 +4,47 @@ from .common import InfoExtractor class NhkVodIE(InfoExtractor): - _VALID_URL = r'http://www3\.nhk\.or\.jp/nhkworld/en/vod/(?P<id>.+)\.html' - _TESTS = [{ + _VALID_URL = r'https?://www3\.nhk\.or\.jp/nhkworld/en/vod/(?P<id>.+?)\.html' + _TEST = { + # Videos available only for a limited period of time. Visit + # http://www3.nhk.or.jp/nhkworld/en/vod/ for working samples. 'url': 'http://www3.nhk.or.jp/nhkworld/en/vod/tokyofashion/20160815.html', 'info_dict': { 'id': 'A1bnNiNTE6nY3jLllS-BIISfcC_PpvF5', 'ext': 'flv', - 'title': '[nhkworld]VOD;2009-251-2016;TOKYO FASHION EXPRESS;The Kimono as Global Fashion;en', + 'title': 'TOKYO FASHION EXPRESS - The Kimono as Global Fashion', + 'description': 'md5:db338ee6ce8204f415b754782f819824', + 'series': 'TOKYO FASHION EXPRESS', + 'episode': 'The Kimono as Global Fashion', }, - 'params': { - 'skip_download': True # Videos available only for a limited period of time. 
- }, - }] + 'skip': 'Videos available only for a limited period of time', + } def _real_extract(self, url): video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) embed_code = self._search_regex( - r'''nw_vod_ooplayer\('movie-area', '([^']+)'\);''', - webpage, - 'ooyala embed code') + r'nw_vod_ooplayer\([^,]+,\s*(["\'])(?P<id>(?:(?!\1).)+)\1', + webpage, 'ooyala embed code', group='id') - return self.url_result('ooyala:' + embed_code, 'Ooyala') + title = self._search_regex( + r'<div[^>]+class=["\']episode-detail["\']>\s*<h\d+>([^<]+)', + webpage, 'title', default=None) + description = self._html_search_regex( + r'(?s)<p[^>]+class=["\']description["\'][^>]*>(.+?)</p>', + webpage, 'description', default=None) + series = self._search_regex( + r'<h2[^>]+class=["\']detail-top-player-title[^>]+><a[^>]+>([^<]+)', + webpage, 'series', default=None) + + return { + '_type': 'url_transparent', + 'ie_key': 'Ooyala', + 'url': 'ooyala:%s' % embed_code, + 'title': '%s - %s' % (series, title) if series and title else title, + 'description': description, + 'series': series, + 'episode': title, + } From c9de980106990485fd9bff9a86d463349fe1d384 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 26 Aug 2016 04:49:52 +0700 Subject: [PATCH 295/775] Credit @Xender for nhk:vod (#10424) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 1fd4be785..b9a602c12 100644 --- a/AUTHORS +++ b/AUTHORS @@ -181,3 +181,4 @@ Nehal Patel Rob van Bekkum Petr Zvoníček Pratyush Singh +Aleksander Nitecki From 6b18a24e6ee39ab2fdb5e3d9e1cf2eec547ca3f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 26 Aug 2016 05:57:52 +0700 Subject: [PATCH 296/775] [tnaflix] Fix extraction (Closes #10434) --- youtube_dl/extractor/tnaflix.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/tnaflix.py b/youtube_dl/extractor/tnaflix.py index 7ddf77767..77d56b8ca 100644 --- a/youtube_dl/extractor/tnaflix.py +++ b/youtube_dl/extractor/tnaflix.py @@ -10,6 +10,7 @@ from ..utils import ( int_or_none, parse_duration, str_to_int, + unescapeHTML, xpath_text, ) @@ -80,7 +81,8 @@ class TNAFlixNetworkBaseIE(InfoExtractor): if not cfg_url: inputs = self._hidden_inputs(webpage) - cfg_url = 'https://cdn-fck.tnaflix.com/tnaflix/%s.fid?key=%s' % (inputs['vkey'], inputs['nkey']) + cfg_url = ('https://cdn-fck.tnaflix.com/tnaflix/%s.fid?key=%s&VID=%s&premium=1&vip=1&alpha' + % (inputs['vkey'], inputs['nkey'], video_id)) cfg_xml = self._download_xml( cfg_url, display_id, 'Downloading metadata', @@ -89,7 +91,7 @@ class TNAFlixNetworkBaseIE(InfoExtractor): formats = [] def extract_video_url(vl): - return re.sub('speed=\d+', 'speed=', vl.text) + return re.sub('speed=\d+', 'speed=', unescapeHTML(vl.text)) video_link = cfg_xml.find('./videoLink') if video_link is not None: @@ -201,7 +203,7 @@ class TNAFlixIE(TNAFlixNetworkBaseIE): _TESTS = [{ # anonymous uploader, no categories 'url': 'http://www.tnaflix.com/porn-stars/Carmella-Decesare-striptease/video553878', - 'md5': '7e569419fe6d69543d01e6be22f5f7c4', + 'md5': 'ecf3498417d09216374fc5907f9c6ec0', 'info_dict': { 'id': '553878', 'display_id': 'Carmella-Decesare-striptease', @@ -215,11 +217,11 @@ class TNAFlixIE(TNAFlixNetworkBaseIE): }, { # non-anonymous uploader, categories 'url': 'https://www.tnaflix.com/teen-porn/Educational-xxx-video/video6538', - 'md5': 'fcba2636572895aba116171a899a5658', + 'md5': 
'0f5d4d490dbfd117b8607054248a07c0', 'info_dict': { 'id': '6538', 'display_id': 'Educational-xxx-video', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Educational xxx video', 'description': 'md5:b4fab8f88a8621c8fabd361a173fe5b8', 'thumbnail': 're:https?://.*\.jpg$', From b281aad2dc658e3c6535579d75b42a5634487b83 Mon Sep 17 00:00:00 2001 From: steven7851 <steven7851@msn.com> Date: Fri, 26 Aug 2016 07:32:54 +0800 Subject: [PATCH 297/775] [douyutv] Use new api use lapi for flv info, and html5 api for room info #10153 #10318 --- youtube_dl/extractor/douyutv.py | 87 ++++++++++++++++----------------- 1 file changed, 43 insertions(+), 44 deletions(-) diff --git a/youtube_dl/extractor/douyutv.py b/youtube_dl/extractor/douyutv.py index ce6962755..33efc993e 100644 --- a/youtube_dl/extractor/douyutv.py +++ b/youtube_dl/extractor/douyutv.py @@ -3,9 +3,10 @@ from __future__ import unicode_literals import hashlib import time +import uuid from .common import InfoExtractor from ..utils import (ExtractorError, unescapeHTML) -from ..compat import (compat_str, compat_basestring) +from ..compat import (compat_str, compat_basestring, compat_urllib_parse_urlencode) class DouyuTVIE(InfoExtractor): @@ -21,7 +22,6 @@ class DouyuTVIE(InfoExtractor): 'description': 're:.*m7show@163\.com.*', 'thumbnail': 're:^https?://.*\.jpg$', 'uploader': '7师傅', - 'uploader_id': '431925', 'is_live': True, }, 'params': { @@ -37,7 +37,6 @@ class DouyuTVIE(InfoExtractor): 'description': 'md5:746a2f7a253966a06755a912f0acc0d2', 'thumbnail': 're:^https?://.*\.jpg$', 'uploader': 'douyu小漠', - 'uploader_id': '3769985', 'is_live': True, }, 'params': { @@ -54,7 +53,6 @@ class DouyuTVIE(InfoExtractor): 'description': 're:.*m7show@163\.com.*', 'thumbnail': 're:^https?://.*\.jpg$', 'uploader': '7师傅', - 'uploader_id': '431925', 'is_live': True, }, 'params': { @@ -75,19 +73,39 @@ class DouyuTVIE(InfoExtractor): room_id = self._html_search_regex( r'"room_id"\s*:\s*(\d+),', page, 'room id') - config = None + room_url = 'http://m.douyu.com/html5/live?roomId=%s' % room_id + room_content = self._download_webpage(room_url, video_id) + room_json = self._parse_json(room_content, video_id, fatal=False) + + room = room_json['data'] + + show_status = room.get('show_status') + # 1 = live, 2 = offline + if show_status == '2': + raise ExtractorError( + 'Live stream is offline', expected=True) + + flv_json = None # Douyu API sometimes returns error "Unable to load the requested class: eticket_redis_cache" # Retry with different parameters - same parameters cause same errors for i in range(5): - prefix = 'room/%s?aid=android&client_sys=android&time=%d' % ( - room_id, int(time.time())) - auth = hashlib.md5((prefix + '1231').encode('ascii')).hexdigest() + tt = int(time.time() / 60) + did = uuid.uuid4().hex.upper() - config_page = self._download_webpage( - 'http://www.douyutv.com/api/v1/%s&auth=%s' % (prefix, auth), - video_id) + # Decompile core.swf in webpage by ffdec "Search SWFs in memory" + # core.swf is encrypted originally, but ffdec can dump memory to get the decrypted one + # If API changes in the future, just use this way to update + sign_content = '{room_id}{did}A12Svb&%1UUmf@hC{tt}'.format(room_id = room_id, did = did, tt = tt) + sign = hashlib.md5((sign_content).encode('utf-8')).hexdigest() + + payload = {'cdn': 'ws', 'rate': '0', 'tt': tt, 'did': did, 'sign': sign} + flv_data = compat_urllib_parse_urlencode(payload) + + flv_request_url = 'http://www.douyu.com/lapi/live/getPlay/%s' % room_id + flv_content = self._download_webpage(flv_request_url, video_id, 
data=flv_data, + headers={'Content-Type': 'application/x-www-form-urlencoded'}) try: - config = self._parse_json(config_page, video_id, fatal=False) + flv_json = self._parse_json(flv_content, video_id, fatal=False) except ExtractorError: # Wait some time before retrying to get a different time() value self._sleep(1, video_id, msg_template='%(video_id)s: Error occurs. ' @@ -95,54 +113,35 @@ class DouyuTVIE(InfoExtractor): continue else: break - if config is None: + if flv_json is None: raise ExtractorError('Unable to fetch API result') - data = config['data'] + flv = flv_json['data'] - error_code = config.get('error', 0) + error_code = flv_json.get('error', 0) if error_code is not 0: error_desc = 'Server reported error %i' % error_code - if isinstance(data, (compat_str, compat_basestring)): - error_desc += ': ' + data + if isinstance(flv, (compat_str, compat_basestring)): + error_desc += ': ' + flv raise ExtractorError(error_desc, expected=True) - show_status = data.get('show_status') - # 1 = live, 2 = offline - if show_status == '2': - raise ExtractorError( - 'Live stream is offline', expected=True) + base_url = flv['rtmp_url'] + live_path = flv['rtmp_live'] - base_url = data['rtmp_url'] - live_path = data['rtmp_live'] + video_url = '%s/%s' % (base_url, live_path) - title = self._live_title(unescapeHTML(data['room_name'])) - description = data.get('show_details') - thumbnail = data.get('room_src') - - uploader = data.get('nickname') - uploader_id = data.get('owner_uid') - - multi_formats = data.get('rtmp_multi_bitrate') - if not isinstance(multi_formats, dict): - multi_formats = {} - multi_formats['live'] = live_path - - formats = [{ - 'url': '%s/%s' % (base_url, format_path), - 'format_id': format_id, - 'preference': 1 if format_id == 'live' else 0, - } for format_id, format_path in multi_formats.items()] - self._sort_formats(formats) + title = self._live_title(unescapeHTML(room['room_name'])) + description = room.get('notice') + thumbnail = room.get('room_src') + uploader = room.get('nickname') return { 'id': room_id, 'display_id': video_id, + 'url': video_url, 'title': title, 'description': description, 'thumbnail': thumbnail, 'uploader': uploader, - 'uploader_id': uploader_id, - 'formats': formats, 'is_live': True, } From 906b87cf5f6ccf28ebd75d6a92367d7c238f2ad9 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 26 Aug 2016 19:58:17 +0800 Subject: [PATCH 298/775] [crackle] Revert to template-based thumbnail extraction To reduce to number of HTTP requests --- youtube_dl/extractor/crackle.py | 23 ++++------------------- 1 file changed, 4 insertions(+), 19 deletions(-) diff --git a/youtube_dl/extractor/crackle.py b/youtube_dl/extractor/crackle.py index 21f94d33c..cc68f1c00 100644 --- a/youtube_dl/extractor/crackle.py +++ b/youtube_dl/extractor/crackle.py @@ -1,8 +1,6 @@ # coding: utf-8 from __future__ import unicode_literals, division -import re - from .common import InfoExtractor from ..utils import int_or_none @@ -34,6 +32,7 @@ class CrackleIE(InfoExtractor): } # extracted from http://legacyweb-us.crackle.com/flash/ReferrerRedirect.ashx + _THUMBNAIL_TEMPLATE = 'http://images-us-am.crackle.com/%stnl_1920x1080.jpg?ts=20140107233116?c=635333335057637614' _MEDIA_FILE_SLOTS = { 'c544.flv': { 'width': 544, @@ -69,8 +68,10 @@ class CrackleIE(InfoExtractor): formats = self._extract_m3u8_formats( 'http://content.uplynk.com/ext/%s/%s.m3u8' % (config_doc.attrib['strUplynkOwnerId'], video_id), video_id, 'mp4', m3u8_id='hls', fatal=None) + thumbnail = None path = 
item.attrib.get('p') if path: + thumbnail = self._THUMBNAIL_TEMPLATE % path http_base_url = 'http://ahttp.crackle.com/' + path for mfs_path, mfs_info in self._MEDIA_FILE_SLOTS.items(): formats.append({ @@ -91,22 +92,6 @@ class CrackleIE(InfoExtractor): }] self._sort_formats(formats, ('width', 'height', 'tbr', 'format_id')) - media_details = self._download_json( - 'https://web-api-us.crackle.com/Service.svc/details/media/%s/TW?format=json' % video_id, - video_id, fatal=False) - thumbnails = [] - if media_details: - for key, value in media_details.items(): - mobj = re.match('^Thumbnail_(\d+)x(\d+)$', key) - if mobj: - width, height = list(map(int, mobj.groups())) - thumbnails.append({ - 'id': '%dp' % height, - 'url': value, - 'width': width, - 'height': height, - }) - return { 'id': video_id, 'title': title, @@ -115,7 +100,7 @@ class CrackleIE(InfoExtractor): 'series': item.attrib.get('sn'), 'season_number': int_or_none(item.attrib.get('se')), 'episode_number': int_or_none(item.attrib.get('ep')), - 'thumbnails': thumbnails, + 'thumbnail': thumbnail, 'subtitles': subtitles, 'formats': formats, } From 3b4b82d4cec702fc06e2d6b38a44dd0c7bd77a5b Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 27 Aug 2016 01:16:39 +0800 Subject: [PATCH 299/775] [douyutv] Simplify --- youtube_dl/extractor/douyutv.py | 86 +++++++++++++++------------------ 1 file changed, 39 insertions(+), 47 deletions(-) diff --git a/youtube_dl/extractor/douyutv.py b/youtube_dl/extractor/douyutv.py index 33efc993e..e366e17e6 100644 --- a/youtube_dl/extractor/douyutv.py +++ b/youtube_dl/extractor/douyutv.py @@ -4,9 +4,16 @@ from __future__ import unicode_literals import hashlib import time import uuid + from .common import InfoExtractor -from ..utils import (ExtractorError, unescapeHTML) -from ..compat import (compat_str, compat_basestring, compat_urllib_parse_urlencode) +from ..compat import ( + compat_str, + compat_urllib_parse_urlencode, +) +from ..utils import ( + ExtractorError, + unescapeHTML, +) class DouyuTVIE(InfoExtractor): @@ -63,6 +70,10 @@ class DouyuTVIE(InfoExtractor): 'only_matching': True, }] + # Decompile core.swf in webpage by ffdec "Search SWFs in memory". core.swf + # is encrypted originally, but ffdec can dump memory to get the decrypted one. 
+ _API_KEY = 'A12Svb&%1UUmf@hC' + def _real_extract(self, url): video_id = self._match_id(url) @@ -73,60 +84,41 @@ class DouyuTVIE(InfoExtractor): room_id = self._html_search_regex( r'"room_id"\s*:\s*(\d+),', page, 'room id') - room_url = 'http://m.douyu.com/html5/live?roomId=%s' % room_id - room_content = self._download_webpage(room_url, video_id) - room_json = self._parse_json(room_content, video_id, fatal=False) + room = self._download_json( + 'http://m.douyu.com/html5/live?roomId=%s' % room_id, video_id, + note='Downloading room info')['data'] - room = room_json['data'] - - show_status = room.get('show_status') # 1 = live, 2 = offline - if show_status == '2': - raise ExtractorError( - 'Live stream is offline', expected=True) + if room.get('show_status') == '2': + raise ExtractorError('Live stream is offline', expected=True) - flv_json = None - # Douyu API sometimes returns error "Unable to load the requested class: eticket_redis_cache" - # Retry with different parameters - same parameters cause same errors - for i in range(5): - tt = int(time.time() / 60) - did = uuid.uuid4().hex.upper() + tt = compat_str(int(time.time() / 60)) + did = uuid.uuid4().hex.upper() - # Decompile core.swf in webpage by ffdec "Search SWFs in memory" - # core.swf is encrypted originally, but ffdec can dump memory to get the decrypted one - # If API changes in the future, just use this way to update - sign_content = '{room_id}{did}A12Svb&%1UUmf@hC{tt}'.format(room_id = room_id, did = did, tt = tt) - sign = hashlib.md5((sign_content).encode('utf-8')).hexdigest() + sign_content = ''.join((room_id, did, self._API_KEY, tt)) + sign = hashlib.md5((sign_content).encode('utf-8')).hexdigest() - payload = {'cdn': 'ws', 'rate': '0', 'tt': tt, 'did': did, 'sign': sign} - flv_data = compat_urllib_parse_urlencode(payload) + flv_data = compat_urllib_parse_urlencode({ + 'cdn': 'ws', + 'rate': '0', + 'tt': tt, + 'did': did, + 'sign': sign, + }) - flv_request_url = 'http://www.douyu.com/lapi/live/getPlay/%s' % room_id - flv_content = self._download_webpage(flv_request_url, video_id, data=flv_data, - headers={'Content-Type': 'application/x-www-form-urlencoded'}) - try: - flv_json = self._parse_json(flv_content, video_id, fatal=False) - except ExtractorError: - # Wait some time before retrying to get a different time() value - self._sleep(1, video_id, msg_template='%(video_id)s: Error occurs. 
' - 'Waiting for %(timeout)s seconds before retrying') - continue - else: - break - if flv_json is None: - raise ExtractorError('Unable to fetch API result') + video_info = self._download_json( + 'http://www.douyu.com/lapi/live/getPlay/%s' % room_id, video_id, + data=flv_data, note='Downloading video info', + headers={'Content-Type': 'application/x-www-form-urlencoded'}) - flv = flv_json['data'] - - error_code = flv_json.get('error', 0) + error_code = video_info.get('error', 0) if error_code is not 0: - error_desc = 'Server reported error %i' % error_code - if isinstance(flv, (compat_str, compat_basestring)): - error_desc += ': ' + flv - raise ExtractorError(error_desc, expected=True) + raise ExtractorError( + '%s reported error %i' % (self.IE_NAME, error_code), + expected=True) - base_url = flv['rtmp_url'] - live_path = flv['rtmp_live'] + base_url = video_info['data']['rtmp_url'] + live_path = video_info['data']['rtmp_live'] video_url = '%s/%s' % (base_url, live_path) From 92c27a0dbf19eff211e7ffdd8db5895387e75529 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 28 Aug 2016 02:35:49 +0700 Subject: [PATCH 300/775] [periscope:user] Fix extraction (Closes #10453) --- youtube_dl/extractor/periscope.py | 47 ++++++++++++++++++++----------- 1 file changed, 30 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/periscope.py b/youtube_dl/extractor/periscope.py index 75f5884a9..6c640089d 100644 --- a/youtube_dl/extractor/periscope.py +++ b/youtube_dl/extractor/periscope.py @@ -8,7 +8,14 @@ from ..utils import ( ) -class PeriscopeIE(InfoExtractor): +class PeriscopeBaseIE(InfoExtractor): + def _call_api(self, method, query, item_id): + return self._download_json( + 'https://api.periscope.tv/api/v2/%s' % method, + item_id, query=query) + + +class PeriscopeIE(PeriscopeBaseIE): IE_DESC = 'Periscope' IE_NAME = 'periscope' _VALID_URL = r'https?://(?:www\.)?periscope\.tv/[^/]+/(?P<id>[^/?#]+)' @@ -34,14 +41,11 @@ class PeriscopeIE(InfoExtractor): 'only_matching': True, }] - def _call_api(self, method, value): - return self._download_json( - 'https://api.periscope.tv/api/v2/%s?broadcast_id=%s' % (method, value), value) - def _real_extract(self, url): token = self._match_id(url) - broadcast_data = self._call_api('getBroadcastPublic', token) + broadcast_data = self._call_api( + 'getBroadcastPublic', {'broadcast_id': token}, token) broadcast = broadcast_data['broadcast'] status = broadcast['status'] @@ -61,7 +65,8 @@ class PeriscopeIE(InfoExtractor): 'url': broadcast[image], } for image in ('image_url', 'image_url_small') if broadcast.get(image)] - stream = self._call_api('getAccessPublic', token) + stream = self._call_api( + 'getAccessPublic', {'broadcast_id': token}, token) formats = [] for format_id in ('replay', 'rtmp', 'hls', 'https_hls'): @@ -88,7 +93,7 @@ class PeriscopeIE(InfoExtractor): } -class PeriscopeUserIE(InfoExtractor): +class PeriscopeUserIE(PeriscopeBaseIE): _VALID_URL = r'https?://www\.periscope\.tv/(?P<id>[^/]+)/?$' IE_DESC = 'Periscope user videos' IE_NAME = 'periscope:user' @@ -106,26 +111,34 @@ class PeriscopeUserIE(InfoExtractor): } def _real_extract(self, url): - user_id = self._match_id(url) + user_name = self._match_id(url) - webpage = self._download_webpage(url, user_id) + webpage = self._download_webpage(url, user_name) data_store = self._parse_json( unescapeHTML(self._search_regex( r'data-store=(["\'])(?P<data>.+?)\1', webpage, 'data store', default='{}', group='data')), - user_id) + user_name) - user = data_store.get('User', 
{}).get('user', {}) - title = user.get('display_name') or user.get('username') + user = list(data_store['UserCache']['users'].values())[0]['user'] + user_id = user['id'] + session_id = data_store['SessionToken']['broadcastHistory']['token']['session_id'] + + broadcasts = self._call_api( + 'getUserBroadcastsPublic', + {'user_id': user_id, 'session_id': session_id}, + user_name)['broadcasts'] + + broadcast_ids = [ + broadcast['id'] for broadcast in broadcasts if broadcast.get('id')] + + title = user.get('display_name') or user.get('username') or user_name description = user.get('description') - broadcast_ids = (data_store.get('UserBroadcastHistory', {}).get('broadcastIds') or - data_store.get('BroadcastCache', {}).get('broadcastIds', [])) - entries = [ self.url_result( - 'https://www.periscope.tv/%s/%s' % (user_id, broadcast_id)) + 'https://www.periscope.tv/%s/%s' % (user_name, broadcast_id)) for broadcast_id in broadcast_ids] return self.playlist_result(entries, user_id, title, description) From d7aae610f6674d96971246f916973158374f88b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 28 Aug 2016 07:00:15 +0700 Subject: [PATCH 301/775] [ChangeLog] Actualize --- ChangeLog | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 0789549c0..4062c2021 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,8 +1,23 @@ version <unreleased> +Core ++ Add warning message that ffmpeg doesn't support SOCKS +* Improve thumbnail sorting ++ Extract formats from #EXT-X-MEDIA tags in _extract_m3u8_formats +* Fill IV with leading zeros for IVs shorter than 16 octets in hlsnative ++ Add ac-3 to the list of audio codecs in parse_codecs + Extractors +* [periscope:user] Fix extraction (#10453) +* [douyutv] Fix extraction (#10153, #10318, #10444) ++ [nhk:vod] Add extractor for www3.nhk.or.jp on demand (#4437, #10424) +- [trutube] Remove extractor (#10438) ++ [usanetwork] Add extractor for usanetwork.com * [crackle] Fix extraction (#10333) -* [spankbang] Fix description and uploader (#10339) +* [spankbang] Fix description and uploader extraction (#10339) +* [discoverygo] Detect cable provider restricted videos (#10425) ++ [cbc] Add support for watch.cbc.ca +* [kickstarter] Silent the warning for og:description (#10415) * [mtvservices:embedded] Fix extraction for the new 'edge' player (#10363) From 71e90766b5f7d57bdbe20b71c32ce5a8f66aecc2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 28 Aug 2016 07:09:03 +0700 Subject: [PATCH 302/775] [README.md] Fix typo in download archive FAQ entry --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 20241307f..87465aa5e 100644 --- a/README.md +++ b/README.md @@ -836,7 +836,7 @@ You will first need to tell youtube-dl to stream media to stdout with `-o -`, an ### How do I download only new videos from a playlist? -Use download-archive feature. With this feature you should initially download the complete playlist with `--download-archive /path/to/download/archive/file.txt` that will record identifiers of all the videos in a special file. Each subsequent run with the same `--download-archive` will download only new videos that and skip all videos that have been downloaded before. Note that only successful downloads are recorded in the file. +Use download-archive feature. 
With this feature you should initially download the complete playlist with `--download-archive /path/to/download/archive/file.txt` that will record identifiers of all the videos in a special file. Each subsequent run with the same `--download-archive` will download only new videos and skip all videos that have been downloaded before. Note that only successful downloads are recorded in the file. For example, at first, From 1198fe14a1eff1047652c51163266246577e3682 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 28 Aug 2016 07:24:08 +0700 Subject: [PATCH 303/775] release 2016.08.28 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 11 +++++++---- youtube_dl/version.py | 2 +- 4 files changed, 12 insertions(+), 9 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 15acc025a..a2fe59f80 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.08.24.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.08.24.1** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.08.28*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.08.28** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.08.24.1 +[debug] youtube-dl version 2016.08.28 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 4062c2021..d3496b5dc 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2016.08.28 Core + Add warning message that ffmpeg doesn't support SOCKS diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 08db56fa9..bf08697be 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -89,7 +89,7 @@ - **Bet** - **Bigflix** - **Bild**: Bild.de - - **BiliBili** + - **BiliBili** (Currently broken) - **BioBioChileTV** - **BIQLE** - **BleacherReport** @@ -115,8 +115,10 @@ - **Canvas** - **CarambaTV** - **CarambaTVPage** - - **CBC** - - **CBCPlayer** + - **cbc.ca** + - **cbc.ca:player** + - **cbc.ca:watch** + - **cbc.ca:watch:video** - **CBS** - **CBSInteractive** - **CBSLocal** @@ -448,6 +450,7 @@ - **NextMediaActionNews**: 蘋果日報 - 動新聞 - **nfb**: National Film Board of Canada - **nfl.com** + - **NhkVod** - **nhl.com** - **nhl.com:news**: NHL news - **nhl.com:videocenter** @@ -713,7 +716,6 @@ - **TrailerAddict** (Currently broken) - 
**Trilulilu** - **trollvids** - - **TruTube** - **Tube8** - **TubiTv** - **tudou** @@ -758,6 +760,7 @@ - **uplynk:preplay** - **Urort**: NRK P3 Urørt - **URPlay** + - **USANetwork** - **USAToday** - **ustream** - **ustream:channel** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 7447d3d7e..ee30ca2ad 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.08.24.1' +__version__ = '2016.08.28' From 39efc6e3e048a8323c36efcdf6b7434259a35e44 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 28 Aug 2016 15:46:11 +0800 Subject: [PATCH 304/775] [generic] Update some _TESTS --- youtube_dl/extractor/generic.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 506892b11..c6e655c84 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -104,7 +104,8 @@ class GenericIE(InfoExtractor): }, 'expected_warnings': [ 'URL could be a direct video link, returning it as such.' - ] + ], + 'skip': 'URL invalid', }, # Direct download with broken HEAD { @@ -268,7 +269,8 @@ class GenericIE(InfoExtractor): 'params': { # m3u8 downloads 'skip_download': True, - } + }, + 'skip': 'video gone', }, # m3u8 served with Content-Type: text/plain { @@ -283,7 +285,8 @@ class GenericIE(InfoExtractor): 'params': { # m3u8 downloads 'skip_download': True, - } + }, + 'skip': 'video gone', }, # google redirect { @@ -368,6 +371,7 @@ class GenericIE(InfoExtractor): 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.', }, 'add_ie': ['BrightcoveLegacy'], + 'skip': 'video gone', }, { 'url': 'http://www.championat.com/video/football/v/87/87499.html', @@ -421,6 +425,7 @@ class GenericIE(InfoExtractor): 'params': { 'skip_download': True, }, + 'skip': 'movie expired', }, # embed.ly video { @@ -448,6 +453,8 @@ class GenericIE(InfoExtractor): 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama', 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.', }, + # HEAD requests lead to endless 301, while GET is OK + 'expected_warnings': ['301'], }, # RUTV embed { @@ -522,6 +529,9 @@ class GenericIE(InfoExtractor): 'title': '[NSFL] [FM15] which pumiscer was this ( vid ) ( alfa as fuck srx )', }, 'playlist_mincount': 7, + # This forum does not allow <iframe> syntaxes anymore + # Now HTML tags are displayed as-is + 'skip': 'No videos on this page', }, # Embedded TED video { @@ -570,7 +580,8 @@ class GenericIE(InfoExtractor): }, 'params': { 'skip_download': 'Requires rtmpdump' - } + }, + 'skip': 'video gone', }, # francetv embed { From 40eec6b15cd3135b24cb42fde5ccf62e9a1f0807 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 28 Aug 2016 20:27:08 +0800 Subject: [PATCH 305/775] [openload] Fix extraction (closes #10408) Thanks to @yokrysty again! 
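For reference, a minimal standalone sketch of the character-shift decoding that this fix and the follow-up in PATCH 307 below converge on (the follow-up changes the extra increment on the last character from 2 to 1). The function name is illustrative, plain ord/chr stand in for youtube-dl's compat_ord/compat_chr, and the stream URL template is the one visible in the hunk below. Openload changes this scheme often, so treat it as a sketch of the approach rather than a guaranteed decoder:

    def decode_stream_path(enc_data):
        # Shift every printable ASCII character by 14 within the 33..126 range,
        # giving the final character one extra step (the value the two patches
        # settle on).
        chars = []
        for idx, c in enumerate(enc_data):
            j = ord(c)
            if 33 <= j <= 126:
                j = ((j + 14) % 94) + 33
                if idx == len(enc_data) - 1:
                    j += 1
            chars.append(chr(j))
        return ''.join(chars)

    # The extractor then builds the final URL along the lines of:
    #   'https://openload.co/stream/%s?mime=true' % decode_stream_path(enc_data)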
--- ChangeLog | 6 ++++++ youtube_dl/extractor/openload.py | 4 +++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index d3496b5dc..5d7a052a5 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version <unreleased> + +Extractors +* [openload] Fix extraction (#10408) + + version 2016.08.28 Core diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index e181d0b3a..c8dde7ae3 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -55,10 +55,12 @@ class OpenloadIE(InfoExtractor): video_url_chars = [] - for c in enc_data: + for idx, c in enumerate(enc_data): j = compat_ord(c) if j >= 33 and j <= 126: j = ((j + 14) % 94) + 33 + if idx == len(enc_data) - 1: + j += 2 video_url_chars += compat_chr(j) video_url = 'https://openload.co/stream/%s?mime=true' % ''.join(video_url_chars) From 04b32c8f9679004d11ee97c2b7beecaedf1b477b Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 28 Aug 2016 22:06:31 +0800 Subject: [PATCH 306/775] [bilibili] Fix extraction (closes #10375) Thanks @gdkchan for the algorithm --- ChangeLog | 1 + youtube_dl/extractor/bilibili.py | 98 ++++++++++++-------------------- 2 files changed, 36 insertions(+), 63 deletions(-) diff --git a/ChangeLog b/ChangeLog index 5d7a052a5..e055976c5 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version <unreleased> Extractors +* [bilibili] Fix extraction (#10375) * [openload] Fix extraction (#10408) diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index d87c38a02..a332fbb69 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -1,35 +1,26 @@ # coding: utf-8 from __future__ import unicode_literals -import calendar -import datetime +import hashlib import re from .common import InfoExtractor -from ..compat import ( - compat_etree_fromstring, - compat_str, - compat_parse_qs, - compat_xml_parse_error, -) +from ..compat import compat_parse_qs from ..utils import ( - ExtractorError, int_or_none, float_or_none, - xpath_text, + unified_timestamp, ) class BiliBiliIE(InfoExtractor): - _WORKING = False - _VALID_URL = r'https?://www\.bilibili\.(?:tv|com)/video/av(?P<id>\d+)' _TESTS = [{ 'url': 'http://www.bilibili.tv/video/av1074402/', 'md5': '9fa226fe2b8a9a4d5a69b4c6a183417e', 'info_dict': { - 'id': '1554319', + 'id': '1074402', 'ext': 'mp4', 'title': '【金坷垃】金泡沫', 'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923', @@ -43,24 +34,28 @@ class BiliBiliIE(InfoExtractor): }, { 'url': 'http://www.bilibili.com/video/av1041170/', 'info_dict': { - 'id': '1507019', + 'id': '1041170', 'ext': 'mp4', 'title': '【BD1080P】刀语【诸神&异域】', 'description': '这是个神奇的故事~每个人不留弹幕不给走哦~切利哦!~', + 'duration': 3382.259, 'timestamp': 1396530060, 'upload_date': '20140403', + 'thumbnail': 're:^https?://.+\.jpg', 'uploader': '枫叶逝去', 'uploader_id': '520116', }, }, { 'url': 'http://www.bilibili.com/video/av4808130/', 'info_dict': { - 'id': '7802182', + 'id': '4808130', 'ext': 'mp4', 'title': '【长篇】哆啦A梦443【钉铛】', 'description': '(2016.05.27)来组合客人的脸吧&amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;illust_id=56912929', + 'duration': 1493.995, 'timestamp': 1464564180, 'upload_date': '20160529', + 'thumbnail': 're:^https?://.+\.jpg', 'uploader': '喜欢拉面', 'uploader_id': '151066', }, @@ -68,12 +63,14 @@ class BiliBiliIE(InfoExtractor): # Missing upload time 'url': 'http://www.bilibili.com/video/av1867637/', 'info_dict': { - 'id': '2880301', + 'id': '1867637', 'ext': 'mp4', 'title': 
'【HDTV】【喜剧】岳父岳母真难当 (2014)【法国票房冠军】', 'description': '一个信奉天主教的法国旧式传统资产阶级家庭中有四个女儿。三个女儿却分别找了阿拉伯、犹太、中国丈夫,老夫老妻唯独期盼剩下未嫁的小女儿能找一个信奉天主教的法国白人,结果没想到小女儿找了一位非裔黑人……【这次应该不会跳帧了】', + 'duration': 5760.0, 'uploader': '黑夜为猫', 'uploader_id': '610729', + 'thumbnail': 're:^https?://.+\.jpg', }, 'params': { # Just to test metadata extraction @@ -82,86 +79,61 @@ class BiliBiliIE(InfoExtractor): 'expected_warnings': ['upload time'], }] - # BiliBili blocks keys from time to time. The current key is extracted from - # the Android client - # TODO: find the sign algorithm used in the flash player - _APP_KEY = '86385cdc024c0f6c' + _APP_KEY = '6f90a59ac58a4123' + _BILIBILI_KEY = '0bfd84cc3940035173f35e6777508326' def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - params = compat_parse_qs(self._search_regex( + cid = compat_parse_qs(self._search_regex( [r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)', r'<iframe[^>]+src="https://secure\.bilibili\.com/secure,([^"]+)"'], - webpage, 'player parameters')) - cid = params['cid'][0] + webpage, 'player parameters'))['cid'][0] - info_xml_str = self._download_webpage( - 'http://interface.bilibili.com/v_cdn_play', - cid, query={'appkey': self._APP_KEY, 'cid': cid}, - note='Downloading video info page') + payload = 'appkey=%s&cid=%s&otype=json&quality=2&type=mp4' % (self._APP_KEY, cid) + sign = hashlib.md5((payload + self._BILIBILI_KEY).encode('utf-8')).hexdigest() - err_msg = None - durls = None - info_xml = None - try: - info_xml = compat_etree_fromstring(info_xml_str.encode('utf-8')) - except compat_xml_parse_error: - info_json = self._parse_json(info_xml_str, video_id, fatal=False) - err_msg = (info_json or {}).get('error_text') - else: - err_msg = xpath_text(info_xml, './message') - - if info_xml is not None: - durls = info_xml.findall('./durl') - if not durls: - if err_msg: - raise ExtractorError('%s said: %s' % (self.IE_NAME, err_msg), expected=True) - else: - raise ExtractorError('No videos found!') + video_info = self._download_json( + 'http://interface.bilibili.com/playurl?%s&sign=%s' % (payload, sign), + video_id, note='Downloading video info page') entries = [] - for durl in durls: - size = xpath_text(durl, ['./filesize', './size']) + for idx, durl in enumerate(video_info['durl']): formats = [{ - 'url': durl.find('./url').text, - 'filesize': int_or_none(size), + 'url': durl['url'], + 'filesize': int_or_none(durl['size']), }] - for backup_url in durl.findall('./backup_url/url'): + for backup_url in durl['backup_url']: formats.append({ - 'url': backup_url.text, + 'url': backup_url, # backup URLs have lower priorities - 'preference': -2 if 'hd.mp4' in backup_url.text else -3, + 'preference': -2 if 'hd.mp4' in backup_url else -3, }) self._sort_formats(formats) entries.append({ - 'id': '%s_part%s' % (cid, xpath_text(durl, './order')), - 'duration': int_or_none(xpath_text(durl, './length'), 1000), + 'id': '%s_part%s' % (video_id, idx), + 'duration': float_or_none(durl.get('length'), 1000), 'formats': formats, }) title = self._html_search_regex('<h1[^>]+title="([^"]+)">', webpage, 'title') description = self._html_search_meta('description', webpage) - datetime_str = self._html_search_regex( - r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time', fatal=False) - timestamp = None - if datetime_str: - timestamp = calendar.timegm(datetime.datetime.strptime(datetime_str, '%Y-%m-%dT%H:%M').timetuple()) + timestamp = unified_timestamp(self._html_search_regex( + 
r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time', fatal=False)) # TODO 'view_count' requires deobfuscating Javascript info = { - 'id': compat_str(cid), + 'id': video_id, 'title': title, 'description': description, 'timestamp': timestamp, 'thumbnail': self._html_search_meta('thumbnailUrl', webpage), - 'duration': float_or_none(xpath_text(info_xml, './timelength'), scale=1000), + 'duration': float_or_none(video_info.get('timelength'), scale=1000), } uploader_mobj = re.search( From 98908bcf7c50d034042ab86223b7689e91b589ba Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 28 Aug 2016 22:49:46 +0800 Subject: [PATCH 307/775] [openload] Update algorithm again (#10408) --- youtube_dl/extractor/openload.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index c8dde7ae3..03baf8e32 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -60,7 +60,7 @@ class OpenloadIE(InfoExtractor): if j >= 33 and j <= 126: j = ((j + 14) % 94) + 33 if idx == len(enc_data) - 1: - j += 2 + j += 1 video_url_chars += compat_chr(j) video_url = 'https://openload.co/stream/%s?mime=true' % ''.join(video_url_chars) From 2982514072594b1f708abdf654b31da77c0bfa81 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sun, 28 Aug 2016 16:43:15 +0100 Subject: [PATCH 308/775] [turner,nba,cnn,adultswim] add base extractor to parse cvp feeds --- youtube_dl/extractor/adultswim.py | 70 +++---------- youtube_dl/extractor/cnn.py | 97 +++++------------- youtube_dl/extractor/nba.py | 70 +++---------- youtube_dl/extractor/turner.py | 163 ++++++++++++++++++++++++++++++ 4 files changed, 214 insertions(+), 186 deletions(-) create mode 100644 youtube_dl/extractor/turner.py diff --git a/youtube_dl/extractor/adultswim.py b/youtube_dl/extractor/adultswim.py index 96599048f..ef3cc2a61 100644 --- a/youtube_dl/extractor/adultswim.py +++ b/youtube_dl/extractor/adultswim.py @@ -3,16 +3,11 @@ from __future__ import unicode_literals import re -from .common import InfoExtractor -from ..utils import ( - determine_ext, - ExtractorError, - float_or_none, - xpath_text, -) +from .turner import TurnerBaseIE +from ..utils import ExtractorError -class AdultSwimIE(InfoExtractor): +class AdultSwimIE(TurnerBaseIE): _VALID_URL = r'https?://(?:www\.)?adultswim\.com/videos/(?P<is_playlist>playlists/)?(?P<show_path>[^/]+)/(?P<episode_path>[^/?#]+)/?' 
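    # A rough illustration, not taken from this patch, of how subclasses are
    # expected to call the new TurnerBaseIE._extract_cvp_info() helper: a CVP
    # feed URL plus a path_data mapping keyed by 'secure', 'default' or a file
    # extension such as 'm3u8'. The hostnames below are placeholders; the real
    # values appear in the adultswim, cnn and nba hunks that follow.
    #
    #     info = self._extract_cvp_info(
    #         'http://www.example.com/feeds/%s.xml' % video_id, video_id, {
    #             'default': {
    #                 'media_src': 'http://cdn.example.com/big',
    #             },
    #             'secure': {
    #                 'media_src': 'http://secure.cdn.example.com/big',
    #                 'tokenizer_src': 'http://www.example.com/services/token_ipadAdobe.do',
    #             },
    #         })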
_TESTS = [{ @@ -96,7 +91,8 @@ class AdultSwimIE(InfoExtractor): 'params': { # m3u8 download 'skip_download': True, - } + }, + 'expected_warnings': ['Unable to download f4m manifest'], }] @staticmethod @@ -176,57 +172,23 @@ class AdultSwimIE(InfoExtractor): entries = [] for part_num, segment_id in enumerate(segment_ids): - segment_url = 'http://www.adultswim.com/videos/api/v0/assets?id=%s&platform=desktop' % segment_id - + segement_info = self._extract_cvp_info( + 'http://www.adultswim.com/videos/api/v0/assets?id=%s&platform=desktop' % segment_id, + segment_id, { + 'secure': { + 'media_src': 'http://androidhls-secure.cdn.turner.com/adultswim/big', + 'tokenizer_src': 'http://www.adultswim.com/astv/mvpd/processors/services/token_ipadAdobe.do', + }, + }) segment_title = '%s - %s' % (show_title, episode_title) if len(segment_ids) > 1: segment_title += ' Part %d' % (part_num + 1) - - idoc = self._download_xml( - segment_url, segment_title, - 'Downloading segment information', 'Unable to download segment information') - - segment_duration = float_or_none( - xpath_text(idoc, './/trt', 'segment duration').strip()) - - formats = [] - file_els = idoc.findall('.//files/file') or idoc.findall('./files/file') - - unique_urls = [] - unique_file_els = [] - for file_el in file_els: - media_url = file_el.text - if not media_url or determine_ext(media_url) == 'f4m': - continue - if file_el.text not in unique_urls: - unique_urls.append(file_el.text) - unique_file_els.append(file_el) - - for file_el in unique_file_els: - bitrate = file_el.attrib.get('bitrate') - ftype = file_el.attrib.get('type') - media_url = file_el.text - if determine_ext(media_url) == 'm3u8': - formats.extend(self._extract_m3u8_formats( - media_url, segment_title, 'mp4', preference=0, - m3u8_id='hls', fatal=False)) - else: - formats.append({ - 'format_id': '%s_%s' % (bitrate, ftype), - 'url': file_el.text.strip(), - # The bitrate may not be a number (for example: 'iphone') - 'tbr': int(bitrate) if bitrate.isdigit() else None, - }) - - self._sort_formats(formats) - - entries.append({ + segement_info.update({ 'id': segment_id, 'title': segment_title, - 'formats': formats, - 'duration': segment_duration, - 'description': episode_description + 'description': episode_description, }) + entries.append(segement_info) return { '_type': 'playlist', diff --git a/youtube_dl/extractor/cnn.py b/youtube_dl/extractor/cnn.py index 220bb55e8..1bf87f6ea 100644 --- a/youtube_dl/extractor/cnn.py +++ b/youtube_dl/extractor/cnn.py @@ -3,14 +3,11 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ( - int_or_none, - parse_duration, - url_basename, -) +from .turner import TurnerBaseIE +from ..utils import url_basename -class CNNIE(InfoExtractor): +class CNNIE(TurnerBaseIE): _VALID_URL = r'''(?x)https?://(?:(?P<sub_domain>edition|www|money)\.)?cnn\.com/(?:video/(?:data/.+?|\?)/)?videos?/ (?P<path>.+?/(?P<title>[^/]+?)(?:\.(?:[a-z\-]+)|(?=&)))''' @@ -18,43 +15,50 @@ class CNNIE(InfoExtractor): 'url': 'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn', 'md5': '3e6121ea48df7e2259fe73a0628605c4', 'info_dict': { - 'id': 'sports/2013/06/09/nadal-1-on-1.cnn', + 'id': 'nadal-1-on-1', 'ext': 'mp4', 'title': 'Nadal wins 8th French Open title', 'description': 'World Sport\'s Amanda Davies chats with 2013 French Open champion Rafael Nadal.', 'duration': 135, 'upload_date': '20130609', }, + 'expected_warnings': ['Failed to download m3u8 information'], }, { 'url': 
'http://edition.cnn.com/video/?/video/us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fcnn_topstories+%28RSS%3A+Top+Stories%29', 'md5': 'b5cc60c60a3477d185af8f19a2a26f4e', 'info_dict': { - 'id': 'us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology', + 'id': 'sot-student-gives-epic-speech', 'ext': 'mp4', 'title': "Student's epic speech stuns new freshmen", 'description': "A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from \"2001: A Space Odyssey.\"", 'upload_date': '20130821', - } + }, + 'expected_warnings': ['Failed to download m3u8 information'], }, { 'url': 'http://www.cnn.com/video/data/2.0/video/living/2014/12/22/growing-america-nashville-salemtown-board-episode-1.hln.html', 'md5': 'f14d02ebd264df951feb2400e2c25a1b', 'info_dict': { - 'id': 'living/2014/12/22/growing-america-nashville-salemtown-board-episode-1.hln', + 'id': 'growing-america-nashville-salemtown-board-episode-1', 'ext': 'mp4', 'title': 'Nashville Ep. 1: Hand crafted skateboards', 'description': 'md5:e7223a503315c9f150acac52e76de086', 'upload_date': '20141222', - } + }, + 'expected_warnings': ['Failed to download m3u8 information'], }, { 'url': 'http://money.cnn.com/video/news/2016/08/19/netflix-stunning-stats.cnnmoney/index.html', 'md5': '52a515dc1b0f001cd82e4ceda32be9d1', 'info_dict': { - 'id': '/video/news/2016/08/19/netflix-stunning-stats.cnnmoney', + 'id': 'netflix-stunning-stats', 'ext': 'mp4', 'title': '5 stunning stats about Netflix', 'description': 'Did you know that Netflix has more than 80 million members? Here are five facts about the online video distributor that you probably didn\'t know.', 'upload_date': '20160819', - } + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, }, { 'url': 'http://cnn.com/video/?/video/politics/2015/03/27/pkg-arizona-senator-church-attendance-mandatory.ktvk', 'only_matching': True, @@ -84,67 +88,12 @@ class CNNIE(InfoExtractor): if sub_domain not in ('money', 'edition'): sub_domain = 'edition' config = self._CONFIG[sub_domain] - info_url = config['data_src'] % path - info = self._download_xml(info_url, page_title) - - formats = [] - rex = re.compile(r'''(?x) - (?P<width>[0-9]+)x(?P<height>[0-9]+) - (?:_(?P<bitrate>[0-9]+)k)? 
- ''') - for f in info.findall('files/file'): - video_url = config['media_src'] + f.text.strip() - fdct = { - 'format_id': f.attrib['bitrate'], - 'url': video_url, - } - - mf = rex.match(f.attrib['bitrate']) - if mf: - fdct['width'] = int(mf.group('width')) - fdct['height'] = int(mf.group('height')) - fdct['tbr'] = int_or_none(mf.group('bitrate')) - else: - mf = rex.search(f.text) - if mf: - fdct['width'] = int(mf.group('width')) - fdct['height'] = int(mf.group('height')) - fdct['tbr'] = int_or_none(mf.group('bitrate')) - else: - mi = re.match(r'ios_(audio|[0-9]+)$', f.attrib['bitrate']) - if mi: - if mi.group(1) == 'audio': - fdct['vcodec'] = 'none' - fdct['ext'] = 'm4a' - else: - fdct['tbr'] = int(mi.group(1)) - - formats.append(fdct) - - self._sort_formats(formats) - - thumbnails = [{ - 'height': int(t.attrib['height']), - 'width': int(t.attrib['width']), - 'url': t.text, - } for t in info.findall('images/image')] - - metas_el = info.find('metas') - upload_date = ( - metas_el.attrib.get('version') if metas_el is not None else None) - - duration_el = info.find('length') - duration = parse_duration(duration_el.text) - - return { - 'id': info.attrib['id'], - 'title': info.find('headline').text, - 'formats': formats, - 'thumbnails': thumbnails, - 'description': info.find('description').text, - 'duration': duration, - 'upload_date': upload_date, - } + return self._extract_cvp_info( + config['data_src'] % path, page_title, { + 'default': { + 'media_src': config['media_src'], + } + }) class CNNBlogsIE(InfoExtractor): diff --git a/youtube_dl/extractor/nba.py b/youtube_dl/extractor/nba.py index d896b0d04..aabd5b670 100644 --- a/youtube_dl/extractor/nba.py +++ b/youtube_dl/extractor/nba.py @@ -1,25 +1,20 @@ from __future__ import unicode_literals import functools -import os.path import re -from .common import InfoExtractor +from .turner import TurnerBaseIE from ..compat import ( compat_urllib_parse_urlencode, compat_urlparse, ) from ..utils import ( - int_or_none, OnDemandPagedList, - parse_duration, remove_start, - xpath_text, - xpath_attr, ) -class NBAIE(InfoExtractor): +class NBAIE(TurnerBaseIE): _VALID_URL = r'https?://(?:watch\.|www\.)?nba\.com/(?P<path>(?:[^/]+/)+(?P<id>[^?]*?))/?(?:/index\.html)?(?:\?.*)?$' _TESTS = [{ 'url': 'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html', @@ -59,7 +54,7 @@ class NBAIE(InfoExtractor): 'ext': 'mp4', 'title': 'Practice: Doc Rivers - 2/16/16', 'description': 'Head Coach Doc Rivers addresses the media following practice.', - 'upload_date': '20160217', + 'upload_date': '20160216', 'timestamp': 1455672000, }, 'params': { @@ -80,7 +75,7 @@ class NBAIE(InfoExtractor): }, { 'url': 'http://www.nba.com/timberwolves/wiggins-shootaround#', 'info_dict': { - 'id': 'Wigginsmp4', + 'id': 'Wigginsmp4-3462601', 'ext': 'mp4', 'title': 'Shootaround Access - Dec. 
12 | Andrew Wiggins', 'description': 'Wolves rookie Andrew Wiggins addresses the media after Friday\'s shootaround.', @@ -145,53 +140,12 @@ class NBAIE(InfoExtractor): if path.startswith('video/teams'): path = 'video/channels/proxy/' + path[6:] - video_info = self._download_xml('http://www.nba.com/%s.xml' % path, video_id) - video_id = os.path.splitext(xpath_text(video_info, 'slug'))[0] - title = xpath_text(video_info, 'headline') - description = xpath_text(video_info, 'description') - duration = parse_duration(xpath_text(video_info, 'length')) - timestamp = int_or_none(xpath_attr(video_info, 'dateCreated', 'uts')) - - thumbnails = [] - for image in video_info.find('images'): - thumbnails.append({ - 'id': image.attrib.get('cut'), - 'url': image.text, - 'width': int_or_none(image.attrib.get('width')), - 'height': int_or_none(image.attrib.get('height')), + return self._extract_cvp_info( + 'http://www.nba.com/%s.xml' % path, video_id, { + 'default': { + 'media_src': 'http://nba.cdn.turner.com/nba/big', + }, + 'm3u8': { + 'media_src': 'http://nbavod-f.akamaihd.net', + }, }) - - formats = [] - for video_file in video_info.findall('.//file'): - video_url = video_file.text - if video_url.startswith('/'): - continue - if video_url.endswith('.m3u8'): - formats.extend(self._extract_m3u8_formats(video_url, video_id, ext='mp4', m3u8_id='hls', fatal=False)) - elif video_url.endswith('.f4m'): - formats.extend(self._extract_f4m_formats(video_url + '?hdcore=3.4.1.1', video_id, f4m_id='hds', fatal=False)) - else: - key = video_file.attrib.get('bitrate') - format_info = { - 'format_id': key, - 'url': video_url, - } - mobj = re.search(r'(\d+)x(\d+)(?:_(\d+))?', key) - if mobj: - format_info.update({ - 'width': int(mobj.group(1)), - 'height': int(mobj.group(2)), - 'tbr': int_or_none(mobj.group(3)), - }) - formats.append(format_info) - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'duration': duration, - 'timestamp': timestamp, - 'thumbnails': thumbnails, - 'formats': formats, - } diff --git a/youtube_dl/extractor/turner.py b/youtube_dl/extractor/turner.py new file mode 100644 index 000000000..0d4271f11 --- /dev/null +++ b/youtube_dl/extractor/turner.py @@ -0,0 +1,163 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + xpath_text, + int_or_none, + determine_ext, + parse_duration, + xpath_attr, + update_url_query, +) + + +class TurnerBaseIE(InfoExtractor): + def _extract_cvp_info(self, data_src, video_id, path_data={}): + video_data = self._download_xml(data_src, video_id) + video_id = video_data.attrib['id'].split('/')[-1].split('.')[0] + title = xpath_text(video_data, 'headline', fatal=True) + # rtmp_src = xpath_text(video_data, 'akamai/src') + # if rtmp_src: + # splited_rtmp_src = rtmp_src.split(',') + # if len(splited_rtmp_src) == 2: + # rtmp_src = splited_rtmp_src[1] + # aifp = xpath_text(video_data, 'akamai/aifp', default='') + + tokens = {} + urls = [] + formats = [] + rex = re.compile(r'''(?x) + (?P<width>[0-9]+)x(?P<height>[0-9]+) + (?:_(?P<bitrate>[0-9]+))? 
+ ''') + for video_file in video_data.findall('files/file'): + video_url = video_file.text.strip() + if not video_url: + continue + ext = determine_ext(video_url) + if video_url.startswith('/mp4:protected/'): + continue + # TODO Correct extraction for these files + # protected_path_data = path_data.get('protected') + # if not protected_path_data or not rtmp_src: + # continue + # protected_path = self._search_regex( + # r'/mp4:(.+)\.[a-z0-9]', video_url, 'secure path') + # auth = self._download_webpage( + # protected_path_data['tokenizer_src'], query={ + # 'path': protected_path, + # 'videoId': video_id, + # 'aifp': aifp, + # }) + # token = xpath_text(auth, 'token') + # if not token: + # continue + # video_url = rtmp_src + video_url + '?' + token + elif video_url.startswith('/secure/'): + secure_path_data = path_data.get('secure') + if not secure_path_data: + continue + video_url = secure_path_data['media_src'] + video_url + secure_path = self._search_regex(r'https?://[^/]+(.+/)', video_url, 'secure path') + '*' + token = tokens.get(secure_path) + if not token: + auth = self._download_xml( + secure_path_data['tokenizer_src'], video_id, query={ + 'path': secure_path, + 'videoId': video_id, + }) + token = xpath_text(auth, 'token') + if not token: + continue + tokens[secure_path] = token + video_url = video_url + '?hdnea=' + token + elif not re.match('https?://', video_url): + base_path_data = path_data.get(ext, path_data.get('default', {})) + media_src = base_path_data.get('media_src') + if not media_src: + continue + video_url = media_src + video_url + if video_url in urls: + continue + urls.append(video_url) + format_id = video_file.attrib['bitrate'] + if ext == 'smil': + formats.extend(self._extract_smil_formats(video_url, video_id, fatal=False)) + elif ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + video_url, video_id, 'mp4', m3u8_id=format_id, fatal=False)) + elif ext == 'f4m': + formats.extend(self._extract_f4m_formats( + update_url_query(video_url, {'hdcore': '3.7.0'}), + video_id, f4m_id=format_id, fatal=False)) + else: + f = { + 'format_id': format_id, + 'url': video_url, + 'ext': ext, + } + mobj = rex.search(format_id + video_url) + if mobj: + f.update({ + 'width': int(mobj.group('width')), + 'height': int(mobj.group('height')), + 'tbr': int_or_none(mobj.group('bitrate')), + }) + elif format_id.isdigit(): + f['tbr'] = int(format_id) + else: + mobj = re.match(r'ios_(audio|[0-9]+)$', format_id) + if mobj: + if mobj.group(1) == 'audio': + f.update({ + 'vcodec': 'none', + 'ext': 'm4a', + }) + else: + f['tbr'] = int(mobj.group(1)) + formats.append(f) + self._sort_formats(formats) + + subtitles = {} + for source in video_data.findall('closedCaptions/source'): + for track in source.findall('track'): + source_url = source.get('url') + if not source_url: + continue + subtitles.set_default(source.get('lang') or source.get('label') or 'en', []).append({ + 'url': source_url, + 'ext': { + 'scc': 'scc', + 'webvtt': 'vtt', + 'smptett': 'tt', + }.get(source.get('format')) + }) + + thumbnails = [{ + 'id': image.get('cut'), + 'url': image.text, + 'width': int_or_none(image.get('width')), + 'height': int_or_none(image.get('height')), + } for image in video_data.findall('images/image')] + + timestamp = None + if 'cnn.com' not in data_src: + timestamp = int_or_none(xpath_attr(video_data, 'dateCreated', 'uts')) + + return { + 'id': video_id, + 'title': title, + 'formats': formats, + 'subtitles': subtitles, + 'thumbnails': thumbnails, + 'description': xpath_text(video_data, 'description'), 
+ 'duration': parse_duration(xpath_text(video_data, 'length') or xpath_text(video_data, 'trt')), + 'timestamp': timestamp, + 'upload_date': xpath_attr(video_data, 'metas', 'version'), + 'series': xpath_text(video_data, 'showTitle'), + 'season_number': int_or_none(xpath_text(video_data, 'seasonNumber')), + 'episode_number': int_or_none(xpath_text(video_data, 'episodeNumber')), + } From ec65b391cbb0bc42a78515915e61602f4d1ae1f9 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sun, 28 Aug 2016 16:47:59 +0100 Subject: [PATCH 309/775] [cartoonnetwork] Add new extractor(#10110) --- youtube_dl/extractor/cartoonnetwork.py | 36 ++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 1 + 2 files changed, 37 insertions(+) create mode 100644 youtube_dl/extractor/cartoonnetwork.py diff --git a/youtube_dl/extractor/cartoonnetwork.py b/youtube_dl/extractor/cartoonnetwork.py new file mode 100644 index 000000000..813f53644 --- /dev/null +++ b/youtube_dl/extractor/cartoonnetwork.py @@ -0,0 +1,36 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .turner import TurnerBaseIE + + +class CartoonNetworkIE(TurnerBaseIE): + _VALID_URL = r'https?://(?:www\.)?cartoonnetwork\.com/video/(?:[^/]+/)+(?P<id>[^/?#]+)-(?:clip|episode)\.html' + _TEST = { + 'url': 'http://www.cartoonnetwork.com/video/teen-titans-go/starfire-the-cat-lady-clip.html', + 'info_dict': { + 'id': '8a250ab04ed07e6c014ef3f1e2f9016c', + 'ext': 'mp4', + 'title': 'Starfire the Cat Lady', + 'description': 'Robin decides to become a cat so that Starfire will finally love him.', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + } + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + id_type, video_id = re.search(r"_cnglobal\.cvp(Video|Title)Id\s*=\s*'([^']+)';", webpage).groups() + query = ('id' if id_type == 'Video' else 'titleId') + '=' + video_id + return self._extract_cvp_info( + 'http://www.cartoonnetwork.com/video-seo-svc/episodeservices/getCvpPlaylist?' 
+ query, video_id, { + 'secure': { + 'media_src': 'http://apple-secure.cdn.turner.com/toon/big', + 'tokenizer_src': 'http://www.cartoonnetwork.com/cntv/mvpd/processors/services/token_ipadAdobe.do', + }, + }) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 8d88d6cb4..6eb495b07 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -127,6 +127,7 @@ from .carambatv import ( CarambaTVIE, CarambaTVPageIE, ) +from .cartoonnetwork import CartoonNetworkIE from .cbc import ( CBCIE, CBCPlayerIE, From b3eaeded12f470afd6f0cb851e6b7dd2ee78b7c5 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sun, 28 Aug 2016 16:50:32 +0100 Subject: [PATCH 310/775] [tbs] Add new extractor(#10222) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/tbs.py | 59 ++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+) create mode 100644 youtube_dl/extractor/tbs.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 6eb495b07..06c6746ff 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -824,6 +824,7 @@ from .tagesschau import ( TagesschauIE, ) from .tass import TassIE +from .tbs import TBSIE from .tdslifeway import TDSLifewayIE from .teachertube import ( TeacherTubeIE, diff --git a/youtube_dl/extractor/tbs.py b/youtube_dl/extractor/tbs.py new file mode 100644 index 000000000..79b00e376 --- /dev/null +++ b/youtube_dl/extractor/tbs.py @@ -0,0 +1,59 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .turner import TurnerBaseIE +from ..utils import ( + extract_attributes, + ExtractorError, +) + + +class TBSIE(TurnerBaseIE): + _VALID_URL = r'https?://(?:www\.)?(?P<site>tbs|tntdrama)\.com/videos/(?:[^/]+/)+(?P<id>[^/?#]+)\.html' + _TESTS = [{ + 'url': 'http://www.tbs.com/videos/people-of-earth/season-1/extras/2007318/theatrical-trailer.html', + 'md5': '9e61d680e2285066ade7199e6408b2ee', + 'info_dict': { + 'id': '2007318', + 'ext': 'mp4', + 'title': 'Theatrical Trailer', + 'description': 'Catch the latest comedy from TBS, People of Earth, premiering Halloween night--Monday, October 31, at 9/8c.', + } + }, { + 'url': 'http://www.tntdrama.com/videos/good-behavior/season-1/extras/1538823/you-better-run.html', + 'md5': 'ce53c6ead5e9f3280b4ad2031a6fab56', + 'info_dict': { + 'id': '1538823', + 'ext': 'mp4', + 'title': 'You Better Run', + 'description': 'Letty Raines must figure out what she\'s running toward while running away from her past. 
Good Behavior premieres November 15 at 9/8c.', + } + }] + + def _real_extract(self, url): + domain, display_id = re.match(self._VALID_URL, url).groups() + site = domain[:3] + webpage = self._download_webpage(url, display_id) + video_params = extract_attributes(self._search_regex(r'(<[^>]+id="page-video"[^>]*>)', webpage, 'video params')) + if video_params.get('isAuthRequired') == 'true': + raise ExtractorError( + 'This video is only available via cable service provider subscription that' + ' is not currently supported.', expected=True) + query = None + clip_id = video_params.get('clipid') + if clip_id: + query = 'id=' + clip_id + else: + query = 'titleId=' + video_params['titleid'] + return self._extract_cvp_info( + 'http://www.%s.com/service/cvpXml?%s' % (domain, query), display_id, { + 'default': { + 'media_src': 'http://ht.cdn.turner.com/%s/big' % site, + }, + 'secure': { + 'media_src': 'http://apple-secure.cdn.turner.com/%s/big' % site, + 'tokenizer_src': 'http://www.%s.com/video/processors/services/token_ipadAdobe.do' % domain, + }, + }) From 5bc8a73af69f4aac8b2df6f7c23ecfb4ee72e518 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sun, 28 Aug 2016 17:08:26 +0100 Subject: [PATCH 311/775] [cartoonnetwork] make extraction work for more videos in the website some videos require `networkName=CN2` to be present in the feed url --- youtube_dl/extractor/cartoonnetwork.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/cartoonnetwork.py b/youtube_dl/extractor/cartoonnetwork.py index 813f53644..b3f30b1ca 100644 --- a/youtube_dl/extractor/cartoonnetwork.py +++ b/youtube_dl/extractor/cartoonnetwork.py @@ -28,7 +28,7 @@ class CartoonNetworkIE(TurnerBaseIE): id_type, video_id = re.search(r"_cnglobal\.cvp(Video|Title)Id\s*=\s*'([^']+)';", webpage).groups() query = ('id' if id_type == 'Video' else 'titleId') + '=' + video_id return self._extract_cvp_info( - 'http://www.cartoonnetwork.com/video-seo-svc/episodeservices/getCvpPlaylist?' 
+ query, video_id, { + 'http://www.cartoonnetwork.com/video-seo-svc/episodeservices/getCvpPlaylist?networkName=CN2&' + query, video_id, { 'secure': { 'media_src': 'http://apple-secure.cdn.turner.com/toon/big', 'tokenizer_src': 'http://www.cartoonnetwork.com/cntv/mvpd/processors/services/token_ipadAdobe.do', From b8079a40bc61326b17a672b073dce6cdfa791fb5 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sun, 28 Aug 2016 17:51:53 +0100 Subject: [PATCH 312/775] [turner] fix secure m3u8 formats downloading --- youtube_dl/downloader/hls.py | 11 +++++++---- youtube_dl/extractor/turner.py | 15 +++++++++++++-- youtube_dl/extractor/uplynk.py | 4 +--- 3 files changed, 21 insertions(+), 9 deletions(-) diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 8dd1b898e..baaff44d5 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -83,7 +83,10 @@ class HlsFD(FragmentFD): self._prepare_and_start_frag_download(ctx) + extra_query = None extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url') + if extra_param_to_segment_url: + extra_query = compat_urlparse.parse_qs(extra_param_to_segment_url) i = 0 media_sequence = 0 decrypt_info = {'METHOD': 'NONE'} @@ -97,8 +100,8 @@ class HlsFD(FragmentFD): if re.match(r'^https?://', line) else compat_urlparse.urljoin(man_url, line)) frag_filename = '%s-Frag%d' % (ctx['tmpfilename'], i) - if extra_param_to_segment_url: - frag_url = update_url_query(frag_url, extra_param_to_segment_url) + if extra_query: + frag_url = update_url_query(frag_url, extra_query) success = ctx['dl'].download(frag_filename, {'url': frag_url}) if not success: return False @@ -124,8 +127,8 @@ class HlsFD(FragmentFD): if not re.match(r'^https?://', decrypt_info['URI']): decrypt_info['URI'] = compat_urlparse.urljoin( man_url, decrypt_info['URI']) - if extra_param_to_segment_url: - decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_param_to_segment_url) + if extra_query: + decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_query) decrypt_info['KEY'] = self.ydl.urlopen(decrypt_info['URI']).read() elif line.startswith('#EXT-X-MEDIA-SEQUENCE'): media_sequence = int(line[22:]) diff --git a/youtube_dl/extractor/turner.py b/youtube_dl/extractor/turner.py index 0d4271f11..108caa9d8 100644 --- a/youtube_dl/extractor/turner.py +++ b/youtube_dl/extractor/turner.py @@ -11,6 +11,7 @@ from ..utils import ( parse_duration, xpath_attr, update_url_query, + compat_urlparse, ) @@ -87,8 +88,18 @@ class TurnerBaseIE(InfoExtractor): if ext == 'smil': formats.extend(self._extract_smil_formats(video_url, video_id, fatal=False)) elif ext == 'm3u8': - formats.extend(self._extract_m3u8_formats( - video_url, video_id, 'mp4', m3u8_id=format_id, fatal=False)) + m3u8_formats = self._extract_m3u8_formats( + video_url, video_id, 'mp4', m3u8_id=format_id, fatal=False) + if m3u8_formats: + # Sometimes final URLs inside m3u8 are unsigned, let's fix this + # ourselves + qs = compat_urlparse.urlparse(video_url).query + if qs: + query = compat_urlparse.parse_qs(qs) + for m3u8_format in m3u8_formats: + m3u8_format['url'] = update_url_query(m3u8_format['url'], query) + m3u8_format['extra_param_to_segment_url'] = qs + formats.extend(m3u8_formats) elif ext == 'f4m': formats.extend(self._extract_f4m_formats( update_url_query(video_url, {'hdcore': '3.7.0'}), diff --git a/youtube_dl/extractor/uplynk.py b/youtube_dl/extractor/uplynk.py index ae529f690..2cd22cf8a 100644 --- a/youtube_dl/extractor/uplynk.py +++ 
b/youtube_dl/extractor/uplynk.py @@ -33,9 +33,7 @@ class UplynkIE(InfoExtractor): formats = self._extract_m3u8_formats('http://content.uplynk.com/%s.m3u8' % path, display_id, 'mp4') if session_id: for f in formats: - f['extra_param_to_segment_url'] = { - 'pbs': session_id, - } + f['extra_param_to_segment_url'] = 'pbs=' + session_id self._sort_formats(formats) asset = self._download_json('http://content.uplynk.com/player/assetinfo/%s.json' % path, display_id) if asset.get('error') == 1: From 9ba1e1dcc0dc27d36f3f396cb608cef7cd50e48a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 29 Aug 2016 08:26:07 +0700 Subject: [PATCH 313/775] [played] Remove extractor (Closes #10470) --- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/played.py | 60 ------------------------------ 2 files changed, 61 deletions(-) delete mode 100644 youtube_dl/extractor/played.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 06c6746ff..20e85703f 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -636,7 +636,6 @@ from .phoenix import PhoenixIE from .photobucket import PhotobucketIE from .pinkbike import PinkbikeIE from .pladform import PladformIE -from .played import PlayedIE from .playfm import PlayFMIE from .plays import PlaysTVIE from .playtvak import PlaytvakIE diff --git a/youtube_dl/extractor/played.py b/youtube_dl/extractor/played.py deleted file mode 100644 index 57c875ef0..000000000 --- a/youtube_dl/extractor/played.py +++ /dev/null @@ -1,60 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re -import os.path - -from .common import InfoExtractor -from ..utils import ( - ExtractorError, - sanitized_Request, - urlencode_postdata, -) - - -class PlayedIE(InfoExtractor): - IE_NAME = 'played.to' - _VALID_URL = r'https?://(?:www\.)?played\.to/(?P<id>[a-zA-Z0-9_-]+)' - - _TEST = { - 'url': 'http://played.to/j2f2sfiiukgt', - 'md5': 'c2bd75a368e82980e7257bf500c00637', - 'info_dict': { - 'id': 'j2f2sfiiukgt', - 'ext': 'flv', - 'title': 'youtube-dl_test_video.mp4', - }, - 'skip': 'Removed for copyright infringement.', # oh wow - } - - def _real_extract(self, url): - video_id = self._match_id(url) - orig_webpage = self._download_webpage(url, video_id) - - m_error = re.search( - r'(?s)Reason for deletion:.*?<b class="err"[^>]*>(?P<msg>[^<]+)</b>', orig_webpage) - if m_error: - raise ExtractorError(m_error.group('msg'), expected=True) - - data = self._hidden_inputs(orig_webpage) - - self._sleep(2, video_id) - - post = urlencode_postdata(data) - headers = { - b'Content-Type': b'application/x-www-form-urlencoded', - } - req = sanitized_Request(url, post, headers) - webpage = self._download_webpage( - req, video_id, note='Downloading video page ...') - - title = os.path.splitext(data['fname'])[0] - - video_url = self._search_regex( - r'file: "?(.+?)",', webpage, 'video URL') - - return { - 'id': video_id, - 'title': title, - 'url': video_url, - } From 93b84045994ca88b486901f54de1102347a67537 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Mon, 29 Aug 2016 07:56:54 +0100 Subject: [PATCH 314/775] [generic,vodplatform] improve embed regex --- youtube_dl/extractor/generic.py | 4 ++-- youtube_dl/extractor/vodplatform.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index c6e655c84..24b217715 100644 --- a/youtube_dl/extractor/generic.py +++ 
b/youtube_dl/extractor/generic.py @@ -2243,11 +2243,11 @@ class GenericIE(InfoExtractor): # Look for VODPlatform embeds mobj = re.search( - r'<iframe[^>]+src=[\'"]((?:https?:)?//(?:www\.)?vod-platform\.net/embed/[^/?#]+)', + r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?vod-platform\.net/[eE]mbed/.+?)\1', webpage) if mobj is not None: return self.url_result( - self._proto_relative_url(unescapeHTML(mobj.group(1))), 'VODPlatform') + self._proto_relative_url(unescapeHTML(mobj.group('url'))), 'VODPlatform') # Look for Instagram embeds instagram_embed_url = InstagramIE._extract_embed_url(webpage) diff --git a/youtube_dl/extractor/vodplatform.py b/youtube_dl/extractor/vodplatform.py index b49542b16..7bdd8b1dc 100644 --- a/youtube_dl/extractor/vodplatform.py +++ b/youtube_dl/extractor/vodplatform.py @@ -6,7 +6,7 @@ from ..utils import unescapeHTML class VODPlatformIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?vod-platform\.net/embed/(?P<id>[^/?#]+)' + _VALID_URL = r'https?://(?:www\.)?vod-platform\.net/[eE]mbed/(?P<id>[^/?#]+)' _TEST = { # from http://www.lbcgroup.tv/watch/chapter/29143/52844/%D8%A7%D9%84%D9%86%D8%B5%D8%B1%D8%A9-%D9%81%D9%8A-%D8%B6%D9%8A%D8%A7%D9%81%D8%A9-%D8%A7%D9%84%D9%80-cnn/ar 'url': 'http://vod-platform.net/embed/RufMcytHDolTH1MuKHY9Fw', From 6c9b71bc0862560cbb9c4c2d9ec295072c208838 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Mon, 29 Aug 2016 19:05:38 +0800 Subject: [PATCH 315/775] [downloader/external] Recommend --hls-prefer-native for SOCKS users Related: #10490 --- youtube_dl/downloader/external.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index 17f12e970..0aeae3b8f 100644 --- a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -223,7 +223,8 @@ class FFmpegFD(ExternalFD): if proxy.startswith('socks'): self.report_warning( - '%s does not support SOCKS proxies. Downloading may fail.' % self.get_basename()) + '%s does not support SOCKS proxies. Downloading is likely to fail. ' + 'Consider adding --hls-prefer-native to your command.' 
% self.get_basename()) # Since December 2015 ffmpeg supports -http_proxy option (see # http://git.videolan.org/?p=ffmpeg.git;a=commit;h=b4eb1f29ebddd60c41a2eb39f5af701e38e0d3fd) From 547993dcd09dd46fda2fd429ed0ed72db7263503 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 29 Aug 2016 21:52:41 +0700 Subject: [PATCH 316/775] [turner] Fix subtitles extraction --- youtube_dl/extractor/turner.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/turner.py b/youtube_dl/extractor/turner.py index 108caa9d8..d69977b56 100644 --- a/youtube_dl/extractor/turner.py +++ b/youtube_dl/extractor/turner.py @@ -135,11 +135,12 @@ class TurnerBaseIE(InfoExtractor): subtitles = {} for source in video_data.findall('closedCaptions/source'): for track in source.findall('track'): - source_url = source.get('url') - if not source_url: + track_url = track.get('url') + if not track_url: continue - subtitles.set_default(source.get('lang') or source.get('label') or 'en', []).append({ - 'url': source_url, + lang = track.get('lang') or track.get('label') or 'en' + subtitles.setdefault(lang, []).append({ + 'url': track_url, 'ext': { 'scc': 'scc', 'webvtt': 'vtt', From cd10b3ea63fd167216234932aba4d63a34aec4c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 29 Aug 2016 22:13:49 +0700 Subject: [PATCH 317/775] [turner] Extract all formats --- youtube_dl/extractor/turner.py | 46 ++++++++++++++++++---------------- 1 file changed, 25 insertions(+), 21 deletions(-) diff --git a/youtube_dl/extractor/turner.py b/youtube_dl/extractor/turner.py index d69977b56..6df22fd24 100644 --- a/youtube_dl/extractor/turner.py +++ b/youtube_dl/extractor/turner.py @@ -4,6 +4,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import compat_str from ..utils import ( xpath_text, int_or_none, @@ -30,11 +31,11 @@ class TurnerBaseIE(InfoExtractor): tokens = {} urls = [] formats = [] - rex = re.compile(r'''(?x) - (?P<width>[0-9]+)x(?P<height>[0-9]+) - (?:_(?P<bitrate>[0-9]+))? 
- ''') - for video_file in video_data.findall('files/file'): + rex = re.compile( + r'(?P<width>[0-9]+)x(?P<height>[0-9]+)(?:_(?P<bitrate>[0-9]+))?') + # Possible formats locations: files/file, files/groupFiles/files + # and maybe others + for video_file in video_data.findall('.//file'): video_url = video_file.text.strip() if not video_url: continue @@ -84,12 +85,14 @@ class TurnerBaseIE(InfoExtractor): if video_url in urls: continue urls.append(video_url) - format_id = video_file.attrib['bitrate'] + format_id = video_file.get('bitrate') if ext == 'smil': - formats.extend(self._extract_smil_formats(video_url, video_id, fatal=False)) + formats.extend(self._extract_smil_formats( + video_url, video_id, fatal=False)) elif ext == 'm3u8': m3u8_formats = self._extract_m3u8_formats( - video_url, video_id, 'mp4', m3u8_id=format_id, fatal=False) + video_url, video_id, 'mp4', m3u8_id=format_id or 'hls', + fatal=False) if m3u8_formats: # Sometimes final URLs inside m3u8 are unsigned, let's fix this # ourselves @@ -103,7 +106,7 @@ class TurnerBaseIE(InfoExtractor): elif ext == 'f4m': formats.extend(self._extract_f4m_formats( update_url_query(video_url, {'hdcore': '3.7.0'}), - video_id, f4m_id=format_id, fatal=False)) + video_id, f4m_id=format_id or 'hds', fatal=False)) else: f = { 'format_id': format_id, @@ -117,18 +120,19 @@ class TurnerBaseIE(InfoExtractor): 'height': int(mobj.group('height')), 'tbr': int_or_none(mobj.group('bitrate')), }) - elif format_id.isdigit(): - f['tbr'] = int(format_id) - else: - mobj = re.match(r'ios_(audio|[0-9]+)$', format_id) - if mobj: - if mobj.group(1) == 'audio': - f.update({ - 'vcodec': 'none', - 'ext': 'm4a', - }) - else: - f['tbr'] = int(mobj.group(1)) + elif isinstance(format_id, compat_str): + if format_id.isdigit(): + f['tbr'] = int(format_id) + else: + mobj = re.match(r'ios_(audio|[0-9]+)$', format_id) + if mobj: + if mobj.group(1) == 'audio': + f.update({ + 'vcodec': 'none', + 'ext': 'm4a', + }) + else: + f['tbr'] = int(mobj.group(1)) formats.append(f) self._sort_formats(formats) From 7be15d40976bf40f44bc47301d4e839a1e171e52 Mon Sep 17 00:00:00 2001 From: PeterDing <dfhayst@gmail.com> Date: Fri, 29 Jul 2016 23:21:50 +0800 Subject: [PATCH 318/775] [bilibili] Support episodes [extractor/bilibili] add md5 for testing [extractor/bilibili] remove unnecessary headers [extractor/bilibili] correct _TESTS; find thumbnail for episode [extractor/bilibili] [Fix] restore removed tests --- youtube_dl/extractor/bilibili.py | 40 ++++++++++++++++++++++++++------ 1 file changed, 33 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index a332fbb69..35313c62b 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -12,9 +12,13 @@ from ..utils import ( unified_timestamp, ) +HEADERS = { + 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8', +} + class BiliBiliIE(InfoExtractor): - _VALID_URL = r'https?://www\.bilibili\.(?:tv|com)/video/av(?P<id>\d+)' + _VALID_URL = r'https?://(www.|bangumi.|)bilibili\.(?:tv|com)/(video/av|anime/v/)(?P<id>\d+)' _TESTS = [{ 'url': 'http://www.bilibili.tv/video/av1074402/', @@ -77,6 +81,17 @@ class BiliBiliIE(InfoExtractor): 'skip_download': True, }, 'expected_warnings': ['upload time'], + }, { + 'url': 'http://bangumi.bilibili.com/anime/v/40068', + 'md5': '08d539a0884f3deb7b698fb13ba69696', + 'info_dict': { + 'id': '40068', + 'ext': 'mp4', + 'duration': 1402.357, + 'title': '混沌武士 : 第7集 四面楚歌 A Risky Racket', + 'description': 
"故事发生在日本的江户时代。风是一个小酒馆的打工女。一日,酒馆里来了一群恶霸,虽然他们的举动令风十分不满,但是毕竟风只是一届女流,无法对他们采取什么行动,只能在心里嘟哝。这时,酒家里又进来了个“不良份子”无幻,说以50个丸子帮她搞定这群人,风觉得他莫名其妙,也就没多搭理他。而在这时,风因为一个意外而将茶水泼在了恶霸头领——龙次郎身上。愤怒的恶霸们欲将风的手指砍掉,风在无奈中大喊道:“丸子100个!”……   另一方面,龙次郎的父亲也就是当地的代官,依仗自己有着雄厚的保镖实力,在当地欺压穷人,当看到一穷人无法交齐足够的钱过桥时,欲下令将其杀死,武士仁看不惯这一幕,于是走上前,与代官的保镖交手了……   酒馆内,因为风答应给无幻100个团子,无幻将恶霸们打败了,就在这时,仁进来了。好战的无幻立刻向仁发了战书,最后两败俱伤,被代官抓入牢房,预计第二天斩首……   得知该状况的风,为报救命之恩,来到了刑场,利用烟花救出了无幻和仁。而风则以救命恩人的身份,命令二人和她一起去寻找带着向日葵香味的武士……(by百科)", + 'thumbnail': 're:^http?://.+\.jpg', + }, }] _APP_KEY = '6f90a59ac58a4123' @@ -84,13 +99,20 @@ class BiliBiliIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - cid = compat_parse_qs(self._search_regex( - [r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)', - r'<iframe[^>]+src="https://secure\.bilibili\.com/secure,([^"]+)"'], - webpage, 'player parameters'))['cid'][0] + _is_episode = 'anime/v' in url + if not _is_episode: + cid = compat_parse_qs(self._search_regex( + [r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)', + r'<iframe[^>]+src="https://secure\.bilibili\.com/secure,([^"]+)"'], + webpage, 'player parameters'))['cid'][0] + else: + url_t = 'http://bangumi.bilibili.com/web_api/get_source' + js = self._download_json(url_t, video_id, + data='episode_id=%s' % video_id, + headers=HEADERS) + cid = js['result']['cid'] payload = 'appkey=%s&cid=%s&otype=json&quality=2&type=mp4' % (self._APP_KEY, cid) sign = hashlib.md5((payload + self._BILIBILI_KEY).encode('utf-8')).hexdigest() @@ -125,6 +147,10 @@ class BiliBiliIE(InfoExtractor): description = self._html_search_meta('description', webpage) timestamp = unified_timestamp(self._html_search_regex( r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time', fatal=False)) + if _is_episode: + thumbnail = self._html_search_meta('og:image', webpage) + else: + thumbnail = self._html_search_meta('thumbnailUrl', webpage) # TODO 'view_count' requires deobfuscating Javascript info = { @@ -132,7 +158,7 @@ class BiliBiliIE(InfoExtractor): 'title': title, 'description': description, 'timestamp': timestamp, - 'thumbnail': self._html_search_meta('thumbnailUrl', webpage), + 'thumbnail': thumbnail, 'duration': float_or_none(video_info.get('timelength'), scale=1000), } From 3fb2a23029934dcbf6fe2cd283d851506dcdff5e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 29 Aug 2016 22:40:35 +0700 Subject: [PATCH 319/775] [adultswim] Extract video info from onlineOriginals (Closes #10492) --- youtube_dl/extractor/adultswim.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/adultswim.py b/youtube_dl/extractor/adultswim.py index ef3cc2a61..5d0bf5a68 100644 --- a/youtube_dl/extractor/adultswim.py +++ b/youtube_dl/extractor/adultswim.py @@ -4,7 +4,10 @@ from __future__ import unicode_literals import re from .turner import TurnerBaseIE -from ..utils import ExtractorError +from ..utils import ( + ExtractorError, + int_or_none, +) class AdultSwimIE(TurnerBaseIE): @@ -144,7 +147,10 @@ class AdultSwimIE(TurnerBaseIE): if bootstrapped_data.get('slugged_video', {}).get('slug') == episode_path: video_info = bootstrapped_data['slugged_video'] if not video_info: - video_info = bootstrapped_data.get('heroMetadata', {}).get('trailer').get('video') + video_info = bootstrapped_data.get( + 'heroMetadata', {}).get('trailer', {}).get('video') + if not video_info: + video_info = bootstrapped_data.get('onlineOriginals', [None])[0] if not video_info: raise ExtractorError('Unable to find 
video info') @@ -167,8 +173,9 @@ class AdultSwimIE(TurnerBaseIE): episode_id = video_info['id'] episode_title = video_info['title'] - episode_description = video_info['description'] - episode_duration = video_info.get('duration') + episode_description = video_info.get('description') + episode_duration = int_or_none(video_info.get('duration')) + view_count = int_or_none(video_info.get('views')) entries = [] for part_num, segment_id in enumerate(segment_ids): @@ -197,5 +204,6 @@ class AdultSwimIE(TurnerBaseIE): 'entries': entries, 'title': '%s - %s' % (show_title, episode_title), 'description': episode_description, - 'duration': episode_duration + 'duration': episode_duration, + 'view_count': view_count, } From 5a80e7b43a7abc83e104f1cd711d8fe7985c30eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 29 Aug 2016 22:44:15 +0700 Subject: [PATCH 320/775] [turner] Skip invalid subtitles' URLs --- youtube_dl/extractor/turner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/turner.py b/youtube_dl/extractor/turner.py index 6df22fd24..f5736bd15 100644 --- a/youtube_dl/extractor/turner.py +++ b/youtube_dl/extractor/turner.py @@ -140,7 +140,7 @@ class TurnerBaseIE(InfoExtractor): for source in video_data.findall('closedCaptions/source'): for track in source.findall('track'): track_url = track.get('url') - if not track_url: + if not isinstance(track_url, compat_str) or track_url.endswith('/big'): continue lang = track.get('lang') or track.get('label') or 'en' subtitles.setdefault(lang, []).append({ From a06e1498aa7fc02e6db5c6ec8411e90f210ce2c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 29 Aug 2016 22:54:33 +0700 Subject: [PATCH 321/775] [kusi] Update test --- youtube_dl/extractor/kusi.py | 27 ++++++++------------------- 1 file changed, 8 insertions(+), 19 deletions(-) diff --git a/youtube_dl/extractor/kusi.py b/youtube_dl/extractor/kusi.py index 12cc56e44..2e66e8cf9 100644 --- a/youtube_dl/extractor/kusi.py +++ b/youtube_dl/extractor/kusi.py @@ -18,31 +18,20 @@ from ..utils import ( class KUSIIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?kusi\.com/(?P<path>story/.+|video\?clipId=(?P<clipId>\d+))' _TESTS = [{ - 'url': 'http://www.kusi.com/story/31183873/turko-files-case-closed-put-on-hold', - 'md5': 'f926e7684294cf8cb7bdf8858e1b3988', + 'url': 'http://www.kusi.com/story/32849881/turko-files-refused-to-help-it-aint-right', + 'md5': '4e76ce8e53660ce9697d06c0ba6fc47d', 'info_dict': { - 'id': '12203019', + 'id': '12689020', 'ext': 'mp4', - 'title': 'Turko Files: Case Closed! & Put On Hold!', - 'duration': 231.0, - 'upload_date': '20160210', - 'timestamp': 1455087571, + 'title': "Turko Files: Refused to Help, It Ain't Right!", + 'duration': 223.586, + 'upload_date': '20160826', + 'timestamp': 1472233118, 'thumbnail': 're:^https?://.*\.jpg$' }, }, { 'url': 'http://kusi.com/video?clipId=12203019', - 'info_dict': { - 'id': '12203019', - 'ext': 'mp4', - 'title': 'Turko Files: Case Closed! 
& Put On Hold!', - 'duration': 231.0, - 'upload_date': '20160210', - 'timestamp': 1455087571, - 'thumbnail': 're:^https?://.*\.jpg$' - }, - 'params': { - 'skip_download': True, # Same as previous one - }, + 'only_matching': True, }] def _real_extract(self, url): From fe45b0e06081752ff3617cdfae701408a1d8256a Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Mon, 29 Aug 2016 18:17:32 +0100 Subject: [PATCH 322/775] [9c9media] fix multiple stacks extraction and extract more metadata(#10016) --- youtube_dl/extractor/extractors.py | 5 +- youtube_dl/extractor/ninecninemedia.py | 126 +++++++++++++++++++------ 2 files changed, 103 insertions(+), 28 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 20e85703f..21efa96b2 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -554,7 +554,10 @@ from .nick import ( NickDeIE, ) from .niconico import NiconicoIE, NiconicoPlaylistIE -from .ninecninemedia import NineCNineMediaIE +from .ninecninemedia import ( + NineCNineMediaStackIE, + NineCNineMediaIE, +) from .ninegag import NineGagIE from .ninenow import NineNowIE from .nintendo import NintendoIE diff --git a/youtube_dl/extractor/ninecninemedia.py b/youtube_dl/extractor/ninecninemedia.py index d889245ad..ec4d675e2 100644 --- a/youtube_dl/extractor/ninecninemedia.py +++ b/youtube_dl/extractor/ninecninemedia.py @@ -4,40 +4,36 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import compat_str from ..utils import ( parse_iso8601, - parse_duration, - ExtractorError + float_or_none, + ExtractorError, + int_or_none, ) -class NineCNineMediaIE(InfoExtractor): - _VALID_URL = r'9c9media:(?P<destination_code>[^:]+):(?P<id>\d+)' +class NineCNineMediaBaseIE(InfoExtractor): + _API_BASE_TEMPLATE = 'http://capi.9c9media.com/destinations/%s/platforms/desktop/contents/%s/' + + +class NineCNineMediaStackIE(NineCNineMediaBaseIE): + IE_NAME = '9c9media:stack' + _VALID_URL = r'9c9media:stack:(?P<destination_code>[^:]+):(?P<content_id>\d+):(?P<content_package>\d+):(?P<id>\d+)' def _real_extract(self, url): - destination_code, video_id = re.match(self._VALID_URL, url).groups() - api_base_url = 'http://capi.9c9media.com/destinations/%s/platforms/desktop/contents/%s/' % (destination_code, video_id) - content = self._download_json(api_base_url, video_id, query={ - '$include': '[contentpackages]', - }) - title = content['Name'] - if len(content['ContentPackages']) > 1: - raise ExtractorError('multiple content packages') - content_package = content['ContentPackages'][0] - stacks_base_url = api_base_url + 'contentpackages/%s/stacks/' % content_package['Id'] - stacks = self._download_json(stacks_base_url, video_id)['Items'] - if len(stacks) > 1: - raise ExtractorError('multiple stacks') - stack = stacks[0] - stack_base_url = '%s%s/manifest.' % (stacks_base_url, stack['Id']) + destination_code, content_id, package_id, stack_id = re.match(self._VALID_URL, url).groups() + stack_base_url_template = self._API_BASE_TEMPLATE + 'contentpackages/%s/stacks/%s/manifest.' 
+ stack_base_url = stack_base_url_template % (destination_code, content_id, package_id, stack_id) + formats = [] formats.extend(self._extract_m3u8_formats( - stack_base_url + 'm3u8', video_id, 'mp4', + stack_base_url + 'm3u8', stack_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) formats.extend(self._extract_f4m_formats( - stack_base_url + 'f4m', video_id, + stack_base_url + 'f4m', stack_id, f4m_id='hds', fatal=False)) - mp4_url = self._download_webpage(stack_base_url + 'pd', video_id, fatal=False) + mp4_url = self._download_webpage(stack_base_url + 'pd', stack_id, fatal=False) if mp4_url: formats.append({ 'url': mp4_url, @@ -46,10 +42,86 @@ class NineCNineMediaIE(InfoExtractor): self._sort_formats(formats) return { - 'id': video_id, - 'title': title, - 'description': content.get('Desc') or content.get('ShortDesc'), - 'timestamp': parse_iso8601(content.get('BroadcastDateTime')), - 'duration': parse_duration(content.get('BroadcastTime')), + 'id': stack_id, 'formats': formats, } + + +class NineCNineMediaIE(NineCNineMediaBaseIE): + IE_NAME = '9c9media' + _VALID_URL = r'9c9media:(?P<destination_code>[^:]+):(?P<id>\d+)' + + def _real_extract(self, url): + destination_code, content_id = re.match(self._VALID_URL, url).groups() + api_base_url = self._API_BASE_TEMPLATE % (destination_code, content_id) + content = self._download_json(api_base_url, content_id, query={ + '$include': '[Media,Season,ContentPackages]', + }) + title = content['Name'] + if len(content['ContentPackages']) > 1: + raise ExtractorError('multiple content packages') + content_package = content['ContentPackages'][0] + package_id = content_package['Id'] + content_package_url = api_base_url + 'contentpackages/%s/' % package_id + content_package = self._download_json(content_package_url, content_id) + + if content_package.get('Constraints', {}).get('Security', {}).get('Type') == 'adobe-drm': + raise ExtractorError('This video is DRM protected.', expected=True) + + stacks = self._download_json(content_package_url + 'stacks/', package_id)['Items'] + multistacks = len(stacks) > 1 + + thumbnails = [] + for image in content.get('Images', []): + image_url = image.get('Url') + if not image_url: + continue + thumbnails.append({ + 'url': image_url, + 'width': int_or_none(image.get('Width')), + 'height': int_or_none(image.get('Height')), + }) + + tags, categories = [], [] + for source_name, container in (('Tags', tags), ('Genres', categories)): + for e in content.get(source_name, []): + e_name = e.get('Name') + if not e_name: + continue + container.append(e_name) + + description = content.get('Desc') or content.get('ShortDesc') + season = content.get('Season', {}) + base_info = { + 'description': description, + 'timestamp': parse_iso8601(content.get('BroadcastDateTime')), + 'episode_number': int_or_none(content.get('Episode')), + 'season': season.get('Name'), + 'season_number': season.get('Number'), + 'season_id': season.get('Id'), + 'series': content.get('Media', {}).get('Name'), + 'tags': tags, + 'categories': categories, + } + + entries = [] + for stack in stacks: + stack_id = compat_str(stack['Id']) + entry = { + '_type': 'url_transparent', + 'url': '9c9media:stack:%s:%s:%s:%s' % (destination_code, content_id, package_id, stack_id), + 'id': stack_id, + 'title': '%s_part%s' % (title, stack['Name']) if multistacks else title, + 'duration': float_or_none(stack.get('Duration')), + 'ie_key': 'NineCNineMediaStack', + } + entry.update(base_info) + entries.append(entry) + + return { + '_type': 'multi_video', + 'id': content_id, + 'title': 
title, + 'description': description, + 'entries': entries, + } From 42e05be8671e149f79307145eda78892003279dc Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Mon, 29 Aug 2016 18:20:58 +0100 Subject: [PATCH 323/775] [ctv] add support for (tsn,bnn,thecomedynetwork).ca websites(#10016) --- youtube_dl/extractor/ctv.py | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/ctv.py b/youtube_dl/extractor/ctv.py index 5807fbac9..a1fe86316 100644 --- a/youtube_dl/extractor/ctv.py +++ b/youtube_dl/extractor/ctv.py @@ -1,11 +1,13 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor class CTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?ctv\.ca/video/player\?vid=(?P<id>[0-9.]+)' + _VALID_URL = r'https?://(?:www\.)?(?P<domain>ctv|tsn|bnn|thecomedynetwork)\.ca/.*?(?:\bvid=|-vid|~|%7E)(?P<id>[0-9.]+)' _TESTS = [{ 'url': 'http://www.ctv.ca/video/player?vid=706966', 'md5': 'ff2ebbeae0aa2dcc32a830c3fd69b7b0', @@ -18,13 +20,27 @@ class CTVIE(InfoExtractor): 'timestamp': 1442624700, }, 'expected_warnings': ['HTTP Error 404'], + }, { + 'url': 'http://www.thecomedynetwork.ca/video/player?vid=923582', + 'only_matching': True, + }, { + 'url': 'http://www.tsn.ca/video/expectations-high-for-milos-raonic-at-us-open~939549', + 'only_matching': True, + }, { + 'url': 'http://www.bnn.ca/video/berman-s-call-part-two-viewer-questions~939654', + 'only_matching': True, + }, { + 'url': 'http://www.ctv.ca/YourMorning/Video/S1E6-Monday-August-29-2016-vid938009', + 'only_matching': True, }] def _real_extract(self, url): - video_id = self._match_id(url) + domain, video_id = re.match(self._VALID_URL, url).groups() + if domain == 'thecomedynetwork': + domain = 'comedy' return { '_type': 'url_transparent', 'id': video_id, - 'url': '9c9media:ctv_web:%s' % video_id, + 'url': '9c9media:%s_web:%s' % (domain, video_id), 'ie_key': 'NineCNineMedia', } From 1fe48afea5f203cbcb29c0d2984b7b850df8103f Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Mon, 29 Aug 2016 18:23:21 +0100 Subject: [PATCH 324/775] [cnn] update _TEST for CNNBlogsIE and CNNArticleIE(closes #10489) --- youtube_dl/extractor/cnn.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/cnn.py b/youtube_dl/extractor/cnn.py index 1bf87f6ea..bb42f35bd 100644 --- a/youtube_dl/extractor/cnn.py +++ b/youtube_dl/extractor/cnn.py @@ -102,12 +102,13 @@ class CNNBlogsIE(InfoExtractor): 'url': 'http://reliablesources.blogs.cnn.com/2014/02/09/criminalizing-journalism/', 'md5': '3e56f97b0b6ffb4b79f4ea0749551084', 'info_dict': { - 'id': 'bestoftv/2014/02/09/criminalizing-journalism.cnn', + 'id': 'criminalizing-journalism', 'ext': 'mp4', 'title': 'Criminalizing journalism?', 'description': 'Glenn Greenwald responds to comments made this week on Capitol Hill that journalists could be criminal accessories.', 'upload_date': '20140209', }, + 'expected_warnings': ['Failed to download m3u8 information'], 'add_ie': ['CNN'], } @@ -127,12 +128,13 @@ class CNNArticleIE(InfoExtractor): 'url': 'http://www.cnn.com/2014/12/21/politics/obama-north-koreas-hack-not-war-but-cyber-vandalism/', 'md5': '689034c2a3d9c6dc4aa72d65a81efd01', 'info_dict': { - 'id': 'bestoftv/2014/12/21/ip-north-korea-obama.cnn', + 'id': 'ip-north-korea-obama', 'ext': 'mp4', 'title': 'Obama: Cyberattack not an act of war', - 'description': 'md5:51ce6750450603795cad0cdfbd7d05c5', + 'description': 
'md5:0a802a40d2376f60e6b04c8d5bcebc4b', 'upload_date': '20141221', }, + 'expected_warnings': ['Failed to download m3u8 information'], 'add_ie': ['CNN'], } From da30a20a4d8b0ece61c271a5d0f0c6de2817ef5f Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Mon, 29 Aug 2016 19:26:53 +0100 Subject: [PATCH 325/775] [turner,cnn] move a check for wrong timestamp to CNNIE --- youtube_dl/extractor/cnn.py | 4 ++++ youtube_dl/extractor/turner.py | 9 ++++----- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/cnn.py b/youtube_dl/extractor/cnn.py index bb42f35bd..a51b239cc 100644 --- a/youtube_dl/extractor/cnn.py +++ b/youtube_dl/extractor/cnn.py @@ -83,6 +83,10 @@ class CNNIE(TurnerBaseIE): }, } + def _extract_timestamp(self, video_data): + # TODO: fix timestamp extraction + return None + def _real_extract(self, url): sub_domain, path, page_title = re.match(self._VALID_URL, url).groups() if sub_domain not in ('money', 'edition'): diff --git a/youtube_dl/extractor/turner.py b/youtube_dl/extractor/turner.py index f5736bd15..64fdcc56e 100644 --- a/youtube_dl/extractor/turner.py +++ b/youtube_dl/extractor/turner.py @@ -17,6 +17,9 @@ from ..utils import ( class TurnerBaseIE(InfoExtractor): + def _extract_timestamp(self, video_data): + return int_or_none(xpath_attr(video_data, 'dateCreated', 'uts')) + def _extract_cvp_info(self, data_src, video_id, path_data={}): video_data = self._download_xml(data_src, video_id) video_id = video_data.attrib['id'].split('/')[-1].split('.')[0] @@ -159,10 +162,6 @@ class TurnerBaseIE(InfoExtractor): 'height': int_or_none(image.get('height')), } for image in video_data.findall('images/image')] - timestamp = None - if 'cnn.com' not in data_src: - timestamp = int_or_none(xpath_attr(video_data, 'dateCreated', 'uts')) - return { 'id': video_id, 'title': title, @@ -171,7 +170,7 @@ class TurnerBaseIE(InfoExtractor): 'thumbnails': thumbnails, 'description': xpath_text(video_data, 'description'), 'duration': parse_duration(xpath_text(video_data, 'length') or xpath_text(video_data, 'trt')), - 'timestamp': timestamp, + 'timestamp': self._extract_timestamp(video_data), 'upload_date': xpath_attr(video_data, 'metas', 'version'), 'series': xpath_text(video_data, 'showTitle'), 'season_number': int_or_none(xpath_text(video_data, 'seasonNumber')), From 3c77a54d5dfa1097d5e3a5eaa0c631b5b01e93ce Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 30 Aug 2016 10:46:48 +0100 Subject: [PATCH 326/775] [turner] keep video id intact --- youtube_dl/extractor/cnn.py | 12 ++++++------ youtube_dl/extractor/nba.py | 11 +++++++---- youtube_dl/extractor/turner.py | 2 +- 3 files changed, 14 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/cnn.py b/youtube_dl/extractor/cnn.py index a51b239cc..5fc311f53 100644 --- a/youtube_dl/extractor/cnn.py +++ b/youtube_dl/extractor/cnn.py @@ -15,7 +15,7 @@ class CNNIE(TurnerBaseIE): 'url': 'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn', 'md5': '3e6121ea48df7e2259fe73a0628605c4', 'info_dict': { - 'id': 'nadal-1-on-1', + 'id': 'sports/2013/06/09/nadal-1-on-1.cnn', 'ext': 'mp4', 'title': 'Nadal wins 8th French Open title', 'description': 'World Sport\'s Amanda Davies chats with 2013 French Open champion Rafael Nadal.', @@ -27,7 +27,7 @@ class CNNIE(TurnerBaseIE): 'url': 
'http://edition.cnn.com/video/?/video/us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fcnn_topstories+%28RSS%3A+Top+Stories%29', 'md5': 'b5cc60c60a3477d185af8f19a2a26f4e', 'info_dict': { - 'id': 'sot-student-gives-epic-speech', + 'id': 'us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology', 'ext': 'mp4', 'title': "Student's epic speech stuns new freshmen", 'description': "A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from \"2001: A Space Odyssey.\"", @@ -38,7 +38,7 @@ class CNNIE(TurnerBaseIE): 'url': 'http://www.cnn.com/video/data/2.0/video/living/2014/12/22/growing-america-nashville-salemtown-board-episode-1.hln.html', 'md5': 'f14d02ebd264df951feb2400e2c25a1b', 'info_dict': { - 'id': 'growing-america-nashville-salemtown-board-episode-1', + 'id': 'living/2014/12/22/growing-america-nashville-salemtown-board-episode-1.hln', 'ext': 'mp4', 'title': 'Nashville Ep. 1: Hand crafted skateboards', 'description': 'md5:e7223a503315c9f150acac52e76de086', @@ -49,7 +49,7 @@ class CNNIE(TurnerBaseIE): 'url': 'http://money.cnn.com/video/news/2016/08/19/netflix-stunning-stats.cnnmoney/index.html', 'md5': '52a515dc1b0f001cd82e4ceda32be9d1', 'info_dict': { - 'id': 'netflix-stunning-stats', + 'id': '/video/news/2016/08/19/netflix-stunning-stats.cnnmoney', 'ext': 'mp4', 'title': '5 stunning stats about Netflix', 'description': 'Did you know that Netflix has more than 80 million members? Here are five facts about the online video distributor that you probably didn\'t know.', @@ -106,7 +106,7 @@ class CNNBlogsIE(InfoExtractor): 'url': 'http://reliablesources.blogs.cnn.com/2014/02/09/criminalizing-journalism/', 'md5': '3e56f97b0b6ffb4b79f4ea0749551084', 'info_dict': { - 'id': 'criminalizing-journalism', + 'id': 'bestoftv/2014/02/09/criminalizing-journalism.cnn', 'ext': 'mp4', 'title': 'Criminalizing journalism?', 'description': 'Glenn Greenwald responds to comments made this week on Capitol Hill that journalists could be criminal accessories.', @@ -132,7 +132,7 @@ class CNNArticleIE(InfoExtractor): 'url': 'http://www.cnn.com/2014/12/21/politics/obama-north-koreas-hack-not-war-but-cyber-vandalism/', 'md5': '689034c2a3d9c6dc4aa72d65a81efd01', 'info_dict': { - 'id': 'ip-north-korea-obama', + 'id': 'bestoftv/2014/12/21/ip-north-korea-obama.cnn', 'ext': 'mp4', 'title': 'Obama: Cyberattack not an act of war', 'description': 'md5:0a802a40d2376f60e6b04c8d5bcebc4b', diff --git a/youtube_dl/extractor/nba.py b/youtube_dl/extractor/nba.py index aabd5b670..53561961c 100644 --- a/youtube_dl/extractor/nba.py +++ b/youtube_dl/extractor/nba.py @@ -39,18 +39,19 @@ class NBAIE(TurnerBaseIE): 'url': 'http://watch.nba.com/video/channels/playoffs/2015/05/20/0041400301-cle-atl-recap.nba', 'md5': 'b2b39b81cf28615ae0c3360a3f9668c4', 'info_dict': { - 'id': '0041400301-cle-atl-recap', + 'id': 'channels/playoffs/2015/05/20/0041400301-cle-atl-recap.nba', 'ext': 'mp4', 'title': 'Hawks vs. 
Cavaliers Game 1', 'description': 'md5:8094c3498d35a9bd6b1a8c396a071b4d', 'duration': 228, 'timestamp': 1432134543, 'upload_date': '20150520', - } + }, + 'expected_warnings': ['Unable to download f4m manifest'], }, { 'url': 'http://www.nba.com/clippers/news/doc-rivers-were-not-trading-blake', 'info_dict': { - 'id': '1455672027478-Doc_Feb16_720', + 'id': 'teams/clippers/2016/02/17/1455672027478-Doc_Feb16_720.mov-297324', 'ext': 'mp4', 'title': 'Practice: Doc Rivers - 2/16/16', 'description': 'Head Coach Doc Rivers addresses the media following practice.', @@ -61,6 +62,7 @@ class NBAIE(TurnerBaseIE): # m3u8 download 'skip_download': True, }, + 'expected_warnings': ['Unable to download f4m manifest'], }, { 'url': 'http://www.nba.com/timberwolves/wiggins-shootaround#', 'info_dict': { @@ -75,7 +77,7 @@ class NBAIE(TurnerBaseIE): }, { 'url': 'http://www.nba.com/timberwolves/wiggins-shootaround#', 'info_dict': { - 'id': 'Wigginsmp4-3462601', + 'id': 'teams/timberwolves/2014/12/12/Wigginsmp4-3462601', 'ext': 'mp4', 'title': 'Shootaround Access - Dec. 12 | Andrew Wiggins', 'description': 'Wolves rookie Andrew Wiggins addresses the media after Friday\'s shootaround.', @@ -87,6 +89,7 @@ class NBAIE(TurnerBaseIE): # m3u8 download 'skip_download': True, }, + 'expected_warnings': ['Unable to download f4m manifest'], }] _PAGE_SIZE = 30 diff --git a/youtube_dl/extractor/turner.py b/youtube_dl/extractor/turner.py index 64fdcc56e..b59dafda6 100644 --- a/youtube_dl/extractor/turner.py +++ b/youtube_dl/extractor/turner.py @@ -22,7 +22,7 @@ class TurnerBaseIE(InfoExtractor): def _extract_cvp_info(self, data_src, video_id, path_data={}): video_data = self._download_xml(data_src, video_id) - video_id = video_data.attrib['id'].split('/')[-1].split('.')[0] + video_id = video_data.attrib['id'] title = xpath_text(video_data, 'headline', fatal=True) # rtmp_src = xpath_text(video_data, 'akamai/src') # if rtmp_src: From 245023a86145f7074dacdab4c735dea268d766ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 30 Aug 2016 23:51:18 +0700 Subject: [PATCH 327/775] [pyvideo] Fix extraction (Closes #10468) --- youtube_dl/extractor/pyvideo.py | 94 +++++++++++++++++++-------------- 1 file changed, 54 insertions(+), 40 deletions(-) diff --git a/youtube_dl/extractor/pyvideo.py b/youtube_dl/extractor/pyvideo.py index cc0416cb8..08ec09183 100644 --- a/youtube_dl/extractor/pyvideo.py +++ b/youtube_dl/extractor/pyvideo.py @@ -1,59 +1,73 @@ from __future__ import unicode_literals import re -import os from .common import InfoExtractor +from ..compat import compat_str +from ..utils import int_or_none class PyvideoIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?pyvideo\.org/video/(?P<id>\d+)/(.*)' + _VALID_URL = r'https?://(?:www\.)?pyvideo\.org/(?P<category>[^/]+)/(?P<id>[^/?#&.]+)' - _TESTS = [ - { - 'url': 'http://pyvideo.org/video/1737/become-a-logging-expert-in-30-minutes', - 'md5': '520915673e53a5c5d487c36e0c4d85b5', - 'info_dict': { - 'id': '24_4WWkSmNo', - 'ext': 'webm', - 'title': 'Become a logging expert in 30 minutes', - 'description': 'md5:9665350d466c67fb5b1598de379021f7', - 'upload_date': '20130320', - 'uploader': 'Next Day Video', - 'uploader_id': 'NextDayVideo', - }, - 'add_ie': ['Youtube'], + _TESTS = [{ + 'url': 'http://pyvideo.org/pycon-us-2013/become-a-logging-expert-in-30-minutes.html', + 'info_dict': { + 'id': 'become-a-logging-expert-in-30-minutes', }, - { - 'url': 'http://pyvideo.org/video/2542/gloriajw-spotifywitherikbernhardsson182m4v', - 'md5': 
'5fe1c7e0a8aa5570330784c847ff6d12', - 'info_dict': { - 'id': '2542', - 'ext': 'm4v', - 'title': 'Gloriajw-SpotifyWithErikBernhardsson182', - }, + 'playlist_count': 2, + }, { + 'url': 'http://pyvideo.org/pygotham-2012/gloriajw-spotifywitherikbernhardsson182m4v.html', + 'md5': '5fe1c7e0a8aa5570330784c847ff6d12', + 'info_dict': { + 'id': '2542', + 'ext': 'm4v', + 'title': 'Gloriajw-SpotifyWithErikBernhardsson182.m4v', }, - ] + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) + category = mobj.group('category') video_id = mobj.group('id') - webpage = self._download_webpage(url, video_id) + entries = [] - m_youtube = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', webpage) - if m_youtube is not None: - return self.url_result(m_youtube.group(1), 'Youtube') + data = self._download_json( + 'https://raw.githubusercontent.com/pyvideo/data/master/%s/videos/%s.json' + % (category, video_id), video_id, fatal=False) - title = self._html_search_regex( - r'<div class="section">\s*<h3(?:\s+class="[^"]*"[^>]*)?>([^>]+?)</h3>', - webpage, 'title', flags=re.DOTALL) - video_url = self._search_regex( - [r'<source src="(.*?)"', r'<dt>Download</dt>.*?<a href="(.+?)"'], - webpage, 'video url', flags=re.DOTALL) + if data: + print(data) + for video in data['videos']: + video_url = video.get('url') + if video_url: + if video.get('type') == 'youtube': + entries.append(self.url_result(video_url, 'Youtube')) + else: + entries.append({ + 'id': compat_str(data.get('id') or video_id), + 'url': video_url, + 'title': data['title'], + 'description': data.get('description') or data.get('summary'), + 'thumbnail': data.get('thumbnail_url'), + 'duration': int_or_none(data.get('duration')), + }) + else: + webpage = self._download_webpage(url, video_id) + title = self._og_search_title(webpage) + media_urls = self._search_regex( + r'(?s)Media URL:(.+?)</li>', webpage, 'media urls') + for m in re.finditer( + r'<a[^>]+href=(["\'])(?P<url>http.+?)\1', media_urls): + media_url = m.group('url') + if re.match(r'https?://www\.youtube\.com/watch\?v=.*', media_url): + entries.append(self.url_result(media_url, 'Youtube')) + else: + entries.append({ + 'id': video_id, + 'url': media_url, + 'title': title, + }) - return { - 'id': video_id, - 'title': os.path.splitext(title)[0], - 'url': video_url, - } + return self.playlist_result(entries, video_id) From 64fc49aba018ebd51627ddcc92f8fa88f2c499cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 31 Aug 2016 00:29:49 +0700 Subject: [PATCH 328/775] [bandcamp:album] Fix title extraction (Closes #10455) --- youtube_dl/extractor/bandcamp.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index 991ab0676..249c3d956 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -162,6 +162,15 @@ class BandcampAlbumIE(InfoExtractor): 'uploader_id': 'dotscale', }, 'playlist_mincount': 7, + }, { + # with escaped quote in title + 'url': 'https://jstrecords.bandcamp.com/album/entropy-ep', + 'info_dict': { + 'title': '"Entropy" EP', + 'uploader_id': 'jstrecords', + 'id': 'entropy-ep', + }, + 'playlist_mincount': 3, }] def _real_extract(self, url): @@ -176,8 +185,11 @@ class BandcampAlbumIE(InfoExtractor): entries = [ self.url_result(compat_urlparse.urljoin(url, t_path), ie=BandcampIE.ie_key()) for t_path in tracks_paths] - title = self._search_regex( - r'album_title\s*:\s*"(.*?)"', webpage, 'title', fatal=False) 
+ title = self._html_search_regex( + r'album_title\s*:\s*"((?:\\.|[^"\\])+?)"', + webpage, 'title', fatal=False) + if title: + title = title.replace(r'\"', '"') return { '_type': 'playlist', 'uploader_id': uploader_id, From f7043ef39cb73f8501d18d2e1f93997357397ba2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 31 Aug 2016 01:56:15 +0700 Subject: [PATCH 329/775] [soundcloud] Fix _VALID_URL clashes with sets (Closes #10505) --- youtube_dl/extractor/soundcloud.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index aeae931a2..9635c2b49 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -32,7 +32,7 @@ class SoundcloudIE(InfoExtractor): _VALID_URL = r'''(?x)^(?:https?://)? (?:(?:(?:www\.|m\.)?soundcloud\.com/ (?P<uploader>[\w\d-]+)/ - (?!(?:tracks|sets(?:/[^/?#]+)?|reposts|likes|spotlight)/?(?:$|[?#])) + (?!(?:tracks|sets(?:/.+?)?|reposts|likes|spotlight)/?(?:$|[?#])) (?P<title>[\w\d-]+)/? (?P<token>[^?]+?)?(?:[?].*)?$) |(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+) @@ -265,6 +265,9 @@ class SoundcloudSetIE(SoundcloudIE): 'title': 'The Royal Concept EP', }, 'playlist_mincount': 6, + }, { + 'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep/token', + 'only_matching': True, }] def _real_extract(self, url): From a249ab83cb1d7765d787a7b1d050449736aaa789 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 31 Aug 2016 01:56:58 +0700 Subject: [PATCH 330/775] [pyvideo] Remove debugging code --- youtube_dl/extractor/pyvideo.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/pyvideo.py b/youtube_dl/extractor/pyvideo.py index 08ec09183..b8ac93a62 100644 --- a/youtube_dl/extractor/pyvideo.py +++ b/youtube_dl/extractor/pyvideo.py @@ -38,7 +38,6 @@ class PyvideoIE(InfoExtractor): % (category, video_id), video_id, fatal=False) if data: - print(data) for video in data['videos']: video_url = video.get('url') if video_url: From 263fef43dea463ab4b897c8374dbb11c705f061c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 31 Aug 2016 02:37:40 +0700 Subject: [PATCH 331/775] [ChangeLog] Actualize --- ChangeLog | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/ChangeLog b/ChangeLog index e055976c5..7e24b8c6b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,21 @@ version <unreleased> Extractors +* [soundcloud] Fix URL regular expression to avoid clashes with sets (#10505) +* [bandcamp:album] Fix title extraction (#10455) +* [pyvideo] Fix extraction (#10468) ++ [ctv] Add support for tsn.ca, bnn.ca and thecomedynetwork.ca (#10016) +* [9c9media] Extract more metadata +* [9c9media] Fix multiple stacks extraction (#10016) +* [adultswim] Improve video info extraction (#10492) +* [vodplatform] Improve embed regular expression +- [played] Remove extractor (#10470) ++ [tbs] Add extractor for tbs.com and tntdrama.com (#10222) ++ [cartoonnetwork] Add extractor for cartoonnetwork.com (#10110) +* [adultswim] Rework in terms of turner extractor +* [cnn] Rework in terms of turner extractor +* [nba] Rework in terms of turner extractor ++ [turner] Add base extractor for Turner Broadcasting System based sites * [bilibili] Fix extraction (#10375) * [openload] Fix extraction (#10408) From 4fd350611c71571733950ad2473d4148f7bb6a63 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: 
Wed, 31 Aug 2016 02:39:39 +0700 Subject: [PATCH 332/775] release 2016.08.31 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 8 +++++--- youtube_dl/version.py | 2 +- 4 files changed, 10 insertions(+), 8 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index a2fe59f80..2caca5115 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.08.28*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.08.28** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.08.31*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.08.31** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.08.28 +[debug] youtube-dl version 2016.08.31 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 7e24b8c6b..0f8076d96 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2016.08.31 Extractors * [soundcloud] Fix URL regular expression to avoid clashes with sets (#10505) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index bf08697be..42bf291e2 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -13,6 +13,8 @@ - **5min** - **8tracks** - **91porn** + - **9c9media** + - **9c9media:stack** - **9gag** - **9now.com.au** - **abc.net.au** @@ -89,7 +91,7 @@ - **Bet** - **Bigflix** - **Bild**: Bild.de - - **BiliBili** (Currently broken) + - **BiliBili** - **BioBioChileTV** - **BIQLE** - **BleacherReport** @@ -115,6 +117,7 @@ - **Canvas** - **CarambaTV** - **CarambaTVPage** + - **CartoonNetwork** - **cbc.ca** - **cbc.ca:player** - **cbc.ca:watch** @@ -459,7 +462,6 @@ - **nick.de** - **niconico**: ニコニコ動画 - **NiconicoPlaylist** - - **NineCNineMedia** - **Nintendo** - **njoy**: N-JOY - **njoy:embed** @@ -517,7 +519,6 @@ - **Pinkbike** - **Pladform** - **play.fm** - - **played.to** - **PlaysTV** - **Playtvak**: Playtvak.cz, iDNES.cz and Lidovky.cz - **Playvid** @@ -675,6 +676,7 @@ - **Tagesschau** - **tagesschau:player** - **Tass** + - **TBS** - **TDSLifeway** - **teachertube**: teachertube.com videos - **teachertube:user:collection**: teachertube.com user and collection videos diff --git a/youtube_dl/version.py b/youtube_dl/version.py index ee30ca2ad..fe442dd88 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals 
-__version__ = '2016.08.28' +__version__ = '2016.08.31' From 165620e320ecb9213ee9928466a9209e7608f83c Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 30 Aug 2016 21:48:59 +0100 Subject: [PATCH 333/775] [yahoo] extract more and better formats --- youtube_dl/extractor/yahoo.py | 81 +++++++++++++++++++---------------- 1 file changed, 45 insertions(+), 36 deletions(-) diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index b0679dfb7..d7a81ab8c 100644 --- a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -8,7 +8,6 @@ import re from .common import InfoExtractor, SearchInfoExtractor from ..compat import ( compat_urllib_parse, - compat_urllib_parse_urlencode, compat_urlparse, ) from ..utils import ( @@ -17,6 +16,7 @@ from ..utils import ( ExtractorError, int_or_none, mimetype2ext, + determine_ext, ) from .brightcove import BrightcoveNewIE @@ -39,7 +39,7 @@ class YahooIE(InfoExtractor): }, { 'url': 'http://screen.yahoo.com/wired/codefellas-s1-ep12-cougar-lies-103000935.html', - 'md5': 'c3466d2b6d5dd6b9f41ba9ed04c24b23', + 'md5': '251af144a19ebc4a033e8ba91ac726bb', 'info_dict': { 'id': 'd1dedf8c-d58c-38c3-8963-e899929ae0a9', 'ext': 'mp4', @@ -50,7 +50,7 @@ class YahooIE(InfoExtractor): }, { 'url': 'https://screen.yahoo.com/community/community-sizzle-reel-203225340.html?format=embed', - 'md5': '75ffabdb87c16d4ffe8c036dc4d1c136', + 'md5': '7993e572fac98e044588d0b5260f4352', 'info_dict': { 'id': '4fe78544-8d48-39d8-97cd-13f205d9fcdb', 'ext': 'mp4', @@ -61,7 +61,7 @@ class YahooIE(InfoExtractor): }, { 'url': 'https://tw.news.yahoo.com/%E6%95%A2%E5%95%8F%E5%B8%82%E9%95%B7%20%E9%BB%83%E7%A7%80%E9%9C%9C%E6%89%B9%E8%B3%B4%E6%B8%85%E5%BE%B7%20%E9%9D%9E%E5%B8%B8%E9%AB%98%E5%82%B2-034024051.html', - 'md5': '9035d38f88b1782682a3e89f985be5bb', + 'md5': '45c024bad51e63e9b6f6fad7a43a8c23', 'info_dict': { 'id': 'cac903b3-fcf4-3c14-b632-643ab541712f', 'ext': 'mp4', @@ -72,10 +72,10 @@ class YahooIE(InfoExtractor): }, { 'url': 'https://uk.screen.yahoo.com/editor-picks/cute-raccoon-freed-drain-using-091756545.html', - 'md5': '0b51660361f0e27c9789e7037ef76f4b', + 'md5': '71298482f7c64cbb7fa064e4553ff1c1', 'info_dict': { 'id': 'b3affa53-2e14-3590-852b-0e0db6cd1a58', - 'ext': 'mp4', + 'ext': 'webm', 'title': 'Cute Raccoon Freed From Drain\u00a0Using Angle Grinder', 'description': 'md5:f66c890e1490f4910a9953c941dee944', 'duration': 97, @@ -98,7 +98,7 @@ class YahooIE(InfoExtractor): 'id': '154609075', }, 'playlist': [{ - 'md5': 'f8e336c6b66f503282e5f719641d6565', + 'md5': '000887d0dc609bc3a47c974151a40fb8', 'info_dict': { 'id': 'e624c4bc-3389-34de-9dfc-025f74943409', 'ext': 'mp4', @@ -107,7 +107,7 @@ class YahooIE(InfoExtractor): 'duration': 30, }, }, { - 'md5': '958bcb90b4d6df71c56312137ee1cd5a', + 'md5': '81bc74faf10750fe36e4542f9a184c66', 'info_dict': { 'id': '1fc8ada0-718e-3abe-a450-bf31f246d1a9', 'ext': 'mp4', @@ -139,7 +139,7 @@ class YahooIE(InfoExtractor): 'skip': 'Domain name in.lifestyle.yahoo.com gone', }, { 'url': 'https://www.yahoo.com/movies/v/true-story-trailer-173000497.html', - 'md5': 'b17ac378b1134fa44370fb27db09a744', + 'md5': '2a9752f74cb898af5d1083ea9f661b58', 'info_dict': { 'id': '071c4013-ce30-3a93-a5b2-e0413cd4a9d1', 'ext': 'mp4', @@ -168,7 +168,7 @@ class YahooIE(InfoExtractor): }, { # Query result is embedded in webpage, but explicit request to video API fails with geo restriction 'url': 'https://screen.yahoo.com/community/communitary-community-episode-1-ladders-154501237.html', - 'md5': 
'1ddbf7c850777548438e5c4f147c7b8c', + 'md5': '4fbafb9c9b6f07aa8f870629f6671b35', 'info_dict': { 'id': '1f32853c-a271-3eef-8cb6-f6d6872cb504', 'ext': 'mp4', @@ -196,6 +196,7 @@ class YahooIE(InfoExtractor): 'description': 'Galactic', 'title': 'Dolla Diva (feat. Maggie Koerner)', }, + 'skip': 'redirect to https://www.yahoo.com/music', }, ] @@ -213,15 +214,7 @@ class YahooIE(InfoExtractor): entries = [] iframe_urls = re.findall(r'<iframe[^>]+src="(/video/.+?-\d+\.html\?format=embed.*?)"', webpage) for idx, iframe_url in enumerate(iframe_urls): - iframepage = self._download_webpage( - host + iframe_url, display_id, - note='Downloading iframe webpage for video #%d' % idx) - items_json = self._search_regex( - r'mediaItems: (\[.+?\])$', iframepage, 'items', flags=re.MULTILINE, default=None) - if items_json: - items = json.loads(items_json) - video_id = items[0]['id'] - entries.append(self._get_info(video_id, display_id, webpage)) + entries.append(self.url_result(host + iframe_url, 'Yahoo')) if entries: return self.playlist_result(entries, page_id) @@ -246,7 +239,9 @@ class YahooIE(InfoExtractor): if config: sapi = config.get('models', {}).get('applet_model', {}).get('data', {}).get('sapi') if sapi and 'query' in sapi: - return self._extract_info(display_id, sapi, webpage) + info = self._extract_info(display_id, sapi, webpage) + self._sort_formats(info['formats']) + return info items_json = self._search_regex( r'mediaItems: ({.*?})$', webpage, 'items', flags=re.MULTILINE, @@ -292,15 +287,17 @@ class YahooIE(InfoExtractor): formats = [] for s in info['streams']: + tbr = int_or_none(s.get('bitrate')) format_info = { 'width': int_or_none(s.get('width')), 'height': int_or_none(s.get('height')), - 'tbr': int_or_none(s.get('bitrate')), + 'tbr': tbr, } host = s['host'] path = s['path'] if host.startswith('rtmp'): + fmt = 'rtmp' format_info.update({ 'url': host, 'play_path': path, @@ -308,14 +305,18 @@ class YahooIE(InfoExtractor): }) else: if s.get('format') == 'm3u8_playlist': - format_info['protocol'] = 'm3u8_native' - format_info['ext'] = 'mp4' + fmt = 'hls' + format_info.update({ + 'protocol': 'm3u8_native', + 'ext': 'mp4', + }) + else: + fmt = format_info['ext'] = determine_ext(path) format_url = compat_urlparse.urljoin(host, path) format_info['url'] = format_url + format_info['format_id'] = fmt + ('-%d' % tbr if tbr else '') formats.append(format_info) - self._sort_formats(formats) - closed_captions = self._html_search_regex( r'"closedcaptions":(\[[^\]]+\])', webpage, 'closed captions', default='[]') @@ -346,17 +347,25 @@ class YahooIE(InfoExtractor): def _get_info(self, video_id, display_id, webpage): region = self._search_regex( r'\\?"region\\?"\s*:\s*\\?"([^"]+?)\\?"', - webpage, 'region', fatal=False, default='US') - data = compat_urllib_parse_urlencode({ - 'protocol': 'http', - 'region': region.upper(), - }) - query_url = ( - 'https://video.media.yql.yahoo.com/v1/video/sapi/streams/' - '{id}?{data}'.format(id=video_id, data=data)) - query_result = self._download_json( - query_url, display_id, 'Downloading video info') - return self._extract_info(display_id, query_result, webpage) + webpage, 'region', fatal=False, default='US').upper() + formats = [] + info = {} + for fmt in ('webm', 'mp4'): + query_result = self._download_json( + 'https://video.media.yql.yahoo.com/v1/video/sapi/streams/' + video_id, + display_id, 'Downloading %s video info' % fmt, query={ + 'protocol': 'http', + 'region': region, + 'format': fmt, + }) + info = self._extract_info(display_id, query_result, webpage) + 
formats.extend(info['formats']) + formats.extend(self._extract_m3u8_formats( + 'http://video.media.yql.yahoo.com/v1/hls/%s?region=%s' % (video_id, region), + video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) + self._sort_formats(formats) + info['formats'] = formats + return info class YahooSearchIE(SearchInfoExtractor): From 196c6ba06792ec38238631d9173fc146822baa7e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 31 Aug 2016 22:12:37 +0700 Subject: [PATCH 334/775] [facebook] Extract timestamp (Closes #10508) --- youtube_dl/extractor/facebook.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index 0fb781a73..228b0b6d7 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -15,6 +15,7 @@ from ..compat import ( from ..utils import ( error_to_compat_str, ExtractorError, + int_or_none, limit_length, sanitized_Request, urlencode_postdata, @@ -62,6 +63,8 @@ class FacebookIE(InfoExtractor): 'ext': 'mp4', 'title': 're:Did you know Kei Nishikori is the first Asian man to ever reach a Grand Slam', 'uploader': 'Tennis on Facebook', + 'upload_date': '20140908', + 'timestamp': 1410199200, } }, { 'note': 'Video without discernible title', @@ -71,6 +74,8 @@ class FacebookIE(InfoExtractor): 'ext': 'mp4', 'title': 'Facebook video #274175099429670', 'uploader': 'Asif Nawab Butt', + 'upload_date': '20140506', + 'timestamp': 1399398998, }, 'expected_warnings': [ 'title' @@ -78,12 +83,14 @@ class FacebookIE(InfoExtractor): }, { 'note': 'Video with DASH manifest', 'url': 'https://www.facebook.com/video.php?v=957955867617029', - 'md5': '54706e4db4f5ad58fbad82dde1f1213f', + 'md5': 'b2c28d528273b323abe5c6ab59f0f030', 'info_dict': { 'id': '957955867617029', 'ext': 'mp4', 'title': 'When you post epic content on instagram.com/433 8 million followers, this is ...', 'uploader': 'Demy de Zeeuw', + 'upload_date': '20160110', + 'timestamp': 1452431627, }, }, { 'url': 'https://www.facebook.com/maxlayn/posts/10153807558977570', @@ -306,12 +313,16 @@ class FacebookIE(InfoExtractor): if not video_title: video_title = 'Facebook video #%s' % video_id uploader = clean_html(get_element_by_id('fbPhotoPageAuthorName', webpage)) + timestamp = int_or_none(self._search_regex( + r'<abbr[^>]+data-utime=["\'](\d+)', webpage, + 'timestamp', default=None)) info_dict = { 'id': video_id, 'title': video_title, 'formats': formats, 'uploader': uploader, + 'timestamp': timestamp, } return webpage, info_dict From 7a3e849f6eaf51b1d86b843a63664012ced2258c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 31 Aug 2016 22:23:55 +0700 Subject: [PATCH 335/775] [porncom] Extract categories and tags (Closes #10510) --- youtube_dl/extractor/porncom.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/porncom.py b/youtube_dl/extractor/porncom.py index 4baf79688..d85e0294d 100644 --- a/youtube_dl/extractor/porncom.py +++ b/youtube_dl/extractor/porncom.py @@ -26,6 +26,8 @@ class PornComIE(InfoExtractor): 'duration': 551, 'view_count': int, 'age_limit': 18, + 'categories': list, + 'tags': list, }, }, { 'url': 'http://se.porn.com/videos/marsha-may-rides-seth-on-top-of-his-thick-cock-2658067', @@ -75,7 +77,14 @@ class PornComIE(InfoExtractor): self._sort_formats(formats) view_count = str_to_int(self._search_regex( - r'class=["\']views["\'][^>]*><p>([\d,.]+)', webpage, 'view count')) + 
r'class=["\']views["\'][^>]*><p>([\d,.]+)', webpage, + 'view count', fatal=False)) + + def extract_list(kind): + s = self._search_regex( + r'(?s)<p[^>]*>%s:(.+?)</p>' % kind.capitalize(), + webpage, kind, fatal=False) + return re.findall(r'<a[^>]+>([^<]+)</a>', s or '') return { 'id': video_id, @@ -86,4 +95,6 @@ class PornComIE(InfoExtractor): 'view_count': view_count, 'formats': formats, 'age_limit': 18, + 'categories': extract_list('categories'), + 'tags': extract_list('tags'), } From f8fd510eb4b2733a5c083d767d45baa88b289298 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 31 Aug 2016 18:31:49 +0100 Subject: [PATCH 336/775] [limelight] skip ism manifests and reduce requests --- youtube_dl/extractor/limelight.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/limelight.py b/youtube_dl/extractor/limelight.py index a425bafe3..6752ffee2 100644 --- a/youtube_dl/extractor/limelight.py +++ b/youtube_dl/extractor/limelight.py @@ -34,11 +34,12 @@ class LimelightBaseIE(InfoExtractor): def _extract_info(self, streams, mobile_urls, properties): video_id = properties['media_id'] formats = [] - + urls = [] for stream in streams: stream_url = stream.get('url') - if not stream_url or stream.get('drmProtected'): + if not stream_url or stream.get('drmProtected') or stream_url in urls: continue + urls.append(stream_url) ext = determine_ext(stream_url) if ext == 'f4m': formats.extend(self._extract_f4m_formats( @@ -58,9 +59,11 @@ class LimelightBaseIE(InfoExtractor): format_id = 'rtmp' if stream.get('videoBitRate'): format_id += '-%d' % int_or_none(stream['videoBitRate']) + http_url = 'http://%s/%s' % (rtmp.group('host').replace('csl.', 'cpl.'), rtmp.group('playpath')[4:]) + urls.append(http_url) http_fmt = fmt.copy() http_fmt.update({ - 'url': 'http://%s/%s' % (rtmp.group('host').replace('csl.', 'cpl.'), rtmp.group('playpath')[4:]), + 'url': http_url, 'format_id': format_id.replace('rtmp', 'http'), }) formats.append(http_fmt) @@ -76,8 +79,9 @@ class LimelightBaseIE(InfoExtractor): for mobile_url in mobile_urls: media_url = mobile_url.get('mobileUrl') format_id = mobile_url.get('targetMediaPlatform') - if not media_url or format_id == 'Widevine': + if not media_url or format_id in ('Widevine', 'SmoothStreaming') or media_url in urls: continue + urls.append(media_url) ext = determine_ext(media_url) if ext == 'm3u8': formats.extend(self._extract_m3u8_formats( From 2896dd73bc2c9844175258086c0300395722e5c9 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 1 Sep 2016 08:00:13 +0100 Subject: [PATCH 337/775] [cbs] extract once formats(closes #10515) --- youtube_dl/extractor/cbs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/cbs.py b/youtube_dl/extractor/cbs.py index c72ed2dbb..3f4dea40c 100644 --- a/youtube_dl/extractor/cbs.py +++ b/youtube_dl/extractor/cbs.py @@ -51,7 +51,7 @@ class CBSIE(CBSBaseIE): path = 'dJ5BDC/media/guid/2198311517/' + guid smil_url = 'http://link.theplatform.com/s/%s?mbr=true' % path formats, subtitles = self._extract_theplatform_smil(smil_url + '&manifest=m3u', guid) - for r in ('HLS&formats=M3U', 'RTMP', 'WIFI', '3G'): + for r in ('OnceURL&formats=M3U', 'HLS&formats=M3U', 'RTMP', 'WIFI', '3G'): try: tp_formats, _ = self._extract_theplatform_smil(smil_url + '&assetTypes=' + r, guid, 'Downloading %s SMIL data' % r.split('&')[0]) formats.extend(tp_formats) From 165c54e97d10705614934d5b1d86d90c06951b7c Mon Sep 17 00:00:00 2001 From: Yen Chi 
Hsuan <yan12125@gmail.com> Date: Thu, 1 Sep 2016 16:28:03 +0800 Subject: [PATCH 338/775] =?UTF-8?q?[southpark.cc.com:espa=C3=B1ol]=20Skip?= =?UTF-8?q?=20geo-restricted=20=5FTESTS?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Breaks https://travis-ci.org/rg3/youtube-dl/jobs/156728175 --- youtube_dl/extractor/southpark.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/southpark.py b/youtube_dl/extractor/southpark.py index a147f7db1..e2a9e45ac 100644 --- a/youtube_dl/extractor/southpark.py +++ b/youtube_dl/extractor/southpark.py @@ -35,6 +35,7 @@ class SouthParkEsIE(SouthParkIE): 'description': 'Cartman Consigue Una Sonda Anal', }, 'playlist_count': 4, + 'skip': 'Geo-restricted', }] From 746a695b362cb602625ed7357294bb18de133883 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 1 Sep 2016 16:42:35 +0800 Subject: [PATCH 339/775] [myvidster] Update _TESTS (closes #10473) --- youtube_dl/extractor/myvidster.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/myvidster.py b/youtube_dl/extractor/myvidster.py index 731c24542..2117d302d 100644 --- a/youtube_dl/extractor/myvidster.py +++ b/youtube_dl/extractor/myvidster.py @@ -13,7 +13,7 @@ class MyVidsterIE(InfoExtractor): 'id': '3685814', 'title': 'md5:7d8427d6d02c4fbcef50fe269980c749', 'upload_date': '20141027', - 'uploader_id': 'utkualp', + 'uploader': 'utkualp', 'ext': 'mp4', 'age_limit': 18, }, From 05d4612947d6dbfaedb8f2a00daa5f29d85f73df Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 1 Sep 2016 16:58:16 +0800 Subject: [PATCH 340/775] [movingimage] Adapt to the new domain name and fix extraction Closes #10466 --- ChangeLog | 6 +++++ youtube_dl/extractor/extractors.py | 2 +- .../extractor/{ssa.py => movingimage.py} | 26 +++++++------------ 3 files changed, 17 insertions(+), 17 deletions(-) rename youtube_dl/extractor/{ssa.py => movingimage.py} (65%) diff --git a/ChangeLog b/ChangeLog index 0f8076d96..877e8112e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version <unreleased> + +Extractors +* [movingimage] Fix for the new site name (#10466) + + version 2016.08.31 Extractors diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 21efa96b2..8d0688f53 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -486,6 +486,7 @@ from .motherless import MotherlessIE from .motorsport import MotorsportIE from .movieclips import MovieClipsIE from .moviezine import MoviezineIE +from .movingimage import MovingImageIE from .msn import MSNIE from .mtv import ( MTVIE, @@ -806,7 +807,6 @@ from .srgssr import ( SRGSSRPlayIE, ) from .srmediathek import SRMediathekIE -from .ssa import SSAIE from .stanfordoc import StanfordOpenClassroomIE from .steam import SteamIE from .streamable import StreamableIE diff --git a/youtube_dl/extractor/ssa.py b/youtube_dl/extractor/movingimage.py similarity index 65% rename from youtube_dl/extractor/ssa.py rename to youtube_dl/extractor/movingimage.py index 54d1843f2..bb789c32e 100644 --- a/youtube_dl/extractor/ssa.py +++ b/youtube_dl/extractor/movingimage.py @@ -7,22 +7,19 @@ from ..utils import ( ) -class SSAIE(InfoExtractor): - _VALID_URL = r'https?://ssa\.nls\.uk/film/(?P<id>\d+)' +class MovingImageIE(InfoExtractor): + _VALID_URL = r'https?://movingimage\.nls\.uk/film/(?P<id>\d+)' _TEST = { - 'url': 'http://ssa.nls.uk/film/3561', + 'url': 'http://movingimage.nls.uk/film/3561', + 'md5': 
'4caa05c2b38453e6f862197571a7be2f', 'info_dict': { 'id': '3561', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'SHETLAND WOOL', 'description': 'md5:c5afca6871ad59b4271e7704fe50ab04', 'duration': 900, 'thumbnail': 're:^https?://.*\.jpg$', }, - 'params': { - # rtmp download - 'skip_download': True, - }, } def _real_extract(self, url): @@ -30,10 +27,9 @@ class SSAIE(InfoExtractor): webpage = self._download_webpage(url, video_id) - streamer = self._search_regex( - r"'streamer'\s*,\S*'(rtmp[^']+)'", webpage, 'streamer') - play_path = self._search_regex( - r"'file'\s*,\s*'([^']+)'", webpage, 'file').rpartition('.')[0] + formats = self._extract_m3u8_formats( + self._html_search_regex(r'file\s*:\s*"([^"]+)"', webpage, 'm3u8 manifest URL'), + video_id, ext='mp4', entry_protocol='m3u8_native') def search_field(field_name, fatal=False): return self._search_regex( @@ -44,13 +40,11 @@ class SSAIE(InfoExtractor): description = unescapeHTML(search_field('Description')) duration = parse_duration(search_field('Running time')) thumbnail = self._search_regex( - r"'image'\s*,\s*'([^']+)'", webpage, 'thumbnails', fatal=False) + r"image\s*:\s*'([^']+)'", webpage, 'thumbnail', fatal=False) return { 'id': video_id, - 'url': streamer, - 'play_path': play_path, - 'ext': 'flv', + 'formats': formats, 'title': title, 'description': description, 'duration': duration, From 4c8ab6fd715249290feab89bbc86eb803b459993 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 1 Sep 2016 17:04:41 +0800 Subject: [PATCH 341/775] [thvideo] Remove extractor. Website down. Closes #10464 According to a screenshot in http://tieba.baidu.com/p/4691302183, thvideo.tv is shut down "temporarily". I see no clues that it will be up again, so I remove it here. --- ChangeLog | 1 + youtube_dl/extractor/extractors.py | 4 -- youtube_dl/extractor/thvideo.py | 84 ------------------------------ 3 files changed, 1 insertion(+), 88 deletions(-) delete mode 100644 youtube_dl/extractor/thvideo.py diff --git a/ChangeLog b/ChangeLog index 877e8112e..2e75c003d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version <unreleased> Extractors +- [thvideo] Remove extractor (#10464) * [movingimage] Fix for the new site name (#10466) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 8d0688f53..459d776b3 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -869,10 +869,6 @@ from .tnaflix import ( MovieFapIE, ) from .toggle import ToggleIE -from .thvideo import ( - THVideoIE, - THVideoPlaylistIE -) from .toutv import TouTvIE from .toypics import ToypicsUserIE, ToypicsIE from .traileraddict import TrailerAddictIE diff --git a/youtube_dl/extractor/thvideo.py b/youtube_dl/extractor/thvideo.py deleted file mode 100644 index 406f4a826..000000000 --- a/youtube_dl/extractor/thvideo.py +++ /dev/null @@ -1,84 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - unified_strdate -) - - -class THVideoIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?thvideo\.tv/(?:v/th|mobile\.php\?cid=)(?P<id>[0-9]+)' - _TEST = { - 'url': 'http://thvideo.tv/v/th1987/', - 'md5': 'fa107b1f73817e325e9433505a70db50', - 'info_dict': { - 'id': '1987', - 'ext': 'mp4', - 'title': '【动画】秘封活动记录 ~ The Sealed Esoteric History.分镜稿预览', - 'display_id': 'th1987', - 'thumbnail': 'http://thvideo.tv/uploadfile/2014/0722/20140722013459856.jpg', - 'description': '社团京都幻想剧团的第一个东方二次同人动画作品「秘封活动记录 ~ The Sealed Esoteric 
History.」 本视频是该动画第一期的分镜草稿...', - 'upload_date': '20140722' - } - } - - def _real_extract(self, url): - video_id = self._match_id(url) - - # extract download link from mobile player page - webpage_player = self._download_webpage( - 'http://thvideo.tv/mobile.php?cid=%s-0' % (video_id), - video_id, note='Downloading video source page') - video_url = self._html_search_regex( - r'<source src="(.*?)" type', webpage_player, 'video url') - - # extract video info from main page - webpage = self._download_webpage( - 'http://thvideo.tv/v/th%s' % (video_id), video_id) - title = self._og_search_title(webpage) - display_id = 'th%s' % video_id - thumbnail = self._og_search_thumbnail(webpage) - description = self._og_search_description(webpage) - upload_date = unified_strdate(self._html_search_regex( - r'span itemprop="datePublished" content="(.*?)">', webpage, - 'upload date', fatal=False)) - - return { - 'id': video_id, - 'ext': 'mp4', - 'url': video_url, - 'title': title, - 'display_id': display_id, - 'thumbnail': thumbnail, - 'description': description, - 'upload_date': upload_date - } - - -class THVideoPlaylistIE(InfoExtractor): - _VALID_URL = r'http?://(?:www\.)?thvideo\.tv/mylist(?P<id>[0-9]+)' - _TEST = { - 'url': 'http://thvideo.tv/mylist2', - 'info_dict': { - 'id': '2', - 'title': '幻想万華鏡', - }, - 'playlist_mincount': 23, - } - - def _real_extract(self, url): - playlist_id = self._match_id(url) - - webpage = self._download_webpage(url, playlist_id) - list_title = self._html_search_regex( - r'<h1 class="show_title">(.*?)<b id', webpage, 'playlist title', - fatal=False) - - entries = [ - self.url_result('http://thvideo.tv/v/th' + id, 'THVideo') - for id in re.findall(r'<dd><a href="http://thvideo.tv/v/th(\d+)/" target=', webpage)] - - return self.playlist_result(entries, playlist_id, list_title) From f096ec262544babf6ea23347160c1c550e4e157e Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 1 Sep 2016 13:34:12 +0100 Subject: [PATCH 342/775] [curiositystream] Add new extractor --- youtube_dl/extractor/curiositystream.py | 128 ++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 4 + 2 files changed, 132 insertions(+) create mode 100644 youtube_dl/extractor/curiositystream.py diff --git a/youtube_dl/extractor/curiositystream.py b/youtube_dl/extractor/curiositystream.py new file mode 100644 index 000000000..7105e3c4c --- /dev/null +++ b/youtube_dl/extractor/curiositystream.py @@ -0,0 +1,128 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + urlencode_postdata, + compat_str, + ExtractorError, +) + + +class CuriosityStreamBaseIE(InfoExtractor): + _NETRC_MACHINE = 'curiositystream' + _auth_token = None + _API_BASE_URL = 'https://api.curiositystream.com/v1/' + + def _handle_errors(self, result): + error = result.get('error', {}).get('message') + if error: + if isinstance(error, dict): + error = ', '.join(error.values()) + raise ExtractorError( + '%s said: %s' % (self.IE_NAME, error), expected=True) + + def _call_api(self, path, video_id): + headers = {} + if self._auth_token: + headers['X-Auth-Token'] = self._auth_token + result = self._download_json( + self._API_BASE_URL + path, video_id, headers=headers) + self._handle_errors(result) + return result['data'] + + def _real_initialize(self): + if not self._auth_token: + user = self._downloader.cache.load('curiositystream', 'user') or {} + self._auth_token = user.get('auth_token') + if not self._auth_token: + (email, password) = 
self._get_login_info() + if email is None: + return + result = self._download_json( + self._API_BASE_URL + 'login', None, data=urlencode_postdata({ + 'email': email, + 'password': password, + })) + self._handle_errors(result) + self._auth_token = result['message']['auth_token'] + self._downloader.cache.store( + 'curiositystream', 'user', { + 'auth_token': self._auth_token, + }) + + def _extract_media_info(self, media): + video_id = compat_str(media['id']) + limelight_media_id = media['limelight_media_id'] + title = media['title'] + + subtitles = {} + for closed_caption in media.get('closed_captions', []): + sub_url = closed_caption.get('file') + if not sub_url: + continue + lang = closed_caption.get('code') or closed_caption.get('language') or 'en' + subtitles.setdefault(lang, []).append({ + 'url': sub_url, + }) + + return { + '_type': 'url_transparent', + 'id': video_id, + 'url': 'limelight:media:' + limelight_media_id, + 'title': title, + 'description': media.get('description'), + 'thumbnail': media.get('image_large') or media.get('image_medium') or media.get('image_small'), + 'duration': int_or_none(media.get('duration')), + 'tags': media.get('tags'), + 'subtitles': subtitles, + 'ie_key': 'LimelightMedia', + } + + +class CuriosityStreamIE(CuriosityStreamBaseIE): + IE_NAME = 'curiositystream' + _VALID_URL = r'https?://app\.curiositystream\.com/video/(?P<id>\d+)' + _TEST = { + 'url': 'https://app.curiositystream.com/video/2', + 'md5': 'a0074c190e6cddaf86900b28d3e9ee7a', + 'info_dict': { + 'id': '2', + 'ext': 'mp4', + 'title': 'How Did You Develop The Internet?', + 'description': 'Vint Cerf, Google\'s Chief Internet Evangelist, describes how he and Bob Kahn created the internet.', + 'timestamp': 1448388615, + 'upload_date': '20151124', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + media = self._call_api('media/' + video_id, video_id) + return self._extract_media_info(media) + + +class CuriosityStreamCollectionIE(CuriosityStreamBaseIE): + IE_NAME = 'curiositystream:collection' + _VALID_URL = r'https?://app\.curiositystream\.com/collection/(?P<id>\d+)' + _TEST = { + 'url': 'https://app.curiositystream.com/collection/2', + 'info_dict': { + 'id': '2', + 'title': 'Curious Minds: The Internet', + 'description': 'How is the internet shaping our lives in the 21st Century?', + }, + 'playlist_mincount': 17, + } + + def _real_extract(self, url): + collection_id = self._match_id(url) + collection = self._call_api( + 'collections/' + collection_id, collection_id) + entries = [] + for media in collection.get('media', []): + entries.append(self._extract_media_info(media)) + return self.playlist_result( + entries, collection_id, + collection.get('title'), collection.get('description')) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 459d776b3..0c2436b67 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -194,6 +194,10 @@ from .ctsnews import CtsNewsIE from .ctv import CTVIE from .ctvnews import CTVNewsIE from .cultureunplugged import CultureUnpluggedIE +from .curiositystream import ( + CuriosityStreamIE, + CuriosityStreamCollectionIE, +) from .cwtv import CWTVIE from .dailymail import DailyMailIE from .dailymotion import ( From 9250181f37cf0289c02d18ab91203c6181f9cc71 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 1 Sep 2016 21:37:25 +0700 Subject: [PATCH 343/775] [extractor/common] Restore NAME usage from EXT-X-MEDIA tag for formats codes in 
_extract_m3u8_formats (Closes #10522) --- youtube_dl/extractor/common.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index da0af29ec..36d43fd50 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1202,6 +1202,7 @@ class InfoExtractor(object): 'preference': preference, }] last_info = None + last_media = None for line in m3u8_doc.splitlines(): if line.startswith('#EXT-X-STREAM-INF:'): last_info = parse_m3u8_attributes(line) @@ -1224,6 +1225,10 @@ class InfoExtractor(object): 'protocol': entry_protocol, 'preference': preference, }) + else: + # When there is no URI in EXT-X-MEDIA let this tag's + # data be used by regular URI lines below + last_media = media elif line.startswith('#') or not line.strip(): continue else: @@ -1234,13 +1239,14 @@ class InfoExtractor(object): format_id = [] if m3u8_id: format_id.append(m3u8_id) + last_media_name = last_media.get('NAME') if last_media else None + # Despite specification does not mention NAME attribute for + # EXT-X-STREAM-INF it still sometimes may be present + stream_name = last_info.get('NAME') or last_media_name # Bandwidth of live streams may differ over time thus making # format_id unpredictable. So it's better to keep provided # format_id intact. if not live: - # Despite specification does not mention NAME attribute for - # EXT-X-STREAM-INF it still sometimes may be present - stream_name = last_info.get('NAME') format_id.append(stream_name if stream_name else '%d' % (tbr if tbr else len(formats))) f = { 'format_id': '-'.join(format_id), From e816c9d158629ef054c1cc77eecf83043d06fe8c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 1 Sep 2016 22:18:16 +0700 Subject: [PATCH 344/775] [extractor/common] Simplify _extract_m3u8_formats --- youtube_dl/extractor/common.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 36d43fd50..a9c7a8d16 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1201,8 +1201,8 @@ class InfoExtractor(object): 'protocol': entry_protocol, 'preference': preference, }] - last_info = None - last_media = None + last_info = {} + last_media = {} for line in m3u8_doc.splitlines(): if line.startswith('#EXT-X-STREAM-INF:'): last_info = parse_m3u8_attributes(line) @@ -1232,17 +1232,13 @@ class InfoExtractor(object): elif line.startswith('#') or not line.strip(): continue else: - if last_info is None: - formats.append({'url': format_url(line)}) - continue tbr = int_or_none(last_info.get('AVERAGE-BANDWIDTH') or last_info.get('BANDWIDTH'), scale=1000) format_id = [] if m3u8_id: format_id.append(m3u8_id) - last_media_name = last_media.get('NAME') if last_media else None # Despite specification does not mention NAME attribute for # EXT-X-STREAM-INF it still sometimes may be present - stream_name = last_info.get('NAME') or last_media_name + stream_name = last_info.get('NAME') or last_media.get('NAME') # Bandwidth of live streams may differ over time thus making # format_id unpredictable. So it's better to keep provided # format_id intact. 
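# A hypothetical EXT-X-MEDIA line of the kind this NAME handling targets
# (illustrative only, not part of the patch):
#
#   #EXT-X-MEDIA:TYPE=VIDEO,GROUP-ID="hls",NAME="720p"
#
# With no URI attribute, its parsed attributes (via parse_m3u8_attributes)
# are carried over to the next plain URI line, so for non-live streams the
# human-readable NAME ("720p") ends up in format_id instead of a bare
# bandwidth-derived index.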
@@ -1275,6 +1271,7 @@ class InfoExtractor(object): f.update(parse_codecs(last_info.get('CODECS'))) formats.append(f) last_info = {} + last_media = {} return formats @staticmethod From f6af0f888b03e8c072b86c04492cc84c966c9f15 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 1 Sep 2016 23:15:01 +0700 Subject: [PATCH 345/775] [youporn] Fix categories and tags extraction (Closes #10521) --- youtube_dl/extractor/youporn.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py index 0df2d76ee..0265a64a7 100644 --- a/youtube_dl/extractor/youporn.py +++ b/youtube_dl/extractor/youporn.py @@ -35,7 +35,7 @@ class YouPornIE(InfoExtractor): 'age_limit': 18, }, }, { - # Anonymous User uploader + # Unknown uploader 'url': 'http://www.youporn.com/watch/561726/big-tits-awesome-brunette-on-amazing-webcam-show/?from=related3&al=2&from_id=561726&pos=4', 'info_dict': { 'id': '561726', @@ -44,7 +44,7 @@ class YouPornIE(InfoExtractor): 'title': 'Big Tits Awesome Brunette On amazing webcam show', 'description': 'http://sweetlivegirls.com Big Tits Awesome Brunette On amazing webcam show.mp4', 'thumbnail': 're:^https?://.*\.jpg$', - 'uploader': 'Anonymous User', + 'uploader': 'Unknown', 'upload_date': '20111125', 'average_rating': int, 'view_count': int, @@ -140,17 +140,17 @@ class YouPornIE(InfoExtractor): r'>All [Cc]omments? \(([\d,.]+)\)', webpage, 'comment count', fatal=False)) - def extract_tag_box(title): - tag_box = self._search_regex( - (r'<div[^>]+class=["\']tagBoxTitle["\'][^>]*>\s*%s\b.*?</div>\s*' - '<div[^>]+class=["\']tagBoxContent["\']>(.+?)</div>') % re.escape(title), - webpage, '%s tag box' % title, default=None) + def extract_tag_box(regex, title): + tag_box = self._search_regex(regex, webpage, title, default=None) if not tag_box: return [] return re.findall(r'<a[^>]+href=[^>]+>([^<]+)', tag_box) - categories = extract_tag_box('Category') - tags = extract_tag_box('Tags') + categories = extract_tag_box( + r'(?s)Categories:.*?</[^>]+>(.+?)</div>', 'categories') + tags = extract_tag_box( + r'(?s)Tags:.*?</div>\s*<div[^>]+class=["\']tagBoxContent["\'][^>]*>(.+?)</div>', + 'tags') return { 'id': video_id, From 8fb6af6bba201c9f750aadb7b092704195c7f8e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 1 Sep 2016 23:32:28 +0700 Subject: [PATCH 346/775] [exfm] Remove extractor (Closes #10482) --- youtube_dl/extractor/exfm.py | 58 ------------------------------ youtube_dl/extractor/extractors.py | 1 - 2 files changed, 59 deletions(-) delete mode 100644 youtube_dl/extractor/exfm.py diff --git a/youtube_dl/extractor/exfm.py b/youtube_dl/extractor/exfm.py deleted file mode 100644 index 09ed4f2b5..000000000 --- a/youtube_dl/extractor/exfm.py +++ /dev/null @@ -1,58 +0,0 @@ -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor - - -class ExfmIE(InfoExtractor): - IE_NAME = 'exfm' - IE_DESC = 'ex.fm' - _VALID_URL = r'https?://(?:www\.)?ex\.fm/song/(?P<id>[^/]+)' - _SOUNDCLOUD_URL = r'http://(?:www\.)?api\.soundcloud\.com/tracks/([^/]+)/stream' - _TESTS = [ - { - 'url': 'http://ex.fm/song/eh359', - 'md5': 'e45513df5631e6d760970b14cc0c11e7', - 'info_dict': { - 'id': '44216187', - 'ext': 'mp3', - 'title': 'Test House "Love Is Not Enough" (Extended Mix) DeadJournalist Exclusive', - 'uploader': 'deadjournalist', - 'upload_date': '20120424', - 'description': 'Test House \"Love Is Not Enough\" (Extended 
Mix) DeadJournalist Exclusive', - }, - 'note': 'Soundcloud song', - 'skip': 'The site is down too often', - }, - { - 'url': 'http://ex.fm/song/wddt8', - 'md5': '966bd70741ac5b8570d8e45bfaed3643', - 'info_dict': { - 'id': 'wddt8', - 'ext': 'mp3', - 'title': 'Safe and Sound', - 'uploader': 'Capital Cities', - }, - 'skip': 'The site is down too often', - }, - ] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - song_id = mobj.group('id') - info_url = 'http://ex.fm/api/v3/song/%s' % song_id - info = self._download_json(info_url, song_id)['song'] - song_url = info['url'] - if re.match(self._SOUNDCLOUD_URL, song_url) is not None: - self.to_screen('Soundcloud song detected') - return self.url_result(song_url.replace('/stream', ''), 'Soundcloud') - return { - 'id': song_id, - 'url': song_url, - 'ext': 'mp3', - 'title': info['title'], - 'thumbnail': info['image']['large'], - 'uploader': info['artist'], - 'view_count': info['loved_count'], - } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 0c2436b67..7b59d5db2 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -261,7 +261,6 @@ from .espn import ESPNIE from .esri import EsriVideoIE from .europa import EuropaIE from .everyonesmixtape import EveryonesMixtapeIE -from .exfm import ExfmIE from .expotv import ExpoTVIE from .extremetube import ExtremeTubeIE from .eyedotv import EyedoTVIE From af95ee94b4554449db175ae44060a66c89bd96ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 1 Sep 2016 23:38:49 +0700 Subject: [PATCH 347/775] [glide] Fix extraction (Closes #10478) --- youtube_dl/extractor/glide.py | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/glide.py b/youtube_dl/extractor/glide.py index 62ff84835..50f698803 100644 --- a/youtube_dl/extractor/glide.py +++ b/youtube_dl/extractor/glide.py @@ -14,10 +14,8 @@ class GlideIE(InfoExtractor): 'info_dict': { 'id': 'UZF8zlmuQbe4mr+7dCiQ0w==', 'ext': 'mp4', - 'title': 'Damon Timm\'s Glide message', + 'title': "Damon's Glide message", 'thumbnail': 're:^https?://.*?\.cloudfront\.net/.*\.jpg$', - 'uploader': 'Damon Timm', - 'upload_date': '20140919', } } @@ -27,7 +25,8 @@ class GlideIE(InfoExtractor): webpage = self._download_webpage(url, video_id) title = self._html_search_regex( - r'<title>(.+?)', webpage, 'title') + r'(.+?)', webpage, + 'title', default=None) or self._og_search_title(webpage) video_url = self._proto_relative_url(self._search_regex( r']+src=(["\'])(?P.+?)\1', webpage, 'video URL', default=None, @@ -36,18 +35,10 @@ class GlideIE(InfoExtractor): r']+id=["\']video-thumbnail["\'][^>]+src=(["\'])(?P.+?)\1', webpage, 'thumbnail url', default=None, group='url')) or self._og_search_thumbnail(webpage) - uploader = self._search_regex( - r']+class=["\']info-name["\'][^>]*>([^<]+)', - webpage, 'uploader', fatal=False) - upload_date = unified_strdate(self._search_regex( - r']+class="info-date"[^>]*>([^<]+)', - webpage, 'upload date', fatal=False)) return { 'id': video_id, 'title': title, 'url': video_url, 'thumbnail': thumbnail, - 'uploader': uploader, - 'upload_date': upload_date, } From 8276d3b87a54f43ca2f47b7709a6557ea979327c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 1 Sep 2016 23:46:15 +0700 Subject: [PATCH 348/775] [thestar] Fix extraction (Closes #10465) --- youtube_dl/extractor/thestar.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git 
a/youtube_dl/extractor/thestar.py b/youtube_dl/extractor/thestar.py index ba1380abc..c3f118894 100644 --- a/youtube_dl/extractor/thestar.py +++ b/youtube_dl/extractor/thestar.py @@ -2,8 +2,6 @@ from __future__ import unicode_literals from .common import InfoExtractor -from .brightcove import BrightcoveLegacyIE -from ..compat import compat_parse_qs class TheStarIE(InfoExtractor): @@ -30,6 +28,9 @@ class TheStarIE(InfoExtractor): def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) - brightcove_legacy_url = BrightcoveLegacyIE._extract_brightcove_url(webpage) - brightcove_id = compat_parse_qs(brightcove_legacy_url)['@videoPlayer'][0] - return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id) + brightcove_id = self._search_regex( + r'mainartBrightcoveVideoId["\']?\s*:\s*["\']?(\d+)', + webpage, 'brightcove id') + return self.url_result( + self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, + 'BrightcoveNew', brightcove_id) From f97ec8bcb95b45d9a657392cd24eabfadb4053e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 1 Sep 2016 23:46:58 +0700 Subject: [PATCH 349/775] [glide] Remove unused import --- youtube_dl/extractor/glide.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/glide.py b/youtube_dl/extractor/glide.py index 50f698803..f0d951396 100644 --- a/youtube_dl/extractor/glide.py +++ b/youtube_dl/extractor/glide.py @@ -2,7 +2,6 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import unified_strdate class GlideIE(InfoExtractor): From 4191779dcda8a80faf6e53579e011b63ee5c3878 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 1 Sep 2016 19:07:41 +0100 Subject: [PATCH 350/775] [nytimes] improve extraction --- youtube_dl/extractor/nytimes.py | 93 +++++++++++++++++++++++---------- 1 file changed, 64 insertions(+), 29 deletions(-) diff --git a/youtube_dl/extractor/nytimes.py b/youtube_dl/extractor/nytimes.py index 681683e86..142c34256 100644 --- a/youtube_dl/extractor/nytimes.py +++ b/youtube_dl/extractor/nytimes.py @@ -1,26 +1,37 @@ from __future__ import unicode_literals +import hmac +import hashlib +import base64 + from .common import InfoExtractor from ..utils import ( float_or_none, int_or_none, parse_iso8601, + mimetype2ext, + determine_ext, ) class NYTimesBaseIE(InfoExtractor): + _SECRET = b'pX(2MbU2);4N{7J8)>YwKRJ+/pQ3JkiU2Q^V>mFYv6g6gYvt6v' + def _extract_video_from_id(self, video_id): - video_data = self._download_json( - 'http://www.nytimes.com/svc/video/api/v2/video/%s' % video_id, - video_id, 'Downloading video JSON') + # Authorization generation algorithm is reverse engineered from `signer` in + # http://graphics8.nytimes.com/video/vhs/vhs-2.x.min.js + path = '/svc/video/api/v3/video/' + video_id + hm = hmac.new(self._SECRET, (path + ':vhs').encode(), hashlib.sha512).hexdigest() + video_data = self._download_json('http://www.nytimes.com' + path, video_id, 'Downloading video JSON', headers={ + 'Authorization': 'NYTV ' + base64.b64encode(hm.encode()).decode(), + 'X-NYTV': 'vhs', + }, fatal=False) + if not video_data: + video_data = self._download_json( + 'http://www.nytimes.com/svc/video/api/v2/video/' + video_id, + video_id, 'Downloading video JSON') title = video_data['headline'] - description = video_data.get('summary') - duration = float_or_none(video_data.get('duration'), 1000) - - uploader = video_data.get('byline') - publication_date = video_data.get('publication_date') - timestamp = 
parse_iso8601(publication_date[:-8]) if publication_date else None def get_file_size(file_size): if isinstance(file_size, int): @@ -28,35 +39,59 @@ class NYTimesBaseIE(InfoExtractor): elif isinstance(file_size, dict): return int(file_size.get('value', 0)) else: - return 0 + return None - formats = [ - { - 'url': video['url'], - 'format_id': video.get('type'), - 'vcodec': video.get('video_codec'), - 'width': int_or_none(video.get('width')), - 'height': int_or_none(video.get('height')), - 'filesize': get_file_size(video.get('fileSize')), - } for video in video_data['renditions'] if video.get('url') - ] + urls = [] + formats = [] + for video in video_data.get('renditions', []): + video_url = video.get('url') + format_id = video.get('type') + if not video_url or format_id == 'thumbs' or video_url in urls: + continue + urls.append(video_url) + ext = mimetype2ext(video.get('mimetype')) or determine_ext(video_url) + if ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + video_url, video_id, 'mp4', 'm3u8_native', + m3u8_id=format_id or 'hls', fatal=False)) + elif ext == 'mpd': + continue + # formats.extend(self._extract_mpd_formats( + # video_url, video_id, format_id or 'dash', fatal=False)) + else: + formats.append({ + 'url': video_url, + 'format_id': format_id, + 'vcodec': video.get('videoencoding') or video.get('video_codec'), + 'width': int_or_none(video.get('width')), + 'height': int_or_none(video.get('height')), + 'filesize': get_file_size(video.get('file_size') or video.get('fileSize')), + 'tbr': int_or_none(video.get('bitrate'), 1000), + 'ext': ext, + }) self._sort_formats(formats) - thumbnails = [ - { - 'url': 'http://www.nytimes.com/%s' % image['url'], + thumbnails = [] + for image in video_data.get('images', []): + image_url = image.get('url') + if not image_url: + continue + thumbnails.append({ + 'url': 'http://www.nytimes.com/' + image_url, 'width': int_or_none(image.get('width')), 'height': int_or_none(image.get('height')), - } for image in video_data.get('images', []) if image.get('url') - ] + }) + + publication_date = video_data.get('publication_date') + timestamp = parse_iso8601(publication_date[:-8]) if publication_date else None return { 'id': video_id, 'title': title, - 'description': description, + 'description': video_data.get('summary'), 'timestamp': timestamp, - 'uploader': uploader, - 'duration': duration, + 'uploader': video_data.get('byline'), + 'duration': float_or_none(video_data.get('duration'), 1000), 'formats': formats, 'thumbnails': thumbnails, } @@ -67,7 +102,7 @@ class NYTimesIE(NYTimesBaseIE): _TESTS = [{ 'url': 'http://www.nytimes.com/video/opinion/100000002847155/verbatim-what-is-a-photocopier.html?playlistId=100000001150263', - 'md5': '18a525a510f942ada2720db5f31644c0', + 'md5': 'd665342765db043f7e225cff19df0f2d', 'info_dict': { 'id': '100000002847155', 'ext': 'mov', From b207d5ebd4eab80e07673aba9696d240d1009bcf Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 1 Sep 2016 19:46:58 +0100 Subject: [PATCH 351/775] [curiositystream] don't cache auth token --- youtube_dl/extractor/curiositystream.py | 28 +++++++++---------------- 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/youtube_dl/extractor/curiositystream.py b/youtube_dl/extractor/curiositystream.py index 7105e3c4c..e3c99468c 100644 --- a/youtube_dl/extractor/curiositystream.py +++ b/youtube_dl/extractor/curiositystream.py @@ -33,24 +33,16 @@ class CuriosityStreamBaseIE(InfoExtractor): return result['data'] def _real_initialize(self): - if not self._auth_token: - user = 
self._downloader.cache.load('curiositystream', 'user') or {} - self._auth_token = user.get('auth_token') - if not self._auth_token: - (email, password) = self._get_login_info() - if email is None: - return - result = self._download_json( - self._API_BASE_URL + 'login', None, data=urlencode_postdata({ - 'email': email, - 'password': password, - })) - self._handle_errors(result) - self._auth_token = result['message']['auth_token'] - self._downloader.cache.store( - 'curiositystream', 'user', { - 'auth_token': self._auth_token, - }) + (email, password) = self._get_login_info() + if email is None: + return + result = self._download_json( + self._API_BASE_URL + 'login', None, data=urlencode_postdata({ + 'email': email, + 'password': password, + })) + self._handle_errors(result) + self._auth_token = result['message']['auth_token'] def _extract_media_info(self, media): video_id = compat_str(media['id']) From 6150502e4709b6b2ebc226c9c38fa346b9358699 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 1 Sep 2016 22:14:40 +0100 Subject: [PATCH 352/775] [adobepass] check for authz_token expiration(#10527) --- youtube_dl/extractor/adobepass.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/adobepass.py b/youtube_dl/extractor/adobepass.py index 9e3a3e362..68ec37e00 100644 --- a/youtube_dl/extractor/adobepass.py +++ b/youtube_dl/extractor/adobepass.py @@ -37,6 +37,10 @@ class AdobePassIE(InfoExtractor): return self._search_regex( '<%s>(.+?)' % (tag, tag), xml_str, tag) + def is_expired(token, date_ele): + token_expires = unified_timestamp(re.sub(r'[_ ]GMT', '', xml_text(token, date_ele))) + return token_expires and token_expires <= int(time.time()) + mvpd_headers = { 'ap_42': 'anonymous', 'ap_11': 'Linux i686', @@ -47,11 +51,8 @@ class AdobePassIE(InfoExtractor): guid = xml_text(resource, 'guid') requestor_info = self._downloader.cache.load('mvpd', requestor_id) or {} authn_token = requestor_info.get('authn_token') - if authn_token: - token_expires = unified_timestamp(re.sub(r'[_ ]GMT', '', xml_text(authn_token, 'simpleTokenExpires'))) - if token_expires and token_expires <= int(time.time()): - authn_token = None - requestor_info = {} + if authn_token and is_expired(authn_token, 'simpleTokenExpires'): + authn_token = None if not authn_token: # TODO add support for other TV Providers mso_id = 'DTV' @@ -98,6 +99,8 @@ class AdobePassIE(InfoExtractor): self._downloader.cache.store('mvpd', requestor_id, requestor_info) authz_token = requestor_info.get(guid) + if authz_token and is_expired(authz_token, 'simpleTokenTTL'): + authz_token = None if not authz_token: authorize = self._download_webpage( self._SERVICE_PROVIDER_TEMPLATE % 'authorize', video_id, From 2c3e0af93e00d7e2e20283be12541aaebabfa1bf Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 2 Sep 2016 09:53:04 +0100 Subject: [PATCH 353/775] [go] Add new extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/go.py | 101 +++++++++++++++++++++++++++++ 2 files changed, 102 insertions(+) create mode 100644 youtube_dl/extractor/go.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 7b59d5db2..2bcd5a0cd 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -318,6 +318,7 @@ from .globo import ( GloboIE, GloboArticleIE, ) +from .go import GoIE from .godtube import GodTubeIE from .godtv import GodTVIE from .golem import GolemIE diff --git a/youtube_dl/extractor/go.py b/youtube_dl/extractor/go.py new file 
mode 100644 index 000000000..6a437c54d --- /dev/null +++ b/youtube_dl/extractor/go.py @@ -0,0 +1,101 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + determine_ext, + parse_age_limit, +) + + +class GoIE(InfoExtractor): + _BRANDS = { + 'abc': '001', + 'freeform': '002', + 'watchdisneychannel': '004', + 'watchdisneyjunior': '008', + 'watchdisneyxd': '009', + } + _VALID_URL = r'https?://(?:(?P%s)\.)?go\.com/.*?vdka(?P\w+)' % '|'.join(_BRANDS.keys()) + _TESTS = [{ + 'url': 'http://abc.go.com/shows/castle/video/most-recent/vdka0_g86w5onx', + 'info_dict': { + 'id': '0_g86w5onx', + 'ext': 'mp4', + 'title': 'Sneak Peek: Language Arts', + 'description': 'md5:7dcdab3b2d17e5217c953256af964e9c', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, { + 'url': 'http://abc.go.com/shows/after-paradise/video/most-recent/vdka3335601', + 'only_matching': True, + }] + + def _real_extract(self, url): + sub_domain, video_id = re.match(self._VALID_URL, url).groups() + video_data = self._download_json( + 'http://api.contents.watchabc.go.com/vp2/ws/contents/3000/videos/%s/001/-1/-1/-1/%s/-1/-1.json' % (self._BRANDS[sub_domain], video_id), + video_id)['video'][0] + title = video_data['title'] + + formats = [] + for asset in video_data.get('assets', {}).get('asset', []): + asset_url = asset.get('value') + if not asset_url: + continue + format_id = asset.get('format') + ext = determine_ext(asset_url) + if ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + asset_url, video_id, 'mp4', m3u8_id=format_id or 'hls', fatal=False)) + else: + formats.append({ + 'format_id': format_id, + 'url': asset_url, + 'ext': ext, + }) + self._sort_formats(formats) + + subtitles = {} + for cc in video_data.get('closedcaption', {}).get('src', []): + cc_url = cc.get('value') + if not cc_url: + continue + ext = determine_ext(cc_url) + if ext == 'xml': + ext = 'ttml' + subtitles.setdefault(cc.get('lang'), []).append({ + 'url': cc_url, + 'ext': ext, + }) + + thumbnails = [] + for thumbnail in video_data.get('thumbnails', {}).get('thumbnail', []): + thumbnail_url = thumbnail.get('value') + if not thumbnail_url: + continue + thumbnails.append({ + 'url': thumbnail_url, + 'width': int_or_none(thumbnail.get('width')), + 'height': int_or_none(thumbnail.get('height')), + }) + + return { + 'id': video_id, + 'title': title, + 'description': video_data.get('longdescription') or video_data.get('description'), + 'duration': int_or_none(video_data.get('duration', {}).get('value'), 1000), + 'age_limit': parse_age_limit(video_data.get('tvrating', {}).get('rating')), + 'episode_number': int_or_none(video_data.get('episodenumber')), + 'series': video_data.get('show', {}).get('title'), + 'season_number': int_or_none(video_data.get('season', {}).get('num')), + 'thumbnails': thumbnails, + 'formats': formats, + 'subtitles': subtitles, + } From 349fc5c705d6b81ae53d698972f40b1125bee13e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 2 Sep 2016 21:13:50 +0700 Subject: [PATCH 354/775] [facebook:plugins:video] Add extractor (Closes #10530) --- youtube_dl/extractor/extractors.py | 5 ++++- youtube_dl/extractor/facebook.py | 29 +++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 2bcd5a0cd..bc616223e 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -264,7 
+264,10 @@ from .everyonesmixtape import EveryonesMixtapeIE from .expotv import ExpoTVIE from .extremetube import ExtremeTubeIE from .eyedotv import EyedoTVIE -from .facebook import FacebookIE +from .facebook import ( + FacebookIE, + FacebookPluginsVideoIE, +) from .faz import FazIE from .fc2 import FC2IE from .fczenit import FczenitIE diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index 228b0b6d7..3a220e995 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -351,3 +351,32 @@ class FacebookIE(InfoExtractor): self._VIDEO_PAGE_TEMPLATE % video_id, video_id, fatal_if_no_video=True) return info_dict + + +class FacebookPluginsVideoIE(InfoExtractor): + _VALID_URL = r'https?://(?:[\w-]+\.)?facebook\.com/plugins/video\.php\?.*?\bhref=(?Phttps.+)' + + _TESTS = [{ + 'url': 'https://www.facebook.com/plugins/video.php?href=https%3A%2F%2Fwww.facebook.com%2Fgov.sg%2Fvideos%2F10154383743583686%2F&show_text=0&width=560', + 'md5': '5954e92cdfe51fe5782ae9bda7058a07', + 'info_dict': { + 'id': '10154383743583686', + 'ext': 'mp4', + 'title': 'What to do during the haze?', + 'uploader': 'Gov.sg', + 'upload_date': '20160826', + 'timestamp': 1472184808, + }, + 'add_ie': [FacebookIE.ie_key()], + }, { + 'url': 'https://www.facebook.com/plugins/video.php?href=https%3A%2F%2Fwww.facebook.com%2Fvideo.php%3Fv%3D10204634152394104', + 'only_matching': True, + }, { + 'url': 'https://www.facebook.com/plugins/video.php?href=https://www.facebook.com/gov.sg/videos/10154383743583686/&show_text=0&width=560', + 'only_matching': True, + }] + + def _real_extract(self, url): + return self.url_result( + compat_urllib_parse_unquote(self._match_id(url)), + FacebookIE.ie_key()) From 5e9e3d0f6bf2055c557f360758d6d7eb146edcba Mon Sep 17 00:00:00 2001 From: Sebastian Blunt Date: Fri, 2 Sep 2016 14:48:56 +0200 Subject: [PATCH 355/775] [drtv] Add support for dr.dk/nyheder It's the same video player, the only difference is that the video player is loaded differently, and certain metadata (title and description) is not available under dr.dk/mu, so make it by default get that from some of the html meta tags. Skip the dr.dk/tv test dr.dk/tv videos are only available for between 7 and 90 days due to Danish law, and in certain cases may be readded. Skip this test as it is no longer available. 
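A minimal sketch of the metadata fallback described above (illustrative only; the helper names `og_prop` and `pick_title` are assumptions for this sketch, while the actual change below uses the extractor's `_og_search_title`/`_og_search_description` helpers with the programcard data as fallback):

import re


def og_prop(webpage, prop):
    # Read an OpenGraph <meta property="og:..."> value from the page,
    # returning None when the tag is absent (as on some dr.dk/mu pages).
    m = re.search(
        r'<meta[^>]+property=["\']og:%s["\'][^>]+content=["\']([^"\']+)' % re.escape(prop),
        webpage)
    return m.group(1) if m else None


def pick_title(webpage, programcard):
    # Prefer the page's own metadata and only fall back to the
    # programcard API field, mirroring the order used in the diff below.
    return og_prop(webpage, 'title') or programcard.get('Title')

For a /nyheder page the og: tags win; on pages where they are missing, the programcard values are still used.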
--- youtube_dl/extractor/drtv.py | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/drtv.py b/youtube_dl/extractor/drtv.py index 2d74ff855..e210cb610 100644 --- a/youtube_dl/extractor/drtv.py +++ b/youtube_dl/extractor/drtv.py @@ -5,13 +5,14 @@ from .common import InfoExtractor from ..utils import ( ExtractorError, parse_iso8601, + remove_end, ) class DRTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?dr\.dk/tv/se/(?:[^/]+/)*(?P[\da-z-]+)(?:[/#?]|$)' + _VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv/se|nyheder)/(?:[^/]+/)*(?P[\da-z-]+)(?:[/#?]|$)' - _TEST = { + _TESTS = [{ 'url': 'https://www.dr.dk/tv/se/boern/ultra/panisk-paske/panisk-paske-5', 'md5': 'dc515a9ab50577fa14cc4e4b0265168f', 'info_dict': { @@ -23,7 +24,20 @@ class DRTVIE(InfoExtractor): 'upload_date': '20150322', 'duration': 1455, }, - } + 'skip': 'Video is no longer available', + }, { + 'url': 'https://www.dr.dk/nyheder/indland/live-christianias-rydning-af-pusher-street-er-i-gang', + 'md5': '2ada5074f9e79afc0d324a8e9784d850', + 'info_dict': { + 'id': 'christiania-pusher-street-ryddes-drdkrjpo', + 'ext': 'mp4', + 'title': 'LIVE Christianias rydning af Pusher Street er i gang', + 'description': '- Det er det fedeste, der er sket i 20 år, fortæller christianit til DR Nyheder.', + 'timestamp': 1472800279, + 'upload_date': '20160902', + 'duration': 131.4, + } + }] def _real_extract(self, url): video_id = self._match_id(url) @@ -35,7 +49,8 @@ class DRTVIE(InfoExtractor): 'Video %s is not available' % video_id, expected=True) video_id = self._search_regex( - r'data-(?:material-identifier|episode-slug)="([^"]+)"', + (r'data-(?:material-identifier|episode-slug)="([^"]+)"', + r'data-resource="[^>"]+mu/programcard/expanded/([^"]+)"'), webpage, 'video id') programcard = self._download_json( @@ -43,8 +58,9 @@ class DRTVIE(InfoExtractor): video_id, 'Downloading video JSON') data = programcard['Data'][0] - title = data['Title'] - description = data['Description'] + title = remove_end(self._og_search_title(webpage), ' | TV | DR') or data['Title'] + description = self._og_search_description(webpage) or data['Description'] + timestamp = parse_iso8601(data['CreatedTime']) thumbnail = None From 6562d34a8cbdb93de77a8042f7409ebe31e3e3e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 2 Sep 2016 22:57:48 +0700 Subject: [PATCH 356/775] [utils] Improve mimetype2ext --- test/test_utils.py | 9 +++++++++ youtube_dl/utils.py | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/test/test_utils.py b/test/test_utils.py index d16ea7f77..405c5d351 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -39,6 +39,7 @@ from youtube_dl.utils import ( is_html, js_to_json, limit_length, + mimetype2ext, ohdave_rsa_encrypt, OnDemandPagedList, orderedSet, @@ -625,6 +626,14 @@ class TestUtil(unittest.TestCase): limit_length('foo bar baz asd', 12).startswith('foo bar')) self.assertTrue('...' 
in limit_length('foo bar baz asd', 12)) + def test_mimetype2ext(self): + self.assertEqual(mimetype2ext(None), None) + self.assertEqual(mimetype2ext('video/x-flv'), 'flv') + self.assertEqual(mimetype2ext('application/x-mpegURL'), 'm3u8') + self.assertEqual(mimetype2ext('text/vtt'), 'vtt') + self.assertEqual(mimetype2ext('text/vtt;charset=utf-8'), 'vtt') + self.assertEqual(mimetype2ext('text/html; charset=utf-8'), 'html') + def test_parse_codecs(self): self.assertEqual(parse_codecs(''), {}) self.assertEqual(parse_codecs('avc1.77.30, mp4a.40.2'), { diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 1091f17f3..904f23fd7 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2148,7 +2148,7 @@ def mimetype2ext(mt): return ext _, _, res = mt.rpartition('/') - res = res.lower() + res = res.split(';')[0].strip().lower() return { '3gpp': '3gp', From 6066d03db02b9c545435b2b8faffe2e0f6c66702 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 2 Sep 2016 23:02:15 +0700 Subject: [PATCH 357/775] [drtv] Modernize and make more robust --- youtube_dl/extractor/drtv.py | 53 ++++++++++++++++++++++-------------- 1 file changed, 33 insertions(+), 20 deletions(-) diff --git a/youtube_dl/extractor/drtv.py b/youtube_dl/extractor/drtv.py index e210cb610..7122449a3 100644 --- a/youtube_dl/extractor/drtv.py +++ b/youtube_dl/extractor/drtv.py @@ -4,6 +4,9 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( ExtractorError, + int_or_none, + float_or_none, + mimetype2ext, parse_iso8601, remove_end, ) @@ -58,10 +61,12 @@ class DRTVIE(InfoExtractor): video_id, 'Downloading video JSON') data = programcard['Data'][0] - title = remove_end(self._og_search_title(webpage), ' | TV | DR') or data['Title'] - description = self._og_search_description(webpage) or data['Description'] + title = remove_end(self._og_search_title( + webpage, default=None), ' | TV | DR') or data['Title'] + description = self._og_search_description( + webpage, default=None) or data.get('Description') - timestamp = parse_iso8601(data['CreatedTime']) + timestamp = parse_iso8601(data.get('CreatedTime')) thumbnail = None duration = None @@ -72,16 +77,18 @@ class DRTVIE(InfoExtractor): subtitles = {} for asset in data['Assets']: - if asset['Kind'] == 'Image': - thumbnail = asset['Uri'] - elif asset['Kind'] == 'VideoResource': - duration = asset['DurationInMilliseconds'] / 1000.0 - restricted_to_denmark = asset['RestrictedToDenmark'] - spoken_subtitles = asset['Target'] == 'SpokenSubtitles' - for link in asset['Links']: - uri = link['Uri'] - target = link['Target'] - format_id = target + if asset.get('Kind') == 'Image': + thumbnail = asset.get('Uri') + elif asset.get('Kind') == 'VideoResource': + duration = float_or_none(asset.get('DurationInMilliseconds'), 1000) + restricted_to_denmark = asset.get('RestrictedToDenmark') + spoken_subtitles = asset.get('Target') == 'SpokenSubtitles' + for link in asset.get('Links', []): + uri = link.get('Uri') + if not uri: + continue + target = link.get('Target') + format_id = target or '' preference = None if spoken_subtitles: preference = -1 @@ -92,8 +99,8 @@ class DRTVIE(InfoExtractor): video_id, preference, f4m_id=format_id)) elif target == 'HLS': formats.extend(self._extract_m3u8_formats( - uri, video_id, 'mp4', preference=preference, - m3u8_id=format_id)) + uri, video_id, 'mp4', entry_protocol='m3u8_native', + preference=preference, m3u8_id=format_id)) else: bitrate = link.get('Bitrate') if bitrate: @@ -101,7 +108,7 @@ class 
DRTVIE(InfoExtractor): formats.append({ 'url': uri, 'format_id': format_id, - 'tbr': bitrate, + 'tbr': int_or_none(bitrate), 'ext': link.get('FileFormat'), }) subtitles_list = asset.get('SubtitlesList') @@ -110,12 +117,18 @@ class DRTVIE(InfoExtractor): 'Danish': 'da', } for subs in subtitles_list: - lang = subs['Language'] - subtitles[LANGS.get(lang, lang)] = [{'url': subs['Uri'], 'ext': 'vtt'}] + if not subs.get('Uri'): + continue + lang = subs.get('Language') or 'da' + subtitles.setdefault(LANGS.get(lang, lang), []).append({ + 'url': subs['Uri'], + 'ext': mimetype2ext(subs.get('MimeType')) or 'vtt' + }) if not formats and restricted_to_denmark: - raise ExtractorError( - 'Unfortunately, DR is not allowed to show this program outside Denmark.', expected=True) + self.raise_geo_restricted( + 'Unfortunately, DR is not allowed to show this program outside Denmark.', + expected=True) self._sort_formats(formats) From dacb3a864a8c89edb312cd28c3de1605a5467d0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 2 Sep 2016 23:43:20 +0700 Subject: [PATCH 358/775] [youtube:playlist] Fallback to video extraction for video/playlist URLs when playlist is broken (Closes #10537) --- youtube_dl/extractor/youtube.py | 56 +++++++++++++++++++++++++++++---- 1 file changed, 50 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index d5d5b7334..ea98fbf69 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1841,6 +1841,28 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): 'id': 'UUXw-G3eDE9trcvY2sBMM_aA', }, 'playlist_mincout': 21, + }, { + # Playlist URL that does not actually serve a playlist + 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4', + 'info_dict': { + 'id': 'FqZTN594JQw', + 'ext': 'webm', + 'title': "Smiley's People 01 detective, Adventure Series, Action", + 'uploader': 'STREEM', + 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng', + 'uploader_url': 're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng', + 'upload_date': '20150526', + 'license': 'Standard YouTube License', + 'description': 'md5:507cdcb5a49ac0da37a920ece610be80', + 'categories': ['People & Blogs'], + 'tags': list, + 'like_count': int, + 'dislike_count': int, + }, + 'params': { + 'skip_download': True, + }, + 'add_ie': [YoutubeIE.ie_key()], }] def _real_initialize(self): @@ -1901,9 +1923,20 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): playlist_title = self._html_search_regex( r'(?s)
    <h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>
    ', - page, 'title') + page, 'title', default=None) - return self.playlist_result(self._entries(page, playlist_id), playlist_id, playlist_title) + has_videos = True + + if not playlist_title: + try: + # Some playlist URLs don't actually serve a playlist (e.g. + # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4) + next(self._entries(page, playlist_id)) + except StopIteration: + has_videos = False + + return has_videos, self.playlist_result( + self._entries(page, playlist_id), playlist_id, playlist_title) def _check_download_just_video(self, url, playlist_id): # Check if it's a video-specific URL @@ -1912,9 +1945,11 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): video_id = query_dict['v'][0] if self._downloader.params.get('noplaylist'): self.to_screen('Downloading just video %s because of --no-playlist' % video_id) - return self.url_result(video_id, 'Youtube', video_id=video_id) + return video_id, self.url_result(video_id, 'Youtube', video_id=video_id) else: self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id)) + return video_id, None + return None, None def _real_extract(self, url): # Extract playlist id @@ -1923,7 +1958,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): raise ExtractorError('Invalid URL: %s' % url) playlist_id = mobj.group(1) or mobj.group(2) - video = self._check_download_just_video(url, playlist_id) + video_id, video = self._check_download_just_video(url, playlist_id) if video: return video @@ -1931,7 +1966,15 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): # Mixes require a custom extraction process return self._extract_mix(playlist_id) - return self._extract_playlist(playlist_id) + has_videos, playlist = self._extract_playlist(playlist_id) + if has_videos or not video_id: + return playlist + + # Some playlist URLs don't actually serve a playlist (see + # https://github.com/rg3/youtube-dl/issues/10537). + # Fallback to plain video extraction if there is a video id + # along with playlist id. 
+ return self.url_result(video_id, 'Youtube', video_id=video_id) class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor): @@ -2312,7 +2355,8 @@ class YoutubeWatchLaterIE(YoutubePlaylistIE): video = self._check_download_just_video(url, 'WL') if video: return video - return self._extract_playlist('WL') + _, playlist = self._extract_playlist('WL') + return playlist class YoutubeFavouritesIE(YoutubeBaseInfoExtractor): From c2b2c7e1386056698ee1b0de5427ea90abf8e9c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 2 Sep 2016 23:50:42 +0700 Subject: [PATCH 359/775] [utils] Add quicktime to mimetype2ext --- youtube_dl/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 904f23fd7..ed199c4ad 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2168,6 +2168,7 @@ def mimetype2ext(mt): 'f4m+xml': 'f4m', 'hds+xml': 'f4m', 'vnd.ms-sstr+xml': 'ism', + 'quicktime': 'mov', }.get(res, res) From 3fcce30289a475901728af7a8dbe85304105b8ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 2 Sep 2016 23:53:17 +0700 Subject: [PATCH 360/775] [drtv] Update tests --- youtube_dl/extractor/drtv.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/drtv.py b/youtube_dl/extractor/drtv.py index 7122449a3..88d096b30 100644 --- a/youtube_dl/extractor/drtv.py +++ b/youtube_dl/extractor/drtv.py @@ -16,21 +16,23 @@ class DRTVIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv/se|nyheder)/(?:[^/]+/)*(?P[\da-z-]+)(?:[/#?]|$)' _TESTS = [{ - 'url': 'https://www.dr.dk/tv/se/boern/ultra/panisk-paske/panisk-paske-5', - 'md5': 'dc515a9ab50577fa14cc4e4b0265168f', + 'url': 'https://www.dr.dk/tv/se/boern/ultra/klassen-ultra/klassen-darlig-taber-10', + 'md5': '25e659cccc9a2ed956110a299fdf5983', 'info_dict': { - 'id': 'panisk-paske-5', + 'id': 'klassen-darlig-taber-10', 'ext': 'mp4', - 'title': 'Panisk Påske (5)', - 'description': 'md5:ca14173c5ab24cd26b0fcc074dff391c', - 'timestamp': 1426984612, - 'upload_date': '20150322', - 'duration': 1455, + 'title': 'Klassen - Dårlig taber (10)', + 'description': 'md5:815fe1b7fa656ed80580f31e8b3c79aa', + 'timestamp': 1471991907, + 'upload_date': '20160823', + 'duration': 606.84, + }, + 'params': { + 'skip_download': True, }, - 'skip': 'Video is no longer available', }, { 'url': 'https://www.dr.dk/nyheder/indland/live-christianias-rydning-af-pusher-street-er-i-gang', - 'md5': '2ada5074f9e79afc0d324a8e9784d850', + 'md5': '2c37175c718155930f939ef59952474a', 'info_dict': { 'id': 'christiania-pusher-street-ryddes-drdkrjpo', 'ext': 'mp4', @@ -39,7 +41,7 @@ class DRTVIE(InfoExtractor): 'timestamp': 1472800279, 'upload_date': '20160902', 'duration': 131.4, - } + }, }] def _real_extract(self, url): From 6496ccb41398971373a2f7162a0684dd12f0b56e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 3 Sep 2016 01:17:15 +0700 Subject: [PATCH 361/775] [youtube] Add support for rental videos' previews (Closes #10532) --- youtube_dl/extractor/youtube.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index ea98fbf69..4c8edef8d 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -844,6 +844,24 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # YouTube Red paid video (https://github.com/rg3/youtube-dl/issues/10059) 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo', 'only_matching': 
True, + }, + { + # Rental video preview + 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg', + 'info_dict': { + 'id': 'uGpuVWrhIzE', + 'ext': 'mp4', + 'title': 'Piku - Trailer', + 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb', + 'upload_date': '20150811', + 'uploader': 'FlixMatrix', + 'uploader_id': 'FlixMatrixKaravan', + 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan', + 'license': 'Standard YouTube License', + }, + 'params': { + 'skip_download': True, + }, } ] @@ -1254,6 +1272,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # Convert to the same format returned by compat_parse_qs video_info = dict((k, [v]) for k, v in args.items()) add_dash_mpd(video_info) + # Rental video is not rented but preview is available (e.g. + # https://www.youtube.com/watch?v=yYr8q0y5Jfg, + # https://github.com/rg3/youtube-dl/issues/10532) + if not video_info and args.get('ypc_vid'): + return self.url_result( + args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid']) if args.get('livestream') == '1' or args.get('live_playback') == 1: is_live = True if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True): From 3a7d35b982fac19ca47b87358001379fafbd5731 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 3 Sep 2016 01:42:33 +0700 Subject: [PATCH 362/775] Credit @C4K3 for #10536 --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index b9a602c12..c4bef040a 100644 --- a/AUTHORS +++ b/AUTHORS @@ -182,3 +182,4 @@ Rob van Bekkum Petr Zvoníček Pratyush Singh Aleksander Nitecki +Sebastian Blunt From 4b3a6076586a38450fa9633480d175a13e33dac7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 3 Sep 2016 01:45:17 +0700 Subject: [PATCH 363/775] [ChangeLog] Actualize --- ChangeLog | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/ChangeLog b/ChangeLog index 2e75c003d..eb05fe77e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,8 +1,32 @@ version +Core +* Restore usage of NAME attribute from EXT-X-MEDIA tag for formats codes in + _extract_m3u8_formats (#10522) +* Handle semicolon in mimetype2ext + + Extractors ++ [youtube] Add support for rental videos' previews (#10532) +* [youtube:playlist] Fallback to video extraction for video/playlist URLs when + no playlist is actually served (#10537) ++ [drtv] Add support for dr.dk/nyheder (#10536) ++ [facebook:plugins:video] Add extractor (#10530) ++ [go] Add extractor for *.go.com sites +* [adobepass] Check for authz_token expiration (#10527) +* [nytimes] improve extraction +* [thestar] Fix extraction (#10465) +* [glide] Fix extraction (#10478) +- [exfm] Remove extractor (#10482) +* [youporn] Fix categories and tags extraction (#10521) ++ [curiositystream] Add extractor for app.curiositystream.com - [thvideo] Remove extractor (#10464) * [movingimage] Fix for the new site name (#10466) ++ [cbs] Add support for once formats (#10515) +* [limelight] Skip ism snd duplicate manifests ++ [porncom] Extract categories and tags (#10510) ++ [facebook] Extract timestamp (#10508) ++ [yahoo] Extract more formats version 2016.08.31 From 86c3bbbcede6efa175f5a93e02511fe32585521f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 3 Sep 2016 01:46:41 +0700 Subject: [PATCH 364/775] release 2016.09.03 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 9 +++++---- youtube_dl/version.py | 2 +- 4 files changed, 10 insertions(+), 9 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md 
b/.github/ISSUE_TEMPLATE.md index 2caca5115..fc18e733b 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.08.31*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.08.31** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.03*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.03** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.08.31 +[debug] youtube-dl version 2016.09.03 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index eb05fe77e..68dbeb696 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2016.09.03 Core * Restore usage of NAME attribute from EXT-X-MEDIA tag for formats codes in diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 42bf291e2..015332bca 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -171,6 +171,8 @@ - **CTVNews** - **culturebox.francetvinfo.fr** - **CultureUnplugged** + - **curiositystream** + - **curiositystream:collection** - **CWTV** - **DailyMail** - **dailymotion** @@ -223,11 +225,11 @@ - **EsriVideo** - **Europa** - **EveryonesMixtape** - - **exfm**: ex.fm - **ExpoTV** - **ExtremeTube** - **EyedoTV** - **facebook** + - **FacebookPluginsVideo** - **faz.net** - **fc2** - **Fczenit** @@ -271,6 +273,7 @@ - **Glide**: Glide mobile video messages (glide.me) - **Globo** - **GloboArticle** + - **Go** - **GodTube** - **GodTV** - **Golem** @@ -406,6 +409,7 @@ - **MovieClips** - **MovieFap** - **Moviezine** + - **MovingImage** - **MPORA** - **MSN** - **mtg**: MTG services @@ -659,7 +663,6 @@ - **sr:mediathek**: Saarländischer Rundfunk - **SRGSSR** - **SRGSSRPlay**: srf.ch, rts.ch, rsi.ch, rtr.ch and swissinfo.ch play sites - - **SSA** - **stanfordoc**: Stanford Open ClassRoom - **Steam** - **Stitcher** @@ -702,8 +705,6 @@ - **TheStar** - **ThisAmericanLife** - **ThisAV** - - **THVideo** - - **THVideoPlaylist** - **tinypic**: tinypic.com videos - **tlc.de** - **TMZ** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index fe442dd88..5be8c0122 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.08.31' +__version__ = '2016.09.03' From dedb1770295d214225a3a31b5f99da877cf01eee Mon Sep 17 00:00:00 2001 From: Christian Pointner Date: Sat, 3 Sep 2016 01:50:26 +0200 Subject: [PATCH 365/775] Fix parsing 
of HTML5 media elements This fixes an error in _parse_html5_media_entries in case an audio or video tag directly uses a src attribute insted of elements in it's body. --- youtube_dl/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index a9c7a8d16..a82968162 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1749,7 +1749,7 @@ class InfoExtractor(object): media_attributes = extract_attributes(media_tag) src = media_attributes.get('src') if src: - _, formats = _media_formats(src) + _, formats = _media_formats(src, media_type) media_info['formats'].extend(formats) media_info['thumbnail'] = media_attributes.get('poster') if media_content: From cf0efe96366259a5f0f07ae79280bfa17dc6f6e7 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 3 Sep 2016 17:25:03 +0800 Subject: [PATCH 366/775] [fc2:embed] New extractor for Flash player URLs Closes #10512 --- ChangeLog | 6 ++++ youtube_dl/extractor/extractors.py | 5 ++- youtube_dl/extractor/fc2.py | 58 ++++++++++++++++++++++++++---- 3 files changed, 61 insertions(+), 8 deletions(-) diff --git a/ChangeLog b/ChangeLog index 68dbeb696..065fc83a8 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Extractors ++ [fc2] Recognize Flash player URLs (#10512) + + version 2016.09.03 Core diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index bc616223e..d851e5f36 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -269,7 +269,10 @@ from .facebook import ( FacebookPluginsVideoIE, ) from .faz import FazIE -from .fc2 import FC2IE +from .fc2 import ( + FC2IE, + FC2EmbedIE, +) from .fczenit import FczenitIE from .firstpost import FirstpostIE from .firsttv import FirstTVIE diff --git a/youtube_dl/extractor/fc2.py b/youtube_dl/extractor/fc2.py index c7d69ff1f..b9e58d4df 100644 --- a/youtube_dl/extractor/fc2.py +++ b/youtube_dl/extractor/fc2.py @@ -1,10 +1,12 @@ -#! 
-*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals import hashlib +import re from .common import InfoExtractor from ..compat import ( + compat_parse_qs, compat_urllib_request, compat_urlparse, ) @@ -16,7 +18,7 @@ from ..utils import ( class FC2IE(InfoExtractor): - _VALID_URL = r'^https?://video\.fc2\.com/(?:[^/]+/)*content/(?P[^/]+)' + _VALID_URL = r'^(?:https?://video\.fc2\.com/(?:[^/]+/)*content/|fc2:)(?P[^/]+)' IE_NAME = 'fc2' _NETRC_MACHINE = 'fc2' _TESTS = [{ @@ -75,12 +77,17 @@ class FC2IE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) self._login() - webpage = self._download_webpage(url, video_id) - self._downloader.cookiejar.clear_session_cookies() # must clear - self._login() + webpage = None + if not url.startswith('fc2:'): + webpage = self._download_webpage(url, video_id) + self._downloader.cookiejar.clear_session_cookies() # must clear + self._login() - title = self._og_search_title(webpage) - thumbnail = self._og_search_thumbnail(webpage) + title = 'FC2 video %s' % video_id + thumbnail = None + if webpage is not None: + title = self._og_search_title(webpage) + thumbnail = self._og_search_thumbnail(webpage) refer = url.replace('/content/', '/a/content/') if '/a/content/' not in url else url mimi = hashlib.md5((video_id + '_gGddgPfeaf_gzyr').encode('utf-8')).hexdigest() @@ -113,3 +120,40 @@ class FC2IE(InfoExtractor): 'ext': 'flv', 'thumbnail': thumbnail, } + + +class FC2EmbedIE(InfoExtractor): + _VALID_URL = r'https?://video\.fc2\.com/flv2\.swf\?(?P.+)' + IE_NAME = 'fc2:embed' + + _TEST = { + 'url': 'http://video.fc2.com/flv2.swf?t=201404182936758512407645&i=20130316kwishtfitaknmcgd76kjd864hso93htfjcnaogz629mcgfs6rbfk0hsycma7shkf85937cbchfygd74&i=201403223kCqB3Ez&d=2625&sj=11&lang=ja&rel=1&from=11&cmt=1&tk=TlRBM09EQTNNekU9&tl=プリズン・ブレイク%20S1-01%20マイケル%20【吹替】', + 'md5': 'b8aae5334cb691bdb1193a88a6ab5d5a', + 'info_dict': { + 'id': '201403223kCqB3Ez', + 'ext': 'flv', + 'title': 'プリズン・ブレイク S1-01 マイケル 【吹替】', + 'thumbnail': 're:^https?://.*\.jpg$', + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + query = compat_parse_qs(mobj.group('query')) + + video_id = query['i'][-1] + title = query.get('tl', ['FC2 video %s' % video_id])[0] + + sj = query.get('sj', [None])[0] + thumbnail = None + if sj: + # See thumbnailImagePath() in ServerConst.as of flv2.swf + thumbnail = 'http://video%s-thumbnail.fc2.com/up/pic/%s.jpg' % ( + sj, '/'.join((video_id[:6], video_id[6:8], video_id[-2], video_id[-1], video_id))) + + return { + '_type': 'url_transparent', + 'url': 'fc2:%s' % video_id, + 'title': title, + 'thumbnail': thumbnail, + } From cdc783510bb575b2318b1d7d42fb98f0c0f0df18 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 3 Sep 2016 18:16:19 +0800 Subject: [PATCH 367/775] [foxnews:insider] Add new extractor Closes #10445 --- ChangeLog | 1 + youtube_dl/extractor/extractors.py | 5 +++- youtube_dl/extractor/foxnews.py | 48 +++++++++++++++++++++++++++++- 3 files changed, 52 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index 065fc83a8..199983674 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors ++ [foxnews] Add support for FoxNews Insider (#10445) + [fc2] Recognize Flash player URLs (#10512) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index d851e5f36..8c6ee0503 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -287,7 +287,10 @@ from .formula1 import Formula1IE from .fourtube 
import FourTubeIE from .fox import FOXIE from .foxgay import FoxgayIE -from .foxnews import FoxNewsIE +from .foxnews import ( + FoxNewsIE, + FoxNewsInsiderIE, +) from .foxsports import FoxSportsIE from .franceculture import FranceCultureIE from .franceinter import FranceInterIE diff --git a/youtube_dl/extractor/foxnews.py b/youtube_dl/extractor/foxnews.py index b04da2415..5c7acd795 100644 --- a/youtube_dl/extractor/foxnews.py +++ b/youtube_dl/extractor/foxnews.py @@ -3,11 +3,12 @@ from __future__ import unicode_literals import re from .amp import AMPIE +from .common import InfoExtractor class FoxNewsIE(AMPIE): IE_DESC = 'Fox News and Fox Business Video' - _VALID_URL = r'https?://(?Pvideo\.fox(?:news|business)\.com)/v/(?:video-embed\.html\?video_id=)?(?P\d+)' + _VALID_URL = r'https?://(?Pvideo\.(?:insider\.)?fox(?:news|business)\.com)/v/(?:video-embed\.html\?video_id=)?(?P\d+)' _TESTS = [ { 'url': 'http://video.foxnews.com/v/3937480/frozen-in-time/#sp=show-clips', @@ -49,6 +50,11 @@ class FoxNewsIE(AMPIE): 'url': 'http://video.foxbusiness.com/v/4442309889001', 'only_matching': True, }, + { + # From http://insider.foxnews.com/2016/08/25/univ-wisconsin-student-group-pushing-silence-certain-words + 'url': 'http://video.insider.foxnews.com/v/video-embed.html?video_id=5099377331001&autoplay=true&share_url=http://insider.foxnews.com/2016/08/25/univ-wisconsin-student-group-pushing-silence-certain-words&share_title=Student%20Group:%20Saying%20%27Politically%20Correct,%27%20%27Trash%27%20and%20%27Lame%27%20Is%20Offensive&share=true', + 'only_matching': True, + }, ] def _real_extract(self, url): @@ -58,3 +64,43 @@ class FoxNewsIE(AMPIE): 'http://%s/v/feed/video/%s.js?template=fox' % (host, video_id)) info['id'] = video_id return info + + +class FoxNewsInsiderIE(InfoExtractor): + _VALID_URL = r'https?://insider\.foxnews\.com/([^/]+/)+(?P[a-z-]+)' + IE_NAME = 'foxnews:insider' + + _TEST = { + 'url': 'http://insider.foxnews.com/2016/08/25/univ-wisconsin-student-group-pushing-silence-certain-words', + 'md5': 'a10c755e582d28120c62749b4feb4c0c', + 'info_dict': { + 'id': '5099377331001', + 'display_id': 'univ-wisconsin-student-group-pushing-silence-certain-words', + 'ext': 'mp4', + 'title': 'Student Group: Saying \'Politically Correct,\' \'Trash\' and \'Lame\' Is Offensive', + 'description': 'Is campus censorship getting out of control?', + 'timestamp': 1472168725, + 'upload_date': '20160825', + 'thumbnail': 're:^https?://.*\.jpg$', + }, + 'add_ie': [FoxNewsIE.ie_key()], + } + + def _real_extract(self, url): + display_id = self._match_id(url) + + webpage = self._download_webpage(url, display_id) + + embed_url = self._html_search_meta('embedUrl', webpage, 'embed URL') + + title = self._og_search_title(webpage) + description = self._og_search_description(webpage) + + return { + '_type': 'url_transparent', + 'ie_key': FoxNewsIE.ie_key(), + 'url': embed_url, + 'display_id': display_id, + 'title': title, + 'description': description, + } From ed2bfe93aaa11f49f7b2b92b581abb6aa385dfbf Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 3 Sep 2016 18:22:00 +0800 Subject: [PATCH 368/775] [fc2:embed] Add ie_key --- youtube_dl/extractor/fc2.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/fc2.py b/youtube_dl/extractor/fc2.py index b9e58d4df..c032d4d02 100644 --- a/youtube_dl/extractor/fc2.py +++ b/youtube_dl/extractor/fc2.py @@ -153,6 +153,7 @@ class FC2EmbedIE(InfoExtractor): return { '_type': 'url_transparent', + 'ie_key': FC2IE.ie_key(), 'url': 'fc2:%s' % video_id, 'title': title, 
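
The new fc2:embed extractor above does little more than map a Flash player URL onto the internal fc2:<id> form and hand the remaining work to the main FC2 extractor via a url_transparent result (foxnews:insider delegates to FoxNewsIE the same way). A minimal standalone sketch of that mapping, using the Python 3 standard library instead of youtube-dl's compat helpers; the function name is only illustrative:

    from urllib.parse import parse_qs, urlparse

    def fc2_embed_to_entry(embed_url):
        # flv2.swf?...&i=<video id>&...; the last 'i' value wins, as in query['i'][-1]
        query = parse_qs(urlparse(embed_url).query)
        video_id = query['i'][-1]
        title = query.get('tl', ['FC2 video %s' % video_id])[0]
        return {
            '_type': 'url_transparent',
            'url': 'fc2:%s' % video_id,
            'title': title,
        }
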
'thumbnail': thumbnail, From 45aab4d30b7c3fc03c9be9680550cba88bd85b5c Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 3 Sep 2016 18:37:36 +0800 Subject: [PATCH 369/775] [youjizz] Fix extraction. The site has moved to HTML5 Closes #10437 --- ChangeLog | 1 + youtube_dl/extractor/youjizz.py | 43 +++++++-------------------------- 2 files changed, 10 insertions(+), 34 deletions(-) diff --git a/ChangeLog b/ChangeLog index 199983674..2809e55d7 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors +* [youjizz] Fix extraction (#10437) + [foxnews] Add support for FoxNews Insider (#10445) + [fc2] Recognize Flash player URLs (#10512) diff --git a/youtube_dl/extractor/youjizz.py b/youtube_dl/extractor/youjizz.py index 31e2f9263..b50f34e9b 100644 --- a/youtube_dl/extractor/youjizz.py +++ b/youtube_dl/extractor/youjizz.py @@ -1,21 +1,16 @@ from __future__ import unicode_literals -import re - from .common import InfoExtractor -from ..utils import ( - ExtractorError, -) class YouJizzIE(InfoExtractor): _VALID_URL = r'https?://(?:\w+\.)?youjizz\.com/videos/(?:[^/#?]+)?-(?P[0-9]+)\.html(?:$|[?#])' _TESTS = [{ 'url': 'http://www.youjizz.com/videos/zeichentrick-1-2189178.html', - 'md5': '07e15fa469ba384c7693fd246905547c', + 'md5': '78fc1901148284c69af12640e01c6310', 'info_dict': { 'id': '2189178', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Zeichentrick 1', 'age_limit': 18, } @@ -27,38 +22,18 @@ class YouJizzIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) + # YouJizz's HTML5 player has invalid HTML + webpage = webpage.replace('"controls', '" controls') age_limit = self._rta_search(webpage) video_title = self._html_search_regex( r'\s*(.*)\s*', webpage, 'title') - embed_page_url = self._search_regex( - r'(https?://www.youjizz.com/videos/embed/[0-9]+)', - webpage, 'embed page') - webpage = self._download_webpage( - embed_page_url, video_id, note='downloading embed page') + info_dict = self._parse_html5_media_entries(url, webpage, video_id)[0] - # Get the video URL - m_playlist = re.search(r'so.addVariable\("playlist", ?"(?P.+?)"\);', webpage) - if m_playlist is not None: - playlist_url = m_playlist.group('playlist') - playlist_page = self._download_webpage(playlist_url, video_id, - 'Downloading playlist page') - m_levels = list(re.finditer(r'[^"]+)"\)\);', - webpage, 'video URL') - - return { + info_dict.update({ 'id': video_id, - 'url': video_url, 'title': video_title, - 'ext': 'flv', - 'format': 'flv', - 'player_url': embed_page_url, 'age_limit': age_limit, - } + }) + + return info_dict From 9603b6601208333bc49e0c69199f0e652a7aaea3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 27 Aug 2016 04:52:18 +0700 Subject: [PATCH 370/775] Introduce --skip-unavailable-fragments --- youtube_dl/__init__.py | 1 + youtube_dl/downloader/fragment.py | 10 ++++++++-- youtube_dl/options.py | 10 +++++++++- 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index a9730292c..42128272a 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -318,6 +318,7 @@ def _real_main(argv=None): 'nooverwrites': opts.nooverwrites, 'retries': opts.retries, 'fragment_retries': opts.fragment_retries, + 'skip_unavailable_fragments': opts.skip_unavailable_fragments, 'buffersize': opts.buffersize, 'noresizebuffer': opts.noresizebuffer, 'continuedl': opts.continue_dl, diff --git a/youtube_dl/downloader/fragment.py b/youtube_dl/downloader/fragment.py 
index ba903ae10..b4a798f8f 100644 --- a/youtube_dl/downloader/fragment.py +++ b/youtube_dl/downloader/fragment.py @@ -22,14 +22,20 @@ class FragmentFD(FileDownloader): Available options: - fragment_retries: Number of times to retry a fragment for HTTP error (DASH only) + fragment_retries: Number of times to retry a fragment for HTTP error (DASH + and hlsnative only) + skip_unavailable_fragments: + Skip unavailable fragments (DASH and hlsnative only) """ def report_retry_fragment(self, fragment_name, count, retries): self.to_screen( - '[download] Got server HTTP error. Retrying fragment %s (attempt %d of %s)...' + '[download] Got server HTTP error: %s. Retrying fragment %s (attempt %d of %s)...' % (fragment_name, count, self.format_retries(retries))) + def report_skip_fragment(self, fragment_name): + self.to_screen('[download] Skipping fragment %s...' % fragment_name) + def _prepare_and_start_frag_download(self, ctx): self._prepare_frag_download(ctx) self._start_frag_download(ctx) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 5d62deef4..56f312f57 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -423,7 +423,15 @@ def parseOpts(overrideArguments=None): downloader.add_option( '--fragment-retries', dest='fragment_retries', metavar='RETRIES', default=10, - help='Number of retries for a fragment (default is %default), or "infinite" (DASH only)') + help='Number of retries for a fragment (default is %default), or "infinite" (DASH and hlsnative only)') + downloader.add_option( + '--skip-unavailable-fragments', + action='store_true', dest='skip_unavailable_fragments', default=True, + help='Skip unavailable fragments (DASH and hlsnative only)') + general.add_option( + '--abort-on-unavailable-fragment', + action='store_false', dest='skip_unavailable_fragments', + help='Abort downloading when some fragment is not available') downloader.add_option( '--buffer-size', dest='buffersize', metavar='SIZE', default='1024', From 25afc2a7830e281e849609202b4f70728664bdb7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 27 Aug 2016 04:55:55 +0700 Subject: [PATCH 371/775] [downloader/dash:hls] Respect --fragment-retries and --skip-unavailable-fragments (Closes #10165, closes #10448) --- youtube_dl/downloader/dash.py | 12 +++++----- youtube_dl/downloader/hls.py | 41 +++++++++++++++++++++++++++++------ 2 files changed, 41 insertions(+), 12 deletions(-) diff --git a/youtube_dl/downloader/dash.py b/youtube_dl/downloader/dash.py index 8bbab9dbc..cbcee324d 100644 --- a/youtube_dl/downloader/dash.py +++ b/youtube_dl/downloader/dash.py @@ -38,6 +38,7 @@ class DashSegmentsFD(FragmentFD): segments_filenames = [] fragment_retries = self.params.get('fragment_retries', 0) + skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True) def append_url_to_file(target_url, tmp_filename, segment_name): target_filename = '%s-%s' % (tmp_filename, segment_name) @@ -52,19 +53,20 @@ class DashSegmentsFD(FragmentFD): down.close() segments_filenames.append(target_sanitized) break - except (compat_urllib_error.HTTPError, ) as err: + except compat_urllib_error.HTTPError: # YouTube may often return 404 HTTP error for a fragment causing the # whole download to fail. However if the same fragment is immediately # retried with the same request data this usually succeeds (1-2 attemps # is usually enough) thus allowing to download the whole file successfully. - # So, we will retry all fragments that fail with 404 HTTP error for now. 
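
The two options added above boil down to one per-fragment policy: retry a failed fragment up to --fragment-retries times, then either skip it (--skip-unavailable-fragments, the default) or abort the whole download (--abort-on-unavailable-fragment). A rough self-contained sketch of that policy; the download argument is a stand-in for the real fragment request:

    from urllib.error import HTTPError

    def fetch_fragment(download, fragment_retries=10, skip_unavailable_fragments=True):
        count = 0
        while count <= fragment_retries:
            try:
                return download()  # one attempt at this fragment
            except HTTPError:
                count += 1
                if count > fragment_retries:
                    if skip_unavailable_fragments:
                        return None  # give up on this fragment, keep the download going
                    raise  # abort instead, as --abort-on-unavailable-fragment requests
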
- if err.code != 404: - raise - # Retry fragment + # To be future-proof we will retry all fragments that fail with any + # HTTP error. count += 1 if count <= fragment_retries: self.report_retry_fragment(segment_name, count, fragment_retries) if count > fragment_retries: + if skip_unavailable_fragments: + self.report_skip_fragment(segment_name) + return self.report_error('giving up after %s fragment retries' % fragment_retries) return False diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index baaff44d5..7412620a5 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -13,6 +13,7 @@ from .fragment import FragmentFD from .external import FFmpegFD from ..compat import ( + compat_urllib_error, compat_urlparse, compat_struct_pack, ) @@ -83,6 +84,10 @@ class HlsFD(FragmentFD): self._prepare_and_start_frag_download(ctx) + fragment_retries = self.params.get('fragment_retries', 0) + skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True) + test = self.params.get('test', False) + extra_query = None extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url') if extra_param_to_segment_url: @@ -99,15 +104,37 @@ class HlsFD(FragmentFD): line if re.match(r'^https?://', line) else compat_urlparse.urljoin(man_url, line)) - frag_filename = '%s-Frag%d' % (ctx['tmpfilename'], i) + frag_name = 'Frag%d' % i + frag_filename = '%s-%s' % (ctx['tmpfilename'], frag_name) if extra_query: frag_url = update_url_query(frag_url, extra_query) - success = ctx['dl'].download(frag_filename, {'url': frag_url}) - if not success: + count = 0 + while count <= fragment_retries: + try: + success = ctx['dl'].download(frag_filename, {'url': frag_url}) + if not success: + return False + down, frag_sanitized = sanitize_open(frag_filename, 'rb') + frag_content = down.read() + down.close() + break + except compat_urllib_error.HTTPError: + # Unavailable (possibly temporary) fragments may be served. + # First we try to retry then either skip or abort. + # See https://github.com/rg3/youtube-dl/issues/10165, + # https://github.com/rg3/youtube-dl/issues/10448). 
+ count += 1 + if count <= fragment_retries: + self.report_retry_fragment(frag_name, count, fragment_retries) + if count > fragment_retries: + if skip_unavailable_fragments: + i += 1 + media_sequence += 1 + self.report_skip_fragment(frag_name) + continue + self.report_error( + 'giving up after %s fragment retries' % fragment_retries) return False - down, frag_sanitized = sanitize_open(frag_filename, 'rb') - frag_content = down.read() - down.close() if decrypt_info['METHOD'] == 'AES-128': iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', media_sequence) frag_content = AES.new( @@ -115,7 +142,7 @@ class HlsFD(FragmentFD): ctx['dest_stream'].write(frag_content) frags_filenames.append(frag_sanitized) # We only download the first fragment during the test - if self.params.get('test', False): + if test: break i += 1 media_sequence += 1 From 2e99cd30c3108fd8da6a9f9fadfa89852c8d8826 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 27 Aug 2016 04:57:59 +0700 Subject: [PATCH 372/775] [downloader/dash:hls] Report exact fragment error on retry --- youtube_dl/downloader/dash.py | 4 ++-- youtube_dl/downloader/fragment.py | 5 +++-- youtube_dl/downloader/hls.py | 4 ++-- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/youtube_dl/downloader/dash.py b/youtube_dl/downloader/dash.py index cbcee324d..e087cf142 100644 --- a/youtube_dl/downloader/dash.py +++ b/youtube_dl/downloader/dash.py @@ -53,7 +53,7 @@ class DashSegmentsFD(FragmentFD): down.close() segments_filenames.append(target_sanitized) break - except compat_urllib_error.HTTPError: + except compat_urllib_error.HTTPError as err: # YouTube may often return 404 HTTP error for a fragment causing the # whole download to fail. However if the same fragment is immediately # retried with the same request data this usually succeeds (1-2 attemps @@ -62,7 +62,7 @@ class DashSegmentsFD(FragmentFD): # HTTP error. count += 1 if count <= fragment_retries: - self.report_retry_fragment(segment_name, count, fragment_retries) + self.report_retry_fragment(err, segment_name, count, fragment_retries) if count > fragment_retries: if skip_unavailable_fragments: self.report_skip_fragment(segment_name) diff --git a/youtube_dl/downloader/fragment.py b/youtube_dl/downloader/fragment.py index b4a798f8f..84aacf7db 100644 --- a/youtube_dl/downloader/fragment.py +++ b/youtube_dl/downloader/fragment.py @@ -6,6 +6,7 @@ import time from .common import FileDownloader from .http import HttpFD from ..utils import ( + error_to_compat_str, encodeFilename, sanitize_open, ) @@ -28,10 +29,10 @@ class FragmentFD(FileDownloader): Skip unavailable fragments (DASH and hlsnative only) """ - def report_retry_fragment(self, fragment_name, count, retries): + def report_retry_fragment(self, err, fragment_name, count, retries): self.to_screen( '[download] Got server HTTP error: %s. Retrying fragment %s (attempt %d of %s)...' - % (fragment_name, count, self.format_retries(retries))) + % (error_to_compat_str(err), fragment_name, count, self.format_retries(retries))) def report_skip_fragment(self, fragment_name): self.to_screen('[download] Skipping fragment %s...' 
% fragment_name) diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 7412620a5..5d70abf62 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -118,14 +118,14 @@ class HlsFD(FragmentFD): frag_content = down.read() down.close() break - except compat_urllib_error.HTTPError: + except compat_urllib_error.HTTPError as err: # Unavailable (possibly temporary) fragments may be served. # First we try to retry then either skip or abort. # See https://github.com/rg3/youtube-dl/issues/10165, # https://github.com/rg3/youtube-dl/issues/10448). count += 1 if count <= fragment_retries: - self.report_retry_fragment(frag_name, count, fragment_retries) + self.report_retry_fragment(err, frag_name, count, fragment_retries) if count > fragment_retries: if skip_unavailable_fragments: i += 1 From 4a69fa04e0074a3d5938ffb03decff9cc33f5d3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 30 Aug 2016 22:28:14 +0700 Subject: [PATCH 373/775] [downloader/dash] Abort download immediately after giving up on some fragment --- youtube_dl/downloader/dash.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/downloader/dash.py b/youtube_dl/downloader/dash.py index e087cf142..efeae02a3 100644 --- a/youtube_dl/downloader/dash.py +++ b/youtube_dl/downloader/dash.py @@ -66,14 +66,17 @@ class DashSegmentsFD(FragmentFD): if count > fragment_retries: if skip_unavailable_fragments: self.report_skip_fragment(segment_name) - return + return True self.report_error('giving up after %s fragment retries' % fragment_retries) return False + return True if initialization_url: - append_url_to_file(initialization_url, ctx['tmpfilename'], 'Init') + if not append_url_to_file(initialization_url, ctx['tmpfilename'], 'Init'): + return False for i, segment_url in enumerate(segment_urls): - append_url_to_file(segment_url, ctx['tmpfilename'], 'Seg%d' % i) + if not append_url_to_file(segment_url, ctx['tmpfilename'], 'Seg%d' % i): + return False self._finish_frag_download(ctx) From 7e5dc339de14547aa7b489e88b4c456ec613ba9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 4 Sep 2016 00:29:01 +0700 Subject: [PATCH 374/775] [youtube:watchlater] Fix extraction (Closes #10544) --- youtube_dl/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 4c8edef8d..0bc85af74 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -2376,7 +2376,7 @@ class YoutubeWatchLaterIE(YoutubePlaylistIE): }] def _real_extract(self, url): - video = self._check_download_just_video(url, 'WL') + _, video = self._check_download_just_video(url, 'WL') if video: return video _, playlist = self._extract_playlist('WL') From 091624f9da491ef3a98e63367bf4ffd9836dafde Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 4 Sep 2016 03:39:13 +0700 Subject: [PATCH 375/775] [vimple] Extend _VALID_URL (Closes #10547) --- youtube_dl/extractor/vimple.py | 35 +++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/vimple.py b/youtube_dl/extractor/vimple.py index 92321d66e..7fd9b777b 100644 --- a/youtube_dl/extractor/vimple.py +++ b/youtube_dl/extractor/vimple.py @@ -28,23 +28,24 @@ class SprutoBaseIE(InfoExtractor): class VimpleIE(SprutoBaseIE): IE_DESC = 'Vimple - one-click video hosting' - _VALID_URL = 
r'https?://(?:player\.vimple\.ru/iframe|vimple\.ru)/(?P[\da-f-]{32,36})' - _TESTS = [ - { - 'url': 'http://vimple.ru/c0f6b1687dcd4000a97ebe70068039cf', - 'md5': '2e750a330ed211d3fd41821c6ad9a279', - 'info_dict': { - 'id': 'c0f6b168-7dcd-4000-a97e-be70068039cf', - 'ext': 'mp4', - 'title': 'Sunset', - 'duration': 20, - 'thumbnail': 're:https?://.*?\.jpg', - }, - }, { - 'url': 'http://player.vimple.ru/iframe/52e1beec-1314-4a83-aeac-c61562eadbf9', - 'only_matching': True, - } - ] + _VALID_URL = r'https?://(?:player\.vimple\.(?:ru|co)/iframe|vimple\.(?:ru|co))/(?P[\da-f-]{32,36})' + _TESTS = [{ + 'url': 'http://vimple.ru/c0f6b1687dcd4000a97ebe70068039cf', + 'md5': '2e750a330ed211d3fd41821c6ad9a279', + 'info_dict': { + 'id': 'c0f6b168-7dcd-4000-a97e-be70068039cf', + 'ext': 'mp4', + 'title': 'Sunset', + 'duration': 20, + 'thumbnail': 're:https?://.*?\.jpg', + }, + }, { + 'url': 'http://player.vimple.ru/iframe/52e1beec-1314-4a83-aeac-c61562eadbf9', + 'only_matching': True, + }, { + 'url': 'http://vimple.co/04506a053f124483b8fb05ed73899f19', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) From 37c7490ac62d4aacbf9103bf6760d20f21984a55 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 4 Sep 2016 04:59:46 +0700 Subject: [PATCH 376/775] [espn] Extend _VALID_URL (Closes #10549) --- youtube_dl/extractor/espn.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/espn.py b/youtube_dl/extractor/espn.py index 66c08bec4..6d10f8e68 100644 --- a/youtube_dl/extractor/espn.py +++ b/youtube_dl/extractor/espn.py @@ -5,7 +5,7 @@ from ..utils import remove_end class ESPNIE(InfoExtractor): - _VALID_URL = r'https?://espn\.go\.com/(?:[^/]+/)*(?P[^/]+)' + _VALID_URL = r'https?://(?:espn\.go|(?:www\.)?espn)\.com/(?:[^/]+/)*(?P[^/]+)' _TESTS = [{ 'url': 'http://espn.go.com/video/clip?id=10365079', 'md5': '60e5d097a523e767d06479335d1bdc58', @@ -47,6 +47,9 @@ class ESPNIE(InfoExtractor): }, { 'url': 'http://espn.go.com/nba/playoffs/2015/story/_/id/12887571/john-wall-washington-wizards-no-swelling-left-hand-wrist-game-5-return', 'only_matching': True, + }, { + 'url': 'http://www.espn.com/video/clip?id=10365079', + 'only_matching': True, }] def _real_extract(self, url): From 622638512b8241c39837b634e75c44cf9105a299 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 4 Sep 2016 16:25:59 +0800 Subject: [PATCH 377/775] [rottentomatoes] Fix extraction Closes #10467 --- ChangeLog | 1 + youtube_dl/extractor/rottentomatoes.py | 30 +++++++++++++++++++------- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/ChangeLog b/ChangeLog index 2809e55d7..e6a2d24e1 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors +* [rottentomatoes] Fix extraction (#10467) * [youjizz] Fix extraction (#10437) + [foxnews] Add support for FoxNews Insider (#10445) + [fc2] Recognize Flash player URLs (#10512) diff --git a/youtube_dl/extractor/rottentomatoes.py b/youtube_dl/extractor/rottentomatoes.py index f9cd48790..df39ed3f2 100644 --- a/youtube_dl/extractor/rottentomatoes.py +++ b/youtube_dl/extractor/rottentomatoes.py @@ -1,8 +1,7 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import compat_urlparse -from .internetvideoarchive import InternetVideoArchiveIE +from ..utils import js_to_json class RottenTomatoesIE(InfoExtractor): @@ -11,21 +10,36 @@ class RottenTomatoesIE(InfoExtractor): _TEST = { 'url': 'http://www.rottentomatoes.com/m/toy_story_3/trailers/11028566/', 
'info_dict': { - 'id': '613340', + 'id': '11028566', 'ext': 'mp4', 'title': 'Toy Story 3', + 'thumbnail': 're:^https?://.*\.jpg$', }, } def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - og_video = self._og_search_video_url(webpage) - query = compat_urlparse.urlparse(og_video).query + + params = self._parse_json( + self._search_regex(r'(?s)RTVideo\(({.+?})\);', webpage, 'player parameters'), + video_id, transform_source=lambda s: js_to_json(s.replace('window.location.href', '""'))) + + formats = [] + if params.get('urlHLS'): + formats.extend(self._extract_m3u8_formats( + params['urlHLS'], video_id, ext='mp4', + entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)) + if params.get('urlMP4'): + formats.append({ + 'url': params['urlMP4'], + 'format_id': 'mp4', + }) + self._sort_formats(formats) return { - '_type': 'url_transparent', - 'url': InternetVideoArchiveIE._build_xml_url(query), - 'ie_key': InternetVideoArchiveIE.ie_key(), + 'id': video_id, 'title': self._og_search_title(webpage), + 'formats': formats, + 'thumbnail': params.get('thumbnailImg'), } From b29cd56591f1ef001d9f30bdff87789815f1fa0c Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 4 Sep 2016 17:01:39 +0800 Subject: [PATCH 378/775] [pornovoisines] Fix extraction (closes #10469) --- ChangeLog | 1 + youtube_dl/extractor/pornovoisines.py | 80 +++++++++++++++------------ 2 files changed, 47 insertions(+), 34 deletions(-) diff --git a/ChangeLog b/ChangeLog index e6a2d24e1..616b55803 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors +* [pornvoisines] Fix extraction (#10469) * [rottentomatoes] Fix extraction (#10467) * [youjizz] Fix extraction (#10437) + [foxnews] Add support for FoxNews Insider (#10445) diff --git a/youtube_dl/extractor/pornovoisines.py b/youtube_dl/extractor/pornovoisines.py index 6b51e5c54..58f557e39 100644 --- a/youtube_dl/extractor/pornovoisines.py +++ b/youtube_dl/extractor/pornovoisines.py @@ -2,7 +2,6 @@ from __future__ import unicode_literals import re -import random from .common import InfoExtractor from ..utils import ( @@ -13,61 +12,69 @@ from ..utils import ( class PornoVoisinesIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?pornovoisines\.com/showvideo/(?P\d+)/(?P[^/]+)' - - _VIDEO_URL_TEMPLATE = 'http://stream%d.pornovoisines.com' \ - '/static/media/video/transcoded/%s-640x360-1000-trscded.mp4' - - _SERVER_NUMBERS = (1, 2) + _VALID_URL = r'https?://(?:www\.)?pornovoisines\.com/videos/show/(?P\d+)/(?P[^/.]+)' _TEST = { - 'url': 'http://www.pornovoisines.com/showvideo/1285/recherche-appartement/', - 'md5': '5ac670803bc12e9e7f9f662ce64cf1d1', + 'url': 'http://www.pornovoisines.com/videos/show/919/recherche-appartement.html', + 'md5': '6f8aca6a058592ab49fe701c8ba8317b', 'info_dict': { - 'id': '1285', + 'id': '919', 'display_id': 'recherche-appartement', 'ext': 'mp4', 'title': 'Recherche appartement', - 'description': 'md5:819ea0b785e2a04667a1a01cdc89594e', + 'description': 'md5:fe10cb92ae2dd3ed94bb4080d11ff493', 'thumbnail': 're:^https?://.*\.jpg$', 'upload_date': '20140925', 'duration': 120, 'view_count': int, 'average_rating': float, - 'categories': ['Débutantes', 'Scénario', 'Sodomie'], + 'categories': ['Débutante', 'Débutantes', 'Scénario', 'Sodomie'], 'age_limit': 18, + 'subtitles': { + 'fr': [{ + 'ext': 'vtt', + }] + }, } } - @classmethod - def build_video_url(cls, num): - return cls._VIDEO_URL_TEMPLATE % (random.choice(cls._SERVER_NUMBERS), num) - def _real_extract(self, url): mobj = 
re.match(self._VALID_URL, url) video_id = mobj.group('id') display_id = mobj.group('display_id') + settings_url = self._download_json( + 'http://www.pornovoisines.com/api/video/%s/getsettingsurl/' % video_id, + video_id, note='Getting settings URL')['video_settings_url'] + settings = self._download_json(settings_url, video_id)['data'] + + formats = [] + for kind, data in settings['variants'].items(): + if kind == 'HLS': + formats.extend(self._extract_m3u8_formats( + data, video_id, ext='mp4', entry_protocol='m3u8_native', m3u8_id='hls')) + elif kind == 'MP4': + for item in data: + formats.append({ + 'url': item['url'], + 'height': item.get('height'), + 'bitrate': item.get('bitrate'), + }) + self._sort_formats(formats) + webpage = self._download_webpage(url, video_id) - video_url = self.build_video_url(video_id) + title = self._og_search_title(webpage) + description = self._og_search_description(webpage) - title = self._html_search_regex( - r'

<h1>(.+?)</h1>', webpage, 'title', flags=re.DOTALL)
-        description = self._html_search_regex(
-            r'<article id="descriptif">(.+?)</article>',
-            webpage, 'description', fatal=False, flags=re.DOTALL)
-
-        thumbnail = self._search_regex(
-            r'
    \s*]+class=([\'"])thumb\1[^>]*src=([\'"])(?P[^"]+)\2', + webpage, 'thumbnail', fatal=False, group='url') upload_date = unified_strdate(self._search_regex( - r'Publié le ([\d-]+)', webpage, 'upload date', fatal=False)) - duration = int_or_none(self._search_regex( - 'Durée (\d+)', webpage, 'duration', fatal=False)) + r'Le\s*([\d/]+)', webpage, 'upload date', fatal=False)) + duration = settings.get('main', {}).get('duration') view_count = int_or_none(self._search_regex( r'(\d+) vues', webpage, 'view count', fatal=False)) average_rating = self._search_regex( @@ -75,15 +82,19 @@ class PornoVoisinesIE(InfoExtractor): if average_rating: average_rating = float_or_none(average_rating.replace(',', '.')) - categories = self._html_search_meta( - 'keywords', webpage, 'categories', fatal=False) + categories = self._html_search_regex( + r'(?s)Catégories\s*:\s*(.+?)', webpage, 'categories', fatal=False) if categories: categories = [category.strip() for category in categories.split(',')] + subtitles = {'fr': [{ + 'url': subtitle, + } for subtitle in settings.get('main', {}).get('vtt_tracks', {}).values()]} + return { 'id': video_id, 'display_id': display_id, - 'url': video_url, + 'formats': formats, 'title': title, 'description': description, 'thumbnail': thumbnail, @@ -93,4 +104,5 @@ class PornoVoisinesIE(InfoExtractor): 'average_rating': average_rating, 'categories': categories, 'age_limit': 18, + 'subtitles': subtitles, } From 919cf1a62f022c61cfa65498e8c1b1cc0d21046e Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 3 Sep 2016 23:00:52 +0800 Subject: [PATCH 379/775] [downloader/dash] Abort if the first segment fails Closes #10497, Closes #10542 --- ChangeLog | 4 ++++ youtube_dl/downloader/dash.py | 20 +++++++++++++------- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/ChangeLog b/ChangeLog index 616b55803..1d277b562 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,9 @@ version +Core +* If the first segment of DASH fails, abort the whole download process to + prevent throttling (#10497) + Extractors * [pornvoisines] Fix extraction (#10469) * [rottentomatoes] Fix extraction (#10467) diff --git a/youtube_dl/downloader/dash.py b/youtube_dl/downloader/dash.py index efeae02a3..41fc9cfc2 100644 --- a/youtube_dl/downloader/dash.py +++ b/youtube_dl/downloader/dash.py @@ -40,7 +40,8 @@ class DashSegmentsFD(FragmentFD): fragment_retries = self.params.get('fragment_retries', 0) skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True) - def append_url_to_file(target_url, tmp_filename, segment_name): + def process_segment(segment, tmp_filename, fatal): + target_url, segment_name = segment target_filename = '%s-%s' % (tmp_filename, segment_name) count = 0 while count <= fragment_retries: @@ -64,18 +65,23 @@ class DashSegmentsFD(FragmentFD): if count <= fragment_retries: self.report_retry_fragment(err, segment_name, count, fragment_retries) if count > fragment_retries: - if skip_unavailable_fragments: + if not fatal: self.report_skip_fragment(segment_name) return True self.report_error('giving up after %s fragment retries' % fragment_retries) return False return True - if initialization_url: - if not append_url_to_file(initialization_url, ctx['tmpfilename'], 'Init'): - return False - for i, segment_url in enumerate(segment_urls): - if not append_url_to_file(segment_url, ctx['tmpfilename'], 'Seg%d' % i): + segments_to_download = [(initialization_url, 'Init')] if initialization_url else [] + segments_to_download.extend([ + (segment_url, 'Seg%d' % i) + for i, segment_url 
in enumerate(segment_urls)]) + + for i, segment in enumerate(segments_to_download): + # In DASH, the first segment contains necessary headers to + # generate a valid MP4 file, so always abort for the first segment + fatal = i == 0 or not skip_unavailable_fragments + if not process_segment(segment, ctx['tmpfilename'], fatal): return False self._finish_frag_download(ctx) From 0def758782c273e0a1c9984f895638845796715b Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 4 Sep 2016 11:42:15 +0100 Subject: [PATCH 380/775] [internetvideoarchive] extract all formats --- youtube_dl/extractor/common.py | 14 +++++++------- youtube_dl/extractor/internetvideoarchive.py | 15 ++++++++++++--- 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index a82968162..6edd5a769 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1163,13 +1163,6 @@ class InfoExtractor(object): m3u8_id=None, note=None, errnote=None, fatal=True, live=False): - formats = [self._m3u8_meta_format(m3u8_url, ext, preference, m3u8_id)] - - format_url = lambda u: ( - u - if re.match(r'^https?://', u) - else compat_urlparse.urljoin(m3u8_url, u)) - res = self._download_webpage_handle( m3u8_url, video_id, note=note or 'Downloading m3u8 information', @@ -1180,6 +1173,13 @@ class InfoExtractor(object): m3u8_doc, urlh = res m3u8_url = urlh.geturl() + formats = [self._m3u8_meta_format(m3u8_url, ext, preference, m3u8_id)] + + format_url = lambda u: ( + u + if re.match(r'^https?://', u) + else compat_urlparse.urljoin(m3u8_url, u)) + # We should try extracting formats only from master playlists [1], i.e. # playlists that describe available qualities. On the other hand media # playlists [2] should be returned as is since they contain just the media diff --git a/youtube_dl/extractor/internetvideoarchive.py b/youtube_dl/extractor/internetvideoarchive.py index 45add007f..76cc5ec3e 100644 --- a/youtube_dl/extractor/internetvideoarchive.py +++ b/youtube_dl/extractor/internetvideoarchive.py @@ -48,13 +48,23 @@ class InternetVideoArchiveIE(InfoExtractor): # There are multiple videos in the playlist whlie only the first one # matches the video played in browsers video_info = configuration['playlist'][0] + title = video_info['title'] formats = [] for source in video_info['sources']: file_url = source['file'] if determine_ext(file_url) == 'm3u8': - formats.extend(self._extract_m3u8_formats( - file_url, video_id, ext='mp4', m3u8_id='hls')) + m3u8_formats = self._extract_m3u8_formats( + file_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False) + if m3u8_formats: + formats.extend(m3u8_formats) + file_url = m3u8_formats[0]['url'] + formats.extend(self._extract_f4m_formats( + file_url.replace('.m3u8', '.f4m'), + video_id, f4m_id='hds', fatal=False)) + formats.extend(self._extract_mpd_formats( + file_url.replace('.m3u8', '.mpd'), + video_id, mpd_id='dash', fatal=False)) else: a_format = { 'url': file_url, @@ -70,7 +80,6 @@ class InternetVideoArchiveIE(InfoExtractor): self._sort_formats(formats) - title = video_info['title'] description = video_info.get('description') thumbnail = video_info.get('image') else: From 100bd86a68b5ee84669d162c9bcda31616f6596a Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 4 Sep 2016 11:44:13 +0100 Subject: [PATCH 381/775] [rottentomatoes] delegate extraction to InternetVideoArchiveIE --- youtube_dl/extractor/rottentomatoes.py | 25 ++++++------------------- 1 file changed, 6 insertions(+), 19 deletions(-) diff 
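
The internetvideoarchive change above relies on a naming convention: the HDS and DASH manifests are assumed to sit next to the HLS playlist and differ only in extension. A tiny sketch of that derivation (the helper name is illustrative):

    def sibling_manifest_urls(m3u8_url):
        return {
            'hls': m3u8_url,
            'hds': m3u8_url.replace('.m3u8', '.f4m'),
            'dash': m3u8_url.replace('.m3u8', '.mpd'),
        }

    # sibling_manifest_urls('http://example.com/v/master.m3u8')['dash']
    # -> 'http://example.com/v/master.mpd'
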
--git a/youtube_dl/extractor/rottentomatoes.py b/youtube_dl/extractor/rottentomatoes.py index df39ed3f2..23abf7a27 100644 --- a/youtube_dl/extractor/rottentomatoes.py +++ b/youtube_dl/extractor/rottentomatoes.py @@ -1,7 +1,7 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import js_to_json +from .internetvideoarchive import InternetVideoArchiveIE class RottenTomatoesIE(InfoExtractor): @@ -13,6 +13,7 @@ class RottenTomatoesIE(InfoExtractor): 'id': '11028566', 'ext': 'mp4', 'title': 'Toy Story 3', + 'description': 'From the creators of the beloved TOY STORY films, comes a story that will reunite the gang in a whole new way.', 'thumbnail': 're:^https?://.*\.jpg$', }, } @@ -20,26 +21,12 @@ class RottenTomatoesIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - - params = self._parse_json( - self._search_regex(r'(?s)RTVideo\(({.+?})\);', webpage, 'player parameters'), - video_id, transform_source=lambda s: js_to_json(s.replace('window.location.href', '""'))) - - formats = [] - if params.get('urlHLS'): - formats.extend(self._extract_m3u8_formats( - params['urlHLS'], video_id, ext='mp4', - entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)) - if params.get('urlMP4'): - formats.append({ - 'url': params['urlMP4'], - 'format_id': 'mp4', - }) - self._sort_formats(formats) + iva_id = self._search_regex(r'publishedid=(\d+)', webpage, 'internet video archive id') return { + '_type': 'url_transparent', + 'url': 'http://video.internetvideoarchive.net/player/6/configuration.ashx?domain=www.videodetective.com&customerid=69249&playerid=641&publishedid=' + iva_id, + 'ie_key': InternetVideoArchiveIE.ie_key(), 'id': video_id, 'title': self._og_search_title(webpage), - 'formats': formats, - 'thumbnail': params.get('thumbnailImg'), } From feaa5ad787cdc28e4b6979f1c7798134b1bee723 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 4 Sep 2016 20:12:34 +0700 Subject: [PATCH 382/775] [youtube:playlist] Extend _VALID_URL --- youtube_dl/extractor/youtube.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 0bc85af74..8fc26bd02 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -264,7 +264,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): ) )? # all until now is optional -> you can pass the naked ID ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID - (?!.*?&list=) # combined list/video URLs are handled by the playlist IE + (?!.*?\blist=) # combined list/video URLs are handled by the playlist IE (?(1).+)? # if we found the ID, everything can follow $""" _NEXT_URL_RE = r'[\?&]next_url=([^&]+)' @@ -1778,11 +1778,14 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): _VALID_URL = r"""(?x)(?: (?:https?://)? (?:\w+\.)? - youtube\.com/ (?: - (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/videoseries) - \? (?:.*?[&;])*? (?:p|a|list)= - | p/ + youtube\.com/ + (?: + (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/videoseries) + \? (?:.*?[&;])*? 
(?:p|a|list)= + | p/ + )| + youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist= ) ( (?:PL|LL|EC|UU|FL|RD|UL)?[0-9A-Za-z-_]{10,} @@ -1887,6 +1890,9 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): 'skip_download': True, }, 'add_ie': [YoutubeIE.ie_key()], + }, { + 'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21', + 'only_matching': True, }] def _real_initialize(self): From 433af6ad3002424ecb316e23946722d54010dbe1 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 4 Sep 2016 14:18:41 +0100 Subject: [PATCH 383/775] [theplatform] fix player regex(closes #10546) --- youtube_dl/extractor/theplatform.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index 23067e8c6..6febf805b 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -96,7 +96,7 @@ class ThePlatformBaseIE(OnceIE): class ThePlatformIE(ThePlatformBaseIE, AdobePassIE): _VALID_URL = r'''(?x) (?:https?://(?:link|player)\.theplatform\.com/[sp]/(?P[^/]+)/ - (?:(?:(?:[^/]+/)+select/)?(?Pmedia/(?:guid/\d+/)?)|(?P(?:[^/\?]+/(?:swf|config)|onsite)/select/))? + (?:(?:(?:[^/]+/)+select/)?(?Pmedia/(?:guid/\d+/)?)?|(?P(?:[^/\?]+/(?:swf|config)|onsite)/select/))? |theplatform:)(?P[^/\?&]+)''' _TESTS = [{ @@ -116,6 +116,7 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE): # rtmp download 'skip_download': True, }, + 'skip': '404 Not Found', }, { # from http://www.cnet.com/videos/tesla-model-s-a-second-step-towards-a-cleaner-motoring-future/ 'url': 'http://link.theplatform.com/s/kYEXFC/22d_qsQ6MIRT', From d9606d9b6cb44ee7600abf63333db4b88532a391 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 4 Sep 2016 20:51:48 +0700 Subject: [PATCH 384/775] release 2016.09.04 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- README.md | 7 ++++++- docs/supportedsites.md | 2 ++ youtube_dl/version.py | 2 +- 5 files changed, 13 insertions(+), 6 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index fc18e733b..1ddb3ef85 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.03*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.03** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.04*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.04** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.09.03 +[debug] youtube-dl version 2016.09.04 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 1d277b562..a26f5d4aa 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2016.09.04 Core * If the first segment of DASH fails, abort the whole download process to diff --git a/README.md b/README.md index 87465aa5e..207b633db 100644 --- a/README.md +++ b/README.md @@ -89,6 +89,8 @@ which means you can modify it, redistribute it or use it however you like. --mark-watched Mark videos watched (YouTube only) --no-mark-watched Do not mark videos watched (YouTube only) --no-color Do not emit color codes in output + --abort-on-unavailable-fragment Abort downloading when some fragment is not + available ## Network Options: --proxy URL Use the specified HTTP/HTTPS/SOCKS proxy. @@ -173,7 +175,10 @@ which means you can modify it, redistribute it or use it however you like. -R, --retries RETRIES Number of retries (default is 10), or "infinite". --fragment-retries RETRIES Number of retries for a fragment (default - is 10), or "infinite" (DASH only) + is 10), or "infinite" (DASH and hlsnative + only) + --skip-unavailable-fragments Skip unavailable fragments (DASH and + hlsnative only) --buffer-size SIZE Size of download buffer (e.g. 
1024 or 16K) (default is 1024) --no-resize-buffer Do not automatically adjust the buffer diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 015332bca..9e21016f7 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -232,6 +232,7 @@ - **FacebookPluginsVideo** - **faz.net** - **fc2** + - **fc2:embed** - **Fczenit** - **features.aol.com** - **fernsehkritik.tv** @@ -245,6 +246,7 @@ - **FOX** - **Foxgay** - **FoxNews**: Fox News and Fox Business Video + - **foxnews:insider** - **FoxSports** - **france2.fr:generation-quoi** - **FranceCulture** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 5be8c0122..3d12a47e8 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.09.03' +__version__ = '2016.09.04' From 8112bfeabae792754f51e0c012ed34c4dc521bac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 4 Sep 2016 20:57:18 +0700 Subject: [PATCH 385/775] [ChangeLog] Actualize --- ChangeLog | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/ChangeLog b/ChangeLog index a26f5d4aa..a542496a3 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,12 +1,26 @@ -version 2016.09.04 +version Core -* If the first segment of DASH fails, abort the whole download process to - prevent throttling (#10497) +* In DASH downloader if the first segment fails, abort the whole download + process to prevent throttling (#10497) ++ Add support for --skip-unavailable-fragments and --fragment retries in + hlsnative downloader (#10165, #10448). ++ Add support for --skip-unavailable-fragments in DASH downloader ++ Introduce --skip-unavailable-fragments option for fragment based downloaders + that allows to skip fragments unavailable due to a HTTP error +* Fix extraction of video/audio entries with src attribute in + _parse_html5_media_entries (#10540) Extractors +* [theplatform] Relax URL regular expression (#10546) +* [youtube:playlist] Extend URL regular expression +* [rottentomatoes] Delegate extraction to internetvideoarchive extractor +* [internetvideoarchive] Extract all formats * [pornvoisines] Fix extraction (#10469) * [rottentomatoes] Fix extraction (#10467) +* [espn] Extend URL regular expression (#10549) +* [vimple] Extend URL regular expression (#10547) +* [youtube:watchlater] Fix extraction (#10544) * [youjizz] Fix extraction (#10437) + [foxnews] Add support for FoxNews Insider (#10445) + [fc2] Recognize Flash player URLs (#10512) @@ -19,7 +33,6 @@ Core _extract_m3u8_formats (#10522) * Handle semicolon in mimetype2ext - Extractors + [youtube] Add support for rental videos' previews (#10532) * [youtube:playlist] Fallback to video extraction for video/playlist URLs when From 48094901086534533ca89283067f2ab732857654 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 4 Sep 2016 20:58:28 +0700 Subject: [PATCH 386/775] release 2016.09.04.1 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 1ddb3ef85..c03092442 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.04*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. 
Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.04** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.04.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.04.1** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.09.04 +[debug] youtube-dl version 2016.09.04.1 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index a542496a3..d392513ce 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2016.09.04.1 Core * In DASH downloader if the first segment fails, abort the whole download diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 3d12a47e8..b2ea6dac6 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.09.04' +__version__ = '2016.09.04.1' From 78e762d23c48f85c61a8afcae29307912000a7dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mat=C4=9Bj=20Cepl?= Date: Thu, 1 Sep 2016 17:31:08 +0200 Subject: [PATCH 387/775] Add new extractor for TV Noe (Czech Christian TV). Fixes #10520 --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/tvnoe.py | 44 ++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+) create mode 100644 youtube_dl/extractor/tvnoe.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 8c6ee0503..e47adc26c 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -916,6 +916,7 @@ from .tvc import ( ) from .tvigle import TvigleIE from .tvland import TVLandIE +from .tvnoe import TVNoeIE from .tvp import ( TVPEmbedIE, TVPIE, diff --git a/youtube_dl/extractor/tvnoe.py b/youtube_dl/extractor/tvnoe.py new file mode 100644 index 000000000..d50261ddd --- /dev/null +++ b/youtube_dl/extractor/tvnoe.py @@ -0,0 +1,44 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .jwplatform import JWPlatformBaseIE +from ..utils import clean_html, get_element_by_class, js_to_json + + +class TVNoeIE(JWPlatformBaseIE): + _VALID_URL = r'https?://(www\.)?tvnoe\.cz/video/(?P[0-9]+)' + _TEST = { + 'url': 'http://www.tvnoe.cz/video/10362', + 'md5': 'aee983f279aab96ec45ab6e2abb3c2ca', + 'info_dict': { + 'id': '10362', + 'ext': 'mp4', + 'series': 'Noční univerzita', + 'title': 'prof. Tomáš Halík, Th.D. 
- ' + + 'Návrat náboženství a střet civilizací', + 'description': 'md5:f337bae384e1a531a52c55ebc50fff41', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + iframe_url = self._search_regex(r']+src="([^"]+)"', + webpage, 'iframe src attribute') + + ifs_page = self._download_webpage(iframe_url, video_id) + jwplayer_data = self._parse_json(self._find_jwplayer_data(ifs_page), + video_id, transform_source=js_to_json) + info_dict = self._parse_jwplayer_data( + jwplayer_data, video_id, require_title=False, base_url=iframe_url) + + info_dict.update({ + 'id': video_id, + 'title': clean_html( + get_element_by_class('field-name-field-podnazev', webpage)), + 'description': clean_html(get_element_by_class('field-name-body', + webpage)), + 'series': clean_html(get_element_by_class('title', webpage)) + }) + return info_dict From 9127e1533d294eb672d783d1eeed15aeb9b2cbe1 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Mon, 5 Sep 2016 13:37:36 +0800 Subject: [PATCH 388/775] [tvnoe] PEP8 and coding style --- youtube_dl/extractor/tvnoe.py | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/tvnoe.py b/youtube_dl/extractor/tvnoe.py index d50261ddd..1cd3e6a58 100644 --- a/youtube_dl/extractor/tvnoe.py +++ b/youtube_dl/extractor/tvnoe.py @@ -2,7 +2,11 @@ from __future__ import unicode_literals from .jwplatform import JWPlatformBaseIE -from ..utils import clean_html, get_element_by_class, js_to_json +from ..utils import ( + clean_html, + get_element_by_class, + js_to_json, +) class TVNoeIE(JWPlatformBaseIE): @@ -14,8 +18,7 @@ class TVNoeIE(JWPlatformBaseIE): 'id': '10362', 'ext': 'mp4', 'series': 'Noční univerzita', - 'title': 'prof. Tomáš Halík, Th.D. - ' + - 'Návrat náboženství a střet civilizací', + 'title': 'prof. Tomáš Halík, Th.D. 
- Návrat náboženství a střet civilizací', 'description': 'md5:f337bae384e1a531a52c55ebc50fff41', } } @@ -24,21 +27,23 @@ class TVNoeIE(JWPlatformBaseIE): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - iframe_url = self._search_regex(r']+src="([^"]+)"', - webpage, 'iframe src attribute') + iframe_url = self._search_regex( + r']+src="([^"]+)"', webpage, 'iframe URL') ifs_page = self._download_webpage(iframe_url, video_id) - jwplayer_data = self._parse_json(self._find_jwplayer_data(ifs_page), - video_id, transform_source=js_to_json) + jwplayer_data = self._parse_json( + self._find_jwplayer_data(ifs_page), + video_id, transform_source=js_to_json) info_dict = self._parse_jwplayer_data( jwplayer_data, video_id, require_title=False, base_url=iframe_url) info_dict.update({ 'id': video_id, - 'title': clean_html( - get_element_by_class('field-name-field-podnazev', webpage)), - 'description': clean_html(get_element_by_class('field-name-body', - webpage)), + 'title': clean_html(get_element_by_class( + 'field-name-field-podnazev', webpage)), + 'description': clean_html(get_element_by_class( + 'field-name-body', webpage)), 'series': clean_html(get_element_by_class('title', webpage)) }) + return info_dict From b49ad71ce1d985165e07fd0f59f80f677434ad84 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Mon, 5 Sep 2016 13:38:55 +0800 Subject: [PATCH 389/775] [ChangeLog] Update for #10524 --- ChangeLog | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ChangeLog b/ChangeLog index d392513ce..0be9b0fbb 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Extractors ++ [tvnoe] New extractor (#10524) + + version 2016.09.04.1 Core From 95be19d436d1938d104310e194e85ea5a10c3353 Mon Sep 17 00:00:00 2001 From: Xie Yanbo Date: Sun, 4 Sep 2016 23:23:40 +0800 Subject: [PATCH 390/775] [miaopai] Add new extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/miaopai.py | 44 ++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+) create mode 100644 youtube_dl/extractor/miaopai.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 8c6ee0503..d511b04bc 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -471,6 +471,7 @@ from .metacafe import MetacafeIE from .metacritic import MetacriticIE from .mgoon import MgoonIE from .mgtv import MGTVIE +from .miaopai import MiaoPaiIE from .microsoftvirtualacademy import ( MicrosoftVirtualAcademyIE, MicrosoftVirtualAcademyCourseIE, diff --git a/youtube_dl/extractor/miaopai.py b/youtube_dl/extractor/miaopai.py new file mode 100644 index 000000000..c36b441b8 --- /dev/null +++ b/youtube_dl/extractor/miaopai.py @@ -0,0 +1,44 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import sanitized_Request + + +class MiaoPaiIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?miaopai\.com/show/(?P[-A-Za-z0-9~_]+).htm' + _TEST = { + 'url': 'http://www.miaopai.com/show/n~0hO7sfV1nBEw4Y29-Hqg__.htm', + 'md5': '095ed3f1cd96b821add957bdc29f845b', + 'info_dict': { + 'id': 'n~0hO7sfV1nBEw4Y29-Hqg__', + 'ext': 'mp4', + 'title': '西游记音乐会的秒拍视频', + 'thumbnail': 're:^https?://.*/n~0hO7sfV1nBEw4Y29-Hqg___m.jpg', + } + } + + _USER_AGENT_IPAD = 'User-Agent:Mozilla/5.0 ' \ + '(iPad; CPU OS 9_1 like Mac OS X) ' \ + 'AppleWebKit/601.1.46 (KHTML, like Gecko) ' \ + 'Version/9.0 Mobile/13B143 Safari/601.1' + + def _real_extract(self, url): + video_id = self._match_id(url) + request = 
sanitized_Request(url) + request.add_header('User-Agent', self._USER_AGENT_IPAD) + webpage = self._download_webpage(request, video_id) + + title = self._html_search_regex(r'<title>([^<]*)</title>', + webpage, + 'title') + regex = r"""
    ]*data-url=['"]([^'"]*\.jpg)['"]""" + thumbnail = self._html_search_regex(regex, webpage, '') + regex = r"""
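
The miaopai extractor above requests the page with an iPad User-Agent, presumably so the site serves its mobile HTML5 markup rather than the Flash player. A standalone sketch of such a request with the Python 3 standard library instead of sanitized_Request (header value trimmed to the usual form):

    from urllib.request import Request, urlopen

    IPAD_UA = ('Mozilla/5.0 (iPad; CPU OS 9_1 like Mac OS X) '
               'AppleWebKit/601.1.46 (KHTML, like Gecko) '
               'Version/9.0 Mobile/13B143 Safari/601.1')

    def fetch_as_ipad(url):
        req = Request(url, headers={'User-Agent': IPAD_UA})
        return urlopen(req).read().decode('utf-8', 'replace')
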

    ', - webpage, 'title', default=None) or self._og_search_title(webpage) + webpage, 'title', default=None) or self._og_search_title( + webpage)).strip() video_id = self._html_search_regex( r'data-video=(["\'])(?P.+?)\1', webpage, 'video id', group='id') data = self._download_json( - 'https://mediazone.vrt.be/api/v1/canvas/assets/%s' % video_id, display_id) + 'https://mediazone.vrt.be/api/v1/%s/assets/%s' + % (site_id, video_id), display_id) formats = [] for target in data['targetUrls']: From c6129feb7f8313941a4d2044fa4b45ceaa0a91c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 9 Sep 2016 23:20:45 +0700 Subject: [PATCH 430/775] [ketnet] Add extractor (Closes #10343) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/ketnet.py | 52 ++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+) create mode 100644 youtube_dl/extractor/ketnet.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index b7b630e9d..38dc33674 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -407,6 +407,7 @@ from .kankan import KankanIE from .karaoketv import KaraoketvIE from .karrierevideos import KarriereVideosIE from .keezmovies import KeezMoviesIE +from .ketnet import KetnetIE from .khanacademy import KhanAcademyIE from .kickstarter import KickStarterIE from .keek import KeekIE diff --git a/youtube_dl/extractor/ketnet.py b/youtube_dl/extractor/ketnet.py new file mode 100644 index 000000000..aaf3f807a --- /dev/null +++ b/youtube_dl/extractor/ketnet.py @@ -0,0 +1,52 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class KetnetIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?ketnet\.be/(?:[^/]+/)*(?P[^/?#&]+)' + _TESTS = [{ + 'url': 'https://www.ketnet.be/kijken/zomerse-filmpjes', + 'md5': 'd907f7b1814ef0fa285c0475d9994ed7', + 'info_dict': { + 'id': 'zomerse-filmpjes', + 'ext': 'mp4', + 'title': 'Gluur mee op de filmset en op Pennenzakkenrock', + 'description': 'Gluur mee met Ghost Rockers op de filmset', + 'thumbnail': 're:^https?://.*\.jpg$', + } + }, { + 'url': 'https://www.ketnet.be/kijken/karrewiet/uitzending-8-september-2016', + 'only_matching': True, + }, { + 'url': 'https://www.ketnet.be/achter-de-schermen/sien-repeteert-voor-stars-for-life', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + config = self._parse_json( + self._search_regex( + r'(?s)playerConfig\s*=\s*({.+?})\s*;', webpage, + 'player config'), + video_id) + + title = config['title'] + + formats = self._extract_m3u8_formats( + config['source']['hls'], video_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id='hls') + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'description': config.get('description'), + 'thumbnail': config.get('image'), + 'series': config.get('program'), + 'episode': config.get('episode'), + 'formats': formats, + } From 6c3affcb18f7eabf7d428e5efe474e0547ab25cb Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 10 Sep 2016 20:09:09 +0800 Subject: [PATCH 431/775] [newgrounds] Fix uploader extraction Closes #10584 Also change test URLs to HTTPS, as proposed by @stepshal in #10593. 
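
The Ketnet extractor above uses a common pattern: pull an inline JavaScript assignment such as playerConfig = {...}; out of the page and parse the object literal as JSON. A minimal sketch; on pages where the object is not strict JSON, a js_to_json-style cleanup would be needed first:

    import json
    import re

    def extract_player_config(webpage):
        mobj = re.search(r'(?s)playerConfig\s*=\s*({.+?})\s*;', webpage)
        return json.loads(mobj.group(1)) if mobj else None
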
Closes #10593 --- ChangeLog | 6 ++++++ youtube_dl/extractor/newgrounds.py | 23 ++++++++--------------- 2 files changed, 14 insertions(+), 15 deletions(-) diff --git a/ChangeLog b/ChangeLog index d84f447ba..fafe445cb 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Extractors +* [newgrounds] Fix uploader extraction (#10584) + + version 2016.09.08 Extractors diff --git a/youtube_dl/extractor/newgrounds.py b/youtube_dl/extractor/newgrounds.py index 705940323..9bea610c8 100644 --- a/youtube_dl/extractor/newgrounds.py +++ b/youtube_dl/extractor/newgrounds.py @@ -1,15 +1,12 @@ from __future__ import unicode_literals -import json -import re - from .common import InfoExtractor class NewgroundsIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?newgrounds\.com/(?:audio/listen|portal/view)/(?P[0-9]+)' _TESTS = [{ - 'url': 'http://www.newgrounds.com/audio/listen/549479', + 'url': 'https://www.newgrounds.com/audio/listen/549479', 'md5': 'fe6033d297591288fa1c1f780386f07a', 'info_dict': { 'id': '549479', @@ -18,7 +15,7 @@ class NewgroundsIE(InfoExtractor): 'uploader': 'Burn7', } }, { - 'url': 'http://www.newgrounds.com/portal/view/673111', + 'url': 'https://www.newgrounds.com/portal/view/673111', 'md5': '3394735822aab2478c31b1004fe5e5bc', 'info_dict': { 'id': '673111', @@ -29,24 +26,20 @@ class NewgroundsIE(InfoExtractor): }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - music_id = mobj.group('id') - webpage = self._download_webpage(url, music_id) + media_id = self._match_id(url) + webpage = self._download_webpage(url, media_id) title = self._html_search_regex( r'([^>]+)', webpage, 'title') uploader = self._html_search_regex( - [r',"artist":"([^"]+)",', r'[\'"]owner[\'"]\s*:\s*[\'"]([^\'"]+)[\'"],'], - webpage, 'uploader') + r'Author\s*]+>([^<]+)', webpage, 'uploader', fatal=False) - music_url_json_string = self._html_search_regex( - r'({"url":"[^"]+"),', webpage, 'music url') + '}' - music_url_json = json.loads(music_url_json_string) - music_url = music_url_json['url'] + music_url = self._parse_json(self._search_regex( + r'"url":("[^"]+"),', webpage, ''), media_id) return { - 'id': music_id, + 'id': media_id, 'title': title, 'url': music_url, 'uploader': uploader, From b29f842e0eb095248ff39d1fa28c5b4941793246 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 10 Sep 2016 20:46:45 +0700 Subject: [PATCH 432/775] [canalplus] Add support for c8.fr (Closes #10577) --- youtube_dl/extractor/canalplus.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/canalplus.py b/youtube_dl/extractor/canalplus.py index 61463f249..69e8f4f57 100644 --- a/youtube_dl/extractor/canalplus.py +++ b/youtube_dl/extractor/canalplus.py @@ -23,6 +23,7 @@ class CanalplusIE(InfoExtractor): (?:(?:www|m)\.)?canalplus\.fr| (?:www\.)?piwiplus\.fr| (?:www\.)?d8\.tv| + (?:www\.)?c8\.fr| (?:www\.)?d17\.tv| (?:www\.)?itele\.fr )/(?:(?:[^/]+/)*(?P[^/?#&]+))?(?:\?.*\bvid=(?P\d+))?| @@ -35,6 +36,7 @@ class CanalplusIE(InfoExtractor): 'canalplus': 'cplus', 'piwiplus': 'teletoon', 'd8': 'd8', + 'c8': 'd8', 'd17': 'd17', 'itele': 'itele', } From 84a18e9b908eb0b770f03603200026a06f4f08b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 10 Sep 2016 22:01:49 +0700 Subject: [PATCH 433/775] [polskieradio:category] Improve extraction --- youtube_dl/extractor/extractors.py | 5 +- youtube_dl/extractor/polskieradio.py | 158 ++++++++++++++------------- 2 files changed, 84 insertions(+), 79 deletions(-) diff --git 
a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 6a142996f..96f3d3fcb 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -671,7 +671,10 @@ from .pluralsight import ( ) from .podomatic import PodomaticIE from .pokemon import PokemonIE -from .polskieradio import PolskieRadioIE, PolskieRadioProgrammeIE +from .polskieradio import ( + PolskieRadioIE, + PolskieRadioCategoryIE, +) from .porn91 import Porn91IE from .porncom import PornComIE from .pornhd import PornHdIE diff --git a/youtube_dl/extractor/polskieradio.py b/youtube_dl/extractor/polskieradio.py index c51d3d9be..5ff173774 100644 --- a/youtube_dl/extractor/polskieradio.py +++ b/youtube_dl/extractor/polskieradio.py @@ -1,6 +1,7 @@ # coding: utf-8 from __future__ import unicode_literals +import itertools import re from .common import InfoExtractor @@ -10,90 +11,13 @@ from ..compat import ( compat_urlparse ) from ..utils import ( + extract_attributes, int_or_none, strip_or_none, unified_timestamp, ) -class PolskieRadioProgrammeIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?polskieradio\.pl/\d+(,[^/]+)?/(?P\d+)' - _TESTS = [{ - 'url': 'http://www.polskieradio.pl/7/5102,HISTORIA-ZYWA', - 'info_dict': { - 'id': '5102', - 'title': 'HISTORIA ŻYWA', - }, - 'playlist_mincount': 34, - }, { - 'url': 'http://www.polskieradio.pl/7/4807', - 'info_dict': { - 'id': '4807', - 'title': 'Vademecum 1050. rocznicy Chrztu Polski' - }, - 'playlist_mincount': 5 - }, { - 'url': 'http://www.polskieradio.pl/7/129,Sygnaly-dnia?ref=source', - 'only_matching': True - }, { - 'url': 'http://www.polskieradio.pl/37,RedakcjaKatolicka/4143,Kierunek-Krakow', - 'info_dict': { - 'id': '4143', - 'title': 'Kierunek Kraków', - }, - 'playlist_mincount': 61 - }, { - 'url': 'http://www.polskieradio.pl/7,Jedynka/5102,HISTORIA-ZYWA', - 'only_matching': True - }] - - def _get_entries_from_page_content(self, base_url, content): - entries = [] - - articles = re.findall( - r'
    \s+', - content) - for article_id, article_url, _, article_title in articles: - resolved_article_url = compat_urlparse.urljoin(base_url, article_url) - entries.append(self.url_result( - resolved_article_url, - ie='PolskieRadio', - video_id=article_id, - video_title=article_title)) - - return entries - - @classmethod - def suitable(cls, url): - return False if PolskieRadioIE.suitable(url) else super(PolskieRadioProgrammeIE, cls).suitable(url) - - def _real_extract(self, url): - programme_id = self._match_id(url) - webpage = self._download_webpage(url, programme_id) - - title = self._html_search_regex( - r'(.+?)', - webpage, 'title', fatal=False) - description = None - - entries = self._get_entries_from_page_content(url, webpage) - - pages = re.findall(r' 1: - page_url_root = next(url for _, url, _ in pages if len(url) > 0) - for page_number in range(2, page_count + 1): - page_url = page_url_root + str(page_number) - resolved_page_url = compat_urlparse.urljoin(url, page_url) - page_content = self._download_webpage( - resolved_page_url, programme_id, - note="Downloading page number %d" % page_number) - entries.extend(self._get_entries_from_page_content(url, page_content)) - - return self.playlist_result(entries, programme_id, title, description) - - class PolskieRadioIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?polskieradio\.pl/\d+/\d+/Artykul/(?P[0-9]+)' _TESTS = [{ @@ -176,3 +100,81 @@ class PolskieRadioIE(InfoExtractor): description = strip_or_none(self._og_search_description(webpage)) return self.playlist_result(entries, playlist_id, title, description) + + +class PolskieRadioCategoryIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?polskieradio\.pl/\d+(?:,[^/]+)?/(?P\d+)' + _TESTS = [{ + 'url': 'http://www.polskieradio.pl/7/5102,HISTORIA-ZYWA', + 'info_dict': { + 'id': '5102', + 'title': 'HISTORIA ŻYWA', + }, + 'playlist_mincount': 38, + }, { + 'url': 'http://www.polskieradio.pl/7/4807', + 'info_dict': { + 'id': '4807', + 'title': 'Vademecum 1050. rocznicy Chrztu Polski' + }, + 'playlist_mincount': 5 + }, { + 'url': 'http://www.polskieradio.pl/7/129,Sygnaly-dnia?ref=source', + 'only_matching': True + }, { + 'url': 'http://www.polskieradio.pl/37,RedakcjaKatolicka/4143,Kierunek-Krakow', + 'info_dict': { + 'id': '4143', + 'title': 'Kierunek Kraków', + }, + 'playlist_mincount': 61 + }, { + 'url': 'http://www.polskieradio.pl/10,czworka/214,muzyka', + 'info_dict': { + 'id': '214', + 'title': 'Muzyka', + }, + 'playlist_mincount': 61 + }, { + 'url': 'http://www.polskieradio.pl/7,Jedynka/5102,HISTORIA-ZYWA', + 'only_matching': True, + }, { + 'url': 'http://www.polskieradio.pl/8,Dwojka/196,Publicystyka', + 'only_matching': True, + }] + + @classmethod + def suitable(cls, url): + return False if PolskieRadioIE.suitable(url) else super(PolskieRadioCategoryIE, cls).suitable(url) + + def _entries(self, url, page, category_id): + content = page + for page_num in itertools.count(2): + for a_entry, entry_id in re.findall( + r'(?s)]+>.*?(]+href=["\']/\d+/\d+/Artykul/(\d+)[^>]+>).*?
    ', + content): + entry = extract_attributes(a_entry) + href = entry.get('href') + if not href: + continue + yield self.url_result( + compat_urlparse.urljoin(url, href), PolskieRadioIE.ie_key(), + entry_id, entry.get('title')) + mobj = re.search( + r']+class=["\']next["\'][^>]*>\s*]+href=(["\'])(?P(?:(?!\1).)+)\1', + content) + if not mobj: + break + next_url = compat_urlparse.urljoin(url, mobj.group('url')) + content = self._download_webpage( + next_url, category_id, 'Downloading page %s' % page_num) + + def _real_extract(self, url): + category_id = self._match_id(url) + webpage = self._download_webpage(url, category_id) + title = self._html_search_regex( + r'([^<]+) - [^<]+ - [^<]+', + webpage, 'title', fatal=False) + return self.playlist_result( + self._entries(url, webpage, category_id), + category_id, title) From 732424375017a033f5b398b0f3dc2c6d47f3d3fd Mon Sep 17 00:00:00 2001 From: Scott Leggett Date: Mon, 5 Sep 2016 22:41:08 +1000 Subject: [PATCH 434/775] [9now] Fix extraction --- youtube_dl/extractor/ninenow.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ninenow.py b/youtube_dl/extractor/ninenow.py index faa577237..907b42609 100644 --- a/youtube_dl/extractor/ninenow.py +++ b/youtube_dl/extractor/ninenow.py @@ -44,7 +44,14 @@ class NineNowIE(InfoExtractor): page_data = self._parse_json(self._search_regex( r'window\.__data\s*=\s*({.*?});', webpage, 'page data'), display_id) - common_data = page_data.get('episode', {}).get('episode') or page_data.get('clip', {}).get('clip') + current_key = ( + page_data.get('episode', {}).get('currentEpisodeKey') or + page_data.get('clip', {}).get('currentClipKey') + ) + common_data = ( + page_data.get('episode', {}).get('episodeCache', {}).get(current_key, {}).get('episode') or + page_data.get('clip', {}).get('clipCache', {}).get(current_key, {}).get('clip') + ) video_data = common_data['video'] if video_data.get('drm'): From 56c0ead4d3b9f365f0562678504879be8e79b89c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 11 Sep 2016 00:42:13 +0700 Subject: [PATCH 435/775] [9now] Improve video data extraction (Closes #10561) --- youtube_dl/extractor/ninenow.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/ninenow.py b/youtube_dl/extractor/ninenow.py index 907b42609..351bea7ba 100644 --- a/youtube_dl/extractor/ninenow.py +++ b/youtube_dl/extractor/ninenow.py @@ -44,14 +44,20 @@ class NineNowIE(InfoExtractor): page_data = self._parse_json(self._search_regex( r'window\.__data\s*=\s*({.*?});', webpage, 'page data'), display_id) - current_key = ( - page_data.get('episode', {}).get('currentEpisodeKey') or - page_data.get('clip', {}).get('currentClipKey') - ) - common_data = ( - page_data.get('episode', {}).get('episodeCache', {}).get(current_key, {}).get('episode') or - page_data.get('clip', {}).get('clipCache', {}).get(current_key, {}).get('clip') - ) + + for kind in ('episode', 'clip'): + current_key = page_data.get(kind, {}).get( + 'current%sKey' % kind.capitalize()) + if not current_key: + continue + cache = page_data.get(kind, {}).get('%sCache' % kind, {}) + if not cache: + continue + common_data = (cache.get(current_key) or list(cache.values())[0])[kind] + break + else: + raise ExtractorError('Unable to find video data') + video_data = common_data['video'] if video_data.get('drm'): From 2512b17493fced6b469d9610c1ad5c5af52870f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 11 Sep 2016 
01:27:20 +0700 Subject: [PATCH 436/775] [lrt] Fix audio extraction (Closes #10566) --- youtube_dl/extractor/lrt.py | 46 ++++++++++++++++++++++++++++++------- 1 file changed, 38 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/lrt.py b/youtube_dl/extractor/lrt.py index 1072405b3..f5c997ef4 100644 --- a/youtube_dl/extractor/lrt.py +++ b/youtube_dl/extractor/lrt.py @@ -1,8 +1,11 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..utils import ( + determine_ext, int_or_none, parse_duration, remove_end, @@ -12,8 +15,10 @@ from ..utils import ( class LRTIE(InfoExtractor): IE_NAME = 'lrt.lt' _VALID_URL = r'https?://(?:www\.)?lrt\.lt/mediateka/irasas/(?P[0-9]+)' - _TEST = { + _TESTS = [{ + # m3u8 download 'url': 'http://www.lrt.lt/mediateka/irasas/54391/', + 'md5': 'fe44cf7e4ab3198055f2c598fc175cb0', 'info_dict': { 'id': '54391', 'ext': 'mp4', @@ -23,20 +28,45 @@ class LRTIE(InfoExtractor): 'view_count': int, 'like_count': int, }, - 'params': { - 'skip_download': True, # m3u8 download + }, { + # direct mp3 download + 'url': 'http://www.lrt.lt/mediateka/irasas/1013074524/', + 'md5': '389da8ca3cad0f51d12bed0c844f6a0a', + 'info_dict': { + 'id': '1013074524', + 'ext': 'mp3', + 'title': 'Kita tema 2016-09-05 15:05', + 'description': 'md5:1b295a8fc7219ed0d543fc228c931fb5', + 'duration': 3008, + 'view_count': int, + 'like_count': int, }, - } + }] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) title = remove_end(self._og_search_title(webpage), ' - LRT') - m3u8_url = self._search_regex( - r'file\s*:\s*(["\'])(?P.+?)\1\s*\+\s*location\.hash\.substring\(1\)', - webpage, 'm3u8 url', group='url') - formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4') + + formats = [] + for _, file_url in re.findall( + r'file\s*:\s*(["\'])(?P(?:(?!\1).)+)\1', webpage): + ext = determine_ext(file_url) + if ext not in ('m3u8', 'mp3'): + continue + # mp3 served as m3u8 produces stuttered media file + if ext == 'm3u8' and '.mp3' in file_url: + continue + if ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + file_url, video_id, 'mp4', entry_protocol='m3u8_native', + fatal=False)) + elif ext == 'mp3': + formats.append({ + 'url': file_url, + 'vcodec': 'none', + }) self._sort_formats(formats) thumbnail = self._og_search_thumbnail(webpage) From 1e35999c1e4637174e2532c457431315b5e186d9 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 10 Sep 2016 19:43:09 +0100 Subject: [PATCH 437/775] [tfo] Add new extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/tfo.py | 53 ++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+) create mode 100644 youtube_dl/extractor/tfo.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 96f3d3fcb..124e909fb 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -870,6 +870,7 @@ from .teletask import TeleTaskIE from .telewebion import TelewebionIE from .testurl import TestURLIE from .tf1 import TF1IE +from .tfo import TFOIE from .theintercept import TheInterceptIE from .theplatform import ( ThePlatformIE, diff --git a/youtube_dl/extractor/tfo.py b/youtube_dl/extractor/tfo.py new file mode 100644 index 000000000..6f1eeac57 --- /dev/null +++ b/youtube_dl/extractor/tfo.py @@ -0,0 +1,53 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import json + +from .common import InfoExtractor +from ..utils import ( + HEADRequest, + 
ExtractorError, + int_or_none, +) + + +class TFOIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?tfo\.org/(?:en|fr)/(?:[^/]+/){2}(?P\d+)' + _TEST = { + 'url': 'http://www.tfo.org/en/universe/tfo-247/100463871/video-game-hackathon', + 'md5': '47c987d0515561114cf03d1226a9d4c7', + 'info_dict': { + 'id': '100463871', + 'ext': 'mp4', + 'title': 'Video Game Hackathon', + 'description': 'md5:558afeba217c6c8d96c60e5421795c07', + 'upload_date': '20160212', + 'timestamp': 1455310233, + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + self._request_webpage(HEADRequest('http://www.tfo.org/'), video_id) + infos = self._download_json( + 'http://www.tfo.org/api/web/video/get_infos', video_id, data=json.dumps({ + 'product_id': video_id, + }).encode(), headers={ + 'X-tfo-session': self._get_cookies('http://www.tfo.org/')['tfo-session'].value, + }) + if infos.get('success') == 0: + raise ExtractorError('%s said: %s' % (self.IE_NAME, infos['msg']), expected=True) + video_data = infos['data'] + + return { + '_type': 'url_transparent', + 'id': video_id, + 'url': 'limelight:media:' + video_data['llid'], + 'title': video_data['title'], + 'description': video_data.get('description'), + 'series': video_data.get('collection'), + 'season_number': int_or_none(video_data.get('season')), + 'episode_number': int_or_none(video_data.get('episode')), + 'duration': int_or_none(video_data.get('duration')), + 'ie_key': 'LimelightMedia', + } From 001a5fd3d75b311102264cf3920c6aa5b2322e51 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 11 Sep 2016 03:02:00 +0800 Subject: [PATCH 438/775] [iwara] Fix extraction after relaunch Closes #10462, closes #3215 --- ChangeLog | 1 + youtube_dl/extractor/extractors.py | 2 +- youtube_dl/extractor/iwara.py | 77 ++++++++++++++++++++++++++++++ youtube_dl/extractor/trollvids.py | 36 -------------- 4 files changed, 79 insertions(+), 37 deletions(-) create mode 100644 youtube_dl/extractor/iwara.py delete mode 100644 youtube_dl/extractor/trollvids.py diff --git a/ChangeLog b/ChangeLog index fafe445cb..387dc7bf6 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors +* [iwara] Fix extraction after relaunch (#10462, #3215) * [newgrounds] Fix uploader extraction (#10584) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 124e909fb..2e795260e 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -395,6 +395,7 @@ from .ivi import ( IviCompilationIE ) from .ivideon import IvideonIE +from .iwara import IwaraIE from .izlesene import IzleseneIE from .jeuxvideo import JeuxVideoIE from .jove import JoveIE @@ -899,7 +900,6 @@ from .toutv import TouTvIE from .toypics import ToypicsUserIE, ToypicsIE from .traileraddict import TrailerAddictIE from .trilulilu import TriluliluIE -from .trollvids import TrollvidsIE from .trutv import TruTVIE from .tube8 import Tube8IE from .tubitv import TubiTvIE diff --git a/youtube_dl/extractor/iwara.py b/youtube_dl/extractor/iwara.py new file mode 100644 index 000000000..8d7e7f472 --- /dev/null +++ b/youtube_dl/extractor/iwara.py @@ -0,0 +1,77 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import compat_urllib_parse_urlparse +from ..utils import remove_end + + +class IwaraIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.|ecchi\.)?iwara\.tv/videos/(?P[a-zA-Z0-9]+)' + _TESTS = [{ + 'url': 'http://iwara.tv/videos/amVwUl1EHpAD9RD', + 'md5': '1d53866b2c514b23ed69e4352fdc9839', + 
'info_dict': { + 'id': 'amVwUl1EHpAD9RD', + 'ext': 'mp4', + 'title': '【MMD R-18】ガールフレンド carry_me_off', + 'age_limit': 18, + }, + }, { + 'url': 'http://ecchi.iwara.tv/videos/Vb4yf2yZspkzkBO', + 'md5': '7e5f1f359cd51a027ba4a7b7710a50f0', + 'info_dict': { + 'id': '0B1LvuHnL-sRFNXB1WHNqbGw4SXc', + 'ext': 'mp4', + 'title': '[3D Hentai] Kyonyu Ã\x97 Genkai Ã\x97 Emaki Shinobi Girls.mp4', + 'age_limit': 18, + }, + 'add_ie': ['GoogleDrive'], + }, { + 'url': 'http://www.iwara.tv/videos/nawkaumd6ilezzgq', + 'md5': '1d85f1e5217d2791626cff5ec83bb189', + 'info_dict': { + 'id': '6liAP9s2Ojc', + 'ext': 'mp4', + 'age_limit': 0, + 'title': '[MMD] Do It Again Ver.2 [1080p 60FPS] (Motion,Camera,Wav+DL)', + 'description': 'md5:590c12c0df1443d833fbebe05da8c47a', + 'upload_date': '20160910', + 'uploader': 'aMMDsork', + 'uploader_id': 'UCVOFyOSCyFkXTYYHITtqB7A', + }, + 'add_ie': ['Youtube'], + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage, urlh = self._download_webpage_handle(url, video_id) + + hostname = compat_urllib_parse_urlparse(urlh.geturl()).hostname + # ecchi is 'sexy' in Japanese + age_limit = 18 if hostname.split('.')[0] == 'ecchi' else 0 + + entries = self._parse_html5_media_entries(url, webpage, video_id) + + if not entries: + iframe_url = self._html_search_regex( + r']+src=([\'"])(?P[^\'"]+)\1', + webpage, 'iframe URL', group='url') + return { + '_type': 'url_transparent', + 'url': iframe_url, + 'age_limit': age_limit, + } + + title = remove_end(self._html_search_regex( + r'([^<]+)', webpage, 'title'), ' | Iwara') + + info_dict = entries[0] + info_dict.update({ + 'id': video_id, + 'title': title, + 'age_limit': age_limit, + }) + + return info_dict diff --git a/youtube_dl/extractor/trollvids.py b/youtube_dl/extractor/trollvids.py deleted file mode 100644 index 657705623..000000000 --- a/youtube_dl/extractor/trollvids.py +++ /dev/null @@ -1,36 +0,0 @@ -# encoding: utf-8 -from __future__ import unicode_literals - -import re - -from .nuevo import NuevoBaseIE - - -class TrollvidsIE(NuevoBaseIE): - _VALID_URL = r'https?://(?:www\.)?trollvids\.com/video/(?P\d+)/(?P[^/?#&]+)' - IE_NAME = 'trollvids' - _TEST = { - 'url': 'http://trollvids.com/video/2349002/%E3%80%90MMD-R-18%E3%80%91%E3%82%AC%E3%83%BC%E3%83%AB%E3%83%95%E3%83%AC%E3%83%B3%E3%83%89-carrymeoff', - 'md5': '1d53866b2c514b23ed69e4352fdc9839', - 'info_dict': { - 'id': '2349002', - 'ext': 'mp4', - 'title': '【MMD R-18】ガールフレンド carry_me_off', - 'age_limit': 18, - 'duration': 216.78, - }, - } - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - display_id = mobj.group('display_id') - - info = self._extract_nuevo( - 'http://trollvids.com/nuevo/player/config.php?v=%s' % video_id, - video_id) - info.update({ - 'display_id': display_id, - 'age_limit': 18 - }) - return info From bfcda07a2710738c32f63fdb4e09e177acc53df3 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 11 Sep 2016 04:06:00 +0800 Subject: [PATCH 439/775] [abc:iview] Skip the test. They are removed soon --- youtube_dl/extractor/abc.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/abc.py b/youtube_dl/extractor/abc.py index c7b6df7d0..3792bd232 100644 --- a/youtube_dl/extractor/abc.py +++ b/youtube_dl/extractor/abc.py @@ -100,6 +100,7 @@ class ABCIViewIE(InfoExtractor): IE_NAME = 'abc.net.au:iview' _VALID_URL = r'https?://iview\.abc\.net\.au/programs/[^/]+/(?P[^/?#]+)' + # ABC iview programs are normally available for 14 days only. 
_TESTS = [{ 'url': 'http://iview.abc.net.au/programs/gardening-australia/FA1505V024S00', 'md5': '979d10b2939101f0d27a06b79edad536', @@ -112,6 +113,7 @@ class ABCIViewIE(InfoExtractor): 'uploader_id': 'abc1', 'timestamp': 1471719600, }, + 'skip': 'Video gone', }] def _real_extract(self, url): From 2cb93afcd8a8a1f086a97ef3791fa033ddc1610a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 11 Sep 2016 14:59:14 +0700 Subject: [PATCH 440/775] [viafree] Improve video id extraction (Closes #10615) --- youtube_dl/extractor/tvplay.py | 36 +++++++++++++++++++++++++++++----- 1 file changed, 31 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/tvplay.py b/youtube_dl/extractor/tvplay.py index c0fec2594..5548ff2ac 100644 --- a/youtube_dl/extractor/tvplay.py +++ b/youtube_dl/extractor/tvplay.py @@ -16,6 +16,7 @@ from ..utils import ( parse_iso8601, qualities, try_get, + js_to_json, update_url_query, ) @@ -367,6 +368,10 @@ class ViafreeIE(InfoExtractor): 'skip_download': True, }, 'add_ie': [TVPlayIE.ie_key()], + }, { + # Different og:image URL schema + 'url': 'www.viafree.se/program/reality/sommaren-med-youtube-stjarnorna/sasong-1/avsnitt-2', + 'only_matching': True, }, { 'url': 'http://www.viafree.no/programmer/underholdning/det-beste-vorspielet/sesong-2/episode-1', 'only_matching': True, @@ -384,14 +389,35 @@ class ViafreeIE(InfoExtractor): webpage = self._download_webpage(url, video_id) + data = self._parse_json( + self._search_regex( + r'(?s)window\.App\s*=\s*({.+?})\s*;\s* Date: Sun, 11 Sep 2016 18:32:45 +0800 Subject: [PATCH 441/775] [foxnews] Support Fox News Articles (closes #10598) --- ChangeLog | 1 + youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/foxnews.py | 40 +++++++++++++++++++++++++++--- 3 files changed, 39 insertions(+), 3 deletions(-) diff --git a/ChangeLog b/ChangeLog index 387dc7bf6..a73a35e88 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors ++ [foxnews] Support Fox News articles (#10598) * [iwara] Fix extraction after relaunch (#10462, #3215) * [newgrounds] Fix uploader extraction (#10584) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 2e795260e..e9027fb69 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -292,6 +292,7 @@ from .fourtube import FourTubeIE from .fox import FOXIE from .foxgay import FoxgayIE from .foxnews import ( + FoxNewsVideoIE, FoxNewsIE, FoxNewsInsiderIE, ) diff --git a/youtube_dl/extractor/foxnews.py b/youtube_dl/extractor/foxnews.py index 5c7acd795..3e9a6a08c 100644 --- a/youtube_dl/extractor/foxnews.py +++ b/youtube_dl/extractor/foxnews.py @@ -6,7 +6,8 @@ from .amp import AMPIE from .common import InfoExtractor -class FoxNewsIE(AMPIE): +class FoxNewsVideoIE(AMPIE): + IE_NAME = 'foxnews:video' IE_DESC = 'Fox News and Fox Business Video' _VALID_URL = r'https?://(?Pvideo\.(?:insider\.)?fox(?:news|business)\.com)/v/(?:video-embed\.html\?video_id=)?(?P\d+)' _TESTS = [ @@ -66,6 +67,35 @@ class FoxNewsIE(AMPIE): return info +class FoxNewsIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?foxnews\.com/(?!v)([^/]+/)+(?P[a-z-]+)' + IE_NAME = 'foxnews' + + _TEST = { + 'url': 'http://www.foxnews.com/politics/2016/09/08/buzz-about-bud-clinton-camp-denies-claims-wore-earpiece-at-forum.html', + 'md5': '62aa5a781b308fdee212ebb6f33ae7ef', + 'info_dict': { + 'id': '5116295019001', + 'ext': 'mp4', + 'title': 'Trump and Clinton asked to defend positions on Iraq War', + 'description': 'Veterans react on \'The Kelly 
File\'', + 'timestamp': 1473299755, + 'upload_date': '20160908', + }, + } + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + + video_id = self._html_search_regex( + r'data-video-id=([\'"])(?P[^\'"]+)\1', + webpage, 'video ID', group='id') + return self.url_result( + 'http://video.foxnews.com/v/' + video_id, + FoxNewsVideoIE.ie_key()) + + class FoxNewsInsiderIE(InfoExtractor): _VALID_URL = r'https?://insider\.foxnews\.com/([^/]+/)+(?P[a-z-]+)' IE_NAME = 'foxnews:insider' @@ -83,7 +113,11 @@ class FoxNewsInsiderIE(InfoExtractor): 'upload_date': '20160825', 'thumbnail': 're:^https?://.*\.jpg$', }, - 'add_ie': [FoxNewsIE.ie_key()], + 'params': { + # m3u8 download + 'skip_download': True, + }, + 'add_ie': [FoxNewsVideoIE.ie_key()], } def _real_extract(self, url): @@ -98,7 +132,7 @@ class FoxNewsInsiderIE(InfoExtractor): return { '_type': 'url_transparent', - 'ie_key': FoxNewsIE.ie_key(), + 'ie_key': FoxNewsVideoIE.ie_key(), 'url': embed_url, 'display_id': display_id, 'title': title, From f01115c933bdf6a3d741bb2f306d26b4df943a40 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 11 Sep 2016 18:36:59 +0800 Subject: [PATCH 442/775] [openload] Temporary fix (#10408) --- youtube_dl/extractor/openload.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 03baf8e32..76316ca2f 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -60,7 +60,7 @@ class OpenloadIE(InfoExtractor): if j >= 33 and j <= 126: j = ((j + 14) % 94) + 33 if idx == len(enc_data) - 1: - j += 1 + j += 3 video_url_chars += compat_chr(j) video_url = 'https://openload.co/stream/%s?mime=true' % ''.join(video_url_chars) From fea74acad8e8ebc1fda1d24a10c085c6771a71be Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 11 Sep 2016 18:53:05 +0800 Subject: [PATCH 443/775] [foxnews] Revert to old extractor names --- youtube_dl/extractor/extractors.py | 2 +- youtube_dl/extractor/foxnews.py | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index e9027fb69..a3cd9c289 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -292,8 +292,8 @@ from .fourtube import FourTubeIE from .fox import FOXIE from .foxgay import FoxgayIE from .foxnews import ( - FoxNewsVideoIE, FoxNewsIE, + FoxNewsArticleIE, FoxNewsInsiderIE, ) from .foxsports import FoxSportsIE diff --git a/youtube_dl/extractor/foxnews.py b/youtube_dl/extractor/foxnews.py index 3e9a6a08c..229bcb175 100644 --- a/youtube_dl/extractor/foxnews.py +++ b/youtube_dl/extractor/foxnews.py @@ -6,8 +6,8 @@ from .amp import AMPIE from .common import InfoExtractor -class FoxNewsVideoIE(AMPIE): - IE_NAME = 'foxnews:video' +class FoxNewsIE(AMPIE): + IE_NAME = 'foxnews' IE_DESC = 'Fox News and Fox Business Video' _VALID_URL = r'https?://(?Pvideo\.(?:insider\.)?fox(?:news|business)\.com)/v/(?:video-embed\.html\?video_id=)?(?P\d+)' _TESTS = [ @@ -67,9 +67,9 @@ class FoxNewsVideoIE(AMPIE): return info -class FoxNewsIE(InfoExtractor): +class FoxNewsArticleIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?foxnews\.com/(?!v)([^/]+/)+(?P[a-z-]+)' - IE_NAME = 'foxnews' + IE_NAME = 'foxnews:article' _TEST = { 'url': 'http://www.foxnews.com/politics/2016/09/08/buzz-about-bud-clinton-camp-denies-claims-wore-earpiece-at-forum.html', @@ -93,7 +93,7 @@ class FoxNewsIE(InfoExtractor): 
webpage, 'video ID', group='id') return self.url_result( 'http://video.foxnews.com/v/' + video_id, - FoxNewsVideoIE.ie_key()) + FoxNewsIE.ie_key()) class FoxNewsInsiderIE(InfoExtractor): @@ -117,7 +117,7 @@ class FoxNewsInsiderIE(InfoExtractor): # m3u8 download 'skip_download': True, }, - 'add_ie': [FoxNewsVideoIE.ie_key()], + 'add_ie': [FoxNewsIE.ie_key()], } def _real_extract(self, url): @@ -132,7 +132,7 @@ class FoxNewsInsiderIE(InfoExtractor): return { '_type': 'url_transparent', - 'ie_key': FoxNewsVideoIE.ie_key(), + 'ie_key': FoxNewsIE.ie_key(), 'url': embed_url, 'display_id': display_id, 'title': title, From 6bb05b32a990b8fb961971fcb8110d292cf953e7 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 11 Sep 2016 19:22:51 +0800 Subject: [PATCH 444/775] [pornhub] Extract categories and tags (closes #10499) --- ChangeLog | 1 + youtube_dl/extractor/pornhub.py | 16 ++++++++++++++++ 2 files changed, 17 insertions(+) diff --git a/ChangeLog b/ChangeLog index a73a35e88..5d6609987 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors ++ [pornhub] Extract categories and tags (#10499) + [foxnews] Support Fox News articles (#10598) * [iwara] Fix extraction after relaunch (#10462, #3215) * [newgrounds] Fix uploader extraction (#10584) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 20976c101..0724efc09 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -15,6 +15,7 @@ from ..compat import ( from ..utils import ( ExtractorError, int_or_none, + js_to_json, orderedSet, sanitized_Request, str_to_int, @@ -48,6 +49,8 @@ class PornHubIE(InfoExtractor): 'dislike_count': int, 'comment_count': int, 'age_limit': 18, + 'tags': list, + 'categories': list, }, }, { # non-ASCII title @@ -63,6 +66,8 @@ class PornHubIE(InfoExtractor): 'dislike_count': int, 'comment_count': int, 'age_limit': 18, + 'tags': list, + 'categories': list, }, 'params': { 'skip_download': True, @@ -183,6 +188,15 @@ class PornHubIE(InfoExtractor): }) self._sort_formats(formats) + page_params = self._parse_json(self._search_regex( + r'page_params\.zoneDetails\[([\'"])[^\'"]+\1\]\s*=\s*(?P{[^}]+})', + webpage, 'page parameters', group='data', default='{}'), + video_id, transform_source=js_to_json, fatal=False) + tags = categories = None + if page_params: + tags = page_params.get('tags', '').split(',') + categories = page_params.get('categories', '').split(',') + return { 'id': video_id, 'uploader': video_uploader, @@ -195,6 +209,8 @@ class PornHubIE(InfoExtractor): 'comment_count': comment_count, 'formats': formats, 'age_limit': 18, + 'tags': tags, + 'categories': categories, } From 6599c72527ca8434589c010c48164494ab4c2469 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 11 Sep 2016 22:50:36 +0700 Subject: [PATCH 445/775] [tube8] Extract categories and tags (Closes #10579) --- youtube_dl/extractor/tube8.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/youtube_dl/extractor/tube8.py b/youtube_dl/extractor/tube8.py index 4053f6c21..e937b2396 100644 --- a/youtube_dl/extractor/tube8.py +++ b/youtube_dl/extractor/tube8.py @@ -1,5 +1,7 @@ from __future__ import unicode_literals +import re + from ..utils import ( int_or_none, str_to_int, @@ -21,7 +23,13 @@ class Tube8IE(KeezMoviesIE): 'title': 'Kasia music video', 'age_limit': 18, 'duration': 230, + 'categories': ['Teen'], + 'tags': ['dancing'], + }, + 'params': { + 'proxy': '127.0.0.1:8118', } + }, { 'url': 
'http://www.tube8.com/shemale/teen/blonde-cd-gets-kidnapped-by-two-blacks-and-punished-for-being-a-slutty-girl/19569151/', 'only_matching': True, @@ -51,6 +59,17 @@ class Tube8IE(KeezMoviesIE): r'(\d+)', webpage, 'comment count', fatal=False)) + category = self._search_regex( + r'Category:\s*\s*]+href=[^>]+>([^<]+)', + webpage, 'category', fatal=False) + categories = [category] if category else None + + tags_str = self._search_regex( + r'(?s)Tags:\s*(.+?)]+href=[^>]+>([^<]+)', tags_str)] if tags_str else None + info.update({ 'description': description, 'uploader': uploader, @@ -58,6 +77,8 @@ class Tube8IE(KeezMoviesIE): 'like_count': like_count, 'dislike_count': dislike_count, 'comment_count': comment_count, + 'categories': categories, + 'tags': tags, }) return info From bc9186c8822db456dae93d053a34e60b7887405a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 11 Sep 2016 22:51:12 +0700 Subject: [PATCH 446/775] [tvplay] Remove unused import --- youtube_dl/extractor/tvplay.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/tvplay.py b/youtube_dl/extractor/tvplay.py index 5548ff2ac..58ffc0e6f 100644 --- a/youtube_dl/extractor/tvplay.py +++ b/youtube_dl/extractor/tvplay.py @@ -16,7 +16,6 @@ from ..utils import ( parse_iso8601, qualities, try_get, - js_to_json, update_url_query, ) From 1c81476cbb167776e7b1454bf135fb7ebf62547f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 11 Sep 2016 23:20:09 +0700 Subject: [PATCH 447/775] release 2016.09.11 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 9 +++++++-- youtube_dl/version.py | 2 +- 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index a983bf432..d7195712b 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.08*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.08** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.11*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.11** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.09.08 +[debug] youtube-dl version 2016.09.11 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 5d6609987..21d9f6275 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2016.09.11 Extractors + [pornhub] Extract categories and tags (#10499) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index e6be746a8..7a7b268d3 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -247,7 +247,8 @@ - **Formula1** - **FOX** - **Foxgay** - - **FoxNews**: Fox News and Fox Business Video + - **foxnews**: Fox News and Fox Business Video + - **foxnews:article** - **foxnews:insider** - **FoxSports** - **france2.fr:generation-quoi** @@ -326,6 +327,7 @@ - **ivi**: ivi.ru - **ivi:compilation**: ivi.ru compilations - **ivideon**: Ivideon TV + - **Iwara** - **Izlesene** - **JeuxVideo** - **Jove** @@ -339,6 +341,7 @@ - **KarriereVideos** - **keek** - **KeezMovies** + - **Ketnet** - **KhanAcademy** - **KickStarter** - **KonserthusetPlay** @@ -540,6 +543,7 @@ - **podomatic** - **Pokemon** - **PolskieRadio** + - **PolskieRadioCategory** - **PornCom** - **PornHd** - **PornHub**: PornHub and Thumbzilla @@ -701,9 +705,11 @@ - **Telecinco**: telecinco.es, cuatro.com and mediaset.es - **Telegraaf** - **TeleMB** + - **TeleQuebec** - **TeleTask** - **Telewebion** - **TF1** + - **TFO** - **TheIntercept** - **ThePlatform** - **ThePlatformFeed** @@ -725,7 +731,6 @@ - **ToypicsUser**: Toypics user profile - **TrailerAddict** (Currently broken) - **Trilulilu** - - **trollvids** - **TruTV** - **Tube8** - **TubiTv** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 941ffb3f6..5f572391c 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.09.08' +__version__ = '2016.09.11' From eb87d4545a58be369723eddf5433b4198d64d367 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 11 Sep 2016 23:29:25 +0700 Subject: [PATCH 448/775] [devscripts/release.sh] Add ChangeLog reminder prompt --- devscripts/release.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/devscripts/release.sh b/devscripts/release.sh index ca6ae1b49..981d37ca7 100755 --- a/devscripts/release.sh +++ b/devscripts/release.sh @@ -60,6 +60,9 @@ if ! type pandoc >/dev/null 2>/dev/null; then echo 'ERROR: pandoc is missing'; e if ! python3 -c 'import rsa' 2>/dev/null; then echo 'ERROR: python3-rsa is missing'; exit 1; fi if ! python3 -c 'import wheel' 2>/dev/null; then echo 'ERROR: wheel is missing'; exit 1; fi +read -p "Is ChangeLog up to date? (y/n) " -n 1 +if [[ ! $REPLY =~ ^[Yy]$ ]]; then exit 1; + /bin/echo -e "\n### First of all, testing..." 
make clean if $skip_tests ; then From d667ab7fad8d04a318b54e95d7a764e1667d80bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 11 Sep 2016 23:30:18 +0700 Subject: [PATCH 449/775] [ChangeLog] Actualize --- ChangeLog | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index 21d9f6275..9183f29e8 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,10 +1,22 @@ -version 2016.09.11 +version Extractors ++ [tube8] Extract categories and tags (#10579) + [pornhub] Extract categories and tags (#10499) -+ [foxnews] Support Fox News articles (#10598) +* [openload] Temporary fix (#10408) ++ [foxnews] Add support Fox News articles (#10598) +* [viafree] Improve video id extraction (#10615) * [iwara] Fix extraction after relaunch (#10462, #3215) ++ [tfo] Add extractor for tfo.org +* [lrt] Fix audio extraction (#10566) +* [9now] Fix extraction (#10561) ++ [canalplus] Add support for c8.fr (#10577) * [newgrounds] Fix uploader extraction (#10584) ++ [polskieradio:category] Add support for category lists (#10576) ++ [ketnet] Add extractor for ketnet.be (#10343) ++ [canvas] Add support for een.be (#10605) ++ [telequebec] Add extractor for telequebec.tv (#1999) +* [parliamentliveuk] Fix extraction (#9137) version 2016.09.08 From fc150cba1d6763ab115319c5726b5081b0f49106 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 11 Sep 2016 23:32:01 +0700 Subject: [PATCH 450/775] [devscripts/release.sh] Add missing fi --- devscripts/release.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/devscripts/release.sh b/devscripts/release.sh index 981d37ca7..1af61aa0b 100755 --- a/devscripts/release.sh +++ b/devscripts/release.sh @@ -61,7 +61,7 @@ if ! python3 -c 'import rsa' 2>/dev/null; then echo 'ERROR: python3-rsa is missi if ! python3 -c 'import wheel' 2>/dev/null; then echo 'ERROR: wheel is missing'; exit 1; fi read -p "Is ChangeLog up to date? (y/n) " -n 1 -if [[ ! $REPLY =~ ^[Yy]$ ]]; then exit 1; +if [[ ! $REPLY =~ ^[Yy]$ ]]; then exit 1; fi /bin/echo -e "\n### First of all, testing..." make clean From 0307d6fba6d3b793acac5785b2cee39e3dfbffcc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 11 Sep 2016 23:33:20 +0700 Subject: [PATCH 451/775] release 2016.09.11.1 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index d7195712b..e87fed573 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.11*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.11** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.11.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.11.1** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.09.11 +[debug] youtube-dl version 2016.09.11.1 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 9183f29e8..669544815 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2016.09.11.1 Extractors + [tube8] Extract categories and tags (#10579) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 5f572391c..903aede58 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.09.11' +__version__ = '2016.09.11.1' From ee7e672eb0eca7a916845b359511262935f9ef1b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 11 Sep 2016 23:44:22 +0700 Subject: [PATCH 452/775] [tube8] Remove proxy settings from test --- youtube_dl/extractor/tube8.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/youtube_dl/extractor/tube8.py b/youtube_dl/extractor/tube8.py index e937b2396..1853a1104 100644 --- a/youtube_dl/extractor/tube8.py +++ b/youtube_dl/extractor/tube8.py @@ -26,10 +26,6 @@ class Tube8IE(KeezMoviesIE): 'categories': ['Teen'], 'tags': ['dancing'], }, - 'params': { - 'proxy': '127.0.0.1:8118', - } - }, { 'url': 'http://www.tube8.com/shemale/teen/blonde-cd-gets-kidnapped-by-two-blacks-and-punished-for-being-a-slutty-girl/19569151/', 'only_matching': True, From be457302267b456412fb9848bcb8ce36874d8d7e Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Mon, 12 Sep 2016 02:55:15 +0800 Subject: [PATCH 453/775] [nbc] Add new extractor for NBC Olympics (#10295, #10361) --- ChangeLog | 6 +++++ youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/nbc.py | 40 ++++++++++++++++++++++++++++++ 3 files changed, 47 insertions(+) diff --git a/ChangeLog b/ChangeLog index 669544815..46eea0626 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Extractors ++ [nbc] Add support for NBC Olympics (#10361) + + version 2016.09.11.1 Extractors diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index a3cd9c289..522691de1 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -534,6 +534,7 @@ from .nbc import ( CSNNEIE, NBCIE, NBCNewsIE, + NBCOlympicsIE, NBCSportsIE, NBCSportsVPlayerIE, ) diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index f694e210b..f37bf2f30 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -335,3 +335,43 @@ class NBCNewsIE(ThePlatformIE): 'url': 'http://feed.theplatform.com/f/2E2eJC/nnd_NBCNews?byId=%s' % video_id, 'ie_key': 'ThePlatformFeed', } + + +class NBCOlympicsIE(InfoExtractor): + _VALID_URL = r'https?://www\.nbcolympics\.com/video/(?P[a-z-]+)' + + _TEST = { + # Geo-restricted to US + 'url': 
'http://www.nbcolympics.com/video/justin-roses-son-leo-was-tears-after-his-dad-won-gold', + 'md5': '54fecf846d05429fbaa18af557ee523a', + 'info_dict': { + 'id': 'WjTBzDXx5AUq', + 'display_id': 'justin-roses-son-leo-was-tears-after-his-dad-won-gold', + 'ext': 'mp4', + 'title': 'Rose\'s son Leo was in tears after his dad won gold', + 'description': 'Olympic gold medalist Justin Rose gets emotional talking to the impact his win in men\'s golf has already had on his children.', + 'timestamp': 1471274964, + 'upload_date': '20160815', + 'uploader': 'NBCU-SPORTS', + }, + } + + def _real_extract(self, url): + display_id = self._match_id(url) + + webpage = self._download_webpage(url, display_id) + + drupal_settings = self._parse_json(self._search_regex( + r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);', + webpage, 'drupal settings'), display_id) + + iframe_url = drupal_settings['vod']['iframe_url'] + theplatform_url = iframe_url.replace( + 'vplayer.nbcolympics.com', 'player.theplatform.com') + + return { + '_type': 'url_transparent', + 'url': theplatform_url, + 'ie_key': ThePlatformIE.ie_key(), + 'display_id': display_id, + } From 546edb2efabb18f9eb0eecb2f8719fcb777e99a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 12 Sep 2016 21:01:31 +0700 Subject: [PATCH 454/775] [ISSUE_TEMPLATE_tmpl.md] Fix typo --- .github/ISSUE_TEMPLATE_tmpl.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/ISSUE_TEMPLATE_tmpl.md b/.github/ISSUE_TEMPLATE_tmpl.md index a5e6a4233..4112f53bb 100644 --- a/.github/ISSUE_TEMPLATE_tmpl.md +++ b/.github/ISSUE_TEMPLATE_tmpl.md @@ -55,4 +55,4 @@ $ youtube-dl -v ### Description of your *issue*, suggested solution and other information Explanation of your *issue* in arbitrary form goes here. Please make sure the [description is worded well enough to be understood](https://github.com/rg3/youtube-dl#is-the-description-of-the-issue-itself-sufficient). Provide as much context and examples as possible. -If work on your *issue* required an account credentials please provide them or explain how one can obtain them. +If work on your *issue* requires an account credentials please provide them or explain how one can obtain them. 
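The NBC Olympics extractor added in patch 453 above hinges on one small step: the page ships its player configuration inside an inline jQuery.extend(Drupal.settings, {...}); call, and the extractor pulls that JSON blob out, reads vod.iframe_url from it, and rewrites the player hostname so the existing ThePlatform code path can take over. A rough standalone sketch of that step, using only re and json and a hypothetical webpage string in place of the downloaded HTML, could look like this:

    import json
    import re

    # Hypothetical page snippet; a real nbcolympics.com page carries a much larger settings object.
    webpage = '<script>jQuery.extend(Drupal.settings, {"vod": {"iframe_url": "http://vplayer.nbcolympics.com/select/WjTBzDXx5AUq"}});</script>'

    def extract_drupal_settings(html):
        # Same idea as the extractor's _search_regex + _parse_json combination.
        mobj = re.search(r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);', html)
        if not mobj:
            raise ValueError('drupal settings not found')
        return json.loads(mobj.group(1))

    settings = extract_drupal_settings(webpage)
    iframe_url = settings['vod']['iframe_url']
    # Hand the embed off to ThePlatform by swapping the player hostname, as the diff does.
    print(iframe_url.replace('vplayer.nbcolympics.com', 'player.theplatform.com'))

The rewritten URL is then returned as a url_transparent result keyed to ThePlatformIE, so all format extraction happens in the existing ThePlatform extractor rather than being duplicated here.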
From d002e919863c910e52c623ee544e93fe41af4665 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 12 Sep 2016 21:48:45 +0700 Subject: [PATCH 455/775] [vimeo:ondemand] Pass Referer along with embed URL (#10624) --- youtube_dl/extractor/vimeo.py | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 7e854f326..50aacc6ac 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -350,6 +350,10 @@ class VimeoIE(VimeoBaseInfoExtractor): } ] + @staticmethod + def _smuggle_referrer(url, referrer_url): + return smuggle_url(url, {'http_headers': {'Referer': referrer_url}}) + @staticmethod def _extract_vimeo_url(url, webpage): # Look for embedded (iframe) Vimeo player @@ -357,8 +361,7 @@ class VimeoIE(VimeoBaseInfoExtractor): r']+?src=(["\'])(?P(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage) if mobj: player_url = unescapeHTML(mobj.group('url')) - surl = smuggle_url(player_url, {'http_headers': {'Referer': url}}) - return surl + return VimeoIE._smuggle_referrer(player_url, url) # Look for embedded (swf embed) Vimeo player mobj = re.search( r']+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage) @@ -585,6 +588,20 @@ class VimeoOndemandIE(VimeoBaseInfoExtractor): 'uploader_url': 're:https?://(?:www\.)?vimeo\.com/gumfilms', 'uploader_id': 'gumfilms', }, + }, { + # requires Referer to be passed along with og:video:url + 'url': 'https://vimeo.com/ondemand/36938/126682985', + 'info_dict': { + 'id': '126682985', + 'ext': 'mp4', + 'title': 'Rävlock, rätt läte på rätt plats', + 'uploader': 'Lindroth & Norin', + 'uploader_url': 're:https?://(?:www\.)?vimeo\.com/user14430847', + 'uploader_id': 'user14430847', + }, + 'params': { + 'skip_download': True, + }, }, { 'url': 'https://vimeo.com/ondemand/nazmaalik', 'only_matching': True, @@ -599,7 +616,12 @@ class VimeoOndemandIE(VimeoBaseInfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - return self.url_result(self._og_search_video_url(webpage), VimeoIE.ie_key()) + return self.url_result( + # Some videos require Referer to be passed along with og:video:url + # similarly to generic vimeo embeds (e.g. + # https://vimeo.com/ondemand/36938/126682985). 
+ VimeoIE._smuggle_referrer(self._og_search_video_url(webpage), url), + VimeoIE.ie_key()) class VimeoChannelIE(VimeoBaseInfoExtractor): From a5ff05df1af97613c979f85ab2f6f610f60be910 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 12 Sep 2016 21:49:31 +0700 Subject: [PATCH 456/775] [extractor/generic] Add vimeo embed that requires Referer passed --- youtube_dl/extractor/generic.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 24b217715..2e46ca179 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1369,6 +1369,11 @@ class GenericIE(InfoExtractor): }, 'add_ie': ['Vimeo'], }, + { + # generic vimeo embed that requires original URL passed as Referer + 'url': 'http://racing4everyone.eu/2016/07/30/formula-1-2016-round12-germany/', + 'only_matching': True, + }, { 'url': 'https://support.arkena.com/display/PLAY/Ways+to+embed+your+video', 'md5': 'b96f2f71b359a8ecd05ce4e1daa72365', From e8bcd982ccee87e45a5cc8b116cc4452c81b0453 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 12 Sep 2016 22:33:00 +0700 Subject: [PATCH 457/775] [kaltura] Skip chun format --- youtube_dl/extractor/kaltura.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py index 6a8464998..22a06e4ae 100644 --- a/youtube_dl/extractor/kaltura.py +++ b/youtube_dl/extractor/kaltura.py @@ -262,6 +262,10 @@ class KalturaIE(InfoExtractor): # Continue if asset is not ready if f.get('status') != 2: continue + # Original format that's not available (e.g. kaltura:1926081:0_c03e1b5g) + # skip for now. + if f.get('fileExt') == 'chun': + continue video_url = sign_url( '%s/flavorId/%s' % (data_url, f['id'])) formats.append({ From 1d16035bb4ec516d25326ce5ff35affb4ff1f13c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 12 Sep 2016 22:43:45 +0700 Subject: [PATCH 458/775] [kaltura] Improve audio detection --- youtube_dl/extractor/kaltura.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py index 22a06e4ae..5a8403777 100644 --- a/youtube_dl/extractor/kaltura.py +++ b/youtube_dl/extractor/kaltura.py @@ -268,6 +268,10 @@ class KalturaIE(InfoExtractor): continue video_url = sign_url( '%s/flavorId/%s' % (data_url, f['id'])) + # audio-only has no videoCodecId (e.g. 
kaltura:1926081:0_c03e1b5g + # -f mp4-56) + vcodec = 'none' if 'videoCodecId' not in f and f.get( + 'frameRate') == 0 else f.get('videoCodecId') formats.append({ 'format_id': '%(fileExt)s-%(bitrate)s' % f, 'ext': f.get('fileExt'), @@ -275,7 +279,7 @@ class KalturaIE(InfoExtractor): 'fps': int_or_none(f.get('frameRate')), 'filesize_approx': int_or_none(f.get('size'), invscale=1024), 'container': f.get('containerFormat'), - 'vcodec': f.get('videoCodecId'), + 'vcodec': vcodec, 'height': int_or_none(f.get('height')), 'width': int_or_none(f.get('width')), 'url': video_url, From a6ccc3e518eabf61cc41575e52361d5ea79e3796 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 12 Sep 2016 23:05:52 +0700 Subject: [PATCH 459/775] [safari] Improve ids regexes (#10617) --- youtube_dl/extractor/safari.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/safari.py b/youtube_dl/extractor/safari.py index 08ddbe3c4..eabe41efe 100644 --- a/youtube_dl/extractor/safari.py +++ b/youtube_dl/extractor/safari.py @@ -103,13 +103,13 @@ class SafariIE(SafariBaseIE): webpage = self._download_webpage(url, video_id) reference_id = self._search_regex( - r'data-reference-id=(["\'])(?P.+?)\1', + r'data-reference-id=(["\'])(?P(?:(?!\1).)+)\1', webpage, 'kaltura reference id', group='id') partner_id = self._search_regex( - r'data-partner-id=(["\'])(?P.+?)\1', + r'data-partner-id=(["\'])(?P(?:(?!\1).)+)\1', webpage, 'kaltura widget id', group='id') ui_id = self._search_regex( - r'data-ui-id=(["\'])(?P.+?)\1', + r'data-ui-id=(["\'])(?P(?:(?!\1).)+)\1', webpage, 'kaltura uiconf id', group='id') query = { From fcba157e8049350c5386cc3b850626320d9ff7eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 12 Sep 2016 23:29:43 +0700 Subject: [PATCH 460/775] [ISSUE_TEMPLATE_tmpl.md] Fix typo --- .github/ISSUE_TEMPLATE_tmpl.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/ISSUE_TEMPLATE_tmpl.md b/.github/ISSUE_TEMPLATE_tmpl.md index 4112f53bb..ab9968129 100644 --- a/.github/ISSUE_TEMPLATE_tmpl.md +++ b/.github/ISSUE_TEMPLATE_tmpl.md @@ -55,4 +55,4 @@ $ youtube-dl -v ### Description of your *issue*, suggested solution and other information Explanation of your *issue* in arbitrary form goes here. Please make sure the [description is worded well enough to be understood](https://github.com/rg3/youtube-dl#is-the-description-of-the-issue-itself-sufficient). Provide as much context and examples as possible. -If work on your *issue* requires an account credentials please provide them or explain how one can obtain them. +If work on your *issue* requires account credentials please provide them or explain how one can obtain them. From 7a7309219cae70e14f58e904591a77360bfbc985 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 12 Sep 2016 23:39:11 +0100 Subject: [PATCH 461/775] [adobepass] add an option to specify mso_id and support for ROGERS TV Provider(closes #10606) --- youtube_dl/YoutubeDL.py | 1 + youtube_dl/__init__.py | 1 + youtube_dl/extractor/adobepass.py | 49 +++++++++++++++++++++++-------- youtube_dl/options.py | 4 +++ 4 files changed, 42 insertions(+), 13 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 805733fb7..f70d5f49a 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -131,6 +131,7 @@ class YoutubeDL(object): username: Username for authentication purposes. password: Password for authentication purposes. videopassword: Password for accessing a video. 
+ ap_mso_id Adobe Pass Multiple-system operator Identifier. usenetrc: Use netrc for authentication instead. verbose: Print additional info to stdout. quiet: Do not print messages to stdout. diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 42128272a..2b1b841c9 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -293,6 +293,7 @@ def _real_main(argv=None): 'password': opts.password, 'twofactor': opts.twofactor, 'videopassword': opts.videopassword, + 'ap_mso_id': opts.ap_mso_id, 'quiet': (opts.quiet or any_getting or any_printing), 'no_warnings': opts.no_warnings, 'forceurl': opts.geturl, diff --git a/youtube_dl/extractor/adobepass.py b/youtube_dl/extractor/adobepass.py index 68ec37e00..454a6af8d 100644 --- a/youtube_dl/extractor/adobepass.py +++ b/youtube_dl/extractor/adobepass.py @@ -6,10 +6,12 @@ import time import xml.etree.ElementTree as etree from .common import InfoExtractor +from ..compat import compat_urlparse from ..utils import ( unescapeHTML, urlencode_postdata, unified_timestamp, + ExtractorError, ) @@ -41,6 +43,11 @@ class AdobePassIE(InfoExtractor): token_expires = unified_timestamp(re.sub(r'[_ ]GMT', '', xml_text(token, date_ele))) return token_expires and token_expires <= int(time.time()) + def raise_mvpd_required(): + raise ExtractorError('This video is only available for users of participating TV providers. ' + 'Use --ap-mso-id to specify Adobe Pass Multiple-system operator Identifier ' + 'and --netrc to provide account credentials.', expected=True) + mvpd_headers = { 'ap_42': 'anonymous', 'ap_11': 'Linux i686', @@ -55,19 +62,26 @@ class AdobePassIE(InfoExtractor): authn_token = None if not authn_token: # TODO add support for other TV Providers - mso_id = 'DTV' + mso_id = self._downloader.params.get('ap_mso_id') + if not mso_id: + raise_mvpd_required() username, password = self._get_netrc_login_info(mso_id) if not username or not password: - return '' + return raise_mvpd_required() - def post_form(form_page, note, data={}): + def post_form(form_page_res, note, data={}): + form_page, urlh = form_page_res post_url = self._html_search_regex(r']+action=(["\'])(?P.+?)\1', form_page, 'post url', group='url') - return self._download_webpage( - post_url, video_id, note, data=urlencode_postdata(data or self._hidden_inputs(form_page)), headers={ + if not re.match(r'https?://', post_url): + post_url = compat_urlparse.urljoin(urlh.geturl(), post_url) + form_data = self._hidden_inputs(form_page) + form_data.update(data) + return self._download_webpage_handle( + post_url, video_id, note, data=urlencode_postdata(form_data), headers={ 'Content-Type': 'application/x-www-form-urlencoded', }) - provider_redirect_page = self._download_webpage( + provider_redirect_page_res = self._download_webpage_handle( self._SERVICE_PROVIDER_TEMPLATE % 'authenticate/saml', video_id, 'Downloading Provider Redirect Page', query={ 'noflash': 'true', @@ -77,13 +91,22 @@ class AdobePassIE(InfoExtractor): 'domain_name': 'adobe.com', 'redirect_url': url, }) - provider_login_page = post_form( - provider_redirect_page, 'Downloading Provider Login Page') - mvpd_confirm_page = post_form(provider_login_page, 'Logging in', { - 'username': username, - 'password': password, - }) - post_form(mvpd_confirm_page, 'Confirming Login') + provider_login_page_res = post_form( + provider_redirect_page_res, 'Downloading Provider Login Page') + login_data = {} + if mso_id == 'DTV': + login_data = { + 'username': username, + 'password': password, + } + elif mso_id == 'Rogers': + login_data = { + 
'UserName': username, + 'UserPassword': password, + } + mvpd_confirm_page_res = post_form(provider_login_page_res, 'Logging in', login_data) + if mso_id == 'DTV': + post_form(mvpd_confirm_page_res, 'Confirming Login') session = self._download_webpage( self._SERVICE_PROVIDER_TEMPLATE % 'session', video_id, diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 56f312f57..c4057ce59 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -350,6 +350,10 @@ def parseOpts(overrideArguments=None): '--video-password', dest='videopassword', metavar='PASSWORD', help='Video password (vimeo, smotri, youku)') + authentication.add_option( + '--ap-mso-id', + dest='ap_mso_id', metavar='APMSOID', + help='Adobe Pass Multiple-system operator Identifier(DTV, Rogers)') video_format = optparse.OptionGroup(parser, 'Video Format Options') video_format.add_option( From 45396dd2ed3bc7ab9ac6f9b5a5f51179b629abb7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 13 Sep 2016 23:20:25 +0700 Subject: [PATCH 462/775] [nhk] Fix extraction (Closes #10633) --- youtube_dl/extractor/nhk.py | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/youtube_dl/extractor/nhk.py b/youtube_dl/extractor/nhk.py index 691bdfa4e..5c8cd76dc 100644 --- a/youtube_dl/extractor/nhk.py +++ b/youtube_dl/extractor/nhk.py @@ -1,14 +1,15 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..utils import ExtractorError class NhkVodIE(InfoExtractor): - _VALID_URL = r'https?://www3\.nhk\.or\.jp/nhkworld/en/vod/(?P.+?)\.html' + _VALID_URL = r'https?://www3\.nhk\.or\.jp/nhkworld/en/vod/(?P[^/]+/[^/?#&]+)' _TEST = { # Videos available only for a limited period of time. Visit # http://www3.nhk.or.jp/nhkworld/en/vod/ for working samples. - 'url': 'http://www3.nhk.or.jp/nhkworld/en/vod/tokyofashion/20160815.html', + 'url': 'http://www3.nhk.or.jp/nhkworld/en/vod/tokyofashion/20160815', 'info_dict': { 'id': 'A1bnNiNTE6nY3jLllS-BIISfcC_PpvF5', 'ext': 'flv', @@ -19,25 +20,25 @@ class NhkVodIE(InfoExtractor): }, 'skip': 'Videos available only for a limited period of time', } + _API_URL = 'http://api.nhk.or.jp/nhkworld/vodesdlist/v1/all/all/all.json?apikey=EJfK8jdS57GqlupFgAfAAwr573q01y6k' def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + data = self._download_json(self._API_URL, video_id) - embed_code = self._search_regex( - r'nw_vod_ooplayer\([^,]+,\s*(["\'])(?P(?:(?!\1).)+)\1', - webpage, 'ooyala embed code', group='id') + try: + episode = next( + e for e in data['data']['episodes'] + if e.get('url') and video_id in e['url']) + except StopIteration: + raise ExtractorError('Unable to find episode') - title = self._search_regex( - r']+class=["\']episode-detail["\']>\s*([^<]+)', - webpage, 'title', default=None) - description = self._html_search_regex( - r'(?s)]+class=["\']description["\'][^>]*>(.+?)

    ', - webpage, 'description', default=None) - series = self._search_regex( - r']+class=["\']detail-top-player-title[^>]+>]+>([^<]+)', - webpage, 'series', default=None) + embed_code = episode['vod_id'] + + title = episode.get('sub_title_clean') or episode['sub_title'] + description = episode.get('description_clean') or episode.get('description') + series = episode.get('title_clean') or episode.get('title') return { '_type': 'url_transparent', From 8414c2da31a5ff3cc5ba84fdd537d714d04949f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 13 Sep 2016 23:22:16 +0700 Subject: [PATCH 463/775] [adobepass] PEP 8 --- youtube_dl/extractor/adobepass.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/adobepass.py b/youtube_dl/extractor/adobepass.py index 454a6af8d..50a208085 100644 --- a/youtube_dl/extractor/adobepass.py +++ b/youtube_dl/extractor/adobepass.py @@ -44,7 +44,8 @@ class AdobePassIE(InfoExtractor): return token_expires and token_expires <= int(time.time()) def raise_mvpd_required(): - raise ExtractorError('This video is only available for users of participating TV providers. ' + raise ExtractorError( + 'This video is only available for users of participating TV providers. ' 'Use --ap-mso-id to specify Adobe Pass Multiple-system operator Identifier ' 'and --netrc to provide account credentials.', expected=True) From 1b6712ab2378b2e8eb59f372fb51193f8d3bdc97 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 13 Sep 2016 22:16:01 +0100 Subject: [PATCH 464/775] [adobepass] add specific options for adobe pass authentication - add --ap-username and --ap-password option to specify TV provider username and password in the cmd line - add --ap-retries option to limit the number of retries - add --list-ap-msi-ids to list the supported TV Providers --- youtube_dl/YoutubeDL.py | 4 +- youtube_dl/__init__.py | 15 +++ youtube_dl/extractor/adobepass.py | 206 ++++++++++++++++-------------- youtube_dl/extractor/common.py | 10 +- youtube_dl/options.py | 24 +++- 5 files changed, 155 insertions(+), 104 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index f70d5f49a..9c2c26280 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -131,7 +131,9 @@ class YoutubeDL(object): username: Username for authentication purposes. password: Password for authentication purposes. videopassword: Password for accessing a video. - ap_mso_id Adobe Pass Multiple-system operator Identifier. + ap_mso_id: Adobe Pass Multiple-system operator Identifier. + ap_username: TV Provider username for authentication purposes. + ap_password: TV Provider password for authentication purposes. usenetrc: Use netrc for authentication instead. verbose: Print additional info to stdout. quiet: Do not print messages to stdout. 
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 2b1b841c9..052f20ee7 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -34,12 +34,14 @@ from .utils import ( setproctitle, std_headers, write_string, + render_table, ) from .update import update_self from .downloader import ( FileDownloader, ) from .extractor import gen_extractors, list_extractors +from .extractor.adobepass import MSO_INFO from .YoutubeDL import YoutubeDL @@ -118,18 +120,26 @@ def _real_main(argv=None): desc += ' (Example: "%s%s:%s" )' % (ie.SEARCH_KEY, random.choice(_COUNTS), random.choice(_SEARCHES)) write_string(desc + '\n', out=sys.stdout) sys.exit(0) + if opts.list_ap_mso_ids: + table = [[mso_id, mso_info['name']] for mso_id, mso_info in MSO_INFO.items()] + write_string('Supported TV Providers:\n' + render_table(['mso id', 'mso name'], table) + '\n', out=sys.stdout) + sys.exit(0) # Conflicting, missing and erroneous options if opts.usenetrc and (opts.username is not None or opts.password is not None): parser.error('using .netrc conflicts with giving username/password') if opts.password is not None and opts.username is None: parser.error('account username missing\n') + if opts.ap_password is not None and opts.ap_username is None: + parser.error('TV Provider account username missing\n') if opts.outtmpl is not None and (opts.usetitle or opts.autonumber or opts.useid): parser.error('using output template conflicts with using title, video ID or auto number') if opts.usetitle and opts.useid: parser.error('using title conflicts with using video ID') if opts.username is not None and opts.password is None: opts.password = compat_getpass('Type account password and press [Return]: ') + if opts.ap_username is not None and opts.ap_password is None: + opts.ap_password = compat_getpass('Type TV provider account password and press [Return]: ') if opts.ratelimit is not None: numeric_limit = FileDownloader.parse_bytes(opts.ratelimit) if numeric_limit is None: @@ -169,6 +179,8 @@ def _real_main(argv=None): opts.retries = parse_retries(opts.retries) if opts.fragment_retries is not None: opts.fragment_retries = parse_retries(opts.fragment_retries) + if opts.ap_retries is not None: + opts.ap_retries = parse_retries(opts.ap_retries) if opts.buffersize is not None: numeric_buffersize = FileDownloader.parse_bytes(opts.buffersize) if numeric_buffersize is None: @@ -294,6 +306,9 @@ def _real_main(argv=None): 'twofactor': opts.twofactor, 'videopassword': opts.videopassword, 'ap_mso_id': opts.ap_mso_id, + 'ap_username': opts.ap_username, + 'ap_password': opts.ap_password, + 'ap_retries': opts.ap_retries, 'quiet': (opts.quiet or any_getting or any_printing), 'no_warnings': opts.no_warnings, 'forceurl': opts.geturl, diff --git a/youtube_dl/extractor/adobepass.py b/youtube_dl/extractor/adobepass.py index 50a208085..9add6c0f8 100644 --- a/youtube_dl/extractor/adobepass.py +++ b/youtube_dl/extractor/adobepass.py @@ -15,6 +15,20 @@ from ..utils import ( ) +MSO_INFO = { + 'DTV': { + 'name': 'DirecTV', + 'username_field': 'username', + 'password_field': 'password', + }, + 'Rogers': { + 'name': 'Rogers Cable', + 'username_field': 'UserName', + 'password_field': 'UserPassword', + }, +} + + class AdobePassIE(InfoExtractor): _SERVICE_PROVIDER_TEMPLATE = 'https://sp.auth.adobe.com/adobe-services/%s' _USER_AGENT = 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0' @@ -43,6 +57,18 @@ class AdobePassIE(InfoExtractor): token_expires = unified_timestamp(re.sub(r'[_ ]GMT', '', xml_text(token, date_ele))) return 
token_expires and token_expires <= int(time.time()) + def post_form(form_page_res, note, data={}): + form_page, urlh = form_page_res + post_url = self._html_search_regex(r']+action=(["\'])(?P.+?)\1', form_page, 'post url', group='url') + if not re.match(r'https?://', post_url): + post_url = compat_urlparse.urljoin(urlh.geturl(), post_url) + form_data = self._hidden_inputs(form_page) + form_data.update(data) + return self._download_webpage_handle( + post_url, video_id, note, data=urlencode_postdata(form_data), headers={ + 'Content-Type': 'application/x-www-form-urlencoded', + }) + def raise_mvpd_required(): raise ExtractorError( 'This video is only available for users of participating TV providers. ' @@ -57,105 +83,95 @@ class AdobePassIE(InfoExtractor): } guid = xml_text(resource, 'guid') - requestor_info = self._downloader.cache.load('mvpd', requestor_id) or {} - authn_token = requestor_info.get('authn_token') - if authn_token and is_expired(authn_token, 'simpleTokenExpires'): - authn_token = None - if not authn_token: - # TODO add support for other TV Providers - mso_id = self._downloader.params.get('ap_mso_id') - if not mso_id: - raise_mvpd_required() - username, password = self._get_netrc_login_info(mso_id) - if not username or not password: - return raise_mvpd_required() + retries = self._downloader.params.get('ap_retries', 3) + count = 0 + while count < retries: + requestor_info = self._downloader.cache.load('mvpd', requestor_id) or {} + authn_token = requestor_info.get('authn_token') + if authn_token and is_expired(authn_token, 'simpleTokenExpires'): + authn_token = None + if not authn_token: + # TODO add support for other TV Providers + mso_id = self._downloader.params.get('ap_mso_id') + if not mso_id: + raise_mvpd_required() + if mso_id not in MSO_INFO: + raise ExtractorError( + 'Unsupported TV Provider, use --list-ap-mso-ids to get a list of supported TV Providers' % mso_id, expected=True) + username, password = self._get_login_info('ap_username', 'ap_password', mso_id) + if not username or not password: + raise_mvpd_required() + mso_info = MSO_INFO[mso_id] - def post_form(form_page_res, note, data={}): - form_page, urlh = form_page_res - post_url = self._html_search_regex(r']+action=(["\'])(?P.+?)\1', form_page, 'post url', group='url') - if not re.match(r'https?://', post_url): - post_url = compat_urlparse.urljoin(urlh.geturl(), post_url) - form_data = self._hidden_inputs(form_page) - form_data.update(data) - return self._download_webpage_handle( - post_url, video_id, note, data=urlencode_postdata(form_data), headers={ - 'Content-Type': 'application/x-www-form-urlencoded', + provider_redirect_page_res = self._download_webpage_handle( + self._SERVICE_PROVIDER_TEMPLATE % 'authenticate/saml', video_id, + 'Downloading Provider Redirect Page', query={ + 'noflash': 'true', + 'mso_id': mso_id, + 'requestor_id': requestor_id, + 'no_iframe': 'false', + 'domain_name': 'adobe.com', + 'redirect_url': url, }) - - provider_redirect_page_res = self._download_webpage_handle( - self._SERVICE_PROVIDER_TEMPLATE % 'authenticate/saml', video_id, - 'Downloading Provider Redirect Page', query={ - 'noflash': 'true', - 'mso_id': mso_id, - 'requestor_id': requestor_id, - 'no_iframe': 'false', - 'domain_name': 'adobe.com', - 'redirect_url': url, + provider_login_page_res = post_form( + provider_redirect_page_res, 'Downloading Provider Login Page') + mvpd_confirm_page_res = post_form(provider_login_page_res, 'Logging in', { + mso_info['username_field']: username, + mso_info['password_field']: password, }) 
- provider_login_page_res = post_form( - provider_redirect_page_res, 'Downloading Provider Login Page') - login_data = {} - if mso_id == 'DTV': - login_data = { - 'username': username, - 'password': password, - } - elif mso_id == 'Rogers': - login_data = { - 'UserName': username, - 'UserPassword': password, - } - mvpd_confirm_page_res = post_form(provider_login_page_res, 'Logging in', login_data) - if mso_id == 'DTV': - post_form(mvpd_confirm_page_res, 'Confirming Login') + if mso_id == 'DTV': + post_form(mvpd_confirm_page_res, 'Confirming Login') - session = self._download_webpage( - self._SERVICE_PROVIDER_TEMPLATE % 'session', video_id, - 'Retrieving Session', data=urlencode_postdata({ - '_method': 'GET', + session = self._download_webpage( + self._SERVICE_PROVIDER_TEMPLATE % 'session', video_id, + 'Retrieving Session', data=urlencode_postdata({ + '_method': 'GET', + 'requestor_id': requestor_id, + }), headers=mvpd_headers) + if '' + ('|'.join(re.escape(po) for po in PRIVATE_OPTS)) + ')=.+$') def _scrub_eq(o): @@ -350,10 +350,28 @@ def parseOpts(overrideArguments=None): '--video-password', dest='videopassword', metavar='PASSWORD', help='Video password (vimeo, smotri, youku)') - authentication.add_option( + + adobe_pass = optparse.OptionGroup(parser, 'Adobe Pass Options') + adobe_pass.add_option( '--ap-mso-id', dest='ap_mso_id', metavar='APMSOID', - help='Adobe Pass Multiple-system operator Identifier(DTV, Rogers)') + help='Adobe Pass Multiple-system operator Identifier') + adobe_pass.add_option( + '--ap-username', + dest='ap_username', metavar='APUSERNAME', + help='TV Provider Login with this account ID') + adobe_pass.add_option( + '--ap-password', + dest='ap_password', metavar='APPASSWORD', + help='TV Provider Account password. If this option is left out, youtube-dl will ask interactively.') + adobe_pass.add_option( + '--list-ap-mso-ids', + action='store_true', dest='list_ap_mso_ids', default=False, + help='List all supported TV Providers') + adobe_pass.add_option( + '--ap-retries', + dest='ap_retries', metavar='APRETRIES', default=3, + help='Number of retries for Adobe Pass Authorization requests') video_format = optparse.OptionGroup(parser, 'Video Format Options') video_format.add_option( From 4875ff68476ff7de9733c80effb652fc6ab07ea0 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Wed, 14 Sep 2016 22:01:31 +0800 Subject: [PATCH 465/775] [bilibili] Remove copyrighted test cases MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I can't find any English or Chinese material that claims BiliBili has bought legal redistribution permissions for copyrighted products from copyrighted holders. 
References for removed test cases: "刀语": https://en.wikipedia.org/wiki/Katanagatari, by White Fox "哆啦A梦": https://en.wikipedia.org/wiki/Doraemon, by Shin-Ei Animation "岳父岳母真难当": https://en.wikipedia.org/wiki/Serial_(Bad)_Weddings, by Les films du 24 "混沌武士": https://en.wikipedia.org/wiki/Samurai_Champloo, by Manglobe I shouldn't have added them to _TESTS --- youtube_dl/extractor/bilibili.py | 61 ++------------------------------ 1 file changed, 2 insertions(+), 59 deletions(-) diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index 8fa96d3a0..9f5c12ab9 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -17,7 +17,7 @@ from ..utils import ( class BiliBiliIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.|bangumi\.|)bilibili\.(?:tv|com)/(?:video/av|anime/v/)(?P\d+)' - _TESTS = [{ + _TEST = { 'url': 'http://www.bilibili.tv/video/av1074402/', 'md5': '9fa226fe2b8a9a4d5a69b4c6a183417e', 'info_dict': { @@ -32,64 +32,7 @@ class BiliBiliIE(InfoExtractor): 'uploader': '菊子桑', 'uploader_id': '156160', }, - }, { - 'url': 'http://www.bilibili.com/video/av1041170/', - 'info_dict': { - 'id': '1041170', - 'ext': 'mp4', - 'title': '【BD1080P】刀语【诸神&异域】', - 'description': '这是个神奇的故事~每个人不留弹幕不给走哦~切利哦!~', - 'duration': 3382.259, - 'timestamp': 1396530060, - 'upload_date': '20140403', - 'thumbnail': 're:^https?://.+\.jpg', - 'uploader': '枫叶逝去', - 'uploader_id': '520116', - }, - }, { - 'url': 'http://www.bilibili.com/video/av4808130/', - 'info_dict': { - 'id': '4808130', - 'ext': 'mp4', - 'title': '【长篇】哆啦A梦443【钉铛】', - 'description': '(2016.05.27)来组合客人的脸吧&amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;illust_id=56912929', - 'duration': 1493.995, - 'timestamp': 1464564180, - 'upload_date': '20160529', - 'thumbnail': 're:^https?://.+\.jpg', - 'uploader': '喜欢拉面', - 'uploader_id': '151066', - }, - }, { - # Missing upload time - 'url': 'http://www.bilibili.com/video/av1867637/', - 'info_dict': { - 'id': '1867637', - 'ext': 'mp4', - 'title': '【HDTV】【喜剧】岳父岳母真难当 (2014)【法国票房冠军】', - 'description': '一个信奉天主教的法国旧式传统资产阶级家庭中有四个女儿。三个女儿却分别找了阿拉伯、犹太、中国丈夫,老夫老妻唯独期盼剩下未嫁的小女儿能找一个信奉天主教的法国白人,结果没想到小女儿找了一位非裔黑人……【这次应该不会跳帧了】', - 'duration': 5760.0, - 'uploader': '黑夜为猫', - 'uploader_id': '610729', - 'thumbnail': 're:^https?://.+\.jpg', - }, - 'params': { - # Just to test metadata extraction - 'skip_download': True, - }, - 'expected_warnings': ['upload time'], - }, { - 'url': 'http://bangumi.bilibili.com/anime/v/40068', - 'md5': '08d539a0884f3deb7b698fb13ba69696', - 'info_dict': { - 'id': '40068', - 'ext': 'mp4', - 'duration': 1402.357, - 'title': '混沌武士 : 第7集 四面楚歌 A Risky Racket', - 'description': 'md5:6a9622b911565794c11f25f81d6a97d2', - 'thumbnail': 're:^http?://.+\.jpg', - }, - }] + } _APP_KEY = '6f90a59ac58a4123' _BILIBILI_KEY = '0bfd84cc3940035173f35e6777508326' From 86d68f906e21a6674f9f8676b22a47414b6c9fd2 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Wed, 14 Sep 2016 22:11:49 +0800 Subject: [PATCH 466/775] [bilibili] Fix extraction for videos without backup_url (#10647) --- ChangeLog | 1 + youtube_dl/extractor/bilibili.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 46eea0626..25c916eb2 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors +* [bilibili] Fix extraction for specific videos (#10647) + [nbc] Add support for NBC Olympics (#10361) diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index 9f5c12ab9..2d174e6f9 100644 --- 
a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -67,7 +67,7 @@ class BiliBiliIE(InfoExtractor): 'url': durl['url'], 'filesize': int_or_none(durl['size']), }] - for backup_url in durl['backup_url']: + for backup_url in durl.get('backup_url', []): formats.append({ 'url': backup_url, # backup URLs have lower priorities From 5712c0f42639cd183b0dfbc51482592e790e99d1 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 14 Sep 2016 16:36:42 +0100 Subject: [PATCH 467/775] [adobepass] remove unnecessary option --- youtube_dl/__init__.py | 3 --- youtube_dl/extractor/adobepass.py | 3 +-- youtube_dl/options.py | 4 ---- 3 files changed, 1 insertion(+), 9 deletions(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 052f20ee7..cdff3df65 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -179,8 +179,6 @@ def _real_main(argv=None): opts.retries = parse_retries(opts.retries) if opts.fragment_retries is not None: opts.fragment_retries = parse_retries(opts.fragment_retries) - if opts.ap_retries is not None: - opts.ap_retries = parse_retries(opts.ap_retries) if opts.buffersize is not None: numeric_buffersize = FileDownloader.parse_bytes(opts.buffersize) if numeric_buffersize is None: @@ -308,7 +306,6 @@ def _real_main(argv=None): 'ap_mso_id': opts.ap_mso_id, 'ap_username': opts.ap_username, 'ap_password': opts.ap_password, - 'ap_retries': opts.ap_retries, 'quiet': (opts.quiet or any_getting or any_printing), 'no_warnings': opts.no_warnings, 'forceurl': opts.geturl, diff --git a/youtube_dl/extractor/adobepass.py b/youtube_dl/extractor/adobepass.py index 9add6c0f8..913a817d2 100644 --- a/youtube_dl/extractor/adobepass.py +++ b/youtube_dl/extractor/adobepass.py @@ -83,9 +83,8 @@ class AdobePassIE(InfoExtractor): } guid = xml_text(resource, 'guid') - retries = self._downloader.params.get('ap_retries', 3) count = 0 - while count < retries: + while count < 2: requestor_info = self._downloader.cache.load('mvpd', requestor_id) or {} authn_token = requestor_info.get('authn_token') if authn_token and is_expired(authn_token, 'simpleTokenExpires'): diff --git a/youtube_dl/options.py b/youtube_dl/options.py index b99201a20..342ae3be3 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -368,10 +368,6 @@ def parseOpts(overrideArguments=None): '--list-ap-mso-ids', action='store_true', dest='list_ap_mso_ids', default=False, help='List all supported TV Providers') - adobe_pass.add_option( - '--ap-retries', - dest='ap_retries', metavar='APRETRIES', default=3, - help='Number of retries for Adobe Pass Authorization requests') video_format = optparse.OptionGroup(parser, 'Video Format Options') video_format.add_option( From b690ea15ebe7549854962f02987a8faaa6d41f53 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 14 Sep 2016 22:45:23 +0700 Subject: [PATCH 468/775] [viafree] Fix test --- youtube_dl/extractor/tvplay.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/tvplay.py b/youtube_dl/extractor/tvplay.py index 58ffc0e6f..3eda0a399 100644 --- a/youtube_dl/extractor/tvplay.py +++ b/youtube_dl/extractor/tvplay.py @@ -369,7 +369,7 @@ class ViafreeIE(InfoExtractor): 'add_ie': [TVPlayIE.ie_key()], }, { # Different og:image URL schema - 'url': 'www.viafree.se/program/reality/sommaren-med-youtube-stjarnorna/sasong-1/avsnitt-2', + 'url': 'http://www.viafree.se/program/reality/sommaren-med-youtube-stjarnorna/sasong-1/avsnitt-2', 'only_matching': True, }, { 'url': 
'http://www.viafree.no/programmer/underholdning/det-beste-vorspielet/sesong-2/episode-1', From 925194022cd661747771e58bad41e5f7ae118999 Mon Sep 17 00:00:00 2001 From: stepshal Date: Thu, 8 Sep 2016 18:29:05 +0700 Subject: [PATCH 469/775] Improve some _VALID_URLs --- youtube_dl/extractor/abc.py | 2 +- youtube_dl/extractor/aljazeera.py | 2 +- youtube_dl/extractor/azubu.py | 2 +- youtube_dl/extractor/bbc.py | 2 +- youtube_dl/extractor/bpb.py | 2 +- youtube_dl/extractor/camdemy.py | 2 +- youtube_dl/extractor/cbssports.py | 2 +- youtube_dl/extractor/ceskatelevize.py | 2 +- youtube_dl/extractor/chirbit.py | 2 +- youtube_dl/extractor/cmt.py | 2 +- youtube_dl/extractor/criterion.py | 2 +- youtube_dl/extractor/dctp.py | 2 +- youtube_dl/extractor/democracynow.py | 2 +- youtube_dl/extractor/engadget.py | 2 +- youtube_dl/extractor/expotv.py | 2 +- youtube_dl/extractor/freespeech.py | 2 +- youtube_dl/extractor/gamestar.py | 2 +- youtube_dl/extractor/googleplus.py | 2 +- youtube_dl/extractor/goshgay.py | 2 +- youtube_dl/extractor/hark.py | 2 +- youtube_dl/extractor/hotnewhiphop.py | 2 +- youtube_dl/extractor/imdb.py | 2 +- youtube_dl/extractor/karaoketv.py | 2 +- youtube_dl/extractor/kickstarter.py | 2 +- youtube_dl/extractor/kuwo.py | 8 ++++---- youtube_dl/extractor/litv.py | 2 +- youtube_dl/extractor/lynda.py | 2 +- youtube_dl/extractor/macgamestore.py | 2 +- youtube_dl/extractor/metacritic.py | 2 +- youtube_dl/extractor/mgtv.py | 2 +- youtube_dl/extractor/ministrygrid.py | 2 +- youtube_dl/extractor/mitele.py | 2 +- youtube_dl/extractor/motorsport.py | 2 +- youtube_dl/extractor/moviezine.py | 2 +- youtube_dl/extractor/myspass.py | 2 +- youtube_dl/extractor/nbc.py | 6 +++--- youtube_dl/extractor/ndr.py | 8 ++++---- youtube_dl/extractor/nextmedia.py | 6 +++--- youtube_dl/extractor/niconico.py | 2 +- youtube_dl/extractor/oktoberfesttv.py | 2 +- youtube_dl/extractor/openload.py | 2 +- youtube_dl/extractor/periscope.py | 2 +- youtube_dl/extractor/playvid.py | 2 +- youtube_dl/extractor/qqmusic.py | 6 +++--- youtube_dl/extractor/rottentomatoes.py | 2 +- youtube_dl/extractor/roxwel.py | 2 +- youtube_dl/extractor/rtve.py | 6 +++--- youtube_dl/extractor/screenjunkies.py | 2 +- youtube_dl/extractor/senateisvp.py | 2 +- youtube_dl/extractor/slideshare.py | 2 +- youtube_dl/extractor/spiegel.py | 2 +- youtube_dl/extractor/syfy.py | 2 +- youtube_dl/extractor/teachingchannel.py | 2 +- youtube_dl/extractor/telecinco.py | 2 +- youtube_dl/extractor/telewebion.py | 2 +- youtube_dl/extractor/theintercept.py | 2 +- youtube_dl/extractor/thescene.py | 2 +- youtube_dl/extractor/tlc.py | 2 +- youtube_dl/extractor/udemy.py | 2 +- youtube_dl/extractor/ustream.py | 4 ++-- youtube_dl/extractor/vevo.py | 4 ++-- youtube_dl/extractor/videodetective.py | 2 +- youtube_dl/extractor/weiqitv.py | 2 +- youtube_dl/extractor/yam.py | 2 +- youtube_dl/extractor/youtube.py | 12 ++++++------ 65 files changed, 86 insertions(+), 86 deletions(-) diff --git a/youtube_dl/extractor/abc.py b/youtube_dl/extractor/abc.py index 3792bd232..465249bbf 100644 --- a/youtube_dl/extractor/abc.py +++ b/youtube_dl/extractor/abc.py @@ -13,7 +13,7 @@ from ..utils import ( class ABCIE(InfoExtractor): IE_NAME = 'abc.net.au' - _VALID_URL = r'https?://www\.abc\.net\.au/news/(?:[^/]+/){1,2}(?P\d+)' + _VALID_URL = r'https?://(?:www\.)?abc\.net\.au/news/(?:[^/]+/){1,2}(?P\d+)' _TESTS = [{ 'url': 'http://www.abc.net.au/news/2014-11-05/australia-to-staff-ebola-treatment-centre-in-sierra-leone/5868334', diff --git a/youtube_dl/extractor/aljazeera.py 
b/youtube_dl/extractor/aljazeera.py index b081695d8..388e578d5 100644 --- a/youtube_dl/extractor/aljazeera.py +++ b/youtube_dl/extractor/aljazeera.py @@ -4,7 +4,7 @@ from .common import InfoExtractor class AlJazeeraIE(InfoExtractor): - _VALID_URL = r'https?://www\.aljazeera\.com/programmes/.*?/(?P[^/]+)\.html' + _VALID_URL = r'https?://(?:www\.)?aljazeera\.com/programmes/.*?/(?P[^/]+)\.html' _TEST = { 'url': 'http://www.aljazeera.com/programmes/the-slum/2014/08/deliverance-201482883754237240.html', diff --git a/youtube_dl/extractor/azubu.py b/youtube_dl/extractor/azubu.py index a813eb429..72e1bd59d 100644 --- a/youtube_dl/extractor/azubu.py +++ b/youtube_dl/extractor/azubu.py @@ -103,7 +103,7 @@ class AzubuIE(InfoExtractor): class AzubuLiveIE(InfoExtractor): - _VALID_URL = r'https?://www.azubu.tv/(?P[^/]+)$' + _VALID_URL = r'https?://(?:www\.)?azubu\.tv/(?P[^/]+)$' _TEST = { 'url': 'http://www.azubu.tv/MarsTVMDLen', diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index deb9cc1c0..b17916137 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -1028,7 +1028,7 @@ class BBCIE(BBCCoUkIE): class BBCCoUkArticleIE(InfoExtractor): - _VALID_URL = r'https?://www.bbc.co.uk/programmes/articles/(?P[a-zA-Z0-9]+)' + _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/programmes/articles/(?P[a-zA-Z0-9]+)' IE_NAME = 'bbc.co.uk:article' IE_DESC = 'BBC articles' diff --git a/youtube_dl/extractor/bpb.py b/youtube_dl/extractor/bpb.py index 6ad45a1e6..9661ade4f 100644 --- a/youtube_dl/extractor/bpb.py +++ b/youtube_dl/extractor/bpb.py @@ -12,7 +12,7 @@ from ..utils import ( class BpbIE(InfoExtractor): IE_DESC = 'Bundeszentrale für politische Bildung' - _VALID_URL = r'https?://www\.bpb\.de/mediathek/(?P[0-9]+)/' + _VALID_URL = r'https?://(?:www\.)?bpb\.de/mediathek/(?P[0-9]+)/' _TEST = { 'url': 'http://www.bpb.de/mediathek/297/joachim-gauck-zu-1989-und-die-erinnerung-an-die-ddr', diff --git a/youtube_dl/extractor/camdemy.py b/youtube_dl/extractor/camdemy.py index 268c34392..d4e6fbdce 100644 --- a/youtube_dl/extractor/camdemy.py +++ b/youtube_dl/extractor/camdemy.py @@ -112,7 +112,7 @@ class CamdemyIE(InfoExtractor): class CamdemyFolderIE(InfoExtractor): - _VALID_URL = r'https?://www.camdemy.com/folder/(?P\d+)' + _VALID_URL = r'https?://(?:www\.)?camdemy\.com/folder/(?P\d+)' _TESTS = [{ # links with trailing slash 'url': 'http://www.camdemy.com/folder/450', diff --git a/youtube_dl/extractor/cbssports.py b/youtube_dl/extractor/cbssports.py index bf7915626..3a62c840b 100644 --- a/youtube_dl/extractor/cbssports.py +++ b/youtube_dl/extractor/cbssports.py @@ -4,7 +4,7 @@ from .cbs import CBSBaseIE class CBSSportsIE(CBSBaseIE): - _VALID_URL = r'https?://www\.cbssports\.com/video/player/[^/]+/(?P\d+)' + _VALID_URL = r'https?://(?:www\.)?cbssports\.com/video/player/[^/]+/(?P\d+)' _TESTS = [{ 'url': 'http://www.cbssports.com/video/player/videos/708337219968/0/ben-simmons-the-next-lebron?-not-so-fast', diff --git a/youtube_dl/extractor/ceskatelevize.py b/youtube_dl/extractor/ceskatelevize.py index 5a58d1777..87c2e7089 100644 --- a/youtube_dl/extractor/ceskatelevize.py +++ b/youtube_dl/extractor/ceskatelevize.py @@ -17,7 +17,7 @@ from ..utils import ( class CeskaTelevizeIE(InfoExtractor): - _VALID_URL = r'https?://www\.ceskatelevize\.cz/(porady|ivysilani)/(?:[^/]+/)*(?P[^/#?]+)/*(?:[#?].*)?$' + _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/(porady|ivysilani)/(?:[^/]+/)*(?P[^/#?]+)/*(?:[#?].*)?$' _TESTS = [{ 'url': 
'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220', 'info_dict': { diff --git a/youtube_dl/extractor/chirbit.py b/youtube_dl/extractor/chirbit.py index b43518652..61aed0167 100644 --- a/youtube_dl/extractor/chirbit.py +++ b/youtube_dl/extractor/chirbit.py @@ -65,7 +65,7 @@ class ChirbitIE(InfoExtractor): class ChirbitProfileIE(InfoExtractor): IE_NAME = 'chirbit:profile' - _VALID_URL = r'https?://(?:www\.)?chirbit.com/(?:rss/)?(?P[^/]+)' + _VALID_URL = r'https?://(?:www\.)?chirbit\.com/(?:rss/)?(?P[^/]+)' _TEST = { 'url': 'http://chirbit.com/ScarletBeauty', 'info_dict': { diff --git a/youtube_dl/extractor/cmt.py b/youtube_dl/extractor/cmt.py index f24568dcc..ac3bdfe8f 100644 --- a/youtube_dl/extractor/cmt.py +++ b/youtube_dl/extractor/cmt.py @@ -6,7 +6,7 @@ from ..utils import ExtractorError class CMTIE(MTVIE): IE_NAME = 'cmt.com' - _VALID_URL = r'https?://www\.cmt\.com/(?:videos|shows)/(?:[^/]+/)*(?P\d+)' + _VALID_URL = r'https?://(?:www\.)?cmt\.com/(?:videos|shows)/(?:[^/]+/)*(?P\d+)' _FEED_URL = 'http://www.cmt.com/sitewide/apps/player/embed/rss/' _TESTS = [{ diff --git a/youtube_dl/extractor/criterion.py b/youtube_dl/extractor/criterion.py index dedb810a0..ad32673a8 100644 --- a/youtube_dl/extractor/criterion.py +++ b/youtube_dl/extractor/criterion.py @@ -7,7 +7,7 @@ from .common import InfoExtractor class CriterionIE(InfoExtractor): - _VALID_URL = r'https?://www\.criterion\.com/films/(?P[0-9]+)-.+' + _VALID_URL = r'https?://(?:www\.)?criterion\.com/films/(?P[0-9]+)-.+' _TEST = { 'url': 'http://www.criterion.com/films/184-le-samourai', 'md5': 'bc51beba55685509883a9a7830919ec3', diff --git a/youtube_dl/extractor/dctp.py b/youtube_dl/extractor/dctp.py index 9099f5046..a47e04993 100644 --- a/youtube_dl/extractor/dctp.py +++ b/youtube_dl/extractor/dctp.py @@ -6,7 +6,7 @@ from ..compat import compat_str class DctpTvIE(InfoExtractor): - _VALID_URL = r'https?://www.dctp.tv/(#/)?filme/(?P.+?)/$' + _VALID_URL = r'https?://(?:www\.)?dctp\.tv/(#/)?filme/(?P.+?)/$' _TEST = { 'url': 'http://www.dctp.tv/filme/videoinstallation-fuer-eine-kaufhausfassade/', 'info_dict': { diff --git a/youtube_dl/extractor/democracynow.py b/youtube_dl/extractor/democracynow.py index 65a98d789..bdfe638b4 100644 --- a/youtube_dl/extractor/democracynow.py +++ b/youtube_dl/extractor/democracynow.py @@ -13,7 +13,7 @@ from ..utils import ( class DemocracynowIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?democracynow.org/(?P[^\?]*)' + _VALID_URL = r'https?://(?:www\.)?democracynow\.org/(?P[^\?]*)' IE_NAME = 'democracynow' _TESTS = [{ 'url': 'http://www.democracynow.org/shows/2015/7/3', diff --git a/youtube_dl/extractor/engadget.py b/youtube_dl/extractor/engadget.py index a39e9010d..65635c18b 100644 --- a/youtube_dl/extractor/engadget.py +++ b/youtube_dl/extractor/engadget.py @@ -4,7 +4,7 @@ from .common import InfoExtractor class EngadgetIE(InfoExtractor): - _VALID_URL = r'https?://www.engadget.com/video/(?P[^/?#]+)' + _VALID_URL = r'https?://(?:www\.)?engadget\.com/video/(?P[^/?#]+)' _TESTS = [{ # video with 5min ID diff --git a/youtube_dl/extractor/expotv.py b/youtube_dl/extractor/expotv.py index 971c918a4..ef11962f3 100644 --- a/youtube_dl/extractor/expotv.py +++ b/youtube_dl/extractor/expotv.py @@ -8,7 +8,7 @@ from ..utils import ( class ExpoTVIE(InfoExtractor): - _VALID_URL = r'https?://www\.expotv\.com/videos/[^?#]*/(?P[0-9]+)($|[?#])' + _VALID_URL = r'https?://(?:www\.)?expotv\.com/videos/[^?#]*/(?P[0-9]+)($|[?#])' _TEST = { 'url': 
'http://www.expotv.com/videos/reviews/3/40/NYX-Butter-lipstick/667916', 'md5': 'fe1d728c3a813ff78f595bc8b7a707a8', diff --git a/youtube_dl/extractor/freespeech.py b/youtube_dl/extractor/freespeech.py index 1477708bb..0a70ca763 100644 --- a/youtube_dl/extractor/freespeech.py +++ b/youtube_dl/extractor/freespeech.py @@ -8,7 +8,7 @@ from .common import InfoExtractor class FreespeechIE(InfoExtractor): IE_NAME = 'freespeech.org' - _VALID_URL = r'https://www\.freespeech\.org/video/(?P.+)' + _VALID_URL = r'https?://(?:www\.)?freespeech\.org/video/(?P<title>.+)' _TEST = { 'add_ie': ['Youtube'], 'url': 'https://www.freespeech.org/video/obama-romney-campaign-colorado-ahead-debate-0', diff --git a/youtube_dl/extractor/gamestar.py b/youtube_dl/extractor/gamestar.py index 341e72733..55a34604a 100644 --- a/youtube_dl/extractor/gamestar.py +++ b/youtube_dl/extractor/gamestar.py @@ -9,7 +9,7 @@ from ..utils import ( class GameStarIE(InfoExtractor): - _VALID_URL = r'https?://www\.gamestar\.de/videos/.*,(?P<id>[0-9]+)\.html' + _VALID_URL = r'https?://(?:www\.)?gamestar\.de/videos/.*,(?P<id>[0-9]+)\.html' _TEST = { 'url': 'http://www.gamestar.de/videos/trailer,3/hobbit-3-die-schlacht-der-fuenf-heere,76110.html', 'md5': '96974ecbb7fd8d0d20fca5a00810cea7', diff --git a/youtube_dl/extractor/googleplus.py b/youtube_dl/extractor/googleplus.py index 731bacd67..427499b11 100644 --- a/youtube_dl/extractor/googleplus.py +++ b/youtube_dl/extractor/googleplus.py @@ -10,7 +10,7 @@ from ..utils import unified_strdate class GooglePlusIE(InfoExtractor): IE_DESC = 'Google Plus' - _VALID_URL = r'https://plus\.google\.com/(?:[^/]+/)*?posts/(?P<id>\w+)' + _VALID_URL = r'https?://plus\.google\.com/(?:[^/]+/)*?posts/(?P<id>\w+)' IE_NAME = 'plus.google' _TEST = { 'url': 'https://plus.google.com/u/0/108897254135232129896/posts/ZButuJc6CtH', diff --git a/youtube_dl/extractor/goshgay.py b/youtube_dl/extractor/goshgay.py index 0c015141f..a43abd154 100644 --- a/youtube_dl/extractor/goshgay.py +++ b/youtube_dl/extractor/goshgay.py @@ -11,7 +11,7 @@ from ..utils import ( class GoshgayIE(InfoExtractor): - _VALID_URL = r'https?://www\.goshgay\.com/video(?P<id>\d+?)($|/)' + _VALID_URL = r'https?://(?:www\.)?goshgay\.com/video(?P<id>\d+?)($|/)' _TEST = { 'url': 'http://www.goshgay.com/video299069/diesel_sfw_xxx_video', 'md5': '4b6db9a0a333142eb9f15913142b0ed1', diff --git a/youtube_dl/extractor/hark.py b/youtube_dl/extractor/hark.py index b6cc15b6f..749e9154f 100644 --- a/youtube_dl/extractor/hark.py +++ b/youtube_dl/extractor/hark.py @@ -5,7 +5,7 @@ from .common import InfoExtractor class HarkIE(InfoExtractor): - _VALID_URL = r'https?://www\.hark\.com/clips/(?P<id>.+?)-.+' + _VALID_URL = r'https?://(?:www\.)?hark\.com/clips/(?P<id>.+?)-.+' _TEST = { 'url': 'http://www.hark.com/clips/mmbzyhkgny-obama-beyond-the-afghan-theater-we-only-target-al-qaeda-on-may-23-2013', 'md5': '6783a58491b47b92c7c1af5a77d4cbee', diff --git a/youtube_dl/extractor/hotnewhiphop.py b/youtube_dl/extractor/hotnewhiphop.py index 9db565209..34163725f 100644 --- a/youtube_dl/extractor/hotnewhiphop.py +++ b/youtube_dl/extractor/hotnewhiphop.py @@ -12,7 +12,7 @@ from ..utils import ( class HotNewHipHopIE(InfoExtractor): - _VALID_URL = r'https?://www\.hotnewhiphop\.com/.*\.(?P<id>.*)\.html' + _VALID_URL = r'https?://(?:www\.)?hotnewhiphop\.com/.*\.(?P<id>.*)\.html' _TEST = { 'url': 'http://www.hotnewhiphop.com/freddie-gibbs-lay-it-down-song.1435540.html', 'md5': '2c2cd2f76ef11a9b3b581e8b232f3d96', diff --git a/youtube_dl/extractor/imdb.py b/youtube_dl/extractor/imdb.py 
index 3a6a6f5ad..f0fc8d49a 100644 --- a/youtube_dl/extractor/imdb.py +++ b/youtube_dl/extractor/imdb.py @@ -94,7 +94,7 @@ class ImdbIE(InfoExtractor): class ImdbListIE(InfoExtractor): IE_NAME = 'imdb:list' IE_DESC = 'Internet Movie Database lists' - _VALID_URL = r'https?://www\.imdb\.com/list/(?P<id>[\da-zA-Z_-]{11})' + _VALID_URL = r'https?://(?:www\.)?imdb\.com/list/(?P<id>[\da-zA-Z_-]{11})' _TEST = { 'url': 'http://www.imdb.com/list/JFs9NWw6XI0', 'info_dict': { diff --git a/youtube_dl/extractor/karaoketv.py b/youtube_dl/extractor/karaoketv.py index bad46005b..bfccf89b0 100644 --- a/youtube_dl/extractor/karaoketv.py +++ b/youtube_dl/extractor/karaoketv.py @@ -5,7 +5,7 @@ from .common import InfoExtractor class KaraoketvIE(InfoExtractor): - _VALID_URL = r'https?://www\.karaoketv\.co\.il/[^/]+/(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?karaoketv\.co\.il/[^/]+/(?P<id>\d+)' _TEST = { 'url': 'http://www.karaoketv.co.il/%D7%A9%D7%99%D7%A8%D7%99_%D7%A7%D7%A8%D7%99%D7%95%D7%A7%D7%99/58356/%D7%90%D7%99%D7%96%D7%95%D7%9F', 'info_dict': { diff --git a/youtube_dl/extractor/kickstarter.py b/youtube_dl/extractor/kickstarter.py index c61e78622..fbe499497 100644 --- a/youtube_dl/extractor/kickstarter.py +++ b/youtube_dl/extractor/kickstarter.py @@ -6,7 +6,7 @@ from ..utils import smuggle_url class KickStarterIE(InfoExtractor): - _VALID_URL = r'https?://www\.kickstarter\.com/projects/(?P<id>[^/]*)/.*' + _VALID_URL = r'https?://(?:www\.)?kickstarter\.com/projects/(?P<id>[^/]*)/.*' _TESTS = [{ 'url': 'https://www.kickstarter.com/projects/1404461844/intersection-the-story-of-josh-grant/description', 'md5': 'c81addca81327ffa66c642b5d8b08cab', diff --git a/youtube_dl/extractor/kuwo.py b/youtube_dl/extractor/kuwo.py index 0eeb9ffeb..ba621ca7b 100644 --- a/youtube_dl/extractor/kuwo.py +++ b/youtube_dl/extractor/kuwo.py @@ -59,7 +59,7 @@ class KuwoBaseIE(InfoExtractor): class KuwoIE(KuwoBaseIE): IE_NAME = 'kuwo:song' IE_DESC = '酷我音乐' - _VALID_URL = r'https?://www\.kuwo\.cn/yinyue/(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?kuwo\.cn/yinyue/(?P<id>\d+)' _TESTS = [{ 'url': 'http://www.kuwo.cn/yinyue/635632/', 'info_dict': { @@ -139,7 +139,7 @@ class KuwoIE(KuwoBaseIE): class KuwoAlbumIE(InfoExtractor): IE_NAME = 'kuwo:album' IE_DESC = '酷我音乐 - 专辑' - _VALID_URL = r'https?://www\.kuwo\.cn/album/(?P<id>\d+?)/' + _VALID_URL = r'https?://(?:www\.)?kuwo\.cn/album/(?P<id>\d+?)/' _TEST = { 'url': 'http://www.kuwo.cn/album/502294/', 'info_dict': { @@ -200,7 +200,7 @@ class KuwoChartIE(InfoExtractor): class KuwoSingerIE(InfoExtractor): IE_NAME = 'kuwo:singer' IE_DESC = '酷我音乐 - 歌手' - _VALID_URL = r'https?://www\.kuwo\.cn/mingxing/(?P<id>[^/]+)' + _VALID_URL = r'https?://(?:www\.)?kuwo\.cn/mingxing/(?P<id>[^/]+)' _TESTS = [{ 'url': 'http://www.kuwo.cn/mingxing/bruno+mars/', 'info_dict': { @@ -296,7 +296,7 @@ class KuwoCategoryIE(InfoExtractor): class KuwoMvIE(KuwoBaseIE): IE_NAME = 'kuwo:mv' IE_DESC = '酷我音乐 - MV' - _VALID_URL = r'https?://www\.kuwo\.cn/mv/(?P<id>\d+?)/' + _VALID_URL = r'https?://(?:www\.)?kuwo\.cn/mv/(?P<id>\d+?)/' _TEST = { 'url': 'http://www.kuwo.cn/mv/6480076/', 'info_dict': { diff --git a/youtube_dl/extractor/litv.py b/youtube_dl/extractor/litv.py index 05c6579f1..a3784e6c6 100644 --- a/youtube_dl/extractor/litv.py +++ b/youtube_dl/extractor/litv.py @@ -14,7 +14,7 @@ from ..utils import ( class LiTVIE(InfoExtractor): - _VALID_URL = r'https?://www\.litv\.tv/(?:vod|promo)/[^/]+/(?:content\.do)?\?.*?\b(?:content_)?id=(?P<id>[^&]+)' + _VALID_URL = 
r'https?://(?:www\.)?litv\.tv/(?:vod|promo)/[^/]+/(?:content\.do)?\?.*?\b(?:content_)?id=(?P<id>[^&]+)' _URL_TEMPLATE = 'https://www.litv.tv/vod/%s/content.do?id=%s' diff --git a/youtube_dl/extractor/lynda.py b/youtube_dl/extractor/lynda.py index a98c4c530..299873ecc 100644 --- a/youtube_dl/extractor/lynda.py +++ b/youtube_dl/extractor/lynda.py @@ -94,7 +94,7 @@ class LyndaBaseIE(InfoExtractor): class LyndaIE(LyndaBaseIE): IE_NAME = 'lynda' IE_DESC = 'lynda.com videos' - _VALID_URL = r'https?://www\.lynda\.com/(?:[^/]+/[^/]+/\d+|player/embed)/(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?lynda\.com/(?:[^/]+/[^/]+/\d+|player/embed)/(?P<id>\d+)' _TIMECODE_REGEX = r'\[(?P<timecode>\d+:\d+:\d+[\.,]\d+)\]' diff --git a/youtube_dl/extractor/macgamestore.py b/youtube_dl/extractor/macgamestore.py index 3cd4a3a19..43db9929c 100644 --- a/youtube_dl/extractor/macgamestore.py +++ b/youtube_dl/extractor/macgamestore.py @@ -7,7 +7,7 @@ from ..utils import ExtractorError class MacGameStoreIE(InfoExtractor): IE_NAME = 'macgamestore' IE_DESC = 'MacGameStore trailers' - _VALID_URL = r'https?://www\.macgamestore\.com/mediaviewer\.php\?trailer=(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?macgamestore\.com/mediaviewer\.php\?trailer=(?P<id>\d+)' _TEST = { 'url': 'http://www.macgamestore.com/mediaviewer.php?trailer=2450', diff --git a/youtube_dl/extractor/metacritic.py b/youtube_dl/extractor/metacritic.py index 444ec0310..7d468d78b 100644 --- a/youtube_dl/extractor/metacritic.py +++ b/youtube_dl/extractor/metacritic.py @@ -9,7 +9,7 @@ from ..utils import ( class MetacriticIE(InfoExtractor): - _VALID_URL = r'https?://www\.metacritic\.com/.+?/trailers/(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?metacritic\.com/.+?/trailers/(?P<id>\d+)' _TESTS = [{ 'url': 'http://www.metacritic.com/game/playstation-4/infamous-second-son/trailers/3698222', diff --git a/youtube_dl/extractor/mgtv.py b/youtube_dl/extractor/mgtv.py index 27bdff8b2..e0bb5d208 100644 --- a/youtube_dl/extractor/mgtv.py +++ b/youtube_dl/extractor/mgtv.py @@ -6,7 +6,7 @@ from ..utils import int_or_none class MGTVIE(InfoExtractor): - _VALID_URL = r'https?://www\.mgtv\.com/v/(?:[^/]+/)*(?P<id>\d+)\.html' + _VALID_URL = r'https?://(?:www\.)?mgtv\.com/v/(?:[^/]+/)*(?P<id>\d+)\.html' IE_DESC = '芒果TV' _TESTS = [{ diff --git a/youtube_dl/extractor/ministrygrid.py b/youtube_dl/extractor/ministrygrid.py index e48eba3fa..10190d5f6 100644 --- a/youtube_dl/extractor/ministrygrid.py +++ b/youtube_dl/extractor/ministrygrid.py @@ -8,7 +8,7 @@ from ..utils import ( class MinistryGridIE(InfoExtractor): - _VALID_URL = r'https?://www\.ministrygrid.com/([^/?#]*/)*(?P<id>[^/#?]+)/?(?:$|[?#])' + _VALID_URL = r'https?://(?:www\.)?ministrygrid\.com/([^/?#]*/)*(?P<id>[^/#?]+)/?(?:$|[?#])' _TEST = { 'url': 'http://www.ministrygrid.com/training-viewer/-/training/t4g-2014-conference/the-gospel-by-numbers-4/the-gospel-by-numbers', diff --git a/youtube_dl/extractor/mitele.py b/youtube_dl/extractor/mitele.py index cd169f361..2294745d4 100644 --- a/youtube_dl/extractor/mitele.py +++ b/youtube_dl/extractor/mitele.py @@ -74,7 +74,7 @@ class MiTeleBaseIE(InfoExtractor): class MiTeleIE(MiTeleBaseIE): IE_DESC = 'mitele.es' - _VALID_URL = r'https?://www\.mitele\.es/(?:[^/]+/){3}(?P<id>[^/]+)/' + _VALID_URL = r'https?://(?:www\.)?mitele\.es/(?:[^/]+/){3}(?P<id>[^/]+)/' _TESTS = [{ 'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/', diff --git a/youtube_dl/extractor/motorsport.py b/youtube_dl/extractor/motorsport.py index 370328b36..c9d1ab64d 100644 --- 
a/youtube_dl/extractor/motorsport.py +++ b/youtube_dl/extractor/motorsport.py @@ -9,7 +9,7 @@ from ..compat import ( class MotorsportIE(InfoExtractor): IE_DESC = 'motorsport.com' - _VALID_URL = r'https?://www\.motorsport\.com/[^/?#]+/video/(?:[^/?#]+/)(?P<id>[^/]+)/?(?:$|[?#])' + _VALID_URL = r'https?://(?:www\.)?motorsport\.com/[^/?#]+/video/(?:[^/?#]+/)(?P<id>[^/]+)/?(?:$|[?#])' _TEST = { 'url': 'http://www.motorsport.com/f1/video/main-gallery/red-bull-racing-2014-rules-explained/', 'info_dict': { diff --git a/youtube_dl/extractor/moviezine.py b/youtube_dl/extractor/moviezine.py index f130b75c4..aa091a62c 100644 --- a/youtube_dl/extractor/moviezine.py +++ b/youtube_dl/extractor/moviezine.py @@ -7,7 +7,7 @@ from .common import InfoExtractor class MoviezineIE(InfoExtractor): - _VALID_URL = r'https?://www\.moviezine\.se/video/(?P<id>[^?#]+)' + _VALID_URL = r'https?://(?:www\.)?moviezine\.se/video/(?P<id>[^?#]+)' _TEST = { 'url': 'http://www.moviezine.se/video/205866', diff --git a/youtube_dl/extractor/myspass.py b/youtube_dl/extractor/myspass.py index 1ca7b1a9e..2afe535b5 100644 --- a/youtube_dl/extractor/myspass.py +++ b/youtube_dl/extractor/myspass.py @@ -11,7 +11,7 @@ from ..utils import ( class MySpassIE(InfoExtractor): - _VALID_URL = r'https?://www\.myspass\.de/.*' + _VALID_URL = r'https?://(?:www\.)?myspass\.de/.*' _TEST = { 'url': 'http://www.myspass.de/myspass/shows/tvshows/absolute-mehrheit/Absolute-Mehrheit-vom-17022013-Die-Highlights-Teil-2--/11741/', 'md5': '0b49f4844a068f8b33f4b7c88405862b', diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index f37bf2f30..7f1bd9229 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -13,7 +13,7 @@ from ..utils import ( class NBCIE(InfoExtractor): - _VALID_URL = r'https?://www\.nbc\.com/(?:[^/]+/)+(?P<id>n?\d+)' + _VALID_URL = r'https?://(?:www\.)?nbc\.com/(?:[^/]+/)+(?P<id>n?\d+)' _TESTS = [ { @@ -138,7 +138,7 @@ class NBCSportsVPlayerIE(InfoExtractor): class NBCSportsIE(InfoExtractor): # Does not include https because its certificate is invalid - _VALID_URL = r'https?://www\.nbcsports\.com//?(?:[^/]+/)+(?P<id>[0-9a-z-]+)' + _VALID_URL = r'https?://(?:www\.)?nbcsports\.com//?(?:[^/]+/)+(?P<id>[0-9a-z-]+)' _TEST = { 'url': 'http://www.nbcsports.com//college-basketball/ncaab/tom-izzo-michigan-st-has-so-much-respect-duke', @@ -161,7 +161,7 @@ class NBCSportsIE(InfoExtractor): class CSNNEIE(InfoExtractor): - _VALID_URL = r'https?://www\.csnne\.com/video/(?P<id>[0-9a-z-]+)' + _VALID_URL = r'https?://(?:www\.)?csnne\.com/video/(?P<id>[0-9a-z-]+)' _TEST = { 'url': 'http://www.csnne.com/video/snc-evening-update-wright-named-red-sox-no-5-starter', diff --git a/youtube_dl/extractor/ndr.py b/youtube_dl/extractor/ndr.py index 0cded6b5c..e3b0da2e9 100644 --- a/youtube_dl/extractor/ndr.py +++ b/youtube_dl/extractor/ndr.py @@ -23,7 +23,7 @@ class NDRBaseIE(InfoExtractor): class NDRIE(NDRBaseIE): IE_NAME = 'ndr' IE_DESC = 'NDR.de - Norddeutscher Rundfunk' - _VALID_URL = r'https?://www\.ndr\.de/(?:[^/]+/)*(?P<id>[^/?#]+),[\da-z]+\.html' + _VALID_URL = r'https?://(?:www\.)?ndr\.de/(?:[^/]+/)*(?P<id>[^/?#]+),[\da-z]+\.html' _TESTS = [{ # httpVideo, same content id 'url': 'http://www.ndr.de/fernsehen/Party-Poette-und-Parade,hafengeburtstag988.html', @@ -105,7 +105,7 @@ class NDRIE(NDRBaseIE): class NJoyIE(NDRBaseIE): IE_NAME = 'njoy' IE_DESC = 'N-JOY' - _VALID_URL = r'https?://www\.n-joy\.de/(?:[^/]+/)*(?:(?P<display_id>[^/?#]+),)?(?P<id>[\da-z]+)\.html' + _VALID_URL = 
r'https?://(?:www\.)?n-joy\.de/(?:[^/]+/)*(?:(?P<display_id>[^/?#]+),)?(?P<id>[\da-z]+)\.html' _TESTS = [{ # httpVideo, same content id 'url': 'http://www.n-joy.de/entertainment/comedy/comedy_contest/Benaissa-beim-NDR-Comedy-Contest,comedycontest2480.html', @@ -238,7 +238,7 @@ class NDREmbedBaseIE(InfoExtractor): class NDREmbedIE(NDREmbedBaseIE): IE_NAME = 'ndr:embed' - _VALID_URL = r'https?://www\.ndr\.de/(?:[^/]+/)*(?P<id>[\da-z]+)-(?:player|externalPlayer)\.html' + _VALID_URL = r'https?://(?:www\.)?ndr\.de/(?:[^/]+/)*(?P<id>[\da-z]+)-(?:player|externalPlayer)\.html' _TESTS = [{ 'url': 'http://www.ndr.de/fernsehen/sendungen/ndr_aktuell/ndraktuell28488-player.html', 'md5': '8b9306142fe65bbdefb5ce24edb6b0a9', @@ -332,7 +332,7 @@ class NDREmbedIE(NDREmbedBaseIE): class NJoyEmbedIE(NDREmbedBaseIE): IE_NAME = 'njoy:embed' - _VALID_URL = r'https?://www\.n-joy\.de/(?:[^/]+/)*(?P<id>[\da-z]+)-(?:player|externalPlayer)_[^/]+\.html' + _VALID_URL = r'https?://(?:www\.)?n-joy\.de/(?:[^/]+/)*(?P<id>[\da-z]+)-(?:player|externalPlayer)_[^/]+\.html' _TESTS = [{ # httpVideo 'url': 'http://www.n-joy.de/events/reeperbahnfestival/doku948-player_image-bc168e87-5263-4d6d-bd27-bb643005a6de_theme-n-joy.html', diff --git a/youtube_dl/extractor/nextmedia.py b/youtube_dl/extractor/nextmedia.py index aae7aeeeb..a08e48c4b 100644 --- a/youtube_dl/extractor/nextmedia.py +++ b/youtube_dl/extractor/nextmedia.py @@ -7,7 +7,7 @@ from ..utils import parse_iso8601 class NextMediaIE(InfoExtractor): IE_DESC = '蘋果日報' - _VALID_URL = r'https?://hk.apple.nextmedia.com/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)' + _VALID_URL = r'https?://hk\.apple\.nextmedia\.com/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)' _TESTS = [{ 'url': 'http://hk.apple.nextmedia.com/realtime/news/20141108/53109199', 'md5': 'dff9fad7009311c421176d1ac90bfe4f', @@ -68,7 +68,7 @@ class NextMediaIE(InfoExtractor): class NextMediaActionNewsIE(NextMediaIE): IE_DESC = '蘋果日報 - 動新聞' - _VALID_URL = r'https?://hk.dv.nextmedia.com/actionnews/[^/]+/(?P<date>\d+)/(?P<id>\d+)/\d+' + _VALID_URL = r'https?://hk\.dv\.nextmedia\.com/actionnews/[^/]+/(?P<date>\d+)/(?P<id>\d+)/\d+' _TESTS = [{ 'url': 'http://hk.dv.nextmedia.com/actionnews/hit/20150121/19009428/20061460', 'md5': '05fce8ffeed7a5e00665d4b7cf0f9201', @@ -93,7 +93,7 @@ class NextMediaActionNewsIE(NextMediaIE): class AppleDailyIE(NextMediaIE): IE_DESC = '臺灣蘋果日報' - _VALID_URL = r'https?://(www|ent).appledaily.com.tw/(?:animation|appledaily|enews|realtimenews)/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)(/.*)?' + _VALID_URL = r'https?://(www|ent)\.appledaily\.com\.tw/(?:animation|appledaily|enews|realtimenews)/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)(/.*)?' 
_TESTS = [{ 'url': 'http://ent.appledaily.com.tw/enews/article/entertainment/20150128/36354694', 'md5': 'a843ab23d150977cc55ef94f1e2c1e4d', diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py index dd75a48af..6eaaa8416 100644 --- a/youtube_dl/extractor/niconico.py +++ b/youtube_dl/extractor/niconico.py @@ -252,7 +252,7 @@ class NiconicoIE(InfoExtractor): class NiconicoPlaylistIE(InfoExtractor): - _VALID_URL = r'https?://www\.nicovideo\.jp/mylist/(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/mylist/(?P<id>\d+)' _TEST = { 'url': 'http://www.nicovideo.jp/mylist/27411728', diff --git a/youtube_dl/extractor/oktoberfesttv.py b/youtube_dl/extractor/oktoberfesttv.py index 4a41c0542..f2ccc53dc 100644 --- a/youtube_dl/extractor/oktoberfesttv.py +++ b/youtube_dl/extractor/oktoberfesttv.py @@ -5,7 +5,7 @@ from .common import InfoExtractor class OktoberfestTVIE(InfoExtractor): - _VALID_URL = r'https?://www\.oktoberfest-tv\.de/[^/]+/[^/]+/video/(?P<id>[^/?#]+)' + _VALID_URL = r'https?://(?:www\.)?oktoberfest-tv\.de/[^/]+/[^/]+/video/(?P<id>[^/?#]+)' _TEST = { 'url': 'http://www.oktoberfest-tv.de/de/kameras/video/hb-zelt', diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 76316ca2f..c261a7455 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -13,7 +13,7 @@ from ..utils import ( class OpenloadIE(InfoExtractor): - _VALID_URL = r'https://openload.(?:co|io)/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)' + _VALID_URL = r'https?://openload\.(?:co|io)/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)' _TESTS = [{ 'url': 'https://openload.co/f/kUEfGclsU9o', diff --git a/youtube_dl/extractor/periscope.py b/youtube_dl/extractor/periscope.py index 6c640089d..eb1aeba46 100644 --- a/youtube_dl/extractor/periscope.py +++ b/youtube_dl/extractor/periscope.py @@ -94,7 +94,7 @@ class PeriscopeIE(PeriscopeBaseIE): class PeriscopeUserIE(PeriscopeBaseIE): - _VALID_URL = r'https?://www\.periscope\.tv/(?P<id>[^/]+)/?$' + _VALID_URL = r'https?://(?:www\.)?periscope\.tv/(?P<id>[^/]+)/?$' IE_DESC = 'Periscope user videos' IE_NAME = 'periscope:user' diff --git a/youtube_dl/extractor/playvid.py b/youtube_dl/extractor/playvid.py index 78d219299..79c2db085 100644 --- a/youtube_dl/extractor/playvid.py +++ b/youtube_dl/extractor/playvid.py @@ -14,7 +14,7 @@ from ..utils import ( class PlayvidIE(InfoExtractor): - _VALID_URL = r'https?://www\.playvid\.com/watch(\?v=|/)(?P<id>.+?)(?:#|$)' + _VALID_URL = r'https?://(?:www\.)?playvid\.com/watch(\?v=|/)(?P<id>.+?)(?:#|$)' _TESTS = [{ 'url': 'http://www.playvid.com/watch/RnmBNgtrrJu', 'md5': 'ffa2f6b2119af359f544388d8c01eb6c', diff --git a/youtube_dl/extractor/qqmusic.py b/youtube_dl/extractor/qqmusic.py index ff0af9543..37cb9e2c9 100644 --- a/youtube_dl/extractor/qqmusic.py +++ b/youtube_dl/extractor/qqmusic.py @@ -18,7 +18,7 @@ from ..utils import ( class QQMusicIE(InfoExtractor): IE_NAME = 'qqmusic' IE_DESC = 'QQ音乐' - _VALID_URL = r'https?://y.qq.com/#type=song&mid=(?P<id>[0-9A-Za-z]+)' + _VALID_URL = r'https?://y\.qq\.com/#type=song&mid=(?P<id>[0-9A-Za-z]+)' _TESTS = [{ 'url': 'http://y.qq.com/#type=song&mid=004295Et37taLD', 'md5': '9ce1c1c8445f561506d2e3cfb0255705', @@ -172,7 +172,7 @@ class QQPlaylistBaseIE(InfoExtractor): class QQMusicSingerIE(QQPlaylistBaseIE): IE_NAME = 'qqmusic:singer' IE_DESC = 'QQ音乐 - 歌手' - _VALID_URL = r'https?://y.qq.com/#type=singer&mid=(?P<id>[0-9A-Za-z]+)' + _VALID_URL = r'https?://y\.qq\.com/#type=singer&mid=(?P<id>[0-9A-Za-z]+)' _TEST = { 'url': 
'http://y.qq.com/#type=singer&mid=001BLpXF2DyJe2', 'info_dict': { @@ -217,7 +217,7 @@ class QQMusicSingerIE(QQPlaylistBaseIE): class QQMusicAlbumIE(QQPlaylistBaseIE): IE_NAME = 'qqmusic:album' IE_DESC = 'QQ音乐 - 专辑' - _VALID_URL = r'https?://y.qq.com/#type=album&mid=(?P<id>[0-9A-Za-z]+)' + _VALID_URL = r'https?://y\.qq\.com/#type=album&mid=(?P<id>[0-9A-Za-z]+)' _TESTS = [{ 'url': 'http://y.qq.com/#type=album&mid=000gXCTb2AhRR1', diff --git a/youtube_dl/extractor/rottentomatoes.py b/youtube_dl/extractor/rottentomatoes.py index 23abf7a27..1d404d20a 100644 --- a/youtube_dl/extractor/rottentomatoes.py +++ b/youtube_dl/extractor/rottentomatoes.py @@ -5,7 +5,7 @@ from .internetvideoarchive import InternetVideoArchiveIE class RottenTomatoesIE(InfoExtractor): - _VALID_URL = r'https?://www\.rottentomatoes\.com/m/[^/]+/trailers/(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?rottentomatoes\.com/m/[^/]+/trailers/(?P<id>\d+)' _TEST = { 'url': 'http://www.rottentomatoes.com/m/toy_story_3/trailers/11028566/', diff --git a/youtube_dl/extractor/roxwel.py b/youtube_dl/extractor/roxwel.py index 41638c1d0..65284643b 100644 --- a/youtube_dl/extractor/roxwel.py +++ b/youtube_dl/extractor/roxwel.py @@ -7,7 +7,7 @@ from ..utils import unified_strdate, determine_ext class RoxwelIE(InfoExtractor): - _VALID_URL = r'https?://www\.roxwel\.com/player/(?P<filename>.+?)(\.|\?|$)' + _VALID_URL = r'https?://(?:www\.)?roxwel\.com/player/(?P<filename>.+?)(\.|\?|$)' _TEST = { 'url': 'http://www.roxwel.com/player/passionpittakeawalklive.html', diff --git a/youtube_dl/extractor/rtve.py b/youtube_dl/extractor/rtve.py index 34f9c4a99..f1b92f6da 100644 --- a/youtube_dl/extractor/rtve.py +++ b/youtube_dl/extractor/rtve.py @@ -64,7 +64,7 @@ def _decrypt_url(png): class RTVEALaCartaIE(InfoExtractor): IE_NAME = 'rtve.es:alacarta' IE_DESC = 'RTVE a la carta' - _VALID_URL = r'https?://www\.rtve\.es/(m/)?(alacarta/videos|filmoteca)/[^/]+/[^/]+/(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?rtve\.es/(m/)?(alacarta/videos|filmoteca)/[^/]+/[^/]+/(?P<id>\d+)' _TESTS = [{ 'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/', @@ -184,7 +184,7 @@ class RTVEInfantilIE(InfoExtractor): class RTVELiveIE(InfoExtractor): IE_NAME = 'rtve.es:live' IE_DESC = 'RTVE.es live streams' - _VALID_URL = r'https?://www\.rtve\.es/directo/(?P<id>[a-zA-Z0-9-]+)' + _VALID_URL = r'https?://(?:www\.)?rtve\.es/directo/(?P<id>[a-zA-Z0-9-]+)' _TESTS = [{ 'url': 'http://www.rtve.es/directo/la-1/', @@ -226,7 +226,7 @@ class RTVELiveIE(InfoExtractor): class RTVETelevisionIE(InfoExtractor): IE_NAME = 'rtve.es:television' - _VALID_URL = r'https?://www\.rtve\.es/television/[^/]+/[^/]+/(?P<id>\d+).shtml' + _VALID_URL = r'https?://(?:www\.)?rtve\.es/television/[^/]+/[^/]+/(?P<id>\d+).shtml' _TEST = { 'url': 'http://www.rtve.es/television/20160628/revolucion-del-movil/1364141.shtml', diff --git a/youtube_dl/extractor/screenjunkies.py b/youtube_dl/extractor/screenjunkies.py index dd0a6ba19..02e574cd8 100644 --- a/youtube_dl/extractor/screenjunkies.py +++ b/youtube_dl/extractor/screenjunkies.py @@ -11,7 +11,7 @@ from ..utils import ( class ScreenJunkiesIE(InfoExtractor): - _VALID_URL = r'https?://www.screenjunkies.com/video/(?P<display_id>[^/]+?)(?:-(?P<id>\d+))?(?:[/?#&]|$)' + _VALID_URL = r'https?://(?:www\.)?screenjunkies\.com/video/(?P<display_id>[^/]+?)(?:-(?P<id>\d+))?(?:[/?#&]|$)' _TESTS = [{ 'url': 'http://www.screenjunkies.com/video/best-quentin-tarantino-movie-2841915', 'md5': '5c2b686bec3d43de42bde9ec047536b0', diff 
--git a/youtube_dl/extractor/senateisvp.py b/youtube_dl/extractor/senateisvp.py index c5f474dd1..35540c082 100644 --- a/youtube_dl/extractor/senateisvp.py +++ b/youtube_dl/extractor/senateisvp.py @@ -48,7 +48,7 @@ class SenateISVPIE(InfoExtractor): ['arch', '', 'http://ussenate-f.akamaihd.net/'] ] _IE_NAME = 'senate.gov' - _VALID_URL = r'https?://www\.senate\.gov/isvp/?\?(?P<qs>.+)' + _VALID_URL = r'https?://(?:www\.)?senate\.gov/isvp/?\?(?P<qs>.+)' _TESTS = [{ 'url': 'http://www.senate.gov/isvp/?comm=judiciary&type=live&stt=&filename=judiciary031715&auto_play=false&wmode=transparent&poster=http%3A%2F%2Fwww.judiciary.senate.gov%2Fthemes%2Fjudiciary%2Fimages%2Fvideo-poster-flash-fit.png', 'info_dict': { diff --git a/youtube_dl/extractor/slideshare.py b/youtube_dl/extractor/slideshare.py index 4967c1b77..74a1dc672 100644 --- a/youtube_dl/extractor/slideshare.py +++ b/youtube_dl/extractor/slideshare.py @@ -14,7 +14,7 @@ from ..utils import ( class SlideshareIE(InfoExtractor): - _VALID_URL = r'https?://www\.slideshare\.net/[^/]+?/(?P<title>.+?)($|\?)' + _VALID_URL = r'https?://(?:www\.)?slideshare\.net/[^/]+?/(?P<title>.+?)($|\?)' _TEST = { 'url': 'http://www.slideshare.net/Dataversity/keynote-presentation-managing-scale-and-complexity', diff --git a/youtube_dl/extractor/spiegel.py b/youtube_dl/extractor/spiegel.py index 74cb3a08a..b41d9f59f 100644 --- a/youtube_dl/extractor/spiegel.py +++ b/youtube_dl/extractor/spiegel.py @@ -103,7 +103,7 @@ class SpiegelIE(InfoExtractor): class SpiegelArticleIE(InfoExtractor): - _VALID_URL = r'https?://www\.spiegel\.de/(?!video/)[^?#]*?-(?P<id>[0-9]+)\.html' + _VALID_URL = r'https?://(?:www\.)?spiegel\.de/(?!video/)[^?#]*?-(?P<id>[0-9]+)\.html' IE_NAME = 'Spiegel:Article' IE_DESC = 'Articles on spiegel.de' _TESTS = [{ diff --git a/youtube_dl/extractor/syfy.py b/youtube_dl/extractor/syfy.py index ab8bab5cd..def7e5a2c 100644 --- a/youtube_dl/extractor/syfy.py +++ b/youtube_dl/extractor/syfy.py @@ -8,7 +8,7 @@ from ..utils import ( class SyfyIE(AdobePassIE): - _VALID_URL = r'https?://www\.syfy\.com/(?:[^/]+/)?videos/(?P<id>[^/?#]+)' + _VALID_URL = r'https?://(?:www\.)?syfy\.com/(?:[^/]+/)?videos/(?P<id>[^/?#]+)' _TESTS = [{ 'url': 'http://www.syfy.com/theinternetruinedmylife/videos/the-internet-ruined-my-life-season-1-trailer', 'info_dict': { diff --git a/youtube_dl/extractor/teachingchannel.py b/youtube_dl/extractor/teachingchannel.py index d14d93e3a..e89759714 100644 --- a/youtube_dl/extractor/teachingchannel.py +++ b/youtube_dl/extractor/teachingchannel.py @@ -7,7 +7,7 @@ from .ooyala import OoyalaIE class TeachingChannelIE(InfoExtractor): - _VALID_URL = r'https?://www\.teachingchannel\.org/videos/(?P<title>.+)' + _VALID_URL = r'https?://(?:www\.)?teachingchannel\.org/videos/(?P<title>.+)' _TEST = { 'url': 'https://www.teachingchannel.org/videos/teacher-teaming-evolution', diff --git a/youtube_dl/extractor/telecinco.py b/youtube_dl/extractor/telecinco.py index 2ecfd0405..d5abfc9e4 100644 --- a/youtube_dl/extractor/telecinco.py +++ b/youtube_dl/extractor/telecinco.py @@ -6,7 +6,7 @@ from .mitele import MiTeleBaseIE class TelecincoIE(MiTeleBaseIE): IE_DESC = 'telecinco.es, cuatro.com and mediaset.es' - _VALID_URL = r'https?://www\.(?:telecinco\.es|cuatro\.com|mediaset\.es)/(?:[^/]+/)+(?P<id>.+?)\.html' + _VALID_URL = r'https?://(?:www\.)?(?:telecinco\.es|cuatro\.com|mediaset\.es)/(?:[^/]+/)+(?P<id>.+?)\.html' _TESTS = [{ 'url': 'http://www.telecinco.es/robinfood/temporada-01/t01xp14/Bacalao-cocochas-pil-pil_0_1876350223.html', diff --git 
a/youtube_dl/extractor/telewebion.py b/youtube_dl/extractor/telewebion.py index 77916c601..7786b2813 100644 --- a/youtube_dl/extractor/telewebion.py +++ b/youtube_dl/extractor/telewebion.py @@ -5,7 +5,7 @@ from .common import InfoExtractor class TelewebionIE(InfoExtractor): - _VALID_URL = r'https?://www\.telewebion\.com/#!/episode/(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?telewebion\.com/#!/episode/(?P<id>\d+)' _TEST = { 'url': 'http://www.telewebion.com/#!/episode/1263668/', diff --git a/youtube_dl/extractor/theintercept.py b/youtube_dl/extractor/theintercept.py index 8cb3c3669..ec6f4ecaa 100644 --- a/youtube_dl/extractor/theintercept.py +++ b/youtube_dl/extractor/theintercept.py @@ -11,7 +11,7 @@ from ..utils import ( class TheInterceptIE(InfoExtractor): - _VALID_URL = r'https://theintercept.com/fieldofvision/(?P<id>[^/?#]+)' + _VALID_URL = r'https?://theintercept\.com/fieldofvision/(?P<id>[^/?#]+)' _TESTS = [{ 'url': 'https://theintercept.com/fieldofvision/thisisacoup-episode-four-surrender-or-die/', 'md5': '145f28b41d44aab2f87c0a4ac8ec95bd', diff --git a/youtube_dl/extractor/thescene.py b/youtube_dl/extractor/thescene.py index 3e4e14031..ce1326c03 100644 --- a/youtube_dl/extractor/thescene.py +++ b/youtube_dl/extractor/thescene.py @@ -7,7 +7,7 @@ from ..utils import qualities class TheSceneIE(InfoExtractor): - _VALID_URL = r'https://thescene\.com/watch/[^/]+/(?P<id>[^/#?]+)' + _VALID_URL = r'https?://thescene\.com/watch/[^/]+/(?P<id>[^/#?]+)' _TEST = { 'url': 'https://thescene.com/watch/vogue/narciso-rodriguez-spring-2013-ready-to-wear', diff --git a/youtube_dl/extractor/tlc.py b/youtube_dl/extractor/tlc.py index 88eb83d74..ce4f91f46 100644 --- a/youtube_dl/extractor/tlc.py +++ b/youtube_dl/extractor/tlc.py @@ -13,7 +13,7 @@ from ..compat import ( class TlcDeIE(InfoExtractor): IE_NAME = 'tlc.de' - _VALID_URL = r'https?://www\.tlc\.de/(?:[^/]+/)*videos/(?P<title>[^/?#]+)?(?:.*#(?P<id>\d+))?' + _VALID_URL = r'https?://(?:www\.)?tlc\.de/(?:[^/]+/)*videos/(?P<title>[^/?#]+)?(?:.*#(?P<id>\d+))?' 
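A side note on the Spiegel hunk a few lines up: besides gaining the optional www., the pattern keeps its negative lookahead (?!video/), which is what stops the article extractor from claiming /video/ pages (those are presumably meant for the plain Spiegel extractor instead). A tiny standalone illustration, with made-up article and clip paths rather than real test URLs:

    import re

    article = re.compile(
        r'https?://(?:www\.)?spiegel\.de/(?!video/)[^?#]*?-(?P<id>[0-9]+)\.html')

    # An article page matches...
    print(bool(article.match('http://www.spiegel.de/politik/some-story-12345.html')))  # True
    # ...but a /video/ page is rejected by the lookahead and left to another extractor.
    print(bool(article.match('http://www.spiegel.de/video/some-clip-67890.html')))     # False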
_TEST = { 'url': 'http://www.tlc.de/sendungen/breaking-amish/videos/#3235167922001', diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py index 89b869559..c2f507233 100644 --- a/youtube_dl/extractor/udemy.py +++ b/youtube_dl/extractor/udemy.py @@ -307,7 +307,7 @@ class UdemyIE(InfoExtractor): class UdemyCourseIE(UdemyIE): IE_NAME = 'udemy:course' - _VALID_URL = r'https?://www\.udemy\.com/(?P<id>[^/?#&]+)' + _VALID_URL = r'https?://(?:www\.)?udemy\.com/(?P<id>[^/?#&]+)' _TESTS = [] @classmethod diff --git a/youtube_dl/extractor/ustream.py b/youtube_dl/extractor/ustream.py index 54605d863..a3dc9d33e 100644 --- a/youtube_dl/extractor/ustream.py +++ b/youtube_dl/extractor/ustream.py @@ -14,7 +14,7 @@ from ..utils import ( class UstreamIE(InfoExtractor): - _VALID_URL = r'https?://www\.ustream\.tv/(?P<type>recorded|embed|embed/recorded)/(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?ustream\.tv/(?P<type>recorded|embed|embed/recorded)/(?P<id>\d+)' IE_NAME = 'ustream' _TESTS = [{ 'url': 'http://www.ustream.tv/recorded/20274954', @@ -117,7 +117,7 @@ class UstreamIE(InfoExtractor): class UstreamChannelIE(InfoExtractor): - _VALID_URL = r'https?://www\.ustream\.tv/channel/(?P<slug>.+)' + _VALID_URL = r'https?://(?:www\.)?ustream\.tv/channel/(?P<slug>.+)' IE_NAME = 'ustream:channel' _TEST = { 'url': 'http://www.ustream.tv/channel/channeljapan', diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index 388b4debe..783efda7d 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -31,7 +31,7 @@ class VevoIE(VevoBaseIE): (currently used by MTVIE and MySpaceIE) ''' _VALID_URL = r'''(?x) - (?:https?://www\.vevo\.com/watch/(?!playlist|genre)(?:[^/]+/(?:[^/]+/)?)?| + (?:https?://(?:www\.)?vevo\.com/watch/(?!playlist|genre)(?:[^/]+/(?:[^/]+/)?)?| https?://cache\.vevo\.com/m/html/embed\.html\?video=| https?://videoplayer\.vevo\.com/embed/embedded\?videoId=| vevo:) @@ -374,7 +374,7 @@ class VevoIE(VevoBaseIE): class VevoPlaylistIE(VevoBaseIE): - _VALID_URL = r'https?://www\.vevo\.com/watch/(?P<kind>playlist|genre)/(?P<id>[^/?#&]+)' + _VALID_URL = r'https?://(?:www\.)?vevo\.com/watch/(?P<kind>playlist|genre)/(?P<id>[^/?#&]+)' _TESTS = [{ 'url': 'http://www.vevo.com/watch/playlist/dadbf4e7-b99f-4184-9670-6f0e547b6a29', diff --git a/youtube_dl/extractor/videodetective.py b/youtube_dl/extractor/videodetective.py index 2ed5d9643..a19411a05 100644 --- a/youtube_dl/extractor/videodetective.py +++ b/youtube_dl/extractor/videodetective.py @@ -6,7 +6,7 @@ from .internetvideoarchive import InternetVideoArchiveIE class VideoDetectiveIE(InfoExtractor): - _VALID_URL = r'https?://www\.videodetective\.com/[^/]+/[^/]+/(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?videodetective\.com/[^/]+/[^/]+/(?P<id>\d+)' _TEST = { 'url': 'http://www.videodetective.com/movies/kick-ass-2/194487', diff --git a/youtube_dl/extractor/weiqitv.py b/youtube_dl/extractor/weiqitv.py index 3dafbeec2..8e09156c2 100644 --- a/youtube_dl/extractor/weiqitv.py +++ b/youtube_dl/extractor/weiqitv.py @@ -6,7 +6,7 @@ from .common import InfoExtractor class WeiqiTVIE(InfoExtractor): IE_DESC = 'WQTV' - _VALID_URL = r'https?://www\.weiqitv\.com/index/video_play\?videoId=(?P<id>[A-Za-z0-9]+)' + _VALID_URL = r'https?://(?:www\.)?weiqitv\.com/index/video_play\?videoId=(?P<id>[A-Za-z0-9]+)' _TESTS = [{ 'url': 'http://www.weiqitv.com/index/video_play?videoId=53c744f09874f0e76a8b46f3', diff --git a/youtube_dl/extractor/yam.py b/youtube_dl/extractor/yam.py index 63bbc0634..ef5535547 100644 --- 
a/youtube_dl/extractor/yam.py +++ b/youtube_dl/extractor/yam.py @@ -15,7 +15,7 @@ from ..utils import ( class YamIE(InfoExtractor): IE_DESC = '蕃薯藤yam天空部落' - _VALID_URL = r'https?://mymedia.yam.com/m/(?P<id>\d+)' + _VALID_URL = r'https?://mymedia\.yam\.com/m/(?P<id>\d+)' _TESTS = [{ # An audio hosted on Yam diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 5082cb589..5ca903825 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -2302,7 +2302,7 @@ class YoutubeSearchURLIE(YoutubePlaylistBaseInfoExtractor): class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor): IE_DESC = 'YouTube.com (multi-season) shows' - _VALID_URL = r'https?://www\.youtube\.com/show/(?P<id>[^?#]*)' + _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)' IE_NAME = 'youtube:show' _TESTS = [{ 'url': 'https://www.youtube.com/show/airdisasters', @@ -2371,7 +2371,7 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor): class YoutubeWatchLaterIE(YoutubePlaylistIE): IE_NAME = 'youtube:watchlater' IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)' - _VALID_URL = r'https?://www\.youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater' + _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater' _TESTS = [{ 'url': 'https://www.youtube.com/playlist?list=WL', @@ -2392,7 +2392,7 @@ class YoutubeWatchLaterIE(YoutubePlaylistIE): class YoutubeFavouritesIE(YoutubeBaseInfoExtractor): IE_NAME = 'youtube:favorites' IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)' - _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?' + _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?' _LOGIN_REQUIRED = True def _real_extract(self, url): @@ -2403,21 +2403,21 @@ class YoutubeFavouritesIE(YoutubeBaseInfoExtractor): class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor): IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)' - _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?' + _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/recommended|:ytrec(?:ommended)?' _FEED_NAME = 'recommended' _PLAYLIST_TITLE = 'Youtube Recommended videos' class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor): IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)' - _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?' + _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?' 
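All of the _VALID_URL hunks in this stretch apply the same two mechanical fixes: literal dots in the host name are escaped, and the www. prefix is wrapped in (?:www\.)? so that bare-domain links are accepted too. A minimal sketch of why the second change matters, using a hypothetical example.com pattern rather than any of the real ones:

    import re

    old = re.compile(r'https?://www\.example\.com/videos/(?P<id>\d+)')       # www. required
    new = re.compile(r'https?://(?:www\.)?example\.com/videos/(?P<id>\d+)')  # www. optional

    for url in ('http://www.example.com/videos/123',
                'https://example.com/videos/123'):
        print(url, bool(old.match(url)), bool(new.match(url)))
    # Only the relaxed pattern matches the second, www-less URL.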
_FEED_NAME = 'subscriptions' _PLAYLIST_TITLE = 'Youtube Subscriptions' class YoutubeHistoryIE(YoutubeFeedsInfoExtractor): IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)' - _VALID_URL = r'https?://www\.youtube\.com/feed/history|:ythistory' + _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory' _FEED_NAME = 'history' _PLAYLIST_TITLE = 'Youtube History' From 014b7e6b25be5583c772af054cd7a1e37a327088 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 14 Sep 2016 17:07:05 +0100 Subject: [PATCH 470/775] [go] add support for free full episodes(#10439) --- youtube_dl/extractor/go.py | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/go.py b/youtube_dl/extractor/go.py index 6a437c54d..7925c1e22 100644 --- a/youtube_dl/extractor/go.py +++ b/youtube_dl/extractor/go.py @@ -8,6 +8,8 @@ from ..utils import ( int_or_none, determine_ext, parse_age_limit, + urlencode_postdata, + ExtractorError, ) @@ -19,7 +21,7 @@ class GoIE(InfoExtractor): 'watchdisneyjunior': '008', 'watchdisneyxd': '009', } - _VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/.*?vdka(?P<id>\w+)' % '|'.join(_BRANDS.keys()) + _VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:[^/]+/)*(?:vdka(?P<id>\w+)|season-\d+/\d+-(?P<display_id>[^/?#]+))' % '|'.join(_BRANDS.keys()) _TESTS = [{ 'url': 'http://abc.go.com/shows/castle/video/most-recent/vdka0_g86w5onx', 'info_dict': { @@ -38,9 +40,13 @@ class GoIE(InfoExtractor): }] def _real_extract(self, url): - sub_domain, video_id = re.match(self._VALID_URL, url).groups() + sub_domain, video_id, display_id = re.match(self._VALID_URL, url).groups() + if not video_id: + webpage = self._download_webpage(url, display_id) + video_id = self._search_regex(r'data-video-id=["\']VDKA(\w+)', webpage, 'video id') + brand = self._BRANDS[sub_domain] video_data = self._download_json( - 'http://api.contents.watchabc.go.com/vp2/ws/contents/3000/videos/%s/001/-1/-1/-1/%s/-1/-1.json' % (self._BRANDS[sub_domain], video_id), + 'http://api.contents.watchabc.go.com/vp2/ws/contents/3000/videos/%s/001/-1/-1/-1/%s/-1/-1.json' % (brand, video_id), video_id)['video'][0] title = video_data['title'] @@ -52,6 +58,21 @@ class GoIE(InfoExtractor): format_id = asset.get('format') ext = determine_ext(asset_url) if ext == 'm3u8': + video_type = video_data.get('type') + if video_type == 'lf': + entitlement = self._download_json( + 'https://api.entitlement.watchabc.go.com/vp2/ws-secure/entitlement/2020/authorize.json', + video_id, data=urlencode_postdata({ + 'video_id': video_data['id'], + 'video_type': video_type, + 'brand': brand, + 'device': '001', + })) + errors = entitlement.get('errors', {}).get('errors', []) + if errors: + error_massege = ', '.join([error['message'] for error in errors]) + raise ExtractorError('%s said: %s' % (self.IE_NAME, error_massege), expected=True) + asset_url += '?' 
+ entitlement['uplynkData']['sessionKey'] formats.extend(self._extract_m3u8_formats( asset_url, video_id, 'mp4', m3u8_id=format_id or 'hls', fatal=False)) else: From 353f340e11d7fc4a0a4973ddd85bc93b1061a487 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 14 Sep 2016 17:22:42 +0100 Subject: [PATCH 471/775] [go] fix typo --- youtube_dl/extractor/go.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/go.py b/youtube_dl/extractor/go.py index 7925c1e22..c7776b186 100644 --- a/youtube_dl/extractor/go.py +++ b/youtube_dl/extractor/go.py @@ -70,8 +70,8 @@ class GoIE(InfoExtractor): })) errors = entitlement.get('errors', {}).get('errors', []) if errors: - error_massege = ', '.join([error['message'] for error in errors]) - raise ExtractorError('%s said: %s' % (self.IE_NAME, error_massege), expected=True) + error_message = ', '.join([error['message'] for error in errors]) + raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True) asset_url += '?' + entitlement['uplynkData']['sessionKey'] formats.extend(self._extract_m3u8_formats( asset_url, video_id, 'mp4', m3u8_id=format_id or 'hls', fatal=False)) From 6db354a9f4c62c3cc47918adc13e1e4b63146c80 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 15 Sep 2016 00:53:04 +0800 Subject: [PATCH 472/775] [kuwo] Update _TESTS --- youtube_dl/extractor/kuwo.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/kuwo.py b/youtube_dl/extractor/kuwo.py index ba621ca7b..081af86f6 100644 --- a/youtube_dl/extractor/kuwo.py +++ b/youtube_dl/extractor/kuwo.py @@ -82,7 +82,7 @@ class KuwoIE(KuwoBaseIE): 'upload_date': '20150518', }, 'params': { - 'format': 'mp3-320' + 'format': 'mp3-320', }, }, { 'url': 'http://www.kuwo.cn/yinyue/3197154?catalog=yueku2016', @@ -181,7 +181,7 @@ class KuwoChartIE(InfoExtractor): 'info_dict': { 'id': '香港中文龙虎榜', }, - 'playlist_mincount': 10, + 'playlist_mincount': 7, } def _real_extract(self, url): @@ -303,7 +303,7 @@ class KuwoMvIE(KuwoBaseIE): 'id': '6480076', 'ext': 'mp4', 'title': 'My HouseMV', - 'creator': 'PM02:00', + 'creator': '2PM', }, # In this video, music URLs (anti.s) are blocked outside China and # USA, while the MV URL (mvurl) is available globally, so force the MV From 961516bfd1f3b514859f03766d282824ba8a76f5 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 15 Sep 2016 00:56:15 +0800 Subject: [PATCH 473/775] [kwuo:song] Improve error detection (closes #10650) --- ChangeLog | 1 + youtube_dl/extractor/kuwo.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index 25c916eb2..c3c8bf037 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version <unreleased> Extractors +* [kwuo] Improve error detection (#10650) * [bilibili] Fix extraction for specific videos (#10647) + [nbc] Add support for NBC Olympics (#10361) diff --git a/youtube_dl/extractor/kuwo.py b/youtube_dl/extractor/kuwo.py index 081af86f6..63e10125e 100644 --- a/youtube_dl/extractor/kuwo.py +++ b/youtube_dl/extractor/kuwo.py @@ -91,10 +91,10 @@ class KuwoIE(KuwoBaseIE): def _real_extract(self, url): song_id = self._match_id(url) - webpage = self._download_webpage( + webpage, urlh = self._download_webpage_handle( url, song_id, note='Download song detail info', errnote='Unable to get song detail info') - if '对不起,该歌曲由于版权问题已被下线,将返回网站首页' in webpage: + if song_id not in urlh.geturl() or '对不起,该歌曲由于版权问题已被下线,将返回网站首页' in webpage: raise 
ExtractorError('this song has been offline because of copyright issues', expected=True) song_name = self._html_search_regex( From a942d6cb48994c5ff14ccef8773fb086a5544970 Mon Sep 17 00:00:00 2001 From: renalid <renaud.euvrard@MAC-1636.local> Date: Fri, 2 Sep 2016 18:31:52 +0200 Subject: [PATCH 474/775] [utils,franceinter] Add french months' names and fix extraction Update of the "FranceInter" radio extractor : webpages HTML structure had changed, the extractor didn't work. So I updated this extractor to get the mp3 URL and all details. --- youtube_dl/extractor/franceinter.py | 38 ++++++++++++++++------------- youtube_dl/utils.py | 13 ++++++++-- 2 files changed, 32 insertions(+), 19 deletions(-) diff --git a/youtube_dl/extractor/franceinter.py b/youtube_dl/extractor/franceinter.py index 2369f868d..6dad8d712 100644 --- a/youtube_dl/extractor/franceinter.py +++ b/youtube_dl/extractor/franceinter.py @@ -2,20 +2,24 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import int_or_none +from ..utils import ( + unified_timestamp, + month_by_name, +) class FranceInterIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?franceinter\.fr/player/reecouter\?play=(?P<id>[0-9]+)' + _VALID_URL = r'https?://(?:www\.)?franceinter\.fr/emissions/(?P<id>[^?#]+)' + _TEST = { - 'url': 'http://www.franceinter.fr/player/reecouter?play=793962', + 'url': 'https://www.franceinter.fr/emissions/la-marche-de-l-histoire/la-marche-de-l-histoire-18-decembre-2013', 'md5': '4764932e466e6f6c79c317d2e74f6884', 'info_dict': { - 'id': '793962', + 'id': 'la-marche-de-l-histoire/la-marche-de-l-histoire-18-decembre-2013', 'ext': 'mp3', - 'title': 'L’Histoire dans les jeux vidéo', - 'description': 'md5:7e93ddb4451e7530022792240a3049c7', - 'timestamp': 1387369800, + 'title': 'L’Histoire dans les jeux vidéo du 18 décembre 2013 - France Inter', + 'description': 'L’Histoire dans les jeux vidéo du 18 décembre 2013 par Jean Lebrun en replay sur France Inter. 
Retrouvez l\'émission en réécoute gratuite et abonnez-vous au podcast !', + 'timestamp': 1387324800, 'upload_date': '20131218', }, } @@ -25,17 +29,17 @@ class FranceInterIE(InfoExtractor): webpage = self._download_webpage(url, video_id) - path = self._search_regex( - r'<a id="player".+?href="([^"]+)"', webpage, 'video url') - video_url = 'http://www.franceinter.fr/' + path + video_url = self._search_regex( + r'<button class="replay-button playable" data-is-aod="1" data-url="([^"]+)"', webpage, 'video url') - title = self._html_search_regex( - r'<span class="title-diffusion">(.+?)</span>', webpage, 'title') - description = self._html_search_regex( - r'<span class="description">(.*?)</span>', - webpage, 'description', fatal=False) - timestamp = int_or_none(self._search_regex( - r'data-date="(\d+)"', webpage, 'upload date', fatal=False)) + title = self._og_search_title(webpage) + description = self._og_search_description(webpage) + + extractdate = self._search_regex('(\d{2}-([a-zA-Z\s]+)-\d{4}$)', url, 'extractdate', fatal=False) + extractdate = extractdate.split('-') + extractdate = extractdate[2] + "," + str(month_by_name(extractdate[1], 'fr')) + "," + extractdate[0] + + timestamp = unified_timestamp(extractdate) return { 'id': video_id, diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index ed199c4ad..623ced625 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -91,6 +91,10 @@ ENGLISH_MONTH_NAMES = [ 'January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December'] +FRENCH_MONTH_NAMES = [ + 'janvier', 'fevrier', 'mars', 'avril', 'mai', 'juin', + 'juillet', 'aout', 'septembre', 'octobre', 'novembre', 'decembre'] + KNOWN_EXTENSIONS = ( 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac', 'flv', 'f4v', 'f4a', 'f4b', @@ -1587,11 +1591,16 @@ def parse_count(s): return lookup_unit_table(_UNIT_TABLE, s) -def month_by_name(name): +def month_by_name(name, lang='en'): """ Return the number of a month by (locale-independently) English name """ + name_list = ENGLISH_MONTH_NAMES + + if lang == 'fr': + name_list = FRENCH_MONTH_NAMES + try: - return ENGLISH_MONTH_NAMES.index(name) + 1 + return name_list.index(name) + 1 except ValueError: return None From f6717dec8abe7c0d34e704732b53665a9415fa2e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 14 Sep 2016 23:13:55 +0700 Subject: [PATCH 475/775] [utils] Improve month_by_name and add tests --- test/test_utils.py | 11 +++++++++++ youtube_dl/utils.py | 16 ++++++++-------- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 405c5d351..4ebca8744 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -40,6 +40,7 @@ from youtube_dl.utils import ( js_to_json, limit_length, mimetype2ext, + month_by_name, ohdave_rsa_encrypt, OnDemandPagedList, orderedSet, @@ -634,6 +635,16 @@ class TestUtil(unittest.TestCase): self.assertEqual(mimetype2ext('text/vtt;charset=utf-8'), 'vtt') self.assertEqual(mimetype2ext('text/html; charset=utf-8'), 'html') + def test_month_by_name(self): + self.assertEqual(month_by_name(None), None) + self.assertEqual(month_by_name('December', 'en'), 12) + self.assertEqual(month_by_name('decembre', 'fr'), 12) + self.assertEqual(month_by_name('December'), 12) + self.assertEqual(month_by_name('decembre'), None) + self.assertEqual(month_by_name('Unknown', 'unknown'), None) + + def test_m + def test_parse_codecs(self): self.assertEqual(parse_codecs(''), {}) 
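The franceinter/utils patches in this stretch boil down to a locale-aware month lookup, so a scraped French broadcast date can become an upload_date. A rough standalone sketch of the final shape of that helper; the month lists mirror the ones added to youtube_dl/utils.py in the follow-up patches below, while the sample date string is only illustrative:

    # -*- coding: utf-8 -*-
    MONTH_NAMES = {
        'en': ['January', 'February', 'March', 'April', 'May', 'June',
               'July', 'August', 'September', 'October', 'November', 'December'],
        'fr': ['janvier', 'février', 'mars', 'avril', 'mai', 'juin',
               'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
    }

    def month_by_name(name, lang='en'):
        # Unknown languages fall back to English; unknown names return None.
        names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
        try:
            return names.index(name) + 1
        except ValueError:
            return None

    # Turning a scraped date like '18 décembre 2013' into a YYYYMMDD upload_date:
    day, month, year = '18 décembre 2013'.split()
    print('%s%02d%s' % (year, month_by_name(month, 'fr'), day))  # 20131218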
self.assertEqual(parse_codecs('avc1.77.30, mp4a.40.2'), { diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 623ced625..a4ef15908 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -91,9 +91,12 @@ ENGLISH_MONTH_NAMES = [ 'January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December'] -FRENCH_MONTH_NAMES = [ - 'janvier', 'fevrier', 'mars', 'avril', 'mai', 'juin', - 'juillet', 'aout', 'septembre', 'octobre', 'novembre', 'decembre'] +MONTH_NAMES = { + 'en': ENGLISH_MONTH_NAMES, + 'fr': [ + 'janvier', 'fevrier', 'mars', 'avril', 'mai', 'juin', + 'juillet', 'aout', 'septembre', 'octobre', 'novembre', 'decembre'], +} KNOWN_EXTENSIONS = ( 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac', @@ -1594,13 +1597,10 @@ def parse_count(s): def month_by_name(name, lang='en'): """ Return the number of a month by (locale-independently) English name """ - name_list = ENGLISH_MONTH_NAMES - - if lang == 'fr': - name_list = FRENCH_MONTH_NAMES + month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en']) try: - return name_list.index(name) + 1 + return month_names.index(name) + 1 except ValueError: return None From 3e4185c3965579c2cc10922384694c2465be4557 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 14 Sep 2016 23:57:01 +0700 Subject: [PATCH 476/775] [utils] Use native french month names --- test/test_utils.py | 6 ++---- youtube_dl/utils.py | 4 ++-- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 4ebca8744..9789d8611 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -638,13 +638,11 @@ class TestUtil(unittest.TestCase): def test_month_by_name(self): self.assertEqual(month_by_name(None), None) self.assertEqual(month_by_name('December', 'en'), 12) - self.assertEqual(month_by_name('decembre', 'fr'), 12) + self.assertEqual(month_by_name('décembre', 'fr'), 12) self.assertEqual(month_by_name('December'), 12) - self.assertEqual(month_by_name('decembre'), None) + self.assertEqual(month_by_name('décembre'), None) self.assertEqual(month_by_name('Unknown', 'unknown'), None) - def test_m - def test_parse_codecs(self): self.assertEqual(parse_codecs(''), {}) self.assertEqual(parse_codecs('avc1.77.30, mp4a.40.2'), { diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index a4ef15908..69ca88c85 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -94,8 +94,8 @@ ENGLISH_MONTH_NAMES = [ MONTH_NAMES = { 'en': ENGLISH_MONTH_NAMES, 'fr': [ - 'janvier', 'fevrier', 'mars', 'avril', 'mai', 'juin', - 'juillet', 'aout', 'septembre', 'octobre', 'novembre', 'decembre'], + 'janvier', 'février', 'mars', 'avril', 'mai', 'juin', + 'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'], } KNOWN_EXTENSIONS = ( From 0002962f3feb86ec8c14429af7ecddc17815fa93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 14 Sep 2016 23:59:13 +0700 Subject: [PATCH 477/775] [franceinter] Improve extraction (Closes #10538) --- youtube_dl/extractor/franceinter.py | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/franceinter.py b/youtube_dl/extractor/franceinter.py index 6dad8d712..0d58f89c5 100644 --- a/youtube_dl/extractor/franceinter.py +++ b/youtube_dl/extractor/franceinter.py @@ -2,10 +2,8 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import ( - unified_timestamp, - month_by_name, -) +from ..compat 
import compat_str +from ..utils import month_by_name class FranceInterIE(InfoExtractor): @@ -18,8 +16,7 @@ class FranceInterIE(InfoExtractor): 'id': 'la-marche-de-l-histoire/la-marche-de-l-histoire-18-decembre-2013', 'ext': 'mp3', 'title': 'L’Histoire dans les jeux vidéo du 18 décembre 2013 - France Inter', - 'description': 'L’Histoire dans les jeux vidéo du 18 décembre 2013 par Jean Lebrun en replay sur France Inter. Retrouvez l\'émission en réécoute gratuite et abonnez-vous au podcast !', - 'timestamp': 1387324800, + 'description': 'md5:7f2ce449894d1e585932273080fb410d', 'upload_date': '20131218', }, } @@ -30,22 +27,28 @@ class FranceInterIE(InfoExtractor): webpage = self._download_webpage(url, video_id) video_url = self._search_regex( - r'<button class="replay-button playable" data-is-aod="1" data-url="([^"]+)"', webpage, 'video url') + r'(?s)<div[^>]+class=["\']page-diffusion["\'][^>]*>.*?<button[^>]+data-url=(["\'])(?P<url>(?:(?!\1).)+)\1', + webpage, 'video url', group='url') title = self._og_search_title(webpage) description = self._og_search_description(webpage) - extractdate = self._search_regex('(\d{2}-([a-zA-Z\s]+)-\d{4}$)', url, 'extractdate', fatal=False) - extractdate = extractdate.split('-') - extractdate = extractdate[2] + "," + str(month_by_name(extractdate[1], 'fr')) + "," + extractdate[0] - - timestamp = unified_timestamp(extractdate) + upload_date_str = self._search_regex( + r'class=["\']cover-emission-period["\'][^>]*>[^<]+\s+(\d{1,2}\s+[^\s]+\s+\d{4})<', + webpage, 'upload date', fatal=False) + if upload_date_str: + upload_date_list = upload_date_str.split() + upload_date_list.reverse() + upload_date_list[1] = compat_str(month_by_name(upload_date_list[1], lang='fr')) + upload_date = ''.join(upload_date_list) + else: + upload_date = None return { 'id': video_id, 'title': title, 'description': description, - 'timestamp': timestamp, + 'upload_date': upload_date, 'formats': [{ 'url': video_url, 'vcodec': 'none', From 797c636bcb02d1199015b753d26430eec13c4b2b Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 14 Sep 2016 18:58:47 +0100 Subject: [PATCH 478/775] [ap] improve adobe pass names and parse error handling --- youtube_dl/YoutubeDL.py | 2 +- youtube_dl/__init__.py | 8 +++++--- youtube_dl/extractor/adobepass.py | 9 +++------ youtube_dl/options.py | 12 ++++++------ 4 files changed, 15 insertions(+), 16 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 9c2c26280..29d8517a3 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -131,7 +131,7 @@ class YoutubeDL(object): username: Username for authentication purposes. password: Password for authentication purposes. videopassword: Password for accessing a video. - ap_mso_id: Adobe Pass Multiple-system operator Identifier. + ap_mso: Adobe Pass Multiple-system operator Identifier. ap_username: TV Provider username for authentication purposes. ap_password: TV Provider password for authentication purposes. usenetrc: Use netrc for authentication instead. 
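Patches 478 and 479 move the "is this TV provider supported?" check out of the extractor and into option parsing, and settle the flag names on --ap-mso and --ap-list-mso. A simplified sketch of that front-end validation; the one-entry MSO table is a stand-in for the real MSO_INFO dict in youtube_dl/extractor/adobepass.py:

    # Stand-in for youtube_dl.extractor.adobepass.MSO_INFO (the real table also
    # carries the per-provider login form details).
    MSO_INFO = {
        'Rogers': {'name': 'Rogers'},
    }

    def check_adobe_pass_options(ap_mso=None, ap_list_mso=False):
        if ap_list_mso:
            print('Supported TV Providers:')
            for mso_id, mso_info in MSO_INFO.items():
                print('%-12s %s' % (mso_id, mso_info['name']))
            return
        # Fail early, before any extractor runs, if the MSO is unknown.
        if ap_mso and ap_mso not in MSO_INFO:
            raise SystemExit(
                'Unsupported TV Provider, use --ap-list-mso '
                'to get a list of supported TV Providers')

    check_adobe_pass_options(ap_list_mso=True)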
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index cdff3df65..5614ef0fb 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -120,9 +120,9 @@ def _real_main(argv=None): desc += ' (Example: "%s%s:%s" )' % (ie.SEARCH_KEY, random.choice(_COUNTS), random.choice(_SEARCHES)) write_string(desc + '\n', out=sys.stdout) sys.exit(0) - if opts.list_ap_mso_ids: + if opts.ap_mso_list: table = [[mso_id, mso_info['name']] for mso_id, mso_info in MSO_INFO.items()] - write_string('Supported TV Providers:\n' + render_table(['mso id', 'mso name'], table) + '\n', out=sys.stdout) + write_string('Supported TV Providers:\n' + render_table(['mso', 'mso name'], table) + '\n', out=sys.stdout) sys.exit(0) # Conflicting, missing and erroneous options @@ -165,6 +165,8 @@ def _real_main(argv=None): parser.error('max sleep interval must be greater than or equal to min sleep interval') else: opts.max_sleep_interval = opts.sleep_interval + if opts.ap_mso and opts.ap_mso not in MSO_INFO: + parser.error('Unsupported TV Provider, use --ap-mso-list to get a list of supported TV Providers') def parse_retries(retries): if retries in ('inf', 'infinite'): @@ -303,7 +305,7 @@ def _real_main(argv=None): 'password': opts.password, 'twofactor': opts.twofactor, 'videopassword': opts.videopassword, - 'ap_mso_id': opts.ap_mso_id, + 'ap_mso': opts.ap_mso, 'ap_username': opts.ap_username, 'ap_password': opts.ap_password, 'quiet': (opts.quiet or any_getting or any_printing), diff --git a/youtube_dl/extractor/adobepass.py b/youtube_dl/extractor/adobepass.py index 913a817d2..8ef5a96ce 100644 --- a/youtube_dl/extractor/adobepass.py +++ b/youtube_dl/extractor/adobepass.py @@ -72,8 +72,8 @@ class AdobePassIE(InfoExtractor): def raise_mvpd_required(): raise ExtractorError( 'This video is only available for users of participating TV providers. 
' - 'Use --ap-mso-id to specify Adobe Pass Multiple-system operator Identifier ' - 'and --netrc to provide account credentials.', expected=True) + 'Use --ap-mso to specify Adobe Pass Multiple-system operator Identifier ' + 'and --ap-username and --ap-password or --netrc to provide account credentials.', expected=True) mvpd_headers = { 'ap_42': 'anonymous', @@ -91,12 +91,9 @@ class AdobePassIE(InfoExtractor): authn_token = None if not authn_token: # TODO add support for other TV Providers - mso_id = self._downloader.params.get('ap_mso_id') + mso_id = self._downloader.params.get('ap_mso') if not mso_id: raise_mvpd_required() - if mso_id not in MSO_INFO: - raise ExtractorError( - 'Unsupported TV Provider, use --list-ap-mso-ids to get a list of supported TV Providers' % mso_id, expected=True) username, password = self._get_login_info('ap_username', 'ap_password', mso_id) if not username or not password: raise_mvpd_required() diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 342ae3be3..46c326b3d 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -353,20 +353,20 @@ def parseOpts(overrideArguments=None): adobe_pass = optparse.OptionGroup(parser, 'Adobe Pass Options') adobe_pass.add_option( - '--ap-mso-id', - dest='ap_mso_id', metavar='APMSOID', + '--ap-mso', + dest='ap_mso', metavar='MSO', help='Adobe Pass Multiple-system operator Identifier') adobe_pass.add_option( '--ap-username', - dest='ap_username', metavar='APUSERNAME', + dest='ap_username', metavar='USERNAME', help='TV Provider Login with this account ID') adobe_pass.add_option( '--ap-password', - dest='ap_password', metavar='APPASSWORD', + dest='ap_password', metavar='PASSWORD', help='TV Provider Account password. If this option is left out, youtube-dl will ask interactively.') adobe_pass.add_option( - '--list-ap-mso-ids', - action='store_true', dest='list_ap_mso_ids', default=False, + '--ap-mso-list', + action='store_true', dest='ap_mso_list', default=False, help='List all supported TV Providers') video_format = optparse.OptionGroup(parser, 'Video Format Options') From 87148bb7110ed54ef50f0660dfe0a735cdede3ca Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 14 Sep 2016 20:21:09 +0100 Subject: [PATCH 479/775] [adobepass] rename --ap-mso-list option to --ap-list-mso --- youtube_dl/__init__.py | 4 ++-- youtube_dl/options.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 5614ef0fb..1cf3140a0 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -120,7 +120,7 @@ def _real_main(argv=None): desc += ' (Example: "%s%s:%s" )' % (ie.SEARCH_KEY, random.choice(_COUNTS), random.choice(_SEARCHES)) write_string(desc + '\n', out=sys.stdout) sys.exit(0) - if opts.ap_mso_list: + if opts.ap_list_mso: table = [[mso_id, mso_info['name']] for mso_id, mso_info in MSO_INFO.items()] write_string('Supported TV Providers:\n' + render_table(['mso', 'mso name'], table) + '\n', out=sys.stdout) sys.exit(0) @@ -166,7 +166,7 @@ def _real_main(argv=None): else: opts.max_sleep_interval = opts.sleep_interval if opts.ap_mso and opts.ap_mso not in MSO_INFO: - parser.error('Unsupported TV Provider, use --ap-mso-list to get a list of supported TV Providers') + parser.error('Unsupported TV Provider, use --ap-list-mso to get a list of supported TV Providers') def parse_retries(retries): if retries in ('inf', 'infinite'): diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 46c326b3d..b2e863119 100644 --- 
a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -365,8 +365,8 @@ def parseOpts(overrideArguments=None): dest='ap_password', metavar='PASSWORD', help='TV Provider Account password. If this option is left out, youtube-dl will ask interactively.') adobe_pass.add_option( - '--ap-mso-list', - action='store_true', dest='ap_mso_list', default=False, + '--ap-list-mso', + action='store_true', dest='ap_list_mso', default=False, help='List all supported TV Providers') video_format = optparse.OptionGroup(parser, 'Video Format Options') From c035dba19e815eca4a21f17918e96c2e2bd55d6b Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 15 Sep 2016 08:12:12 +0100 Subject: [PATCH 480/775] [bellmedia] add support for more sites --- youtube_dl/extractor/{ctv.py => bellmedia.py} | 39 ++++++++++++++++--- youtube_dl/extractor/extractors.py | 2 +- 2 files changed, 35 insertions(+), 6 deletions(-) rename youtube_dl/extractor/{ctv.py => bellmedia.py} (54%) diff --git a/youtube_dl/extractor/ctv.py b/youtube_dl/extractor/bellmedia.py similarity index 54% rename from youtube_dl/extractor/ctv.py rename to youtube_dl/extractor/bellmedia.py index a1fe86316..32326ed9e 100644 --- a/youtube_dl/extractor/ctv.py +++ b/youtube_dl/extractor/bellmedia.py @@ -6,8 +6,25 @@ import re from .common import InfoExtractor -class CTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?(?P<domain>ctv|tsn|bnn|thecomedynetwork)\.ca/.*?(?:\bvid=|-vid|~|%7E)(?P<id>[0-9.]+)' +class BellMediaIE(InfoExtractor): + _VALID_URL = r'''(?x)https?://(?:www\.)? + (?P<domain> + (?: + ctv| + tsn| + bnn| + thecomedynetwork| + discovery| + discoveryvelocity| + sciencechannel| + investigationdiscovery| + animalplanet| + bravo| + mtv| + space + )\.ca| + much\.com + )/.*?(?:\bvid=|-vid|~|%7E|/(?:episode)?)(?P<id>[0-9]{6})''' _TESTS = [{ 'url': 'http://www.ctv.ca/video/player?vid=706966', 'md5': 'ff2ebbeae0aa2dcc32a830c3fd69b7b0', @@ -32,15 +49,27 @@ class CTVIE(InfoExtractor): }, { 'url': 'http://www.ctv.ca/YourMorning/Video/S1E6-Monday-August-29-2016-vid938009', 'only_matching': True, + }, { + 'url': 'http://www.much.com/shows/atmidnight/episode948007/tuesday-september-13-2016', + 'only_matching': True, + }, { + 'url': 'http://www.much.com/shows/the-almost-impossible-gameshow/928979/episode-6', + 'only_matching': True, }] + _DOMAINS = { + 'thecomedynetwork': 'comedy', + 'discoveryvelocity': 'discvel', + 'sciencechannel': 'discsci', + 'investigationdiscovery': 'invdisc', + 'animalplanet': 'aniplan', + } def _real_extract(self, url): domain, video_id = re.match(self._VALID_URL, url).groups() - if domain == 'thecomedynetwork': - domain = 'comedy' + domain = domain.split('.')[0] return { '_type': 'url_transparent', 'id': video_id, - 'url': '9c9media:%s_web:%s' % (domain, video_id), + 'url': '9c9media:%s_web:%s' % (self._DOMAINS.get(domain, domain), video_id), 'ie_key': 'NineCNineMedia', } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 522691de1..dd0579425 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -93,6 +93,7 @@ from .bbc import ( ) from .beeg import BeegIE from .behindkink import BehindKinkIE +from .bellmedia import BellMediaIE from .beatportpro import BeatportProIE from .bet import BetIE from .bigflix import BigflixIE @@ -195,7 +196,6 @@ from .crunchyroll import ( ) from .cspan import CSpanIE from .ctsnews import CtsNewsIE -from .ctv import CTVIE from .ctvnews import CTVNewsIE from .cultureunplugged import CultureUnpluggedIE from 
.curiositystream import ( From 95be29e1c6b7a06ac444d5142582ebece79698ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 15 Sep 2016 20:58:02 +0700 Subject: [PATCH 481/775] [twitch] Fix api calls (Closes #10654, closes #10660) --- youtube_dl/extractor/twitch.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index 359a8859c..af6d890b0 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -32,6 +32,7 @@ class TwitchBaseIE(InfoExtractor): _API_BASE = 'https://api.twitch.tv' _USHER_BASE = 'https://usher.ttvnw.net' _LOGIN_URL = 'http://www.twitch.tv/login' + _CLIENT_ID = 'jzkbprff40iqj646a697cyrvl0zt2m6' _NETRC_MACHINE = 'twitch' def _handle_error(self, response): @@ -44,15 +45,9 @@ class TwitchBaseIE(InfoExtractor): expected=True) def _call_api(self, path, item_id, note): - headers = { - 'Referer': 'http://api.twitch.tv/crossdomain/receiver.html?v=2', - 'X-Requested-With': 'XMLHttpRequest', - } - for cookie in self._downloader.cookiejar: - if cookie.name == 'api_token': - headers['Twitch-Api-Token'] = cookie.value response = self._download_json( - '%s/%s' % (self._API_BASE, path), item_id, note) + '%s/%s' % (self._API_BASE, path), item_id, note, + headers={'Client-ID': self._CLIENT_ID}) self._handle_error(response) return response From eb5b1fc0211e89f386c4f5563cc1d5d4edeb3c55 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 15 Sep 2016 21:53:35 +0700 Subject: [PATCH 482/775] [crunchyroll] Fix authentication (Closes #10655) --- youtube_dl/extractor/crunchyroll.py | 47 +++++++++++++++++++++++------ 1 file changed, 38 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 6d3abb52f..1b69bd0b6 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -34,22 +34,51 @@ from ..aes import ( class CrunchyrollBaseIE(InfoExtractor): + _LOGIN_URL = 'https://www.crunchyroll.com/login' + _LOGIN_FORM = 'login_form' _NETRC_MACHINE = 'crunchyroll' def _login(self): (username, password) = self._get_login_info() if username is None: return - self.report_login() - login_url = 'https://www.crunchyroll.com/?a=formhandler' - data = urlencode_postdata({ - 'formname': 'RpcApiUser_Login', - 'name': username, - 'password': password, + + login_page = self._download_webpage( + self._LOGIN_URL, None, 'Downloading login page') + + login_form_str = self._search_regex( + r'(?P<form><form[^>]+?id=(["\'])%s\2[^>]*>)' % self._LOGIN_FORM, + login_page, 'login form', group='form') + + post_url = extract_attributes(login_form_str).get('action') + if not post_url: + post_url = self._LOGIN_URL + elif not post_url.startswith('http'): + post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url) + + login_form = self._form_hidden_inputs(self._LOGIN_FORM, login_page) + + login_form.update({ + 'login_form[name]': username, + 'login_form[password]': password, }) - login_request = sanitized_Request(login_url, data) - login_request.add_header('Content-Type', 'application/x-www-form-urlencoded') - self._download_webpage(login_request, None, False, 'Wrong login info') + + response = self._download_webpage( + post_url, None, 'Logging in', 'Wrong login info', + data=urlencode_postdata(login_form), + headers={'Content-Type': 'application/x-www-form-urlencoded'}) + + # Successful login + if '<title>Redirecting' in response: + return 
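The rewritten Crunchyroll login above, together with the _hidden_inputs cleanup in the following patch, follows the usual scripted-login pattern: fetch the login page, carry over every hidden <input> of the form, overlay the credentials, and POST the whole set back. A self-contained sketch of that pattern against a made-up form; the real code goes through _download_webpage, _form_hidden_inputs and extract_attributes instead of the ad-hoc parsing shown here:

    import re

    try:
        from urllib.parse import urlencode  # Python 3
    except ImportError:
        from urllib import urlencode        # Python 2

    SAMPLE_LOGIN_PAGE = '''
    <form id="login_form" action="/login" method="post">
      <input type="hidden" name="csrf_token" value="abc123">
      <input type="text" name="login_form[name]">
      <input type="password" name="login_form[password]">
    </form>
    '''

    def hidden_inputs(html):
        # Collect name/value pairs from every <input type="hidden"> on the page.
        inputs = {}
        for attrs in re.findall(r'(?i)<input([^>]+)>', html):
            if not re.search(r'type=(["\'])hidden\1', attrs):
                continue
            name = re.search(r'name=(["\'])(?P<v>[^"\']+)\1', attrs)
            value = re.search(r'value=(["\'])(?P<v>[^"\']*)\1', attrs)
            if name and value:
                inputs[name.group('v')] = value.group('v')
        return inputs

    login_form = hidden_inputs(SAMPLE_LOGIN_PAGE)
    login_form.update({
        'login_form[name]': 'user@example.com',
        'login_form[password]': 'hunter2',
    })
    post_data = urlencode(login_form).encode('utf-8')  # request body for the login POST
    print(post_data)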
+ + error = self._html_search_regex( + '(?s)<ul[^>]+class=["\']messages["\'][^>]*>(.+?)</ul>', + response, 'error message', default=None) + if error: + raise ExtractorError('Unable to login: %s' % error, expected=True) + + raise ExtractorError('Unable to log in') def _real_initialize(self): self._login() From c8498368549048a578d5f30773aaa9760454983c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 15 Sep 2016 21:54:48 +0700 Subject: [PATCH 483/775] [utils] Improve _hidden_inputs --- youtube_dl/extractor/common.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index ff19270ae..e413799f9 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -888,16 +888,16 @@ class InfoExtractor(object): def _hidden_inputs(html): html = re.sub(r'<!--(?:(?!<!--).)*-->', '', html) hidden_inputs = {} - for input in re.findall(r'(?i)<input([^>]+)>', html): - if not re.search(r'type=(["\'])(?:hidden|submit)\1', input): + for input in re.findall(r'(?i)(<input[^>]+>)', html): + attrs = extract_attributes(input) + if not input: continue - name = re.search(r'(?:name|id)=(["\'])(?P<value>.+?)\1', input) - if not name: + if attrs.get('type') not in ('hidden', 'submit'): continue - value = re.search(r'value=(["\'])(?P<value>.*?)\1', input) - if not value: - continue - hidden_inputs[name.group('value')] = value.group('value') + name = attrs.get('name') or attrs.get('id') + value = attrs.get('value') + if name and value is not None: + hidden_inputs[name] = value return hidden_inputs def _form_hidden_inputs(self, form_id, html): From 537f753399ed9fd07fcb9285a2a3330010394c85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 15 Sep 2016 22:17:17 +0700 Subject: [PATCH 484/775] [options] Improve Adobe Pass wording --- youtube_dl/options.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index b2e863119..100d21310 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -355,19 +355,19 @@ def parseOpts(overrideArguments=None): adobe_pass.add_option( '--ap-mso', dest='ap_mso', metavar='MSO', - help='Adobe Pass Multiple-system operator Identifier') + help='Adobe Pass multiple-system operator (TV provider) identifier, use --ap-list-mso for a list of available MSOs') adobe_pass.add_option( '--ap-username', dest='ap_username', metavar='USERNAME', - help='TV Provider Login with this account ID') + help='Multiple-system operator account login') adobe_pass.add_option( '--ap-password', dest='ap_password', metavar='PASSWORD', - help='TV Provider Account password. If this option is left out, youtube-dl will ask interactively.') + help='Multiple-system operator account password. 
If this option is left out, youtube-dl will ask interactively.') adobe_pass.add_option( '--ap-list-mso', action='store_true', dest='ap_list_mso', default=False, - help='List all supported TV Providers') + help='List all supported multiple-system operators') video_format = optparse.OptionGroup(parser, 'Video Format Options') video_format.add_option( From d2522b86ac7d1eff1f00e21bcd976a2616b6a6d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 15 Sep 2016 22:18:31 +0700 Subject: [PATCH 485/775] [options] Actually print Adobe Pass options sections in --help --- youtube_dl/options.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 100d21310..53497fbc6 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -831,6 +831,7 @@ def parseOpts(overrideArguments=None): parser.add_option_group(video_format) parser.add_option_group(subtitles) parser.add_option_group(authentication) + parser.add_option_group(adobe_pass) parser.add_option_group(postproc) if overrideArguments is not None: From 1da50aa34e9fa0fd927de8197dcf2884551dd800 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 15 Sep 2016 22:24:55 +0700 Subject: [PATCH 486/775] [YoutubeDL] Improve Adobe Pass options' wording --- youtube_dl/YoutubeDL.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 29d8517a3..442aa663b 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -131,9 +131,9 @@ class YoutubeDL(object): username: Username for authentication purposes. password: Password for authentication purposes. videopassword: Password for accessing a video. - ap_mso: Adobe Pass Multiple-system operator Identifier. - ap_username: TV Provider username for authentication purposes. - ap_password: TV Provider password for authentication purposes. + ap_mso: Adobe Pass multiple-system operator identifier. + ap_username: Multiple-system operator account username. + ap_password: Multiple-system operator account password. usenetrc: Use netrc for authentication instead. verbose: Print additional info to stdout. quiet: Do not print messages to stdout. 
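The next three patches tidy up _get_login_info and _get_netrc_login_info without changing their behaviour: credentials given on the command line win, otherwise the extractor's .netrc machine entry is consulted. A rough sketch of that lookup order, assuming a plain params dict and the standard-library netrc module (the real method reports .netrc parse errors as warnings rather than silently swallowing them):

    import netrc

    def get_login_info(params, username_option='username',
                       password_option='password', netrc_machine=None):
        # 1. Explicitly supplied credentials take precedence.
        username = params.get(username_option)
        if username is not None:
            return username, params.get(password_option)
        # 2. Otherwise fall back to the ~/.netrc entry for this machine.
        if params.get('usenetrc') and netrc_machine:
            try:
                info = netrc.netrc().authenticators(netrc_machine)
                if info is not None:
                    return info[0], info[2]
            except (IOError, netrc.NetrcParseError):
                pass
        return None, None

    print(get_login_info({'ap_username': 'me', 'ap_password': 's3cret'},
                         'ap_username', 'ap_password', netrc_machine='mso'))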
From 2133565cec3646680600d314b93e535f6fa52339 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 15 Sep 2016 22:26:37 +0700 Subject: [PATCH 487/775] [extractor/common] Simplify _get_login_info --- youtube_dl/extractor/common.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index e413799f9..9627816b4 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -689,8 +689,6 @@ class InfoExtractor(object): if self._downloader is None: return (None, None) - username = None - password = None downloader_params = self._downloader.params # Attempt to use provided username and password or .netrc data @@ -700,7 +698,7 @@ class InfoExtractor(object): else: username, password = self._get_netrc_login_info(netrc_machine) - return (username, password) + return username, password def _get_tfa_info(self, note='two-factor verification code'): """ From 32443dd346594d64b579af714f4828287492c464 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 15 Sep 2016 22:34:29 +0700 Subject: [PATCH 488/775] [extractor/common] Update _get_login_info's comment --- youtube_dl/extractor/common.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 9627816b4..95ea3fca5 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -683,7 +683,10 @@ class InfoExtractor(object): def _get_login_info(self, username_option='username', password_option='password', netrc_machine=None): """ Get the login info as (username, password) - It will look in the netrc file using the _NETRC_MACHINE value + First look for the manually specified credentials using username_option + and password_option as keys in params dictionary. If no such credentials + available look in the netrc file using the netrc_machine or _NETRC_MACHINE + value. 
If there's no info available, return (None, None) """ if self._downloader is None: From dcce092e0aa92799f1e3a51ce5aae611af4d70d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 15 Sep 2016 22:35:12 +0700 Subject: [PATCH 489/775] [extractor/common] Simplify _get_netrc_login_info and carry long lines --- youtube_dl/extractor/common.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 95ea3fca5..4f738b9fc 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -674,11 +674,13 @@ class InfoExtractor(object): username = info[0] password = info[2] else: - raise netrc.NetrcParseError('No authenticators for %s' % netrc_machine) + raise netrc.NetrcParseError( + 'No authenticators for %s' % netrc_machine) except (IOError, netrc.NetrcParseError) as err: - self._downloader.report_warning('parsing .netrc: %s' % error_to_compat_str(err)) + self._downloader.report_warning( + 'parsing .netrc: %s' % error_to_compat_str(err)) - return (username, password) + return username, password def _get_login_info(self, username_option='username', password_option='password', netrc_machine=None): """ From 1dec2c8a0e00e8ed53ddd030347ce9225df9964e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 15 Sep 2016 22:47:45 +0700 Subject: [PATCH 490/775] [adobepass] Change mvpd cache section name In order to better emphasize it's relation to Adobe Pass --- youtube_dl/extractor/adobepass.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/adobepass.py b/youtube_dl/extractor/adobepass.py index 8ef5a96ce..01932e5e6 100644 --- a/youtube_dl/extractor/adobepass.py +++ b/youtube_dl/extractor/adobepass.py @@ -32,6 +32,7 @@ MSO_INFO = { class AdobePassIE(InfoExtractor): _SERVICE_PROVIDER_TEMPLATE = 'https://sp.auth.adobe.com/adobe-services/%s' _USER_AGENT = 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0' + _MVPD_CACHE = 'ap-mvpd' @staticmethod def _get_mvpd_resource(provider_id, title, guid, rating): @@ -85,7 +86,7 @@ class AdobePassIE(InfoExtractor): guid = xml_text(resource, 'guid') count = 0 while count < 2: - requestor_info = self._downloader.cache.load('mvpd', requestor_id) or {} + requestor_info = self._downloader.cache.load(self._MVPD_CACHE, requestor_id) or {} authn_token = requestor_info.get('authn_token') if authn_token and is_expired(authn_token, 'simpleTokenExpires'): authn_token = None @@ -125,12 +126,12 @@ class AdobePassIE(InfoExtractor): 'requestor_id': requestor_id, }), headers=mvpd_headers) if '<pendingLogout' in session: - self._downloader.cache.store('mvpd', requestor_id, {}) + self._downloader.cache.store(self._MVPD_CACHE, requestor_id, {}) count += 1 continue authn_token = unescapeHTML(xml_text(session, 'authnToken')) requestor_info['authn_token'] = authn_token - self._downloader.cache.store('mvpd', requestor_id, requestor_info) + self._downloader.cache.store(self._MVPD_CACHE, requestor_id, requestor_info) authz_token = requestor_info.get(guid) if authz_token and is_expired(authz_token, 'simpleTokenTTL'): @@ -146,12 +147,12 @@ class AdobePassIE(InfoExtractor): 'userMeta': '1', }), headers=mvpd_headers) if '<pendingLogout' in authorize: - self._downloader.cache.store('mvpd', requestor_id, {}) + self._downloader.cache.store(self._MVPD_CACHE, requestor_id, {}) count += 1 continue authz_token = unescapeHTML(xml_text(authorize, 'authzToken')) 
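Ahead of the id-regex cleanup in PATCH 491 just below: it swaps lazy (?P<id>.+?)\1 captures for the tempered form (?P<id>(?:(?!\1).)+)\1 in a few extractors. The difference shows up when an attribute is empty or the engine has to backtrack - the lazy version can crawl past the closing quote and capture neighbouring markup, while the tempered version never matches the quote character at all. A quick demonstration on made-up markup:

    import re

    html = 'data-video="" data-id="123"'

    lazy = re.search(r'data-video=(["\'])(?P<id>.+?)\1', html)
    tempered = re.search(r'data-video=(["\'])(?P<id>(?:(?!\1).)+)\1', html)

    # Backtracking lets the lazy pattern run past the empty value's closing quote:
    print(lazy.group('id'))   # " data-id=
    # The tempered pattern refuses to cross a quote, so it simply finds no id here:
    print(tempered)           # None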
requestor_info[guid] = authz_token - self._downloader.cache.store('mvpd', requestor_id, requestor_info) + self._downloader.cache.store(self._MVPD_CACHE, requestor_id, requestor_info) mvpd_headers.update({ 'ap_19': xml_text(authn_token, 'simpleSamlNameID'), @@ -167,7 +168,7 @@ class AdobePassIE(InfoExtractor): 'hashed_guid': 'false', }), headers=mvpd_headers) if '<pendingLogout' in short_authorize: - self._downloader.cache.store('mvpd', requestor_id, {}) + self._downloader.cache.store(self._MVPD_CACHE, requestor_id, {}) count += 1 continue return short_authorize From 490b755769a364ca0624390453e36321d5182d3e Mon Sep 17 00:00:00 2001 From: stepshal <nessento@openmailbox.org> Date: Wed, 14 Sep 2016 23:03:26 +0700 Subject: [PATCH 491/775] Improve some id regexes --- youtube_dl/extractor/canvas.py | 2 +- youtube_dl/extractor/nfl.py | 2 +- youtube_dl/extractor/npo.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/canvas.py b/youtube_dl/extractor/canvas.py index ef0691dcd..d183d5d52 100644 --- a/youtube_dl/extractor/canvas.py +++ b/youtube_dl/extractor/canvas.py @@ -71,7 +71,7 @@ class CanvasIE(InfoExtractor): webpage)).strip() video_id = self._html_search_regex( - r'data-video=(["\'])(?P<id>.+?)\1', webpage, 'video id', group='id') + r'data-video=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video id', group='id') data = self._download_json( 'https://mediazone.vrt.be/api/v1/%s/assets/%s' diff --git a/youtube_dl/extractor/nfl.py b/youtube_dl/extractor/nfl.py index 200874d68..3930d16f1 100644 --- a/youtube_dl/extractor/nfl.py +++ b/youtube_dl/extractor/nfl.py @@ -165,7 +165,7 @@ class NFLIE(InfoExtractor): group='config')) # For articles, the id in the url is not the video id video_id = self._search_regex( - r'(?:<nflcs:avplayer[^>]+data-content[Ii]d\s*=\s*|content[Ii]d\s*:\s*)(["\'])(?P<id>.+?)\1', + r'(?:<nflcs:avplayer[^>]+data-content[Ii]d\s*=\s*|content[Ii]d\s*:\s*)(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video id', default=video_id, group='id') config = self._download_json(config_url, video_id, 'Downloading player config') url_template = NFLIE.prepend_host( diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index 87f5675c7..3293bdb17 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -429,7 +429,7 @@ class SchoolTVIE(InfoExtractor): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) video_id = self._search_regex( - r'data-mid=(["\'])(?P<id>.+?)\1', webpage, 'video_id', group='id') + r'data-mid=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video_id', group='id') return { '_type': 'url_transparent', 'ie_key': 'NPO', From e6bf3621e703a7cd0d62736a1765b0ccff5adfe6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 15 Sep 2016 23:31:16 +0700 Subject: [PATCH 492/775] [ChangeLog] Actualize --- ChangeLog | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index c3c8bf037..cd1f2fdf1 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,8 +1,26 @@ version <unreleased> +Core +* Improve _hidden_inputs ++ Introduce improved explicit Adobe Pass support ++ Add --ap-mso to provide multiple-system operator identifier ++ Add --ap-username to provide MSO account username ++ Add --ap-password to provide MSO account password ++ Add --ap-list-mso to list all supported MSOs ++ Add support for Rogers Cable multiple-system operator (#10606) + Extractors -* [kwuo] Improve error detection (#10650) +* [crunchyroll] Fix 
authentication (#10655) +* [twitch] Fix API calls (#10654, #10660) ++ [bellmedia] Add support for more Bell Media Television sites +* [franceinter] Fix extraction (#10538, #2105) +* [kuwo] Improve error detection (#10650) ++ [go] Add support for free full episodes (#10439) * [bilibili] Fix extraction for specific videos (#10647) +* [nhk] Fix extraction (#10633) +* [kaltura] Improve audio detection +* [kaltura] Skip chun format ++ [vimeo:ondemand] Pass Referer along with embed URL (#10624) + [nbc] Add support for NBC Olympics (#10361) From f5e008d134f5e69920829cfd7a5ce5ae57d275c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 15 Sep 2016 23:46:11 +0700 Subject: [PATCH 493/775] release 2016.09.15 --- .github/ISSUE_TEMPLATE.md | 8 ++++---- ChangeLog | 2 +- README.md | 11 +++++++++++ docs/supportedsites.md | 3 ++- youtube_dl/version.py | 2 +- 5 files changed, 19 insertions(+), 7 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index e87fed573..61cea757c 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.11.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.11.1** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.15*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.15** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.09.11.1 +[debug] youtube-dl version 2016.09.15 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} @@ -55,4 +55,4 @@ $ youtube-dl -v <your command line> ### Description of your *issue*, suggested solution and other information Explanation of your *issue* in arbitrary form goes here. Please make sure the [description is worded well enough to be understood](https://github.com/rg3/youtube-dl#is-the-description-of-the-issue-itself-sufficient). Provide as much context and examples as possible. -If work on your *issue* required an account credentials please provide them or explain how one can obtain them. +If work on your *issue* requires account credentials please provide them or explain how one can obtain them. 
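
A hedged illustration of the new Adobe Pass support summarized in the ChangeLog above (not part of this release patch): when youtube-dl is embedded as a library, the --ap-mso, --ap-username and --ap-password flags are assumed to map to option keys of the same names; the MSO identifier, credentials and URL below are placeholders (run youtube-dl --ap-list-mso for the list of supported operators).

import youtube_dl

# Placeholder values; 'ap_mso' must be one of the identifiers reported by
# --ap-list-mso (for example the newly supported Rogers Cable operator).
ydl_opts = {
    'ap_mso': 'Rogers',
    'ap_username': 'mso-account-login',
    'ap_password': 'mso-account-password',
}

with youtube_dl.YoutubeDL(ydl_opts) as ydl:
    # Hypothetical Adobe Pass (TV Everywhere) protected video URL.
    ydl.download(['http://example.com/some-tv-everywhere-video'])
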
diff --git a/ChangeLog b/ChangeLog index cd1f2fdf1..4583537ac 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2016.09.15 Core * Improve _hidden_inputs diff --git a/README.md b/README.md index 7543f81ac..4debe15fe 100644 --- a/README.md +++ b/README.md @@ -358,6 +358,17 @@ which means you can modify it, redistribute it or use it however you like. -n, --netrc Use .netrc authentication data --video-password PASSWORD Video password (vimeo, smotri, youku) +## Adobe Pass Options: + --ap-mso MSO Adobe Pass multiple-system operator (TV + provider) identifier, use --ap-list-mso for + a list of available MSOs + --ap-username USERNAME Multiple-system operator account login + --ap-password PASSWORD Multiple-system operator account password. + If this option is left out, youtube-dl will + ask interactively. + --ap-list-mso List all supported multiple-system + operators + ## Post-processing Options: -x, --extract-audio Convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 7a7b268d3..fcb618561 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -89,6 +89,7 @@ - **BeatportPro** - **Beeg** - **BehindKink** + - **BellMedia** - **Bet** - **Bigflix** - **Bild**: Bild.de @@ -169,7 +170,6 @@ - **CSNNE** - **CSpan**: C-SPAN - **CtsNews**: 華視新聞 - - **CTV** - **CTVNews** - **culturebox.francetvinfo.fr** - **CultureUnplugged** @@ -445,6 +445,7 @@ - **NBA** - **NBC** - **NBCNews** + - **NBCOlympics** - **NBCSports** - **NBCSportsVPlayer** - **ndr**: NDR.de - Norddeutscher Rundfunk diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 903aede58..081fd6ef0 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.09.11.1' +__version__ = '2016.09.15' From 9d8985a165ebdc9fd8d72e7536253c42162b58a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 16 Sep 2016 00:54:34 +0700 Subject: [PATCH 494/775] [tv4] Fix hls and hds formats (Closes #10659) --- youtube_dl/extractor/tv4.py | 49 ++++++++++++++++++++++++------------- 1 file changed, 32 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/tv4.py b/youtube_dl/extractor/tv4.py index 343edf206..5d2d8f132 100644 --- a/youtube_dl/extractor/tv4.py +++ b/youtube_dl/extractor/tv4.py @@ -2,9 +2,13 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..compat import compat_str from ..utils import ( ExtractorError, + int_or_none, parse_iso8601, + try_get, + update_url_query, ) @@ -65,36 +69,47 @@ class TV4IE(InfoExtractor): video_id = self._match_id(url) info = self._download_json( - 'http://www.tv4play.se/player/assets/%s.json' % video_id, video_id, 'Downloading video info JSON') + 'http://www.tv4play.se/player/assets/%s.json' % video_id, + video_id, 'Downloading video info JSON') # If is_geo_restricted is true, it doesn't necessarily mean we can't download it - if info['is_geo_restricted']: + if info.get('is_geo_restricted'): self.report_warning('This content might not be available in your country due to licensing restrictions.') - if info['requires_subscription']: + if info.get('requires_subscription'): raise ExtractorError('This content requires subscription.', expected=True) - sources_data = self._download_json( - 'https://prima.tv4play.se/api/web/asset/%s/play.json?protocol=http&videoFormat=MP4' % video_id, video_id, 'Downloading sources JSON') - sources = 
sources_data['playback'] + title = info['title'] formats = [] - for item in sources.get('items', {}).get('item', []): - ext, bitrate = item['mediaFormat'], item['bitrate'] - formats.append({ - 'format_id': '%s_%s' % (ext, bitrate), - 'tbr': bitrate, - 'ext': ext, - 'url': item['url'], - }) + # http formats are linked with unresolvable host + for kind in ('hls', ''): + data = self._download_json( + 'https://prima.tv4play.se/api/web/asset/%s/play.json' % video_id, + video_id, 'Downloading sources JSON', query={ + 'protocol': kind, + 'videoFormat': 'MP4+WEBVTTS+WEBVTT', + }) + item = try_get(data, lambda x: x['playback']['items']['item'], dict) + manifest_url = item.get('url') + if not isinstance(manifest_url, compat_str): + continue + if kind == 'hls': + formats.extend(self._extract_m3u8_formats( + manifest_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id=kind, fatal=False)) + else: + formats.extend(self._extract_f4m_formats( + update_url_query(manifest_url, {'hdcore': '3.8.0'}), + video_id, f4m_id='hds', fatal=False)) self._sort_formats(formats) return { 'id': video_id, - 'title': info['title'], + 'title': title, 'formats': formats, 'description': info.get('description'), 'timestamp': parse_iso8601(info.get('broadcast_date_time')), - 'duration': info.get('duration'), + 'duration': int_or_none(info.get('duration')), 'thumbnail': info.get('image'), - 'is_live': sources.get('live'), + 'is_live': info.get('is_live') is True, } From 52dc8a9b3f1af7abda6652a75b906d70809c475d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 16 Sep 2016 22:02:59 +0700 Subject: [PATCH 495/775] [franceinter] Fix upload date extraction --- youtube_dl/extractor/franceinter.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/franceinter.py b/youtube_dl/extractor/franceinter.py index 0d58f89c5..1a1232ade 100644 --- a/youtube_dl/extractor/franceinter.py +++ b/youtube_dl/extractor/franceinter.py @@ -10,14 +10,14 @@ class FranceInterIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?franceinter\.fr/emissions/(?P<id>[^?#]+)' _TEST = { - 'url': 'https://www.franceinter.fr/emissions/la-marche-de-l-histoire/la-marche-de-l-histoire-18-decembre-2013', - 'md5': '4764932e466e6f6c79c317d2e74f6884', + 'url': 'https://www.franceinter.fr/emissions/la-tete-au-carre/la-tete-au-carre-14-septembre-2016', + 'md5': '4e3aeb58fe0e83d7b0581fa213c409d0', 'info_dict': { - 'id': 'la-marche-de-l-histoire/la-marche-de-l-histoire-18-decembre-2013', + 'id': 'la-tete-au-carre/la-tete-au-carre-14-septembre-2016', 'ext': 'mp3', - 'title': 'L’Histoire dans les jeux vidéo du 18 décembre 2013 - France Inter', - 'description': 'md5:7f2ce449894d1e585932273080fb410d', - 'upload_date': '20131218', + 'title': 'Et si les rêves pouvaient nous aider à agir dans notre vie quotidienne ?', + 'description': 'md5:a245dd62cf5bf51de915f8d9956d180a', + 'upload_date': '20160914', }, } @@ -39,7 +39,7 @@ class FranceInterIE(InfoExtractor): if upload_date_str: upload_date_list = upload_date_str.split() upload_date_list.reverse() - upload_date_list[1] = compat_str(month_by_name(upload_date_list[1], lang='fr')) + upload_date_list[1] = '%02d' % (month_by_name(upload_date_list[1], lang='fr') or 0) upload_date = ''.join(upload_date_list) else: upload_date = None From 98b7506e96b5ac107a777d8bb8900623d832fba4 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Fri, 16 Sep 2016 17:36:22 +0100 Subject: [PATCH 496/775] [toutv] add support for 
authentication(closes #10669) --- youtube_dl/extractor/radiocanada.py | 55 ++++++++++++++++---------- youtube_dl/extractor/toutv.py | 60 ++++++++++++++++++++++++++++- 2 files changed, 92 insertions(+), 23 deletions(-) diff --git a/youtube_dl/extractor/radiocanada.py b/youtube_dl/extractor/radiocanada.py index 8ec402646..6751270ee 100644 --- a/youtube_dl/extractor/radiocanada.py +++ b/youtube_dl/extractor/radiocanada.py @@ -13,6 +13,7 @@ from ..utils import ( xpath_element, ExtractorError, determine_protocol, + unsmuggle_url, ) @@ -35,28 +36,51 @@ class RadioCanadaIE(InfoExtractor): } def _real_extract(self, url): + url, smuggled_data = unsmuggle_url(url, {}) app_code, video_id = re.match(self._VALID_URL, url).groups() - device_types = ['ipad', 'android'] + metadata = self._download_xml( + 'http://api.radio-canada.ca/metaMedia/v1/index.ashx', + video_id, note='Downloading metadata XML', query={ + 'appCode': app_code, + 'idMedia': video_id, + }) + + def get_meta(name): + el = find_xpath_attr(metadata, './/Meta', 'name', name) + return el.text if el is not None else None + + if get_meta('protectionType'): + raise ExtractorError('This video is DRM protected.', expected=True) + + device_types = ['ipad'] if app_code != 'toutv': device_types.append('flash') + if not smuggled_data: + device_types.append('android') formats = [] # TODO: extract f4m formats # f4m formats can be extracted using flashhd device_type but they produce unplayable file for device_type in device_types: - v_data = self._download_xml( - 'http://api.radio-canada.ca/validationMedia/v1/Validation.ashx', - video_id, note='Downloading %s XML' % device_type, query={ - 'appCode': app_code, - 'idMedia': video_id, - 'connectionType': 'broadband', - 'multibitrate': 'true', - 'deviceType': device_type, + validation_url = 'http://api.radio-canada.ca/validationMedia/v1/Validation.ashx' + query = { + 'appCode': app_code, + 'idMedia': video_id, + 'connectionType': 'broadband', + 'multibitrate': 'true', + 'deviceType': device_type, + } + if smuggled_data: + validation_url = 'https://services.radio-canada.ca/media/validation/v2/' + query.update(smuggled_data) + else: + query.update({ # paysJ391wsHjbOJwvCs26toz and bypasslock are used to bypass geo-restriction 'paysJ391wsHjbOJwvCs26toz': 'CA', 'bypasslock': 'NZt5K62gRqfc', - }, fatal=False) + }) + v_data = self._download_xml(validation_url, video_id, note='Downloading %s XML' % device_type, query=query, fatal=False) v_url = xpath_text(v_data, 'url') if not v_url: continue @@ -101,17 +125,6 @@ class RadioCanadaIE(InfoExtractor): f4m_id='hds', fatal=False)) self._sort_formats(formats) - metadata = self._download_xml( - 'http://api.radio-canada.ca/metaMedia/v1/index.ashx', - video_id, note='Downloading metadata XML', query={ - 'appCode': app_code, - 'idMedia': video_id, - }) - - def get_meta(name): - el = find_xpath_attr(metadata, './/Meta', 'name', name) - return el.text if el is not None else None - return { 'id': video_id, 'title': get_meta('Title'), diff --git a/youtube_dl/extractor/toutv.py b/youtube_dl/extractor/toutv.py index 54c2d0aa6..d2d5c1171 100644 --- a/youtube_dl/extractor/toutv.py +++ b/youtube_dl/extractor/toutv.py @@ -2,12 +2,22 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import int_or_none +from ..utils import ( + int_or_none, + js_to_json, + ExtractorError, + urlencode_postdata, + extract_attributes, + smuggle_url, +) class TouTvIE(InfoExtractor): + _NETRC_MACHINE = 'toutv' IE_NAME = 'tou.tv' _VALID_URL = 
r'https?://ici\.tou\.tv/(?P<id>[a-zA-Z0-9_-]+/S[0-9]+E[0-9]+)' + _access_token = None + _claims = None _TEST = { 'url': 'http://ici.tou.tv/garfield-tout-court/S2015E17', @@ -22,18 +32,64 @@ class TouTvIE(InfoExtractor): # m3u8 download 'skip_download': True, }, + 'skip': '404 Not Found', } + def _real_initialize(self): + email, password = self._get_login_info() + if email is None: + return + state = 'http://ici.tou.tv//' + webpage = self._download_webpage(state, None, 'Downloading homepage') + toutvlogin = self._parse_json(self._search_regex( + r'(?s)toutvlogin\s*=\s*({.+?});', webpage, 'toutvlogin'), None, js_to_json) + authorize_url = toutvlogin['host'] + '/auth/oauth/v2/authorize' + login_webpage = self._download_webpage( + authorize_url, None, 'Downloading login page', query={ + 'client_id': toutvlogin['clientId'], + 'redirect_uri': 'https://ici.tou.tv/login/loginCallback', + 'response_type': 'token', + 'scope': 'media-drmt openid profile email id.write media-validation.read.privileged', + 'state': state, + }) + login_form = self._search_regex( + r'(?s)(<form[^>]+id="Form-login".+?</form>)', login_webpage, 'login form') + form_data = self._hidden_inputs(login_form) + form_data.update({ + 'login-email': email, + 'login-password': password, + }) + post_url = extract_attributes(login_form).get('action') or authorize_url + _, urlh = self._download_webpage_handle( + post_url, None, 'Logging in', data=urlencode_postdata(form_data)) + self._access_token = self._search_regex( + r'access_token=([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})', + urlh.geturl(), 'access token') + self._claims = self._download_json( + 'https://services.radio-canada.ca/media/validation/v2/getClaims', + None, 'Extracting Claims', query={ + 'token': self._access_token, + 'access_token': self._access_token, + })['claims'] + def _real_extract(self, url): path = self._match_id(url) metadata = self._download_json('http://ici.tou.tv/presentation/%s' % path, path) + if metadata.get('IsDrm'): + raise ExtractorError('This video is DRM protected.', expected=True) video_id = metadata['IdMedia'] details = metadata['Details'] title = details['OriginalTitle'] + video_url = 'radiocanada:%s:%s' % (metadata.get('AppCode', 'toutv'), video_id) + if self._access_token and self._claims: + video_url = smuggle_url(video_url, { + 'access_token': self._access_token, + 'claims': self._claims, + }) return { '_type': 'url_transparent', - 'url': 'radiocanada:%s:%s' % (metadata.get('AppCode', 'toutv'), video_id), + 'url': video_url, 'id': video_id, 'title': title, 'thumbnail': details.get('ImageUrl'), From 6ad0219556cefe60239027633193cc9f1dc9fb1d Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Fri, 16 Sep 2016 19:30:38 +0100 Subject: [PATCH 497/775] [common] add helper method for Wowza Streaming Engine format extraction --- youtube_dl/extractor/common.py | 43 +++++++++++++++++++++++++++++ youtube_dl/extractor/vier.py | 4 +-- youtube_dl/extractor/vodplatform.py | 25 ++--------------- youtube_dl/extractor/vrt.py | 40 ++++++--------------------- 4 files changed, 55 insertions(+), 57 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 4f738b9fc..c00023458 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1795,6 +1795,49 @@ class InfoExtractor(object): m3u8_id='hls', fatal=False)) return formats + def _extract_wowza_formats(self, url, video_id, m3u8_entry_protocol='m3u8_native', skip_protocols=[]): + url = 
re.sub(r'/(?:manifest|playlist|jwplayer)\.(?:m3u8|f4m|mpd|smil)', '', url) + url_base = self._search_regex(r'(?:https?|rtmp|rtsp)(://[^?]+)', url, 'format url') + http_base_url = 'http' + url_base + formats = [] + if 'm3u8' not in skip_protocols: + formats.extend(self._extract_m3u8_formats( + http_base_url + '/playlist.m3u8', video_id, 'mp4', + m3u8_entry_protocol, m3u8_id='hls', fatal=False)) + if 'f4m' not in skip_protocols: + formats.extend(self._extract_f4m_formats( + http_base_url + '/manifest.f4m', + video_id, f4m_id='hds', fatal=False)) + if re.search(r'(?:/smil:|\.smil)', url_base): + if 'dash' not in skip_protocols: + formats.extend(self._extract_mpd_formats( + http_base_url + '/manifest.mpd', + video_id, mpd_id='dash', fatal=False)) + if 'smil' not in skip_protocols: + rtmp_formats = self._extract_smil_formats( + http_base_url + '/jwplayer.smil', + video_id, fatal=False) + for rtmp_format in rtmp_formats: + rtsp_format = rtmp_format.copy() + rtsp_format['url'] = '%s/%s' % (rtmp_format['url'], rtmp_format['play_path']) + del rtsp_format['play_path'] + del rtsp_format['ext'] + rtsp_format.update({ + 'url': rtsp_format['url'].replace('rtmp://', 'rtsp://'), + 'format_id': rtmp_format['format_id'].replace('rtmp', 'rtsp'), + 'protocol': 'rtsp', + }) + formats.extend([rtmp_format, rtsp_format]) + else: + for protocol in ('rtmp', 'rtsp'): + if protocol not in skip_protocols: + formats.append({ + 'url': protocol + url_base, + 'format_id': protocol, + 'protocol': protocol, + }) + return formats + def _live_title(self, name): """ Generate the title for a live video """ now = datetime.datetime.now() diff --git a/youtube_dl/extractor/vier.py b/youtube_dl/extractor/vier.py index 6645c6186..dc142a245 100644 --- a/youtube_dl/extractor/vier.py +++ b/youtube_dl/extractor/vier.py @@ -48,8 +48,8 @@ class VierIE(InfoExtractor): [r'data-filename="([^"]+)"', r'"filename"\s*:\s*"([^"]+)"'], webpage, 'filename') - playlist_url = 'http://vod.streamcloud.be/%s/mp4:_definst_/%s.mp4/playlist.m3u8' % (application, filename) - formats = self._extract_m3u8_formats(playlist_url, display_id, 'mp4') + playlist_url = 'http://vod.streamcloud.be/%s/_definst_/mp4:%s.mp4/playlist.m3u8' % (application, filename) + formats = self._extract_wowza_formats(playlist_url, display_id) self._sort_formats(formats) title = self._og_search_title(webpage, default=display_id) diff --git a/youtube_dl/extractor/vodplatform.py b/youtube_dl/extractor/vodplatform.py index 7bdd8b1dc..239644340 100644 --- a/youtube_dl/extractor/vodplatform.py +++ b/youtube_dl/extractor/vodplatform.py @@ -25,29 +25,8 @@ class VODPlatformIE(InfoExtractor): title = unescapeHTML(self._og_search_title(webpage)) hidden_inputs = self._hidden_inputs(webpage) - base_url = self._search_regex( - '(.*/)(?:playlist.m3u8|manifest.mpd)', - hidden_inputs.get('HiddenmyhHlsLink') or hidden_inputs['HiddenmyDashLink'], - 'base url') - formats = self._extract_m3u8_formats( - base_url + 'playlist.m3u8', video_id, 'mp4', - 'm3u8_native', m3u8_id='hls', fatal=False) - formats.extend(self._extract_mpd_formats( - base_url + 'manifest.mpd', video_id, - mpd_id='dash', fatal=False)) - rtmp_formats = self._extract_smil_formats( - base_url + 'jwplayer.smil', video_id, fatal=False) - for rtmp_format in rtmp_formats: - rtsp_format = rtmp_format.copy() - rtsp_format['url'] = '%s/%s' % (rtmp_format['url'], rtmp_format['play_path']) - del rtsp_format['play_path'] - del rtsp_format['ext'] - rtsp_format.update({ - 'url': rtsp_format['url'].replace('rtmp://', 'rtsp://'), - 'format_id': 
rtmp_format['format_id'].replace('rtmp', 'rtsp'), - 'protocol': 'rtsp', - }) - formats.extend([rtmp_format, rtsp_format]) + formats = self._extract_wowza_formats( + hidden_inputs.get('HiddenmyhHlsLink') or hidden_inputs['HiddenmyDashLink'], video_id, skip_protocols=['f4m', 'smil']) self._sort_formats(formats) return { diff --git a/youtube_dl/extractor/vrt.py b/youtube_dl/extractor/vrt.py index bec7ab327..00c72e346 100644 --- a/youtube_dl/extractor/vrt.py +++ b/youtube_dl/extractor/vrt.py @@ -5,7 +5,6 @@ import re from .common import InfoExtractor from ..utils import ( - determine_ext, float_or_none, ) @@ -75,7 +74,6 @@ class VRTIE(InfoExtractor): }, { 'url': 'http://cobra.canvas.be/cm/cobra/videozone/rubriek/film-videozone/1.2377055', - 'md5': '', 'info_dict': { 'id': '2377055', 'ext': 'mp4', @@ -119,39 +117,17 @@ class VRTIE(InfoExtractor): video_id, 'mp4', m3u8_id='hls', fatal=False)) if src: - if determine_ext(src) == 'm3u8': - formats.extend(self._extract_m3u8_formats( - src, video_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='hls', fatal=False)) - formats.extend(self._extract_f4m_formats( - src.replace('playlist.m3u8', 'manifest.f4m'), - video_id, f4m_id='hds', fatal=False)) - if 'data-video-geoblocking="true"' not in webpage: - rtmp_formats = self._extract_smil_formats( - src.replace('playlist.m3u8', 'jwplayer.smil'), - video_id, fatal=False) - formats.extend(rtmp_formats) - for rtmp_format in rtmp_formats: - rtmp_format_c = rtmp_format.copy() - rtmp_format_c['url'] = '%s/%s' % (rtmp_format['url'], rtmp_format['play_path']) - del rtmp_format_c['play_path'] - del rtmp_format_c['ext'] - http_format = rtmp_format_c.copy() + formats = self._extract_wowza_formats(src, video_id) + if 'data-video-geoblocking="true"' not in webpage: + for f in formats: + if f['url'].startswith('rtsp://'): + http_format = f.copy() http_format.update({ - 'url': rtmp_format_c['url'].replace('rtmp://', 'http://').replace('vod.', 'download.').replace('/_definst_/', '/').replace('mp4:', ''), - 'format_id': rtmp_format['format_id'].replace('rtmp', 'http'), + 'url': f['url'].replace('rtsp://', 'http://').replace('vod.', 'download.').replace('/_definst_/', '/').replace('mp4:', ''), + 'format_id': f['format_id'].replace('rtsp', 'http'), 'protocol': 'http', }) - rtsp_format = rtmp_format_c.copy() - rtsp_format.update({ - 'url': rtsp_format['url'].replace('rtmp://', 'rtsp://'), - 'format_id': rtmp_format['format_id'].replace('rtmp', 'rtsp'), - 'protocol': 'rtsp', - }) - formats.extend([http_format, rtsp_format]) - else: - formats.extend(self._extract_f4m_formats( - '%s/manifest.f4m' % src, video_id, f4m_id='hds', fatal=False)) + formats.append(http_format) if not formats and 'data-video-geoblocking="true"' in webpage: self.raise_geo_restricted('This video is only available in Belgium') From 7d273a387aade7665cd25eee69d94ee615d9a4b9 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Fri, 16 Sep 2016 19:31:39 +0100 Subject: [PATCH 498/775] [mangomolo] add support for Mangomolo embeds --- youtube_dl/extractor/awaan.py | 63 ++++++++++++------------------ youtube_dl/extractor/extractors.py | 4 ++ youtube_dl/extractor/generic.py | 29 ++++++++++++++ youtube_dl/extractor/mangomolo.py | 54 +++++++++++++++++++++++++ 4 files changed, 111 insertions(+), 39 deletions(-) create mode 100644 youtube_dl/extractor/mangomolo.py diff --git a/youtube_dl/extractor/awaan.py b/youtube_dl/extractor/awaan.py index bdf23c6a9..66d7515bc 100644 --- a/youtube_dl/extractor/awaan.py +++ b/youtube_dl/extractor/awaan.py @@ 
-50,25 +50,6 @@ class AWAANBaseIE(InfoExtractor): 'is_live': is_live, } - def _extract_video_formats(self, webpage, video_id, m3u8_entry_protocol): - formats = [] - format_url_base = 'http' + self._html_search_regex( - [ - r'file\s*:\s*"https?(://[^"]+)/playlist.m3u8', - r'<a[^>]+href="rtsp(://[^"]+)"' - ], webpage, 'format url') - formats.extend(self._extract_mpd_formats( - format_url_base + '/manifest.mpd', - video_id, mpd_id='dash', fatal=False)) - formats.extend(self._extract_m3u8_formats( - format_url_base + '/playlist.m3u8', video_id, 'mp4', - m3u8_entry_protocol, m3u8_id='hls', fatal=False)) - formats.extend(self._extract_f4m_formats( - format_url_base + '/manifest.f4m', - video_id, f4m_id='hds', fatal=False)) - self._sort_formats(formats) - return formats - class AWAANVideoIE(AWAANBaseIE): IE_NAME = 'awaan:video' @@ -99,16 +80,18 @@ class AWAANVideoIE(AWAANBaseIE): video_id, headers={'Origin': 'http://awaan.ae'}) info = self._parse_video_data(video_data, video_id, False) - webpage = self._download_webpage( - 'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?' + - compat_urllib_parse_urlencode({ - 'id': video_data['id'], - 'user_id': video_data['user_id'], - 'signature': video_data['signature'], - 'countries': 'Q0M=', - 'filter': 'DENY', - }), video_id) - info['formats'] = self._extract_video_formats(webpage, video_id, 'm3u8_native') + embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?' + compat_urllib_parse_urlencode({ + 'id': video_data['id'], + 'user_id': video_data['user_id'], + 'signature': video_data['signature'], + 'countries': 'Q0M=', + 'filter': 'DENY', + }) + info.update({ + '_type': 'url_transparent', + 'url': embed_url, + 'ie_key': 'MangomoloVideo', + }) return info @@ -138,16 +121,18 @@ class AWAANLiveIE(AWAANBaseIE): channel_id, headers={'Origin': 'http://awaan.ae'}) info = self._parse_video_data(channel_data, channel_id, True) - webpage = self._download_webpage( - 'http://admin.mangomolo.com/analytics/index.php/customers/embed/index?' + - compat_urllib_parse_urlencode({ - 'id': base64.b64encode(channel_data['user_id'].encode()).decode(), - 'channelid': base64.b64encode(channel_data['id'].encode()).decode(), - 'signature': channel_data['signature'], - 'countries': 'Q0M=', - 'filter': 'DENY', - }), channel_id) - info['formats'] = self._extract_video_formats(webpage, channel_id, 'm3u8') + embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/index?' 
+ compat_urllib_parse_urlencode({ + 'id': base64.b64encode(channel_data['user_id'].encode()).decode(), + 'channelid': base64.b64encode(channel_data['id'].encode()).decode(), + 'signature': channel_data['signature'], + 'countries': 'Q0M=', + 'filter': 'DENY', + }) + info.update({ + '_type': 'url_transparent', + 'url': embed_url, + 'ie_key': 'MangomoloLive', + }) return info diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index dd0579425..4baf4cd48 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -472,6 +472,10 @@ from .macgamestore import MacGameStoreIE from .mailru import MailRuIE from .makerschannel import MakersChannelIE from .makertv import MakerTVIE +from .mangomolo import ( + MangomoloVideoIE, + MangomoloLiveIE, +) from .matchtv import MatchTVIE from .mdr import MDRIE from .meta import METAIE diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 2e46ca179..e01305942 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2254,6 +2254,35 @@ class GenericIE(InfoExtractor): return self.url_result( self._proto_relative_url(unescapeHTML(mobj.group('url'))), 'VODPlatform') + # Look for Mangomolo embeds + mobj = re.search( + r'''(?x)<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?admin\.mangomolo.com/analytics/index\.php/customers/embed/ + (?: + video\?.*?\bid=(?P<video_id>\d+)| + index\?.*?\bchannelid=(?P<channel_id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+) + ).+?)\1''', webpage) + if mobj is not None: + info = { + '_type': 'url_transparent', + 'url': self._proto_relative_url(unescapeHTML(mobj.group('url'))), + 'title': video_title, + 'description': video_description, + 'thumbnail': video_thumbnail, + 'uploader': video_uploader, + } + video_id = mobj.group('video_id') + if video_id: + info.update({ + 'ie_key': 'MangomoloVideo', + 'id': video_id, + }) + else: + info.update({ + 'ie_key': 'MangomoloLive', + 'id': mobj.group('channel_id'), + }) + return info + # Look for Instagram embeds instagram_embed_url = InstagramIE._extract_embed_url(webpage) if instagram_embed_url is not None: diff --git a/youtube_dl/extractor/mangomolo.py b/youtube_dl/extractor/mangomolo.py new file mode 100644 index 000000000..8cac8ace2 --- /dev/null +++ b/youtube_dl/extractor/mangomolo.py @@ -0,0 +1,54 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import base64 + +from .common import InfoExtractor +from ..compat import compat_urllib_parse_unquote +from ..utils import ( + int_or_none, +) + + +class MangomoloBaseIE(InfoExtractor): + def _get_real_id(self, page_id): + return page_id + + def _real_extract(self, url): + page_id = self._get_real_id(self._match_id(url)) + webpage = self._download_webpage(url, page_id) + hidden_inputs = self._hidden_inputs(webpage) + m3u8_entry_protocol = 'm3u8' if self._IS_LIVE else 'm3u8_native' + + format_url = self._html_search_regex( + [ + r'file\s*:\s*"(https?://[^"]+?/playlist.m3u8)', + r'<a[^>]+href="(rtsp://[^"]+)"' + ], webpage, 'format url') + formats = self._extract_wowza_formats( + format_url, page_id, m3u8_entry_protocol, ['smil']) + self._sort_formats(formats) + + return { + 'id': page_id, + 'title': self._live_title(page_id) if self._IS_LIVE else page_id, + 'uploader_id': hidden_inputs.get('userid'), + 'duration': int_or_none(hidden_inputs.get('duration')), + 'is_live': self._IS_LIVE, + 'formats': formats, + } + + +class MangomoloVideoIE(MangomoloBaseIE): + IENAME = 'mangomolo:video' + _VALID_URL = 
r'https?://admin\.mangomolo.com/analytics/index\.php/customers/embed/video\?.*?\bid=(?P<id>\d+)' + _IS_LIVE = False + + +class MangomoloLiveIE(MangomoloBaseIE): + IENAME = 'mangomolo:live' + _VALID_URL = r'https?://admin\.mangomolo.com/analytics/index\.php/customers/embed/index\?.*?\bchannelid=(?P<id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)' + _IS_LIVE = True + + def _get_real_id(self, page_id): + return base64.b64decode(compat_urllib_parse_unquote(page_id).encode()).decode() From fc86d4eed0bf10f8f90326472811e5b4d4ad4bd9 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Fri, 16 Sep 2016 20:10:47 +0100 Subject: [PATCH 499/775] [mangomolo] fix typo --- youtube_dl/extractor/mangomolo.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/mangomolo.py b/youtube_dl/extractor/mangomolo.py index 8cac8ace2..2db503f2b 100644 --- a/youtube_dl/extractor/mangomolo.py +++ b/youtube_dl/extractor/mangomolo.py @@ -40,13 +40,13 @@ class MangomoloBaseIE(InfoExtractor): class MangomoloVideoIE(MangomoloBaseIE): - IENAME = 'mangomolo:video' + IE_NAME = 'mangomolo:video' _VALID_URL = r'https?://admin\.mangomolo.com/analytics/index\.php/customers/embed/video\?.*?\bid=(?P<id>\d+)' _IS_LIVE = False class MangomoloLiveIE(MangomoloBaseIE): - IENAME = 'mangomolo:live' + IE_NAME = 'mangomolo:live' _VALID_URL = r'https?://admin\.mangomolo.com/analytics/index\.php/customers/embed/index\?.*?\bchannelid=(?P<id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)' _IS_LIVE = True From 30d9e20938fa91ece09c376b67030647215d48df Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Fri, 16 Sep 2016 22:06:55 +0100 Subject: [PATCH 500/775] [postprocessor/ffmpeg] apply FFmpegFixupM3u8PP only for videos with aac codec(#5591) --- youtube_dl/postprocessor/ffmpeg.py | 63 +++++++++++++++--------------- 1 file changed, 31 insertions(+), 32 deletions(-) diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index fa99b0c2a..8d1214ee2 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -139,6 +139,30 @@ class FFmpegPostProcessor(PostProcessor): def probe_executable(self): return self._paths[self.probe_basename] + def get_audio_codec(self, path): + if not self.probe_available: + raise PostProcessingError('ffprobe or avprobe not found. Please install one.') + try: + cmd = [ + encodeFilename(self.probe_executable, True), + encodeArgument('-show_streams'), + encodeFilename(self._ffmpeg_filename_argument(path), True)] + if self._downloader.params.get('verbose', False): + self._downloader.to_screen('[debug] %s command line: %s' % (self.basename, shell_quote(cmd))) + handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE, stdin=subprocess.PIPE) + output = handle.communicate()[0] + if handle.wait() != 0: + return None + except (IOError, OSError): + return None + audio_codec = None + for line in output.decode('ascii', 'ignore').split('\n'): + if line.startswith('codec_name='): + audio_codec = line.split('=')[1].strip() + elif line.strip() == 'codec_type=audio' and audio_codec is not None: + return audio_codec + return None + def run_ffmpeg_multiple_files(self, input_paths, out_path, opts): self.check_version() @@ -188,31 +212,6 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): self._preferredquality = preferredquality self._nopostoverwrites = nopostoverwrites - def get_audio_codec(self, path): - - if not self.probe_available: - raise PostProcessingError('ffprobe or avprobe not found. 
Please install one.') - try: - cmd = [ - encodeFilename(self.probe_executable, True), - encodeArgument('-show_streams'), - encodeFilename(self._ffmpeg_filename_argument(path), True)] - if self._downloader.params.get('verbose', False): - self._downloader.to_screen('[debug] %s command line: %s' % (self.basename, shell_quote(cmd))) - handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE, stdin=subprocess.PIPE) - output = handle.communicate()[0] - if handle.wait() != 0: - return None - except (IOError, OSError): - return None - audio_codec = None - for line in output.decode('ascii', 'ignore').split('\n'): - if line.startswith('codec_name='): - audio_codec = line.split('=')[1].strip() - elif line.strip() == 'codec_type=audio' and audio_codec is not None: - return audio_codec - return None - def run_ffmpeg(self, path, out_path, codec, more_opts): if codec is None: acodec_opts = [] @@ -504,15 +503,15 @@ class FFmpegFixupM4aPP(FFmpegPostProcessor): class FFmpegFixupM3u8PP(FFmpegPostProcessor): def run(self, info): filename = info['filepath'] - temp_filename = prepend_extension(filename, 'temp') + if self.get_audio_codec(filename) == 'aac': + temp_filename = prepend_extension(filename, 'temp') - options = ['-c', 'copy', '-f', 'mp4', '-bsf:a', 'aac_adtstoasc'] - self._downloader.to_screen('[ffmpeg] Fixing malformated aac bitstream in "%s"' % filename) - self.run_ffmpeg(filename, temp_filename, options) - - os.remove(encodeFilename(filename)) - os.rename(encodeFilename(temp_filename), encodeFilename(filename)) + options = ['-c', 'copy', '-f', 'mp4', '-bsf:a', 'aac_adtstoasc'] + self._downloader.to_screen('[ffmpeg] Fixing malformated aac bitstream in "%s"' % filename) + self.run_ffmpeg(filename, temp_filename, options) + os.remove(encodeFilename(filename)) + os.rename(encodeFilename(temp_filename), encodeFilename(filename)) return [], info From d05ef09d9d94fa70335af5fbaab385b37b16d705 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 17 Sep 2016 08:11:01 +0100 Subject: [PATCH 501/775] [mangomolo] fix domain regex --- youtube_dl/extractor/generic.py | 2 +- youtube_dl/extractor/mangomolo.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index e01305942..92a6e5146 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2256,7 +2256,7 @@ class GenericIE(InfoExtractor): # Look for Mangomolo embeds mobj = re.search( - r'''(?x)<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?admin\.mangomolo.com/analytics/index\.php/customers/embed/ + r'''(?x)<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?admin\.mangomolo\.com/analytics/index\.php/customers/embed/ (?: video\?.*?\bid=(?P<video_id>\d+)| index\?.*?\bchannelid=(?P<channel_id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+) diff --git a/youtube_dl/extractor/mangomolo.py b/youtube_dl/extractor/mangomolo.py index 2db503f2b..1885ac7df 100644 --- a/youtube_dl/extractor/mangomolo.py +++ b/youtube_dl/extractor/mangomolo.py @@ -41,13 +41,13 @@ class MangomoloBaseIE(InfoExtractor): class MangomoloVideoIE(MangomoloBaseIE): IE_NAME = 'mangomolo:video' - _VALID_URL = r'https?://admin\.mangomolo.com/analytics/index\.php/customers/embed/video\?.*?\bid=(?P<id>\d+)' + _VALID_URL = r'https?://admin\.mangomolo\.com/analytics/index\.php/customers/embed/video\?.*?\bid=(?P<id>\d+)' _IS_LIVE = False class MangomoloLiveIE(MangomoloBaseIE): IE_NAME = 'mangomolo:live' - _VALID_URL = 
r'https?://admin\.mangomolo.com/analytics/index\.php/customers/embed/index\?.*?\bchannelid=(?P<id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)' + _VALID_URL = r'https?://admin\.mangomolo\.com/analytics/index\.php/customers/embed/index\?.*?\bchannelid=(?P<id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)' _IS_LIVE = True def _get_real_id(self, page_id): From c51a7f0b2f2454bfe0b53f9d79567b3210e015b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 17 Sep 2016 15:44:37 +0700 Subject: [PATCH 502/775] [franceinter] Fix upload date extraction --- youtube_dl/extractor/franceinter.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/franceinter.py b/youtube_dl/extractor/franceinter.py index 1a1232ade..707b9e00d 100644 --- a/youtube_dl/extractor/franceinter.py +++ b/youtube_dl/extractor/franceinter.py @@ -2,7 +2,6 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import compat_str from ..utils import month_by_name @@ -10,14 +9,14 @@ class FranceInterIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?franceinter\.fr/emissions/(?P<id>[^?#]+)' _TEST = { - 'url': 'https://www.franceinter.fr/emissions/la-tete-au-carre/la-tete-au-carre-14-septembre-2016', - 'md5': '4e3aeb58fe0e83d7b0581fa213c409d0', + 'url': 'https://www.franceinter.fr/emissions/affaires-sensibles/affaires-sensibles-07-septembre-2016', + 'md5': '9e54d7bdb6fdc02a841007f8a975c094', 'info_dict': { - 'id': 'la-tete-au-carre/la-tete-au-carre-14-septembre-2016', + 'id': 'affaires-sensibles/affaires-sensibles-07-septembre-2016', 'ext': 'mp3', - 'title': 'Et si les rêves pouvaient nous aider à agir dans notre vie quotidienne ?', - 'description': 'md5:a245dd62cf5bf51de915f8d9956d180a', - 'upload_date': '20160914', + 'title': 'Affaire Cahuzac : le contentieux du compte en Suisse', + 'description': 'md5:401969c5d318c061f86bda1fa359292b', + 'upload_date': '20160907', }, } @@ -40,6 +39,7 @@ class FranceInterIE(InfoExtractor): upload_date_list = upload_date_str.split() upload_date_list.reverse() upload_date_list[1] = '%02d' % (month_by_name(upload_date_list[1], lang='fr') or 0) + upload_date_list[2] = '%02d' % int(upload_date_list[2]) upload_date = ''.join(upload_date_list) else: upload_date = None From e14c82bd6b6cfc1e904b067350d818657c911e07 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 17 Sep 2016 18:45:08 +0800 Subject: [PATCH 503/775] [jwplatform] Use js_to_json to detect more JWPlayers --- ChangeLog | 6 ++++++ youtube_dl/extractor/jwplatform.py | 6 ++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index 4583537ac..a9f7cee53 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version <unreleased> + +Extractors +* [jwplatform] Improve JWPlayer detection + + version 2016.09.15 Core diff --git a/youtube_dl/extractor/jwplatform.py b/youtube_dl/extractor/jwplatform.py index 7aaa65476..38199fcd0 100644 --- a/youtube_dl/extractor/jwplatform.py +++ b/youtube_dl/extractor/jwplatform.py @@ -9,6 +9,7 @@ from ..utils import ( determine_ext, float_or_none, int_or_none, + js_to_json, mimetype2ext, ) @@ -19,14 +20,15 @@ class JWPlatformBaseIE(InfoExtractor): # TODO: Merge this with JWPlayer-related codes in generic.py mobj = re.search( - 'jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\)\.setup\((?P<options>[^)]+)\)', + r'jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\)\.setup\s*\((?P<options>[^)]+)\)', webpage) if mobj: return mobj.group('options') def 
_extract_jwplayer_data(self, webpage, video_id, *args, **kwargs): jwplayer_data = self._parse_json( - self._find_jwplayer_data(webpage), video_id) + self._find_jwplayer_data(webpage), video_id, + transform_source=js_to_json) return self._parse_jwplayer_data( jwplayer_data, video_id, *args, **kwargs) From 584d6f3457205b547c8969f11eade117f871ec8f Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 17 Sep 2016 18:46:43 +0800 Subject: [PATCH 504/775] [thisav] Recognize jwplayers (closes #10447) --- ChangeLog | 1 + youtube_dl/extractor/thisav.py | 39 ++++++++++++++++++++++++---------- 2 files changed, 29 insertions(+), 11 deletions(-) diff --git a/ChangeLog b/ChangeLog index a9f7cee53..b0a65bde2 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version <unreleased> Extractors ++ [thisav] Recognize HTML5 videos (#10447) * [jwplatform] Improve JWPlayer detection diff --git a/youtube_dl/extractor/thisav.py b/youtube_dl/extractor/thisav.py index 7f323c938..027a8e907 100644 --- a/youtube_dl/extractor/thisav.py +++ b/youtube_dl/extractor/thisav.py @@ -3,13 +3,12 @@ from __future__ import unicode_literals import re -from .common import InfoExtractor -from ..utils import determine_ext +from .jwplatform import JWPlatformBaseIE -class ThisAVIE(InfoExtractor): +class ThisAVIE(JWPlatformBaseIE): _VALID_URL = r'https?://(?:www\.)?thisav\.com/video/(?P<id>[0-9]+)/.*' - _TEST = { + _TESTS = [{ 'url': 'http://www.thisav.com/video/47734/%98%26sup1%3B%83%9E%83%82---just-fit.html', 'md5': '0480f1ef3932d901f0e0e719f188f19b', 'info_dict': { @@ -19,7 +18,17 @@ class ThisAVIE(InfoExtractor): 'uploader': 'dj7970', 'uploader_id': 'dj7970' } - } + }, { + 'url': 'http://www.thisav.com/video/242352/nerdy-18yo-big-ass-tattoos-and-glasses.html', + 'md5': 'ba90c076bd0f80203679e5b60bf523ee', + 'info_dict': { + 'id': '242352', + 'ext': 'mp4', + 'title': 'Nerdy 18yo Big Ass Tattoos and Glasses', + 'uploader': 'cybersluts', + 'uploader_id': 'cybersluts', + }, + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -28,20 +37,28 @@ class ThisAVIE(InfoExtractor): webpage = self._download_webpage(url, video_id) title = self._html_search_regex(r'<h1>([^<]*)</h1>', webpage, 'title') video_url = self._html_search_regex( - r"addVariable\('file','([^']+)'\);", webpage, 'video url') + r"addVariable\('file','([^']+)'\);", webpage, 'video url', default=None) + if video_url: + info_dict = { + 'formats': [{ + 'url': video_url, + }], + } + else: + info_dict = self._extract_jwplayer_data( + webpage, video_id, require_title=False) uploader = self._html_search_regex( r': <a href="http://www.thisav.com/user/[0-9]+/(?:[^"]+)">([^<]+)</a>', webpage, 'uploader name', fatal=False) uploader_id = self._html_search_regex( r': <a href="http://www.thisav.com/user/[0-9]+/([^"]+)">(?:[^<]+)</a>', webpage, 'uploader id', fatal=False) - ext = determine_ext(video_url) - return { + info_dict.update({ 'id': video_id, - 'url': video_url, 'uploader': uploader, 'uploader_id': uploader_id, 'title': title, - 'ext': ext, - } + }) + + return info_dict From a0d5077c8dfa9fa31ebf3e63fdb1b2a7a5182a81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 6 Sep 2016 01:18:57 +0700 Subject: [PATCH 505/775] [extractor/common] Introduce fragments interface --- youtube_dl/extractor/common.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index c00023458..566ed7a4d 100644 --- 
a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -86,7 +86,9 @@ class InfoExtractor(object): from worst to best quality. Potential fields: - * url Mandatory. The URL of the video file + * url Mandatory. The URL of the video file or URL of + the manifest file in case of fragmented media + (DASH, hls, hds). * ext Will be calculated from URL if missing * format A human-readable description of the format ("mp4 container with h264/opus"). @@ -115,6 +117,11 @@ class InfoExtractor(object): download, lower-case. "http", "https", "rtsp", "rtmp", "rtmpe", "m3u8", "m3u8_native" or "http_dash_segments". + * fragments A list of fragments of the fragmented media, + with the following entries: + * "url" (mandatory) - fragment's URL + * "duration" (optional, int or float) + * "filesize" (optional, int) * preference Order number of this format. If this field is present and not None, the formats get sorted by this field, regardless of all other values. From b4c1d6e800a5b28accf4ba588b8fa3f0c420ce13 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 6 Sep 2016 01:21:57 +0700 Subject: [PATCH 506/775] [extractor/common] Expose fragments interface for dashsegments formats --- youtube_dl/extractor/common.py | 142 +++++++++++++++++++++++---------- 1 file changed, 99 insertions(+), 43 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 566ed7a4d..e637b33d5 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1551,42 +1551,52 @@ class InfoExtractor(object): def extract_multisegment_info(element, ms_parent_info): ms_info = ms_parent_info.copy() + + # As per [1, 5.3.9.2.2] SegmentList and SegmentTemplate share some + # common attributes and elements. We will only extract relevant + # for us. 
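# Illustrative only (values assumed, not from the patch): for a
# SegmentTemplate with timescale="90000" whose SegmentTimeline is
#   <S t="0" d="360000" r="2"/>
# the extract_common() helper defined just below is expected to fill in:
#   ms_info['s'] == [{'t': 0, 'd': 360000, 'r': 2}]
#   ms_info['total_number'] == 3      # one S element repeated r=2 extra times
#   ms_info['timescale'] == 90000     # so each fragment lasts 4 seconds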
+ def extract_common(source): + segment_timeline = source.find(_add_ns('SegmentTimeline')) + if segment_timeline is not None: + s_e = segment_timeline.findall(_add_ns('S')) + if s_e: + ms_info['total_number'] = 0 + ms_info['s'] = [] + for s in s_e: + r = int(s.get('r', 0)) + ms_info['total_number'] += 1 + r + ms_info['s'].append({ + 't': int(s.get('t', 0)), + # @d is mandatory (see [1, 5.3.9.6.2, Table 17, page 60]) + 'd': int(s.attrib['d']), + 'r': r, + }) + start_number = source.get('startNumber') + if start_number: + ms_info['start_number'] = int(start_number) + timescale = source.get('timescale') + if timescale: + ms_info['timescale'] = int(timescale) + segment_duration = source.get('duration') + if segment_duration: + ms_info['segment_duration'] = int(segment_duration) + + def extract_Initialization(source): + initialization = source.find(_add_ns('Initialization')) + if initialization is not None: + ms_info['initialization_url'] = initialization.attrib['sourceURL'] + segment_list = element.find(_add_ns('SegmentList')) if segment_list is not None: + extract_common(segment_list) + extract_Initialization(segment_list) segment_urls_e = segment_list.findall(_add_ns('SegmentURL')) if segment_urls_e: ms_info['segment_urls'] = [segment.attrib['media'] for segment in segment_urls_e] - initialization = segment_list.find(_add_ns('Initialization')) - if initialization is not None: - ms_info['initialization_url'] = initialization.attrib['sourceURL'] else: segment_template = element.find(_add_ns('SegmentTemplate')) if segment_template is not None: - start_number = segment_template.get('startNumber') - if start_number: - ms_info['start_number'] = int(start_number) - segment_timeline = segment_template.find(_add_ns('SegmentTimeline')) - if segment_timeline is not None: - s_e = segment_timeline.findall(_add_ns('S')) - if s_e: - ms_info['total_number'] = 0 - ms_info['s'] = [] - for s in s_e: - r = int(s.get('r', 0)) - ms_info['total_number'] += 1 + r - ms_info['s'].append({ - 't': int(s.get('t', 0)), - # @d is mandatory (see [1, 5.3.9.6.2, Table 17, page 60]) - 'd': int(s.attrib['d']), - 'r': r, - }) - else: - timescale = segment_template.get('timescale') - if timescale: - ms_info['timescale'] = int(timescale) - segment_duration = segment_template.get('duration') - if segment_duration: - ms_info['segment_duration'] = int(segment_duration) + extract_common(segment_template) media_template = segment_template.get('media') if media_template: ms_info['media_template'] = media_template @@ -1594,11 +1604,14 @@ class InfoExtractor(object): if initialization: ms_info['initialization_url'] = initialization else: - initialization = segment_template.find(_add_ns('Initialization')) - if initialization is not None: - ms_info['initialization_url'] = initialization.attrib['sourceURL'] + extract_Initialization(segment_template) return ms_info + def combine_url(base_url, target_url): + if re.match(r'^https?://', target_url): + return target_url + return '%s%s%s' % (base_url, '' if base_url.endswith('/') else '/', target_url) + mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration')) formats = [] for period in mpd_doc.findall(_add_ns('Period')): @@ -1655,9 +1668,7 @@ class InfoExtractor(object): } representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info) if 'segment_urls' not in representation_ms_info and 'media_template' in representation_ms_info: - if 'total_number' not in representation_ms_info and 'segment_duration': - segment_duration = 
float(representation_ms_info['segment_duration']) / float(representation_ms_info['timescale']) - representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration)) + media_template = representation_ms_info['media_template'] media_template = media_template.replace('$RepresentationID$', representation_id) media_template = re.sub(r'\$(Number|Bandwidth|Time)\$', r'%(\1)d', media_template) @@ -1666,7 +1677,11 @@ class InfoExtractor(object): # As per [1, 5.3.9.4.4, Table 16, page 55] $Number$ and $Time$ # can't be used at the same time - if '%(Number' in media_template: + if '%(Number' in media_template and 's' not in representation_ms_info: + segment_duration = None + if 'total_number' not in representation_ms_info and 'segment_duration': + segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale']) + representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration)) representation_ms_info['segment_urls'] = [ media_template % { 'Number': segment_number, @@ -1675,28 +1690,65 @@ class InfoExtractor(object): for segment_number in range( representation_ms_info['start_number'], representation_ms_info['total_number'] + representation_ms_info['start_number'])] + representation_ms_info['fragments'] = [{ + 'url': media_template % { + 'Number': segment_number, + 'Bandwidth': representation_attrib.get('bandwidth'), + }, + 'duration': segment_duration, + } for segment_number in range( + representation_ms_info['start_number'], + representation_ms_info['total_number'] + representation_ms_info['start_number'])] else: + # $Number*$ or $Time$ in media template with S list available + # Example $Number*$: http://www.svtplay.se/klipp/9023742/stopptid-om-bjorn-borg + # Example $Time$: https://play.arkena.com/embed/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411 representation_ms_info['segment_urls'] = [] + representation_ms_info['fragments'] = [] segment_time = 0 + segment_d = None + segment_number = representation_ms_info['start_number'] def add_segment_url(): - representation_ms_info['segment_urls'].append( - media_template % { - 'Time': segment_time, - 'Bandwidth': representation_attrib.get('bandwidth'), - } - ) + segment_url = media_template % { + 'Time': segment_time, + 'Bandwidth': representation_attrib.get('bandwidth'), + 'Number': segment_number, + } + representation_ms_info['segment_urls'].append(segment_url) + representation_ms_info['fragments'].append({ + 'url': segment_url, + 'duration': float_or_none(segment_d, representation_ms_info['timescale']), + }) for num, s in enumerate(representation_ms_info['s']): segment_time = s.get('t') or segment_time + segment_d = s['d'] add_segment_url() + segment_number += 1 for r in range(s.get('r', 0)): - segment_time += s['d'] + segment_time += segment_d add_segment_url() - segment_time += s['d'] + segment_number += 1 + segment_time += segment_d + elif 'segment_urls' in representation_ms_info and 's' in representation_ms_info: + # No media template + # Example: https://www.youtube.com/watch?v=iXZV5uAYMJI + # or any YouTube dashsegments video + fragments = [] + s_num = 0 + for segment_url in representation_ms_info['segment_urls']: + s = representation_ms_info['s'][s_num] + for r in range(s.get('r', 0) + 1): + fragments.append({ + 'url': segment_url, + 'duration': float_or_none(s['d'], representation_ms_info['timescale']), + }) + representation_ms_info['fragments'] = fragments if 'segment_urls' in representation_ms_info: f.update({ 
'segment_urls': representation_ms_info['segment_urls'], + 'fragments': [], 'protocol': 'http_dash_segments', }) if 'initialization_url' in representation_ms_info: @@ -1706,6 +1758,10 @@ class InfoExtractor(object): }) if not f.get('url'): f['url'] = initialization_url + f['fragments'].append({'url': initialization_url}) + f['fragments'].extend(representation_ms_info['fragments']) + for fragment in f['fragments']: + fragment['url'] = combine_url(base_url, fragment['url']) try: existing_format = next( fo for fo in formats From 21d21b0c72a731d4ff5affa2182fbe1687c031a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 17 Sep 2016 19:25:31 +0700 Subject: [PATCH 507/775] [svt] Fix DASH formats extraction --- youtube_dl/extractor/svt.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/svt.py b/youtube_dl/extractor/svt.py index 1c04dfb7b..fb0a4b24e 100644 --- a/youtube_dl/extractor/svt.py +++ b/youtube_dl/extractor/svt.py @@ -16,7 +16,7 @@ class SVTBaseIE(InfoExtractor): def _extract_video(self, video_info, video_id): formats = [] for vr in video_info['videoReferences']: - player_type = vr.get('playerType') + player_type = vr.get('playerType') or vr.get('format') vurl = vr['url'] ext = determine_ext(vurl) if ext == 'm3u8': From 86f4d14f817acaee1f1f544cd9b06d47bc2a5180 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 17 Sep 2016 20:35:22 +0700 Subject: [PATCH 508/775] Refactor fragments interface and dash segments downloader - Eliminate segment_urls and initialization_url + Introduce manifest_url (manifest may contain unfragmented data in this case url will be used for direct media URL and manifest_url for manifest itself correspondingly) * Rewrite dashsegments downloader to use fragments data * Improve generic mpd extraction --- youtube_dl/downloader/dash.py | 35 +++++++++++---------------------- youtube_dl/extractor/common.py | 31 +++++++++++------------------ youtube_dl/extractor/generic.py | 4 +++- 3 files changed, 26 insertions(+), 44 deletions(-) diff --git a/youtube_dl/downloader/dash.py b/youtube_dl/downloader/dash.py index 41fc9cfc2..8437dde30 100644 --- a/youtube_dl/downloader/dash.py +++ b/youtube_dl/downloader/dash.py @@ -1,7 +1,6 @@ from __future__ import unicode_literals import os -import re from .fragment import FragmentFD from ..compat import compat_urllib_error @@ -19,34 +18,32 @@ class DashSegmentsFD(FragmentFD): FD_NAME = 'dashsegments' def real_download(self, filename, info_dict): - base_url = info_dict['url'] - segment_urls = [info_dict['segment_urls'][0]] if self.params.get('test', False) else info_dict['segment_urls'] - initialization_url = info_dict.get('initialization_url') + segments = info_dict['fragments'][:1] if self.params.get( + 'test', False) else info_dict['fragments'] ctx = { 'filename': filename, - 'total_frags': len(segment_urls) + (1 if initialization_url else 0), + 'total_frags': len(segments), } self._prepare_and_start_frag_download(ctx) - def combine_url(base_url, target_url): - if re.match(r'^https?://', target_url): - return target_url - return '%s%s%s' % (base_url, '' if base_url.endswith('/') else '/', target_url) - segments_filenames = [] fragment_retries = self.params.get('fragment_retries', 0) skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True) - def process_segment(segment, tmp_filename, fatal): - target_url, segment_name = segment + def process_segment(segment, tmp_filename, num): + segment_url = 
segment['url'] + segment_name = 'Frag%d' % num target_filename = '%s-%s' % (tmp_filename, segment_name) + # In DASH, the first segment contains necessary headers to + # generate a valid MP4 file, so always abort for the first segment + fatal = num == 0 or not skip_unavailable_fragments count = 0 while count <= fragment_retries: try: - success = ctx['dl'].download(target_filename, {'url': combine_url(base_url, target_url)}) + success = ctx['dl'].download(target_filename, {'url': segment_url}) if not success: return False down, target_sanitized = sanitize_open(target_filename, 'rb') @@ -72,16 +69,8 @@ class DashSegmentsFD(FragmentFD): return False return True - segments_to_download = [(initialization_url, 'Init')] if initialization_url else [] - segments_to_download.extend([ - (segment_url, 'Seg%d' % i) - for i, segment_url in enumerate(segment_urls)]) - - for i, segment in enumerate(segments_to_download): - # In DASH, the first segment contains necessary headers to - # generate a valid MP4 file, so always abort for the first segment - fatal = i == 0 or not skip_unavailable_fragments - if not process_segment(segment, ctx['tmpfilename'], fatal): + for i, segment in enumerate(segments): + if not process_segment(segment, ctx['tmpfilename'], i): return False self._finish_frag_download(ctx) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index e637b33d5..f35311e7a 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -86,9 +86,10 @@ class InfoExtractor(object): from worst to best quality. Potential fields: - * url Mandatory. The URL of the video file or URL of - the manifest file in case of fragmented media - (DASH, hls, hds). + * url Mandatory. The URL of the video file + * manifest_url + The URL of the manifest file in case of + fragmented media (DASH, hls, hds) * ext Will be calculated from URL if missing * format A human-readable description of the format ("mp4 container with h264/opus"). @@ -1528,9 +1529,10 @@ class InfoExtractor(object): mpd_base_url = re.match(r'https?://.+/', urlh.geturl()).group() return self._parse_mpd_formats( - compat_etree_fromstring(mpd.encode('utf-8')), mpd_id, mpd_base_url, formats_dict=formats_dict) + compat_etree_fromstring(mpd.encode('utf-8')), mpd_id, mpd_base_url, + formats_dict=formats_dict, mpd_url=mpd_url) - def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', formats_dict={}): + def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', formats_dict={}, mpd_url=None): """ Parse formats from MPD manifest. 
References: @@ -1654,6 +1656,7 @@ class InfoExtractor(object): f = { 'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id, 'url': base_url, + 'manifest_url': mpd_url, 'ext': mimetype2ext(mime_type), 'width': int_or_none(representation_attrib.get('width')), 'height': int_or_none(representation_attrib.get('height')), @@ -1682,14 +1685,6 @@ class InfoExtractor(object): if 'total_number' not in representation_ms_info and 'segment_duration': segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale']) representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration)) - representation_ms_info['segment_urls'] = [ - media_template % { - 'Number': segment_number, - 'Bandwidth': representation_attrib.get('bandwidth'), - } - for segment_number in range( - representation_ms_info['start_number'], - representation_ms_info['total_number'] + representation_ms_info['start_number'])] representation_ms_info['fragments'] = [{ 'url': media_template % { 'Number': segment_number, @@ -1703,7 +1698,6 @@ class InfoExtractor(object): # $Number*$ or $Time$ in media template with S list available # Example $Number*$: http://www.svtplay.se/klipp/9023742/stopptid-om-bjorn-borg # Example $Time$: https://play.arkena.com/embed/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411 - representation_ms_info['segment_urls'] = [] representation_ms_info['fragments'] = [] segment_time = 0 segment_d = None @@ -1715,7 +1709,6 @@ class InfoExtractor(object): 'Bandwidth': representation_attrib.get('bandwidth'), 'Number': segment_number, } - representation_ms_info['segment_urls'].append(segment_url) representation_ms_info['fragments'].append({ 'url': segment_url, 'duration': float_or_none(segment_d, representation_ms_info['timescale']), @@ -1745,17 +1738,15 @@ class InfoExtractor(object): 'duration': float_or_none(s['d'], representation_ms_info['timescale']), }) representation_ms_info['fragments'] = fragments - if 'segment_urls' in representation_ms_info: + # NB: MPD manifest may contain direct URLs to unfragmented media. + # No fragments key is present in this case. 
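            # Rough sketch of a fragmented format dict as assembled below and
            # consumed by the dashsegments downloader (URLs are invented for
            # illustration only):
            #   {
            #       'url': 'https://example.com/dash/init.mp4',
            #       'manifest_url': 'https://example.com/dash/manifest.mpd',
            #       'protocol': 'http_dash_segments',
            #       'fragments': [
            #           {'url': 'https://example.com/dash/init.mp4'},
            #           {'url': 'https://example.com/dash/seg-1.m4s', 'duration': 3.84},
            #           {'url': 'https://example.com/dash/seg-2.m4s', 'duration': 3.84},
            #       ],
            #   }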
+ if 'fragments' in representation_ms_info: f.update({ - 'segment_urls': representation_ms_info['segment_urls'], 'fragments': [], 'protocol': 'http_dash_segments', }) if 'initialization_url' in representation_ms_info: initialization_url = representation_ms_info['initialization_url'].replace('$RepresentationID$', representation_id) - f.update({ - 'initialization_url': initialization_url, - }) if not f.get('url'): f['url'] = initialization_url f['fragments'].append({'url': initialization_url}) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 92a6e5146..c1792c534 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1657,7 +1657,9 @@ class GenericIE(InfoExtractor): return self.playlist_result(self._parse_xspf(doc, video_id), video_id) elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag): info_dict['formats'] = self._parse_mpd_formats( - doc, video_id, mpd_base_url=url.rpartition('/')[0]) + doc, video_id, + mpd_base_url=full_response.geturl().rpartition('/')[0], + mpd_url=url) self._sort_formats(info_dict['formats']) return info_dict elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag): From 30d0b549be5696f24b87471a0e691f9afca4a9c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 17 Sep 2016 21:33:38 +0700 Subject: [PATCH 509/775] [extractor/common] Add manifest_url for hls and hds formats --- youtube_dl/extractor/common.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index f35311e7a..9c8991542 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1150,6 +1150,7 @@ class InfoExtractor(object): formats.append({ 'format_id': format_id, 'url': manifest_url, + 'manifest_url': manifest_url, 'ext': 'flv' if bootstrap_info is not None else None, 'tbr': tbr, 'width': width, @@ -1255,9 +1256,11 @@ class InfoExtractor(object): # format_id intact. 
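                # After this change each variant format carries manifest_url in
                # addition to url; for HLS variants both point at the same
                # variant playlist, e.g. (invented URL):
                #   'url': 'https://example.com/hls/720p-1500k.m3u8'
                #   'manifest_url': 'https://example.com/hls/720p-1500k.m3u8'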
if not live: format_id.append(stream_name if stream_name else '%d' % (tbr if tbr else len(formats))) + manifest_url = format_url(line.strip()) f = { 'format_id': '-'.join(format_id), - 'url': format_url(line.strip()), + 'url': manifest_url, + 'manifest_url': manifest_url, 'tbr': tbr, 'ext': ext, 'fps': float_or_none(last_info.get('FRAME-RATE')), From 26394d021df1137301b1508bd00dd3478c15116c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 17 Sep 2016 23:34:10 +0700 Subject: [PATCH 510/775] [globo:article] Add support for multiple videos (Closes #10653) --- youtube_dl/extractor/globo.py | 39 +++++++++++++++++++++++++---------- 1 file changed, 28 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/globo.py b/youtube_dl/extractor/globo.py index 5638be48f..dc7b2661c 100644 --- a/youtube_dl/extractor/globo.py +++ b/youtube_dl/extractor/globo.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals import random +import re import math from .common import InfoExtractor @@ -14,6 +15,7 @@ from ..utils import ( ExtractorError, float_or_none, int_or_none, + orderedSet, str_or_none, ) @@ -63,6 +65,9 @@ class GloboIE(InfoExtractor): }, { 'url': 'http://canaloff.globo.com/programas/desejar-profundo/videos/4518560.html', 'only_matching': True, + }, { + 'url': 'globo:3607726', + 'only_matching': True, }] class MD5(object): @@ -396,7 +401,7 @@ class GloboIE(InfoExtractor): class GloboArticleIE(InfoExtractor): - _VALID_URL = r'https?://.+?\.globo\.com/(?:[^/]+/)*(?P<id>[^/]+)(?:\.html)?' + _VALID_URL = r'https?://.+?\.globo\.com/(?:[^/]+/)*(?P<id>[^/.]+)(?:\.html)?' _VIDEOID_REGEXES = [ r'\bdata-video-id=["\'](\d{7,})', @@ -408,15 +413,20 @@ class GloboArticleIE(InfoExtractor): _TESTS = [{ 'url': 'http://g1.globo.com/jornal-nacional/noticia/2014/09/novidade-na-fiscalizacao-de-bagagem-pela-receita-provoca-discussoes.html', - 'md5': '307fdeae4390ccfe6ba1aa198cf6e72b', 'info_dict': { - 'id': '3652183', - 'ext': 'mp4', - 'title': 'Receita Federal explica como vai fiscalizar bagagens de quem retorna ao Brasil de avião', - 'duration': 110.711, - 'uploader': 'Rede Globo', - 'uploader_id': '196', - } + 'id': 'novidade-na-fiscalizacao-de-bagagem-pela-receita-provoca-discussoes', + 'title': 'Novidade na fiscalização de bagagem pela Receita provoca discussões', + 'description': 'md5:c3c4b4d4c30c32fce460040b1ac46b12', + }, + 'playlist_count': 1, + }, { + 'url': 'http://g1.globo.com/pr/parana/noticia/2016/09/mpf-denuncia-lula-marisa-e-mais-seis-na-operacao-lava-jato.html', + 'info_dict': { + 'id': 'mpf-denuncia-lula-marisa-e-mais-seis-na-operacao-lava-jato', + 'title': "Lula era o 'comandante máximo' do esquema da Lava Jato, diz MPF", + 'description': 'md5:8aa7cc8beda4dc71cc8553e00b77c54c', + }, + 'playlist_count': 6, }, { 'url': 'http://gq.globo.com/Prazeres/Poder/noticia/2015/10/all-o-desafio-assista-ao-segundo-capitulo-da-serie.html', 'only_matching': True, @@ -435,5 +445,12 @@ class GloboArticleIE(InfoExtractor): def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) - video_id = self._search_regex(self._VIDEOID_REGEXES, webpage, 'video id') - return self.url_result('globo:%s' % video_id, 'Globo') + video_ids = [] + for video_regex in self._VIDEOID_REGEXES: + video_ids.extend(re.findall(video_regex, webpage)) + entries = [ + self.url_result('globo:%s' % video_id, GloboIE.ie_key()) + for video_id in orderedSet(video_ids)] + title = self._og_search_title(webpage, fatal=False) + description = 
self._html_search_meta('description', webpage) + return self.playlist_result(entries, display_id, title, description) From 190d2027d0b6c785cf789edf6c1bdac2ef650a66 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 18 Sep 2016 07:22:06 +0700 Subject: [PATCH 511/775] [xfileshare] Add title regex for streamin.to and fallback to video id (Closes #10646) --- youtube_dl/extractor/xfileshare.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/xfileshare.py b/youtube_dl/extractor/xfileshare.py index 995aada0d..de344bad2 100644 --- a/youtube_dl/extractor/xfileshare.py +++ b/youtube_dl/extractor/xfileshare.py @@ -124,12 +124,14 @@ class XFileShareIE(InfoExtractor): webpage = self._download_webpage(req, video_id, 'Downloading video page') title = (self._search_regex( - [r'style="z-index: [0-9]+;">([^<]+)</span>', + (r'style="z-index: [0-9]+;">([^<]+)</span>', r'<td nowrap>([^<]+)</td>', r'h4-fine[^>]*>([^<]+)<', r'>Watch (.+) ', - r'<h2 class="video-page-head">([^<]+)</h2>'], - webpage, 'title', default=None) or self._og_search_title(webpage)).strip() + r'<h2 class="video-page-head">([^<]+)</h2>', + r'<h2 style="[^"]*color:#403f3d[^"]*"[^>]*>([^<]+)<'), # streamin.to + webpage, 'title', default=None) or self._og_search_title( + webpage, default=None) or video_id).strip() def extract_video_url(default=NO_DEFAULT): return self._search_regex( From 14ae11efab64baf4994688490474609554c1bf80 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 18 Sep 2016 16:56:40 +0700 Subject: [PATCH 512/775] [vyborymos] Add extractor (Closes #10692) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/vyborymos.py | 55 ++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+) create mode 100644 youtube_dl/extractor/vyborymos.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 4baf4cd48..8166fd4f9 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1069,6 +1069,7 @@ from .vporn import VpornIE from .vrt import VRTIE from .vube import VubeIE from .vuclip import VuClipIE +from .vyborymos import VyboryMosIE from .walla import WallaIE from .washingtonpost import ( WashingtonPostIE, diff --git a/youtube_dl/extractor/vyborymos.py b/youtube_dl/extractor/vyborymos.py new file mode 100644 index 000000000..884aecb71 --- /dev/null +++ b/youtube_dl/extractor/vyborymos.py @@ -0,0 +1,55 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class VyboryMosIE(InfoExtractor): + _VALID_URL = r'https?://vybory\.mos\.ru/(?:#precinct/|account/channels\?.*?\bstation_id=)(?P<id>\d+)' + _TESTS = [{ + 'url': 'http://vybory.mos.ru/#precinct/13636', + 'info_dict': { + 'id': '13636', + 'ext': 'mp4', + 'title': 're:^Участковая избирательная комиссия №2231 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + 'description': 'Россия, Москва, улица Введенского, 32А', + 'is_live': True, + }, + 'params': { + 'skip_download': True, + } + }, { + 'url': 'http://vybory.mos.ru/account/channels?station_id=13636', + 'only_matching': True, + }] + + def _real_extract(self, url): + station_id = self._match_id(url) + + channels = self._download_json( + 'http://vybory.mos.ru/account/channels?station_id=%s' % station_id, + station_id) + + formats = [] + for cam_num, (sid, hosts, name, _) in enumerate(channels, 1): + for num, host in enumerate(hosts, 1): + formats.append({ + 'url': 
'http://%s/master.m3u8?sid=%s' % (host, sid), + 'ext': 'mp4', + 'format_id': 'camera%d-host%d' % (cam_num, num), + 'format_note': '%s, %s' % (name, host), + }) + + info = self._download_json( + 'http://vybory.mos.ru/json/voting_stations/136/%s.json' % station_id, + station_id, 'Downloading station info') + + title = info['name'] + + return { + 'id': station_id, + 'title': self._live_title(title), + 'description': info.get('address'), + 'is_live': True, + 'formats': formats, + } From 9ca93b99d110f58ec9b280020fb5fede2441794e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 18 Sep 2016 17:15:22 +0700 Subject: [PATCH 513/775] [ChangeLog] Actualize --- ChangeLog | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/ChangeLog b/ChangeLog index b0a65bde2..dd11a17b9 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,8 +1,23 @@ version <unreleased> +Core ++ Introduce manifest_url and fragments fields in formats dictionary for + fragmented media ++ Provide manifest_url field for DASH segments, HLS and HDS ++ Provide fragments field for DASH segments +* Rework DASH segments downloader to use fragments field ++ Add helper method for Wowza Streaming Engine formats extraction + Extractors ++ [vyborymos] Add extractor for vybory.mos.ru (#10692) ++ [xfileshare] Add title regular expression for streamin.to (#10646) ++ [globo:article] Add support for multiple videos (#10653) + [thisav] Recognize HTML5 videos (#10447) * [jwplatform] Improve JWPlayer detection ++ [mangomolo] Add support for Mangomolo embeds ++ [toutv] Add support for authentication (#10669) +* [franceinter] Fix upload date extraction +* [tv4] Fix HLS and HDS formats extraction (#10659) version 2016.09.15 From 3acff9423df437dd4bd1530a69011fc9ddc74ad1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 18 Sep 2016 17:16:55 +0700 Subject: [PATCH 514/775] release 2016.09.18 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 3 +++ youtube_dl/version.py | 2 +- 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 61cea757c..b9d8ebad7 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.15*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.15** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.18*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.18** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.09.15 +[debug] youtube-dl version 2016.09.18 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index dd11a17b9..a71fadfa7 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2016.09.18 Core + Introduce manifest_url and fragments fields in formats dictionary for diff --git a/docs/supportedsites.md b/docs/supportedsites.md index fcb618561..95a137393 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -388,6 +388,8 @@ - **mailru**: Видео@Mail.Ru - **MakersChannel** - **MakerTV** + - **mangomolo:live** + - **mangomolo:video** - **MatchTV** - **MDR**: MDR.DE and KiKA - **media.ccc.de** @@ -849,6 +851,7 @@ - **VRT** - **vube**: Vube.com - **VuClip** + - **VyboryMos** - **Walla** - **washingtonpost** - **washingtonpost:article** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 081fd6ef0..5ae6a72aa 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.09.15' +__version__ = '2016.09.18' From a1da888d0cc92fdf3506b30ee85ce241e9090408 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 18 Sep 2016 17:28:41 +0700 Subject: [PATCH 515/775] [vyborymos] Improve station info extraction --- youtube_dl/extractor/vyborymos.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/vyborymos.py b/youtube_dl/extractor/vyborymos.py index 884aecb71..9e703c4b6 100644 --- a/youtube_dl/extractor/vyborymos.py +++ b/youtube_dl/extractor/vyborymos.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..compat import compat_str class VyboryMosIE(InfoExtractor): @@ -28,7 +29,7 @@ class VyboryMosIE(InfoExtractor): channels = self._download_json( 'http://vybory.mos.ru/account/channels?station_id=%s' % station_id, - station_id) + station_id, 'Downloading channels JSON') formats = [] for cam_num, (sid, hosts, name, _) in enumerate(channels, 1): @@ -41,14 +42,13 @@ class VyboryMosIE(InfoExtractor): }) info = self._download_json( - 'http://vybory.mos.ru/json/voting_stations/136/%s.json' % station_id, - station_id, 'Downloading station info') - - title = info['name'] + 'http://vybory.mos.ru/json/voting_stations/%s/%s.json' + % (compat_str(station_id)[:3], station_id), + station_id, 'Downloading station JSON', fatal=False) return { 'id': station_id, - 'title': self._live_title(title), + 'title': self._live_title(info['name'] if info else station_id), 'description': info.get('address'), 'is_live': True, 'formats': formats, From d8dbf8707d4e45a939fc74c76bb919771007f8ba Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 18 Sep 2016 18:33:54 +0800 Subject: [PATCH 
516/775] [thisav] Improve title extraction (closes #10682) I didn't add a test case as the one in #10682 looks like a copyrighted product. --- ChangeLog | 6 ++++++ youtube_dl/extractor/thisav.py | 5 ++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index a71fadfa7..18f9fa861 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version <unreleased> + +Extractors +* [thisav] Improve title extraction (#10682) + + version 2016.09.18 Core diff --git a/youtube_dl/extractor/thisav.py b/youtube_dl/extractor/thisav.py index 027a8e907..4473a3c77 100644 --- a/youtube_dl/extractor/thisav.py +++ b/youtube_dl/extractor/thisav.py @@ -4,6 +4,7 @@ from __future__ import unicode_literals import re from .jwplatform import JWPlatformBaseIE +from ..utils import remove_end class ThisAVIE(JWPlatformBaseIE): @@ -35,7 +36,9 @@ class ThisAVIE(JWPlatformBaseIE): video_id = mobj.group('id') webpage = self._download_webpage(url, video_id) - title = self._html_search_regex(r'<h1>([^<]*)</h1>', webpage, 'title') + title = remove_end(self._html_search_regex( + r'<title>([^<]+)', webpage, 'title'), + ' - 視頻 - ThisAV.com-世界第一中文成人娛樂網站') video_url = self._html_search_regex( r"addVariable\('file','([^']+)'\);", webpage, 'video url', default=None) if video_url: From cc764a6da8530248f9810397a22b20c972877a97 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 18 Sep 2016 19:10:18 +0700 Subject: [PATCH 517/775] [twitch:stream] Remove fallback to profile extraction when stream is offline Main page does not contain profile videos anymore --- youtube_dl/extractor/twitch.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index af6d890b0..bc352391e 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -400,11 +400,8 @@ class TwitchStreamIE(TwitchBaseIE): 'kraken/streams/%s' % channel_id, channel_id, 'Downloading stream JSON').get('stream') - # Fallback on profile extraction if stream is offline if not stream: - return self.url_result( - 'http://www.twitch.tv/%s/profile' % channel_id, - 'TwitchProfile', channel_id) + raise ExtractorError('%s is offline' % channel_id, expected=True) # Channel name may be typed if different case than the original channel name # (e.g. 
http://www.twitch.tv/TWITCHPLAYSPOKEMON) that will lead to constructing From 70b4cf9b1b8a2c2935ca7384d7545463cfd4ea16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 19 Sep 2016 02:50:06 +0700 Subject: [PATCH 518/775] [crunchyroll] Check if already logged in (Closes #10700) --- youtube_dl/extractor/crunchyroll.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 1b69bd0b6..e4c10ad24 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -46,6 +46,13 @@ class CrunchyrollBaseIE(InfoExtractor): login_page = self._download_webpage( self._LOGIN_URL, None, 'Downloading login page') + def is_logged(webpage): + return 'Redirecting' in webpage + + # Already logged in + if is_logged(login_page): + return + login_form_str = self._search_regex( r'(?P<form><form[^>]+?id=(["\'])%s\2[^>]*>)' % self._LOGIN_FORM, login_page, 'login form', group='form') @@ -69,7 +76,7 @@ class CrunchyrollBaseIE(InfoExtractor): headers={'Content-Type': 'application/x-www-form-urlencoded'}) # Successful login - if '<title>Redirecting' in response: + if is_logged(response): return error = self._html_search_regex( From 59fd8f931d274cc702a7e260e9ec996f8db7c9f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 19 Sep 2016 02:57:14 +0700 Subject: [PATCH 519/775] [ChangeLog] Actualize --- ChangeLog | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ChangeLog b/ChangeLog index 18f9fa861..c67d5f650 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,7 +1,10 @@ version <unreleased> Extractors ++ [crunchyroll] Check if already authenticated (#10700) +- [twitch:stream] Remove fallback to profile extraction when stream is offline * [thisav] Improve title extraction (#10682) +* [vyborymos] Improve station info extraction version 2016.09.18 From cb57386873a053b3328a78f48cf27f23ca6897d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 19 Sep 2016 02:58:32 +0700 Subject: [PATCH 520/775] release 2016.09.19 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index b9d8ebad7..8b28d784a 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.18*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.18** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.19*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.19** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.09.18 +[debug] youtube-dl version 2016.09.19 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index c67d5f650..24077c430 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2016.09.19 Extractors + [crunchyroll] Check if already authenticated (#10700) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 5ae6a72aa..9d3138181 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.09.18' +__version__ = '2016.09.19' From c38f06818df83f5f46cbdee1069bfaf53a537cc8 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 20 Sep 2016 11:55:30 +0100 Subject: [PATCH 521/775] add support for Adobe Pass auth in tbs,tnt and trutv extractors(fixes #10642)(closes #10222)(closes #10519) --- youtube_dl/extractor/adobepass.py | 2 +- youtube_dl/extractor/tbs.py | 13 +++++-------- youtube_dl/extractor/trutv.py | 12 ++++++++++++ youtube_dl/extractor/turner.py | 17 ++++++++++------- 4 files changed, 28 insertions(+), 16 deletions(-) diff --git a/youtube_dl/extractor/adobepass.py b/youtube_dl/extractor/adobepass.py index 01932e5e6..c787e0962 100644 --- a/youtube_dl/extractor/adobepass.py +++ b/youtube_dl/extractor/adobepass.py @@ -83,7 +83,7 @@ class AdobePassIE(InfoExtractor): 'User-Agent': self._USER_AGENT, } - guid = xml_text(resource, 'guid') + guid = xml_text(resource, 'guid') if '<' in resource else resource count = 0 while count < 2: requestor_info = self._downloader.cache.load(self._MVPD_CACHE, requestor_id) or {} diff --git a/youtube_dl/extractor/tbs.py b/youtube_dl/extractor/tbs.py index 0c351e045..bf93eb868 100644 --- a/youtube_dl/extractor/tbs.py +++ b/youtube_dl/extractor/tbs.py @@ -4,10 +4,7 @@ from __future__ import unicode_literals import re from .turner import TurnerBaseIE -from ..utils import ( - extract_attributes, - ExtractorError, -) +from ..utils import extract_attributes class TBSIE(TurnerBaseIE): @@ -37,10 +34,6 @@ class TBSIE(TurnerBaseIE): site = domain[:3] webpage = self._download_webpage(url, display_id) video_params = extract_attributes(self._search_regex(r'(<[^>]+id="page-video"[^>]*>)', webpage, 'video params')) - if video_params.get('isAuthRequired') == 'true': - raise ExtractorError( - 'This video is only available via cable service provider subscription that' - ' is not currently supported.', expected=True) query = None clip_id = video_params.get('clipid') if clip_id: @@ -56,4 +49,8 @@ class TBSIE(TurnerBaseIE): 'media_src': 'http://androidhls-secure.cdn.turner.com/%s/big' % site, 'tokenizer_src': 'http://www.%s.com/video/processors/services/token_ipadAdobe.do' % domain, }, + }, { + 'url': url, + 'site_name': site.upper(), + 'auth_required': 
video_params.get('isAuthRequired') != 'false', }) diff --git a/youtube_dl/extractor/trutv.py b/youtube_dl/extractor/trutv.py index e60d8a181..3a5782525 100644 --- a/youtube_dl/extractor/trutv.py +++ b/youtube_dl/extractor/trutv.py @@ -22,9 +22,17 @@ class TruTVIE(TurnerBaseIE): def _real_extract(self, url): path, video_id = re.match(self._VALID_URL, url).groups() + auth_required = False if path: data_src = 'http://www.trutv.com/video/cvp/v2/xml/content.xml?id=%s.xml' % path else: + webpage = self._download_webpage(url, video_id) + video_id = self._search_regex( + r"TTV\.TVE\.episodeId\s*=\s*'([^']+)';", + webpage, 'video id', default=video_id) + auth_required = self._search_regex( + r'TTV\.TVE\.authRequired\s*=\s*(true|false);', + webpage, 'auth required', default='false') == 'true' data_src = 'http://www.trutv.com/tveverywhere/services/cvpXML.do?titleId=' + video_id return self._extract_cvp_info( data_src, path, { @@ -32,4 +40,8 @@ class TruTVIE(TurnerBaseIE): 'media_src': 'http://androidhls-secure.cdn.turner.com/trutv/big', 'tokenizer_src': 'http://www.trutv.com/tveverywhere/processors/services/token_ipadAdobe.do', }, + }, { + 'url': url, + 'site_name': 'truTV', + 'auth_required': auth_required, }) diff --git a/youtube_dl/extractor/turner.py b/youtube_dl/extractor/turner.py index 4228c1ccc..57ffedb87 100644 --- a/youtube_dl/extractor/turner.py +++ b/youtube_dl/extractor/turner.py @@ -3,7 +3,7 @@ from __future__ import unicode_literals import re -from .common import InfoExtractor +from .adobepass import AdobePassIE from ..compat import compat_str from ..utils import ( xpath_text, @@ -16,11 +16,11 @@ from ..utils import ( ) -class TurnerBaseIE(InfoExtractor): +class TurnerBaseIE(AdobePassIE): def _extract_timestamp(self, video_data): return int_or_none(xpath_attr(video_data, 'dateCreated', 'uts')) - def _extract_cvp_info(self, data_src, video_id, path_data={}): + def _extract_cvp_info(self, data_src, video_id, path_data={}, ap_data={}): video_data = self._download_xml(data_src, video_id) video_id = video_data.attrib['id'] title = xpath_text(video_data, 'headline', fatal=True) @@ -70,11 +70,14 @@ class TurnerBaseIE(InfoExtractor): secure_path = self._search_regex(r'https?://[^/]+(.+/)', video_url, 'secure path') + '*' token = tokens.get(secure_path) if not token: + query = { + 'path': secure_path, + 'videoId': content_id, + } + if ap_data.get('auth_required'): + query['accessToken'] = self._extract_mvpd_auth(ap_data['url'], video_id, ap_data['site_name'], ap_data['site_name']) auth = self._download_xml( - secure_path_data['tokenizer_src'], video_id, query={ - 'path': secure_path, - 'videoId': content_id, - }) + secure_path_data['tokenizer_src'], video_id, query=query) error_msg = xpath_text(auth, 'error/msg') if error_msg: raise ExtractorError(error_msg, expected=True) From e33a7253b23e0adca9a3cb9a3856952c922a3357 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 20 Sep 2016 15:52:23 +0100 Subject: [PATCH 522/775] [fox] add support for Adobe Pass auth(closes #8584) --- youtube_dl/extractor/fox.py | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/fox.py b/youtube_dl/extractor/fox.py index 9f406b17e..9f2e5d065 100644 --- a/youtube_dl/extractor/fox.py +++ b/youtube_dl/extractor/fox.py @@ -1,14 +1,14 @@ # coding: utf-8 from __future__ import unicode_literals -from .common import InfoExtractor +from .adobepass import AdobePassIE from ..utils import ( smuggle_url, update_url_query, ) -class 
FOXIE(InfoExtractor): +class FOXIE(AdobePassIE): _VALID_URL = r'https?://(?:www\.)?fox\.com/watch/(?P<id>[0-9]+)' _TEST = { 'url': 'http://www.fox.com/watch/255180355939/7684182528', @@ -30,14 +30,26 @@ class FOXIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - release_url = self._parse_json(self._search_regex( - r'"fox_pdk_player"\s*:\s*({[^}]+?})', webpage, 'fox_pdk_player'), - video_id)['release_url'] + settings = self._parse_json(self._search_regex( + r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);', + webpage, 'drupal settings'), video_id) + fox_pdk_player = settings['fox_pdk_player'] + release_url = fox_pdk_player['release_url'] + query = { + 'mbr': 'true', + 'switch': 'http' + } + if fox_pdk_player.get('access') == 'locked': + ap_p = settings['foxAdobePassProvider'] + rating = ap_p.get('videoRating') + if rating == 'n/a': + rating = None + resource = self._get_mvpd_resource('fbc-fox', None, ap_p['videoGUID'], rating) + query['auth'] = self._extract_mvpd_auth(url, video_id, 'fbc-fox', resource) return { '_type': 'url_transparent', 'ie_key': 'ThePlatform', - 'url': smuggle_url(update_url_query( - release_url, {'switch': 'http'}), {'force_smil_url': True}), + 'url': smuggle_url(update_url_query(release_url, query), {'force_smil_url': True}), 'id': video_id, } From 4bfd294e2f83301921494c02e497cccf1a26cfd5 Mon Sep 17 00:00:00 2001 From: coolsa <noob.cloud@gmail.com> Date: Sun, 18 Sep 2016 03:53:05 -0600 Subject: [PATCH 523/775] [soundcloud] Extract license metadata --- youtube_dl/extractor/soundcloud.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 9635c2b49..47b84809f 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -53,6 +53,7 @@ class SoundcloudIE(InfoExtractor): 'uploader': 'E.T. 
ExTerrestrial Music', 'title': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1', 'duration': 143, + 'license': 'all-rights-reserved', } }, # not streamable song @@ -66,6 +67,7 @@ class SoundcloudIE(InfoExtractor): 'uploader': 'The Royal Concept', 'upload_date': '20120521', 'duration': 227, + 'license': 'all-rights-reserved', }, 'params': { # rtmp @@ -84,6 +86,7 @@ class SoundcloudIE(InfoExtractor): 'description': 'test chars: \"\'/\\ä↭', 'upload_date': '20131209', 'duration': 9, + 'license': 'all-rights-reserved', }, }, # private link (alt format) @@ -98,6 +101,7 @@ class SoundcloudIE(InfoExtractor): 'description': 'test chars: \"\'/\\ä↭', 'upload_date': '20131209', 'duration': 9, + 'license': 'all-rights-reserved', }, }, # downloadable song @@ -112,6 +116,7 @@ class SoundcloudIE(InfoExtractor): 'uploader': 'oddsamples', 'upload_date': '20140109', 'duration': 17, + 'license': 'cc-by-sa', }, }, ] @@ -138,8 +143,8 @@ class SoundcloudIE(InfoExtractor): name = full_title or track_id if quiet: self.report_extraction(name) - thumbnail = info['artwork_url'] + track_license = info['license'] if thumbnail is not None: thumbnail = thumbnail.replace('-large', '-t500x500') ext = 'mp3' @@ -152,6 +157,7 @@ class SoundcloudIE(InfoExtractor): 'thumbnail': thumbnail, 'duration': int_or_none(info.get('duration'), 1000), 'webpage_url': info.get('permalink_url'), + 'license': track_license, } formats = [] if info.get('downloadable', False): @@ -222,6 +228,7 @@ class SoundcloudIE(InfoExtractor): track_id = mobj.group('track_id') token = None + if track_id is not None: info_json_url = 'http://api.soundcloud.com/tracks/' + track_id + '.json?client_id=' + self._CLIENT_ID full_title = track_id From f62a77b99a73ed3acf8406efaa34d08c73682be3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 20 Sep 2016 21:55:57 +0700 Subject: [PATCH 524/775] [soundcloud] Modernize --- youtube_dl/extractor/soundcloud.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 47b84809f..513c54829 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -143,21 +143,20 @@ class SoundcloudIE(InfoExtractor): name = full_title or track_id if quiet: self.report_extraction(name) - thumbnail = info['artwork_url'] - track_license = info['license'] - if thumbnail is not None: + thumbnail = info.get('artwork_url') + if isinstance(thumbnail, compat_str): thumbnail = thumbnail.replace('-large', '-t500x500') ext = 'mp3' result = { 'id': track_id, - 'uploader': info['user']['username'], - 'upload_date': unified_strdate(info['created_at']), + 'uploader': info.get('user', {}).get('username'), + 'upload_date': unified_strdate(info.get('created_at')), 'title': info['title'], - 'description': info['description'], + 'description': info.get('description'), 'thumbnail': thumbnail, 'duration': int_or_none(info.get('duration'), 1000), 'webpage_url': info.get('permalink_url'), - 'license': track_license, + 'license': info.get('license'), } formats = [] if info.get('downloadable', False): @@ -227,7 +226,6 @@ class SoundcloudIE(InfoExtractor): raise ExtractorError('Invalid URL: %s' % url) track_id = mobj.group('track_id') - token = None if track_id is not None: info_json_url = 'http://api.soundcloud.com/tracks/' + track_id + '.json?client_id=' + self._CLIENT_ID From 1ae0ae5db0bc9c388de970c71880e2f3dc400cc3 Mon Sep 17 00:00:00 2001 From: Remita Amine 
<remitamine@gmail.com> Date: Tue, 20 Sep 2016 18:51:29 +0100 Subject: [PATCH 525/775] [cartoonnetwork] add support Adobe Pass auth --- youtube_dl/extractor/cartoonnetwork.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/youtube_dl/extractor/cartoonnetwork.py b/youtube_dl/extractor/cartoonnetwork.py index 688a6375e..086ec90c9 100644 --- a/youtube_dl/extractor/cartoonnetwork.py +++ b/youtube_dl/extractor/cartoonnetwork.py @@ -33,4 +33,10 @@ class CartoonNetworkIE(TurnerBaseIE): 'media_src': 'http://androidhls-secure.cdn.turner.com/toon/big', 'tokenizer_src': 'http://www.cartoonnetwork.com/cntv/mvpd/processors/services/token_ipadAdobe.do', }, + }, { + 'url': url, + 'site_name': 'CartoonNetwork', + 'auth_required': self._search_regex( + r'_cnglobal\.cvpFullOrPreviewAuth\s*=\s*(true|false);', + webpage, 'auth required', default='false') == 'true', }) From 3a5a18705f2a7faf64a4b69665511ef5f0c6084d Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 21 Sep 2016 15:56:31 +0100 Subject: [PATCH 526/775] [adobepass] add support MSO that depend on watchTVeverywhere(closes #10709) --- youtube_dl/extractor/adobepass.py | 1264 ++++++++++++++++++++++++++++- 1 file changed, 1259 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/adobepass.py b/youtube_dl/extractor/adobepass.py index c787e0962..8f7ed6ef2 100644 --- a/youtube_dl/extractor/adobepass.py +++ b/youtube_dl/extractor/adobepass.py @@ -17,15 +17,1269 @@ from ..utils import ( MSO_INFO = { 'DTV': { - 'name': 'DirecTV', + 'name': 'DIRECTV', 'username_field': 'username', 'password_field': 'password', }, 'Rogers': { - 'name': 'Rogers Cable', + 'name': 'Rogers', 'username_field': 'UserName', 'password_field': 'UserPassword', }, + 'thr030': { + 'name': '3 Rivers Communications' + }, + 'com140': { + 'name': 'Access Montana' + }, + 'acecommunications': { + 'name': 'AcenTek' + }, + 'acm010': { + 'name': 'Acme Communications' + }, + 'ada020': { + 'name': 'Adams Cable Service' + }, + 'alb020': { + 'name': 'Albany Mutual Telephone' + }, + 'algona': { + 'name': 'Algona Municipal Utilities' + }, + 'allwest': { + 'name': 'All West Communications' + }, + 'all025': { + 'name': 'Allen\'s Communications' + }, + 'spl010': { + 'name': 'Alliance Communications' + }, + 'all070': { + 'name': 'ALLO Communications' + }, + 'alpine': { + 'name': 'Alpine Communications' + }, + 'hun015': { + 'name': 'American Broadband' + }, + 'nwc010': { + 'name': 'American Broadband Missouri' + }, + 'com130-02': { + 'name': 'American Community Networks' + }, + 'com130-01': { + 'name': 'American Warrior Networks' + }, + 'tom020': { + 'name': 'Amherst Telephone/Tomorrow Valley' + }, + 'tvc020': { + 'name': 'Andycable' + }, + 'arkwest': { + 'name': 'Arkwest Communications' + }, + 'art030': { + 'name': 'Arthur Mutual Telephone Company' + }, + 'arvig': { + 'name': 'Arvig' + }, + 'nttcash010': { + 'name': 'Ashland Home Net' + }, + 'astound': { + 'name': 'Astound (now Wave)' + }, + 'dix030': { + 'name': 'ATC Broadband' + }, + 'ara010': { + 'name': 'ATC Communications' + }, + 'she030-02': { + 'name': 'Ayersville Communications' + }, + 'baldwin': { + 'name': 'Baldwin Lightstream' + }, + 'bal040': { + 'name': 'Ballard TV' + }, + 'cit025': { + 'name': 'Bardstown Cable TV' + }, + 'bay030': { + 'name': 'Bay Country Communications' + }, + 'tel095': { + 'name': 'Beaver Creek Cooperative Telephone' + }, + 'bea020': { + 'name': 'Beaver Valley Cable' + }, + 'bee010': { + 'name': 'Bee Line Cable' + }, + 'wir030': { + 'name': 'Beehive Broadband' + }, + 'bra020': { + 
'name': 'BELD' + }, + 'bel020': { + 'name': 'Bellevue Municipal Cable' + }, + 'vol040-01': { + 'name': 'Ben Lomand Connect / BLTV' + }, + 'bev010': { + 'name': 'BEVCOMM' + }, + 'big020': { + 'name': 'Big Sandy Broadband' + }, + 'ble020': { + 'name': 'Bledsoe Telephone Cooperative' + }, + 'bvt010': { + 'name': 'Blue Valley Tele-Communications' + }, + 'bra050': { + 'name': 'Brandenburg Telephone Co.' + }, + 'bte010': { + 'name': 'Bristol Tennessee Essential Services' + }, + 'annearundel': { + 'name': 'Broadstripe' + }, + 'btc010': { + 'name': 'BTC Communications' + }, + 'btc040': { + 'name': 'BTC Vision - Nahunta' + }, + 'bul010': { + 'name': 'Bulloch Telephone Cooperative' + }, + 'but010': { + 'name': 'Butler-Bremer Communications' + }, + 'tel160-csp': { + 'name': 'C Spire SNAP' + }, + 'csicable': { + 'name': 'Cable Services Inc.' + }, + 'cableamerica': { + 'name': 'CableAmerica' + }, + 'cab038': { + 'name': 'CableSouth Media 3' + }, + 'weh010-camtel': { + 'name': 'Cam-Tel Company' + }, + 'car030': { + 'name': 'Cameron Communications' + }, + 'canbytel': { + 'name': 'Canby Telcom' + }, + 'crt020': { + 'name': 'CapRock Tv' + }, + 'car050': { + 'name': 'Carnegie Cable' + }, + 'cas': { + 'name': 'CAS Cable' + }, + 'casscomm': { + 'name': 'CASSCOMM' + }, + 'mid180-02': { + 'name': 'Catalina Broadband Solutions' + }, + 'cccomm': { + 'name': 'CC Communications' + }, + 'nttccde010': { + 'name': 'CDE Lightband' + }, + 'cfunet': { + 'name': 'Cedar Falls Utilities' + }, + 'dem010-01': { + 'name': 'Celect-Bloomer Telephone Area' + }, + 'dem010-02': { + 'name': 'Celect-Bruce Telephone Area' + }, + 'dem010-03': { + 'name': 'Celect-Citizens Connected Area' + }, + 'dem010-04': { + 'name': 'Celect-Elmwood/Spring Valley Area' + }, + 'dem010-06': { + 'name': 'Celect-Mosaic Telecom' + }, + 'dem010-05': { + 'name': 'Celect-West WI Telephone Area' + }, + 'net010-02': { + 'name': 'Cellcom/Nsight Telservices' + }, + 'cen100': { + 'name': 'CentraCom' + }, + 'nttccst010': { + 'name': 'Central Scott / CSTV' + }, + 'cha035': { + 'name': 'Chaparral CableVision' + }, + 'cha050': { + 'name': 'Chariton Valley Communication Corporation, Inc.' + }, + 'cha060': { + 'name': 'Chatmoss Cablevision' + }, + 'nttcche010': { + 'name': 'Cherokee Communications' + }, + 'che050': { + 'name': 'Chesapeake Bay Communications' + }, + 'cimtel': { + 'name': 'Cim-Tel Cable, LLC.' + }, + 'cit180': { + 'name': 'Citizens Cablevision - Floyd, VA' + }, + 'cit210': { + 'name': 'Citizens Cablevision, Inc.' + }, + 'cit040': { + 'name': 'Citizens Fiber' + }, + 'cit250': { + 'name': 'Citizens Mutual' + }, + 'war040': { + 'name': 'Citizens Telephone Corporation' + }, + 'wat025': { + 'name': 'City Of Monroe' + }, + 'wadsworth': { + 'name': 'CityLink' + }, + 'nor100': { + 'name': 'CL Tel' + }, + 'cla010': { + 'name': 'Clarence Telephone and Cedar Communications' + }, + 'ser060': { + 'name': 'Clear Choice Communications' + }, + 'tac020': { + 'name': 'Click! Cable TV' + }, + 'war020': { + 'name': 'CLICK1.NET' + }, + 'cml010': { + 'name': 'CML Telephone Cooperative Association' + }, + 'cns': { + 'name': 'CNS' + }, + 'com160': { + 'name': 'Co-Mo Connect' + }, + 'coa020': { + 'name': 'Coast Communications' + }, + 'coa030': { + 'name': 'Coaxial Cable TV' + }, + 'mid055': { + 'name': 'Cobalt TV (Mid-State Community TV)' + }, + 'col070': { + 'name': 'Columbia Power & Water Systems' + }, + 'col080': { + 'name': 'Columbus Telephone' + }, + 'nor105': { + 'name': 'Communications 1 Cablevision, Inc.' 
+ }, + 'com150': { + 'name': 'Community Cable & Broadband' + }, + 'com020': { + 'name': 'Community Communications Company' + }, + 'coy010': { + 'name': 'commZoom' + }, + 'com025': { + 'name': 'Complete Communication Services' + }, + 'cat020': { + 'name': 'Comporium' + }, + 'com071': { + 'name': 'ComSouth Telesys' + }, + 'consolidatedcable': { + 'name': 'Consolidated' + }, + 'conwaycorp': { + 'name': 'Conway Corporation' + }, + 'coo050': { + 'name': 'Coon Valley Telecommunications Inc' + }, + 'coo080': { + 'name': 'Cooperative Telephone Company' + }, + 'cpt010': { + 'name': 'CP-TEL' + }, + 'cra010': { + 'name': 'Craw-Kan Telephone' + }, + 'crestview': { + 'name': 'Crestview Cable Communications' + }, + 'cross': { + 'name': 'Cross TV' + }, + 'cro030': { + 'name': 'Crosslake Communications' + }, + 'ctc040': { + 'name': 'CTC - Brainerd MN' + }, + 'phe030': { + 'name': 'CTV-Beam - East Alabama' + }, + 'cun010': { + 'name': 'Cunningham Telephone & Cable' + }, + 'dpc010': { + 'name': 'D & P Communications' + }, + 'dak030': { + 'name': 'Dakota Central Telecommunications' + }, + 'nttcdel010': { + 'name': 'Delcambre Telephone LLC' + }, + 'tel160-del': { + 'name': 'Delta Telephone Company' + }, + 'sal040': { + 'name': 'DiamondNet' + }, + 'ind060-dc': { + 'name': 'Direct Communications' + }, + 'doy010': { + 'name': 'Doylestown Cable TV' + }, + 'dic010': { + 'name': 'DRN' + }, + 'dtc020': { + 'name': 'DTC' + }, + 'dtc010': { + 'name': 'DTC Cable (Delhi)' + }, + 'dum010': { + 'name': 'Dumont Telephone Company' + }, + 'dun010': { + 'name': 'Dunkerton Telephone Cooperative' + }, + 'cci010': { + 'name': 'Duo County Telecom' + }, + 'eagle': { + 'name': 'Eagle Communications' + }, + 'weh010-east': { + 'name': 'East Arkansas Cable TV' + }, + 'eatel': { + 'name': 'EATEL Video, LLC' + }, + 'ell010': { + 'name': 'ECTA' + }, + 'emerytelcom': { + 'name': 'Emery Telcom Video LLC' + }, + 'nor200': { + 'name': 'Empire Access' + }, + 'endeavor': { + 'name': 'Endeavor Communications' + }, + 'sun045': { + 'name': 'Enhanced Telecommunications Corporation' + }, + 'mid030': { + 'name': 'enTouch' + }, + 'epb020': { + 'name': 'EPB Smartnet' + }, + 'jea010': { + 'name': 'EPlus Broadband' + }, + 'com065': { + 'name': 'ETC' + }, + 'ete010': { + 'name': 'Etex Communications' + }, + 'fbc-tele': { + 'name': 'F&B Communications' + }, + 'fal010': { + 'name': 'Falcon Broadband' + }, + 'fam010': { + 'name': 'FamilyView CableVision' + }, + 'far020': { + 'name': 'Farmers Mutual Telephone Company' + }, + 'fay010': { + 'name': 'Fayetteville Public Utilities' + }, + 'sal060': { + 'name': 'fibrant' + }, + 'fid010': { + 'name': 'Fidelity Communications' + }, + 'for030': { + 'name': 'FJ Communications' + }, + 'fli020': { + 'name': 'Flint River Communications' + }, + 'far030': { + 'name': 'FMT - Jesup' + }, + 'foo010': { + 'name': 'Foothills Communications' + }, + 'for080': { + 'name': 'Forsyth CableNet' + }, + 'fbcomm': { + 'name': 'Frankfort Plant Board' + }, + 'tel160-fra': { + 'name': 'Franklin Telephone Company' + }, + 'nttcftc010': { + 'name': 'FTC' + }, + 'fullchannel': { + 'name': 'Full Channel, Inc.' + }, + 'gar040': { + 'name': 'Gardonville Cooperative Telephone Association' + }, + 'gbt010': { + 'name': 'GBT Communications, Inc.' + }, + 'tec010': { + 'name': 'Genuine Telecom' + }, + 'clr010': { + 'name': 'Giant Communications' + }, + 'gla010': { + 'name': 'Glasgow EPB' + }, + 'gle010': { + 'name': 'Glenwood Telecommunications' + }, + 'gra060': { + 'name': 'GLW Broadband Inc.' 
+ }, + 'goldenwest': { + 'name': 'Golden West Cablevision' + }, + 'vis030': { + 'name': 'Grantsburg Telcom' + }, + 'gpcom': { + 'name': 'Great Plains Communications' + }, + 'gri010': { + 'name': 'Gridley Cable Inc' + }, + 'hbc010': { + 'name': 'H&B Cable Services' + }, + 'hae010': { + 'name': 'Haefele TV Inc.' + }, + 'htc010': { + 'name': 'Halstad Telephone Company' + }, + 'har005': { + 'name': 'Harlan Municipal Utilities' + }, + 'har020': { + 'name': 'Hart Communications' + }, + 'ced010': { + 'name': 'Hartelco TV' + }, + 'hea040': { + 'name': 'Heart of Iowa Communications Cooperative' + }, + 'htc020': { + 'name': 'Hickory Telephone Company' + }, + 'nttchig010': { + 'name': 'Highland Communication Services' + }, + 'hig030': { + 'name': 'Highland Media' + }, + 'spc010': { + 'name': 'Hilliary Communications' + }, + 'hin020': { + 'name': 'Hinton CATV Co.' + }, + 'hometel': { + 'name': 'HomeTel Entertainment, Inc.' + }, + 'hoodcanal': { + 'name': 'Hood Canal Communications' + }, + 'weh010-hope': { + 'name': 'Hope - Prescott Cable TV' + }, + 'horizoncable': { + 'name': 'Horizon Cable TV, Inc.' + }, + 'hor040': { + 'name': 'Horizon Chillicothe Telephone' + }, + 'htc030': { + 'name': 'HTC Communications Co. - IL' + }, + 'htccomm': { + 'name': 'HTC Communications, Inc. - IA' + }, + 'wal005': { + 'name': 'Huxley Communications' + }, + 'imon': { + 'name': 'ImOn Communications' + }, + 'ind040': { + 'name': 'Independence Telecommunications' + }, + 'rrc010': { + 'name': 'Inland Networks' + }, + 'stc020': { + 'name': 'Innovative Cable TV St Croix' + }, + 'car100': { + 'name': 'Innovative Cable TV St Thomas-St John' + }, + 'icc010': { + 'name': 'Inside Connect Cable' + }, + 'int100': { + 'name': 'Integra Telecom' + }, + 'int050': { + 'name': 'Interstate Telecommunications Coop' + }, + 'irv010': { + 'name': 'Irvine Cable' + }, + 'k2c010': { + 'name': 'K2 Communications' + }, + 'kal010': { + 'name': 'Kalida Telephone Company, Inc.' + }, + 'kal030': { + 'name': 'Kalona Cooperative Telephone Company' + }, + 'kmt010': { + 'name': 'KMTelecom' + }, + 'kpu010': { + 'name': 'KPU Telecommunications' + }, + 'kuh010': { + 'name': 'Kuhn Communications, Inc.' + }, + 'lak130': { + 'name': 'Lakeland Communications' + }, + 'lan010': { + 'name': 'Langco' + }, + 'lau020': { + 'name': 'Laurel Highland Total Communications, Inc.' + }, + 'leh010': { + 'name': 'Lehigh Valley Cooperative Telephone' + }, + 'bra010': { + 'name': 'Limestone Cable/Bracken Cable' + }, + 'loc020': { + 'name': 'LISCO' + }, + 'lit020': { + 'name': 'Litestream' + }, + 'tel140': { + 'name': 'LivCom' + }, + 'loc010': { + 'name': 'LocalTel Communications' + }, + 'weh010-longview': { + 'name': 'Longview - Kilgore Cable TV' + }, + 'lon030': { + 'name': 'Lonsdale Video Ventures, LLC' + }, + 'lns010': { + 'name': 'Lost Nation-Elwood Telephone Co.' + }, + 'nttclpc010': { + 'name': 'LPC Connect' + }, + 'lumos': { + 'name': 'Lumos Networks' + }, + 'madison': { + 'name': 'Madison Communications' + }, + 'mad030': { + 'name': 'Madison County Cable Inc.' + }, + 'nttcmah010': { + 'name': 'Mahaska Communication Group' + }, + 'mar010': { + 'name': 'Marne & Elk Horn Telephone Company' + }, + 'mcc040': { + 'name': 'McClure Telephone Co.' + }, + 'mctv': { + 'name': 'MCTV' + }, + 'merrimac': { + 'name': 'Merrimac Communications Ltd.' 
+ }, + 'metronet': { + 'name': 'Metronet' + }, + 'mhtc': { + 'name': 'MHTC' + }, + 'midhudson': { + 'name': 'Mid-Hudson Cable' + }, + 'midrivers': { + 'name': 'Mid-Rivers Communications' + }, + 'mid045': { + 'name': 'Midstate Communications' + }, + 'mil080': { + 'name': 'Milford Communications' + }, + 'min030': { + 'name': 'MINET' + }, + 'nttcmin010': { + 'name': 'Minford TV' + }, + 'san040-02': { + 'name': 'Mitchell Telecom' + }, + 'mlg010': { + 'name': 'MLGC' + }, + 'mon060': { + 'name': 'Mon-Cre TVE' + }, + 'mou110': { + 'name': 'Mountain Telephone' + }, + 'mou050': { + 'name': 'Mountain Village Cable' + }, + 'mtacomm': { + 'name': 'MTA Communications, LLC' + }, + 'mtc010': { + 'name': 'MTC Cable' + }, + 'med040': { + 'name': 'MTC Technologies' + }, + 'man060': { + 'name': 'MTCC' + }, + 'mtc030': { + 'name': 'MTCO Communications' + }, + 'mul050': { + 'name': 'Mulberry Telecommunications' + }, + 'mur010': { + 'name': 'Murray Electric System' + }, + 'musfiber': { + 'name': 'MUS FiberNET' + }, + 'mpw': { + 'name': 'Muscatine Power & Water' + }, + 'nttcsli010': { + 'name': 'myEVTV.com' + }, + 'nor115': { + 'name': 'NCC' + }, + 'nor260': { + 'name': 'NDTC' + }, + 'nctc': { + 'name': 'Nebraska Central Telecom, Inc.' + }, + 'nel020': { + 'name': 'Nelsonville TV Cable' + }, + 'nem010': { + 'name': 'Nemont' + }, + 'new075': { + 'name': 'New Hope Telephone Cooperative' + }, + 'nor240': { + 'name': 'NICP' + }, + 'cic010': { + 'name': 'NineStar Connect' + }, + 'nktelco': { + 'name': 'NKTelco' + }, + 'nortex': { + 'name': 'Nortex Communications' + }, + 'nor140': { + 'name': 'North Central Telephone Cooperative' + }, + 'nor030': { + 'name': 'Northland Communications' + }, + 'nor075': { + 'name': 'Northwest Communications' + }, + 'nor125': { + 'name': 'Norwood Light Broadband' + }, + 'net010': { + 'name': 'Nsight Telservices' + }, + 'dur010': { + 'name': 'Ntec' + }, + 'nts010': { + 'name': 'NTS Communications' + }, + 'new045': { + 'name': 'NU-Telecom' + }, + 'nulink': { + 'name': 'NuLink' + }, + 'jam030': { + 'name': 'NVC' + }, + 'far035': { + 'name': 'OmniTel Communications' + }, + 'onesource': { + 'name': 'OneSource Communications' + }, + 'cit230': { + 'name': 'Opelika Power Services' + }, + 'daltonutilities': { + 'name': 'OptiLink' + }, + 'mid140': { + 'name': 'OPTURA' + }, + 'ote010': { + 'name': 'OTEC Communication Company' + }, + 'cci020': { + 'name': 'Packerland Broadband' + }, + 'pan010': { + 'name': 'Panora Telco/Guthrie Center Communications' + }, + 'otter': { + 'name': 'Park Region Telephone & Otter Tail Telcom' + }, + 'mid050': { + 'name': 'Partner Communications Cooperative' + }, + 'fib010': { + 'name': 'Pathway' + }, + 'paulbunyan': { + 'name': 'Paul Bunyan Communications' + }, + 'pem020': { + 'name': 'Pembroke Telephone Company' + }, + 'mck010': { + 'name': 'Peoples Rural Telephone Cooperative' + }, + 'pul010': { + 'name': 'PES Energize' + }, + 'phi010': { + 'name': 'Philippi Communications System' + }, + 'phonoscope': { + 'name': 'Phonoscope Cable' + }, + 'pin070': { + 'name': 'Pine Belt Communications, Inc.' + }, + 'weh010-pine': { + 'name': 'Pine Bluff Cable TV' + }, + 'pin060': { + 'name': 'Pineland Telephone Cooperative' + }, + 'cam010': { + 'name': 'Pinpoint Communications' + }, + 'pio060': { + 'name': 'Pioneer Broadband' + }, + 'pioncomm': { + 'name': 'Pioneer Communications' + }, + 'pioneer': { + 'name': 'Pioneer DTV' + }, + 'pla020': { + 'name': 'Plant TiftNet, Inc.' 
+ }, + 'par010': { + 'name': 'PLWC' + }, + 'pro035': { + 'name': 'PMT' + }, + 'vik011': { + 'name': 'Polar Cablevision' + }, + 'pottawatomie': { + 'name': 'Pottawatomie Telephone Co.' + }, + 'premiercomm': { + 'name': 'Premier Communications' + }, + 'psc010': { + 'name': 'PSC' + }, + 'pan020': { + 'name': 'PTCI' + }, + 'qco010': { + 'name': 'QCOL' + }, + 'qua010': { + 'name': 'Quality Cablevision' + }, + 'rad010': { + 'name': 'Radcliffe Telephone Company' + }, + 'car040': { + 'name': 'Rainbow Communications' + }, + 'rai030': { + 'name': 'Rainier Connect' + }, + 'ral010': { + 'name': 'Ralls Technologies' + }, + 'rct010': { + 'name': 'RC Technologies' + }, + 'red040': { + 'name': 'Red River Communications' + }, + 'ree010': { + 'name': 'Reedsburg Utility Commission' + }, + 'mol010': { + 'name': 'Reliance Connects- Oregon' + }, + 'res020': { + 'name': 'Reserve Telecommunications' + }, + 'weh010-resort': { + 'name': 'Resort TV Cable' + }, + 'rld010': { + 'name': 'Richland Grant Telephone Cooperative, Inc.' + }, + 'riv030': { + 'name': 'River Valley Telecommunications Coop' + }, + 'rockportcable': { + 'name': 'Rock Port Cablevision' + }, + 'rsf010': { + 'name': 'RS Fiber' + }, + 'rtc': { + 'name': 'RTC Communication Corp' + }, + 'res040': { + 'name': 'RTC-Reservation Telephone Coop.' + }, + 'rte010': { + 'name': 'RTEC Communications' + }, + 'stc010': { + 'name': 'S&T' + }, + 'san020': { + 'name': 'San Bruno Cable TV' + }, + 'san040-01': { + 'name': 'Santel' + }, + 'sav010': { + 'name': 'SCI Broadband-Savage Communications Inc.' + }, + 'sco050': { + 'name': 'Scottsboro Electric Power Board' + }, + 'scr010': { + 'name': 'Scranton Telephone Company' + }, + 'selco': { + 'name': 'SELCO' + }, + 'she010': { + 'name': 'Shentel' + }, + 'she030': { + 'name': 'Sherwood Mutual Telephone Association, Inc.' + }, + 'ind060-ssc': { + 'name': 'Silver Star Communications' + }, + 'sjoberg': { + 'name': 'Sjoberg\'s Inc.' + }, + 'sou025': { + 'name': 'SKT' + }, + 'sky050': { + 'name': 'SkyBest TV' + }, + 'nttcsmi010': { + 'name': 'Smithville Communications' + }, + 'woo010': { + 'name': 'Solarus' + }, + 'sou075': { + 'name': 'South Central Rural Telephone Cooperative' + }, + 'sou065': { + 'name': 'South Holt Cablevision, Inc.' + }, + 'sou035': { + 'name': 'South Slope Cooperative Communications' + }, + 'spa020': { + 'name': 'Spanish Fork Community Network' + }, + 'spe010': { + 'name': 'Spencer Municipal Utilities' + }, + 'spi005': { + 'name': 'Spillway Communications, Inc.' + }, + 'srt010': { + 'name': 'SRT' + }, + 'cccsmc010': { + 'name': 'St. Maarten Cable TV' + }, + 'sta025': { + 'name': 'Star Communications' + }, + 'sco020': { + 'name': 'STE' + }, + 'uin010': { + 'name': 'STRATA Networks' + }, + 'sum010': { + 'name': 'Sumner Cable TV' + }, + 'pie010': { + 'name': 'Surry TV/PCSI TV' + }, + 'swa010': { + 'name': 'Swayzee Communications' + }, + 'sweetwater': { + 'name': 'Sweetwater Cable Television Co' + }, + 'weh010-talequah': { + 'name': 'Tahlequah Cable TV' + }, + 'tct': { + 'name': 'TCT' + }, + 'tel050': { + 'name': 'Tele-Media Company' + }, + 'com050': { + 'name': 'The Community Agency' + }, + 'thr020': { + 'name': 'Three River' + }, + 'cab140': { + 'name': 'Town & Country Technologies' + }, + 'tra010': { + 'name': 'Trans-Video' + }, + 'tre010': { + 'name': 'Trenton TV Cable Company' + }, + 'tcc': { + 'name': 'Tri County Communications Cooperative' + }, + 'tri025': { + 'name': 'TriCounty Telecom' + }, + 'tri110': { + 'name': 'TrioTel Communications, Inc.' + }, + 'tro010': { + 'name': 'Troy Cablevision, Inc.' 
+ }, + 'tsc': { + 'name': 'TSC' + }, + 'cit220': { + 'name': 'Tullahoma Utilities Board' + }, + 'tvc030': { + 'name': 'TV Cable of Rensselaer' + }, + 'tvc015': { + 'name': 'TVC Cable' + }, + 'cab180': { + 'name': 'TVision' + }, + 'twi040': { + 'name': 'Twin Lakes' + }, + 'tvtinc': { + 'name': 'Twin Valley' + }, + 'uis010': { + 'name': 'Union Telephone Company' + }, + 'uni110': { + 'name': 'United Communications - TN' + }, + 'uni120': { + 'name': 'United Services' + }, + 'uss020': { + 'name': 'US Sonet' + }, + 'cab060': { + 'name': 'USA Communications' + }, + 'she005': { + 'name': 'USA Communications/Shellsburg, IA' + }, + 'val040': { + 'name': 'Valley TeleCom Group' + }, + 'val025': { + 'name': 'Valley Telecommunications' + }, + 'val030': { + 'name': 'Valparaiso Broadband' + }, + 'cla050': { + 'name': 'Vast Broadband' + }, + 'sul015': { + 'name': 'Venture Communications Cooperative, Inc.' + }, + 'ver025': { + 'name': 'Vernon Communications Co-op' + }, + 'weh010-vicksburg': { + 'name': 'Vicksburg Video' + }, + 'vis070': { + 'name': 'Vision Communications' + }, + 'volcanotel': { + 'name': 'Volcano Vision, Inc.' + }, + 'vol040-02': { + 'name': 'VolFirst / BLTV' + }, + 'ver070': { + 'name': 'VTel' + }, + 'nttcvtx010': { + 'name': 'VTX1' + }, + 'bci010-02': { + 'name': 'Vyve Broadband' + }, + 'wab020': { + 'name': 'Wabash Mutual Telephone' + }, + 'waitsfield': { + 'name': 'Waitsfield Cable' + }, + 'wal010': { + 'name': 'Walnut Communications' + }, + 'wavebroadband': { + 'name': 'Wave' + }, + 'wav030': { + 'name': 'Waverly Communications Utility' + }, + 'wbi010': { + 'name': 'WBI' + }, + 'web020': { + 'name': 'Webster-Calhoun Cooperative Telephone Association' + }, + 'wes005': { + 'name': 'West Alabama TV Cable' + }, + 'carolinata': { + 'name': 'West Carolina Communications' + }, + 'wct010': { + 'name': 'West Central Telephone Association' + }, + 'wes110': { + 'name': 'West River Cooperative Telephone Company' + }, + 'ani030': { + 'name': 'WesTel Systems' + }, + 'westianet': { + 'name': 'Western Iowa Networks' + }, + 'nttcwhi010': { + 'name': 'Whidbey Telecom' + }, + 'weh010-white': { + 'name': 'White County Cable TV' + }, + 'wes130': { + 'name': 'Wiatel' + }, + 'wik010': { + 'name': 'Wiktel' + }, + 'wil070': { + 'name': 'Wilkes Communications, Inc./RiverStreet Networks' + }, + 'wil015': { + 'name': 'Wilson Communications' + }, + 'win010': { + 'name': 'Windomnet/SMBS' + }, + 'win090': { + 'name': 'Windstream Cable TV' + }, + 'wcta': { + 'name': 'Winnebago Cooperative Telecom Association' + }, + 'wtc010': { + 'name': 'WTC' + }, + 'wil040': { + 'name': 'WTC Communications, Inc.' 
+ }, + 'wya010': { + 'name': 'Wyandotte Cable' + }, + 'hin020-02': { + 'name': 'X-Stream Services' + }, + 'xit010': { + 'name': 'XIT Communications' + }, + 'yel010': { + 'name': 'Yelcot Communications' + }, + 'mid180-01': { + 'name': 'yondoo' + }, + 'cou060': { + 'name': 'Zito Media' + }, } @@ -113,10 +1367,10 @@ class AdobePassIE(InfoExtractor): provider_login_page_res = post_form( provider_redirect_page_res, 'Downloading Provider Login Page') mvpd_confirm_page_res = post_form(provider_login_page_res, 'Logging in', { - mso_info['username_field']: username, - mso_info['password_field']: password, + mso_info.get('username_field', 'username'): username, + mso_info.get('password_field', 'password'): password, }) - if mso_id == 'DTV': + if mso_id != 'Rogers': post_form(mvpd_confirm_page_res, 'Confirming Login') session = self._download_webpage( From 12f211d0cbd25554ff3116ee173ffc3f25d0e453 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 21 Sep 2016 22:51:36 +0700 Subject: [PATCH 527/775] [videomore] Fix embed regex --- youtube_dl/extractor/videomore.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/videomore.py b/youtube_dl/extractor/videomore.py index 328b5b7fb..8a11ff848 100644 --- a/youtube_dl/extractor/videomore.py +++ b/youtube_dl/extractor/videomore.py @@ -84,7 +84,7 @@ class VideomoreIE(InfoExtractor): @staticmethod def _extract_url(webpage): mobj = re.search( - r'<object[^>]+data=(["\'])https?://videomore.ru/player\.swf\?.*config=(?P<url>https?://videomore\.ru/(?:[^/]+/)+\d+\.xml).*\1', + r'<object[^>]+data=(["\'])https?://videomore\.ru/player\.swf\?.*config=(?P<url>https?://videomore\.ru/(?:[^/]+/)+\d+\.xml).*\1', webpage) if mobj: return mobj.group('url') From 1978540a5122c53012e17a78841f3da0df77fd34 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 21 Sep 2016 21:49:52 +0100 Subject: [PATCH 528/775] [ooyala] extract all hls formats --- youtube_dl/extractor/ooyala.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ooyala.py b/youtube_dl/extractor/ooyala.py index 2038a6ba5..72ec20938 100644 --- a/youtube_dl/extractor/ooyala.py +++ b/youtube_dl/extractor/ooyala.py @@ -47,7 +47,7 @@ class OoyalaBaseIE(InfoExtractor): delivery_type = stream['delivery_type'] if delivery_type == 'hls' or ext == 'm3u8': formats.extend(self._extract_m3u8_formats( - s_url, embed_code, 'mp4', 'm3u8_native', + re.sub(r'/ip(?:ad|hone)/', '/all/', s_url), embed_code, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) elif delivery_type == 'hds' or ext == 'f4m': formats.extend(self._extract_f4m_formats( From 0a439c5c4c1a6a2ee54465c5ad893ffb768539d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 22 Sep 2016 21:48:53 +0700 Subject: [PATCH 529/775] [udemy] Stringify video id --- youtube_dl/extractor/udemy.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py index c2f507233..cce29c6e0 100644 --- a/youtube_dl/extractor/udemy.py +++ b/youtube_dl/extractor/udemy.py @@ -5,6 +5,7 @@ import re from .common import InfoExtractor from ..compat import ( compat_HTTPError, + compat_str, compat_urllib_request, compat_urlparse, ) @@ -207,7 +208,7 @@ class UdemyIE(InfoExtractor): if youtube_url: return self.url_result(youtube_url, 'Youtube') - video_id = asset['id'] + video_id = compat_str(asset['id']) thumbnail = asset.get('thumbnail_url') or 
asset.get('thumbnailUrl') duration = float_or_none(asset.get('data', {}).get('duration')) From e3d6bdc8fc48ddf0bea324c9196297e539669aaf Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 23 Sep 2016 01:11:13 +0800 Subject: [PATCH 530/775] [ustream] Support HLS streams (closes #10698) --- ChangeLog | 5 ++ youtube_dl/extractor/ustream.py | 122 +++++++++++++++++++++++++++++++- 2 files changed, 126 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 24077c430..5122af4c0 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +version <unreleased> + +Extractors ++ [ustream] Support the new HLS streams (#10698) + version 2016.09.19 Extractors diff --git a/youtube_dl/extractor/ustream.py b/youtube_dl/extractor/ustream.py index a3dc9d33e..0c06bf36b 100644 --- a/youtube_dl/extractor/ustream.py +++ b/youtube_dl/extractor/ustream.py @@ -1,15 +1,20 @@ from __future__ import unicode_literals +import random import re from .common import InfoExtractor from ..compat import ( + compat_str, compat_urlparse, ) from ..utils import ( + encode_data_uri, ExtractorError, int_or_none, float_or_none, + mimetype2ext, + str_or_none, ) @@ -47,8 +52,108 @@ class UstreamIE(InfoExtractor): 'id': '10299409', }, 'playlist_count': 3, + }, { + 'url': 'http://www.ustream.tv/recorded/91343263', + 'info_dict': { + 'id': '91343263', + 'ext': 'mp4', + 'title': 'GitHub Universe - General Session - Day 1', + 'upload_date': '20160914', + 'description': 'GitHub Universe - General Session - Day 1', + 'timestamp': 1473872730, + 'uploader': 'wa0dnskeqkr', + 'uploader_id': '38977840', + }, + 'params': { + 'skip_download': True, # m3u8 download + }, }] + def _get_stream_info(self, url, video_id, app_id_ver, extra_note=None): + def num_to_hex(n): + return hex(n)[2:] + + rnd = random.randrange + + if not extra_note: + extra_note = '' + + conn_info = self._download_json( + 'http://r%d-1-%s-recorded-lp-live.ums.ustream.tv/1/ustream' % (rnd(1e8), video_id), + video_id, note='Downloading connection info' + extra_note, + query={ + 'type': 'viewer', + 'appId': app_id_ver[0], + 'appVersion': app_id_ver[1], + 'rsid': '%s:%s' % (num_to_hex(rnd(1e8)), num_to_hex(rnd(1e8))), + 'rpin': '_rpin.%d' % rnd(1e15), + 'referrer': url, + 'media': video_id, + 'application': 'recorded', + }) + host = conn_info[0]['args'][0]['host'] + connection_id = conn_info[0]['args'][0]['connectionId'] + + return self._download_json( + 'http://%s/1/ustream?connectionId=%s' % (host, connection_id), + video_id, note='Downloading stream info' + extra_note) + + def _get_streams(self, url, video_id, app_id_ver): + # Sometimes the return dict does not have 'stream' + for trial_count in range(3): + stream_info = self._get_stream_info( + url, video_id, app_id_ver, + extra_note=' (try %d)' % (trial_count + 1) if trial_count > 0 else '') + if 'stream' in stream_info[0]['args'][0]: + return stream_info[0]['args'][0]['stream'] + return [] + + def _parse_segmented_mp4(self, dash_stream_info): + def resolve_dash_template(template, idx, chunk_hash): + return template.replace('%', compat_str(idx), 1).replace('%', chunk_hash) + + formats = [] + for stream in dash_stream_info['streams']: + # Use only one provider to avoid too many formats + provider = dash_stream_info['providers'][0] + fragments = [{ + 'url': resolve_dash_template( + provider['url'] + stream['initUrl'], 0, dash_stream_info['hashes']['0']) + }] + for idx in range(dash_stream_info['videoLength'] // dash_stream_info['chunkTime']): + fragments.append({ + 'url': resolve_dash_template( + 
provider['url'] + stream['segmentUrl'], idx, + dash_stream_info['hashes'][compat_str(idx // 10 * 10)]) + }) + content_type = stream['contentType'] + kind = content_type.split('/')[0] + f = { + 'format_id': '-'.join(filter(None, [ + 'dash', kind, str_or_none(stream.get('bitrate'))])), + 'protocol': 'http_dash_segments', + # TODO: generate a MPD doc for external players? + 'url': encode_data_uri(b'<MPD/>', 'text/xml'), + 'ext': mimetype2ext(content_type), + 'height': stream.get('height'), + 'width': stream.get('width'), + 'fragments': fragments, + } + if kind == 'video': + f.update({ + 'vcodec': stream.get('codec'), + 'acodec': 'none', + 'vbr': stream.get('bitrate'), + }) + else: + f.update({ + 'vcodec': 'none', + 'acodec': stream.get('codec'), + 'abr': stream.get('bitrate'), + }) + formats.append(f) + return formats + def _real_extract(self, url): m = re.match(self._VALID_URL, url) video_id = m.group('id') @@ -86,7 +191,22 @@ class UstreamIE(InfoExtractor): 'url': video_url, 'ext': format_id, 'filesize': filesize, - } for format_id, video_url in video['media_urls'].items()] + } for format_id, video_url in video['media_urls'].items() if video_url] + + if not formats: + hls_streams = self._get_streams(url, video_id, app_id_ver=(11, 2)) + if hls_streams: + # m3u8_native leads to intermittent ContentTooShortError + formats.extend(self._extract_m3u8_formats( + hls_streams[0]['url'], video_id, ext='mp4', m3u8_id='hls')) + + ''' + # DASH streams handling is incomplete as 'url' is missing + dash_streams = self._get_streams(url, video_id, app_id_ver=(3, 1)) + if dash_streams: + formats.extend(self._parse_segmented_mp4(dash_streams)) + ''' + self._sort_formats(formats) description = video.get('description') From 628406db960c032eb68ef318ce9fecf6b8329834 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 23 Sep 2016 01:13:56 +0800 Subject: [PATCH 531/775] [Makefile] Cleanup files from fragment-based downloaders --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 354052c50..ac234fcb0 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites clean: - rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe + rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part* *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe find . -name "*.pyc" -delete find . 
-name "*.class" -delete From 4ddcb5999d0323fb83c5b879127d31763f5d63e2 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 23 Sep 2016 01:47:01 +0800 Subject: [PATCH 532/775] [openload] Fix extraction (closes #10408, closes #10727) Thanks to @daniel100097 for providing a working version --- ChangeLog | 1 + youtube_dl/extractor/openload.py | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index 5122af4c0..6c72bae90 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version <unreleased> Extractors +* [openload] Fix extraction (#10408) + [ustream] Support the new HLS streams (#10698) version 2016.09.19 diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index c261a7455..b6e3ac250 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -51,7 +51,8 @@ class OpenloadIE(InfoExtractor): # declared to be freely used in youtube-dl # See https://github.com/rg3/youtube-dl/issues/10408 enc_data = self._html_search_regex( - r'<span[^>]+id="hiddenurl"[^>]*>([^<]+)</span>', webpage, 'encrypted data') + r'<span[^>]*>([^<]+)</span>\s*<span[^>]*>[^<]+</span>\s*<span[^>]+id="streamurl"', + webpage, 'encrypted data') video_url_chars = [] @@ -60,7 +61,7 @@ class OpenloadIE(InfoExtractor): if j >= 33 and j <= 126: j = ((j + 14) % 94) + 33 if idx == len(enc_data) - 1: - j += 3 + j += 2 video_url_chars += compat_chr(j) video_url = 'https://openload.co/stream/%s?mime=true' % ''.join(video_url_chars) From 45cae3b021828cc6f7a67c7a14645ae6f0806f59 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 22 Sep 2016 19:27:57 +0100 Subject: [PATCH 533/775] [cbs] extract info from thunder videoPlayerService(closes #10728) --- youtube_dl/extractor/cbs.py | 58 ++++++++++++++++++++++++++----------- 1 file changed, 41 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/cbs.py b/youtube_dl/extractor/cbs.py index 3f4dea40c..58f258c54 100644 --- a/youtube_dl/extractor/cbs.py +++ b/youtube_dl/extractor/cbs.py @@ -4,7 +4,9 @@ from .theplatform import ThePlatformFeedIE from ..utils import ( int_or_none, find_xpath_attr, - ExtractorError, + xpath_element, + xpath_text, + update_url_query, ) @@ -47,27 +49,49 @@ class CBSIE(CBSBaseIE): 'only_matching': True, }] - def _extract_video_info(self, guid): - path = 'dJ5BDC/media/guid/2198311517/' + guid - smil_url = 'http://link.theplatform.com/s/%s?mbr=true' % path - formats, subtitles = self._extract_theplatform_smil(smil_url + '&manifest=m3u', guid) - for r in ('OnceURL&formats=M3U', 'HLS&formats=M3U', 'RTMP', 'WIFI', '3G'): - try: - tp_formats, _ = self._extract_theplatform_smil(smil_url + '&assetTypes=' + r, guid, 'Downloading %s SMIL data' % r.split('&')[0]) - formats.extend(tp_formats) - except ExtractorError: + def _extract_video_info(self, content_id): + items_data = self._download_xml( + 'http://can.cbs.com/thunder/player/videoPlayerService.php', + content_id, query={'partner': 'cbs', 'contentId': content_id}) + video_data = xpath_element(items_data, './/item') + title = xpath_text(video_data, 'videoTitle', 'title', True) + tp_path = 'dJ5BDC/media/guid/2198311517/%s' % content_id + tp_release_url = 'http://link.theplatform.com/s/' + tp_path + + asset_types = [] + subtitles = {} + formats = [] + for item in items_data.findall('.//item'): + asset_type = xpath_text(item, 'assetType') + if not asset_type or asset_type in asset_types: continue + asset_types.append(asset_type) + query = { + 'mbr': 'true', + 'assetTypes': 
asset_type, + } + if asset_type.startswith('HLS') or asset_type in ('OnceURL', 'StreamPack'): + query['formats'] = 'MPEG4,M3U' + elif asset_type in ('RTMP', 'WIFI', '3G'): + query['formats'] = 'MPEG4,FLV' + tp_formats, tp_subtitles = self._extract_theplatform_smil( + update_url_query(tp_release_url, query), content_id, + 'Downloading %s SMIL data' % asset_type) + formats.extend(tp_formats) + subtitles = self._merge_subtitles(subtitles, tp_subtitles) self._sort_formats(formats) - metadata = self._download_theplatform_metadata(path, guid) - info = self._parse_theplatform_metadata(metadata) + + info = self._extract_theplatform_metadata(tp_path, content_id) info.update({ - 'id': guid, + 'id': content_id, + 'title': title, + 'series': xpath_text(video_data, 'seriesTitle'), + 'season_number': int_or_none(xpath_text(video_data, 'seasonNumber')), + 'episode_number': int_or_none(xpath_text(video_data, 'episodeNumber')), + 'duration': int_or_none(xpath_text(video_data, 'videoLength'), 1000), + 'thumbnail': xpath_text(video_data, 'previewImageURL'), 'formats': formats, 'subtitles': subtitles, - 'series': metadata.get('cbs$SeriesTitle'), - 'season_number': int_or_none(metadata.get('cbs$SeasonNumber')), - 'episode': metadata.get('cbs$EpisodeTitle'), - 'episode_number': int_or_none(metadata.get('cbs$EpisodeNumber')), }) return info From 71ad00c09fecd3ecc84784cf215537cad0a79595 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 23 Sep 2016 21:08:16 +0700 Subject: [PATCH 534/775] [prosiebensat1] Add support for kabeleinsdoku (Closes #10732) --- youtube_dl/extractor/prosiebensat1.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py index 7335dc2af..5a29b844d 100644 --- a/youtube_dl/extractor/prosiebensat1.py +++ b/youtube_dl/extractor/prosiebensat1.py @@ -122,7 +122,7 @@ class ProSiebenSat1BaseIE(InfoExtractor): class ProSiebenSat1IE(ProSiebenSat1BaseIE): IE_NAME = 'prosiebensat1' IE_DESC = 'ProSiebenSat.1 Digital' - _VALID_URL = r'https?://(?:www\.)?(?:(?:prosieben|prosiebenmaxx|sixx|sat1|kabeleins|the-voice-of-germany|7tv)\.(?:de|at|ch)|ran\.de|fem\.com)/(?P<id>.+)' + _VALID_URL = r'https?://(?:www\.)?(?:(?:prosieben|prosiebenmaxx|sixx|sat1|kabeleins|the-voice-of-germany|7tv|kabeleinsdoku)\.(?:de|at|ch)|ran\.de|fem\.com)/(?P<id>.+)' _TESTS = [ { @@ -290,6 +290,11 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE): 'skip_download': True, }, }, + { + # geo restricted to Germany + 'url': 'http://www.kabeleinsdoku.de/tv/mayday-alarm-im-cockpit/video/102-notlandung-im-hudson-river-ganze-folge', + 'only_matching': True, + }, ] _TOKEN = 'prosieben' From 24628cf7db46ecce3fe56d387266c556cd9210ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 24 Sep 2016 02:01:01 +0700 Subject: [PATCH 535/775] [soundcloud:playlist] Provide video id for playlist entries (Closes #10733) --- youtube_dl/extractor/soundcloud.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 513c54829..496cc5d8e 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -477,7 +477,11 @@ class SoundcloudPlaylistIE(SoundcloudIE): data = self._download_json( base_url + data, playlist_id, 'Downloading playlist') - entries = [self.url_result(track['permalink_url'], 'Soundcloud') for track in data['tracks']] + entries = [ + self.url_result( + 
track['permalink_url'], SoundcloudIE.ie_key(), + video_id=compat_str(track['id']) if track.get('id') else None) + for track in data['tracks'] if track.get('permalink_url')] return { '_type': 'playlist', From 8eec691e8a89d0094b806b86111fbcfd0ade64c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 24 Sep 2016 02:12:49 +0700 Subject: [PATCH 536/775] [ChangeLog] Actualize --- ChangeLog | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/ChangeLog b/ChangeLog index 6c72bae90..e0908aa30 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,8 +1,23 @@ version <unreleased> +Core ++ Add support for watchTVeverywhere.com authentication provider based MSOs for + Adobe Pass authentication (#10709) + Extractors ++ [soundcloud:playlist] Provide video id for early playlist entries (#10733) ++ [prosiebensat1] Add support for kabeleinsdoku (#10732) +* [cbs] Extract info from thunder videoPlayerService (#10728) * [openload] Fix extraction (#10408) + [ustream] Support the new HLS streams (#10698) ++ [ooyala] Extract all HLS formats ++ [cartoonnetwork] Add support for Adobe Pass authentication ++ [soundcloud] Extract license metadata ++ [fox] Add support for Adobe Pass authentication (#8584) ++ [tbs] Add support for Adobe Pass authentication (#10642, #10222) ++ [trutv] Add support for Adobe Pass authentication (#10519) ++ [turner] Add support for Adobe Pass authentication + version 2016.09.19 From e6332059ac66bfc91ed18e5b15d9238e4283ee7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 24 Sep 2016 02:16:47 +0700 Subject: [PATCH 537/775] release 2016.09.24 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 8b28d784a..7669ab9b7 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.19*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.19** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.24*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.24** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.09.19 +[debug] youtube-dl version 2016.09.24 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index e0908aa30..a1c4df479 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2016.09.24 Core + Add support for watchTVeverywhere.com authentication provider based MSOs for diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 9d3138181..2af6380b8 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.09.19' +__version__ = '2016.09.24' From 5968d7d2fe619e85eb424d6e47d000f0b295d4a2 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 24 Sep 2016 14:20:42 +0800 Subject: [PATCH 538/775] [extractor/common] Improved support for HTML5 subtitles Ref: #10625 In a strict sense, <track>s with kind=captions are not subtitles. [1] openload misuses this attribute, and I guess there will be more examples, so I add it to common.py. Also allow extracting information for subtitles-only <video> or <audio> tags, which is the case of openload. 
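For reference, a minimal self-contained sketch of the relaxed behaviour; the helper name and the sample markup below are illustrative only, and only the kind condition mirrors the change made to common.py:

    import re

    def _illustrative_subtitle_urls(media_content):
        # media_content is assumed to be the inner HTML of a <video>/<audio> tag
        urls = []
        for track_tag in re.findall(r'<track[^>]+>', media_content):
            kind_m = re.search(r'kind=["\']?([\w-]+)', track_tag)
            kind = kind_m.group(1) if kind_m else None
            src_m = re.search(r'src=["\']?([^"\'\s>]+)', track_tag)
            # kind=captions is now accepted in addition to kind=subtitles (or no kind at all)
            if (not kind or kind in ('subtitles', 'captions')) and src_m:
                urls.append(src_m.group(1))
        return urls

    # openload-style markup labels plain subtitles as captions:
    sample = '<track kind="captions" src="https://example.com/en.vtt" srclang="en">'
    assert _illustrative_subtitle_urls(sample) == ['https://example.com/en.vtt']
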
[1] https://www.w3.org/TR/html5/embedded-content-0.html#attr-track-kind --- ChangeLog | 6 ++++++ youtube_dl/extractor/common.py | 4 ++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index a1c4df479..ebe4ff0e8 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +vesion <unreleased> + +Core ++ Improved support for HTML5 subtitles + + version 2016.09.24 Core diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 9c8991542..5cb4479ec 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1828,7 +1828,7 @@ class InfoExtractor(object): for track_tag in re.findall(r'<track[^>]+>', media_content): track_attributes = extract_attributes(track_tag) kind = track_attributes.get('kind') - if not kind or kind == 'subtitles': + if not kind or kind in ('subtitles', 'captions'): src = track_attributes.get('src') if not src: continue @@ -1836,7 +1836,7 @@ class InfoExtractor(object): media_info['subtitles'].setdefault(lang, []).append({ 'url': absolute_url(src), }) - if media_info['formats']: + if media_info['formats'] or media_info['subtitles']: entries.append(media_info) return entries From 0711995bcac2f44e09a943521dceb1c54bf8ffb7 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 24 Sep 2016 14:27:08 +0800 Subject: [PATCH 539/775] [openload] Support subtitles (closes #10625) --- ChangeLog | 3 +++ youtube_dl/extractor/openload.py | 24 +++++++++++++++++++++++- 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index ebe4ff0e8..766cc477b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -3,6 +3,9 @@ vesion <unreleased> Core + Improved support for HTML5 subtitles +Extractors ++ [openload] Support subtitles (#10625) + version 2016.09.24 diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index b6e3ac250..4f5175136 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -24,6 +24,22 @@ class OpenloadIE(InfoExtractor): 'title': 'skyrim_no-audio_1080.mp4', 'thumbnail': 're:^https?://.*\.jpg$', }, + }, { + 'url': 'https://openload.co/embed/rjC09fkPLYs', + 'info_dict': { + 'id': 'rjC09fkPLYs', + 'ext': 'mp4', + 'title': 'movie.mp4', + 'thumbnail': 're:^https?://.*\.jpg$', + 'subtitles': { + 'en': [{ + 'ext': 'vtt', + }], + }, + }, + 'params': { + 'skip_download': True, # test subtitles only + }, }, { 'url': 'https://openload.co/embed/kUEfGclsU9o/skyrim_no-audio_1080.mp4', 'only_matching': True, @@ -71,11 +87,17 @@ class OpenloadIE(InfoExtractor): 'title', default=None) or self._html_search_meta( 'description', webpage, 'title', fatal=True) - return { + entries = self._parse_html5_media_entries(url, webpage, video_id) + subtitles = entries[0]['subtitles'] if entries else None + + info_dict = { 'id': video_id, 'title': title, 'thumbnail': self._og_search_thumbnail(webpage, default=None), 'url': video_url, # Seems all videos have extensions in their titles 'ext': determine_ext(title), + 'subtitles': subtitles, } + + return info_dict From 8add4bfecb73f44cffe3cbf33941fc409564149b Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 24 Sep 2016 10:41:38 +0100 Subject: [PATCH 540/775] [mtv] add support for new website urls(closes #8169)(closes #9808) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/mtv.py | 21 +++++++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 
8166fd4f9..bf1f70885 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -516,6 +516,7 @@ from .movingimage import MovingImageIE from .msn import MSNIE from .mtv import ( MTVIE, + MTVVideoIE, MTVServicesEmbeddedIE, MTVDEIE, ) diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index bdda68819..84a2dcb62 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -270,6 +270,27 @@ class MTVServicesEmbeddedIE(MTVServicesInfoExtractor): class MTVIE(MTVServicesInfoExtractor): + _VALID_URL = r'(?x)https?://(?:www\.)?mtv\.com/(video-clips|full-episodes)/(?P<id>[^/?#.]+)' + _FEED_URL = 'http://www.mtv.com/feeds/mrss/' + + _TESTS = [{ + 'url': 'http://www.mtv.com/video-clips/vl8qof/unlocking-the-truth-trailer', + 'md5': '1edbcdf1e7628e414a8c5dcebca3d32b', + 'info_dict': { + 'id': '5e14040d-18a4-47c4-a582-43ff602de88e', + 'ext': 'mp4', + 'title': 'Unlocking The Truth|July 18, 2016|1|101|Trailer', + 'description': '"Unlocking the Truth" premieres August 17th at 11/10c.', + 'timestamp': 1468846800, + 'upload_date': '20160718', + }, + }, { + 'url': 'http://www.mtv.com/full-episodes/94tujl/unlocking-the-truth-gates-of-hell-season-1-ep-101', + 'only_matching': True, + }] + + +class MTVVideoIE(MTVServicesInfoExtractor): _VALID_URL = r'''(?x)^https?:// (?:(?:www\.)?mtv\.com/videos/.+?/(?P<videoid>[0-9]+)/[^/]+$| m\.mtv\.com/videos/video\.rbml\?.*?id=(?P<mgid>[^&]+))''' From a54ffb8aa778062901dd15b020576bc7d472ae40 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 24 Sep 2016 10:50:14 +0100 Subject: [PATCH 541/775] [mtv] add common IE_NAME prefix for MTVIE and MTVVideoIE --- youtube_dl/extractor/mtv.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index 84a2dcb62..2e9580b10 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -270,7 +270,8 @@ class MTVServicesEmbeddedIE(MTVServicesInfoExtractor): class MTVIE(MTVServicesInfoExtractor): - _VALID_URL = r'(?x)https?://(?:www\.)?mtv\.com/(video-clips|full-episodes)/(?P<id>[^/?#.]+)' + IE_NAME = 'mtv' + _VALID_URL = r'https?://(?:www\.)?mtv\.com/(video-clips|full-episodes)/(?P<id>[^/?#.]+)' _FEED_URL = 'http://www.mtv.com/feeds/mrss/' _TESTS = [{ @@ -291,6 +292,7 @@ class MTVIE(MTVServicesInfoExtractor): class MTVVideoIE(MTVServicesInfoExtractor): + IE_NAME = 'mtv:video' _VALID_URL = r'''(?x)^https?:// (?:(?:www\.)?mtv\.com/videos/.+?/(?P<videoid>[0-9]+)/[^/]+$| m\.mtv\.com/videos/video\.rbml\?.*?id=(?P<mgid>[^&]+))''' From f0bc5a8609786633d8b51ab4255c1f0fdb941f73 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 24 Sep 2016 19:57:55 +0800 Subject: [PATCH 542/775] [twitter] Support Periscope embeds (closes #10737) Also update _TESTS --- ChangeLog | 1 + youtube_dl/extractor/periscope.py | 9 +++++++ youtube_dl/extractor/twitter.py | 45 +++++++++++++++++++++++++------ 3 files changed, 47 insertions(+), 8 deletions(-) diff --git a/ChangeLog b/ChangeLog index 766cc477b..5c96dc179 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,6 +4,7 @@ Core + Improved support for HTML5 subtitles Extractors ++ [twitter] Support Periscope embeds (#10737) + [openload] Support subtitles (#10625) diff --git a/youtube_dl/extractor/periscope.py b/youtube_dl/extractor/periscope.py index eb1aeba46..e8b2f11c6 100644 --- a/youtube_dl/extractor/periscope.py +++ b/youtube_dl/extractor/periscope.py @@ -1,6 +1,8 @@ # coding: utf-8 from __future__ import 
unicode_literals +import re + from .common import InfoExtractor from ..utils import ( parse_iso8601, @@ -41,6 +43,13 @@ class PeriscopeIE(PeriscopeBaseIE): 'only_matching': True, }] + @staticmethod + def _extract_url(webpage): + mobj = re.search( + r'<iframe[^>]+src=([\'"])(?P<url>(?:https?:)?//(?:www\.)?periscope\.tv/(?:(?!\1).)+)\1', webpage) + if mobj: + return mobj.group('url') + def _real_extract(self, url): token = self._match_id(url) diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py index c5a5843b6..3411fcf7e 100644 --- a/youtube_dl/extractor/twitter.py +++ b/youtube_dl/extractor/twitter.py @@ -4,6 +4,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import compat_urlparse from ..utils import ( determine_ext, float_or_none, @@ -13,6 +14,8 @@ from ..utils import ( ExtractorError, ) +from .periscope import PeriscopeIE + class TwitterBaseIE(InfoExtractor): def _get_vmap_video_url(self, vmap_url, video_id): @@ -48,12 +51,12 @@ class TwitterCardIE(TwitterBaseIE): }, { 'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977', - 'md5': 'd4724ffe6d2437886d004fa5de1043b3', + 'md5': 'b6d9683dd3f48e340ded81c0e917ad46', 'info_dict': { 'id': 'dq4Oj5quskI', 'ext': 'mp4', 'title': 'Ubuntu 11.10 Overview', - 'description': 'Take a quick peek at what\'s new and improved in Ubuntu 11.10.\n\nOnce installed take a look at 10 Things to Do After Installing: http://www.omgubuntu.co.uk/2011/10/10...', + 'description': 'md5:a831e97fa384863d6e26ce48d1c43376', 'upload_date': '20111013', 'uploader': 'OMG! Ubuntu!', 'uploader_id': 'omgubuntu', @@ -100,12 +103,17 @@ class TwitterCardIE(TwitterBaseIE): return self.url_result(iframe_url) config = self._parse_json(self._html_search_regex( - r'data-(?:player-)?config="([^"]+)"', webpage, 'data player config'), + r'data-(?:player-)?config="([^"]+)"', webpage, + 'data player config', default='{}'), video_id) if config.get('source_type') == 'vine': return self.url_result(config['player_url'], 'Vine') + periscope_url = PeriscopeIE._extract_url(webpage) + if periscope_url: + return self.url_result(periscope_url, PeriscopeIE.ie_key()) + def _search_dimensions_in_video_url(a_format, video_url): m = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url) if m: @@ -244,10 +252,10 @@ class TwitterIE(InfoExtractor): 'info_dict': { 'id': '700207533655363584', 'ext': 'mp4', - 'title': 'Donte The Dumbass - BEAT PROD: @suhmeduh #Damndaniel', - 'description': 'Donte The Dumbass on Twitter: "BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ"', + 'title': 'JG - BEAT PROD: @suhmeduh #Damndaniel', + 'description': 'JG on Twitter: "BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ"', 'thumbnail': 're:^https?://.*\.jpg', - 'uploader': 'Donte The Dumbass', + 'uploader': 'JG', 'uploader_id': 'jaydingeer', }, 'params': { @@ -278,6 +286,18 @@ class TwitterIE(InfoExtractor): 'params': { 'skip_download': True, # requires ffmpeg }, + }, { + 'url': 'https://twitter.com/OPP_HSD/status/779210622571536384', + 'info_dict': { + 'id': '1zqKVVlkqLaKB', + 'ext': 'mp4', + 'title': 'Sgt Kerry Schmidt - Ontario Provincial Police - Road rage, mischief, assault, rollover and fire in one occurrence', + 'upload_date': '20160923', + 'uploader_id': 'OPP_HSD', + 'uploader': 'Sgt Kerry Schmidt - Ontario Provincial Police', + 'timestamp': 1474613214, + }, + 'add_ie': ['Periscope'], }] def _real_extract(self, url): @@ -328,13 +348,22 @@ class 
TwitterIE(InfoExtractor): }) return info + twitter_card_url = None if 'class="PlayableMedia' in webpage: + twitter_card_url = '%s//twitter.com/i/videos/tweet/%s' % (self.http_scheme(), twid) + else: + twitter_card_iframe_url = self._search_regex( + r'data-full-card-iframe-url=([\'"])(?P<url>(?:(?!\1).)+)\1', + webpage, 'Twitter card iframe URL', default=None, group='url') + if twitter_card_iframe_url: + twitter_card_url = compat_urlparse.urljoin(url, twitter_card_iframe_url) + + if twitter_card_url: info.update({ '_type': 'url_transparent', 'ie_key': 'TwitterCard', - 'url': '%s//twitter.com/i/videos/tweet/%s' % (self.http_scheme(), twid), + 'url': twitter_card_url, }) - return info raise ExtractorError('There\'s no video in this tweet.') From 8e45e1cc4d706e6b43dac8105acf3592fa3d4725 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 24 Sep 2016 19:18:01 +0700 Subject: [PATCH 543/775] [soundcloud] Generalize playlist entries extraction (#10733) --- youtube_dl/extractor/soundcloud.py | 42 ++++++++++++++++++------------ 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 496cc5d8e..f3cb35f77 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -260,7 +260,20 @@ class SoundcloudIE(InfoExtractor): return self._extract_info_dict(info, full_title, secret_token=token) -class SoundcloudSetIE(SoundcloudIE): +class SoundcloudBaseIE(SoundcloudIE): + @staticmethod + def _extract_id(e): + return compat_str(e['id']) if e.get('id') else None + + def _extract_track_entries(self, tracks): + return [ + self.url_result( + track['permalink_url'], SoundcloudIE.ie_key(), + video_id=self._extract_id(track)) + for track in tracks if track.get('permalink_url')] + + +class SoundcloudSetIE(SoundcloudBaseIE): _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/(?P<uploader>[\w\d-]+)/sets/(?P<slug_title>[\w\d-]+)(?:/(?P<token>[^?/]+))?' 
IE_NAME = 'soundcloud:set' _TESTS = [{ @@ -299,7 +312,7 @@ class SoundcloudSetIE(SoundcloudIE): msgs = (compat_str(err['error_message']) for err in info['errors']) raise ExtractorError('unable to download video webpage: %s' % ','.join(msgs)) - entries = [self.url_result(track['permalink_url'], 'Soundcloud') for track in info['tracks']] + entries = self._extract_track_entries(info['tracks']) return { '_type': 'playlist', @@ -309,7 +322,7 @@ class SoundcloudSetIE(SoundcloudIE): } -class SoundcloudUserIE(SoundcloudIE): +class SoundcloudUserIE(SoundcloudBaseIE): _VALID_URL = r'''(?x) https?:// (?:(?:www|m)\.)?soundcloud\.com/ @@ -326,21 +339,21 @@ class SoundcloudUserIE(SoundcloudIE): 'id': '114582580', 'title': 'The Akashic Chronicler (All)', }, - 'playlist_mincount': 111, + 'playlist_mincount': 74, }, { 'url': 'https://soundcloud.com/the-akashic-chronicler/tracks', 'info_dict': { 'id': '114582580', 'title': 'The Akashic Chronicler (Tracks)', }, - 'playlist_mincount': 50, + 'playlist_mincount': 37, }, { 'url': 'https://soundcloud.com/the-akashic-chronicler/sets', 'info_dict': { 'id': '114582580', 'title': 'The Akashic Chronicler (Playlists)', }, - 'playlist_mincount': 3, + 'playlist_mincount': 2, }, { 'url': 'https://soundcloud.com/the-akashic-chronicler/reposts', 'info_dict': { @@ -359,7 +372,7 @@ class SoundcloudUserIE(SoundcloudIE): 'url': 'https://soundcloud.com/grynpyret/spotlight', 'info_dict': { 'id': '7098329', - 'title': 'Grynpyret (Spotlight)', + 'title': 'GRYNPYRET (Spotlight)', }, 'playlist_mincount': 1, }] @@ -421,13 +434,14 @@ class SoundcloudUserIE(SoundcloudIE): for cand in candidates: if isinstance(cand, dict): permalink_url = cand.get('permalink_url') + entry_id = self._extract_id(cand) if permalink_url and permalink_url.startswith('http'): - return permalink_url + return permalink_url, entry_id for e in collection: - permalink_url = resolve_permalink_url((e, e.get('track'), e.get('playlist'))) + permalink_url, entry_id = resolve_permalink_url((e, e.get('track'), e.get('playlist'))) if permalink_url: - entries.append(self.url_result(permalink_url)) + entries.append(self.url_result(permalink_url, video_id=entry_id)) next_href = response.get('next_href') if not next_href: @@ -447,7 +461,7 @@ class SoundcloudUserIE(SoundcloudIE): } -class SoundcloudPlaylistIE(SoundcloudIE): +class SoundcloudPlaylistIE(SoundcloudBaseIE): _VALID_URL = r'https?://api\.soundcloud\.com/playlists/(?P<id>[0-9]+)(?:/?\?secret_token=(?P<token>[^&]+?))?$' IE_NAME = 'soundcloud:playlist' _TESTS = [{ @@ -477,11 +491,7 @@ class SoundcloudPlaylistIE(SoundcloudIE): data = self._download_json( base_url + data, playlist_id, 'Downloading playlist') - entries = [ - self.url_result( - track['permalink_url'], SoundcloudIE.ie_key(), - video_id=compat_str(track['id']) if track.get('id') else None) - for track in data['tracks'] if track.get('permalink_url')] + entries = self._extract_track_entries(data['tracks']) return { '_type': 'playlist', From 7518a61d416133bff8b99c693dfca0b15c0d5b7e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 24 Sep 2016 19:29:49 +0700 Subject: [PATCH 544/775] [soundcloud] Fix typo in playlist base class name --- youtube_dl/extractor/soundcloud.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index f3cb35f77..1a8114aa7 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -260,7 +260,7 @@ class 
SoundcloudIE(InfoExtractor): return self._extract_info_dict(info, full_title, secret_token=token) -class SoundcloudBaseIE(SoundcloudIE): +class SoundcloudPlaylistBaseIE(SoundcloudIE): @staticmethod def _extract_id(e): return compat_str(e['id']) if e.get('id') else None @@ -273,7 +273,7 @@ class SoundcloudBaseIE(SoundcloudIE): for track in tracks if track.get('permalink_url')] -class SoundcloudSetIE(SoundcloudBaseIE): +class SoundcloudSetIE(SoundcloudPlaylistBaseIE): _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/(?P<uploader>[\w\d-]+)/sets/(?P<slug_title>[\w\d-]+)(?:/(?P<token>[^?/]+))?' IE_NAME = 'soundcloud:set' _TESTS = [{ @@ -322,7 +322,7 @@ class SoundcloudSetIE(SoundcloudBaseIE): } -class SoundcloudUserIE(SoundcloudBaseIE): +class SoundcloudUserIE(SoundcloudPlaylistBaseIE): _VALID_URL = r'''(?x) https?:// (?:(?:www|m)\.)?soundcloud\.com/ @@ -461,7 +461,7 @@ class SoundcloudUserIE(SoundcloudBaseIE): } -class SoundcloudPlaylistIE(SoundcloudBaseIE): +class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE): _VALID_URL = r'https?://api\.soundcloud\.com/playlists/(?P<id>[0-9]+)(?:/?\?secret_token=(?P<token>[^&]+?))?$' IE_NAME = 'soundcloud:playlist' _TESTS = [{ From 6f126d903f46d976a380a5b4265084e5a21a3c09 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 24 Sep 2016 15:38:19 +0100 Subject: [PATCH 545/775] [download/hls] Delegate downloading to ffmpeg for live streams --- youtube_dl/downloader/hls.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 5d70abf62..541b92ee1 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -31,7 +31,7 @@ class HlsFD(FragmentFD): FD_NAME = 'hlsnative' @staticmethod - def can_download(manifest): + def can_download(manifest, info_dict): UNSUPPORTED_FEATURES = ( r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)', # encrypted streams [1] r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [2] @@ -53,6 +53,7 @@ class HlsFD(FragmentFD): ) check_results = [not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES] check_results.append(can_decrypt_frag or '#EXT-X-KEY:METHOD=AES-128' not in manifest) + check_results.append(not info_dict.get('is_live')) return all(check_results) def real_download(self, filename, info_dict): @@ -62,7 +63,7 @@ class HlsFD(FragmentFD): s = manifest.decode('utf-8', 'ignore') - if not self.can_download(s): + if not self.can_download(s, info_dict): self.report_warning( 'hlsnative has detected features it does not support, ' 'extraction will be delegated to ffmpeg') From 27e99078d337cdc77a5a7228998d3b2fe722e7cb Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 24 Sep 2016 15:39:06 +0100 Subject: [PATCH 546/775] [brightcove:new] add support for live streams --- youtube_dl/extractor/brightcove.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index aeb22be16..2ec55b185 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -621,15 +621,21 @@ class BrightcoveNewIE(InfoExtractor): 'url': text_track['src'], }) + is_live = False + duration = float_or_none(json_data.get('duration'), 1000) + if duration and duration < 0: + is_live = True + return { 'id': video_id, - 'title': title, + 'title': self._live_title(title) if is_live else title, 'description': clean_html(json_data.get('description')), 'thumbnail': 
json_data.get('thumbnail') or json_data.get('poster'), - 'duration': float_or_none(json_data.get('duration'), 1000), + 'duration': duration, 'timestamp': parse_iso8601(json_data.get('published_at')), 'uploader_id': account_id, 'formats': formats, 'subtitles': subtitles, 'tags': json_data.get('tags', []), + 'is_live': is_live, } From e71a450956c808d469b983e5ffde1a63aff24390 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 24 Sep 2016 21:55:53 +0100 Subject: [PATCH 547/775] [common] add hdcore sign to akamai f4m formats --- youtube_dl/extractor/common.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 5cb4479ec..1076b46da 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1842,10 +1842,15 @@ class InfoExtractor(object): def _extract_akamai_formats(self, manifest_url, video_id): formats = [] + hdcore_sign = 'hdcore=3.7.0' f4m_url = re.sub(r'(https?://.+?)/i/', r'\1/z/', manifest_url).replace('/master.m3u8', '/manifest.f4m') - formats.extend(self._extract_f4m_formats( - update_url_query(f4m_url, {'hdcore': '3.7.0'}), - video_id, f4m_id='hds', fatal=False)) + if 'hdcore=' not in f4m_url: + f4m_url += ('&' if '?' in f4m_url else '?') + hdcore_sign + f4m_formats = self._extract_f4m_formats( + f4m_url, video_id, f4m_id='hds', fatal=False) + for entry in f4m_formats: + entry.update({'extra_param_to_segment_url': hdcore_sign}) + formats.extend(f4m_formats) m3u8_url = re.sub(r'(https?://.+?)/z/', r'\1/i/', manifest_url).replace('/manifest.f4m', '/master.m3u8') formats.extend(self._extract_m3u8_formats( m3u8_url, video_id, 'mp4', 'm3u8_native', From 7fd57de6fb146ffca594e4ae632d7ff217926b52 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 24 Sep 2016 21:59:48 +0100 Subject: [PATCH 548/775] [cbsnews:livevideo] fix extraction and extract m3u8 formats --- youtube_dl/extractor/cbsnews.py | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/youtube_dl/extractor/cbsnews.py b/youtube_dl/extractor/cbsnews.py index 4aa6917a0..216989230 100644 --- a/youtube_dl/extractor/cbsnews.py +++ b/youtube_dl/extractor/cbsnews.py @@ -9,6 +9,7 @@ from ..utils import ( class CBSNewsIE(CBSIE): + IE_NAME = 'cbsnews' IE_DESC = 'CBS News' _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/(?:news|videos)/(?P<id>[\da-z_-]+)' @@ -68,15 +69,16 @@ class CBSNewsIE(CBSIE): class CBSNewsLiveVideoIE(InfoExtractor): + IE_NAME = 'cbsnews:livevideo' IE_DESC = 'CBS News Live Videos' - _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/live/video/(?P<id>[\da-z_-]+)' + _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/live/video/(?P<id>[^/?#]+)' # Live videos get deleted soon. 
See http://www.cbsnews.com/live/ for the latest examples _TEST = { 'url': 'http://www.cbsnews.com/live/video/clinton-sanders-prepare-to-face-off-in-nh/', 'info_dict': { 'id': 'clinton-sanders-prepare-to-face-off-in-nh', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Clinton, Sanders Prepare To Face Off In NH', 'duration': 334, }, @@ -84,25 +86,22 @@ class CBSNewsLiveVideoIE(InfoExtractor): } def _real_extract(self, url): - video_id = self._match_id(url) + display_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + video_info = self._download_json( + 'http://feeds.cbsn.cbsnews.com/rundown/story', display_id, query={ + 'device': 'desktop', + 'dvr_slug': display_id, + }) - video_info = self._parse_json(self._html_search_regex( - r'data-story-obj=\'({.+?})\'', webpage, 'video JSON info'), video_id)['story'] - - hdcore_sign = 'hdcore=3.3.1' - f4m_formats = self._extract_f4m_formats(video_info['url'] + '&' + hdcore_sign, video_id) - if f4m_formats: - for entry in f4m_formats: - # URLs without the extra param induce an 404 error - entry.update({'extra_param_to_segment_url': hdcore_sign}) - self._sort_formats(f4m_formats) + formats = self._extract_akamai_formats(video_info['url'], display_id) + self._sort_formats(formats) return { - 'id': video_id, + 'id': display_id, + 'display_id': display_id, 'title': video_info['headline'], 'thumbnail': video_info.get('thumbnail_url_hd') or video_info.get('thumbnail_url_sd'), 'duration': parse_duration(video_info.get('segmentDur')), - 'formats': f4m_formats, + 'formats': formats, } From 63c583eb2c9a906ba1075da289afdde29b385fff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 25 Sep 2016 04:43:10 +0700 Subject: [PATCH 549/775] [prosiebensat1] Add support for sat1gold (#10745) --- youtube_dl/extractor/prosiebensat1.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py index 5a29b844d..2f5aa530a 100644 --- a/youtube_dl/extractor/prosiebensat1.py +++ b/youtube_dl/extractor/prosiebensat1.py @@ -122,7 +122,17 @@ class ProSiebenSat1BaseIE(InfoExtractor): class ProSiebenSat1IE(ProSiebenSat1BaseIE): IE_NAME = 'prosiebensat1' IE_DESC = 'ProSiebenSat.1 Digital' - _VALID_URL = r'https?://(?:www\.)?(?:(?:prosieben|prosiebenmaxx|sixx|sat1|kabeleins|the-voice-of-germany|7tv|kabeleinsdoku)\.(?:de|at|ch)|ran\.de|fem\.com)/(?P<id>.+)' + _VALID_URL = r'''(?x) + https?:// + (?:www\.)? 
+ (?: + (?: + prosieben|prosiebenmaxx|sixx|sat1(?:gold)?|kabeleins|the-voice-of-germany|7tv|kabeleinsdoku + )\.(?:de|at|ch)| + ran\.de|fem\.com + ) + /(?P<id>.+) + ''' _TESTS = [ { @@ -295,6 +305,11 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE): 'url': 'http://www.kabeleinsdoku.de/tv/mayday-alarm-im-cockpit/video/102-notlandung-im-hudson-river-ganze-folge', 'only_matching': True, }, + { + # geo restricted to Germany + 'url': 'http://www.sat1gold.de/tv/edel-starck/video/11-staffel-1-episode-1-partner-wider-willen-ganze-folge', + 'only_matching': True, + }, ] _TOKEN = 'prosieben' From ddde91952f4eec796b14eb258c0cb33dda3935bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 25 Sep 2016 05:36:18 +0700 Subject: [PATCH 550/775] [prosiebensat1] Fix playlist support (Closes #10745) --- youtube_dl/extractor/prosiebensat1.py | 39 ++++++++++++++++++--------- 1 file changed, 26 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py index 2f5aa530a..a064de05e 100644 --- a/youtube_dl/extractor/prosiebensat1.py +++ b/youtube_dl/extractor/prosiebensat1.py @@ -310,6 +310,10 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE): 'url': 'http://www.sat1gold.de/tv/edel-starck/video/11-staffel-1-episode-1-partner-wider-willen-ganze-folge', 'only_matching': True, }, + { + 'url': 'http://www.sat1gold.de/tv/edel-starck/playlist/die-gesamte-1-staffel', + 'only_matching': True, + }, ] _TOKEN = 'prosieben' @@ -381,19 +385,28 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE): def _extract_playlist(self, url, webpage): playlist_id = self._html_search_regex( self._PLAYLIST_ID_REGEXES, webpage, 'playlist id') - for regex in self._PLAYLIST_CLIP_REGEXES: - playlist_clips = re.findall(regex, webpage) - if playlist_clips: - title = self._html_search_regex( - self._TITLE_REGEXES, webpage, 'title') - description = self._html_search_regex( - self._DESCRIPTION_REGEXES, webpage, 'description', fatal=False) - entries = [ - self.url_result( - re.match('(.+?//.+?)/', url).group(1) + clip_path, - 'ProSiebenSat1') - for clip_path in playlist_clips] - return self.playlist_result(entries, playlist_id, title, description) + playlist = self._parse_json( + self._search_regex( + 'var\s+contentResources\s*=\s*(\[.+?\]);\s*</script', + webpage, 'playlist'), + playlist_id) + entries = [] + for item in playlist: + clip_id = item.get('id') or item.get('upc') + if not clip_id: + continue + info = self._extract_video_info(url, clip_id) + info.update({ + 'id': clip_id, + 'title': item.get('title') or item.get('teaser', {}).get('headline'), + 'description': item.get('teaser', {}).get('description'), + 'thumbnail': item.get('poster'), + 'duration': float_or_none(item.get('duration')), + 'series': item.get('tvShowTitle'), + 'uploader': item.get('broadcastPublisher'), + }) + entries.append(info) + return self.playlist_result(entries, playlist_id) def _real_extract(self, url): video_id = self._match_id(url) From f92bb612c69957c3803aaf14aea1d03a7d7d917f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 25 Sep 2016 06:14:32 +0700 Subject: [PATCH 551/775] [mwave] Relax _VALID_URLs (Closes #10735, closes #10748) --- youtube_dl/extractor/mwave.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/mwave.py b/youtube_dl/extractor/mwave.py index a103e0323..fea1caf47 100644 --- a/youtube_dl/extractor/mwave.py +++ b/youtube_dl/extractor/mwave.py @@ -9,9 +9,9 @@ from 
..utils import ( class MwaveIE(InfoExtractor): - _VALID_URL = r'https?://mwave\.interest\.me/mnettv/videodetail\.m\?searchVideoDetailVO\.clip_id=(?P<id>[0-9]+)' + _VALID_URL = r'https?://mwave\.interest\.me/(?:[^/]+/)?mnettv/videodetail\.m\?searchVideoDetailVO\.clip_id=(?P<id>[0-9]+)' _URL_TEMPLATE = 'http://mwave.interest.me/mnettv/videodetail.m?searchVideoDetailVO.clip_id=%s' - _TEST = { + _TESTS = [{ 'url': 'http://mwave.interest.me/mnettv/videodetail.m?searchVideoDetailVO.clip_id=168859', # md5 is unstable 'info_dict': { @@ -23,7 +23,10 @@ class MwaveIE(InfoExtractor): 'duration': 206, 'view_count': int, } - } + }, { + 'url': 'http://mwave.interest.me/en/mnettv/videodetail.m?searchVideoDetailVO.clip_id=176199', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) @@ -60,8 +63,8 @@ class MwaveIE(InfoExtractor): class MwaveMeetGreetIE(InfoExtractor): - _VALID_URL = r'https?://mwave\.interest\.me/meetgreet/view/(?P<id>\d+)' - _TEST = { + _VALID_URL = r'https?://mwave\.interest\.me/(?:[^/]+/)?meetgreet/view/(?P<id>\d+)' + _TESTS = [{ 'url': 'http://mwave.interest.me/meetgreet/view/256', 'info_dict': { 'id': '173294', @@ -72,7 +75,10 @@ class MwaveMeetGreetIE(InfoExtractor): 'duration': 3634, 'view_count': int, } - } + }, { + 'url': 'http://mwave.interest.me/en/meetgreet/view/256', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) From 0a078550b9ac570cb357c2af74a39068d08ce1ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 25 Sep 2016 06:19:17 +0700 Subject: [PATCH 552/775] [prosiebensat1] Improve _VALID_URL --- youtube_dl/extractor/prosiebensat1.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py index a064de05e..84d04aa69 100644 --- a/youtube_dl/extractor/prosiebensat1.py +++ b/youtube_dl/extractor/prosiebensat1.py @@ -127,7 +127,7 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE): (?:www\.)? (?: (?: - prosieben|prosiebenmaxx|sixx|sat1(?:gold)?|kabeleins|the-voice-of-germany|7tv|kabeleinsdoku + prosieben(?:maxx)?|sixx|sat1(?:gold)?|kabeleins(?:doku)?|the-voice-of-germany|7tv )\.(?:de|at|ch)| ran\.de|fem\.com ) From 493353c7fd5d15fa35152915c10c7249277b5ed0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 25 Sep 2016 06:25:57 +0700 Subject: [PATCH 553/775] [prosiebensat1] Add support for advopedia --- youtube_dl/extractor/prosiebensat1.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py index 84d04aa69..873d4f981 100644 --- a/youtube_dl/extractor/prosiebensat1.py +++ b/youtube_dl/extractor/prosiebensat1.py @@ -127,9 +127,9 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE): (?:www\.)? 
(?: (?: - prosieben(?:maxx)?|sixx|sat1(?:gold)?|kabeleins(?:doku)?|the-voice-of-germany|7tv + prosieben(?:maxx)?|sixx|sat1(?:gold)?|kabeleins(?:doku)?|the-voice-of-germany|7tv|advopedia )\.(?:de|at|ch)| - ran\.de|fem\.com + ran\.de|fem\.com|advopedia\.de ) /(?P<id>.+) ''' @@ -314,6 +314,10 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE): 'url': 'http://www.sat1gold.de/tv/edel-starck/playlist/die-gesamte-1-staffel', 'only_matching': True, }, + { + 'url': 'http://www.advopedia.de/videos/lenssen-klaert-auf/lenssen-klaert-auf-folge-8-staffel-3-feiertage-und-freie-tage', + 'only_matching': True, + }, ] _TOKEN = 'prosieben' From 2d3d29976b2c83e8daab62a0f2a61c232692a310 Mon Sep 17 00:00:00 2001 From: stepshal <nessento@openmailbox.org> Date: Sat, 17 Sep 2016 21:48:20 +0700 Subject: [PATCH 554/775] [youtube] Change test URLs from http to https --- youtube_dl/extractor/youtube.py | 34 ++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 5ca903825..f86823112 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -369,7 +369,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): IE_NAME = 'youtube' _TESTS = [ { - 'url': 'http://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9', + 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9', 'info_dict': { 'id': 'BaW_jenozKc', 'ext': 'mp4', @@ -389,7 +389,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): } }, { - 'url': 'http://www.youtube.com/watch?v=UxxajLWwzqY', + 'url': 'https://www.youtube.com/watch?v=UxxajLWwzqY', 'note': 'Test generic use_cipher_signature video (#897)', 'info_dict': { 'id': 'UxxajLWwzqY', @@ -443,7 +443,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): } }, { - 'url': 'http://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY', + 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY', 'note': 'Use the first video ID in the URL', 'info_dict': { 'id': 'BaW_jenozKc', @@ -465,7 +465,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): }, }, { - 'url': 'http://www.youtube.com/watch?v=a9LDPn-MO4I', + 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I', 'note': '256k DASH audio (format 141) via DASH manifest', 'info_dict': { 'id': 'a9LDPn-MO4I', @@ -539,7 +539,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): }, # Normal age-gate video (No vevo, embed allowed) { - 'url': 'http://youtube.com/watch?v=HtVdAasjOgU', + 'url': 'https://youtube.com/watch?v=HtVdAasjOgU', 'info_dict': { 'id': 'HtVdAasjOgU', 'ext': 'mp4', @@ -555,7 +555,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): }, # Age-gate video with encrypted signature { - 'url': 'http://www.youtube.com/watch?v=6kLq3WMV1nU', + 'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU', 'info_dict': { 'id': '6kLq3WMV1nU', 'ext': 'mp4', @@ -748,11 +748,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'skip': 'Not multifeed anymore', }, { - 'url': 'http://vid.plus/FlRa-iH7PGw', + 'url': 'https://vid.plus/FlRa-iH7PGw', 'only_matching': True, }, { - 'url': 'http://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html', + 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html', 'only_matching': True, }, { @@ -1846,7 +1846,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): 'playlist_count': 2, }, { 'note': 'embedded', - 'url': 'http://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu', + 'url': 
'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu', 'playlist_count': 4, 'info_dict': { 'title': 'JODA15', @@ -1854,7 +1854,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): } }, { 'note': 'Embedded SWF player', - 'url': 'http://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0', + 'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0', 'playlist_count': 4, 'info_dict': { 'title': 'JODA7', @@ -2156,7 +2156,7 @@ class YoutubeLiveIE(YoutubeBaseInfoExtractor): IE_NAME = 'youtube:live' _TESTS = [{ - 'url': 'http://www.youtube.com/user/TheYoungTurks/live', + 'url': 'https://www.youtube.com/user/TheYoungTurks/live', 'info_dict': { 'id': 'a48o2S1cPoo', 'ext': 'mp4', @@ -2176,7 +2176,7 @@ class YoutubeLiveIE(YoutubeBaseInfoExtractor): 'skip_download': True, }, }, { - 'url': 'http://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live', + 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live', 'only_matching': True, }] @@ -2201,7 +2201,7 @@ class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor): IE_NAME = 'youtube:playlists' _TESTS = [{ - 'url': 'http://www.youtube.com/user/ThirstForScience/playlists', + 'url': 'https://www.youtube.com/user/ThirstForScience/playlists', 'playlist_mincount': 4, 'info_dict': { 'id': 'ThirstForScience', @@ -2209,7 +2209,7 @@ class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor): }, }, { # with "Load more" button - 'url': 'http://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd', + 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd', 'playlist_mincount': 70, 'info_dict': { 'id': 'igorkle1', @@ -2442,10 +2442,10 @@ class YoutubeTruncatedURLIE(InfoExtractor): ''' _TESTS = [{ - 'url': 'http://www.youtube.com/watch?annotation_id=annotation_3951667041', + 'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041', 'only_matching': True, }, { - 'url': 'http://www.youtube.com/watch?', + 'url': 'https://www.youtube.com/watch?', 'only_matching': True, }, { 'url': 'https://www.youtube.com/watch?x-yt-cl=84503534', @@ -2466,7 +2466,7 @@ class YoutubeTruncatedURLIE(InfoExtractor): 'Did you forget to quote the URL? 
Remember that & is a meta ' 'character in most shells, so you want to put the URL in quotes, ' 'like youtube-dl ' - '"http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" ' + '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" ' ' or simply youtube-dl BaW_jenozKc .', expected=True) From f3625cc4ca8d8683b900e070ad7acd58b1fac5c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 25 Sep 2016 18:08:35 +0700 Subject: [PATCH 555/775] [PULL_REQUEST_TEMPLATE.md] Add Unlicense notice --- .github/PULL_REQUEST_TEMPLATE.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index f24bb4b09..3a168b7b1 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -10,6 +10,10 @@ - [ ] At least skimmed through [adding new extractor tutorial](https://github.com/rg3/youtube-dl#adding-support-for-a-new-site) and [youtube-dl coding conventions](https://github.com/rg3/youtube-dl#youtube-dl-coding-conventions) sections - [ ] [Searched](https://github.com/rg3/youtube-dl/search?q=is%3Apr&type=Issues) the bugtracker for similar pull requests +### In order to be accepted and merged into youtube-dl each piece of code must be in public domain or released under [Unlicense](http://unlicense.org/). Check one of the following options: +- [ ] I am the original original author of this code and I am willing to release it under [Unlicense](http://unlicense.org/) +- [ ] I am not the original author of this code but it is in public domain or released under [Unlicense](http://unlicense.org/) (provide reliable evidence) + ### What is the purpose of your *pull request*? - [ ] Bug fix - [ ] New extractor From e590b7ff9e8e408bb9ec4da58ab6847686d29dbc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 25 Sep 2016 18:09:46 +0700 Subject: [PATCH 556/775] [PULL_REQUEST_TEMPLATE.md] Add checkable Improvement options PR's purpose --- .github/PULL_REQUEST_TEMPLATE.md | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 3a168b7b1..89e8a3188 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -16,6 +16,7 @@ ### What is the purpose of your *pull request*? 
- [ ] Bug fix +- [ ] Improvement - [ ] New extractor - [ ] New feature From a3d8b3816802c76beffa48789eac5181e02db3dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 25 Sep 2016 21:58:17 +0700 Subject: [PATCH 557/775] [npo] Generalize playlist extractors --- youtube_dl/extractor/npo.py | 63 +++++++++++++++---------------------- 1 file changed, 26 insertions(+), 37 deletions(-) diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index 3293bdb17..f95867d58 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -438,9 +438,29 @@ class SchoolTVIE(InfoExtractor): } -class VPROIE(NPOIE): +class NPOPlaylistBaseIE(NPOIE): + def _real_extract(self, url): + playlist_id = self._match_id(url) + + webpage = self._download_webpage(url, playlist_id) + + entries = [ + self.url_result('npo:%s' % video_id if not video_id.startswith('http') else video_id) + for video_id in re.findall(self._PLAYLIST_ENTRY_RE, webpage) + ] + + playlist_title = self._html_search_regex( + self._PLAYLIST_TITLE_RE, webpage, 'playlist title', + default=None) or self._og_search_title(webpage) + + return self.playlist_result(entries, playlist_id, playlist_title) + + +class VPROIE(NPOPlaylistBaseIE): IE_NAME = 'vpro' _VALID_URL = r'https?://(?:www\.)?(?:tegenlicht\.)?vpro\.nl/(?:[^/]+/){2,}(?P<id>[^/]+)\.html' + _PLAYLIST_TITLE_RE = r'<title>\s*([^>]+?)\s*-\s*Teledoc\s*-\s*VPRO\s*' + _PLAYLIST_ENTRY_RE = r'data-media-id="([^"]+)"' _TESTS = [ { @@ -473,48 +493,17 @@ class VPROIE(NPOIE): } ] - def _real_extract(self, url): - playlist_id = self._match_id(url) - webpage = self._download_webpage(url, playlist_id) - - entries = [ - self.url_result('npo:%s' % video_id if not video_id.startswith('http') else video_id) - for video_id in re.findall(r'data-media-id="([^"]+)"', webpage) - ] - - playlist_title = self._search_regex( - r'\s*([^>]+?)\s*-\s*Teledoc\s*-\s*VPRO\s*', - webpage, 'playlist title', default=None) or self._og_search_title(webpage) - - return self.playlist_result(entries, playlist_id, playlist_title) - - -class WNLIE(InfoExtractor): +class WNLIE(NPOPlaylistBaseIE): _VALID_URL = r'https?://(?:www\.)?omroepwnl\.nl/video/detail/(?P[^/]+)__\d+' + _PLAYLIST_TITLE_RE = r'(?s)]+class="subject"[^>]*>(.+?)

    ' + _PLAYLIST_ENTRY_RE = r']+href="([^"]+)"[^>]+class="js-mid"[^>]*>Deel \d+' - _TEST = { + _TESTS = [{ 'url': 'http://www.omroepwnl.nl/video/detail/vandaag-de-dag-6-mei__060515', 'info_dict': { 'id': 'vandaag-de-dag-6-mei', 'title': 'Vandaag de Dag 6 mei', }, 'playlist_count': 4, - } - - def _real_extract(self, url): - playlist_id = self._match_id(url) - - webpage = self._download_webpage(url, playlist_id) - - entries = [ - self.url_result('npo:%s' % video_id, 'NPO') - for video_id, part in re.findall( - r']+href="([^"]+)"[^>]+class="js-mid"[^>]*>(Deel \d+)', webpage) - ] - - playlist_title = self._html_search_regex( - r'(?s)]+class="subject"[^>]*>(.+?)', - webpage, 'playlist title') - - return self.playlist_result(entries, playlist_id, playlist_title) + }] From ddb19772d572ae2118664a22d083a8f31fc63d8d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 25 Sep 2016 22:03:18 +0700 Subject: [PATCH 558/775] [vpro] Fix playlist title extraction and update tests --- youtube_dl/extractor/npo.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index f95867d58..ff02d0309 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -459,7 +459,7 @@ class NPOPlaylistBaseIE(NPOIE): class VPROIE(NPOPlaylistBaseIE): IE_NAME = 'vpro' _VALID_URL = r'https?://(?:www\.)?(?:tegenlicht\.)?vpro\.nl/(?:[^/]+/){2,}(?P[^/]+)\.html' - _PLAYLIST_TITLE_RE = r'\s*([^>]+?)\s*-\s*Teledoc\s*-\s*VPRO\s*' + _PLAYLIST_TITLE_RE = r']+class=["\'].*?\bmedia-platform-title\b.*?["\'][^>]*>([^<]+)' _PLAYLIST_ENTRY_RE = r'data-media-id="([^"]+)"' _TESTS = [ @@ -473,12 +473,13 @@ class VPROIE(NPOPlaylistBaseIE): 'description': 'md5:52cf4eefbc96fffcbdc06d024147abea', 'upload_date': '20130225', }, + 'skip': 'Video gone', }, { 'url': 'http://www.vpro.nl/programmas/2doc/2015/sergio-herman.html', 'info_dict': { 'id': 'sergio-herman', - 'title': 'Sergio Herman: Fucking perfect', + 'title': 'sergio herman: fucking perfect', }, 'playlist_count': 2, }, @@ -487,7 +488,7 @@ class VPROIE(NPOPlaylistBaseIE): 'url': 'http://www.vpro.nl/programmas/2doc/2015/education-education.html', 'info_dict': { 'id': 'education-education', - 'title': '2Doc', + 'title': 'education education', }, 'playlist_count': 2, } From 5742c18bc1ea3da5b0fd480e75fcdf099220e52f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 25 Sep 2016 22:19:00 +0700 Subject: [PATCH 559/775] [npo] Add support for anderetijden.nl (Closes #10754) --- youtube_dl/extractor/npo.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index ff02d0309..66035a77c 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -5,6 +5,7 @@ import re from .common import InfoExtractor from ..utils import ( fix_xml_ampersands, + orderedSet, parse_duration, qualities, strip_jsonp, @@ -446,7 +447,7 @@ class NPOPlaylistBaseIE(NPOIE): entries = [ self.url_result('npo:%s' % video_id if not video_id.startswith('http') else video_id) - for video_id in re.findall(self._PLAYLIST_ENTRY_RE, webpage) + for video_id in orderedSet(re.findall(self._PLAYLIST_ENTRY_RE, webpage)) ] playlist_title = self._html_search_regex( @@ -508,3 +509,18 @@ class WNLIE(NPOPlaylistBaseIE): }, 'playlist_count': 4, }] + + +class AndereTijdenIE(NPOPlaylistBaseIE): + _VALID_URL = r'https?://(?:www\.)?anderetijden\.nl/programma/(?:[^/]+/)+(?P[^/?#&]+)' + _PLAYLIST_TITLE_RE = 
r'(?s)]+class=["\'].*?\bpage-title\b.*?["\'][^>]*>(.+?)' + _PLAYLIST_ENTRY_RE = r']+class=["\']episode-container episode-page["\'][^>]+data-prid=["\'](.+?)["\']' + + _TESTS = [{ + 'url': 'http://anderetijden.nl/programma/1/Andere-Tijden/aflevering/676/Duitse-soldaten-over-de-Slag-bij-Arnhem', + 'info_dict': { + 'id': 'Duitse-soldaten-over-de-Slag-bij-Arnhem', + 'title': 'Duitse soldaten over de Slag bij Arnhem', + }, + 'playlist_count': 3, + }] From f1ee462c82381d3c68673500c0491fe477030c3b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 25 Sep 2016 22:38:36 +0700 Subject: [PATCH 560/775] [PULL_REQUEST_TEMPLATE.md] Fix typo --- .github/PULL_REQUEST_TEMPLATE.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 89e8a3188..46fa26f02 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -11,7 +11,7 @@ - [ ] [Searched](https://github.com/rg3/youtube-dl/search?q=is%3Apr&type=Issues) the bugtracker for similar pull requests ### In order to be accepted and merged into youtube-dl each piece of code must be in public domain or released under [Unlicense](http://unlicense.org/). Check one of the following options: -- [ ] I am the original original author of this code and I am willing to release it under [Unlicense](http://unlicense.org/) +- [ ] I am the original author of this code and I am willing to release it under [Unlicense](http://unlicense.org/) - [ ] I am not the original author of this code but it is in public domain or released under [Unlicense](http://unlicense.org/) (provide reliable evidence) ### What is the purpose of your *pull request*? From 2d5b4af0070f8aa6f3f4eb8fdabef5d006f6429a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 25 Sep 2016 23:30:57 +0700 Subject: [PATCH 561/775] [extractors] Add import for anderetijden extractor --- youtube_dl/extractor/extractors.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index bf1f70885..23fd2a308 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -612,13 +612,14 @@ from .nowtv import ( ) from .noz import NozIE from .npo import ( + AndereTijdenIE, NPOIE, NPOLiveIE, NPORadioIE, NPORadioFragmentIE, SchoolTVIE, VPROIE, - WNLIE + WNLIE, ) from .npr import NprIE from .nrk import ( From d3c97bad6181e1d3dc0cb4eece041e1cfb0ba6bc Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Mon, 26 Sep 2016 14:14:37 +0800 Subject: [PATCH 562/775] Ignore and cleanup 3gp files --- .gitignore | 1 + Makefile | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index a802c75a1..002b700f5 100644 --- a/.gitignore +++ b/.gitignore @@ -29,6 +29,7 @@ updates_key.pem *.m4a *.m4v *.mp3 +*.3gp *.part *.swp test/testdata diff --git a/Makefile b/Makefile index ac234fcb0..a2763a664 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites clean: - rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part* *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe + rm 
-rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part* *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.3gp *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe find . -name "*.pyc" -delete find . -name "*.class" -delete From fffb9cff944cfab11f311900ee8138f28f7232d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 26 Sep 2016 22:15:58 +0700 Subject: [PATCH 563/775] [kaltura] Speed up embed regexes (#10764) --- youtube_dl/extractor/kaltura.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py index 5a8403777..91bc3a0a7 100644 --- a/youtube_dl/extractor/kaltura.py +++ b/youtube_dl/extractor/kaltura.py @@ -105,20 +105,20 @@ class KalturaIE(InfoExtractor): kWidget\.(?:thumb)?[Ee]mbed\( \{.*? (?P['\"])wid(?P=q1)\s*:\s* - (?P['\"])_?(?P[^'\"]+)(?P=q2),.*? + (?P['\"])_?(?P(?:(?!(?P=q2)).)+)(?P=q2),.*? (?P['\"])entry_?[Ii]d(?P=q3)\s*:\s* - (?P['\"])(?P[^'\"]+)(?P=q4), + (?P['\"])(?P(?:(?!(?P=q4)).)+)(?P=q4), """, webpage) or re.search( r'''(?xs) (?P["\']) - (?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/.*?(?:p|partner_id)/(?P\d+).*? + (?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/(?:(?!(?P=q1)).)*(?:p|partner_id)/(?P\d+)(?:(?!(?P=q1)).)* (?P=q1).*? (?: entry_?[Ii]d| (?P["\'])entry_?[Ii]d(?P=q2) )\s*:\s* - (?P["\'])(?P.+?)(?P=q3) + (?P["\'])(?P(?:(?!(?P=q3)).)+)(?P=q3) ''', webpage)) if mobj: embed_info = mobj.groupdict() From d3dbb46330461c0c70c3aae47b69d27882cfc325 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20B=C3=A1rta?= Date: Sun, 25 Sep 2016 12:44:46 +0200 Subject: [PATCH 564/775] [promptfile] Fix extraction (Closes #10634) --- youtube_dl/extractor/promptfile.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/promptfile.py b/youtube_dl/extractor/promptfile.py index f93bd19ff..54c4aee13 100644 --- a/youtube_dl/extractor/promptfile.py +++ b/youtube_dl/extractor/promptfile.py @@ -15,12 +15,12 @@ from ..utils import ( class PromptFileIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?promptfile\.com/l/(?P[0-9A-Z\-]+)' _TEST = { - 'url': 'http://www.promptfile.com/l/D21B4746E9-F01462F0FF', - 'md5': 'd1451b6302da7215485837aaea882c4c', + 'url': 'http://www.promptfile.com/l/86D1CE8462-576CAAE416', + 'md5': '5a7e285a26e0d66d9a263fae91bc92ce', 'info_dict': { - 'id': 'D21B4746E9-F01462F0FF', + 'id': '86D1CE8462-576CAAE416', 'ext': 'mp4', - 'title': 'Birds.mp4', + 'title': 'oceans.mp4', 'thumbnail': 're:^https?://.*\.jpg$', } } @@ -33,14 +33,20 @@ class PromptFileIE(InfoExtractor): raise ExtractorError('Video %s does not exist' % video_id, expected=True) + chash_pattern = r'\$\("#chash"\)\.val\("(.+)"\+\$\("#chash"\)' + chash = self._html_search_regex(chash_pattern, webpage, "chash") fields = self._hidden_inputs(webpage) + k = list(fields)[0] + fields[k] = chash + fields[k] + post = urlencode_postdata(fields) req = sanitized_Request(url, post) req.add_header('Content-type', 'application/x-www-form-urlencoded') webpage = self._download_webpage( req, video_id, 'Downloading video page') - url = self._html_search_regex(r'url:\s*\'([^\']+)\'', webpage, 'URL') + url_pattern = r'', webpage, 'title') thumbnail = self._html_search_regex( From 72c3d02d294b04b35a19417b31ad497e7540caa3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= 
Date: Mon, 26 Sep 2016 23:39:54 +0700 Subject: [PATCH 565/775] [promptfile] Improve and modernize --- youtube_dl/extractor/promptfile.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/promptfile.py b/youtube_dl/extractor/promptfile.py index 54c4aee13..d40cca06f 100644 --- a/youtube_dl/extractor/promptfile.py +++ b/youtube_dl/extractor/promptfile.py @@ -7,7 +7,6 @@ from .common import InfoExtractor from ..utils import ( determine_ext, ExtractorError, - sanitized_Request, urlencode_postdata, ) @@ -33,20 +32,23 @@ class PromptFileIE(InfoExtractor): raise ExtractorError('Video %s does not exist' % video_id, expected=True) - chash_pattern = r'\$\("#chash"\)\.val\("(.+)"\+\$\("#chash"\)' - chash = self._html_search_regex(chash_pattern, webpage, "chash") + chash = self._search_regex( + r'val\("([^"]*)"\s*\+\s*\$\("#chash"\)', webpage, 'chash') fields = self._hidden_inputs(webpage) - k = list(fields)[0] - fields[k] = chash + fields[k] + keys = list(fields.keys()) + chash_key = keys[0] if len(keys) == 1 else next( + key for key in keys if key.startswith('cha')) + fields[chash_key] = chash + fields[chash_key] - post = urlencode_postdata(fields) - req = sanitized_Request(url, post) - req.add_header('Content-type', 'application/x-www-form-urlencoded') webpage = self._download_webpage( - req, video_id, 'Downloading video page') + url, video_id, 'Downloading video page', + data=urlencode_postdata(fields), + headers={'Content-type': 'application/x-www-form-urlencoded'}) - url_pattern = r']+href=(["\'])(?P(?:(?!\1).)+)\1[^>]*>\s*Download File', + r']+href=(["\'])(?Phttps?://(?:www\.)?promptfile\.com/file/(?:(?!\1).)+)\1'), + webpage, 'video url', group='url') title = self._html_search_regex( r'', webpage, 'title') thumbnail = self._html_search_regex( @@ -55,7 +57,7 @@ class PromptFileIE(InfoExtractor): formats = [{ 'format_id': 'sd', - 'url': url, + 'url': video_url, 'ext': determine_ext(title), }] self._sort_formats(formats) From d75d9e343e91527c1fe34678e913ae16a0eafbdd Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 27 Sep 2016 14:38:41 +0800 Subject: [PATCH 566/775] [einthusan] Fix extraction (closes #10714) --- ChangeLog | 1 + youtube_dl/extractor/einthusan.py | 14 ++++++++------ 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/ChangeLog b/ChangeLog index 5c96dc179..fdebb89b9 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,6 +4,7 @@ Core + Improved support for HTML5 subtitles Extractors +* [einthusan] Fix extraction (#10714) + [twitter] Support Periscope embeds (#10737) + [openload] Support subtitles (#10625) diff --git a/youtube_dl/extractor/einthusan.py b/youtube_dl/extractor/einthusan.py index f7339702c..443865ad2 100644 --- a/youtube_dl/extractor/einthusan.py +++ b/youtube_dl/extractor/einthusan.py @@ -14,7 +14,7 @@ class EinthusanIE(InfoExtractor): _TESTS = [ { 'url': 'http://www.einthusan.com/movies/watch.php?id=2447', - 'md5': 'af244f4458cd667205e513d75da5b8b1', + 'md5': 'd71379996ff5b7f217eca034c34e3461', 'info_dict': { 'id': '2447', 'ext': 'mp4', @@ -25,13 +25,13 @@ class EinthusanIE(InfoExtractor): }, { 'url': 'http://www.einthusan.com/movies/watch.php?id=1671', - 'md5': 'ef63c7a803e22315880ed182c10d1c5c', + 'md5': 'b16a6fd3c67c06eb7c79c8a8615f4213', 'info_dict': { 'id': '1671', 'ext': 'mp4', 'title': 'Soodhu Kavvuum', 'thumbnail': 're:^https?://.*\.jpg$', - 'description': 'md5:05d8a0c0281a4240d86d76e14f2f4d51', + 'description': 'md5:b40f2bf7320b4f9414f3780817b2af8c', } }, ] @@ -50,9 +50,11 @@ class 
EinthusanIE(InfoExtractor): video_id = self._search_regex( r'data-movieid=["\'](\d+)', webpage, 'video id', default=video_id) - video_url = self._download_webpage( + m3u8_url = self._download_webpage( 'http://cdn.einthusan.com/geturl/%s/hd/London,Washington,Toronto,Dallas,San,Sydney/' - % video_id, video_id) + % video_id, video_id, headers={'Referer': url}) + formats = self._extract_m3u8_formats( + m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native') description = self._html_search_meta('description', webpage) thumbnail = self._html_search_regex( @@ -64,7 +66,7 @@ class EinthusanIE(InfoExtractor): return { 'id': video_id, 'title': title, - 'url': video_url, + 'formats': formats, 'thumbnail': thumbnail, 'description': description, } From 93933c9819fa1282081a5f0761cbeabc9fbea336 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 27 Sep 2016 15:28:37 +0100 Subject: [PATCH 567/775] [awaan:video] fix test(closes #10773) --- youtube_dl/extractor/awaan.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/awaan.py b/youtube_dl/extractor/awaan.py index 66d7515bc..a2603bbff 100644 --- a/youtube_dl/extractor/awaan.py +++ b/youtube_dl/extractor/awaan.py @@ -66,6 +66,7 @@ class AWAANVideoIE(AWAANBaseIE): 'duration': 2041, 'timestamp': 1227504126, 'upload_date': '20081124', + 'uploader_id': '71', }, }, { 'url': 'http://awaan.ae/video/26723981/%D8%AF%D8%A7%D8%B1-%D8%A7%D9%84%D8%B3%D9%84%D8%A7%D9%85:-%D8%AE%D9%8A%D8%B1-%D8%AF%D9%88%D8%B1-%D8%A7%D9%84%D8%A3%D9%86%D8%B5%D8%A7%D8%B1', From 2342733f850c979c6f23ea2e83dfcb176fb08fa5 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 27 Sep 2016 15:29:50 +0100 Subject: [PATCH 568/775] fix tests related to 1978540a5122c53012e17a78841f3da0df77fd34(closes #10774) --- youtube_dl/extractor/formula1.py | 6 +++++- youtube_dl/extractor/voxmedia.py | 10 ++++++++-- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/formula1.py b/youtube_dl/extractor/formula1.py index 8c417ab65..fecfc28ae 100644 --- a/youtube_dl/extractor/formula1.py +++ b/youtube_dl/extractor/formula1.py @@ -11,9 +11,13 @@ class Formula1IE(InfoExtractor): 'md5': '8c79e54be72078b26b89e0e111c0502b', 'info_dict': { 'id': 'JvYXJpMzE6pArfHWm5ARp5AiUmD-gibV', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Race highlights - Spain 2016', }, + 'params': { + # m3u8 download + 'skip_download': True, + }, 'add_ie': ['Ooyala'], }, { 'url': 'http://www.formula1.com/en/video/2016/5/Race_highlights_-_Spain_2016.html', diff --git a/youtube_dl/extractor/voxmedia.py b/youtube_dl/extractor/voxmedia.py index b1b32ad44..f8e331493 100644 --- a/youtube_dl/extractor/voxmedia.py +++ b/youtube_dl/extractor/voxmedia.py @@ -9,13 +9,16 @@ class VoxMediaIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?(?:theverge|vox|sbnation|eater|polygon|curbed|racked)\.com/(?:[^/]+/)*(?P[^/?]+)' _TESTS = [{ 'url': 'http://www.theverge.com/2014/6/27/5849272/material-world-how-google-discovered-what-software-is-made-of', - 'md5': '73856edf3e89a711e70d5cf7cb280b37', 'info_dict': { 'id': '11eXZobjrG8DCSTgrNjVinU-YmmdYjhe', 'ext': 'mp4', 'title': 'Google\'s new material design direction', 'description': 'md5:2f44f74c4d14a1f800ea73e1c6832ad2', }, + 'params': { + # m3u8 download + 'skip_download': True, + }, 'add_ie': ['Ooyala'], }, { # data-ooyala-id @@ -31,13 +34,16 @@ class VoxMediaIE(InfoExtractor): }, { # volume embed 'url': 'http://www.vox.com/2016/3/31/11336640/mississippi-lgbt-religious-freedom-bill', - 'md5': '375c483c5080ab8cd85c9c84cfc2d1e4', 'info_dict': { 'id': 
'wydzk3dDpmRz7PQoXRsTIX6XTkPjYL0b', 'ext': 'mp4', 'title': 'The new frontier of LGBTQ civil rights, explained', 'description': 'md5:0dc58e94a465cbe91d02950f770eb93f', }, + 'params': { + # m3u8 download + 'skip_download': True, + }, 'add_ie': ['Ooyala'], }, { # youtube embed From f9dd86a112835e04e271e8d1d844f250e6ff0c5e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 27 Sep 2016 21:37:33 +0700 Subject: [PATCH 569/775] [npo] Clarify IE_NAMEs (Closes #10775) --- youtube_dl/extractor/npo.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index 66035a77c..9c7cc777b 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -497,6 +497,7 @@ class VPROIE(NPOPlaylistBaseIE): class WNLIE(NPOPlaylistBaseIE): + IE_NAME = 'wnl' _VALID_URL = r'https?://(?:www\.)?omroepwnl\.nl/video/detail/(?P[^/]+)__\d+' _PLAYLIST_TITLE_RE = r'(?s)]+class="subject"[^>]*>(.+?)' _PLAYLIST_ENTRY_RE = r']+href="([^"]+)"[^>]+class="js-mid"[^>]*>Deel \d+' @@ -512,6 +513,7 @@ class WNLIE(NPOPlaylistBaseIE): class AndereTijdenIE(NPOPlaylistBaseIE): + IE_NAME = 'anderetijden' _VALID_URL = r'https?://(?:www\.)?anderetijden\.nl/programma/(?:[^/]+/)+(?P[^/?#&]+)' _PLAYLIST_TITLE_RE = r'(?s)]+class=["\'].*?\bpage-title\b.*?["\'][^>]*>(.+?)' _PLAYLIST_ENTRY_RE = r']+class=["\']episode-container episode-page["\'][^>]+data-prid=["\'](.+?)["\']' From 1a2fbe322ee2d711b474f32a7d3f331791fb1881 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 27 Sep 2016 21:55:51 +0700 Subject: [PATCH 570/775] [periscope] Treat timed_out state as finished stream --- youtube_dl/extractor/periscope.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/periscope.py b/youtube_dl/extractor/periscope.py index e8b2f11c6..61043cad5 100644 --- a/youtube_dl/extractor/periscope.py +++ b/youtube_dl/extractor/periscope.py @@ -87,7 +87,7 @@ class PeriscopeIE(PeriscopeBaseIE): 'ext': 'flv' if format_id == 'rtmp' else 'mp4', } if format_id != 'rtmp': - f['protocol'] = 'm3u8_native' if state == 'ended' else 'm3u8' + f['protocol'] = 'm3u8_native' if state in ('ended', 'timed_out') else 'm3u8' formats.append(f) self._sort_formats(formats) From e3845525906228091fdf446f2cf2e9a20e93f59f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kacper=20Michaj=C5=82ow?= Date: Sun, 25 Sep 2016 05:39:29 +0200 Subject: [PATCH 571/775] [vk] Add support for dailymotion embeds Fixes #10661 --- youtube_dl/extractor/vk.py | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index cd22df25a..f26e0732c 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -23,8 +23,9 @@ from ..utils import ( unified_strdate, urlencode_postdata, ) -from .vimeo import VimeoIE +from .dailymotion import DailymotionIE from .pladform import PladformIE +from .vimeo import VimeoIE class VKBaseIE(InfoExtractor): @@ -210,6 +211,23 @@ class VKIE(VKBaseIE): 'view_count': int, }, }, + { + # dailymotion embed + 'url': 'https://vk.com/video-37468416_456239855', + 'info_dict': { + 'id': 'k3lz2cmXyRuJQSjGHUv', + 'ext': 'mp4', + 'title': 'md5:d52606645c20b0ddbb21655adaa4f56f', + 'description': 'md5:c651358f03c56f1150b555c26d90a0fd', + 'uploader': 'AniLibria.Tv', + 'upload_date': '20160914', + 'uploader_id': 'x1p5vl5', + 'timestamp': 1473877246, + }, + 'params': { + 'skip_download': True, + } + }, { # video key is extra_data not url\d+ 'url': 
'http://vk.com/video-110305615_171782105', @@ -315,6 +333,10 @@ class VKIE(VKBaseIE): m_rutube.group(1).replace('\\', '')) return self.url_result(rutube_url) + dailymotion_urls = DailymotionIE._extract_urls(info_page) + if dailymotion_urls: + return self.url_result(dailymotion_urls[0], DailymotionIE.ie_key()) + m_opts = re.search(r'(?s)var\s+opts\s*=\s*({.+?});', info_page) if m_opts: m_opts_url = re.search(r"url\s*:\s*'((?!/\b)[^']+)", m_opts.group(1)) From cdfcc4ce95e351c3f560fa3c07ae7d4ab188ef25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 27 Sep 2016 22:27:10 +0700 Subject: [PATCH 572/775] [mtv] Improve _VALID_URL --- youtube_dl/extractor/mtv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index 2e9580b10..74a3a035e 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -271,7 +271,7 @@ class MTVServicesEmbeddedIE(MTVServicesInfoExtractor): class MTVIE(MTVServicesInfoExtractor): IE_NAME = 'mtv' - _VALID_URL = r'https?://(?:www\.)?mtv\.com/(video-clips|full-episodes)/(?P[^/?#.]+)' + _VALID_URL = r'https?://(?:www\.)?mtv\.com/(?:video-clips|full-episodes)/(?P[^/?#.]+)' _FEED_URL = 'http://www.mtv.com/feeds/mrss/' _TESTS = [{ From dd2cffeeec8feac8fe52924760b2cb368249396a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 27 Sep 2016 22:43:35 +0700 Subject: [PATCH 573/775] [ChangeLog] Actualize --- ChangeLog | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/ChangeLog b/ChangeLog index fdebb89b9..33c94ef55 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,9 +1,24 @@ vesion Core ++ Add hdcore query parameter to akamai f4m formats ++ Delegate HLS live streams downloading to ffmpeg + Improved support for HTML5 subtitles Extractors ++ [vk] Add support for dailymotion embeds (#10661) +* [einthusan] Fix extraction (#10714) +* [promptfile] Fix extraction (#10634) +* [kaltura] Speed up embed regular expressions (#10764) ++ [npo] Add support for anderetijden.nl (#10754) ++ [prosiebensat1] Add support for advopedia sites +* [mwave] Relax URL regular expression (#10735, #10748) +* [prosiebensat1] Fix playlist support (#10745) ++ [prosiebensat1] Add support for sat1gold sites (#10745) ++ [cbsnews:livevideo] Fix extraction and extract m3u8 formats ++ [brightcove:new] Add support for live streams +* [soundcloud] Generalize playlist entries extraction (#10733) ++ [mtv] Add support for new URL schema (#8169, #9808) * [einthusan] Fix extraction (#10714) + [twitter] Support Periscope embeds (#10737) + [openload] Support subtitles (#10625) From c8f45f763cac3c0d0e4ca35ba072d8d321957e85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 27 Sep 2016 23:03:00 +0700 Subject: [PATCH 574/775] [ChangeLog] Remove duplicate --- ChangeLog | 1 - 1 file changed, 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 33c94ef55..0d15b6a82 100644 --- a/ChangeLog +++ b/ChangeLog @@ -7,7 +7,6 @@ Core Extractors + [vk] Add support for dailymotion embeds (#10661) -* [einthusan] Fix extraction (#10714) * [promptfile] Fix extraction (#10634) * [kaltura] Speed up embed regular expressions (#10764) + [npo] Add support for anderetijden.nl (#10754) From 8f0cf20ab987019c3ba66c375450f80bb1cfe281 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 27 Sep 2016 23:09:46 +0700 Subject: [PATCH 575/775] release 2016.09.27 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 10 ++++++---- 
youtube_dl/version.py | 2 +- 4 files changed, 11 insertions(+), 9 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 7669ab9b7..273eb8c0b 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.24*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.24** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.27*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.27** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.09.24 +[debug] youtube-dl version 2016.09.27 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 0d15b6a82..f8149cc30 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -vesion +vesion 2016.09.27 Core + Add hdcore query parameter to akamai f4m formats diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 95a137393..26f275577 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -40,6 +40,7 @@ - **Allocine** - **AlphaPorno** - **AMCNetworks** + - **anderetijden**: npo.nl and ntr.nl - **AnimeOnDemand** - **anitube.se** - **AnySex** @@ -127,8 +128,8 @@ - **CBS** - **CBSInteractive** - **CBSLocal** - - **CBSNews**: CBS News - - **CBSNewsLiveVideo**: CBS News Live Videos + - **cbsnews**: CBS News + - **cbsnews:livevideo**: CBS News Live Videos - **CBSSports** - **CCTV** - **CDA** @@ -424,8 +425,9 @@ - **MPORA** - **MSN** - **mtg**: MTG services - - **MTV** + - **mtv** - **mtv.de** + - **mtv:video** - **mtvservices:embedded** - **MuenchenTV**: münchen.tv - **MusicPlayOn** @@ -865,7 +867,7 @@ - **wholecloud**: WholeCloud - **Wimp** - **Wistia** - - **WNL** + - **wnl**: npo.nl and ntr.nl - **WorldStarHipHop** - **wrzuta.pl** - **wrzuta.pl:playlist** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 2af6380b8..af0c2cfc4 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.09.24' +__version__ = '2016.09.27' From 8bfda726c20198b7e68a805967917ef1a79e9b91 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 28 Sep 2016 16:34:27 +0100 Subject: [PATCH 576/775] [limelight:media] improve http formats extraction --- youtube_dl/extractor/limelight.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/limelight.py b/youtube_dl/extractor/limelight.py index 
6752ffee2..b7bfa7a6d 100644 --- a/youtube_dl/extractor/limelight.py +++ b/youtube_dl/extractor/limelight.py @@ -59,7 +59,7 @@ class LimelightBaseIE(InfoExtractor): format_id = 'rtmp' if stream.get('videoBitRate'): format_id += '-%d' % int_or_none(stream['videoBitRate']) - http_url = 'http://%s/%s' % (rtmp.group('host').replace('csl.', 'cpl.'), rtmp.group('playpath')[4:]) + http_url = 'http://cpl.delvenetworks.com/' + rtmp.group('playpath')[4:] urls.append(http_url) http_fmt = fmt.copy() http_fmt.update({ From f533490bb7b2d25b9c6fe7ccd381ebe2bef7d4f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 28 Sep 2016 22:58:25 +0700 Subject: [PATCH 577/775] [ketnet] Extract mzsource formats (#10770) --- youtube_dl/extractor/ketnet.py | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/ketnet.py b/youtube_dl/extractor/ketnet.py index aaf3f807a..eb0a16008 100644 --- a/youtube_dl/extractor/ketnet.py +++ b/youtube_dl/extractor/ketnet.py @@ -21,6 +21,10 @@ class KetnetIE(InfoExtractor): }, { 'url': 'https://www.ketnet.be/achter-de-schermen/sien-repeteert-voor-stars-for-life', 'only_matching': True, + }, { + # mzsource, geo restricted to Belgium + 'url': 'https://www.ketnet.be/kijken/nachtwacht/de-bermadoe', + 'only_matching': True, }] def _real_extract(self, url): @@ -36,9 +40,25 @@ class KetnetIE(InfoExtractor): title = config['title'] - formats = self._extract_m3u8_formats( - config['source']['hls'], video_id, 'mp4', - entry_protocol='m3u8_native', m3u8_id='hls') + formats = [] + for source_key in ('', 'mz'): + source = config.get('%ssource' % source_key) + if not isinstance(source, dict): + continue + for format_id, format_url in source.items(): + if format_id == 'hls': + formats.extend(self._extract_m3u8_formats( + format_url, video_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id=format_id, + fatal=False)) + elif format_id == 'hds': + formats.extend(self._extract_f4m_formats( + format_url, video_id, f4m_id=format_id, fatal=False)) + else: + formats.append({ + 'url': format_url, + 'format_id': format_id, + }) self._sort_formats(formats) return { From a56e74e2713ed45f4096735cf49d1d97b5e75389 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A9stin=20Reed?= Date: Wed, 28 Sep 2016 16:54:06 +0200 Subject: [PATCH 578/775] [Instagram] Extract comments --- youtube_dl/extractor/instagram.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py index 8f7f232be..5ebc30a10 100644 --- a/youtube_dl/extractor/instagram.py +++ b/youtube_dl/extractor/instagram.py @@ -29,6 +29,7 @@ class InstagramIE(InfoExtractor): 'uploader': 'Naomi Leonor Phan-Quang', 'like_count': int, 'comment_count': int, + 'comments': list, }, }, { # missing description @@ -44,6 +45,7 @@ class InstagramIE(InfoExtractor): 'uploader': 'Britney Spears', 'like_count': int, 'comment_count': int, + 'comments': list, }, 'params': { 'skip_download': True, @@ -101,6 +103,14 @@ class InstagramIE(InfoExtractor): uploader_id = media.get('owner', {}).get('username') like_count = int_or_none(media.get('likes', {}).get('count')) comment_count = int_or_none(media.get('comments', {}).get('count')) + comments = [{ + 'author': comment.get('user', {}).get('username'), + 'author_id': comment.get('user', {}).get('id'), + 'id': comment.get('id'), + 'text': comment.get('text'), + 'timestamp': int_or_none(comment.get('created_at')), + } for comment in media.get('comments', {}).get('nodes', []) + if 
comment.get('text')] if not video_url: video_url = self._og_search_video_url(webpage, secure=False) @@ -131,6 +141,7 @@ class InstagramIE(InfoExtractor): 'uploader': uploader, 'like_count': like_count, 'comment_count': comment_count, + 'comments': comments, } From 0d72ff9c51ecc84aae1717c05f8b73ad94199687 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 29 Sep 2016 21:39:35 +0800 Subject: [PATCH 579/775] [leeco] Recognize more Le Sports URLs (#10794) --- ChangeLog | 8 +++++++- youtube_dl/extractor/leeco.py | 8 +++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index f8149cc30..70da55c90 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,10 @@ -vesion 2016.09.27 +version + +Extractors ++ [leeco] Recognize more Le Sports URLs (#10794) + + +version 2016.09.27 Core + Add hdcore query parameter to akamai f4m formats diff --git a/youtube_dl/extractor/leeco.py b/youtube_dl/extractor/leeco.py index e9cc9aa59..c48a5aad1 100644 --- a/youtube_dl/extractor/leeco.py +++ b/youtube_dl/extractor/leeco.py @@ -29,7 +29,7 @@ from ..utils import ( class LeIE(InfoExtractor): IE_DESC = '乐视网' - _VALID_URL = r'https?://(?:www\.le\.com/ptv/vplay|sports\.le\.com/video)/(?P\d+)\.html' + _VALID_URL = r'https?://(?:www\.le\.com/ptv/vplay|(?:sports\.le|(?:www\.)?lesports)\.com/(?:match|video))/(?P\d+)\.html' _URL_TEMPLATE = 'http://www.le.com/ptv/vplay/%s.html' @@ -73,6 +73,12 @@ class LeIE(InfoExtractor): }, { 'url': 'http://sports.le.com/video/25737697.html', 'only_matching': True, + }, { + 'url': 'http://www.lesports.com/match/1023203003.html', + 'only_matching': True, + }, { + 'url': 'http://sports.le.com/match/1023203003.html', + 'only_matching': True, }] # ror() and calc_time_key() are reversed from a embedded swf file in KLetvPlayer.swf From 93aa0b631878b62f756c83e1069a14cd2d8775f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 29 Sep 2016 23:04:10 +0700 Subject: [PATCH 580/775] [vk] Add support for finished live streams (#10799) --- youtube_dl/extractor/vk.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index f26e0732c..1d089c9d7 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -226,7 +226,7 @@ class VKIE(VKBaseIE): }, 'params': { 'skip_download': True, - } + }, }, { # video key is extra_data not url\d+ @@ -241,6 +241,18 @@ class VKIE(VKBaseIE): 'view_count': int, }, }, + { + # finished live stream, live_mp4 + 'url': 'https://vk.com/videos-387766?z=video-387766_456242764%2Fpl_-387766_-2', + 'md5': '90d22d051fccbbe9becfccc615be6791', + 'info_dict': { + 'id': '456242764', + 'ext': 'mp4', + 'title': 'ИгроМир 2016 — день 1', + 'uploader': 'Игромания', + 'duration': 5239, + }, + }, { # removed video, just testing that we match the pattern 'url': 'http://vk.com/feed?z=video-43215063_166094326%2Fbb50cacd3177146d7a', @@ -366,7 +378,10 @@ class VKIE(VKBaseIE): formats = [] for k, v in data.items(): - if not k.startswith('url') and not k.startswith('cache') and k != 'extra_data' or not v: + if (not k.startswith('url') and not k.startswith('cache') + and k not in ('extra_data', 'live_mp4')): + continue + if not isinstance(v, compat_str) or not v.startswith('http'): continue height = int_or_none(self._search_regex( r'^(?:url|cache)(\d+)', k, 'height', default=None)) From 475f8a458099c64d367356471069bd0ff2bd1b0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 29 Sep 2016 23:21:39 +0700 Subject: [PATCH 
581/775] [vk] Add support for running live streams (Closes #10799) --- youtube_dl/extractor/vk.py | 47 ++++++++++++++++++++++++++------------ 1 file changed, 33 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index 1d089c9d7..9f7a593ef 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -253,6 +253,12 @@ class VKIE(VKBaseIE): 'duration': 5239, }, }, + { + # live stream, hls and rtmp links,most likely already finished live + # stream by the time you are reading this comment + 'url': 'https://vk.com/video-140332_456239111', + 'only_matching': True, + }, { # removed video, just testing that we match the pattern 'url': 'http://vk.com/feed?z=video-43215063_166094326%2Fbb50cacd3177146d7a', @@ -361,6 +367,11 @@ class VKIE(VKBaseIE): data_json = self._search_regex(r'var\s+vars\s*=\s*({.+?});', info_page, 'vars') data = json.loads(data_json) + title = unescapeHTML(data['md_title']) + + if data.get('live') == 2: + title = self._live_title(title) + # Extract upload date upload_date = None mobj = re.search(r'id="mv_date(?:_views)?_wrap"[^>]*>([a-zA-Z]+ [0-9]+), ([0-9]+) at', info_page) @@ -377,25 +388,33 @@ class VKIE(VKBaseIE): r'([\d,.]+)', views, 'view count', fatal=False)) formats = [] - for k, v in data.items(): - if (not k.startswith('url') and not k.startswith('cache') - and k not in ('extra_data', 'live_mp4')): + for format_id, format_url in data.items(): + if not isinstance(format_url, compat_str) or not format_url.startswith(('http', '//', 'rtmp')): continue - if not isinstance(v, compat_str) or not v.startswith('http'): - continue - height = int_or_none(self._search_regex( - r'^(?:url|cache)(\d+)', k, 'height', default=None)) - formats.append({ - 'format_id': k, - 'url': v, - 'height': height, - }) + if format_id.startswith(('url', 'cache')) or format_id in ('extra_data', 'live_mp4'): + height = int_or_none(self._search_regex( + r'^(?:url|cache)(\d+)', format_id, 'height', default=None)) + formats.append({ + 'format_id': format_id, + 'url': format_url, + 'height': height, + }) + elif format_id == 'hls': + formats.extend(self._extract_m3u8_formats( + format_url, video_id, 'mp4', m3u8_id=format_id, + fatal=False, live=True)) + elif format_id == 'rtmp': + formats.append({ + 'format_id': format_id, + 'url': format_url, + 'ext': 'flv', + }) self._sort_formats(formats) return { - 'id': compat_str(data['vid']), + 'id': compat_str(data.get('vid') or video_id), 'formats': formats, - 'title': unescapeHTML(data['md_title']), + 'title': title, 'thumbnail': data.get('jpg'), 'uploader': data.get('md_author'), 'duration': data.get('duration'), From efa97bdcf1f1e90d1b51a09324d7869dcd70729b Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 30 Sep 2016 00:28:32 +0800 Subject: [PATCH 582/775] Move write_xattr to utils.py There are some other places that use xattr functions. It's better to move it to a common place so that others can use it. 
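As a rough illustration of the "others can use it" point, a minimal sketch of a caller outside the postprocessor; the module, function name and xattr key below are hypothetical, only write_xattr, XAttrMetadataError and XAttrUnavailableError come from this change:

    from youtube_dl.utils import (
        write_xattr,
        XAttrMetadataError,
        XAttrUnavailableError,
    )

    def tag_download_origin(path, url):
        # Record the source URL in the file's extended attributes, the same
        # way XAttrMetadataPP records info dict fields.
        try:
            write_xattr(path, 'user.xdg.origin.url', url.encode('utf-8'))
        except XAttrUnavailableError as e:
            # No usable xattr backend (pyxattr/xattr module, setfattr or
            # xattr binary) was found on this system.
            print('xattrs not supported here: %s' % e)
        except XAttrMetadataError as e:
            # A backend exists but the write failed; e.reason is NO_SPACE,
            # VALUE_TOO_LONG or NOT_SUPPORTED.
            print('failed to write xattr (%s): %s' % (e.reason, e))

The postprocessor itself now follows the same pattern, catching XAttrUnavailableError and reporting it via report_error.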
--- youtube_dl/postprocessor/xattrpp.py | 114 ++-------------------------- youtube_dl/utils.py | 99 ++++++++++++++++++++++++ 2 files changed, 106 insertions(+), 107 deletions(-) diff --git a/youtube_dl/postprocessor/xattrpp.py b/youtube_dl/postprocessor/xattrpp.py index e39ca60aa..fbdfa02ac 100644 --- a/youtube_dl/postprocessor/xattrpp.py +++ b/youtube_dl/postprocessor/xattrpp.py @@ -1,37 +1,15 @@ from __future__ import unicode_literals -import os -import subprocess -import sys -import errno - from .common import PostProcessor from ..compat import compat_os_name from ..utils import ( - check_executable, hyphenate_date, - version_tuple, - PostProcessingError, - encodeArgument, - encodeFilename, + write_xattr, + XAttrMetadataError, + XAttrUnavailableError, ) -class XAttrMetadataError(PostProcessingError): - def __init__(self, code=None, msg='Unknown error'): - super(XAttrMetadataError, self).__init__(msg) - self.code = code - - # Parsing code and msg - if (self.code in (errno.ENOSPC, errno.EDQUOT) or - 'No space left' in self.msg or 'Disk quota excedded' in self.msg): - self.reason = 'NO_SPACE' - elif self.code == errno.E2BIG or 'Argument list too long' in self.msg: - self.reason = 'VALUE_TOO_LONG' - else: - self.reason = 'NOT_SUPPORTED' - - class XAttrMetadataPP(PostProcessor): # @@ -48,88 +26,6 @@ class XAttrMetadataPP(PostProcessor): def run(self, info): """ Set extended attributes on downloaded file (if xattr support is found). """ - # This mess below finds the best xattr tool for the job and creates a - # "write_xattr" function. - try: - # try the pyxattr module... - import xattr - - # Unicode arguments are not supported in python-pyxattr until - # version 0.5.0 - # See https://github.com/rg3/youtube-dl/issues/5498 - pyxattr_required_version = '0.5.0' - if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version): - self._downloader.report_warning( - 'python-pyxattr is detected but is too old. ' - 'youtube-dl requires %s or above while your version is %s. 
' - 'Falling back to other xattr implementations' % ( - pyxattr_required_version, xattr.__version__)) - - raise ImportError - - def write_xattr(path, key, value): - try: - xattr.set(path, key, value) - except EnvironmentError as e: - raise XAttrMetadataError(e.errno, e.strerror) - - except ImportError: - if compat_os_name == 'nt': - # Write xattrs to NTFS Alternate Data Streams: - # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29 - def write_xattr(path, key, value): - assert ':' not in key - assert os.path.exists(path) - - ads_fn = path + ':' + key - try: - with open(ads_fn, 'wb') as f: - f.write(value) - except EnvironmentError as e: - raise XAttrMetadataError(e.errno, e.strerror) - else: - user_has_setfattr = check_executable('setfattr', ['--version']) - user_has_xattr = check_executable('xattr', ['-h']) - - if user_has_setfattr or user_has_xattr: - - def write_xattr(path, key, value): - value = value.decode('utf-8') - if user_has_setfattr: - executable = 'setfattr' - opts = ['-n', key, '-v', value] - elif user_has_xattr: - executable = 'xattr' - opts = ['-w', key, value] - - cmd = ([encodeFilename(executable, True)] + - [encodeArgument(o) for o in opts] + - [encodeFilename(path, True)]) - - try: - p = subprocess.Popen( - cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE) - except EnvironmentError as e: - raise XAttrMetadataError(e.errno, e.strerror) - stdout, stderr = p.communicate() - stderr = stderr.decode('utf-8', 'replace') - if p.returncode != 0: - raise XAttrMetadataError(p.returncode, stderr) - - else: - # On Unix, and can't find pyxattr, setfattr, or xattr. - if sys.platform.startswith('linux'): - self._downloader.report_error( - "Couldn't find a tool to set the xattrs. " - "Install either the python 'pyxattr' or 'xattr' " - "modules, or the GNU 'attr' package " - "(which contains the 'setfattr' tool).") - else: - self._downloader.report_error( - "Couldn't find a tool to set the xattrs. 
" - "Install either the python 'xattr' module, " - "or the 'xattr' binary.") - # Write the metadata to the file's xattrs self._downloader.to_screen('[metadata] Writing metadata to file\'s xattrs') @@ -159,6 +55,10 @@ class XAttrMetadataPP(PostProcessor): return [], info + except XAttrUnavailableError as e: + self._downloader.report_error(str(e)) + return [], info + except XAttrMetadataError as e: if e.reason == 'NO_SPACE': self._downloader.report_warning( diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 69ca88c85..fcbfa0d76 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -42,6 +42,7 @@ from .compat import ( compat_html_entities_html5, compat_http_client, compat_kwargs, + compat_os_name, compat_parse_qs, compat_shlex_quote, compat_socket_create_connection, @@ -775,6 +776,25 @@ class ContentTooShortError(Exception): self.expected = expected +class XAttrMetadataError(Exception): + def __init__(self, code=None, msg='Unknown error'): + super(XAttrMetadataError, self).__init__(msg) + self.code = code + + # Parsing code and msg + if (self.code in (errno.ENOSPC, errno.EDQUOT) or + 'No space left' in self.msg or 'Disk quota excedded' in self.msg): + self.reason = 'NO_SPACE' + elif self.code == errno.E2BIG or 'Argument list too long' in self.msg: + self.reason = 'VALUE_TOO_LONG' + else: + self.reason = 'NOT_SUPPORTED' + + +class XAttrUnavailableError(Exception): + pass + + def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs): # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting # expected HTTP responses to meet HTTP/1.0 or later (see also @@ -3131,3 +3151,82 @@ def decode_png(png_data): current_row.append(color) return width, height, pixels + + +def write_xattr(path, key, value): + # This mess below finds the best xattr tool for the job + try: + # try the pyxattr module... + import xattr + + # Unicode arguments are not supported in python-pyxattr until + # version 0.5.0 + # See https://github.com/rg3/youtube-dl/issues/5498 + pyxattr_required_version = '0.5.0' + if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version): + # TODO: fallback to CLI tools + raise XAttrUnavailableError( + 'python-pyxattr is detected but is too old. ' + 'youtube-dl requires %s or above while your version is %s. 
' + 'Falling back to other xattr implementations' % ( + pyxattr_required_version, xattr.__version__)) + + try: + xattr.set(path, key, value) + except EnvironmentError as e: + raise XAttrMetadataError(e.errno, e.strerror) + + except ImportError: + if compat_os_name == 'nt': + # Write xattrs to NTFS Alternate Data Streams: + # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29 + assert ':' not in key + assert os.path.exists(path) + + ads_fn = path + ':' + key + try: + with open(ads_fn, 'wb') as f: + f.write(value) + except EnvironmentError as e: + raise XAttrMetadataError(e.errno, e.strerror) + else: + user_has_setfattr = check_executable('setfattr', ['--version']) + user_has_xattr = check_executable('xattr', ['-h']) + + if user_has_setfattr or user_has_xattr: + + value = value.decode('utf-8') + if user_has_setfattr: + executable = 'setfattr' + opts = ['-n', key, '-v', value] + elif user_has_xattr: + executable = 'xattr' + opts = ['-w', key, value] + + cmd = ([encodeFilename(executable, True)] + + [encodeArgument(o) for o in opts] + + [encodeFilename(path, True)]) + + try: + p = subprocess.Popen( + cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE) + except EnvironmentError as e: + raise XAttrMetadataError(e.errno, e.strerror) + stdout, stderr = p.communicate() + stderr = stderr.decode('utf-8', 'replace') + if p.returncode != 0: + raise XAttrMetadataError(p.returncode, stderr) + + else: + # On Unix, and can't find pyxattr, setfattr, or xattr. + if sys.platform.startswith('linux'): + raise XAttrUnavailableError( + "Couldn't find a tool to set the xattrs. " + "Install either the python 'pyxattr' or 'xattr' " + "modules, or the GNU 'attr' package " + "(which contains the 'setfattr' tool).") + else: + raise XAttrUnavailableError( + "Couldn't find a tool to set the xattrs. " + "Install either the python 'xattr' module, " + "or the 'xattr' binary.") From 3aa3953d28dae68b87aa83682043b5eec0973ddc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kacper=20Michaj=C5=82ow?= Date: Sun, 25 Sep 2016 20:26:58 +0200 Subject: [PATCH 583/775] [vk] Fix date and view count extraction. 
--- youtube_dl/extractor/vk.py | 17 +++++------------ youtube_dl/utils.py | 2 ++ 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index 9f7a593ef..3cfbd97af 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -373,19 +373,12 @@ class VKIE(VKBaseIE): title = self._live_title(title) # Extract upload date - upload_date = None - mobj = re.search(r'id="mv_date(?:_views)?_wrap"[^>]*>([a-zA-Z]+ [0-9]+), ([0-9]+) at', info_page) - if mobj is not None: - mobj.group(1) + ' ' + mobj.group(2) - upload_date = unified_strdate(mobj.group(1) + ' ' + mobj.group(2)) + upload_date = unified_strdate(self._html_search_regex( + r'class="mv_info_date[^>]*>([^<]*)<', info_page, 'upload date', default=None)) - view_count = None - views = self._html_search_regex( - r'"mv_views_count_number"[^>]*>(.+?\bviews?)<', - info_page, 'view count', default=None) - if views: - view_count = str_to_int(self._search_regex( - r'([\d,.]+)', views, 'view count', fatal=False)) + view_count = str_to_int(self._html_search_regex( + r'class="mv_views_count[^>]*>([\d,.]+)', + info_page, 'view count', default=None)) formats = [] for format_id, format_url in data.items(): diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index fcbfa0d76..243d09034 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -124,6 +124,8 @@ DATE_FORMATS = ( '%d %b %Y', '%B %d %Y', '%b %d %Y', + '%b %d %Y at %H:%M', + '%b %d %Y at %H:%M:%S', '%b %dst %Y %I:%M', '%b %dnd %Y %I:%M', '%b %dth %Y %I:%M', From c6eed6b8c000672f0515d916dda54002c7fca356 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 29 Sep 2016 23:47:25 +0700 Subject: [PATCH 584/775] [utils] Lower priority for rare date formats and add tests --- test/test_utils.py | 2 ++ youtube_dl/utils.py | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 9789d8611..b1b2effca 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -292,6 +292,7 @@ class TestUtil(unittest.TestCase): self.assertEqual(unified_strdate('25-09-2014'), '20140925') self.assertEqual(unified_strdate('27.02.2016 17:30'), '20160227') self.assertEqual(unified_strdate('UNKNOWN DATE FORMAT'), None) + self.assertEqual(unified_strdate('Feb 7, 2016 at 6:35 pm'), '20160207') def test_unified_timestamps(self): self.assertEqual(unified_timestamp('December 21, 2010'), 1292889600) @@ -312,6 +313,7 @@ class TestUtil(unittest.TestCase): self.assertEqual(unified_timestamp('27.02.2016 17:30'), 1456594200) self.assertEqual(unified_timestamp('UNKNOWN DATE FORMAT'), None) self.assertEqual(unified_timestamp('May 16, 2016 11:15 PM'), 1463440500) + self.assertEqual(unified_timestamp('Feb 7, 2016 at 6:35 pm'), 1454870100) def test_determine_ext(self): self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4') diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 243d09034..d2dfa8013 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -124,8 +124,6 @@ DATE_FORMATS = ( '%d %b %Y', '%B %d %Y', '%b %d %Y', - '%b %d %Y at %H:%M', - '%b %d %Y at %H:%M:%S', '%b %dst %Y %I:%M', '%b %dnd %Y %I:%M', '%b %dth %Y %I:%M', @@ -144,6 +142,8 @@ DATE_FORMATS = ( '%Y-%m-%dT%H:%M:%S', '%Y-%m-%dT%H:%M:%S.%f', '%Y-%m-%dT%H:%M', + '%b %d %Y at %H:%M', + '%b %d %Y at %H:%M:%S', ) DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS) From a7ee8a00f4af9853d06ed895c5023cc6b573fd57 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 29 Sep 2016 
23:48:21 +0700 Subject: [PATCH 585/775] [vk] Extract timestamp (Closes #10760) --- youtube_dl/extractor/vk.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index 3cfbd97af..77f5cebcf 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -20,7 +20,7 @@ from ..utils import ( remove_start, str_to_int, unescapeHTML, - unified_strdate, + unified_timestamp, urlencode_postdata, ) from .dailymotion import DailymotionIE @@ -106,6 +106,7 @@ class VKIE(VKBaseIE): 'title': 'ProtivoGunz - Хуёвая песня', 'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*', 'duration': 195, + 'timestamp': 1329060660, 'upload_date': '20120212', 'view_count': int, }, @@ -119,6 +120,7 @@ class VKIE(VKBaseIE): 'uploader': 'Tom Cruise', 'title': 'No name', 'duration': 9, + 'timestamp': 1374374880, 'upload_date': '20130721', 'view_count': int, } @@ -195,6 +197,7 @@ class VKIE(VKBaseIE): 'upload_date': '20150709', 'view_count': int, }, + 'skip': 'Removed', }, { # youtube embed @@ -237,6 +240,7 @@ class VKIE(VKBaseIE): 'ext': 'mp4', 'title': 'S-Dance, репетиции к The way show', 'uploader': 'THE WAY SHOW | 17 апреля', + 'timestamp': 1454870100, 'upload_date': '20160207', 'view_count': int, }, @@ -373,8 +377,9 @@ class VKIE(VKBaseIE): title = self._live_title(title) # Extract upload date - upload_date = unified_strdate(self._html_search_regex( - r'class="mv_info_date[^>]*>([^<]*)<', info_page, 'upload date', default=None)) + timestamp = unified_timestamp(self._html_search_regex( + r'class=["\']mv_info_date[^>]*>([^<]+)(?:<|from)', info_page, + 'upload date', fatal=False)) view_count = str_to_int(self._html_search_regex( r'class="mv_views_count[^>]*>([\d,.]+)', @@ -411,7 +416,7 @@ class VKIE(VKBaseIE): 'thumbnail': data.get('jpg'), 'uploader': data.get('md_author'), 'duration': data.get('duration'), - 'upload_date': upload_date, + 'timestamp': timestamp, 'view_count': view_count, } From 70d7b323b6556eb693bec43a1eb10ded889184b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 29 Sep 2016 23:51:52 +0700 Subject: [PATCH 586/775] [vk] Improve view count extraction --- youtube_dl/extractor/vk.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index 77f5cebcf..58799d413 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -255,6 +255,7 @@ class VKIE(VKBaseIE): 'title': 'ИгроМир 2016 — день 1', 'uploader': 'Игромания', 'duration': 5239, + 'view_count': int, }, }, { @@ -376,14 +377,13 @@ class VKIE(VKBaseIE): if data.get('live') == 2: title = self._live_title(title) - # Extract upload date timestamp = unified_timestamp(self._html_search_regex( - r'class=["\']mv_info_date[^>]*>([^<]+)(?:<|from)', info_page, + r'class=["\']mv_info_date[^>]+>([^<]+)(?:<|from)', info_page, 'upload date', fatal=False)) - view_count = str_to_int(self._html_search_regex( - r'class="mv_views_count[^>]*>([\d,.]+)', - info_page, 'view count', default=None)) + view_count = str_to_int(self._search_regex( + r'class=["\']mv_views_count[^>]+>\s*([\d,.]+)', + info_page, 'view count', fatal=False)) formats = [] for format_id, format_url in data.items(): From af33dd8ee7da49b5daf1582b2870deaa5427444b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 30 Sep 2016 00:13:03 +0700 Subject: [PATCH 587/775] [aftonbladet] Remove extractor --- youtube_dl/extractor/aftonbladet.py | 64 ----------------------------- 
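
The utils and VK patches above fit together: the '%b %d %Y at %H:%M' formats added to DATE_FORMATS are what date strings like 'Feb 7, 2016 at 6:35 pm' on VK pages appear to need. A small illustration (not part of the patch series), reusing only values that already occur in the added tests:

    # Illustration only: expected behaviour of the new "at" date formats,
    # using the exact sample values from the test_utils.py changes above.
    from youtube_dl.utils import unified_strdate, unified_timestamp

    assert unified_strdate('Feb 7, 2016 at 6:35 pm') == '20160207'
    assert unified_timestamp('Feb 7, 2016 at 6:35 pm') == 1454870100  # same timestamp as the VK test
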
youtube_dl/extractor/extractors.py | 1 - 2 files changed, 65 deletions(-) delete mode 100644 youtube_dl/extractor/aftonbladet.py diff --git a/youtube_dl/extractor/aftonbladet.py b/youtube_dl/extractor/aftonbladet.py deleted file mode 100644 index 5766b4fe8..000000000 --- a/youtube_dl/extractor/aftonbladet.py +++ /dev/null @@ -1,64 +0,0 @@ -# encoding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import int_or_none - - -class AftonbladetIE(InfoExtractor): - _VALID_URL = r'https?://tv\.aftonbladet\.se/abtv/articles/(?P[0-9]+)' - _TEST = { - 'url': 'http://tv.aftonbladet.se/abtv/articles/36015', - 'info_dict': { - 'id': '36015', - 'ext': 'mp4', - 'title': 'Vulkanutbrott i rymden - nu släpper NASA bilderna', - 'description': 'Jupiters måne mest aktiv av alla himlakroppar', - 'timestamp': 1394142732, - 'upload_date': '20140306', - }, - } - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - # find internal video meta data - meta_url = 'http://aftonbladet-play-metadata.cdn.drvideo.aptoma.no/video/%s.json' - player_config = self._parse_json(self._html_search_regex( - r'data-player-config="([^"]+)"', webpage, 'player config'), video_id) - internal_meta_id = player_config['aptomaVideoId'] - internal_meta_url = meta_url % internal_meta_id - internal_meta_json = self._download_json( - internal_meta_url, video_id, 'Downloading video meta data') - - # find internal video formats - format_url = 'http://aftonbladet-play.videodata.drvideo.aptoma.no/actions/video/?id=%s' - internal_video_id = internal_meta_json['videoId'] - internal_formats_url = format_url % internal_video_id - internal_formats_json = self._download_json( - internal_formats_url, video_id, 'Downloading video formats') - - formats = [] - for fmt in internal_formats_json['formats']['http']['pseudostreaming']['mp4']: - p = fmt['paths'][0] - formats.append({ - 'url': 'http://%s:%d/%s/%s' % (p['address'], p['port'], p['path'], p['filename']), - 'ext': 'mp4', - 'width': int_or_none(fmt.get('width')), - 'height': int_or_none(fmt.get('height')), - 'tbr': int_or_none(fmt.get('bitrate')), - 'protocol': 'http', - }) - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': internal_meta_json['title'], - 'formats': formats, - 'thumbnail': internal_meta_json.get('imageUrl'), - 'description': internal_meta_json.get('shortPreamble'), - 'timestamp': int_or_none(internal_meta_json.get('timePublished')), - 'duration': int_or_none(internal_meta_json.get('duration')), - 'view_count': int_or_none(internal_meta_json.get('views')), - } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 23fd2a308..09b3b4942 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -31,7 +31,6 @@ from .aenetworks import ( HistoryTopicIE, ) from .afreecatv import AfreecaTVIE -from .aftonbladet import AftonbladetIE from .airmozilla import AirMozillaIE from .aljazeera import AlJazeeraIE from .alphaporno import AlphaPornoIE From b0582fc80615ec94c37e14015bd9bbfef6745aa4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 30 Sep 2016 00:15:09 +0700 Subject: [PATCH 588/775] [vgtv] Add support for tv.aftonbladet.se (Closes #10800) --- youtube_dl/extractor/vgtv.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vgtv.py b/youtube_dl/extractor/vgtv.py index 185756301..3b38ac700 100644 --- 
a/youtube_dl/extractor/vgtv.py +++ b/youtube_dl/extractor/vgtv.py @@ -22,6 +22,7 @@ class VGTVIE(XstreamIE): 'fvn.no/fvntv': 'fvntv', 'aftenposten.no/webtv': 'aptv', 'ap.vgtv.no/webtv': 'aptv', + 'tv.aftonbladet.se/abtv': 'abtv', } _APP_NAME_TO_VENDOR = { @@ -30,6 +31,7 @@ class VGTVIE(XstreamIE): 'satv': 'sa', 'fvntv': 'fvn', 'aptv': 'ap', + 'abtv': 'ab', } _VALID_URL = r'''(?x) @@ -40,7 +42,8 @@ class VGTVIE(XstreamIE): /? (?: \#!/(?:video|live)/| - embed?.*id= + embed?.*id=| + articles/ )| (?P %s @@ -135,6 +138,14 @@ class VGTVIE(XstreamIE): 'url': 'http://www.vgtv.no/#!/video/127205/inside-the-mind-of-favela-funk', 'only_matching': True, }, + { + 'url': 'http://tv.aftonbladet.se/abtv/articles/36015', + 'only_matching': True, + }, + { + 'url': 'abtv:140026', + 'only_matching': True, + } ] def _real_extract(self, url): From de6babf92252ea5828a9c17d76766357cff3e440 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 30 Sep 2016 22:30:34 +0700 Subject: [PATCH 589/775] [tvland] Extend _VALID_URL (Closes #10812) --- youtube_dl/extractor/tvland.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/tvland.py b/youtube_dl/extractor/tvland.py index cb76a2a58..957cf1ea2 100644 --- a/youtube_dl/extractor/tvland.py +++ b/youtube_dl/extractor/tvland.py @@ -6,7 +6,7 @@ from .mtv import MTVServicesInfoExtractor class TVLandIE(MTVServicesInfoExtractor): IE_NAME = 'tvland.com' - _VALID_URL = r'https?://(?:www\.)?tvland\.com/(?:video-clips|episodes)/(?P[^/?#.]+)' + _VALID_URL = r'https?://(?:www\.)?tvland\.com/(?:video-clips|(?:full-)?episodes)/(?P[^/?#.]+)' _FEED_URL = 'http://www.tvland.com/feeds/mrss/' _TESTS = [{ # Geo-restricted. Without a proxy metadata are still there. With a @@ -28,4 +28,7 @@ class TVLandIE(MTVServicesInfoExtractor): 'upload_date': '20151228', 'timestamp': 1451289600, }, + }, { + 'url': 'http://www.tvland.com/full-episodes/iu0hz6/younger-a-kiss-is-just-a-kiss-season-3-ep-301', + 'only_matching': True, }] From 16097822582b839a3744b54af90f7b3fd7132d26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A9stin=20Reed?= Date: Wed, 28 Sep 2016 17:28:16 +0200 Subject: [PATCH 590/775] [Instagram] Extract video dimensions --- youtube_dl/extractor/instagram.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py index 5ebc30a10..dde435189 100644 --- a/youtube_dl/extractor/instagram.py +++ b/youtube_dl/extractor/instagram.py @@ -84,7 +84,7 @@ class InstagramIE(InfoExtractor): webpage = self._download_webpage(url, video_id) (video_url, description, thumbnail, timestamp, uploader, - uploader_id, like_count, comment_count) = [None] * 8 + uploader_id, like_count, comment_count, height, width) = [None] * 10 shared_data = self._parse_json( self._search_regex( @@ -96,6 +96,8 @@ class InstagramIE(InfoExtractor): shared_data, lambda x: x['entry_data']['PostPage'][0]['media'], dict) if media: video_url = media.get('video_url') + height = int_or_none(media.get('dimensions', {}).get('height')) + width = int_or_none(media.get('dimensions', {}).get('width')) description = media.get('caption') thumbnail = media.get('display_src') timestamp = int_or_none(media.get('date')) @@ -115,6 +117,12 @@ class InstagramIE(InfoExtractor): if not video_url: video_url = self._og_search_video_url(webpage, secure=False) + formats = [{ + 'url': video_url, + 'width': width, + 'height': height, + }] + if not uploader_id: uploader_id = self._search_regex( 
r'"owner"\s*:\s*{\s*"username"\s*:\s*"(.+?)"', @@ -131,7 +139,7 @@ class InstagramIE(InfoExtractor): return { 'id': video_id, - 'url': video_url, + 'formats': formats, 'ext': 'mp4', 'title': 'Video by %s' % uploader_id, 'description': description, From a1001f47fc19adf983859bb281f08a09bd7f7e9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 1 Oct 2016 00:16:08 +0700 Subject: [PATCH 591/775] [instagram] PEP 8 --- youtube_dl/extractor/instagram.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py index dde435189..196407b06 100644 --- a/youtube_dl/extractor/instagram.py +++ b/youtube_dl/extractor/instagram.py @@ -111,8 +111,8 @@ class InstagramIE(InfoExtractor): 'id': comment.get('id'), 'text': comment.get('text'), 'timestamp': int_or_none(comment.get('created_at')), - } for comment in media.get('comments', {}).get('nodes', []) - if comment.get('text')] + } for comment in media.get( + 'comments', {}).get('nodes', []) if comment.get('text')] if not video_url: video_url = self._og_search_video_url(webpage, secure=False) From eaf9b22f94f37487d75457423a9a293dee1b1d32 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A9stin=20Reed?= Date: Fri, 30 Sep 2016 20:03:25 +0200 Subject: [PATCH 592/775] [clubic] Rely on _match_id and _parse_json --- youtube_dl/extractor/clubic.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/clubic.py b/youtube_dl/extractor/clubic.py index 2fba93543..f7ee3a8f8 100644 --- a/youtube_dl/extractor/clubic.py +++ b/youtube_dl/extractor/clubic.py @@ -1,9 +1,6 @@ # coding: utf-8 from __future__ import unicode_literals -import json -import re - from .common import InfoExtractor from ..utils import ( clean_html, @@ -30,16 +27,14 @@ class ClubicIE(InfoExtractor): }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) player_url = 'http://player.m6web.fr/v1/player/clubic/%s.html' % video_id player_page = self._download_webpage(player_url, video_id) - config_json = self._search_regex( + config = self._parse_json(self._search_regex( r'(?m)M6\.Player\.config\s*=\s*(\{.+?\});$', player_page, - 'configuration') - config = json.loads(config_json) + 'configuration'), video_id) video_info = config['videoInfo'] sources = config['sources'] From d7753d194803086d97ffe47f022c47c906ebcc71 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 30 Sep 2016 00:49:14 +0800 Subject: [PATCH 593/775] [downloader/http] Use write_xattr function for --xattr-set-filesize --- youtube_dl/__init__.py | 6 ------ youtube_dl/downloader/http.py | 8 +++++--- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 1cf3140a0..72141b983 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -283,12 +283,6 @@ def _real_main(argv=None): 'key': 'ExecAfterDownload', 'exec_cmd': opts.exec_cmd, }) - if opts.xattr_set_filesize: - try: - import xattr - xattr # Confuse flake8 - except ImportError: - parser.error('setting filesize xattr requested but python-xattr is not available') external_downloader_args = None if opts.external_downloader_args: external_downloader_args = compat_shlex_split(opts.external_downloader_args) diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index f8b69d186..11294d106 100644 --- a/youtube_dl/downloader/http.py +++ b/youtube_dl/downloader/http.py @@ -13,6 +13,9 @@ 
from ..utils import ( encodeFilename, sanitize_open, sanitized_Request, + write_xattr, + XAttrMetadataError, + XAttrUnavailableError, ) @@ -179,9 +182,8 @@ class HttpFD(FileDownloader): if self.params.get('xattr_set_filesize', False) and data_len is not None: try: - import xattr - xattr.setxattr(tmpfilename, 'user.ytdl.filesize', str(data_len)) - except(OSError, IOError, ImportError) as err: + write_xattr(tmpfilename, 'user.ytdl.filesize', str(data_len)) + except (XAttrUnavailableError, XAttrMetadataError) as err: self.report_error('unable to set filesize xattr: %s' % str(err)) try: From e295618f9e1c1fc404d9baa4ccef961d3eb3ea88 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 1 Oct 2016 15:22:48 +0800 Subject: [PATCH 594/775] [dctp] Fix extraction (closes #10734) --- ChangeLog | 1 + youtube_dl/extractor/dctp.py | 67 ++++++++++++++++-------------------- 2 files changed, 31 insertions(+), 37 deletions(-) diff --git a/ChangeLog b/ChangeLog index 70da55c90..efc3e494e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors +* [dctp] Fix extraction (#10734) + [leeco] Recognize more Le Sports URLs (#10794) diff --git a/youtube_dl/extractor/dctp.py b/youtube_dl/extractor/dctp.py index a47e04993..14ba88715 100644 --- a/youtube_dl/extractor/dctp.py +++ b/youtube_dl/extractor/dctp.py @@ -1,61 +1,54 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import compat_str +from ..utils import unified_strdate class DctpTvIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?dctp\.tv/(#/)?filme/(?P.+?)/$' _TEST = { 'url': 'http://www.dctp.tv/filme/videoinstallation-fuer-eine-kaufhausfassade/', + 'md5': '174dd4a8a6225cf5655952f969cfbe24', 'info_dict': { - 'id': '1324', + 'id': '95eaa4f33dad413aa17b4ee613cccc6c', 'display_id': 'videoinstallation-fuer-eine-kaufhausfassade', - 'ext': 'flv', - 'title': 'Videoinstallation für eine Kaufhausfassade' + 'ext': 'mp4', + 'title': 'Videoinstallation für eine Kaufhausfassade', + 'description': 'Kurzfilm', + 'upload_date': '20110407', + 'thumbnail': 're:^https?://.*\.jpg$', }, - 'params': { - # rtmp download - 'skip_download': True, - } } def _real_extract(self, url): video_id = self._match_id(url) - base_url = 'http://dctp-ivms2-restapi.s3.amazonaws.com/' - version_json = self._download_json( - base_url + 'version.json', - video_id, note='Determining file version') - version = version_json['version_name'] - info_json = self._download_json( - '{0}{1}/restapi/slugs/{2}.json'.format(base_url, version, video_id), - video_id, note='Fetching object ID') - object_id = compat_str(info_json['object_id']) - meta_json = self._download_json( - '{0}{1}/restapi/media/{2}.json'.format(base_url, version, object_id), - video_id, note='Downloading metadata') - uuid = meta_json['uuid'] - title = meta_json['title'] - wide = meta_json['is_wide'] - if wide: - ratio = '16x9' - else: - ratio = '4x3' - play_path = 'mp4:{0}_dctp_0500_{1}.m4v'.format(uuid, ratio) + webpage = self._download_webpage(url, video_id) + + object_id = self._html_search_meta('DC.identifier', webpage) servers_json = self._download_json( - 'http://www.dctp.tv/streaming_servers/', + 'http://www.dctp.tv/elastic_streaming_client/get_streaming_server/', video_id, note='Downloading server list') - url = servers_json[0]['endpoint'] + server = servers_json[0]['server'] + m3u8_path = self._search_regex( + r'\'([^\'"]+/playlist\.m3u8)"', webpage, 'm3u8 path') + formats = self._extract_m3u8_formats( + 'http://%s%s' % (server, 
m3u8_path), video_id, ext='mp4', + entry_protocol='m3u8_native') + + title = self._og_search_title(webpage) + description = self._html_search_meta('DC.description', webpage) + upload_date = unified_strdate( + self._html_search_meta('DC.date.created', webpage)) + thumbnail = self._og_search_thumbnail(webpage) return { 'id': object_id, 'title': title, - 'format': 'rtmp', - 'url': url, - 'play_path': play_path, - 'rtmp_real_time': True, - 'ext': 'flv', - 'display_id': video_id + 'formats': formats, + 'display_id': video_id, + 'description': description, + 'upload_date': upload_date, + 'thumbnail': thumbnail, } From 9bd7bd0b8054231adbeb2a0eddd42a0b969fd6c4 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 1 Oct 2016 16:37:49 +0800 Subject: [PATCH 595/775] [twitch] Skip a 404 test --- youtube_dl/extractor/twitch.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index bc352391e..46c2cfe7b 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -247,6 +247,7 @@ class TwitchVodIE(TwitchItemBaseIE): # m3u8 download 'skip_download': True, }, + 'skip': 'HTTP Error 404: Not Found', }] def _real_extract(self, url): From 9c51a2464276f5eb26d1b571d32052df55d6ead9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A9stin=20Reed?= Date: Fri, 30 Sep 2016 20:06:08 +0200 Subject: [PATCH 596/775] [criterion] Rely on _match_id, improve regex and add thumbnail to test --- youtube_dl/extractor/criterion.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/criterion.py b/youtube_dl/extractor/criterion.py index ad32673a8..cf6a5d6cb 100644 --- a/youtube_dl/extractor/criterion.py +++ b/youtube_dl/extractor/criterion.py @@ -1,8 +1,6 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor @@ -16,20 +14,20 @@ class CriterionIE(InfoExtractor): 'ext': 'mp4', 'title': 'Le Samouraï', 'description': 'md5:a2b4b116326558149bef81f76dcbb93f', + 'thumbnail': 're:^https?://.*\.jpg$', } } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) final_url = self._search_regex( - r'so.addVariable\("videoURL", "(.+?)"\)\;', webpage, 'video url') + r'so\.addVariable\("videoURL", "(.+?)"\)\;', webpage, 'video url') title = self._og_search_title(webpage) description = self._html_search_meta('description', webpage) thumbnail = self._search_regex( - r'so.addVariable\("thumbnailURL", "(.+?)"\)\;', + r'so\.addVariable\("thumbnailURL", "(.+?)"\)\;', webpage, 'thumbnail url') return { From d54739a2e6a8dc089e7530afda0a1cfe355a6fef Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 1 Oct 2016 19:58:13 +0800 Subject: [PATCH 597/775] [downloader/http] xattr values should be bytes --- youtube_dl/downloader/http.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index 11294d106..af405b950 100644 --- a/youtube_dl/downloader/http.py +++ b/youtube_dl/downloader/http.py @@ -182,7 +182,7 @@ class HttpFD(FileDownloader): if self.params.get('xattr_set_filesize', False) and data_len is not None: try: - write_xattr(tmpfilename, 'user.ytdl.filesize', str(data_len)) + write_xattr(tmpfilename, 'user.ytdl.filesize', str(data_len).encode('utf-8')) except (XAttrUnavailableError, XAttrMetadataError) as err: self.report_error('unable to set 
filesize xattr: %s' % str(err)) From 53a7e3d2879feac7b1b6f714692581057b9b5f6b Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 1 Oct 2016 20:13:04 +0800 Subject: [PATCH 598/775] [utils] Support xattr as well as pyxattr Closes #9054 There are two xattr packages in Python, pyxattr [1] and xattr [2]. They have different APIs. In old days pyxattr supports Linux only and xattr supports Linux, Mac, FreeBSD and Solaris, and pyxattr supports Linux only. Recently pyxattr adds support for Mac OS X. [3] An old version of [2] is shipped with Mac OS X. However, some Linux distributions have pyxattr only, for example PLD-Linux [4] and old Arch Linux. [5] As a result, supporting both is the way to go. [1] https://github.com/iustin/pyxattr [2] https://github.com/xattr/xattr [3] https://github.com/iustin/pyxattr/pull/9 [4] https://github.com/rg3/youtube-dl/issues/5498 [5] https://git.archlinux.org/svntogit/community.git/commit/?id=427c4c76401e386d865ccddea4fbfdc74df80492 https://git.archlinux.org/svntogit/community.git/commit/?id=59b40da7b69622a6761d364a8b07909e9cccaa56 python-xattr is added on 2016/06/29 while pyxattr is there for more than 6 years --- ChangeLog | 4 ++++ youtube_dl/utils.py | 29 +++++++++++++++++------------ 2 files changed, 21 insertions(+), 12 deletions(-) diff --git a/ChangeLog b/ChangeLog index efc3e494e..8ef39cd63 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,9 @@ version +Core ++ Support pyxattr as well as python-xattr for --xattrs and + --xattr-set-filesize (#9054) + Extractors * [dctp] Fix extraction (#10734) + [leeco] Recognize more Le Sports URLs (#10794) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index d2dfa8013..c259f8bff 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -3161,20 +3161,25 @@ def write_xattr(path, key, value): # try the pyxattr module... import xattr - # Unicode arguments are not supported in python-pyxattr until - # version 0.5.0 - # See https://github.com/rg3/youtube-dl/issues/5498 - pyxattr_required_version = '0.5.0' - if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version): - # TODO: fallback to CLI tools - raise XAttrUnavailableError( - 'python-pyxattr is detected but is too old. ' - 'youtube-dl requires %s or above while your version is %s. ' - 'Falling back to other xattr implementations' % ( - pyxattr_required_version, xattr.__version__)) + if hasattr(xattr, 'set'): # pyxattr + # Unicode arguments are not supported in python-pyxattr until + # version 0.5.0 + # See https://github.com/rg3/youtube-dl/issues/5498 + pyxattr_required_version = '0.5.0' + if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version): + # TODO: fallback to CLI tools + raise XAttrUnavailableError( + 'python-pyxattr is detected but is too old. ' + 'youtube-dl requires %s or above while your version is %s. 
' + 'Falling back to other xattr implementations' % ( + pyxattr_required_version, xattr.__version__)) + + setxattr = xattr.set + else: # xattr + setxattr = xattr.setxattr try: - xattr.set(path, key, value) + setxattr(path, key, value) except EnvironmentError as e: raise XAttrMetadataError(e.errno, e.strerror) From e1e97c2446ab6fcffcfae738e0c7f29ff58a9dec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 1 Oct 2016 22:50:47 +0700 Subject: [PATCH 599/775] [periscope:user] Fix extraction (Closes #10820) --- youtube_dl/extractor/periscope.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/periscope.py b/youtube_dl/extractor/periscope.py index 61043cad5..0e3623024 100644 --- a/youtube_dl/extractor/periscope.py +++ b/youtube_dl/extractor/periscope.py @@ -132,7 +132,7 @@ class PeriscopeUserIE(PeriscopeBaseIE): user = list(data_store['UserCache']['users'].values())[0]['user'] user_id = user['id'] - session_id = data_store['SessionToken']['broadcastHistory']['token']['session_id'] + session_id = data_store['SessionToken']['public']['broadcastHistory']['token']['session_id'] broadcasts = self._call_api( 'getUserBroadcastsPublic', From 4da4516973b56bcaa65794a8ae0856cf54740c54 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A9stin=20Reed?= Date: Fri, 30 Sep 2016 19:59:08 +0200 Subject: [PATCH 600/775] [byutv] Rely on _match_id and _parse_json --- youtube_dl/extractor/byutv.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/byutv.py b/youtube_dl/extractor/byutv.py index 3aec601f8..b2d25eec0 100644 --- a/youtube_dl/extractor/byutv.py +++ b/youtube_dl/extractor/byutv.py @@ -1,6 +1,5 @@ from __future__ import unicode_literals -import json import re from .common import InfoExtractor @@ -8,7 +7,7 @@ from ..utils import ExtractorError class BYUtvIE(InfoExtractor): - _VALID_URL = r'^https?://(?:www\.)?byutv.org/watch/[0-9a-f-]+/(?P[^/?#]+)' + _VALID_URL = r'^https?://(?:www\.)?byutv.org/watch/[0-9a-f-]+/(?P[^/?#]+)' _TEST = { 'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d/studio-c-season-5-episode-5', 'md5': '05850eb8c749e2ee05ad5a1c34668493', @@ -27,15 +26,15 @@ class BYUtvIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('video_id') + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) episode_code = self._search_regex( r'(?s)episode:(.*?\}),\s*\n', webpage, 'episode information') - episode_json = re.sub( - r'(\n\s+)([a-zA-Z]+):\s+\'(.*?)\'', r'\1"\2": "\3"', episode_code) - ep = json.loads(episode_json) + + ep = self._parse_json( + episode_code, video_id, transform_source=lambda s: + re.sub(r'(\n\s+)([a-zA-Z]+):\s+\'(.*?)\'', r'\1"\2": "\3"', s)) if ep['providerType'] == 'Ooyala': return { From 6d2549fb4f2a646b6b6898db5281cde669277626 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 2 Oct 2016 00:44:54 +0700 Subject: [PATCH 601/775] [byutv] Fix id and display id --- youtube_dl/extractor/byutv.py | 46 ++++++++++++++++++++--------------- 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/youtube_dl/extractor/byutv.py b/youtube_dl/extractor/byutv.py index b2d25eec0..084cc7ae2 100644 --- a/youtube_dl/extractor/byutv.py +++ b/youtube_dl/extractor/byutv.py @@ -7,15 +7,15 @@ from ..utils import ExtractorError class BYUtvIE(InfoExtractor): - _VALID_URL = r'^https?://(?:www\.)?byutv.org/watch/[0-9a-f-]+/(?P[^/?#]+)' - _TEST = { + _VALID_URL = 
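
The commit message above describes the pyxattr/xattr API split that the new hasattr() check in write_xattr() dispatches between. A minimal sketch of that difference (illustration only; the file name and value here are made up, and the value is bytes per the preceding downloader fix):

    # Illustration only: the two module APIs write_xattr() now chooses between.
    import xattr

    if hasattr(xattr, 'set'):     # pyxattr
        xattr.set('video.mp4', 'user.ytdl.filesize', b'123456')
    else:                         # xattr
        xattr.setxattr('video.mp4', 'user.ytdl.filesize', b'123456')
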
r'https?://(?:www\.)?byutv.org/watch/(?P[0-9a-f-]+)(?:/(?P[^/?#&]+))?' + _TESTS = [{ 'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d/studio-c-season-5-episode-5', - 'md5': '05850eb8c749e2ee05ad5a1c34668493', 'info_dict': { - 'id': 'studio-c-season-5-episode-5', + 'id': '6587b9a3-89d2-42a6-a7f7-fd2f81840a7d', + 'display_id': 'studio-c-season-5-episode-5', 'ext': 'mp4', - 'description': 'md5:e07269172baff037f8e8bf9956bc9747', 'title': 'Season 5 Episode 5', + 'description': 'md5:e07269172baff037f8e8bf9956bc9747', 'thumbnail': 're:^https?://.*\.jpg$', 'duration': 1486.486, }, @@ -23,28 +23,34 @@ class BYUtvIE(InfoExtractor): 'skip_download': True, }, 'add_ie': ['Ooyala'], - } + }, { + 'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d', + 'only_matching': True, + }] def _real_extract(self, url): - video_id = self._match_id(url) + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + display_id = mobj.group('display_id') or video_id - webpage = self._download_webpage(url, video_id) + webpage = self._download_webpage(url, display_id) episode_code = self._search_regex( r'(?s)episode:(.*?\}),\s*\n', webpage, 'episode information') ep = self._parse_json( - episode_code, video_id, transform_source=lambda s: + episode_code, display_id, transform_source=lambda s: re.sub(r'(\n\s+)([a-zA-Z]+):\s+\'(.*?)\'', r'\1"\2": "\3"', s)) - if ep['providerType'] == 'Ooyala': - return { - '_type': 'url_transparent', - 'ie_key': 'Ooyala', - 'url': 'ooyala:%s' % ep['providerId'], - 'id': video_id, - 'title': ep['title'], - 'description': ep.get('description'), - 'thumbnail': ep.get('imageThumbnail'), - } - else: + if ep['providerType'] != 'Ooyala': raise ExtractorError('Unsupported provider %s' % ep['provider']) + + return { + '_type': 'url_transparent', + 'ie_key': 'Ooyala', + 'url': 'ooyala:%s' % ep['providerId'], + 'id': video_id, + 'display_id': display_id, + 'title': ep['title'], + 'description': ep.get('description'), + 'thumbnail': ep.get('imageThumbnail'), + } From f6ba581f89fc764e4eaf3045ff5b63e27ad66cbf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 2 Oct 2016 00:50:07 +0700 Subject: [PATCH 602/775] [byutv:event] Add extractor --- youtube_dl/extractor/byutv.py | 39 +++++++++++++++++++++++++++++- youtube_dl/extractor/extractors.py | 5 +++- 2 files changed, 42 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/byutv.py b/youtube_dl/extractor/byutv.py index 084cc7ae2..4be175d70 100644 --- a/youtube_dl/extractor/byutv.py +++ b/youtube_dl/extractor/byutv.py @@ -7,7 +7,7 @@ from ..utils import ExtractorError class BYUtvIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?byutv.org/watch/(?P[0-9a-f-]+)(?:/(?P[^/?#&]+))?' + _VALID_URL = r'https?://(?:www\.)?byutv\.org/watch/(?!event/)(?P[0-9a-f-]+)(?:/(?P[^/?#&]+))?' _TESTS = [{ 'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d/studio-c-season-5-episode-5', 'info_dict': { @@ -54,3 +54,40 @@ class BYUtvIE(InfoExtractor): 'description': ep.get('description'), 'thumbnail': ep.get('imageThumbnail'), } + + +class BYUtvEventIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?byutv\.org/watch/event/(?P[0-9a-f-]+)' + _TEST = { + 'url': 'http://www.byutv.org/watch/event/29941b9b-8bf6-48d2-aebf-7a87add9e34b', + 'info_dict': { + 'id': '29941b9b-8bf6-48d2-aebf-7a87add9e34b', + 'ext': 'mp4', + 'title': 'Toledo vs. 
BYU (9/30/16)', + }, + 'params': { + 'skip_download': True, + }, + 'add_ie': ['Ooyala'], + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + ooyala_id = self._search_regex( + r'providerId\s*:\s*(["\'])(?P(?:(?!\1).)+)\1', + webpage, 'ooyala id', group='id') + + title = self._search_regex( + r'class=["\']description["\'][^>]*>\s*

<h1>([^<]+)</h1>
    ', webpage, + 'title').strip() + + return { + '_type': 'url_transparent', + 'ie_key': 'Ooyala', + 'url': 'ooyala:%s' % ooyala_id, + 'id': video_id, + 'title': title, + } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 09b3b4942..e8928307c 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -116,7 +116,10 @@ from .brightcove import ( BrightcoveNewIE, ) from .buzzfeed import BuzzFeedIE -from .byutv import BYUtvIE +from .byutv import ( + BYUtvIE, + BYUtvEventIE, +) from .c56 import C56IE from .camdemy import ( CamdemyIE, From b19e275d99c8dfe121ba0dc3478e4eb9c83e4f9b Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 2 Oct 2016 02:12:14 +0800 Subject: [PATCH 603/775] [__init__] Fix lost xattr if --embed-thumbnail used Reported at https://github.com/rg3/youtube-dl/issues/9054#issuecomment-250451823 --- ChangeLog | 1 + youtube_dl/__init__.py | 6 ++++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index 8ef39cd63..acceb9d02 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Core +* Fix possibly lost extended attributes + Support pyxattr as well as python-xattr for --xattrs and --xattr-set-filesize (#9054) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 72141b983..f84b866df 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -266,8 +266,6 @@ def _real_main(argv=None): postprocessors.append({ 'key': 'FFmpegEmbedSubtitle', }) - if opts.xattrs: - postprocessors.append({'key': 'XAttrMetadata'}) if opts.embedthumbnail: already_have_thumbnail = opts.writethumbnail or opts.write_all_thumbnails postprocessors.append({ @@ -276,6 +274,10 @@ def _real_main(argv=None): }) if not already_have_thumbnail: opts.writethumbnail = True + # XAttrMetadataPP should be run after post-processors that may change file + # contents + if opts.xattrs: + postprocessors.append({'key': 'XAttrMetadata'}) # Please keep ExecAfterDownload towards the bottom as it allows the user to modify the final file in any way. # So if the user is able to remove the file before your postprocessor runs it might cause a few problems. if opts.exec_cmd: From bd2644120526429783c55e885f7042633826d7da Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 2 Oct 2016 03:03:41 +0800 Subject: [PATCH 604/775] [utils] Fix xattr error handling --- youtube_dl/extractor/generic.py | 16 ++++++++++++---- youtube_dl/utils.py | 1 + 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index c1792c534..489b3c7c1 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2332,12 +2332,23 @@ class GenericIE(InfoExtractor): info_dict.update(json_ld) return info_dict + # Look for HTML5 media + entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls') + if entries: + for entry in entries: + entry.update({ + 'id': video_id, + 'title': video_title, + }) + self._sort_formats(entry['formats']) + return self.playlist_result(entries) + def check_video(vurl): if YoutubeIE.suitable(vurl): return True vpath = compat_urlparse.urlparse(vurl).path vext = determine_ext(vpath) - return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml') + return '.' 
in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml', 'js') def filter_video(urls): return list(filter(check_video, urls)) @@ -2387,9 +2398,6 @@ class GenericIE(InfoExtractor): # We only look in og:video if the MIME type is a video, don't try if it's a Flash player: if m_video_type is not None: found = filter_video(re.findall(r'.*?]*)?\s+src=["\'](.*?)["\']', webpage) if not found: REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)' found = re.search( diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index c259f8bff..044520037 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -782,6 +782,7 @@ class XAttrMetadataError(Exception): def __init__(self, code=None, msg='Unknown error'): super(XAttrMetadataError, self).__init__(msg) self.code = code + self.msg = msg # Parsing code and msg if (self.code in (errno.ENOSPC, errno.EDQUOT) or From fd152641726a3f5e47a6a5065f8e9b6fe2623c11 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 2 Oct 2016 05:24:31 +0800 Subject: [PATCH 605/775] [jwplatform] Support old-style jwplayer playlists --- ChangeLog | 1 + youtube_dl/extractor/jwplatform.py | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/ChangeLog b/ChangeLog index acceb9d02..d48a09122 100644 --- a/ChangeLog +++ b/ChangeLog @@ -6,6 +6,7 @@ Core --xattr-set-filesize (#9054) Extractors +* [jwplatform] Improve JWPlayer handling * [dctp] Fix extraction (#10734) + [leeco] Recognize more Le Sports URLs (#10794) diff --git a/youtube_dl/extractor/jwplatform.py b/youtube_dl/extractor/jwplatform.py index 38199fcd0..e10f7e9f9 100644 --- a/youtube_dl/extractor/jwplatform.py +++ b/youtube_dl/extractor/jwplatform.py @@ -39,6 +39,12 @@ class JWPlatformBaseIE(InfoExtractor): jwplayer_data = {'playlist': [jwplayer_data]} entries = [] + + # JWPlayer backward compatibility: single playlist item + # https://github.com/jwplayer/jwplayer/blob/v7.7.0/src/js/playlist/playlist.js#L10 + if not isinstance(jwplayer_data['playlist'], list): + jwplayer_data['playlist'] = [jwplayer_data['playlist']] + for video_data in jwplayer_data['playlist']: # JWPlayer backward compatibility: flattened sources # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/playlist/item.js#L29-L35 From 99ed78c79e94c14ce24bc5bdccaf9573d4f83552 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 2 Oct 2016 14:07:49 +0800 Subject: [PATCH 606/775] [jwplatform] Support DASH streams --- ChangeLog | 1 + youtube_dl/extractor/jwplatform.py | 6 +++++- youtube_dl/extractor/rudo.py | 2 +- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index d48a09122..f5172a864 100644 --- a/ChangeLog +++ b/ChangeLog @@ -6,6 +6,7 @@ Core --xattr-set-filesize (#9054) Extractors ++ [jwplatform] Support DASH streams in JWPlayer * [jwplatform] Improve JWPlayer handling * [dctp] Fix extraction (#10734) + [leeco] Recognize more Le Sports URLs (#10794) diff --git a/youtube_dl/extractor/jwplatform.py b/youtube_dl/extractor/jwplatform.py index e10f7e9f9..5d56e0a28 100644 --- a/youtube_dl/extractor/jwplatform.py +++ b/youtube_dl/extractor/jwplatform.py @@ -32,7 +32,8 @@ class JWPlatformBaseIE(InfoExtractor): return self._parse_jwplayer_data( jwplayer_data, video_id, *args, **kwargs) - def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True, m3u8_id=None, rtmp_params=None, base_url=None): + def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True, + m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None): # JWPlayer backward 
compatibility: flattened playlists # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/api/config.js#L81-L96 if 'playlist' not in jwplayer_data: @@ -63,6 +64,9 @@ class JWPlatformBaseIE(InfoExtractor): if source_type == 'hls' or ext == 'm3u8': formats.extend(self._extract_m3u8_formats( source_url, this_video_id, 'mp4', 'm3u8_native', m3u8_id=m3u8_id, fatal=False)) + elif ext == 'mpd': + formats.extend(self._extract_mpd_formats( + source_url, this_video_id, mpd_id=mpd_id, fatal=False)) # https://github.com/jwplayer/jwplayer/blob/master/src/js/providers/default.js#L67 elif source_type.startswith('audio') or ext in ('oga', 'aac', 'mp3', 'mpeg', 'vorbis'): formats.append({ diff --git a/youtube_dl/extractor/rudo.py b/youtube_dl/extractor/rudo.py index 38366b784..9a330c196 100644 --- a/youtube_dl/extractor/rudo.py +++ b/youtube_dl/extractor/rudo.py @@ -43,7 +43,7 @@ class RudoIE(JWPlatformBaseIE): transform_source=lambda s: js_to_json(re.sub(r'encodeURI\([^)]+\)', '""', s))) info_dict = self._parse_jwplayer_data( - jwplayer_data, video_id, require_title=False, m3u8_id='hls') + jwplayer_data, video_id, require_title=False, m3u8_id='hls', mpd_id='dash') info_dict.update({ 'title': self._og_search_title(webpage), From 703b3afa93326c96bc5faf753305ab95c4e98b10 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 2 Oct 2016 14:25:06 +0800 Subject: [PATCH 607/775] [amcnetworks] Skip a restricted _TEST --- youtube_dl/extractor/amcnetworks.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/amcnetworks.py b/youtube_dl/extractor/amcnetworks.py index c739d2c99..d2b03b177 100644 --- a/youtube_dl/extractor/amcnetworks.py +++ b/youtube_dl/extractor/amcnetworks.py @@ -28,6 +28,7 @@ class AMCNetworksIE(ThePlatformIE): # m3u8 download 'skip_download': True, }, + 'skip': 'Requires TV provider accounts', }, { 'url': 'http://www.bbcamerica.com/shows/the-hunt/full-episodes/season-1/episode-01-the-hardest-challenge', 'only_matching': True, From 26406d33c7808bdff38ffcda36d2d6a4e5bb4f4d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 2 Oct 2016 15:56:33 +0700 Subject: [PATCH 608/775] [ChangeLog] Actualize --- ChangeLog | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index f5172a864..277cc2ee1 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,15 +1,26 @@ version Core -* Fix possibly lost extended attributes +* Fix possibly lost extended attributes during post-processing + Support pyxattr as well as python-xattr for --xattrs and --xattr-set-filesize (#9054) Extractors + [jwplatform] Support DASH streams in JWPlayer -* [jwplatform] Improve JWPlayer handling ++ [jwplatform] Support old-style JWPlayer playlists ++ [byutv:event] Add extractor +* [periscope:user] Fix extraction (#10820) * [dctp] Fix extraction (#10734) ++ [instagram] Extract video dimensions (#10790) ++ [tvland] Extend URL regular expression (#10812) ++ [vgtv] Add support for tv.aftonbladet.se (#10800) +- [aftonbladet] Remove extractor +* [vk] Fix timestamp and view count extraction (#10760) ++ [vk] Add support for running and finished live streams (#10799) + [leeco] Recognize more Le Sports URLs (#10794) ++ [instagram] Extract comments (#10788) ++ [ketnet] Extract mzsource formats (#10770) +* [limelight:media] Improve HTTP formats extraction version 2016.09.27 From 6c152ce20f7bd5f1fbb786abe70c4aa3412aef26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 2 Oct 2016 15:58:00 +0700 Subject: [PATCH 609/775] release 2016.10.02 --- 
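
The JWPlayer patches above extend _parse_jwplayer_data() to accept older config layouts alongside the current one. A rough sketch of the shapes being normalized (illustration only; the keys inside each item are placeholders, not taken from a real site):

    # Illustration only: legacy JWPlayer config shapes vs. the modern one.
    flattened = {'file': 'http://example.com/media.mp4'}                  # no 'playlist' key
    single_item = {'playlist': {'file': 'http://example.com/media.mp4'}}  # old-style single item
    modern = {'playlist': [{'sources': [{'file': 'http://example.com/media.mpd'}]}]}

    def normalize(jwplayer_data):
        # same two steps as the extractor code above
        if 'playlist' not in jwplayer_data:
            jwplayer_data = {'playlist': [jwplayer_data]}
        if not isinstance(jwplayer_data['playlist'], list):
            jwplayer_data['playlist'] = [jwplayer_data['playlist']]
        return jwplayer_data
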
.github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 2 +- youtube_dl/version.py | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 273eb8c0b..e813e4c59 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.27*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.27** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.10.02*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.10.02** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.09.27 +[debug] youtube-dl version 2016.10.02 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 277cc2ee1..4f64edabb 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2016.10.02 Core * Fix possibly lost extended attributes during post-processing diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 26f275577..828ed0ba9 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -34,7 +34,6 @@ - **AdultSwim** - **aenetworks**: A+E Networks: A&E, Lifetime, History.com, FYI Network - **AfreecaTV**: afreecatv.com - - **Aftonbladet** - **AirMozilla** - **AlJazeera** - **Allocine** @@ -112,6 +111,7 @@ - **bt:vestlendingen**: Bergens Tidende - Vestlendingen - **BuzzFeed** - **BYUtv** + - **BYUtvEvent** - **Camdemy** - **CamdemyFolder** - **CamWithHer** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index af0c2cfc4..161ba4391 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.09.27' +__version__ = '2016.10.02' From 567a5996cac5f3ba2d06748cbbfb295eab48074c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A9stin=20Reed?= Date: Sat, 1 Oct 2016 15:34:46 +0200 Subject: [PATCH 610/775] [pornoxo] Use JWPlatform to improve metadata extraction --- youtube_dl/extractor/pornoxo.py | 31 ++++++++++++------------------- 1 file changed, 12 insertions(+), 19 deletions(-) diff --git a/youtube_dl/extractor/pornoxo.py b/youtube_dl/extractor/pornoxo.py index 202f58673..3c9087f2d 100644 --- a/youtube_dl/extractor/pornoxo.py +++ b/youtube_dl/extractor/pornoxo.py @@ -2,13 +2,13 @@ from __future__ import unicode_literals import re -from .common import InfoExtractor +from .jwplatform import JWPlatformBaseIE from ..utils import ( 
str_to_int, ) -class PornoXOIE(InfoExtractor): +class PornoXOIE(JWPlatformBaseIE): _VALID_URL = r'https?://(?:www\.)?pornoxo\.com/videos/(?P\d+)/(?P[^/]+)\.html' _TEST = { 'url': 'http://www.pornoxo.com/videos/7564/striptease-from-sexy-secretary.html', @@ -17,7 +17,8 @@ class PornoXOIE(InfoExtractor): 'id': '7564', 'ext': 'flv', 'title': 'Striptease From Sexy Secretary!', - 'description': 'Striptease From Sexy Secretary!', + 'display_id': 'striptease-from-sexy-secretary', + 'description': 'md5:0ee35252b685b3883f4a1d38332f9980', 'categories': list, # NSFW 'thumbnail': 're:https?://.*\.jpg$', 'age_limit': 18, @@ -26,23 +27,14 @@ class PornoXOIE(InfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id, display_id = mobj.groups() webpage = self._download_webpage(url, video_id) - - video_url = self._html_search_regex( - r'\'file\'\s*:\s*"([^"]+)"', webpage, 'video_url') + video_data = self._extract_jwplayer_data(webpage, video_id, require_title=False) title = self._html_search_regex( r'([^<]+)\s*-\s*PornoXO', webpage, 'title') - description = self._html_search_regex( - r'<meta name="description" content="([^"]+)\s*featuring', - webpage, 'description', fatal=False) - - thumbnail = self._html_search_regex( - r'\'image\'\s*:\s*"([^"]+)"', webpage, 'thumbnail', fatal=False) - view_count = str_to_int(self._html_search_regex( r'[vV]iews:\s*([0-9,]+)', webpage, 'view count', fatal=False)) @@ -53,13 +45,14 @@ class PornoXOIE(InfoExtractor): None if categories_str is None else categories_str.split(',')) - return { + video_data.update({ 'id': video_id, - 'url': video_url, 'title': title, - 'description': description, - 'thumbnail': thumbnail, + 'display_id': display_id, + 'description': self._html_search_meta('description', webpage), 'categories': categories, 'view_count': view_count, 'age_limit': 18, - } + }) + + return video_data From ee5de4e38e3629ffc5d6360e06fa5dcfd43cbeb3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 3 Oct 2016 00:54:02 +0700 Subject: [PATCH 611/775] [nhl] Add support for wch2016.com (Closes #10833) --- youtube_dl/extractor/nhl.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/nhl.py b/youtube_dl/extractor/nhl.py index b04d21113..26149c88f 100644 --- a/youtube_dl/extractor/nhl.py +++ b/youtube_dl/extractor/nhl.py @@ -245,7 +245,11 @@ class NHLVideocenterCategoryIE(NHLBaseInfoExtractor): class NHLIE(InfoExtractor): IE_NAME = 'nhl.com' - _VALID_URL = r'https?://(?:www\.)?nhl\.com/([^/]+/)*c-(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?(?P<site>nhl|wch2016)\.com/(?:[^/]+/)*c-(?P<id>\d+)' + _SITES_MAP = { + 'nhl': 'nhl', + 'wch2016': 'wch', + } _TESTS = [{ # type=video 'url': 'https://www.nhl.com/video/anisimov-cleans-up-mess/t-277752844/c-43663503', @@ -270,13 +274,20 @@ class NHLIE(InfoExtractor): 'upload_date': '20160204', 'timestamp': 1454544904, }, + }, { + 'url': 'https://www.wch2016.com/video/caneur-best-of-game-2-micd-up/t-281230378/c-44983703', + 'only_matching': True, + }, { + 'url': 'https://www.wch2016.com/news/3-stars-team-europe-vs-team-canada/c-282195068', + 'only_matching': True, }] def _real_extract(self, url): - tmp_id = self._match_id(url) + mobj = re.match(self._VALID_URL, url) + tmp_id, site = mobj.group('id'), mobj.group('site') video_data = self._download_json( - 'https://nhl.bamcontent.com/nhl/id/v1/%s/details/web-v1.json' % tmp_id, - tmp_id) + 
'https://nhl.bamcontent.com/%s/id/v1/%s/details/web-v1.json' + % (self._SITES_MAP[site], tmp_id), tmp_id) if video_data.get('type') == 'article': video_data = video_data['media'] From c1084ddb0c87dac450d2b7c1b1cfef386d6f4481 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Mon, 3 Oct 2016 15:27:09 +0100 Subject: [PATCH 612/775] [thisoldhouse] Add new extractor(closes #10837) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/thisoldhouse.py | 32 ++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+) create mode 100644 youtube_dl/extractor/thisoldhouse.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index e8928307c..dca4973d4 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -892,6 +892,7 @@ from .thesixtyone import TheSixtyOneIE from .thestar import TheStarIE from .thisamericanlife import ThisAmericanLifeIE from .thisav import ThisAVIE +from .thisoldhouse import ThisOldHouseIE from .threeqsdn import ThreeQSDNIE from .tinypic import TinyPicIE from .tlc import TlcDeIE diff --git a/youtube_dl/extractor/thisoldhouse.py b/youtube_dl/extractor/thisoldhouse.py new file mode 100644 index 000000000..7629f0d10 --- /dev/null +++ b/youtube_dl/extractor/thisoldhouse.py @@ -0,0 +1,32 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class ThisOldHouseIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?thisoldhouse\.com/(?:watch|how-to)/(?P<id>[^/?#]+)' + _TESTS = [{ + 'url': 'https://www.thisoldhouse.com/how-to/how-to-build-storage-bench', + 'md5': '568acf9ca25a639f0c4ff905826b662f', + 'info_dict': { + 'id': '2REGtUDQ', + 'ext': 'mp4', + 'title': 'How to Build a Storage Bench', + 'description': 'In the workshop, Tom Silva and Kevin O\'Connor build a storage bench for an entryway.', + 'timestamp': 1442548800, + 'upload_date': '20150918', + } + }, { + 'url': 'https://www.thisoldhouse.com/watch/arlington-arts-crafts-arts-and-crafts-class-begins', + 'only_matching': True, + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + drupal_settings = self._parse_json(self._search_regex( + r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);', + webpage, 'drupal settings'), display_id) + video_id = drupal_settings['jwplatform']['video_id'] + return self.url_result('jwplatform:' + video_id, 'JWPlatform', video_id) From dcdb292fddc82ae11f4c0b647815a45c88a6b6d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A9stin=20Reed?= <trox1972@users.noreply.github.com> Date: Sun, 2 Oct 2016 13:39:18 +0200 Subject: [PATCH 613/775] Unify coding cookie --- devscripts/lazy_load_template.py | 2 +- docs/conf.py | 2 +- setup.py | 2 +- youtube_dl/YoutubeDL.py | 2 +- youtube_dl/__init__.py | 2 +- youtube_dl/extractor/adobepass.py | 2 +- youtube_dl/extractor/allocine.py | 2 +- youtube_dl/extractor/arte.py | 2 +- youtube_dl/extractor/brightcove.py | 2 +- youtube_dl/extractor/canalplus.py | 2 +- youtube_dl/extractor/cbsnews.py | 2 +- youtube_dl/extractor/ceskatelevize.py | 2 +- youtube_dl/extractor/comcarcoff.py | 2 +- youtube_dl/extractor/crunchyroll.py | 2 +- youtube_dl/extractor/daum.py | 2 +- youtube_dl/extractor/dramafever.py | 2 +- youtube_dl/extractor/eitb.py | 2 +- youtube_dl/extractor/embedly.py | 2 +- youtube_dl/extractor/faz.py | 2 +- youtube_dl/extractor/firsttv.py | 2 +- youtube_dl/extractor/folketinget.py | 2 +- youtube_dl/extractor/francetv.py | 2 +- 
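
The NHL patch above routes both nhl.com and wch2016.com URLs to the same bamcontent endpoint, keyed by _SITES_MAP. A quick illustration (not part of the patches), using one of the test URLs added in that patch:

    # Illustration only: how a wch2016.com URL maps to its metadata endpoint.
    import re

    m = re.match(
        r'https?://(?:www\.)?(?P<site>nhl|wch2016)\.com/(?:[^/]+/)*c-(?P<id>\d+)',
        'https://www.wch2016.com/news/3-stars-team-europe-vs-team-canada/c-282195068')
    site, tmp_id = m.group('site'), m.group('id')   # 'wch2016', '282195068'
    sites_map = {'nhl': 'nhl', 'wch2016': 'wch'}
    meta_url = 'https://nhl.bamcontent.com/%s/id/v1/%s/details/web-v1.json' % (
        sites_map[site], tmp_id)
    # -> https://nhl.bamcontent.com/wch/id/v1/282195068/details/web-v1.json
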
youtube_dl/extractor/generic.py | 2 +- youtube_dl/extractor/goshgay.py | 2 +- youtube_dl/extractor/hark.py | 2 +- youtube_dl/extractor/helsinki.py | 2 +- youtube_dl/extractor/ina.py | 2 +- youtube_dl/extractor/jpopsukitv.py | 2 +- youtube_dl/extractor/kickstarter.py | 2 +- youtube_dl/extractor/kontrtube.py | 2 +- youtube_dl/extractor/krasview.py | 2 +- youtube_dl/extractor/lifenews.py | 2 +- youtube_dl/extractor/m6.py | 2 +- youtube_dl/extractor/mailru.py | 2 +- youtube_dl/extractor/moviezine.py | 2 +- youtube_dl/extractor/musicplayon.py | 2 +- youtube_dl/extractor/myspace.py | 2 +- youtube_dl/extractor/naver.py | 2 +- youtube_dl/extractor/newstube.py | 2 +- youtube_dl/extractor/niconico.py | 2 +- youtube_dl/extractor/noco.py | 2 +- youtube_dl/extractor/normalboots.py | 2 +- youtube_dl/extractor/nova.py | 2 +- youtube_dl/extractor/nowness.py | 2 +- youtube_dl/extractor/nrk.py | 2 +- youtube_dl/extractor/ntvru.py | 2 +- youtube_dl/extractor/nuevo.py | 2 +- youtube_dl/extractor/oktoberfesttv.py | 2 +- youtube_dl/extractor/pandoratv.py | 2 +- youtube_dl/extractor/patreon.py | 2 +- youtube_dl/extractor/porn91.py | 2 +- youtube_dl/extractor/prosiebensat1.py | 2 +- youtube_dl/extractor/puls4.py | 2 +- youtube_dl/extractor/radiobremen.py | 2 +- youtube_dl/extractor/rmcdecouverte.py | 2 +- youtube_dl/extractor/rtl2.py | 2 +- youtube_dl/extractor/rtve.py | 2 +- youtube_dl/extractor/ruhd.py | 2 +- youtube_dl/extractor/rutube.py | 2 +- youtube_dl/extractor/rutv.py | 2 +- youtube_dl/extractor/safari.py | 2 +- youtube_dl/extractor/sapo.py | 2 +- youtube_dl/extractor/sbs.py | 2 +- youtube_dl/extractor/screencast.py | 2 +- youtube_dl/extractor/screenwavemedia.py | 2 +- youtube_dl/extractor/smotri.py | 2 +- youtube_dl/extractor/sohu.py | 2 +- youtube_dl/extractor/soundcloud.py | 2 +- youtube_dl/extractor/southpark.py | 2 +- youtube_dl/extractor/spiegel.py | 2 +- youtube_dl/extractor/srmediathek.py | 2 +- youtube_dl/extractor/streamcz.py | 2 +- youtube_dl/extractor/swrmediathek.py | 2 +- youtube_dl/extractor/sztvhu.py | 2 +- youtube_dl/extractor/tagesschau.py | 2 +- youtube_dl/extractor/tass.py | 2 +- youtube_dl/extractor/teachertube.py | 2 +- youtube_dl/extractor/teamcoco.py | 2 +- youtube_dl/extractor/theintercept.py | 2 +- youtube_dl/extractor/theplatform.py | 2 +- youtube_dl/extractor/tlc.py | 2 +- youtube_dl/extractor/toypics.py | 2 +- youtube_dl/extractor/tumblr.py | 2 +- youtube_dl/extractor/tv2.py | 2 +- youtube_dl/extractor/tvigle.py | 2 +- youtube_dl/extractor/vbox7.py | 2 +- youtube_dl/extractor/vesti.py | 2 +- youtube_dl/extractor/vimeo.py | 2 +- youtube_dl/extractor/vk.py | 2 +- youtube_dl/extractor/vodlocker.py | 2 +- youtube_dl/extractor/wdr.py | 2 +- youtube_dl/extractor/wrzuta.py | 2 +- youtube_dl/extractor/wsj.py | 2 +- youtube_dl/extractor/xboxclips.py | 2 +- youtube_dl/extractor/xnxx.py | 2 +- youtube_dl/extractor/xuite.py | 2 +- youtube_dl/extractor/zingmp3.py | 2 +- youtube_dl/postprocessor/embedthumbnail.py | 2 +- youtube_dl/utils.py | 2 +- 99 files changed, 99 insertions(+), 99 deletions(-) diff --git a/devscripts/lazy_load_template.py b/devscripts/lazy_load_template.py index 2e6e6641b..c4e5fc1f4 100644 --- a/devscripts/lazy_load_template.py +++ b/devscripts/lazy_load_template.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/docs/conf.py b/docs/conf.py index 594ca61a6..0aaf1b8fc 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 # # youtube-dl documentation build 
configuration file, created by # sphinx-quickstart on Fri Mar 14 21:05:43 2014. diff --git a/setup.py b/setup.py index 508b27f37..ce6dd1870 100644 --- a/setup.py +++ b/setup.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import print_function diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 442aa663b..99825e343 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import absolute_import, unicode_literals diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index f84b866df..643393558 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals diff --git a/youtube_dl/extractor/adobepass.py b/youtube_dl/extractor/adobepass.py index 8f7ed6ef2..d62010cb2 100644 --- a/youtube_dl/extractor/adobepass.py +++ b/youtube_dl/extractor/adobepass.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/allocine.py b/youtube_dl/extractor/allocine.py index 190bc2cc8..7d280d871 100644 --- a/youtube_dl/extractor/allocine.py +++ b/youtube_dl/extractor/allocine.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index e0c5c1804..dbac24b18 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 2ec55b185..945cf19e8 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/canalplus.py b/youtube_dl/extractor/canalplus.py index 69e8f4f57..6dab226af 100644 --- a/youtube_dl/extractor/canalplus.py +++ b/youtube_dl/extractor/canalplus.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/cbsnews.py b/youtube_dl/extractor/cbsnews.py index 216989230..91b0f5fa9 100644 --- a/youtube_dl/extractor/cbsnews.py +++ b/youtube_dl/extractor/cbsnews.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/ceskatelevize.py b/youtube_dl/extractor/ceskatelevize.py index 87c2e7089..4ec79d19d 100644 --- a/youtube_dl/extractor/ceskatelevize.py +++ b/youtube_dl/extractor/ceskatelevize.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/comcarcoff.py b/youtube_dl/extractor/comcarcoff.py index 747c245c8..588aad0d9 100644 --- a/youtube_dl/extractor/comcarcoff.py +++ b/youtube_dl/extractor/comcarcoff.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index e4c10ad24..c38fd095a 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# 
coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/daum.py b/youtube_dl/extractor/daum.py index b5c310ccb..732b4362a 100644 --- a/youtube_dl/extractor/daum.py +++ b/youtube_dl/extractor/daum.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals diff --git a/youtube_dl/extractor/dramafever.py b/youtube_dl/extractor/dramafever.py index 3b6529f4b..c11595612 100644 --- a/youtube_dl/extractor/dramafever.py +++ b/youtube_dl/extractor/dramafever.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import itertools diff --git a/youtube_dl/extractor/eitb.py b/youtube_dl/extractor/eitb.py index 713cb7b32..ee5ead18b 100644 --- a/youtube_dl/extractor/eitb.py +++ b/youtube_dl/extractor/eitb.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/embedly.py b/youtube_dl/extractor/embedly.py index 1cdb11e34..a5820b21e 100644 --- a/youtube_dl/extractor/embedly.py +++ b/youtube_dl/extractor/embedly.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/faz.py b/youtube_dl/extractor/faz.py index fd535457d..4bc8fc512 100644 --- a/youtube_dl/extractor/faz.py +++ b/youtube_dl/extractor/faz.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/firsttv.py b/youtube_dl/extractor/firsttv.py index 332d12020..6b662cc3c 100644 --- a/youtube_dl/extractor/firsttv.py +++ b/youtube_dl/extractor/firsttv.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/folketinget.py b/youtube_dl/extractor/folketinget.py index 75399fa7d..b3df93f28 100644 --- a/youtube_dl/extractor/folketinget.py +++ b/youtube_dl/extractor/folketinget.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py index 3233f66d5..e7068d1ae 100644 --- a/youtube_dl/extractor/francetv.py +++ b/youtube_dl/extractor/francetv.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 489b3c7c1..9ea306e3a 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals diff --git a/youtube_dl/extractor/goshgay.py b/youtube_dl/extractor/goshgay.py index a43abd154..74e1720ee 100644 --- a/youtube_dl/extractor/goshgay.py +++ b/youtube_dl/extractor/goshgay.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/hark.py b/youtube_dl/extractor/hark.py index 749e9154f..342a6130e 100644 --- a/youtube_dl/extractor/hark.py +++ b/youtube_dl/extractor/hark.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/helsinki.py b/youtube_dl/extractor/helsinki.py index 93107b306..575fb332a 100644 --- 
a/youtube_dl/extractor/helsinki.py +++ b/youtube_dl/extractor/helsinki.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals diff --git a/youtube_dl/extractor/ina.py b/youtube_dl/extractor/ina.py index 65712abc2..9544ff9d4 100644 --- a/youtube_dl/extractor/ina.py +++ b/youtube_dl/extractor/ina.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/jpopsukitv.py b/youtube_dl/extractor/jpopsukitv.py index 122e2dd8c..4b5f346d1 100644 --- a/youtube_dl/extractor/jpopsukitv.py +++ b/youtube_dl/extractor/jpopsukitv.py @@ -1,4 +1,4 @@ -# coding=utf-8 +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/kickstarter.py b/youtube_dl/extractor/kickstarter.py index fbe499497..d4da8f484 100644 --- a/youtube_dl/extractor/kickstarter.py +++ b/youtube_dl/extractor/kickstarter.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/kontrtube.py b/youtube_dl/extractor/kontrtube.py index 704bd7b34..1fda45107 100644 --- a/youtube_dl/extractor/kontrtube.py +++ b/youtube_dl/extractor/kontrtube.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/krasview.py b/youtube_dl/extractor/krasview.py index 0ae8ebd68..cf8876fa1 100644 --- a/youtube_dl/extractor/krasview.py +++ b/youtube_dl/extractor/krasview.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import json diff --git a/youtube_dl/extractor/lifenews.py b/youtube_dl/extractor/lifenews.py index 87120ecd1..afce2010e 100644 --- a/youtube_dl/extractor/lifenews.py +++ b/youtube_dl/extractor/lifenews.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/m6.py b/youtube_dl/extractor/m6.py index 39d2742c8..9806875e8 100644 --- a/youtube_dl/extractor/m6.py +++ b/youtube_dl/extractor/m6.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/mailru.py b/youtube_dl/extractor/mailru.py index 9a7098c43..f7cc3c832 100644 --- a/youtube_dl/extractor/mailru.py +++ b/youtube_dl/extractor/mailru.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/moviezine.py b/youtube_dl/extractor/moviezine.py index aa091a62c..478e39967 100644 --- a/youtube_dl/extractor/moviezine.py +++ b/youtube_dl/extractor/moviezine.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/musicplayon.py b/youtube_dl/extractor/musicplayon.py index 2174e5665..1854d59a5 100644 --- a/youtube_dl/extractor/musicplayon.py +++ b/youtube_dl/extractor/musicplayon.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/myspace.py b/youtube_dl/extractor/myspace.py index 0d5238d77..ab32e632e 100644 --- a/youtube_dl/extractor/myspace.py +++ b/youtube_dl/extractor/myspace.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/naver.py 
b/youtube_dl/extractor/naver.py index 0891d2772..055070ff5 100644 --- a/youtube_dl/extractor/naver.py +++ b/youtube_dl/extractor/naver.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/newstube.py b/youtube_dl/extractor/newstube.py index 0092b85ce..e3f35f1d8 100644 --- a/youtube_dl/extractor/newstube.py +++ b/youtube_dl/extractor/newstube.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py index 6eaaa8416..a104e33f8 100644 --- a/youtube_dl/extractor/niconico.py +++ b/youtube_dl/extractor/niconico.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/noco.py b/youtube_dl/extractor/noco.py index 06f2bda07..70ff2ab36 100644 --- a/youtube_dl/extractor/noco.py +++ b/youtube_dl/extractor/noco.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/normalboots.py b/youtube_dl/extractor/normalboots.py index af44c3bb5..6aa0895b8 100644 --- a/youtube_dl/extractor/normalboots.py +++ b/youtube_dl/extractor/normalboots.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/nova.py b/youtube_dl/extractor/nova.py index 17671ad39..103952345 100644 --- a/youtube_dl/extractor/nova.py +++ b/youtube_dl/extractor/nova.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/nowness.py b/youtube_dl/extractor/nowness.py index 74860eb20..7e5346316 100644 --- a/youtube_dl/extractor/nowness.py +++ b/youtube_dl/extractor/nowness.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals from .brightcove import ( diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index ed42eb301..d471eb20c 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/ntvru.py b/youtube_dl/extractor/ntvru.py index e8702ebcd..7d7a785ab 100644 --- a/youtube_dl/extractor/ntvru.py +++ b/youtube_dl/extractor/ntvru.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/nuevo.py b/youtube_dl/extractor/nuevo.py index ef093dec2..87fb94d1f 100644 --- a/youtube_dl/extractor/nuevo.py +++ b/youtube_dl/extractor/nuevo.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/oktoberfesttv.py b/youtube_dl/extractor/oktoberfesttv.py index f2ccc53dc..50fbbc79c 100644 --- a/youtube_dl/extractor/oktoberfesttv.py +++ b/youtube_dl/extractor/oktoberfesttv.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/pandoratv.py b/youtube_dl/extractor/pandoratv.py index 8d49f5c4a..2b07958bb 100644 --- a/youtube_dl/extractor/pandoratv.py +++ b/youtube_dl/extractor/pandoratv.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals from 
.common import InfoExtractor diff --git a/youtube_dl/extractor/patreon.py b/youtube_dl/extractor/patreon.py index 229750665..a6a2c273f 100644 --- a/youtube_dl/extractor/patreon.py +++ b/youtube_dl/extractor/patreon.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/porn91.py b/youtube_dl/extractor/porn91.py index 9894f3262..073fc3e21 100644 --- a/youtube_dl/extractor/porn91.py +++ b/youtube_dl/extractor/porn91.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals from ..compat import ( diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py index 873d4f981..7cc07a2ad 100644 --- a/youtube_dl/extractor/prosiebensat1.py +++ b/youtube_dl/extractor/prosiebensat1.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/puls4.py b/youtube_dl/extractor/puls4.py index 9c2ccbe2d..1c54af002 100644 --- a/youtube_dl/extractor/puls4.py +++ b/youtube_dl/extractor/puls4.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals from .prosiebensat1 import ProSiebenSat1BaseIE diff --git a/youtube_dl/extractor/radiobremen.py b/youtube_dl/extractor/radiobremen.py index 19a751da0..0aa8d059b 100644 --- a/youtube_dl/extractor/radiobremen.py +++ b/youtube_dl/extractor/radiobremen.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals diff --git a/youtube_dl/extractor/rmcdecouverte.py b/youtube_dl/extractor/rmcdecouverte.py index f3bb4fa66..2340dae53 100644 --- a/youtube_dl/extractor/rmcdecouverte.py +++ b/youtube_dl/extractor/rmcdecouverte.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/rtl2.py b/youtube_dl/extractor/rtl2.py index de004671d..cb4ee8803 100644 --- a/youtube_dl/extractor/rtl2.py +++ b/youtube_dl/extractor/rtl2.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/rtve.py b/youtube_dl/extractor/rtve.py index f1b92f6da..6a43b036e 100644 --- a/youtube_dl/extractor/rtve.py +++ b/youtube_dl/extractor/rtve.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import base64 diff --git a/youtube_dl/extractor/ruhd.py b/youtube_dl/extractor/ruhd.py index 1f7c26299..ce631b46c 100644 --- a/youtube_dl/extractor/ruhd.py +++ b/youtube_dl/extractor/ruhd.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/rutube.py b/youtube_dl/extractor/rutube.py index 5d0ace5bf..fd1df925b 100644 --- a/youtube_dl/extractor/rutube.py +++ b/youtube_dl/extractor/rutube.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/rutv.py b/youtube_dl/extractor/rutv.py index a2379eb04..a5e672c0a 100644 --- a/youtube_dl/extractor/rutv.py +++ b/youtube_dl/extractor/rutv.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/safari.py b/youtube_dl/extractor/safari.py index eabe41efe..8b35fd244 100644 --- a/youtube_dl/extractor/safari.py +++ b/youtube_dl/extractor/safari.py @@ 
-1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/sapo.py b/youtube_dl/extractor/sapo.py index 172cc1275..49a9b313a 100644 --- a/youtube_dl/extractor/sapo.py +++ b/youtube_dl/extractor/sapo.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/sbs.py b/youtube_dl/extractor/sbs.py index 96472fbc4..43131fb7e 100644 --- a/youtube_dl/extractor/sbs.py +++ b/youtube_dl/extractor/sbs.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/screencast.py b/youtube_dl/extractor/screencast.py index 356631700..ed9de9648 100644 --- a/youtube_dl/extractor/screencast.py +++ b/youtube_dl/extractor/screencast.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/screenwavemedia.py b/youtube_dl/extractor/screenwavemedia.py index 40333c825..7d77e8825 100644 --- a/youtube_dl/extractor/screenwavemedia.py +++ b/youtube_dl/extractor/screenwavemedia.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/smotri.py b/youtube_dl/extractor/smotri.py index 114358786..def46abda 100644 --- a/youtube_dl/extractor/smotri.py +++ b/youtube_dl/extractor/smotri.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/sohu.py b/youtube_dl/extractor/sohu.py index 48e2ba2dd..30760ca06 100644 --- a/youtube_dl/extractor/sohu.py +++ b/youtube_dl/extractor/sohu.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 1a8114aa7..3b7ecb3c3 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/southpark.py b/youtube_dl/extractor/southpark.py index e2a9e45ac..08f8c5744 100644 --- a/youtube_dl/extractor/southpark.py +++ b/youtube_dl/extractor/southpark.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals from .mtv import MTVServicesInfoExtractor diff --git a/youtube_dl/extractor/spiegel.py b/youtube_dl/extractor/spiegel.py index b41d9f59f..ec1b60388 100644 --- a/youtube_dl/extractor/spiegel.py +++ b/youtube_dl/extractor/spiegel.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/srmediathek.py b/youtube_dl/extractor/srmediathek.py index 409d50304..b03272f7a 100644 --- a/youtube_dl/extractor/srmediathek.py +++ b/youtube_dl/extractor/srmediathek.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals from .ard import ARDMediathekIE diff --git a/youtube_dl/extractor/streamcz.py b/youtube_dl/extractor/streamcz.py index d3d2b7eb7..9e533103c 100644 --- a/youtube_dl/extractor/streamcz.py +++ b/youtube_dl/extractor/streamcz.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals import hashlib diff --git a/youtube_dl/extractor/swrmediathek.py b/youtube_dl/extractor/swrmediathek.py 
index 58073eefe..6d69f7686 100644 --- a/youtube_dl/extractor/swrmediathek.py +++ b/youtube_dl/extractor/swrmediathek.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/sztvhu.py b/youtube_dl/extractor/sztvhu.py index f562aa6d3..cfad33146 100644 --- a/youtube_dl/extractor/sztvhu.py +++ b/youtube_dl/extractor/sztvhu.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/tagesschau.py b/youtube_dl/extractor/tagesschau.py index 136e18f96..8670cee28 100644 --- a/youtube_dl/extractor/tagesschau.py +++ b/youtube_dl/extractor/tagesschau.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/tass.py b/youtube_dl/extractor/tass.py index c4ef70778..5293393ef 100644 --- a/youtube_dl/extractor/tass.py +++ b/youtube_dl/extractor/tass.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import json diff --git a/youtube_dl/extractor/teachertube.py b/youtube_dl/extractor/teachertube.py index 82675431f..df5d5556f 100644 --- a/youtube_dl/extractor/teachertube.py +++ b/youtube_dl/extractor/teachertube.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/teamcoco.py b/youtube_dl/extractor/teamcoco.py index 79a778920..75346393b 100644 --- a/youtube_dl/extractor/teamcoco.py +++ b/youtube_dl/extractor/teamcoco.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals import base64 diff --git a/youtube_dl/extractor/theintercept.py b/youtube_dl/extractor/theintercept.py index ec6f4ecaa..f23b58713 100644 --- a/youtube_dl/extractor/theintercept.py +++ b/youtube_dl/extractor/theintercept.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index 6febf805b..cfbf7f4e1 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/tlc.py b/youtube_dl/extractor/tlc.py index ce4f91f46..fd145ba42 100644 --- a/youtube_dl/extractor/tlc.py +++ b/youtube_dl/extractor/tlc.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/toypics.py b/youtube_dl/extractor/toypics.py index 2579ba8c6..938e05076 100644 --- a/youtube_dl/extractor/toypics.py +++ b/youtube_dl/extractor/toypics.py @@ -1,4 +1,4 @@ -# -*- coding:utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/tumblr.py b/youtube_dl/extractor/tumblr.py index 4d8b57111..ebe411e12 100644 --- a/youtube_dl/extractor/tumblr.py +++ b/youtube_dl/extractor/tumblr.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/tv2.py b/youtube_dl/extractor/tv2.py index f225ec684..bd28267b0 100644 --- a/youtube_dl/extractor/tv2.py +++ b/youtube_dl/extractor/tv2.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals 
import re diff --git a/youtube_dl/extractor/tvigle.py b/youtube_dl/extractor/tvigle.py index ead4c00c7..f3817ab28 100644 --- a/youtube_dl/extractor/tvigle.py +++ b/youtube_dl/extractor/tvigle.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/vbox7.py b/youtube_dl/extractor/vbox7.py index e17988573..a1e0851b7 100644 --- a/youtube_dl/extractor/vbox7.py +++ b/youtube_dl/extractor/vbox7.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/vesti.py b/youtube_dl/extractor/vesti.py index cb64ae0bd..5ab716880 100644 --- a/youtube_dl/extractor/vesti.py +++ b/youtube_dl/extractor/vesti.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 50aacc6ac..309a47bf0 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import json diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index 58799d413..ac77bc623 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import collections diff --git a/youtube_dl/extractor/vodlocker.py b/youtube_dl/extractor/vodlocker.py index a938a4007..c85b474d2 100644 --- a/youtube_dl/extractor/vodlocker.py +++ b/youtube_dl/extractor/vodlocker.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index 390f9e830..f7e6360a3 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/wrzuta.py b/youtube_dl/extractor/wrzuta.py index bdd7097ba..0f53f1bcb 100644 --- a/youtube_dl/extractor/wrzuta.py +++ b/youtube_dl/extractor/wrzuta.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/wsj.py b/youtube_dl/extractor/wsj.py index a83e68b17..deb7483ae 100644 --- a/youtube_dl/extractor/wsj.py +++ b/youtube_dl/extractor/wsj.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/xboxclips.py b/youtube_dl/extractor/xboxclips.py index b113ab1c4..d9c277bc3 100644 --- a/youtube_dl/extractor/xboxclips.py +++ b/youtube_dl/extractor/xboxclips.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/xnxx.py b/youtube_dl/extractor/xnxx.py index bcb140305..e0a6255dc 100644 --- a/youtube_dl/extractor/xnxx.py +++ b/youtube_dl/extractor/xnxx.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/xuite.py b/youtube_dl/extractor/xuite.py index a66daee46..4b9c1ee9c 100644 --- a/youtube_dl/extractor/xuite.py +++ b/youtube_dl/extractor/xuite.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals import base64 diff 
--git a/youtube_dl/extractor/zingmp3.py b/youtube_dl/extractor/zingmp3.py index bd708b42c..0f0e9d0eb 100644 --- a/youtube_dl/extractor/zingmp3.py +++ b/youtube_dl/extractor/zingmp3.py @@ -1,4 +1,4 @@ -# coding=utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/postprocessor/embedthumbnail.py b/youtube_dl/postprocessor/embedthumbnail.py index 3bad5a266..2e4789eb2 100644 --- a/youtube_dl/postprocessor/embedthumbnail.py +++ b/youtube_dl/postprocessor/embedthumbnail.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 044520037..0569d231c 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals From 215ff6e0f3b092aac9edc91b8026ffc7b55d8b70 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Mon, 3 Oct 2016 18:16:55 +0100 Subject: [PATCH 614/775] [theweatherchannel] Add new extractor(closes #7188) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/theweatherchannel.py | 79 +++++++++++++++++++++++ 2 files changed, 80 insertions(+) create mode 100644 youtube_dl/extractor/theweatherchannel.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index dca4973d4..f67e19526 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -890,6 +890,7 @@ from .theplatform import ( from .thescene import TheSceneIE from .thesixtyone import TheSixtyOneIE from .thestar import TheStarIE +from .theweatherchannel import TheWeatherChannelIE from .thisamericanlife import ThisAmericanLifeIE from .thisav import ThisAVIE from .thisoldhouse import ThisOldHouseIE diff --git a/youtube_dl/extractor/theweatherchannel.py b/youtube_dl/extractor/theweatherchannel.py new file mode 100644 index 000000000..c34a49d03 --- /dev/null +++ b/youtube_dl/extractor/theweatherchannel.py @@ -0,0 +1,79 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .theplatform import ThePlatformIE +from ..utils import ( + determine_ext, + parse_duration, +) + + +class TheWeatherChannelIE(ThePlatformIE): + _VALID_URL = r'https?://(?:www\.)?weather\.com/(?:[^/]+/)*video/(?P<id>[^/?#]+)' + _TESTS = [{ + 'url': 'https://weather.com/series/great-outdoors/video/ice-climber-is-in-for-a-shock', + 'md5': 'ab924ac9574e79689c24c6b95e957def', + 'info_dict': { + 'id': 'cc82397e-cc3f-4d11-9390-a785add090e8', + 'ext': 'mp4', + 'title': 'Ice Climber Is In For A Shock', + 'description': 'md5:55606ce1378d4c72e6545e160c9d9695', + 'uploader': 'TWC - Digital (No Distro)', + 'uploader_id': '6ccd5455-16bb-46f2-9c57-ff858bb9f62c', + } + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + drupal_settings = self._parse_json(self._search_regex( + r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);', + webpage, 'drupal settings'), display_id) + video_id = drupal_settings['twc']['contexts']['node']['uuid'] + video_data = self._download_json( + 'https://dsx.weather.com/cms/v4/asset-collection/en_US/' + video_id, video_id) + seo_meta = video_data.get('seometa', {}) + title = video_data.get('title') or seo_meta['title'] + + urls = [] + thumbnails = [] + formats = [] + for variant_id, variant_url in video_data.get('variants', []).items(): + variant_url = variant_url.strip() + if not variant_url or variant_url in urls: + 
continue + urls.append(variant_url) + ext = determine_ext(variant_url) + if ext == 'jpg': + thumbnails.append({ + 'url': variant_url, + 'id': variant_id, + }) + elif ThePlatformIE.suitable(variant_url): + tp_formats, _ = self._extract_theplatform_smil(variant_url, video_id) + formats.extend(tp_formats) + elif ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + variant_url, video_id, 'mp4', 'm3u8_native', + m3u8_id=variant_id, fatal=False)) + elif ext == 'f4m': + formats.extend(self._extract_f4m_formats( + variant_url, video_id, f4m_id=variant_id, fatal=False)) + else: + formats.append({ + 'url': variant_url, + 'format_id': variant_id, + }) + self._sort_formats(formats) + + return { + 'id': video_id, + 'display_id': display_id, + 'title': title, + 'description': video_data.get('description') or seo_meta.get('description') or seo_meta.get('og:description'), + 'duration': parse_duration(video_data.get('duration')), + 'uploader': video_data.get('providername'), + 'uploader_id': video_data.get('providerid'), + 'thumbnails': thumbnails, + 'formats': formats, + } From c1b2a0858cafc3362e5da73b9fb737f18cde4618 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 4 Oct 2016 02:10:23 +0700 Subject: [PATCH 615/775] [youtube:live] Extend _VALID_URL (Closes #10839) --- youtube_dl/extractor/youtube.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index f86823112..cb266eab6 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -2152,7 +2152,7 @@ class YoutubeUserIE(YoutubeChannelIE): class YoutubeLiveIE(YoutubeBaseInfoExtractor): IE_DESC = 'YouTube.com live streams' - _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+))/live' + _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:user|channel|c)/(?P<id>[^/]+))/live' IE_NAME = 'youtube:live' _TESTS = [{ @@ -2178,6 +2178,9 @@ class YoutubeLiveIE(YoutubeBaseInfoExtractor): }, { 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live', 'only_matching': True, + }, { + 'url': 'https://www.youtube.com/c/CommanderVideoHq/live', + 'only_matching': True, }] def _real_extract(self, url): From 539c881bfc1380890a55a08dbf970900328f8ec5 Mon Sep 17 00:00:00 2001 From: Aleksander Nitecki <ixendr@itogi.re> Date: Mon, 3 Oct 2016 21:47:19 +0200 Subject: [PATCH 616/775] [techtalks] Allow URL-s with name part omitted. 
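For the youtube:live change above, a standalone sketch of what the widened _VALID_URL now accepts; the regular expression and both sample URLs are taken from the hunk, the local names and assertions are illustrative only:

    import re

    LIVE_URL_RE = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:user|channel|c)/(?P<id>[^/]+))/live'

    # the new /c/ style URL from the added test case now matches
    mobj = re.match(LIVE_URL_RE, 'https://www.youtube.com/c/CommanderVideoHq/live')
    assert mobj and mobj.group('id') == 'CommanderVideoHq'

    # the previously supported /channel/ form keeps matching
    assert re.match(LIVE_URL_RE, 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live')
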
--- youtube_dl/extractor/techtalks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/techtalks.py b/youtube_dl/extractor/techtalks.py index 16e945d8e..0ec7b1273 100644 --- a/youtube_dl/extractor/techtalks.py +++ b/youtube_dl/extractor/techtalks.py @@ -10,7 +10,7 @@ from ..utils import ( class TechTalksIE(InfoExtractor): - _VALID_URL = r'https?://techtalks\.tv/talks/[^/]*/(?P<id>\d+)/' + _VALID_URL = r'https?://techtalks\.tv/talks/(?:[^/]*/)?(?P<id>\d+)/' _TEST = { 'url': 'http://techtalks.tv/talks/learning-topic-models-going-beyond-svd/57758/', From 6eb5503b12286ef9813ee22e95622f09dab2ebe5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 4 Oct 2016 02:54:36 +0700 Subject: [PATCH 617/775] [techtalks] Relax _VALID_URL --- youtube_dl/extractor/techtalks.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/techtalks.py b/youtube_dl/extractor/techtalks.py index 0ec7b1273..a5b62c717 100644 --- a/youtube_dl/extractor/techtalks.py +++ b/youtube_dl/extractor/techtalks.py @@ -10,9 +10,9 @@ from ..utils import ( class TechTalksIE(InfoExtractor): - _VALID_URL = r'https?://techtalks\.tv/talks/(?:[^/]*/)?(?P<id>\d+)/' + _VALID_URL = r'https?://techtalks\.tv/talks/(?:[^/]+/)?(?P<id>\d+)' - _TEST = { + _TESTS = [{ 'url': 'http://techtalks.tv/talks/learning-topic-models-going-beyond-svd/57758/', 'info_dict': { 'id': '57758', @@ -38,7 +38,10 @@ class TechTalksIE(InfoExtractor): # rtmp download 'skip_download': True, }, - } + }, { + 'url': 'http://techtalks.tv/talks/57758', + 'only_matching': True, + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) From 7232e54813481dc7b9b2ea9f70499a49badd75cc Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 4 Oct 2016 07:59:53 +0100 Subject: [PATCH 618/775] [tonline] Add new extractor(#10376) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/tonline.py | 59 ++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+) create mode 100644 youtube_dl/extractor/tonline.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index f67e19526..e73956923 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -908,6 +908,7 @@ from .tnaflix import ( MovieFapIE, ) from .toggle import ToggleIE +from .tonline import TOnlineIE from .toutv import TouTvIE from .toypics import ToypicsUserIE, ToypicsIE from .traileraddict import TrailerAddictIE diff --git a/youtube_dl/extractor/tonline.py b/youtube_dl/extractor/tonline.py new file mode 100644 index 000000000..cc11eae2a --- /dev/null +++ b/youtube_dl/extractor/tonline.py @@ -0,0 +1,59 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import int_or_none + + +class TOnlineIE(InfoExtractor): + IE_NAME = 't-online.de' + _VALID_URL = r'https?://(?:www\.)?t-online\.de/tv/(?:[^/]+/)*id_(?P<id>\d+)' + _TEST = { + 'url': 'http://www.t-online.de/tv/sport/fussball/id_79166266/drittes-remis-zidane-es-muss-etwas-passieren-.html', + 'md5': '7d94dbdde5f9d77c5accc73c39632c29', + 'info_dict': { + 'id': '79166266', + 'ext': 'mp4', + 'title': 'Drittes Remis! Zidane: "Es muss etwas passieren"', + 'description': 'Es läuft nicht rund bei Real Madrid. 
Das 1:1 gegen den SD Eibar war das dritte Unentschieden in Folge in der Liga.', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + video_data = self._download_json( + 'http://www.t-online.de/tv/id_%s/tid_json_video' % video_id, video_id) + title = video_data['subtitle'] + + formats = [] + for asset in video_data.get('assets', []): + asset_source = asset.get('source') or asset.get('source2') + if not asset_source: + continue + formats_id = [] + for field_key in ('type', 'profile'): + field_value = asset.get(field_key) + if field_value: + formats_id.append(field_value) + formats.append({ + 'format_id': '-'.join(formats_id), + 'url': asset_source, + }) + + thumbnails = [] + for image in video_data.get('images', []): + image_source = image.get('source') + if not image_source: + continue + thumbnails.append({ + 'url': image_source, + }) + + return { + 'id': video_id, + 'title': title, + 'description': video_data.get('description'), + 'duration': int_or_none(video_data.get('duration')), + 'thumbnails': thumbnails, + 'formats': formats, + } From 185744f92f172a5cd1db317fbf87fee733cfdfe6 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 4 Oct 2016 10:30:57 +0100 Subject: [PATCH 619/775] [lego] Add new extractor(closes #10369) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/lego.py | 86 ++++++++++++++++++++++++++++++ 2 files changed, 87 insertions(+) create mode 100644 youtube_dl/extractor/lego.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index e73956923..feee06004 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -437,6 +437,7 @@ from .lcp import ( ) from .learnr import LearnrIE from .lecture2go import Lecture2GoIE +from .lego import LEGOIE from .lemonde import LemondeIE from .leeco import ( LeIE, diff --git a/youtube_dl/extractor/lego.py b/youtube_dl/extractor/lego.py new file mode 100644 index 000000000..5be7d622c --- /dev/null +++ b/youtube_dl/extractor/lego.py @@ -0,0 +1,86 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + unescapeHTML, + int_or_none, +) + + +class LEGOIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?lego\.com/(?:[^/]+/)*videos/(?:[^/]+/)*[^/?#]+-(?P<id>[0-9a-f]+)' + _TEST = { + 'url': 'http://www.lego.com/en-us/videos/themes/club/blocumentary-kawaguchi-55492d823b1b4d5e985787fa8c2973b1', + 'md5': 'f34468f176cfd76488767fc162c405fa', + 'info_dict': { + 'id': '55492d823b1b4d5e985787fa8c2973b1', + 'ext': 'mp4', + 'title': 'Blocumentary Great Creations: Akiyuki Kawaguchi', + } + } + _BITRATES = [256, 512, 1024, 1536, 2560] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage( + 'http://www.lego.com/en-US/mediaplayer/video/' + video_id, video_id) + title = self._search_regex(r'<title>(.+?)', webpage, 'title') + video_data = self._parse_json(unescapeHTML(self._search_regex( + r"video='([^']+)'", webpage, 'video data')), video_id) + progressive_base = self._search_regex( + r'data-video-progressive-url="([^"]+)"', + webpage, 'progressive base', default='https://lc-mediaplayerns-live-s.legocdn.com/') + streaming_base = self._search_regex( + r'data-video-streaming-url="([^"]+)"', + webpage, 'streaming base', default='http://legoprod-f.akamaihd.net/') + item_id = video_data['ItemId'] + + net_storage_path = video_data.get('NetStoragePath') or '/'.join([item_id[:2], 
item_id[2:4]]) + base_path = '_'.join([item_id, video_data['VideoId'], video_data['Locale'], compat_str(video_data['VideoVersion'])]) + path = '/'.join([net_storage_path, base_path]) + streaming_path = ','.join(map(lambda bitrate: compat_str(bitrate), self._BITRATES)) + + formats = self._extract_akamai_formats( + '%si/s/public/%s_,%s,.mp4.csmil/master.m3u8' % (streaming_base, path, streaming_path), video_id) + m3u8_formats = list(filter( + lambda f: f.get('protocol') == 'm3u8_native' and f.get('vcodec') != 'none' and f.get('resolution') != 'multiple', + formats)) + if len(m3u8_formats) == len(self._BITRATES): + self._sort_formats(m3u8_formats) + for bitrate, m3u8_format in zip(self._BITRATES, m3u8_formats): + progressive_base_url = '%spublic/%s_%d.' % (progressive_base, path, bitrate) + mp4_f = m3u8_format.copy() + mp4_f.update({ + 'url': progressive_base_url + 'mp4', + 'format_id': m3u8_format['format_id'].replace('hls', 'mp4'), + 'protocol': 'http', + }) + web_f = { + 'url': progressive_base_url + 'webm', + 'format_id': m3u8_format['format_id'].replace('hls', 'webm'), + 'width': m3u8_format['width'], + 'height': m3u8_format['height'], + 'tbr': m3u8_format.get('tbr'), + 'ext': 'webm', + } + formats.extend([web_f, mp4_f]) + else: + for bitrate in self._BITRATES: + for ext in ('web', 'mp4'): + formats.append({ + 'format_id': '%s-%s' % (ext, bitrate), + 'url': '%spublic/%s_%d.%s' % (progressive_base, path, bitrate, ext), + 'tbr': bitrate, + 'ext': ext, + }) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'thumbnail': video_data.get('CoverImageUrl'), + 'duration': int_or_none(video_data.get('Length')), + 'formats': formats, + } From 0a33bb2cb2ca401ffe88e520d7bbd7482d976cbc Mon Sep 17 00:00:00 2001 From: Steffan Donal Date: Tue, 4 Oct 2016 09:52:02 +0100 Subject: [PATCH 620/775] Rename "Steffan 'Ruirize' James" to "Steffan Donal" Legal name change! 
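For the [lego] extractor above, a rough sketch of how the progressive download URLs end up being assembled; the CDN base, path layout and bitrate list come from the patch, while the VideoId/Locale/VideoVersion values below are made-up placeholders:

    # placeholders standing in for video_data['VideoId'], ['Locale'] and ['VideoVersion'];
    # only the ItemId is taken from the test case in the patch
    item_id = '55492d823b1b4d5e985787fa8c2973b1'
    video_asset_id, locale, version = 'SOME-VIDEO-ID', 'en-US', '1'

    progressive_base = 'https://lc-mediaplayerns-live-s.legocdn.com/'
    net_storage_path = '/'.join([item_id[:2], item_id[2:4]])          # NetStoragePath fallback: '55/49'
    base_path = '_'.join([item_id, video_asset_id, locale, version])
    path = '/'.join([net_storage_path, base_path])

    for bitrate in [256, 512, 1024, 1536, 2560]:                      # _BITRATES
        print('%spublic/%s_%d.mp4' % (progressive_base, path, bitrate))
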
--- AUTHORS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/AUTHORS b/AUTHORS index 937742c5d..b6456052d 100644 --- a/AUTHORS +++ b/AUTHORS @@ -26,7 +26,7 @@ Albert Kim Pierre Rudloff Huarong Huo Ismael Mejía -Steffan 'Ruirize' James +Steffan Donal Andras Elso Jelle van der Waa Marcin Cieślak From b1d798887e5bc26c938fe8c07ae5ccf382568f58 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 5 Oct 2016 23:43:08 +0700 Subject: [PATCH 621/775] [npo] Add support for 2doc.nl (Closes #10842) --- youtube_dl/extractor/npo.py | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index 9c7cc777b..c3915ec6e 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -459,8 +459,9 @@ class NPOPlaylistBaseIE(NPOIE): class VPROIE(NPOPlaylistBaseIE): IE_NAME = 'vpro' - _VALID_URL = r'https?://(?:www\.)?(?:tegenlicht\.)?vpro\.nl/(?:[^/]+/){2,}(?P[^/]+)\.html' - _PLAYLIST_TITLE_RE = r']+class=["\'].*?\bmedia-platform-title\b.*?["\'][^>]*>([^<]+)' + _VALID_URL = r'https?://(?:www\.)?(?:(?:tegenlicht\.)?vpro|2doc)\.nl/(?:[^/]+/)*(?P[^/]+)\.html' + _PLAYLIST_TITLE_RE = (r']+class=["\'].*?\bmedia-platform-title\b.*?["\'][^>]*>([^<]+)', + r']+class=["\'].*?\bmedia-platform-subtitle\b.*?["\'][^>]*>([^<]+)') _PLAYLIST_ENTRY_RE = r'data-media-id="([^"]+)"' _TESTS = [ @@ -492,6 +493,27 @@ class VPROIE(NPOPlaylistBaseIE): 'title': 'education education', }, 'playlist_count': 2, + }, + { + 'url': 'http://www.2doc.nl/documentaires/series/2doc/2015/oktober/de-tegenprestatie.html', + 'info_dict': { + 'id': 'de-tegenprestatie', + 'title': 'De Tegenprestatie', + }, + 'playlist_count': 2, + }, { + 'url': 'http://www.2doc.nl/speel~VARA_101375237~mh17-het-verdriet-van-nederland~.html', + 'info_dict': { + 'id': 'VARA_101375237', + 'ext': 'm4v', + 'title': 'MH17: Het verdriet van Nederland', + 'description': 'md5:09e1a37c1fdb144621e22479691a9f18', + 'upload_date': '20150716', + }, + 'params': { + # Skip because of m3u8 download + 'skip_download': True + }, } ] From 017eb829343dfff9b70ab7f2278053f35cee953c Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 5 Oct 2016 18:27:02 +0100 Subject: [PATCH 622/775] [npo] detect geo restriction --- youtube_dl/extractor/npo.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index c3915ec6e..c91f58461 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -3,6 +3,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import compat_HTTPError from ..utils import ( fix_xml_ampersands, orderedSet, @@ -10,6 +11,7 @@ from ..utils import ( qualities, strip_jsonp, unified_strdate, + ExtractorError, ) @@ -181,9 +183,16 @@ class NPOIE(NPOBaseIE): continue streams = format_info.get('streams') if streams: - video_info = self._download_json( - streams[0] + '&type=json', - video_id, 'Downloading %s stream JSON' % format_id) + try: + video_info = self._download_json( + streams[0] + '&type=json', + video_id, 'Downloading %s stream JSON' % format_id) + except ExtractorError as ee: + if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404: + error = (self._parse_json(ee.cause.read().decode(), video_id, fatal=False) or {}).get('errorstring') + if error: + raise ExtractorError(error, expected=True) + raise else: video_info = format_info video_url = video_info.get('url') From 
33898fb19c1af161c503ebce8f9a4774fecee45e Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 6 Oct 2016 10:45:57 +0100 Subject: [PATCH 623/775] [nzz] Add new extractor(#4407) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/nzz.py | 36 ++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) create mode 100644 youtube_dl/extractor/nzz.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index feee06004..72bc4f57c 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -638,6 +638,7 @@ from .nytimes import ( NYTimesArticleIE, ) from .nuvid import NuvidIE +from .nzz import NZZIE from .odatv import OdaTVIE from .odnoklassniki import OdnoklassnikiIE from .oktoberfesttv import OktoberfestTVIE diff --git a/youtube_dl/extractor/nzz.py b/youtube_dl/extractor/nzz.py new file mode 100644 index 000000000..2d352f53f --- /dev/null +++ b/youtube_dl/extractor/nzz.py @@ -0,0 +1,36 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + extract_attributes, +) + + +class NZZIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?nzz\.ch/(?:[^/]+/)*[^/?#]+-ld\.(?P\d+)' + _TEST = { + 'url': 'http://www.nzz.ch/zuerich/gymizyte/gymizyte-schreiben-schueler-heute-noch-diktate-ld.9153', + 'info_dict': { + 'id': '9153', + }, + 'playlist_mincount': 6, + } + + def _real_extract(self, url): + page_id = self._match_id(url) + webpage = self._download_webpage(url, page_id) + + entries = [] + for player_element in re.findall(r'(<[^>]+class="kalturaPlayer"[^>]*>)', webpage): + player_params = extract_attributes(player_element) + if player_params.get('data-type') not in ('kaltura_singleArticle',): + self.report_warning('Unsupported player type') + continue + entry_id = player_params['data-id'] + entries.append(self.url_result( + 'kaltura:1750922:' + entry_id, 'Kaltura', entry_id)) + + return self.playlist_result(entries, page_id) From 09b9c45e242cb9e85beaa98b4783ec02065f1ec6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 6 Oct 2016 23:22:52 +0700 Subject: [PATCH 624/775] [generic] Add support for multiple vimeo embeds (Closes #10862) --- youtube_dl/extractor/generic.py | 6 +++--- youtube_dl/extractor/vimeo.py | 36 ++++++++++++++++++--------------- youtube_dl/extractor/vk.py | 2 +- 3 files changed, 24 insertions(+), 20 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 9ea306e3a..8ef8fb5f4 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1754,9 +1754,9 @@ class GenericIE(InfoExtractor): if matches: return _playlist_from_matches(matches, ie='RtlNl') - vimeo_url = VimeoIE._extract_vimeo_url(url, webpage) - if vimeo_url is not None: - return self.url_result(vimeo_url) + vimeo_urls = VimeoIE._extract_urls(url, webpage) + if vimeo_urls: + return _playlist_from_matches(vimeo_urls, ie=VimeoIE.ie_key()) vid_me_embed_url = self._search_regex( r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]', diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 309a47bf0..ea8fc5908 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -355,23 +355,27 @@ class VimeoIE(VimeoBaseInfoExtractor): return smuggle_url(url, {'http_headers': {'Referer': referrer_url}}) @staticmethod - def _extract_vimeo_url(url, webpage): + def _extract_urls(url, webpage): + urls = [] # Look for embedded (iframe) Vimeo player - mobj = 
re.search( - r']+?src=(["\'])(?P(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage) - if mobj: - player_url = unescapeHTML(mobj.group('url')) - return VimeoIE._smuggle_referrer(player_url, url) - # Look for embedded (swf embed) Vimeo player - mobj = re.search( - r']+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage) - if mobj: - return mobj.group(1) - # Look more for non-standard embedded Vimeo player - mobj = re.search( - r']+src=(?P[\'"])(?P(?:https?:)?//(?:www\.)?vimeo\.com/[0-9]+)(?P=q1)', webpage) - if mobj: - return mobj.group('url') + for mobj in re.finditer( + r']+?src=(["\'])(?P(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage): + urls.append(VimeoIE._smuggle_referrer(unescapeHTML(mobj.group('url')), url)) + PLAIN_EMBED_RE = ( + # Look for embedded (swf embed) Vimeo player + r']+?src=(["\'])(?P(?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)\1', + # Look more for non-standard embedded Vimeo player + r']+src=(["\'])(?P(?:https?:)?//(?:www\.)?vimeo\.com/[0-9]+)\1', + ) + for embed_re in PLAIN_EMBED_RE: + for mobj in re.finditer(embed_re, webpage): + urls.append(mobj.group('url')) + return urls + + @staticmethod + def _extract_url(url, webpage): + urls = VimeoIE._extract_urls(url, webpage) + return urls[0] if urls else None def _verify_player_video_password(self, url, video_id): password = self._downloader.params.get('videopassword') diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index ac77bc623..df43ba867 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -341,7 +341,7 @@ class VKIE(VKBaseIE): if youtube_url: return self.url_result(youtube_url, 'Youtube') - vimeo_url = VimeoIE._extract_vimeo_url(url, info_page) + vimeo_url = VimeoIE._extract_url(url, info_page) if vimeo_url is not None: return self.url_result(vimeo_url) From 831a34caa2112a9b2d867e05f8a4debf965e8389 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 7 Oct 2016 03:28:41 +0800 Subject: [PATCH 625/775] [Makefilea] Fix for GNU make < 4 Closes #9387 The shell assignment operator != was introduced in GNU make 4.0, or specifically the commit in [1]. This fix removes such usages and fallback to a more portable syntax. 
Tested with: * GNU make 3.82 on CentOS 7.2 * bmake 20150910 on CentOS 7.2, source RPM from Fedora 24 [2] * GNU make 4.2.1 on Arch Linux (Arch official package) * bmake 20160926 on Arch Linux (Arch official package) * GNU make 3.82 on Arch Linux (Compiled from source) * Apple bsdmake-24 on macOS Sierra, binary package from Homebrew Thanks @bdeyal for the feedback of the first tests [1] http://git.savannah.gnu.org/cgit/make.git/commit/?id=b34438bee83ee906a23b881f257e684a0993b9b1 [2] http://koji.fedoraproject.org/koji/buildinfo?buildID=716769 --- ChangeLog | 6 ++++++ Makefile | 4 ++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index 4f64edabb..be1cf90fb 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Core +* Support for GNU make < 4 is fixed (#9387) + + version 2016.10.02 Core diff --git a/Makefile b/Makefile index a2763a664..7393e3e1e 100644 --- a/Makefile +++ b/Makefile @@ -12,7 +12,7 @@ SHAREDIR ?= $(PREFIX)/share PYTHON ?= /usr/bin/env python # set SYSCONFDIR to /etc if PREFIX=/usr or PREFIX=/usr/local -SYSCONFDIR != if [ $(PREFIX) = /usr -o $(PREFIX) = /usr/local ]; then echo /etc; else echo $(PREFIX)/etc; fi +SYSCONFDIR = $$(if [ $(PREFIX) = /usr -o $(PREFIX) = /usr/local ]; then echo /etc; else echo $(PREFIX)/etc; fi) install: youtube-dl youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish install -d $(DESTDIR)$(BINDIR) @@ -90,7 +90,7 @@ fish-completion: youtube-dl.fish lazy-extractors: youtube_dl/extractor/lazy_extractors.py -_EXTRACTOR_FILES != find youtube_dl/extractor -iname '*.py' -and -not -iname 'lazy_extractors.py' +_EXTRACTOR_FILES = $$(find youtube_dl/extractor -iname '*.py' -and -not -iname 'lazy_extractors.py') youtube_dl/extractor/lazy_extractors.py: devscripts/make_lazy_extractors.py devscripts/lazy_load_template.py $(_EXTRACTOR_FILES) $(PYTHON) devscripts/make_lazy_extractors.py $@ From c0a7b9b348bb580d32fc94ee90c1b3b02b668a9e Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 7 Oct 2016 16:02:53 +0800 Subject: [PATCH 626/775] Revert "[Makefilea] Fix for GNU make < 4" This reverts commit 831a34caa2112a9b2d867e05f8a4debf965e8389. The reverted commit breaks lazy extractors. 
--- ChangeLog | 6 ------ Makefile | 4 ++-- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/ChangeLog b/ChangeLog index be1cf90fb..4f64edabb 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,9 +1,3 @@ -version - -Core -* Support for GNU make < 4 is fixed (#9387) - - version 2016.10.02 Core diff --git a/Makefile b/Makefile index 7393e3e1e..a2763a664 100644 --- a/Makefile +++ b/Makefile @@ -12,7 +12,7 @@ SHAREDIR ?= $(PREFIX)/share PYTHON ?= /usr/bin/env python # set SYSCONFDIR to /etc if PREFIX=/usr or PREFIX=/usr/local -SYSCONFDIR = $$(if [ $(PREFIX) = /usr -o $(PREFIX) = /usr/local ]; then echo /etc; else echo $(PREFIX)/etc; fi) +SYSCONFDIR != if [ $(PREFIX) = /usr -o $(PREFIX) = /usr/local ]; then echo /etc; else echo $(PREFIX)/etc; fi install: youtube-dl youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish install -d $(DESTDIR)$(BINDIR) @@ -90,7 +90,7 @@ fish-completion: youtube-dl.fish lazy-extractors: youtube_dl/extractor/lazy_extractors.py -_EXTRACTOR_FILES = $$(find youtube_dl/extractor -iname '*.py' -and -not -iname 'lazy_extractors.py') +_EXTRACTOR_FILES != find youtube_dl/extractor -iname '*.py' -and -not -iname 'lazy_extractors.py' youtube_dl/extractor/lazy_extractors.py: devscripts/make_lazy_extractors.py devscripts/lazy_load_template.py $(_EXTRACTOR_FILES) $(PYTHON) devscripts/make_lazy_extractors.py $@ From 3d83a1ae924902a0421bea8e2e6cd57bb34ee299 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 7 Oct 2016 17:50:45 +0800 Subject: [PATCH 627/775] [generic] Support direct MMS links (closes #10838) --- ChangeLog | 6 ++++++ youtube_dl/extractor/generic.py | 19 +++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/ChangeLog b/ChangeLog index 4f64edabb..55e60758d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Extractors ++ [generic] Support direct MMS links (#10838) + + version 2016.10.02 Core diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 8ef8fb5f4..1f18cbfe9 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1412,6 +1412,18 @@ class GenericIE(InfoExtractor): }, 'playlist_mincount': 3, }, + { + # Direct MMS link + 'url': 'mms://kentro.kaist.ac.kr/200907/MilesReid(0709).wmv', + 'info_dict': { + 'id': 'MilesReid(0709)', + 'ext': 'wmv', + 'title': 'MilesReid(0709)', + }, + 'params': { + 'skip_download': True, # rtsp downloads, requiring mplayer or mpv + }, + }, # { # # TODO: find another test # # http://schema.org/VideoObject @@ -1551,6 +1563,13 @@ class GenericIE(InfoExtractor): else: video_id = compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0]) + if parsed_url.scheme == 'mms': + return { + 'id': video_id, + 'title': video_id, + 'url': url, + } + self.to_screen('%s: Requesting header' % video_id) head_req = HEADRequest(url) From 98763ee354ffc13a57f28dbd006729affacb6d30 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 7 Oct 2016 19:20:53 +0800 Subject: [PATCH 628/775] [extractor/common] Add id and title helpers for generic IEs --- youtube_dl/extractor/common.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 1076b46da..da192728f 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -21,6 +21,7 @@ from ..compat import ( compat_os_name, compat_str, compat_urllib_error, + compat_urllib_parse_unquote, compat_urllib_parse_urlencode, compat_urllib_request, compat_urlparse, @@ -2020,6 +2021,12 @@ class 
InfoExtractor(object): headers['Ytdl-request-proxy'] = geo_verification_proxy return headers + def _generic_id(self, url): + return compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0]) + + def _generic_title(self, url): + return compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]) + class SearchInfoExtractor(InfoExtractor): """ From 9dcd6fd3aae77571116ee8b823b6b9224d0ef2ad Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 7 Oct 2016 19:22:30 +0800 Subject: [PATCH 629/775] [generic,commonprotocols] Move mms suuport from GenericIE And use _generic_* helpers in those extractors --- ChangeLog | 2 +- youtube_dl/extractor/commonprotocols.py | 36 ++++++++++++++++++++----- youtube_dl/extractor/generic.py | 24 ++--------------- 3 files changed, 33 insertions(+), 29 deletions(-) diff --git a/ChangeLog b/ChangeLog index 55e60758d..3aa4d67f5 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,7 +1,7 @@ version Extractors -+ [generic] Support direct MMS links (#10838) ++ [commonprotocols] Support direct MMS links (#10838) version 2016.10.02 diff --git a/youtube_dl/extractor/commonprotocols.py b/youtube_dl/extractor/commonprotocols.py index 5d130a170..d98331a4e 100644 --- a/youtube_dl/extractor/commonprotocols.py +++ b/youtube_dl/extractor/commonprotocols.py @@ -1,13 +1,9 @@ from __future__ import unicode_literals -import os - from .common import InfoExtractor from ..compat import ( - compat_urllib_parse_unquote, compat_urlparse, ) -from ..utils import url_basename class RtmpIE(InfoExtractor): @@ -23,8 +19,8 @@ class RtmpIE(InfoExtractor): }] def _real_extract(self, url): - video_id = compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0]) - title = compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]) + video_id = self._generic_id(url) + title = self._generic_title(url) return { 'id': video_id, 'title': title, @@ -34,3 +30,31 @@ class RtmpIE(InfoExtractor): 'format_id': compat_urlparse.urlparse(url).scheme, }], } + + +class MmsIE(InfoExtractor): + IE_DESC = False # Do not list + _VALID_URL = r'(?i)mms://.+' + + _TEST = { + # Direct MMS link + 'url': 'mms://kentro.kaist.ac.kr/200907/MilesReid(0709).wmv', + 'info_dict': { + 'id': 'MilesReid(0709)', + 'ext': 'wmv', + 'title': 'MilesReid(0709)', + }, + 'params': { + 'skip_download': True, # rtsp downloads, requiring mplayer or mpv + }, + } + + def _real_extract(self, url): + video_id = self._generic_id(url) + title = self._generic_title(url) + + return { + 'id': video_id, + 'title': title, + 'url': url, + } diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 1f18cbfe9..7b8a9cf9a 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -27,7 +27,6 @@ from ..utils import ( unified_strdate, unsmuggle_url, UnsupportedError, - url_basename, xpath_text, ) from .brightcove import ( @@ -1412,18 +1411,6 @@ class GenericIE(InfoExtractor): }, 'playlist_mincount': 3, }, - { - # Direct MMS link - 'url': 'mms://kentro.kaist.ac.kr/200907/MilesReid(0709).wmv', - 'info_dict': { - 'id': 'MilesReid(0709)', - 'ext': 'wmv', - 'title': 'MilesReid(0709)', - }, - 'params': { - 'skip_download': True, # rtsp downloads, requiring mplayer or mpv - }, - }, # { # # TODO: find another test # # http://schema.org/VideoObject @@ -1561,14 +1548,7 @@ class GenericIE(InfoExtractor): force_videoid = smuggled_data['force_videoid'] video_id = force_videoid else: - video_id = compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0]) 
- - if parsed_url.scheme == 'mms': - return { - 'id': video_id, - 'title': video_id, - 'url': url, - } + video_id = self._generic_id(url) self.to_screen('%s: Requesting header' % video_id) @@ -1597,7 +1577,7 @@ class GenericIE(InfoExtractor): info_dict = { 'id': video_id, - 'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]), + 'title': self._generic_title(url), 'upload_date': unified_strdate(head_response.headers.get('Last-Modified')) } From 85bcdd081ce0009bcb7135d8d68192d34969e168 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 7 Oct 2016 19:31:26 +0800 Subject: [PATCH 630/775] [extractors] Add MmsIE --- youtube_dl/extractor/extractors.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 72bc4f57c..5c1d2abfb 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -186,7 +186,10 @@ from .comedycentral import ( ) from .comcarcoff import ComCarCoffIE from .commonmistakes import CommonMistakesIE, UnicodeBOMIE -from .commonprotocols import RtmpIE +from .commonprotocols import ( + MmsIE, + RtmpIE, +) from .condenast import CondeNastIE from .cracked import CrackedIE from .crackle import CrackleIE From 38588ab9770813cb92013b870edc15def4f9ac1c Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 7 Oct 2016 20:04:49 +0800 Subject: [PATCH 631/775] [facebook] Fix for new handleServerJS syntax (closes #10846) According to the dump file in #10846, handleServerJS() now accepts an optional second argument. It's a string from available dump files. --- ChangeLog | 1 + youtube_dl/extractor/facebook.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 3aa4d67f5..7aa0787ca 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors +* [facebook] Fix video extraction (#10846) + [commonprotocols] Support direct MMS links (#10838) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index 3a220e995..801573459 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -258,7 +258,7 @@ class FacebookIE(InfoExtractor): if not video_data: server_js_data = self._parse_json(self._search_regex( - r'handleServerJS\(({.+})\);', webpage, 'server js data', default='{}'), video_id) + r'handleServerJS\(({.+})(?:\);|,")', webpage, 'server js data', default='{}'), video_id) for item in server_js_data.get('instances', []): if item[1][0] == 'VideoConfig': video_data = video_data_list2dict(item[2][0]['videoData']) From 3c6b3bf2217e91c0d01ca65fa2b013ffa132fdbc Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 7 Oct 2016 15:53:03 +0100 Subject: [PATCH 632/775] [iprima] detect geo restriction --- youtube_dl/extractor/iprima.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/iprima.py b/youtube_dl/extractor/iprima.py index 788bbe0d5..da2cdc656 100644 --- a/youtube_dl/extractor/iprima.py +++ b/youtube_dl/extractor/iprima.py @@ -81,6 +81,9 @@ class IPrimaIE(InfoExtractor): for _, src in re.findall(r'src["\']\s*:\s*(["\'])(.+?)\1', playerpage): extract_formats(src) + if not formats and '>GEO_IP_NOT_ALLOWED<' in playerpage: + self.raise_geo_restricted() + self._sort_formats(formats) return { From f475e8812197027ba7770a421e7fc7094ee8ae0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 7 Oct 2016 22:15:26 +0700 Subject: [PATCH 633/775] [vimeo] PEP 8 [ci skip] --- youtube_dl/extractor/vimeo.py | 3 ++- 
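The widened handleServerJS pattern in the facebook fix above accepts both the old call that ends in ); and the new call that carries a trailing string argument. A quick check against made-up fragments (not copies of real Facebook markup):

    import re

    pattern = r'handleServerJS\(({.+})(?:\);|,")'
    old_call = 'handleServerJS({"instances": []});'                    # old syntax
    new_call = 'handleServerJS({"instances": []},"ServerJS define");'  # new syntax with 2nd argument
    for fragment in (old_call, new_call):
        print(re.search(pattern, fragment).group(1))  # {"instances": []} in both cases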
1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index ea8fc5908..a46c5c282 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -359,7 +359,8 @@ class VimeoIE(VimeoBaseInfoExtractor): urls = [] # Look for embedded (iframe) Vimeo player for mobj in re.finditer( - r']+?src=(["\'])(?P(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage): + r']+?src=(["\'])(?P(?:https?:)?//player\.vimeo\.com/video/.+?)\1', + webpage): urls.append(VimeoIE._smuggle_referrer(unescapeHTML(mobj.group('url')), url)) PLAIN_EMBED_RE = ( # Look for embedded (swf embed) Vimeo player From 888f8d6ba40f17d8f8a13ca6259e0312d9befc87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 7 Oct 2016 22:23:16 +0700 Subject: [PATCH 634/775] [ChangeLog] Actualize --- ChangeLog | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/ChangeLog b/ChangeLog index 7aa0787ca..fb248f9e9 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,8 +1,21 @@ version Extractors ++ [iprima] Detect geo restriction * [facebook] Fix video extraction (#10846) + [commonprotocols] Support direct MMS links (#10838) ++ [generic] Add support for multiple vimeo embeds (#10862) ++ [nzz] Add support for nzz.ch (#4407) ++ [npo] Detect geo restriction ++ [npo] Add support for 2doc.nl (#10842) ++ [lego] Add support for lego.com (#10369) ++ [tonline] Add support for t-online.de (#10376) +* [techtalks] Relax URL regular expression (#10840) +* [youtube:live] Extend URL regular expression (#10839) ++ [theweatherchannel] Add support for weather.com (#7188) ++ [thisoldhouse] Add support for thisoldhouse.com (#10837) ++ [nhl] Add support for wch2016.com (#10833) +* [pornoxo] Use JWPlatform to improve metadata extraction version 2016.10.02 From dd4291f72984037286dfd1800fdc07204b0b621a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 7 Oct 2016 22:25:30 +0700 Subject: [PATCH 635/775] release 2016.10.07 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 5 +++++ youtube_dl/version.py | 2 +- 4 files changed, 10 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index e813e4c59..15a93776b 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.10.02*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.10.02** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.10.07*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
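For reference, the Vimeo embed pattern that the PEP 8 patch above only re-wraps scans pages for player.vimeo.com iframes. A small self-check; the <iframe[^>]+? prefix follows the upstream extractor source, and the page fragment is made up:

    import re

    # pattern as in VimeoIE._extract_urls upstream
    pattern = (r'<iframe[^>]+?src=(["\'])'
               r'(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1')
    page = '<iframe src="https://player.vimeo.com/video/76979871" width="640"></iframe>'
    for mobj in re.finditer(pattern, page):
        print(mobj.group('url'))  # https://player.vimeo.com/video/76979871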
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.10.07** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.10.02 +[debug] youtube-dl version 2016.10.07 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index fb248f9e9..7e9b2b873 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2016.10.07 Extractors + [iprima] Detect geo restriction diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 828ed0ba9..5bbef0c41 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -364,6 +364,7 @@ - **Le**: 乐视网 - **Learnr** - **Lecture2Go** + - **LEGO** - **Lemonde** - **LePlaylist** - **LetvCloud**: 乐视云 @@ -507,6 +508,7 @@ - **Nuvid** - **NYTimes** - **NYTimesArticle** + - **NZZ** - **ocw.mit.edu** - **OdaTV** - **Odnoklassniki** @@ -692,6 +694,7 @@ - **SWRMediathek** - **Syfy** - **SztvHu** + - **t-online.de** - **Tagesschau** - **tagesschau:player** - **Tass** @@ -721,8 +724,10 @@ - **TheScene** - **TheSixtyOne** - **TheStar** + - **TheWeatherChannel** - **ThisAmericanLife** - **ThisAV** + - **ThisOldHouse** - **tinypic**: tinypic.com videos - **tlc.de** - **TMZ** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 161ba4391..ac0921b7a 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.10.02' +__version__ = '2016.10.07' From 1dd58e14d846a64a3c014531b1dc7a377648c73b Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 8 Oct 2016 08:33:02 +0100 Subject: [PATCH 636/775] [lego] improve info extraction and bypass geo restriction(closes #10872) --- youtube_dl/extractor/lego.py | 88 ++++++++++++++++++++++++++---------- 1 file changed, 65 insertions(+), 23 deletions(-) diff --git a/youtube_dl/extractor/lego.py b/youtube_dl/extractor/lego.py index 5be7d622c..d3bca6435 100644 --- a/youtube_dl/extractor/lego.py +++ b/youtube_dl/extractor/lego.py @@ -1,45 +1,86 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..compat import compat_str from ..utils import ( unescapeHTML, - int_or_none, + parse_duration, + get_element_by_class, ) class LEGOIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?lego\.com/(?:[^/]+/)*videos/(?:[^/]+/)*[^/?#]+-(?P[0-9a-f]+)' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?lego\.com/(?P[^/]+)/(?:[^/]+/)*videos/(?:[^/]+/)*[^/?#]+-(?P[0-9a-f]+)' + _TESTS = [{ 'url': 'http://www.lego.com/en-us/videos/themes/club/blocumentary-kawaguchi-55492d823b1b4d5e985787fa8c2973b1', 'md5': 'f34468f176cfd76488767fc162c405fa', 'info_dict': { 'id': '55492d823b1b4d5e985787fa8c2973b1', 'ext': 'mp4', 'title': 'Blocumentary Great Creations: Akiyuki Kawaguchi', - } - } + 'description': 'Blocumentary Great Creations: Akiyuki Kawaguchi', + }, + }, { + # geo-restricted but the contentUrl contain a valid url + 'url': 
'http://www.lego.com/nl-nl/videos/themes/nexoknights/episode-20-kingdom-of-heroes-13bdc2299ab24d9685701a915b3d71e7##sp=399', + 'md5': '4c3fec48a12e40c6e5995abc3d36cc2e', + 'info_dict': { + 'id': '13bdc2299ab24d9685701a915b3d71e7', + 'ext': 'mp4', + 'title': 'Aflevering 20 - Helden van het koninkrijk', + 'description': 'md5:8ee499aac26d7fa8bcb0cedb7f9c3941', + }, + }, { + # special characters in title + 'url': 'http://www.lego.com/en-us/starwars/videos/lego-star-wars-force-surprise-9685ee9d12e84ff38e84b4e3d0db533d', + 'info_dict': { + 'id': '9685ee9d12e84ff38e84b4e3d0db533d', + 'ext': 'mp4', + 'title': 'Force Surprise – LEGO® Star Wars™ Microfighters', + 'description': 'md5:9c673c96ce6f6271b88563fe9dc56de3', + }, + 'params': { + 'skip_download': True, + }, + }] _BITRATES = [256, 512, 1024, 1536, 2560] def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage( - 'http://www.lego.com/en-US/mediaplayer/video/' + video_id, video_id) - title = self._search_regex(r'(.+?)', webpage, 'title') - video_data = self._parse_json(unescapeHTML(self._search_regex( - r"video='([^']+)'", webpage, 'video data')), video_id) - progressive_base = self._search_regex( - r'data-video-progressive-url="([^"]+)"', - webpage, 'progressive base', default='https://lc-mediaplayerns-live-s.legocdn.com/') - streaming_base = self._search_regex( - r'data-video-streaming-url="([^"]+)"', - webpage, 'streaming base', default='http://legoprod-f.akamaihd.net/') - item_id = video_data['ItemId'] + locale, video_id = re.match(self._VALID_URL, url).groups() + webpage = self._download_webpage(url, video_id) + title = get_element_by_class('video-header', webpage).strip() + progressive_base = 'https://lc-mediaplayerns-live-s.legocdn.com/' + streaming_base = 'http://legoprod-f.akamaihd.net/' + content_url = self._html_search_meta('contentUrl', webpage) + path = self._search_regex( + r'(?:https?:)?//[^/]+/(?:[iz]/s/)?public/(.+)_[0-9,]+\.(?:mp4|webm)', + content_url, 'video path', default=None) + if not path: + player_url = self._proto_relative_url(self._search_regex( + r']+src="((?:https?)?//(?:www\.)?lego\.com/[^/]+/mediaplayer/video/[^"]+)', + webpage, 'player url', default=None)) + if not player_url: + base_url = self._proto_relative_url(self._search_regex( + r'data-baseurl="([^"]+)"', webpage, 'base url', + default='http://www.lego.com/%s/mediaplayer/video/' % locale)) + player_url = base_url + video_id + player_webpage = self._download_webpage(player_url, video_id) + video_data = self._parse_json(unescapeHTML(self._search_regex( + r"video='([^']+)'", player_webpage, 'video data')), video_id) + progressive_base = self._search_regex( + r'data-video-progressive-url="([^"]+)"', + player_webpage, 'progressive base', default='https://lc-mediaplayerns-live-s.legocdn.com/') + streaming_base = self._search_regex( + r'data-video-streaming-url="([^"]+)"', + player_webpage, 'streaming base', default='http://legoprod-f.akamaihd.net/') + item_id = video_data['ItemId'] - net_storage_path = video_data.get('NetStoragePath') or '/'.join([item_id[:2], item_id[2:4]]) - base_path = '_'.join([item_id, video_data['VideoId'], video_data['Locale'], compat_str(video_data['VideoVersion'])]) - path = '/'.join([net_storage_path, base_path]) + net_storage_path = video_data.get('NetStoragePath') or '/'.join([item_id[:2], item_id[2:4]]) + base_path = '_'.join([item_id, video_data['VideoId'], video_data['Locale'], compat_str(video_data['VideoVersion'])]) + path = '/'.join([net_storage_path, base_path]) streaming_path = 
','.join(map(lambda bitrate: compat_str(bitrate), self._BITRATES)) formats = self._extract_akamai_formats( @@ -80,7 +121,8 @@ class LEGOIE(InfoExtractor): return { 'id': video_id, 'title': title, - 'thumbnail': video_data.get('CoverImageUrl'), - 'duration': int_or_none(video_data.get('Length')), + 'description': self._html_search_meta('description', webpage), + 'thumbnail': self._html_search_meta('thumbnail', webpage), + 'duration': parse_duration(self._html_search_meta('duration', webpage)), 'formats': formats, } From 3adb9d119e049d2bbc92fe2b56f1a22f4a664892 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A9stin=20Reed?= Date: Fri, 30 Sep 2016 19:54:12 +0200 Subject: [PATCH 637/775] [reverbnation] Modernize --- youtube_dl/extractor/reverbnation.py | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/reverbnation.py b/youtube_dl/extractor/reverbnation.py index 3c6725aeb..52f18e231 100644 --- a/youtube_dl/extractor/reverbnation.py +++ b/youtube_dl/extractor/reverbnation.py @@ -1,7 +1,5 @@ from __future__ import unicode_literals -import re - from .common import InfoExtractor from ..utils import str_or_none @@ -10,20 +8,19 @@ class ReverbNationIE(InfoExtractor): _VALID_URL = r'^https?://(?:www\.)?reverbnation\.com/.*?/song/(?P\d+).*?$' _TESTS = [{ 'url': 'http://www.reverbnation.com/alkilados/song/16965047-mona-lisa', - 'md5': '3da12ebca28c67c111a7f8b262d3f7a7', + 'md5': 'c0aaf339bcee189495fdf5a8c8ba8645', 'info_dict': { 'id': '16965047', 'ext': 'mp3', 'title': 'MONA LISA', 'uploader': 'ALKILADOS', 'uploader_id': '216429', - 'thumbnail': 're:^https://gp1\.wac\.edgecastcdn\.net/.*?\.jpg$' + 'thumbnail': 're:^https?://.*\.jpg', }, }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - song_id = mobj.group('id') + song_id = self._match_id(url) api_res = self._download_json( 'https://api.reverbnation.com/song/%s' % song_id, @@ -31,14 +28,24 @@ class ReverbNationIE(InfoExtractor): note='Downloading information of song %s' % song_id ) + thumbnails = [] + if api_res.get('image'): + thumbnails.append({ + 'url': api_res.get('image'), + }) + if api_res.get('thumbnail'): + thumbnails.append({ + 'url': api_res.get('thumbnail'), + 'preference': -2, + }) + return { 'id': song_id, - 'title': api_res.get('name'), - 'url': api_res.get('url'), + 'title': api_res['name'], + 'url': api_res['url'], 'uploader': api_res.get('artist', {}).get('name'), 'uploader_id': str_or_none(api_res.get('artist', {}).get('id')), - 'thumbnail': self._proto_relative_url( - api_res.get('image', api_res.get('thumbnail'))), + 'thumbnails': thumbnails, 'ext': 'mp3', 'vcodec': 'none', } From f68901e50a9286aa4d82348cac0e85e26359c81c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 9 Oct 2016 01:02:35 +0700 Subject: [PATCH 638/775] [reverbnation] Eliminate code duplication in thumbnails extraction --- youtube_dl/extractor/reverbnation.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/reverbnation.py b/youtube_dl/extractor/reverbnation.py index 52f18e231..4875009e5 100644 --- a/youtube_dl/extractor/reverbnation.py +++ b/youtube_dl/extractor/reverbnation.py @@ -1,7 +1,10 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import str_or_none +from ..utils import ( + qualities, + str_or_none, +) class ReverbNationIE(InfoExtractor): @@ -28,16 +31,15 @@ class ReverbNationIE(InfoExtractor): note='Downloading information of song %s' % 
song_id ) + THUMBNAILS = ('thumbnail', 'image') + quality = qualities(THUMBNAILS) thumbnails = [] - if api_res.get('image'): - thumbnails.append({ - 'url': api_res.get('image'), - }) - if api_res.get('thumbnail'): - thumbnails.append({ - 'url': api_res.get('thumbnail'), - 'preference': -2, - }) + for thumb_key in THUMBNAILS: + if api_res.get(thumb_key): + thumbnails.append({ + 'url': api_res[thumb_key], + 'preference': quality(thumb_key) + }) return { 'id': song_id, From 2b51dac1f9750f6eb4988f3c23b0e8f618136b6b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A9stin=20Reed?= Date: Sat, 1 Oct 2016 13:57:18 +0200 Subject: [PATCH 639/775] [slutload] Fix test and simplify --- youtube_dl/extractor/slutload.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/slutload.py b/youtube_dl/extractor/slutload.py index 7efb29f65..18cc7721e 100644 --- a/youtube_dl/extractor/slutload.py +++ b/youtube_dl/extractor/slutload.py @@ -1,7 +1,5 @@ from __future__ import unicode_literals -import re - from .common import InfoExtractor @@ -9,7 +7,7 @@ class SlutloadIE(InfoExtractor): _VALID_URL = r'^https?://(?:\w+\.)?slutload\.com/video/[^/]+/(?P[^/]+)/?$' _TEST = { 'url': 'http://www.slutload.com/video/virginie-baisee-en-cam/TD73btpBqSxc/', - 'md5': '0cf531ae8006b530bd9df947a6a0df77', + 'md5': '868309628ba00fd488cf516a113fd717', 'info_dict': { 'id': 'TD73btpBqSxc', 'ext': 'mp4', @@ -20,9 +18,7 @@ class SlutloadIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) video_title = self._html_search_regex(r'

    ([^<]+)', From 8204c733523675d505a8c726ec65b65e15485ce1 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 9 Oct 2016 18:22:55 +0800 Subject: [PATCH 640/775] [Makefile] Fix for GNU make < 4 (closes #9387) Shell assignment operator in BSD make != is ported to GNU make in version 4.0, so 3.x doesn't work. I choose to drop BSD make support as installing GNU make on *BSD systems is easier than installing newer GNU make. --- ChangeLog | 6 ++++++ Makefile | 4 ++-- README.md | 2 +- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/ChangeLog b/ChangeLog index 7e9b2b873..3d3473a4b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Core +* [Makefile] Support for GNU make < 4 is fixed; BSD make dropped (#9387) + + version 2016.10.07 Extractors diff --git a/Makefile b/Makefile index a2763a664..8d66e48c9 100644 --- a/Makefile +++ b/Makefile @@ -12,7 +12,7 @@ SHAREDIR ?= $(PREFIX)/share PYTHON ?= /usr/bin/env python # set SYSCONFDIR to /etc if PREFIX=/usr or PREFIX=/usr/local -SYSCONFDIR != if [ $(PREFIX) = /usr -o $(PREFIX) = /usr/local ]; then echo /etc; else echo $(PREFIX)/etc; fi +SYSCONFDIR = $(shell if [ $(PREFIX) = /usr -o $(PREFIX) = /usr/local ]; then echo /etc; else echo $(PREFIX)/etc; fi) install: youtube-dl youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish install -d $(DESTDIR)$(BINDIR) @@ -90,7 +90,7 @@ fish-completion: youtube-dl.fish lazy-extractors: youtube_dl/extractor/lazy_extractors.py -_EXTRACTOR_FILES != find youtube_dl/extractor -iname '*.py' -and -not -iname 'lazy_extractors.py' +_EXTRACTOR_FILES = $(shell find youtube_dl/extractor -iname '*.py' -and -not -iname 'lazy_extractors.py') youtube_dl/extractor/lazy_extractors.py: devscripts/make_lazy_extractors.py devscripts/lazy_load_template.py $(_EXTRACTOR_FILES) $(PYTHON) devscripts/make_lazy_extractors.py $@ diff --git a/README.md b/README.md index 4debe15fe..1cb44b2cf 100644 --- a/README.md +++ b/README.md @@ -923,7 +923,7 @@ To run the test, simply invoke your favorite test runner, or execute a test file If you want to create a build of youtube-dl yourself, you'll need * python -* make (both GNU make and BSD make are supported) +* make (only GNU make is supported) * pandoc * zip * nosetests From b0082629a9cf65796d503786c45c144d992010e7 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 9 Oct 2016 18:42:15 +0800 Subject: [PATCH 641/775] =?UTF-8?q?[nextmedia]=20Support=20action=20news?= =?UTF-8?q?=20(=E5=8B=95=E6=96=B0=E8=81=9E)=20on=20Apple=20Daily?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ChangeLog | 3 +++ youtube_dl/extractor/nextmedia.py | 5 ++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 3d3473a4b..f74c6b5a4 100644 --- a/ChangeLog +++ b/ChangeLog @@ -3,6 +3,9 @@ version Core * [Makefile] Support for GNU make < 4 is fixed; BSD make dropped (#9387) +Extractors ++ [nextmedia] Recognize action news on AppleDaily + version 2016.10.07 diff --git a/youtube_dl/extractor/nextmedia.py b/youtube_dl/extractor/nextmedia.py index a08e48c4b..dee9056d3 100644 --- a/youtube_dl/extractor/nextmedia.py +++ b/youtube_dl/extractor/nextmedia.py @@ -93,7 +93,7 @@ class NextMediaActionNewsIE(NextMediaIE): class AppleDailyIE(NextMediaIE): IE_DESC = '臺灣蘋果日報' - _VALID_URL = r'https?://(www|ent)\.appledaily\.com\.tw/(?:animation|appledaily|enews|realtimenews)/[^/]+/[^/]+/(?P\d+)/(?P\d+)(/.*)?' 
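The replacement pattern on the next line only adds an actionnews branch to the alternation, which is what makes the new action news test URL match. A quick check (the category/id group names follow the upstream extractor source):

    import re

    # group names as in the upstream AppleDailyIE pattern
    new_valid_url = (r'https?://(www|ent)\.appledaily\.com\.tw/'
                     r'(?:animation|appledaily|enews|realtimenews|actionnews)/'
                     r'[^/]+/[^/]+/(?P<category>\d+)/(?P<id>\d+)(/.*)?')
    url = 'http://www.appledaily.com.tw/actionnews/appledaily/7/20161003/960588/'
    print(re.match(new_valid_url, url).group('id'))  # 960588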
+ _VALID_URL = r'https?://(www|ent)\.appledaily\.com\.tw/(?:animation|appledaily|enews|realtimenews|actionnews)/[^/]+/[^/]+/(?P\d+)/(?P\d+)(/.*)?' _TESTS = [{ 'url': 'http://ent.appledaily.com.tw/enews/article/entertainment/20150128/36354694', 'md5': 'a843ab23d150977cc55ef94f1e2c1e4d', @@ -154,6 +154,9 @@ class AppleDailyIE(NextMediaIE): 'description': 'md5:7b859991a6a4fedbdf3dd3b66545c748', 'upload_date': '20140417', }, + }, { + 'url': 'http://www.appledaily.com.tw/actionnews/appledaily/7/20161003/960588/', + 'only_matching': True, }] _URL_PATTERN = r'\{url: \'(.+)\'\}' From 65f4c1de3d442a49367597a80687fddcf3d142a2 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 9 Oct 2016 18:58:15 +0800 Subject: [PATCH 642/775] [allocine] Fix extraction (closes #10860) I change the URL of the third test case, because now the original URL does not contain a video anymore, and there's no easy to get the real URL from the /film/ one. --- ChangeLog | 1 + youtube_dl/extractor/allocine.py | 57 ++++++++++++-------------------- 2 files changed, 22 insertions(+), 36 deletions(-) diff --git a/ChangeLog b/ChangeLog index f74c6b5a4..6c6053a2a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,6 +4,7 @@ Core * [Makefile] Support for GNU make < 4 is fixed; BSD make dropped (#9387) Extractors +* [allocine] Fix extraction (#10860) + [nextmedia] Recognize action news on AppleDaily diff --git a/youtube_dl/extractor/allocine.py b/youtube_dl/extractor/allocine.py index 7d280d871..b292ffdd9 100644 --- a/youtube_dl/extractor/allocine.py +++ b/youtube_dl/extractor/allocine.py @@ -1,29 +1,25 @@ # coding: utf-8 from __future__ import unicode_literals -import re -import json - from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( qualities, - unescapeHTML, - xpath_element, + url_basename, ) class AllocineIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?allocine\.fr/(?Particle|video|film)/(fichearticle_gen_carticle=|player_gen_cmedia=|fichefilm_gen_cfilm=|video-)(?P[0-9]+)(?:\.html)?' + _VALID_URL = r'https?://(?:www\.)?allocine\.fr/(?:article|video|film)/(?:fichearticle_gen_carticle=|player_gen_cmedia=|fichefilm_gen_cfilm=|video-)(?P[0-9]+)(?:\.html)?' _TESTS = [{ 'url': 'http://www.allocine.fr/article/fichearticle_gen_carticle=18635087.html', 'md5': '0c9fcf59a841f65635fa300ac43d8269', 'info_dict': { 'id': '19546517', + 'display_id': '18635087', 'ext': 'mp4', 'title': 'Astérix - Le Domaine des Dieux Teaser VF', - 'description': 'md5:abcd09ce503c6560512c14ebfdb720d2', + 'description': 'md5:4a754271d9c6f16c72629a8a993ee884', 'thumbnail': 're:http://.*\.jpg', }, }, { @@ -31,19 +27,21 @@ class AllocineIE(InfoExtractor): 'md5': 'd0cdce5d2b9522ce279fdfec07ff16e0', 'info_dict': { 'id': '19540403', + 'display_id': '19540403', 'ext': 'mp4', 'title': 'Planes 2 Bande-annonce VF', 'description': 'Regardez la bande annonce du film Planes 2 (Planes 2 Bande-annonce VF). 
Planes 2, un film de Roberts Gannaway', 'thumbnail': 're:http://.*\.jpg', }, }, { - 'url': 'http://www.allocine.fr/film/fichefilm_gen_cfilm=181290.html', + 'url': 'http://www.allocine.fr/video/player_gen_cmedia=19544709&cfilm=181290.html', 'md5': '101250fb127ef9ca3d73186ff22a47ce', 'info_dict': { 'id': '19544709', + 'display_id': '19544709', 'ext': 'mp4', 'title': 'Dragons 2 - Bande annonce finale VF', - 'description': 'md5:601d15393ac40f249648ef000720e7e3', + 'description': 'md5:6cdd2d7c2687d4c6aafe80a35e17267a', 'thumbnail': 're:http://.*\.jpg', }, }, { @@ -52,43 +50,30 @@ class AllocineIE(InfoExtractor): }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - typ = mobj.group('typ') - display_id = mobj.group('id') + display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) - if typ == 'film': - video_id = self._search_regex(r'href="/video/player_gen_cmedia=([0-9]+).+"', webpage, 'video id') - else: - player = self._search_regex(r'data-player=\'([^\']+)\'>', webpage, 'data player', default=None) - if player: - player_data = json.loads(player) - video_id = compat_str(player_data['refMedia']) - else: - model = self._search_regex(r'data-model="([^"]+)">', webpage, 'data model') - model_data = self._parse_json(unescapeHTML(model), display_id) - video_id = compat_str(model_data['id']) + model = self._html_search_regex( + r'data-model="([^"]+)"', webpage, 'data model') + model_data = self._parse_json(model, display_id) - xml = self._download_xml('http://www.allocine.fr/ws/AcVisiondataV4.ashx?media=%s' % video_id, display_id) - - video = xpath_element(xml, './/AcVisionVideo').attrib quality = qualities(['ld', 'md', 'hd']) formats = [] - for k, v in video.items(): - if re.match(r'.+_path', k): - format_id = k.split('_')[0] - formats.append({ - 'format_id': format_id, - 'quality': quality(format_id), - 'url': v, - }) + for video_url in model_data['sources'].values(): + video_id, format_id = url_basename(video_url).split('_')[:2] + formats.append({ + 'format_id': format_id, + 'quality': quality(format_id), + 'url': video_url, + }) self._sort_formats(formats) return { 'id': video_id, - 'title': video['videoTitle'], + 'display_id': display_id, + 'title': model_data['title'], 'thumbnail': self._og_search_thumbnail(webpage), 'formats': formats, 'description': self._og_search_description(webpage), From 176006a1202cc6ef3d0a768ace41f97516c76c6d Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 9 Oct 2016 19:41:44 +0800 Subject: [PATCH 643/775] [allocine] Fix for /video/ videos (closes #10860) --- youtube_dl/extractor/allocine.py | 58 ++++++++++++++++++++++++-------- 1 file changed, 44 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/allocine.py b/youtube_dl/extractor/allocine.py index b292ffdd9..517b06def 100644 --- a/youtube_dl/extractor/allocine.py +++ b/youtube_dl/extractor/allocine.py @@ -3,6 +3,7 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( + remove_end, qualities, url_basename, ) @@ -46,7 +47,14 @@ class AllocineIE(InfoExtractor): }, }, { 'url': 'http://www.allocine.fr/video/video-19550147/', - 'only_matching': True, + 'md5': '3566c0668c0235e2d224fd8edb389f67', + 'info_dict': { + 'id': '19550147', + 'ext': 'mp4', + 'title': 'Faux Raccord N°123 - Les gaffes de Cliffhanger', + 'description': 'md5:bc734b83ffa2d8a12188d9eb48bb6354', + 'thumbnail': 're:http://.*\.jpg', + }, }] def _real_extract(self, url): @@ -54,26 +62,48 @@ class AllocineIE(InfoExtractor): webpage = 
self._download_webpage(url, display_id) - model = self._html_search_regex( - r'data-model="([^"]+)"', webpage, 'data model') - model_data = self._parse_json(model, display_id) - + formats = [] quality = qualities(['ld', 'md', 'hd']) - formats = [] - for video_url in model_data['sources'].values(): - video_id, format_id = url_basename(video_url).split('_')[:2] - formats.append({ - 'format_id': format_id, - 'quality': quality(format_id), - 'url': video_url, - }) + model = self._html_search_regex( + r'data-model="([^"]+)"', webpage, 'data model', default=None) + if model: + model_data = self._parse_json(model, display_id) + + for video_url in model_data['sources'].values(): + video_id, format_id = url_basename(video_url).split('_')[:2] + formats.append({ + 'format_id': format_id, + 'quality': quality(format_id), + 'url': video_url, + }) + + title = model_data['title'] + else: + video_id = display_id + media_data = self._download_json( + 'http://www.allocine.fr/ws/AcVisiondataV5.ashx?media=%s' % video_id, display_id) + for key, value in media_data['video'].items(): + if not key.endswith('Path'): + continue + + format_id = key[:-len('Path')] + formats.append({ + 'format_id': format_id, + 'quality': quality(format_id), + 'url': value, + }) + + title = remove_end(self._html_search_regex( + r'(?s)(.+?)', webpage, 'title' + ).strip(), ' - AlloCiné') + self._sort_formats(formats) return { 'id': video_id, 'display_id': display_id, - 'title': model_data['title'], + 'title': title, 'thumbnail': self._og_search_thumbnail(webpage), 'formats': formats, 'description': self._og_search_description(webpage), From 71cdcb23316b55baf9330741ede4c77d08e4d77f Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 11 Oct 2016 12:30:35 +0800 Subject: [PATCH 644/775] [hbo] Support episode pages (closes #10892) --- ChangeLog | 1 + youtube_dl/extractor/hbo.py | 63 ++++++++++++++++++++++++++++--------- 2 files changed, 50 insertions(+), 14 deletions(-) diff --git a/ChangeLog b/ChangeLog index 6c6053a2a..d49682c8b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,6 +4,7 @@ Core * [Makefile] Support for GNU make < 4 is fixed; BSD make dropped (#9387) Extractors ++ [hbo] Add support for episode pages (#10892) * [allocine] Fix extraction (#10860) + [nextmedia] Recognize action news on AppleDaily diff --git a/youtube_dl/extractor/hbo.py b/youtube_dl/extractor/hbo.py index dad0f3994..3606d64fd 100644 --- a/youtube_dl/extractor/hbo.py +++ b/youtube_dl/extractor/hbo.py @@ -12,17 +12,7 @@ from ..utils import ( ) -class HBOIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?hbo\.com/video/video\.html\?.*vid=(?P[0-9]+)' - _TEST = { - 'url': 'http://www.hbo.com/video/video.html?autoplay=true&g=u&vid=1437839', - 'md5': '1c33253f0c7782142c993c0ba62a8753', - 'info_dict': { - 'id': '1437839', - 'ext': 'mp4', - 'title': 'Ep. 
64 Clip: Encryption', - } - } +class HBOBaseIE(InfoExtractor): _FORMATS_INFO = { '1920': { 'width': 1280, @@ -50,8 +40,7 @@ class HBOIE(InfoExtractor): }, } - def _real_extract(self, url): - video_id = self._match_id(url) + def _extract_from_id(self, video_id): video_data = self._download_xml( 'http://render.lv3.hbo.com/data/content/global/videos/data/%s.xml' % video_id, video_id) title = xpath_text(video_data, 'title', 'title', True) @@ -116,7 +105,53 @@ class HBOIE(InfoExtractor): return { 'id': video_id, 'title': title, - 'duration': parse_duration(xpath_element(video_data, 'duration/tv14')), + 'duration': parse_duration(xpath_text(video_data, 'duration/tv14')), 'formats': formats, 'thumbnails': thumbnails, } + + +class HBOIE(HBOBaseIE): + _VALID_URL = r'https?://(?:www\.)?hbo\.com/video/video\.html\?.*vid=(?P[0-9]+)' + _TEST = { + 'url': 'http://www.hbo.com/video/video.html?autoplay=true&g=u&vid=1437839', + 'md5': '1c33253f0c7782142c993c0ba62a8753', + 'info_dict': { + 'id': '1437839', + 'ext': 'mp4', + 'title': 'Ep. 64 Clip: Encryption', + 'thumbnail': 're:https?://.*\.jpg$', + 'duration': 1072, + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + return self._extract_from_id(video_id) + + +class HBOEpisodeIE(HBOBaseIE): + _VALID_URL = r'https?://(?:www\.)?hbo\.com/(?!video)([^/]+/)+video/(?P[0-9a-z-]+)\.html' + + _TESTS = [{ + 'url': 'http://www.hbo.com/girls/episodes/5/52-i-love-you-baby/video/ep-52-inside-the-episode.html?autoplay=true', + 'md5': '689132b253cc0ab7434237fc3a293210', + 'info_dict': { + 'id': '1439518', + 'ext': 'mp4', + 'title': 'Ep. 52: Inside the Episode', + 'thumbnail': 're:https?://.*\.jpg$', + 'duration': 240, + }, + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + + webpage = self._download_webpage(url, display_id) + + video_id = self._search_regex( + r'(?P[\'"])videoId(?P=q1)\s*:\s*(?P[\'"])(?P\d+)(?P=q2)', + webpage, 'video ID', group='video_id') + + return self._extract_from_id(video_id) From 27b8d2ee9535e19cdaed69de7a08ba0e026700e0 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 11 Oct 2016 12:41:30 +0800 Subject: [PATCH 645/775] [hbo] Add display_id and another test (#10892) --- youtube_dl/extractor/hbo.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/hbo.py b/youtube_dl/extractor/hbo.py index 3606d64fd..cbf774377 100644 --- a/youtube_dl/extractor/hbo.py +++ b/youtube_dl/extractor/hbo.py @@ -138,11 +138,15 @@ class HBOEpisodeIE(HBOBaseIE): 'md5': '689132b253cc0ab7434237fc3a293210', 'info_dict': { 'id': '1439518', + 'display_id': 'ep-52-inside-the-episode', 'ext': 'mp4', 'title': 'Ep. 
52: Inside the Episode', 'thumbnail': 're:https?://.*\.jpg$', 'duration': 240, }, + }, { + 'url': 'http://www.hbo.com/game-of-thrones/about/video/season-5-invitation-to-the-set.html?autoplay=true', + 'only_matching': True, }] def _real_extract(self, url): @@ -154,4 +158,7 @@ class HBOEpisodeIE(HBOBaseIE): r'(?P[\'"])videoId(?P=q1)\s*:\s*(?P[\'"])(?P\d+)(?P=q2)', webpage, 'video ID', group='video_id') - return self._extract_from_id(video_id) + info_dict = self._extract_from_id(video_id) + info_dict['display_id'] = display_id + + return info_dict From f165ca70eb4f7911949278e17751092a3cc8619f Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 11 Oct 2016 12:53:27 +0800 Subject: [PATCH 646/775] [abc.net.au:iview] Fix for non-series videos (closes #10895) --- ChangeLog | 1 + youtube_dl/extractor/abc.py | 22 +++++++++++----------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/ChangeLog b/ChangeLog index d49682c8b..26f01790a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,6 +4,7 @@ Core * [Makefile] Support for GNU make < 4 is fixed; BSD make dropped (#9387) Extractors +* [abc.net.au:iview] Fix for standalone (non series) videos (#10895) + [hbo] Add support for episode pages (#10892) * [allocine] Fix extraction (#10860) + [nextmedia] Recognize action news on AppleDaily diff --git a/youtube_dl/extractor/abc.py b/youtube_dl/extractor/abc.py index 465249bbf..0247cabf9 100644 --- a/youtube_dl/extractor/abc.py +++ b/youtube_dl/extractor/abc.py @@ -102,16 +102,16 @@ class ABCIViewIE(InfoExtractor): # ABC iview programs are normally available for 14 days only. _TESTS = [{ - 'url': 'http://iview.abc.net.au/programs/gardening-australia/FA1505V024S00', - 'md5': '979d10b2939101f0d27a06b79edad536', + 'url': 'http://iview.abc.net.au/programs/diaries-of-a-broken-mind/ZX9735A001S00', + 'md5': 'cde42d728b3b7c2b32b1b94b4a548afc', 'info_dict': { - 'id': 'FA1505V024S00', + 'id': 'ZX9735A001S00', 'ext': 'mp4', - 'title': 'Series 27 Ep 24', - 'description': 'md5:b28baeae7504d1148e1d2f0e3ed3c15d', - 'upload_date': '20160820', - 'uploader_id': 'abc1', - 'timestamp': 1471719600, + 'title': 'Diaries Of A Broken Mind', + 'description': 'md5:7de3903874b7a1be279fe6b68718fc9e', + 'upload_date': '20161010', + 'uploader_id': 'abc2', + 'timestamp': 1476064920, }, 'skip': 'Video gone', }] @@ -121,7 +121,7 @@ class ABCIViewIE(InfoExtractor): webpage = self._download_webpage(url, video_id) video_params = self._parse_json(self._search_regex( r'videoParams\s*=\s*({.+?});', webpage, 'video params'), video_id) - title = video_params['title'] + title = video_params.get('title') or video_params['seriesTitle'] stream = next(s for s in video_params['playlist'] if s.get('type') == 'program') formats = self._extract_akamai_formats(stream['hds-unmetered'], video_id) @@ -144,8 +144,8 @@ class ABCIViewIE(InfoExtractor): 'timestamp': parse_iso8601(video_params.get('pubDate'), ' '), 'series': video_params.get('seriesTitle'), 'series_id': video_params.get('seriesHouseNumber') or video_id[:7], - 'episode_number': int_or_none(self._html_search_meta('episodeNumber', webpage)), - 'episode': self._html_search_meta('episode_title', webpage), + 'episode_number': int_or_none(self._html_search_meta('episodeNumber', webpage, default=None)), + 'episode': self._html_search_meta('episode_title', webpage, default=None), 'uploader_id': video_params.get('channel'), 'formats': formats, 'subtitles': subtitles, From 555787d717985531b3beba566cb976fd3f849aaa Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 11 Oct 2016 17:44:35 +0800 
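The videoId pattern used by HBOEpisodeIE above pulls the numeric id out of an inline player config on the episode page. A small self-check; the q1/q2/video_id group names follow the upstream extractor source, and the page fragment is made up:

    import re

    pattern = (r'(?P<q1>[\'"])videoId(?P=q1)\s*:\s*'
               r'(?P<q2>[\'"])(?P<video_id>\d+)(?P=q2)')
    fragment = '{"videoId": "1439518", "autoplay": true}'  # made-up page fragment
    print(re.search(pattern, fragment).group('video_id'))  # 1439518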
Subject: [PATCH 647/775] [streamable] Add helper for extracting embedded videos --- youtube_dl/extractor/streamable.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/youtube_dl/extractor/streamable.py b/youtube_dl/extractor/streamable.py index 1c61437a4..56b926448 100644 --- a/youtube_dl/extractor/streamable.py +++ b/youtube_dl/extractor/streamable.py @@ -1,6 +1,8 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..utils import ( ExtractorError, @@ -48,6 +50,15 @@ class StreamableIE(InfoExtractor): } ] + @staticmethod + def _extract_url(webpage): + print(webpage) + mobj = re.search( + r']+src=(?P[\'"])(?P(?:https?:)?//streamable\.com/(?:(?!\1).+))(?P=q1)', + webpage) + if mobj: + return mobj.group('src') + def _real_extract(self, url): video_id = self._match_id(url) From c452e69d3d3e6bbbec298a5d4b032cb502cef0ab Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 11 Oct 2016 17:46:13 +0800 Subject: [PATCH 648/775] [footyroom] Fix extraction and update _TESTS (closes #10810) --- ChangeLog | 1 + youtube_dl/extractor/footyroom.py | 32 ++++++++++++++++++------------- 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/ChangeLog b/ChangeLog index 26f01790a..76c446a6d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,6 +4,7 @@ Core * [Makefile] Support for GNU make < 4 is fixed; BSD make dropped (#9387) Extractors +* [footyroom] Fix extraction (#10810) * [abc.net.au:iview] Fix for standalone (non series) videos (#10895) + [hbo] Add support for episode pages (#10892) * [allocine] Fix extraction (#10860) diff --git a/youtube_dl/extractor/footyroom.py b/youtube_dl/extractor/footyroom.py index d2503ae2e..118325b6d 100644 --- a/youtube_dl/extractor/footyroom.py +++ b/youtube_dl/extractor/footyroom.py @@ -2,25 +2,27 @@ from __future__ import unicode_literals from .common import InfoExtractor +from .streamable import StreamableIE class FootyRoomIE(InfoExtractor): - _VALID_URL = r'https?://footyroom\.com/(?P[^/]+)' + _VALID_URL = r'https?://footyroom\.com/matches/(?P\d+)' _TESTS = [{ - 'url': 'http://footyroom.com/schalke-04-0-2-real-madrid-2015-02/', + 'url': 'http://footyroom.com/matches/79922154/hull-city-vs-chelsea/review', 'info_dict': { - 'id': 'schalke-04-0-2-real-madrid-2015-02', - 'title': 'Schalke 04 0 – 2 Real Madrid', + 'id': '79922154', + 'title': 'VIDEO Hull City 0 - 2 Chelsea', }, - 'playlist_count': 3, - 'skip': 'Video for this match is not available', + 'playlist_count': 2, + 'add_ie': [StreamableIE.ie_key()], }, { - 'url': 'http://footyroom.com/georgia-0-2-germany-2015-03/', + 'url': 'http://footyroom.com/matches/75817984/georgia-vs-germany/review', 'info_dict': { - 'id': 'georgia-0-2-germany-2015-03', - 'title': 'Georgia 0 – 2 Germany', + 'id': '75817984', + 'title': 'VIDEO Georgia 0 - 2 Germany', }, 'playlist_count': 1, + 'add_ie': ['Playwire'] }] def _real_extract(self, url): @@ -28,9 +30,8 @@ class FootyRoomIE(InfoExtractor): webpage = self._download_webpage(url, playlist_id) - playlist = self._parse_json( - self._search_regex( - r'VideoSelector\.load\((\[.+?\])\);', webpage, 'video selector'), + playlist = self._parse_json(self._search_regex( + r'DataStore\.media\s*=\s*([^;]+)', webpage, 'media data'), playlist_id) playlist_title = self._og_search_title(webpage) @@ -40,11 +41,16 @@ class FootyRoomIE(InfoExtractor): payload = video.get('payload') if not payload: continue - playwire_url = self._search_regex( + playwire_url = self._html_search_regex( r'data-config="([^"]+)"', payload, 'playwire 
url', default=None) if playwire_url: entries.append(self.url_result(self._proto_relative_url( playwire_url, 'http:'), 'Playwire')) + streamable_url = StreamableIE._extract_url(payload) + if streamable_url: + entries.append(self.url_result( + streamable_url, StreamableIE.ie_key())) + return self.playlist_result(entries, playlist_id, playlist_title) From 3d643f4cec5ded2be9958d5cd0e31176b2074e37 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 11 Oct 2016 17:46:52 +0800 Subject: [PATCH 649/775] [hbo] Add HBOEpisodeIE (#10892) --- youtube_dl/extractor/extractors.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 5c1d2abfb..08bed8b0c 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -348,7 +348,10 @@ from .goshgay import GoshgayIE from .gputechconf import GPUTechConfIE from .groupon import GrouponIE from .hark import HarkIE -from .hbo import HBOIE +from .hbo import ( + HBOIE, + HBOEpisodeIE, +) from .hearthisat import HearThisAtIE from .heise import HeiseIE from .hellporno import HellPornoIE From 55642487f072565bea3b2826b836a1a3159a3807 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 11 Oct 2016 20:50:52 +0800 Subject: [PATCH 650/775] [nhl] Skip invalid m3u8 formats (closes #10713) --- ChangeLog | 1 + youtube_dl/extractor/nhl.py | 18 ++++++++++++++++-- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index 76c446a6d..9a7e7133b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,6 +4,7 @@ Core * [Makefile] Support for GNU make < 4 is fixed; BSD make dropped (#9387) Extractors +* [nhl] Correctly handle invalid formats (#10713) * [footyroom] Fix extraction (#10810) * [abc.net.au:iview] Fix for standalone (non series) videos (#10895) + [hbo] Add support for episode pages (#10892) diff --git a/youtube_dl/extractor/nhl.py b/youtube_dl/extractor/nhl.py index 26149c88f..62ce800c0 100644 --- a/youtube_dl/extractor/nhl.py +++ b/youtube_dl/extractor/nhl.py @@ -274,6 +274,18 @@ class NHLIE(InfoExtractor): 'upload_date': '20160204', 'timestamp': 1454544904, }, + }, { + # Some m3u8 URLs are invalid (https://github.com/rg3/youtube-dl/issues/10713) + 'url': 'https://www.nhl.com/predators/video/poile-laviolette-on-subban-trade/t-277437416/c-44315003', + 'md5': '50b2bb47f405121484dda3ccbea25459', + 'info_dict': { + 'id': '44315003', + 'ext': 'mp4', + 'title': 'Poile, Laviolette on Subban trade', + 'description': 'General manager David Poile and head coach Peter Laviolette share their thoughts on acquiring P.K. 
Subban from Montreal (06/29/16)', + 'timestamp': 1467242866, + 'upload_date': '20160629', + }, }, { 'url': 'https://www.wch2016.com/video/caneur-best-of-game-2-micd-up/t-281230378/c-44983703', 'only_matching': True, @@ -301,9 +313,11 @@ class NHLIE(InfoExtractor): continue ext = determine_ext(playback_url) if ext == 'm3u8': - formats.extend(self._extract_m3u8_formats( + m3u8_formats = self._extract_m3u8_formats( playback_url, video_id, 'mp4', 'm3u8_native', - m3u8_id=playback.get('name', 'hls'), fatal=False)) + m3u8_id=playback.get('name', 'hls'), fatal=False) + self._check_formats(m3u8_formats, video_id) + formats.extend(m3u8_formats) else: height = int_or_none(playback.get('height')) formats.append({ From cea364f70c97dad933fa38698f3c9df1bdb485cf Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Wed, 12 Oct 2016 01:40:28 +0800 Subject: [PATCH 651/775] [extractor/common] Support HTML media elements without child nodes --- ChangeLog | 1 + youtube_dl/extractor/common.py | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 9a7e7133b..49488c888 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Core ++ Support HTML media elements without child nodes * [Makefile] Support for GNU make < 4 is fixed; BSD make dropped (#9387) Extractors diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index da192728f..431cef831 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1802,7 +1802,11 @@ class InfoExtractor(object): return is_plain_url, formats entries = [] - for media_tag, media_type, media_content in re.findall(r'(?s)(<(?Pvideo|audio)[^>]*>)(.*?)', webpage): + media_tags = [(media_tag, media_type, '') + for media_tag, media_type + in re.findall(r'(?s)(<(video|audio)[^>]*/>)', webpage)] + media_tags.extend(re.findall(r'(?s)(<(?Pvideo|audio)[^>]*>)(.*?)', webpage)) + for media_tag, media_type, media_content in media_tags: media_info = { 'formats': [], 'subtitles': {}, From 6f20b65e728ee30d9b987a39932a3355501f7f67 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Wed, 12 Oct 2016 01:41:41 +0800 Subject: [PATCH 652/775] [test/test_http] Update tests After switching to HTML5 extraction helpers in generic.py, the result info_dict is always a playlist. --- test/test_http.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_http.py b/test/test_http.py index fdc68ccb4..bb0a098e4 100644 --- a/test/test_http.py +++ b/test/test_http.py @@ -87,7 +87,7 @@ class TestHTTP(unittest.TestCase): ydl = YoutubeDL({'logger': FakeLogger()}) r = ydl.extract_info('http://localhost:%d/302' % self.port) - self.assertEqual(r['url'], 'http://localhost:%d/vid.mp4' % self.port) + self.assertEqual(r['entries'][0]['url'], 'http://localhost:%d/vid.mp4' % self.port) class TestHTTPS(unittest.TestCase): @@ -111,7 +111,7 @@ class TestHTTPS(unittest.TestCase): ydl = YoutubeDL({'logger': FakeLogger(), 'nocheckcertificate': True}) r = ydl.extract_info('https://localhost:%d/video.html' % self.port) - self.assertEqual(r['url'], 'https://localhost:%d/vid.mp4' % self.port) + self.assertEqual(r['entries'][0]['url'], 'https://localhost:%d/vid.mp4' % self.port) def _build_proxy_handler(name): From a093cfc78b584a6e5dbc4bbca525f9e40af9522d Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Wed, 12 Oct 2016 01:48:06 +0800 Subject: [PATCH 653/775] [vimeo:review] Fix extraction (#10900) Now Vimeo Review videos uses React. Thanks @davekaro for analyzing the problem! 
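In other words, the config URL now has to be read from a window = _extend(window, {...}) blob instead of a data-config-url attribute. A rough sketch of the new lookup; the page fragment below is made up, with its shape inferred from the patch:

    import json
    import re

    # made-up fragment of a React-style review page
    webpage = ('<script>window = _extend(window, {"vimeo_esi": {"config": '
               '{"configUrl": "https://player.vimeo.com/video/75196240/config"}}});</script>')
    data = json.loads(re.search(
        r'window\s*=\s*_extend\(window,\s*({.+?})\);', webpage).group(1))
    print(data.get('vimeo_esi', {}).get('config', {}).get('configUrl'))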
--- ChangeLog | 1 + youtube_dl/extractor/vimeo.py | 8 +++++--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/ChangeLog b/ChangeLog index 49488c888..3e16a2cb3 100644 --- a/ChangeLog +++ b/ChangeLog @@ -5,6 +5,7 @@ Core * [Makefile] Support for GNU make < 4 is fixed; BSD make dropped (#9387) Extractors +* [vimeo:review] Fix extraction (#10900) * [nhl] Correctly handle invalid formats (#10713) * [footyroom] Fix extraction (#10810) * [abc.net.au:iview] Fix for standalone (non series) videos (#10895) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index a46c5c282..b566241cc 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -837,6 +837,7 @@ class VimeoReviewIE(VimeoBaseInfoExtractor): 'params': { 'videopassword': 'holygrail', }, + 'skip': 'video gone', }] def _real_initialize(self): @@ -844,9 +845,10 @@ class VimeoReviewIE(VimeoBaseInfoExtractor): def _get_config_url(self, webpage_url, video_id, video_password_verified=False): webpage = self._download_webpage(webpage_url, video_id) - config_url = self._html_search_regex( - r'data-config-url="([^"]+)"', webpage, 'config URL', - default=NO_DEFAULT if video_password_verified else None) + data = self._parse_json(self._search_regex( + r'window\s*=\s*_extend\(window,\s*({.+?})\);', webpage, 'data', + default=NO_DEFAULT if video_password_verified else '{}'), video_id) + config_url = data.get('vimeo_esi', {}).get('config', {}).get('configUrl') if config_url is None: self._verify_video_password(webpage_url, video_id, webpage) config_url = self._get_config_url( From 9feb1c97318bbd575af6c2737dfe66412e1c0bb6 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Wed, 12 Oct 2016 21:45:49 +0800 Subject: [PATCH 654/775] [dailymotion] Fix extraction and update _TESTS Closes #10901 Seems all videos use player V5 syntax now --- ChangeLog | 1 + youtube_dl/extractor/dailymotion.py | 6 ++++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index 3e16a2cb3..fd3e8c2fa 100644 --- a/ChangeLog +++ b/ChangeLog @@ -5,6 +5,7 @@ Core * [Makefile] Support for GNU make < 4 is fixed; BSD make dropped (#9387) Extractors +* [dailymotion] Fix extraction (#10901) * [vimeo:review] Fix extraction (#10900) * [nhl] Correctly handle invalid formats (#10713) * [footyroom] Fix extraction (#10810) diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index 62b0747a5..4a3314ea7 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -94,7 +94,8 @@ class DailymotionIE(DailymotionBaseInfoExtractor): 'title': 'Leanna Decker - Cyber Girl Of The Year Desires Nude [Playboy Plus]', 'uploader': 'HotWaves1012', 'age_limit': 18, - } + }, + 'skip': 'video gone', }, # geo-restricted, player v5 { @@ -144,7 +145,8 @@ class DailymotionIE(DailymotionBaseInfoExtractor): player_v5 = self._search_regex( [r'buildPlayer\(({.+?})\);\n', # See https://github.com/rg3/youtube-dl/issues/7826 r'playerV5\s*=\s*dmp\.create\([^,]+?,\s*({.+?})\);', - r'buildPlayer\(({.+?})\);'], + r'buildPlayer\(({.+?})\);', + r'var\s+config\s*=\s*({.+?});'], webpage, 'player v5', default=None) if player_v5: player = self._parse_json(player_v5, video_id) From 591e384552f44fe5d77015d17fa7f71efa66f778 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 12 Oct 2016 21:22:12 +0700 Subject: [PATCH 655/775] [streamable] Remove debug output --- youtube_dl/extractor/streamable.py | 1 - 1 file changed, 1 deletion(-) diff --git 
a/youtube_dl/extractor/streamable.py b/youtube_dl/extractor/streamable.py index 56b926448..2c26fa689 100644 --- a/youtube_dl/extractor/streamable.py +++ b/youtube_dl/extractor/streamable.py @@ -52,7 +52,6 @@ class StreamableIE(InfoExtractor): @staticmethod def _extract_url(webpage): - print(webpage) mobj = re.search( r']+src=(?P[\'"])(?P(?:https?:)?//streamable\.com/(?:(?!\1).+))(?P=q1)', webpage) From bcd6276520e67f59b95dffdb280703328cab82de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 12 Oct 2016 21:22:33 +0700 Subject: [PATCH 656/775] [downloader/common] Remove debug output --- youtube_dl/downloader/common.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py index 8482cbd84..3dc144b4e 100644 --- a/youtube_dl/downloader/common.py +++ b/youtube_dl/downloader/common.py @@ -346,7 +346,6 @@ class FileDownloader(object): min_sleep_interval = self.params.get('sleep_interval') if min_sleep_interval: max_sleep_interval = self.params.get('max_sleep_interval', min_sleep_interval) - print(min_sleep_interval, max_sleep_interval) sleep_interval = random.uniform(min_sleep_interval, max_sleep_interval) self.to_screen('[download] Sleeping %s seconds...' % sleep_interval) time.sleep(sleep_interval) From 7104ae799c18e36070d91d570d48c55d651cd4b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 12 Oct 2016 21:25:04 +0700 Subject: [PATCH 657/775] [ChangeLog] Actualize --- ChangeLog | 1 + 1 file changed, 1 insertion(+) diff --git a/ChangeLog b/ChangeLog index fd3e8c2fa..cc526429a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -13,6 +13,7 @@ Extractors + [hbo] Add support for episode pages (#10892) * [allocine] Fix extraction (#10860) + [nextmedia] Recognize action news on AppleDaily +* [lego] Improve info extraction and bypass geo restriction (#10872) version 2016.10.07 From 5c4bfd4da5d532bf8d5aaf1bb37396f7cfbc786b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 12 Oct 2016 21:30:05 +0700 Subject: [PATCH 658/775] release 2016.10.12 --- .github/ISSUE_TEMPLATE.md | 6 +++--- CONTRIBUTING.md | 2 +- ChangeLog | 2 +- docs/supportedsites.md | 1 + youtube_dl/version.py | 2 +- 5 files changed, 7 insertions(+), 6 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 15a93776b..865817681 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.10.07*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.10.07** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.10.12*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
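Stepping back to the StreamableIE._extract_url helper touched above: it only scans a page for a streamable.com iframe. A standalone sketch; the <iframe[^>]+ prefix and the q1/src group names follow the upstream extractor source, and the page fragment is made up:

    import re

    pattern = (r'<iframe[^>]+src=(?P<q1>[\'"])'
               r'(?P<src>(?:https?:)?//streamable\.com/(?:(?!\1).+))(?P=q1)')
    page = '<div><iframe width="100%" src="https://streamable.com/e/dnd1"></iframe></div>'
    mobj = re.search(pattern, page)
    if mobj:
        print(mobj.group('src'))  # https://streamable.com/e/dnd1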
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.10.12** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.10.07 +[debug] youtube-dl version 2016.10.12 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 95392030e..62acf9abd 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -85,7 +85,7 @@ To run the test, simply invoke your favorite test runner, or execute a test file If you want to create a build of youtube-dl yourself, you'll need * python -* make (both GNU make and BSD make are supported) +* make (only GNU make is supported) * pandoc * zip * nosetests diff --git a/ChangeLog b/ChangeLog index cc526429a..e3a733410 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2016.10.12 Core + Support HTML media elements without child nodes diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 5bbef0c41..9b540b3df 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -289,6 +289,7 @@ - **Groupon** - **Hark** - **HBO** + - **HBOEpisode** - **HearThisAt** - **Heise** - **HellPorno** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index ac0921b7a..44cc18828 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.10.07' +__version__ = '2016.10.12' From 580d41193169d004c94145ef03d5c53f06d5a57c Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 14 Oct 2016 00:44:28 +0800 Subject: [PATCH 659/775] [parliamentliveuk] Recognize lower case URLs Closes #10912 Seems parliamentliveuk matches URLs case-insentive. 
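A quick check of the inline (?i) flag added below; the UUID part of the pattern is shortened here for brevity, and the upper-case URL is just an illustrative variant:

    import re

    valid_url = r'(?i)https?://(?:www\.)?parliamentlive\.tv/Event/Index/(?P<id>[\da-f-]+)'
    for url in ('http://parliamentlive.tv/Event/Index/c1e9d44d-fd6c-4263-b50f-97ed26cc998b',
                'http://parliamentlive.tv/event/index/3F24936F-130F-40BF-9A5D-B3D6479DA6A4'):
        print(re.match(valid_url, url).group('id'))  # both case variants now match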
For example this URL also works: http://parliamentlive.tv/EvEnt/Index/3F24936f-130f-40bf-9a5d-b3d6479da6a4 --- ChangeLog | 6 ++++++ youtube_dl/extractor/parliamentliveuk.py | 9 ++++++--- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/ChangeLog b/ChangeLog index e3a733410..d2b78a489 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Extractors +* [parliamentliveuk] Lower case URLs are now recognized (#10912) + + version 2016.10.12 Core diff --git a/youtube_dl/extractor/parliamentliveuk.py b/youtube_dl/extractor/parliamentliveuk.py index 874aacc55..ebdab8db9 100644 --- a/youtube_dl/extractor/parliamentliveuk.py +++ b/youtube_dl/extractor/parliamentliveuk.py @@ -6,9 +6,9 @@ from .common import InfoExtractor class ParliamentLiveUKIE(InfoExtractor): IE_NAME = 'parliamentlive.tv' IE_DESC = 'UK parliament videos' - _VALID_URL = r'https?://(?:www\.)?parliamentlive\.tv/Event/Index/(?P[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})' + _VALID_URL = r'(?i)https?://(?:www\.)?parliamentlive\.tv/Event/Index/(?P[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})' - _TEST = { + _TESTS = [{ 'url': 'http://parliamentlive.tv/Event/Index/c1e9d44d-fd6c-4263-b50f-97ed26cc998b', 'info_dict': { 'id': 'c1e9d44d-fd6c-4263-b50f-97ed26cc998b', @@ -18,7 +18,10 @@ class ParliamentLiveUKIE(InfoExtractor): 'timestamp': 1422696664, 'upload_date': '20150131', }, - } + }, { + 'url': 'http://parliamentlive.tv/event/index/3f24936f-130f-40bf-9a5d-b3d6479da6a4', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) From b7f59a3bf69b5c935be085551d30ce4d0b8a97d4 Mon Sep 17 00:00:00 2001 From: Philip Xu Date: Thu, 13 Oct 2016 21:51:26 -0400 Subject: [PATCH 660/775] [huajiao] Add new extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/huajiao.py | 50 ++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+) create mode 100644 youtube_dl/extractor/huajiao.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 08bed8b0c..75e16af4e 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -372,6 +372,7 @@ from .hrti import ( HRTiIE, HRTiPlaylistIE, ) +from .huajiao import HuajiaoIE from .huffpost import HuffPostIE from .hypem import HypemIE from .iconosquare import IconosquareIE diff --git a/youtube_dl/extractor/huajiao.py b/youtube_dl/extractor/huajiao.py new file mode 100644 index 000000000..352b48120 --- /dev/null +++ b/youtube_dl/extractor/huajiao.py @@ -0,0 +1,50 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from ..utils import parse_duration, parse_iso8601 +from .common import InfoExtractor + + +class HuajiaoIE(InfoExtractor): + IE_DESC = '花椒直播' + _VALID_URL = r'https?://(?:www\.)?huajiao\.com/l/(?P[0-9]+)' + _TEST = { + 'url': 'http://www.huajiao.com/l/38941232', + 'md5': 'd08bf9ac98787d24d1e4c0283f2d372d', + 'info_dict': { + 'id': '38941232', + 'ext': 'mp4', + 'title': '#新人求关注#', + 'description': 're:.*', + 'duration': 2424.0, + 'thumbnail': 're:^https?://.*\.jpg$', + 'timestamp': 1475866459, + 'upload_date': '20161007', + 'uploader': 'Penny_余姿昀', + 'uploader_id': '75206005', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + feed_json = self._search_regex( + r'var\s*feed\s*=\s*({.*})', webpage, 'feed json str') + feed = self._parse_json(feed_json, video_id) + + description = self._html_search_meta( + 'description', webpage, 'description', 

From a5f847314582d4464e587120c6e696399ff121cb Mon Sep 17 00:00:00 2001
From: Yen Chi Hsuan
Date: Fri, 14 Oct 2016 18:20:01 +0800
Subject: [PATCH 661/775] [cbsinteractive] Fix extraction for cnet.com

---
 ChangeLog                              | 1 +
 youtube_dl/extractor/cbsinteractive.py | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/ChangeLog b/ChangeLog
index d2b78a489..edd547811 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,6 +1,7 @@
 version
 
 Extractors
+* [cbsinteractive] Fix extraction for cnet.com
 * [parliamentliveuk] Lower case URLs are now recognized (#10912)
 
 
diff --git a/youtube_dl/extractor/cbsinteractive.py b/youtube_dl/extractor/cbsinteractive.py
index 821db20b2..57b18e81d 100644
--- a/youtube_dl/extractor/cbsinteractive.py
+++ b/youtube_dl/extractor/cbsinteractive.py
@@ -63,7 +63,7 @@ class CBSInteractiveIE(ThePlatformIE):
         webpage = self._download_webpage(url, display_id)
 
         data_json = self._html_search_regex(
-            r"data-(?:cnet|zdnet)-video(?:-uvp)?-options='([^']+)'",
+            r"data-(?:cnet|zdnet)-video(?:-uvp(?:js)?)?-options='([^']+)'",
            webpage, 'data json')
         data = self._parse_json(data_json, display_id)
         vdata = data.get('video') or data['videos'][0]
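
The cnet.com fix in PATCH 661 is a single optional group: the options regex gains (?:js)? so that pages carrying the newer data-cnet-video-uvpjs-options attribute are recognized again, while the older data-cnet-video-options and data-cnet-video-uvp-options spellings keep matching. A standalone before/after comparison (the attribute snippets below are fabricated; only the two regexes are taken from the diff):

    # Standalone sketch comparing the old and new pattern from
    # cbsinteractive.py; the sample attributes are invented.
    import re

    OLD = r"data-(?:cnet|zdnet)-video(?:-uvp)?-options='([^']+)'"
    NEW = r"data-(?:cnet|zdnet)-video(?:-uvp(?:js)?)?-options='([^']+)'"

    samples = [
        "data-cnet-video-options='{\"id\": 1}'",
        "data-cnet-video-uvp-options='{\"id\": 2}'",
        "data-cnet-video-uvpjs-options='{\"id\": 3}'",  # only NEW matches this one
    ]
    for attr in samples:
        print('old=%-5s new=%-5s %s' % (
            bool(re.search(OLD, attr)), bool(re.search(NEW, attr)), attr))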

From e2004ccaf711ff9aa9c0b647c3d6219093fb6c2a Mon Sep 17 00:00:00 2001
From: Yen Chi Hsuan
Date: Fri, 14 Oct 2016 20:26:12 +0800
Subject: [PATCH 662/775] [canalplus] Fix video_id and update _TESTS

Some tests are gone, and some redirect to different videos
---
 ChangeLog                         |  1 +
 youtube_dl/extractor/canalplus.py | 64 +++++++++++++++----------------
 2 files changed, 33 insertions(+), 32 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index edd547811..32390f227 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,6 +1,7 @@
 version
 
 Extractors
+* [canalplus] Fix extraction for some videos
 * [cbsinteractive] Fix extraction for cnet.com
 * [parliamentliveuk] Lower case URLs are now recognized (#10912)
 
diff --git a/youtube_dl/extractor/canalplus.py b/youtube_dl/extractor/canalplus.py
index 6dab226af..1c3c41d26 100644
--- a/youtube_dl/extractor/canalplus.py
+++ b/youtube_dl/extractor/canalplus.py
@@ -6,11 +6,13 @@ import re
 from .common import InfoExtractor
 from ..compat import compat_urllib_parse_urlparse
 from ..utils import (
+    dict_get,
     ExtractorError,
     HEADRequest,
-    unified_strdate,
-    qualities,
     int_or_none,
+    qualities,
+    remove_end,
+    unified_strdate,
 )
 
 
@@ -43,47 +45,46 @@ class CanalplusIE(InfoExtractor):
 
     _TESTS = [{
         'url': 'http://www.canalplus.fr/c-emissions/pid1830-c-zapping.html?vid=1192814',
-        'md5': '41f438a4904f7664b91b4ed0dec969dc',
         'info_dict': {
-            'id': '1192814',
+            'id': '1405510',
+            'display_id': 'pid1830-c-zapping',
             'ext': 'mp4',
-            'title': "L'Année du Zapping 2014 - L'Année du Zapping 2014",
-            'description': "Toute l'année 2014 dans un Zapping exceptionnel !",
-            'upload_date': '20150105',
+            'title': 'Zapping - 02/07/2016',
+            'description': 'Le meilleur de toutes les chaînes, tous les jours',
+            'upload_date': '20160702',
         },
     }, {
         'url': 'http://www.piwiplus.fr/videos-piwi/pid1405-le-labyrinthe-boing-super-ranger.html?vid=1108190',
         'info_dict': {
             'id': '1108190',
-            'ext': 'flv',
-            'title': 'Le labyrinthe - Boing super ranger',
+            'display_id': 'pid1405-le-labyrinthe-boing-super-ranger',
+            'ext': 'mp4',
+            'title': 'BOING SUPER RANGER - Ep : Le labyrinthe',
             'description': 'md5:4cea7a37153be42c1ba2c1d3064376ff',
             'upload_date': '20140724',
         },
         'skip': 'Only works from France',
     }, {
-        'url': 'http://www.d8.tv/d8-docs-mags/pid5198-d8-en-quete-d-actualite.html?vid=1390231',
+        'url': 'http://www.c8.fr/c8-divertissement/ms-touche-pas-a-mon-poste/pid6318-videos-integrales.html',
+        'md5': '4b47b12b4ee43002626b97fad8fb1de5',
         'info_dict': {
-            'id': '1390231',
+            'id': '1420213',
+            'display_id': 'pid6318-videos-integrales',
             'ext': 'mp4',
-            'title': "Vacances pas chères : prix discount ou grosses dépenses ? - En quête d'actualité",
-            'description': 'md5:edb6cf1cb4a1e807b5dd089e1ac8bfc6',
-            'upload_date': '20160512',
-        },
-        'params': {
-            'skip_download': True,
+            'title': 'TPMP ! Même le matin - Les 35H de Baba - 14/10/2016',
+            'description': 'md5:f96736c1b0ffaa96fd5b9e60ad871799',
+            'upload_date': '20161014',
         },
+        'skip': 'Only works from France',
     }, {
-        'url': 'http://www.itele.fr/chroniques/invite-bruce-toussaint/thierry-solere-nicolas-sarkozy-officialisera-sa-candidature-a-la-primaire-quand-il-le-voudra-167224',
+        'url': 'http://www.itele.fr/chroniques/invite-michael-darmon/rachida-dati-nicolas-sarkozy-est-le-plus-en-phase-avec-les-inquietudes-des-francais-171510',
         'info_dict': {
-            'id': '1398334',
+            'id': '1420176',
+            'display_id': 'rachida-dati-nicolas-sarkozy-est-le-plus-en-phase-avec-les-inquietudes-des-francais-171510',
             'ext': 'mp4',
-            'title': "L'invité de Bruce Toussaint du 07/06/2016 - ",
-            'description': 'md5:40ac7c9ad0feaeb6f605bad986f61324',
-            'upload_date': '20160607',
-        },
-        'params': {
-            'skip_download': True,
+            'title': 'L\'invité de Michaël Darmon du 14/10/2016 - ',
+            'description': 'Chaque matin du lundi au vendredi, Michaël Darmon reçoit un invité politique à 8h25.',
+            'upload_date': '20161014',
         },
     }, {
         'url': 'http://m.canalplus.fr/?vid=1398231',
@@ -95,18 +96,17 @@ class CanalplusIE(InfoExtractor):
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.groupdict().get('id') or mobj.groupdict().get('vid')
 
         site_id = self._SITE_ID_MAP[compat_urllib_parse_urlparse(url).netloc.rsplit('.', 2)[-2]]
 
         # Beware, some subclasses do not define an id group
-        display_id = mobj.group('display_id') or video_id
+        display_id = remove_end(dict_get(mobj.groupdict(), ('display_id', 'id', 'vid')), '.html')
 
-        if video_id is None:
-            webpage = self._download_webpage(url, display_id)
-            video_id = self._search_regex(
-                [r']+?videoId=(["\'])(?P<id>\d+)', r'id=["\']canal_video_player(?P<id>\d+)'],
-                webpage, 'video id', group='id')
+        webpage = self._download_webpage(url, display_id)
+        video_id = self._search_regex(
+            [r']+?videoId=(["\'])(?P<id>\d+)',
+             r'id=["\']canal_video_player(?P<id>\d+)'],
+            webpage, 'video id', group='id')
 
         info_url = self._VIDEO_INFO_TEMPLATE % (site_id, video_id)
         video_data = self._download_json(info_url, video_id, 'Downloading video JSON')

From 146969e05bc2e2774aa96c62030cdb85ca5c7667 Mon Sep 17 00:00:00 2001
From: Yen Chi Hsuan
Date: Fri, 14 Oct 2016 23:42:11 +0800
Subject: [PATCH 663/775] [videomore] Support