From 8bf47118ef25987ee563b11558bfa44aae9189d1 Mon Sep 17 00:00:00 2001 From: RPing Date: Wed, 18 Nov 2015 17:12:18 +0800 Subject: [PATCH 1/5] enhance udn support --- test/unittest_all_urls.py | 159 +++++++++++++++++++++++++++++++ youtube_dl/extractor/__init__.py | 5 +- youtube_dl/extractor/udn.py | 7 ++ 3 files changed, 170 insertions(+), 1 deletion(-) create mode 100644 test/unittest_all_urls.py diff --git a/test/unittest_all_urls.py b/test/unittest_all_urls.py new file mode 100644 index 000000000..2872c05e9 --- /dev/null +++ b/test/unittest_all_urls.py @@ -0,0 +1,159 @@ +#!/usr/bin/env python + +from __future__ import unicode_literals + +# Allow direct execution +import os +import sys +import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +from test.helper import gettestcases + +from youtube_dl.extractor import ( + FacebookIE, + gen_extractors, + YoutubeIE, +) + + +class TestAllURLsMatching(unittest.TestCase): + def setUp(self): + self.ies = gen_extractors() + + def matching_ies(self, url): + return [ie.IE_NAME for ie in self.ies if ie.suitable(url) and ie.IE_NAME != 'generic'] + + def assertMatch(self, url, ie_list): + self.assertEqual(self.matching_ies(url), ie_list) + + def test_youtube_playlist_matching(self): + assertPlaylist = lambda url: self.assertMatch(url, ['youtube:playlist']) + assertPlaylist('ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8') + assertPlaylist('UUBABnxM4Ar9ten8Mdjj1j0Q') # 585 + assertPlaylist('PL63F0C78739B09958') + assertPlaylist('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q') + assertPlaylist('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8') + assertPlaylist('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC') + assertPlaylist('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012') # 668 + self.assertFalse('youtube:playlist' in self.matching_ies('PLtS2H6bU1M')) + # Top tracks + 
assertPlaylist('https://www.youtube.com/playlist?list=MCUS.20142101') + + def test_youtube_matching(self): + self.assertTrue(YoutubeIE.suitable('PLtS2H6bU1M')) + self.assertFalse(YoutubeIE.suitable('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) # 668 + self.assertMatch('http://youtu.be/BaW_jenozKc', ['youtube']) + self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube']) + self.assertMatch('https://youtube.googleapis.com/v/BaW_jenozKc', ['youtube']) + self.assertMatch('http://www.cleanvideosearch.com/media/action/yt/watch?videoId=8v_4O44sfjM', ['youtube']) + + def test_youtube_channel_matching(self): + assertChannel = lambda url: self.assertMatch(url, ['youtube:channel']) + assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM') + assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM?feature=gb_ch_rec') + assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM/videos') + + def test_youtube_user_matching(self): + self.assertMatch('www.youtube.com/NASAgovVideo/videos', ['youtube:user']) + + def test_youtube_feeds(self): + self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:watchlater']) + self.assertMatch('https://www.youtube.com/feed/subscriptions', ['youtube:subscriptions']) + self.assertMatch('https://www.youtube.com/feed/recommended', ['youtube:recommended']) + self.assertMatch('https://www.youtube.com/my_favorites', ['youtube:favorites']) + + def test_youtube_show_matching(self): + self.assertMatch('http://www.youtube.com/show/airdisasters', ['youtube:show']) + + def test_youtube_search_matching(self): + self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url']) + self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url']) + self.assertMatch('https://www.youtube.com/results?lclk=week&search_query=making+mustard&filters=week', ['youtube:search:date']) + 
+ def test_youtube_extract(self): + assertExtractId = lambda url, id: self.assertEqual(YoutubeIE.extract_id(url), id) + assertExtractId('http://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc') + assertExtractId('https://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc') + assertExtractId('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc', 'BaW_jenozKc') + assertExtractId('https://www.youtube.com/watch_popup?v=BaW_jenozKc', 'BaW_jenozKc') + assertExtractId('http://www.youtube.com/watch?v=BaW_jenozKcsharePLED17F32AD9753930', 'BaW_jenozKc') + assertExtractId('BaW_jenozKc', 'BaW_jenozKc') + + def test_facebook_matching(self): + self.assertTrue(FacebookIE.suitable('https://www.facebook.com/Shiniknoh#!/photo.php?v=10153317450565268')) + self.assertTrue(FacebookIE.suitable('https://www.facebook.com/cindyweather?fref=ts#!/photo.php?v=10152183998945793')) + + def test_no_duplicates(self): + ies = gen_extractors() + for tc in gettestcases(include_onlymatching=True): + url = tc['url'] + for ie in ies: + if type(ie).__name__ in ('GenericIE', tc['name'] + 'IE'): + self.assertTrue(ie.suitable(url), '%s should match URL %r' % (type(ie).__name__, url)) + else: + self.assertFalse( + ie.suitable(url), + '%s should not match URL %r . That URL belongs to %s.' 
% (type(ie).__name__, url, tc['name'])) + + def test_keywords(self): + self.assertMatch(':ytsubs', ['youtube:subscriptions']) + self.assertMatch(':ytsubscriptions', ['youtube:subscriptions']) + self.assertMatch(':ythistory', ['youtube:history']) + self.assertMatch(':thedailyshow', ['ComedyCentralShows']) + self.assertMatch(':tds', ['ComedyCentralShows']) + + def test_vimeo_matching(self): + self.assertMatch('https://vimeo.com/channels/tributes', ['vimeo:channel']) + self.assertMatch('https://vimeo.com/channels/31259', ['vimeo:channel']) + self.assertMatch('https://vimeo.com/channels/31259/53576664', ['vimeo']) + self.assertMatch('https://vimeo.com/user7108434', ['vimeo:user']) + self.assertMatch('https://vimeo.com/user7108434/videos', ['vimeo:user']) + self.assertMatch('https://vimeo.com/user21297594/review/75524534/3c257a1b5d', ['vimeo:review']) + + # https://github.com/rg3/youtube-dl/issues/1930 + def test_soundcloud_not_matching_sets(self): + self.assertMatch('http://soundcloud.com/floex/sets/gone-ep', ['soundcloud:set']) + + def test_tumblr(self): + self.assertMatch('http://tatianamaslanydaily.tumblr.com/post/54196191430/orphan-black-dvd-extra-behind-the-scenes', ['Tumblr']) + self.assertMatch('http://tatianamaslanydaily.tumblr.com/post/54196191430', ['Tumblr']) + + def test_pbs(self): + # https://github.com/rg3/youtube-dl/issues/2350 + self.assertMatch('http://video.pbs.org/viralplayer/2365173446/', ['PBS']) + self.assertMatch('http://video.pbs.org/widget/partnerplayer/980042464/', ['PBS']) + + def test_yahoo_https(self): + # https://github.com/rg3/youtube-dl/issues/2701 + self.assertMatch( + 'https://screen.yahoo.com/smartwatches-latest-wearable-gadgets-163745379-cbs.html', + ['Yahoo']) + + def test_appledaily(self): + self.assertMatch('http://www.appledaily.com.tw/animation/appledaily/new/20151117/36903515/', ['AppleDaily']) + self.assertMatch('http://www.appledaily.com.tw/realtimenews/article/sports/20151117/734539/', ['AppleDaily']) + + def 
test_ctsnews(self): + self.assertMatch('http://news.cts.com.tw/cts/life/201511/201511151683198.html#.VkssxbNZOHs', ['CtsNews']) + self.assertMatch('http://news.cts.com.tw/cts/international/201511/201511171683689.html#.Vksv_bNZOHs', ['CtsNews']) + + def test_UDN(self): + self.assertMatch('https://video.udn.com/news/398685', ['UDN']) + self.assertMatch('https://video.udn.com/embed/news/300040', ['UDNEmbed']) + self.assertMatch('https://video.udn.com/play/news/303776', ['UDNEmbed']) + + def test_xuite(self): + self.assertMatch('http://vlog.xuite.net/play/T2lMdGpZLTk0NDA1MS5mbHY=', ['Xuite']) + + def test_yam(self): + self.assertMatch('http://mymedia.yam.com/m/2283921', ['Yam']) + self.assertMatch('http://mymedia.yam.com/m/3599430', ['Yam']) + + def test_mlb(self): + self.assertMatch('http://m.mlb.com/video/topic/9674738/v529001783/111015-mlbcom-fastcast-gold-gloves-announced', ['MLB']) + + +if __name__ == '__main__': + unittest.main() diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 26e5745d6..c0665c0e7 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -706,7 +706,10 @@ from .udemy import ( UdemyIE, UdemyCourseIE ) -from .udn import UDNEmbedIE +from .udn import ( + UDNEmbedIE, + UDNIE +) from .ultimedia import UltimediaIE from .unistra import UnistraIE from .urort import UrortIE diff --git a/youtube_dl/extractor/udn.py b/youtube_dl/extractor/udn.py index 2151f8338..852f9cad0 100644 --- a/youtube_dl/extractor/udn.py +++ b/youtube_dl/extractor/udn.py @@ -34,6 +34,10 @@ class UDNEmbedIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) + if isinstance(self, UDNIE): + p = url.index("com/") + 4 + url = url[:p] + "embed/" + url[p:] + page = self._download_webpage(url, video_id) options = json.loads(js_to_json(self._html_search_regex( @@ -73,3 +77,6 @@ class UDNEmbedIE(InfoExtractor): 'title': options['title'], 'thumbnail': thumbnail } + +class UDNIE(UDNEmbedIE): + 
_VALID_URL = r'https?://video\.udn\.com/news/(?P<id>\d+)' From 8c3665f5de32fe0adedd72c06d7de23a7f844ec2 Mon Sep 17 00:00:00 2001 From: RPing Date: Wed, 18 Nov 2015 17:54:40 +0800 Subject: [PATCH 2/5] enhance test cases and fix UDN style --- test/unittest_all_urls.py | 11 ++++++----- youtube_dl/extractor/__init__.py | 5 +---- youtube_dl/extractor/generic.py | 2 +- youtube_dl/extractor/udn.py | 13 +++++-------- 4 files changed, 13 insertions(+), 18 deletions(-) diff --git a/test/unittest_all_urls.py b/test/unittest_all_urls.py index 2872c05e9..162853925 100644 --- a/test/unittest_all_urls.py +++ b/test/unittest_all_urls.py @@ -70,7 +70,6 @@ class TestAllURLsMatching(unittest.TestCase): def test_youtube_search_matching(self): self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url']) self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url']) - self.assertMatch('https://www.youtube.com/results?lclk=week&search_query=making+mustard&filters=week', ['youtube:search:date']) def test_youtube_extract(self): assertExtractId = lambda url, id: self.assertEqual(YoutubeIE.extract_id(url), id) @@ -139,10 +138,10 @@ class TestAllURLsMatching(unittest.TestCase): self.assertMatch('http://news.cts.com.tw/cts/life/201511/201511151683198.html#.VkssxbNZOHs', ['CtsNews']) self.assertMatch('http://news.cts.com.tw/cts/international/201511/201511171683689.html#.Vksv_bNZOHs', ['CtsNews']) - def test_UDN(self): + def test_udn(self): self.assertMatch('https://video.udn.com/news/398685', ['UDN']) - self.assertMatch('https://video.udn.com/embed/news/300040', ['UDNEmbed']) - self.assertMatch('https://video.udn.com/play/news/303776', ['UDNEmbed']) + self.assertMatch('https://video.udn.com/embed/news/300040', ['UDN']) + self.assertMatch('https://video.udn.com/play/news/303776', ['UDN']) def test_xuite(self): 
self.assertMatch('http://vlog.xuite.net/play/T2lMdGpZLTk0NDA1MS5mbHY=', ['Xuite']) @@ -153,7 +152,9 @@ class TestAllURLsMatching(unittest.TestCase): def test_mlb(self): self.assertMatch('http://m.mlb.com/video/topic/9674738/v529001783/111015-mlbcom-fastcast-gold-gloves-announced', ['MLB']) - + self.assertMatch('http://mlb.mlb.com/shared/video/embed/m-internal-embed.html?content_id=75609783&property=mlb&autoplay=true&hashmode=false&siteSection=mlb/multimedia/article_118550098/article_embed&club=mlb', ['MLB']) + self.assertMatch('http://washington.nationals.mlb.com/mlb/gameday/index.jsp?c_id=was&gid=2015_05_09_atlmlb_wasmlb_1&lang=en&content_id=108309983&mode=video#', ['MLB']) + self.assertMatch('http://m.mlb.com/video/v34577915/bautista-on-derby-captaining-duties-his-performance', ['MLB']) if __name__ == '__main__': unittest.main() diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index c0665c0e7..bceaa978f 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -706,10 +706,7 @@ from .udemy import ( UdemyIE, UdemyCourseIE ) -from .udn import ( - UDNEmbedIE, - UDNIE -) +from .udn import UDNIE from .ultimedia import UltimediaIE from .unistra import UnistraIE from .urort import UrortIE diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 51516a38a..619fcfe86 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -42,7 +42,7 @@ from .sportbox import SportBoxEmbedIE from .smotri import SmotriIE from .myvi import MyviIE from .condenast import CondeNastIE -from .udn import UDNEmbedIE +from .udn import UDNIE from .senateisvp import SenateISVPIE from .bliptv import BlipTVIE from .svt import SVTIE diff --git a/youtube_dl/extractor/udn.py b/youtube_dl/extractor/udn.py index 852f9cad0..346ea13a4 100644 --- a/youtube_dl/extractor/udn.py +++ b/youtube_dl/extractor/udn.py @@ -10,9 +10,9 @@ from ..utils import ( from ..compat import compat_urlparse -class 
UDNEmbedIE(InfoExtractor): +class UDNIE(InfoExtractor): IE_DESC = '聯合影音' - _VALID_URL = r'https?://video\.udn\.com/(?:embed|play)/news/(?P<id>\d+)' + _VALID_URL = r'https?://video\.udn\.com/((?:embed|play)/)?news/(?P<id>\d+)' _TESTS = [{ 'url': 'http://video.udn.com/embed/news/300040', 'md5': 'de06b4c90b042c128395a88f0384817e', @@ -32,12 +32,12 @@ class UDNEmbedIE(InfoExtractor): }] def _real_extract(self, url): - video_id = self._match_id(url) - - if isinstance(self, UDNIE): + if "embed" not in url and "play" not in url: p = url.index("com/") + 4 url = url[:p] + "embed/" + url[p:] + video_id = self._match_id(url) + page = self._download_webpage(url, video_id) options = json.loads(js_to_json(self._html_search_regex( @@ -77,6 +77,3 @@ class UDNEmbedIE(InfoExtractor): 'title': options['title'], 'thumbnail': thumbnail } - -class UDNIE(UDNEmbedIE): - _VALID_URL = r'https?://video\.udn\.com/news/(?P<id>\d+)' From 6c88c5119e7543c27437751ad9b7ab4846dedb21 Mon Sep 17 00:00:00 2001 From: RPing Date: Thu, 19 Nov 2015 00:19:33 +0800 Subject: [PATCH 3/5] enhance UDN support --- test/unittest_all_urls.py | 160 -------------------------------------- 1 file changed, 160 deletions(-) delete mode 100644 test/unittest_all_urls.py diff --git a/test/unittest_all_urls.py b/test/unittest_all_urls.py deleted file mode 100644 index 162853925..000000000 --- a/test/unittest_all_urls.py +++ /dev/null @@ -1,160 +0,0 @@ -#!/usr/bin/env python - -from __future__ import unicode_literals - -# Allow direct execution -import os -import sys -import unittest -sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) - - -from test.helper import gettestcases - -from youtube_dl.extractor import ( - FacebookIE, - gen_extractors, - YoutubeIE, -) - - -class TestAllURLsMatching(unittest.TestCase): - def setUp(self): - self.ies = gen_extractors() - - def matching_ies(self, url): - return [ie.IE_NAME for ie in self.ies if ie.suitable(url) and ie.IE_NAME != 'generic'] - - def assertMatch(self, url, 
ie_list): - self.assertEqual(self.matching_ies(url), ie_list) - - def test_youtube_playlist_matching(self): - assertPlaylist = lambda url: self.assertMatch(url, ['youtube:playlist']) - assertPlaylist('ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8') - assertPlaylist('UUBABnxM4Ar9ten8Mdjj1j0Q') # 585 - assertPlaylist('PL63F0C78739B09958') - assertPlaylist('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q') - assertPlaylist('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8') - assertPlaylist('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC') - assertPlaylist('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012') # 668 - self.assertFalse('youtube:playlist' in self.matching_ies('PLtS2H6bU1M')) - # Top tracks - assertPlaylist('https://www.youtube.com/playlist?list=MCUS.20142101') - - def test_youtube_matching(self): - self.assertTrue(YoutubeIE.suitable('PLtS2H6bU1M')) - self.assertFalse(YoutubeIE.suitable('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) # 668 - self.assertMatch('http://youtu.be/BaW_jenozKc', ['youtube']) - self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube']) - self.assertMatch('https://youtube.googleapis.com/v/BaW_jenozKc', ['youtube']) - self.assertMatch('http://www.cleanvideosearch.com/media/action/yt/watch?videoId=8v_4O44sfjM', ['youtube']) - - def test_youtube_channel_matching(self): - assertChannel = lambda url: self.assertMatch(url, ['youtube:channel']) - assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM') - assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM?feature=gb_ch_rec') - assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM/videos') - - def test_youtube_user_matching(self): - self.assertMatch('www.youtube.com/NASAgovVideo/videos', ['youtube:user']) - - def test_youtube_feeds(self): - self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:watchlater']) - 
self.assertMatch('https://www.youtube.com/feed/subscriptions', ['youtube:subscriptions']) - self.assertMatch('https://www.youtube.com/feed/recommended', ['youtube:recommended']) - self.assertMatch('https://www.youtube.com/my_favorites', ['youtube:favorites']) - - def test_youtube_show_matching(self): - self.assertMatch('http://www.youtube.com/show/airdisasters', ['youtube:show']) - - def test_youtube_search_matching(self): - self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url']) - self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url']) - - def test_youtube_extract(self): - assertExtractId = lambda url, id: self.assertEqual(YoutubeIE.extract_id(url), id) - assertExtractId('http://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc') - assertExtractId('https://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc') - assertExtractId('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc', 'BaW_jenozKc') - assertExtractId('https://www.youtube.com/watch_popup?v=BaW_jenozKc', 'BaW_jenozKc') - assertExtractId('http://www.youtube.com/watch?v=BaW_jenozKcsharePLED17F32AD9753930', 'BaW_jenozKc') - assertExtractId('BaW_jenozKc', 'BaW_jenozKc') - - def test_facebook_matching(self): - self.assertTrue(FacebookIE.suitable('https://www.facebook.com/Shiniknoh#!/photo.php?v=10153317450565268')) - self.assertTrue(FacebookIE.suitable('https://www.facebook.com/cindyweather?fref=ts#!/photo.php?v=10152183998945793')) - - def test_no_duplicates(self): - ies = gen_extractors() - for tc in gettestcases(include_onlymatching=True): - url = tc['url'] - for ie in ies: - if type(ie).__name__ in ('GenericIE', tc['name'] + 'IE'): - self.assertTrue(ie.suitable(url), '%s should match URL %r' % (type(ie).__name__, url)) - else: - self.assertFalse( - ie.suitable(url), - '%s should not match URL %r . That URL belongs to %s.' 
% (type(ie).__name__, url, tc['name'])) - - def test_keywords(self): - self.assertMatch(':ytsubs', ['youtube:subscriptions']) - self.assertMatch(':ytsubscriptions', ['youtube:subscriptions']) - self.assertMatch(':ythistory', ['youtube:history']) - self.assertMatch(':thedailyshow', ['ComedyCentralShows']) - self.assertMatch(':tds', ['ComedyCentralShows']) - - def test_vimeo_matching(self): - self.assertMatch('https://vimeo.com/channels/tributes', ['vimeo:channel']) - self.assertMatch('https://vimeo.com/channels/31259', ['vimeo:channel']) - self.assertMatch('https://vimeo.com/channels/31259/53576664', ['vimeo']) - self.assertMatch('https://vimeo.com/user7108434', ['vimeo:user']) - self.assertMatch('https://vimeo.com/user7108434/videos', ['vimeo:user']) - self.assertMatch('https://vimeo.com/user21297594/review/75524534/3c257a1b5d', ['vimeo:review']) - - # https://github.com/rg3/youtube-dl/issues/1930 - def test_soundcloud_not_matching_sets(self): - self.assertMatch('http://soundcloud.com/floex/sets/gone-ep', ['soundcloud:set']) - - def test_tumblr(self): - self.assertMatch('http://tatianamaslanydaily.tumblr.com/post/54196191430/orphan-black-dvd-extra-behind-the-scenes', ['Tumblr']) - self.assertMatch('http://tatianamaslanydaily.tumblr.com/post/54196191430', ['Tumblr']) - - def test_pbs(self): - # https://github.com/rg3/youtube-dl/issues/2350 - self.assertMatch('http://video.pbs.org/viralplayer/2365173446/', ['PBS']) - self.assertMatch('http://video.pbs.org/widget/partnerplayer/980042464/', ['PBS']) - - def test_yahoo_https(self): - # https://github.com/rg3/youtube-dl/issues/2701 - self.assertMatch( - 'https://screen.yahoo.com/smartwatches-latest-wearable-gadgets-163745379-cbs.html', - ['Yahoo']) - - def test_appledaily(self): - self.assertMatch('http://www.appledaily.com.tw/animation/appledaily/new/20151117/36903515/', ['AppleDaily']) - self.assertMatch('http://www.appledaily.com.tw/realtimenews/article/sports/20151117/734539/', ['AppleDaily']) - - def 
test_ctsnews(self): - self.assertMatch('http://news.cts.com.tw/cts/life/201511/201511151683198.html#.VkssxbNZOHs', ['CtsNews']) - self.assertMatch('http://news.cts.com.tw/cts/international/201511/201511171683689.html#.Vksv_bNZOHs', ['CtsNews']) - - def test_udn(self): - self.assertMatch('https://video.udn.com/news/398685', ['UDN']) - self.assertMatch('https://video.udn.com/embed/news/300040', ['UDN']) - self.assertMatch('https://video.udn.com/play/news/303776', ['UDN']) - - def test_xuite(self): - self.assertMatch('http://vlog.xuite.net/play/T2lMdGpZLTk0NDA1MS5mbHY=', ['Xuite']) - - def test_yam(self): - self.assertMatch('http://mymedia.yam.com/m/2283921', ['Yam']) - self.assertMatch('http://mymedia.yam.com/m/3599430', ['Yam']) - - def test_mlb(self): - self.assertMatch('http://m.mlb.com/video/topic/9674738/v529001783/111015-mlbcom-fastcast-gold-gloves-announced', ['MLB']) - self.assertMatch('http://mlb.mlb.com/shared/video/embed/m-internal-embed.html?content_id=75609783&property=mlb&autoplay=true&hashmode=false&siteSection=mlb/multimedia/article_118550098/article_embed&club=mlb', ['MLB']) - self.assertMatch('http://washington.nationals.mlb.com/mlb/gameday/index.jsp?c_id=was&gid=2015_05_09_atlmlb_wasmlb_1&lang=en&content_id=108309983&mode=video#', ['MLB']) - self.assertMatch('http://m.mlb.com/video/v34577915/bautista-on-derby-captaining-duties-his-performance', ['MLB']) - -if __name__ == '__main__': - unittest.main() From b0ae68de85d0360757d0edb00fcce1055cd9fbbe Mon Sep 17 00:00:00 2001 From: RPing Date: Thu, 19 Nov 2015 00:30:12 +0800 Subject: [PATCH 4/5] [UDNEmbed]enhance UDN support and modify the name --- test/test_all_urls.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/test/test_all_urls.py b/test/test_all_urls.py index a9db42b30..152dba3f2 100644 --- a/test/test_all_urls.py +++ b/test/test_all_urls.py @@ -130,6 +130,11 @@ class TestAllURLsMatching(unittest.TestCase): 
'https://screen.yahoo.com/smartwatches-latest-wearable-gadgets-163745379-cbs.html', ['Yahoo']) + def test_udn(self): + self.assertMatch('https://video.udn.com/news/398685', ['UDN']) + self.assertMatch('https://video.udn.com/embed/news/300040', ['UDN']) + self.assertMatch('https://video.udn.com/play/news/303776', ['UDN']) + if __name__ == '__main__': unittest.main() From 79f4be30a3a6df658b30f82eb676a3db29244ebe Mon Sep 17 00:00:00 2001 From: RPing Date: Thu, 19 Nov 2015 12:48:59 +0800 Subject: [PATCH 5/5] [UDNEmbed]modify regex in udn.py --- youtube_dl/extractor/__init__.py | 2 +- youtube_dl/extractor/generic.py | 2 +- youtube_dl/extractor/udn.py | 8 ++------ 3 files changed, 4 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index bceaa978f..26e5745d6 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -706,7 +706,7 @@ from .udemy import ( UdemyIE, UdemyCourseIE ) -from .udn import UDNIE +from .udn import UDNEmbedIE from .ultimedia import UltimediaIE from .unistra import UnistraIE from .urort import UrortIE diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 619fcfe86..51516a38a 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -42,7 +42,7 @@ from .sportbox import SportBoxEmbedIE from .smotri import SmotriIE from .myvi import MyviIE from .condenast import CondeNastIE -from .udn import UDNIE +from .udn import UDNEmbedIE from .senateisvp import SenateISVPIE from .bliptv import BlipTVIE from .svt import SVTIE diff --git a/youtube_dl/extractor/udn.py b/youtube_dl/extractor/udn.py index 346ea13a4..d9dd627db 100644 --- a/youtube_dl/extractor/udn.py +++ b/youtube_dl/extractor/udn.py @@ -10,9 +10,9 @@ from ..utils import ( from ..compat import compat_urlparse -class UDNIE(InfoExtractor): +class UDNEmbedIE(InfoExtractor): IE_DESC = '聯合影音' - _VALID_URL = 
r'https?://video\.udn\.com/((?:embed|play)/)?news/(?P<id>\d+)' + _VALID_URL = r'(?:https?:)?//video\.udn\.com/(?:embed|play)/news/(?P<id>\d+)' _TESTS = [{ 'url': 'http://video.udn.com/embed/news/300040', 'md5': 'de06b4c90b042c128395a88f0384817e', @@ -32,10 +32,6 @@ class UDNIE(InfoExtractor): }] def _real_extract(self, url): - if "embed" not in url and "play" not in url: - p = url.index("com/") + 4 - url = url[:p] + "embed/" + url[p:] - video_id = self._match_id(url) page = self._download_webpage(url, video_id)