From a86adcc6f152f55b81a690f2758132fbe0ffa5f0 Mon Sep 17 00:00:00 2001 From: renu yarday Date: Mon, 18 Nov 2013 11:02:22 +0530 Subject: [PATCH 1/2] support for list of embedded YouTube URLs --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 7dd0ad09b..7e2490c44 100644 --- a/.gitignore +++ b/.gitignore @@ -26,3 +26,4 @@ updates_key.pem *.part test/testdata .tox +/.idea From 4a6899bc22de4fd53996187d79f73c365f4dd4a0 Mon Sep 17 00:00:00 2001 From: renu yarday Date: Mon, 18 Nov 2013 12:08:43 +0530 Subject: [PATCH 2/2] support for list of embedded YouTube URLs --- youtube_dl/extractor/generic.py | 10 ++++++---- youtube_dl/extractor/youtube.py | 4 ++-- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index c7552fddb..02322649d 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -177,11 +177,13 @@ class GenericIE(InfoExtractor): return self.url_result(surl, 'Vimeo') # Look for embedded YouTube player - mobj = re.search( - r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?youtube.com/embed/.+?)\1', webpage) + mobj = re.findall( + r'<iframe[^>]+?src=(["\'])(?P<url>(https?:)?//(?:www\.)?youtube.com/embed/.+?)\1', webpage) if mobj: - surl = unescapeHTML(mobj.group(u'url')) - return self.url_result(surl, 'Youtube') + #surl = unescapeHTML(mobj.group(u'url')) + surl_list = [tuppl[1] for tuppl in mobj] + return [self.url_result(x, 'Youtube') for x in surl_list] + # Look for Bandcamp pages with custom domain mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 1aa549740..1d73f44c7 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -141,8 +141,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): IE_DESC = u'YouTube.com' _VALID_URL = r"""^ ( - (?:https?://)? 
# http(s):// (optional) - (?:(?:(?:(?:\w+\.)?youtube(?:-nocookie)?\.com/| + (?:https?:)? # http(s):// (optional) + (?:(?:(?:(?://\w+\.)?youtube(?:-nocookie)?\.com/| tube\.majestyc\.net/| youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains (?:.*?\#/)? # handle anchor (#/) redirect urls