From d48a8622c8acd1ce14f5a296166a823f4926fd56 Mon Sep 17 00:00:00 2001 From: remitamine Date: Sat, 27 Jun 2015 12:12:10 +0100 Subject: [PATCH 1/4] add support for show url scheme --- youtube_dl/extractor/snagfilms.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/snagfilms.py b/youtube_dl/extractor/snagfilms.py index 74cd2698d..3822a72ae 100644 --- a/youtube_dl/extractor/snagfilms.py +++ b/youtube_dl/extractor/snagfilms.py @@ -1,9 +1,9 @@ -from re import match,DOTALL +from re import match,search,DOTALL from .common import InfoExtractor from ..utils import js_to_json class SnagFilmsIE(InfoExtractor): - _VALID_URL = r'(?:https?://)?(?:www.|embed.)?snagfilms\.com/(?:films/title/(?P.+?)|embed/player\?.*filmId=(?P.+?))(?:&|/|$)' + _VALID_URL = r'(?:https?://)?(?:www.|embed.)?snagfilms\.com/(?:(?:films/title|show/(?P.+?))/(?P.+?)|embed/player\?.*filmId=(?P.+?))(?=&|/|$)' _TESTS = [{ 'url': 'http://www.snagfilms.com/films/title/lost_for_life', 'info_dict': @@ -31,15 +31,14 @@ class SnagFilmsIE(InfoExtractor): }] def _real_extract(self, url): - display_id, video_id = match(self._VALID_URL,url).groups() + show_name, display_id, video_id = match(self._VALID_URL,url).groups() if display_id is None: embed_webpage = self._download_webpage('http://www.snagfilms.com/embed/player?filmId=' + video_id, video_id) - display_id = self._html_search_regex( - r"snagfilms\.com/films/title/(?P.+?)(?:/|')", - embed_webpage, - 'display_id' - ) - webpage = self._download_webpage('http://www.snagfilms.com/films/title/' + display_id, display_id) + url, show_name, display_id = search( + r"(?:https?://)?(?:www.)?snagfilms\.com/(?:films/title|show/(?P.+?))/(?P.+?)(?=/|')", + embed_webpage + ).group(0,1,2) + webpage = self._download_webpage(url, display_id) json_data = self._parse_json(self._html_search_regex( r'"data":{"film":(?P{.*?}})}', From c393ec222cc2b6efa032d8c8eaff69b4f9da4fca Mon Sep 17 00:00:00 2001 From: remitamine Date: Sat, 27 
Jun 2015 12:26:54 +0100 Subject: [PATCH 2/4] add test for show uri scheme --- youtube_dl/extractor/snagfilms.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/youtube_dl/extractor/snagfilms.py b/youtube_dl/extractor/snagfilms.py index 3822a72ae..231340c50 100644 --- a/youtube_dl/extractor/snagfilms.py +++ b/youtube_dl/extractor/snagfilms.py @@ -28,6 +28,18 @@ class SnagFilmsIE(InfoExtractor): 'description': 'A gripping portrait of the Occupy Wall Street media revolution, #WHILEWEWATCH is the first definitive film to emerge from Zuccotti Park—with full access and cooperation from masterminds who made #OccupyWallStreet a reality. The #OccupyWallStreet media team had no fear of a critical city government, big corporations, hostile police or a lagging mainstream media to tell their story. Through rain, snow, grueling days and sleeping on concrete, they pump out exhilarating ideas to the world. With little money, they rely on Twitter, texting, Wi-Fi, posters, Tumblr, live streams, YouTube, Facebook, dramatic marches, drumbeats and chants. As the film unfolds, we witness the burgeoning power of social media.
', 'categories': ['Documentary','Politics'] } + },{ + 'url': 'http://www.snagfilms.com/show/the_world_cut_project/india', + 'info_dict': + { + 'id': '00000145-d75c-d96e-a9c7-ff5c67b20000', + 'display_id': 'india', + 'ext': 'mp4', + 'title': 'India', + 'duration': 979, + 'description': 'Soccer brings women together to end the cycle of child marriages and human trafficking in India. Through soccer they can be safe.', + 'categories': ['Documentary','Sports','Politics'] + } }] def _real_extract(self, url): From 02d072a1d20d2f3517e40607fb7983069ff8814b Mon Sep 17 00:00:00 2001 From: remitamine Date: Sat, 27 Jun 2015 13:06:54 +0100 Subject: [PATCH 3/4] handle geolocation restriction --- youtube_dl/extractor/snagfilms.py | 48 ++++++++++++++++++++----------- 1 file changed, 31 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/snagfilms.py b/youtube_dl/extractor/snagfilms.py index 231340c50..e6637899a 100644 --- a/youtube_dl/extractor/snagfilms.py +++ b/youtube_dl/extractor/snagfilms.py @@ -1,9 +1,9 @@ from re import match,search,DOTALL from .common import InfoExtractor -from ..utils import js_to_json +from ..utils import RegexNotFoundError,ExtractorError,js_to_json class SnagFilmsIE(InfoExtractor): - _VALID_URL = r'(?:https?://)?(?:www.|embed.)?snagfilms\.com/(?:(?:films/title|show/(?P.+?))/(?P.+?)|embed/player\?.*filmId=(?P.+?))(?=&|/|$)' + _VALID_URL = r'(?:https?://)(?:www.|embed.)?snagfilms\.com/(?:(?:films/title|show/(?P.+?))/(?P.+?)|embed/player\?.*filmId=(?P.+?))(?=&|/|$)' _TESTS = [{ 'url': 'http://www.snagfilms.com/films/title/lost_for_life', 'info_dict': @@ -46,35 +46,49 @@ class SnagFilmsIE(InfoExtractor): show_name, display_id, video_id = match(self._VALID_URL,url).groups() if display_id is None: embed_webpage = self._download_webpage('http://www.snagfilms.com/embed/player?filmId=' + video_id, video_id) - url, show_name, display_id = search( - r"(?:https?://)?(?:www.)?snagfilms\.com/(?:films/title|show/(?P.+?))/(?P.+?)(?=/|')", + p = search( +
r"(?:https?://)(?:www.)?snagfilms\.com/(?:films/title|show/(?P.+?))/(?P.+?)(?=/|')", embed_webpage - ).group(0,1,2) + ) + if p is None: + if 'This film is not playable in your area.' in embed_webpage: + raise ExtractorError('This film is not playable in your area') + else: + raise ExtractorError('the Film you\'re looking for is not available') + url, show_name, display_id = p.group(0,1,2) webpage = self._download_webpage(url, display_id) - json_data = self._parse_json(self._html_search_regex( - r'"data":{"film":(?P{.*?}})}', - webpage, - 'data' - ), display_id) + try: + json_data = self._parse_json(self._html_search_regex( + r'"data":{"film":(?P{.*?}})}', + webpage, + 'data' + ), display_id) + except RegexNotFoundError: + raise ExtractorError('the Film you\'re looking for is not available') if video_id is None: video_id = json_data['id'] embed_webpage = self._download_webpage('http://www.snagfilms.com/embed/player?filmId=' + video_id, video_id) + try: + sources = self._parse_json(js_to_json(self._html_search_regex( + r'sources: (?P\[.*?\])', + embed_webpage, + 'sources', + flags=DOTALL + )), video_id) + except RegexNotFoundError: + if 'This film is not playable in your area.' 
in embed_webpage: + raise ExtractorError('This film is not playable in your area') + else: + raise ExtractorError('the Film you\'re looking for is not available') title = json_data['title'] duration = int(json_data['duration']) description = json_data['synopsis'] categories = [category['title'] for category in json_data['categories']] thumbnail = json_data['image'] - sources = self._parse_json(js_to_json(self._html_search_regex( - r'sources: (?P\[.*?\])', - embed_webpage, - 'sources', - flags=DOTALL - )), video_id) - formats = [] for source in sources: if source['type'] == 'm3u8': From 4a70c0af5360eace3b0686efae6ce45ab54ae8dc Mon Sep 17 00:00:00 2001 From: remitamine Date: Sat, 27 Jun 2015 13:14:05 +0100 Subject: [PATCH 4/4] remove m3u8 extraction The extraction of m3u8 fails most of the time --- youtube_dl/extractor/snagfilms.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/snagfilms.py b/youtube_dl/extractor/snagfilms.py index e6637899a..05f7c054e 100644 --- a/youtube_dl/extractor/snagfilms.py +++ b/youtube_dl/extractor/snagfilms.py @@ -91,9 +91,10 @@ class SnagFilmsIE(InfoExtractor): formats = [] for source in sources: - if source['type'] == 'm3u8': - formats.extend(self._extract_m3u8_formats(source['file'], video_id)) - else: + if source['type'] != 'm3u8': +# The extraction of m3u8 fails most of the time +# formats.extend(self._extract_m3u8_formats(source['file'], video_id)) +# else: formats.append({'url': source['file'],'ext': source['type'], 'resolution': source['label']}) self._sort_formats(formats)