From ce6cfe3a7f68bd591e135286853fd31ef3b98785 Mon Sep 17 00:00:00 2001 From: Diego Guerra Date: Wed, 9 Dec 2015 21:56:22 +0100 Subject: [PATCH] [putlocker] Add new extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/putlocker.py | 172 ++++++++++++++++++++++++++++++ 2 files changed, 173 insertions(+) create mode 100644 youtube_dl/extractor/putlocker.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 3db5cd6d9..3e33e0cab 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -504,6 +504,7 @@ from .primesharetv import PrimeShareTVIE from .promptfile import PromptFileIE from .prosiebensat1 import ProSiebenSat1IE from .puls4 import Puls4IE +from .putlocker import PutLockerIE from .pyvideo import PyvideoIE from .qqmusic import ( QQMusicIE, diff --git a/youtube_dl/extractor/putlocker.py b/youtube_dl/extractor/putlocker.py new file mode 100644 index 000000000..e8f9063c7 --- /dev/null +++ b/youtube_dl/extractor/putlocker.py @@ -0,0 +1,172 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import base64 +import re + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + smuggle_url +) + + +class PutLockerIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?putlocker\.is/(?P[^/]+)\.html' + _TESTS = [ + { + 'url': 'http://putlocker.is/watch-the-silence-of-the-lambs-online-free-putlocker.html', + 'md5': 'ad624b58450625faf64762b72b8ecb0d', + 'info_dict': { + 'id': 'the-silence-of-the-lambs', + 'ext': 'mp4', + 'title': 'The Silence of the Lambs', + 'description': ('Young FBI agent Clarice Starling is assigned to help find a missing ' + 'woman to save her from a psychopathic serial killer who skins his victims. Clarice ' + 'attempts to gain a better insight into the twisted mind of the killer by talking to ' + 'another psychopath Hannibal Lecter, who used to be a respected psychiatrist. FBI agent ' + 'Jack Crawford believes that Lecter, who is also a very powerful and clever mind ' + 'manipulator, has the answers to their questions and can help locate the killer. ' + 'However, Clarice must first gain Lecter\'s confidence before the inmate will give away ' + 'any information.'), + 'thumbnail': 'http://image4.putlocker.is/images/covers/the-silence-of-the-lambs-online-free-putlocker.jpg', + 'height': 410, + 'width': 728, + 'uploader': 'thevideos.tv' + } + }, + { + 'url': 'http://putlocker.is/watch-arrested-development-tvshow-season-1-episode-1-online-free-putlocker.html', + 'md5': '7afdf6e99831757dbcc3eb28f9da6f7b', + 'info_dict': { + 'id': 'arrested-development-tvshow-season-1-episode-1', + 'ext': 'mp4', + 'title': 'Arrested Development Season 1 Episode 1: Pilot', + 'description': ('Widower Michael Bluth has been working for his father\'s development ' + 'company since he was a teenager manning the family\'s frozen banana stand, and he ' + 'and his son George Michael have gone so far as to move into one of the company\'s ' + 'model homes. So when his father George Sr. throws his retirement party on the family ' + 'yacht, Michael expects that he will be announced as his father\'s successor. Instead, ' + 'Michael gets two surprises: His mother is the new President, and his father is under ' + 'investigation by the SEC. So Michael has to hold his wildly dysfunctional family together.'), + 'thumbnail': 'http://image4.putlocker.is/images/covers/arrested-development-tvshow-season-1-episode-1-online-free-putlocker.jpg', + 'height': 410, + 'width': 728, + 'uploader': 'thevideos.tv' + } + }, + { + 'url': 'http://putlocker.is/watch-community-tvshow-season-3-episode-4-online-free-putlocker.html', + 'md5': 'c34b6561ef5e2be973f0e2b6f33095d5', + 'info_dict': { + 'id': 'community-tvshow-season-3-episode-4', + 'ext': 'mp4', + 'title': 'Community Season 3 Episode 4: Remedial Chaos Theory', + 'description': ('When Troy and Abed decide to share an apartment, they host a party for ' + 'the group, which takes on an altered reality as several scenarios play out.'), + 'thumbnail': 'http://image4.putlocker.is/images/covers/community-tvshow-season-3-episode-4-online-free-putlocker.jpg', + 'height': 410, + 'width': 728, + 'uploader': 'thevideos.tv' + } + } + ] + + def trim_string(self, string, start='', end=''): + if start and string.startswith(start): + string = string[len(start):] + + if end and string.endswith(end): + string = string[:-len(end)] + + return string + + def extract_url_id(self, url): + url_id = self._match_id(url) + + # Try to remove generic substrings before and after the interesting section + return self.trim_string( + url_id, 'watch-', '-online-free-putlocker') + + def extract_webpage_title(self, webpage): + video_title = self._html_search_regex( + r'(?s)(.*?)', webpage, 'video title') + + # Try to remove generic substrings before and after the title + return self.trim_string( + video_title, 'Watch ', + ' Online Free Putlocker | Putlocker - Watch Movies Online Free') + + def extract_webpage_description(self, webpage): + description = self._html_search_regex( + r'(?s)Synopsis:[ ]?(.*?)', webpage, 'video description') + + # A generic phrase but by Putlocker should appear before the actual description. + # We try to find it and return the rest of the description + arr = description.split(' Putlocker. ') + + # Too many 'Putlocker' substrings found, this shouldn't happen. Return everything + if (len(arr) > 2): + return description + + # If the 'Putlocker' substring was not found that's fine, everything is returned + return arr[-1] + + def _real_extract(self, url): + video_id = self.extract_url_id(url) + + webpage = self._download_webpage(url, video_id) + + encoded_matches = re.findall(r'document\.write\(doit\(\'(.+)\'\)\)', webpage) + + # Every match is html to inject into the page, encoded in base64 + # twice. Only one will be the valid video URL, other content (such + # as ads) also loaded this way. + for encoded in encoded_matches: + html = base64.b64decode(base64.b64decode(encoded)) + + iframe_match = re.search(r'