From 9dc8c6eb232df8f12bc1bf0ac4ea327503d450f9 Mon Sep 17 00:00:00 2001
From: Philip Ardery <arderyp@gmail.com>
Date: Sat, 14 Mar 2015 14:39:40 -0400
Subject: [PATCH 01/12] added moviestorm InfoExtractor.  This is a link farm
 handler that scrapes urls from a moviestorm page, which are then handed off
 to one of youtube-dl's other handlers for download/extraction

---
 youtube_dl/extractor/moviestorm.py | 128 +++++++++++++++++++++++++++++
 1 file changed, 128 insertions(+)
 create mode 100644 youtube_dl/extractor/moviestorm.py

diff --git a/youtube_dl/extractor/moviestorm.py b/youtube_dl/extractor/moviestorm.py
new file mode 100644
index 000000000..a4981850a
--- /dev/null
+++ b/youtube_dl/extractor/moviestorm.py
@@ -0,0 +1,128 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import os.path
+import re
+from time import sleep
+
+from .common import InfoExtractor
+from ..utils import ExtractorError
+from ..compat import (
+    compat_html_parser,
+    compat_urllib_parse,
+    compat_urllib_request,
+    compat_urlparse,
+)
+
+class MovieStormHTMLParser(compat_html_parser.HTMLParser):
+    def __init__(self):
+        self.found_button = False
+        self.watch_urls = []
+        self.direct_url = False
+        compat_html_parser.HTMLParser.__init__(self)
+
+    def handle_starttag(self, tag, attrs):
+        attrs = dict((k, v) for k, v in attrs)
+        if tag == 'td' and attrs['class'] == 'link_td':
+            self.found_button = True
+        elif tag == 'a' and self.found_button:
+            # suppress ishare and other direct links, can't handle now
+            if 'moviestorm' in attrs['href']:
+                self.watch_urls.append(attrs['href'].strip())
+        elif tag == 'a' and 'class' in attrs and attrs['class'] == 'real_link':
+        	self.direct_url = attrs['href'].strip()
+
+    def handle_endtag(self, tag):
+        if tag == 'td':
+            self.found_button = False
+
+    @classmethod
+    def extract_watch_urls(cls, html):
+        p = cls()
+        p.feed(html)
+        p.close()
+        return p.watch_urls
+
+    @classmethod
+    def extract_direct_url(cls, html):
+        p = cls()
+        p.feed(html)
+        p.close()
+        return p.direct_url
+
+class MovieStormIE(InfoExtractor):
+    IE_DESC = 'Movie Storm (link farm)'
+    IE_NAME = 'MovieStorm'
+    _VALID_URL = r'http://moviestorm\.eu/view/(\d+)-watch-(.*)/season-(\d+)/episode-(\d+)'
+    _LINK_FARM = True
+
+	# There are no tests for this IE because the links on any given moviestorm
+	# page can dynamically change, and because the actual download/extraction
+	# is ultimately preformed by another IE. An example of an acceptable url to
+	# feed to this IE is: http://moviestorm.eu/view/218-watch-the-simpsons/season-26/episode-1
+    _TEST = False
+
+	# moviestorm's drupal db config is unstable at times
+    # retry up to 5 times before giving up, 5 second delay
+    # between each retry
+    retry_count = 0
+    max_retries = 5
+    retry_wait = 5
+    direct_urls = []
+
+    def _parse_target(self, target):
+        uri = compat_urlparse.urlparse(target)
+        hash = uri.fragment[1:].split('?')[0]
+        token = os.path.basename(hash.rstrip('/'))
+        return (uri, hash, token)
+
+    def _real_extract(self, url):
+        # retry loop to capture moviestorm page
+        while True:
+        	if self.retry_count == 0:
+        	    note = 'Downloading link farm page'
+        	else:
+        		note = ('Unstable db connection, retying again in %s seconds '
+        			'[%s/%s]' % (self.retry_wait, self.retry_count,
+        			self.max_retries))
+
+        	(_, _, token) = self._parse_target(url)
+        	farmpage = self._download_webpage(
+            	url, token,
+            	note=note,
+            	errnote='Unable to download link farm page',
+            	fatal=False
+        	)
+
+        	if farmpage.strip() != 'MySQL server has gone away':
+        		break
+
+        	if self.retry_count < self.max_retries:
+        		self.retry_count += 1
+        		sleep(self.retry_wait)
+        	else:
+        		msg = 'The moviestorm database is currently unstable.  Please try again later.'
+        		raise ExtractorError(msg, expected=True)
+
+        # scrape WATCH button links from moviestorm page
+        self.to_screen(': Extracting watch page urls')
+        watch_urls = MovieStormHTMLParser.extract_watch_urls(farmpage)
+
+        # get direct urls from scraped watch pages
+        self.to_screen(': Extracting direct links from watch pages')
+        for watch_url in watch_urls:
+        	(_, _, token) = self._parse_target(watch_url)
+        	watchpage = self._download_webpage(
+        		watch_url, token,
+        		note=False,
+        		errnote='Unable to download link farm watch page',
+        		fatal=False
+        	)
+
+        	if watchpage is not None:
+        		direct_url = MovieStormHTMLParser.extract_direct_url(watchpage)
+        		if direct_url:
+        			self.direct_urls.append(direct_url)
+
+        self.to_screen(': Passing off farmed links to InfoExtractors')
+        return list(set(self.direct_urls))

From 4c7a02aa47534d6a7b522bfcb30eb70fadfe8c1c Mon Sep 17 00:00:00 2001
From: Philip Ardery <arderyp@gmail.com>
Date: Sat, 14 Mar 2015 14:42:46 -0400
Subject: [PATCH 02/12] added moviestorm entry to extractor/__init__.py

---
 youtube_dl/extractor/__init__.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index ad133603f..4e65570e0 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -37,7 +37,6 @@ from .bandcamp import BandcampIE, BandcampAlbumIE
 from .bbccouk import BBCCoUkIE
 from .beeg import BeegIE
 from .behindkink import BehindKinkIE
-from .beatportpro import BeatportProIE
 from .bet import BetIE
 from .bild import BildIE
 from .bilibili import BiliBiliIE
@@ -232,7 +231,6 @@ from .jove import JoveIE
 from .jukebox import JukeboxIE
 from .jpopsukitv import JpopsukiIE
 from .kaltura import KalturaIE
-from .kanalplay import KanalPlayIE
 from .kankan import KankanIE
 from .karaoketv import KaraoketvIE
 from .keezmovies import KeezMoviesIE
@@ -281,6 +279,7 @@ from .moevideo import MoeVideoIE
 from .mofosex import MofosexIE
 from .mojvideo import MojvideoIE
 from .moniker import MonikerIE
+from .moviestorm import MovieStormIE
 from .mooshare import MooshareIE
 from .morningstar import MorningstarIE
 from .motherless import MotherlessIE
@@ -559,7 +558,6 @@ from .videoweed import VideoWeedIE
 from .vidme import VidmeIE
 from .vidzi import VidziIE
 from .vier import VierIE, VierVideosIE
-from .viewster import ViewsterIE
 from .vimeo import (
     VimeoIE,
     VimeoAlbumIE,

From 8654a8484f3e22f26fd464811121459e88409f55 Mon Sep 17 00:00:00 2001
From: Philip Ardery <arderyp@gmail.com>
Date: Sat, 14 Mar 2015 14:47:06 -0400
Subject: [PATCH 03/12] added process_farmed_links method to YoutubeDL class to
 handle moviestorm linkfarm IE. This can be used by other linkfarm IEs that
 I/we plan to write in the future.

---
 youtube_dl/YoutubeDL.py | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index e779fc9a8..59e8114a4 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -629,6 +629,28 @@ class YoutubeDL(object):
         for key, value in extra_info.items():
             info_dict.setdefault(key, value)
 
+    def process_farmed_links(self, direct_urls):
+        familiar_farmed_urls = []
+        for farmed_url in direct_urls:
+            for ie in self._ies:
+                # not all extractors have IE_NAME set, using class name for fuller coverage
+                c = ie.__class__.__name__
+
+                # ignore non-familiar links
+                if c != 'GenericIE' and c != 'MovieStormIE' and ie.suitable(farmed_url):
+                    familiar_farmed_urls.append( [ie, farmed_url] )
+
+        for tuple in familiar_farmed_urls:
+            ie = tuple[0]
+            familiar_farmed_url = tuple[1]
+
+            try:
+                ie_result = ie.extract(familiar_farmed_url)
+                return ie_result, ie
+            except:
+                # Failed extract, move on to next url in list
+                ie.to_screen("\033[0;33mWARNING:\033[0m failed attempt, trying next farmed link")
+
     def extract_info(self, url, download=True, ie_key=None, extra_info={},
                      process=True):
         '''
@@ -652,6 +674,11 @@ class YoutubeDL(object):
 
             try:
                 ie_result = ie.extract(url)
+
+                # handle link farm extractors
+                if hasattr(ie, '_LINK_FARM') and ie._LINK_FARM:
+                	ie_result, ie = self.process_farmed_links(ie_result)
+
                 if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
                     break
                 if isinstance(ie_result, list):

From 0dc602d9e35ab712c3be57e5a0b67ae2a36f2d28 Mon Sep 17 00:00:00 2001
From: Philip Ardery <arderyp@gmail.com>
Date: Sat, 14 Mar 2015 14:53:41 -0400
Subject: [PATCH 04/12] replaced tabs with spaces in moviestorm IE

---
 youtube_dl/extractor/moviestorm.py | 46 +++++++++++++++---------------
 1 file changed, 23 insertions(+), 23 deletions(-)

diff --git a/youtube_dl/extractor/moviestorm.py b/youtube_dl/extractor/moviestorm.py
index a4981850a..5fceca6b1 100644
--- a/youtube_dl/extractor/moviestorm.py
+++ b/youtube_dl/extractor/moviestorm.py
@@ -56,13 +56,13 @@ class MovieStormIE(InfoExtractor):
     _VALID_URL = r'http://moviestorm\.eu/view/(\d+)-watch-(.*)/season-(\d+)/episode-(\d+)'
     _LINK_FARM = True
 
-	# There are no tests for this IE because the links on any given moviestorm
-	# page can dynamically change, and because the actual download/extraction
-	# is ultimately preformed by another IE. An example of an acceptable url to
-	# feed to this IE is: http://moviestorm.eu/view/218-watch-the-simpsons/season-26/episode-1
+    # There are no tests for this IE because the links on any given moviestorm
+    # page can dynamically change, and because the actual download/extraction
+    # is ultimately preformed by another IE. An example of an acceptable url to
+    # feed to this IE is: http://moviestorm.eu/view/218-watch-the-simpsons/season-26/episode-1
     _TEST = False
 
-	# moviestorm's drupal db config is unstable at times
+    # moviestorm's drupal db config is unstable at times
     # retry up to 5 times before giving up, 5 second delay
     # between each retry
     retry_count = 0
@@ -79,12 +79,12 @@ class MovieStormIE(InfoExtractor):
     def _real_extract(self, url):
         # retry loop to capture moviestorm page
         while True:
-        	if self.retry_count == 0:
+            if self.retry_count == 0:
         	    note = 'Downloading link farm page'
         	else:
-        		note = ('Unstable db connection, retying again in %s seconds '
-        			'[%s/%s]' % (self.retry_wait, self.retry_count,
-        			self.max_retries))
+        	    note = ('Unstable db connection, retying again in %s seconds '
+        	        '[%s/%s]' % (self.retry_wait, self.retry_count,
+        	        self.max_retries))
 
         	(_, _, token) = self._parse_target(url)
         	farmpage = self._download_webpage(
@@ -95,14 +95,14 @@ class MovieStormIE(InfoExtractor):
         	)
 
         	if farmpage.strip() != 'MySQL server has gone away':
-        		break
+        	    break
 
         	if self.retry_count < self.max_retries:
-        		self.retry_count += 1
-        		sleep(self.retry_wait)
+        	    self.retry_count += 1
+        	    sleep(self.retry_wait)
         	else:
-        		msg = 'The moviestorm database is currently unstable.  Please try again later.'
-        		raise ExtractorError(msg, expected=True)
+        	    msg = 'The moviestorm database is currently unstable.  Please try again later.'
+        	    raise ExtractorError(msg, expected=True)
 
         # scrape WATCH button links from moviestorm page
         self.to_screen(': Extracting watch page urls')
@@ -111,18 +111,18 @@ class MovieStormIE(InfoExtractor):
         # get direct urls from scraped watch pages
         self.to_screen(': Extracting direct links from watch pages')
         for watch_url in watch_urls:
-        	(_, _, token) = self._parse_target(watch_url)
-        	watchpage = self._download_webpage(
-        		watch_url, token,
-        		note=False,
-        		errnote='Unable to download link farm watch page',
-        		fatal=False
+            (_, _, token) = self._parse_target(watch_url)
+            watchpage = self._download_webpage(
+                watch_url, token,
+                note=False,
+                errnote='Unable to download link farm watch page',
+                fatal=False
         	)
 
         	if watchpage is not None:
-        		direct_url = MovieStormHTMLParser.extract_direct_url(watchpage)
-        		if direct_url:
-        			self.direct_urls.append(direct_url)
+        	    direct_url = MovieStormHTMLParser.extract_direct_url(watchpage)
+        	    if direct_url:
+        	        self.direct_urls.append(direct_url)
 
         self.to_screen(': Passing off farmed links to InfoExtractors')
         return list(set(self.direct_urls))

From b0f5c78ffde4cf855e803ebb3ac9cbe131a9fa52 Mon Sep 17 00:00:00 2001
From: Philip Ardery <arderyp@gmail.com>
Date: Sat, 14 Mar 2015 14:58:16 -0400
Subject: [PATCH 05/12] replaced tabs with spaces

---
 youtube_dl/YoutubeDL.py            |  2 +-
 youtube_dl/extractor/moviestorm.py | 12 ++++++------
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 59e8114a4..039bc49fb 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -677,7 +677,7 @@ class YoutubeDL(object):
 
                 # handle link farm extractors
                 if hasattr(ie, '_LINK_FARM') and ie._LINK_FARM:
-                	ie_result, ie = self.process_farmed_links(ie_result)
+                    ie_result, ie = self.process_farmed_links(ie_result)
 
                 if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
                     break
diff --git a/youtube_dl/extractor/moviestorm.py b/youtube_dl/extractor/moviestorm.py
index 5fceca6b1..5486fa10d 100644
--- a/youtube_dl/extractor/moviestorm.py
+++ b/youtube_dl/extractor/moviestorm.py
@@ -30,7 +30,7 @@ class MovieStormHTMLParser(compat_html_parser.HTMLParser):
             if 'moviestorm' in attrs['href']:
                 self.watch_urls.append(attrs['href'].strip())
         elif tag == 'a' and 'class' in attrs and attrs['class'] == 'real_link':
-        	self.direct_url = attrs['href'].strip()
+            self.direct_url = attrs['href'].strip()
 
     def handle_endtag(self, tag):
         if tag == 'td':
@@ -80,11 +80,11 @@ class MovieStormIE(InfoExtractor):
         # retry loop to capture moviestorm page
         while True:
             if self.retry_count == 0:
-        	    note = 'Downloading link farm page'
-        	else:
-        	    note = ('Unstable db connection, retying again in %s seconds '
-        	        '[%s/%s]' % (self.retry_wait, self.retry_count,
-        	        self.max_retries))
+                note = 'Downloading link farm page'
+            else:
+                note = ('Unstable db connection, retying again in %s seconds '
+                    '[%s/%s]' % (self.retry_wait, self.retry_count,
+                    self.max_retries))
 
         	(_, _, token) = self._parse_target(url)
         	farmpage = self._download_webpage(

From 503a9f3f8e15d08fad4a0f14b63eff9744e90d22 Mon Sep 17 00:00:00 2001
From: Philip Ardery <arderyp@gmail.com>
Date: Sat, 14 Mar 2015 15:01:05 -0400
Subject: [PATCH 06/12] replaced tabs with spaces

---
 youtube_dl/extractor/moviestorm.py | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/youtube_dl/extractor/moviestorm.py b/youtube_dl/extractor/moviestorm.py
index 5486fa10d..db6e66ad2 100644
--- a/youtube_dl/extractor/moviestorm.py
+++ b/youtube_dl/extractor/moviestorm.py
@@ -86,23 +86,23 @@ class MovieStormIE(InfoExtractor):
                     '[%s/%s]' % (self.retry_wait, self.retry_count,
                     self.max_retries))
 
-        	(_, _, token) = self._parse_target(url)
-        	farmpage = self._download_webpage(
-            	url, token,
-            	note=note,
-            	errnote='Unable to download link farm page',
-            	fatal=False
-        	)
+            (_, _, token) = self._parse_target(url)
+            farmpage = self._download_webpage(
+                url, token,
+                note=note,
+                errnote='Unable to download link farm page',
+                fatal=False
+            )
 
-        	if farmpage.strip() != 'MySQL server has gone away':
-        	    break
+            if farmpage.strip() != 'MySQL server has gone away':
+                break
 
-        	if self.retry_count < self.max_retries:
-        	    self.retry_count += 1
-        	    sleep(self.retry_wait)
-        	else:
-        	    msg = 'The moviestorm database is currently unstable.  Please try again later.'
-        	    raise ExtractorError(msg, expected=True)
+            if self.retry_count < self.max_retries:
+                self.retry_count += 1
+                sleep(self.retry_wait)
+            else:
+                msg = 'The moviestorm database is currently unstable.  Please try again later.'
+                raise ExtractorError(msg, expected=True)
 
         # scrape WATCH button links from moviestorm page
         self.to_screen(': Extracting watch page urls')

From b7a1a296fbda88aec066d84643b74ea245b3db12 Mon Sep 17 00:00:00 2001
From: Philip Ardery <arderyp@gmail.com>
Date: Sat, 14 Mar 2015 15:03:49 -0400
Subject: [PATCH 07/12] replaced tabs with spaces

---
 youtube_dl/extractor/moviestorm.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/youtube_dl/extractor/moviestorm.py b/youtube_dl/extractor/moviestorm.py
index db6e66ad2..f96957832 100644
--- a/youtube_dl/extractor/moviestorm.py
+++ b/youtube_dl/extractor/moviestorm.py
@@ -117,12 +117,12 @@ class MovieStormIE(InfoExtractor):
                 note=False,
                 errnote='Unable to download link farm watch page',
                 fatal=False
-        	)
+            )
 
-        	if watchpage is not None:
-        	    direct_url = MovieStormHTMLParser.extract_direct_url(watchpage)
-        	    if direct_url:
-        	        self.direct_urls.append(direct_url)
+            if watchpage is not None:
+                direct_url = MovieStormHTMLParser.extract_direct_url(watchpage)
+                if direct_url:
+                    self.direct_urls.append(direct_url)
 
         self.to_screen(': Passing off farmed links to InfoExtractors')
         return list(set(self.direct_urls))

From 3ca77367f9adc97bc88c791b51cae3d83e874d13 Mon Sep 17 00:00:00 2001
From: Philip Ardery <arderyp@gmail.com>
Date: Sat, 14 Mar 2015 15:15:53 -0400
Subject: [PATCH 08/12] removed unnecessary imports from extractor/moviestorm.py

---
 youtube_dl/extractor/moviestorm.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/youtube_dl/extractor/moviestorm.py b/youtube_dl/extractor/moviestorm.py
index f96957832..d6ddd8e82 100644
--- a/youtube_dl/extractor/moviestorm.py
+++ b/youtube_dl/extractor/moviestorm.py
@@ -9,9 +9,7 @@ from .common import InfoExtractor
 from ..utils import ExtractorError
 from ..compat import (
     compat_html_parser,
-    compat_urllib_parse,
-    compat_urllib_request,
-    compat_urlparse,
+    compat_urlparse
 )
 
 class MovieStormHTMLParser(compat_html_parser.HTMLParser):

From 5af03d5cfecdaf3adf24f3ad4d8660a8b0743278 Mon Sep 17 00:00:00 2001
From: Philip Ardery <arderyp@gmail.com>
Date: Sat, 14 Mar 2015 18:34:27 -0400
Subject: [PATCH 09/12] adjusted url handling to pull in all moviestorm urls
 and provide informative error if not a handleable moviestorm url.  This is to
 prevent youtube-dl from falling back on the generic IE for bad moviestorm
 urls, as that will always fail

---
 youtube_dl/extractor/moviestorm.py | 115 ++++++++++++++++++++---------
 1 file changed, 80 insertions(+), 35 deletions(-)

diff --git a/youtube_dl/extractor/moviestorm.py b/youtube_dl/extractor/moviestorm.py
index d6ddd8e82..01ab19faf 100644
--- a/youtube_dl/extractor/moviestorm.py
+++ b/youtube_dl/extractor/moviestorm.py
@@ -17,6 +17,7 @@ class MovieStormHTMLParser(compat_html_parser.HTMLParser):
         self.found_button = False
         self.watch_urls = []
         self.direct_url = False
+        self.series_home_page = False
         compat_html_parser.HTMLParser.__init__(self)
 
     def handle_starttag(self, tag, attrs):
@@ -24,7 +25,7 @@ class MovieStormHTMLParser(compat_html_parser.HTMLParser):
         if tag == 'td' and attrs['class'] == 'link_td':
             self.found_button = True
         elif tag == 'a' and self.found_button:
-            # suppress ishare and other direct links, can't handle now
+            # Suppress ishare and other direct links, can't handle now
             if 'moviestorm' in attrs['href']:
                 self.watch_urls.append(attrs['href'].strip())
         elif tag == 'a' and 'class' in attrs and attrs['class'] == 'real_link':
@@ -34,35 +35,47 @@ class MovieStormHTMLParser(compat_html_parser.HTMLParser):
         if tag == 'td':
             self.found_button = False
 
-    @classmethod
-    def extract_watch_urls(cls, html):
-        p = cls()
-        p.feed(html)
-        p.close()
-        return p.watch_urls
+    def handle_data(self, data):
+        if data.strip() == 'SHOW EPISODES':
+            self.series_home_page = True
 
     @classmethod
-    def extract_direct_url(cls, html):
+    def custom_parse(cls, html, return_variable):
         p = cls()
         p.feed(html)
         p.close()
-        return p.direct_url
+        return getattr(p, return_variable)
 
 class MovieStormIE(InfoExtractor):
-    IE_DESC = 'Movie Storm (link farm)'
-    IE_NAME = 'MovieStorm'
-    _VALID_URL = r'http://moviestorm\.eu/view/(\d+)-watch-(.*)/season-(\d+)/episode-(\d+)'
-    _LINK_FARM = True
-
+    # HANDLER INFO:
     # There are no tests for this IE because the links on any given moviestorm
     # page can dynamically change, and because the actual download/extraction
-    # is ultimately preformed by another IE. An example of an acceptable url to
-    # feed to this IE is: http://moviestorm.eu/view/218-watch-the-simpsons/season-26/episode-1
+    # is ultimately preformed by another IE. Example urls to
+    # feed to this IE are:
+    #
+    #   EPISODE: http://moviestorm.eu/view/5821-watch-portlandia/season-1/episode-1
+    #   MOVIE:   http://moviestorm.eu/view/5269-watch-taken-3-online.html
+    #
+    # If the user provides a series url, like the one below, this IE should detect
+    # and raise an error:
+    #
+    #   SERIES:  http://moviestorm.eu/view/5821-watch-portlandia.html
+    #
+    # In other news, moviestorm's drupal db config is unstable at times retry up to 5
+    # times before giving up, waiting 5 second delay between each retry.
+    #
+    # Also, this IE will catch all links with http://moviestorm.eu urls. If it's an
+    # un-handleable url, an error will be thrown informing the user of appropriate
+    # urls to provide. Not using a more complex regex is meant to prevent unacceptable
+    # moviestorm urls from falling back into the generic IE, as that will always fail on
+    # moviestorm links.
+
+    IE_DESC = 'Movie Storm (link farm)'
+    IE_NAME = 'MovieStorm'
+    _VALID_URL = r'http://moviestorm\.eu'
+    _LINK_FARM = True
     _TEST = False
 
-    # moviestorm's drupal db config is unstable at times
-    # retry up to 5 times before giving up, 5 second delay
-    # between each retry
     retry_count = 0
     max_retries = 5
     retry_wait = 5
@@ -75,7 +88,12 @@ class MovieStormIE(InfoExtractor):
         return (uri, hash, token)
 
     def _real_extract(self, url):
-        # retry loop to capture moviestorm page
+        # Inform user to provide proper moviestorm link
+        if 'watch' not in url:
+            msg = ('The moviestorm handler requires either a movie page link or '
+                'a series episode page link.  Please try again with one of those.')
+            raise ExtractorError(msg, expected=True)
+
         while True:
             if self.retry_count == 0:
                 note = 'Downloading link farm page'
@@ -93,8 +111,21 @@ class MovieStormIE(InfoExtractor):
             )
 
             if farmpage.strip() != 'MySQL server has gone away':
+                series_home_page = MovieStormHTMLParser.custom_parse(
+                    farmpage,
+                    'series_home_page'
+                )
+
+                # Fail if provided series home page
+                if series_home_page:
+                    msg = ('It looks like you provided an show page url.  You must provide '
+                        'an episode page url or movie page url')
+                    raise ExtractorError(msg, expected=True)
+
+                # Success
                 break
 
+            # Continue retrying if moviestorm database is currently unstable
             if self.retry_count < self.max_retries:
                 self.retry_count += 1
                 sleep(self.retry_wait)
@@ -102,25 +133,39 @@ class MovieStormIE(InfoExtractor):
                 msg = 'The moviestorm database is currently unstable.  Please try again later.'
                 raise ExtractorError(msg, expected=True)
 
-        # scrape WATCH button links from moviestorm page
+        # Scrape WATCH button links from moviestorm page
         self.to_screen(': Extracting watch page urls')
-        watch_urls = MovieStormHTMLParser.extract_watch_urls(farmpage)
+        watch_urls = MovieStormHTMLParser.custom_parse(
+            farmpage,
+            'watch_urls'
+        )
 
-        # get direct urls from scraped watch pages
+        # Get direct urls from scraped watch pages
         self.to_screen(': Extracting direct links from watch pages')
-        for watch_url in watch_urls:
-            (_, _, token) = self._parse_target(watch_url)
-            watchpage = self._download_webpage(
-                watch_url, token,
-                note=False,
-                errnote='Unable to download link farm watch page',
-                fatal=False
-            )
+        direct_url_count = 1
 
-            if watchpage is not None:
-                direct_url = MovieStormHTMLParser.extract_direct_url(watchpage)
-                if direct_url:
-                    self.direct_urls.append(direct_url)
+        for watch_url in watch_urls:
+            # Stop after gathering 50 urls, moviestorm sends 503 if
+            # request too many in rapid succession
+            if direct_url_count < 50:
+                (_, _, token) = self._parse_target(watch_url)
+                watchpage = self._download_webpage(
+                    watch_url, token,
+                    note=False,
+                    errnote='Unable to download link farm watch page',
+                    fatal=False
+                )
+
+                if watchpage is not None:
+                    direct_url = MovieStormHTMLParser.custom_parse(
+                        watchpage,
+                        'direct_url'
+                    )
+
+                    if direct_url:
+                        self.direct_urls.append(direct_url)
+
+            direct_url_count += 1
 
         self.to_screen(': Passing off farmed links to InfoExtractors')
         return list(set(self.direct_urls))

From 0ede245462471186b8b47be4286c265d8a2304da Mon Sep 17 00:00:00 2001
From: Philip Ardery <arderyp@gmail.com>
Date: Sat, 14 Mar 2015 18:42:30 -0400
Subject: [PATCH 10/12] updated moviestorm extractor to use proper docstrings

---
 youtube_dl/extractor/moviestorm.py | 45 +++++++++++++++---------------
 1 file changed, 23 insertions(+), 22 deletions(-)

diff --git a/youtube_dl/extractor/moviestorm.py b/youtube_dl/extractor/moviestorm.py
index 01ab19faf..de02ab8fd 100644
--- a/youtube_dl/extractor/moviestorm.py
+++ b/youtube_dl/extractor/moviestorm.py
@@ -47,28 +47,29 @@ class MovieStormHTMLParser(compat_html_parser.HTMLParser):
         return getattr(p, return_variable)
 
 class MovieStormIE(InfoExtractor):
-    # HANDLER INFO:
-    # There are no tests for this IE because the links on any given moviestorm
-    # page can dynamically change, and because the actual download/extraction
-    # is ultimately preformed by another IE. Example urls to
-    # feed to this IE are:
-    #
-    #   EPISODE: http://moviestorm.eu/view/5821-watch-portlandia/season-1/episode-1
-    #   MOVIE:   http://moviestorm.eu/view/5269-watch-taken-3-online.html
-    #
-    # If the user provides a series url, like the one below, this IE should detect
-    # and raise an error:
-    #
-    #   SERIES:  http://moviestorm.eu/view/5821-watch-portlandia.html
-    #
-    # In other news, moviestorm's drupal db config is unstable at times retry up to 5
-    # times before giving up, waiting 5 second delay between each retry.
-    #
-    # Also, this IE will catch all links with http://moviestorm.eu urls. If it's an
-    # un-handleable url, an error will be thrown informing the user of appropriate
-    # urls to provide. Not using a more complex regex is meant to prevent unacceptable
-    # moviestorm urls from falling back into the generic IE, as that will always fail on
-    # moviestorm links.
+    """EXTRACTOR INFO:
+    There are no tests for this IE because the links on any given moviestorm
+    page can dynamically change, and because the actual download/extraction
+    is ultimately preformed by another IE. Example urls to
+    feed to this IE are:
+
+        EPISODE: http://moviestorm.eu/view/5821-watch-portlandia/season-1/episode-1
+        MOVIE:   http://moviestorm.eu/view/5269-watch-taken-3-online.html
+
+    If the user provides a series url, like the one below, this IE should detect
+    and raise an error:
+
+        SERIES:  http://moviestorm.eu/view/5821-watch-portlandia.html
+
+    In other news, moviestorm's drupal db config is unstable at times retry up to 5
+    times before giving up, waiting 5 second delay between each retry.
+
+    Also, this IE will catch all links with http://moviestorm.eu urls. If it's an
+    un-handleable url, an error will be thrown informing the user of appropriate
+    urls to provide. Not using a more complex regex is meant to prevent unacceptable
+    moviestorm urls from falling back into the generic IE, as that will always fail on
+    moviestorm links.
+    """
 
     IE_DESC = 'Movie Storm (link farm)'
     IE_NAME = 'MovieStorm'

From 9fcc22c0f2a50f47cf3bc5684721310570884fdb Mon Sep 17 00:00:00 2001
From: Philip Ardery <arderyp@gmail.com>
Date: Sat, 14 Mar 2015 21:26:11 -0400
Subject: [PATCH 11/12] fixed styling issues that flake8 didn't like

---
 youtube_dl/YoutubeDL.py            |  2 +-
 youtube_dl/extractor/moviestorm.py | 11 ++++++-----
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 039bc49fb..b47b74733 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -638,7 +638,7 @@ class YoutubeDL(object):
 
                 # ignore non-familiar links
                 if c != 'GenericIE' and c != 'MovieStormIE' and ie.suitable(farmed_url):
-                    familiar_farmed_urls.append( [ie, farmed_url] )
+                    familiar_farmed_urls.append([ie, farmed_url])
 
         for tuple in familiar_farmed_urls:
             ie = tuple[0]
diff --git a/youtube_dl/extractor/moviestorm.py b/youtube_dl/extractor/moviestorm.py
index de02ab8fd..00cce76ed 100644
--- a/youtube_dl/extractor/moviestorm.py
+++ b/youtube_dl/extractor/moviestorm.py
@@ -2,7 +2,6 @@
 from __future__ import unicode_literals
 
 import os.path
-import re
 from time import sleep
 
 from .common import InfoExtractor
@@ -12,6 +11,7 @@ from ..compat import (
     compat_urlparse
 )
 
+
 class MovieStormHTMLParser(compat_html_parser.HTMLParser):
     def __init__(self):
         self.found_button = False
@@ -46,6 +46,7 @@ class MovieStormHTMLParser(compat_html_parser.HTMLParser):
         p.close()
         return getattr(p, return_variable)
 
+
 class MovieStormIE(InfoExtractor):
     """EXTRACTOR INFO:
     There are no tests for this IE because the links on any given moviestorm
@@ -92,7 +93,7 @@ class MovieStormIE(InfoExtractor):
         # Inform user to provide proper moviestorm link
         if 'watch' not in url:
             msg = ('The moviestorm handler requires either a movie page link or '
-                'a series episode page link.  Please try again with one of those.')
+                   'a series episode page link.  Please try again with one of those.')
             raise ExtractorError(msg, expected=True)
 
         while True:
@@ -100,8 +101,8 @@ class MovieStormIE(InfoExtractor):
                 note = 'Downloading link farm page'
             else:
                 note = ('Unstable db connection, retying again in %s seconds '
-                    '[%s/%s]' % (self.retry_wait, self.retry_count,
-                    self.max_retries))
+                        '[%s/%s]' % (self.retry_wait, self.retry_count,
+                                     self.max_retries))
 
             (_, _, token) = self._parse_target(url)
             farmpage = self._download_webpage(
@@ -120,7 +121,7 @@ class MovieStormIE(InfoExtractor):
                 # Fail if provided series home page
                 if series_home_page:
                     msg = ('It looks like you provided an show page url.  You must provide '
-                        'an episode page url or movie page url')
+                           'an episode page url or movie page url')
                     raise ExtractorError(msg, expected=True)
 
                 # Success

From 8f664d93690b0cc11b342c5f67c3ffc315fafa02 Mon Sep 17 00:00:00 2001
From: Philip Ardery <arderyp@gmail.com>
Date: Sat, 14 Mar 2015 22:54:13 -0400
Subject: [PATCH 12/12] added variable for max direct urls scraped

---
 youtube_dl/extractor/moviestorm.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/moviestorm.py b/youtube_dl/extractor/moviestorm.py
index 00cce76ed..b492449da 100644
--- a/youtube_dl/extractor/moviestorm.py
+++ b/youtube_dl/extractor/moviestorm.py
@@ -82,6 +82,7 @@ class MovieStormIE(InfoExtractor):
     max_retries = 5
     retry_wait = 5
     direct_urls = []
+    direct_url_max = 50
 
     def _parse_target(self, target):
         uri = compat_urlparse.urlparse(target)
@@ -149,7 +150,7 @@ class MovieStormIE(InfoExtractor):
         for watch_url in watch_urls:
             # Stop after gathering 50 urls, moviestorm sends 503 if
             # request too many in rapid succession
-            if direct_url_count < 50:
+            if direct_url_count < self.direct_url_max:
                 (_, _, token) = self._parse_target(watch_url)
                 watchpage = self._download_webpage(
                     watch_url, token,