Merge remote-tracking branch 'upstream/master'

2025-02-19 06:32:53 +08:00 · 2013-08-26 15:16:13 -07:00 · 2013-08-26 15:16:13 -07:00 · 99859d436c
commit 99859d436c
parent 39c6f507df 1b01e2b085
10 changed files with 127 additions and 18 deletions
--- a/devscripts/youtube_genalgo.py
+++ b/devscripts/youtube_genalgo.py
@ -26,9 +26,9 @@ tests = [
    # 85
    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<",
     ".>/?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ0q876543r1mnbvcx9asdfghjklpoiuyt2"),
-    # 84
+    # 84 - vflh9ybst 2013/08/23 (sporadic)
    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<",
-     "<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ09876543q1mnbvcxzasdfghjklpoiuew2"),
+     "yuioplkjhgfdsazxcvbnm1234567890QWERrYUIOPLKqHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<"),
    # 83
    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<",
     ".>/?;}[{=+_)(*&^%<#!MNBVCXZASPFGHJKLwOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytreq"),
--- a/youtube_dl/extractor/init.py
+++ b/youtube_dl/extractor/init.py
@ -29,6 +29,7 @@ from .gametrailers import GametrailersIE
 from .generic import GenericIE
 from .googleplus import GooglePlusIE
 from .googlesearch import GoogleSearchIE
 from .hark import HarkIE
 from .hotnewhiphop import HotNewHipHopIE
 from .howcast import HowcastIE
 from .hypem import HypemIE
@ -57,6 +58,7 @@ from .pornotube import PornotubeIE
 from .rbmaradio import RBMARadioIE
 from .redtube import RedTubeIE
 from .ringtv import RingTVIE
 from .ro220 import Ro220IE
 from .roxwel import RoxwelIE
 from .rtlnow import RTLnowIE
 from .sina import SinaIE
@ -116,12 +118,14 @@ _ALL_CLASSES = [
 ]
 _ALL_CLASSES.append(GenericIE)
 def gen_extractors():
    """ Return a list of an instance of every supported extractor.
    The order does matter; the first extractor matched is the one handling the URL.
    """
    return [klass() for klass in _ALL_CLASSES]
 def get_info_extractor(ie_name):
    """Returns the info extractor class with the given ie_name"""
    return globals()[ie_name+'IE']
--- a/youtube_dl/extractor/c56.py
+++ b/youtube_dl/extractor/c56.py
@ -12,8 +12,8 @@ class C56IE(InfoExtractor):
    _TEST ={
        u'url': u'http://www.56.com/u39/v_OTM0NDA3MTY.html',
-        u'file': u'93440716.mp4',
+        u'file': u'93440716.flv',
-        u'md5': u'9dc07b5c8e978112a6441f9e75d2b59e',
+        u'md5': u'e59995ac63d0457783ea05f93f12a866',
        u'info_dict': {
            u'title': u'网事知多少 第32期：车怒',
        },
--- a/youtube_dl/extractor/dailymotion.py
+++ b/youtube_dl/extractor/dailymotion.py
@ -21,7 +21,7 @@ class DailymotionIE(InfoExtractor):
        u'file': u'x33vw9.mp4',
        u'md5': u'392c4b85a60a90dc4792da41ce3144eb',
        u'info_dict': {
-            u"uploader": u"Alex and Van .", 
+            u"uploader": u"Amphora Alex and Van .", 
            u"title": u"Tutoriel de Youtubeur\"DL DES VIDEO DE YOUTUBE\""
        }
    }
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@ -7,12 +7,14 @@ from .common import InfoExtractor
 from ..utils import (
    compat_urllib_error,
    compat_urllib_parse,
    compat_urllib_parse_urlparse,
    compat_urllib_request,
    ExtractorError,
 )
 from .brightcove import BrightcoveIE
 class GenericIE(InfoExtractor):
    IE_DESC = u'Generic downloader that works on some sites'
    _VALID_URL = r'.*'
@ -23,7 +25,7 @@ class GenericIE(InfoExtractor):
            u'file': u'13601338388002.mp4',
            u'md5': u'85b90ccc9d73b4acd9138d3af4c27f89',
            u'info_dict': {
-                u"uploader": u"www.hodiho.fr", 
+                u"uploader": u"www.hodiho.fr",
                u"title": u"R\u00e9gis plante sa Jeep"
            }
        },
@ -124,7 +126,7 @@ class GenericIE(InfoExtractor):
            raise ExtractorError(u'Invalid URL: %s' % url)
        self.report_extraction(video_id)
-        # Look for BrigthCove:
+        # Look for BrightCove:
        m_brightcove = re.search(r'<object.+?class=([\'"]).*?BrightcoveExperience.*?\1.+?</object>', webpage, re.DOTALL)
        if m_brightcove is not None:
            self.to_screen(u'Brightcove video detected.')
@ -161,6 +163,10 @@ class GenericIE(InfoExtractor):
            raise ExtractorError(u'Invalid URL: %s' % url)
        video_url = compat_urllib_parse.unquote(mobj.group(1))
        if video_url.startswith('//'):
            video_url = compat_urllib_parse_urlparse(url).scheme + ':' + video_url
        if '://' not in video_url:
            video_url = url + ('' if url.endswith('/') else '/') + video_url
        video_id = os.path.basename(video_url)
        # here's a fun little line of code for you:
--- a/youtube_dl/extractor/hark.py
+++ b/youtube_dl/extractor/hark.py
@ -0,0 +1,35 @@
 # -*- coding: utf-8 -*-
 import re
 from .common import InfoExtractor
 from ..utils import determine_ext
 class HarkIE(InfoExtractor):
    _VALID_URL = r'https?://www\.hark\.com/clips/(.+?)-.+'
    _TEST = {
        u'url': u'http://www.hark.com/clips/mmbzyhkgny-obama-beyond-the-afghan-theater-we-only-target-al-qaeda-on-may-23-2013',
        u'file': u'mmbzyhkgny.mp3',
        u'md5': u'6783a58491b47b92c7c1af5a77d4cbee',
        u'info_dict': {
            u"title": u"Obama: 'Beyond The Afghan Theater, We Only Target Al Qaeda' On May 23, 2013 ",
        }
    }
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group(1)
        embed_url = "http://www.hark.com/clips/%s/homepage_embed" %(video_id)
        webpage = self._download_webpage(embed_url, video_id)
        final_url = self._search_regex(r'src="(.+?).mp3"',
                                webpage, 'video url')+'.mp3'
        title = self._html_search_regex(r'<title>(.+?)</title>',
                                webpage, 'video title').replace(' Sound Clip and Quote - Hark','').replace(
                                'Sound Clip , Quote, MP3, and Ringtone - Hark','')
        return {'id': video_id,
                'url' : final_url,
                'title': title,
                'ext': determine_ext(final_url),
                }
--- a/youtube_dl/extractor/ro220.py
+++ b/youtube_dl/extractor/ro220.py
@ -0,0 +1,42 @@
 import re
 from .common import InfoExtractor
 from ..utils import (
    clean_html,
    compat_parse_qs,
 )
 class Ro220IE(InfoExtractor):
    IE_NAME = '220.ro'
    _VALID_URL = r'(?x)(?:https?://)?(?:www\.)?220\.ro/(?P<category>[^/]+)/(?P<shorttitle>[^/]+)/(?P<video_id>[^/]+)'
    _TEST = {
        u"url": u"http://www.220.ro/sport/Luati-Le-Banii-Sez-4-Ep-1/LYV6doKo7f/",
        u'file': u'LYV6doKo7f.mp4',
        u'md5': u'03af18b73a07b4088753930db7a34add',
        u'info_dict': {
            u"title": u"Luati-le Banii sez 4 ep 1",
            u"description": u"Iata-ne reveniti dupa o binemeritata vacanta. Va astept si pe Facebook cu pareri si comentarii.",
        }
    }
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('video_id')
        webpage = self._download_webpage(url, video_id)
        flashVars_str = self._search_regex(
            r'<param name="flashVars" value="([^"]+)"',
            webpage, u'flashVars')
        flashVars = compat_parse_qs(flashVars_str)
        info = {
            '_type': 'video',
            'id': video_id,
            'ext': 'mp4',
            'url': flashVars['videoURL'][0],
            'title': flashVars['title'][0],
            'description': clean_html(flashVars['desc'][0]),
            'thumbnail': flashVars['preview'][0],
        }
        return info
--- a/youtube_dl/extractor/rtlnow.py
+++ b/youtube_dl/extractor/rtlnow.py
@ -8,8 +8,8 @@ from ..utils import (
 )
 class RTLnowIE(InfoExtractor):
-    """Information Extractor for RTLnow, RTL2now and VOXnow"""
+    """Information Extractor for RTL NOW, RTL2 NOW, SUPER RTL NOW and VOX NOW"""
-    _VALID_URL = r'(?:http://)?(?P<url>(?P<base_url>rtl(?:(?P<is_rtl2>2)|-)now\.rtl(?(is_rtl2)2|)\.de/|(?:www\.)?voxnow\.de/)[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P<video_id>[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)'
+    _VALID_URL = r'(?:http://)?(?P<url>(?P<base_url>rtl-now\.rtl\.de/|rtl2now\.rtl2\.de/|(?:www\.)?voxnow\.de/|(?:www\.)?superrtlnow\.de/)[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P<video_id>[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)'
    _TESTS = [{
        u'url': u'http://rtl-now.rtl.de/ahornallee/folge-1.php?film_id=90419&player=1&season=1',
        u'file': u'90419.flv',
@ -48,6 +48,19 @@ class RTLnowIE(InfoExtractor):
        u'params': {
            u'skip_download': True,
        },
    },
    {
        u'url': u'http://superrtlnow.de/medicopter-117/angst.php?film_id=99205&player=1',
        u'file': u'99205.flv',
        u'info_dict': {
            u'upload_date': u'20080928', 
            u'title': u'Medicopter 117 - Angst!',
            u'description': u'Angst!',
            u'thumbnail': u'http://autoimg.static-fra.de/superrtlnow/287529/1500x1500/image2.jpg'
        },
        u'params': {
            u'skip_download': True,
        },
    }]
    def _real_extract(self,url):
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@ -427,7 +427,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
        elif len(s) == 85:
            return s[83:34:-1] + s[0] + s[33:27:-1] + s[3] + s[26:19:-1] + s[34] + s[18:3:-1] + s[27]
        elif len(s) == 84:
-            return s[83:27:-1] + s[0] + s[26:5:-1] + s[2:0:-1] + s[27]
+            return s[5:40] + s[3] + s[41:48] + s[0] + s[49:84]
        elif len(s) == 83:
            return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0]
        elif len(s) == 82:
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@ -476,7 +476,7 @@ def formatSeconds(secs):
 def make_HTTPS_handler(opts):
    if sys.version_info < (3,2):
        # Python's 2.x handler is very simplistic
-        return compat_urllib_request.HTTPSHandler()
+        return YoutubeDLHandlerHTTPS()
    else:
        import ssl
        context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
@ -485,7 +485,7 @@ def make_HTTPS_handler(opts):
        context.verify_mode = (ssl.CERT_NONE
                               if opts.no_check_certificate
                               else ssl.CERT_REQUIRED)
-        return compat_urllib_request.HTTPSHandler(context=context)
+        return YoutubeDLHandlerHTTPS(context=context)
 class ExtractorError(Exception):
    """Error during info extraction."""
@ -569,7 +569,8 @@ class ContentTooShortError(Exception):
        self.downloaded = downloaded
        self.expected = expected
-class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
+
 class YoutubeDLHandler_Template:  # Old-style class, like HTTPHandler
    """Handler for HTTP requests and responses.
    This class, when installed with an OpenerDirector, automatically adds
@ -602,8 +603,8 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
        ret.code = code
        return ret
-    def http_request(self, req):
+    def _http_request(self, req):
-        for h,v in std_headers.items():
+        for h, v in std_headers.items():
            if h in req.headers:
                del req.headers[h]
            req.add_header(h, v)
@ -618,7 +619,7 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
            del req.headers['Youtubedl-user-agent']
        return req
-    def http_response(self, req, resp):
+    def _http_response(self, req, resp):
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
@ -632,8 +633,16 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
            resp.msg = old_resp.msg
        return resp
-    https_request = http_request
+
-    https_response = http_response
+class YoutubeDLHandler(YoutubeDLHandler_Template, compat_urllib_request.HTTPHandler):
    http_request = YoutubeDLHandler_Template._http_request
    http_response = YoutubeDLHandler_Template._http_response
 class YoutubeDLHandlerHTTPS(YoutubeDLHandler_Template, compat_urllib_request.HTTPSHandler):
    https_request = YoutubeDLHandler_Template._http_request
    https_response = YoutubeDLHandler_Template._http_response
 def unified_strdate(date_str):
    """Return a string with the date in the format YYYYMMDD"""