[ku6] Add new extractor

2024-11-22 06:02:52 +08:00 · 2014-05-30 21:15:59 -07:00 · 2014-05-30 21:15:59 -07:00 · 05741e05d9
commit 05741e05d9
parent 9aa6637644
2 changed files with 39 additions and 0 deletions
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -142,6 +142,7 @@ from .khanacademy import KhanAcademyIE
 from .kickstarter import KickStarterIE
 from .keek import KeekIE
 from .kontrtube import KontrTubeIE
 from .ku6 import Ku6IE
 from .la7 import LA7IE
 from .lifenews import LifeNewsIE
 from .liveleak import LiveLeakIE
--- a/youtube_dl/extractor/ku6.py
+++ b/youtube_dl/extractor/ku6.py
@@ -0,0 +1,38 @@
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 class Ku6IE(InfoExtractor):
    """Information extractor for videos hosted on v.ku6.com."""

    # The named group `id` captures the video identifier; `(?:\.)*` accepts
    # any number of dots (including none) between the id and `html`, which is
    # what lets the `...html` form used in `_TEST` match.
    _VALID_URL = r'http://v\.ku6\.com/show/(?P<id>[a-zA-Z0-9\-\_]+)(?:\.)*html'
    _TEST = {
        'url': 'http://v.ku6.com/show/JG-8yS14xzBr4bCn1pu0xw...html',
        'info_dict': {
            'id': 'JG-8yS14xzBr4bCn1pu0xw',
            'ext': 'f4v',
            'title': 'techniques test',
        }
    }

    def _real_extract(self, url):
        """Extract the id, title and media URL for a ku6 video page.

        The title is read from the page's ``<h1 title=...>`` element and the
        media URL from the ``data.f`` field of the JSON document served at
        ``/fetchVideo4Player/<id>.html``.

        Returns a dict with the keys ``id``, ``title`` and ``url``.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)
        # Use the HTML-aware helper so entities/markup inside the heading do
        # not leak into the title.
        title = self._html_search_regex(
            r'<h1 title=.*>(.*?)</h1>', webpage, 'title')

        data_url = 'http://v.ku6.com/fetchVideo4Player/%s.html' % video_id
        player_data = self._download_json(data_url, video_id)

        return {
            'id': video_id,
            'title': title,
            'url': player_data['data']['f'],
            # TODO more properties (see youtube_dl/extractor/common.py)
        }