youtube-dl/youtube_dl/extractor/dailymotion.py

import re
import json
import itertools

from .common import InfoExtractor
from .subtitles import SubtitlesInfoExtractor

from ..utils import (
    compat_urllib_request,
    compat_str,
    orderedSet,
    str_to_int,
    int_or_none,
    ExtractorError,
    unescapeHTML,
)

class DailymotionBaseInfoExtractor(InfoExtractor):
    """Shared base class providing the request builder used by the Dailymotion extractors."""

    @staticmethod
    def _build_request(url):
        """Build a request with the family filter disabled.

        Both cookies are sent in a single ``Cookie`` header.  A request object
        keeps only one value per header name, so adding the two cookies with
        two separate ``add_header`` calls would leave only the last one
        (``ff=off``) and silently drop ``family_filter=off``.
        """
        request = compat_urllib_request.Request(url)
        request.add_header('Cookie', 'family_filter=off; ff=off')
        return request

class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
    """Information Extractor for Dailymotion"""

    _VALID_URL = r'(?i)(?:https?://)?(?:(www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(embed|#)/)?video/(?P<id>[^/?_]+)'
    IE_NAME = u'dailymotion'

    # (key in the embed page's ``info`` object, format_id) pairs; formats are
    # appended to the result in this order.
    _FORMATS = [
        (u'stream_h264_ld_url', u'ld'),
        (u'stream_h264_url', u'standard'),
        (u'stream_h264_hq_url', u'hq'),
        (u'stream_h264_hd_url', u'hd'),
        # NOTE: this id used to be misspelled 'hd180'
        (u'stream_h264_hd1080_url', u'hd1080'),
    ]

    _TESTS = [
        {
            u'url': u'http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech',
            u'file': u'x33vw9.mp4',
            u'md5': u'392c4b85a60a90dc4792da41ce3144eb',
            u'info_dict': {
                u"uploader": u"Amphora Alex and Van .",
                u"title": u"Tutoriel de Youtubeur\"DL DES VIDEO DE YOUTUBE\""
            }
        },
        # Vevo video
        {
            u'url': u'http://www.dailymotion.com/video/x149uew_katy-perry-roar-official_musi',
            u'file': u'USUV71301934.mp4',
            u'info_dict': {
                u'title': u'Roar (Official)',
                u'uploader': u'Katy Perry',
                u'upload_date': u'20130905',
            },
            u'params': {
                u'skip_download': True,
            },
            u'skip': u'VEVO is only available in some countries',
        },
        # age-restricted video
        {
            u'url': u'http://www.dailymotion.com/video/xyh2zz_leanna-decker-cyber-girl-of-the-year-desires-nude-playboy-plus_redband',
            u'file': u'xyh2zz.mp4',
            u'md5': u'0d667a7b9cebecc3c89ee93099c4159d',
            u'info_dict': {
                u'title': 'Leanna Decker - Cyber Girl Of The Year Desires Nude [Playboy Plus]',
                u'uploader': 'HotWaves1012',
                u'age_limit': 18,
            }

        }
    ]

    def _real_extract(self, url):
        """Extract the information of a single Dailymotion video.

        Returns:
            * a ``url_result`` pointing at the Vevo extractor when the page
              only embeds a Vevo video,
            * ``None`` when the ``listsubtitles`` downloader parameter is set
              (the available subtitles are listed instead),
            * otherwise a dict with the id, formats, title and the metadata
              that could be found.

        Raises:
            ExtractorError: if the embed page reports an error, or none of
                the keys in ``_FORMATS`` carries a URL.
        """
        # Extract the id from the URL
        mobj = re.match(self._VALID_URL, url)

        video_id = mobj.group('id')

        # Whatever URL variant matched (embed, touch, ...), always fetch the
        # regular video page.
        url = 'http://www.dailymotion.com/video/%s' % video_id

        # Retrieve video webpage to extract further information
        request = self._build_request(url)
        webpage = self._download_webpage(request, video_id)

        # Extract URL, uploader and title from webpage
        self.report_extraction(video_id)

        # It may just embed a vevo video:
        m_vevo = re.search(
            r'<link rel="video_src" href="[^"]*?vevo.com[^"]*?videoId=(?P<id>[\w]*)',
            webpage)
        if m_vevo is not None:
            vevo_id = m_vevo.group('id')
            self.to_screen(u'Vevo video detected: %s' % vevo_id)
            return self.url_result(u'vevo:%s' % vevo_id, ie='Vevo')

        age_limit = self._rta_search(webpage)

        # The page shows the date as DD-MM-YYYY; reorder it to YYYYMMDD
        video_upload_date = None
        mobj = re.search(r'<div class="[^"]*uploaded_cont[^"]*" title="[^"]*">([0-9]{2})-([0-9]{2})-([0-9]{4})</div>', webpage)
        if mobj is not None:
            video_upload_date = mobj.group(3) + mobj.group(2) + mobj.group(1)

        # The stream URLs live in a JSON object assigned to ``var info`` on
        # the embed page.
        embed_url = 'http://www.dailymotion.com/embed/video/%s' % video_id
        embed_page = self._download_webpage(embed_url, video_id,
                                            u'Downloading embed page')
        info = self._search_regex(r'var info = ({.*?}),$', embed_page,
            'video info', flags=re.MULTILINE)
        info = json.loads(info)
        if info.get('error') is not None:
            msg = 'Couldn\'t get video, Dailymotion says: %s' % info['error']['title']
            raise ExtractorError(msg, expected=True)

        formats = []
        for (key, format_id) in self._FORMATS:
            video_url = info.get(key)
            if video_url is not None:
                # The resolution, when present, is encoded in the stream URL
                m_size = re.search(r'H264-(\d+)x(\d+)', video_url)
                if m_size is not None:
                    width, height = map(int_or_none, (m_size.group(1), m_size.group(2)))
                else:
                    width, height = None, None
                formats.append({
                    'url': video_url,
                    'ext': 'mp4',
                    'format_id': format_id,
                    'width': width,
                    'height': height,
                })
        if not formats:
            raise ExtractorError(u'Unable to extract video URL')

        # subtitles
        video_subtitles = self.extract_subtitles(video_id, webpage)
        if self._downloader.params.get('listsubtitles', False):
            self._list_available_subtitles(video_id, webpage)
            return

        view_count = self._search_regex(
            r'video_views_count[^>]+>\s+([\d\.,]+)', webpage, u'view count', fatal=False)
        if view_count is not None:
            view_count = str_to_int(view_count)

        return {
            'id':       video_id,
            'formats': formats,
            # Uploader and thumbnail are optional metadata: a missing key
            # yields None instead of aborting an otherwise successful
            # extraction with a KeyError.
            'uploader': info.get('owner.screenname'),
            'upload_date':  video_upload_date,
            'title':    self._og_search_title(webpage),
            'subtitles':    video_subtitles,
            'thumbnail': info.get('thumbnail_url'),
            'age_limit': age_limit,
            'view_count': view_count,
        }

    def _get_available_subtitles(self, video_id, webpage):
        """Return a dict mapping subtitle language to subtitle URL.

        The list is requested from the Dailymotion API; ``webpage`` is not
        used here.  An empty dict is returned (after a warning) when the
        download fails or the video has no subtitles.
        """
        try:
            sub_list = self._download_webpage(
                'https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id,
                video_id, note=False)
        except ExtractorError as err:
            self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
            return {}
        info = json.loads(sub_list)
        # A response lacking 'total' or 'list' is treated as "no subtitles"
        # rather than raising a KeyError.
        if info.get('total', 0) > 0:
            sub_lang_list = dict((l['language'], l['url']) for l in info.get('list', []))
            return sub_lang_list
        self._downloader.report_warning(u'video doesn\'t have subtitles')
        return {}


class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
    """Extractor for Dailymotion playlist pages."""

    IE_NAME = u'dailymotion:playlist'
    _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>.+?)/'
    # Matched against each listing page; when it is absent the page is the last one
    _MORE_PAGES_INDICATOR = r'(?s)<div class="pages[^"]*">.*?<a\s+class="[^"]*?icon-arrow_right[^"]*?"'
    # Filled with (listing id, page number)
    _PAGE_TEMPLATE = 'https://www.dailymotion.com/playlist/%s/%s'

    def _extract_entries(self, id):
        """Collect the videos of every page of the listing ``id``.

        Pages are requested one after another, starting at page 1, until a
        page no longer matches ``_MORE_PAGES_INDICATOR``.  Returns a list
        with one ``url_result`` per distinct video id, in order of first
        appearance.
        """
        collected_ids = []
        page_number = 0
        while True:
            page_number += 1
            page_request = self._build_request(
                self._PAGE_TEMPLATE % (id, page_number))
            page = self._download_webpage(
                page_request, id, u'Downloading page %s' % page_number)

            collected_ids += re.findall(r'data-xid="(.+?)"', page)

            if not re.search(self._MORE_PAGES_INDICATOR, page):
                break

        entries = []
        for video_id in orderedSet(collected_ids):
            video_url = 'http://www.dailymotion.com/video/%s' % video_id
            entries.append(self.url_result(video_url, 'Dailymotion'))
        return entries

    def _real_extract(self, url):
        """Return a playlist result for the playlist at ``url``."""
        playlist_id = re.match(self._VALID_URL, url).group('id')
        # The playlist page itself is only fetched for its title
        landing_page = self._download_webpage(url, playlist_id)
        playlist_title = self._og_search_title(landing_page)
        playlist_entries = self._extract_entries(playlist_id)

        return {
            '_type': 'playlist',
            'id': playlist_id,
            'title': playlist_title,
            'entries': playlist_entries,
        }


class DailymotionUserIE(DailymotionPlaylistIE):
    """Extractor for the videos listed on a Dailymotion user page."""

    IE_NAME = u'dailymotion:user'
    _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/user/(?P<user>[^/]+)'
    # Filled with (user, page number) by the inherited _extract_entries
    _PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s'

    def _real_extract(self, url):
        """Return a playlist result holding the videos of the user at ``url``.

        The playlist id is the user name taken from the URL; the title is
        the name found in the ``nav-image`` link of the user page.
        """
        user = re.match(self._VALID_URL, url).group('user')
        profile_page = self._download_webpage(url, user)

        # The link pointing back to the user's own page carries the name in
        # its title attribute.
        name_pattern = (
            r'<a class="nav-image" title="([^"]+)" href="/%s">' % re.escape(user))
        found_name = self._html_search_regex(
            name_pattern, profile_page, u'user', flags=re.DOTALL)
        full_user = unescapeHTML(found_name)

        user_entries = self._extract_entries(user)
        return {
            '_type': 'playlist',
            'id': user,
            'title': full_user,
            'entries': user_entries,
        }
Move DailyMotion into its own file 2013-06-24 02:09:47 +08:00			`import re`
Dailymotion: fix the download of the video in the max quality (closes #986) 2013-07-05 20:15:26 +08:00			`import json`
[dailymotion] Add an extractor for Dailymotion playlists 2013-07-29 18:07:38 +08:00			`import itertools`
Move DailyMotion into its own file 2013-06-24 02:09:47 +08:00
			`from .common import InfoExtractor`
[subtitles] Simplify the extraction of subtitles in subclasses and remove NoAutoSubtitlesInfoExtractor Subclasses just need to call the method extract_subtitles, which will call _extract_subtitles and _request_automatic_caption Now the default implementation of _request_automatic_caption returns {}. 2013-09-11 22:05:49 +08:00			`from .subtitles import SubtitlesInfoExtractor`
[dailymotion] Added support for subtitles + new InfoExtractor for generic subtitle download. The idea is that all subtitle downloaders must descend from SubtitlesIE and implement only three basic methods to achieve the complete subtitle download functionality. This will allow to reduce the code in YoutubeIE once it is rewritten. 2013-08-08 00:59:11 +08:00
Move DailyMotion into its own file 2013-06-24 02:09:47 +08:00			`from ..utils import (`
			`compat_urllib_request,`
[dailymotion] Added support for subtitles + new InfoExtractor for generic subtitle download. The idea is that all subtitle downloaders must descend from SubtitlesIE and implement only three basic methods to achieve the complete subtitle download functionality. This will allow to reduce the code in YoutubeIE once it is rewritten. 2013-08-08 00:59:11 +08:00			`compat_str,`
[dailymotion] Fix playlist extraction The html code has changed, make the video ids extraction more solid. 2013-10-04 20:07:29 +08:00			`orderedSet,`
[dailymotion] Extract view count (#1895) 2013-12-06 20:36:36 +08:00			`str_to_int,`
[dailymotion] Convert width and height fields from strings to integers 2014-03-05 05:24:38 +08:00			`int_or_none,`
Move DailyMotion into its own file 2013-06-24 02:09:47 +08:00			`ExtractorError,`
[dailymotion] Fix user playlist extraction 2014-04-23 20:42:34 +08:00			`unescapeHTML,`
Move DailyMotion into its own file 2013-06-24 02:09:47 +08:00			`)`

[dailymotion] Disable the family filter in the playlists (fixes #1524) 2013-09-29 18:44:02 +08:00			`class DailymotionBaseInfoExtractor(InfoExtractor):`
			`@staticmethod`
			`def _build_request(url):`
			`"""Build a request with the family filter disabled"""`
			`request = compat_urllib_request.Request(url)`
			`request.add_header('Cookie', 'family_filter=off')`
[dailymotion] Fix support for age-restricted videos (Fixes #1688) 2013-10-31 07:20:49 +08:00			`request.add_header('Cookie', 'ff=off')`
[dailymotion] Disable the family filter in the playlists (fixes #1524) 2013-09-29 18:44:02 +08:00			`return request`
[dailymotion] Added support for subtitles + new InfoExtractor for generic subtitle download. The idea is that all subtitle downloaders must descend from SubtitlesIE and implement only three basic methods to achieve the complete subtitle download functionality. This will allow to reduce the code in YoutubeIE once it is rewritten. 2013-08-08 00:59:11 +08:00
[dailymotion] Disable the family filter in the playlists (fixes #1524) 2013-09-29 18:44:02 +08:00			`class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):`
Move DailyMotion into its own file 2013-06-24 02:09:47 +08:00			`"""Information Extractor for Dailymotion"""`

[daylimotion] Add support for urls from the mobile site (fixes #1953) It uses the 'touch' subdomain and adds a '#' before 'video' 2013-12-14 21:20:12 +08:00			`_VALID_URL = r'(?i)(?:https?://)?(?:(www\|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(embed\|#)/)?video/(?P<id>[^/?_]+)'`
Move DailyMotion into its own file 2013-06-24 02:09:47 +08:00			`IE_NAME = u'dailymotion'`
[dailymotion] Extract all the available formats (closes #1028) 2013-10-23 23:33:38 +08:00
			`_FORMATS = [`
			`(u'stream_h264_ld_url', u'ld'),`
			`(u'stream_h264_url', u'standard'),`
			`(u'stream_h264_hq_url', u'hq'),`
			`(u'stream_h264_hd_url', u'hd'),`
			`(u'stream_h264_hd1080_url', u'hd180'),`
			`]`

[dailymotion] Detect vevo videos (fixes #1532) All videos from the Vevo user, just embed videos from vevo.com 2013-10-01 21:05:41 +08:00			`_TESTS = [`
			`{`
			`u'url': u'http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech',`
			`u'file': u'x33vw9.mp4',`
			`u'md5': u'392c4b85a60a90dc4792da41ce3144eb',`
			`u'info_dict': {`
			`u"uploader": u"Amphora Alex and Van .",`
			`u"title": u"Tutoriel de Youtubeur\"DL DES VIDEO DE YOUTUBE\""`
			`}`
			`},`
			`# Vevo video`
			`{`
			`u'url': u'http://www.dailymotion.com/video/x149uew_katy-perry-roar-official_musi',`
			`u'file': u'USUV71301934.mp4',`
			`u'info_dict': {`
			`u'title': u'Roar (Official)',`
			`u'uploader': u'Katy Perry',`
			`u'upload_date': u'20130905',`
			`},`
			`u'params': {`
			`u'skip_download': True,`
			`},`
			`u'skip': u'VEVO is only available in some countries',`
			`},`
[dailymotion] Fix support for age-restricted videos (Fixes #1688) 2013-10-31 07:20:49 +08:00			`# age-restricted video`
			`{`
			`u'url': u'http://www.dailymotion.com/video/xyh2zz_leanna-decker-cyber-girl-of-the-year-desires-nude-playboy-plus_redband',`
			`u'file': u'xyh2zz.mp4',`
			`u'md5': u'0d667a7b9cebecc3c89ee93099c4159d',`
			`u'info_dict': {`
			`u'title': 'Leanna Decker - Cyber Girl Of The Year Desires Nude [Playboy Plus]',`
			`u'uploader': 'HotWaves1012',`
			`u'age_limit': 18,`
			`}`

			`}`
[dailymotion] Detect vevo videos (fixes #1532) All videos from the Vevo user, just embed videos from vevo.com 2013-10-01 21:05:41 +08:00			`]`
Move DailyMotion into its own file 2013-06-24 02:09:47 +08:00
			`def _real_extract(self, url):`
			`# Extract id and simplified title from URL`
			`mobj = re.match(self._VALID_URL, url)`

[daylimotion] Add support for urls from the mobile site (fixes #1953) It uses the 'touch' subdomain and adds a '#' before 'video' 2013-12-14 21:20:12 +08:00			`video_id = mobj.group('id')`
Move DailyMotion into its own file 2013-06-24 02:09:47 +08:00
[daylimotion] accept embed urls (fixes #1386) 2013-09-07 00:36:07 +08:00			`url = 'http://www.dailymotion.com/video/%s' % video_id`
Move DailyMotion into its own file 2013-06-24 02:09:47 +08:00
			`# Retrieve video webpage to extract further information`
[dailymotion] Disable the family filter in the playlists (fixes #1524) 2013-09-29 18:44:02 +08:00			`request = self._build_request(url)`
Move DailyMotion into its own file 2013-06-24 02:09:47 +08:00			`webpage = self._download_webpage(request, video_id)`

			`# Extract URL, uploader and title from webpage`
			`self.report_extraction(video_id)`

[dailymotion] Detect vevo videos (fixes #1532) All videos from the Vevo user, just embed videos from vevo.com 2013-10-01 21:05:41 +08:00			`# It may just embed a vevo video:`
			`m_vevo = re.search(`
			`r'<link rel="video_src" href="[^"]?vevo.com[^"]?videoId=(?P<id>[\w]*)',`
			`webpage)`
			`if m_vevo is not None:`
			`vevo_id = m_vevo.group('id')`
			`self.to_screen(u'Vevo video detected: %s' % vevo_id)`
			`return self.url_result(u'vevo:%s' % vevo_id, ie='Vevo')`

[dailymotion] Fix support for age-restricted videos (Fixes #1688) 2013-10-31 07:20:49 +08:00			`age_limit = self._rta_search(webpage)`
Move DailyMotion into its own file 2013-06-24 02:09:47 +08:00
			`video_upload_date = None`
			`mobj = re.search(r'<div class="[^"]uploaded_cont[^"]" title="[^"]*">([0-9]{2})-([0-9]{2})-([0-9]{4})</div>', webpage)`
			`if mobj is not None:`
			`video_upload_date = mobj.group(3) + mobj.group(2) + mobj.group(1)`

Dailymotion: fix the download of the video in the max quality (closes #986) 2013-07-05 20:15:26 +08:00			`embed_url = 'http://www.dailymotion.com/embed/video/%s' % video_id`
			`embed_page = self._download_webpage(embed_url, video_id,`
			`u'Downloading embed page')`
[dailymotion] improve the regex for extracting the video info 2013-09-03 17:33:59 +08:00			`info = self._search_regex(r'var info = ({.*?}),$', embed_page,`
			`'video info', flags=re.MULTILINE)`
Dailymotion: fix the download of the video in the max quality (closes #986) 2013-07-05 20:15:26 +08:00			`info = json.loads(info)`
[dailymotion] Raise ExtractorError if the dailymotion response reports an error 2013-09-21 18:15:54 +08:00			`if info.get('error') is not None:`
			`msg = 'Couldn\'t get video, Dailymotion says: %s' % info['error']['title']`
			`raise ExtractorError(msg, expected=True)`
Dailymotion: fix the download of the video in the max quality (closes #986) 2013-07-05 20:15:26 +08:00
[dailymotion] Extract all the available formats (closes #1028) 2013-10-23 23:33:38 +08:00			`formats = []`
			`for (key, format_id) in self._FORMATS:`
			`video_url = info.get(key)`
			`if video_url is not None:`
			`m_size = re.search(r'H264-(\d+)x(\d+)', video_url)`
			`if m_size is not None:`
[dailymotion] Convert width and height fields from strings to integers 2014-03-05 05:24:38 +08:00			`width, height = map(int_or_none, (m_size.group(1), m_size.group(2)))`
[dailymotion] Extract all the available formats (closes #1028) 2013-10-23 23:33:38 +08:00			`else:`
			`width, height = None, None`
			`formats.append({`
			`'url': video_url,`
			`'ext': 'mp4',`
			`'format_id': format_id,`
			`'width': width,`
			`'height': height,`
			`})`
			`if not formats:`
Dailymotion: fix the download of the video in the max quality (closes #986) 2013-07-05 20:15:26 +08:00			`raise ExtractorError(u'Unable to extract video URL')`

[dailymotion] Added support for subtitles + new InfoExtractor for generic subtitle download. The idea is that all subtitle downloaders must descend from SubtitlesIE and implement only three basic methods to achieve the complete subtitle download functionality. This will allow to reduce the code in YoutubeIE once it is rewritten. 2013-08-08 00:59:11 +08:00			`# subtitles`
[subtitles] refactor to support websites with subtitle information the webpage. I added the parameter webpage, so now it's similar to the way automatic captions are handled. This is an improvement needed for websites like TED. 2013-11-03 01:01:05 +08:00			`video_subtitles = self.extract_subtitles(video_id, webpage)`
[dailymotion] Added support for subtitles + new InfoExtractor for generic subtitle download. The idea is that all subtitle downloaders must descend from SubtitlesIE and implement only three basic methods to achieve the complete subtitle download functionality. This will allow to reduce the code in YoutubeIE once it is rewritten. 2013-08-08 00:59:11 +08:00			`if self._downloader.params.get('listsubtitles', False):`
[subtitles] refactor to support websites with subtitle information the webpage. I added the parameter webpage, so now it's similar to the way automatic captions are handled. This is an improvement needed for websites like TED. 2013-11-03 01:01:05 +08:00			`self._list_available_subtitles(video_id, webpage)`
[dailymotion] Added support for subtitles + new InfoExtractor for generic subtitle download. The idea is that all subtitle downloaders must descend from SubtitlesIE and implement only three basic methods to achieve the complete subtitle download functionality. This will allow to reduce the code in YoutubeIE once it is rewritten. 2013-08-08 00:59:11 +08:00			`return`

[dailymotion] Fix view count extraction and make it non fatal (fixes #1940) 2013-12-11 02:47:00 +08:00			`view_count = self._search_regex(`
			`r'video_views_count[^>]+>\s+([\d\.,]+)', webpage, u'view count', fatal=False)`
			`if view_count is not None:`
			`view_count = str_to_int(view_count)`
[dailymotion] Extract view count (#1895) 2013-12-06 20:36:36 +08:00
[dailymotion] Fix support for age-restricted videos (Fixes #1688) 2013-10-31 07:20:49 +08:00			`return {`
Move DailyMotion into its own file 2013-06-24 02:09:47 +08:00			`'id': video_id,`
[dailymotion] Extract all the available formats (closes #1028) 2013-10-23 23:33:38 +08:00			`'formats': formats,`
[Dailymotion] fix uploader name (fixes #3153) 2014-06-25 23:44:19 +08:00			`'uploader': info['owner.screenname'],`
Move DailyMotion into its own file 2013-06-24 02:09:47 +08:00			`'upload_date': video_upload_date,`
InfoExtractor: add some helper methods to extract OpenGraph info 2013-07-13 01:00:19 +08:00			`'title': self._og_search_title(webpage),`
[dailymotion] Added support for subtitles + new InfoExtractor for generic subtitle download. The idea is that all subtitle downloaders must descend from SubtitlesIE and implement only three basic methods to achieve the complete subtitle download functionality. This will allow to reduce the code in YoutubeIE once it is rewritten. 2013-08-08 00:59:11 +08:00			`'subtitles': video_subtitles,`
[dailymotion] Fix support for age-restricted videos (Fixes #1688) 2013-10-31 07:20:49 +08:00			`'thumbnail': info['thumbnail_url'],`
			`'age_limit': age_limit,`
[dailymotion] Extract view count (#1895) 2013-12-06 20:36:36 +08:00			`'view_count': view_count,`
[dailymotion] Fix support for age-restricted videos (Fixes #1688) 2013-10-31 07:20:49 +08:00			`}`
[dailymotion] Add an extractor for Dailymotion playlists 2013-07-29 18:07:38 +08:00
[subtitles] refactor to support websites with subtitle information the webpage. I added the parameter webpage, so now it's similar to the way automatic captions are handled. This is an improvement needed for websites like TED. 2013-11-03 01:01:05 +08:00			`def _get_available_subtitles(self, video_id, webpage):`
[subtitles] made inheritance hierarchy flat as requested 2013-09-11 21:21:09 +08:00			`try:`
[subtitles] Use self._download_webpage for extracting the subtitles It raises ExtractorError for the same exceptions we have to catch. 2013-09-11 22:24:47 +08:00			`sub_list = self._download_webpage(`
			`'https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id,`
			`video_id, note=False)`
			`except ExtractorError as err:`
[subtitles] made inheritance hierarchy flat as requested 2013-09-11 21:21:09 +08:00			`self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))`
			`return {}`
			`info = json.loads(sub_list)`
			`if (info['total'] > 0):`
			`sub_lang_list = dict((l['language'], l['url']) for l in info['list'])`
			`return sub_lang_list`
			`self._downloader.report_warning(u'video doesn\'t have subtitles')`
			`return {}`

[dailymotion] Add an extractor for Dailymotion playlists 2013-07-29 18:07:38 +08:00
[dailymotion] Disable the family filter in the playlists (fixes #1524) 2013-09-29 18:44:02 +08:00			`class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):`
[dailymotion] Add an extractor for users (closes #1476) 2013-09-21 18:45:53 +08:00			`IE_NAME = u'dailymotion:playlist'`
[dailymotion] Add an extractor for Dailymotion playlists 2013-07-29 18:07:38 +08:00			`_VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>.+?)/'`
[dailymotion] Fix playlist+user 2014-04-04 08:04:16 +08:00			`_MORE_PAGES_INDICATOR = r'(?s)<div class="pages[^"]">.?<a\s+class="[^"]?icon-arrow_right[^"]?"'`
[dailymotion] Add an extractor for users (closes #1476) 2013-09-21 18:45:53 +08:00			`_PAGE_TEMPLATE = 'https://www.dailymotion.com/playlist/%s/%s'`
[dailymotion] Add an extractor for Dailymotion playlists 2013-07-29 18:07:38 +08:00
[dailymotion] Add an extractor for users (closes #1476) 2013-09-21 18:45:53 +08:00			`def _extract_entries(self, id):`
[dailymotion] Add an extractor for Dailymotion playlists 2013-07-29 18:07:38 +08:00			`video_ids = []`
			`for pagenum in itertools.count(1):`
[dailymotion] Disable the family filter in the playlists (fixes #1524) 2013-09-29 18:44:02 +08:00			`request = self._build_request(self._PAGE_TEMPLATE % (id, pagenum))`
			`webpage = self._download_webpage(request,`
[dailymotion] Add an extractor for users (closes #1476) 2013-09-21 18:45:53 +08:00			`id, u'Downloading page %s' % pagenum)`
[dailymotion] Add an extractor for Dailymotion playlists 2013-07-29 18:07:38 +08:00
[dailymotion] Fix user playlist extraction 2014-04-23 20:42:34 +08:00			`video_ids.extend(re.findall(r'data-xid="(.+?)"', webpage))`
[dailymotion] Add an extractor for Dailymotion playlists 2013-07-29 18:07:38 +08:00
[dailymotion] Fix playlist+user 2014-04-04 08:04:16 +08:00			`if re.search(self._MORE_PAGES_INDICATOR, webpage) is None:`
[dailymotion] Add an extractor for Dailymotion playlists 2013-07-29 18:07:38 +08:00			`break`
[dailymotion] Add an extractor for users (closes #1476) 2013-09-21 18:45:53 +08:00			`return [self.url_result('http://www.dailymotion.com/video/%s' % video_id, 'Dailymotion')`
[dailymotion] Fix playlist extraction The html code has changed, make the video ids extraction more solid. 2013-10-04 20:07:29 +08:00			`for video_id in orderedSet(video_ids)]`
[dailymotion] Add an extractor for users (closes #1476) 2013-09-21 18:45:53 +08:00
			`def _real_extract(self, url):`
			`mobj = re.match(self._VALID_URL, url)`
			`playlist_id = mobj.group('id')`
			`webpage = self._download_webpage(url, playlist_id)`

[dailymotion:playlist] Fix title 2014-04-11 08:16:46 +08:00			`return {`
			`'_type': 'playlist',`
			`'id': playlist_id,`
			`'title': self._og_search_title(webpage),`
			`'entries': self._extract_entries(playlist_id),`
			`}`
[dailymotion] Add an extractor for users (closes #1476) 2013-09-21 18:45:53 +08:00

			`class DailymotionUserIE(DailymotionPlaylistIE):`
			`IE_NAME = u'dailymotion:user'`
[dailymotion] Fix playlist+user 2014-04-04 08:04:16 +08:00			`_VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/user/(?P<user>[^/]+)'`
[dailymotion] Add an extractor for users (closes #1476) 2013-09-21 18:45:53 +08:00			`_PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s'`

			`def _real_extract(self, url):`
			`mobj = re.match(self._VALID_URL, url)`
			`user = mobj.group('user')`
			`webpage = self._download_webpage(url, user)`
[dailymotion] Fix user playlist extraction 2014-04-23 20:42:34 +08:00			`full_user = unescapeHTML(self._html_search_regex(`
			`r'<a class="nav-image" title="([^"]+)" href="/%s">' % re.escape(user),`
			`webpage, u'user', flags=re.DOTALL))`
[dailymotion] Add an extractor for users (closes #1476) 2013-09-21 18:45:53 +08:00
			`return {`
			`'_type': 'playlist',`
			`'id': user,`
			`'title': full_user,`
			`'entries': self._extract_entries(user),`
			`}`