youtube-dl/youtube_dl/extractor/tf1.py

# coding: utf-8

import json
import re

from .common import InfoExtractor

class TF1IE(InfoExtractor):
    """Extractor for videos.tf1.fr pages; TF1 uses the wat.tv player.

    This extractor does not return media formats itself: it locates the
    wat.tv URL of the video embedded in the TF1 page and delegates to the
    'Wat' extractor through ``url_result``.
    """
    # The dots are escaped so they only match a literal '.'. The single
    # capture group is the text between the last '-' and the '.html' suffix
    # ('8062060' for the test URL below); it is passed to the download
    # helpers as the identifier of the page.
    _VALID_URL = r'http://videos\.tf1\.fr/.*-(.*?)\.html'
    _TEST = {
        u'url': u'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html',
        u'file': u'10635995.mp4',
        u'md5': u'2e378cc28b9957607d5e88f274e637d8',
        u'info_dict': {
            u'title': u'Citroën Grand C4 Picasso 2013 : présentation officielle',
            u'description': u'Vidéo officielle du nouveau Citroën Grand C4 Picasso, lancé à l\'automne 2013.',
        },
        u'skip': u'Sometimes wat serves the whole file with the --test option',
    }

    def _real_extract(self, url):
        """Resolve a TF1 page URL to the wat.tv URL of its embedded video.

        Steps:
          1. download the TF1 page and find the wat.tv embed frame URL;
          2. download the embed page and read the wat id from its
             ``UVID=...&`` parameter;
          3. query the wat.tv ``contentv3`` interface for that id and take
             the ``url`` field of its ``media`` object.

        Returns whatever ``self.url_result`` builds for that URL with the
        'Wat' extractor key.
        """
        mobj = re.match(self._VALID_URL, url)
        # Named page_id rather than ``id`` to avoid shadowing the builtin.
        page_id = mobj.group(1)
        webpage = self._download_webpage(url, page_id)
        embed_url = self._html_search_regex(
            r'"(https://www\.wat\.tv/embedframe/.*?)"',
            webpage, 'embed url')
        embed_page = self._download_webpage(
            embed_url, page_id, u'Downloading embed player page')
        wat_id = self._search_regex(r'UVID=(.*?)&', embed_page, 'wat id')
        wat_info_json = self._download_webpage(
            'http://www.wat.tv/interface/contentv3/%s' % wat_id, page_id,
            u'Downloading Wat info')
        wat_info = json.loads(wat_info_json)['media']
        return self.url_result(wat_info['url'], 'Wat')
Move TF1IE to its own file 2013-06-29 21:18:19 +08:00			`# coding: utf-8`

			`import json`
			`import re`

			`from .common import InfoExtractor`

			`class TF1IE(InfoExtractor):`
[wat] fix the extraction of the video url (fixes #1103) Use the direct download link for Android. 2013-07-30 05:38:02 +08:00			`"""TF1 uses the wat.tv player."""`
Move TF1IE to its own file 2013-06-29 21:18:19 +08:00			`_VALID_URL = r'http://videos.tf1.fr/.*-(.*?).html'`
			`_TEST = {`
			`u'url': u'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html',`
WatIE: support videos divided in multiple parts (closes #222 and #659) The id for the videos is now the full id, not the one in the webpage url. Also extract more information: description, view_count and upload_date 2013-06-30 00:22:03 +08:00			`u'file': u'10635995.mp4',`
[wat] fix the extraction of the video url (fixes #1103) Use the direct download link for Android. 2013-07-30 05:38:02 +08:00			`u'md5': u'2e378cc28b9957607d5e88f274e637d8',`
Move TF1IE to its own file 2013-06-29 21:18:19 +08:00			`u'info_dict': {`
WatIE: support videos divided in multiple parts (closes #222 and #659) The id for the videos is now the full id, not the one in the webpage url. Also extract more information: description, view_count and upload_date 2013-06-30 00:22:03 +08:00			`u'title': u'Citroën Grand C4 Picasso 2013 : présentation officielle',`
			`u'description': u'Vidéo officielle du nouveau Citroën Grand C4 Picasso, lancé à l\'automne 2013.',`
Disable wat and tf1 tests, the whole videos are served sometimes, so the md5 sum doesn't match. 2013-07-30 17:19:07 +08:00			`},`
			`u'skip': u'Sometimes wat serves the whole file with the --test option',`
Move TF1IE to its own file 2013-06-29 21:18:19 +08:00			`}`

			`def _real_extract(self, url):`
			`mobj = re.match(self._VALID_URL, url)`
			`id = mobj.group(1)`
			`webpage = self._download_webpage(url, id)`
			`embed_url = self._html_search_regex(r'"(https://www.wat.tv/embedframe/.*?)"',`
			`webpage, 'embed url')`
			`embed_page = self._download_webpage(embed_url, id, u'Downloading embed player page')`
			`wat_id = self._search_regex(r'UVID=(.*?)&', embed_page, 'wat id')`
			`wat_info = self._download_webpage('http://www.wat.tv/interface/contentv3/%s' % wat_id, id, u'Downloading Wat info')`
			`wat_info = json.loads(wat_info)['media']`
			`wat_url = wat_info['url']`
			`return self.url_result(wat_url, 'Wat')`