[nytimes] Add support for nytimes.com (Closes #2846)

2024-11-22 10:35:45 +08:00 · 2014-05-03 02:28:38 +07:00 · 2014-05-03 02:28:38 +07:00 · d664de44b7
commit d664de44b7
parent bbe99d26ec
2 changed files with 70 additions and 0 deletions
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -192,6 +192,7 @@ from .nowness import NownessIE
 from .nowvideo import NowVideoIE
 from .nrk import NRKIE
 from .ntv import NTVIE
 from .nytimes import NYTimesIE
 from .oe1 import OE1IE
 from .ooyala import OoyalaIE
 from .orf import ORFIE
--- a/youtube_dl/extractor/nytimes.py
+++ b/youtube_dl/extractor/nytimes.py
@@ -0,0 +1,69 @@
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from ..utils import parse_iso8601
 class NYTimesIE(InfoExtractor):
    """Extractor for video pages under nytimes.com/video/.

    The numeric video id is taken from the page URL and used to query the
    site's JSON video API; the result is mapped onto an info dict.
    """

    _VALID_URL = r'https?://(?:www\.)?nytimes\.com/video/(?:[^/]+/)+(?P<id>\d+)'
    _TEST = {
        'url': 'http://www.nytimes.com/video/opinion/100000002847155/verbatim-what-is-a-photocopier.html?playlistId=100000001150263',
        'md5': '18a525a510f942ada2720db5f31644c0',
        'info_dict': {
            'id': '100000002847155',
            'ext': 'mov',
            'title': 'Verbatim: What Is a Photocopier?',
            'description': 'md5:93603dada88ddbda9395632fdc5da260',
            'timestamp': 1398631707,
            'upload_date': '20140427',
            'uploader': 'Brett Weiner',
            'duration': 419,
        }
    }

    def _real_extract(self, url):
        """Download the video JSON for *url* and build the info dict.

        ``headline`` and ``renditions`` (with every rendition field) are
        required and raise ``KeyError`` when absent.  ``summary``,
        ``byline``, ``duration``, ``publication_date`` and ``images`` are
        treated as optional: when one is missing or null the matching
        info-dict entry is ``None`` (or an empty thumbnail list) instead of
        aborting the whole extraction.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        video_data = self._download_json(
            'http://www.nytimes.com/svc/video/api/v2/video/%s' % video_id,
            video_id, 'Downloading video JSON')

        # The title is mandatory, so a missing headline must fail loudly.
        title = video_data['headline']

        # Optional metadata: tolerate absent keys rather than raising.
        description = video_data.get('summary')
        uploader = video_data.get('byline')

        # NOTE(review): the division by 1000.0 suggests the API reports
        # milliseconds - confirm against the service.
        raw_duration = video_data.get('duration')
        duration = raw_duration / 1000.0 if raw_duration is not None else None

        # The last 8 characters are dropped before parsing; presumably a
        # suffix parse_iso8601 cannot handle - TODO confirm.
        publication_date = video_data.get('publication_date')
        timestamp = (
            parse_iso8601(publication_date[:-8])
            if publication_date else None)

        # Rendition fields stay hard-indexed: how _sort_formats treats
        # missing values is not visible from this module.
        formats = [
            {
                'url': video['url'],
                'format_id': video['type'],
                'vcodec': video['video_codec'],
                'width': video['width'],
                'height': video['height'],
                'filesize': video['fileSize'],
            } for video in video_data['renditions']
        ]
        self._sort_formats(formats)

        # A missing or null image list simply yields no thumbnails.
        thumbnails = [
            {
                'url': 'http://www.nytimes.com/%s' % image['url'],
                'resolution': '%dx%d' % (image['width'], image['height']),
            } for image in video_data.get('images') or []
        ]

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'timestamp': timestamp,
            'uploader': uploader,
            'duration': duration,
            'formats': formats,
            'thumbnails': thumbnails,
        }