1
0
mirror of https://github.com/l1ving/youtube-dl synced 2025-01-24 04:53:06 +08:00

[iTunes] Add new extractor

The extractor only works for free content, like most podcasts, i.e. it does not download 30-seconds previews of paid songs.
This commit is contained in:
Kristoffer Risanger 2017-09-14 14:34:19 +02:00
parent 757984af90
commit 82260ae1f8
2 changed files with 59 additions and 0 deletions

View File

@ -465,6 +465,7 @@ from .internetvideoarchive import InternetVideoArchiveIE
from .iprima import IPrimaIE from .iprima import IPrimaIE
from .iqiyi import IqiyiIE from .iqiyi import IqiyiIE
from .ir90tv import Ir90TvIE from .ir90tv import Ir90TvIE
from .itunes import iTunesIE
from .itv import ITVIE from .itv import ITVIE
from .ivi import ( from .ivi import (
IviIE, IviIE,

View File

@ -0,0 +1,58 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
extract_attributes,
int_or_none,
unescapeHTML,
unified_strdate,
)
class iTunesIE(InfoExtractor):
_VALID_URL = r'https?://itunes\.apple\.com/[a-z]{2}?/?[a-z0-9-]+/?(?P<display_id>[a-z0-9-]+)?/(?:id)?(?P<id>[0-9]+)'
_TEST = {
'url': 'https://itunes.apple.com/us/itunes-u/uc-davis-symphony-orchestra/id403834767',
'info_dict': {
'id': '403834767',
'title': 'UC Davis Symphony Orchestra & University Chorus',
},
'playlist_count': 31,
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
playlist_id, display_id = mobj.group('id', 'display_id')
if not display_id:
display_id = playlist_id
webpage = self._download_webpage(url, display_id)
video_infos = re.findall(r'var\s+__desc_popup_d_\d+\s*=\s*({[^><]+});', webpage)
html_entries = re.findall(r'<tr\s+[^>]*role="row"[^>]+>', webpage)
entries = []
for idx, html_entry in enumerate(html_entries):
video_info = self._parse_json(video_infos[idx], display_id)
entry = extract_attributes(html_entry)
entries.append({
'id': entry['adam-id'],
'title': entry['preview-title'],
'description': video_info.get('description'),
'url': entry.get('audio-preview-url', entry.get('video-preview-url')),
'duration': int_or_none(entry.get('duration')),
'release_date': unified_strdate(video_info.get('release_date')),
'track': unescapeHTML(entry.get('preview-title')),
'track_number': int_or_none(entry.get('row-number')),
'track_id': entry.get('adam-id'),
'artist': unescapeHTML(entry.get('preview-artist')),
'album': unescapeHTML(entry.get('preview-album')),
})
title = self._html_search_regex(r'<h1>(.+)</h1>', webpage, 'title')
return self.playlist_result(entries, playlist_id, title)