1
0
mirror of https://github.com/l1ving/youtube-dl synced 2025-03-31 09:17:16 +08:00

Add npo:recents extractor

Extractor for npo.nl programs. It retrieves only the recent episodes of the program in question (hence the name); some programs have so many episodes available that retrieving all of them makes no practical sense.
This commit is contained in:
Jan Hoek 2017-03-21 17:29:25 +01:00
parent 8a8cc339b6
commit 503acf8c87
2 changed files with 61 additions and 0 deletions

View File

@ -686,6 +686,7 @@ from .npo import (
VPROIE,
WNLIE,
)
from .nporecents import NPORecentsIE
from .npr import NprIE
from .nrk import (
NRKIE,

View File

@ -0,0 +1,60 @@
#!/usr/bin/python
from .common import InfoExtractor
import re
import xml.etree.ElementTree as ET
class NPORecentsIE(InfoExtractor):
    """Playlist extractor for npo.nl program pages.

    Only the episodes listed by a single search request against the program
    page are returned (``rows=8`` / ``page=1``), not the whole back catalogue.
    """

    IE_NAME = 'npo:recents'
    _VALID_URL = r'(?:https?://)?(?:www\.)?npo\.nl/(?P<alt_id>[^/]+)/(?P<program_id>\w+_\d+)'
    _TEST = {
        'url': 'https://www.npo.nl/keuringsdienst-van-waarde/KN_1678993',
        'info_dict': {
            'title': 'Keuringsdienst van Waarde',
            'id': 'KN_1678993',
            'description': 'In dit programma staat centraal wat fabrikanten ons als consumenten vertellen. Klopt het wat ze claimen en wat ze ons in reclames verkopen? Verslaggevers Teun van de Keuken, Sofie van den Enk, Daan Nieber, Ersin Kiris, Marijn Frank en Maarten Remmers nemen de telefoon ter hand en bellen er actief op los. Ze stellen simpele vragen en krijgen de meest verbazingwekkende antwoorden op food, non-food en nieuwsgerelateerde kwesties. Prikkelend, onderzoekend en vasthoudend. Keuringsdienst van Waarde: simpele vragen,verbazingwekkende antwoorden.',
        },
        'playlist_mincount': 8,
    }

    def _extract_entries(self, webpage, program_id, program_url):
        """Yield one ``url_result`` per available episode of the program.

        :param webpage: HTML of the program page; used to detect the NPO3
            layout and to read the Open Graph title.
        :param program_id: identifier passed to the downloader for logging.
        :param program_url: URL of the program page the search path is
            appended to.
        :raises xml.etree.ElementTree.ParseError: if the search response is
            not well-formed XML.
        """
        is_npo3 = 'www-assets.npo.nl/uploads/tv_channel/265/logo/smaller_npo3-logo.png' in webpage
        # %-formatting instead of auto-numbered '{}' fields, which raise
        # ValueError on Python 2.6.
        if is_npo3:
            # NOTE(review): the double slash before "search" is kept exactly
            # as originally written; confirm whether it is intentional.
            episodes_url = '%s//search?category=broadcasts&page=1' % program_url
        else:
            episodes_url = '%s/search?media_type=broadcast&start=0&rows=8' % program_url

        episodes = self._download_webpage(
            episodes_url, program_id, note='Retrieving episodes')
        tree = ET.fromstring(episodes.encode('utf-8'))

        # Every entry carries the same title, so look it up only once.
        playlist_title = self._og_search_title(webpage)

        for element in tree.findall('.//div'):
            # Element.get() returns None for a <div> without a class.
            if 'span4' not in (element.get('class') or ''):
                continue
            hyperlink = element.find('.//a')
            if hyperlink is None:
                continue
            href = hyperlink.get('href')
            if not href:
                continue
            # Skip episodes carrying the "program-not-available" marker.
            inactive = hyperlink.find(
                './div[@class="program-not-available"]')
            if inactive is not None:
                continue
            yield self.url_result(
                url='http://npo.nl%s' % href,
                video_title=playlist_title)

    def _real_extract(self, url):
        """Build the playlist info dict for the program page at *url*."""
        mobj = re.match(self._VALID_URL, url)
        alt_id = mobj.group('alt_id')
        program_id = mobj.group('program_id')

        webpage = self._download_webpage(url, program_id)
        title = self._og_search_title(webpage)
        description = self._og_search_description(webpage)
        # Generator: the search request is only made once it is iterated.
        entries = self._extract_entries(webpage, program_id, url)

        return {
            '_type': 'playlist',
            'id': program_id,
            'display_id': alt_id,
            'title': title,
            'description': description,
            'entries': entries,
        }