)?"""
_VALID_URL = r'(?:https?://)?(?:www\.)?npo\.nl/(?P
[^/]+)/(?P\w+_\d+)'
_TESTS = [{
# Example of an npo3 program
@@ -516,28 +517,21 @@ class NPORecentsIE(NPOIE):
if is_npo3:
episodes_url = '%s//search?category=broadcasts&page=1' % program_url
+ regex = self.npo3_regex
else:
episodes_url = '%s/search?media_type=broadcast&start=0&rows=8' % program_url
+ regex = self.npo12_regex
- episodes = self._download_webpage(
- episodes_url, program_id, note='Retrieving episodes')
- tree = ET.fromstring(episodes.encode('utf-8'))
- for element in tree.findall('.//div'):
- if 'span4' in element.get('class'):
- hyperlink = element.find('.//a')
+ episodes = self._download_webpage(episodes_url, program_id, note='Retrieving episodes')
- # Note: ElementTree in Python 2.6+ doesn't support
- # the required XPath constructs
- inactive = False
- divs = hyperlink.findall('div')
- for div in divs:
- if div.attrib.get('class') == 'program-not-available':
- inactive = True
+ for match in re.finditer(regex, episodes):
+ url = match.group(1)
+ available = match.group(2) is None
- if not inactive:
- yield self.url_result(
- url='http://npo.nl%s' % hyperlink.get('href'),
- video_title=self._og_search_title(webpage))
+ if available:
+ yield self.url_result(
+ url='http://npo.nl%s' % url,
+ video_title=self._og_search_title(webpage))
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)