From d05e77148c0a656ead9383901c2efa80c4020284 Mon Sep 17 00:00:00 2001
From: Olivier Mehani <shtrom@ssji.net>
Date: Sat, 14 Dec 2019 00:05:25 +1100
Subject: [PATCH] [abc:iview:shows] Handle human-friendly landing pages

Add an extractor to redirect human-friendly page URLs to their canonical
video URL.

This includes 'movie length' TV shows (#16868)

Signed-off-by: Olivier Mehani <shtrom@ssji.net>
---
 youtube_dl/extractor/abc.py        | 54 ++++++++++++++++++++++++++++++
 youtube_dl/extractor/extractors.py |  1 +
 2 files changed, 55 insertions(+)
diff --git a/youtube_dl/extractor/abc.py b/youtube_dl/extractor/abc.py
index 4ac323bf6..3620fec4f 100644
--- a/youtube_dl/extractor/abc.py
+++ b/youtube_dl/extractor/abc.py
@@ -191,3 +191,57 @@ class ABCIViewIE(InfoExtractor):
             'subtitles': subtitles,
             'is_live': is_live,
         }
+
+
+class ABCIViewShowIE(InfoExtractor):
+    """
+    Stub extractor for human-friendly iview show landing pages.
+
+    It looks for the canonical video URL in the landing page and hands that
+    URL over to ABCIViewIE.
+    """
+    IE_NAME = 'abc.net.au:iview:shows'
+    _VALID_URL = r'https?://iview\.abc\.net\.au/show/(?P<id>[^/?#]+)'
+    # The canonical URL to look for. The pattern expects backslash-escaped
+    # quotes around it (presumably an escaped JSON blob in the page).
+    _CANONICAL_URL = r'd_canonicalUrl\\":\\"(?P<url>https://iview\.abc\.net\.au/video/(?P<id>[^/?#\\"]+))\\"'
+    _GEO_COUNTRIES = ['AU']
+
+    # ABC iview programs are normally available for 14 days only.
+    _TESTS = [{
+        'url': 'https://iview.abc.net.au/show/stick-man',
+        'md5': 'cde42d728b3b7c2b32b1b94b4a548afc',
+        'info_dict': {
+            'id': 'ZW0021A001S00',
+            'ext': 'mp4',
+            'title': "Stick Man",
+            'series': "Stick Man",
+            'description': 'md5:ffc3ab0c9df0255d646924dbd29fa0d5',
+            'uploader_id': 'abc4kids',
+            'timestamp': 1576249200,
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }]
+
+    def _real_extract(self, url):
+        """
+        Download the landing page and delegate its canonical URL to ABCIViewIE.
+
+        Returns the 'url' result built by InfoExtractor.url_result. A page
+        without a canonical URL makes _search_regex raise; this replaces a
+        bare assert, which is stripped when Python runs with -O.
+        """
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        canonical_url = self._search_regex(
+            self._CANONICAL_URL, webpage, 'canonical URL', group='url')
+        self.report_canonical_url(video_id, canonical_url)
+
+        # Delegate via url_result instead of instantiating ABCIViewIE directly
+        return self.url_result(canonical_url, ie=ABCIViewIE.ie_key())
+
+    def report_canonical_url(self, video_id, canonical_url):
+        """Report URL redirect."""
+        self.to_screen('%s: Canonical URL: %s' % (video_id, canonical_url))
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index fd93730fa..e50f3192e 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
 from .abc import (
     ABCIE,
     ABCIViewIE,
+    ABCIViewShowIE,
 )
 from .abcnews import (
     AbcNewsIE,