[nationalarchivesuk] Add nationalarchives.gov.uk

Added the nationalarchives.gov.uk extractor
2025-03-13 19:07:15 +08:00 · 2015-02-08 21:44:34 +01:00 · 2015-02-08 21:44:34 +01:00 · f03b60c1e1
commit f03b60c1e1
parent c36b09a502
2 changed files with 33 additions and 0 deletions
--- a/youtube_dl/extractor/init.py
+++ b/youtube_dl/extractor/init.py
@ -275,6 +275,7 @@ from .myspace import MySpaceIE, MySpaceAlbumIE
 from .myspass import MySpassIE
 from .myvideo import MyVideoIE
 from .myvidster import MyVidsterIE
+from .nationalarchivesuk import NationalArchivesUkIE
 from .naver import NaverIE
 from .nba import NBAIE
 from .nbc import (
--- a/youtube_dl/extractor/nationalarchivesuk.py
+++ b/youtube_dl/extractor/nationalarchivesuk.py
@ -0,0 +1,32 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class NationalArchivesUkIE(InfoExtractor):
+    _VALID_URL = r'https?://media.nationalarchives.gov.uk/index.php/(?P<id>.*)/?'
+    _TEST = {
+        'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/'
+            # TODO more properties, either as:
+            # * A value
+            # * MD5 checksum; start the string with md5:
+            # * A regular expression; start the string with re:
+            # * Any Python type (for example int or float)
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+	youtube_url = re.search(r'https?://(?:www\.)?youtu(?:be\.com/watch\?v=|\.be/)(\w*)(&(amp;)?[\w\?=]*)?', webpage)
+	print(youtube_url)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': self._og_search_description(webpage),
+            # TODO more properties (see youtube_dl/extractor/common.py)
+        }