From f03b60c1e10506eb75afefdd0510ca121914ff1d Mon Sep 17 00:00:00 2001 From: robin Date: Sun, 8 Feb 2015 21:44:34 +0100 Subject: [PATCH 1/2] [nationalarchivesuk] Add nationalarchives.gov.uk Added the nationalarchives.gov.uk extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/nationalarchivesuk.py | 32 ++++++++++++++++++++++ 2 files changed, 33 insertions(+) create mode 100644 youtube_dl/extractor/nationalarchivesuk.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 047f7002a..964de021a 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -275,6 +275,7 @@ from .myspace import MySpaceIE, MySpaceAlbumIE from .myspass import MySpassIE from .myvideo import MyVideoIE from .myvidster import MyVidsterIE +from .nationalarchivesuk import NationalArchivesUkIE from .naver import NaverIE from .nba import NBAIE from .nbc import ( diff --git a/youtube_dl/extractor/nationalarchivesuk.py b/youtube_dl/extractor/nationalarchivesuk.py new file mode 100644 index 000000000..a763be5af --- /dev/null +++ b/youtube_dl/extractor/nationalarchivesuk.py @@ -0,0 +1,32 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class NationalArchivesUkIE(InfoExtractor): + _VALID_URL = r'https?://media.nationalarchives.gov.uk/index.php/(?P<id>.*)/?' 
+ _TEST = { + 'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/' + # TODO more properties, either as: + # * A value + # * MD5 checksum; start the string with md5: + # * A regular expression; start the string with re: + # * Any Python type (for example int or float) + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + youtube_url = re.search(r'https?://(?:www\.)?youtu(?:be\.com/watch\?v=|\.be/)(\w*)(&(amp;)?[\w\?=]*)?', webpage) + print(youtube_url) + + return { + 'id': video_id, + 'title': title, + 'description': self._og_search_description(webpage), + # TODO more properties (see youtube_dl/extractor/common.py) + } From 3ecfbf9c10f478bfe45763e65181d2f28185aa09 Mon Sep 17 00:00:00 2001 From: robin Date: Sun, 8 Feb 2015 21:50:21 +0100 Subject: [PATCH 2/2] [nationalarchivesuk] Edit to flake8 standards --- youtube_dl/extractor/nationalarchivesuk.py | 29 +++++++++++----------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/nationalarchivesuk.py b/youtube_dl/extractor/nationalarchivesuk.py index a763be5af..7bfa88c22 100644 --- a/youtube_dl/extractor/nationalarchivesuk.py +++ b/youtube_dl/extractor/nationalarchivesuk.py @@ -9,24 +9,25 @@ from .common import InfoExtractor class NationalArchivesUkIE(InfoExtractor): _VALID_URL = r'https?://media.nationalarchives.gov.uk/index.php/(?P<id>.*)/?' 
_TEST = { - 'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/' - # TODO more properties, either as: - # * A value - # * MD5 checksum; start the string with md5: - # * A regular expression; start the string with re: - # * Any Python type (for example int or float) + 'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/', + 'info_dict': { + 'id': 'Mrj4DVp2zeA', + 'ext': 'mp4', + + 'upload_date': '20150204', + 'uploader_id': 'NationalArchives08', + 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue', + 'uploader': 'The National Archives UK', + 'description': 'md5:a236581cd2449dd2df4f93412f3f01c6' } + } def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - youtube_url = re.search(r'https?://(?:www\.)?youtu(?:be\.com/watch\?v=|\.be/)(\w*)(&(amp;)?[\w\?=]*)?', webpage) - print(youtube_url) + youtube_url = re.search(r'https?://(?:www\.)?youtu(?:be\.com/watch\?v=|\.be/)(\w*)(&(amp;)?[\w\?=]*)?', + webpage, re.MULTILINE).group(0) + self.to_screen('Youtube video detected') - return { - 'id': video_id, - 'title': title, - 'description': self._og_search_description(webpage), - # TODO more properties (see youtube_dl/extractor/common.py) - } + return self.url_result(youtube_url, ie='Youtube')