From fe1010df3d86b5ab2cf08d2e47d8954b835067df Mon Sep 17 00:00:00 2001 From: steve Date: Sat, 3 Nov 2018 19:14:02 -0400 Subject: [PATCH 1/3] initial commit --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/homemoviestube.py | 38 ++++++++++++++++++++++++++ 2 files changed, 39 insertions(+) create mode 100644 youtube_dl/extractor/homemoviestube.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index e5488cce4..e9619acf8 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -451,6 +451,7 @@ from .hidive import HiDiveIE from .historicfilms import HistoricFilmsIE from .hitbox import HitboxIE, HitboxLiveIE from .hitrecord import HitRecordIE +from .homemoviestube import HomeMoviesTubeIE from .hornbunny import HornBunnyIE from .hotnewhiphop import HotNewHipHopIE from .hotstar import ( diff --git a/youtube_dl/extractor/homemoviestube.py b/youtube_dl/extractor/homemoviestube.py new file mode 100644 index 000000000..c0e59ba30 --- /dev/null +++ b/youtube_dl/extractor/homemoviestube.py @@ -0,0 +1,38 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class HomeMoviesTubeIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?homemoviestube\.com/watch/(?P<id>[0-9]+)' + _TEST = { + 'url': 'https://homemoviestube.com/watch/42', + 'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)', + 'info_dict': { + 'id': '42', + 'ext': 'mp4', + 'title': 'Video title goes here', + 'thumbnail': r're:^https?://.*\.jpg$', + # TODO more properties, either as: + # * A value + # * MD5 checksum; start the string with md5: + # * A regular expression; start the string with re: + # * Any Python type (for example int or float) + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + # TODO more code goes here, for example ... + title = self._html_search_regex(r'

<h1>(.+?)</h1>

', webpage, 'title') + + return { + 'id': video_id, + 'title': title, + 'description': self._og_search_description(webpage), + 'uploader': self._search_regex(r'<div[^>]+id="uploader"[^>]*>([^<]+)<', webpage, 'uploader', fatal=False), + # TODO more properties (see youtube_dl/extractor/common.py) + } From 9317b8c65dce0804cda19abcae2ad1edddebd6c1 Mon Sep 17 00:00:00 2001 From: steve Date: Sat, 3 Nov 2018 23:09:43 -0400 Subject: [PATCH 2/3] homemoviestube extractor in working state --- youtube_dl/extractor/homemoviestube.py | 35 +++++++++++++------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/homemoviestube.py b/youtube_dl/extractor/homemoviestube.py index c0e59ba30..dc187dbaa 100644 --- a/youtube_dl/extractor/homemoviestube.py +++ b/youtube_dl/extractor/homemoviestube.py @@ -1,38 +1,39 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor class HomeMoviesTubeIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?homemoviestube\.com/watch/(?P<id>[0-9]+)' + _VALID_URL = r'https?://(?:www\.)?homemoviestube\.com/videos/(?P<id>[0-9]+)/(?P<display_id>[^/]+)\.html' _TEST = { - 'url': 'https://homemoviestube.com/watch/42', - 'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)', + 'url': 'https://www.homemoviestube.com/videos/314747/creamed-again.html', + 'md5': 'a1f827520d82c0b70da391a8aed410c9', 'info_dict': { - 'id': '42', + 'id': '314747', + 'display_id': 'creamed-again', 'ext': 'mp4', - 'title': 'Video title goes here', - 'thumbnail': r're:^https?://.*\.jpg$', - # TODO more properties, either as: - # * A value - # * MD5 checksum; start the string with md5: - # * A regular expression; start the string with re: - # * Any Python type (for example int or float) + 'title': 'Creamed again', } } def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + mobj = re.match(self._VALID_URL, url) + display_id = 
mobj.group('display_id') + video_id = mobj.group('id') + + webpage = self._download_webpage(url, display_id) + + video_url = self._html_search_regex( + r'(.+?)', webpage, 'title') return { 'id': video_id, + 'display_id': display_id, 'title': title, - 'description': self._og_search_description(webpage), - 'uploader': self._search_regex(r']+id="uploader"[^>]*>([^<]+)<', webpage, 'uploader', fatal=False), - # TODO more properties (see youtube_dl/extractor/common.py) + 'url': video_url, } From e9a9031e96f1f49ffca96b5924463e95af93a876 Mon Sep 17 00:00:00 2001 From: steve Date: Sat, 3 Nov 2018 23:15:59 -0400 Subject: [PATCH 3/3] consistency --- youtube_dl/extractor/homemoviestube.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/youtube_dl/extractor/homemoviestube.py b/youtube_dl/extractor/homemoviestube.py index dc187dbaa..35a1e51e0 100644 --- a/youtube_dl/extractor/homemoviestube.py +++ b/youtube_dl/extractor/homemoviestube.py @@ -26,8 +26,7 @@ class HomeMoviesTubeIE(InfoExtractor): webpage = self._download_webpage(url, display_id) - video_url = self._html_search_regex( - r'(.+?)', webpage, 'title')