From 83964685bed6f08f59f76016026f763030e2165e Mon Sep 17 00:00:00 2001
From: Monastario
Date: Tue, 16 Apr 2019 14:07:16 +0200
Subject: [PATCH 1/3] [Bajeczki] Add new extractor

---
 youtube_dl/extractor/bajeczki.py   | 43 ++++++++++++++++++++++++++++++
 youtube_dl/extractor/extractors.py |  1 +
 2 files changed, 44 insertions(+)
 create mode 100644 youtube_dl/extractor/bajeczki.py

diff --git a/youtube_dl/extractor/bajeczki.py b/youtube_dl/extractor/bajeczki.py
new file mode 100644
index 000000000..f1fd72fe3
--- /dev/null
+++ b/youtube_dl/extractor/bajeczki.py
@@ -0,0 +1,43 @@
+# coding: utf-8
+from __future__ import unicode_literals
+import re
+from .common import InfoExtractor
+
+
+class BajeczkiIE(InfoExtractor):
+    _VALID_URL = r'http?://(?:www\.)?bajeczki\.org/(?P<id>.*)'
+    _TEST = {
+        'url': 'http://bajeczki.org/psi-patrol/pieski-ratuja-przyjaciol-ksiezniczki/',
+        'md5': '01f72e7e641448785db6a9bd77a94b31',
+        'info_dict': {
+            'id': 'psi-patrol/pieski-ratuja-przyjaciol-ksiezniczki/',
+            'ext': 'mp4',
+            'title': 'Psi Patrol - Psia misja: Pieski ratują przyjaciół księżniczki | Bajki na Bajeczki.org',
+            # 'thumbnail': r're:^https?://.*\.jpg$',
+            # TODO more properties, either as:
+            # * A value
+            # * MD5 checksum; start the string with md5:
+            # * A regular expression; start the string with re:
+            # * Any Python type (for example int or float)
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        # print (webpage)
+        # TODO more code goes here, for example ...
+        title = self._html_search_regex(r'<title>(.+?)</title>', webpage, 'title')
+        test = self._search_regex(r'(http.*\.mp4)', webpage, 'url')
+        print(test)
+        url = re.sub('\\\\', '', test)
+        print(url)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'url': url,
+            # 'description': self._og_search_description(webpage),
+            # 'uploader': self._search_regex(r'<div[^>]+id="uploader"[^>]*>([^<]+)<', webpage, 'uploader', fatal=False),
+            # TODO more properties (see youtube_dl/extractor/common.py)
+        }
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 86ecc0b66..e885727b3 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -89,6 +89,7 @@ from .awaan import (
 )
 from .azmedien import AZMedienIE
 from .baidu import BaiduVideoIE
+from .bajeczki import BajeczkiIE
 from .bambuser import BambuserIE, BambuserChannelIE
 from .bandcamp import BandcampIE, BandcampAlbumIE, BandcampWeeklyIE
 from .bbc import (

From 88261409ed2bca17d98eca9df96da61a087a09de Mon Sep 17 00:00:00 2001
From: Monastario
Date: Tue, 16 Apr 2019 17:59:23 +0200
Subject: [PATCH 2/3] [Bajeczki] Add new extractor

---
 youtube_dl/extractor/bajeczki.py | 29 +++++++++--------------------
 1 file changed, 9 insertions(+), 20 deletions(-)

diff --git a/youtube_dl/extractor/bajeczki.py b/youtube_dl/extractor/bajeczki.py
index f1fd72fe3..c492c611b 100644
--- a/youtube_dl/extractor/bajeczki.py
+++ b/youtube_dl/extractor/bajeczki.py
@@ -1,6 +1,6 @@
 # coding: utf-8
 from __future__ import unicode_literals
-import re
+
 from .common import InfoExtractor
 
 
@@ -12,32 +12,21 @@ class BajeczkiIE(InfoExtractor):
         'info_dict': {
             'id': 'psi-patrol/pieski-ratuja-przyjaciol-ksiezniczki/',
             'ext': 'mp4',
-            'title': 'Psi Patrol - Psia misja: Pieski ratują przyjaciół księżniczki | Bajki na Bajeczki.org',
-            # 'thumbnail': r're:^https?://.*\.jpg$',
-            # TODO more properties, either as:
-            # * A value
-            # * MD5 checksum; start the string with md5:
-            # * A regular expression; start the string with re:
-            # * Any Python type (for example int or float)
-        }
+            'title': 'Psi Patrol - Psia misja: Pieski ratują przyjaciół księżniczki',
+        },
     }
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
+
         webpage = self._download_webpage(url, video_id)
-        # print (webpage)
-        # TODO more code goes here, for example ...
-        title = self._html_search_regex(r'<title>(.+?)</title>', webpage, 'title')
-        test = self._search_regex(r'(http.*\.mp4)', webpage, 'url')
-        print(test)
-        url = re.sub('\\\\', '', test)
-        print(url)
+
+        url = self._search_regex(r'(http.*\.mp4)', webpage, 'url').replace('\\', '')
+
+        title = self._html_search_regex(r'<title>(.+?)</title>', webpage, 'title').split(' |', 1)[0]
 
         return {
             'id': video_id,
-            'title': title,
             'url': url,
-            # 'description': self._og_search_description(webpage),
-            # 'uploader': self._search_regex(r'<div[^>]+id="uploader"[^>]*>([^<]+)<', webpage, 'uploader', fatal=False),
-            # TODO more properties (see youtube_dl/extractor/common.py)
+            'title': title,
         }

From 6123630c0d94d144f5e1f5a5d114f50294515fda Mon Sep 17 00:00:00 2001
From: Jakub Wilk
Date: Wed, 24 Apr 2019 11:54:00 +0200
Subject: [PATCH 3/3] Update youtube_dl/extractor/bajeczki.py

Co-Authored-By: Monastario
---
 youtube_dl/extractor/bajeczki.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/bajeczki.py b/youtube_dl/extractor/bajeczki.py
index c492c611b..49dff6599 100644
--- a/youtube_dl/extractor/bajeczki.py
+++ b/youtube_dl/extractor/bajeczki.py
@@ -5,7 +5,7 @@ from .common import InfoExtractor
 
 
 class BajeczkiIE(InfoExtractor):
-    _VALID_URL = r'http?://(?:www\.)?bajeczki\.org/(?P<id>.*)'
+    _VALID_URL = r'https?://(?:www\.)?bajeczki\.org/(?P<id>.*)'
     _TEST = {
         'url': 'http://bajeczki.org/psi-patrol/pieski-ratuja-przyjaciol-ksiezniczki/',
         'md5': '01f72e7e641448785db6a9bd77a94b31',