1
0
mirror of https://github.com/l1ving/youtube-dl synced 2025-03-10 18:10:00 +08:00

[Bajeczki] Add new extractor

This commit is contained in:
Monastario 2019-04-16 14:07:16 +02:00
parent cb6cd76f7b
commit 83964685be
2 changed files with 44 additions and 0 deletions

View File

@ -0,0 +1,43 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
class BajeczkiIE(InfoExtractor):
    """Extractor for video pages on bajeczki.org.

    The whole path after the host name is used as the video id. The media
    URL is taken from the first ``http...mp4`` span found in the page, with
    all backslashes removed.
    """

    # ``https?`` accepts both http and https page URLs (the previous
    # ``http?`` made the *p* optional and therefore rejected https).
    _VALID_URL = r'https?://(?:www\.)?bajeczki\.org/(?P<id>.*)'
    _TEST = {
        'url': 'http://bajeczki.org/psi-patrol/pieski-ratuja-przyjaciol-ksiezniczki/',
        'md5': '01f72e7e641448785db6a9bd77a94b31',
        'info_dict': {
            'id': 'psi-patrol/pieski-ratuja-przyjaciol-ksiezniczki/',
            'ext': 'mp4',
            'title': 'Psi Patrol - Psia misja: Pieski ratują przyjaciół księżniczki | Bajki na Bajeczki.org',
        }
    }

    def _real_extract(self, url):
        """Download the page at *url* and return its info dict.

        Returns a dict with the keys ``id`` (the path matched by
        ``_VALID_URL``), ``title`` (contents of the page's ``<title>``
        element) and ``url`` (the media URL found in the page).
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        title = self._html_search_regex(
            r'<title>(.+?)</title>', webpage, 'title')

        escaped_url = self._search_regex(r'(http.*\.mp4)', webpage, 'url')
        # Strip every backslash from the match; presumably the page embeds
        # the link in escaped form (e.g. ``http:\/\/...``) -- TODO confirm.
        video_url = re.sub(r'\\', '', escaped_url)

        return {
            'id': video_id,
            'title': title,
            'url': video_url,
        }

View File

@ -89,6 +89,7 @@ from .awaan import (
) )
from .azmedien import AZMedienIE from .azmedien import AZMedienIE
from .baidu import BaiduVideoIE from .baidu import BaiduVideoIE
from .bajeczki import BajeczkiIE
from .bambuser import BambuserIE, BambuserChannelIE from .bambuser import BambuserIE, BambuserChannelIE
from .bandcamp import BandcampIE, BandcampAlbumIE, BandcampWeeklyIE from .bandcamp import BandcampIE, BandcampAlbumIE, BandcampWeeklyIE
from .bbc import ( from .bbc import (