mirror of
https://github.com/l1ving/youtube-dl
synced 2025-01-24 05:32:52 +08:00
[asbook] Add new extractor
This commit is contained in:
parent
6f76679804
commit
69ed8da718
89
youtube_dl/extractor/asbook.py
Normal file
89
youtube_dl/extractor/asbook.py
Normal file
@ -0,0 +1,89 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
decode_packed_codes,
|
||||
PACKED_CODES_RE, encode_base_n, ExtractorError)
|
||||
|
||||
|
||||
class AsBookIE(InfoExtractor):
|
||||
_VIDEO_RE = r'<h1 class="b-maintitle">(?P<title>.+)</h1>'
|
||||
_VALID_URL = r'http://asbook\.net/(?P<section>abooks|radioshow|inyaz)/(?P<subsection>\S+)/(?P<id>\S+).html'
|
||||
_TEST = {
|
||||
'url': 'http://asbook.net/abooks/fantastic/8904-grad-obrechennyy-boris-i-arkadiy-strugackie.html',
|
||||
'md5': 'ab3220ba94ed5bafa7fd796588198862',
|
||||
'info_dict': {
|
||||
'id': 'Град обреченный - 1',
|
||||
'ext': 'mp3',
|
||||
'title': '"Град обреченный" Аркадий и Борис Стругацкие',
|
||||
'upload_date': '20160216',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
book_id = self._match_id(url)
|
||||
|
||||
page = self._download_webpage(url, book_id)
|
||||
|
||||
json_url = None
|
||||
for mobj in re.finditer(PACKED_CODES_RE, page):
|
||||
packed_data = mobj.group(0).replace('\\\'', '\'')
|
||||
text = self.decode_packed_codes(packed_data)
|
||||
json_url = self._search_regex(r"json_url='(?P<json_url>\S+)';",
|
||||
text, 'json_url', default=None)
|
||||
if json_url is not None:
|
||||
break
|
||||
|
||||
if not json_url:
|
||||
raise ExtractorError('Could not get information about audiobook',
|
||||
expected=True)
|
||||
|
||||
title = None
|
||||
for mobj in re.finditer(self._VIDEO_RE, page):
|
||||
info = mobj.groupdict()
|
||||
if 'title' in info:
|
||||
title = info['title'].strip()
|
||||
break
|
||||
|
||||
playlist = self._download_json(json_url, book_id).get('playlist', None)
|
||||
|
||||
if not title:
|
||||
title = playlist[0]['comment']
|
||||
|
||||
return self.playlist_result(self._entries(playlist, title),
|
||||
book_id, title)
|
||||
|
||||
@staticmethod
|
||||
def decode_packed_codes(code):
|
||||
# This method copies the method from utils.decode_packed_codes,
|
||||
# but it correctly passes Cyrillic characters
|
||||
|
||||
mobj = re.search(PACKED_CODES_RE, code)
|
||||
obfucasted_code, base, count, symbols = mobj.groups()
|
||||
base = int(base)
|
||||
count = int(count)
|
||||
symbols = symbols.split('|')
|
||||
symbol_table = {}
|
||||
|
||||
while count:
|
||||
count -= 1
|
||||
base_n_count = encode_base_n(count, base)
|
||||
symbol_table[base_n_count] = symbols[count] or base_n_count
|
||||
|
||||
return re.sub(
|
||||
r'\b(\w+)\b', lambda mobj: symbol_table.get(mobj.group(0)),
|
||||
obfucasted_code)
|
||||
|
||||
def _entries(self, playlist, playlist_title):
|
||||
for item in playlist:
|
||||
info = {'_type': 'url_transparent',
|
||||
'url': item['file'],
|
||||
'ie_key': None,
|
||||
'id': item['comment'], # instead filename
|
||||
'title': playlist_title # item['comment']
|
||||
}
|
||||
|
||||
yield info
|
@ -295,6 +295,7 @@ from .ehow import EHowIE
|
||||
from .eighttracks import EightTracksIE
|
||||
from .einthusan import EinthusanIE
|
||||
from .eitb import EitbIE
|
||||
from .asbook import AsBookIE
|
||||
from .ellentv import (
|
||||
EllenTVIE,
|
||||
EllenTVClipsIE,
|
||||
|
Loading…
Reference in New Issue
Block a user