1
0
mirror of https://github.com/l1ving/youtube-dl synced 2025-03-07 04:00:02 +08:00

Changed more parsing logic

This commit is contained in:
BrutuZ 2020-03-14 16:20:07 -03:00
parent 3202fbcb8a
commit 81e1ddaf78

View File

@ -10,7 +10,6 @@ from ..utils import (
parse_iso8601, parse_iso8601,
unified_strdate, unified_strdate,
str_or_none, str_or_none,
parse_duration,
sanitize_url, sanitize_url,
compat_str, compat_str,
try_get, try_get,
@ -45,25 +44,32 @@ class HanimeIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_slug = self._match_id(url) video_slug = self._match_id(url)
page_json = self._html_search_regex(r'<script>.+__NUXT__=(.+?);<\/script>', self._download_webpage(url, video_slug), 'Inline JSON') page_json = self._html_search_regex(r'__NUXT__=({.+?});<\/script>', self._download_webpage(url, video_slug), 'Inline JSON')
page_json = try_get(self._parse_json(page_json, video_slug), lambda x: x['state']['data']['video']['hentai_video'], dict) or {} page_json = try_get(self._parse_json(page_json, video_slug), lambda x: x['state']['data']['video']['hentai_video'], dict) or {}
api_json = try_get(self._download_json( api_json = try_get(self._download_json(
'https://members.hanime.tv/api/v3/videos_manifests/%s' % video_slug, 'https://members.hanime.tv/api/v3/videos_manifests/%s' % video_slug,
video_slug, video_slug,
'API Call', headers={'X-Directive': 'api'}), lambda x: x['videos_manifest']['servers'], list) or [] 'API Call', headers={'X-Directive': 'api'}), lambda x: x['videos_manifest']['servers'], list) or []
title = page_json.get('name')
duration = parse_duration('%sms' % page_json.get('duration_in_ms'))
tags = [] tags = []
for tag in page_json.get('hentai_tags'): for tag in page_json.get('hentai_tags', []):
if tag.get('text'): if tag.get('text'):
tags.append(tag.get('text')) tags.append(tag.get('text'))
thumbnails = []
if '/covers/' in page_json.get('poster_url'):
thumbnails.append({'preference': 0, 'id': 'Poster', 'url': page_json['poster_url']})
elif '/posters/' in page_json.get('poster_url'):
thumbnails.append({'preference': 1, 'id': 'Cover', 'url': page_json['cover_url']})
else:
thumbnails = None
formats = [] formats = []
video_id = None video_id = None
for server in api_json: for server in api_json:
for stream in server['streams']: for stream in server['streams']:
if stream.get('compatibility') != 'all': if stream.get('compatibility') != 'all':
continue continue
item_url = sanitize_url(stream.get('url')) or sanitize_url('https://hanime.tv/api/v1/m3u8s/%s.m3u8' % stream.get('id')) if not video_id:
video_id = compat_str(stream['id'])
item_url = sanitize_url(stream.get('url')) or sanitize_url('https://hanime.tv/api/v1/m3u8s/%s.m3u8' % stream['id'])
width = int_or_none(stream.get('width')) width = int_or_none(stream.get('width'))
height = int_or_none(stream.get('height')) height = int_or_none(stream.get('height'))
format = { format = {
@ -76,23 +82,14 @@ class HanimeIE(InfoExtractor):
'url': item_url, 'url': item_url,
} }
formats.append(format) formats.append(format)
if not title:
title = stream.get('video_stream_group_id')
if not duration:
duration = parse_duration(compat_str(stream.get('duration_in_ms')))
if not video_id:
video_id = compat_str(stream.get('id'))
formats.reverse() formats.reverse()
return { return {
'id': video_id, 'id': video_id or page_json.get('id') or video_slug,
'display_id': video_slug, 'display_id': video_slug,
'title': title, 'title': page_json.get('name') or video_slug.replace('-', ' '),
'description': clean_html(page_json.get('description')), 'description': clean_html(page_json.get('description')),
'thumbnails': [ 'thumbnails': thumbnails,
{'preference': 0, 'id': 'Poster', 'url': page_json.get('poster_url')},
{'preference': 1, 'id': 'Cover', 'url': page_json.get('cover_url')},
],
'release_date': unified_strdate(page_json.get('released_at') or compat_str(page_json.get('released_at_unix'))), 'release_date': unified_strdate(page_json.get('released_at') or compat_str(page_json.get('released_at_unix'))),
'upload_date': unified_strdate(page_json.get('created_at') or compat_str(page_json.get('created_at_unix'))), 'upload_date': unified_strdate(page_json.get('created_at') or compat_str(page_json.get('created_at_unix'))),
'timestamp': int_or_none(page_json.get('created_at_unix') or parse_iso8601(page_json.get('created_at'))), 'timestamp': int_or_none(page_json.get('created_at_unix') or parse_iso8601(page_json.get('created_at'))),
@ -100,7 +97,7 @@ class HanimeIE(InfoExtractor):
'view_count': int_or_none(page_json.get('views')), 'view_count': int_or_none(page_json.get('views')),
'like_count': int_or_none(page_json.get('likes')), 'like_count': int_or_none(page_json.get('likes')),
'dislike_count': int_or_none(page_json.get('dislikes')), 'dislike_count': int_or_none(page_json.get('dislikes')),
'duration': float_or_none(duration), 'duration': float_or_none(page_json.get('duration_in_ms') / 1000),
'tags': tags, 'tags': tags,
'formats': formats, 'formats': formats,
} }