From fbdf294dfda5730cfddad3e344239f7092e87398 Mon Sep 17 00:00:00 2001 From: grefog Date: Sat, 24 Aug 2019 12:57:18 -0600 Subject: [PATCH 1/4] Add support for nicovideo.jp Series --- youtube_dl/extractor/extractors.py | 6 +++++- youtube_dl/extractor/niconico.py | 31 ++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 06de556b7..63a6d71cc 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -743,7 +743,11 @@ from .nick import ( NickNightIE, NickRuIE, ) -from .niconico import NiconicoIE, NiconicoPlaylistIE +from .niconico import ( + NiconicoIE, + NiconicoPlaylistIE, + NiconicoSeriesIE, +) from .ninecninemedia import NineCNineMediaIE from .ninegag import NineGagIE from .ninenow import NineNowIE diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py index eb07ca776..aeb3659a3 100644 --- a/youtube_dl/extractor/niconico.py +++ b/youtube_dl/extractor/niconico.py @@ -3,6 +3,7 @@ from __future__ import unicode_literals import json import datetime +import re from .common import InfoExtractor from ..compat import ( @@ -468,3 +469,33 @@ class NiconicoPlaylistIE(InfoExtractor): 'id': list_id, 'entries': entries, } +class NiconicoSeriesIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/series/(?P<id>\d+)' + + _TEST = { + 'url': 'https://www.nicovideo.jp/series/8253', + 'info_dict': { + 'id': '8253', + 'title' : '弦巻マキと結月ゆかりの未確認ゲーム日和', + }, + 'playlist_mincount':49, + } + def _real_extract(self, url): + series_id=url.split('/')[-1] + webpage=self._download_webpage(url, series_id) + entries=re.findall(r'(?<=(.*?)',webpage,'title'), + } + + From 5703dfcc71be692dbdbbf1ac57ec074ef928e15b Mon Sep 17 00:00:00 2001 From: grefog Date: Sat, 24 Aug 2019 13:41:10 -0600 Subject: [PATCH 2/4] Fix incorrect url parsing and delint niconico.py --- youtube_dl/extractor/niconico.py | 24 
++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py index aeb3659a3..fe4ec1a43 100644 --- a/youtube_dl/extractor/niconico.py +++ b/youtube_dl/extractor/niconico.py @@ -476,26 +476,22 @@ class NiconicoSeriesIE(InfoExtractor): 'url': 'https://www.nicovideo.jp/series/8253', 'info_dict': { 'id': '8253', - 'title' : '弦巻マキと結月ゆかりの未確認ゲーム日和', + 'title': '弦巻マキと結月ゆかりの未確認ゲーム日和', }, - 'playlist_mincount':49, + 'playlist_mincount': 49, } def _real_extract(self, url): - series_id=url.split('/')[-1] - webpage=self._download_webpage(url, series_id) - entries=re.findall(r'(?<=(.*?)',webpage,'title'), + 'title': self._search_regex(r'bodyTitle">(.*?)', webpage, 'title'), } - - From 184c95ced823185b10202a3f1aa42021df398edf Mon Sep 17 00:00:00 2001 From: grefog Date: Sat, 24 Aug 2019 14:06:55 -0600 Subject: [PATCH 3/4] Remove hard-coded value used in testing --- youtube_dl/extractor/niconico.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py index fe4ec1a43..3ad262e64 100644 --- a/youtube_dl/extractor/niconico.py +++ b/youtube_dl/extractor/niconico.py @@ -491,7 +491,7 @@ class NiconicoSeriesIE(InfoExtractor): } for entry in entries] return { '_type': 'playlist', - 'id': u'8253', + 'id': series_id, 'entries': entries, 'title': self._search_regex(r'bodyTitle">(.*?)', webpage, 'title'), } From 5d3c06ba9e15367703fa3382110a3e4d57488c92 Mon Sep 17 00:00:00 2001 From: grefog Date: Mon, 26 Aug 2019 15:44:38 -0600 Subject: [PATCH 4/4] Improve code hygiene after code review --- youtube_dl/extractor/niconico.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py index 3ad262e64..13e4195a4 100644 --- a/youtube_dl/extractor/niconico.py +++ b/youtube_dl/extractor/niconico.py @@ -23,6 +23,7 @@ from ..utils import ( 
unified_timestamp, urlencode_postdata, xpath_text, + urljoin, ) @@ -481,17 +482,18 @@ class NiconicoSeriesIE(InfoExtractor): 'playlist_mincount': 49, } def _real_extract(self, url): - series_id = url.split('/')[-1] + series_id = self._match_id(url) webpage = self._download_webpage(url, series_id) - entries = re.findall(r'(.*?)', webpage, 'title'), + 'title': self._search_regex(r'bodyTitle">(.*?)', webpage, 'title', fatal=False), }