From 94113b44edfe95b0ffcd4c05b925f16ffe1d3bed Mon Sep 17 00:00:00 2001
From: Pawit Pornkitprasan
Date: Sat, 14 Oct 2017 17:13:29 +0700
Subject: [PATCH] [VK] Fix downloading user playlist
Scraping HTML will only get 30 last videos. Use the JSON API to get
up to 1000 videos.
Fixes #14327
---
youtube_dl/extractor/vk.py | 49 +++++++++++++++++++++++++-------------
1 file changed, 32 insertions(+), 17 deletions(-)
diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py
index ef8b9bcb7..3149dc08d 100644
--- a/youtube_dl/extractor/vk.py
+++ b/youtube_dl/extractor/vk.py
@@ -476,15 +476,29 @@ class VKIE(VKBaseIE):
class VKUserVideosIE(VKBaseIE):
IE_NAME = 'vk:uservideos'
IE_DESC = "VK - User's Videos"
- _VALID_URL = r'https?://(?:(?:m|new)\.)?vk\.com/videos(?P<id>-?[0-9]+)(?!\?.*\bz=video)(?:[/?#&]|$)'
- _TEMPLATE_URL = 'https://vk.com/videos'
+ _VALID_URL = r'https?://(?:(?:m|new)\.)?vk\.com/videos(?P<id>-?[0-9]+)(?:.*\bsection=(?P<section>\w+))?(?!\?.*\bz=video)(?:[/?#&]|$)'
+ _TEMPLATE_URL = 'https://vk.com/al_video.php?act=load_videos_silent&al=1&need_albums=0&offset=0&oid=%s&rowlen=3&section=%s'
_TESTS = [{
- 'url': 'http://vk.com/videos205387401',
+ 'url': 'https://vk.com/videos451841516?section=album_1',
'info_dict': {
- 'id': '205387401',
- 'title': "Tom Cruise's Videos",
+ 'id': '451841516',
+ 'title': 'album_1',
},
- 'playlist_mincount': 4,
+ 'playlist_count': 39,
+ }, {
+ 'url': 'https://m.vk.com/videos451841516',
+ 'info_dict': {
+ 'id': '451841516',
+ 'title': 'all',
+ },
+ 'playlist_mincount': 40,
+ }, {
+ 'url': 'https://vk.com/videos451841516',
+ 'info_dict': {
+ 'id': '451841516',
+ 'title': 'all',
+ },
+ 'playlist_mincount': 40,
}, {
'url': 'http://vk.com/videos-77521',
'only_matching': True,
@@ -499,21 +513,22 @@ class VKUserVideosIE(VKBaseIE):
'only_matching': True,
}]
+ def _generate_entry(self, entry):
+ video_id = '%d_%d' % (entry[0], entry[1])
+ return self.url_result('http://vk.com/video' + video_id, 'VK', video_id=video_id)
+
def _real_extract(self, url):
- page_id = self._match_id(url)
+ mobj = re.match(self._VALID_URL, url)
+ page_id = mobj.group('id')
+ section = mobj.group('section') or 'all'
- webpage = self._download_webpage(url, page_id)
+ data = self._download_json(
+ self._TEMPLATE_URL % (page_id, section), page_id,
+ transform_source=lambda s: re.sub(r'.*<!json>(?P<json>.*?)<!>.*', r'\g<json>', s))
- entries = [
- self.url_result(
- 'http://vk.com/video' + video_id, 'VK', video_id=video_id)
- for video_id in orderedSet(re.findall(r'href="/video(-?[0-9_]+)"', webpage))]
+ entries = [self._generate_entry(entry) for entry in reversed(data[section]['list'])]
- title = unescapeHTML(self._search_regex(
- r'<title>\s*([^<]+?)\s+\|\s+\d+\s+videos',
- webpage, 'title', default=page_id))
-
- return self.playlist_result(entries, page_id, title)
+ return self.playlist_result(entries, page_id, section)
class VKWallPostIE(VKBaseIE):