From 89b2983b3f4bd3160b1e8cccd70ee36c30ffe729 Mon Sep 17 00:00:00 2001 From: quinlander Date: Sun, 14 Apr 2019 20:04:08 -0400 Subject: [PATCH 01/15] Added changba import and implemented simple changba extractor with single test --- youtube_dl/extractor/changba.py | 51 ++++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 1 + 2 files changed, 52 insertions(+) create mode 100644 youtube_dl/extractor/changba.py diff --git a/youtube_dl/extractor/changba.py b/youtube_dl/extractor/changba.py new file mode 100644 index 000000000..1fbf7968f --- /dev/null +++ b/youtube_dl/extractor/changba.py @@ -0,0 +1,51 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + +import re + +class ChangbaIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?changba\.com/s/(?P[0-9A-Za-z-_]+)' + _TEST = { + 'url': 'https://changba.com/s/0GHVw6vyXv9N2FhaFi2WJg', + 'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)', + 'info_dict': { + 'id': '1152860688', + 'ext': 'mp4', + 'title': 'Video title goes here', + 'thumbnail': r're:^https?://.*\.jpg$', + # TODO more properties, either as: + # * A value + # * MD5 checksum; start the string with md5: + # * A regular expression; start the string with re: + # * Any Python type (for example int or float) + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + id = self._search_regex(r'workid=([0-9]+)', webpage, 'id') + isvideo = self._search_regex(r'&isvideo=([0-9])', webpage, 'isvideo') + title = self._search_regex(r']+class="title"[^>]*>([^<]+)', webpage, 'title') + + if int(isvideo) == 0: + ext = 'mp3' + try: + url = self._search_regex(r'([a-z]+:\/\/[0-9a-z]+\.changba\.com\/[a-z]+\/[a-z]+\/[0-9]+\/[0-9]+\.mp3)', webpage, 'url') + except: + url = "http://lzscuw.changba.com/" + str(id) + ".mp3" + else: + ext = 'mp4' + try: + url = self._search_regex(r'([a-z]+:\/\/[0-9a-z]+\.changba\.com\/[a-z]+\/[a-z]+\/[0-9]+\/[0-9]+\.mp4)', webpage, 'url') + except: + url = "http://lzscuw.changba.com/" + str(id) + ".mp4" + + return { + 'url': url, + 'id': id, + 'ext': ext, + 'title': title + } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index cc19af5c4..a68849319 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -185,6 +185,7 @@ from .ceskatelevize import ( CeskaTelevizeIE, CeskaTelevizePoradyIE, ) +from .changba import ChangbaIE from .channel9 import Channel9IE from .charlierose import CharlieRoseIE from .chaturbate import ChaturbateIE From 0b8c02e7161364d6087fb35b104e59fc5a87138e Mon Sep 17 00:00:00 2001 From: quinlander Date: Mon, 15 Apr 2019 16:38:29 -0400 Subject: [PATCH 02/15] added 2 basic tests --- youtube_dl/extractor/changba.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/changba.py b/youtube_dl/extractor/changba.py index 1fbf7968f..643aa66b5 100644 --- a/youtube_dl/extractor/changba.py +++ b/youtube_dl/extractor/changba.py @@ -7,21 +7,30 @@ import re class ChangbaIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?changba\.com/s/(?P[0-9A-Za-z-_]+)' - _TEST = { + _TESTS = [{ 'url': 'https://changba.com/s/0GHVw6vyXv9N2FhaFi2WJg', - 'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)', + 'md5': 'ea55d17e939f3e2dabf483e47e8e5693', 'info_dict': { 'id': '1152860688', 'ext': 'mp4', - 'title': 'Video title goes here', - 'thumbnail': r're:^https?://.*\.jpg$', + 'title': '对你爱不完【炫酷慢摇】 ', + # 'thumbnail': r're:^https?://.*\.jpg$', # TODO more properties, either as: # * A value # * MD5 checksum; start the string with md5: # * A regular expression; start the string with re: # * Any Python type (for example int or float) } - } + }, + { + 'url': 'http://changba.com/s/nZqfbS_vCnieNNjJ7UiEGw?', + 'md5': 'e401463ffb03ed8900a0bccc641335e1', + 'info_dict': { + 'id': '1091968526', + 'ext': 'mp3', + 'title': '下雪 ', + } + }] def _real_extract(self, url): video_id = self._match_id(url) From 76a8117e486a6b226939a4745a4721ff349bd04c Mon Sep 17 00:00:00 2001 From: Julian Galperin Date: Mon, 15 Apr 2019 17:02:55 -0400 Subject: [PATCH 03/15] cleaned up changba.py --- youtube_dl/extractor/changba.py | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/changba.py b/youtube_dl/extractor/changba.py index 643aa66b5..3b32e5511 100644 --- a/youtube_dl/extractor/changba.py +++ b/youtube_dl/extractor/changba.py @@ -39,18 +39,14 @@ class ChangbaIE(InfoExtractor): isvideo = self._search_regex(r'&isvideo=([0-9])', webpage, 'isvideo') title = self._search_regex(r']+class="title"[^>]*>([^<]+)', webpage, 'title') + ext = ".mp4" if int(isvideo) == 0: - ext = 'mp3' - try: - url = self._search_regex(r'([a-z]+:\/\/[0-9a-z]+\.changba\.com\/[a-z]+\/[a-z]+\/[0-9]+\/[0-9]+\.mp3)', webpage, 'url') - except: - url = "http://lzscuw.changba.com/" + str(id) + ".mp3" - else: - ext = 'mp4' - try: - url = self._search_regex(r'([a-z]+:\/\/[0-9a-z]+\.changba\.com\/[a-z]+\/[a-z]+\/[0-9]+\/[0-9]+\.mp4)', webpage, 'url') - except: - url = "http://lzscuw.changba.com/" + str(id) + ".mp4" + ext = ".mp3" + + try: + url = self._search_regex(r'([a-z]+:\/\/[0-9a-z]+\.changba\.com\/[a-z]+\/[a-z]+\/[0-9]+\/[0-9]+\.mp[3-4])', webpage, 'url') + except: + url = "http://lzscuw.changba.com/" + str(id) + ext return { 'url': url, From 3448add96293f37d03f6a314627d306fca24d3c4 Mon Sep 17 00:00:00 2001 From: Julian Galperin Date: Mon, 15 Apr 2019 17:14:22 -0400 Subject: [PATCH 04/15] fixed ext error --- youtube_dl/extractor/changba.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/changba.py b/youtube_dl/extractor/changba.py index 3b32e5511..37feef085 100644 --- a/youtube_dl/extractor/changba.py +++ b/youtube_dl/extractor/changba.py @@ -39,14 +39,14 @@ class ChangbaIE(InfoExtractor): isvideo = self._search_regex(r'&isvideo=([0-9])', webpage, 'isvideo') title = self._search_regex(r']+class="title"[^>]*>([^<]+)', webpage, 'title') - ext = ".mp4" + ext = "mp4" if int(isvideo) == 0: - ext = ".mp3" + ext = "mp3" try: url = self._search_regex(r'([a-z]+:\/\/[0-9a-z]+\.changba\.com\/[a-z]+\/[a-z]+\/[0-9]+\/[0-9]+\.mp[3-4])', webpage, 'url') except: - url = "http://lzscuw.changba.com/" + str(id) + ext + url = "http://lzscuw.changba.com/" + str(id) + "." + ext return { 'url': url, From adb6f53432922f7cf92624cfe501b43b411d66e3 Mon Sep 17 00:00:00 2001 From: Julian Galperin Date: Mon, 15 Apr 2019 17:21:51 -0400 Subject: [PATCH 05/15] added more tests --- youtube_dl/extractor/changba.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/youtube_dl/extractor/changba.py b/youtube_dl/extractor/changba.py index 37feef085..bb6086540 100644 --- a/youtube_dl/extractor/changba.py +++ b/youtube_dl/extractor/changba.py @@ -30,6 +30,24 @@ class ChangbaIE(InfoExtractor): 'ext': 'mp3', 'title': '下雪 ', } + }, + { + 'url': 'http://changba.com/s/CPiNWbAa1qy0po0llqIJbg', + 'md5': '', + 'info_dict': { + 'id': '136918054', + 'ext': 'mp3', + 'title': '红豆 ', + } + }, + { + 'url': 'http://changba.com/s/-N00JJ30YruunrER5eBcWw', + 'md5': 'cd68f8da8d8c69afbb8e4dbbbfa8b277', + 'info_dict': { + 'id': '172671761', + 'ext': 'mp3', + 'title': '天与地 ', + } }] def _real_extract(self, url): From 159a2cff27045ee66c1f86ea681bf9e24bcbbee0 Mon Sep 17 00:00:00 2001 From: quinlander Date: Mon, 15 Apr 2019 17:38:36 -0400 Subject: [PATCH 06/15] cleaned up code to increase readability, fixed test 2 --- youtube_dl/extractor/changba.py | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/changba.py b/youtube_dl/extractor/changba.py index bb6086540..a4ea5f680 100644 --- a/youtube_dl/extractor/changba.py +++ b/youtube_dl/extractor/changba.py @@ -14,12 +14,6 @@ class ChangbaIE(InfoExtractor): 'id': '1152860688', 'ext': 'mp4', 'title': '对你爱不完【炫酷慢摇】 ', - # 'thumbnail': r're:^https?://.*\.jpg$', - # TODO more properties, either as: - # * A value - # * MD5 checksum; start the string with md5: - # * A regular expression; start the string with re: - # * Any Python type (for example int or float) } }, { @@ -33,7 +27,7 @@ class ChangbaIE(InfoExtractor): }, { 'url': 'http://changba.com/s/CPiNWbAa1qy0po0llqIJbg', - 'md5': '', + 'md5': '7adcc9afb85ace8ff854bdd0e8567f50', 'info_dict': { 'id': '136918054', 'ext': 'mp3', @@ -54,21 +48,18 @@ class ChangbaIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) id = self._search_regex(r'workid=([0-9]+)', webpage, 'id') - isvideo = self._search_regex(r'&isvideo=([0-9])', webpage, 'isvideo') title = self._search_regex(r']+class="title"[^>]*>([^<]+)', webpage, 'title') + isvideo = self._search_regex(r'&isvideo=([0-9])', webpage, 'isvideo') + ext = 'mp3' if int(isvideo) == 0 else 'mp4' - ext = "mp4" - if int(isvideo) == 0: - ext = "mp3" - try: url = self._search_regex(r'([a-z]+:\/\/[0-9a-z]+\.changba\.com\/[a-z]+\/[a-z]+\/[0-9]+\/[0-9]+\.mp[3-4])', webpage, 'url') except: - url = "http://lzscuw.changba.com/" + str(id) + "." + ext + url = 'http://lzscuw.changba.com/{}.{}'.format(str(id), ext) return { 'url': url, 'id': id, 'ext': ext, - 'title': title + 'title': title, } From 8ac9af1ce2dbd6fc8b258bfc63204aea0d96176b Mon Sep 17 00:00:00 2001 From: quinlander Date: Mon, 15 Apr 2019 17:43:21 -0400 Subject: [PATCH 07/15] remove unecessary 're' import and ran flake8 --- youtube_dl/extractor/changba.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/changba.py b/youtube_dl/extractor/changba.py index a4ea5f680..2a3e8aef3 100644 --- a/youtube_dl/extractor/changba.py +++ b/youtube_dl/extractor/changba.py @@ -3,7 +3,6 @@ from __future__ import unicode_literals from .common import InfoExtractor -import re class ChangbaIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?changba\.com/s/(?P[0-9A-Za-z-_]+)' @@ -15,8 +14,7 @@ class ChangbaIE(InfoExtractor): 'ext': 'mp4', 'title': '对你爱不完【炫酷慢摇】 ', } - }, - { + }, { 'url': 'http://changba.com/s/nZqfbS_vCnieNNjJ7UiEGw?', 'md5': 'e401463ffb03ed8900a0bccc641335e1', 'info_dict': { @@ -24,8 +22,7 @@ class ChangbaIE(InfoExtractor): 'ext': 'mp3', 'title': '下雪 ', } - }, - { + }, { 'url': 'http://changba.com/s/CPiNWbAa1qy0po0llqIJbg', 'md5': '7adcc9afb85ace8ff854bdd0e8567f50', 'info_dict': { @@ -33,8 +30,7 @@ class ChangbaIE(InfoExtractor): 'ext': 'mp3', 'title': '红豆 ', } - }, - { + }, { 'url': 'http://changba.com/s/-N00JJ30YruunrER5eBcWw', 'md5': 'cd68f8da8d8c69afbb8e4dbbbfa8b277', 'info_dict': { From 5de4ebf7b571fc8c7a5fb9ef66bc3955840af6b3 Mon Sep 17 00:00:00 2001 From: quinlander Date: Tue, 16 Apr 2019 19:29:41 -0400 Subject: [PATCH 08/15] simplified try block regex and specified better name for url --- youtube_dl/extractor/changba.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/changba.py b/youtube_dl/extractor/changba.py index 2a3e8aef3..ac9b2be2b 100644 --- a/youtube_dl/extractor/changba.py +++ b/youtube_dl/extractor/changba.py @@ -49,12 +49,12 @@ class ChangbaIE(InfoExtractor): ext = 'mp3' if int(isvideo) == 0 else 'mp4' try: - url = self._search_regex(r'([a-z]+:\/\/[0-9a-z]+\.changba\.com\/[a-z]+\/[a-z]+\/[0-9]+\/[0-9]+\.mp[3-4])', webpage, 'url') + src_url = self._search_regex(r'var a="([^"]*)', webpage, 'url') except: - url = 'http://lzscuw.changba.com/{}.{}'.format(str(id), ext) + src_url = 'http://lzscuw.changba.com/{}.{}'.format(str(id), ext) return { - 'url': url, + 'url': src_url, 'id': id, 'ext': ext, 'title': title, From 51dedc9f730b62121b06b50461ccf685293b9f92 Mon Sep 17 00:00:00 2001 From: quinlander Date: Tue, 16 Apr 2019 19:53:21 -0400 Subject: [PATCH 09/15] replaced int to string conversion with 'int_or_none', made more robust by checking multiple subdomains for src link --- youtube_dl/extractor/changba.py | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/changba.py b/youtube_dl/extractor/changba.py index ac9b2be2b..f4a8113cc 100644 --- a/youtube_dl/extractor/changba.py +++ b/youtube_dl/extractor/changba.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..utils import int_or_none class ChangbaIE(InfoExtractor): @@ -44,14 +45,30 @@ class ChangbaIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) id = self._search_regex(r'workid=([0-9]+)', webpage, 'id') - title = self._search_regex(r']+class="title"[^>]*>([^<]+)', webpage, 'title') + title = self._search_regex( + r']+class="title"[^>]*>([^<]+)', webpage, 'title' + ) isvideo = self._search_regex(r'&isvideo=([0-9])', webpage, 'isvideo') - ext = 'mp3' if int(isvideo) == 0 else 'mp4' + ext = 'mp3' if int_or_none(isvideo) == 0 else 'mp4' + + SITE_SUBDOMAINS = [ + 'lzscuw', + 'upscuw', + 'aliuwmp3', + 'upuwmp3', + 'qiniuuwmp3' + ] try: src_url = self._search_regex(r'var a="([^"]*)', webpage, 'url') except: - src_url = 'http://lzscuw.changba.com/{}.{}'.format(str(id), ext) + for subdomain in SITE_SUBDOMAINS: + try: + src_url = 'http://{}.changba.com/{}.{}'.format( + subdomain, str(id), ext + ) + except: + continue return { 'url': src_url, From d8363c816bb6c691e728900114b513446d39db55 Mon Sep 17 00:00:00 2001 From: Julian Galperin Date: Thu, 18 Apr 2019 17:31:18 -0400 Subject: [PATCH 10/15] changed some of the stuff the guy wants to be changed --- youtube_dl/extractor/changba.py | 29 ++++------------------------- 1 file changed, 4 insertions(+), 25 deletions(-) diff --git a/youtube_dl/extractor/changba.py b/youtube_dl/extractor/changba.py index f4a8113cc..7b2b782f3 100644 --- a/youtube_dl/extractor/changba.py +++ b/youtube_dl/extractor/changba.py @@ -31,14 +31,6 @@ class ChangbaIE(InfoExtractor): 'ext': 'mp3', 'title': '红豆 ', } - }, { - 'url': 'http://changba.com/s/-N00JJ30YruunrER5eBcWw', - 'md5': 'cd68f8da8d8c69afbb8e4dbbbfa8b277', - 'info_dict': { - 'id': '172671761', - 'ext': 'mp3', - 'title': '天与地 ', - } }] def _real_extract(self, url): @@ -48,27 +40,14 @@ class ChangbaIE(InfoExtractor): title = self._search_regex( r']+class="title"[^>]*>([^<]+)', webpage, 'title' ) - isvideo = self._search_regex(r'&isvideo=([0-9])', webpage, 'isvideo') - ext = 'mp3' if int_or_none(isvideo) == 0 else 'mp4' - - SITE_SUBDOMAINS = [ - 'lzscuw', - 'upscuw', - 'aliuwmp3', - 'upuwmp3', - 'qiniuuwmp3' - ] + # title = self._og_search_title(webpage) + is_video = self._search_regex(r'&isvideo=([0-9])', webpage, 'isvideo') + ext = 'mp3' if int_or_none(is_video) == 0 else 'mp4' try: src_url = self._search_regex(r'var a="([^"]*)', webpage, 'url') except: - for subdomain in SITE_SUBDOMAINS: - try: - src_url = 'http://{}.changba.com/{}.{}'.format( - subdomain, str(id), ext - ) - except: - continue + src_url = 'http://lzscuw.changba.com/' + str(id) + '.' + ext return { 'url': src_url, From 563034f100bb5527a25b5b45ba900c81a9963ee9 Mon Sep 17 00:00:00 2001 From: Julian Galperin Date: Thu, 18 Apr 2019 17:42:34 -0400 Subject: [PATCH 11/15] Added except on Regex error --- youtube_dl/extractor/changba.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/changba.py b/youtube_dl/extractor/changba.py index 7b2b782f3..3e0098f73 100644 --- a/youtube_dl/extractor/changba.py +++ b/youtube_dl/extractor/changba.py @@ -2,7 +2,10 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import int_or_none +from ..utils import ( + int_or_none, + RegexNotFoundError, +) class ChangbaIE(InfoExtractor): @@ -46,7 +49,7 @@ class ChangbaIE(InfoExtractor): try: src_url = self._search_regex(r'var a="([^"]*)', webpage, 'url') - except: + except RegexNotFoundError: src_url = 'http://lzscuw.changba.com/' + str(id) + '.' + ext return { From 188abeff783368023906531776758e43b9022ee9 Mon Sep 17 00:00:00 2001 From: Julian Galperin Date: Thu, 18 Apr 2019 18:56:15 -0400 Subject: [PATCH 12/15] decodes video files now --- youtube_dl/extractor/changba.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/changba.py b/youtube_dl/extractor/changba.py index 3e0098f73..695ca5f0f 100644 --- a/youtube_dl/extractor/changba.py +++ b/youtube_dl/extractor/changba.py @@ -1,6 +1,8 @@ # coding: utf-8 from __future__ import unicode_literals +import base64 + from .common import InfoExtractor from ..utils import ( int_or_none, @@ -50,7 +52,15 @@ class ChangbaIE(InfoExtractor): try: src_url = self._search_regex(r'var a="([^"]*)', webpage, 'url') except RegexNotFoundError: - src_url = 'http://lzscuw.changba.com/' + str(id) + '.' + ext + encoded = self._search_regex( + r'video_url: \'([0-9A-Za-z]+)', webpage, 'video url' + ) + src_url = base64.b64decode(encoded).decode('utf-8') + + # src_url = self._og_search_url(webpage) + # src_url = self._og_search_property( + # 'video_url', webpage, fatal=True) + return { 'url': src_url, From eeb17b99796202d6858bbfbda1f9e9095c316c6f Mon Sep 17 00:00:00 2001 From: Julian Galperin Date: Fri, 19 Apr 2019 09:54:47 -0400 Subject: [PATCH 13/15] updated video downloading --- youtube_dl/extractor/changba.py | 30 ++++++++++-------------------- 1 file changed, 10 insertions(+), 20 deletions(-) diff --git a/youtube_dl/extractor/changba.py b/youtube_dl/extractor/changba.py index 695ca5f0f..deebf57c0 100644 --- a/youtube_dl/extractor/changba.py +++ b/youtube_dl/extractor/changba.py @@ -13,12 +13,12 @@ from ..utils import ( class ChangbaIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?changba\.com/s/(?P[0-9A-Za-z-_]+)' _TESTS = [{ - 'url': 'https://changba.com/s/0GHVw6vyXv9N2FhaFi2WJg', - 'md5': 'ea55d17e939f3e2dabf483e47e8e5693', + 'url': 'https://changba.com/s/PBZkNLjjPmuE_nW7EuUNpg?&cbcode=Kxhsv6044ik&from=pcrecommend', + 'md5': '88aa70b832c4071cffd7e06d759bc7e8', 'info_dict': { - 'id': '1152860688', + 'id': '1146278955', 'ext': 'mp4', - 'title': '对你爱不完【炫酷慢摇】 ', + 'title': ' ', } }, { 'url': 'http://changba.com/s/nZqfbS_vCnieNNjJ7UiEGw?', @@ -28,38 +28,28 @@ class ChangbaIE(InfoExtractor): 'ext': 'mp3', 'title': '下雪 ', } - }, { - 'url': 'http://changba.com/s/CPiNWbAa1qy0po0llqIJbg', - 'md5': '7adcc9afb85ace8ff854bdd0e8567f50', - 'info_dict': { - 'id': '136918054', - 'ext': 'mp3', - 'title': '红豆 ', - } }] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) + # print(webpage) id = self._search_regex(r'workid=([0-9]+)', webpage, 'id') title = self._search_regex( r']+class="title"[^>]*>([^<]+)', webpage, 'title' ) + print(title) # title = self._og_search_title(webpage) - is_video = self._search_regex(r'&isvideo=([0-9])', webpage, 'isvideo') - ext = 'mp3' if int_or_none(is_video) == 0 else 'mp4' - + ext = None try: src_url = self._search_regex(r'var a="([^"]*)', webpage, 'url') + ext = 'mp3' except RegexNotFoundError: encoded = self._search_regex( - r'video_url: \'([0-9A-Za-z]+)', webpage, 'video url' + r'video_url: \'([0-9A-Za-z]+=*)', webpage, 'video url' ) src_url = base64.b64decode(encoded).decode('utf-8') - - # src_url = self._og_search_url(webpage) - # src_url = self._og_search_property( - # 'video_url', webpage, fatal=True) + ext = 'mp4' return { From 75be8e5e25179ac5ee448888c962f5e7a7d381f6 Mon Sep 17 00:00:00 2001 From: Julian Galperin Date: Fri, 19 Apr 2019 10:25:04 -0400 Subject: [PATCH 14/15] cleaned up code --- youtube_dl/extractor/changba.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/changba.py b/youtube_dl/extractor/changba.py index deebf57c0..6d8b07b21 100644 --- a/youtube_dl/extractor/changba.py +++ b/youtube_dl/extractor/changba.py @@ -19,6 +19,7 @@ class ChangbaIE(InfoExtractor): 'id': '1146278955', 'ext': 'mp4', 'title': ' ', + 'vcodec': None } }, { 'url': 'http://changba.com/s/nZqfbS_vCnieNNjJ7UiEGw?', @@ -27,23 +28,24 @@ class ChangbaIE(InfoExtractor): 'id': '1091968526', 'ext': 'mp3', 'title': '下雪 ', + 'vcodec': 'none' } }] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - # print(webpage) id = self._search_regex(r'workid=([0-9]+)', webpage, 'id') title = self._search_regex( r']+class="title"[^>]*>([^<]+)', webpage, 'title' ) - print(title) - # title = self._og_search_title(webpage) ext = None + vcodec = None + try: src_url = self._search_regex(r'var a="([^"]*)', webpage, 'url') ext = 'mp3' + vcodec = 'none' except RegexNotFoundError: encoded = self._search_regex( r'video_url: \'([0-9A-Za-z]+=*)', webpage, 'video url' @@ -51,10 +53,10 @@ class ChangbaIE(InfoExtractor): src_url = base64.b64decode(encoded).decode('utf-8') ext = 'mp4' - return { 'url': src_url, 'id': id, 'ext': ext, 'title': title, + 'vcodec': vcodec } From f4dc0ea149f2d5a21f2c839d544bed85f4cbd10e Mon Sep 17 00:00:00 2001 From: quinlander Date: Fri, 19 Apr 2019 10:35:47 -0400 Subject: [PATCH 15/15] removed unnecessary int_or_none import --- youtube_dl/extractor/changba.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/youtube_dl/extractor/changba.py b/youtube_dl/extractor/changba.py index 6d8b07b21..62ecec964 100644 --- a/youtube_dl/extractor/changba.py +++ b/youtube_dl/extractor/changba.py @@ -4,10 +4,7 @@ from __future__ import unicode_literals import base64 from .common import InfoExtractor -from ..utils import ( - int_or_none, - RegexNotFoundError, -) +from ..utils import RegexNotFoundError class ChangbaIE(InfoExtractor):