From eb4284a94bf6dce3aec31ce3016ee0816e5b2080 Mon Sep 17 00:00:00 2001
From: dubber0
Date: Sat, 22 Jul 2017 21:32:51 +0200
Subject: [PATCH 1/5] [aliexpress] Add new extractor

---
 youtube_dl/extractor/aliexpress.py | 36 ++++++++++++++++++++++++++++++
 youtube_dl/extractor/extractors.py |  1 +
 2 files changed, 37 insertions(+)
 create mode 100644 youtube_dl/extractor/aliexpress.py

diff --git a/youtube_dl/extractor/aliexpress.py b/youtube_dl/extractor/aliexpress.py
new file mode 100644
index 000000000..810b9b9fb
--- /dev/null
+++ b/youtube_dl/extractor/aliexpress.py
@@ -0,0 +1,36 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import unified_strdate
+from datetime import datetime
+
+class AliExpressLiveIE(InfoExtractor):
+    _VALID_URL = r'https?://live\.aliexpress\.com/live/(?P<id>[0-9]{16})'
+    _TEST = [{
+        'url': 'https://live.aliexpress.com/live/2800002704436634',
+        'info_dict': {
+            'id': '2800002704436634',
+            'ext': 'm3u8',
+            'title': 'CASIMA7.22',
+            'uploader': 'CASIMA Official Store',
+            'upload_date': '20170714',
+            'is_live': True,
+        },
+        'params': {
+            'skip_download': True,
+        }
+    }]
+
+    def _real_extract(self, url):
+        page = self._download_webpage(url, self._match_id(url))
+        upload_date = self._html_search_regex(r'"createTime":([0-9]{10})[0-9]{3},', page, 'upload_date')
+
+        return {
+            'id': str(self._match_id(url)),
+            'title': self._html_search_regex(r'"title": "([^"]+)"', page, 'url'),
+            'url': self._html_search_regex(r'"replyStreamUrl": "([^"]+)"', page, 'url'),
+            'uploader': self._html_search_regex(r'"name":"([^"]+)"', page, 'uploader'),
+            'upload_date': datetime.fromtimestamp(int(upload_date)).strftime('%Y%m%d'),
+            'is_live': True,
+        }
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index db7616caa..08a7ea926 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -45,6 +45,7 @@ from .anvato import AnvatoIE
 from .anysex import AnySexIE
 from .aol import AolIE
 from .allocine import AllocineIE
+from .aliexpress import AliExpressLiveIE
 from .aparat import AparatIE
 from .appleconnect import AppleConnectIE
 from .appletrailers import (

From 9e8c64cb0c183519a4fe0e5fee4105100b04a30d Mon Sep 17 00:00:00 2001
From: dubber0
Date: Sat, 22 Jul 2017 21:40:10 +0200
Subject: [PATCH 2/5] [aliexpress] fixed pep8 errors

---
 youtube_dl/extractor/aliexpress.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/aliexpress.py b/youtube_dl/extractor/aliexpress.py
index 810b9b9fb..86a2be321 100644
--- a/youtube_dl/extractor/aliexpress.py
+++ b/youtube_dl/extractor/aliexpress.py
@@ -2,10 +2,11 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..utils import unified_strdate
 from datetime import datetime
 
+
 class AliExpressLiveIE(InfoExtractor):
+
     _VALID_URL = r'https?://live\.aliexpress\.com/live/(?P<id>[0-9]{16})'
     _TEST = [{
         'url': 'https://live.aliexpress.com/live/2800002704436634',

From 4b990cb33af8c13b1e0367f6f9af290f4e9313ce Mon Sep 17 00:00:00 2001
From: dubber0
Date: Sun, 23 Jul 2017 15:35:28 +0200
Subject: [PATCH 3/5] [aliexpress] rewrote code so that it parses JSON

---
 youtube_dl/extractor/aliexpress.py | 24 +++++++++++++++---------
 1 file changed, 15 insertions(+), 9 deletions(-)

diff --git a/youtube_dl/extractor/aliexpress.py b/youtube_dl/extractor/aliexpress.py
index 86a2be321..ab18e2cd1 100644
--- a/youtube_dl/extractor/aliexpress.py
+++ b/youtube_dl/extractor/aliexpress.py
@@ -8,7 +8,7 @@ from datetime import datetime
 class AliExpressLiveIE(InfoExtractor):
 
     _VALID_URL = r'https?://live\.aliexpress\.com/live/(?P<id>[0-9]{16})'
-    _TEST = [{
+    _TEST = {
         'url': 'https://live.aliexpress.com/live/2800002704436634',
         'info_dict': {
             'id': '2800002704436634',
@@ -21,17 +21,23 @@ class AliExpressLiveIE(InfoExtractor):
         'params': {
             'skip_download': True,
         }
-    }]
+    }
 
     def _real_extract(self, url):
-        page = self._download_webpage(url, self._match_id(url))
-        upload_date = self._html_search_regex(r'"createTime":([0-9]{10})[0-9]{3},', page, 'upload_date')
+        vid_id = str(self._match_id(url))
+        page = self._download_webpage(url, self._match_id(url)).replace('\n', '')
+        # runParams is a variable which contains information about the stream
+        run_params_json = self._search_regex(r'runParams = ([^<]+)[\s+]var [a-z]+', page, 'runParams')
+        run_params = self._parse_json(run_params_json, vid_id)
+
+        # the given unix timestamp contains 000 at the end, so we have to strip it off by dividing it with 1000
+        upload_date = datetime.fromtimestamp(run_params.get('followBar').get('createTime') / 1000).strftime('%Y%m%d')
 
         return {
-            'id': str(self._match_id(url)),
-            'title': self._html_search_regex(r'"title": "([^"]+)"', page, 'url'),
-            'url': self._html_search_regex(r'"replyStreamUrl": "([^"]+)"', page, 'url'),
-            'uploader': self._html_search_regex(r'"name":"([^"]+)"', page, 'uploader'),
-            'upload_date': datetime.fromtimestamp(int(upload_date)).strftime('%Y%m%d'),
+            'id': vid_id,
+            'title': run_params['title'],
+            'url': run_params['replyStreamUrl'],
+            'uploader': run_params.get('followBar').get('name'),
+            'upload_date': upload_date,
             'is_live': True,
         }

From 5e31985b03dbceddb860d2974c127861bada201c Mon Sep 17 00:00:00 2001
From: dubber0
Date: Sun, 23 Jul 2017 18:28:26 +0200
Subject: [PATCH 4/5] [aliexpress] fixed possible extraction issues and removed
 unnecessary things

---
 youtube_dl/extractor/aliexpress.py | 24 +++++++++---------------
 1 file changed, 9 insertions(+), 15 deletions(-)

diff --git a/youtube_dl/extractor/aliexpress.py b/youtube_dl/extractor/aliexpress.py
index ab18e2cd1..2526a4aeb 100644
--- a/youtube_dl/extractor/aliexpress.py
+++ b/youtube_dl/extractor/aliexpress.py
@@ -2,7 +2,6 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from datetime import datetime
 
 
 class AliExpressLiveIE(InfoExtractor):
@@ -10,34 +9,29 @@ class AliExpressLiveIE(InfoExtractor):
     _VALID_URL = r'https?://live\.aliexpress\.com/live/(?P<id>[0-9]{16})'
     _TEST = {
         'url': 'https://live.aliexpress.com/live/2800002704436634',
+        'md5': '7ac2bc46afdd18f0b45a0a340fc47ffe',
         'info_dict': {
             'id': '2800002704436634',
             'ext': 'm3u8',
             'title': 'CASIMA7.22',
             'uploader': 'CASIMA Official Store',
             'upload_date': '20170714',
-            'is_live': True,
+            'timestamp': 1500027138,
         },
-        'params': {
-            'skip_download': True,
-        }
     }
 
     def _real_extract(self, url):
-        vid_id = str(self._match_id(url))
-        page = self._download_webpage(url, self._match_id(url)).replace('\n', '')
+        video_id = self._match_id(url)
+        page = self._download_webpage(url, video_id)
         # runParams is a variable which contains information about the stream
         run_params_json = self._search_regex(r'runParams = ([^<]+)[\s+]var [a-z]+', page, 'runParams')
-        run_params = self._parse_json(run_params_json, vid_id)
-
-        # the given unix timestamp contains 000 at the end, so we have to strip it off by dividing it with 1000
-        upload_date = datetime.fromtimestamp(run_params.get('followBar').get('createTime') / 1000).strftime('%Y%m%d')
+        run_params = self._parse_json(run_params_json, video_id)
 
         return {
-            'id': vid_id,
+            'id': video_id,
             'title': run_params['title'],
             'url': run_params['replyStreamUrl'],
-            'uploader': run_params.get('followBar').get('name'),
-            'upload_date': upload_date,
-            'is_live': True,
+            'uploader': run_params.get('followBar', {'name': None}).get('name'),
+            # the given unix timestamp contains 000 at the end, so we have to strip it off by dividing it with 1000
+            'timestamp': run_params.get('followBar', {'createTime': 0}).get('createTime', 0) / 1000,
         }

From 3e5114503252fc5c111aec670290904232cac36b Mon Sep 17 00:00:00 2001
From: dubber0
Date: Mon, 24 Jul 2017 19:21:57 +0200
Subject: [PATCH 5/5] [aliexpress] fixed possible regex and return issue and
 used better dict getter

---
Review note: the millisecond-to-second conversion of createTime is done
through float_or_none(..., scale=1000) rather than dividing the try_get()
result directly. try_get() returns None when followBar/createTime is
absent, and None / 1000 raises TypeError before float_or_none can apply
its None handling.

 youtube_dl/extractor/aliexpress.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/youtube_dl/extractor/aliexpress.py b/youtube_dl/extractor/aliexpress.py
index 2526a4aeb..3997213f8 100644
--- a/youtube_dl/extractor/aliexpress.py
+++ b/youtube_dl/extractor/aliexpress.py
@@ -1,7 +1,12 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+
+import re
+
 from .common import InfoExtractor
+from ..utils import try_get, float_or_none
+from ..compat import compat_str
 
 
 class AliExpressLiveIE(InfoExtractor):
@@ -23,15 +28,13 @@ class AliExpressLiveIE(InfoExtractor):
     def _real_extract(self, url):
         video_id = self._match_id(url)
         page = self._download_webpage(url, video_id)
-        # runParams is a variable which contains information about the stream
-        run_params_json = self._search_regex(r'runParams = ([^<]+)[\s+]var [a-z]+', page, 'runParams')
+        run_params_json = self._search_regex(r'runParams = (.+)[\s+]var myCtl', page, 'runParams', flags=re.DOTALL)
         run_params = self._parse_json(run_params_json, video_id)
 
         return {
             'id': video_id,
             'title': run_params['title'],
             'url': run_params['replyStreamUrl'],
-            'uploader': run_params.get('followBar', {'name': None}).get('name'),
-            # the given unix timestamp contains 000 at the end, so we have to strip it off by dividing it with 1000
-            'timestamp': run_params.get('followBar', {'createTime': 0}).get('createTime', 0) / 1000,
+            'uploader': try_get(run_params, lambda x: x['followBar']['name'], compat_str),
+            'timestamp': float_or_none(try_get(run_params, lambda x: x['followBar']['createTime']), scale=1000),
         }