[aliexpress] rewrote code so that it parses JSON

2025-03-13 03:37:23 +08:00 · 2017-07-23 15:35:28 +02:00 · 2017-07-23 15:35:28 +02:00 · 4b990cb33a
commit 4b990cb33a
parent 9e8c64cb0c
1 changed files with 15 additions and 9 deletions
--- a/youtube_dl/extractor/aliexpress.py
+++ b/youtube_dl/extractor/aliexpress.py
@@ -8,7 +8,7 @@ from datetime import datetime
 class AliExpressLiveIE(InfoExtractor):

    _VALID_URL = r'https?://live\.aliexpress\.com/live/(?P<id>[0-9]{16})'
-    _TEST = [{
+    _TEST = {
        'url': 'https://live.aliexpress.com/live/2800002704436634',
        'info_dict': {
            'id': '2800002704436634',
@@ -21,17 +21,23 @@ class AliExpressLiveIE(InfoExtractor):
        'params': {
            'skip_download': True,
        }
-    }]
+    }

    def _real_extract(self, url):
-        page = self._download_webpage(url, self._match_id(url))
-        upload_date = self._html_search_regex(r'"createTime":([0-9]{10})[0-9]{3},', page, 'upload_date')
+        vid_id = str(self._match_id(url))
+        page = self._download_webpage(url, self._match_id(url)).replace('\n', '')
+        # runParams is a variable which contains information about the stream
+        run_params_json = self._search_regex(r'runParams = ([^<]+)[\s+]var [a-z]+', page, 'runParams')
+        run_params = self._parse_json(run_params_json, vid_id)
+
+        # createTime appears to be a Unix timestamp in milliseconds (three extra trailing digits), so divide by 1000 to get seconds
+        upload_date = datetime.fromtimestamp(run_params.get('followBar').get('createTime') / 1000).strftime('%Y%m%d')

        return {
-            'id': str(self._match_id(url)),
-            'title': self._html_search_regex(r'"title": "([^"]+)"', page, 'url'),
-            'url': self._html_search_regex(r'"replyStreamUrl": "([^"]+)"', page, 'url'),
-            'uploader': self._html_search_regex(r'"name":"([^"]+)"', page, 'uploader'),
-            'upload_date': datetime.fromtimestamp(int(upload_date)).strftime('%Y%m%d'),
+            'id': vid_id,
+            'title': run_params['title'],
+            'url': run_params['replyStreamUrl'],
+            'uploader': run_params.get('followBar').get('name'),
+            'upload_date': upload_date,
            'is_live': True,
        }