Support memritv.org

2025-02-14 11:12:51 +08:00 · 2015-07-13 07:43:48 -05:00 · 2015-07-13 07:43:48 -05:00 · cef57038f8
commit cef57038f8
parent 41c0d2f8cb
2 changed files with 85 additions and 0 deletions
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -299,6 +299,7 @@ from .malemotion import MalemotionIE
 from .mdr import MDRIE
 from .megavideoz import MegaVideozIE
 from .metacafe import MetacafeIE
+from .memri import MemriIE
 from .metacritic import MetacriticIE
 from .mgoon import MgoonIE
 from .minhateca import MinhatecaIE
--- a/youtube_dl/extractor/memri.py
+++ b/youtube_dl/extractor/memri.py
@@ -0,0 +1,84 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import time
+import hmac
+import hashlib
+import itertools
+import re
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+    parse_iso8601,
+    unescapeHTML,
+    js_to_json,
+)
+from ..compat import compat_urllib_request
+from .common import InfoExtractor
+
+
+class MemriIE(InfoExtractor):
+    """Extract clips from memritv.org / memri.org clip pages and ``clip_id=`` URLs."""
+    _VALID_URL = r'https?://(?:www\.)?memri(?:tv)?\.org/(?:clip(?:/[^/]+)*/(?P<id>\d+)\.html?|.+clip_id=(?P<eid>\d+))'
+    IE_NAME = 'memri'
+    _TESTS = [{
+        'url': 'http://www.memritv.org/clip/en/4496.htm',
+        'info_dict': {
+            'id': '4496',
+            'ext': 'mp4',
+            'title': 'Takfiri, The Caliph\'s Favorite Cheese - Anti-ISIS Iraqi Satire',
+            'uploader': 'Memri',
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+    }]
+
+    def _real_extract(self, url):
+        """Return the info dict for url; raise ExtractorError if no source is usable."""
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id') or mobj.group('eid')
+        # Clip-page URLs are rewritten to the embedded player page, which is the one parsed below.
+        rurl = url
+        if mobj.group('eid') is None:
+            rurl = 'http://www.memritv.org/embedded_player/index.php?clip_id=' + video_id
+
+        webpage = self._download_webpage(rurl, video_id)
+        jstr = self._search_regex(r'var config_overrides =.+?({.+?});', webpage, 'json', flags=re.DOTALL)
+        # Strip every comment-only line (also consecutive ones): // comments break js_to_json.
+        jstr = re.sub(r'(?m)^\s*//.*$', '', jstr)
+        media = self._parse_json(jstr, video_id, transform_source=js_to_json).get('media') or {}
+
+        formats = []
+        for ent in media.get('source') or []:
+            eurl = ent.get('src')
+            if not eurl:
+                continue  # a source entry without a URL cannot be downloaded
+            if ent.get('type') == 'application/x-mpegURL':
+                formats.extend(self._extract_m3u8_formats(
+                    eurl, video_id, entry_protocol='m3u8', ext='mp4', m3u8_id='m3u8-mp4', preference=0))
+                continue
+            pmobj = re.match(r'(.+?)://', eurl)
+            if not pmobj:
+                continue  # no scheme, so no protocol can be derived
+            proto = pmobj.group(1)
+            fmt = {'url': eurl, 'ext': 'mp4', 'protocol': proto, 'format_id': proto + '-mp4'}
+            if proto == 'rtmp':
+                # Move the "mp4:..." part into play_path; (.*) also accepts URLs without a query.
+                rmobj = re.match(r'(.+?)(mp4:[^?]+)(.*)', eurl)
+                if rmobj:
+                    fmt.update({'url': rmobj.group(1) + rmobj.group(3), 'play_path': rmobj.group(2)})
+            formats.append(fmt)
+        if not formats:
+            verbose = self._downloader.params.get('verbose', False)
+            raise ExtractorError('No video found' + (' in ' + jstr + '\n' if verbose else ''))
+
+        self._sort_formats(formats)
+        return {
+            'id': video_id,
+            'title': unescapeHTML(media['title']),
+            'uploader': 'Memri',
+            'formats': formats,
+        }