From cef57038f8049cf721b5239866e0b3860ddbfd79 Mon Sep 17 00:00:00 2001 From: fnord Date: Mon, 13 Jul 2015 07:43:48 -0500 Subject: [PATCH 1/2] Support memritv.org --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/memri.py | 84 ++++++++++++++++++++++++++++++++ 2 files changed, 85 insertions(+) create mode 100644 youtube_dl/extractor/memri.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index cbaa07391..e8253682d 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -299,6 +299,7 @@ from .malemotion import MalemotionIE from .mdr import MDRIE from .megavideoz import MegaVideozIE from .metacafe import MetacafeIE +from .memri import MemriIE from .metacritic import MetacriticIE from .mgoon import MgoonIE from .minhateca import MinhatecaIE diff --git a/youtube_dl/extractor/memri.py b/youtube_dl/extractor/memri.py new file mode 100644 index 000000000..24bc77ed2 --- /dev/null +++ b/youtube_dl/extractor/memri.py @@ -0,0 +1,84 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import json +import time +import hmac +import hashlib +import itertools +import re +from ..utils import ( + ExtractorError, + int_or_none, + parse_iso8601, + unescapeHTML, + js_to_json, +) +from ..compat import compat_urllib_request +from .common import InfoExtractor + + +class MemriIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?memri(?:tv)?.org/(?:clip(?:/[^/]+)*/(?P\d+)\.html?|.+clip_id=(?P\d+))' + IE_NAME = 'memri' + _TESTS = [{ + 'url': 'http://www.memritv.org/clip/en/4496.htm', + 'info_dict': { + 'id': '4496', + 'ext': 'mp4', + 'title': 'Takfiri, The Caliph\'s Favorite Cheese - Anti-ISIS Iraqi Satire', + 'uploader': 'Memri', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') or mobj.group('eid') + rurl = url + if mobj.groupdict().get('eid') == None: + rurl = 'http://www.memritv.org/embedded_player/index.php?clip_id='+video_id + + webpage = self._download_webpage(rurl, video_id) + jstr = self._search_regex( r'var config_overrides =.+?({.+?});', webpage, 'json', flags=re.DOTALL) + jstr = re.sub(r'\n\s*//.*?\n','\n',jstr) # // comments break js_to_json + js = self._parse_json(jstr,'json',transform_source=js_to_json) + + formats = [] + for ent in js['media']['source']: + eurl = ent.get('src') + if ent.get('type','') == 'application/x-mpegURL': + formats.extend( self._extract_m3u8_formats( + eurl, video_id, entry_protocol='m3u8', ext='mp4', + m3u8_id='m3u8-mp4', + preference=0) + ) + continue + proto = re.search(r'^(.+?)://',eurl).group(1) + format = { + 'url': eurl, + 'ext': 'mp4', + 'protocol': proto, + 'format_id': proto+'-mp4', + } + if proto == 'rtmp': + urlre = re.search(r'^(.+?)(mp4:[^\?]+)(.+)',eurl) + format['url'] = urlre.group(1)+urlre.group(3) + format['play_path'] = urlre.group(2) + formats.append(format) + if not formats: + if self._downloader.params.get('verbose', False): + raise ExtractorError('No video found in '+jstr+'\n') + else: + raise ExtractorError('No video found') + + self._sort_formats(formats) + return { + 'id': video_id, + 'title': unescapeHTML(js['media']['title']), + 'uploader': 'Memri', + 'formats': formats, + } From 7784f328ffc602a1dd1f63b6b130d01dfd18fdbc Mon Sep 17 00:00:00 2001 From: fnord Date: Fri, 17 Jul 2015 03:02:57 -0500 Subject: [PATCH 2/2] memri: fix syntax --- youtube_dl/extractor/memri.py | 52 +++++++++++++++-------------------- 1 file changed, 22 insertions(+), 30 deletions(-) diff --git a/youtube_dl/extractor/memri.py b/youtube_dl/extractor/memri.py index 24bc77ed2..2b60aee46 100644 --- a/youtube_dl/extractor/memri.py +++ b/youtube_dl/extractor/memri.py @@ -1,20 +1,12 @@ # coding: utf-8 from __future__ import unicode_literals -import json -import time -import hmac -import hashlib -import itertools import re from ..utils import ( ExtractorError, - int_or_none, - parse_iso8601, unescapeHTML, js_to_json, ) -from ..compat import compat_urllib_request from .common import InfoExtractor @@ -39,46 +31,46 @@ class MemriIE(InfoExtractor): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') or mobj.group('eid') rurl = url - if mobj.groupdict().get('eid') == None: - rurl = 'http://www.memritv.org/embedded_player/index.php?clip_id='+video_id + if mobj.groupdict().get('eid') is None: + rurl = 'http://www.memritv.org/embedded_player/index.php?clip_id=' + video_id webpage = self._download_webpage(rurl, video_id) - jstr = self._search_regex( r'var config_overrides =.+?({.+?});', webpage, 'json', flags=re.DOTALL) - jstr = re.sub(r'\n\s*//.*?\n','\n',jstr) # // comments break js_to_json - js = self._parse_json(jstr,'json',transform_source=js_to_json) + jstr = self._search_regex(r'var config_overrides =.+?({.+?});', webpage, 'json', flags=re.DOTALL) + jstr = re.sub(r'\n\s*//.*?\n', '\n', jstr) # // comments break js_to_json + js = self._parse_json(jstr, 'json', transform_source=js_to_json) formats = [] for ent in js['media']['source']: eurl = ent.get('src') - if ent.get('type','') == 'application/x-mpegURL': - formats.extend( self._extract_m3u8_formats( - eurl, video_id, entry_protocol='m3u8', ext='mp4', - m3u8_id='m3u8-mp4', - preference=0) + if ent.get('type', '') == 'application/x-mpegURL': + formats.extend(self._extract_m3u8_formats( + eurl, video_id, entry_protocol='m3u8', ext='mp4', + m3u8_id='m3u8-mp4', + preference=0) ) continue - proto = re.search(r'^(.+?)://',eurl).group(1) + proto = re.search(r'^(.+?)://', eurl).group(1) format = { - 'url': eurl, - 'ext': 'mp4', - 'protocol': proto, - 'format_id': proto+'-mp4', + 'url': eurl, + 'ext': 'mp4', + 'protocol': proto, + 'format_id': proto + '-mp4', } if proto == 'rtmp': - urlre = re.search(r'^(.+?)(mp4:[^\?]+)(.+)',eurl) - format['url'] = urlre.group(1)+urlre.group(3) + urlre = re.search(r'^(.+?)(mp4:[^\?]+)(.+)', eurl) + format['url'] = urlre.group(1) + urlre.group(3) format['play_path'] = urlre.group(2) formats.append(format) if not formats: if self._downloader.params.get('verbose', False): - raise ExtractorError('No video found in '+jstr+'\n') + raise ExtractorError('No video found in ' + jstr + '\n') else: raise ExtractorError('No video found') self._sort_formats(formats) return { - 'id': video_id, - 'title': unescapeHTML(js['media']['title']), - 'uploader': 'Memri', - 'formats': formats, + 'id': video_id, + 'title': unescapeHTML(js['media']['title']), + 'uploader': 'Memri', + 'formats': formats, }