Merge remote-tracking branch 'upstream/master'

# Conflicts:
#	youtube_dl/extractor/facebook.py
2025-03-10 22:07:16 +08:00 · 2019-07-13 22:55:40 +03:00 · 2019-07-13 22:55:40 +03:00 · d08fed0d41
commit d08fed0d41
parent 25df555679 d89a0a8026
14 changed files with 288 additions and 160 deletions
--- a/youtube_dl/extractor/biobiochiletv.py
+++ b/youtube_dl/extractor/biobiochiletv.py
@@ -6,7 +6,6 @@ from ..utils import (
    ExtractorError,
    remove_end,
 )
-from .rudo import RudoIE


 class BioBioChileTVIE(InfoExtractor):
@@ -41,11 +40,15 @@ class BioBioChileTVIE(InfoExtractor):
    }, {
        'url': 'http://www.biobiochile.cl/noticias/bbtv/comentarios-bio-bio/2016/07/08/edecanes-del-congreso-figuras-decorativas-que-le-cuestan-muy-caro-a-los-chilenos.shtml',
        'info_dict': {
-            'id': 'edecanes-del-congreso-figuras-decorativas-que-le-cuestan-muy-caro-a-los-chilenos',
+            'id': 'b4xd0LK3SK',
            'ext': 'mp4',
-            'uploader': '(none)',
-            'upload_date': '20160708',
-            'title': 'Edecanes del Congreso: Figuras decorativas que le cuestan muy caro a los chilenos',
+            # TODO: fix url_transparent information overriding
+            # 'uploader': 'Juan Pablo Echenique',
+            'title': 'Comentario Oscar Cáceres',
+        },
+        'params': {
+            # empty m3u8 manifest
+            'skip_download': True,
        },
    }, {
        'url': 'http://tv.biobiochile.cl/notas/2015/10/22/ninos-transexuales-de-quien-es-la-decision.shtml',
@@ -60,7 +63,9 @@ class BioBioChileTVIE(InfoExtractor):

        webpage = self._download_webpage(url, video_id)

-        rudo_url = RudoIE._extract_url(webpage)
+        rudo_url = self._search_regex(
+            r'<iframe[^>]+src=(?P<q1>[\'"])(?P<url>(?:https?:)?//rudo\.video/vod/[0-9a-zA-Z]+)(?P=q1)',
+            webpage, 'embed URL', None, group='url')
        if not rudo_url:
            raise ExtractorError('No videos found')

@@ -68,7 +73,7 @@ class BioBioChileTVIE(InfoExtractor):

        thumbnail = self._og_search_thumbnail(webpage)
        uploader = self._html_search_regex(
-            r'<a[^>]+href=["\']https?://(?:busca|www)\.biobiochile\.cl/(?:lista/)?(?:author|autor)[^>]+>(.+?)</a>',
+            r'<a[^>]+href=["\'](?:https?://(?:busca|www)\.biobiochile\.cl)?/(?:lista/)?(?:author|autor)[^>]+>(.+?)</a>',
            webpage, 'uploader', fatal=False)

        return {
--- a/youtube_dl/extractor/bleacherreport.py
+++ b/youtube_dl/extractor/bleacherreport.py
@@ -71,7 +71,7 @@ class BleacherReportIE(InfoExtractor):
        video = article_data.get('video')
        if video:
            video_type = video['type']
-            if video_type == 'cms.bleacherreport.com':
+            if video_type in ('cms.bleacherreport.com', 'vid.bleacherreport.com'):
                info['url'] = 'http://bleacherreport.com/video_embed?id=%s' % video['id']
            elif video_type == 'ooyala.com':
                info['url'] = 'ooyala:%s' % video['id']
@@ -87,9 +87,9 @@ class BleacherReportIE(InfoExtractor):


 class BleacherReportCMSIE(AMPIE):
-    _VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/video_embed\?id=(?P<id>[0-9a-f-]{36})'
+    _VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/video_embed\?id=(?P<id>[0-9a-f-]{36}|\d{5})'
    _TESTS = [{
-        'url': 'http://bleacherreport.com/video_embed?id=8fd44c2f-3dc5-4821-9118-2c825a98c0e1',
+        'url': 'http://bleacherreport.com/video_embed?id=8fd44c2f-3dc5-4821-9118-2c825a98c0e1&library=video-cms',
        'md5': '2e4b0a997f9228ffa31fada5c53d1ed1',
        'info_dict': {
            'id': '8fd44c2f-3dc5-4821-9118-2c825a98c0e1',
@@ -101,6 +101,6 @@ class BleacherReportCMSIE(AMPIE):

    def _real_extract(self, url):
        video_id = self._match_id(url)
-        info = self._extract_feed_info('http://cms.bleacherreport.com/media/items/%s/akamai.json' % video_id)
+        info = self._extract_feed_info('http://vid.bleacherreport.com/videos/%s.akamai' % video_id)
        info['id'] = video_id
        return info
--- a/youtube_dl/extractor/dbtv.py
+++ b/youtube_dl/extractor/dbtv.py
@@ -7,50 +7,51 @@ from .common import InfoExtractor


 class DBTVIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?dbtv\.no/(?:[^/]+/)?(?P<id>[0-9]+)(?:#(?P<display_id>.+))?'
+    _VALID_URL = r'https?://(?:www\.)?dagbladet\.no/video/(?:(?:embed|(?P<display_id>[^/]+))/)?(?P<id>[0-9A-Za-z_-]{11}|[a-zA-Z0-9]{8})'
    _TESTS = [{
-        'url': 'http://dbtv.no/3649835190001#Skulle_teste_ut_fornøyelsespark,_men_kollegaen_var_bare_opptatt_av_bikinikroppen',
-        'md5': '2e24f67936517b143a234b4cadf792ec',
+        'url': 'https://www.dagbladet.no/video/PynxJnNWChE/',
+        'md5': 'b8f850ba1860adbda668d367f9b77699',
        'info_dict': {
-            'id': '3649835190001',
-            'display_id': 'Skulle_teste_ut_fornøyelsespark,_men_kollegaen_var_bare_opptatt_av_bikinikroppen',
+            'id': 'PynxJnNWChE',
            'ext': 'mp4',
            'title': 'Skulle teste ut fornøyelsespark, men kollegaen var bare opptatt av bikinikroppen',
-            'description': 'md5:1504a54606c4dde3e4e61fc97aa857e0',
+            'description': 'md5:49cc8370e7d66e8a2ef15c3b4631fd3f',
            'thumbnail': r're:https?://.*\.jpg',
-            'timestamp': 1404039863,
-            'upload_date': '20140629',
-            'duration': 69.544,
-            'uploader_id': '1027729757001',
+            'upload_date': '20160916',
+            'duration': 69,
+            'uploader_id': 'UCk5pvsyZJoYJBd7_oFPTlRQ',
+            'uploader': 'Dagbladet',
        },
-        'add_ie': ['BrightcoveNew']
+        'add_ie': ['Youtube']
    }, {
-        'url': 'http://dbtv.no/3649835190001',
+        'url': 'https://www.dagbladet.no/video/embed/xlGmyIeN9Jo/?autoplay=false',
        'only_matching': True,
    }, {
-        'url': 'http://www.dbtv.no/lazyplayer/4631135248001',
-        'only_matching': True,
-    }, {
-        'url': 'http://dbtv.no/vice/5000634109001',
-        'only_matching': True,
-    }, {
-        'url': 'http://dbtv.no/filmtrailer/3359293614001',
+        'url': 'https://www.dagbladet.no/video/truer-iran-bor-passe-dere/PalfB2Cw',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_urls(webpage):
        return [url for _, url in re.findall(
-            r'<iframe[^>]+src=(["\'])((?:https?:)?//(?:www\.)?dbtv\.no/(?:lazy)?player/\d+.*?)\1',
+            r'<iframe[^>]+src=(["\'])((?:https?:)?//(?:www\.)?dagbladet\.no/video/embed/(?:[0-9A-Za-z_-]{11}|[a-zA-Z0-9]{8}).*?)\1',
            webpage)]

    def _real_extract(self, url):
-        video_id, display_id = re.match(self._VALID_URL, url).groups()
-
-        return {
+        display_id, video_id = re.match(self._VALID_URL, url).groups()
+        info = {
            '_type': 'url_transparent',
-            'url': 'http://players.brightcove.net/1027729757001/default_default/index.html?videoId=%s' % video_id,
            'id': video_id,
            'display_id': display_id,
-            'ie_key': 'BrightcoveNew',
        }
+        if len(video_id) == 11:
+            info.update({
+                'url': video_id,
+                'ie_key': 'Youtube',
+            })
+        else:
+            info.update({
+                'url': 'jwplatform:' + video_id,
+                'ie_key': 'JWPlatform',
+            })
+        return info
--- a/youtube_dl/extractor/dlive.py
+++ b/youtube_dl/extractor/dlive.py
@@ -0,0 +1,94 @@
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class DLiveVODIE(InfoExtractor):
+    IE_NAME = 'dlive:vod'
+    _VALID_URL = r'https?://(?:www\.)?dlive\.tv/p/(?P<uploader_id>.+?)\+(?P<id>[a-zA-Z0-9]+)'
+    _TEST = {
+        'url': 'https://dlive.tv/p/pdp+3mTzOl4WR',
+        'info_dict': {
+            'id': '3mTzOl4WR',
+            'ext': 'mp4',
+            'title': 'Minecraft with james charles epic',
+            'upload_date': '20190701',
+            'timestamp': 1562011015,
+            'uploader_id': 'pdp',
+        }
+    }
+
+    def _real_extract(self, url):
+        uploader_id, vod_id = re.match(self._VALID_URL, url).groups()
+        broadcast = self._download_json(
+            'https://graphigo.prd.dlive.tv/', vod_id,
+            data=json.dumps({'query': '''query {
+  pastBroadcast(permlink:"%s+%s") {
+    content
+    createdAt
+    length
+    playbackUrl
+    title
+    thumbnailUrl
+    viewCount
+  }
+}''' % (uploader_id, vod_id)}).encode())['data']['pastBroadcast']
+        title = broadcast['title']
+        formats = self._extract_m3u8_formats(
+            broadcast['playbackUrl'], vod_id, 'mp4', 'm3u8_native')
+        self._sort_formats(formats)
+        return {
+            'id': vod_id,
+            'title': title,
+            'uploader_id': uploader_id,
+            'formats': formats,
+            'description': broadcast.get('content'),
+            'thumbnail': broadcast.get('thumbnailUrl'),
+            'timestamp': int_or_none(broadcast.get('createdAt'), 1000),
+            'view_count': int_or_none(broadcast.get('viewCount')),
+        }
+
+
+class DLiveStreamIE(InfoExtractor):
+    IE_NAME = 'dlive:stream'
+    _VALID_URL = r'https?://(?:www\.)?dlive\.tv/(?!p/)(?P<id>[\w.-]+)'
+
+    def _real_extract(self, url):
+        display_name = self._match_id(url)
+        user = self._download_json(
+            'https://graphigo.prd.dlive.tv/', display_name,
+            data=json.dumps({'query': '''query {
+  userByDisplayName(displayname:"%s") {
+    livestream {
+      content
+      createdAt
+      title
+      thumbnailUrl
+      watchingCount
+    }
+    username
+  }
+}''' % display_name}).encode())['data']['userByDisplayName']
+        livestream = user['livestream']
+        title = livestream['title']
+        username = user['username']
+        formats = self._extract_m3u8_formats(
+            'https://live.prd.dlive.tv/hls/live/%s.m3u8' % username,
+            display_name, 'mp4')
+        self._sort_formats(formats)
+        return {
+            'id': display_name,
+            'title': self._live_title(title),
+            'uploader': display_name,
+            'uploader_id': username,
+            'formats': formats,
+            'description': livestream.get('content'),
+            'thumbnail': livestream.get('thumbnailUrl'),
+            'is_live': True,
+            'timestamp': int_or_none(livestream.get('createdAt'), 1000),
+            'view_count': int_or_none(livestream.get('watchingCount')),
+        }
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -579,6 +579,7 @@ from .linkedin import (
 )
 from .linuxacademy import LinuxAcademyIE
 from .litv import LiTVIE
+from .livejournal import LiveJournalIE
 from .liveleak import (
    LiveLeakIE,
    LiveLeakEmbedIE,
@@ -967,7 +968,6 @@ from .rts import RTSIE
 from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE, RTVELiveIE, RTVETelevisionIE
 from .rtvnh import RTVNHIE
 from .rtvs import RTVSIE
-from .rudo import RudoIE
 from .ruhd import RUHDIE
 from .rutube import (
    RutubeIE,
@@ -1255,6 +1255,10 @@ from .udn import UDNEmbedIE
 from .ufctv import UFCTVIE
 from .uktvplay import UKTVPlayIE
 from .digiteka import DigitekaIE
+from .dlive import (
+    DLiveVODIE,
+    DLiveStreamIE,
+)
 from .umg import UMGDeIE
 from .unistra import UnistraIE
 from .unity import UnityIE
--- a/youtube_dl/extractor/facebook.py
+++ b/youtube_dl/extractor/facebook.py
@@ -462,8 +462,8 @@ class FacebookIE(InfoExtractor):
            r'[\'\"]ownerid[\'\"]\s*:\s*[\'\"](\d+)[\'\"]', tahoe_data.secondary,
            'uploader_id', fatal=False)

-        thumbnail = self._og_search_thumbnail(webpage)

+        thumbnail = self._html_search_meta(['og:image', 'twitter:image'], webpage)
        if is_live:
            view_count = parse_count(
                self._search_regex(r'viewerCount:([\d]+)', webpage, 'views', fatal=False) or \
--- a/youtube_dl/extractor/livejournal.py
+++ b/youtube_dl/extractor/livejournal.py
@@ -0,0 +1,42 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import int_or_none
+
+
+class LiveJournalIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:[^.]+\.)?livejournal\.com/video/album/\d+.+?\bid=(?P<id>\d+)'
+    _TEST = {
+        'url': 'https://andrei-bt.livejournal.com/video/album/407/?mode=view&id=51272',
+        'md5': 'adaf018388572ced8a6f301ace49d4b2',
+        'info_dict': {
+            'id': '1263729',
+            'ext': 'mp4',
+            'title': 'Истребители против БПЛА',
+            'upload_date': '20190624',
+            'timestamp': 1561406715,
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        record = self._parse_json(self._search_regex(
+            r'Site\.page\s*=\s*({.+?});', webpage,
+            'page data'), video_id)['video']['record']
+        storage_id = compat_str(record['storageid'])
+        title = record.get('name')
+        if title:
+            # remove filename extension(.mp4, .mov, etc...)
+            title = title.rsplit('.', 1)[0]
+        return {
+            '_type': 'url_transparent',
+            'id': video_id,
+            'title': title,
+            'thumbnail': record.get('thumbnail'),
+            'timestamp': int_or_none(record.get('timecreate')),
+            'url': 'eagleplatform:vc.videos.livejournal.com:' + storage_id,
+            'ie_key': 'EaglePlatform',
+        }
--- a/youtube_dl/extractor/lynda.py
+++ b/youtube_dl/extractor/lynda.py
@@ -117,6 +117,10 @@ class LyndaIE(LyndaBaseIE):
    }, {
        'url': 'https://www.lynda.com/de/Graphic-Design-tutorials/Willkommen-Grundlagen-guten-Gestaltung/393570/393572-4.html',
        'only_matching': True,
+    }, {
+        # Status="NotFound", Message="Transcript not found"
+        'url': 'https://www.lynda.com/ASP-NET-tutorials/What-you-should-know/5034180/2811512-4.html',
+        'only_matching': True,
    }]

    def _raise_unavailable(self, video_id):
@@ -247,12 +251,17 @@ class LyndaIE(LyndaBaseIE):

    def _get_subtitles(self, video_id):
        url = 'https://www.lynda.com/ajax/player?videoId=%s&type=transcript' % video_id
-        subs = self._download_json(url, None, False)
+        subs = self._download_webpage(
+            url, video_id, 'Downloading subtitles JSON', fatal=False)
+        if not subs or 'Status="NotFound"' in subs:
+            return {}
+        subs = self._parse_json(subs, video_id, fatal=False)
+        if not subs:
+            return {}
        fixed_subs = self._fix_subtitles(subs)
        if fixed_subs:
            return {'en': [{'ext': 'srt', 'data': fixed_subs}]}
-        else:
-            return {}
+        return {}


 class LyndaCourseIE(LyndaBaseIE):
--- a/youtube_dl/extractor/roosterteeth.py
+++ b/youtube_dl/extractor/roosterteeth.py
@@ -4,11 +4,14 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
+from ..compat import (
+    compat_HTTPError,
+    compat_str,
+)
 from ..utils import (
    ExtractorError,
    int_or_none,
-    strip_or_none,
-    unescapeHTML,
+    str_or_none,
    urlencode_postdata,
 )

@@ -21,15 +24,14 @@ class RoosterTeethIE(InfoExtractor):
        'url': 'http://roosterteeth.com/episode/million-dollars-but-season-2-million-dollars-but-the-game-announcement',
        'md5': 'e2bd7764732d785ef797700a2489f212',
        'info_dict': {
-            'id': '26576',
+            'id': '9156',
            'display_id': 'million-dollars-but-season-2-million-dollars-but-the-game-announcement',
            'ext': 'mp4',
-            'title': 'Million Dollars, But...: Million Dollars, But... The Game Announcement',
-            'description': 'md5:0cc3b21986d54ed815f5faeccd9a9ca5',
+            'title': 'Million Dollars, But... The Game Announcement',
+            'description': 'md5:168a54b40e228e79f4ddb141e89fe4f5',
            'thumbnail': r're:^https?://.*\.png$',
            'series': 'Million Dollars, But...',
            'episode': 'Million Dollars, But... The Game Announcement',
-            'comment_count': int,
        },
    }, {
        'url': 'http://achievementhunter.roosterteeth.com/episode/off-topic-the-achievement-hunter-podcast-2016-i-didn-t-think-it-would-pass-31',
@@ -89,60 +91,55 @@ class RoosterTeethIE(InfoExtractor):

    def _real_extract(self, url):
        display_id = self._match_id(url)
+        api_episode_url = 'https://svod-be.roosterteeth.com/api/v1/episodes/%s' % display_id

-        webpage = self._download_webpage(url, display_id)
-
-        episode = strip_or_none(unescapeHTML(self._search_regex(
-            (r'videoTitle\s*=\s*(["\'])(?P<title>(?:(?!\1).)+)\1',
-             r'<title>(?P<title>[^<]+)</title>'), webpage, 'title',
-            default=None, group='title')))
-
-        title = strip_or_none(self._og_search_title(
-            webpage, default=None)) or episode
-
-        m3u8_url = self._search_regex(
-            r'file\s*:\s*(["\'])(?P<url>http.+?\.m3u8.*?)\1',
-            webpage, 'm3u8 url', default=None, group='url')
-
-        if not m3u8_url:
-            if re.search(r'<div[^>]+class=["\']non-sponsor', webpage):
-                self.raise_login_required(
-                    '%s is only available for FIRST members' % display_id)
-
-            if re.search(r'<div[^>]+class=["\']golive-gate', webpage):
-                self.raise_login_required('%s is not available yet' % display_id)
-
-            raise ExtractorError('Unable to extract m3u8 URL')
+        try:
+            m3u8_url = self._download_json(
+                api_episode_url + '/videos', display_id,
+                'Downloading video JSON metadata')['data'][0]['attributes']['url']
+        except ExtractorError as e:
+            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+                if self._parse_json(e.cause.read().decode(), display_id).get('access') is False:
+                    self.raise_login_required(
+                        '%s is only available for FIRST members' % display_id)
+            raise

        formats = self._extract_m3u8_formats(
-            m3u8_url, display_id, ext='mp4',
-            entry_protocol='m3u8_native', m3u8_id='hls')
+            m3u8_url, display_id, 'mp4', 'm3u8_native', m3u8_id='hls')
        self._sort_formats(formats)

-        description = strip_or_none(self._og_search_description(webpage))
-        thumbnail = self._proto_relative_url(self._og_search_thumbnail(webpage))
+        episode = self._download_json(
+            api_episode_url, display_id,
+            'Downloading episode JSON metadata')['data'][0]
+        attributes = episode['attributes']
+        title = attributes.get('title') or attributes['display_title']
+        video_id = compat_str(episode['id'])

-        series = self._search_regex(
-            (r'<h2>More ([^<]+)</h2>', r'<a[^>]+>See All ([^<]+) Videos<'),
-            webpage, 'series', fatal=False)
-
-        comment_count = int_or_none(self._search_regex(
-            r'>Comments \((\d+)\)<', webpage,
-            'comment count', fatal=False))
-
-        video_id = self._search_regex(
-            (r'containerId\s*=\s*["\']episode-(\d+)\1',
-             r'<div[^<]+id=["\']episode-(\d+)'), webpage,
-            'video id', default=display_id)
+        thumbnails = []
+        for image in episode.get('included', {}).get('images', []):
+            if image.get('type') == 'episode_image':
+                img_attributes = image.get('attributes') or {}
+                for k in ('thumb', 'small', 'medium', 'large'):
+                    img_url = img_attributes.get(k)
+                    if img_url:
+                        thumbnails.append({
+                            'id': k,
+                            'url': img_url,
+                        })

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
-            'description': description,
-            'thumbnail': thumbnail,
-            'series': series,
-            'episode': episode,
-            'comment_count': comment_count,
+            'description': attributes.get('description') or attributes.get('caption'),
+            'thumbnails': thumbnails,
+            'series': attributes.get('show_title'),
+            'season_number': int_or_none(attributes.get('season_number')),
+            'season_id': attributes.get('season_id'),
+            'episode': title,
+            'episode_number': int_or_none(attributes.get('number')),
+            'episode_id': str_or_none(episode.get('uuid')),
            'formats': formats,
+            'channel_id': attributes.get('channel_id'),
+            'duration': int_or_none(attributes.get('length')),
        }
--- a/youtube_dl/extractor/rudo.py
+++ b/youtube_dl/extractor/rudo.py
@@ -1,53 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import (
-    js_to_json,
-    get_element_by_class,
-    unified_strdate,
-)
-
-
-class RudoIE(InfoExtractor):
-    _VALID_URL = r'https?://rudo\.video/vod/(?P<id>[0-9a-zA-Z]+)'
-
-    _TEST = {
-        'url': 'http://rudo.video/vod/oTzw0MGnyG',
-        'md5': '2a03a5b32dd90a04c83b6d391cf7b415',
-        'info_dict': {
-            'id': 'oTzw0MGnyG',
-            'ext': 'mp4',
-            'title': 'Comentario Tomás Mosciatti',
-            'upload_date': '20160617',
-        },
-    }
-
-    @classmethod
-    def _extract_url(cls, webpage):
-        mobj = re.search(
-            r'<iframe[^>]+src=(?P<q1>[\'"])(?P<url>(?:https?:)?//rudo\.video/vod/[0-9a-zA-Z]+)(?P=q1)',
-            webpage)
-        if mobj:
-            return mobj.group('url')
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-
-        webpage = self._download_webpage(url, video_id, encoding='iso-8859-1')
-
-        jwplayer_data = self._parse_json(self._search_regex(
-            r'(?s)playerInstance\.setup\(({.+?})\)', webpage, 'jwplayer data'), video_id,
-            transform_source=lambda s: js_to_json(re.sub(r'encodeURI\([^)]+\)', '""', s)))
-
-        info_dict = self._parse_jwplayer_data(
-            jwplayer_data, video_id, require_title=False, m3u8_id='hls', mpd_id='dash')
-
-        info_dict.update({
-            'title': self._og_search_title(webpage),
-            'upload_date': unified_strdate(get_element_by_class('date', webpage)),
-        })
-
-        return info_dict
--- a/youtube_dl/extractor/spankbang.py
+++ b/youtube_dl/extractor/spankbang.py
@@ -5,6 +5,7 @@ import re
 from .common import InfoExtractor
 from ..utils import (
    ExtractorError,
+    merge_dicts,
    orderedSet,
    parse_duration,
    parse_resolution,
@@ -26,6 +27,8 @@ class SpankBangIE(InfoExtractor):
            'description': 'dillion harper masturbates on a bed',
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'silly2587',
+            'timestamp': 1422571989,
+            'upload_date': '20150129',
            'age_limit': 18,
        }
    }, {
@@ -106,31 +109,36 @@ class SpankBangIE(InfoExtractor):

            for format_id, format_url in stream.items():
                if format_id.startswith(STREAM_URL_PREFIX):
+                    if format_url and isinstance(format_url, list):
+                        format_url = format_url[0]
                    extract_format(
                        format_id[len(STREAM_URL_PREFIX):], format_url)

        self._sort_formats(formats)

+        info = self._search_json_ld(webpage, video_id, default={})
+
        title = self._html_search_regex(
-            r'(?s)<h1[^>]*>(.+?)</h1>', webpage, 'title')
+            r'(?s)<h1[^>]*>(.+?)</h1>', webpage, 'title', default=None)
        description = self._search_regex(
            r'<div[^>]+\bclass=["\']bottom[^>]+>\s*<p>[^<]*</p>\s*<p>([^<]+)',
-            webpage, 'description', fatal=False)
-        thumbnail = self._og_search_thumbnail(webpage)
-        uploader = self._search_regex(
-            r'class="user"[^>]*><img[^>]+>([^<]+)',
+            webpage, 'description', default=None)
+        thumbnail = self._og_search_thumbnail(webpage, default=None)
+        uploader = self._html_search_regex(
+            (r'(?s)<li[^>]+class=["\']profile[^>]+>(.+?)</a>',
+             r'class="user"[^>]*><img[^>]+>([^<]+)'),
            webpage, 'uploader', default=None)
        duration = parse_duration(self._search_regex(
            r'<div[^>]+\bclass=["\']right_side[^>]+>\s*<span>([^<]+)',
-            webpage, 'duration', fatal=False))
+            webpage, 'duration', default=None))
        view_count = str_to_int(self._search_regex(
-            r'([\d,.]+)\s+plays', webpage, 'view count', fatal=False))
+            r'([\d,.]+)\s+plays', webpage, 'view count', default=None))

        age_limit = self._rta_search(webpage)

-        return {
+        return merge_dicts({
            'id': video_id,
-            'title': title,
+            'title': title or video_id,
            'description': description,
            'thumbnail': thumbnail,
            'uploader': uploader,
@@ -138,7 +146,8 @@ class SpankBangIE(InfoExtractor):
            'view_count': view_count,
            'formats': formats,
            'age_limit': age_limit,
-        }
+        }, info
+        )


 class SpankBangPlaylistIE(InfoExtractor):
--- a/youtube_dl/extractor/spike.py
+++ b/youtube_dl/extractor/spike.py
@@ -22,7 +22,7 @@ class BellatorIE(MTVServicesInfoExtractor):
        'only_matching': True,
    }]

-    _FEED_URL = 'http://www.spike.com/feeds/mrss/'
+    _FEED_URL = 'http://www.bellator.com/feeds/mrss/'
    _GEO_COUNTRIES = ['US']


--- a/youtube_dl/extractor/twitter.py
+++ b/youtube_dl/extractor/twitter.py
@@ -438,11 +438,22 @@ class TwitterIE(InfoExtractor):
        'params': {
            'skip_download': True,  # requires ffmpeg
        },
+    }, {
+        'url': 'https://twitter.com/foobar/status/1087791357756956680',
+        'info_dict': {
+            'id': '1087791357756956680',
+            'ext': 'mp4',
+            'title': 'Twitter - A new is coming.  Some of you got an opt-in to try it now. Check out the emoji button, quick keyboard shortcuts, upgraded trends, advanced search, and more. Let us know your thoughts!',
+            'thumbnail': r're:^https?://.*\.jpg',
+            'description': 'md5:66d493500c013e3e2d434195746a7f78',
+            'uploader': 'Twitter',
+            'uploader_id': 'Twitter',
+            'duration': 61.567,
+        },
    }]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
-        user_id = mobj.group('user_id')
        twid = mobj.group('id')

        webpage, urlh = self._download_webpage_handle(
@@ -451,8 +462,13 @@ class TwitterIE(InfoExtractor):
        if 'twitter.com/account/suspended' in urlh.geturl():
            raise ExtractorError('Account suspended by Twitter.', expected=True)

-        if user_id is None:
-            mobj = re.match(self._VALID_URL, urlh.geturl())
+        user_id = None
+
+        redirect_mobj = re.match(self._VALID_URL, urlh.geturl())
+        if redirect_mobj:
+            user_id = redirect_mobj.group('user_id')
+
+        if not user_id:
            user_id = mobj.group('user_id')

        username = remove_end(self._og_search_title(webpage), ' on Twitter')
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -371,10 +371,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                            (?:www\.)?hooktube\.com/|
                            (?:www\.)?yourepeat\.com/|
                            tube\.majestyc\.net/|
+                            # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
                            (?:(?:www|dev)\.)?invidio\.us/|
-                            (?:www\.)?invidiou\.sh/|
-                            (?:www\.)?invidious\.snopyta\.org/|
+                            (?:(?:www|no)\.)?invidiou\.sh/|
+                            (?:(?:www|fi|de)\.)?invidious\.snopyta\.org/|
                            (?:www\.)?invidious\.kabi\.tk/|
+                            (?:www\.)?invidious\.enkirton\.net/|
+                            (?:www\.)?invidious\.13ad\.de/|
+                            (?:www\.)?tube\.poal\.co/|
                            (?:www\.)?vid\.wxzm\.sx/|
                            youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
                         (?:.*?\#/)?                                          # handle anchor (#/) redirect urls