From d1ec773e4f9208ca7cad2c93b7ed33ce7b91480f Mon Sep 17 00:00:00 2001
From: "Jeremie J. Jarosh" <jeremie@jarosh.org>
Date: Sun, 7 Aug 2016 00:57:10 -0500
Subject: [PATCH 1/7] [Go90] Add new extractor

---
 youtube_dl/extractor/extractors.py |  1 +
 youtube_dl/extractor/go90.py       | 80 ++++++++++++++++++++++++++++++
 2 files changed, 81 insertions(+)
 create mode 100644 youtube_dl/extractor/go90.py
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 11b64eeaa..17d12e315 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -308,6 +308,7 @@ from .globo import (
     GloboIE,
     GloboArticleIE,
 )
+from .go90 import Go90IE
 from .godtube import GodTubeIE
 from .godtv import GodTVIE
 from .goldenmoustache import GoldenMoustacheIE
diff --git a/youtube_dl/extractor/go90.py b/youtube_dl/extractor/go90.py
new file mode 100644
index 000000000..cf51d7308
--- /dev/null
+++ b/youtube_dl/extractor/go90.py
@@ -0,0 +1,80 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+import urllib #DEBUG
+
+from .common import InfoExtractor
+
+
+class Go90IE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?go90\.com/profiles/va_(?P<id>[a-f0-9]+)'
+    _TEST = {
+        'url': 'https://www.go90.com/profiles/va_07d47f43a7b04eb5b693252f2bd1086b',
+        'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)',
+        'info_dict': {
+            'id': '07d47f43a7b04eb5b693252f2bd1086b',
+            'ext': 'mp4',
+            'title': 't@gged S1:E1 #shotgun',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            # TODO more properties, either as:
+            # * A value
+            # * MD5 checksum; start the string with md5:
+            # * A regular expression; start the string with re:
+            # * Any Python type (for example int or float)
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        # TODO more code goes here, for example ...
+        #title = self._html_search_regex(r'<h1>(.+?)</h1>', webpage, 'title')
+        
+        series_title = self._html_search_regex(r'<h1\b[^>]* data-reactid="90">(.+?)</h1>', webpage, 'series_title')
+        season_episode_numbers = self._html_search_regex(r'<!-- react-text: 92 -->(.+?)<!-- /react-text -->', webpage, 'season_episode_numbers')
+        episode_title = self._html_search_regex(r'<!-- react-text: 93 -->(.+?)<!-- /react-text -->', webpage, 'episode_title')
+        
+        title = series_title + " " + season_episode_numbers + " " + episode_title
+        #print "[!!!] " + title
+        
+        #page_data_json = self._search_regex(r'<script\b[^>]*>window\.__data=(.+?);\s*</script>', webpage, 'page_data', flags=re.DOTALL)
+        #print self.transform_source(page_data_json)
+        #page_data = self._parse_json(page_data_json, video_id, transform_source=self.transform_source)
+        
+        
+        
+        video_api = "https://www.go90.com/api/metadata/video/" + video_id
+        
+        video_api_data = self._download_json(video_api, video_id)  #TODO: overwrite `note=` to output better explanation
+        #print "[!!!] " + video_api_data['url']
+        
+        video_token_url = re.sub(r'^//', 'https://', video_api_data['url'])  #TODO: use utils.sanitize_url()
+        #print "[!!!] " + video_token_url
+        
+        video_token_data = self._download_json(video_token_url, video_id)  #TODO: overwrite `note=` to output better explanation
+        #print "[!!!] " + video_token_data['playURL']
+        
+        m3u8_url = video_token_data['playURL']
+        
+        #DEBUG
+        testfile = urllib.URLopener()
+        testfile.retrieve(m3u8_url, video_id + ".m3u8")
+        #/DEBUG
+        
+        formats = []
+        formats.extend(self._extract_m3u8_formats(
+            m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': self._og_search_description(webpage),
+            'formats': formats,
+            #'uploader': self._search_regex(r'<div[^>]+id="uploader"[^>]*>([^<]+)<', webpage, 'uploader', fatal=False),
+            # TODO more properties (see youtube_dl/extractor/common.py)
+        }
+    
+    #def transform_source(self, json_string):
+    #    return re.sub(re.sub(r':function.*?},([\[{"])', ':"",\g<1>', json_string, flags=re.DOTALL)
\ No newline at end of file

From f7fe731cbfa34cb96d5cfb97fae317d7dc5557b7 Mon Sep 17 00:00:00 2001
From: "Jeremie J. Jarosh" <jeremie@jarosh.org>
Date: Sat, 4 Feb 2017 12:11:21 -0600
Subject: [PATCH 2/7] [Go90] Use `UplynkPreplayIE` to extract data

---
 youtube_dl/extractor/go90.py | 82 +++++++++++++-----------------------
 1 file changed, 30 insertions(+), 52 deletions(-)

diff --git a/youtube_dl/extractor/go90.py b/youtube_dl/extractor/go90.py
index cf51d7308..c4322cdc7 100644
--- a/youtube_dl/extractor/go90.py
+++ b/youtube_dl/extractor/go90.py
@@ -1,10 +1,9 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-import re
-import urllib #DEBUG
-
 from .common import InfoExtractor
+from .uplynk import UplynkPreplayIE
+from ..utils import sanitize_url
 
 
 class Go90IE(InfoExtractor):
@@ -16,7 +15,7 @@ class Go90IE(InfoExtractor):
             'id': '07d47f43a7b04eb5b693252f2bd1086b',
             'ext': 'mp4',
             'title': 't@gged S1:E1 #shotgun',
-            'thumbnail': 're:^https?://.*\.jpg$',
+            'thumbnail': r're:^https?://.*\.jpg$',
             # TODO more properties, either as:
             # * A value
             # * MD5 checksum; start the string with md5:
@@ -29,52 +28,31 @@ class Go90IE(InfoExtractor):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
-        # TODO more code goes here, for example ...
-        #title = self._html_search_regex(r'<h1>(.+?)</h1>', webpage, 'title')
-        
-        series_title = self._html_search_regex(r'<h1\b[^>]* data-reactid="90">(.+?)</h1>', webpage, 'series_title')
-        season_episode_numbers = self._html_search_regex(r'<!-- react-text: 92 -->(.+?)<!-- /react-text -->', webpage, 'season_episode_numbers')
-        episode_title = self._html_search_regex(r'<!-- react-text: 93 -->(.+?)<!-- /react-text -->', webpage, 'episode_title')
-        
-        title = series_title + " " + season_episode_numbers + " " + episode_title
-        #print "[!!!] " + title
-        
-        #page_data_json = self._search_regex(r'<script\b[^>]*>window\.__data=(.+?);\s*</script>', webpage, 'page_data', flags=re.DOTALL)
-        #print self.transform_source(page_data_json)
-        #page_data = self._parse_json(page_data_json, video_id, transform_source=self.transform_source)
-        
-        
-        
-        video_api = "https://www.go90.com/api/metadata/video/" + video_id
-        
-        video_api_data = self._download_json(video_api, video_id)  #TODO: overwrite `note=` to output better explanation
-        #print "[!!!] " + video_api_data['url']
-        
-        video_token_url = re.sub(r'^//', 'https://', video_api_data['url'])  #TODO: use utils.sanitize_url()
-        #print "[!!!] " + video_token_url
-        
-        video_token_data = self._download_json(video_token_url, video_id)  #TODO: overwrite `note=` to output better explanation
-        #print "[!!!] " + video_token_data['playURL']
-        
-        m3u8_url = video_token_data['playURL']
-        
-        #DEBUG
-        testfile = urllib.URLopener()
-        testfile.retrieve(m3u8_url, video_id + ".m3u8")
-        #/DEBUG
-        
-        formats = []
-        formats.extend(self._extract_m3u8_formats(
-            m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
 
-        return {
-            'id': video_id,
-            'title': title,
-            'description': self._og_search_description(webpage),
-            'formats': formats,
-            #'uploader': self._search_regex(r'<div[^>]+id="uploader"[^>]*>([^<]+)<', webpage, 'uploader', fatal=False),
-            # TODO more properties (see youtube_dl/extractor/common.py)
-        }
-    
-    #def transform_source(self, json_string):
-    #    return re.sub(re.sub(r':function.*?},([\[{"])', ':"",\g<1>', json_string, flags=re.DOTALL)
\ No newline at end of file
+        # scrape data from webpage
+        page_data = {}
+        self.to_screen("Scrape data from webpage")
+
+        page_data['id'] = video_id
+
+        video_title = self._html_search_regex(
+            r'<title\b[^>]*>\s*(.*)\s*</title>', webpage, 'title')
+        page_data['title'] = video_title
+        self.to_screen("Title: " + page_data['title'])
+
+
+        # retrieve upLynk data
+        video_api = "https://www.go90.com/api/metadata/video/" + video_id
+        video_api_data = self._download_json(video_api, video_id)  #TODO: overwrite `note=` to output better explanation
+        video_token_url = sanitize_url(video_api_data['url'])
+
+        uplynk_preplay = UplynkPreplayIE(self._downloader)
+        uplynk_data = uplynk_preplay.extract(video_token_url)
+
+
+        # merge data
+        video_data = uplynk_data.copy()
+        video_data.update(page_data)
+        # TODO more properties (see youtube_dl/extractor/common.py)
+
+        return video_data

From f0870a92aa473c9a34a15acf46858d1591c31d7a Mon Sep 17 00:00:00 2001
From: "Jeremie J. Jarosh" <jeremie@jarosh.org>
Date: Sat, 4 Feb 2017 12:47:46 -0600
Subject: [PATCH 3/7] [Go90] Correctly pass the preplay URL to the uplynk
 extractor

---
 youtube_dl/extractor/go90.py | 37 ++++++++++++++++++------------------
 1 file changed, 19 insertions(+), 18 deletions(-)

diff --git a/youtube_dl/extractor/go90.py b/youtube_dl/extractor/go90.py
index c4322cdc7..d60a4d698 100644
--- a/youtube_dl/extractor/go90.py
+++ b/youtube_dl/extractor/go90.py
@@ -2,7 +2,6 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from .uplynk import UplynkPreplayIE
 from ..utils import sanitize_url
 
 
@@ -14,14 +13,20 @@ class Go90IE(InfoExtractor):
         'info_dict': {
             'id': '07d47f43a7b04eb5b693252f2bd1086b',
             'ext': 'mp4',
-            'title': 't@gged S1:E1 #shotgun',
+            'title': 't@gged | #shotgun | go90',
             'thumbnail': r're:^https?://.*\.jpg$',
+            'uploader_id': '98ac1613c7624a8387596b5d5e441064',
             # TODO more properties, either as:
             # * A value
             # * MD5 checksum; start the string with md5:
             # * A regular expression; start the string with re:
             # * Any Python type (for example int or float)
-        }
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+        'add_ie': ['UplynkPreplay'],
     }
 
     def _real_extract(self, url):
@@ -33,26 +38,22 @@ class Go90IE(InfoExtractor):
         page_data = {}
         self.to_screen("Scrape data from webpage")
 
-        page_data['id'] = video_id
-
         video_title = self._html_search_regex(
             r'<title\b[^>]*>\s*(.*)\s*</title>', webpage, 'title')
-        page_data['title'] = video_title
-        self.to_screen("Title: " + page_data['title'])
+        self.to_screen("Title: " + video_title)
 
 
-        # retrieve upLynk data
+        # retrieve upLynk url
         video_api = "https://www.go90.com/api/metadata/video/" + video_id
         video_api_data = self._download_json(video_api, video_id)  #TODO: overwrite `note=` to output better explanation
-        video_token_url = sanitize_url(video_api_data['url'])
-
-        uplynk_preplay = UplynkPreplayIE(self._downloader)
-        uplynk_data = uplynk_preplay.extract(video_token_url)
+        uplynk_preplay_url = sanitize_url(video_api_data['url'])
 
 
-        # merge data
-        video_data = uplynk_data.copy()
-        video_data.update(page_data)
-        # TODO more properties (see youtube_dl/extractor/common.py)
-
-        return video_data
+        return {
+            '_type': 'url_transparent',
+            'url': uplynk_preplay_url,
+            'id': video_id,
+            'title': video_title,
+            'ie_key': 'UplynkPreplay',
+            # TODO more properties (see youtube_dl/extractor/common.py)
+        }

From 2e8076b1bb04b616efc618078fd9c20ffa321b37 Mon Sep 17 00:00:00 2001
From: "Jeremie J. Jarosh" <jeremie@jarosh.org>
Date: Sat, 4 Feb 2017 14:49:50 -0600
Subject: [PATCH 4/7] [Go90] Add properties for episodic content

---
 youtube_dl/extractor/go90.py | 61 +++++++++++++++++++++++++++++-------
 1 file changed, 49 insertions(+), 12 deletions(-)

diff --git a/youtube_dl/extractor/go90.py b/youtube_dl/extractor/go90.py
index d60a4d698..fc5d7f739 100644
--- a/youtube_dl/extractor/go90.py
+++ b/youtube_dl/extractor/go90.py
@@ -1,8 +1,16 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import re
+from datetime import datetime
+
 from .common import InfoExtractor
-from ..utils import sanitize_url
+from ..utils import (
+    clean_html,
+    get_element_by_id,
+    int_or_none,
+    sanitize_url,
+)
 
 
 class Go90IE(InfoExtractor):
@@ -13,14 +21,10 @@ class Go90IE(InfoExtractor):
         'info_dict': {
             'id': '07d47f43a7b04eb5b693252f2bd1086b',
             'ext': 'mp4',
-            'title': 't@gged | #shotgun | go90',
+            'title': 't@gged S1E1 #shotgun',
             'thumbnail': r're:^https?://.*\.jpg$',
+            'description': 'md5:1ebcc7a686d93456a822d435d2ac7719',
             'uploader_id': '98ac1613c7624a8387596b5d5e441064',
-            # TODO more properties, either as:
-            # * A value
-            # * MD5 checksum; start the string with md5:
-            # * A regular expression; start the string with re:
-            # * Any Python type (for example int or float)
         },
         'params': {
             # m3u8 download
@@ -35,17 +39,45 @@ class Go90IE(InfoExtractor):
 
 
         # scrape data from webpage
-        page_data = {}
         self.to_screen("Scrape data from webpage")
 
-        video_title = self._html_search_regex(
-            r'<title\b[^>]*>\s*(.*)\s*</title>', webpage, 'title')
+        series_title = clean_html(get_element_by_id('series-title', webpage))
+        self.to_screen("Series Title: " + series_title)
+
+        episode_info = clean_html(get_element_by_id('episode-title', webpage))
+
+        season_number = None
+        episode_number = None
+        episode_title = None
+
+        episode_match = re.match(
+            r'S(?P<season_number>\d+):E(?P<episode_number>\d+)\s+(?P<episode_title>.*)',
+            episode_info)
+        if episode_match is not None:
+            season_number, episode_number, episode_title = episode_match.groups()
+            self.to_screen("Season: " + season_number)
+            self.to_screen("Episode Number: " + episode_number)
+            self.to_screen("Episode Title: " + episode_title)
+
+        video_title = series_title
+        if episode_match is not None:
+            video_title = '{} S{}E{} {}'.format(
+                series_title, season_number, episode_number, episode_title)
         self.to_screen("Title: " + video_title)
 
+        video_description = self._og_search_description(webpage)
+
+        release_date = None
+        air_date = clean_html(get_element_by_id('asset-air-date', webpage))
+        if air_date:
+            self.to_screen("Air Date: " + air_date)
+            release_datetime = datetime.strptime(air_date, '%b %d, %Y')
+            release_date = release_datetime.strftime('%Y%m%d')
+
 
         # retrieve upLynk url
         video_api = "https://www.go90.com/api/metadata/video/" + video_id
-        video_api_data = self._download_json(video_api, video_id)  #TODO: overwrite `note=` to output better explanation
+        video_api_data = self._download_json(video_api, video_id)
         uplynk_preplay_url = sanitize_url(video_api_data['url'])
 
 
@@ -54,6 +86,11 @@ class Go90IE(InfoExtractor):
             'url': uplynk_preplay_url,
             'id': video_id,
             'title': video_title,
+            'series': series_title,
+            'episode': episode_title,
+            'season_number': int_or_none(season_number),
+            'episode_number': int_or_none(episode_number),
+            'description': video_description,
+            'release_date': release_date,
             'ie_key': 'UplynkPreplay',
-            # TODO more properties (see youtube_dl/extractor/common.py)
         }

From 57e846fcb74388722f257523f4031113b7ea911e Mon Sep 17 00:00:00 2001
From: "Jeremie J. Jarosh" <jeremie@jarosh.org>
Date: Sat, 4 Feb 2017 16:00:58 -0600
Subject: [PATCH 5/7] [Go90] Add leading zeros to season and episode numbers

---
 youtube_dl/extractor/go90.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/youtube_dl/extractor/go90.py b/youtube_dl/extractor/go90.py
index fc5d7f739..22d8eae3c 100644
--- a/youtube_dl/extractor/go90.py
+++ b/youtube_dl/extractor/go90.py
@@ -17,11 +17,10 @@ class Go90IE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?go90\.com/profiles/va_(?P<id>[a-f0-9]+)'
     _TEST = {
         'url': 'https://www.go90.com/profiles/va_07d47f43a7b04eb5b693252f2bd1086b',
-        'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)',
         'info_dict': {
             'id': '07d47f43a7b04eb5b693252f2bd1086b',
             'ext': 'mp4',
-            'title': 't@gged S1E1 #shotgun',
+            'title': 't@gged S01E01 #shotgun',
             'thumbnail': r're:^https?://.*\.jpg$',
             'description': 'md5:1ebcc7a686d93456a822d435d2ac7719',
             'uploader_id': '98ac1613c7624a8387596b5d5e441064',
@@ -61,8 +60,8 @@ class Go90IE(InfoExtractor):
 
         video_title = series_title
         if episode_match is not None:
-            video_title = '{} S{}E{} {}'.format(
-                series_title, season_number, episode_number, episode_title)
+            video_title = '{} S{:02d}E{:02d} {}'.format(
+                series_title, int_or_none(season_number), int_or_none(episode_number), episode_title)
         self.to_screen("Title: " + video_title)
 
         video_description = self._og_search_description(webpage)

From 61799cc758ec3ae70ecafa5f2c4030fd53c2942f Mon Sep 17 00:00:00 2001
From: "Jeremie J. Jarosh" <jeremie@jarosh.org>
Date: Sat, 4 Feb 2017 16:26:35 -0600
Subject: [PATCH 6/7] [Go90] Append `episode_info` to the title if not episodic

---
 youtube_dl/extractor/go90.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/go90.py b/youtube_dl/extractor/go90.py
index 22d8eae3c..5ee048564 100644
--- a/youtube_dl/extractor/go90.py
+++ b/youtube_dl/extractor/go90.py
@@ -61,7 +61,10 @@ class Go90IE(InfoExtractor):
         video_title = series_title
         if episode_match is not None:
             video_title = '{} S{:02d}E{:02d} {}'.format(
-                series_title, int_or_none(season_number), int_or_none(episode_number), episode_title)
+                series_title, int_or_none(season_number),
+                int_or_none(episode_number), episode_title)
+        elif episode_info:
+            video_title = '{} -- {}'.format(series_title, episode_info)
         self.to_screen("Title: " + video_title)
 
         video_description = self._og_search_description(webpage)

From f4a31cf69443226e95edbf0ceb145bb69312be01 Mon Sep 17 00:00:00 2001
From: "Jeremie J. Jarosh" <jeremie@jarosh.org>
Date: Sat, 4 Feb 2017 17:15:48 -0600
Subject: [PATCH 7/7] [Go90] Use %-style string formatting to support Python 2.6

---
 youtube_dl/extractor/go90.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/extractor/go90.py b/youtube_dl/extractor/go90.py
index 5ee048564..24048a764 100644
--- a/youtube_dl/extractor/go90.py
+++ b/youtube_dl/extractor/go90.py
@@ -60,11 +60,11 @@ class Go90IE(InfoExtractor):
 
         video_title = series_title
         if episode_match is not None:
-            video_title = '{} S{:02d}E{:02d} {}'.format(
+            video_title = '%s S%02dE%02d %s' % (
                 series_title, int_or_none(season_number),
                 int_or_none(episode_number), episode_title)
         elif episode_info:
-            video_title = '{} -- {}'.format(series_title, episode_info)
+            video_title = '%s -- %s' % (series_title, episode_info)
         self.to_screen("Title: " + video_title)
 
         video_description = self._og_search_description(webpage)