From 368cb1fd0a4f92f041ad3d11eb52beec59417715 Mon Sep 17 00:00:00 2001 From: Laurent Raufaste Date: Sun, 29 May 2011 12:36:12 -0400 Subject: [PATCH 1/4] Fixed dailymotion video and uploader extractor --- youtube-dl | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/youtube-dl b/youtube-dl index 3ac27a857..bd666071d 100755 --- a/youtube-dl +++ b/youtube-dl @@ -30,6 +30,7 @@ import time import urllib import urllib2 import zlib +import json # parse_qs was moved from the cgi module to the urlparse module recently. try: @@ -1328,16 +1329,21 @@ class DailymotionIE(InfoExtractor): return # Extract URL, uploader and title from webpage + # First we need to get the sequence urlencoded json variable self.report_extraction(video_id) - mobj = re.search(r'(?i)addVariable\(\"video\"\s*,\s*\"([^\"]*)\"\)', webpage) + mobj = re.search(r'(?i)addVariable\(\"sequence\"\s*,\s*\"([^\"]*)\"\)', webpage) if mobj is None: self._downloader.trouble(u'ERROR: unable to extract media URL') return - mediaURL = urllib.unquote(mobj.group(1)) # if needed add http://www.dailymotion.com/ if relative URL - - video_url = mediaURL + sequenceJsonContent = urllib.unquote_plus(mobj.group(1)) + sequenceJson = json.loads(sequenceJsonContent) + try: + video_url = sequenceJson[0]["layerList"][0]["sequenceList"][1]["layerList"][2]["param"]["videoPluginParameters"]["hqURL"] + except: + self._downloader.trouble(u'ERROR: unable to extract media URL') + return # '' mobj = re.search(r'(?im)Dailymotion\s*[\-:]\s*(.+?)', webpage) @@ -1347,11 +1353,12 @@ class DailymotionIE(InfoExtractor): video_title = mobj.group(1).decode('utf-8') video_title = sanitize_title(video_title) - mobj = re.search(r'(?im)(.+?)', webpage) - if mobj is None: + # Extract the video uploader nickname from the sequence JSON + try: + video_uploader = sequenceJson[0]["layerList"][0]["sequenceList"][1]["layerList"][0]["param"]["metadata"]["uploader"] + except: self._downloader.trouble(u'ERROR: unable to extract 
uploader nickname') return - video_uploader = mobj.group(1) try: # Process video information From a4188f6acd68e07c39ffe0a23e44c57ae164ed05 Mon Sep 17 00:00:00 2001 From: Laurent Raufaste Date: Sun, 29 May 2011 13:58:57 -0400 Subject: [PATCH 2/4] File format of the video downloaded is now mp4 --- youtube-dl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube-dl b/youtube-dl index bd666071d..8e277f39f 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1317,7 +1317,7 @@ class DailymotionIE(InfoExtractor): video_id = mobj.group(1) simple_title = mobj.group(2).decode('utf-8') - video_extension = 'flv' + video_extension = 'mp4' # Retrieve video webpage to extract further information request = urllib2.Request(url) From b95ed5235876a3fdf536bcfde6eaf1e721ab00ba Mon Sep 17 00:00:00 2001 From: Laurent Raufaste Date: Sun, 29 May 2011 13:59:10 -0400 Subject: [PATCH 3/4] Handle badly encoded Dailymotion JSON --- youtube-dl | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube-dl b/youtube-dl index 8e277f39f..df16da48e 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1338,11 +1338,15 @@ class DailymotionIE(InfoExtractor): # if needed add http://www.dailymotion.com/ if relative URL sequenceJsonContent = urllib.unquote_plus(mobj.group(1)) - sequenceJson = json.loads(sequenceJsonContent) + try: + sequenceJson = json.loads(sequenceJsonContent) + except: + self._downloader.trouble(u'ERROR: unable to extract media URL (Bad JSON encoding)') + return try: video_url = sequenceJson[0]["layerList"][0]["sequenceList"][1]["layerList"][2]["param"]["videoPluginParameters"]["hqURL"] except: - self._downloader.trouble(u'ERROR: unable to extract media URL') + self._downloader.trouble(u'ERROR: unable to extract media URL (Unable to find the URL)') return # '' From da6386f8f245ee160874159294cbc87c4b4407fb Mon Sep 17 00:00:00 2001 From: Laurent Raufaste Date: Mon, 30 May 2011 21:42:28 -0400 Subject: [PATCH 4/4] Handle the badly sent JSON from Dailymotion --- 
youtube-dl | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/youtube-dl b/youtube-dl index df16da48e..46c3c07a1 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1338,6 +1338,12 @@ class DailymotionIE(InfoExtractor): # if needed add http://www.dailymotion.com/ if relative URL sequenceJsonContent = urllib.unquote_plus(mobj.group(1)) + + # JSON does not support escaping of '. + # Replace every \' by a ' in the JSON string + sequenceJsonContent = sequenceJsonContent.replace("\\'", "'") + + # Build the JSON object based on the string try: sequenceJson = json.loads(sequenceJsonContent) except: