From e77f114bf98aa9bc0fb2dc791b1761d90ff93880 Mon Sep 17 00:00:00 2001 From: TellowKrinkle Date: Fri, 27 May 2016 21:35:57 -0500 Subject: [PATCH 1/3] Add Original Download Original is the actual file (rather than a google-reencoded version) --- youtube_dl/extractor/googledrive.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/youtube_dl/extractor/googledrive.py b/youtube_dl/extractor/googledrive.py index 766fc26d0..a74b7179f 100644 --- a/youtube_dl/extractor/googledrive.py +++ b/youtube_dl/extractor/googledrive.py @@ -83,6 +83,29 @@ class GoogleDriveIE(InfoExtractor): }) self._sort_formats(formats) + downloadable = True + # DownloadPage will either be the actual file, a "we can't virus-scan this" page with a confirmation button, or a "you don't have permission" page. + # The actual file supports range requests, but the confirmation/permission pages don't, so this will download the whole page for either of those. + downloadPage = self._download_webpage('https://docs.google.com/uc?export=download&id=%s' % video_id, video_id, headers={'Range': 'bytes=0-15'}, encoding='unicode_escape') + if 'html' in downloadPage: + confirm = self._search_regex(r'confirm=([^&"]+)', downloadPage, 'confirm', default=None) + if confirm: + dlstring = 'https://docs.google.com/uc?export=download&confirm=%s&id=%s' % (confirm, video_id) + else: + downloadable = False + else: + dlstring = 'https://docs.google.com/uc?export=download&id=%s' % video_id + if downloadable: + originalExtension = self._search_regex(r'"([^"]+)",[^,]*,[^,]*$', webpage, 'original extension', default=None) + originalSize = int_or_none(self._search_regex(r'"([^"]+)"[^"]*\n[^\n]*,[^,]*$', webpage, 'original size', default=None)) + formats.append({ + 'url': dlstring, + 'format_id': 'Original', + 'ext': originalExtension, + 'filesize': originalSize, + 'protocol': 'https', + }) + return { 'id': video_id, 'title': title, From 5b7d70d625cc36f5d799828261b610bbc14b390c Mon Sep 17 00:00:00 2001 From: 
TellowKrinkle Date: Fri, 27 May 2016 22:03:12 -0500 Subject: [PATCH 2/3] Updated Test Cases Added test case for original video downloading, Updated md5 and duration for the old test case as google probably changed how video processing works on their servers, breaking the md5 and duration tests. --- youtube_dl/extractor/googledrive.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/googledrive.py b/youtube_dl/extractor/googledrive.py index a74b7179f..7e1b7d5a9 100644 --- a/youtube_dl/extractor/googledrive.py +++ b/youtube_dl/extractor/googledrive.py @@ -13,12 +13,27 @@ class GoogleDriveIE(InfoExtractor): _VALID_URL = r'https?://(?:(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/)|video\.google\.com/get_player\?.*?docid=)(?P<id>[a-zA-Z0-9_-]{28,})' _TESTS = [{ 'url': 'https://drive.google.com/file/d/0ByeS4oOUV-49Zzh4R1J6R09zazQ/edit?pli=1', - 'md5': '881f7700aec4f538571fa1e0eed4a7b6', + 'md5': '5c602afbbf2c1db91831f5d82f678554', + 'params': { + 'format': "Original" + }, 'info_dict': { 'id': '0ByeS4oOUV-49Zzh4R1J6R09zazQ', 'ext': 'mp4', 'title': 'Big Buck Bunny.mp4', - 'duration': 46, + 'duration': 45, + } + }, { + 'url': 'https://drive.google.com/file/d/0ByeS4oOUV-49Zzh4R1J6R09zazQ/edit?pli=1', + 'md5': 'd109872761f7e7ecf353fa108c0dbe1e', + 'params': { + 'format': "37" + }, + 'info_dict': { + 'id': '0ByeS4oOUV-49Zzh4R1J6R09zazQ', + 'ext': 'mp4', + 'title': 'Big Buck Bunny.mp4', + 'duration': 45, } }, { # video id is longer than 28 characters @@ -105,7 +120,7 @@ class GoogleDriveIE(InfoExtractor): 'filesize': originalSize, 'protocol': 'https', }) - + return { 'id': video_id, 'title': title, From bdaf8a82b0e4e09cbb35a9cd72750efd974a926f Mon Sep 17 00:00:00 2001 From: TellowKrinkle Date: Fri, 27 May 2016 22:10:22 -0500 Subject: [PATCH 3/3] Changed variable names originalURL makes more sense than dlstring --- youtube_dl/extractor/googledrive.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git 
a/youtube_dl/extractor/googledrive.py b/youtube_dl/extractor/googledrive.py index 7e1b7d5a9..85dbc46a3 100644 --- a/youtube_dl/extractor/googledrive.py +++ b/youtube_dl/extractor/googledrive.py @@ -105,16 +105,16 @@ class GoogleDriveIE(InfoExtractor): if 'html' in downloadPage: confirm = self._search_regex(r'confirm=([^&"]+)', downloadPage, 'confirm', default=None) if confirm: - dlstring = 'https://docs.google.com/uc?export=download&confirm=%s&id=%s' % (confirm, video_id) + originalURL = 'https://docs.google.com/uc?export=download&confirm=%s&id=%s' % (confirm, video_id) else: downloadable = False else: - dlstring = 'https://docs.google.com/uc?export=download&id=%s' % video_id + originalURL = 'https://docs.google.com/uc?export=download&id=%s' % video_id if downloadable: originalExtension = self._search_regex(r'"([^"]+)",[^,]*,[^,]*$', webpage, 'original extension', default=None) originalSize = int_or_none(self._search_regex(r'"([^"]+)"[^"]*\n[^\n]*,[^,]*$', webpage, 'original size', default=None)) formats.append({ - 'url': dlstring, + 'url': originalURL, 'format_id': 'Original', 'ext': originalExtension, 'filesize': originalSize,