From 4a43ae04688826582e6cd17bc24c5c4f18882043 Mon Sep 17 00:00:00 2001 From: Abhishek Kedia Date: Sun, 20 Dec 2015 19:37:21 +0100 Subject: [PATCH 1/7] Initial commit for http://genvideos.org --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/genvideos.py | 38 +++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+) create mode 100644 youtube_dl/extractor/genvideos.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 908581bf7..a0115e893 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -220,6 +220,7 @@ from .gametrailers import GametrailersIE from .gazeta import GazetaIE from .gdcvault import GDCVaultIE from .generic import GenericIE +from .genvideos import GenVideosIE from .gfycat import GfycatIE from .giantbomb import GiantBombIE from .giga import GigaIE diff --git a/youtube_dl/extractor/genvideos.py b/youtube_dl/extractor/genvideos.py new file mode 100644 index 000000000..eb976ca9e --- /dev/null +++ b/youtube_dl/extractor/genvideos.py @@ -0,0 +1,38 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class GenVideosIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P[0-9]+)' + _TEST = { + 'url': 'http://yourextractor.com/watch/42', + 'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)', + 'info_dict': { + 'id': '42', + 'ext': 'mp4', + 'title': 'Video title goes here', + 'thumbnail': 're:^https?://.*\.jpg$', + # TODO more properties, either as: + # * A value + # * MD5 checksum; start the string with md5: + # * A regular expression; start the string with re: + # * Any Python type (for example int or float) + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + # TODO more code goes here, for example ... + title = self._html_search_regex(r'

(.+?)

', webpage, 'title') + + return { + 'id': video_id, + 'title': title, + 'description': self._og_search_description(webpage), + 'uploader': self._search_regex(r']+id="uploader"[^>]*>([^<]+)<', webpage, 'uploader', fatal=False), + # TODO more properties (see youtube_dl/extractor/common.py) + } \ No newline at end of file From 2c37b87dece3c1be81001ebd2e9944c977b78d11 Mon Sep 17 00:00:00 2001 From: Abhishek Kedia Date: Sun, 20 Dec 2015 21:37:30 +0100 Subject: [PATCH 2/7] get 360p video urls --- youtube_dl/extractor/genvideos.py | 38 +++++++++++++++++++++++-------- 1 file changed, 29 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/genvideos.py b/youtube_dl/extractor/genvideos.py index eb976ca9e..0ba035eda 100644 --- a/youtube_dl/extractor/genvideos.py +++ b/youtube_dl/extractor/genvideos.py @@ -3,17 +3,25 @@ from __future__ import unicode_literals from .common import InfoExtractor +import requests +import json +from urlparse import parse_qs, urlparse + class GenVideosIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P[0-9]+)' + _VALID_URL = r'https?://(?:www\.)?genvideos.org/watch\?v=(?P\w+)#?' #Tests only the basic url format. Example - https://genvideos.org/watch?v=kMjlhMWE5OT + # TODO check for other possible url formats also + # For example + # * http://genvideos.com/watch_kMjlhMWE5OT.html#video=tBa-Q-WkbPqwzs34b7ArqU7VomQMb2n-RAlARWKWKTI + # * http://genvideos.org/watch_kMjlhMWE5OT.html#video=tBa-Q-WkbPqwzs34b7ArqU7VomQMb2n-RAlARWKWKTI _TEST = { - 'url': 'http://yourextractor.com/watch/42', + 'url': 'http://genvideos.org/watch?v=kMjlhMWE5OT', 'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)', 'info_dict': { - 'id': '42', + 'id': 'kMjlhMWE5OT', 'ext': 'mp4', - 'title': 'Video title goes here', - 'thumbnail': 're:^https?://.*\.jpg$', + 'title': 'The Hunger Games (2012) - HD 1080p', + #'thumbnail': 're:^https?://.*\.jpg$', # TODO more properties, either as: # * A value # * MD5 checksum; start the string with md5: @@ -26,13 +34,25 @@ class GenVideosIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - # TODO more code goes here, for example ... title = self._html_search_regex(r'

(.+?)

', webpage, 'title') + #TODO retrieve video url + urls_data = requests.post( + "https://genvideos.org/video_info/iframe", + data={'v':video_id}, + headers={'referer': 'https://genvideos.org/'} + ) #returns json containing the url of the video (in 360p, 720p and 1080p). + #For example - {"360":"\/\/html5player.org\/embed?url=https%3A%2F%2Flh3.googleusercontent.com%2FW6-SNGaDLWNyucD3pMqa1uMBapGDbtMTOtwpXrEu-w%3Dm18","720":"\/\/html5player.org\/embed?url=https%3A%2F%2Flh3.googleusercontent.com%2FW6-SNGaDLWNyucD3pMqa1uMBapGDbtMTOtwpXrEu-w%3Dm22","1080":"\/\/html5player.org\/embed?url=https%3A%2F%2Flh3.googleusercontent.com%2FW6-SNGaDLWNyucD3pMqa1uMBapGDbtMTOtwpXrEu-w%3Dm37"} + urls_data_json = json.loads(r.text) + _360p_url = parse_qs(urlparse(urls_data_json['360']).query)['url'] + # TODO : return all possible formats instead of just 360p + return { 'id': video_id, 'title': title, - 'description': self._og_search_description(webpage), - 'uploader': self._search_regex(r']+id="uploader"[^>]*>([^<]+)<', webpage, 'uploader', fatal=False), + 'url': _360p_url + #'description': self._og_search_description(webpage), + #'uploader': self._search_regex(r']+id="uploader"[^>]*>([^<]+)<', webpage, 'uploader', fatal=False), # TODO more properties (see youtube_dl/extractor/common.py) - } \ No newline at end of file + } + \ No newline at end of file From 1c92eae5953d629e25853317c7bf2ba42987111e Mon Sep 17 00:00:00 2001 From: Abhishek Kedia Date: Sun, 20 Dec 2015 22:01:06 +0100 Subject: [PATCH 3/7] Test passing but does not seem to download --- kMjlhMWE5OT.unknown_video | Bin 0 -> 10241 bytes youtube_dl/extractor/genvideos.py | 13 ++++++++----- 2 files changed, 8 insertions(+), 5 deletions(-) create mode 100644 kMjlhMWE5OT.unknown_video diff --git a/kMjlhMWE5OT.unknown_video b/kMjlhMWE5OT.unknown_video new file mode 100644 index 0000000000000000000000000000000000000000..9cc4f399b5a1f1d425c683e8f353424f20a7bda7 GIT binary patch literal 10241 zcmZwL37pr{{y*^diDXI?!k{$Ntq_Jn+NEWr5=u#!78*6}#Y9?EnkY?Lgb3q-fKCg2=pXp9S9C!WDf`R!1>zy6G@`en^ zKk$96b0_8x7%*H@zx?5SdhP%0{T?50+%KX`muTN1D^l$L-+z7mx4&rqAAfK5Up@b? z?+(&fC2v5lA^Sh9R4zK~+gGQ`zNok(cj(~UzS^!H+V?;qN9^zLevgfn{qe6Q+IKi= zf7^fi`X7t`U&WLeZG!R#M%C!uYv}%kPwd@q$k6{kPqN9Q4}8CWu8P|?<@d_V^ghufe_d_u2oo|MSC!4enR{!1oomUo>QBkAA*? zV#v@T2Ttigd&IrD!+U1?e*f3M&v~Gs%xK$g|7!lX{~ot*pTC`9))wi) zSE}sW8pqz?&&_2<-~II~7eyTLSd{r88g+Sbbc%!fyxVBB-beQsI#`%HD#Wp$e-UNx zsHE+uYcG+-FT?2IT|2M1GjE?XiF$)poStcxg@!MQl3a?h|W;?S}*XAtxG zDk8%L$+#3XWT+uyQ$%@tDt{jKQ+^TKBP!Iw*a{;lVYg#EiEhkd0jphz#F@D86|GUx z8i!Y=9oi1pcDS}9j^hdnSb}vcS+|lkDp{kFnk&7>R{n^nd;)T*d^_^3yb?7Wc_iA8 zbiaQ*={%2D+wqq$krKX+IN^BIae_Kd zc$iN;%L#o?w2u>K@H*Qgsyd6RJuzohnOEHvQSAuiUhOI-Actz^sOGbioYP5d(SDNl zlm3pVel}e(Z*_H5-x85k5jA98&m*kDK2JWDCX8SyAMhU$r^xP@r>Lcdnrk%1 z95wEsh_@nY9*n*<^{qJ<>($&F@sIipVj-mwwd7grN^+UQrij|kxORIc@dj(qw~q6y z(~7aoV{OE#hf-P#T1sa$BV<9 zXLKeHci@aSaTaGfi!+_YnGa!|Gk=IU>r^~bXZh)K)^pfHwmoDwU>NhU|Fbi3r_MIt z*)lo%?TB*@;X=AG4)^ICS=Y;;CFZPW&U)6VZ=L#bu0MzeSczxl+;bU>=knYSvHp2g z=*VQ&M4Vri=8U9--4PA4$)|_~yoA{zaJ z9u)9AA9)RWZ#F)Ufz0Mrwz4mxi96e*FZZHvQ+=B{)22B%tEQ{?5_L4ww^@6@^O zi|*Wm8ZVaZ#jUxWLRLq#P1t{1S+#Y4+Pdp)cST&HhD-98hkDypr8!-h!TN~y2V=kO zZ)G;CkVA*l>BA$uhMX@wkvd$BGr826bW~4Ab9Nj_5$hr@%b+D=Sb_X5m-*$Fb2slq zTu}}=T#?HhHbr!D4?D@L(`3}qX^nqUl*Rm=hoHXBFY{-_mG07&*>q+ivc2-Vh%T8p z?=J3L7yY~F-$nnfC)0*O%*3k38mA@l;%C+YOsI{l|o*UW3zKCA(>E%xJawmFii0ECJ z=8U8S^q7~7{EYU-n=K5ELVPd^G+!mfxL-MJgpa-&*q zG*@49^)*-Dv8?2C|LCql4&zzHUlI8Y=#GB*#q5meUkUG!{_^VY-t>2G2FQFsCQa~e z9dIWv@q5HTxepwO8V9Oz;NFNqwdlkxETc4Ha7}WV!={KK>KSqt9T~-P_CyRlhpTX3 zhL-SU#IR~;8+I3O`6saR9-f8Thu?!|^ri~5#xr_TA*&;9PE@BY!?E_wYP?xy1?DU; zXMuGJ*5FQza3@A|AmX;e zX-+;xtcw_xfu9khyaPuK;xVi}>Kpb(jIPh+$bIwz-ljBSjO@l}ACt?iyueQpW7RRX z2M=PN+sk4Px4YN3KZ{z&=`${y4h-W_K8zTD5Na8JJ+s*mF(Ejc>(DmgRklUkA;&u! zQ-GT9u;+>8(KfL=dCWt-6FqZmw#rB!}@V;j4%{J)3tnKwWn(LcMoY zr3LQ9U31wIFjD$hr|GnKXMi+Jo@@+stbWHZlR=AD6iJ8v51n71q9@w((;t;hA7Z_fE$ zk>7k7JW-Jx)cwRN{)$-80DD~U1YbuynL!`$h}@zO~kWhY0gMWFlVvai_avR z&e(VHyAjWorxjzFjkTUr@8V-=Ne|??*tstLG~)S0V}@}L=2&8mCFWR?$2_)1EUijI z?02a-m&$LM8kQNm%sX?LeJxv#IhXB@c;QqU&=sG(;IkJtN0hiPC5;%)TsB3#cqq;2 z!((_JUUcV{%YM03z+*cb82Npxct3-~5tmGP_Wchvx<U_=muUY@K0+z5V z;`O?8$KGGJ_t#~(x)O3-eFKm3dBhthki%_w2H*H4;!WrJW^d&4W-&V>-YQ26%=ec0 z-m>pCnRKEE>#nix+g0hx7*_Dlh`ZS{Vfse)wf=K>nBi%vt9p1 z#0MuLhYy_V2lw$YYX4AXALem8kMI)ue`KDIF2?*H&A?e~IEd3}jXXD~Z^J5nkNCJA z1JU>6brBmAb#NXVXY)qHCuL~D2#S!^C-%0<-QRQuH}W`NvNvM0{cUz{H{0K4=f2sw z|Fbrikjor4MSNP8=2-tz*?ua=PydeCl1U5n-7=0>_%q_OY_4D`Ya>4QEPQ?u1&4XLZE)>iRx|mN?(u z_tNHcXAlqJes0r$oBrFzVD4=j*@Q9M-HGk?vwboPc@K5%u&*8F+A$Jmzr!AO{2H;d z7CkZd&INoO@qNZKQtpN#+MGrtzBNH+Os z+qE^~=fJb^vpIi$5_$Y0k6&_dM}BcfezEqi)wzr*6!THU?n<~fyC?E`#Bb*MP0qh{ zqKI`7doqy89+~Ws$)3FtzpM3kwf_Dfn<!= zuOj~AB)TyRcjYhp{3{E!{bk+1toxUB|F-Vm*8STWf4_)=_u2cts~C%{_qmt*c14Q1 ziZ_Cz}9Na`_=nS2oG;KQjyZ|>yHNM#S=e1=lQ3O>cW zhtwpOIjrGZ_C_jKpFu37G}58^9ohwb551p_{2VFcFwUkw_h60;dn#{F<-0SUXIab6 zNEOsp!Cordz`ZPHJ@#|hIjH-v*(~FeNEOvyu@*gqtjcr#ZJUmo5Aq>F8|oS->}us$|fT zUf6q;*Vz&2m?|{HI>*fCE#!La$+TcN&#@`earz$Dh?{wu?;~Z_!1|eaOk)Y#A{~D` zS1=Vd9RE?I6Dn~D6L=MMo>-Z76tX%})r7pN7GN&FS54LG;ykNO=Y6)aFVab;;cQNN zkQHo>RQ(uo7|$ZMN6K<0S>2dLF*_rjT!-Gs>|~kwJ!(3o6Gg1#k4QDn#kw`d;4atr zEK*Gw)a=M))=(PhALTfYJm&ElvaNL_?a{xM{_$oBh|?!p9QRo zbZQ1!wB~x`c&fgq>U)~Lr}d){nVp`H+37Mn{a$2!`mRWIok88sIG4KaZrvXvoguR` zx-x_Hk|pqu)6lG5(zU z_!#>>XK$o>^|=~#)>CJF&rJQMS)P9e z*>q<%Z$xTPhAgx-(AHo>qzfw0m>YPMwUkC`n9$y^0O#CLrj4{U($?rI@|efgNR6w~ zl^J;M8?TSlM21c3qF)pJn&{V5zore4LDOP(MrtOnX6}jKou+1U*%GOFCY{i)xqi*{ zyD)>6+{TOi7OBPA^k)!iZ}EAgi;m$k@>#&9NG;{xvJRJ1fU|G;PNY_WXSY>zdg8s< zYGb6<&b77tT6;gWz60~L{w7kJEV?tB4UuvxqhF4GIS(NFi;w0~`eLn%m#`~RTUoTt zLl$jiafvK0xrB+l9;uyL+Nq^o4(?q$_paUMNbS8J+N-a@Dmlm-uQpXJB*0B#`c$)7cT~?k}j9@9HkuF#J4j?^X57@2e_WFgk<>b$zz zN7pI5&8|pS)g=%8t}?cpvE7XA)|DBoWlyB5&tU*hv5Ma#b+5-j?6bRlUUNKk=#8=0 ztjE}EjlH%#lPKajeu#A4sd%?s=gwdE8G9q;)@KlE&Q)`dn&dKvO_6$5p(8Ttxh7Jt zvNUHTCG3vWJDYqK@J*!a59K08Gn);OZm3K<3RxYgkA3$!jZSFmQ_R*#dG1P{`SPwH zk0RurC-)m`<6Li)%Z>N4lJ(eIUwi9oOy8-KI;`v$%#atYB}Xo9d(OCVg*O!mda+S0sn= zcwgN75}%>=oBxPZAfEzfQedA2_F3=|zeO71eK0~TBa9s}mX+v#%L%CGmIx?mOjB#TYu_n^k#PPJG2kzb21(fhfq}z?X-Fe>Ll^MK=%*V-W zTo&F<dK45#K!YbHPq4vUJR`ExqN#`;c_iK`8c#^#DEQ7l4?9CM3j&#={ zG{MixyKbR~w<1mUj+@*VbxdA}Jx|^nY04?I<5pxp<=04eyJvUjGKW_vjpTQ#X{xrV z<9G~pO!g7%YAcsnLi`le;&^4{=4xk-~V-_2Qp~M7_9w(?B>*G5Yu@-(u0TLjy!0M2aDMh z>7gogq>$B-9=5-S?eF2{jHHC!k>+M&&vWH9S6+`~(Vf|Bi1cV>>NALilty~2Cb<-_ z1U1Z4!#p+28_PIOSS1tE;CTmOJ77xUvVRou$L8UD2=qz8LjmGSZThM WYFhbTq?apT%*&TCl1FgHFaJL>3Oz{x literal 0 HcmV?d00001 diff --git a/youtube_dl/extractor/genvideos.py b/youtube_dl/extractor/genvideos.py index 0ba035eda..74a2e973a 100644 --- a/youtube_dl/extractor/genvideos.py +++ b/youtube_dl/extractor/genvideos.py @@ -5,6 +5,7 @@ from .common import InfoExtractor import requests import json +import urllib from urlparse import parse_qs, urlparse @@ -16,7 +17,7 @@ class GenVideosIE(InfoExtractor): # * http://genvideos.org/watch_kMjlhMWE5OT.html#video=tBa-Q-WkbPqwzs34b7ArqU7VomQMb2n-RAlARWKWKTI _TEST = { 'url': 'http://genvideos.org/watch?v=kMjlhMWE5OT', - 'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)', + #'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)', 'info_dict': { 'id': 'kMjlhMWE5OT', 'ext': 'mp4', @@ -35,6 +36,7 @@ class GenVideosIE(InfoExtractor): webpage = self._download_webpage(url, video_id) title = self._html_search_regex(r'

(.+?)

', webpage, 'title') + print("Title - " + title) #TODO retrieve video url urls_data = requests.post( "https://genvideos.org/video_info/iframe", @@ -42,17 +44,18 @@ class GenVideosIE(InfoExtractor): headers={'referer': 'https://genvideos.org/'} ) #returns json containing the url of the video (in 360p, 720p and 1080p). #For example - {"360":"\/\/html5player.org\/embed?url=https%3A%2F%2Flh3.googleusercontent.com%2FW6-SNGaDLWNyucD3pMqa1uMBapGDbtMTOtwpXrEu-w%3Dm18","720":"\/\/html5player.org\/embed?url=https%3A%2F%2Flh3.googleusercontent.com%2FW6-SNGaDLWNyucD3pMqa1uMBapGDbtMTOtwpXrEu-w%3Dm22","1080":"\/\/html5player.org\/embed?url=https%3A%2F%2Flh3.googleusercontent.com%2FW6-SNGaDLWNyucD3pMqa1uMBapGDbtMTOtwpXrEu-w%3Dm37"} - urls_data_json = json.loads(r.text) - _360p_url = parse_qs(urlparse(urls_data_json['360']).query)['url'] + urls_data_json = json.loads(urls_data.text) + _360p_redirect_url = parse_qs(urlparse(urls_data_json['360']).query)['url'][0] + _360p_url = urllib.urlopen(_360p_redirect_url).geturl() # TODO : return all possible formats instead of just 360p return { 'id': video_id, 'title': title, - 'url': _360p_url + 'url': _360p_url, + 'ext': 'mp4' #'description': self._og_search_description(webpage), #'uploader': self._search_regex(r']+id="uploader"[^>]*>([^<]+)<', webpage, 'uploader', fatal=False), # TODO more properties (see youtube_dl/extractor/common.py) } - \ No newline at end of file From c945a12e3946efc19a47bbaacbc158e64ee46eda Mon Sep 17 00:00:00 2001 From: Abhishek Kedia Date: Sun, 20 Dec 2015 22:15:20 +0100 Subject: [PATCH 4/7] add one more test. Actually its downloading correctly `python test/test_download.py TestDownload.test_GenVideos` caps the download at 10kB --- kMjlhMWE5OT.unknown_video | Bin 10241 -> 0 bytes youtube_dl/extractor/genvideos.py | 11 +++++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) delete mode 100644 kMjlhMWE5OT.unknown_video diff --git a/kMjlhMWE5OT.unknown_video b/kMjlhMWE5OT.unknown_video deleted file mode 100644 index 9cc4f399b5a1f1d425c683e8f353424f20a7bda7..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 10241 zcmZwL37pr{{y*^diDXI?!k{$Ntq_Jn+NEWr5=u#!78*6}#Y9?EnkY?Lgb3q-fKCg2=pXp9S9C!WDf`R!1>zy6G@`en^ zKk$96b0_8x7%*H@zx?5SdhP%0{T?50+%KX`muTN1D^l$L-+z7mx4&rqAAfK5Up@b? z?+(&fC2v5lA^Sh9R4zK~+gGQ`zNok(cj(~UzS^!H+V?;qN9^zLevgfn{qe6Q+IKi= zf7^fi`X7t`U&WLeZG!R#M%C!uYv}%kPwd@q$k6{kPqN9Q4}8CWu8P|?<@d_V^ghufe_d_u2oo|MSC!4enR{!1oomUo>QBkAA*? zV#v@T2Ttigd&IrD!+U1?e*f3M&v~Gs%xK$g|7!lX{~ot*pTC`9))wi) zSE}sW8pqz?&&_2<-~II~7eyTLSd{r88g+Sbbc%!fyxVBB-beQsI#`%HD#Wp$e-UNx zsHE+uYcG+-FT?2IT|2M1GjE?XiF$)poStcxg@!MQl3a?h|W;?S}*XAtxG zDk8%L$+#3XWT+uyQ$%@tDt{jKQ+^TKBP!Iw*a{;lVYg#EiEhkd0jphz#F@D86|GUx z8i!Y=9oi1pcDS}9j^hdnSb}vcS+|lkDp{kFnk&7>R{n^nd;)T*d^_^3yb?7Wc_iA8 zbiaQ*={%2D+wqq$krKX+IN^BIae_Kd zc$iN;%L#o?w2u>K@H*Qgsyd6RJuzohnOEHvQSAuiUhOI-Actz^sOGbioYP5d(SDNl zlm3pVel}e(Z*_H5-x85k5jA98&m*kDK2JWDCX8SyAMhU$r^xP@r>Lcdnrk%1 z95wEsh_@nY9*n*<^{qJ<>($&F@sIipVj-mwwd7grN^+UQrij|kxORIc@dj(qw~q6y z(~7aoV{OE#hf-P#T1sa$BV<9 zXLKeHci@aSaTaGfi!+_YnGa!|Gk=IU>r^~bXZh)K)^pfHwmoDwU>NhU|Fbi3r_MIt z*)lo%?TB*@;X=AG4)^ICS=Y;;CFZPW&U)6VZ=L#bu0MzeSczxl+;bU>=knYSvHp2g z=*VQ&M4Vri=8U9--4PA4$)|_~yoA{zaJ z9u)9AA9)RWZ#F)Ufz0Mrwz4mxi96e*FZZHvQ+=B{)22B%tEQ{?5_L4ww^@6@^O zi|*Wm8ZVaZ#jUxWLRLq#P1t{1S+#Y4+Pdp)cST&HhD-98hkDypr8!-h!TN~y2V=kO zZ)G;CkVA*l>BA$uhMX@wkvd$BGr826bW~4Ab9Nj_5$hr@%b+D=Sb_X5m-*$Fb2slq zTu}}=T#?HhHbr!D4?D@L(`3}qX^nqUl*Rm=hoHXBFY{-_mG07&*>q+ivc2-Vh%T8p z?=J3L7yY~F-$nnfC)0*O%*3k38mA@l;%C+YOsI{l|o*UW3zKCA(>E%xJawmFii0ECJ z=8U8S^q7~7{EYU-n=K5ELVPd^G+!mfxL-MJgpa-&*q zG*@49^)*-Dv8?2C|LCql4&zzHUlI8Y=#GB*#q5meUkUG!{_^VY-t>2G2FQFsCQa~e z9dIWv@q5HTxepwO8V9Oz;NFNqwdlkxETc4Ha7}WV!={KK>KSqt9T~-P_CyRlhpTX3 zhL-SU#IR~;8+I3O`6saR9-f8Thu?!|^ri~5#xr_TA*&;9PE@BY!?E_wYP?xy1?DU; zXMuGJ*5FQza3@A|AmX;e zX-+;xtcw_xfu9khyaPuK;xVi}>Kpb(jIPh+$bIwz-ljBSjO@l}ACt?iyueQpW7RRX z2M=PN+sk4Px4YN3KZ{z&=`${y4h-W_K8zTD5Na8JJ+s*mF(Ejc>(DmgRklUkA;&u! zQ-GT9u;+>8(KfL=dCWt-6FqZmw#rB!}@V;j4%{J)3tnKwWn(LcMoY zr3LQ9U31wIFjD$hr|GnKXMi+Jo@@+stbWHZlR=AD6iJ8v51n71q9@w((;t;hA7Z_fE$ zk>7k7JW-Jx)cwRN{)$-80DD~U1YbuynL!`$h}@zO~kWhY0gMWFlVvai_avR z&e(VHyAjWorxjzFjkTUr@8V-=Ne|??*tstLG~)S0V}@}L=2&8mCFWR?$2_)1EUijI z?02a-m&$LM8kQNm%sX?LeJxv#IhXB@c;QqU&=sG(;IkJtN0hiPC5;%)TsB3#cqq;2 z!((_JUUcV{%YM03z+*cb82Npxct3-~5tmGP_Wchvx<U_=muUY@K0+z5V z;`O?8$KGGJ_t#~(x)O3-eFKm3dBhthki%_w2H*H4;!WrJW^d&4W-&V>-YQ26%=ec0 z-m>pCnRKEE>#nix+g0hx7*_Dlh`ZS{Vfse)wf=K>nBi%vt9p1 z#0MuLhYy_V2lw$YYX4AXALem8kMI)ue`KDIF2?*H&A?e~IEd3}jXXD~Z^J5nkNCJA z1JU>6brBmAb#NXVXY)qHCuL~D2#S!^C-%0<-QRQuH}W`NvNvM0{cUz{H{0K4=f2sw z|Fbrikjor4MSNP8=2-tz*?ua=PydeCl1U5n-7=0>_%q_OY_4D`Ya>4QEPQ?u1&4XLZE)>iRx|mN?(u z_tNHcXAlqJes0r$oBrFzVD4=j*@Q9M-HGk?vwboPc@K5%u&*8F+A$Jmzr!AO{2H;d z7CkZd&INoO@qNZKQtpN#+MGrtzBNH+Os z+qE^~=fJb^vpIi$5_$Y0k6&_dM}BcfezEqi)wzr*6!THU?n<~fyC?E`#Bb*MP0qh{ zqKI`7doqy89+~Ws$)3FtzpM3kwf_Dfn<!= zuOj~AB)TyRcjYhp{3{E!{bk+1toxUB|F-Vm*8STWf4_)=_u2cts~C%{_qmt*c14Q1 ziZ_Cz}9Na`_=nS2oG;KQjyZ|>yHNM#S=e1=lQ3O>cW zhtwpOIjrGZ_C_jKpFu37G}58^9ohwb551p_{2VFcFwUkw_h60;dn#{F<-0SUXIab6 zNEOsp!Cordz`ZPHJ@#|hIjH-v*(~FeNEOvyu@*gqtjcr#ZJUmo5Aq>F8|oS->}us$|fT zUf6q;*Vz&2m?|{HI>*fCE#!La$+TcN&#@`earz$Dh?{wu?;~Z_!1|eaOk)Y#A{~D` zS1=Vd9RE?I6Dn~D6L=MMo>-Z76tX%})r7pN7GN&FS54LG;ykNO=Y6)aFVab;;cQNN zkQHo>RQ(uo7|$ZMN6K<0S>2dLF*_rjT!-Gs>|~kwJ!(3o6Gg1#k4QDn#kw`d;4atr zEK*Gw)a=M))=(PhALTfYJm&ElvaNL_?a{xM{_$oBh|?!p9QRo zbZQ1!wB~x`c&fgq>U)~Lr}d){nVp`H+37Mn{a$2!`mRWIok88sIG4KaZrvXvoguR` zx-x_Hk|pqu)6lG5(zU z_!#>>XK$o>^|=~#)>CJF&rJQMS)P9e z*>q<%Z$xTPhAgx-(AHo>qzfw0m>YPMwUkC`n9$y^0O#CLrj4{U($?rI@|efgNR6w~ zl^J;M8?TSlM21c3qF)pJn&{V5zore4LDOP(MrtOnX6}jKou+1U*%GOFCY{i)xqi*{ zyD)>6+{TOi7OBPA^k)!iZ}EAgi;m$k@>#&9NG;{xvJRJ1fU|G;PNY_WXSY>zdg8s< zYGb6<&b77tT6;gWz60~L{w7kJEV?tB4UuvxqhF4GIS(NFi;w0~`eLn%m#`~RTUoTt zLl$jiafvK0xrB+l9;uyL+Nq^o4(?q$_paUMNbS8J+N-a@Dmlm-uQpXJB*0B#`c$)7cT~?k}j9@9HkuF#J4j?^X57@2e_WFgk<>b$zz zN7pI5&8|pS)g=%8t}?cpvE7XA)|DBoWlyB5&tU*hv5Ma#b+5-j?6bRlUUNKk=#8=0 ztjE}EjlH%#lPKajeu#A4sd%?s=gwdE8G9q;)@KlE&Q)`dn&dKvO_6$5p(8Ttxh7Jt zvNUHTCG3vWJDYqK@J*!a59K08Gn);OZm3K<3RxYgkA3$!jZSFmQ_R*#dG1P{`SPwH zk0RurC-)m`<6Li)%Z>N4lJ(eIUwi9oOy8-KI;`v$%#atYB}Xo9d(OCVg*O!mda+S0sn= zcwgN75}%>=oBxPZAfEzfQedA2_F3=|zeO71eK0~TBa9s}mX+v#%L%CGmIx?mOjB#TYu_n^k#PPJG2kzb21(fhfq}z?X-Fe>Ll^MK=%*V-W zTo&F<dK45#K!YbHPq4vUJR`ExqN#`;c_iK`8c#^#DEQ7l4?9CM3j&#={ zG{MixyKbR~w<1mUj+@*VbxdA}Jx|^nY04?I<5pxp<=04eyJvUjGKW_vjpTQ#X{xrV z<9G~pO!g7%YAcsnLi`le;&^4{=4xk-~V-_2Qp~M7_9w(?B>*G5Yu@-(u0TLjy!0M2aDMh z>7gogq>$B-9=5-S?eF2{jHHC!k>+M&&vWH9S6+`~(Vf|Bi1cV>>NALilty~2Cb<-_ z1U1Z4!#p+28_PIOSS1tE;CTmOJ77xUvVRou$L8UD2=qz8LjmGSZThM WYFhbTq?apT%*&TCl1FgHFaJL>3Oz{x diff --git a/youtube_dl/extractor/genvideos.py b/youtube_dl/extractor/genvideos.py index 74a2e973a..7498e39e7 100644 --- a/youtube_dl/extractor/genvideos.py +++ b/youtube_dl/extractor/genvideos.py @@ -15,7 +15,7 @@ class GenVideosIE(InfoExtractor): # For example # * http://genvideos.com/watch_kMjlhMWE5OT.html#video=tBa-Q-WkbPqwzs34b7ArqU7VomQMb2n-RAlARWKWKTI # * http://genvideos.org/watch_kMjlhMWE5OT.html#video=tBa-Q-WkbPqwzs34b7ArqU7VomQMb2n-RAlARWKWKTI - _TEST = { + _TESTS = [{ 'url': 'http://genvideos.org/watch?v=kMjlhMWE5OT', #'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)', 'info_dict': { @@ -29,7 +29,14 @@ class GenVideosIE(InfoExtractor): # * A regular expression; start the string with re: # * Any Python type (for example int or float) } - } + }, { + 'url': 'https://genvideos.org/watch?v=Pitch_Perfect_2_2015#video=tBa-Q-WkbPqwzs34b7ArqU7VomQMb2n-RAlARWKWKTI', + 'info_dict':{ + 'id': 'Pitch_Perfect_2_2015', + 'ext': 'mp4', + 'title': 'Pitch Perfect 2 (2015) - HD 1080p' + } + }] def _real_extract(self, url): video_id = self._match_id(url) From ab410f74763089495af7ed21c7efae0c46b880e1 Mon Sep 17 00:00:00 2001 From: Abhishek Kedia Date: Sun, 20 Dec 2015 22:25:33 +0100 Subject: [PATCH 5/7] remove unnecessary redirect url retrieval. test with `python -m youtube_dl https://genvideos.org/watch?v=Pitch_Perfect_2_2015` working fine. --- youtube_dl/extractor/genvideos.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/genvideos.py b/youtube_dl/extractor/genvideos.py index 7498e39e7..e5ec20c7a 100644 --- a/youtube_dl/extractor/genvideos.py +++ b/youtube_dl/extractor/genvideos.py @@ -52,8 +52,8 @@ class GenVideosIE(InfoExtractor): ) #returns json containing the url of the video (in 360p, 720p and 1080p). #For example - {"360":"\/\/html5player.org\/embed?url=https%3A%2F%2Flh3.googleusercontent.com%2FW6-SNGaDLWNyucD3pMqa1uMBapGDbtMTOtwpXrEu-w%3Dm18","720":"\/\/html5player.org\/embed?url=https%3A%2F%2Flh3.googleusercontent.com%2FW6-SNGaDLWNyucD3pMqa1uMBapGDbtMTOtwpXrEu-w%3Dm22","1080":"\/\/html5player.org\/embed?url=https%3A%2F%2Flh3.googleusercontent.com%2FW6-SNGaDLWNyucD3pMqa1uMBapGDbtMTOtwpXrEu-w%3Dm37"} urls_data_json = json.loads(urls_data.text) - _360p_redirect_url = parse_qs(urlparse(urls_data_json['360']).query)['url'][0] - _360p_url = urllib.urlopen(_360p_redirect_url).geturl() + _360p_url = parse_qs(urlparse(urls_data_json['360']).query)['url'][0] + #_360p_url = urllib.urlopen(_360p_redirect_url).geturl() # TODO : return all possible formats instead of just 360p From 826c5473c1471f29f5d5e3ba42a94554688774d1 Mon Sep 17 00:00:00 2001 From: Abhishek Kedia Date: Sun, 20 Dec 2015 22:42:56 +0100 Subject: [PATCH 6/7] add description and hash value in tests --- youtube_dl/extractor/genvideos.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/genvideos.py b/youtube_dl/extractor/genvideos.py index e5ec20c7a..d3362e059 100644 --- a/youtube_dl/extractor/genvideos.py +++ b/youtube_dl/extractor/genvideos.py @@ -17,11 +17,12 @@ class GenVideosIE(InfoExtractor): # * http://genvideos.org/watch_kMjlhMWE5OT.html#video=tBa-Q-WkbPqwzs34b7ArqU7VomQMb2n-RAlARWKWKTI _TESTS = [{ 'url': 'http://genvideos.org/watch?v=kMjlhMWE5OT', - #'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)', + 'md5': '666118ec1176f14259bb9f11c74bea83', 'info_dict': { 'id': 'kMjlhMWE5OT', 'ext': 'mp4', 'title': 'The Hunger Games (2012) - HD 1080p', + 'description': 'In a dystopian future, the totalitarian nation of Panem is divided between 12 districts and the Capitol. Each year two young representatives from each distri...' #'thumbnail': 're:^https?://.*\.jpg$', # TODO more properties, either as: # * A value @@ -31,10 +32,12 @@ class GenVideosIE(InfoExtractor): } }, { 'url': 'https://genvideos.org/watch?v=Pitch_Perfect_2_2015#video=tBa-Q-WkbPqwzs34b7ArqU7VomQMb2n-RAlARWKWKTI', + 'md5': '441b5c9f0445724b0f6f9d6e5498e577', 'info_dict':{ 'id': 'Pitch_Perfect_2_2015', 'ext': 'mp4', - 'title': 'Pitch Perfect 2 (2015) - HD 1080p' + 'title': 'Pitch Perfect 2 (2015) - HD 1080p', + 'description': 'The Bellas are back, and they are better than ever. After being humiliated in front of none other than the President of the United States of America, the Bel...' } }] @@ -43,7 +46,6 @@ class GenVideosIE(InfoExtractor): webpage = self._download_webpage(url, video_id) title = self._html_search_regex(r'

(.+?)

', webpage, 'title') - print("Title - " + title) #TODO retrieve video url urls_data = requests.post( "https://genvideos.org/video_info/iframe", @@ -56,13 +58,12 @@ class GenVideosIE(InfoExtractor): #_360p_url = urllib.urlopen(_360p_redirect_url).geturl() # TODO : return all possible formats instead of just 360p - return { 'id': video_id, 'title': title, 'url': _360p_url, - 'ext': 'mp4' - #'description': self._og_search_description(webpage), + 'ext': 'mp4', + 'description': self._og_search_description(webpage), #'uploader': self._search_regex(r']+id="uploader"[^>]*>([^<]+)<', webpage, 'uploader', fatal=False), # TODO more properties (see youtube_dl/extractor/common.py) } From c5c2d9c1e92b9c429afe7be7e2797837d6c17984 Mon Sep 17 00:00:00 2001 From: Abhishek Kedia Date: Sun, 20 Dec 2015 23:51:00 +0100 Subject: [PATCH 7/7] generalise regex for other url formats also, and return all possible video formats instead of just 360p --- youtube_dl/extractor/genvideos.py | 40 +++++++++++-------------------- 1 file changed, 14 insertions(+), 26 deletions(-) diff --git a/youtube_dl/extractor/genvideos.py b/youtube_dl/extractor/genvideos.py index d3362e059..76cd89bd1 100644 --- a/youtube_dl/extractor/genvideos.py +++ b/youtube_dl/extractor/genvideos.py @@ -4,35 +4,24 @@ from __future__ import unicode_literals from .common import InfoExtractor import requests -import json import urllib from urlparse import parse_qs, urlparse class GenVideosIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?genvideos.org/watch\?v=(?P\w+)#?' #Tests only the basic url format. Example - https://genvideos.org/watch?v=kMjlhMWE5OT - # TODO check for other possible url formats also - # For example - # * http://genvideos.com/watch_kMjlhMWE5OT.html#video=tBa-Q-WkbPqwzs34b7ArqU7VomQMb2n-RAlARWKWKTI - # * http://genvideos.org/watch_kMjlhMWE5OT.html#video=tBa-Q-WkbPqwzs34b7ArqU7VomQMb2n-RAlARWKWKTI + _VALID_URL = r'https?://(?:www\.)?genvideos.(org|com)/watch(\?v=|_)(?P\w+)#?' _TESTS = [{ 'url': 'http://genvideos.org/watch?v=kMjlhMWE5OT', - 'md5': '666118ec1176f14259bb9f11c74bea83', + 'md5': 'f610bd838c21c083ede8a05ca18d55e2', #1080p quality 'info_dict': { 'id': 'kMjlhMWE5OT', 'ext': 'mp4', 'title': 'The Hunger Games (2012) - HD 1080p', 'description': 'In a dystopian future, the totalitarian nation of Panem is divided between 12 districts and the Capitol. Each year two young representatives from each distri...' - #'thumbnail': 're:^https?://.*\.jpg$', - # TODO more properties, either as: - # * A value - # * MD5 checksum; start the string with md5: - # * A regular expression; start the string with re: - # * Any Python type (for example int or float) } }, { 'url': 'https://genvideos.org/watch?v=Pitch_Perfect_2_2015#video=tBa-Q-WkbPqwzs34b7ArqU7VomQMb2n-RAlARWKWKTI', - 'md5': '441b5c9f0445724b0f6f9d6e5498e577', + 'md5': 'df4011514016747ea3478d1776ffece2', #1080p quality 'info_dict':{ 'id': 'Pitch_Perfect_2_2015', 'ext': 'mp4', @@ -46,24 +35,23 @@ class GenVideosIE(InfoExtractor): webpage = self._download_webpage(url, video_id) title = self._html_search_regex(r'

(.+?)

', webpage, 'title') - #TODO retrieve video url + #TODO retrieve video url (using requests dependency for the moment) urls_data = requests.post( "https://genvideos.org/video_info/iframe", data={'v':video_id}, headers={'referer': 'https://genvideos.org/'} - ) #returns json containing the url of the video (in 360p, 720p and 1080p). - #For example - {"360":"\/\/html5player.org\/embed?url=https%3A%2F%2Flh3.googleusercontent.com%2FW6-SNGaDLWNyucD3pMqa1uMBapGDbtMTOtwpXrEu-w%3Dm18","720":"\/\/html5player.org\/embed?url=https%3A%2F%2Flh3.googleusercontent.com%2FW6-SNGaDLWNyucD3pMqa1uMBapGDbtMTOtwpXrEu-w%3Dm22","1080":"\/\/html5player.org\/embed?url=https%3A%2F%2Flh3.googleusercontent.com%2FW6-SNGaDLWNyucD3pMqa1uMBapGDbtMTOtwpXrEu-w%3Dm37"} - urls_data_json = json.loads(urls_data.text) - _360p_url = parse_qs(urlparse(urls_data_json['360']).query)['url'][0] - #_360p_url = urllib.urlopen(_360p_redirect_url).geturl() - # TODO : return all possible formats instead of just 360p - + ) #returns json containing the url of the video (in 360p, 720p and 1080p). For example - {"360":"\/\/html5player.org\/embed?url=https%3A%2F%2Flh3.googleusercontent.com%2FW6-SNGaDLWNyucD3pMqa1uMBapGDbtMTOtwpXrEu-w%3Dm18","720":"\/\/html5player.org\/embed?url=https%3A%2F%2Flh3.googleusercontent.com%2FW6-SNGaDLWNyucD3pMqa1uMBapGDbtMTOtwpXrEu-w%3Dm22","1080":"\/\/html5player.org\/embed?url=https%3A%2F%2Flh3.googleusercontent.com%2FW6-SNGaDLWNyucD3pMqa1uMBapGDbtMTOtwpXrEu-w%3Dm37"} + urls_data_json = self._parse_json(urls_data.text,video_id) + formats = [] + for quality in sorted(urls_data_json,key=lambda q:int(q)): + formats.append({ + 'url':parse_qs(urlparse(urls_data_json[quality]).query)['url'][0], + 'ext':'mp4', + 'format_id':quality + }) return { 'id': video_id, 'title': title, - 'url': _360p_url, - 'ext': 'mp4', 'description': self._og_search_description(webpage), - #'uploader': self._search_regex(r']+id="uploader"[^>]*>([^<]+)<', webpage, 'uploader', fatal=False), - # TODO more properties (see youtube_dl/extractor/common.py) + 'formats':formats }