1
0
mirror of https://github.com/l1ving/youtube-dl synced 2024-11-24 19:12:52 +08:00

Added an IE for hark.com

This commit is contained in:
Yasoob 2013-08-11 22:23:05 +05:00
parent 356e067390
commit e3a88568b0
2 changed files with 36 additions and 0 deletions

View File

@ -29,6 +29,7 @@ from .gametrailers import GametrailersIE
from .generic import GenericIE
from .googleplus import GooglePlusIE
from .googlesearch import GoogleSearchIE
from .hark import HarkIE
from .hotnewhiphop import HotNewHipHopIE
from .howcast import HowcastIE
from .hypem import HypemIE

View File

@ -0,0 +1,35 @@
# -*- coding: utf-8 -*-
import re
from .common import InfoExtractor
from ..utils import determine_ext
class HarkIE(InfoExtractor):
_VALID_URL = r'https?://www\.hark\.com/clips/(.+?)-.+'
_TEST = {
u'url': u'http://www.hark.com/clips/mmbzyhkgny-obama-beyond-the-afghan-theater-we-only-target-al-qaeda-on-may-23-2013',
u'file': u'mmbzyhkgny.mp3',
u'md5': u'6783a58491b47b92c7c1af5a77d4cbee',
u'info_dict': {
u"title": u"Obama: 'Beyond The Afghan Theater, We Only Target Al Qaeda' On May 23, 2013 ",
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group(1)
embed_url = "http://www.hark.com/clips/%s/homepage_embed" %(video_id)
webpage = self._download_webpage(embed_url, video_id)
final_url = self._search_regex(r'src="(.+?).mp3"',
webpage, 'video url')+'.mp3'
title = self._html_search_regex(r'<title>(.+?)</title>',
webpage, 'video title').replace(' Sound Clip and Quote - Hark','').replace(
'Sound Clip , Quote, MP3, and Ringtone - Hark','')
return {'id': video_id,
'url' : final_url,
'title': title,
'ext': determine_ext(final_url),
}