diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 042b85267..81d3c95be 100755 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -3623,6 +3623,84 @@ class InfoQIE(InfoExtractor): except UnavailableVideoError, err: self._downloader.trouble(u'\nERROR: unable to download ' + video_url) +class OpenClassroomIE(InfoExtractor): + """Information extractor for openclassroom.stanford.edu""" + _VALID_URL = r'^(?:https?://)?openclassroom\.stanford\.edu/([\w\d-]+)/(?:[\w\d-]+).*\?course=(.*)&video=(.*)&' + IE_NAME = u'openclassroom' + + def __init__(self, downloader=None): + InfoExtractor.__init__(self, downloader) + + def report_download_xml(self, file_id): + """Report XML download""" + self._downloader.to_screen(u'[%s] Downloading xml %s' % (self.IE_NAME, file_id)) + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + if mobj is None: + self._downloader.trouble(u'ERROR: invalid URL: %s' % url) + return + + section = mobj.group(1) + course = mobj.group(2) + file_id = mobj.group(3) + + # fetch xml first + file_url = 'http://openclassroom.stanford.edu/%s/courses/%s/videos/%s' % (section, course, file_id) + '.xml' + request = urllib2.Request(file_url) + try: + self.report_download_xml(file_id) + xmlData = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: Unable to retrieve xml: %s' % str(err)) + return + + # parse xml + try: + mobj = re.search(r'(.*)', xmlData) + title = mobj.group(1) + except: + self._downloader.screen_to(u'WARNING: unable to extract title') + + description = u'(no description)' + try: + rx = re.compile(r'(.*)', re.DOTALL) + mobj = re.search(rx, xmlData) + description = mobj.group(1) + description = description.replace('', '')\ + .replace('

', '').replace('

', '').replace('
', '\n') + description = description.replace(' ', '').strip() + + except: + pass + try: + mobj = re.search(r'(.*)', xmlData) + video_id = mobj.group(1) + ext = video_id.split('.')[-1] + except: + self._downloader.trouble(u'ERROR: unable to extract video id') + ext = '.flv' + video_id = file_id + ext # we have no video id, so try to guess from &video= in url + + file_url = 'http://openclassroom.stanford.edu/%s/courses/%s/videos/%s' % (section, course, video_id) + self._downloader.increment_downloads() + try: + # Process file information + self._downloader.process_info({ + 'id': file_id.decode('utf-8'), + 'url': file_url.decode('utf-8'), + 'uploader': u'NA', + 'upload_date': u'NA', + 'title': title.decode('utf-8'), + 'stitle': _simplify_title(title.decode('utf-8')), + 'ext': ext.decode('utf-8'), + 'format': ext.decode('utf-8').upper(), + 'description': description, + }) + except UnavailableVideoError, err: + self._downloader.trouble(u'ERROR: unable to download video') + + class MixcloudIE(InfoExtractor): """Information extractor for www.mixcloud.com""" _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([\w\d-]+)/([\w\d-]+)' @@ -4166,6 +4244,7 @@ def gen_extractors(): SoundcloudIE(), InfoQIE(), MixcloudIE(), + OpenClassroomIE(), GenericIE() ]