From db18b80c60f407ad042ce7187c52bce203778ead Mon Sep 17 00:00:00 2001 From: DarkZeros Date: Sun, 12 Jun 2016 00:37:53 +0100 Subject: [PATCH 1/4] Mitele may not have a title Example episode: http://www.mitele.es/programas-tv/cuarto-milenio/temporada-6/programa-226/ It has no title, and trying to download it just crashes with an error. The error is not raised because something actually failed, but only because the video has no title. Defaulting the title to NO_TITLE makes the download work properly, and only points out in a download list that the title is missing. --- youtube_dl/extractor/mitele.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/mitele.py b/youtube_dl/extractor/mitele.py index 3589c223d..005480f4d 100644 --- a/youtube_dl/extractor/mitele.py +++ b/youtube_dl/extractor/mitele.py @@ -70,7 +70,7 @@ class MiTeleIE(InfoExtractor): self._sort_formats(formats) title = self._search_regex( - r'class="Destacado-text"[^>]*>\s*([^<]+)', webpage, 'title') + r'class="Destacado-text"[^>]*>\s*([^<]+)', webpage, 'title', 'NO_TITLE') video_id = self._search_regex( r'data-media-id\s*=\s*"([^"]+)"', webpage, From 0eb5bef3e640d28a90c7f61686b017badb1e64d4 Mon Sep 17 00:00:00 2001 From: DarkZeros Date: Sun, 12 Jun 2016 01:07:37 +0100 Subject: [PATCH 2/4] Add test for Mitele without title Also default the title to display_id, since for Mitele it somewhat matches. 
--- youtube_dl/extractor/mitele.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/mitele.py b/youtube_dl/extractor/mitele.py index 005480f4d..2079ad1f4 100644 --- a/youtube_dl/extractor/mitele.py +++ b/youtube_dl/extractor/mitele.py @@ -15,7 +15,7 @@ class MiTeleIE(InfoExtractor): IE_DESC = 'mitele.es' _VALID_URL = r'https?://www\.mitele\.es/[^/]+/[^/]+/[^/]+/(?P[^/]+)/' - _TEST = { + _TESTS = [{ 'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/', # MD5 is unstable 'info_dict': { @@ -27,7 +27,19 @@ class MiTeleIE(InfoExtractor): 'thumbnail': 're:(?i)^https?://.*\.jpg$', 'duration': 2913, }, - } + }, { + 'url': 'http://www.mitele.es/programas-tv/cuarto-milenio/temporada-6/programa-226/', + # MD5 is unstable + 'info_dict': { + 'id': 'eLZSwoEd1S3pVyUm8lc6F', + 'display_id': 'programa-226', + 'ext': 'flv', + 'title': 'programa-226', #This is what we are testing, should be same as display_id since the video has no title + 'description': 'md5:50daf9fadefa4e62d9fc866d0c015701', + 'thumbnail': 're:(?i)^https?://.*\.jpg$', + 'duration': 7312, + }, + }] def _real_extract(self, url): display_id = self._match_id(url) @@ -70,7 +82,7 @@ class MiTeleIE(InfoExtractor): self._sort_formats(formats) title = self._search_regex( - r'class="Destacado-text"[^>]*>\s*([^<]+)', webpage, 'title', 'NO_TITLE') + r'class="Destacado-text"[^>]*>\s*([^<]+)', webpage, 'title', display_id) video_id = self._search_regex( r'data-media-id\s*=\s*"([^"]+)"', webpage, From 55ff992bcdfe39c69fe64ddc71c823d2357035f5 Mon Sep 17 00:00:00 2001 From: DarkZeros Date: Sun, 12 Jun 2016 14:01:06 +0100 Subject: [PATCH 3/4] Final Tweak to the alternative title Take it from the description, better formatting --- youtube_dl/extractor/mitele.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/mitele.py b/youtube_dl/extractor/mitele.py index 2079ad1f4..bcea63e1a 100644 --- 
a/youtube_dl/extractor/mitele.py +++ b/youtube_dl/extractor/mitele.py @@ -34,7 +34,8 @@ class MiTeleIE(InfoExtractor): 'id': 'eLZSwoEd1S3pVyUm8lc6F', 'display_id': 'programa-226', 'ext': 'flv', - 'title': 'programa-226', #This is what we are testing, should be same as display_id since the video has no title + 'title': 'Programa 226', #This is what we are testing, should be same as display_id since the video has no title + 'title': 'Programa 226', #This is what we are testing, should be same as display_id since the video has no title 'description': 'md5:50daf9fadefa4e62d9fc866d0c015701', 'thumbnail': 're:(?i)^https?://.*\.jpg$', 'duration': 7312, @@ -81,8 +82,14 @@ class MiTeleIE(InfoExtractor): display_id, f4m_id=loc)) self._sort_formats(formats) + alt_title = self._search_regex( + r'class="temp"[^>]*>\s*

(?:\s*([^<]+)\s*)+

', webpage, 'alt_title', display_id) + #Alternative, but it is not good since it adds "Watch online" (Ver online) to the title + #alt_title = self._search_regex( + # r'([^>]*)<\/title>', webpage, 'alt_title', display_id) + title = self._search_regex( - r'class="Destacado-text"[^>]*>\s*<strong>([^<]+)</strong>', webpage, 'title', display_id) + r'class="Destacado-text"[^>]*>\s*<strong>([^<]+)</strong>', webpage, 'title', alt_title) video_id = self._search_regex( r'data-media-id\s*=\s*"([^"]+)"', webpage, From 697aa38e92c43027a37d24b3ec1e725d658d7f25 Mon Sep 17 00:00:00 2001 From: DarkZeros <mailszeros@gmail.com> Date: Sun, 12 Jun 2016 14:10:45 +0100 Subject: [PATCH 4/4] Fixed a typo --- youtube_dl/extractor/mitele.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/mitele.py b/youtube_dl/extractor/mitele.py index bcea63e1a..6290c88a9 100644 --- a/youtube_dl/extractor/mitele.py +++ b/youtube_dl/extractor/mitele.py @@ -35,7 +35,6 @@ class MiTeleIE(InfoExtractor): 'display_id': 'programa-226', 'ext': 'flv', 'title': 'Programa 226', #This is what we are testing, should be same as display_id since the video has no title - 'title': 'Programa 226', #This is what we are testing, should be same as display_id since the video has no title 'description': 'md5:50daf9fadefa4e62d9fc866d0c015701', 'thumbnail': 're:(?i)^https?://.*\.jpg$', 'duration': 7312,