1
0
mirror of https://github.com/l1ving/youtube-dl synced 2025-03-13 19:40:01 +08:00

fix problem with title regexp on some videos

fix problem with title regexp on some videos (test case added for wat.tv:
lady gaga...)
This commit is contained in:
dabiboo 2015-09-23 17:15:03 +02:00
parent c56d3f5621
commit 0ecaec0429
2 changed files with 12 additions and 3 deletions

View File

@ -51,9 +51,8 @@ class UniversalMusicFranceIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
urlVideo = self._html_search_regex(r'var urlVideo = \'(.*)\';', webpage, 'urlVideo')
title = self._html_search_regex(r'<meta property="?og:title"? content="(.*)"/>', webpage, 'title')
title = self._html_search_regex(r'<meta\s*property="?og:title"?\s*content="(.*)"\s*/>', webpage, 'title')
request = compat_urllib_request.Request(self.GET_TOKEN_URL, urlencode_postdata({'videoUrl': urlVideo}))
request.add_header('Content-Type', 'application/x-www-form-urlencoded; charset=UTF-8')
@ -61,7 +60,6 @@ class UniversalMusicFranceIE(InfoExtractor):
manifest_json = self._download_webpage(request, None, note='Getting token', errnote='unable to get token')
manifestUrl = self._parse_json(manifest_json, video_id).get("video")
print(manifestUrl);
return {
'id': video_id,
'title': title,

View File

@ -15,6 +15,17 @@ class WatIE(InfoExtractor):
_VALID_URL = r'http://www\.wat\.tv/video/(?P<display_id>.*)-(?P<short_id>.*?)_.*?\.html'
IE_NAME = 'wat.tv'
_TESTS = [
{
'url': 'http://www.wat.tv/video/lady-gaga-but-beautiful-2014-72611_2ey39_.html',
'md5': '159cda7568b9fc1e5e3de6aeca5d4bfc',
'info_dict': {
'id': 'lady-gaga-but-beautiful',
'display_id': 'lady-gaga-but-beautiful',
'ext': 'mp4',
'title': 'lady-gaga-but-beautiful',
'description': 'md5:1bbdde8d44751f43367ba68e8b9966a6'
},
},
{
'url': 'http://www.wat.tv/video/anna-bergendahl-for-you-2015-7dvjn_76lkz_.html',
'md5': '159cda7568b9fc1e5e3de6aeca5d4bfc',