1
0
mirror of https://github.com/l1ving/youtube-dl synced 2025-03-13 07:17:29 +08:00

Extract video tags from the xHamster movie page

This commit is contained in:
lm 2017-01-15 16:27:46 +01:00
parent a7acf868a5
commit 1028acbd04

View File

@ -1,3 +1,4 @@
# coding=utf-8
from __future__ import unicode_literals
import re
@ -11,6 +12,8 @@ from ..utils import (
)
class XHamsterIE(InfoExtractor):
_VALID_URL = r'(?P<proto>https?)://(?:.+?\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.*?)\.html(?:\?.*)?'
_TESTS = [{
@ -24,6 +27,7 @@ class XHamsterIE(InfoExtractor):
'uploader': 'Ruseful2011',
'duration': 893,
'age_limit': 18,
'tags': ['Amateur', 'MILFs', 'POV', 'Reality', 'Sexy', 'Office', 'Oral', 'Boss', 'Fake Hub']
},
}, {
'url': 'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd',
@ -35,6 +39,7 @@ class XHamsterIE(InfoExtractor):
'uploader': 'jojo747400',
'duration': 200,
'age_limit': 18,
'tags:': ['Britney Spears', 'Berühmtheiten', 'HD Videos', 'Sexy', 'Sexy Booty']
},
'params': {
'skip_download': True,
@ -50,6 +55,7 @@ class XHamsterIE(InfoExtractor):
'uploader': 'parejafree',
'duration': 72,
'age_limit': 18,
'tags': []
},
'params': {
'skip_download': True,
@ -84,6 +90,9 @@ class XHamsterIE(InfoExtractor):
r'<title[^>]*>(.+?)(?:,\s*[^,]*?\s*Porn\s*[^,]*?:\s*xHamster[^<]*| - xHamster\.com)</title>'],
webpage, 'title')
video_tags = re.findall(r'<meta itemprop="name" content="(.+?)"', webpage)[2:]
# Only a few videos have a description
mobj = re.search(r'<span>Description: </span>([^<]+)', webpage)
description = mobj.group(1) if mobj else None
@ -154,6 +163,7 @@ class XHamsterIE(InfoExtractor):
'dislike_count': int_or_none(dislike_count),
'comment_count': int_or_none(comment_count),
'age_limit': age_limit,
'tags': video_tags,
'formats': formats,
}