extract video tags from xhamster movie page

2025-03-13 07:17:29 +08:00 · 2017-01-15 16:27:46 +01:00 · 2017-01-15 16:27:46 +01:00 · 1028acbd04
commit 1028acbd04
parent a7acf868a5
1 changed file with 10 additions and 0 deletions
--- a/youtube_dl/extractor/xhamster.py
+++ b/youtube_dl/extractor/xhamster.py
@@ -1,3 +1,4 @@
+# coding=utf-8
 from __future__ import unicode_literals

 import re
@@ -11,6 +12,8 @@ from ..utils import (
 )


+
+
 class XHamsterIE(InfoExtractor):
    _VALID_URL = r'(?P<proto>https?)://(?:.+?\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.*?)\.html(?:\?.*)?'
    _TESTS = [{
@@ -24,6 +27,7 @@ class XHamsterIE(InfoExtractor):
            'uploader': 'Ruseful2011',
            'duration': 893,
            'age_limit': 18,
+            'tags': ['Amateur', 'MILFs', 'POV', 'Reality', 'Sexy', 'Office', 'Oral', 'Boss', 'Fake Hub']
        },
    }, {
        'url': 'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd',
@@ -35,6 +39,7 @@ class XHamsterIE(InfoExtractor):
            'uploader': 'jojo747400',
            'duration': 200,
            'age_limit': 18,
+            'tags:': ['Britney Spears', 'Berühmtheiten', 'HD Videos', 'Sexy', 'Sexy Booty']
        },
        'params': {
            'skip_download': True,
@@ -50,6 +55,7 @@ class XHamsterIE(InfoExtractor):
            'uploader': 'parejafree',
            'duration': 72,
            'age_limit': 18,
+            'tags': []
        },
        'params': {
            'skip_download': True,
@@ -84,6 +90,9 @@ class XHamsterIE(InfoExtractor):
             r'<title[^>]*>(.+?)(?:,\s*[^,]*?\s*Porn\s*[^,]*?:\s*xHamster[^<]*| - xHamster\.com)</title>'],
            webpage, 'title')

+        video_tags = re.findall(r'<meta itemprop="name" content="(.+?)"', webpage)[2:]
+
+
        # Only a few videos have an description
        mobj = re.search(r'<span>Description: </span>([^<]+)', webpage)
        description = mobj.group(1) if mobj else None
@@ -154,6 +163,7 @@ class XHamsterIE(InfoExtractor):
            'dislike_count': int_or_none(dislike_count),
            'comment_count': int_or_none(comment_count),
            'age_limit': age_limit,
+            'tags': video_tags,
            'formats': formats,
        }