mirror of
https://github.com/l1ving/youtube-dl
synced 2025-03-10 16:32:14 +08:00
tiktok fix code
This commit is contained in:
parent
8cddcb0322
commit
17880cbaca
@ -4,13 +4,26 @@ import youtube_dl
|
|||||||
|
|
||||||
class TikTokTestYoutubeDl(unittest.TestCase):
|
class TikTokTestYoutubeDl(unittest.TestCase):
|
||||||
def test_meta_data(self):
|
def test_meta_data(self):
|
||||||
url = 'https://www.tiktok.com/@danieltbraun/video/6817099671043853574'
|
url = 'https://www.tiktok.com/@oriangaon/video/6807126376001441030'
|
||||||
params = {}
|
params = {}
|
||||||
ydl = youtube_dl.YoutubeDL(params)
|
ydl = youtube_dl.YoutubeDL(params)
|
||||||
info = ydl.extract_info(url, download=False)
|
info = ydl.extract_info(url, download=False)
|
||||||
self.assertEquals(info['share_count'], 121)
|
self.assertEquals(info['id'], '6807126376001441030')
|
||||||
|
self.assertEquals(info['url'], 'https://www.tiktok.com/@oriangaon/video/6807126376001441030')
|
||||||
|
self.assertEquals(info['title'], '#foryou #foyou Mmmmm....,,')
|
||||||
|
self.assertEquals(info['uploader'], 'Oriangaon')
|
||||||
|
self.assertEquals(info['timestamp'], 1584907616)
|
||||||
|
self.assertEquals(info['thumbnail'],
|
||||||
|
'https://p16-va-default.akamaized.net/obj/tos-maliva-p-0068/d1a8fbd3e42dda3a1baa01ee9edad289')
|
||||||
|
self.assertGreaterEqual(info['view_count'], 79864)
|
||||||
|
self.assertEquals(info['uploader_id'], '6772113344733955077')
|
||||||
|
self.assertFalse(info['is_live'])
|
||||||
|
self.assertEquals(info['live_status'], 'not_live')
|
||||||
|
self.assertGreaterEqual(info['like_count'], 2213)
|
||||||
|
self.assertGreaterEqual(info['share_count'], 109)
|
||||||
|
self.assertGreaterEqual(info['comment_count'], 40)
|
||||||
|
self.assertEquals(info['duration'], 10)
|
||||||
|
self.assertEquals(info['ext'], 'mp.4')
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
@ -2,7 +2,6 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
import ast
|
import ast
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
import requests
|
|
||||||
import json
|
import json
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@ -11,11 +10,7 @@ from ..utils import (
|
|||||||
int_or_none,
|
int_or_none,
|
||||||
str_or_none,
|
str_or_none,
|
||||||
try_get,
|
try_get,
|
||||||
url_or_none,
|
url_or_none)
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
# TODO: add bs4, newspaper, and requests to requirements.txt
|
|
||||||
|
|
||||||
|
|
||||||
class TikTokBaseIE(InfoExtractor):
|
class TikTokBaseIE(InfoExtractor):
|
||||||
@ -70,8 +65,6 @@ class TikTokBaseIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class TikTokIE(TikTokBaseIE):
|
class TikTokIE(TikTokBaseIE):
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
https?://
|
https?://
|
||||||
@ -103,52 +96,31 @@ class TikTokIE(TikTokBaseIE):
|
|||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = url.split('/')[-1]
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
# extract meta data using the official api
|
|
||||||
# Response JSON contains: provider url, title, html, author_name, height, thumbnail_width, width, version,
|
|
||||||
# author_url, thumbnail_height, thumbnail_url, type, provider_name (tiktok)
|
|
||||||
|
|
||||||
json_api = self._download_json('https://www.tiktok.com/oembed?url=' + url, video_id)
|
json_api = self._download_json('https://www.tiktok.com/oembed?url=' + url, video_id)
|
||||||
|
|
||||||
# extract metadata with beautifulSoup
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
soup = BeautifulSoup(webpage, features="html.parser")
|
soup = BeautifulSoup(webpage, features="html.parser")
|
||||||
h2 = soup.find_all("h2", {"class": "jsx-1038045583 jsx-3192540912 jsx-2150087249 video-meta-count"})
|
|
||||||
data = h2[0].text.split(' ')
|
|
||||||
likes_count = self.numeric_convert(data[0])
|
|
||||||
comments_count = self.numeric_convert(data[3])
|
|
||||||
json_next_data = soup.find(id='__NEXT_DATA__')
|
json_next_data = soup.find(id='__NEXT_DATA__')
|
||||||
props = json_next_data.contents[0]
|
props = json_next_data.contents[0]
|
||||||
json_data_encode = json.dumps(props.encode('utf-8'))
|
json_data_encode = json.dumps(props.encode('utf-8'))
|
||||||
ast_le = ast.literal_eval(json_data_encode)
|
ast_le = ast.literal_eval(json_data_encode)
|
||||||
data_dict = json.loads(ast_le)
|
data_dict = json.loads(ast_le)
|
||||||
timestamp = self.numeric_convert(data_dict['props']['pageProps']['videoData']['itemInfos']['createTime'])
|
|
||||||
|
|
||||||
shares = data_dict['props']['pageProps']['videoData']['itemInfos']['shareCount']
|
item_info = data_dict['props']['pageProps']['videoData']['itemInfos']
|
||||||
views = data_dict['props']['pageProps']['videoData']['itemInfos']['playCount']
|
timestamp = int(item_info['createTime'])
|
||||||
duration = data_dict['props']['pageProps']['videoData']['itemInfos']['video']['videoMeta']['duration']
|
shares = item_info['shareCount']
|
||||||
provider_id = data_dict['props']['pageProps']['videoData']['itemInfos']['authorId']
|
views = item_info['playCount']
|
||||||
|
duration = item_info['video']['videoMeta']['duration']
|
||||||
# TODO: check on formats
|
provider_id = item_info['authorId']
|
||||||
|
comments_count = item_info['commentCount']
|
||||||
|
likes_count = item_info['diggCount']
|
||||||
|
|
||||||
return self.info_dict(video_id, str(url), json_api['title'],
|
return self.info_dict(video_id, str(url), json_api['title'],
|
||||||
json_api['author_name'], timestamp, json_api['thumbnail_url'],
|
json_api['author_name'], timestamp, json_api['thumbnail_url'],
|
||||||
views, provider_id, False, 'not_live', likes_count, shares, '', comments_count,duration)
|
views, provider_id, False, 'not_live', likes_count, shares, '', comments_count, duration)
|
||||||
|
|
||||||
def numeric_convert(self, unicode):
|
def info_dict(self, video_id, url, video_title,
|
||||||
if 'K' in unicode:
|
|
||||||
unicode=unicode[:-1]
|
|
||||||
return int(float(unicode)*1000)
|
|
||||||
if 'M' in unicode:
|
|
||||||
unicode=unicode[:-1]
|
|
||||||
return int(float(unicode)*100000)
|
|
||||||
else:
|
|
||||||
return int(unicode)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def info_dict (self, video_id, url, video_title,
|
|
||||||
uploader, timestamp, thumbnail,
|
uploader, timestamp, thumbnail,
|
||||||
view_count, uploader_id, is_live, live_status
|
view_count, uploader_id, is_live, live_status
|
||||||
, likes_count, shares_count, subtitles, comment_count, duration):
|
, likes_count, shares_count, subtitles, comment_count, duration):
|
||||||
@ -167,11 +139,13 @@ class TikTokIE(TikTokBaseIE):
|
|||||||
'share_count': shares_count,
|
'share_count': shares_count,
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
'comment_count': comment_count,
|
'comment_count': comment_count,
|
||||||
'duration': duration
|
'duration': duration,
|
||||||
|
'ext':'mp.4'
|
||||||
|
|
||||||
}
|
}
|
||||||
return info_dict
|
return info_dict
|
||||||
|
|
||||||
|
|
||||||
class TikTokUserIE(TikTokBaseIE):
|
class TikTokUserIE(TikTokBaseIE):
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
https?://
|
https?://
|
||||||
|
Loading…
x
Reference in New Issue
Block a user