1
0
mirror of https://github.com/l1ving/youtube-dl synced 2025-03-07 00:27:15 +08:00

[telegram] Add new extractor

This commit is contained in:
ESWZY 2020-05-05 20:04:41 +08:00
parent 00eb865b3c
commit 74e1c027a3
2 changed files with 103 additions and 0 deletions

View File

@ -1106,6 +1106,7 @@ from .tele13 import Tele13IE
from .telebruxelles import TeleBruxellesIE
from .telecinco import TelecincoIE
from .telegraaf import TelegraafIE
from .telegram import TelegramIE
from .telemb import TeleMBIE
from .telequebec import (
TeleQuebecIE,

View File

@ -0,0 +1,102 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
unified_timestamp,
int_or_none,
parse_duration,
)
class TelegramIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?t\.me/s/(?P<id>[\s\S]+)'
_TESTS = [
{
'url': 'https://t.me/s/telegram/86',
'md5': 'afd6c7f574fead14e35ca83e3705e01d',
'info_dict': {
'id': 'telegram/86',
'ext': 'mp4',
'title': 'Telegram News Telegram',
'description': 'This video should give you an idea of how the new account switching feature works available on Android today and coming soon to other platforms. ✨🌟⭐️ Happy holidays!',
'thumbnail': 're:https://cdn(.*).telesco.pe/file/(.*)',
'uploader': 'Telegram News',
'upload_date': '20171230',
'timestamp': 1514667477,
}
}, {
'url': 'https://t.me/s/leehsienloong/382',
'md5': '01559de5a145d0547f4c53bd5340549d',
'info_dict': {
'id': 'leehsienloong/382',
'ext': 'mp4',
'title': 'Lee Hsien Loong Telegram',
'description': 'Dropped by Sengkang West this morning to say hi to everyone, and see how residents were doing. I was happy to see many young families out spending time with their little ones!',
'thumbnail': 're:https://cdn(.*).telesco.pe/file/(.*)',
'uploader': 'Lee Hsien Loong',
'upload_date': '20200315',
'timestamp': 1584276195,
'duration': 57,
}
}, {
'url': 'https://t.me/s/durov/82',
'only_matching': True,
}, {
'url': 'https://t.me/s/durov/83',
'only_matching': True,
}
]
def _real_extract(self, url):
video_id = self._match_id(url)
original_webpage = self._download_webpage(url, video_id)
this_div = self._search_regex(
r'<div .* data-post="' + video_id + r'" .*>([\s\S]*?)</time></a>',
original_webpage, 'pageblock')
title = self._html_search_regex(
r'<title>(.+?)</title>',
original_webpage, 'title')
description = self._html_search_regex(
r'<div class="tgme_widget_message_text.*?>([\s\S]*?)</div>',
this_div, 'description', default=None)
url = self._search_regex(
r'<video src="(.*?)"',
this_div, None)
thumbnail = self._search_regex(
r"background-image:url\('([\s\S]*?)'\)",
this_div, 'thumbnail', default=None)
uploader = self._html_search_regex(
r'<a class="tgme_widget_message_owner_name" .*><span dir="auto">(.*)</span>',
this_div, 'uploader', default=None)
timestamp = unified_timestamp(
self._search_regex(
r'datetime="(.*)"',
this_div, 'upload_time', default=None)
)
duration = self._html_search_regex(
r'<time class="message_video_duration.*?>(.*?)</time>',
this_div, 'duration', default=None)
duration = int_or_none(parse_duration(duration))
info_dict = {
'id': video_id,
'title': title,
'description': description,
'url': url,
'thumbnail': thumbnail,
'uploader': uploader,
'timestamp': timestamp,
'duration': duration,
}
return info_dict