[viki] improve format extraction

This commit is contained in:
Remita Amine 2020-11-19 22:45:46 +01:00
parent daa25d4142
commit 59e583f7e8

View file

@ -1,6 +1,7 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import base64
import hashlib import hashlib
import hmac import hmac
import itertools import itertools
@ -9,6 +10,10 @@ import re
import time import time
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import (
compat_parse_qs,
compat_urllib_parse_urlparse,
)
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
int_or_none, int_or_none,
@ -165,19 +170,20 @@ class VikiIE(VikiBaseIE):
}, { }, {
# episode # episode
'url': 'http://www.viki.com/videos/44699v-boys-over-flowers-episode-1', 'url': 'http://www.viki.com/videos/44699v-boys-over-flowers-episode-1',
'md5': '5fa476a902e902783ac7a4d615cdbc7a', 'md5': '94e0e34fd58f169f40c184f232356cfe',
'info_dict': { 'info_dict': {
'id': '44699v', 'id': '44699v',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Boys Over Flowers - Episode 1', 'title': 'Boys Over Flowers - Episode 1',
'description': 'md5:b89cf50038b480b88b5b3c93589a9076', 'description': 'md5:b89cf50038b480b88b5b3c93589a9076',
'duration': 4204, 'duration': 4172,
'timestamp': 1270496524, 'timestamp': 1270496524,
'upload_date': '20100405', 'upload_date': '20100405',
'uploader': 'group8', 'uploader': 'group8',
'like_count': int, 'like_count': int,
'age_limit': 13, 'age_limit': 13,
} },
'expected_warnings': ['Unknown MIME type image/jpeg in DASH manifest'],
}, { }, {
# youtube external # youtube external
'url': 'http://www.viki.com/videos/50562v-poor-nastya-complete-episode-1', 'url': 'http://www.viki.com/videos/50562v-poor-nastya-complete-episode-1',
@ -194,14 +200,15 @@ class VikiIE(VikiBaseIE):
'uploader_id': 'ad14065n', 'uploader_id': 'ad14065n',
'like_count': int, 'like_count': int,
'age_limit': 13, 'age_limit': 13,
} },
'skip': 'Page not found!',
}, { }, {
'url': 'http://www.viki.com/player/44699v', 'url': 'http://www.viki.com/player/44699v',
'only_matching': True, 'only_matching': True,
}, { }, {
# non-English description # non-English description
'url': 'http://www.viki.com/videos/158036v-love-in-magic', 'url': 'http://www.viki.com/videos/158036v-love-in-magic',
'md5': '1713ae35df5a521b31f6dc40730e7c9c', 'md5': 'adf9e321a0ae5d0aace349efaaff7691',
'info_dict': { 'info_dict': {
'id': '158036v', 'id': '158036v',
'ext': 'mp4', 'ext': 'mp4',
@ -217,8 +224,11 @@ class VikiIE(VikiBaseIE):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
video = self._call_api( resp = self._download_json(
'videos/%s.json' % video_id, video_id, 'Downloading video JSON') 'https://www.viki.com/api/videos/' + video_id,
video_id, 'Downloading video JSON',
headers={'x-viki-app-ver': '4.0.57'})
video = resp['video']
self._check_errors(video) self._check_errors(video)
@ -265,60 +275,74 @@ class VikiIE(VikiBaseIE):
'subtitles': subtitles, 'subtitles': subtitles,
} }
streams = self._call_api(
'videos/%s/streams.json' % video_id, video_id,
'Downloading video streams JSON')
if 'external' in streams:
result.update({
'_type': 'url_transparent',
'url': streams['external']['url'],
})
return result
formats = [] formats = []
for format_id, stream_dict in streams.items():
height = int_or_none(self._search_regex( def add_format(format_id, format_dict, protocol='http'):
r'^(\d+)[pP]$', format_id, 'height', default=None)) # rtmps URLs does not seem to work
for protocol, format_dict in stream_dict.items(): if protocol == 'rtmps':
# rtmps URLs does not seem to work return
if protocol == 'rtmps': format_url = format_dict.get('url')
continue if not format_url:
format_url = format_dict['url'] return
if format_id == 'm3u8': qs = compat_parse_qs(compat_urllib_parse_urlparse(format_url).query)
m3u8_formats = self._extract_m3u8_formats( stream = qs.get('stream', [None])[0]
format_url, video_id, 'mp4', if stream:
entry_protocol='m3u8_native', format_url = base64.b64decode(stream).decode()
m3u8_id='m3u8-%s' % protocol, fatal=False) if format_id in ('m3u8', 'hls'):
# Despite CODECS metadata in m3u8 all video-only formats m3u8_formats = self._extract_m3u8_formats(
# are actually video+audio format_url, video_id, 'mp4',
for f in m3u8_formats: entry_protocol='m3u8_native',
if f.get('acodec') == 'none' and f.get('vcodec') != 'none': m3u8_id='m3u8-%s' % protocol, fatal=False)
f['acodec'] = None # Despite CODECS metadata in m3u8 all video-only formats
formats.extend(m3u8_formats) # are actually video+audio
elif format_id == 'mpd': for f in m3u8_formats:
formats.extend(self._extract_mpd_formats( if '_drm/index_' in f['url']:
format_url, video_id, 'mpd-%s' % protocol, fatal=False))
elif format_url.startswith('rtmp'):
mobj = re.search(
r'^(?P<url>rtmp://[^/]+/(?P<app>.+?))/(?P<playpath>mp4:.+)$',
format_url)
if not mobj:
continue continue
formats.append({ if f.get('acodec') == 'none' and f.get('vcodec') != 'none':
'format_id': 'rtmp-%s' % format_id, f['acodec'] = None
'ext': 'flv', formats.append(f)
'url': mobj.group('url'), elif format_id in ('mpd', 'dash'):
'play_path': mobj.group('playpath'), formats.extend(self._extract_mpd_formats(
'app': mobj.group('app'), format_url, video_id, 'mpd-%s' % protocol, fatal=False))
'page_url': url, elif format_url.startswith('rtmp'):
}) mobj = re.search(
else: r'^(?P<url>rtmp://[^/]+/(?P<app>.+?))/(?P<playpath>mp4:.+)$',
formats.append({ format_url)
'url': format_url, if not mobj:
'format_id': '%s-%s' % (format_id, protocol), return
'height': height, formats.append({
}) 'format_id': 'rtmp-%s' % format_id,
'ext': 'flv',
'url': mobj.group('url'),
'play_path': mobj.group('playpath'),
'app': mobj.group('app'),
'page_url': url,
})
else:
formats.append({
'url': format_url,
'format_id': '%s-%s' % (format_id, protocol),
'height': int_or_none(self._search_regex(
r'^(\d+)[pP]$', format_id, 'height', default=None)),
})
for format_id, format_dict in (resp.get('streams') or {}).items():
add_format(format_id, format_dict)
if not formats:
streams = self._call_api(
'videos/%s/streams.json' % video_id, video_id,
'Downloading video streams JSON')
if 'external' in streams:
result.update({
'_type': 'url_transparent',
'url': streams['external']['url'],
})
return result
for format_id, stream_dict in streams.items():
for protocol, format_dict in stream_dict.items():
add_format(format_id, format_dict, protocol)
self._sort_formats(formats) self._sort_formats(formats)
result['formats'] = formats result['formats'] = formats