[vidzi] Fix extraction

This commit is contained in:
Yen Chi Hsuan 2016-02-26 14:26:07 +08:00
parent 481888294d
commit 8f4a2124a9
2 changed files with 31 additions and 15 deletions

View file

@ -8,7 +8,7 @@ from ..utils import int_or_none
class JWPlatformBaseIE(InfoExtractor): class JWPlatformBaseIE(InfoExtractor):
def _parse_jwplayer_data(self, jwplayer_data, video_id): def _parse_jwplayer_data(self, jwplayer_data, video_id, require_title=True):
video_data = jwplayer_data['playlist'][0] video_data = jwplayer_data['playlist'][0]
subtitles = {} subtitles = {}
for track in video_data['tracks']: for track in video_data['tracks']:
@ -19,7 +19,7 @@ class JWPlatformBaseIE(InfoExtractor):
for source in video_data['sources']: for source in video_data['sources']:
source_url = self._proto_relative_url(source['file']) source_url = self._proto_relative_url(source['file'])
source_type = source.get('type') or '' source_type = source.get('type') or ''
if source_type == 'application/vnd.apple.mpegurl': if source_type in ('application/vnd.apple.mpegurl', 'hls'):
formats.extend(self._extract_m3u8_formats( formats.extend(self._extract_m3u8_formats(
source_url, video_id, 'mp4', 'm3u8_native', fatal=False)) source_url, video_id, 'mp4', 'm3u8_native', fatal=False))
elif source_type.startswith('audio'): elif source_type.startswith('audio'):
@ -37,7 +37,7 @@ class JWPlatformBaseIE(InfoExtractor):
return { return {
'id': video_id, 'id': video_id,
'title': video_data['title'], 'title': video_data['title'] if require_title else video_data.get('title'),
'description': video_data.get('description'), 'description': video_data.get('description'),
'thumbnail': self._proto_relative_url(video_data.get('image')), 'thumbnail': self._proto_relative_url(video_data.get('image')),
'timestamp': int_or_none(video_data.get('pubdate')), 'timestamp': int_or_none(video_data.get('pubdate')),

View file

@ -1,11 +1,16 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
from .common import InfoExtractor import re
from ..utils import smuggle_url
from .jwplatform import JWPlatformBaseIE
from ..utils import (
base36,
js_to_json,
)
class VidziIE(InfoExtractor): class VidziIE(JWPlatformBaseIE):
_VALID_URL = r'https?://(?:www\.)?vidzi\.tv/(?P<id>\w+)' _VALID_URL = r'https?://(?:www\.)?vidzi\.tv/(?P<id>\w+)'
_TEST = { _TEST = {
'url': 'http://vidzi.tv/cghql9yq6emu.html', 'url': 'http://vidzi.tv/cghql9yq6emu.html',
@ -14,7 +19,6 @@ class VidziIE(InfoExtractor):
'id': 'cghql9yq6emu', 'id': 'cghql9yq6emu',
'ext': 'mp4', 'ext': 'mp4',
'title': 'youtube-dl test video 1\\\\2\'3/4<5\\\\6ä7↭', 'title': 'youtube-dl test video 1\\\\2\'3/4<5\\\\6ä7↭',
'uploader': 'vidzi.tv',
}, },
'params': { 'params': {
# m3u8 download # m3u8 download
@ -29,11 +33,23 @@ class VidziIE(InfoExtractor):
title = self._html_search_regex( title = self._html_search_regex(
r'(?s)<h2 class="video-title">(.*?)</h2>', webpage, 'title') r'(?s)<h2 class="video-title">(.*?)</h2>', webpage, 'title')
# Vidzi now uses jwplayer, which can be handled by GenericIE mobj = re.search(r"}\('(.+)',36,(\d+),'([^']+)'\.split\('\|'\)", webpage)
return { code, count, symbols = mobj.groups()
'_type': 'url_transparent',
'id': video_id, count = int(count)
'title': title, symbols = symbols.split('|')
'url': smuggle_url(url, {'to_generic': True}),
'ie_key': 'Generic', while count:
} count -= 1
if symbols[count]:
code = re.sub(r'\b%s\b' % base36(count), symbols[count], code)
code = code.replace('\\\'', '\'')
jwplayer_data = self._parse_json(
self._search_regex(r'setup\(([^)]+)\)', code, 'jwplayer data'),
video_id, transform_source=js_to_json)
info_dict = self._parse_jwplayer_data(jwplayer_data, video_id, require_title=False)
info_dict['title'] = title
return info_dict