[livestream] Parse SMIL (#2713)
This commit is contained in:
parent
b490b8849a
commit
cbf915f3f6
2 changed files with 64 additions and 10 deletions
|
@ -5,11 +5,14 @@ import json
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
compat_str,
|
||||||
compat_urllib_parse_urlparse,
|
compat_urllib_parse_urlparse,
|
||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
xpath_with_ns,
|
ExtractorError,
|
||||||
compat_str,
|
find_xpath_attr,
|
||||||
|
int_or_none,
|
||||||
orderedSet,
|
orderedSet,
|
||||||
|
xpath_with_ns,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -24,20 +27,71 @@ class LivestreamIE(InfoExtractor):
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Live from Webster Hall NYC',
|
'title': 'Live from Webster Hall NYC',
|
||||||
'upload_date': '20121012',
|
'upload_date': '20121012',
|
||||||
|
'like_count': int,
|
||||||
|
'view_count': int,
|
||||||
|
'thumbnail': 're:^http://.*\.jpg$'
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
def _extract_video_info(self, video_data):
|
def _extract_video_info(self, video_data):
|
||||||
video_url = (
|
video_id = compat_str(video_data['id'])
|
||||||
video_data.get('progressive_url_hd') or
|
|
||||||
video_data.get('progressive_url')
|
FORMAT_KEYS = (
|
||||||
|
('sd', 'progressive_url'),
|
||||||
|
('hd', 'progressive_url_hd'),
|
||||||
)
|
)
|
||||||
|
formats = [{
|
||||||
|
'format_id': format_id,
|
||||||
|
'url': video_data[key],
|
||||||
|
'quality': i + 1,
|
||||||
|
} for i, (format_id, key) in enumerate(FORMAT_KEYS)
|
||||||
|
if video_data.get(key)]
|
||||||
|
|
||||||
|
smil_url = video_data.get('smil_url')
|
||||||
|
if smil_url:
|
||||||
|
_SWITCH_XPATH = (
|
||||||
|
'.//{http://www.w3.org/2001/SMIL20/Language}body/'
|
||||||
|
'{http://www.w3.org/2001/SMIL20/Language}switch')
|
||||||
|
smil_doc = self._download_xml(
|
||||||
|
smil_url, video_id, note='Downloading SMIL information')
|
||||||
|
|
||||||
|
title_node = find_xpath_attr(
|
||||||
|
smil_doc, './/{http://www.w3.org/2001/SMIL20/Language}meta',
|
||||||
|
'name', 'title')
|
||||||
|
if title_node is None:
|
||||||
|
self.report_warning('Cannot find SMIL id')
|
||||||
|
switch_node = smil_doc.find(_SWITCH_XPATH)
|
||||||
|
else:
|
||||||
|
title_id = title_node.attrib['content']
|
||||||
|
switch_node = find_xpath_attr(
|
||||||
|
smil_doc, _SWITCH_XPATH, 'id', title_id)
|
||||||
|
if switch_node is None:
|
||||||
|
raise ExtractorError('Cannot find switch node')
|
||||||
|
video_nodes = switch_node.findall(
|
||||||
|
'{http://www.w3.org/2001/SMIL20/Language}video')
|
||||||
|
|
||||||
|
for vn in video_nodes:
|
||||||
|
tbr = int_or_none(vn.attrib.get('system-bitrate'))
|
||||||
|
furl = (
|
||||||
|
'http://livestream-f.akamaihd.net/%s?v=3.0.3&fp=WIN%%2014,0,0,145&seek=%s' %
|
||||||
|
(vn.attrib['src'], vn.attrib['clipBegin']))
|
||||||
|
formats.append({
|
||||||
|
'url': furl,
|
||||||
|
'format_id': 'smil_%d' % tbr,
|
||||||
|
'ext': 'flv',
|
||||||
|
'tbr': tbr,
|
||||||
|
'preference': -1000,
|
||||||
|
})
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': compat_str(video_data['id']),
|
'id': video_id,
|
||||||
'url': video_url,
|
'formats': formats,
|
||||||
'title': video_data['caption'],
|
'title': video_data['caption'],
|
||||||
'thumbnail': video_data['thumbnail_url'],
|
'thumbnail': video_data.get('thumbnail_url'),
|
||||||
'upload_date': video_data['updated_at'].replace('-', '')[:8],
|
'upload_date': video_data['updated_at'].replace('-', '')[:8],
|
||||||
|
'like_count': video_data.get('likes', {}).get('total'),
|
||||||
|
'view_count': video_data.get('views'),
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
|
|
@ -242,8 +242,8 @@ else:
|
||||||
if sys.version_info >= (2,7):
|
if sys.version_info >= (2,7):
|
||||||
def find_xpath_attr(node, xpath, key, val):
|
def find_xpath_attr(node, xpath, key, val):
|
||||||
""" Find the xpath xpath[@key=val] """
|
""" Find the xpath xpath[@key=val] """
|
||||||
assert re.match(r'^[a-zA-Z]+$', key)
|
assert re.match(r'^[a-zA-Z-]+$', key)
|
||||||
assert re.match(r'^[a-zA-Z0-9@\s:._]*$', val)
|
assert re.match(r'^[a-zA-Z0-9@\s:._-]*$', val)
|
||||||
expr = xpath + u"[@%s='%s']" % (key, val)
|
expr = xpath + u"[@%s='%s']" % (key, val)
|
||||||
return node.find(expr)
|
return node.find(expr)
|
||||||
else:
|
else:
|
||||||
|
|
Loading…
Reference in a new issue