[bloomberg] Extract the available formats (closes #2776)
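The Bloomberg extractor now uses a new helper method in the InfoExtractor class, `_extract_f4m_formats`, to list every format found in the f4m manifest. The f4m downloader then picks the requested format by matching the bitrate (`tbr`) in the info dict, falling back to the highest bitrate when none is requested.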
This commit is contained in:
parent
4958ae2058
commit
31bb8d3f51
3 changed files with 28 additions and 5 deletions
|
@ -220,6 +220,7 @@ class F4mFD(FileDownloader):
|
||||||
|
|
||||||
def real_download(self, filename, info_dict):
|
def real_download(self, filename, info_dict):
|
||||||
man_url = info_dict['url']
|
man_url = info_dict['url']
|
||||||
|
requested_bitrate = info_dict.get('tbr')
|
||||||
self.to_screen('[download] Downloading f4m manifest')
|
self.to_screen('[download] Downloading f4m manifest')
|
||||||
manifest = self.ydl.urlopen(man_url).read()
|
manifest = self.ydl.urlopen(man_url).read()
|
||||||
self.report_destination(filename)
|
self.report_destination(filename)
|
||||||
|
@ -233,8 +234,14 @@ class F4mFD(FileDownloader):
|
||||||
|
|
||||||
doc = etree.fromstring(manifest)
|
doc = etree.fromstring(manifest)
|
||||||
formats = [(int(f.attrib.get('bitrate', -1)), f) for f in doc.findall(_add_ns('media'))]
|
formats = [(int(f.attrib.get('bitrate', -1)), f) for f in doc.findall(_add_ns('media'))]
|
||||||
formats = sorted(formats, key=lambda f: f[0])
|
if requested_bitrate is None:
|
||||||
rate, media = formats[-1]
|
# get the best format
|
||||||
|
formats = sorted(formats, key=lambda f: f[0])
|
||||||
|
rate, media = formats[-1]
|
||||||
|
else:
|
||||||
|
rate, media = list(filter(
|
||||||
|
lambda f: int(f[0]) == requested_bitrate, formats))[0]
|
||||||
|
|
||||||
base_url = compat_urlparse.urljoin(man_url, media.attrib['url'])
|
base_url = compat_urlparse.urljoin(man_url, media.attrib['url'])
|
||||||
bootstrap = base64.b64decode(doc.find(_add_ns('bootstrapInfo')).text)
|
bootstrap = base64.b64decode(doc.find(_add_ns('bootstrapInfo')).text)
|
||||||
metadata = base64.b64decode(media.find(_add_ns('metadata')).text)
|
metadata = base64.b64decode(media.find(_add_ns('metadata')).text)
|
||||||
|
|
|
@ -10,7 +10,7 @@ class BloombergIE(InfoExtractor):
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html',
|
'url': 'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html',
|
||||||
'md5': '7bf08858ff7c203c870e8a6190e221e5',
|
# The md5 checksum changes
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'qurhIVlJSB6hzkVi229d8g',
|
'id': 'qurhIVlJSB6hzkVi229d8g',
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
|
@ -31,8 +31,7 @@ class BloombergIE(InfoExtractor):
|
||||||
return {
|
return {
|
||||||
'id': name.split('-')[-1],
|
'id': name.split('-')[-1],
|
||||||
'title': title,
|
'title': title,
|
||||||
'url': f4m_url,
|
'formats': self._extract_f4m_formats(f4m_url, name),
|
||||||
'ext': 'flv',
|
|
||||||
'description': self._og_search_description(webpage),
|
'description': self._og_search_description(webpage),
|
||||||
'thumbnail': self._og_search_thumbnail(webpage),
|
'thumbnail': self._og_search_thumbnail(webpage),
|
||||||
}
|
}
|
||||||
|
|
|
@ -18,6 +18,7 @@ from ..utils import (
|
||||||
clean_html,
|
clean_html,
|
||||||
compiled_regex_type,
|
compiled_regex_type,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
int_or_none,
|
||||||
RegexNotFoundError,
|
RegexNotFoundError,
|
||||||
sanitize_filename,
|
sanitize_filename,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
|
@ -590,6 +591,22 @@ class InfoExtractor(object):
|
||||||
self.to_screen(msg)
|
self.to_screen(msg)
|
||||||
time.sleep(timeout)
|
time.sleep(timeout)
|
||||||
|
|
||||||
|
def _extract_f4m_formats(self, manifest_url, video_id):
|
||||||
|
manifest = self._download_xml(manifest_url, video_id)
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
for media_el in manifest.findall('{http://ns.adobe.com/f4m/1.0}media'):
|
||||||
|
formats.append({
|
||||||
|
'url': manifest_url,
|
||||||
|
'ext': 'flv',
|
||||||
|
'tbr': int_or_none(media_el.attrib.get('bitrate')),
|
||||||
|
'width': int_or_none(media_el.attrib.get('width')),
|
||||||
|
'height': int_or_none(media_el.attrib.get('height')),
|
||||||
|
})
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
return formats
|
||||||
|
|
||||||
|
|
||||||
class SearchInfoExtractor(InfoExtractor):
|
class SearchInfoExtractor(InfoExtractor):
|
||||||
"""
|
"""
|
||||||
|
|
Loading…
Reference in a new issue