[YoutubeDL] Support DASH manifest downloading

This commit is contained in:
Yen Chi Hsuan 2015-06-03 23:10:18 +08:00
parent 8f94784124
commit 6800d3372f
3 changed files with 60 additions and 0 deletions

View file

@ -0,0 +1,50 @@
from __future__ import unicode_literals
from .common import FileDownloader
from ..compat import compat_urllib_request
import re
class DashSegmentsFD(FileDownloader):
    """
    Download segments in a DASH manifest

    The initialization segment and every media segment listed in the info
    dict are fetched in order and concatenated into a single output file.
    """

    def real_download(self, filename, info_dict):
        """
        Fetch all segments and write them, concatenated, to filename.

        info_dict is read for the keys 'id', 'url' (used as the base URL for
        relative segment URLs), 'initialization_url' and 'segment_urls'.
        Data is written to a temporary file first and renamed to filename
        once every segment has been appended. Returns True on completion.
        """
        self.report_destination(filename)
        tmpfilename = self.temp_name(filename)
        base_url = info_dict['url']
        segment_urls = info_dict['segment_urls']

        # Running total of bytes written, reported by the progress hook below.
        self.byte_counter = 0

        def append_url_to_file(outf, target_url, target_name):
            """Download target_url completely and append its bytes to outf."""
            self.to_screen('[DashSegments] %s: Downloading %s' % (info_dict['id'], target_name))
            req = compat_urllib_request.Request(target_url)
            response = self.ydl.urlopen(req)
            try:
                data = response.read()
            finally:
                # Release the connection even if reading fails; otherwise one
                # response object per segment is left open.
                response.close()
            outf.write(data)
            self.byte_counter += len(data)

        def combine_url(base_url, target_url):
            """Return target_url if absolute (http/https), else join it to base_url."""
            if re.match(r'^https?://', target_url):
                return target_url
            # Only insert a separator when the base URL lacks a trailing
            # slash, so the result never contains an accidental '//'.
            separator = '' if base_url.endswith('/') else '/'
            return '%s%s%s' % (base_url, separator, target_url)

        with open(tmpfilename, 'wb') as outf:
            # The initialization segment must come first in the output.
            append_url_to_file(
                outf, combine_url(base_url, info_dict['initialization_url']),
                'initialization segment')
            for i, segment_url in enumerate(segment_urls):
                append_url_to_file(
                    outf, combine_url(base_url, segment_url),
                    'segment %d / %d' % (i + 1, len(segment_urls)))

        self.try_rename(tmpfilename, filename)

        self._hook_progress({
            'downloaded_bytes': self.byte_counter,
            'total_bytes': self.byte_counter,
            'filename': filename,
            'status': 'finished',
        })

        return True

View file

@ -6,6 +6,7 @@ import socket
import time import time
from .common import FileDownloader from .common import FileDownloader
from .dash import DashSegmentsFD
from ..compat import ( from ..compat import (
compat_urllib_request, compat_urllib_request,
compat_urllib_error, compat_urllib_error,
@ -19,6 +20,9 @@ from ..utils import (
class HttpFD(FileDownloader): class HttpFD(FileDownloader):
def real_download(self, filename, info_dict): def real_download(self, filename, info_dict):
if info_dict.get('initialization_url') and list(filter(None, info_dict.get('segment_urls', []))):
return DashSegmentsFD(self.ydl, self.params).real_download(filename, info_dict)
url = info_dict['url'] url = info_dict['url']
tmpfilename = self.temp_name(filename) tmpfilename = self.temp_name(filename)
stream = None stream = None

View file

@ -802,6 +802,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# TODO implement WebVTT downloading # TODO implement WebVTT downloading
pass pass
elif mime_type.startswith('audio/') or mime_type.startswith('video/'): elif mime_type.startswith('audio/') or mime_type.startswith('video/'):
segment_list = r.find('{urn:mpeg:DASH:schema:MPD:2011}SegmentList')
format_id = r.attrib['id'] format_id = r.attrib['id']
video_url = url_el.text video_url = url_el.text
filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength')) filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength'))
@ -815,6 +816,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'filesize': filesize, 'filesize': filesize,
'fps': int_or_none(r.attrib.get('frameRate')), 'fps': int_or_none(r.attrib.get('frameRate')),
} }
if segment_list:
f.update({
'initialization_url': segment_list.find('{urn:mpeg:DASH:schema:MPD:2011}Initialization').attrib['sourceURL'],
'segment_urls': [segment.attrib.get('media') for segment in segment_list.findall('{urn:mpeg:DASH:schema:MPD:2011}SegmentURL')]
})
try: try:
existing_format = next( existing_format = next(
fo for fo in formats fo for fo in formats