Merge remote-tracking branch 'origin/master'

This commit is contained in:
Philipp Hagemeister 2014-09-22 12:53:41 +02:00
commit 45c85d7ba1
6 changed files with 73 additions and 26 deletions

View file

@ -16,6 +16,7 @@ from ..utils import (
format_bytes, format_bytes,
encodeFilename, encodeFilename,
sanitize_open, sanitize_open,
xpath_text,
) )
@ -251,6 +252,8 @@ class F4mFD(FileDownloader):
# We only download the first fragment # We only download the first fragment
fragments_list = fragments_list[:1] fragments_list = fragments_list[:1]
total_frags = len(fragments_list) total_frags = len(fragments_list)
# For some akamai manifests we'll need to add a query to the fragment url
akamai_pv = xpath_text(doc, _add_ns('pv-2.0'))
tmpfilename = self.temp_name(filename) tmpfilename = self.temp_name(filename)
(dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb') (dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb')
@ -290,6 +293,8 @@ class F4mFD(FileDownloader):
for (seg_i, frag_i) in fragments_list: for (seg_i, frag_i) in fragments_list:
name = 'Seg%d-Frag%d' % (seg_i, frag_i) name = 'Seg%d-Frag%d' % (seg_i, frag_i)
url = base_url + name url = base_url + name
if akamai_pv:
url += '?' + akamai_pv.strip(';')
frag_filename = '%s-%s' % (tmpfilename, name) frag_filename = '%s-%s' % (tmpfilename, name)
success = http_dl.download(frag_filename, {'url': url}) success = http_dl.download(frag_filename, {'url': url})
if not success: if not success:

View file

@ -249,7 +249,10 @@ from .nosvideo import NosVideoIE
from .novamov import NovaMovIE from .novamov import NovaMovIE
from .nowness import NownessIE from .nowness import NownessIE
from .nowvideo import NowVideoIE from .nowvideo import NowVideoIE
from .npo import NPOIE from .npo import (
NPOIE,
TegenlichtVproIE,
)
from .nrk import ( from .nrk import (
NRKIE, NRKIE,
NRKTVIE, NRKTVIE,

View file

@ -16,9 +16,9 @@ class NBCIE(InfoExtractor):
_TEST = { _TEST = {
'url': 'http://www.nbc.com/chicago-fire/video/i-am-a-firefighter/2734188', 'url': 'http://www.nbc.com/chicago-fire/video/i-am-a-firefighter/2734188',
'md5': '54d0fbc33e0b853a65d7b4de5c06d64e', # md5 checksum is not stable
'info_dict': { 'info_dict': {
'id': 'u1RInQZRN7QJ', 'id': 'bTmnLCvIbaaH',
'ext': 'flv', 'ext': 'flv',
'title': 'I Am a Firefighter', 'title': 'I Am a Firefighter',
'description': 'An emergency puts Dawson\'sf irefighter skills to the ultimate test in this four-part digital series.', 'description': 'An emergency puts Dawson\'sf irefighter skills to the ultimate test in this four-part digital series.',

View file

@ -7,6 +7,7 @@ from ..utils import (
unified_strdate, unified_strdate,
parse_duration, parse_duration,
qualities, qualities,
url_basename,
) )
@ -55,7 +56,9 @@ class NPOIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id') video_id = mobj.group('id')
return self._get_info(video_id)
def _get_info(self, video_id):
metadata = self._download_json( metadata = self._download_json(
'http://e.omroep.nl/metadata/aflevering/%s' % video_id, 'http://e.omroep.nl/metadata/aflevering/%s' % video_id,
video_id, video_id,
@ -106,3 +109,30 @@ class NPOIE(InfoExtractor):
'duration': parse_duration(metadata.get('tijdsduur')), 'duration': parse_duration(metadata.get('tijdsduur')),
'formats': formats, 'formats': formats,
} }
class TegenlichtVproIE(NPOIE):
    # Extractor for episode pages under tegenlicht.vpro.nl/afleveringen/.
    # The page itself only exposes a media URN; that URN is looked up in the
    # rs.vpro.nl media API and the resulting 'mid' is handed to _get_info().
    IE_NAME = 'tegenlicht.vpro.nl'
    _VALID_URL = r'https?://tegenlicht\.vpro\.nl/afleveringen/.*?'

    _TESTS = [{
        'url': 'http://tegenlicht.vpro.nl/afleveringen/2012-2013/de-toekomst-komt-uit-afrika.html',
        'md5': 'f8065e4e5a7824068ed3c7e783178f2c',
        'info_dict': {
            'id': 'VPWON_1169289',
            'ext': 'm4v',
            'title': 'Tegenlicht',
            'description': 'md5:d6476bceb17a8c103c76c3b708f05dd1',
            'upload_date': '20130225',
        },
    }]

    def _real_extract(self, url):
        # The last path component of the URL doubles as the display id used
        # for both download requests below.
        page_name = url_basename(url)

        # Step 1: fetch the episode page and read its 'mediaurn' meta tag.
        media_urn = self._html_search_meta(
            'mediaurn', self._download_webpage(url, page_name))

        # Step 2: resolve the URN through the VPRO media API.
        api_url = 'http://rs.vpro.nl/v2/api/media/%s.json' % media_urn
        media_info = self._download_json(api_url, page_name)

        # Step 3: pass the 'mid' field on as the video id for _get_info().
        return self._get_info(media_info['mid'])

View file

@ -21,7 +21,7 @@ class SBSIE(InfoExtractor):
'md5': '3150cf278965eeabb5b4cea1c963fe0a', 'md5': '3150cf278965eeabb5b4cea1c963fe0a',
'info_dict': { 'info_dict': {
'id': '320403011771', 'id': '320403011771',
'ext': 'flv', 'ext': 'mp4',
'title': 'Dingo Conservation', 'title': 'Dingo Conservation',
'description': 'Dingoes are on the brink of extinction; most of the animals we think are dingoes are in fact crossbred with wild dogs. This family run a dingo conservation park to prevent their extinction', 'description': 'Dingoes are on the brink of extinction; most of the animals we think are dingoes are in fact crossbred with wild dogs. This family run a dingo conservation park to prevent their extinction',
'thumbnail': 're:http://.*\.jpg', 'thumbnail': 're:http://.*\.jpg',

View file

@ -5,6 +5,7 @@ import json
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
compat_str,
ExtractorError, ExtractorError,
xpath_with_ns, xpath_with_ns,
) )
@ -55,21 +56,19 @@ class ThePlatformIE(InfoExtractor):
body = meta.find(_x('smil:body')) body = meta.find(_x('smil:body'))
f4m_node = body.find(_x('smil:seq//smil:video')) f4m_node = body.find(_x('smil:seq//smil:video'))
if f4m_node is not None: if f4m_node is not None and '.f4m' in f4m_node.attrib['src']:
f4m_url = f4m_node.attrib['src'] f4m_url = f4m_node.attrib['src']
if 'manifest.f4m?' not in f4m_url: if 'manifest.f4m?' not in f4m_url:
f4m_url += '?' f4m_url += '?'
# the parameters are from syfy.com, other sites may use others, # the parameters are from syfy.com, other sites may use others,
# they also work for nbc.com # they also work for nbc.com
f4m_url += '&g=UXWGVKRWHFSP&hdcore=3.0.3' f4m_url += '&g=UXWGVKRWHFSP&hdcore=3.0.3'
formats = [{ formats = self._extract_f4m_formats(f4m_url, video_id)
'ext': 'flv',
'url': f4m_url,
}]
else: else:
base_url = head.find(_x('smil:meta')).attrib['base']
switch = body.find(_x('smil:switch'))
formats = [] formats = []
switch = body.find(_x('smil:switch'))
if switch is not None:
base_url = head.find(_x('smil:meta')).attrib['base']
for f in switch.findall(_x('smil:video')): for f in switch.findall(_x('smil:video')):
attr = f.attrib attr = f.attrib
width = int(attr['width']) width = int(attr['width'])
@ -85,6 +84,16 @@ class ThePlatformIE(InfoExtractor):
'height': height, 'height': height,
'vbr': vbr, 'vbr': vbr,
}) })
else:
switch = body.find(_x('smil:seq//smil:switch'))
for f in switch.findall(_x('smil:video')):
attr = f.attrib
vbr = int(attr['system-bitrate']) // 1000
formats.append({
'format_id': compat_str(vbr),
'url': attr['src'],
'vbr': vbr,
})
self._sort_formats(formats) self._sort_formats(formats)
return { return {