From 70029bc348f27294b7f3e369f953167c1893c2bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Fri, 29 Jan 2016 11:27:11 +0100 Subject: [PATCH 1/6] [youtube:user] Require 'https?://' in the url (fixes #8356) It was matching www.youtube.com/embed/WpfukLMe1TM. The generic extractor automatically adds http:// if it's missing. --- test/test_all_urls.py | 2 +- youtube_dl/extractor/youtube.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_all_urls.py b/test/test_all_urls.py index a0c11e6c1..f5af184e6 100644 --- a/test/test_all_urls.py +++ b/test/test_all_urls.py @@ -56,7 +56,7 @@ class TestAllURLsMatching(unittest.TestCase): assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM/videos') def test_youtube_user_matching(self): - self.assertMatch('www.youtube.com/NASAgovVideo/videos', ['youtube:user']) + self.assertMatch('http://www.youtube.com/NASAgovVideo/videos', ['youtube:user']) def test_youtube_feeds(self): self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:watchlater']) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 92b9f3ae4..a24c73584 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1846,7 +1846,7 @@ class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor): class YoutubeUserIE(YoutubeChannelIE): IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)' - _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)' + _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)' _TEMPLATE_URL = 'https://www.youtube.com/user/%s/videos' IE_NAME = 'youtube:user' From 055f4172781dd2a43d60f17a91a1d0c1a5f3e6b9 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 29 Jan 2016 12:20:08 +0100 
Subject: [PATCH 2/6] release 2016.01.29 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 4ac7f9e93..d9f1e22b0 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.01.27' +__version__ = '2016.01.29' From 2b4f5e68d1517bcadac4b25ecbac3b143104b1c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Fri, 29 Jan 2016 15:36:33 +0100 Subject: [PATCH 3/6] [azubu] Add extractor for live streams (closes #8343) --- youtube_dl/extractor/__init__.py | 2 +- youtube_dl/extractor/azubu.py | 40 +++++++++++++++++++++++++++++++- 2 files changed, 40 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 532be7e4c..5e0d7d3dc 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -50,7 +50,7 @@ from .atresplayer import AtresPlayerIE from .atttechchannel import ATTTechChannelIE from .audimedia import AudiMediaIE from .audiomack import AudiomackIE, AudiomackAlbumIE -from .azubu import AzubuIE +from .azubu import AzubuIE, AzubuLiveIE from .baidu import BaiduVideoIE from .bambuser import BambuserIE, BambuserChannelIE from .bandcamp import BandcampIE, BandcampAlbumIE diff --git a/youtube_dl/extractor/azubu.py b/youtube_dl/extractor/azubu.py index 0961d339f..011edf128 100644 --- a/youtube_dl/extractor/azubu.py +++ b/youtube_dl/extractor/azubu.py @@ -3,7 +3,11 @@ from __future__ import unicode_literals import json from .common import InfoExtractor -from ..utils import float_or_none +from ..utils import ( + ExtractorError, + float_or_none, + sanitized_Request, +) class AzubuIE(InfoExtractor): @@ -91,3 +95,37 @@ class AzubuIE(InfoExtractor): 'view_count': view_count, 'formats': formats, } + + +class AzubuLiveIE(InfoExtractor): + _VALID_URL = r'http://www.azubu.tv/(?P<id>[^/]+)$' + + _TEST = 
{ + 'url': 'http://www.azubu.tv/MarsTVMDLen', + 'only_matching': True, + } + + def _real_extract(self, url): + user = self._match_id(url) + + info = self._download_json( + 'http://api.azubu.tv/public/modules/last-video/{0}/info'.format(user), + user)['data'] + if info['type'] != 'STREAM': + raise ExtractorError('{0} is not streaming live'.format(user), expected=True) + + req = sanitized_Request( + 'https://edge-elb.api.brightcove.com/playback/v1/accounts/3361910549001/videos/ref:' + info['reference_id']) + req.add_header('Accept', 'application/json;pk=BCpkADawqM1gvI0oGWg8dxQHlgT8HkdE2LnAlWAZkOlznO39bSZX726u4JqnDsK3MDXcO01JxXK2tZtJbgQChxgaFzEVdHRjaDoxaOu8hHOO8NYhwdxw9BzvgkvLUlpbDNUuDoc4E4wxDToV') + bc_info = self._download_json(req, user) + m3u8_url = next(source['src'] for source in bc_info['sources'] if source['container'] == 'M2TS') + formats = self._extract_m3u8_formats(m3u8_url, user, ext='mp4') + + return { + 'id': info['id'], + 'title': self._live_title(info['title']), + 'uploader_id': user, + 'formats': formats, + 'is_live': True, + 'thumbnail': bc_info['poster'], + } From 68a0ea15b4c20ed0174a82ee79a6d3c3474b0f69 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 30 Jan 2016 00:26:33 +0600 Subject: [PATCH 4/6] [cspan] Unescape path (Closes #8365) --- youtube_dl/extractor/cspan.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/cspan.py b/youtube_dl/extractor/cspan.py index b78edf729..b8b9d058d 100644 --- a/youtube_dl/extractor/cspan.py +++ b/youtube_dl/extractor/cspan.py @@ -113,7 +113,7 @@ class CSpanIE(InfoExtractor): 'tbr': int_or_none(get_text_attr(quality, 'bitrate')), }) if not formats: - path = get_text_attr(f, 'path') + path = unescapeHTML(get_text_attr(f, 'path')) if not path: continue formats = self._extract_m3u8_formats( From 350cf045d8f86497e8d79ae193b40cc44c8e670c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 30 Jan 2016 01:47:46 +0600 Subject: [PATCH 5/6] 
[extractor/common] Restrict checks when auto calculating tbr --- youtube_dl/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 33290fd74..b3d57dfce 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -828,7 +828,7 @@ class InfoExtractor(object): for f in formats: # Automatically determine tbr when missing based on abr and vbr (improves # formats sorting in some cases) - if 'tbr' not in f and 'abr' in f and 'vbr' in f: + if 'tbr' not in f and f.get('abr') is not None and f.get('vbr') is not None: f['tbr'] = f['abr'] + f['vbr'] def _formats_key(f): From 83ab8a79ccc3b6ef143f7d636c0118f7c3e5777b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 30 Jan 2016 01:48:54 +0600 Subject: [PATCH 6/6] [espn] Improve video id extraction (Closes #8368) --- youtube_dl/extractor/espn.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/espn.py b/youtube_dl/extractor/espn.py index 3762d8748..db4b263bc 100644 --- a/youtube_dl/extractor/espn.py +++ b/youtube_dl/extractor/espn.py @@ -53,8 +53,8 @@ class ESPNIE(InfoExtractor): webpage = self._download_webpage(url, video_id) video_id = self._search_regex( - r'class="video-play-button"[^>]+data-id="(\d+)', - webpage, 'video id') + r'class=(["\']).*?video-play-button.*?\1[^>]+data-id=["\'](?P<id>\d+)', + webpage, 'video id', group='id') cms = 'espn' if 'data-source="intl"' in webpage: