From 7def35712a7047578643f18eaf6dda79fd8c9291 Mon Sep 17 00:00:00 2001 From: TRox1972 Date: Sat, 21 May 2016 17:48:17 +0200 Subject: [PATCH 01/11] [vidio] Add extractor (Closes #7195) [Vidio] fix fallback value and wrap duration in int_or_none [Vidio] don't use video_id for _html_search_regex() --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/vidio.py | 48 ++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+) create mode 100644 youtube_dl/extractor/vidio.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 3b5143ace..ed4e39574 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -910,6 +910,7 @@ from .videomore import ( ) from .videopremium import VideoPremiumIE from .videott import VideoTtIE +from .vidio import VidioIE from .vidme import ( VidmeIE, VidmeUserIE, diff --git a/youtube_dl/extractor/vidio.py b/youtube_dl/extractor/vidio.py new file mode 100644 index 000000000..d17c663fd --- /dev/null +++ b/youtube_dl/extractor/vidio.py @@ -0,0 +1,48 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re +from .common import InfoExtractor + +from ..utils import int_or_none + + +class VidioIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?vidio\.com/watch/(?P\d{6})-(?P[^/?]+)' + _TEST = { + 'url': 'http://www.vidio.com/watch/165683-dj_ambred-booyah-live-2015', + 'info_dict': { + 'id': '165683', + 'title': 'DJ_AMBRED - Booyah (Live 2015)', + 'ext': 'mp4', + 'thumbnail': 'https://cdn0-a.production.vidio.static6.com/uploads/video/image/165683/dj_ambred-booyah-live-2015-bfb2ba.jpg', + 'description': 'md5:27dc15f819b6a78a626490881adbadf8', + 'duration': 149, + }, + 'params': { + # m3u8 download + 'skip_download': True + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id, display_id = mobj.group('id', 'display_id') + + webpage = self._download_webpage(url, display_id) + + video_data = self._parse_json(self._html_search_regex( + r'data-json-clips\s*=\s*"\[(.+)\]"', webpage, 'video data'), display_id) + + formats = self._extract_m3u8_formats( + video_data['sources'][0]['file'], + display_id, ext='mp4') + + return { + 'id': video_id, + 'title': self._og_search_title(webpage), + 'formats': formats, + 'thumbnail': video_data.get('image'), + 'description': self._og_search_description(webpage), + 'duration': int_or_none(video_data.get('clip_duration')), + } From 0fc832e1b2c8f48298e135d42818a16bfba4d3ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 4 Jun 2016 16:47:43 +0700 Subject: [PATCH 02/11] [vidio] Improve (Closes #9562) --- youtube_dl/extractor/vidio.py | 65 ++++++++++++++++++++++++----------- 1 file changed, 45 insertions(+), 20 deletions(-) diff --git a/youtube_dl/extractor/vidio.py b/youtube_dl/extractor/vidio.py index d17c663fd..6898042de 100644 --- a/youtube_dl/extractor/vidio.py +++ b/youtube_dl/extractor/vidio.py @@ -2,28 +2,30 @@ from __future__ import unicode_literals import re -from .common import InfoExtractor +from .common import InfoExtractor from ..utils import int_or_none class VidioIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?vidio\.com/watch/(?P\d{6})-(?P[^/?]+)' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?vidio\.com/watch/(?P\d+)-(?P[^/?#&]+)' + _TESTS = [{ 'url': 'http://www.vidio.com/watch/165683-dj_ambred-booyah-live-2015', + 'md5': 'cd2801394afc164e9775db6a140b91fe', 'info_dict': { 'id': '165683', - 'title': 'DJ_AMBRED - Booyah (Live 2015)', + 'display_id': 'dj_ambred-booyah-live-2015', 'ext': 'mp4', - 'thumbnail': 'https://cdn0-a.production.vidio.static6.com/uploads/video/image/165683/dj_ambred-booyah-live-2015-bfb2ba.jpg', + 'title': 'DJ_AMBRED - Booyah (Live 2015)', 'description': 'md5:27dc15f819b6a78a626490881adbadf8', - 'duration': 149, + 'thumbnail': 're:^https?://.*\.jpg$', + 'duration': 149, + 'like_count': int, }, - 'params': { - # m3u8 download - 'skip_download': True - } - } + }, { + 'url': 'https://www.vidio.com/watch/77949-south-korea-test-fires-missile-that-can-strike-all-of-the-north', + 'only_matching': True, + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -31,18 +33,41 @@ class VidioIE(InfoExtractor): webpage = self._download_webpage(url, display_id) - video_data = self._parse_json(self._html_search_regex( - r'data-json-clips\s*=\s*"\[(.+)\]"', webpage, 'video data'), display_id) + title = self._og_search_title(webpage) - formats = self._extract_m3u8_formats( - video_data['sources'][0]['file'], - display_id, ext='mp4') + m3u8_url, duration, thumbnail = [None] * 3 + + clips = self._parse_json( + self._html_search_regex( + r'data-json-clips\s*=\s*(["\'])(?P\[.+?\])\1', + webpage, 'video data', default='[]', group='data'), + display_id, fatal=False) + if clips: + clip = clips[0] + m3u8_url = clip.get('sources', [{}])[0].get('file') + duration = clip.get('clip_duration') + thumbnail = clip.get('image') + + m3u8_url = m3u8_url or self._search_regex( + r'data(?:-vjs)?-clip-hls-url=(["\'])(?P.+?)\1', webpage, 'hls url') + formats = self._extract_m3u8_formats(m3u8_url, display_id, 'mp4', entry_protocol='m3u8_native') + + duration = int_or_none(duration or self._search_regex( + r'data-video-duration=(["\'])(?P\d+)\1', webpage, 'duration')) + thumbnail = thumbnail or self._og_search_thumbnail(webpage) + + like_count = int_or_none(self._search_regex( + (r']+data-comment-vote-count=["\'](\d+)', + r']+class=["\'].*?\blike(?:__|-)count\b.*?["\'][^>]*>\s*(\d+)'), + webpage, 'like count', fatal=False)) return { 'id': video_id, - 'title': self._og_search_title(webpage), - 'formats': formats, - 'thumbnail': video_data.get('image'), + 'display_id': display_id, + 'title': title, 'description': self._og_search_description(webpage), - 'duration': int_or_none(video_data.get('clip_duration')), + 'thumbnail': thumbnail, + 'duration': duration, + 'like_count': like_count, + 'formats': formats, } From 1ae6c83bceb6dbc7093fe35ddafcde08dd0151a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 5 Jun 2016 00:43:55 +0700 Subject: [PATCH 03/11] [compat] Add compat_input --- youtube_dl/compat.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index 06e5f3ff6..fabac9fd2 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -482,6 +482,11 @@ if sys.version_info < (3, 0) and sys.platform == 'win32': else: compat_getpass = getpass.getpass +try: + compat_input = raw_input +except NameError: # Python 3 + compat_input = input + # Python < 2.6.5 require kwargs to be bytes try: def _testfunc(x): From e92b552a102f509066a605b26d6df38eb73764b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 5 Jun 2016 00:44:51 +0700 Subject: [PATCH 04/11] [devscripts/buildserver] Use compat_input from compat --- devscripts/buildserver.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/devscripts/buildserver.py b/devscripts/buildserver.py index f7979c43e..fc99c3213 100644 --- a/devscripts/buildserver.py +++ b/devscripts/buildserver.py @@ -13,6 +13,7 @@ import os.path sys.path.insert(0, os.path.dirname(os.path.dirname((os.path.abspath(__file__))))) from youtube_dl.compat import ( + compat_input, compat_http_server, compat_str, compat_urlparse, @@ -30,11 +31,6 @@ try: except ImportError: # Python 2 import SocketServer as compat_socketserver -try: - compat_input = raw_input -except NameError: # Python 3 - compat_input = input - class BuildHTTPServer(compat_socketserver.ThreadingMixIn, compat_http_server.HTTPServer): allow_reuse_address = True From db56f281d9c5d57cb2c44a2ea356a9a0a12b3b4d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 5 Jun 2016 00:47:26 +0700 Subject: [PATCH 05/11] [devscripts/create-github-release] Add script for releasing on GitHub Yet only Basic authentication is supported either via .netrc or by manual input --- devscripts/create-github-release.py | 112 ++++++++++++++++++++++++++++ 1 file changed, 112 insertions(+) create mode 100644 devscripts/create-github-release.py diff --git a/devscripts/create-github-release.py b/devscripts/create-github-release.py new file mode 100644 index 000000000..f74d39490 --- /dev/null +++ b/devscripts/create-github-release.py @@ -0,0 +1,112 @@ +#!/usr/bin/env python +from __future__ import unicode_literals + +import base64 +import json +import mimetypes +import netrc +import optparse +import os +import sys + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from youtube_dl.compat import ( + compat_basestring, + compat_input, + compat_getpass, + compat_print, + compat_urllib_request, +) +from youtube_dl.utils import ( + make_HTTPS_handler, + sanitized_Request, +) + + +class GitHubReleaser(object): + _API_URL = 'https://api.github.com/repos/rg3/youtube-dl/releases' + _UPLOADS_URL = 'https://uploads.github.com/repos/rg3/youtube-dl/releases/%s/assets?name=%s' + _NETRC_MACHINE = 'github.com' + + def __init__(self, debuglevel=0): + self._init_github_account() + https_handler = make_HTTPS_handler({}, debuglevel=debuglevel) + self._opener = compat_urllib_request.build_opener(https_handler) + + def _init_github_account(self): + try: + info = netrc.netrc().authenticators(self._NETRC_MACHINE) + if info is not None: + self._username = info[0] + self._password = info[2] + compat_print('Using GitHub credentials found in .netrc...') + return + else: + compat_print('No GitHub credentials found in .netrc') + except (IOError, netrc.NetrcParseError): + compat_print('Unable to parse .netrc') + self._username = compat_input( + 'Type your GitHub username or email address and press [Return]: ') + self._password = compat_getpass( + 'Type your GitHub password and press [Return]: ') + + def _call(self, req): + if isinstance(req, compat_basestring): + req = sanitized_Request(req) + # Authorizing manually since GitHub does not response with 401 with + # WWW-Authenticate header set (see + # https://developer.github.com/v3/#basic-authentication) + b64 = base64.b64encode( + ('%s:%s' % (self._username, self._password)).encode('utf-8')).decode('ascii') + req.add_header('Authorization', 'Basic %s' % b64) + response = self._opener.open(req).read().decode('utf-8') + return json.loads(response) + + def list_releases(self): + return self._call(self._API_URL) + + def create_release(self, tag_name, name=None, body='', draft=False, prerelease=False): + data = { + 'tag_name': tag_name, + 'target_commitish': 'master', + 'name': name, + 'body': body, + 'draft': draft, + 'prerelease': prerelease, + } + req = sanitized_Request(self._API_URL, json.dumps(data).encode('utf-8')) + return self._call(req) + + def create_asset(self, release_id, asset): + asset_name = os.path.basename(asset) + url = self._UPLOADS_URL % (release_id, asset_name) + # Our files are small enough to be loaded directly into memory. + data = open(asset, 'rb').read() + req = sanitized_Request(url, data) + mime_type, _ = mimetypes.guess_type(asset_name) + req.add_header('Content-Type', mime_type or 'application/octet-stream') + return self._call(req) + + +def main(): + parser = optparse.OptionParser(usage='%prog VERSION BUILDPATH') + options, args = parser.parse_args() + if len(args) != 2: + parser.error('Expected a version and a build directory') + + version, build_path = args + + releaser = GitHubReleaser(debuglevel=0) + + new_release = releaser.create_release( + version, name='youtube-dl %s' % version, draft=True, prerelease=True) + release_id = new_release['id'] + + for asset in os.listdir(build_path): + compat_print('Uploading %s...' % asset) + releaser.create_asset(release_id, os.path.join(build_path, asset)) + + +if __name__ == '__main__': + main() From 39b32571df802ef869db1067454aa654f3f66235 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 5 Jun 2016 00:48:33 +0700 Subject: [PATCH 06/11] [devscripts/release.sh] Release to GitHub --- devscripts/release.sh | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/devscripts/release.sh b/devscripts/release.sh index 1a7b1e054..87e8eda50 100755 --- a/devscripts/release.sh +++ b/devscripts/release.sh @@ -95,17 +95,16 @@ RELEASE_FILES="youtube-dl youtube-dl.exe youtube-dl-$version.tar.gz" (cd build/$version/ && sha256sum $RELEASE_FILES > SHA2-256SUMS) (cd build/$version/ && sha512sum $RELEASE_FILES > SHA2-512SUMS) -/bin/echo -e "\n### Signing and uploading the new binaries to yt-dl.org ..." +/bin/echo -e "\n### Signing and uploading the new binaries to GitHub..." for f in $RELEASE_FILES; do gpg --passphrase-repeat 5 --detach-sig "build/$version/$f"; done -echo 'TODO: upload on GitHub' -exit 1 +ROOT=$(pwd) +python devscripts/create-github-release.py $version "$ROOT/build/$version" ssh ytdl@yt-dl.org "sh html/update_latest.sh $version" /bin/echo -e "\n### Now switching to gh-pages..." git clone --branch gh-pages --single-branch . build/gh-pages -ROOT=$(pwd) ( set -e ORIGIN_URL=$(git config --get remote.origin.url) From 2c347352677f023678ffd488a51b19f54b97fa36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 5 Jun 2016 01:44:13 +0700 Subject: [PATCH 07/11] [youtube] Add itags 256 and 258 --- youtube_dl/extractor/youtube.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index f3f102c30..6c9f77d95 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -344,6 +344,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'preference': -50, 'container': 'm4a_dash'}, '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'preference': -50, 'container': 'm4a_dash'}, '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'preference': -50, 'container': 'm4a_dash'}, + '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'preference': -50, 'container': 'm4a_dash'}, + '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'preference': -50, 'container': 'm4a_dash'}, # Dash webm '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40}, From 1e236d7e2350e055bbe230b12490e4369aaa0956 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 5 Jun 2016 03:16:05 +0700 Subject: [PATCH 08/11] [downloader/hls] Do not rely on EXT-X-PLAYLIST-TYPE:EVENT --- youtube_dl/downloader/hls.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 62136ee54..049fb78ce 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -23,11 +23,17 @@ class HlsFD(FragmentFD): UNSUPPORTED_FEATURES = ( r'#EXT-X-KEY:METHOD=(?!NONE)', # encrypted streams [1] r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [2] + # Live streams heuristic does not always work (e.g. geo restricted to Germany # http://hls-geo.daserste.de/i/videoportal/Film/c_620000/622873/format,716451,716457,716450,716458,716459,.mp4.csmil/index_4_av.m3u8?null=0) # r'#EXT-X-MEDIA-SEQUENCE:(?!0$)', # live streams [3] - r'#EXT-X-PLAYLIST-TYPE:EVENT', # media segments may be appended to the end of - # event media playlists [4] + + # This heuristic also is not correct since segments may not be appended as well. + # Twitch vods have EXT-X-PLAYLIST-TYPE:EVENT despite no segments will definitely + # be appended to the end of the playlist. + # r'#EXT-X-PLAYLIST-TYPE:EVENT', # media segments may be appended to the end of + # event media playlists [4] + # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.4 # 2. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2 # 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.2 From 631d4c87ee84183917fcdf5db59e1cd1bb48d9a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 5 Jun 2016 03:19:44 +0700 Subject: [PATCH 09/11] [twitch:vod] Use native hls --- youtube_dl/extractor/twitch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index f7b98e190..d898f14c3 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -260,7 +260,7 @@ class TwitchVodIE(TwitchItemBaseIE): 'nauth': access_token['token'], 'nauthsig': access_token['sig'], })), - item_id, 'mp4') + item_id, 'mp4', entry_protocol='m3u8_native') self._prefer_source(formats) info['formats'] = formats From 51c4d85ce788497584bd056d571ed9b7b24c9651 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 5 Jun 2016 03:21:43 +0700 Subject: [PATCH 10/11] [downloader/hls] PEP 8 --- youtube_dl/downloader/hls.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 049fb78ce..8e4a7189a 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -32,7 +32,7 @@ class HlsFD(FragmentFD): # Twitch vods have EXT-X-PLAYLIST-TYPE:EVENT despite no segments will definitely # be appended to the end of the playlist. # r'#EXT-X-PLAYLIST-TYPE:EVENT', # media segments may be appended to the end of - # event media playlists [4] + # # event media playlists [4] # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.4 # 2. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2 From 633b444fd29aa9d8b3ba722285ae2475ae66595f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 5 Jun 2016 03:31:10 +0700 Subject: [PATCH 11/11] [downloader/hls] Correct comment on twitch vods --- youtube_dl/downloader/hls.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 8e4a7189a..54f2108e9 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -29,8 +29,8 @@ class HlsFD(FragmentFD): # r'#EXT-X-MEDIA-SEQUENCE:(?!0$)', # live streams [3] # This heuristic also is not correct since segments may not be appended as well. - # Twitch vods have EXT-X-PLAYLIST-TYPE:EVENT despite no segments will definitely - # be appended to the end of the playlist. + # Twitch vods of finished streams have EXT-X-PLAYLIST-TYPE:EVENT despite + # no segments will definitely be appended to the end of the playlist. # r'#EXT-X-PLAYLIST-TYPE:EVENT', # media segments may be appended to the end of # # event media playlists [4]