[downloader/external] Fix cookie support

This commit is contained in:
bashonly 2023-07-04 16:40:56 -05:00 committed by dirkf
parent 8334ec961b
commit 21438a4194
3 changed files with 256 additions and 30 deletions

View file

@ -12,20 +12,65 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import ( from test.helper import (
FakeLogger, FakeLogger,
FakeYDL,
http_server_port, http_server_port,
try_rm, try_rm,
) )
from youtube_dl import YoutubeDL from youtube_dl import YoutubeDL
from youtube_dl.compat import compat_http_server from youtube_dl.compat import (
from youtube_dl.utils import encodeFilename compat_http_cookiejar_Cookie,
from youtube_dl.downloader.external import Aria2pFD compat_http_server,
compat_kwargs,
)
from youtube_dl.utils import (
encodeFilename,
join_nonempty,
)
from youtube_dl.downloader.external import (
Aria2cFD,
Aria2pFD,
AxelFD,
CurlFD,
FFmpegFD,
HttpieFD,
WgetFD,
)
import threading import threading
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
TEST_SIZE = 10 * 1024 TEST_SIZE = 10 * 1024
TEST_COOKIE = {
'version': 0,
'name': 'test',
'value': 'ytdlp',
'port': None,
'port_specified': False,
'domain': '.example.com',
'domain_specified': True,
'domain_initial_dot': False,
'path': '/',
'path_specified': True,
'secure': False,
'expires': None,
'discard': False,
'comment': None,
'comment_url': None,
'rest': {},
}
TEST_COOKIE_VALUE = join_nonempty('name', 'value', delim='=', from_dict=TEST_COOKIE)
TEST_INFO = {'url': 'http://www.example.com/'}
def cookiejar_Cookie(**cookie_args):
return compat_http_cookiejar_Cookie(**compat_kwargs(cookie_args))
def ifExternalFDAvailable(externalFD):
return unittest.skipUnless(externalFD.available(),
externalFD.get_basename() + ' not found')
class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler): class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
def log_message(self, format, *args): def log_message(self, format, *args):
@ -70,7 +115,7 @@ class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
assert False, 'unrecognised server path' assert False, 'unrecognised server path'
@unittest.skipUnless(Aria2pFD.available(), 'aria2p module not found') @ifExternalFDAvailable(Aria2pFD)
class TestAria2pFD(unittest.TestCase): class TestAria2pFD(unittest.TestCase):
def setUp(self): def setUp(self):
self.httpd = compat_http_server.HTTPServer( self.httpd = compat_http_server.HTTPServer(
@ -111,5 +156,103 @@ class TestAria2pFD(unittest.TestCase):
}) })
@ifExternalFDAvailable(HttpieFD)
class TestHttpieFD(unittest.TestCase):
def test_make_cmd(self):
with FakeYDL() as ydl:
downloader = HttpieFD(ydl, {})
self.assertEqual(
downloader._make_cmd('test', TEST_INFO),
['http', '--download', '--output', 'test', 'http://www.example.com/'])
# Test cookie header is added
ydl.cookiejar.set_cookie(cookiejar_Cookie(**TEST_COOKIE))
self.assertEqual(
downloader._make_cmd('test', TEST_INFO),
['http', '--download', '--output', 'test',
'http://www.example.com/', 'Cookie:' + TEST_COOKIE_VALUE])
@ifExternalFDAvailable(AxelFD)
class TestAxelFD(unittest.TestCase):
def test_make_cmd(self):
with FakeYDL() as ydl:
downloader = AxelFD(ydl, {})
self.assertEqual(
downloader._make_cmd('test', TEST_INFO),
['axel', '-o', 'test', '--', 'http://www.example.com/'])
# Test cookie header is added
ydl.cookiejar.set_cookie(cookiejar_Cookie(**TEST_COOKIE))
self.assertEqual(
downloader._make_cmd('test', TEST_INFO),
['axel', '-o', 'test', '-H', 'Cookie: ' + TEST_COOKIE_VALUE,
'--max-redirect=0', '--', 'http://www.example.com/'])
@ifExternalFDAvailable(WgetFD)
class TestWgetFD(unittest.TestCase):
def test_make_cmd(self):
with FakeYDL() as ydl:
downloader = WgetFD(ydl, {})
self.assertNotIn('--load-cookies', downloader._make_cmd('test', TEST_INFO))
# Test cookiejar tempfile arg is added
ydl.cookiejar.set_cookie(cookiejar_Cookie(**TEST_COOKIE))
self.assertIn('--load-cookies', downloader._make_cmd('test', TEST_INFO))
@ifExternalFDAvailable(CurlFD)
class TestCurlFD(unittest.TestCase):
def test_make_cmd(self):
with FakeYDL() as ydl:
downloader = CurlFD(ydl, {})
self.assertNotIn('--cookie', downloader._make_cmd('test', TEST_INFO))
# Test cookie header is added
ydl.cookiejar.set_cookie(cookiejar_Cookie(**TEST_COOKIE))
self.assertIn('--cookie', downloader._make_cmd('test', TEST_INFO))
self.assertIn(TEST_COOKIE_VALUE, downloader._make_cmd('test', TEST_INFO))
@ifExternalFDAvailable(Aria2cFD)
class TestAria2cFD(unittest.TestCase):
def test_make_cmd(self):
with FakeYDL() as ydl:
downloader = Aria2cFD(ydl, {})
downloader._make_cmd('test', TEST_INFO)
self.assertFalse(hasattr(downloader, '_cookies_tempfile'))
# Test cookiejar tempfile arg is added
ydl.cookiejar.set_cookie(cookiejar_Cookie(**TEST_COOKIE))
cmd = downloader._make_cmd('test', TEST_INFO)
self.assertIn('--load-cookies=%s' % downloader._cookies_tempfile, cmd)
@ifExternalFDAvailable(FFmpegFD)
class TestFFmpegFD(unittest.TestCase):
_args = []
def _test_cmd(self, args):
self._args = args
def test_make_cmd(self):
with FakeYDL() as ydl:
downloader = FFmpegFD(ydl, {})
downloader._debug_cmd = self._test_cmd
info_dict = TEST_INFO.copy()
info_dict['ext'] = 'mp4'
downloader._call_downloader('test', info_dict)
self.assertEqual(self._args, [
'ffmpeg', '-y', '-i', 'http://www.example.com/',
'-c', 'copy', '-f', 'mp4', 'file:test'])
# Test cookies arg is added
ydl.cookiejar.set_cookie(cookiejar_Cookie(**TEST_COOKIE))
downloader._call_downloader('test', info_dict)
self.assertEqual(self._args, [
'ffmpeg', '-y', '-cookies', TEST_COOKIE_VALUE + '; path=/; domain=.example.com;\r\n',
'-i', 'http://www.example.com/', '-c', 'copy', '-f', 'mp4', 'file:test'])
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View file

@ -13,9 +13,7 @@ from ..utils import (
error_to_compat_str, error_to_compat_str,
format_bytes, format_bytes,
shell_quote, shell_quote,
T,
timeconvert, timeconvert,
traverse_obj,
) )
@ -379,9 +377,6 @@ class FileDownloader(object):
else '%.2f' % sleep_interval)) else '%.2f' % sleep_interval))
time.sleep(sleep_interval) time.sleep(sleep_interval)
info_dict['http_headers'] = dict(traverse_obj(info_dict, (
'http_headers', T(dict.items), lambda _, pair: pair[0].lower() != 'cookie'))) or None
return self.real_download(filename, info_dict) return self.real_download(filename, info_dict)
def real_download(self, filename, info_dict): def real_download(self, filename, info_dict):

View file

@ -1,9 +1,10 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import os.path import os
import re import re
import subprocess import subprocess
import sys import sys
import tempfile
import time import time
from .common import FileDownloader from .common import FileDownloader
@ -23,6 +24,8 @@ from ..utils import (
check_executable, check_executable,
is_outdated_version, is_outdated_version,
process_communicate_or_kill, process_communicate_or_kill,
T,
traverse_obj,
) )
@ -30,6 +33,7 @@ class ExternalFD(FileDownloader):
def real_download(self, filename, info_dict): def real_download(self, filename, info_dict):
self.report_destination(filename) self.report_destination(filename)
tmpfilename = self.temp_name(filename) tmpfilename = self.temp_name(filename)
self._cookies_tempfile = None
try: try:
started = time.time() started = time.time()
@ -42,6 +46,13 @@ class ExternalFD(FileDownloader):
# should take place # should take place
retval = 0 retval = 0
self.to_screen('[%s] Interrupted by user' % self.get_basename()) self.to_screen('[%s] Interrupted by user' % self.get_basename())
finally:
if self._cookies_tempfile and os.path.isfile(self._cookies_tempfile):
try:
os.remove(self._cookies_tempfile)
except OSError:
self.report_warning(
'Unable to delete temporary cookies file "{0}"'.format(self._cookies_tempfile))
if retval == 0: if retval == 0:
status = { status = {
@ -97,6 +108,16 @@ class ExternalFD(FileDownloader):
def _configuration_args(self, default=[]): def _configuration_args(self, default=[]):
return cli_configuration_args(self.params, 'external_downloader_args', default) return cli_configuration_args(self.params, 'external_downloader_args', default)
def _write_cookies(self):
if not self.ydl.cookiejar.filename:
tmp_cookies = tempfile.NamedTemporaryFile(suffix='.cookies', delete=False)
tmp_cookies.close()
self._cookies_tempfile = tmp_cookies.name
self.to_screen('[download] Writing temporary cookies file to "{0}"'.format(self._cookies_tempfile))
# real_download resets _cookies_tempfile; if it's None, save() will write to cookiejar.filename
self.ydl.cookiejar.save(self._cookies_tempfile, ignore_discard=True, ignore_expires=True)
return self.ydl.cookiejar.filename or self._cookies_tempfile
def _call_downloader(self, tmpfilename, info_dict): def _call_downloader(self, tmpfilename, info_dict):
""" Either overwrite this or implement _make_cmd """ """ Either overwrite this or implement _make_cmd """
cmd = [encodeArgument(a) for a in self._make_cmd(tmpfilename, info_dict)] cmd = [encodeArgument(a) for a in self._make_cmd(tmpfilename, info_dict)]
@ -110,13 +131,21 @@ class ExternalFD(FileDownloader):
self.to_stderr(stderr.decode('utf-8', 'replace')) self.to_stderr(stderr.decode('utf-8', 'replace'))
return p.returncode return p.returncode
@staticmethod
def _header_items(info_dict):
return traverse_obj(
info_dict, ('http_headers', T(dict.items), Ellipsis))
class CurlFD(ExternalFD): class CurlFD(ExternalFD):
AVAILABLE_OPT = '-V' AVAILABLE_OPT = '-V'
def _make_cmd(self, tmpfilename, info_dict): def _make_cmd(self, tmpfilename, info_dict):
cmd = [self.exe, '--location', '-o', tmpfilename] cmd = [self.exe, '--location', '-o', tmpfilename, '--compressed']
for key, val in info_dict['http_headers'].items(): cookie_header = self.ydl.cookiejar.get_cookie_header(info_dict['url'])
if cookie_header:
cmd += ['--cookie', cookie_header]
for key, val in self._header_items(info_dict):
cmd += ['--header', '%s: %s' % (key, val)] cmd += ['--header', '%s: %s' % (key, val)]
cmd += self._bool_option('--continue-at', 'continuedl', '-', '0') cmd += self._bool_option('--continue-at', 'continuedl', '-', '0')
cmd += self._valueless_option('--silent', 'noprogress') cmd += self._valueless_option('--silent', 'noprogress')
@ -151,8 +180,11 @@ class AxelFD(ExternalFD):
def _make_cmd(self, tmpfilename, info_dict): def _make_cmd(self, tmpfilename, info_dict):
cmd = [self.exe, '-o', tmpfilename] cmd = [self.exe, '-o', tmpfilename]
for key, val in info_dict['http_headers'].items(): for key, val in self._header_items(info_dict):
cmd += ['-H', '%s: %s' % (key, val)] cmd += ['-H', '%s: %s' % (key, val)]
cookie_header = self.ydl.cookiejar.get_cookie_header(info_dict['url'])
if cookie_header:
cmd += ['-H', 'Cookie: {0}'.format(cookie_header), '--max-redirect=0']
cmd += self._configuration_args() cmd += self._configuration_args()
cmd += ['--', info_dict['url']] cmd += ['--', info_dict['url']]
return cmd return cmd
@ -162,8 +194,10 @@ class WgetFD(ExternalFD):
AVAILABLE_OPT = '--version' AVAILABLE_OPT = '--version'
def _make_cmd(self, tmpfilename, info_dict): def _make_cmd(self, tmpfilename, info_dict):
cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies'] cmd = [self.exe, '-O', tmpfilename, '-nv', '--compression=auto']
for key, val in info_dict['http_headers'].items(): if self.ydl.cookiejar.get_cookie_header(info_dict['url']):
cmd += ['--load-cookies', self._write_cookies()]
for key, val in self._header_items(info_dict):
cmd += ['--header', '%s: %s' % (key, val)] cmd += ['--header', '%s: %s' % (key, val)]
cmd += self._option('--limit-rate', 'ratelimit') cmd += self._option('--limit-rate', 'ratelimit')
retry = self._option('--tries', 'retries') retry = self._option('--tries', 'retries')
@ -182,20 +216,57 @@ class WgetFD(ExternalFD):
class Aria2cFD(ExternalFD): class Aria2cFD(ExternalFD):
AVAILABLE_OPT = '-v' AVAILABLE_OPT = '-v'
@staticmethod
def _aria2c_filename(fn):
return fn if os.path.isabs(fn) else os.path.join('.', fn)
def _make_cmd(self, tmpfilename, info_dict): def _make_cmd(self, tmpfilename, info_dict):
cmd = [self.exe, '-c'] cmd = [self.exe, '-c',
cmd += self._configuration_args([ '--console-log-level=warn', '--summary-interval=0', '--download-result=hide',
'--min-split-size', '1M', '--max-connection-per-server', '4']) '--http-accept-gzip=true', '--file-allocation=none', '-x16', '-j16', '-s16']
dn = os.path.dirname(tmpfilename) if 'fragments' in info_dict:
if dn: cmd += ['--allow-overwrite=true', '--allow-piece-length-change=true']
cmd += ['--dir', dn] else:
cmd += ['--out', os.path.basename(tmpfilename)] cmd += ['--min-split-size', '1M']
for key, val in info_dict['http_headers'].items():
if self.ydl.cookiejar.get_cookie_header(info_dict['url']):
cmd += ['--load-cookies={0}'.format(self._write_cookies())]
for key, val in self._header_items(info_dict):
cmd += ['--header', '%s: %s' % (key, val)] cmd += ['--header', '%s: %s' % (key, val)]
cmd += self._configuration_args(['--max-connection-per-server', '4'])
cmd += ['--out', os.path.basename(tmpfilename)]
cmd += self._option('--max-overall-download-limit', 'ratelimit')
cmd += self._option('--interface', 'source_address') cmd += self._option('--interface', 'source_address')
cmd += self._option('--all-proxy', 'proxy') cmd += self._option('--all-proxy', 'proxy')
cmd += self._bool_option('--check-certificate', 'nocheckcertificate', 'false', 'true', '=') cmd += self._bool_option('--check-certificate', 'nocheckcertificate', 'false', 'true', '=')
cmd += self._bool_option('--remote-time', 'updatetime', 'true', 'false', '=') cmd += self._bool_option('--remote-time', 'updatetime', 'true', 'false', '=')
cmd += self._bool_option('--show-console-readout', 'noprogress', 'false', 'true', '=')
cmd += self._configuration_args()
# aria2c strips out spaces from the beginning/end of filenames and paths.
# We work around this issue by adding a "./" to the beginning of the
# filename and relative path, and adding a "/" at the end of the path.
# See: https://github.com/yt-dlp/yt-dlp/issues/276
# https://github.com/ytdl-org/youtube-dl/issues/20312
# https://github.com/aria2/aria2/issues/1373
dn = os.path.dirname(tmpfilename)
if dn:
cmd += ['--dir', self._aria2c_filename(dn) + os.path.sep]
if 'fragments' not in info_dict:
cmd += ['--out', self._aria2c_filename(os.path.basename(tmpfilename))]
cmd += ['--auto-file-renaming=false']
if 'fragments' in info_dict:
cmd += ['--file-allocation=none', '--uri-selector=inorder']
url_list_file = '%s.frag.urls' % (tmpfilename, )
url_list = []
for frag_index, fragment in enumerate(info_dict['fragments']):
fragment_filename = '%s-Frag%d' % (os.path.basename(tmpfilename), frag_index)
url_list.append('%s\n\tout=%s' % (fragment['url'], self._aria2c_filename(fragment_filename)))
stream, _ = self.sanitize_open(url_list_file, 'wb')
stream.write('\n'.join(url_list).encode())
stream.close()
cmd += ['-i', self._aria2c_filename(url_list_file)]
else:
cmd += ['--', info_dict['url']] cmd += ['--', info_dict['url']]
return cmd return cmd
@ -235,8 +306,10 @@ class Aria2pFD(ExternalFD):
} }
options['dir'] = os.path.dirname(tmpfilename) or os.path.abspath('.') options['dir'] = os.path.dirname(tmpfilename) or os.path.abspath('.')
options['out'] = os.path.basename(tmpfilename) options['out'] = os.path.basename(tmpfilename)
if self.ydl.cookiejar.get_cookie_header(info_dict['url']):
options['load-cookies'] = self._write_cookies()
options['header'] = [] options['header'] = []
for key, val in info_dict['http_headers'].items(): for key, val in self._header_items(info_dict):
options['header'].append('{0}: {1}'.format(key, val)) options['header'].append('{0}: {1}'.format(key, val))
download = aria2.add_uris([info_dict['url']], options) download = aria2.add_uris([info_dict['url']], options)
status = { status = {
@ -265,8 +338,16 @@ class HttpieFD(ExternalFD):
def _make_cmd(self, tmpfilename, info_dict): def _make_cmd(self, tmpfilename, info_dict):
cmd = ['http', '--download', '--output', tmpfilename, info_dict['url']] cmd = ['http', '--download', '--output', tmpfilename, info_dict['url']]
for key, val in info_dict['http_headers'].items(): for key, val in self._header_items(info_dict):
cmd += ['%s:%s' % (key, val)] cmd += ['%s:%s' % (key, val)]
# httpie 3.1.0+ removes the Cookie header on redirect, so this should be safe for now. [1]
# If we ever need cookie handling for redirects, we can export the cookiejar into a session. [2]
# 1: https://github.com/httpie/httpie/security/advisories/GHSA-9w4w-cpc8-h2fq
# 2: https://httpie.io/docs/cli/sessions
cookie_header = self.ydl.cookiejar.get_cookie_header(info_dict['url'])
if cookie_header:
cmd += ['Cookie:%s' % cookie_header]
return cmd return cmd
@ -312,7 +393,14 @@ class FFmpegFD(ExternalFD):
# if end_time: # if end_time:
# args += ['-t', compat_str(end_time - start_time)] # args += ['-t', compat_str(end_time - start_time)]
if info_dict['http_headers'] and re.match(r'^https?://', url): cookies = self.ydl.cookiejar.get_cookies_for_url(url)
if cookies:
args.extend(['-cookies', ''.join(
'{0}={1}; path={2}; domain={3};\r\n'.format(
cookie.name, cookie.value, cookie.path, cookie.domain)
for cookie in cookies)])
if info_dict.get('http_headers') and re.match(r'^https?://', url):
# Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv: # Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv:
# [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header. # [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header.
headers = handle_youtubedl_headers(info_dict['http_headers']) headers = handle_youtubedl_headers(info_dict['http_headers'])