Compare commits
140 commits
b16744db9c
...
65d61c4536
Author | SHA1 | Date | |
---|---|---|---|
65d61c4536 | |||
4f50831f8c | |||
380b0da4da | |||
5a0e54f64f | |||
|
00ef748cc0 | ||
|
66ab0814c4 | ||
|
bbd3e7e999 | ||
|
21caaf2380 | ||
|
31f50c8194 | ||
|
7d58f0769a | ||
|
86e3cf5e58 | ||
|
2efc8de4d2 | ||
|
e4178b5af3 | ||
|
2d2a4bc832 | ||
|
7d965e6b65 | ||
|
abef53466d | ||
|
e7926ae9f4 | ||
|
87e578c9b8 | ||
|
0861812d72 | ||
|
b870181229 | ||
|
a25e9f3c84 | ||
|
aac33155e4 | ||
|
2b7dd3b2a2 | ||
|
44faa71b19 | ||
|
7bce2ad441 | ||
|
ca71e56c48 | ||
|
2a4e9faa77 | ||
|
74eef6bb5e | ||
|
1fa8b86f0b | ||
|
b2ba24bb02 | ||
|
a190b55964 | ||
|
b2741f2654 | ||
|
8465222041 | ||
|
4339910df3 | ||
|
eaaf4c6736 | ||
|
4566e6e53e | ||
|
1e8ccdd2eb | ||
|
cb9366eda5 | ||
|
d9d07a9581 | ||
|
825a40744b | ||
|
47214e46d8 | ||
|
1d8d5a93f7 | ||
|
1634b1d61e | ||
|
21438a4194 | ||
|
8334ec961b | ||
|
3801d36416 | ||
|
b383be9887 | ||
|
46fde7caee | ||
|
648dc5304c | ||
|
1720c04dc5 | ||
|
d5ef405c5d | ||
|
f47fdb9564 | ||
|
b6dff4073d | ||
|
f24bc9272e | ||
|
b08a580906 | ||
|
2500300c2a | ||
|
58fc5bde47 | ||
|
fa7f0effbe | ||
|
ebdc82c586 | ||
|
9112e668a5 | ||
|
07af47960f | ||
|
ae8ba2c319 | ||
|
d6433cbb2c | ||
|
ff75c300f5 | ||
|
a2534f7b88 | ||
|
b8a86dcf1a | ||
|
2389c7cbd3 | ||
|
ee731f3d00 | ||
|
1f7c6f8b2b | ||
|
d89c2137ba | ||
|
d1c6c5c4d6 | ||
|
6ed3433828 | ||
|
a85a875fef | ||
|
11cc3f3ad0 | ||
|
64d6dd64c8 | ||
|
211cbfd5d4 | ||
|
26035bde46 | ||
|
2da3fa04a6 | ||
|
735e87adfc | ||
|
fe7e13066c | ||
|
213d1d91bf | ||
|
f8253a5289 | ||
|
d6ae3b77cd | ||
|
9f4d83ff42 | ||
|
25124bd640 | ||
|
78da22489b | ||
|
557dbac173 | ||
|
cdf40b6aa6 | ||
|
3f6d2bd76f | ||
|
88f28f620b | ||
|
f35b757c82 | ||
|
45495228b7 | ||
|
6fece0a96b | ||
|
70ff013910 | ||
|
e8de54bce5 | ||
|
baa6c5e95c | ||
|
5c985d4f81 | ||
|
8c86fd33dc | ||
|
27d41d7365 | ||
|
0402710227 | ||
|
3e92c60fcd | ||
|
3da17834a4 | ||
|
f7ce98a21e | ||
|
e67e52a8f8 | ||
|
1d3751c3fe | ||
|
6067451e43 | ||
|
57802e632f | ||
|
2dd6c6edd8 | ||
|
dd9aa74bee | ||
|
42b098dd79 | ||
|
6f8c2635a5 | ||
|
de48105dd8 | ||
|
822f19f05d | ||
|
33db85c571 | ||
|
f33923cba7 | ||
|
e8198c517b | ||
|
bafb6dec72 | ||
|
4e04f10499 | ||
|
90c9f789d9 | ||
|
249f2b6316 | ||
|
d6b14ba316 | ||
|
30e986b834 | ||
|
58988c1421 | ||
|
e19ec52322 | ||
|
f2f90887ca | ||
|
cd987e6fca | ||
|
d947ffe8e3 | ||
|
384f632e8a | ||
|
9d17948b5a | ||
|
f316f5d4e3 | ||
|
bc6f94e459 | ||
|
be3392a0d4 | ||
|
6d829d8119 | ||
|
98b0cf1cd0 | ||
|
e9611a2a36 | ||
|
807e593a32 | ||
|
297fbff23b | ||
|
37cbdfa0e7 | ||
|
295736c9cb | ||
|
14ef89a8da |
80 changed files with 7081 additions and 1555 deletions
434
.github/workflows/ci.yml
vendored
434
.github/workflows/ci.yml
vendored
|
@ -1,81 +1,445 @@
|
||||||
name: CI
|
name: CI
|
||||||
on: [push, pull_request]
|
|
||||||
|
env:
|
||||||
|
all-cpython-versions: 2.6, 2.7, 3.2, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9, 3.10, 3.11, 3.12
|
||||||
|
main-cpython-versions: 2.7, 3.2, 3.5, 3.9, 3.11
|
||||||
|
pypy-versions: pypy-2.7, pypy-3.6, pypy-3.7
|
||||||
|
cpython-versions: main
|
||||||
|
test-set: core
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
inputs:
|
||||||
|
cpython-versions:
|
||||||
|
type: string
|
||||||
|
default: all
|
||||||
|
test-set:
|
||||||
|
type: string
|
||||||
|
default: core
|
||||||
|
pull_request:
|
||||||
|
inputs:
|
||||||
|
cpython-versions:
|
||||||
|
type: string
|
||||||
|
default: main
|
||||||
|
test-set:
|
||||||
|
type: string
|
||||||
|
default: both
|
||||||
|
workflow_dispatch:
|
||||||
|
inputs:
|
||||||
|
cpython-versions:
|
||||||
|
type: choice
|
||||||
|
description: CPython versions (main = 2.7, 3.2, 3.5, 3.9, 3.11)
|
||||||
|
options:
|
||||||
|
- all
|
||||||
|
- main
|
||||||
|
required: true
|
||||||
|
default: main
|
||||||
|
test-set:
|
||||||
|
type: choice
|
||||||
|
description: core, download
|
||||||
|
options:
|
||||||
|
- both
|
||||||
|
- core
|
||||||
|
- download
|
||||||
|
required: true
|
||||||
|
default: both
|
||||||
|
|
||||||
|
permissions:
|
||||||
|
contents: read
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
|
select:
|
||||||
|
name: Select tests from inputs
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
outputs:
|
||||||
|
cpython-versions: ${{ steps.run.outputs.cpython-versions }}
|
||||||
|
test-set: ${{ steps.run.outputs.test-set }}
|
||||||
|
own-pip-versions: ${{ steps.run.outputs.own-pip-versions }}
|
||||||
|
steps:
|
||||||
|
- name: Make version array
|
||||||
|
id: run
|
||||||
|
run: |
|
||||||
|
# Make a JSON Array from comma/space-separated string (no extra escaping)
|
||||||
|
json_list() { \
|
||||||
|
ret=""; IFS="${IFS},"; set -- $*; \
|
||||||
|
for a in "$@"; do \
|
||||||
|
ret=$(printf '%s"%s"' "${ret}${ret:+, }" "$a"); \
|
||||||
|
done; \
|
||||||
|
printf '[%s]' "$ret"; }
|
||||||
|
tests="${{ inputs.test-set || env.test-set }}"
|
||||||
|
[ $tests = both ] && tests="core download"
|
||||||
|
printf 'test-set=%s\n' "$(json_list $tests)" >> "$GITHUB_OUTPUT"
|
||||||
|
versions="${{ inputs.cpython-versions || env.cpython-versions }}"
|
||||||
|
if [ "$versions" = all ]; then \
|
||||||
|
versions="${{ env.all-cpython-versions }}"; else \
|
||||||
|
versions="${{ env.main-cpython-versions }}"; \
|
||||||
|
fi
|
||||||
|
printf 'cpython-versions=%s\n' \
|
||||||
|
"$(json_list ${versions}${versions:+, }${{ env.pypy-versions }})" >> "$GITHUB_OUTPUT"
|
||||||
|
# versions with a special get-pip.py in a per-version subdirectory
|
||||||
|
printf 'own-pip-versions=%s\n' \
|
||||||
|
"$(json_list 2.6, 2.7, 3.2, 3.3, 3.4, 3.5, 3.6)" >> "$GITHUB_OUTPUT"
|
||||||
tests:
|
tests:
|
||||||
name: Tests
|
name: Run tests
|
||||||
|
needs: select
|
||||||
|
permissions:
|
||||||
|
contents: read
|
||||||
|
packages: write
|
||||||
runs-on: ${{ matrix.os }}
|
runs-on: ${{ matrix.os }}
|
||||||
|
env:
|
||||||
|
PIP: python -m pip
|
||||||
|
PIP_DISABLE_PIP_VERSION_CHECK: true
|
||||||
|
PIP_NO_PYTHON_VERSION_WARNING: true
|
||||||
strategy:
|
strategy:
|
||||||
fail-fast: true
|
fail-fast: true
|
||||||
matrix:
|
matrix:
|
||||||
os: [ubuntu-18.04]
|
os: [ubuntu-20.04]
|
||||||
# TODO: python 2.6
|
python-version: ${{ fromJSON(needs.select.outputs.cpython-versions) }}
|
||||||
python-version: [2.7, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9, pypy-2.7, pypy-3.6, pypy-3.7]
|
|
||||||
python-impl: [cpython]
|
python-impl: [cpython]
|
||||||
ytdl-test-set: [core, download]
|
ytdl-test-set: ${{ fromJSON(needs.select.outputs.test-set) }}
|
||||||
run-tests-ext: [sh]
|
run-tests-ext: [sh]
|
||||||
include:
|
include:
|
||||||
# python 3.2 is only available on windows via setup-python
|
|
||||||
- os: windows-2019
|
- os: windows-2019
|
||||||
python-version: 3.2
|
python-version: 3.4
|
||||||
python-impl: cpython
|
python-impl: cpython
|
||||||
ytdl-test-set: core
|
ytdl-test-set: ${{ contains(needs.select.outputs.test-set, 'core') && 'core' || 'nocore' }}
|
||||||
run-tests-ext: bat
|
run-tests-ext: bat
|
||||||
- os: windows-2019
|
- os: windows-2019
|
||||||
python-version: 3.2
|
python-version: 3.4
|
||||||
python-impl: cpython
|
python-impl: cpython
|
||||||
ytdl-test-set: download
|
ytdl-test-set: ${{ contains(needs.select.outputs.test-set, 'download') && 'download' || 'nodownload' }}
|
||||||
run-tests-ext: bat
|
run-tests-ext: bat
|
||||||
# jython
|
# jython
|
||||||
- os: ubuntu-18.04
|
- os: ubuntu-20.04
|
||||||
|
python-version: 2.7
|
||||||
python-impl: jython
|
python-impl: jython
|
||||||
ytdl-test-set: core
|
ytdl-test-set: ${{ contains(needs.select.outputs.test-set, 'core') && 'core' || 'nocore' }}
|
||||||
run-tests-ext: sh
|
run-tests-ext: sh
|
||||||
- os: ubuntu-18.04
|
- os: ubuntu-20.04
|
||||||
|
python-version: 2.7
|
||||||
python-impl: jython
|
python-impl: jython
|
||||||
ytdl-test-set: download
|
ytdl-test-set: ${{ contains(needs.select.outputs.test-set, 'download') && 'download' || 'nodownload' }}
|
||||||
run-tests-ext: sh
|
run-tests-ext: sh
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v2
|
- name: Checkout
|
||||||
- name: Set up Python ${{ matrix.python-version }}
|
uses: actions/checkout@v3
|
||||||
uses: actions/setup-python@v2
|
#-------- Python 3 -----
|
||||||
if: ${{ matrix.python-impl == 'cpython' }}
|
- name: Set up supported Python ${{ matrix.python-version }}
|
||||||
|
id: setup-python
|
||||||
|
if: ${{ matrix.python-impl == 'cpython' && matrix.python-version != '2.6' && matrix.python-version != '2.7' && matrix.python-version != '3.12'}}
|
||||||
|
# wrap broken actions/setup-python@v4
|
||||||
|
uses: ytdl-org/setup-python@v1
|
||||||
with:
|
with:
|
||||||
python-version: ${{ matrix.python-version }}
|
python-version: ${{ matrix.python-version }}
|
||||||
|
cache-build: true
|
||||||
|
allow-build: info
|
||||||
|
- name: Locate supported Python ${{ matrix.python-version }}
|
||||||
|
if: ${{ env.pythonLocation }}
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
echo "PYTHONHOME=${pythonLocation}" >> "$GITHUB_ENV"
|
||||||
|
export expected="${{ steps.setup-python.outputs.python-path }}"
|
||||||
|
dirname() { printf '%s\n' \
|
||||||
|
'import os, sys' \
|
||||||
|
'print(os.path.dirname(sys.argv[1]))' \
|
||||||
|
| ${expected} - "$1"; }
|
||||||
|
expd="$(dirname "$expected")"
|
||||||
|
export python="$(command -v python)"
|
||||||
|
[ "$expd" = "$(dirname "$python")" ] || echo "PATH=$expd:${PATH}" >> "$GITHUB_ENV"
|
||||||
|
[ -x "$python" ] || printf '%s\n' \
|
||||||
|
'import os' \
|
||||||
|
'exp = os.environ["expected"]' \
|
||||||
|
'python = os.environ["python"]' \
|
||||||
|
'exps = os.path.split(exp)' \
|
||||||
|
'if python and (os.path.dirname(python) == exp[0]):' \
|
||||||
|
' exit(0)' \
|
||||||
|
'exps[1] = "python" + os.path.splitext(exps[1])[1]' \
|
||||||
|
'python = os.path.join(*exps)' \
|
||||||
|
'try:' \
|
||||||
|
' os.symlink(exp, python)' \
|
||||||
|
'except AttributeError:' \
|
||||||
|
' os.rename(exp, python)' \
|
||||||
|
| ${expected} -
|
||||||
|
printf '%s\n' \
|
||||||
|
'import sys' \
|
||||||
|
'print(sys.path)' \
|
||||||
|
| ${expected} -
|
||||||
|
#-------- Python 3.12 -
|
||||||
|
- name: Set up CPython 3.12 environment
|
||||||
|
if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == '3.12' }}
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
PYENV_ROOT=$HOME/.local/share/pyenv
|
||||||
|
echo "PYENV_ROOT=${PYENV_ROOT}" >> "$GITHUB_ENV"
|
||||||
|
- name: Cache Python 3.12
|
||||||
|
id: cache312
|
||||||
|
if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == '3.12' }}
|
||||||
|
uses: actions/cache@v3
|
||||||
|
with:
|
||||||
|
key: python-3.12
|
||||||
|
path: |
|
||||||
|
${{ env.PYENV_ROOT }}
|
||||||
|
- name: Build and set up Python 3.12
|
||||||
|
if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == '3.12' && ! steps.cache312.outputs.cache-hit }}
|
||||||
|
# dl and build locally
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
# Install build environment
|
||||||
|
sudo apt-get install -y build-essential llvm libssl-dev tk-dev \
|
||||||
|
libncursesw5-dev libreadline-dev libsqlite3-dev \
|
||||||
|
libffi-dev xz-utils zlib1g-dev libbz2-dev liblzma-dev
|
||||||
|
# Download PyEnv from its GitHub repository.
|
||||||
|
export PYENV_ROOT=${{ env.PYENV_ROOT }}
|
||||||
|
export PATH=$PYENV_ROOT/bin:$PATH
|
||||||
|
git clone "https://github.com/pyenv/pyenv.git" "$PYENV_ROOT"
|
||||||
|
pyenv install 3.12.0b4
|
||||||
|
- name: Locate Python 3.12
|
||||||
|
if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == '3.12' }}
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
PYTHONHOME="${{ env.PYENV_ROOT }}/versions/3.12.0b4"
|
||||||
|
echo "PYTHONHOME=$PYTHONHOME" >> "$GITHUB_ENV"
|
||||||
|
echo "PATH=${PYTHONHOME}/bin:$PATH" >> "$GITHUB_ENV"
|
||||||
|
#-------- Python 2.7 --
|
||||||
|
- name: Set up Python 2.7
|
||||||
|
if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == '2.7' }}
|
||||||
|
# install 2.7
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
sudo apt-get install -y python2 python-is-python2
|
||||||
|
echo "PYTHONHOME=/usr" >> "$GITHUB_ENV"
|
||||||
|
#-------- Python 2.6 --
|
||||||
|
- name: Set up Python 2.6 environment
|
||||||
|
if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == '2.6' }}
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
openssl_name=openssl-1.0.2u
|
||||||
|
echo "openssl_name=${openssl_name}" >> "$GITHUB_ENV"
|
||||||
|
openssl_dir=$HOME/.local/opt/$openssl_name
|
||||||
|
echo "openssl_dir=${openssl_dir}" >> "$GITHUB_ENV"
|
||||||
|
PYENV_ROOT=$HOME/.local/share/pyenv
|
||||||
|
echo "PYENV_ROOT=${PYENV_ROOT}" >> "$GITHUB_ENV"
|
||||||
|
sudo apt-get install -y openssl ca-certificates
|
||||||
|
- name: Cache Python 2.6
|
||||||
|
id: cache26
|
||||||
|
if: ${{ matrix.python-version == '2.6' }}
|
||||||
|
uses: actions/cache@v3
|
||||||
|
with:
|
||||||
|
key: python-2.6.9
|
||||||
|
path: |
|
||||||
|
${{ env.openssl_dir }}
|
||||||
|
${{ env.PYENV_ROOT }}
|
||||||
|
- name: Build and set up Python 2.6
|
||||||
|
if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == '2.6' && ! steps.cache26.outputs.cache-hit }}
|
||||||
|
# dl and build locally
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
# Install build environment
|
||||||
|
sudo apt-get install -y build-essential llvm libssl-dev tk-dev \
|
||||||
|
libncursesw5-dev libreadline-dev libsqlite3-dev \
|
||||||
|
libffi-dev xz-utils zlib1g-dev libbz2-dev liblzma-dev
|
||||||
|
# Download and install OpenSSL 1.0.2, back in time
|
||||||
|
openssl_name=${{ env.openssl_name }}
|
||||||
|
openssl_targz=${openssl_name}.tar.gz
|
||||||
|
openssl_dir=${{ env.openssl_dir }}
|
||||||
|
openssl_inc=$openssl_dir/include
|
||||||
|
openssl_lib=$openssl_dir/lib
|
||||||
|
openssl_ssl=$openssl_dir/ssl
|
||||||
|
curl -L "https://www.openssl.org/source/$openssl_targz" -o $openssl_targz
|
||||||
|
tar -xf $openssl_targz
|
||||||
|
( cd $openssl_name; \
|
||||||
|
./config --prefix=$openssl_dir --openssldir=${openssl_dir}/ssl \
|
||||||
|
--libdir=lib -Wl,-rpath=${openssl_dir}/lib shared zlib-dynamic && \
|
||||||
|
make && \
|
||||||
|
make install )
|
||||||
|
rm -rf $openssl_name
|
||||||
|
rmdir $openssl_ssl/certs && ln -s /etc/ssl/certs $openssl_ssl/certs
|
||||||
|
# Download PyEnv from its GitHub repository.
|
||||||
|
export PYENV_ROOT=${{ env.PYENV_ROOT }}
|
||||||
|
export PATH=$PYENV_ROOT/bin:$PATH
|
||||||
|
git clone "https://github.com/pyenv/pyenv.git" "$PYENV_ROOT"
|
||||||
|
# Prevent pyenv build trying (and failing) to update pip
|
||||||
|
export GET_PIP=get-pip-2.6.py
|
||||||
|
echo 'import sys; sys.exit(0)' > ${GET_PIP}
|
||||||
|
GET_PIP=$(realpath $GET_PIP)
|
||||||
|
# Build and install Python
|
||||||
|
export CFLAGS="-I$openssl_inc"
|
||||||
|
export LDFLAGS="-L$openssl_lib"
|
||||||
|
export LD_LIBRARY_PATH="$openssl_lib"
|
||||||
|
pyenv install 2.6.9
|
||||||
|
- name: Locate Python 2.6
|
||||||
|
if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == '2.6' }}
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
PYTHONHOME="${{ env.PYENV_ROOT }}/versions/2.6.9"
|
||||||
|
echo "PYTHONHOME=$PYTHONHOME" >> "$GITHUB_ENV"
|
||||||
|
echo "PATH=${PYTHONHOME}/bin:$PATH" >> "$GITHUB_ENV"
|
||||||
|
echo "LD_LIBRARY_PATH=${{ env.openssl_dir }}/lib${LD_LIBRARY_PATH:+:}${LD_LIBRARY_PATH}" >> "$GITHUB_ENV"
|
||||||
|
#-------- Jython ------
|
||||||
- name: Set up Java 8
|
- name: Set up Java 8
|
||||||
if: ${{ matrix.python-impl == 'jython' }}
|
if: ${{ matrix.python-impl == 'jython' }}
|
||||||
uses: actions/setup-java@v1
|
uses: actions/setup-java@v3
|
||||||
with:
|
with:
|
||||||
java-version: 8
|
java-version: 8
|
||||||
|
distribution: 'zulu'
|
||||||
|
- name: Setup Jython environment
|
||||||
|
if: ${{ matrix.python-impl == 'jython' }}
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
echo "JYTHON_ROOT=${HOME}/jython" >> "$GITHUB_ENV"
|
||||||
|
echo "PIP=pip" >> "$GITHUB_ENV"
|
||||||
|
- name: Cache Jython
|
||||||
|
id: cachejy
|
||||||
|
if: ${{ matrix.python-impl == 'jython' && matrix.python-version == '2.7' }}
|
||||||
|
uses: actions/cache@v3
|
||||||
|
with:
|
||||||
|
# 2.7.3 now available, may solve SNI issue
|
||||||
|
key: jython-2.7.1
|
||||||
|
path: |
|
||||||
|
${{ env.JYTHON_ROOT }}
|
||||||
- name: Install Jython
|
- name: Install Jython
|
||||||
if: ${{ matrix.python-impl == 'jython' }}
|
if: ${{ matrix.python-impl == 'jython' && matrix.python-version == '2.7' && ! steps.cachejy.outputs.cache-hit }}
|
||||||
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
wget https://repo1.maven.org/maven2/org/python/jython-installer/2.7.1/jython-installer-2.7.1.jar -O jython-installer.jar
|
JYTHON_ROOT="${{ env.JYTHON_ROOT }}"
|
||||||
java -jar jython-installer.jar -s -d "$HOME/jython"
|
curl -L "https://repo1.maven.org/maven2/org/python/jython-installer/2.7.1/jython-installer-2.7.1.jar" -o jython-installer.jar
|
||||||
echo "$HOME/jython/bin" >> $GITHUB_PATH
|
java -jar jython-installer.jar -s -d "${JYTHON_ROOT}"
|
||||||
- name: Install nose
|
echo "${JYTHON_ROOT}/bin" >> "$GITHUB_PATH"
|
||||||
if: ${{ matrix.python-impl != 'jython' }}
|
- name: Set up cached Jython
|
||||||
run: pip install nose
|
if: ${{ steps.cachejy.outputs.cache-hit }}
|
||||||
- name: Install nose (Jython)
|
shell: bash
|
||||||
if: ${{ matrix.python-impl == 'jython' }}
|
|
||||||
# Working around deprecation of support for non-SNI clients at PyPI CDN (see https://status.python.org/incidents/hzmjhqsdjqgb)
|
|
||||||
run: |
|
run: |
|
||||||
wget https://files.pythonhosted.org/packages/99/4f/13fb671119e65c4dce97c60e67d3fd9e6f7f809f2b307e2611f4701205cb/nose-1.3.7-py2-none-any.whl
|
JYTHON_ROOT="${{ env.JYTHON_ROOT }}"
|
||||||
pip install nose-1.3.7-py2-none-any.whl
|
echo "${JYTHON_ROOT}/bin" >> $GITHUB_PATH
|
||||||
|
- name: Install supporting Python 2.7 if possible
|
||||||
|
if: ${{ steps.cachejy.outputs.cache-hit }}
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
sudo apt-get install -y python2.7 || true
|
||||||
|
#-------- pip ---------
|
||||||
|
- name: Set up supported Python ${{ matrix.python-version }} pip
|
||||||
|
if: ${{ (matrix.python-version != '3.2' && steps.setup-python.outputs.python-path) || matrix.python-version == '2.7' }}
|
||||||
|
# This step may run in either Linux or Windows
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
echo "$PATH"
|
||||||
|
echo "$PYTHONHOME"
|
||||||
|
# curl is available on both Windows and Linux, -L follows redirects, -O gets name
|
||||||
|
python -m ensurepip || python -m pip --version || { \
|
||||||
|
get_pip="${{ contains(needs.select.outputs.own-pip-versions, matrix.python-version) && format('{0}/', matrix.python-version) || '' }}"; \
|
||||||
|
curl -L -O "https://bootstrap.pypa.io/pip/${get_pip}get-pip.py"; \
|
||||||
|
python get-pip.py; }
|
||||||
|
- name: Set up Python 2.6 pip
|
||||||
|
if: ${{ matrix.python-version == '2.6' }}
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
python -m pip --version || { \
|
||||||
|
curl -L -O "https://bootstrap.pypa.io/pip/2.6/get-pip.py"; \
|
||||||
|
curl -L -O "https://files.pythonhosted.org/packages/ac/95/a05b56bb975efa78d3557efa36acaf9cf5d2fd0ee0062060493687432e03/pip-9.0.3-py2.py3-none-any.whl"; \
|
||||||
|
python get-pip.py --no-setuptools --no-wheel pip-9.0.3-py2.py3-none-any.whl; }
|
||||||
|
# work-around to invoke pip module on 2.6: https://bugs.python.org/issue2751
|
||||||
|
echo "PIP=python -m pip.__main__" >> "$GITHUB_ENV"
|
||||||
|
- name: Set up other Python ${{ matrix.python-version }} pip
|
||||||
|
if: ${{ matrix.python-version == '3.2' && steps.setup-python.outputs.python-path }}
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
python -m pip --version || { \
|
||||||
|
curl -L -O "https://bootstrap.pypa.io/pip/3.2/get-pip.py"; \
|
||||||
|
curl -L -O "https://files.pythonhosted.org/packages/b2/d0/cd115fe345dd6f07ec1c780020a7dfe74966fceeb171e0f20d1d4905b0b7/pip-7.1.2-py2.py3-none-any.whl"; \
|
||||||
|
python get-pip.py --no-setuptools --no-wheel pip-7.1.2-py2.py3-none-any.whl; }
|
||||||
|
#-------- unittest ----
|
||||||
|
- name: Upgrade Unittest for Python 2.6
|
||||||
|
if: ${{ matrix.python-version == '2.6' }}
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
# Work around deprecation of support for non-SNI clients at PyPI CDN (see https://status.python.org/incidents/hzmjhqsdjqgb)
|
||||||
|
$PIP -qq show unittest2 || { \
|
||||||
|
for u in "65/26/32b8464df2a97e6dd1b656ed26b2c194606c16fe163c695a992b36c11cdf/six-1.13.0-py2.py3-none-any.whl" \
|
||||||
|
"f2/94/3af39d34be01a24a6e65433d19e107099374224905f1e0cc6bbe1fd22a2f/argparse-1.4.0-py2.py3-none-any.whl" \
|
||||||
|
"c7/a3/c5da2a44c85bfbb6eebcfc1dde24933f8704441b98fdde6528f4831757a6/linecache2-1.0.0-py2.py3-none-any.whl" \
|
||||||
|
"17/0a/6ac05a3723017a967193456a2efa0aa9ac4b51456891af1e2353bb9de21e/traceback2-1.4.0-py2.py3-none-any.whl" \
|
||||||
|
"72/20/7f0f433060a962200b7272b8c12ba90ef5b903e218174301d0abfd523813/unittest2-1.1.0-py2.py3-none-any.whl"; do \
|
||||||
|
curl -L -O "https://files.pythonhosted.org/packages/${u}"; \
|
||||||
|
$PIP install ${u##*/}; \
|
||||||
|
done; }
|
||||||
|
# make tests use unittest2
|
||||||
|
for test in ./test/test_*.py ./test/helper.py; do
|
||||||
|
sed -r -i -e '/^import unittest$/s/test/test2 as unittest/' "$test"
|
||||||
|
done
|
||||||
|
#-------- nose --------
|
||||||
|
- name: Install nose for Python ${{ matrix.python-version }}
|
||||||
|
if: ${{ (matrix.python-version != '3.2' && steps.setup-python.outputs.python-path) || (matrix.python-impl == 'cpython' && (matrix.python-version == '2.7' || matrix.python-version == '3.12')) }}
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
echo "$PATH"
|
||||||
|
echo "$PYTHONHOME"
|
||||||
|
# Use PyNose for recent Pythons instead of Nose
|
||||||
|
py3ver="${{ matrix.python-version }}"
|
||||||
|
py3ver=${py3ver#3.}
|
||||||
|
[ "$py3ver" != "${{ matrix.python-version }}" ] && py3ver=${py3ver%.*} || py3ver=0
|
||||||
|
[ "$py3ver" -ge 9 ] && nose=pynose || nose=nose
|
||||||
|
$PIP -qq show $nose || $PIP install $nose
|
||||||
|
- name: Install nose for other Python 2
|
||||||
|
if: ${{ matrix.python-impl == 'jython' || (matrix.python-impl == 'cpython' && matrix.python-version == '2.6') }}
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
# Work around deprecation of support for non-SNI clients at PyPI CDN (see https://status.python.org/incidents/hzmjhqsdjqgb)
|
||||||
|
$PIP -qq show nose || { \
|
||||||
|
curl -L -O "https://files.pythonhosted.org/packages/99/4f/13fb671119e65c4dce97c60e67d3fd9e6f7f809f2b307e2611f4701205cb/nose-1.3.7-py2-none-any.whl"; \
|
||||||
|
$PIP install nose-1.3.7-py2-none-any.whl; }
|
||||||
|
- name: Install nose for other Python 3
|
||||||
|
if: ${{ matrix.python-version == '3.2' && steps.setup-python.outputs.python-path }}
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
$PIP -qq show nose || { \
|
||||||
|
curl -L -O "https://files.pythonhosted.org/packages/15/d8/dd071918c040f50fa1cf80da16423af51ff8ce4a0f2399b7bf8de45ac3d9/nose-1.3.7-py3-none-any.whl"; \
|
||||||
|
$PIP install nose-1.3.7-py3-none-any.whl; }
|
||||||
|
- name: Set up nosetest test
|
||||||
|
if: ${{ contains(needs.select.outputs.test-set, matrix.ytdl-test-set ) }}
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
# set PYTHON_VER
|
||||||
|
PYTHON_VER=${{ matrix.python-version }}
|
||||||
|
[ "${PYTHON_VER#*-}" != "$PYTHON_VER" ] || PYTHON_VER="${{ matrix.python-impl }}-${PYTHON_VER}"
|
||||||
|
echo "PYTHON_VER=$PYTHON_VER" >> "$GITHUB_ENV"
|
||||||
|
echo "PYTHON_IMPL=${{ matrix.python-impl }}" >> "$GITHUB_ENV"
|
||||||
|
# define a test to validate the Python version used by nosetests
|
||||||
|
printf '%s\n' \
|
||||||
|
'from __future__ import unicode_literals' \
|
||||||
|
'import sys, os, platform' \
|
||||||
|
'try:' \
|
||||||
|
' import unittest2 as unittest' \
|
||||||
|
'except ImportError:' \
|
||||||
|
' import unittest' \
|
||||||
|
'class TestPython(unittest.TestCase):' \
|
||||||
|
' def setUp(self):' \
|
||||||
|
' self.ver = os.environ["PYTHON_VER"].split("-")' \
|
||||||
|
' def test_python_ver(self):' \
|
||||||
|
' self.assertEqual(["%d" % v for v in sys.version_info[:2]], self.ver[-1].split(".")[:2])' \
|
||||||
|
' self.assertTrue(sys.version.startswith(self.ver[-1]))' \
|
||||||
|
' self.assertIn(self.ver[0], ",".join((sys.version, platform.python_implementation())).lower())' \
|
||||||
|
' def test_python_impl(self):' \
|
||||||
|
' self.assertIn(platform.python_implementation().lower(), (os.environ["PYTHON_IMPL"], self.ver[0]))' \
|
||||||
|
> test/test_python.py
|
||||||
|
#-------- TESTS -------
|
||||||
- name: Run tests
|
- name: Run tests
|
||||||
|
if: ${{ contains(needs.select.outputs.test-set, matrix.ytdl-test-set ) }}
|
||||||
continue-on-error: ${{ matrix.ytdl-test-set == 'download' || matrix.python-impl == 'jython' }}
|
continue-on-error: ${{ matrix.ytdl-test-set == 'download' || matrix.python-impl == 'jython' }}
|
||||||
env:
|
env:
|
||||||
YTDL_TEST_SET: ${{ matrix.ytdl-test-set }}
|
YTDL_TEST_SET: ${{ matrix.ytdl-test-set }}
|
||||||
run: ./devscripts/run_tests.${{ matrix.run-tests-ext }}
|
run: |
|
||||||
|
./devscripts/run_tests.${{ matrix.run-tests-ext }}
|
||||||
flake8:
|
flake8:
|
||||||
name: Linter
|
name: Linter
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v2
|
- uses: actions/checkout@v3
|
||||||
- name: Set up Python
|
- name: Set up Python
|
||||||
uses: actions/setup-python@v2
|
uses: actions/setup-python@v4
|
||||||
with:
|
with:
|
||||||
python-version: 3.9
|
python-version: 3.9
|
||||||
- name: Install flake8
|
- name: Install flake8
|
||||||
run: pip install flake8
|
run: pip install flake8
|
||||||
- name: Run flake8
|
- name: Run flake8
|
||||||
run: flake8 .
|
run: flake8 .
|
||||||
|
|
||||||
|
|
121
README.md
121
README.md
|
@ -632,7 +632,7 @@ To use percent literals in an output template use `%%`. To output to stdout use
|
||||||
|
|
||||||
The current default template is `%(title)s-%(id)s.%(ext)s`.
|
The current default template is `%(title)s-%(id)s.%(ext)s`.
|
||||||
|
|
||||||
In some cases, you don't want special characters such as 中, spaces, or &, such as when transferring the downloaded filename to a Windows system or the filename through an 8bit-unsafe channel. In these cases, add the `--restrict-filenames` flag to get a shorter title:
|
In some cases, you don't want special characters such as 中, spaces, or &, such as when transferring the downloaded filename to a Windows system or the filename through an 8bit-unsafe channel. In these cases, add the `--restrict-filenames` flag to get a shorter title.
|
||||||
|
|
||||||
#### Output template and Windows batch files
|
#### Output template and Windows batch files
|
||||||
|
|
||||||
|
@ -918,7 +918,7 @@ Either prepend `https://www.youtube.com/watch?v=` or separate the ID from the op
|
||||||
|
|
||||||
Use the `--cookies` option, for example `--cookies /path/to/cookies/file.txt`.
|
Use the `--cookies` option, for example `--cookies /path/to/cookies/file.txt`.
|
||||||
|
|
||||||
In order to extract cookies from browser use any conforming browser extension for exporting cookies. For example, [Get cookies.txt](https://chrome.google.com/webstore/detail/get-cookiestxt/bgaddhkoddajcdgocldbbfleckgcbcid/) (for Chrome) or [cookies.txt](https://addons.mozilla.org/en-US/firefox/addon/cookies-txt/) (for Firefox).
|
In order to extract cookies from browser use any conforming browser extension for exporting cookies. For example, [Get cookies.txt LOCALLY](https://chrome.google.com/webstore/detail/get-cookiestxt-locally/cclelndahbckbenkjhflpdbgdldlbecc) (for Chrome) or [cookies.txt](https://addons.mozilla.org/en-US/firefox/addon/cookies-txt/) (for Firefox).
|
||||||
|
|
||||||
Note that the cookies file must be in Mozilla/Netscape format and the first line of the cookies file must be either `# HTTP Cookie File` or `# Netscape HTTP Cookie File`. Make sure you have correct [newline format](https://en.wikipedia.org/wiki/Newline) in the cookies file and convert newlines if necessary to correspond with your OS, namely `CRLF` (`\r\n`) for Windows and `LF` (`\n`) for Unix and Unix-like systems (Linux, macOS, etc.). `HTTP Error 400: Bad Request` when using `--cookies` is a good sign of invalid newline format.
|
Note that the cookies file must be in Mozilla/Netscape format and the first line of the cookies file must be either `# HTTP Cookie File` or `# Netscape HTTP Cookie File`. Make sure you have correct [newline format](https://en.wikipedia.org/wiki/Newline) in the cookies file and convert newlines if necessary to correspond with your OS, namely `CRLF` (`\r\n`) for Windows and `LF` (`\n`) for Unix and Unix-like systems (Linux, macOS, etc.). `HTTP Error 400: Bad Request` when using `--cookies` is a good sign of invalid newline format.
|
||||||
|
|
||||||
|
@ -1000,6 +1000,8 @@ To run the test, simply invoke your favorite test runner, or execute a test file
|
||||||
python test/test_download.py
|
python test/test_download.py
|
||||||
nosetests
|
nosetests
|
||||||
|
|
||||||
|
For Python versions 3.6 and later, you can use [pynose](https://pypi.org/project/pynose/) to implement `nosetests`. The original [nose](https://pypi.org/project/nose/) has not been upgraded for 3.10 and later.
|
||||||
|
|
||||||
See item 6 of [new extractor tutorial](#adding-support-for-a-new-site) for how to run extractor specific test cases.
|
See item 6 of [new extractor tutorial](#adding-support-for-a-new-site) for how to run extractor specific test cases.
|
||||||
|
|
||||||
If you want to create a build of youtube-dl yourself, you'll need
|
If you want to create a build of youtube-dl yourself, you'll need
|
||||||
|
@ -1091,7 +1093,7 @@ In any case, thank you very much for your contributions!
|
||||||
|
|
||||||
## youtube-dl coding conventions
|
## youtube-dl coding conventions
|
||||||
|
|
||||||
This section introduces a guide lines for writing idiomatic, robust and future-proof extractor code.
|
This section introduces guidelines for writing idiomatic, robust and future-proof extractor code.
|
||||||
|
|
||||||
Extractors are very fragile by nature since they depend on the layout of the source data provided by 3rd party media hosters out of your control and this layout tends to change. As an extractor implementer your task is not only to write code that will extract media links and metadata correctly but also to minimize dependency on the source's layout and even to make the code foresee potential future changes and be ready for that. This is important because it will allow the extractor not to break on minor layout changes thus keeping old youtube-dl versions working. Even though this breakage issue is easily fixed by emitting a new version of youtube-dl with a fix incorporated, all the previous versions become broken in all repositories and distros' packages that may not be so prompt in fetching the update from us. Needless to say, some non rolling release distros may never receive an update at all.
|
Extractors are very fragile by nature since they depend on the layout of the source data provided by 3rd party media hosters out of your control and this layout tends to change. As an extractor implementer your task is not only to write code that will extract media links and metadata correctly but also to minimize dependency on the source's layout and even to make the code foresee potential future changes and be ready for that. This is important because it will allow the extractor not to break on minor layout changes thus keeping old youtube-dl versions working. Even though this breakage issue is easily fixed by emitting a new version of youtube-dl with a fix incorporated, all the previous versions become broken in all repositories and distros' packages that may not be so prompt in fetching the update from us. Needless to say, some non rolling release distros may never receive an update at all.
|
||||||
|
|
||||||
|
@ -1331,7 +1333,7 @@ Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`]
|
||||||
|
|
||||||
Use `url_or_none` for safe URL processing.
|
Use `url_or_none` for safe URL processing.
|
||||||
|
|
||||||
Use `try_get` for safe metadata extraction from parsed JSON.
|
Use `traverse_obj` for safe metadata extraction from parsed JSON.
|
||||||
|
|
||||||
Use `unified_strdate` for uniform `upload_date` or any `YYYYMMDD` meta field extraction, `unified_timestamp` for uniform `timestamp` extraction, `parse_filesize` for `filesize` extraction, `parse_count` for count meta fields extraction, `parse_resolution`, `parse_duration` for `duration` extraction, `parse_age_limit` for `age_limit` extraction.
|
Use `unified_strdate` for uniform `upload_date` or any `YYYYMMDD` meta field extraction, `unified_timestamp` for uniform `timestamp` extraction, `parse_filesize` for `filesize` extraction, `parse_count` for count meta fields extraction, `parse_resolution`, `parse_duration` for `duration` extraction, `parse_age_limit` for `age_limit` extraction.
|
||||||
|
|
||||||
|
@ -1340,18 +1342,105 @@ Explore [`youtube_dl/utils.py`](https://github.com/ytdl-org/youtube-dl/blob/mast
|
||||||
#### More examples
|
#### More examples
|
||||||
|
|
||||||
##### Safely extract optional description from parsed JSON
|
##### Safely extract optional description from parsed JSON
|
||||||
|
|
||||||
|
When processing complex JSON, as often returned by site API requests or stashed in web pages for "hydration", you can use the `traverse_obj()` utility function to handle multiple fallback values and to ensure the expected type of metadata items. The function's docstring defines how the function works; also review its usage in the codebase for more examples.
|
||||||
|
|
||||||
|
In this example, a text `description`, or `None`, is pulled from the `.result.video[0].summary` member of the parsed JSON `response`, if available.
|
||||||
|
|
||||||
|
```python
|
||||||
|
description = traverse_obj(response, ('result', 'video', 0, 'summary', T(compat_str)))
|
||||||
|
```
|
||||||
|
`T(...)` is a shorthand for a set literal; if you hate people who still run Python 2.6, `T(type_or_transformation)` could be written as a set literal `{type_or_transformation}`.
|
||||||
|
|
||||||
|
Some extractors use the older and less capable `try_get()` function in the same way.
|
||||||
|
|
||||||
```python
|
```python
|
||||||
description = try_get(response, lambda x: x['result']['video'][0]['summary'], compat_str)
|
description = try_get(response, lambda x: x['result']['video'][0]['summary'], compat_str)
|
||||||
```
|
```
|
||||||
|
|
||||||
##### Safely extract more optional metadata
|
##### Safely extract more optional metadata
|
||||||
|
|
||||||
|
In this example, various optional metadata values are extracted from the `.result.video[0]` member of the parsed JSON `response`, which is expected to be a JS object, parsed into a `dict`, with no crash if that isn't so, or if any of the target values are missing or invalid.
|
||||||
|
|
||||||
```python
|
```python
|
||||||
video = try_get(response, lambda x: x['result']['video'][0], dict) or {}
|
video = traverse_obj(response, ('result', 'video', 0, T(dict))) or {}
|
||||||
|
# formerly:
|
||||||
|
# video = try_get(response, lambda x: x['result']['video'][0], dict) or {}
|
||||||
description = video.get('summary')
|
description = video.get('summary')
|
||||||
duration = float_or_none(video.get('durationMs'), scale=1000)
|
duration = float_or_none(video.get('durationMs'), scale=1000)
|
||||||
view_count = int_or_none(video.get('views'))
|
view_count = int_or_none(video.get('views'))
|
||||||
```
|
```
|
||||||
|
|
||||||
|
##### Safely extract nested lists
|
||||||
|
|
||||||
|
Suppose you've extracted JSON like this into a Python data structure named `media_json` using, say, the `_download_json()` or `_parse_json()` methods of `InfoExtractor`:
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"title": "Example video",
|
||||||
|
"comment": "try extracting this",
|
||||||
|
"media": [{
|
||||||
|
"type": "bad",
|
||||||
|
"size": 320,
|
||||||
|
"url": "https://some.cdn.site/bad.mp4"
|
||||||
|
}, {
|
||||||
|
"type": "streaming",
|
||||||
|
"url": "https://some.cdn.site/hls.m3u8"
|
||||||
|
}, {
|
||||||
|
"type": "super",
|
||||||
|
"size": 1280,
|
||||||
|
"url": "https://some.cdn.site/good.webm"
|
||||||
|
}],
|
||||||
|
"moreStuff": "more values",
|
||||||
|
...
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Then extractor code like this can collect the various fields of the JSON:
|
||||||
|
```python
|
||||||
|
...
|
||||||
|
from ..utils import (
|
||||||
|
determine_ext,
|
||||||
|
int_or_none,
|
||||||
|
T,
|
||||||
|
traverse_obj,
|
||||||
|
txt_or_none,
|
||||||
|
url_or_none,
|
||||||
|
)
|
||||||
|
...
|
||||||
|
...
|
||||||
|
info_dict = {}
|
||||||
|
# extract title and description if valid and not empty
|
||||||
|
info_dict.update(traverse_obj(media_json, {
|
||||||
|
'title': ('title', T(txt_or_none)),
|
||||||
|
'description': ('comment', T(txt_or_none)),
|
||||||
|
}))
|
||||||
|
|
||||||
|
# extract any recognisable media formats
|
||||||
|
fmts = []
|
||||||
|
# traverse into "media" list, extract `dict`s with desired keys
|
||||||
|
for fmt in traverse_obj(media_json, ('media', Ellipsis, {
|
||||||
|
'format_id': ('type', T(txt_or_none)),
|
||||||
|
'url': ('url', T(url_or_none)),
|
||||||
|
'width': ('size', T(int_or_none)), })):
|
||||||
|
# bad `fmt` values were `None` and removed
|
||||||
|
if 'url' not in fmt:
|
||||||
|
continue
|
||||||
|
fmt_url = fmt['url'] # known to be valid URL
|
||||||
|
ext = determine_ext(fmt_url)
|
||||||
|
if ext == 'm3u8':
|
||||||
|
fmts.extend(self._extract_m3u8_formats(fmt_url, video_id, 'mp4', fatal=False))
|
||||||
|
else:
|
||||||
|
fmt['ext'] = ext
|
||||||
|
fmts.append(fmt)
|
||||||
|
|
||||||
|
# sort, raise if no formats
|
||||||
|
self._sort_formats(fmts)
|
||||||
|
|
||||||
|
info_dict['formats'] = fmts
|
||||||
|
...
|
||||||
|
```
|
||||||
|
The extractor raises an exception, rather than crashing randomly, if the JSON structure changes so that no formats are found.
|
||||||
|
|
||||||
# EMBEDDING YOUTUBE-DL
|
# EMBEDDING YOUTUBE-DL
|
||||||
|
|
||||||
youtube-dl makes the best effort to be a good command-line program, and thus should be callable from any programming language. If you encounter any problems parsing its output, feel free to [create a report](https://github.com/ytdl-org/youtube-dl/issues/new).
|
youtube-dl makes the best effort to be a good command-line program, and thus should be callable from any programming language. If you encounter any problems parsing its output, feel free to [create a report](https://github.com/ytdl-org/youtube-dl/issues/new).
|
||||||
|
@ -1408,7 +1497,11 @@ with youtube_dl.YoutubeDL(ydl_opts) as ydl:
|
||||||
|
|
||||||
# BUGS
|
# BUGS
|
||||||
|
|
||||||
Bugs and suggestions should be reported at: <https://github.com/ytdl-org/youtube-dl/issues>. Unless you were prompted to or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email. For discussions, join us in the IRC channel [#youtube-dl](irc://chat.freenode.net/#youtube-dl) on freenode ([webchat](https://webchat.freenode.net/?randomnick=1&channels=youtube-dl)).
|
Bugs and suggestions should be reported in the issue tracker: <https://github.com/ytdl-org/youtube-dl/issues> (<https://yt-dl.org/bug> is an alias for this). Unless you were prompted to or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email. For discussions, join us in the IRC channel [#youtube-dl](irc://chat.freenode.net/#youtube-dl) on freenode ([webchat](https://webchat.freenode.net/?randomnick=1&channels=youtube-dl)).
|
||||||
|
|
||||||
|
## Opening a bug report or suggestion
|
||||||
|
|
||||||
|
Be sure to follow instructions provided **below** and **in the issue tracker**. Complete the appropriate issue template fully. Consider whether your problem is covered by an existing issue: if so, follow the discussion there. Avoid commenting on existing duplicate issues as such comments do not add to the discussion of the issue and are liable to be treated as spam.
|
||||||
|
|
||||||
**Please include the full output of youtube-dl when run with `-v`**, i.e. **add** `-v` flag to **your command line**, copy the **whole** output and post it in the issue body wrapped in \`\`\` for better formatting. It should look similar to this:
|
**Please include the full output of youtube-dl when run with `-v`**, i.e. **add** `-v` flag to **your command line**, copy the **whole** output and post it in the issue body wrapped in \`\`\` for better formatting. It should look similar to this:
|
||||||
```
|
```
|
||||||
|
@ -1428,17 +1521,17 @@ $ youtube-dl -v <your command line>
|
||||||
|
|
||||||
The output (including the first lines) contains important debugging information. Issues without the full output are often not reproducible and therefore do not get solved in short order, if ever.
|
The output (including the first lines) contains important debugging information. Issues without the full output are often not reproducible and therefore do not get solved in short order, if ever.
|
||||||
|
|
||||||
Please re-read your issue once again to avoid a couple of common mistakes (you can and should use this as a checklist):
|
Finally, please review your issue to avoid the various common mistakes listed below (you can and should use this as a checklist).
|
||||||
|
|
||||||
### Is the description of the issue itself sufficient?
|
### Is the description of the issue itself sufficient?
|
||||||
|
|
||||||
We often get issue reports that we cannot really decipher. While in most cases we eventually get the required information after asking back multiple times, this poses an unnecessary drain on our resources. Many contributors, including myself, are also not native speakers, so we may misread some parts.
|
We often get issue reports that are hard to understand. To avoid subsequent clarifications, and to assist participants who are not native English speakers, please elaborate on what feature you are requesting, or what bug you want to be fixed.
|
||||||
|
|
||||||
So please elaborate on what feature you are requesting, or what bug you want to be fixed. Make sure that it's obvious
|
Make sure that it's obvious:
|
||||||
|
|
||||||
- What the problem is
|
- What the problem is
|
||||||
- How it could be fixed
|
- How it could be fixed
|
||||||
- How your proposed solution would look like
|
- How your proposed solution would look
|
||||||
|
|
||||||
If your report is shorter than two lines, it is almost certainly missing some of these, which makes it hard for us to respond to it. We're often too polite to close the issue outright, but the missing info makes misinterpretation likely. As a committer myself, I often get frustrated by these issues, since the only possible way for me to move forward on them is to ask for clarification over and over.
|
If your report is shorter than two lines, it is almost certainly missing some of these, which makes it hard for us to respond to it. We're often too polite to close the issue outright, but the missing info makes misinterpretation likely. As a committer myself, I often get frustrated by these issues, since the only possible way for me to move forward on them is to ask for clarification over and over.
|
||||||
|
|
||||||
|
@ -1448,14 +1541,14 @@ If your server has multiple IPs or you suspect censorship, adding `--call-home`
|
||||||
|
|
||||||
**Site support requests must contain an example URL**. An example URL is a URL you might want to download, like `https://www.youtube.com/watch?v=BaW_jenozKc`. There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. `https://www.youtube.com/`) is *not* an example URL.
|
**Site support requests must contain an example URL**. An example URL is a URL you might want to download, like `https://www.youtube.com/watch?v=BaW_jenozKc`. There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. `https://www.youtube.com/`) is *not* an example URL.
|
||||||
|
|
||||||
|
### Is the issue already documented?
|
||||||
|
|
||||||
|
Make sure that someone has not already opened the issue you're trying to open. Search at the top of the window or browse the [GitHub Issues](https://github.com/ytdl-org/youtube-dl/search?type=Issues) of this repository. Initially, at least, use the search term `-label:duplicate` to focus on active issues. If there is an issue, feel free to write something along the lines of "This affects me as well, with version 2015.01.01. Here is some more information on the issue: ...". While some issues may be old, a new post into them often spurs rapid activity.
|
||||||
|
|
||||||
### Are you using the latest version?
|
### Are you using the latest version?
|
||||||
|
|
||||||
Before reporting any issue, type `youtube-dl -U`. This should report that you're up-to-date. About 20% of the reports we receive are already fixed, but people are using outdated versions. This goes for feature requests as well.
|
Before reporting any issue, type `youtube-dl -U`. This should report that you're up-to-date. About 20% of the reports we receive are already fixed, but people are using outdated versions. This goes for feature requests as well.
|
||||||
|
|
||||||
### Is the issue already documented?
|
|
||||||
|
|
||||||
Make sure that someone has not already opened the issue you're trying to open. Search at the top of the window or browse the [GitHub Issues](https://github.com/ytdl-org/youtube-dl/search?type=Issues) of this repository. If there is an issue, feel free to write something along the lines of "This affects me as well, with version 2015.01.01. Here is some more information on the issue: ...". While some issues may be old, a new post into them often spurs rapid activity.
|
|
||||||
|
|
||||||
### Why are existing options not enough?
|
### Why are existing options not enough?
|
||||||
|
|
||||||
Before requesting a new feature, please have a quick peek at [the list of supported options](https://github.com/ytdl-org/youtube-dl/blob/master/README.md#options). Many feature requests are for features that actually exist already! Please, absolutely do show off your work in the issue report and detail how the existing similar options do *not* solve your problem.
|
Before requesting a new feature, please have a quick peek at [the list of supported options](https://github.com/ytdl-org/youtube-dl/blob/master/README.md#options). Many feature requests are for features that actually exist already! Please, absolutely do show off your work in the issue report and detail how the existing similar options do *not* solve your problem.
|
||||||
|
|
1
devscripts/__init__.py
Normal file
1
devscripts/__init__.py
Normal file
|
@ -0,0 +1 @@
|
||||||
|
# Empty file needed to make devscripts.utils properly importable from outside
|
|
@ -5,8 +5,12 @@ import os
|
||||||
from os.path import dirname as dirn
|
from os.path import dirname as dirn
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))
|
sys.path.insert(0, dirn(dirn(os.path.abspath(__file__))))
|
||||||
|
|
||||||
import youtube_dl
|
import youtube_dl
|
||||||
|
from youtube_dl.compat import compat_open as open
|
||||||
|
|
||||||
|
from utils import read_file
|
||||||
|
|
||||||
BASH_COMPLETION_FILE = "youtube-dl.bash-completion"
|
BASH_COMPLETION_FILE = "youtube-dl.bash-completion"
|
||||||
BASH_COMPLETION_TEMPLATE = "devscripts/bash-completion.in"
|
BASH_COMPLETION_TEMPLATE = "devscripts/bash-completion.in"
|
||||||
|
@ -18,9 +22,8 @@ def build_completion(opt_parser):
|
||||||
for option in group.option_list:
|
for option in group.option_list:
|
||||||
# for every long flag
|
# for every long flag
|
||||||
opts_flag.append(option.get_opt_string())
|
opts_flag.append(option.get_opt_string())
|
||||||
with open(BASH_COMPLETION_TEMPLATE) as f:
|
template = read_file(BASH_COMPLETION_TEMPLATE)
|
||||||
template = f.read()
|
with open(BASH_COMPLETION_FILE, "w", encoding='utf-8') as f:
|
||||||
with open(BASH_COMPLETION_FILE, "w") as f:
|
|
||||||
# just using the special char
|
# just using the special char
|
||||||
filled_template = template.replace("{{flags}}", " ".join(opts_flag))
|
filled_template = template.replace("{{flags}}", " ".join(opts_flag))
|
||||||
f.write(filled_template)
|
f.write(filled_template)
|
||||||
|
|
83
devscripts/cli_to_api.py
Executable file
83
devscripts/cli_to_api.py
Executable file
|
@ -0,0 +1,83 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
# coding: utf-8
|
||||||
|
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
"""
|
||||||
|
This script displays the API parameters corresponding to a yt-dl command line
|
||||||
|
|
||||||
|
Example:
|
||||||
|
$ ./cli_to_api.py -f best
|
||||||
|
{u'format': 'best'}
|
||||||
|
$
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Allow direct execution
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
|
import youtube_dl
|
||||||
|
from types import MethodType
|
||||||
|
|
||||||
|
|
||||||
|
def cli_to_api(*opts):
|
||||||
|
YDL = youtube_dl.YoutubeDL
|
||||||
|
|
||||||
|
# to extract the parsed options, break out of YoutubeDL instantiation
|
||||||
|
|
||||||
|
# return options via this Exception
|
||||||
|
class ParseYTDLResult(Exception):
|
||||||
|
def __init__(self, result):
|
||||||
|
super(ParseYTDLResult, self).__init__('result')
|
||||||
|
self.opts = result
|
||||||
|
|
||||||
|
# replacement constructor that raises ParseYTDLResult
|
||||||
|
def ytdl_init(ydl, ydl_opts):
|
||||||
|
super(YDL, ydl).__init__(ydl_opts)
|
||||||
|
raise ParseYTDLResult(ydl_opts)
|
||||||
|
|
||||||
|
# patch in the constructor
|
||||||
|
YDL.__init__ = MethodType(ytdl_init, YDL)
|
||||||
|
|
||||||
|
# core parser
|
||||||
|
def parsed_options(argv):
|
||||||
|
try:
|
||||||
|
youtube_dl._real_main(list(argv))
|
||||||
|
except ParseYTDLResult as result:
|
||||||
|
return result.opts
|
||||||
|
|
||||||
|
# from https://github.com/yt-dlp/yt-dlp/issues/5859#issuecomment-1363938900
|
||||||
|
default = parsed_options([])
|
||||||
|
|
||||||
|
def neq_opt(a, b):
    """Return True if option value `b` differs from the default value `a`.

    Equal values are never reported as different. When the default is
    `None` and `b` is a `DateRange`, `b` only counts as different if its
    string form is not the all-inclusive range '0001-01-01 - 9999-12-31'.
    Any other pair is compared with `!=`.
    """
    if a == b:
        return False
    # Inspect the type of the value being compared, `b`; the type of the
    # builtin `object` can never match, which would make this branch dead.
    # The class is matched by repr so that DateRange need not be imported.
    if a is None and repr(type(b)).endswith(".utils.DateRange'>"):
        return '0001-01-01 - 9999-12-31' != '{0}'.format(b)
    return a != b
|
||||||
|
|
||||||
|
diff = dict((k, v) for k, v in parsed_options(opts).items() if neq_opt(default[k], v))
|
||||||
|
if 'postprocessors' in diff:
|
||||||
|
diff['postprocessors'] = [pp for pp in diff['postprocessors'] if pp not in default['postprocessors']]
|
||||||
|
return diff
|
||||||
|
|
||||||
|
|
||||||
|
def main():
|
||||||
|
from pprint import PrettyPrinter
|
||||||
|
|
||||||
|
pprint = PrettyPrinter()
|
||||||
|
super_format = pprint.format
|
||||||
|
|
||||||
|
def format(object, context, maxlevels, level):
|
||||||
|
if repr(type(object)).endswith(".utils.DateRange'>"):
|
||||||
|
return '{0}: {1}>'.format(repr(object)[:-2], object), True, False
|
||||||
|
return super_format(object, context, maxlevels, level)
|
||||||
|
|
||||||
|
pprint.format = format
|
||||||
|
|
||||||
|
pprint.pprint(cli_to_api(*sys.argv))
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
main()
|
|
@ -1,7 +1,6 @@
|
||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import io
|
|
||||||
import json
|
import json
|
||||||
import mimetypes
|
import mimetypes
|
||||||
import netrc
|
import netrc
|
||||||
|
@ -10,7 +9,9 @@ import os
|
||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
dirn = os.path.dirname
|
||||||
|
|
||||||
|
sys.path.insert(0, dirn(dirn(os.path.abspath(__file__))))
|
||||||
|
|
||||||
from youtube_dl.compat import (
|
from youtube_dl.compat import (
|
||||||
compat_basestring,
|
compat_basestring,
|
||||||
|
@ -22,6 +23,7 @@ from youtube_dl.utils import (
|
||||||
make_HTTPS_handler,
|
make_HTTPS_handler,
|
||||||
sanitized_Request,
|
sanitized_Request,
|
||||||
)
|
)
|
||||||
|
from utils import read_file
|
||||||
|
|
||||||
|
|
||||||
class GitHubReleaser(object):
|
class GitHubReleaser(object):
|
||||||
|
@ -89,8 +91,7 @@ def main():
|
||||||
|
|
||||||
changelog_file, version, build_path = args
|
changelog_file, version, build_path = args
|
||||||
|
|
||||||
with io.open(changelog_file, encoding='utf-8') as inf:
|
changelog = read_file(changelog_file)
|
||||||
changelog = inf.read()
|
|
||||||
|
|
||||||
mobj = re.search(r'(?s)version %s\n{2}(.+?)\n{3}' % version, changelog)
|
mobj = re.search(r'(?s)version %s\n{2}(.+?)\n{3}' % version, changelog)
|
||||||
body = mobj.group(1) if mobj else ''
|
body = mobj.group(1) if mobj else ''
|
||||||
|
|
|
@ -6,10 +6,13 @@ import os
|
||||||
from os.path import dirname as dirn
|
from os.path import dirname as dirn
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))
|
sys.path.insert(0, dirn(dirn(os.path.abspath(__file__))))
|
||||||
|
|
||||||
import youtube_dl
|
import youtube_dl
|
||||||
from youtube_dl.utils import shell_quote
|
from youtube_dl.utils import shell_quote
|
||||||
|
|
||||||
|
from utils import read_file, write_file
|
||||||
|
|
||||||
FISH_COMPLETION_FILE = 'youtube-dl.fish'
|
FISH_COMPLETION_FILE = 'youtube-dl.fish'
|
||||||
FISH_COMPLETION_TEMPLATE = 'devscripts/fish-completion.in'
|
FISH_COMPLETION_TEMPLATE = 'devscripts/fish-completion.in'
|
||||||
|
|
||||||
|
@ -38,11 +41,9 @@ def build_completion(opt_parser):
|
||||||
complete_cmd.extend(EXTRA_ARGS.get(long_option, []))
|
complete_cmd.extend(EXTRA_ARGS.get(long_option, []))
|
||||||
commands.append(shell_quote(complete_cmd))
|
commands.append(shell_quote(complete_cmd))
|
||||||
|
|
||||||
with open(FISH_COMPLETION_TEMPLATE) as f:
|
template = read_file(FISH_COMPLETION_TEMPLATE)
|
||||||
template = f.read()
|
|
||||||
filled_template = template.replace('{{commands}}', '\n'.join(commands))
|
filled_template = template.replace('{{commands}}', '\n'.join(commands))
|
||||||
with open(FISH_COMPLETION_FILE, 'w') as f:
|
write_file(FISH_COMPLETION_FILE, filled_template)
|
||||||
f.write(filled_template)
|
|
||||||
|
|
||||||
|
|
||||||
parser = youtube_dl.parseOpts()[0]
|
parser = youtube_dl.parseOpts()[0]
|
||||||
|
|
|
@ -6,16 +6,21 @@ import sys
|
||||||
import hashlib
|
import hashlib
|
||||||
import os.path
|
import os.path
|
||||||
|
|
||||||
|
dirn = os.path.dirname
|
||||||
|
|
||||||
|
sys.path.insert(0, dirn(dirn(dirn(os.path.abspath(__file__)))))
|
||||||
|
|
||||||
|
from devscripts.utils import read_file, write_file
|
||||||
|
from youtube_dl.compat import compat_open as open
|
||||||
|
|
||||||
if len(sys.argv) <= 1:
|
if len(sys.argv) <= 1:
|
||||||
print('Specify the version number as parameter')
|
print('Specify the version number as parameter')
|
||||||
sys.exit()
|
sys.exit()
|
||||||
version = sys.argv[1]
|
version = sys.argv[1]
|
||||||
|
|
||||||
with open('update/LATEST_VERSION', 'w') as f:
|
write_file('update/LATEST_VERSION', version)
|
||||||
f.write(version)
|
|
||||||
|
|
||||||
versions_info = json.load(open('update/versions.json'))
|
versions_info = json.loads(read_file('update/versions.json'))
|
||||||
if 'signature' in versions_info:
|
if 'signature' in versions_info:
|
||||||
del versions_info['signature']
|
del versions_info['signature']
|
||||||
|
|
||||||
|
@ -39,5 +44,5 @@ for key, filename in filenames.items():
|
||||||
versions_info['versions'][version] = new_version
|
versions_info['versions'][version] = new_version
|
||||||
versions_info['latest'] = version
|
versions_info['latest'] = version
|
||||||
|
|
||||||
with open('update/versions.json', 'w') as jsonf:
|
with open('update/versions.json', 'w', encoding='utf-8') as jsonf:
|
||||||
json.dump(versions_info, jsonf, indent=4, sort_keys=True)
|
# json.dump() serialises into the open file; json.dumps() only returns a string
json.dump(versions_info, jsonf, indent=4, sort_keys=True)
|
||||||
|
|
|
@ -2,14 +2,21 @@
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import json
|
import json
|
||||||
|
import os.path
|
||||||
|
import sys
|
||||||
|
|
||||||
versions_info = json.load(open('update/versions.json'))
|
dirn = os.path.dirname
|
||||||
|
|
||||||
|
sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))
|
||||||
|
|
||||||
|
from utils import read_file, write_file
|
||||||
|
|
||||||
|
versions_info = json.loads(read_file('update/versions.json'))
|
||||||
version = versions_info['latest']
|
version = versions_info['latest']
|
||||||
version_dict = versions_info['versions'][version]
|
version_dict = versions_info['versions'][version]
|
||||||
|
|
||||||
# Read template page
|
# Read template page
|
||||||
with open('download.html.in', 'r', encoding='utf-8') as tmplf:
|
template = read_file('download.html.in')
|
||||||
template = tmplf.read()
|
|
||||||
|
|
||||||
template = template.replace('@PROGRAM_VERSION@', version)
|
template = template.replace('@PROGRAM_VERSION@', version)
|
||||||
template = template.replace('@PROGRAM_URL@', version_dict['bin'][0])
|
template = template.replace('@PROGRAM_URL@', version_dict['bin'][0])
|
||||||
|
@ -18,5 +25,5 @@ template = template.replace('@EXE_URL@', version_dict['exe'][0])
|
||||||
template = template.replace('@EXE_SHA256SUM@', version_dict['exe'][1])
|
template = template.replace('@EXE_SHA256SUM@', version_dict['exe'][1])
|
||||||
template = template.replace('@TAR_URL@', version_dict['tar'][0])
|
template = template.replace('@TAR_URL@', version_dict['tar'][0])
|
||||||
template = template.replace('@TAR_SHA256SUM@', version_dict['tar'][1])
|
template = template.replace('@TAR_SHA256SUM@', version_dict['tar'][1])
|
||||||
with open('download.html', 'w', encoding='utf-8') as dlf:
|
|
||||||
dlf.write(template)
|
write_file('download.html', template)
|
||||||
|
|
|
@ -5,17 +5,22 @@ from __future__ import with_statement, unicode_literals
|
||||||
|
|
||||||
import datetime
|
import datetime
|
||||||
import glob
|
import glob
|
||||||
import io # For Python 2 compatibility
|
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
|
import sys
|
||||||
|
|
||||||
year = str(datetime.datetime.now().year)
|
dirn = os.path.dirname
|
||||||
|
|
||||||
|
sys.path.insert(0, dirn(dirn(dirn(os.path.abspath(__file__)))))
|
||||||
|
|
||||||
|
from devscripts.utils import read_file, write_file
|
||||||
|
from youtube_dl import compat_str
|
||||||
|
|
||||||
|
year = compat_str(datetime.datetime.now().year)
|
||||||
for fn in glob.glob('*.html*'):
|
for fn in glob.glob('*.html*'):
|
||||||
with io.open(fn, encoding='utf-8') as f:
|
content = read_file(fn)
|
||||||
content = f.read()
|
|
||||||
newc = re.sub(r'(?P<copyright>Copyright © 2011-)(?P<year>[0-9]{4})', 'Copyright © 2011-' + year, content)
|
newc = re.sub(r'(?P<copyright>Copyright © 2011-)(?P<year>[0-9]{4})', 'Copyright © 2011-' + year, content)
|
||||||
if content != newc:
|
if content != newc:
|
||||||
tmpFn = fn + '.part'
|
tmpFn = fn + '.part'
|
||||||
with io.open(tmpFn, 'wt', encoding='utf-8') as outf:
|
write_file(tmpFn, newc)
|
||||||
outf.write(newc)
|
|
||||||
os.rename(tmpFn, fn)
|
os.rename(tmpFn, fn)
|
||||||
|
|
|
@ -2,10 +2,16 @@
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import datetime
|
import datetime
|
||||||
import io
|
|
||||||
import json
|
import json
|
||||||
|
import os.path
|
||||||
import textwrap
|
import textwrap
|
||||||
|
import sys
|
||||||
|
|
||||||
|
dirn = os.path.dirname
|
||||||
|
|
||||||
|
sys.path.insert(0, dirn(dirn(os.path.abspath(__file__))))
|
||||||
|
|
||||||
|
from utils import write_file
|
||||||
|
|
||||||
atom_template = textwrap.dedent("""\
|
atom_template = textwrap.dedent("""\
|
||||||
<?xml version="1.0" encoding="utf-8"?>
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
@ -72,5 +78,4 @@ for v in versions:
|
||||||
entries_str = textwrap.indent(''.join(entries), '\t')
|
entries_str = textwrap.indent(''.join(entries), '\t')
|
||||||
atom_template = atom_template.replace('@ENTRIES@', entries_str)
|
atom_template = atom_template.replace('@ENTRIES@', entries_str)
|
||||||
|
|
||||||
with io.open('update/releases.atom', 'w', encoding='utf-8') as atom_file:
|
write_file('update/releases.atom', atom_template)
|
||||||
atom_file.write(atom_template)
|
|
||||||
|
|
|
@ -5,15 +5,17 @@ import sys
|
||||||
import os
|
import os
|
||||||
import textwrap
|
import textwrap
|
||||||
|
|
||||||
|
dirn = os.path.dirname
|
||||||
|
|
||||||
# We must be able to import youtube_dl
|
# We must be able to import youtube_dl
|
||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
sys.path.insert(0, dirn(dirn(dirn(os.path.abspath(__file__)))))
|
||||||
|
|
||||||
import youtube_dl
|
import youtube_dl
|
||||||
|
from devscripts.utils import read_file, write_file
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
with open('supportedsites.html.in', 'r', encoding='utf-8') as tmplf:
|
template = read_file('supportedsites.html.in')
|
||||||
template = tmplf.read()
|
|
||||||
|
|
||||||
ie_htmls = []
|
ie_htmls = []
|
||||||
for ie in youtube_dl.list_extractors(age_limit=None):
|
for ie in youtube_dl.list_extractors(age_limit=None):
|
||||||
|
@ -29,8 +31,7 @@ def main():
|
||||||
|
|
||||||
template = template.replace('@SITES@', textwrap.indent('\n'.join(ie_htmls), '\t'))
|
template = template.replace('@SITES@', textwrap.indent('\n'.join(ie_htmls), '\t'))
|
||||||
|
|
||||||
with open('supportedsites.html', 'w', encoding='utf-8') as sitesf:
|
write_file('supportedsites.html', template)
|
||||||
sitesf.write(template)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
|
@ -1,10 +1,11 @@
|
||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import io
|
|
||||||
import optparse
|
import optparse
|
||||||
import re
|
import re
|
||||||
|
|
||||||
|
from utils import read_file, write_file
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
parser = optparse.OptionParser(usage='%prog INFILE OUTFILE')
|
parser = optparse.OptionParser(usage='%prog INFILE OUTFILE')
|
||||||
|
@ -14,8 +15,7 @@ def main():
|
||||||
|
|
||||||
infile, outfile = args
|
infile, outfile = args
|
||||||
|
|
||||||
with io.open(infile, encoding='utf-8') as inf:
|
readme = read_file(infile)
|
||||||
readme = inf.read()
|
|
||||||
|
|
||||||
bug_text = re.search(
|
bug_text = re.search(
|
||||||
r'(?s)#\s*BUGS\s*[^\n]*\s*(.*?)#\s*COPYRIGHT', readme).group(1)
|
r'(?s)#\s*BUGS\s*[^\n]*\s*(.*?)#\s*COPYRIGHT', readme).group(1)
|
||||||
|
@ -25,8 +25,7 @@ def main():
|
||||||
|
|
||||||
out = bug_text + dev_text
|
out = bug_text + dev_text
|
||||||
|
|
||||||
with io.open(outfile, 'w', encoding='utf-8') as outf:
|
write_file(outfile, out)
|
||||||
outf.write(out)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
|
@ -1,8 +1,11 @@
|
||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import io
|
|
||||||
import optparse
|
import optparse
|
||||||
|
import os.path
|
||||||
|
import sys
|
||||||
|
|
||||||
|
from utils import read_file, read_version, write_file
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
@ -13,17 +16,11 @@ def main():
|
||||||
|
|
||||||
infile, outfile = args
|
infile, outfile = args
|
||||||
|
|
||||||
with io.open(infile, encoding='utf-8') as inf:
|
issue_template_tmpl = read_file(infile)
|
||||||
issue_template_tmpl = inf.read()
|
|
||||||
|
|
||||||
# Get the version from youtube_dl/version.py without importing the package
|
out = issue_template_tmpl % {'version': read_version()}
|
||||||
exec(compile(open('youtube_dl/version.py').read(),
|
|
||||||
'youtube_dl/version.py', 'exec'))
|
|
||||||
|
|
||||||
out = issue_template_tmpl % {'version': locals()['__version__']}
|
write_file(outfile, out)
|
||||||
|
|
||||||
with io.open(outfile, 'w', encoding='utf-8') as outf:
|
|
||||||
outf.write(out)
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
main()
|
main()
|
||||||
|
|
|
@ -1,28 +1,49 @@
|
||||||
from __future__ import unicode_literals, print_function
|
from __future__ import unicode_literals, print_function
|
||||||
|
|
||||||
from inspect import getsource
|
from inspect import getsource
|
||||||
import io
|
|
||||||
import os
|
import os
|
||||||
from os.path import dirname as dirn
|
from os.path import dirname as dirn
|
||||||
|
import re
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
print('WARNING: Lazy loading extractors is an experimental feature that may not always work', file=sys.stderr)
|
print('WARNING: Lazy loading extractors is an experimental feature that may not always work', file=sys.stderr)
|
||||||
|
|
||||||
sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))
|
sys.path.insert(0, dirn(dirn(os.path.abspath(__file__))))
|
||||||
|
|
||||||
lazy_extractors_filename = sys.argv[1]
|
lazy_extractors_filename = sys.argv[1]
|
||||||
if os.path.exists(lazy_extractors_filename):
|
if os.path.exists(lazy_extractors_filename):
|
||||||
os.remove(lazy_extractors_filename)
|
os.remove(lazy_extractors_filename)
|
||||||
|
# Py2: may be confused by leftover lazy_extractors.pyc
if sys.version_info[0] < 3:
    # Remove both flavours of stale compiled module (.pyc and .pyo)
    for c in ('c', 'o'):
        try:
            # append the loop variable, not a literal 'c', so that the
            # optimised (.pyo) file is removed as well
            os.remove(lazy_extractors_filename + c)
        except OSError:
            # nothing to clean up (or not removable): best effort only
            pass
|
||||||
|
|
||||||
|
from devscripts.utils import read_file, write_file
|
||||||
|
from youtube_dl.compat import compat_register_utf8
|
||||||
|
|
||||||
|
compat_register_utf8()
|
||||||
|
|
||||||
from youtube_dl.extractor import _ALL_CLASSES
|
from youtube_dl.extractor import _ALL_CLASSES
|
||||||
from youtube_dl.extractor.common import InfoExtractor, SearchInfoExtractor
|
from youtube_dl.extractor.common import InfoExtractor, SearchInfoExtractor
|
||||||
|
|
||||||
with open('devscripts/lazy_load_template.py', 'rt') as f:
|
module_template = read_file('devscripts/lazy_load_template.py')
|
||||||
module_template = f.read()
|
|
||||||
|
|
||||||
|
def get_source(m):
    """Return the source text of `m` with comment-only lines removed.

    A removed line is any line consisting of optional whitespace followed
    by a `#` comment; whitespace (including blank lines) directly before
    such a line is matched by the same pattern and removed with it.
    """
    comment_line = re.compile(r'(?m)^\s*#.*\n')
    return comment_line.sub('', getsource(m))
|
||||||
|
|
||||||
|
|
||||||
module_contents = [
|
module_contents = [
|
||||||
module_template + '\n' + getsource(InfoExtractor.suitable) + '\n',
|
module_template,
|
||||||
'class LazyLoadSearchExtractor(LazyLoadExtractor):\n pass\n']
|
get_source(InfoExtractor.suitable),
|
||||||
|
get_source(InfoExtractor._match_valid_url) + '\n',
|
||||||
|
'class LazyLoadSearchExtractor(LazyLoadExtractor):\n pass\n',
|
||||||
|
# needed for suitable() methods of Youtube extractor (see #28780)
|
||||||
|
'from youtube_dl.utils import parse_qs, variadic\n',
|
||||||
|
]
|
||||||
|
|
||||||
ie_template = '''
|
ie_template = '''
|
||||||
class {name}({bases}):
|
class {name}({bases}):
|
||||||
|
@ -54,7 +75,7 @@ def build_lazy_ie(ie, name):
|
||||||
valid_url=valid_url,
|
valid_url=valid_url,
|
||||||
module=ie.__module__)
|
module=ie.__module__)
|
||||||
if ie.suitable.__func__ is not InfoExtractor.suitable.__func__:
|
if ie.suitable.__func__ is not InfoExtractor.suitable.__func__:
|
||||||
s += '\n' + getsource(ie.suitable)
|
s += '\n' + get_source(ie.suitable)
|
||||||
if hasattr(ie, '_make_valid_url'):
|
if hasattr(ie, '_make_valid_url'):
|
||||||
# search extractors
|
# search extractors
|
||||||
s += make_valid_template.format(valid_url=ie._make_valid_url())
|
s += make_valid_template.format(valid_url=ie._make_valid_url())
|
||||||
|
@ -94,7 +115,17 @@ for ie in ordered_cls:
|
||||||
module_contents.append(
|
module_contents.append(
|
||||||
'_ALL_CLASSES = [{0}]'.format(', '.join(names)))
|
'_ALL_CLASSES = [{0}]'.format(', '.join(names)))
|
||||||
|
|
||||||
module_src = '\n'.join(module_contents) + '\n'
|
module_src = '\n'.join(module_contents)
|
||||||
|
|
||||||
with io.open(lazy_extractors_filename, 'wt', encoding='utf-8') as f:
|
write_file(lazy_extractors_filename, module_src + '\n')
|
||||||
f.write(module_src)
|
|
||||||
|
# work around JVM byte code module limit in Jython
|
||||||
|
if sys.platform.startswith('java') and sys.version_info[:2] == (2, 7):
|
||||||
|
import subprocess
|
||||||
|
from youtube_dl.compat import compat_subprocess_get_DEVNULL
|
||||||
|
# if Python 2.7 is available, use it to compile the module for Jython
|
||||||
|
try:
|
||||||
|
# any failure (e.g. no python2.7 executable found) is deliberately ignored
|
||||||
|
subprocess.check_call(['python2.7', '-m', 'py_compile', lazy_extractors_filename], stdout=compat_subprocess_get_DEVNULL())
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
|
@ -1,8 +1,14 @@
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import io
|
import os.path
|
||||||
import sys
|
|
||||||
import re
|
import re
|
||||||
|
import sys
|
||||||
|
dirn = os.path.dirname
|
||||||
|
|
||||||
|
sys.path.insert(0, dirn(dirn(os.path.abspath(__file__))))
|
||||||
|
|
||||||
|
from utils import read_file
|
||||||
|
from youtube_dl.compat import compat_open as open
|
||||||
|
|
||||||
README_FILE = 'README.md'
|
README_FILE = 'README.md'
|
||||||
helptext = sys.stdin.read()
|
helptext = sys.stdin.read()
|
||||||
|
@ -10,8 +16,7 @@ helptext = sys.stdin.read()
|
||||||
if isinstance(helptext, bytes):
|
if isinstance(helptext, bytes):
|
||||||
helptext = helptext.decode('utf-8')
|
helptext = helptext.decode('utf-8')
|
||||||
|
|
||||||
with io.open(README_FILE, encoding='utf-8') as f:
|
oldreadme = read_file(README_FILE)
|
||||||
oldreadme = f.read()
|
|
||||||
|
|
||||||
header = oldreadme[:oldreadme.index('# OPTIONS')]
|
header = oldreadme[:oldreadme.index('# OPTIONS')]
|
||||||
footer = oldreadme[oldreadme.index('# CONFIGURATION'):]
|
footer = oldreadme[oldreadme.index('# CONFIGURATION'):]
|
||||||
|
@ -20,7 +25,7 @@ options = helptext[helptext.index(' General Options:') + 19:]
|
||||||
options = re.sub(r'(?m)^ (\w.+)$', r'## \1', options)
|
options = re.sub(r'(?m)^ (\w.+)$', r'## \1', options)
|
||||||
options = '# OPTIONS\n' + options + '\n'
|
options = '# OPTIONS\n' + options + '\n'
|
||||||
|
|
||||||
with io.open(README_FILE, 'w', encoding='utf-8') as f:
|
with open(README_FILE, 'w', encoding='utf-8') as f:
|
||||||
f.write(header)
|
f.write(header)
|
||||||
f.write(options)
|
f.write(options)
|
||||||
f.write(footer)
|
f.write(footer)
|
||||||
|
|
|
@ -1,17 +1,19 @@
|
||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import io
|
|
||||||
import optparse
|
import optparse
|
||||||
import os
|
import os.path
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
|
|
||||||
# Import youtube_dl
|
# Import youtube_dl
|
||||||
ROOT_DIR = os.path.join(os.path.dirname(__file__), '..')
|
dirn = os.path.dirname
|
||||||
sys.path.insert(0, ROOT_DIR)
|
|
||||||
|
sys.path.insert(0, dirn(dirn(os.path.abspath(__file__))))
|
||||||
|
|
||||||
import youtube_dl
|
import youtube_dl
|
||||||
|
|
||||||
|
from utils import write_file
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
parser = optparse.OptionParser(usage='%prog OUTFILE.md')
|
parser = optparse.OptionParser(usage='%prog OUTFILE.md')
|
||||||
|
@ -38,8 +40,7 @@ def main():
|
||||||
' - ' + md + '\n'
|
' - ' + md + '\n'
|
||||||
for md in gen_ies_md(ies))
|
for md in gen_ies_md(ies))
|
||||||
|
|
||||||
with io.open(outfile, 'w', encoding='utf-8') as outf:
|
write_file(outfile, out)
|
||||||
outf.write(out)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
|
@ -1,13 +1,13 @@
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import io
|
|
||||||
import optparse
|
import optparse
|
||||||
import os.path
|
import os.path
|
||||||
import re
|
import re
|
||||||
|
|
||||||
|
from utils import read_file, write_file
|
||||||
|
|
||||||
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||||
README_FILE = os.path.join(ROOT_DIR, 'README.md')
|
README_FILE = os.path.join(ROOT_DIR, 'README.md')
|
||||||
|
|
||||||
PREFIX = r'''%YOUTUBE-DL(1)
|
PREFIX = r'''%YOUTUBE-DL(1)
|
||||||
|
|
||||||
# NAME
|
# NAME
|
||||||
|
@ -29,8 +29,7 @@ def main():
|
||||||
|
|
||||||
outfile, = args
|
outfile, = args
|
||||||
|
|
||||||
with io.open(README_FILE, encoding='utf-8') as f:
|
readme = read_file(README_FILE)
|
||||||
readme = f.read()
|
|
||||||
|
|
||||||
readme = re.sub(r'(?s)^.*?(?=# DESCRIPTION)', '', readme)
|
readme = re.sub(r'(?s)^.*?(?=# DESCRIPTION)', '', readme)
|
||||||
readme = re.sub(r'\s+youtube-dl \[OPTIONS\] URL \[URL\.\.\.\]', '', readme)
|
readme = re.sub(r'\s+youtube-dl \[OPTIONS\] URL \[URL\.\.\.\]', '', readme)
|
||||||
|
@ -38,8 +37,7 @@ def main():
|
||||||
|
|
||||||
readme = filter_options(readme)
|
readme = filter_options(readme)
|
||||||
|
|
||||||
with io.open(outfile, 'w', encoding='utf-8') as outf:
|
write_file(outfile, readme)
|
||||||
outf.write(readme)
|
|
||||||
|
|
||||||
|
|
||||||
def filter_options(readme):
|
def filter_options(readme):
|
||||||
|
|
62
devscripts/utils.py
Normal file
62
devscripts/utils.py
Normal file
|
@ -0,0 +1,62 @@
|
||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import functools
|
||||||
|
import os.path
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
|
||||||
|
dirn = os.path.dirname
|
||||||
|
|
||||||
|
sys.path.insert(0, dirn(dirn(os.path.abspath(__file__))))
|
||||||
|
|
||||||
|
from youtube_dl.compat import (
|
||||||
|
compat_kwargs,
|
||||||
|
compat_open as open,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def read_file(fname):
    """Return the whole content of the file `fname`, decoded as UTF-8 text."""
    with open(fname, encoding='utf-8') as src:
        text = src.read()
    return text
|
||||||
|
|
||||||
|
|
||||||
|
def write_file(fname, content, mode='w'):
    """Write `content` to the file `fname` as UTF-8 text.

    `mode` is passed to open() (default 'w').
    Returns whatever the file object's write() returns.
    """
    with open(fname, mode, encoding='utf-8') as out:
        written = out.write(content)
    return written
|
||||||
|
|
||||||
|
|
||||||
|
def read_version(fname='youtube_dl/version.py'):
    """Get the version without importing the package

    The file `fname` is compiled and executed in a fresh namespace and the
    value it assigns to `__version__` is returned.
    Raises KeyError if the executed file does not define `__version__`.
    """
    # Execute into an explicit dict instead of the function's implicit
    # locals: reading exec() results back through locals() is unreliable and
    # fails on Python 3.13+, where locals() returns an independent snapshot.
    namespace = {}
    exec(compile(read_file(fname), fname, 'exec'), namespace)
    return namespace['__version__']
|
||||||
|
|
||||||
|
|
||||||
|
def get_filename_args(has_infile=False, default_outfile=None):
    """Parse the file name argument(s) from the command line.

    An `outfile` positional argument is always defined; it is optional,
    defaulting to `default_outfile`, when `default_outfile` is truthy.
    An `infile` positional argument is defined before it when `has_infile`
    is true.

    Returns `(infile, outfile)` if `has_infile`, otherwise just `outfile`.
    """
    parser = argparse.ArgumentParser()
    if has_infile:
        parser.add_argument('infile', help='Input file')

    outfile_kwargs = {}
    if default_outfile:
        # with a default available the positional may be omitted
        outfile_kwargs.update({'nargs': '?', 'default': default_outfile})
    outfile_kwargs['help'] = 'Output file'
    parser.add_argument('outfile', **compat_kwargs(outfile_kwargs))

    opts = parser.parse_args()
    if not has_infile:
        return opts.outfile
    return opts.infile, opts.outfile
|
||||||
|
|
||||||
|
|
||||||
|
def compose_functions(*functions):
    """Return a one-argument callable that applies `functions` in order.

    The first function receives the argument, each later function receives
    the previous result; with no functions the argument is returned as is.
    """
    def composed(x):
        result = x
        for fn in functions:
            result = fn(result)
        return result

    return composed
|
||||||
|
|
||||||
|
|
||||||
|
def run_process(*args, **kwargs):
    """Run the command given by `args` through subprocess.run().

    Unless overridden in `kwargs`, the call uses text=True, check=True and
    capture_output=True; when `text` is truthy, encoding='utf-8' and
    errors='replace' are also supplied as defaults.

    Returns the result of subprocess.run().
    """
    for option, default in (('text', True), ('check', True), ('capture_output', True)):
        kwargs.setdefault(option, default)
    if kwargs['text']:
        # decoding options only make sense in text mode
        for option, default in (('encoding', 'utf-8'), ('errors', 'replace')):
            kwargs.setdefault(option, default)
    return subprocess.run(args, **compat_kwargs(kwargs))
|
|
@ -7,6 +7,8 @@ import sys
|
||||||
|
|
||||||
sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))
|
sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))
|
||||||
import youtube_dl
|
import youtube_dl
|
||||||
|
from utils import read_file, write_file
|
||||||
|
|
||||||
|
|
||||||
ZSH_COMPLETION_FILE = "youtube-dl.zsh"
|
ZSH_COMPLETION_FILE = "youtube-dl.zsh"
|
||||||
ZSH_COMPLETION_TEMPLATE = "devscripts/zsh-completion.in"
|
ZSH_COMPLETION_TEMPLATE = "devscripts/zsh-completion.in"
|
||||||
|
@ -34,15 +36,13 @@ def build_completion(opt_parser):
|
||||||
|
|
||||||
flags = [opt.get_opt_string() for opt in opts]
|
flags = [opt.get_opt_string() for opt in opts]
|
||||||
|
|
||||||
with open(ZSH_COMPLETION_TEMPLATE) as f:
|
template = read_file(ZSH_COMPLETION_TEMPLATE)
|
||||||
template = f.read()
|
|
||||||
|
|
||||||
template = template.replace("{{fileopts}}", "|".join(fileopts))
|
template = template.replace("{{fileopts}}", "|".join(fileopts))
|
||||||
template = template.replace("{{diropts}}", "|".join(diropts))
|
template = template.replace("{{diropts}}", "|".join(diropts))
|
||||||
template = template.replace("{{flags}}", " ".join(flags))
|
template = template.replace("{{flags}}", " ".join(flags))
|
||||||
|
|
||||||
with open(ZSH_COMPLETION_FILE, "w") as f:
|
write_file(ZSH_COMPLETION_FILE, template)
|
||||||
f.write(template)
|
|
||||||
|
|
||||||
|
|
||||||
parser = youtube_dl.parseOpts()[0]
|
parser = youtube_dl.parseOpts()[0]
|
||||||
|
|
|
@ -1,7 +1,6 @@
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import errno
|
import errno
|
||||||
import io
|
|
||||||
import hashlib
|
import hashlib
|
||||||
import json
|
import json
|
||||||
import os.path
|
import os.path
|
||||||
|
@ -9,14 +8,17 @@ import re
|
||||||
import types
|
import types
|
||||||
import ssl
|
import ssl
|
||||||
import sys
|
import sys
|
||||||
|
import unittest
|
||||||
|
|
||||||
import youtube_dl.extractor
|
import youtube_dl.extractor
|
||||||
from youtube_dl import YoutubeDL
|
from youtube_dl import YoutubeDL
|
||||||
from youtube_dl.compat import (
|
from youtube_dl.compat import (
|
||||||
|
compat_open as open,
|
||||||
compat_os_name,
|
compat_os_name,
|
||||||
compat_str,
|
compat_str,
|
||||||
)
|
)
|
||||||
from youtube_dl.utils import (
|
from youtube_dl.utils import (
|
||||||
|
IDENTITY,
|
||||||
preferredencoding,
|
preferredencoding,
|
||||||
write_string,
|
write_string,
|
||||||
)
|
)
|
||||||
|
@ -27,10 +29,10 @@ def get_params(override=None):
|
||||||
"parameters.json")
|
"parameters.json")
|
||||||
LOCAL_PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)),
|
LOCAL_PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)),
|
||||||
"local_parameters.json")
|
"local_parameters.json")
|
||||||
with io.open(PARAMETERS_FILE, encoding='utf-8') as pf:
|
with open(PARAMETERS_FILE, encoding='utf-8') as pf:
|
||||||
parameters = json.load(pf)
|
parameters = json.load(pf)
|
||||||
if os.path.exists(LOCAL_PARAMETERS_FILE):
|
if os.path.exists(LOCAL_PARAMETERS_FILE):
|
||||||
with io.open(LOCAL_PARAMETERS_FILE, encoding='utf-8') as pf:
|
with open(LOCAL_PARAMETERS_FILE, encoding='utf-8') as pf:
|
||||||
parameters.update(json.load(pf))
|
parameters.update(json.load(pf))
|
||||||
if override:
|
if override:
|
||||||
parameters.update(override)
|
parameters.update(override)
|
||||||
|
@ -72,7 +74,8 @@ class FakeYDL(YoutubeDL):
|
||||||
def to_screen(self, s, skip_eol=None):
|
def to_screen(self, s, skip_eol=None):
|
||||||
print(s)
|
print(s)
|
||||||
|
|
||||||
def trouble(self, s, tb=None):
|
def trouble(self, *args, **kwargs):
|
||||||
|
s = args[0] if len(args) > 0 else kwargs.get('message', 'Missing message')
|
||||||
raise Exception(s)
|
raise Exception(s)
|
||||||
|
|
||||||
def download(self, x):
|
def download(self, x):
|
||||||
|
@ -89,6 +92,17 @@ class FakeYDL(YoutubeDL):
|
||||||
self.report_warning = types.MethodType(report_warning, self)
|
self.report_warning = types.MethodType(report_warning, self)
|
||||||
|
|
||||||
|
|
||||||
|
class FakeLogger(object):
    """Logger stand-in whose debug/warning/error methods discard messages."""

    def debug(self, msg):
        """Accept and ignore a debug message."""

    def warning(self, msg):
        """Accept and ignore a warning message."""

    def error(self, msg):
        """Accept and ignore an error message."""
|
||||||
|
|
||||||
|
|
||||||
def gettestcases(include_onlymatching=False):
|
def gettestcases(include_onlymatching=False):
|
||||||
for ie in youtube_dl.extractor.gen_extractors():
|
for ie in youtube_dl.extractor.gen_extractors():
|
||||||
for tc in ie.get_testcases(include_onlymatching):
|
for tc in ie.get_testcases(include_onlymatching):
|
||||||
|
@ -128,7 +142,7 @@ def expect_value(self, got, expected, field):
|
||||||
self.assertTrue(
|
self.assertTrue(
|
||||||
contains_str in got,
|
contains_str in got,
|
||||||
'field %s (value: %r) should contain %r' % (field, got, contains_str))
|
'field %s (value: %r) should contain %r' % (field, got, contains_str))
|
||||||
elif isinstance(expected, compat_str) and re.match(r'^lambda \w+:', expected):
|
elif isinstance(expected, compat_str) and re.match(r'lambda \w+:', expected):
|
||||||
fn = eval(expected)
|
fn = eval(expected)
|
||||||
suite = expected.split(':', 1)[1].strip()
|
suite = expected.split(':', 1)[1].strip()
|
||||||
self.assertTrue(
|
self.assertTrue(
|
||||||
|
@ -286,3 +300,7 @@ def http_server_port(httpd):
|
||||||
else:
|
else:
|
||||||
sock = httpd.socket
|
sock = httpd.socket
|
||||||
return sock.getsockname()[1]
|
return sock.getsockname()[1]
|
||||||
|
|
||||||
|
|
||||||
|
def expectedFailureIf(cond):
    """Return `unittest.expectedFailure` if `cond` is truthy, else `IDENTITY`."""
    if cond:
        return unittest.expectedFailure
    return IDENTITY
|
||||||
|
|
|
@ -3,19 +3,37 @@
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
# Allow direct execution
|
# Allow direct execution
|
||||||
import io
|
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
from test.helper import FakeYDL, expect_dict, expect_value, http_server_port
|
|
||||||
from youtube_dl.compat import compat_etree_fromstring, compat_http_server
|
|
||||||
from youtube_dl.extractor.common import InfoExtractor
|
|
||||||
from youtube_dl.extractor import YoutubeIE, get_info_extractor
|
|
||||||
from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError, RegexNotFoundError
|
|
||||||
import threading
|
import threading
|
||||||
|
|
||||||
|
from test.helper import (
|
||||||
|
expect_dict,
|
||||||
|
expect_value,
|
||||||
|
FakeYDL,
|
||||||
|
http_server_port,
|
||||||
|
)
|
||||||
|
from youtube_dl.compat import (
|
||||||
|
compat_etree_fromstring,
|
||||||
|
compat_http_server,
|
||||||
|
compat_open as open,
|
||||||
|
)
|
||||||
|
from youtube_dl.extractor.common import InfoExtractor
|
||||||
|
from youtube_dl.extractor import (
|
||||||
|
get_info_extractor,
|
||||||
|
YoutubeIE,
|
||||||
|
)
|
||||||
|
from youtube_dl.utils import (
|
||||||
|
encode_data_uri,
|
||||||
|
ExtractorError,
|
||||||
|
RegexNotFoundError,
|
||||||
|
strip_jsonp,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
TEAPOT_RESPONSE_STATUS = 418
|
TEAPOT_RESPONSE_STATUS = 418
|
||||||
TEAPOT_RESPONSE_BODY = "<h1>418 I'm a teapot</h1>"
|
TEAPOT_RESPONSE_BODY = "<h1>418 I'm a teapot</h1>"
|
||||||
|
@ -35,13 +53,13 @@ class InfoExtractorTestRequestHandler(compat_http_server.BaseHTTPRequestHandler)
|
||||||
assert False
|
assert False
|
||||||
|
|
||||||
|
|
||||||
class TestIE(InfoExtractor):
|
class DummyIE(InfoExtractor):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
class TestInfoExtractor(unittest.TestCase):
|
class TestInfoExtractor(unittest.TestCase):
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
self.ie = TestIE(FakeYDL())
|
self.ie = DummyIE(FakeYDL())
|
||||||
|
|
||||||
def test_ie_key(self):
|
def test_ie_key(self):
|
||||||
self.assertEqual(get_info_extractor(YoutubeIE.ie_key()), YoutubeIE)
|
self.assertEqual(get_info_extractor(YoutubeIE.ie_key()), YoutubeIE)
|
||||||
|
@ -62,6 +80,7 @@ class TestInfoExtractor(unittest.TestCase):
|
||||||
<meta name="og:test1" content='foo > < bar'/>
|
<meta name="og:test1" content='foo > < bar'/>
|
||||||
<meta name="og:test2" content="foo >//< bar"/>
|
<meta name="og:test2" content="foo >//< bar"/>
|
||||||
<meta property=og-test3 content='Ill-formatted opengraph'/>
|
<meta property=og-test3 content='Ill-formatted opengraph'/>
|
||||||
|
<meta property=og:test4 content=unquoted-value/>
|
||||||
'''
|
'''
|
||||||
self.assertEqual(ie._og_search_title(html), 'Foo')
|
self.assertEqual(ie._og_search_title(html), 'Foo')
|
||||||
self.assertEqual(ie._og_search_description(html), 'Some video\'s description ')
|
self.assertEqual(ie._og_search_description(html), 'Some video\'s description ')
|
||||||
|
@ -74,6 +93,7 @@ class TestInfoExtractor(unittest.TestCase):
|
||||||
self.assertEqual(ie._og_search_property(('test0', 'test1'), html), 'foo > < bar')
|
self.assertEqual(ie._og_search_property(('test0', 'test1'), html), 'foo > < bar')
|
||||||
self.assertRaises(RegexNotFoundError, ie._og_search_property, 'test0', html, None, fatal=True)
|
self.assertRaises(RegexNotFoundError, ie._og_search_property, 'test0', html, None, fatal=True)
|
||||||
self.assertRaises(RegexNotFoundError, ie._og_search_property, ('test0', 'test00'), html, None, fatal=True)
|
self.assertRaises(RegexNotFoundError, ie._og_search_property, ('test0', 'test00'), html, None, fatal=True)
|
||||||
|
self.assertEqual(ie._og_search_property('test4', html), 'unquoted-value')
|
||||||
|
|
||||||
def test_html_search_meta(self):
|
def test_html_search_meta(self):
|
||||||
ie = self.ie
|
ie = self.ie
|
||||||
|
@ -98,6 +118,71 @@ class TestInfoExtractor(unittest.TestCase):
|
||||||
self.assertRaises(RegexNotFoundError, ie._html_search_meta, 'z', html, None, fatal=True)
|
self.assertRaises(RegexNotFoundError, ie._html_search_meta, 'z', html, None, fatal=True)
|
||||||
self.assertRaises(RegexNotFoundError, ie._html_search_meta, ('z', 'x'), html, None, fatal=True)
|
self.assertRaises(RegexNotFoundError, ie._html_search_meta, ('z', 'x'), html, None, fatal=True)
|
||||||
|
|
||||||
|
def test_search_nextjs_data(self):
|
||||||
|
html = '''
|
||||||
|
<!DOCTYPE html>
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<meta http-equiv="content-type" content=
|
||||||
|
"text/html; charset=utf-8">
|
||||||
|
<meta name="viewport" content="width=device-width">
|
||||||
|
<title>Test _search_nextjs_data()</title>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<div id="__next">
|
||||||
|
<div style="background-color:#17171E" class="FU" dir="ltr">
|
||||||
|
<div class="sc-93de261d-0 dyzzYE">
|
||||||
|
<div>
|
||||||
|
<header class="HD"></header>
|
||||||
|
<main class="MN">
|
||||||
|
<div style="height:0" class="HT0">
|
||||||
|
<div style="width:NaN%" data-testid=
|
||||||
|
"stream-container" class="WDN"></div>
|
||||||
|
</div>
|
||||||
|
</main>
|
||||||
|
</div>
|
||||||
|
<footer class="sc-6e5faf91-0 dEGaHS"></footer>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<script id="__NEXT_DATA__" type="application/json">
|
||||||
|
{"props":{"pageProps":{"video":{"id":"testid"}}}}
|
||||||
|
</script>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
'''
|
||||||
|
search = self.ie._search_nextjs_data(html, 'testID')
|
||||||
|
self.assertEqual(search['props']['pageProps']['video']['id'], 'testid')
|
||||||
|
|
||||||
|
def test_search_nuxt_data(self):
|
||||||
|
html = '''
|
||||||
|
<!DOCTYPE html>
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<meta http-equiv="content-type" content=
|
||||||
|
"text/html; charset=utf-8">
|
||||||
|
<title>Nuxt.js Test Page</title>
|
||||||
|
<meta name="viewport" content=
|
||||||
|
"width=device-width, initial-scale=1">
|
||||||
|
<meta data-hid="robots" name="robots" content="all">
|
||||||
|
</head>
|
||||||
|
<body class="BD">
|
||||||
|
<div id="__layout">
|
||||||
|
<h1 class="H1">Example heading</h1>
|
||||||
|
<div class="IN">
|
||||||
|
<p>Decoy text</p>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<script>
|
||||||
|
window.__NUXT__=(function(a,b,c,d,e,f,g,h){return {decoy:" default",data:[{track:{id:f,title:g}}]}}(null,null,"c",null,null,"testid","Nuxt.js title",null));
|
||||||
|
</script>
|
||||||
|
<script src="/_nuxt/a12345b.js" defer="defer"></script>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
'''
|
||||||
|
search = self.ie._search_nuxt_data(html, 'testID')
|
||||||
|
self.assertEqual(search['track']['id'], 'testid')
|
||||||
|
|
||||||
def test_search_json_ld_realworld(self):
|
def test_search_json_ld_realworld(self):
|
||||||
# https://github.com/ytdl-org/youtube-dl/issues/23306
|
# https://github.com/ytdl-org/youtube-dl/issues/23306
|
||||||
expect_dict(
|
expect_dict(
|
||||||
|
@ -346,6 +431,24 @@ class TestInfoExtractor(unittest.TestCase):
|
||||||
}],
|
}],
|
||||||
})
|
})
|
||||||
|
|
||||||
|
# from https://0000.studio/
|
||||||
|
# with type attribute but without extension in URL
|
||||||
|
expect_dict(
|
||||||
|
self,
|
||||||
|
self.ie._parse_html5_media_entries(
|
||||||
|
'https://0000.studio',
|
||||||
|
r'''
|
||||||
|
<video src="https://d1ggyt9m8pwf3g.cloudfront.net/protected/ap-northeast-1:1864af40-28d5-492b-b739-b32314b1a527/archive/clip/838db6a7-8973-4cd6-840d-8517e4093c92"
|
||||||
|
controls="controls" type="video/mp4" preload="metadata" autoplay="autoplay" playsinline class="object-contain">
|
||||||
|
</video>
|
||||||
|
''', None)[0],
|
||||||
|
{
|
||||||
|
'formats': [{
|
||||||
|
'url': 'https://d1ggyt9m8pwf3g.cloudfront.net/protected/ap-northeast-1:1864af40-28d5-492b-b739-b32314b1a527/archive/clip/838db6a7-8973-4cd6-840d-8517e4093c92',
|
||||||
|
'ext': 'mp4',
|
||||||
|
}],
|
||||||
|
})
|
||||||
|
|
||||||
def test_extract_jwplayer_data_realworld(self):
|
def test_extract_jwplayer_data_realworld(self):
|
||||||
# from http://www.suffolk.edu/sjc/
|
# from http://www.suffolk.edu/sjc/
|
||||||
expect_dict(
|
expect_dict(
|
||||||
|
@ -799,8 +902,8 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
|
||||||
]
|
]
|
||||||
|
|
||||||
for m3u8_file, m3u8_url, expected_formats in _TEST_CASES:
|
for m3u8_file, m3u8_url, expected_formats in _TEST_CASES:
|
||||||
with io.open('./test/testdata/m3u8/%s.m3u8' % m3u8_file,
|
with open('./test/testdata/m3u8/%s.m3u8' % m3u8_file,
|
||||||
mode='r', encoding='utf-8') as f:
|
mode='r', encoding='utf-8') as f:
|
||||||
formats = self.ie._parse_m3u8_formats(
|
formats = self.ie._parse_m3u8_formats(
|
||||||
f.read(), m3u8_url, ext='mp4')
|
f.read(), m3u8_url, ext='mp4')
|
||||||
self.ie._sort_formats(formats)
|
self.ie._sort_formats(formats)
|
||||||
|
@ -1024,8 +1127,8 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
|
||||||
]
|
]
|
||||||
|
|
||||||
for mpd_file, mpd_url, mpd_base_url, expected_formats in _TEST_CASES:
|
for mpd_file, mpd_url, mpd_base_url, expected_formats in _TEST_CASES:
|
||||||
with io.open('./test/testdata/mpd/%s.mpd' % mpd_file,
|
with open('./test/testdata/mpd/%s.mpd' % mpd_file,
|
||||||
mode='r', encoding='utf-8') as f:
|
mode='r', encoding='utf-8') as f:
|
||||||
formats = self.ie._parse_mpd_formats(
|
formats = self.ie._parse_mpd_formats(
|
||||||
compat_etree_fromstring(f.read().encode('utf-8')),
|
compat_etree_fromstring(f.read().encode('utf-8')),
|
||||||
mpd_base_url=mpd_base_url, mpd_url=mpd_url)
|
mpd_base_url=mpd_base_url, mpd_url=mpd_url)
|
||||||
|
@ -1051,8 +1154,8 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
|
||||||
]
|
]
|
||||||
|
|
||||||
for f4m_file, f4m_url, expected_formats in _TEST_CASES:
|
for f4m_file, f4m_url, expected_formats in _TEST_CASES:
|
||||||
with io.open('./test/testdata/f4m/%s.f4m' % f4m_file,
|
with open('./test/testdata/f4m/%s.f4m' % f4m_file,
|
||||||
mode='r', encoding='utf-8') as f:
|
mode='r', encoding='utf-8') as f:
|
||||||
formats = self.ie._parse_f4m_formats(
|
formats = self.ie._parse_f4m_formats(
|
||||||
compat_etree_fromstring(f.read().encode('utf-8')),
|
compat_etree_fromstring(f.read().encode('utf-8')),
|
||||||
f4m_url, None)
|
f4m_url, None)
|
||||||
|
@ -1099,8 +1202,8 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
|
||||||
]
|
]
|
||||||
|
|
||||||
for xspf_file, xspf_url, expected_entries in _TEST_CASES:
|
for xspf_file, xspf_url, expected_entries in _TEST_CASES:
|
||||||
with io.open('./test/testdata/xspf/%s.xspf' % xspf_file,
|
with open('./test/testdata/xspf/%s.xspf' % xspf_file,
|
||||||
mode='r', encoding='utf-8') as f:
|
mode='r', encoding='utf-8') as f:
|
||||||
entries = self.ie._parse_xspf(
|
entries = self.ie._parse_xspf(
|
||||||
compat_etree_fromstring(f.read().encode('utf-8')),
|
compat_etree_fromstring(f.read().encode('utf-8')),
|
||||||
xspf_file, xspf_url=xspf_url, xspf_base_url=xspf_url)
|
xspf_file, xspf_url=xspf_url, xspf_base_url=xspf_url)
|
||||||
|
|
|
@ -10,14 +10,31 @@ import unittest
|
||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
import copy
|
import copy
|
||||||
|
import json
|
||||||
|
|
||||||
from test.helper import FakeYDL, assertRegexpMatches
|
from test.helper import (
|
||||||
|
FakeYDL,
|
||||||
|
assertRegexpMatches,
|
||||||
|
try_rm,
|
||||||
|
)
|
||||||
from youtube_dl import YoutubeDL
|
from youtube_dl import YoutubeDL
|
||||||
from youtube_dl.compat import compat_str, compat_urllib_error
|
from youtube_dl.compat import (
|
||||||
|
compat_http_cookiejar_Cookie,
|
||||||
|
compat_http_cookies_SimpleCookie,
|
||||||
|
compat_kwargs,
|
||||||
|
compat_open as open,
|
||||||
|
compat_str,
|
||||||
|
compat_urllib_error,
|
||||||
|
)
|
||||||
|
|
||||||
from youtube_dl.extractor import YoutubeIE
|
from youtube_dl.extractor import YoutubeIE
|
||||||
from youtube_dl.extractor.common import InfoExtractor
|
from youtube_dl.extractor.common import InfoExtractor
|
||||||
from youtube_dl.postprocessor.common import PostProcessor
|
from youtube_dl.postprocessor.common import PostProcessor
|
||||||
from youtube_dl.utils import ExtractorError, match_filter_func
|
from youtube_dl.utils import (
|
||||||
|
ExtractorError,
|
||||||
|
match_filter_func,
|
||||||
|
traverse_obj,
|
||||||
|
)
|
||||||
|
|
||||||
TEST_URL = 'http://localhost/sample.mp4'
|
TEST_URL = 'http://localhost/sample.mp4'
|
||||||
|
|
||||||
|
@ -29,11 +46,14 @@ class YDL(FakeYDL):
|
||||||
self.msgs = []
|
self.msgs = []
|
||||||
|
|
||||||
def process_info(self, info_dict):
|
def process_info(self, info_dict):
|
||||||
self.downloaded_info_dicts.append(info_dict)
|
self.downloaded_info_dicts.append(info_dict.copy())
|
||||||
|
|
||||||
def to_screen(self, msg):
|
def to_screen(self, msg):
|
||||||
self.msgs.append(msg)
|
self.msgs.append(msg)
|
||||||
|
|
||||||
|
def dl(self, *args, **kwargs):
|
||||||
|
assert False, 'Downloader must not be invoked for test_YoutubeDL'
|
||||||
|
|
||||||
|
|
||||||
def _make_result(formats, **kwargs):
|
def _make_result(formats, **kwargs):
|
||||||
res = {
|
res = {
|
||||||
|
@ -42,8 +62,9 @@ def _make_result(formats, **kwargs):
|
||||||
'title': 'testttitle',
|
'title': 'testttitle',
|
||||||
'extractor': 'testex',
|
'extractor': 'testex',
|
||||||
'extractor_key': 'TestEx',
|
'extractor_key': 'TestEx',
|
||||||
|
'webpage_url': 'http://example.com/watch?v=shenanigans',
|
||||||
}
|
}
|
||||||
res.update(**kwargs)
|
res.update(**compat_kwargs(kwargs))
|
||||||
return res
|
return res
|
||||||
|
|
||||||
|
|
||||||
|
@ -681,12 +702,12 @@ class TestYoutubeDL(unittest.TestCase):
|
||||||
|
|
||||||
class SimplePP(PostProcessor):
|
class SimplePP(PostProcessor):
|
||||||
def run(self, info):
|
def run(self, info):
|
||||||
with open(audiofile, 'wt') as f:
|
with open(audiofile, 'w') as f:
|
||||||
f.write('EXAMPLE')
|
f.write('EXAMPLE')
|
||||||
return [info['filepath']], info
|
return [info['filepath']], info
|
||||||
|
|
||||||
def run_pp(params, PP):
|
def run_pp(params, PP):
|
||||||
with open(filename, 'wt') as f:
|
with open(filename, 'w') as f:
|
||||||
f.write('EXAMPLE')
|
f.write('EXAMPLE')
|
||||||
ydl = YoutubeDL(params)
|
ydl = YoutubeDL(params)
|
||||||
ydl.add_post_processor(PP())
|
ydl.add_post_processor(PP())
|
||||||
|
@ -705,7 +726,7 @@ class TestYoutubeDL(unittest.TestCase):
|
||||||
|
|
||||||
class ModifierPP(PostProcessor):
|
class ModifierPP(PostProcessor):
|
||||||
def run(self, info):
|
def run(self, info):
|
||||||
with open(info['filepath'], 'wt') as f:
|
with open(info['filepath'], 'w') as f:
|
||||||
f.write('MODIFIED')
|
f.write('MODIFIED')
|
||||||
return [], info
|
return [], info
|
||||||
|
|
||||||
|
@ -930,17 +951,11 @@ class TestYoutubeDL(unittest.TestCase):
|
||||||
# Test case for https://github.com/ytdl-org/youtube-dl/issues/27064
|
# Test case for https://github.com/ytdl-org/youtube-dl/issues/27064
|
||||||
def test_ignoreerrors_for_playlist_with_url_transparent_iterable_entries(self):
|
def test_ignoreerrors_for_playlist_with_url_transparent_iterable_entries(self):
|
||||||
|
|
||||||
class _YDL(YDL):
|
ydl = YDL({
|
||||||
def __init__(self, *args, **kwargs):
|
|
||||||
super(_YDL, self).__init__(*args, **kwargs)
|
|
||||||
|
|
||||||
def trouble(self, s, tb=None):
|
|
||||||
pass
|
|
||||||
|
|
||||||
ydl = _YDL({
|
|
||||||
'format': 'extra',
|
'format': 'extra',
|
||||||
'ignoreerrors': True,
|
'ignoreerrors': True,
|
||||||
})
|
})
|
||||||
|
ydl.trouble = lambda *_, **__: None
|
||||||
|
|
||||||
class VideoIE(InfoExtractor):
|
class VideoIE(InfoExtractor):
|
||||||
_VALID_URL = r'video:(?P<id>\d+)'
|
_VALID_URL = r'video:(?P<id>\d+)'
|
||||||
|
@ -1017,5 +1032,160 @@ class TestYoutubeDL(unittest.TestCase):
|
||||||
self.assertEqual(out_info['release_date'], '20210930')
|
self.assertEqual(out_info['release_date'], '20210930')
|
||||||
|
|
||||||
|
|
||||||
|
class TestYoutubeDLCookies(unittest.TestCase):
    """Tests for cookie loading, scoping and info-dict cookie handling."""

    @staticmethod
    def encode_cookie(cookie):
        """Yield (name, text value) pairs for one cookie.

        `cookie` is either a dict or an object whose attributes (via
        vars()) describe the cookie; each value is converted with compat_str.
        """
        if not isinstance(cookie, dict):
            cookie = vars(cookie)
        for name, value in cookie.items():
            yield name, compat_str(value)

    @classmethod
    def comparable_cookies(cls, cookies):
        """Return `cookies` as a sorted list of sorted (name, value) tuples."""
        # Work around cookiejar cookies not being unicode strings
        return sorted(map(tuple, map(sorted, map(cls.encode_cookie, cookies))))

    def assertSameCookies(self, c1, c2, msg=None):
        """Assert that two cookie collections compare equal once normalised."""
        return self.assertEqual(
            *map(self.comparable_cookies, (c1, c2)),
            msg=msg)

    def assertSameCookieStrings(self, c1, c2, msg=None):
        """Assert that two cookie strings parse to the same cookies."""
        return self.assertSameCookies(
            *map(lambda c: compat_http_cookies_SimpleCookie(c).values(), (c1, c2)),
            msg=msg)

    def test_header_cookies(self):
        # Cookies given as a header string must end up in the cookiejar
        # with the expected scope, or be rejected with the expected error

        ydl = FakeYDL()
        ydl.report_warning = lambda *_, **__: None

        def cookie(name, value, version=None, domain='', path='', secure=False, expires=None):
            # Build the expected cookiejar Cookie for comparison
            return compat_http_cookiejar_Cookie(
                version or 0, name, value, None, False,
                domain, bool(domain), bool(domain), path, bool(path),
                secure, expires, False, None, None, rest={})

        test_url, test_domain = (t % ('yt.dl',) for t in ('https://%s/test', '.%s'))

        def test(encoded_cookies, cookies, headers=False, round_trip=None, error_re=None):
            def _test():
                ydl.cookiejar.clear()
                ydl._load_cookies(encoded_cookies, autoscope=headers)
                if headers:
                    ydl._apply_header_cookies(test_url)
                data = {'url': test_url}
                ydl._calc_headers(data)
                self.assertSameCookies(
                    cookies, ydl.cookiejar,
                    'Extracted cookiejar.Cookie is not the same')
                if not headers:
                    self.assertSameCookieStrings(
                        data.get('cookies'), round_trip or encoded_cookies,
                        msg='Cookie is not the same as round trip')
                ydl.__dict__['_YoutubeDL__header_cookies'] = []

            try:
                _test()
            except AssertionError:
                raise
            except Exception as e:
                if not error_re:
                    raise
                assertRegexpMatches(self, e.args[0], error_re.join(('.*',) * 2))
            else:
                # An error was expected: completing without one must not
                # count as a pass
                if error_re:
                    self.fail('Expected an error matching %r but none was raised' % (error_re, ))

        test('test=value; Domain=' + test_domain, [cookie('test', 'value', domain=test_domain)])
        test('test=value', [cookie('test', 'value')], error_re='Unscoped cookies are not allowed')
        test('cookie1=value1; Domain={0}; Path=/test; cookie2=value2; Domain={0}; Path=/'.format(test_domain), [
            cookie('cookie1', 'value1', domain=test_domain, path='/test'),
            cookie('cookie2', 'value2', domain=test_domain, path='/')])
        cookie_kw = compat_kwargs(
            {'domain': test_domain, 'path': '/test', 'secure': True, 'expires': '9999999999', })
        test('test=value; Domain={domain}; Path={path}; Secure; Expires={expires}'.format(**cookie_kw), [
            cookie('test', 'value', **cookie_kw)])
        test('test="value; "; path=/test; domain=' + test_domain, [
            cookie('test', 'value; ', domain=test_domain, path='/test')],
            round_trip='test="value\\073 "; Domain={0}; Path=/test'.format(test_domain))
        test('name=; Domain=' + test_domain, [cookie('name', '', domain=test_domain)],
             round_trip='name=""; Domain=' + test_domain)
        test('test=value', [cookie('test', 'value', domain=test_domain)], headers=True)
        test('cookie1=value; Domain={0}; cookie2=value'.format(test_domain), [],
             headers=True, error_re='Invalid syntax')
        # From here on a warning is reported as an error
        ydl.report_warning = ydl.report_error
        test('test=value', [], headers=True, error_re='Passing cookies as a header is a potential security risk')

    def test_infojson_cookies(self):
        # Cookie headers must be moved to the `cookies` field when an info
        # dict is processed, written to an info.json and processed again
        TEST_FILE = 'test_infojson_cookies.info.json'
        TEST_URL = 'https://example.com/example.mp4'
        COOKIES = 'a=b; Domain=.example.com; c=d; Domain=.example.com'
        COOKIE_HEADER = {'Cookie': 'a=b; c=d'}

        ydl = FakeYDL()
        # Instead of downloading, write the info dict to TEST_FILE
        ydl.process_info = lambda x: ydl._write_info_json('test', x, TEST_FILE)

        def make_info(info_header_cookies=False, fmts_header_cookies=False, cookies_field=False):
            fmt = {'url': TEST_URL}
            if fmts_header_cookies:
                fmt['http_headers'] = COOKIE_HEADER
            if cookies_field:
                fmt['cookies'] = COOKIES
            return _make_result([fmt], http_headers=COOKIE_HEADER if info_header_cookies else None)

        def test(initial_info, note):

            def failure_msg(why):
                return ' when '.join((why, note))

            result = {}
            result['processed'] = ydl.process_ie_result(initial_info)
            self.assertTrue(ydl.cookiejar.get_cookies_for_url(TEST_URL),
                            msg=failure_msg('No cookies set in cookiejar after initial process'))
            ydl.cookiejar.clear()
            with open(TEST_FILE) as infojson:
                result['loaded'] = ydl.sanitize_info(json.load(infojson), True)
            result['final'] = ydl.process_ie_result(result['loaded'].copy(), download=False)
            self.assertTrue(ydl.cookiejar.get_cookies_for_url(TEST_URL),
                            msg=failure_msg('No cookies set in cookiejar after final process'))
            ydl.cookiejar.clear()
            for key in ('processed', 'loaded', 'final'):
                info = result[key]
                self.assertIsNone(
                    traverse_obj(info, ((None, ('formats', 0)), 'http_headers', 'Cookie'), casesense=False, get_all=False),
                    msg=failure_msg('Cookie header not removed in {0} result'.format(key)))
                self.assertSameCookieStrings(
                    traverse_obj(info, ((None, ('formats', 0)), 'cookies'), get_all=False), COOKIES,
                    msg=failure_msg('No cookies field found in {0} result'.format(key)))

        try:
            test({'url': TEST_URL, 'http_headers': COOKIE_HEADER, 'id': '1', 'title': 'x'}, 'no formats field')
            test(make_info(info_header_cookies=True), 'info_dict header cokies')
            test(make_info(fmts_header_cookies=True), 'format header cookies')
            test(make_info(info_header_cookies=True, fmts_header_cookies=True), 'info_dict and format header cookies')
            test(make_info(info_header_cookies=True, fmts_header_cookies=True, cookies_field=True), 'all cookies fields')
            test(make_info(cookies_field=True), 'cookies format field')
            test({'url': TEST_URL, 'cookies': COOKIES, 'id': '1', 'title': 'x'}, 'info_dict cookies field only')
        finally:
            # Remove the written info.json even when an assertion failed
            try_rm(TEST_FILE)

    def test_add_headers_cookie(self):
        # A Cookie passed through the `http_headers` param must only reach
        # formats on the domain it was scoped to
        def check_for_cookie_header(result):
            return traverse_obj(result, ((None, ('formats', 0)), 'http_headers', 'Cookie'), casesense=False, get_all=False)

        ydl = FakeYDL({'http_headers': {'Cookie': 'a=b'}})
        ydl._apply_header_cookies(_make_result([])['webpage_url'])  # Scope to input webpage URL: .example.com

        fmt = {'url': 'https://example.com/video.mp4'}
        result = ydl.process_ie_result(_make_result([fmt]), download=False)
        self.assertIsNone(check_for_cookie_header(result), msg='http_headers cookies in result info_dict')
        self.assertEqual(result.get('cookies'), 'a=b; Domain=.example.com', msg='No cookies were set in cookies field')
        self.assertIn('a=b', ydl.cookiejar.get_cookie_header(fmt['url']), msg='No cookies were set in cookiejar')

        fmt = {'url': 'https://wrong.com/video.mp4'}
        result = ydl.process_ie_result(_make_result([fmt]), download=False)
        self.assertIsNone(check_for_cookie_header(result), msg='http_headers cookies for wrong domain')
        self.assertFalse(result.get('cookies'), msg='Cookies set in cookies field for wrong domain')
        self.assertFalse(ydl.cookiejar.get_cookie_header(fmt['url']), msg='Cookies set in cookiejar for wrong domain')
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|
|
@ -46,6 +46,20 @@ class TestYoutubeDLCookieJar(unittest.TestCase):
|
||||||
# will be ignored
|
# will be ignored
|
||||||
self.assertFalse(cookiejar._cookies)
|
self.assertFalse(cookiejar._cookies)
|
||||||
|
|
||||||
|
def test_get_cookie_header(self):
    # The Cookie header computed for https://www.foobar.foobar must
    # mention HTTPONLY_COOKIE once the fixture file has been loaded
    jar = YoutubeDLCookieJar('./test/testdata/cookies/httponly_cookies.txt')
    jar.load(ignore_expires=True, ignore_discard=True)
    cookie_header = jar.get_cookie_header('https://www.foobar.foobar')
    self.assertIn('HTTPONLY_COOKIE', cookie_header)
|
||||||
|
|
||||||
|
def test_get_cookies_for_url(self):
    # Two cookies are expected for the www host and none for the bare
    # domain
    jar = YoutubeDLCookieJar('./test/testdata/cookies/session_cookies.txt')
    jar.load(ignore_expires=True, ignore_discard=True)

    matching = jar.get_cookies_for_url('https://www.foobar.foobar/')
    self.assertEqual(len(matching), 2)

    non_matching = jar.get_cookies_for_url('https://foobar.foobar/')
    self.assertFalse(non_matching)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|
|
@ -11,6 +11,7 @@ from test.helper import try_rm
|
||||||
|
|
||||||
|
|
||||||
from youtube_dl import YoutubeDL
|
from youtube_dl import YoutubeDL
|
||||||
|
from youtube_dl.utils import DownloadError
|
||||||
|
|
||||||
|
|
||||||
def _download_restricted(url, filename, age):
|
def _download_restricted(url, filename, age):
|
||||||
|
@ -26,7 +27,10 @@ def _download_restricted(url, filename, age):
|
||||||
ydl.add_default_info_extractors()
|
ydl.add_default_info_extractors()
|
||||||
json_filename = os.path.splitext(filename)[0] + '.info.json'
|
json_filename = os.path.splitext(filename)[0] + '.info.json'
|
||||||
try_rm(json_filename)
|
try_rm(json_filename)
|
||||||
ydl.download([url])
|
try:
|
||||||
|
ydl.download([url])
|
||||||
|
except DownloadError:
|
||||||
|
try_rm(json_filename)
|
||||||
res = os.path.exists(json_filename)
|
res = os.path.exists(json_filename)
|
||||||
try_rm(json_filename)
|
try_rm(json_filename)
|
||||||
return res
|
return res
|
||||||
|
@ -38,12 +42,12 @@ class TestAgeRestriction(unittest.TestCase):
|
||||||
self.assertFalse(_download_restricted(url, filename, age))
|
self.assertFalse(_download_restricted(url, filename, age))
|
||||||
|
|
||||||
def test_youtube(self):
|
def test_youtube(self):
|
||||||
self._assert_restricted('07FYdnEawAQ', '07FYdnEawAQ.mp4', 10)
|
self._assert_restricted('HtVdAasjOgU', 'HtVdAasjOgU.mp4', 10)
|
||||||
|
|
||||||
def test_youporn(self):
|
def test_youporn(self):
|
||||||
self._assert_restricted(
|
self._assert_restricted(
|
||||||
'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
|
'https://www.youporn.com/watch/16715086/sex-ed-in-detention-18-asmr/',
|
||||||
'505835.mp4', 2, old_age=25)
|
'16715086.mp4', 2, old_age=25)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
|
@ -48,10 +48,11 @@ class TestCompat(unittest.TestCase):
|
||||||
|
|
||||||
def test_all_present(self):
|
def test_all_present(self):
|
||||||
import youtube_dl.compat
|
import youtube_dl.compat
|
||||||
all_names = youtube_dl.compat.__all__
|
all_names = sorted(
|
||||||
present_names = set(filter(
|
youtube_dl.compat.__all__ + youtube_dl.compat.legacy)
|
||||||
|
present_names = set(map(compat_str, filter(
|
||||||
lambda c: '_' in c and not c.startswith('_'),
|
lambda c: '_' in c and not c.startswith('_'),
|
||||||
dir(youtube_dl.compat))) - set(['unicode_literals'])
|
dir(youtube_dl.compat)))) - set(['unicode_literals'])
|
||||||
self.assertEqual(all_names, sorted(present_names))
|
self.assertEqual(all_names, sorted(present_names))
|
||||||
|
|
||||||
def test_compat_urllib_parse_unquote(self):
|
def test_compat_urllib_parse_unquote(self):
|
||||||
|
|
|
@ -20,15 +20,15 @@ from test.helper import (
|
||||||
|
|
||||||
|
|
||||||
import hashlib
|
import hashlib
|
||||||
import io
|
|
||||||
import json
|
import json
|
||||||
import socket
|
import socket
|
||||||
|
|
||||||
import youtube_dl.YoutubeDL
|
import youtube_dl.YoutubeDL
|
||||||
from youtube_dl.compat import (
|
from youtube_dl.compat import (
|
||||||
compat_http_client,
|
compat_http_client,
|
||||||
compat_urllib_error,
|
|
||||||
compat_HTTPError,
|
compat_HTTPError,
|
||||||
|
compat_open as open,
|
||||||
|
compat_urllib_error,
|
||||||
)
|
)
|
||||||
from youtube_dl.utils import (
|
from youtube_dl.utils import (
|
||||||
DownloadError,
|
DownloadError,
|
||||||
|
@ -148,6 +148,7 @@ def generator(test_case, tname):
|
||||||
try_rm(tc_filename)
|
try_rm(tc_filename)
|
||||||
try_rm(tc_filename + '.part')
|
try_rm(tc_filename + '.part')
|
||||||
try_rm(os.path.splitext(tc_filename)[0] + '.info.json')
|
try_rm(os.path.splitext(tc_filename)[0] + '.info.json')
|
||||||
|
|
||||||
try_rm_tcs_files()
|
try_rm_tcs_files()
|
||||||
try:
|
try:
|
||||||
try_num = 1
|
try_num = 1
|
||||||
|
@ -213,7 +214,15 @@ def generator(test_case, tname):
|
||||||
# First, check test cases' data against extracted data alone
|
# First, check test cases' data against extracted data alone
|
||||||
expect_info_dict(self, tc_res_dict, tc.get('info_dict', {}))
|
expect_info_dict(self, tc_res_dict, tc.get('info_dict', {}))
|
||||||
# Now, check downloaded file consistency
|
# Now, check downloaded file consistency
|
||||||
|
# support test-case with volatile ID, signalled by regexp value
|
||||||
|
if tc.get('info_dict', {}).get('id', '').startswith('re:'):
|
||||||
|
test_id = tc['info_dict']['id']
|
||||||
|
tc['info_dict']['id'] = tc_res_dict['id']
|
||||||
|
else:
|
||||||
|
test_id = None
|
||||||
tc_filename = get_tc_filename(tc)
|
tc_filename = get_tc_filename(tc)
|
||||||
|
if test_id:
|
||||||
|
tc['info_dict']['id'] = test_id
|
||||||
if not test_case.get('params', {}).get('skip_download', False):
|
if not test_case.get('params', {}).get('skip_download', False):
|
||||||
self.assertTrue(os.path.exists(tc_filename), msg='Missing file ' + tc_filename)
|
self.assertTrue(os.path.exists(tc_filename), msg='Missing file ' + tc_filename)
|
||||||
self.assertTrue(tc_filename in finished_hook_called)
|
self.assertTrue(tc_filename in finished_hook_called)
|
||||||
|
@ -236,7 +245,7 @@ def generator(test_case, tname):
|
||||||
self.assertTrue(
|
self.assertTrue(
|
||||||
os.path.exists(info_json_fn),
|
os.path.exists(info_json_fn),
|
||||||
'Missing info file %s' % info_json_fn)
|
'Missing info file %s' % info_json_fn)
|
||||||
with io.open(info_json_fn, encoding='utf-8') as infof:
|
with open(info_json_fn, encoding='utf-8') as infof:
|
||||||
info_dict = json.load(infof)
|
info_dict = json.load(infof)
|
||||||
expect_info_dict(self, info_dict, tc.get('info_dict', {}))
|
expect_info_dict(self, info_dict, tc.get('info_dict', {}))
|
||||||
finally:
|
finally:
|
||||||
|
|
258
test/test_downloader_external.py
Normal file
258
test/test_downloader_external.py
Normal file
|
@ -0,0 +1,258 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
# Allow direct execution
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
import subprocess
|
||||||
|
import unittest
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
|
from test.helper import (
|
||||||
|
FakeLogger,
|
||||||
|
FakeYDL,
|
||||||
|
http_server_port,
|
||||||
|
try_rm,
|
||||||
|
)
|
||||||
|
from youtube_dl import YoutubeDL
|
||||||
|
from youtube_dl.compat import (
|
||||||
|
compat_http_cookiejar_Cookie,
|
||||||
|
compat_http_server,
|
||||||
|
compat_kwargs,
|
||||||
|
)
|
||||||
|
from youtube_dl.utils import (
|
||||||
|
encodeFilename,
|
||||||
|
join_nonempty,
|
||||||
|
)
|
||||||
|
from youtube_dl.downloader.external import (
|
||||||
|
Aria2cFD,
|
||||||
|
Aria2pFD,
|
||||||
|
AxelFD,
|
||||||
|
CurlFD,
|
||||||
|
FFmpegFD,
|
||||||
|
HttpieFD,
|
||||||
|
WgetFD,
|
||||||
|
)
|
||||||
|
import threading
|
||||||
|
|
||||||
|
# Number of bytes in the body served by HTTPTestRequestHandler when no
# valid Range is requested; also the expected size of a downloaded file
TEST_SIZE = 10 * 1024

# Keyword arguments describing the cookie that the tests add to the
# cookiejar (see cookiejar_Cookie())
TEST_COOKIE = {
    'version': 0,
    'name': 'test',
    'value': 'ytdlp',
    'port': None,
    'port_specified': False,
    'domain': '.example.com',
    'domain_specified': True,
    'domain_initial_dot': False,
    'path': '/',
    'path_specified': True,
    'secure': False,
    'expires': None,
    'discard': False,
    'comment': None,
    'comment_url': None,
    'rest': {},
}

# The cookie's name and value joined with '=', as expected in the
# generated command lines (presumably 'test=ytdlp' -- confirm against
# join_nonempty)
TEST_COOKIE_VALUE = join_nonempty('name', 'value', delim='=', from_dict=TEST_COOKIE)

# Info dict passed to the downloaders under test
TEST_INFO = {'url': 'http://www.example.com/'}
|
||||||
|
|
||||||
|
|
||||||
|
def cookiejar_Cookie(**cookie_args):
    """Build a cookiejar Cookie from keyword arguments.

    The keyword arguments are passed through compat_kwargs() and then
    forwarded to compat_http_cookiejar_Cookie.
    """
    normalised_args = compat_kwargs(cookie_args)
    return compat_http_cookiejar_Cookie(**normalised_args)
|
||||||
|
|
||||||
|
|
||||||
|
def ifExternalFDAvailable(externalFD):
    """Return a decorator that skips a test unless `externalFD` is available.

    `externalFD` must provide available() and get_basename(); the skip
    reason is '<basename> not found'.
    """
    is_available = externalFD.available()
    skip_reason = externalFD.get_basename() + ' not found'
    return unittest.skipUnless(is_available, skip_reason)
|
||||||
|
|
||||||
|
|
||||||
|
class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
    """HTTP handler serving a fixed-size dummy video on a few test paths.

    Each path selects whether a Range request is honoured and whether a
    Content-Length header is sent (see do_GET).
    """

    def log_message(self, format, *args):
        # Keep the test output free of per-request log lines
        pass

    def send_content_range(self, total=None):
        """Send a Content-Range header if the request has a closed byte range.

        Only a Range header of the form `bytes=<start>-<end>` is recognised.

        Returns the number of bytes in the requested range, or `total`
        when no such range was requested.
        """
        range_header = self.headers.get('Range')
        start = end = None
        if range_header:
            mobj = re.match(r'bytes=(\d+)-(\d+)', range_header)
            if mobj:
                start, end = (int(mobj.group(i)) for i in (1, 2))
        valid_range = start is not None and end is not None
        if valid_range:
            content_range = 'bytes %d-%d' % (start, end)
            if total:
                content_range += '/%d' % total
            self.send_header('Content-Range', content_range)
        return (end - start + 1) if valid_range else total

    def serve(self, range=True, content_length=True):
        """Send the response headers followed by a body of '#' bytes.

        range: honour a Range request header (via send_content_range).
        content_length: include a Content-Length header.
        """
        self.send_response(200)
        self.send_header('Content-Type', 'video/mp4')
        size = TEST_SIZE
        if range:
            size = self.send_content_range(TEST_SIZE)
        if content_length:
            self.send_header('Content-Length', size)
        self.end_headers()
        self.wfile.write(b'#' * size)

    def do_GET(self):
        """Dispatch on the request path to the matching serve() variant."""
        if self.path == '/regular':
            self.serve()
        elif self.path == '/no-content-length':
            self.serve(content_length=False)
        elif self.path == '/no-range':
            self.serve(range=False)
        elif self.path == '/no-range-no-content-length':
            self.serve(range=False, content_length=False)
        else:
            # Raise explicitly: `assert False` would be stripped under
            # `python -O` and the request would then go unanswered silently
            raise AssertionError('unrecognised server path')
|
||||||
|
|
||||||
|
|
||||||
|
@ifExternalFDAvailable(Aria2pFD)
class TestAria2pFD(unittest.TestCase):
    """Download from a local test server through the aria2p downloader."""

    def setUp(self):
        # Serve the test endpoints from a background thread on a free port
        self.httpd = compat_http_server.HTTPServer(
            ('127.0.0.1', 0), HTTPTestRequestHandler)
        self.port = http_server_port(self.httpd)
        self.server_thread = threading.Thread(target=self.httpd.serve_forever)
        self.server_thread.daemon = True
        self.server_thread.start()

    def download(self, params, ep):
        """Download endpoint `ep` with `params` and check the file size.

        An `aria2c --enable-rpc` process is started for the duration of
        the download and is always killed afterwards.
        """
        with subprocess.Popen(
            ['aria2c', '--enable-rpc'],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL
        ) as process:
            try:
                # poll() returns None only while the process is running;
                # `not process.poll()` would also accept exit status 0
                if process.poll() is None:
                    filename = 'testfile.mp4'
                    params['logger'] = FakeLogger()
                    params['outtmpl'] = filename
                    ydl = YoutubeDL(params)
                    try_rm(encodeFilename(filename))
                    self.assertEqual(ydl.download(['http://127.0.0.1:%d/%s' % (self.port, ep)]), 0)
                    self.assertEqual(os.path.getsize(encodeFilename(filename)), TEST_SIZE)
                    try_rm(encodeFilename(filename))
            finally:
                # Without this a failed assertion would leave the RPC
                # server running and the `with` block would wait on it
                # indefinitely
                process.kill()

    def download_all(self, params):
        """Run download() against every endpoint of the test server."""
        for ep in ('regular', 'no-content-length', 'no-range', 'no-range-no-content-length'):
            self.download(params, ep)

    def test_regular(self):
        self.download_all({'external_downloader': 'aria2p'})

    def test_chunked(self):
        self.download_all({
            'external_downloader': 'aria2p',
            'http_chunk_size': 1000,
        })
|
||||||
|
|
||||||
|
|
||||||
|
@ifExternalFDAvailable(HttpieFD)
class TestHttpieFD(unittest.TestCase):
    """Check the command line built by HttpieFD."""

    def test_make_cmd(self):
        plain_cmd = ['http', '--download', '--output', 'test', 'http://www.example.com/']
        with FakeYDL() as ydl:
            fd = HttpieFD(ydl, {})
            # Empty cookiejar: only the basic arguments are expected
            self.assertEqual(fd._make_cmd('test', TEST_INFO), plain_cmd)

            # Test cookie header is added
            ydl.cookiejar.set_cookie(cookiejar_Cookie(**TEST_COOKIE))
            self.assertEqual(
                fd._make_cmd('test', TEST_INFO),
                plain_cmd + ['Cookie:' + TEST_COOKIE_VALUE])
|
||||||
|
|
||||||
|
|
||||||
|
@ifExternalFDAvailable(AxelFD)
class TestAxelFD(unittest.TestCase):
    """Check the command line built by AxelFD."""

    def test_make_cmd(self):
        url_args = ['--', 'http://www.example.com/']
        with FakeYDL() as ydl:
            fd = AxelFD(ydl, {})
            # Empty cookiejar: no header or redirect options expected
            self.assertEqual(
                fd._make_cmd('test', TEST_INFO),
                ['axel', '-o', 'test'] + url_args)

            # Test cookie header is added
            ydl.cookiejar.set_cookie(cookiejar_Cookie(**TEST_COOKIE))
            cookie_args = ['-H', 'Cookie: ' + TEST_COOKIE_VALUE, '--max-redirect=0']
            self.assertEqual(
                fd._make_cmd('test', TEST_INFO),
                ['axel', '-o', 'test'] + cookie_args + url_args)
|
||||||
|
|
||||||
|
|
||||||
|
@ifExternalFDAvailable(WgetFD)
class TestWgetFD(unittest.TestCase):
    """Check when WgetFD adds `--load-cookies` to its command line."""

    def test_make_cmd(self):
        option = '--load-cookies'
        with FakeYDL() as ydl:
            fd = WgetFD(ydl, {})
            cmd_without_cookies = fd._make_cmd('test', TEST_INFO)
            self.assertNotIn(option, cmd_without_cookies)

            # Test cookiejar tempfile arg is added
            ydl.cookiejar.set_cookie(cookiejar_Cookie(**TEST_COOKIE))
            cmd_with_cookies = fd._make_cmd('test', TEST_INFO)
            self.assertIn(option, cmd_with_cookies)
|
||||||
|
|
||||||
|
|
||||||
|
@ifExternalFDAvailable(CurlFD)
class TestCurlFD(unittest.TestCase):
    """Check when CurlFD adds `--cookie` and the cookie value."""

    def test_make_cmd(self):
        with FakeYDL() as ydl:
            fd = CurlFD(ydl, {})
            cmd_without_cookies = fd._make_cmd('test', TEST_INFO)
            self.assertNotIn('--cookie', cmd_without_cookies)

            # Test cookie header is added
            ydl.cookiejar.set_cookie(cookiejar_Cookie(**TEST_COOKIE))
            # The command is built afresh for each of the two checks
            for expected_arg in ('--cookie', TEST_COOKIE_VALUE):
                self.assertIn(expected_arg, fd._make_cmd('test', TEST_INFO))
|
||||||
|
|
||||||
|
|
||||||
|
@ifExternalFDAvailable(Aria2cFD)
class TestAria2cFD(unittest.TestCase):
    """Check when Aria2cFD passes a cookies tempfile to aria2c."""

    def test_make_cmd(self):
        with FakeYDL() as ydl:
            fd = Aria2cFD(ydl, {})
            # Empty cookiejar: building the command must not set up a
            # cookies tempfile
            fd._make_cmd('test', TEST_INFO)
            has_tempfile = hasattr(fd, '_cookies_tempfile')
            self.assertFalse(has_tempfile)

            # Test cookiejar tempfile arg is added
            ydl.cookiejar.set_cookie(cookiejar_Cookie(**TEST_COOKIE))
            built_cmd = fd._make_cmd('test', TEST_INFO)
            expected_arg = '--load-cookies=%s' % fd._cookies_tempfile
            self.assertIn(expected_arg, built_cmd)
|
||||||
|
|
||||||
|
|
||||||
|
@ifExternalFDAvailable(FFmpegFD)
class TestFFmpegFD(unittest.TestCase):
    """Check the ffmpeg arguments built by FFmpegFD."""

    # Last argument list captured by _test_cmd
    _args = []

    def _test_cmd(self, args):
        # Installed as the downloader's _debug_cmd to record the arguments
        self._args = args

    def test_make_cmd(self):
        input_args = ['-i', 'http://www.example.com/']
        output_args = ['-c', 'copy', '-f', 'mp4', 'file:test']
        with FakeYDL() as ydl:
            fd = FFmpegFD(ydl, {})
            fd._debug_cmd = self._test_cmd
            info = dict(TEST_INFO, ext='mp4')

            fd._call_downloader('test', info)
            self.assertEqual(
                self._args,
                ['ffmpeg', '-y'] + input_args + output_args)

            # Test cookies arg is added
            ydl.cookiejar.set_cookie(cookiejar_Cookie(**TEST_COOKIE))
            fd._call_downloader('test', info)
            cookie_args = ['-cookies', TEST_COOKIE_VALUE + '; path=/; domain=.example.com;\r\n']
            self.assertEqual(
                self._args,
                ['ffmpeg', '-y'] + cookie_args + input_args + output_args)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
unittest.main()
|
|
@ -9,7 +9,11 @@ import sys
|
||||||
import unittest
|
import unittest
|
||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
from test.helper import http_server_port, try_rm
|
from test.helper import (
|
||||||
|
FakeLogger,
|
||||||
|
http_server_port,
|
||||||
|
try_rm,
|
||||||
|
)
|
||||||
from youtube_dl import YoutubeDL
|
from youtube_dl import YoutubeDL
|
||||||
from youtube_dl.compat import compat_http_server
|
from youtube_dl.compat import compat_http_server
|
||||||
from youtube_dl.downloader.http import HttpFD
|
from youtube_dl.downloader.http import HttpFD
|
||||||
|
@ -66,17 +70,6 @@ class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
|
||||||
assert False
|
assert False
|
||||||
|
|
||||||
|
|
||||||
class FakeLogger(object):
|
|
||||||
def debug(self, msg):
|
|
||||||
pass
|
|
||||||
|
|
||||||
def warning(self, msg):
|
|
||||||
pass
|
|
||||||
|
|
||||||
def error(self, msg):
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
class TestHttpFD(unittest.TestCase):
|
class TestHttpFD(unittest.TestCase):
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
self.httpd = compat_http_server.HTTPServer(
|
self.httpd = compat_http_server.HTTPServer(
|
||||||
|
@ -95,7 +88,7 @@ class TestHttpFD(unittest.TestCase):
|
||||||
self.assertTrue(downloader.real_download(filename, {
|
self.assertTrue(downloader.real_download(filename, {
|
||||||
'url': 'http://127.0.0.1:%d/%s' % (self.port, ep),
|
'url': 'http://127.0.0.1:%d/%s' % (self.port, ep),
|
||||||
}))
|
}))
|
||||||
self.assertEqual(os.path.getsize(encodeFilename(filename)), TEST_SIZE)
|
self.assertEqual(os.path.getsize(encodeFilename(filename)), TEST_SIZE, ep)
|
||||||
try_rm(encodeFilename(filename))
|
try_rm(encodeFilename(filename))
|
||||||
|
|
||||||
def download_all(self, params):
|
def download_all(self, params):
|
||||||
|
|
|
@ -8,46 +8,54 @@ import unittest
|
||||||
import sys
|
import sys
|
||||||
import os
|
import os
|
||||||
import subprocess
|
import subprocess
|
||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|
||||||
|
|
||||||
from youtube_dl.utils import encodeArgument
|
|
||||||
|
|
||||||
rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||||
|
|
||||||
|
sys.path.insert(0, rootDir)
|
||||||
|
|
||||||
try:
|
from youtube_dl.compat import compat_register_utf8, compat_subprocess_get_DEVNULL
|
||||||
_DEV_NULL = subprocess.DEVNULL
|
from youtube_dl.utils import encodeArgument
|
||||||
except AttributeError:
|
|
||||||
_DEV_NULL = open(os.devnull, 'wb')
|
compat_register_utf8()
|
||||||
|
|
||||||
|
|
||||||
|
_DEV_NULL = compat_subprocess_get_DEVNULL()
|
||||||
|
|
||||||
|
|
||||||
class TestExecution(unittest.TestCase):
|
class TestExecution(unittest.TestCase):
|
||||||
|
def setUp(self):
|
||||||
|
self.module = 'youtube_dl'
|
||||||
|
if sys.version_info < (2, 7):
|
||||||
|
self.module += '.__main__'
|
||||||
|
|
||||||
def test_import(self):
|
def test_import(self):
|
||||||
subprocess.check_call([sys.executable, '-c', 'import youtube_dl'], cwd=rootDir)
|
subprocess.check_call([sys.executable, '-c', 'import youtube_dl'], cwd=rootDir)
|
||||||
|
|
||||||
def test_module_exec(self):
|
def test_module_exec(self):
|
||||||
if sys.version_info >= (2, 7): # Python 2.6 doesn't support package execution
|
subprocess.check_call([sys.executable, '-m', self.module, '--version'], cwd=rootDir, stdout=_DEV_NULL)
|
||||||
subprocess.check_call([sys.executable, '-m', 'youtube_dl', '--version'], cwd=rootDir, stdout=_DEV_NULL)
|
|
||||||
|
|
||||||
def test_main_exec(self):
|
def test_main_exec(self):
|
||||||
subprocess.check_call([sys.executable, 'youtube_dl/__main__.py', '--version'], cwd=rootDir, stdout=_DEV_NULL)
|
subprocess.check_call([sys.executable, os.path.normpath('youtube_dl/__main__.py'), '--version'], cwd=rootDir, stdout=_DEV_NULL)
|
||||||
|
|
||||||
def test_cmdline_umlauts(self):
|
def test_cmdline_umlauts(self):
|
||||||
|
os.environ['PYTHONIOENCODING'] = 'utf-8'
|
||||||
p = subprocess.Popen(
|
p = subprocess.Popen(
|
||||||
[sys.executable, 'youtube_dl/__main__.py', encodeArgument('ä'), '--version'],
|
[sys.executable, '-m', self.module, encodeArgument('ä'), '--version'],
|
||||||
cwd=rootDir, stdout=_DEV_NULL, stderr=subprocess.PIPE)
|
cwd=rootDir, stdout=_DEV_NULL, stderr=subprocess.PIPE)
|
||||||
_, stderr = p.communicate()
|
_, stderr = p.communicate()
|
||||||
self.assertFalse(stderr)
|
self.assertFalse(stderr)
|
||||||
|
|
||||||
def test_lazy_extractors(self):
|
def test_lazy_extractors(self):
|
||||||
|
lazy_extractors = os.path.normpath('youtube_dl/extractor/lazy_extractors.py')
|
||||||
try:
|
try:
|
||||||
subprocess.check_call([sys.executable, 'devscripts/make_lazy_extractors.py', 'youtube_dl/extractor/lazy_extractors.py'], cwd=rootDir, stdout=_DEV_NULL)
|
subprocess.check_call([sys.executable, os.path.normpath('devscripts/make_lazy_extractors.py'), lazy_extractors], cwd=rootDir, stdout=_DEV_NULL)
|
||||||
subprocess.check_call([sys.executable, 'test/test_all_urls.py'], cwd=rootDir, stdout=_DEV_NULL)
|
subprocess.check_call([sys.executable, os.path.normpath('test/test_all_urls.py')], cwd=rootDir, stdout=_DEV_NULL)
|
||||||
finally:
|
finally:
|
||||||
try:
|
for x in ('', 'c') if sys.version_info[0] < 3 else ('',):
|
||||||
os.remove('youtube_dl/extractor/lazy_extractors.py')
|
try:
|
||||||
except (IOError, OSError):
|
os.remove(lazy_extractors + x)
|
||||||
pass
|
except OSError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
|
@ -8,30 +8,163 @@ import sys
|
||||||
import unittest
|
import unittest
|
||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
from test.helper import http_server_port
|
import contextlib
|
||||||
from youtube_dl import YoutubeDL
|
import gzip
|
||||||
from youtube_dl.compat import compat_http_server, compat_urllib_request
|
import io
|
||||||
import ssl
|
import ssl
|
||||||
|
import tempfile
|
||||||
import threading
|
import threading
|
||||||
|
import zlib
|
||||||
|
|
||||||
|
# avoid deprecated alias assertRaisesRegexp
|
||||||
|
if hasattr(unittest.TestCase, 'assertRaisesRegex'):
|
||||||
|
unittest.TestCase.assertRaisesRegexp = unittest.TestCase.assertRaisesRegex
|
||||||
|
|
||||||
|
try:
|
||||||
|
import brotli
|
||||||
|
except ImportError:
|
||||||
|
brotli = None
|
||||||
|
try:
|
||||||
|
from urllib.request import pathname2url
|
||||||
|
except ImportError:
|
||||||
|
from urllib import pathname2url
|
||||||
|
|
||||||
|
from youtube_dl.compat import (
|
||||||
|
compat_http_cookiejar_Cookie,
|
||||||
|
compat_http_server,
|
||||||
|
compat_str as str,
|
||||||
|
compat_urllib_error,
|
||||||
|
compat_urllib_HTTPError,
|
||||||
|
compat_urllib_parse,
|
||||||
|
compat_urllib_request,
|
||||||
|
)
|
||||||
|
|
||||||
|
from youtube_dl.utils import (
|
||||||
|
sanitized_Request,
|
||||||
|
update_Request,
|
||||||
|
urlencode_postdata,
|
||||||
|
)
|
||||||
|
|
||||||
|
from test.helper import (
|
||||||
|
expectedFailureIf,
|
||||||
|
FakeYDL,
|
||||||
|
FakeLogger,
|
||||||
|
http_server_port,
|
||||||
|
)
|
||||||
|
from youtube_dl import YoutubeDL
|
||||||
|
|
||||||
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
|
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||||
|
|
||||||
|
|
||||||
class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
|
class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
|
||||||
|
protocol_version = 'HTTP/1.1'
|
||||||
|
|
||||||
|
# work-around old/new -style class inheritance
|
||||||
|
def super(self, meth_name, *args, **kwargs):
|
||||||
|
from types import MethodType
|
||||||
|
try:
|
||||||
|
super()
|
||||||
|
fn = lambda s, m, *a, **k: getattr(super(), m)(*a, **k)
|
||||||
|
except TypeError:
|
||||||
|
fn = lambda s, m, *a, **k: getattr(compat_http_server.BaseHTTPRequestHandler, m)(s, *a, **k)
|
||||||
|
self.super = MethodType(fn, self)
|
||||||
|
return self.super(meth_name, *args, **kwargs)
|
||||||
|
|
||||||
def log_message(self, format, *args):
|
def log_message(self, format, *args):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
def _headers(self):
|
||||||
|
payload = str(self.headers).encode('utf-8')
|
||||||
|
self.send_response(200)
|
||||||
|
self.send_header('Content-Type', 'application/json')
|
||||||
|
self.send_header('Content-Length', str(len(payload)))
|
||||||
|
self.end_headers()
|
||||||
|
self.wfile.write(payload)
|
||||||
|
|
||||||
|
def _redirect(self):
|
||||||
|
self.send_response(int(self.path[len('/redirect_'):]))
|
||||||
|
self.send_header('Location', '/method')
|
||||||
|
self.send_header('Content-Length', '0')
|
||||||
|
self.end_headers()
|
||||||
|
|
||||||
|
def _method(self, method, payload=None):
|
||||||
|
self.send_response(200)
|
||||||
|
self.send_header('Content-Length', str(len(payload or '')))
|
||||||
|
self.send_header('Method', method)
|
||||||
|
self.end_headers()
|
||||||
|
if payload:
|
||||||
|
self.wfile.write(payload)
|
||||||
|
|
||||||
|
def _status(self, status):
|
||||||
|
payload = '<html>{0} NOT FOUND</html>'.format(status).encode('utf-8')
|
||||||
|
self.send_response(int(status))
|
||||||
|
self.send_header('Content-Type', 'text/html; charset=utf-8')
|
||||||
|
self.send_header('Content-Length', str(len(payload)))
|
||||||
|
self.end_headers()
|
||||||
|
self.wfile.write(payload)
|
||||||
|
|
||||||
|
def _read_data(self):
|
||||||
|
if 'Content-Length' in self.headers:
|
||||||
|
return self.rfile.read(int(self.headers['Content-Length']))
|
||||||
|
|
||||||
|
def _test_url(self, path, host='127.0.0.1', scheme='http', port=None):
|
||||||
|
return '{0}://{1}:{2}/{3}'.format(
|
||||||
|
scheme, host,
|
||||||
|
port if port is not None
|
||||||
|
else http_server_port(self.server), path)
|
||||||
|
|
||||||
|
def do_POST(self):
|
||||||
|
data = self._read_data()
|
||||||
|
if self.path.startswith('/redirect_'):
|
||||||
|
self._redirect()
|
||||||
|
elif self.path.startswith('/method'):
|
||||||
|
self._method('POST', data)
|
||||||
|
elif self.path.startswith('/headers'):
|
||||||
|
self._headers()
|
||||||
|
else:
|
||||||
|
self._status(404)
|
||||||
|
|
||||||
|
def do_HEAD(self):
|
||||||
|
if self.path.startswith('/redirect_'):
|
||||||
|
self._redirect()
|
||||||
|
elif self.path.startswith('/method'):
|
||||||
|
self._method('HEAD')
|
||||||
|
else:
|
||||||
|
self._status(404)
|
||||||
|
|
||||||
|
def do_PUT(self):
|
||||||
|
data = self._read_data()
|
||||||
|
if self.path.startswith('/redirect_'):
|
||||||
|
self._redirect()
|
||||||
|
elif self.path.startswith('/method'):
|
||||||
|
self._method('PUT', data)
|
||||||
|
else:
|
||||||
|
self._status(404)
|
||||||
|
|
||||||
def do_GET(self):
|
def do_GET(self):
|
||||||
|
|
||||||
|
def respond(payload=b'<html><video src="/vid.mp4" /></html>',
|
||||||
|
payload_type='text/html; charset=utf-8',
|
||||||
|
payload_encoding=None,
|
||||||
|
resp_code=200):
|
||||||
|
self.send_response(resp_code)
|
||||||
|
self.send_header('Content-Type', payload_type)
|
||||||
|
if payload_encoding:
|
||||||
|
self.send_header('Content-Encoding', payload_encoding)
|
||||||
|
self.send_header('Content-Length', str(len(payload))) # required for persistent connections
|
||||||
|
self.end_headers()
|
||||||
|
self.wfile.write(payload)
|
||||||
|
|
||||||
|
def gzip_compress(p):
|
||||||
|
buf = io.BytesIO()
|
||||||
|
with contextlib.closing(gzip.GzipFile(fileobj=buf, mode='wb')) as f:
|
||||||
|
f.write(p)
|
||||||
|
return buf.getvalue()
|
||||||
|
|
||||||
if self.path == '/video.html':
|
if self.path == '/video.html':
|
||||||
self.send_response(200)
|
respond()
|
||||||
self.send_header('Content-Type', 'text/html; charset=utf-8')
|
|
||||||
self.end_headers()
|
|
||||||
self.wfile.write(b'<html><video src="/vid.mp4" /></html>')
|
|
||||||
elif self.path == '/vid.mp4':
|
elif self.path == '/vid.mp4':
|
||||||
self.send_response(200)
|
respond(b'\x00\x00\x00\x00\x20\x66\x74[video]', 'video/mp4')
|
||||||
self.send_header('Content-Type', 'video/mp4')
|
|
||||||
self.end_headers()
|
|
||||||
self.wfile.write(b'\x00\x00\x00\x00\x20\x66\x74[video]')
|
|
||||||
elif self.path == '/302':
|
elif self.path == '/302':
|
||||||
if sys.version_info[0] == 3:
|
if sys.version_info[0] == 3:
|
||||||
# XXX: Python 3 http server does not allow non-ASCII header values
|
# XXX: Python 3 http server does not allow non-ASCII header values
|
||||||
|
@ -39,71 +172,336 @@ class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
|
||||||
self.end_headers()
|
self.end_headers()
|
||||||
return
|
return
|
||||||
|
|
||||||
new_url = 'http://127.0.0.1:%d/中文.html' % http_server_port(self.server)
|
new_url = self._test_url('中文.html')
|
||||||
self.send_response(302)
|
self.send_response(302)
|
||||||
self.send_header(b'Location', new_url.encode('utf-8'))
|
self.send_header(b'Location', new_url.encode('utf-8'))
|
||||||
self.end_headers()
|
self.end_headers()
|
||||||
elif self.path == '/%E4%B8%AD%E6%96%87.html':
|
elif self.path == '/%E4%B8%AD%E6%96%87.html':
|
||||||
self.send_response(200)
|
respond()
|
||||||
self.send_header('Content-Type', 'text/html; charset=utf-8')
|
elif self.path == '/%c7%9f':
|
||||||
|
respond()
|
||||||
|
elif self.path == '/redirect_dotsegments':
|
||||||
|
self.send_response(301)
|
||||||
|
# redirect to /headers but with dot segments before
|
||||||
|
self.send_header('Location', '/a/b/./../../headers')
|
||||||
|
self.send_header('Content-Length', '0')
|
||||||
self.end_headers()
|
self.end_headers()
|
||||||
self.wfile.write(b'<html><video src="/vid.mp4" /></html>')
|
elif self.path.startswith('/redirect_'):
|
||||||
|
self._redirect()
|
||||||
|
elif self.path.startswith('/method'):
|
||||||
|
self._method('GET')
|
||||||
|
elif self.path.startswith('/headers'):
|
||||||
|
self._headers()
|
||||||
|
elif self.path.startswith('/308-to-headers'):
|
||||||
|
self.send_response(308)
|
||||||
|
self.send_header('Location', '/headers')
|
||||||
|
self.send_header('Content-Length', '0')
|
||||||
|
self.end_headers()
|
||||||
|
elif self.path == '/trailing_garbage':
|
||||||
|
payload = b'<html><video src="/vid.mp4" /></html>'
|
||||||
|
compressed = gzip_compress(payload) + b'trailing garbage'
|
||||||
|
respond(compressed, payload_encoding='gzip')
|
||||||
|
elif self.path == '/302-non-ascii-redirect':
|
||||||
|
new_url = self._test_url('中文.html')
|
||||||
|
# actually respond with permanent redirect
|
||||||
|
self.send_response(301)
|
||||||
|
self.send_header('Location', new_url)
|
||||||
|
self.send_header('Content-Length', '0')
|
||||||
|
self.end_headers()
|
||||||
|
elif self.path == '/content-encoding':
|
||||||
|
encodings = self.headers.get('ytdl-encoding', '')
|
||||||
|
payload = b'<html><video src="/vid.mp4" /></html>'
|
||||||
|
for encoding in filter(None, (e.strip() for e in encodings.split(','))):
|
||||||
|
if encoding == 'br' and brotli:
|
||||||
|
payload = brotli.compress(payload)
|
||||||
|
elif encoding == 'gzip':
|
||||||
|
payload = gzip_compress(payload)
|
||||||
|
elif encoding == 'deflate':
|
||||||
|
payload = zlib.compress(payload)
|
||||||
|
elif encoding == 'unsupported':
|
||||||
|
payload = b'raw'
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
self._status(415)
|
||||||
|
return
|
||||||
|
respond(payload, payload_encoding=encodings)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
assert False
|
self._status(404)
|
||||||
|
|
||||||
|
def send_header(self, keyword, value):
|
||||||
|
"""
|
||||||
|
Forcibly allow HTTP server to send non percent-encoded non-ASCII characters in headers.
|
||||||
|
This is against what is defined in RFC 3986: but we need to test that we support this
|
||||||
|
since some sites incorrectly do this.
|
||||||
|
"""
|
||||||
|
if keyword.lower() == 'connection':
|
||||||
|
return self.super('send_header', keyword, value)
|
||||||
|
|
||||||
class FakeLogger(object):
|
if not hasattr(self, '_headers_buffer'):
|
||||||
def debug(self, msg):
|
self._headers_buffer = []
|
||||||
pass
|
|
||||||
|
|
||||||
def warning(self, msg):
|
self._headers_buffer.append('{0}: {1}\r\n'.format(keyword, value).encode('utf-8'))
|
||||||
pass
|
|
||||||
|
|
||||||
def error(self, msg):
|
def end_headers(self):
|
||||||
pass
|
if hasattr(self, '_headers_buffer'):
|
||||||
|
self.wfile.write(b''.join(self._headers_buffer))
|
||||||
|
self._headers_buffer = []
|
||||||
|
self.super('end_headers')
|
||||||
|
|
||||||
|
|
||||||
class TestHTTP(unittest.TestCase):
|
class TestHTTP(unittest.TestCase):
|
||||||
|
# when does it make sense to check the SSL certificate?
|
||||||
|
_check_cert = (
|
||||||
|
sys.version_info >= (3, 2)
|
||||||
|
or (sys.version_info[0] == 2 and sys.version_info[1:] >= (7, 19)))
|
||||||
|
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
self.httpd = compat_http_server.HTTPServer(
|
# HTTP server
|
||||||
|
self.http_httpd = compat_http_server.HTTPServer(
|
||||||
('127.0.0.1', 0), HTTPTestRequestHandler)
|
('127.0.0.1', 0), HTTPTestRequestHandler)
|
||||||
self.port = http_server_port(self.httpd)
|
self.http_port = http_server_port(self.http_httpd)
|
||||||
self.server_thread = threading.Thread(target=self.httpd.serve_forever)
|
|
||||||
self.server_thread.daemon = True
|
self.http_server_thread = threading.Thread(target=self.http_httpd.serve_forever)
|
||||||
self.server_thread.start()
|
self.http_server_thread.daemon = True
|
||||||
|
self.http_server_thread.start()
|
||||||
|
|
||||||
|
try:
|
||||||
|
from http.server import ThreadingHTTPServer
|
||||||
|
except ImportError:
|
||||||
|
try:
|
||||||
|
from socketserver import ThreadingMixIn
|
||||||
|
except ImportError:
|
||||||
|
from SocketServer import ThreadingMixIn
|
||||||
|
|
||||||
|
class ThreadingHTTPServer(ThreadingMixIn, compat_http_server.HTTPServer):
|
||||||
|
pass
|
||||||
|
|
||||||
|
# HTTPS server
|
||||||
|
certfn = os.path.join(TEST_DIR, 'testcert.pem')
|
||||||
|
self.https_httpd = ThreadingHTTPServer(
|
||||||
|
('127.0.0.1', 0), HTTPTestRequestHandler)
|
||||||
|
try:
|
||||||
|
sslctx = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
|
||||||
|
sslctx.verify_mode = ssl.CERT_NONE
|
||||||
|
sslctx.check_hostname = False
|
||||||
|
sslctx.load_cert_chain(certfn, None)
|
||||||
|
self.https_httpd.socket = sslctx.wrap_socket(
|
||||||
|
self.https_httpd.socket, server_side=True)
|
||||||
|
except AttributeError:
|
||||||
|
self.https_httpd.socket = ssl.wrap_socket(
|
||||||
|
self.https_httpd.socket, certfile=certfn, server_side=True)
|
||||||
|
|
||||||
|
self.https_port = http_server_port(self.https_httpd)
|
||||||
|
self.https_server_thread = threading.Thread(target=self.https_httpd.serve_forever)
|
||||||
|
self.https_server_thread.daemon = True
|
||||||
|
self.https_server_thread.start()
|
||||||
|
|
||||||
|
def tearDown(self):
|
||||||
|
|
||||||
|
def closer(svr):
|
||||||
|
def _closer():
|
||||||
|
svr.shutdown()
|
||||||
|
svr.server_close()
|
||||||
|
return _closer
|
||||||
|
|
||||||
|
shutdown_thread = threading.Thread(target=closer(self.http_httpd))
|
||||||
|
shutdown_thread.start()
|
||||||
|
self.http_server_thread.join(2.0)
|
||||||
|
|
||||||
|
shutdown_thread = threading.Thread(target=closer(self.https_httpd))
|
||||||
|
shutdown_thread.start()
|
||||||
|
self.https_server_thread.join(2.0)
|
||||||
|
|
||||||
|
def _test_url(self, path, host='127.0.0.1', scheme='http', port=None):
|
||||||
|
return '{0}://{1}:{2}/{3}'.format(
|
||||||
|
scheme, host,
|
||||||
|
port if port is not None
|
||||||
|
else self.https_port if scheme == 'https'
|
||||||
|
else self.http_port, path)
|
||||||
|
|
||||||
|
@unittest.skipUnless(_check_cert, 'No support for certificate check in SSL')
|
||||||
|
def test_nocheckcertificate(self):
|
||||||
|
with FakeYDL({'logger': FakeLogger()}) as ydl:
|
||||||
|
with self.assertRaises(compat_urllib_error.URLError):
|
||||||
|
ydl.urlopen(sanitized_Request(self._test_url('headers', scheme='https')))
|
||||||
|
|
||||||
|
with FakeYDL({'logger': FakeLogger(), 'nocheckcertificate': True}) as ydl:
|
||||||
|
r = ydl.urlopen(sanitized_Request(self._test_url('headers', scheme='https')))
|
||||||
|
self.assertEqual(r.getcode(), 200)
|
||||||
|
r.close()
|
||||||
|
|
||||||
|
def test_percent_encode(self):
|
||||||
|
with FakeYDL() as ydl:
|
||||||
|
# Unicode characters should be encoded with uppercase percent-encoding
|
||||||
|
res = ydl.urlopen(sanitized_Request(self._test_url('中文.html')))
|
||||||
|
self.assertEqual(res.getcode(), 200)
|
||||||
|
res.close()
|
||||||
|
# don't normalize existing percent encodings
|
||||||
|
res = ydl.urlopen(sanitized_Request(self._test_url('%c7%9f')))
|
||||||
|
self.assertEqual(res.getcode(), 200)
|
||||||
|
res.close()
|
||||||
|
|
||||||
def test_unicode_path_redirection(self):
|
def test_unicode_path_redirection(self):
|
||||||
# XXX: Python 3 http server does not allow non-ASCII header values
|
with FakeYDL() as ydl:
|
||||||
if sys.version_info[0] == 3:
|
r = ydl.urlopen(sanitized_Request(self._test_url('302-non-ascii-redirect')))
|
||||||
return
|
self.assertEqual(r.url, self._test_url('%E4%B8%AD%E6%96%87.html'))
|
||||||
|
r.close()
|
||||||
|
|
||||||
ydl = YoutubeDL({'logger': FakeLogger()})
|
def test_redirect(self):
|
||||||
r = ydl.extract_info('http://127.0.0.1:%d/302' % self.port)
|
with FakeYDL() as ydl:
|
||||||
self.assertEqual(r['entries'][0]['url'], 'http://127.0.0.1:%d/vid.mp4' % self.port)
|
def do_req(redirect_status, method, check_no_content=False):
|
||||||
|
data = b'testdata' if method in ('POST', 'PUT') else None
|
||||||
|
res = ydl.urlopen(sanitized_Request(
|
||||||
|
self._test_url('redirect_{0}'.format(redirect_status)),
|
||||||
|
method=method, data=data))
|
||||||
|
if check_no_content:
|
||||||
|
self.assertNotIn('Content-Type', res.headers)
|
||||||
|
return res.read().decode('utf-8'), res.headers.get('method', '')
|
||||||
|
# A 303 must either use GET or HEAD for subsequent request
|
||||||
|
self.assertEqual(do_req(303, 'POST'), ('', 'GET'))
|
||||||
|
self.assertEqual(do_req(303, 'HEAD'), ('', 'HEAD'))
|
||||||
|
|
||||||
|
self.assertEqual(do_req(303, 'PUT'), ('', 'GET'))
|
||||||
|
|
||||||
class TestHTTPS(unittest.TestCase):
|
# 301 and 302 turn POST only into a GET, with no Content-Type
|
||||||
def setUp(self):
|
self.assertEqual(do_req(301, 'POST', True), ('', 'GET'))
|
||||||
certfn = os.path.join(TEST_DIR, 'testcert.pem')
|
self.assertEqual(do_req(301, 'HEAD'), ('', 'HEAD'))
|
||||||
self.httpd = compat_http_server.HTTPServer(
|
self.assertEqual(do_req(302, 'POST', True), ('', 'GET'))
|
||||||
('127.0.0.1', 0), HTTPTestRequestHandler)
|
self.assertEqual(do_req(302, 'HEAD'), ('', 'HEAD'))
|
||||||
self.httpd.socket = ssl.wrap_socket(
|
|
||||||
self.httpd.socket, certfile=certfn, server_side=True)
|
|
||||||
self.port = http_server_port(self.httpd)
|
|
||||||
self.server_thread = threading.Thread(target=self.httpd.serve_forever)
|
|
||||||
self.server_thread.daemon = True
|
|
||||||
self.server_thread.start()
|
|
||||||
|
|
||||||
def test_nocheckcertificate(self):
|
self.assertEqual(do_req(301, 'PUT'), ('testdata', 'PUT'))
|
||||||
if sys.version_info >= (2, 7, 9): # No certificate checking anyways
|
self.assertEqual(do_req(302, 'PUT'), ('testdata', 'PUT'))
|
||||||
ydl = YoutubeDL({'logger': FakeLogger()})
|
|
||||||
self.assertRaises(
|
|
||||||
Exception,
|
|
||||||
ydl.extract_info, 'https://127.0.0.1:%d/video.html' % self.port)
|
|
||||||
|
|
||||||
ydl = YoutubeDL({'logger': FakeLogger(), 'nocheckcertificate': True})
|
# 307 and 308 should not change method
|
||||||
r = ydl.extract_info('https://127.0.0.1:%d/video.html' % self.port)
|
for m in ('POST', 'PUT'):
|
||||||
self.assertEqual(r['entries'][0]['url'], 'https://127.0.0.1:%d/vid.mp4' % self.port)
|
self.assertEqual(do_req(307, m), ('testdata', m))
|
||||||
|
self.assertEqual(do_req(308, m), ('testdata', m))
|
||||||
|
|
||||||
|
self.assertEqual(do_req(307, 'HEAD'), ('', 'HEAD'))
|
||||||
|
self.assertEqual(do_req(308, 'HEAD'), ('', 'HEAD'))
|
||||||
|
|
||||||
|
# These should not redirect and instead raise an HTTPError
|
||||||
|
for code in (300, 304, 305, 306):
|
||||||
|
with self.assertRaises(compat_urllib_HTTPError):
|
||||||
|
do_req(code, 'GET')
|
||||||
|
|
||||||
|
# Jython 2.7.1 times out for some reason
|
||||||
|
@expectedFailureIf(sys.platform.startswith('java') and sys.version_info < (2, 7, 2))
|
||||||
|
def test_content_type(self):
|
||||||
|
# https://github.com/yt-dlp/yt-dlp/commit/379a4f161d4ad3e40932dcf5aca6e6fb9715ab28
|
||||||
|
with FakeYDL({'nocheckcertificate': True}) as ydl:
|
||||||
|
# method should be auto-detected as POST
|
||||||
|
r = sanitized_Request(self._test_url('headers', scheme='https'), data=urlencode_postdata({'test': 'test'}))
|
||||||
|
|
||||||
|
headers = ydl.urlopen(r).read().decode('utf-8')
|
||||||
|
self.assertIn('Content-Type: application/x-www-form-urlencoded', headers)
|
||||||
|
|
||||||
|
# test http
|
||||||
|
r = sanitized_Request(self._test_url('headers'), data=urlencode_postdata({'test': 'test'}))
|
||||||
|
headers = ydl.urlopen(r).read().decode('utf-8')
|
||||||
|
self.assertIn('Content-Type: application/x-www-form-urlencoded', headers)
|
||||||
|
|
||||||
|
def test_update_req(self):
|
||||||
|
req = sanitized_Request('http://example.com')
|
||||||
|
assert req.data is None
|
||||||
|
assert req.get_method() == 'GET'
|
||||||
|
assert not req.has_header('Content-Type')
|
||||||
|
# Test that zero-byte payloads will be sent
|
||||||
|
req = update_Request(req, data=b'')
|
||||||
|
assert req.data == b''
|
||||||
|
assert req.get_method() == 'POST'
|
||||||
|
# yt-dl expects data to be encoded and Content-Type to be added by sender
|
||||||
|
# assert req.get_header('Content-Type') == 'application/x-www-form-urlencoded'
|
||||||
|
|
||||||
|
def test_cookiejar(self):
|
||||||
|
with FakeYDL() as ydl:
|
||||||
|
ydl.cookiejar.set_cookie(compat_http_cookiejar_Cookie(
|
||||||
|
0, 'test', 'ytdl', None, False, '127.0.0.1', True,
|
||||||
|
False, '/headers', True, False, None, False, None, None, {}))
|
||||||
|
data = ydl.urlopen(sanitized_Request(
|
||||||
|
self._test_url('headers'))).read().decode('utf-8')
|
||||||
|
self.assertIn('Cookie: test=ytdl', data)
|
||||||
|
|
||||||
|
def test_passed_cookie_header(self):
|
||||||
|
# We should accept a Cookie header being passed as in normal headers and handle it appropriately.
|
||||||
|
with FakeYDL() as ydl:
|
||||||
|
# Specified Cookie header should be used
|
||||||
|
res = ydl.urlopen(sanitized_Request(
|
||||||
|
self._test_url('headers'), headers={'Cookie': 'test=test'})).read().decode('utf-8')
|
||||||
|
self.assertIn('Cookie: test=test', res)
|
||||||
|
|
||||||
|
# Specified Cookie header should be removed on any redirect
|
||||||
|
res = ydl.urlopen(sanitized_Request(
|
||||||
|
self._test_url('308-to-headers'), headers={'Cookie': 'test=test'})).read().decode('utf-8')
|
||||||
|
self.assertNotIn('Cookie: test=test', res)
|
||||||
|
|
||||||
|
# Specified Cookie header should override global cookiejar for that request
|
||||||
|
ydl.cookiejar.set_cookie(compat_http_cookiejar_Cookie(
|
||||||
|
0, 'test', 'ytdlp', None, False, '127.0.0.1', True,
|
||||||
|
False, '/headers', True, False, None, False, None, None, {}))
|
||||||
|
data = ydl.urlopen(sanitized_Request(
|
||||||
|
self._test_url('headers'), headers={'Cookie': 'test=test'})).read().decode('utf-8')
|
||||||
|
self.assertNotIn('Cookie: test=ytdlp', data)
|
||||||
|
self.assertIn('Cookie: test=test', data)
|
||||||
|
|
||||||
|
def test_no_compression_compat_header(self):
|
||||||
|
with FakeYDL() as ydl:
|
||||||
|
data = ydl.urlopen(
|
||||||
|
sanitized_Request(
|
||||||
|
self._test_url('headers'),
|
||||||
|
headers={'Youtubedl-no-compression': True})).read()
|
||||||
|
self.assertIn(b'Accept-Encoding: identity', data)
|
||||||
|
self.assertNotIn(b'youtubedl-no-compression', data.lower())
|
||||||
|
|
||||||
|
def test_gzip_trailing_garbage(self):
|
||||||
|
# https://github.com/ytdl-org/youtube-dl/commit/aa3e950764337ef9800c936f4de89b31c00dfcf5
|
||||||
|
# https://github.com/ytdl-org/youtube-dl/commit/6f2ec15cee79d35dba065677cad9da7491ec6e6f
|
||||||
|
with FakeYDL() as ydl:
|
||||||
|
data = ydl.urlopen(sanitized_Request(self._test_url('trailing_garbage'))).read().decode('utf-8')
|
||||||
|
self.assertEqual(data, '<html><video src="/vid.mp4" /></html>')
|
||||||
|
|
||||||
|
def __test_compression(self, encoding):
|
||||||
|
with FakeYDL() as ydl:
|
||||||
|
res = ydl.urlopen(
|
||||||
|
sanitized_Request(
|
||||||
|
self._test_url('content-encoding'),
|
||||||
|
headers={'ytdl-encoding': encoding}))
|
||||||
|
# decoded encodings are removed: only check for valid decompressed data
|
||||||
|
self.assertEqual(res.read(), b'<html><video src="/vid.mp4" /></html>')
|
||||||
|
|
||||||
|
@unittest.skipUnless(brotli, 'brotli support is not installed')
|
||||||
|
def test_brotli(self):
|
||||||
|
self.__test_compression('br')
|
||||||
|
|
||||||
|
def test_deflate(self):
|
||||||
|
self.__test_compression('deflate')
|
||||||
|
|
||||||
|
def test_gzip(self):
|
||||||
|
self.__test_compression('gzip')
|
||||||
|
|
||||||
|
def test_multiple_encodings(self):
|
||||||
|
# https://www.rfc-editor.org/rfc/rfc9110.html#section-8.4
|
||||||
|
for pair in ('gzip,deflate', 'deflate, gzip', 'gzip, gzip', 'deflate, deflate'):
|
||||||
|
self.__test_compression(pair)
|
||||||
|
|
||||||
|
def test_unsupported_encoding(self):
|
||||||
|
# it should return the raw content
|
||||||
|
with FakeYDL() as ydl:
|
||||||
|
res = ydl.urlopen(
|
||||||
|
sanitized_Request(
|
||||||
|
self._test_url('content-encoding'),
|
||||||
|
headers={'ytdl-encoding': 'unsupported'}))
|
||||||
|
self.assertEqual(res.headers.get('Content-Encoding'), 'unsupported')
|
||||||
|
self.assertEqual(res.read(), b'raw')
|
||||||
|
|
||||||
|
def test_remove_dot_segments(self):
|
||||||
|
with FakeYDL() as ydl:
|
||||||
|
res = ydl.urlopen(sanitized_Request(self._test_url('a/b/./../../headers')))
|
||||||
|
self.assertEqual(compat_urllib_parse.urlparse(res.geturl()).path, '/headers')
|
||||||
|
|
||||||
|
res = ydl.urlopen(sanitized_Request(self._test_url('redirect_dotsegments')))
|
||||||
|
self.assertEqual(compat_urllib_parse.urlparse(res.geturl()).path, '/headers')
|
||||||
|
|
||||||
|
|
||||||
def _build_proxy_handler(name):
|
def _build_proxy_handler(name):
|
||||||
|
@ -117,7 +515,7 @@ def _build_proxy_handler(name):
|
||||||
self.send_response(200)
|
self.send_response(200)
|
||||||
self.send_header('Content-Type', 'text/plain; charset=utf-8')
|
self.send_header('Content-Type', 'text/plain; charset=utf-8')
|
||||||
self.end_headers()
|
self.end_headers()
|
||||||
self.wfile.write('{self.proxy_name}: {self.path}'.format(self=self).encode('utf-8'))
|
self.wfile.write('{0}: {1}'.format(self.proxy_name, self.path).encode('utf-8'))
|
||||||
return HTTPTestRequestHandler
|
return HTTPTestRequestHandler
|
||||||
|
|
||||||
|
|
||||||
|
@ -137,10 +535,30 @@ class TestProxy(unittest.TestCase):
|
||||||
self.geo_proxy_thread.daemon = True
|
self.geo_proxy_thread.daemon = True
|
||||||
self.geo_proxy_thread.start()
|
self.geo_proxy_thread.start()
|
||||||
|
|
||||||
|
def tearDown(self):
|
||||||
|
|
||||||
|
def closer(svr):
|
||||||
|
def _closer():
|
||||||
|
svr.shutdown()
|
||||||
|
svr.server_close()
|
||||||
|
return _closer
|
||||||
|
|
||||||
|
shutdown_thread = threading.Thread(target=closer(self.proxy))
|
||||||
|
shutdown_thread.start()
|
||||||
|
self.proxy_thread.join(2.0)
|
||||||
|
|
||||||
|
shutdown_thread = threading.Thread(target=closer(self.geo_proxy))
|
||||||
|
shutdown_thread.start()
|
||||||
|
self.geo_proxy_thread.join(2.0)
|
||||||
|
|
||||||
|
def _test_proxy(self, host='127.0.0.1', port=None):
|
||||||
|
return '{0}:{1}'.format(
|
||||||
|
host, port if port is not None else self.port)
|
||||||
|
|
||||||
def test_proxy(self):
|
def test_proxy(self):
|
||||||
geo_proxy = '127.0.0.1:{0}'.format(self.geo_port)
|
geo_proxy = self._test_proxy(port=self.geo_port)
|
||||||
ydl = YoutubeDL({
|
ydl = YoutubeDL({
|
||||||
'proxy': '127.0.0.1:{0}'.format(self.port),
|
'proxy': self._test_proxy(),
|
||||||
'geo_verification_proxy': geo_proxy,
|
'geo_verification_proxy': geo_proxy,
|
||||||
})
|
})
|
||||||
url = 'http://foo.com/bar'
|
url = 'http://foo.com/bar'
|
||||||
|
@ -154,7 +572,7 @@ class TestProxy(unittest.TestCase):
|
||||||
|
|
||||||
def test_proxy_with_idn(self):
|
def test_proxy_with_idn(self):
|
||||||
ydl = YoutubeDL({
|
ydl = YoutubeDL({
|
||||||
'proxy': '127.0.0.1:{0}'.format(self.port),
|
'proxy': self._test_proxy(),
|
||||||
})
|
})
|
||||||
url = 'http://中文.tw/'
|
url = 'http://中文.tw/'
|
||||||
response = ydl.urlopen(url).read().decode('utf-8')
|
response = ydl.urlopen(url).read().decode('utf-8')
|
||||||
|
@ -162,5 +580,25 @@ class TestProxy(unittest.TestCase):
|
||||||
self.assertEqual(response, 'normal: http://xn--fiq228c.tw/')
|
self.assertEqual(response, 'normal: http://xn--fiq228c.tw/')
|
||||||
|
|
||||||
|
|
||||||
|
class TestFileURL(unittest.TestCase):
|
||||||
|
# See https://github.com/ytdl-org/youtube-dl/issues/8227
|
||||||
|
def test_file_urls(self):
|
||||||
|
tf = tempfile.NamedTemporaryFile(delete=False)
|
||||||
|
tf.write(b'foobar')
|
||||||
|
tf.close()
|
||||||
|
url = compat_urllib_parse.urljoin('file://', pathname2url(tf.name))
|
||||||
|
with FakeYDL() as ydl:
|
||||||
|
self.assertRaisesRegexp(
|
||||||
|
compat_urllib_error.URLError, 'file:// scheme is explicitly disabled in youtube-dl for security reasons', ydl.urlopen, url)
|
||||||
|
# not yet implemented
|
||||||
|
"""
|
||||||
|
with FakeYDL({'enable_file_urls': True}) as ydl:
|
||||||
|
res = ydl.urlopen(url)
|
||||||
|
self.assertEqual(res.read(), b'foobar')
|
||||||
|
res.close()
|
||||||
|
"""
|
||||||
|
os.unlink(tf.name)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|
|
@ -11,8 +11,6 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
import math
|
import math
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from youtube_dl.compat import compat_re_Pattern
|
|
||||||
|
|
||||||
from youtube_dl.jsinterp import JS_Undefined, JSInterpreter
|
from youtube_dl.jsinterp import JS_Undefined, JSInterpreter
|
||||||
|
|
||||||
|
|
||||||
|
@ -20,6 +18,7 @@ class TestJSInterpreter(unittest.TestCase):
|
||||||
def test_basic(self):
|
def test_basic(self):
|
||||||
jsi = JSInterpreter('function x(){;}')
|
jsi = JSInterpreter('function x(){;}')
|
||||||
self.assertEqual(jsi.call_function('x'), None)
|
self.assertEqual(jsi.call_function('x'), None)
|
||||||
|
self.assertEqual(repr(jsi.extract_function('x')), 'F<x>')
|
||||||
|
|
||||||
jsi = JSInterpreter('function x3(){return 42;}')
|
jsi = JSInterpreter('function x3(){return 42;}')
|
||||||
self.assertEqual(jsi.call_function('x3'), 42)
|
self.assertEqual(jsi.call_function('x3'), 42)
|
||||||
|
@ -34,6 +33,55 @@ class TestJSInterpreter(unittest.TestCase):
|
||||||
jsi = JSInterpreter('function x4(a){return 2*a+1;}')
|
jsi = JSInterpreter('function x4(a){return 2*a+1;}')
|
||||||
self.assertEqual(jsi.call_function('x4', 3), 7)
|
self.assertEqual(jsi.call_function('x4', 3), 7)
|
||||||
|
|
||||||
|
def test_add(self):
|
||||||
|
jsi = JSInterpreter('function f(){return 42 + 7;}')
|
||||||
|
self.assertEqual(jsi.call_function('f'), 49)
|
||||||
|
jsi = JSInterpreter('function f(){return 42 + undefined;}')
|
||||||
|
self.assertTrue(math.isnan(jsi.call_function('f')))
|
||||||
|
jsi = JSInterpreter('function f(){return 42 + null;}')
|
||||||
|
self.assertEqual(jsi.call_function('f'), 42)
|
||||||
|
|
||||||
|
def test_sub(self):
|
||||||
|
jsi = JSInterpreter('function f(){return 42 - 7;}')
|
||||||
|
self.assertEqual(jsi.call_function('f'), 35)
|
||||||
|
jsi = JSInterpreter('function f(){return 42 - undefined;}')
|
||||||
|
self.assertTrue(math.isnan(jsi.call_function('f')))
|
||||||
|
jsi = JSInterpreter('function f(){return 42 - null;}')
|
||||||
|
self.assertEqual(jsi.call_function('f'), 42)
|
||||||
|
|
||||||
|
def test_mul(self):
|
||||||
|
jsi = JSInterpreter('function f(){return 42 * 7;}')
|
||||||
|
self.assertEqual(jsi.call_function('f'), 294)
|
||||||
|
jsi = JSInterpreter('function f(){return 42 * undefined;}')
|
||||||
|
self.assertTrue(math.isnan(jsi.call_function('f')))
|
||||||
|
jsi = JSInterpreter('function f(){return 42 * null;}')
|
||||||
|
self.assertEqual(jsi.call_function('f'), 0)
|
||||||
|
|
||||||
|
def test_div(self):
|
||||||
|
jsi = JSInterpreter('function f(a, b){return a / b;}')
|
||||||
|
self.assertTrue(math.isnan(jsi.call_function('f', 0, 0)))
|
||||||
|
self.assertTrue(math.isnan(jsi.call_function('f', JS_Undefined, 1)))
|
||||||
|
self.assertTrue(math.isinf(jsi.call_function('f', 2, 0)))
|
||||||
|
self.assertEqual(jsi.call_function('f', 0, 3), 0)
|
||||||
|
|
||||||
|
def test_mod(self):
|
||||||
|
jsi = JSInterpreter('function f(){return 42 % 7;}')
|
||||||
|
self.assertEqual(jsi.call_function('f'), 0)
|
||||||
|
jsi = JSInterpreter('function f(){return 42 % 0;}')
|
||||||
|
self.assertTrue(math.isnan(jsi.call_function('f')))
|
||||||
|
jsi = JSInterpreter('function f(){return 42 % undefined;}')
|
||||||
|
self.assertTrue(math.isnan(jsi.call_function('f')))
|
||||||
|
|
||||||
|
def test_exp(self):
|
||||||
|
jsi = JSInterpreter('function f(){return 42 ** 2;}')
|
||||||
|
self.assertEqual(jsi.call_function('f'), 1764)
|
||||||
|
jsi = JSInterpreter('function f(){return 42 ** undefined;}')
|
||||||
|
self.assertTrue(math.isnan(jsi.call_function('f')))
|
||||||
|
jsi = JSInterpreter('function f(){return 42 ** null;}')
|
||||||
|
self.assertEqual(jsi.call_function('f'), 1)
|
||||||
|
jsi = JSInterpreter('function f(){return undefined ** 42;}')
|
||||||
|
self.assertTrue(math.isnan(jsi.call_function('f')))
|
||||||
|
|
||||||
def test_empty_return(self):
|
def test_empty_return(self):
|
||||||
jsi = JSInterpreter('function f(){return; y()}')
|
jsi = JSInterpreter('function f(){return; y()}')
|
||||||
self.assertEqual(jsi.call_function('f'), None)
|
self.assertEqual(jsi.call_function('f'), None)
|
||||||
|
@ -140,15 +188,18 @@ class TestJSInterpreter(unittest.TestCase):
|
||||||
''')
|
''')
|
||||||
self.assertTrue(math.isnan(jsi.call_function('x')))
|
self.assertTrue(math.isnan(jsi.call_function('x')))
|
||||||
|
|
||||||
jsi = JSInterpreter('''
|
def test_Date(self):
|
||||||
function x() { return new Date('Wednesday 31 December 1969 18:01:26 MDT') - 0; }
|
|
||||||
''')
|
|
||||||
self.assertEqual(jsi.call_function('x'), 86000)
|
|
||||||
jsi = JSInterpreter('''
|
jsi = JSInterpreter('''
|
||||||
function x(dt) { return new Date(dt) - 0; }
|
function x(dt) { return new Date(dt) - 0; }
|
||||||
''')
|
''')
|
||||||
self.assertEqual(jsi.call_function('x', 'Wednesday 31 December 1969 18:01:26 MDT'), 86000)
|
self.assertEqual(jsi.call_function('x', 'Wednesday 31 December 1969 18:01:26 MDT'), 86000)
|
||||||
|
|
||||||
|
# date format m/d/y
|
||||||
|
self.assertEqual(jsi.call_function('x', '12/31/1969 18:01:26 MDT'), 86000)
|
||||||
|
|
||||||
|
# epoch 0
|
||||||
|
self.assertEqual(jsi.call_function('x', '1 January 1970 00:00:00 UTC'), 0)
|
||||||
|
|
||||||
def test_call(self):
|
def test_call(self):
|
||||||
jsi = JSInterpreter('''
|
jsi = JSInterpreter('''
|
||||||
function x() { return 2; }
|
function x() { return 2; }
|
||||||
|
@ -158,6 +209,57 @@ class TestJSInterpreter(unittest.TestCase):
|
||||||
self.assertEqual(jsi.call_function('z'), 5)
|
self.assertEqual(jsi.call_function('z'), 5)
|
||||||
self.assertEqual(jsi.call_function('y'), 2)
|
self.assertEqual(jsi.call_function('y'), 2)
|
||||||
|
|
||||||
|
def test_if(self):
|
||||||
|
jsi = JSInterpreter('''
|
||||||
|
function x() {
|
||||||
|
let a = 9;
|
||||||
|
if (0==0) {a++}
|
||||||
|
return a
|
||||||
|
}''')
|
||||||
|
self.assertEqual(jsi.call_function('x'), 10)
|
||||||
|
|
||||||
|
jsi = JSInterpreter('''
|
||||||
|
function x() {
|
||||||
|
if (0==0) {return 10}
|
||||||
|
}''')
|
||||||
|
self.assertEqual(jsi.call_function('x'), 10)
|
||||||
|
|
||||||
|
jsi = JSInterpreter('''
|
||||||
|
function x() {
|
||||||
|
if (0!=0) {return 1}
|
||||||
|
else {return 10}
|
||||||
|
}''')
|
||||||
|
self.assertEqual(jsi.call_function('x'), 10)
|
||||||
|
|
||||||
|
""" # Unsupported
|
||||||
|
jsi = JSInterpreter('''
|
||||||
|
function x() {
|
||||||
|
if (0!=0) return 1;
|
||||||
|
else {return 10}
|
||||||
|
}''')
|
||||||
|
self.assertEqual(jsi.call_function('x'), 10)
|
||||||
|
"""
|
||||||
|
|
||||||
|
def test_elseif(self):
|
||||||
|
jsi = JSInterpreter('''
|
||||||
|
function x() {
|
||||||
|
if (0!=0) {return 1}
|
||||||
|
else if (1==0) {return 2}
|
||||||
|
else {return 10}
|
||||||
|
}''')
|
||||||
|
self.assertEqual(jsi.call_function('x'), 10)
|
||||||
|
|
||||||
|
""" # Unsupported
|
||||||
|
jsi = JSInterpreter('''
|
||||||
|
function x() {
|
||||||
|
if (0!=0) return 1;
|
||||||
|
else if (1==0) {return 2}
|
||||||
|
else {return 10}
|
||||||
|
}''')
|
||||||
|
self.assertEqual(jsi.call_function('x'), 10)
|
||||||
|
# etc
|
||||||
|
"""
|
||||||
|
|
||||||
def test_for_loop(self):
|
def test_for_loop(self):
|
||||||
# function x() { a=0; for (i=0; i-10; i++) {a++} a }
|
# function x() { a=0; for (i=0; i-10; i++) {a++} a }
|
||||||
jsi = JSInterpreter('''
|
jsi = JSInterpreter('''
|
||||||
|
@ -165,6 +267,13 @@ class TestJSInterpreter(unittest.TestCase):
|
||||||
''')
|
''')
|
||||||
self.assertEqual(jsi.call_function('x'), 10)
|
self.assertEqual(jsi.call_function('x'), 10)
|
||||||
|
|
||||||
|
def test_while_loop(self):
|
||||||
|
# function x() { a=0; while (a<10) {a++} a }
|
||||||
|
jsi = JSInterpreter('''
|
||||||
|
function x() { a=0; while (a<10) {a++} return a }
|
||||||
|
''')
|
||||||
|
self.assertEqual(jsi.call_function('x'), 10)
|
||||||
|
|
||||||
def test_switch(self):
|
def test_switch(self):
|
||||||
jsi = JSInterpreter('''
|
jsi = JSInterpreter('''
|
||||||
function x(f) { switch(f){
|
function x(f) { switch(f){
|
||||||
|
@ -381,15 +490,57 @@ class TestJSInterpreter(unittest.TestCase):
|
||||||
self.assertIs(jsi.call_function('x'), None)
|
self.assertIs(jsi.call_function('x'), None)
|
||||||
|
|
||||||
jsi = JSInterpreter('''
|
jsi = JSInterpreter('''
|
||||||
function x() { let a=/,,[/,913,/](,)}/; return a; }
|
function x() { let a=/,,[/,913,/](,)}/; "".replace(a, ""); return a; }
|
||||||
''')
|
''')
|
||||||
self.assertIsInstance(jsi.call_function('x'), compat_re_Pattern)
|
attrs = set(('findall', 'finditer', 'match', 'scanner', 'search',
|
||||||
|
'split', 'sub', 'subn'))
|
||||||
|
if sys.version_info >= (2, 7):
|
||||||
|
# documented for 2.6 but may not be found
|
||||||
|
attrs.update(('flags', 'groupindex', 'groups', 'pattern'))
|
||||||
|
self.assertSetEqual(set(dir(jsi.call_function('x'))) & attrs, attrs)
|
||||||
|
|
||||||
jsi = JSInterpreter('''
|
jsi = JSInterpreter('''
|
||||||
function x() { let a=/,,[/,913,/](,)}/i; return a; }
|
function x() { let a=/,,[/,913,/](,)}/i; return a; }
|
||||||
''')
|
''')
|
||||||
self.assertEqual(jsi.call_function('x').flags & ~re.U, re.I)
|
self.assertEqual(jsi.call_function('x').flags & ~re.U, re.I)
|
||||||
|
|
||||||
|
jsi = JSInterpreter(r'''
|
||||||
|
function x() { let a="data-name".replace("data-", ""); return a }
|
||||||
|
''')
|
||||||
|
self.assertEqual(jsi.call_function('x'), 'name')
|
||||||
|
|
||||||
|
jsi = JSInterpreter(r'''
|
||||||
|
function x() { let a="data-name".replace(new RegExp("^.+-"), ""); return a; }
|
||||||
|
''')
|
||||||
|
self.assertEqual(jsi.call_function('x'), 'name')
|
||||||
|
|
||||||
|
jsi = JSInterpreter(r'''
|
||||||
|
function x() { let a="data-name".replace(/^.+-/, ""); return a; }
|
||||||
|
''')
|
||||||
|
self.assertEqual(jsi.call_function('x'), 'name')
|
||||||
|
|
||||||
|
jsi = JSInterpreter(r'''
|
||||||
|
function x() { let a="data-name".replace(/a/g, "o"); return a; }
|
||||||
|
''')
|
||||||
|
self.assertEqual(jsi.call_function('x'), 'doto-nome')
|
||||||
|
|
||||||
|
jsi = JSInterpreter(r'''
|
||||||
|
function x() { let a="data-name".replaceAll("a", "o"); return a; }
|
||||||
|
''')
|
||||||
|
self.assertEqual(jsi.call_function('x'), 'doto-nome')
|
||||||
|
|
||||||
|
jsi = JSInterpreter(r'''
|
||||||
|
function x() { let a=[/[)\\]/]; return a[0]; }
|
||||||
|
''')
|
||||||
|
self.assertEqual(jsi.call_function('x').pattern, r'[)\\]')
|
||||||
|
|
||||||
|
""" # fails
|
||||||
|
jsi = JSInterpreter(r'''
|
||||||
|
function x() { let a=100; a/=/[0-9]+/.exec('divide by 20 today')[0]; }
|
||||||
|
''')
|
||||||
|
self.assertEqual(jsi.call_function('x'), 5)
|
||||||
|
"""
|
||||||
|
|
||||||
def test_char_code_at(self):
|
def test_char_code_at(self):
|
||||||
jsi = JSInterpreter('function x(i){return "test".charCodeAt(i)}')
|
jsi = JSInterpreter('function x(i){return "test".charCodeAt(i)}')
|
||||||
self.assertEqual(jsi.call_function('x', 0), 116)
|
self.assertEqual(jsi.call_function('x', 0), 116)
|
||||||
|
@ -406,6 +557,36 @@ class TestJSInterpreter(unittest.TestCase):
|
||||||
jsi = JSInterpreter('function x(){return 1236566549 << 5}')
|
jsi = JSInterpreter('function x(){return 1236566549 << 5}')
|
||||||
self.assertEqual(jsi.call_function('x'), 915423904)
|
self.assertEqual(jsi.call_function('x'), 915423904)
|
||||||
|
|
||||||
|
def test_bitwise_operators_madness(self):
|
||||||
|
jsi = JSInterpreter('function x(){return null << 5}')
|
||||||
|
self.assertEqual(jsi.call_function('x'), 0)
|
||||||
|
|
||||||
|
jsi = JSInterpreter('function x(){return undefined >> 5}')
|
||||||
|
self.assertEqual(jsi.call_function('x'), 0)
|
||||||
|
|
||||||
|
jsi = JSInterpreter('function x(){return 42 << NaN}')
|
||||||
|
self.assertEqual(jsi.call_function('x'), 42)
|
||||||
|
|
||||||
|
jsi = JSInterpreter('function x(){return 42 << Infinity}')
|
||||||
|
self.assertEqual(jsi.call_function('x'), 42)
|
||||||
|
|
||||||
|
def test_32066(self):
|
||||||
|
jsi = JSInterpreter("function x(){return Math.pow(3, 5) + new Date('1970-01-01T08:01:42.000+08:00') / 1000 * -239 - -24205;}")
|
||||||
|
self.assertEqual(jsi.call_function('x'), 70)
|
||||||
|
|
||||||
|
def test_unary_operators(self):
|
||||||
|
jsi = JSInterpreter('function f(){return 2 - - - 2;}')
|
||||||
|
self.assertEqual(jsi.call_function('f'), 0)
|
||||||
|
# fails
|
||||||
|
# jsi = JSInterpreter('function f(){return 2 + - + - - 2;}')
|
||||||
|
# self.assertEqual(jsi.call_function('f'), 0)
|
||||||
|
|
||||||
|
""" # fails so far
|
||||||
|
def test_packed(self):
|
||||||
|
jsi = JSInterpreter('''function x(p,a,c,k,e,d){while(c--)if(k[c])p=p.replace(new RegExp('\\b'+c.toString(a)+'\\b','g'),k[c]);return p}''')
|
||||||
|
self.assertEqual(jsi.call_function('x', '''h 7=g("1j");7.7h({7g:[{33:"w://7f-7e-7d-7c.v.7b/7a/79/78/77/76.74?t=73&s=2s&e=72&f=2t&71=70.0.0.1&6z=6y&6x=6w"}],6v:"w://32.v.u/6u.31",16:"r%",15:"r%",6t:"6s",6r:"",6q:"l",6p:"l",6o:"6n",6m:\'6l\',6k:"6j",9:[{33:"/2u?b=6i&n=50&6h=w://32.v.u/6g.31",6f:"6e"}],1y:{6d:1,6c:\'#6b\',6a:\'#69\',68:"67",66:30,65:r,},"64":{63:"%62 2m%m%61%5z%5y%5x.u%5w%5v%5u.2y%22 2k%m%1o%22 5t%m%1o%22 5s%m%1o%22 2j%m%5r%22 16%m%5q%22 15%m%5p%22 5o%2z%5n%5m%2z",5l:"w://v.u/d/1k/5k.2y",5j:[]},\'5i\':{"5h":"5g"},5f:"5e",5d:"w://v.u",5c:{},5b:l,1x:[0.25,0.50,0.75,1,1.25,1.5,2]});h 1m,1n,5a;h 59=0,58=0;h 7=g("1j");h 2x=0,57=0,56=0;$.55({54:{\'53-52\':\'2i-51\'}});7.j(\'4z\',6(x){c(5>0&&x.1l>=5&&1n!=1){1n=1;$(\'q.4y\').4x(\'4w\')}});7.j(\'13\',6(x){2x=x.1l});7.j(\'2g\',6(x){2w(x)});7.j(\'4v\',6(){$(\'q.2v\').4u()});6 2w(x){$(\'q.2v\').4t();c(1m)19;1m=1;17=0;c(4s.4r===l){17=1}$.4q(\'/2u?b=4p&2l=1k&4o=2t-4n-4m-2s-4l&4k=&4j=&4i=&17=\'+17,6(2r){$(\'#4h\').4g(2r)});$(\'.3-8-4f-4e:4d("4c")\').2h(6(e){2q();g().4b(0);g().4a(l)});6 2q(){h $14=$("<q />").2p({1l:"49",16:"r%",15:"r%",48:0,2n:0,2o:47,46:"45(10%, 10%, 10%, 0.4)","44-43":"42"});$("<41 />").2p({16:"60%",15:"60%",2o:40,"3z-2n":"3y"}).3x({\'2m\':\'/?b=3w&2l=1k\',\'2k\':\'0\',\'2j\':\'2i\'}).2f($14);$14.2h(6(){$(3v).3u();g().2g()});$14.2f($(\'#1j\'))}g().13(0);}6 3t(){h 9=7.1b(2e);2d.2c(9);c(9.n>1){1r(i=0;i<9.n;i++){c(9[i].1a==2e){2d.2c(\'!!=\'+i);7.1p(i)}}}}7.j(\'3s\',6(){g().1h("/2a/3r.29","3q 10 28",6(){g().13(g().27()+10)},"2b");$("q[26=2b]").23().21(\'.3-20-1z\');g().1h("/2a/3p.29","3o 10 28",6(){h 12=g().27()-10;c(12<0)12=0;g().13(12)},"24");$("q[26=24]").23().21(\'.3-20-1z\');});6 1i(){}7.j(\'3n\',6(){1i()});7.j(\'3m\',6(){1i()});7.j("k",6(y){h 9=7.1b();c(9.n<2)19;$(\'.3-8-3l-3k\').3j(6(){$(\'#3-8-a-k\').1e(\'3-8-a-z\');$(\'.3-a-k\').p(\'o-1f\',\'11\')});7.1h("/3i/3h.3g","3f 3e",6(){$(\'.3-1w\').3d(\'3-8-1v\');$(\'.3-8-1y, 
.3-8-1x\').p(\'o-1g\',\'11\');c($(\'.3-1w\').3c(\'3-8-1v\')){$(\'.3-a-k\').p(\'o-1g\',\'l\');$(\'.3-a-k\').p(\'o-1f\',\'l\');$(\'.3-8-a\').1e(\'3-8-a-z\');$(\'.3-8-a:1u\').3b(\'3-8-a-z\')}3a{$(\'.3-a-k\').p(\'o-1g\',\'11\');$(\'.3-a-k\').p(\'o-1f\',\'11\');$(\'.3-8-a:1u\').1e(\'3-8-a-z\')}},"39");7.j("38",6(y){1d.37(\'1c\',y.9[y.36].1a)});c(1d.1t(\'1c\')){35("1s(1d.1t(\'1c\'));",34)}});h 18;6 1s(1q){h 9=7.1b();c(9.n>1){1r(i=0;i<9.n;i++){c(9[i].1a==1q){c(i==18){19}18=i;7.1p(i)}}}}',36,270,'|||jw|||function|player|settings|tracks|submenu||if||||jwplayer|var||on|audioTracks|true|3D|length|aria|attr|div|100|||sx|filemoon|https||event|active||false|tt|seek|dd|height|width|adb|current_audio|return|name|getAudioTracks|default_audio|localStorage|removeClass|expanded|checked|addButton|callMeMaybe|vplayer|0fxcyc2ajhp1|position|vvplay|vvad|220|setCurrentAudioTrack|audio_name|for|audio_set|getItem|last|open|controls|playbackRates|captions|rewind|icon|insertAfter||detach|ff00||button|getPosition|sec|png|player8|ff11|log|console|track_name|appendTo|play|click|no|scrolling|frameborder|file_code|src|top|zIndex|css|showCCform|data|1662367683|383371|dl|video_ad|doPlay|prevt|mp4|3E||jpg|thumbs|file|300|setTimeout|currentTrack|setItem|audioTrackChanged|dualSound|else|addClass|hasClass|toggleClass|Track|Audio|svg|dualy|images|mousedown|buttons|topbar|playAttemptFailed|beforePlay|Rewind|fr|Forward|ff|ready|set_audio_track|remove|this|upload_srt|prop|50px|margin|1000001|iframe|center|align|text|rgba|background|1000000|left|absolute|pause|setCurrentCaptions|Upload|contains|item|content|html|fviews|referer|prem|embed|3e57249ef633e0d03bf76ceb8d8a4b65|216|83|hash|view|get|TokenZir|window|hide|show|complete|slow|fadeIn|video_ad_fadein|time||cache|Cache|Content|headers|ajaxSetup|v2done|tott|vastdone2|vastdone1|vvbefore|playbackRateControls|cast|aboutlink|FileMoon|abouttext|UHD|1870|qualityLabels|sites|GNOME_POWER|link|2Fiframe|3C|allowfullscreen|22360|22640|22no|marginheight|marginwidth|2FGNOME
_POWER|2F0fxcyc2ajhp1|2Fe|2Ffilemoon|2F|3A||22https|3Ciframe|code|sharing|fontOpacity|backgroundOpacity|Tahoma|fontFamily|303030|backgroundColor|FFFFFF|color|userFontScale|thumbnails|kind|0fxcyc2ajhp10000|url|get_slides|start|startparam|none|preload|html5|primary|hlshtml|androidhls|duration|uniform|stretching|0fxcyc2ajhp1_xt|image|2048|sp|6871|asn|127|srv|43200|_g3XlBcu2lmD9oDexD2NLWSmah2Nu3XcDrl93m9PwXY|m3u8||master|0fxcyc2ajhp1_x|00076|01|hls2|to|s01|delivery|storage|moon|sources|setup'''.split('|')))
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|
|
@ -5,16 +5,18 @@ from __future__ import unicode_literals
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
import unittest
|
import unittest
|
||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|
||||||
|
|
||||||
|
dirn = os.path.dirname
|
||||||
|
|
||||||
|
sys.path.insert(0, dirn(dirn(os.path.abspath(__file__))))
|
||||||
|
|
||||||
import errno
|
import errno
|
||||||
import io
|
|
||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
import subprocess
|
import subprocess
|
||||||
|
|
||||||
from youtube_dl.swfinterp import SWFInterpreter
|
from youtube_dl.swfinterp import SWFInterpreter
|
||||||
|
from youtube_dl.compat import compat_open as open
|
||||||
|
|
||||||
|
|
||||||
TEST_DIR = os.path.join(
|
TEST_DIR = os.path.join(
|
||||||
|
@ -43,7 +45,7 @@ def _make_testfunc(testfile):
|
||||||
'-static-link-runtime-shared-libraries', as_file])
|
'-static-link-runtime-shared-libraries', as_file])
|
||||||
except OSError as ose:
|
except OSError as ose:
|
||||||
if ose.errno == errno.ENOENT:
|
if ose.errno == errno.ENOENT:
|
||||||
print('mxmlc not found! Skipping test.')
|
self.skipTest('mxmlc not found!')
|
||||||
return
|
return
|
||||||
raise
|
raise
|
||||||
|
|
||||||
|
@ -51,7 +53,7 @@ def _make_testfunc(testfile):
|
||||||
swf_content = swf_f.read()
|
swf_content = swf_f.read()
|
||||||
swfi = SWFInterpreter(swf_content)
|
swfi = SWFInterpreter(swf_content)
|
||||||
|
|
||||||
with io.open(as_file, 'r', encoding='utf-8') as as_f:
|
with open(as_file, 'r', encoding='utf-8') as as_f:
|
||||||
as_content = as_f.read()
|
as_content = as_f.read()
|
||||||
|
|
||||||
def _find_spec(key):
|
def _find_spec(key):
|
||||||
|
|
|
@ -2,19 +2,21 @@ from __future__ import unicode_literals
|
||||||
|
|
||||||
# Allow direct execution
|
# Allow direct execution
|
||||||
import os
|
import os
|
||||||
|
import re
|
||||||
import sys
|
import sys
|
||||||
import unittest
|
import unittest
|
||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|
||||||
|
|
||||||
import io
|
dirn = os.path.dirname
|
||||||
import re
|
|
||||||
|
|
||||||
rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
rootDir = dirn(dirn(os.path.abspath(__file__)))
|
||||||
|
|
||||||
|
sys.path.insert(0, rootDir)
|
||||||
|
|
||||||
IGNORED_FILES = [
|
IGNORED_FILES = [
|
||||||
'setup.py', # http://bugs.python.org/issue13943
|
'setup.py', # http://bugs.python.org/issue13943
|
||||||
'conf.py',
|
'conf.py',
|
||||||
'buildserver.py',
|
'buildserver.py',
|
||||||
|
'get-pip.py',
|
||||||
]
|
]
|
||||||
|
|
||||||
IGNORED_DIRS = [
|
IGNORED_DIRS = [
|
||||||
|
@ -23,6 +25,7 @@ IGNORED_DIRS = [
|
||||||
]
|
]
|
||||||
|
|
||||||
from test.helper import assertRegexpMatches
|
from test.helper import assertRegexpMatches
|
||||||
|
from youtube_dl.compat import compat_open as open
|
||||||
|
|
||||||
|
|
||||||
class TestUnicodeLiterals(unittest.TestCase):
|
class TestUnicodeLiterals(unittest.TestCase):
|
||||||
|
@ -40,7 +43,7 @@ class TestUnicodeLiterals(unittest.TestCase):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
fn = os.path.join(dirpath, basename)
|
fn = os.path.join(dirpath, basename)
|
||||||
with io.open(fn, encoding='utf-8') as inf:
|
with open(fn, encoding='utf-8') as inf:
|
||||||
code = inf.read()
|
code = inf.read()
|
||||||
|
|
||||||
if "'" not in code and '"' not in code:
|
if "'" not in code and '"' not in code:
|
||||||
|
|
|
@ -20,7 +20,7 @@ import xml.etree.ElementTree
|
||||||
from youtube_dl.utils import (
|
from youtube_dl.utils import (
|
||||||
age_restricted,
|
age_restricted,
|
||||||
args_to_str,
|
args_to_str,
|
||||||
encode_base_n,
|
base_url,
|
||||||
caesar,
|
caesar,
|
||||||
clean_html,
|
clean_html,
|
||||||
clean_podcast_url,
|
clean_podcast_url,
|
||||||
|
@ -29,10 +29,12 @@ from youtube_dl.utils import (
|
||||||
detect_exe_version,
|
detect_exe_version,
|
||||||
determine_ext,
|
determine_ext,
|
||||||
dict_get,
|
dict_get,
|
||||||
|
encode_base_n,
|
||||||
encode_compat_str,
|
encode_compat_str,
|
||||||
encodeFilename,
|
encodeFilename,
|
||||||
escape_rfc3986,
|
escape_rfc3986,
|
||||||
escape_url,
|
escape_url,
|
||||||
|
expand_path,
|
||||||
extract_attributes,
|
extract_attributes,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
find_xpath_attr,
|
find_xpath_attr,
|
||||||
|
@ -51,6 +53,7 @@ from youtube_dl.utils import (
|
||||||
js_to_json,
|
js_to_json,
|
||||||
LazyList,
|
LazyList,
|
||||||
limit_length,
|
limit_length,
|
||||||
|
lowercase_escape,
|
||||||
merge_dicts,
|
merge_dicts,
|
||||||
mimetype2ext,
|
mimetype2ext,
|
||||||
month_by_name,
|
month_by_name,
|
||||||
|
@ -59,30 +62,33 @@ from youtube_dl.utils import (
|
||||||
OnDemandPagedList,
|
OnDemandPagedList,
|
||||||
orderedSet,
|
orderedSet,
|
||||||
parse_age_limit,
|
parse_age_limit,
|
||||||
|
parse_bitrate,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
parse_filesize,
|
parse_filesize,
|
||||||
|
parse_codecs,
|
||||||
parse_count,
|
parse_count,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
parse_resolution,
|
parse_resolution,
|
||||||
parse_bitrate,
|
parse_qs,
|
||||||
pkcs1pad,
|
pkcs1pad,
|
||||||
read_batch_urls,
|
|
||||||
sanitize_filename,
|
|
||||||
sanitize_path,
|
|
||||||
sanitize_url,
|
|
||||||
expand_path,
|
|
||||||
prepend_extension,
|
prepend_extension,
|
||||||
replace_extension,
|
read_batch_urls,
|
||||||
remove_start,
|
remove_start,
|
||||||
remove_end,
|
remove_end,
|
||||||
remove_quotes,
|
remove_quotes,
|
||||||
|
replace_extension,
|
||||||
rot47,
|
rot47,
|
||||||
|
sanitize_filename,
|
||||||
|
sanitize_path,
|
||||||
|
sanitize_url,
|
||||||
shell_quote,
|
shell_quote,
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
|
str_or_none,
|
||||||
str_to_int,
|
str_to_int,
|
||||||
strip_jsonp,
|
strip_jsonp,
|
||||||
strip_or_none,
|
strip_or_none,
|
||||||
subtitles_filename,
|
subtitles_filename,
|
||||||
|
T,
|
||||||
timeconvert,
|
timeconvert,
|
||||||
traverse_obj,
|
traverse_obj,
|
||||||
try_call,
|
try_call,
|
||||||
|
@ -91,10 +97,8 @@ from youtube_dl.utils import (
|
||||||
unified_timestamp,
|
unified_timestamp,
|
||||||
unsmuggle_url,
|
unsmuggle_url,
|
||||||
uppercase_escape,
|
uppercase_escape,
|
||||||
lowercase_escape,
|
|
||||||
url_basename,
|
url_basename,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
base_url,
|
|
||||||
urljoin,
|
urljoin,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
urshift,
|
urshift,
|
||||||
|
@ -112,7 +116,7 @@ from youtube_dl.utils import (
|
||||||
cli_option,
|
cli_option,
|
||||||
cli_valueless_option,
|
cli_valueless_option,
|
||||||
cli_bool_option,
|
cli_bool_option,
|
||||||
parse_codecs,
|
YoutubeDLHandler,
|
||||||
)
|
)
|
||||||
from youtube_dl.compat import (
|
from youtube_dl.compat import (
|
||||||
compat_chr,
|
compat_chr,
|
||||||
|
@ -122,7 +126,6 @@ from youtube_dl.compat import (
|
||||||
compat_setenv,
|
compat_setenv,
|
||||||
compat_str,
|
compat_str,
|
||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
compat_parse_qs,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -250,6 +253,7 @@ class TestUtil(unittest.TestCase):
|
||||||
self.assertEqual(sanitize_url('httpss://foo.bar'), 'https://foo.bar')
|
self.assertEqual(sanitize_url('httpss://foo.bar'), 'https://foo.bar')
|
||||||
self.assertEqual(sanitize_url('rmtps://foo.bar'), 'rtmps://foo.bar')
|
self.assertEqual(sanitize_url('rmtps://foo.bar'), 'rtmps://foo.bar')
|
||||||
self.assertEqual(sanitize_url('https://foo.bar'), 'https://foo.bar')
|
self.assertEqual(sanitize_url('https://foo.bar'), 'https://foo.bar')
|
||||||
|
self.assertEqual(sanitize_url('foo bar'), 'foo bar')
|
||||||
|
|
||||||
def test_expand_path(self):
|
def test_expand_path(self):
|
||||||
def env(var):
|
def env(var):
|
||||||
|
@ -679,38 +683,36 @@ class TestUtil(unittest.TestCase):
|
||||||
self.assertTrue(isinstance(data, bytes))
|
self.assertTrue(isinstance(data, bytes))
|
||||||
|
|
||||||
def test_update_url_query(self):
|
def test_update_url_query(self):
|
||||||
def query_dict(url):
|
self.assertEqual(parse_qs(update_url_query(
|
||||||
return compat_parse_qs(compat_urlparse.urlparse(url).query)
|
|
||||||
self.assertEqual(query_dict(update_url_query(
|
|
||||||
'http://example.com/path', {'quality': ['HD'], 'format': ['mp4']})),
|
'http://example.com/path', {'quality': ['HD'], 'format': ['mp4']})),
|
||||||
query_dict('http://example.com/path?quality=HD&format=mp4'))
|
parse_qs('http://example.com/path?quality=HD&format=mp4'))
|
||||||
self.assertEqual(query_dict(update_url_query(
|
self.assertEqual(parse_qs(update_url_query(
|
||||||
'http://example.com/path', {'system': ['LINUX', 'WINDOWS']})),
|
'http://example.com/path', {'system': ['LINUX', 'WINDOWS']})),
|
||||||
query_dict('http://example.com/path?system=LINUX&system=WINDOWS'))
|
parse_qs('http://example.com/path?system=LINUX&system=WINDOWS'))
|
||||||
self.assertEqual(query_dict(update_url_query(
|
self.assertEqual(parse_qs(update_url_query(
|
||||||
'http://example.com/path', {'fields': 'id,formats,subtitles'})),
|
'http://example.com/path', {'fields': 'id,formats,subtitles'})),
|
||||||
query_dict('http://example.com/path?fields=id,formats,subtitles'))
|
parse_qs('http://example.com/path?fields=id,formats,subtitles'))
|
||||||
self.assertEqual(query_dict(update_url_query(
|
self.assertEqual(parse_qs(update_url_query(
|
||||||
'http://example.com/path', {'fields': ('id,formats,subtitles', 'thumbnails')})),
|
'http://example.com/path', {'fields': ('id,formats,subtitles', 'thumbnails')})),
|
||||||
query_dict('http://example.com/path?fields=id,formats,subtitles&fields=thumbnails'))
|
parse_qs('http://example.com/path?fields=id,formats,subtitles&fields=thumbnails'))
|
||||||
self.assertEqual(query_dict(update_url_query(
|
self.assertEqual(parse_qs(update_url_query(
|
||||||
'http://example.com/path?manifest=f4m', {'manifest': []})),
|
'http://example.com/path?manifest=f4m', {'manifest': []})),
|
||||||
query_dict('http://example.com/path'))
|
parse_qs('http://example.com/path'))
|
||||||
self.assertEqual(query_dict(update_url_query(
|
self.assertEqual(parse_qs(update_url_query(
|
||||||
'http://example.com/path?system=LINUX&system=WINDOWS', {'system': 'LINUX'})),
|
'http://example.com/path?system=LINUX&system=WINDOWS', {'system': 'LINUX'})),
|
||||||
query_dict('http://example.com/path?system=LINUX'))
|
parse_qs('http://example.com/path?system=LINUX'))
|
||||||
self.assertEqual(query_dict(update_url_query(
|
self.assertEqual(parse_qs(update_url_query(
|
||||||
'http://example.com/path', {'fields': b'id,formats,subtitles'})),
|
'http://example.com/path', {'fields': b'id,formats,subtitles'})),
|
||||||
query_dict('http://example.com/path?fields=id,formats,subtitles'))
|
parse_qs('http://example.com/path?fields=id,formats,subtitles'))
|
||||||
self.assertEqual(query_dict(update_url_query(
|
self.assertEqual(parse_qs(update_url_query(
|
||||||
'http://example.com/path', {'width': 1080, 'height': 720})),
|
'http://example.com/path', {'width': 1080, 'height': 720})),
|
||||||
query_dict('http://example.com/path?width=1080&height=720'))
|
parse_qs('http://example.com/path?width=1080&height=720'))
|
||||||
self.assertEqual(query_dict(update_url_query(
|
self.assertEqual(parse_qs(update_url_query(
|
||||||
'http://example.com/path', {'bitrate': 5020.43})),
|
'http://example.com/path', {'bitrate': 5020.43})),
|
||||||
query_dict('http://example.com/path?bitrate=5020.43'))
|
parse_qs('http://example.com/path?bitrate=5020.43'))
|
||||||
self.assertEqual(query_dict(update_url_query(
|
self.assertEqual(parse_qs(update_url_query(
|
||||||
'http://example.com/path', {'test': '第二行тест'})),
|
'http://example.com/path', {'test': '第二行тест'})),
|
||||||
query_dict('http://example.com/path?test=%E7%AC%AC%E4%BA%8C%E8%A1%8C%D1%82%D0%B5%D1%81%D1%82'))
|
parse_qs('http://example.com/path?test=%E7%AC%AC%E4%BA%8C%E8%A1%8C%D1%82%D0%B5%D1%81%D1%82'))
|
||||||
|
|
||||||
def test_multipart_encode(self):
|
def test_multipart_encode(self):
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
|
@ -902,6 +904,111 @@ class TestUtil(unittest.TestCase):
|
||||||
)
|
)
|
||||||
self.assertEqual(escape_url('http://vimeo.com/56015672#at=0'), 'http://vimeo.com/56015672#at=0')
|
self.assertEqual(escape_url('http://vimeo.com/56015672#at=0'), 'http://vimeo.com/56015672#at=0')
|
||||||
|
|
||||||
|
def test_remove_dot_segments(self):
|
||||||
|
|
||||||
|
def remove_dot_segments(p):
|
||||||
|
q = '' if p.startswith('/') else '/'
|
||||||
|
p = 'http://example.com' + q + p
|
||||||
|
p = compat_urlparse.urlsplit(YoutubeDLHandler._fix_path(p)).path
|
||||||
|
return p[1:] if q else p
|
||||||
|
|
||||||
|
self.assertEqual(remove_dot_segments('/a/b/c/./../../g'), '/a/g')
|
||||||
|
self.assertEqual(remove_dot_segments('mid/content=5/../6'), 'mid/6')
|
||||||
|
self.assertEqual(remove_dot_segments('/ad/../cd'), '/cd')
|
||||||
|
self.assertEqual(remove_dot_segments('/ad/../cd/'), '/cd/')
|
||||||
|
self.assertEqual(remove_dot_segments('/..'), '/')
|
||||||
|
self.assertEqual(remove_dot_segments('/./'), '/')
|
||||||
|
self.assertEqual(remove_dot_segments('/./a'), '/a')
|
||||||
|
self.assertEqual(remove_dot_segments('/abc/./.././d/././e/.././f/./../../ghi'), '/ghi')
|
||||||
|
self.assertEqual(remove_dot_segments('/'), '/')
|
||||||
|
self.assertEqual(remove_dot_segments('/t'), '/t')
|
||||||
|
self.assertEqual(remove_dot_segments('t'), 't')
|
||||||
|
self.assertEqual(remove_dot_segments(''), '')
|
||||||
|
self.assertEqual(remove_dot_segments('/../a/b/c'), '/a/b/c')
|
||||||
|
self.assertEqual(remove_dot_segments('../a'), 'a')
|
||||||
|
self.assertEqual(remove_dot_segments('./a'), 'a')
|
||||||
|
self.assertEqual(remove_dot_segments('.'), '')
|
||||||
|
self.assertEqual(remove_dot_segments('////'), '////')
|
||||||
|
|
||||||
|
def test_js_to_json_vars_strings(self):
|
||||||
|
self.assertDictEqual(
|
||||||
|
json.loads(js_to_json(
|
||||||
|
'''{
|
||||||
|
'null': a,
|
||||||
|
'nullStr': b,
|
||||||
|
'true': c,
|
||||||
|
'trueStr': d,
|
||||||
|
'false': e,
|
||||||
|
'falseStr': f,
|
||||||
|
'unresolvedVar': g,
|
||||||
|
}''',
|
||||||
|
{
|
||||||
|
'a': 'null',
|
||||||
|
'b': '"null"',
|
||||||
|
'c': 'true',
|
||||||
|
'd': '"true"',
|
||||||
|
'e': 'false',
|
||||||
|
'f': '"false"',
|
||||||
|
'g': 'var',
|
||||||
|
}
|
||||||
|
)),
|
||||||
|
{
|
||||||
|
'null': None,
|
||||||
|
'nullStr': 'null',
|
||||||
|
'true': True,
|
||||||
|
'trueStr': 'true',
|
||||||
|
'false': False,
|
||||||
|
'falseStr': 'false',
|
||||||
|
'unresolvedVar': 'var'
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
self.assertDictEqual(
|
||||||
|
json.loads(js_to_json(
|
||||||
|
'''{
|
||||||
|
'int': a,
|
||||||
|
'intStr': b,
|
||||||
|
'float': c,
|
||||||
|
'floatStr': d,
|
||||||
|
}''',
|
||||||
|
{
|
||||||
|
'a': '123',
|
||||||
|
'b': '"123"',
|
||||||
|
'c': '1.23',
|
||||||
|
'd': '"1.23"',
|
||||||
|
}
|
||||||
|
)),
|
||||||
|
{
|
||||||
|
'int': 123,
|
||||||
|
'intStr': '123',
|
||||||
|
'float': 1.23,
|
||||||
|
'floatStr': '1.23',
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
self.assertDictEqual(
|
||||||
|
json.loads(js_to_json(
|
||||||
|
'''{
|
||||||
|
'object': a,
|
||||||
|
'objectStr': b,
|
||||||
|
'array': c,
|
||||||
|
'arrayStr': d,
|
||||||
|
}''',
|
||||||
|
{
|
||||||
|
'a': '{}',
|
||||||
|
'b': '"{}"',
|
||||||
|
'c': '[]',
|
||||||
|
'd': '"[]"',
|
||||||
|
}
|
||||||
|
)),
|
||||||
|
{
|
||||||
|
'object': {},
|
||||||
|
'objectStr': '{}',
|
||||||
|
'array': [],
|
||||||
|
'arrayStr': '[]',
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
def test_js_to_json_realworld(self):
|
def test_js_to_json_realworld(self):
|
||||||
inp = '''{
|
inp = '''{
|
||||||
'clip':{'provider':'pseudo'}
|
'clip':{'provider':'pseudo'}
|
||||||
|
@ -972,10 +1079,10 @@ class TestUtil(unittest.TestCase):
|
||||||
!42: 42
|
!42: 42
|
||||||
}''')
|
}''')
|
||||||
self.assertEqual(json.loads(on), {
|
self.assertEqual(json.loads(on), {
|
||||||
'a': 0,
|
'a': True,
|
||||||
'b': 1,
|
'b': False,
|
||||||
'c': 0,
|
'c': False,
|
||||||
'd': 42.42,
|
'd': True,
|
||||||
'e': [],
|
'e': [],
|
||||||
'f': "abc",
|
'f': "abc",
|
||||||
'g': "",
|
'g': "",
|
||||||
|
@ -1045,10 +1152,26 @@ class TestUtil(unittest.TestCase):
|
||||||
on = js_to_json('{ "040": "040" }')
|
on = js_to_json('{ "040": "040" }')
|
||||||
self.assertEqual(json.loads(on), {'040': '040'})
|
self.assertEqual(json.loads(on), {'040': '040'})
|
||||||
|
|
||||||
|
on = js_to_json('[1,//{},\n2]')
|
||||||
|
self.assertEqual(json.loads(on), [1, 2])
|
||||||
|
|
||||||
|
on = js_to_json(r'"\^\$\#"')
|
||||||
|
self.assertEqual(json.loads(on), R'^$#', msg='Unnecessary escapes should be stripped')
|
||||||
|
|
||||||
|
on = js_to_json('\'"\\""\'')
|
||||||
|
self.assertEqual(json.loads(on), '"""', msg='Unnecessary quote escape should be escaped')
|
||||||
|
|
||||||
def test_js_to_json_malformed(self):
|
def test_js_to_json_malformed(self):
|
||||||
self.assertEqual(js_to_json('42a1'), '42"a1"')
|
self.assertEqual(js_to_json('42a1'), '42"a1"')
|
||||||
self.assertEqual(js_to_json('42a-1'), '42"a"-1')
|
self.assertEqual(js_to_json('42a-1'), '42"a"-1')
|
||||||
|
|
||||||
|
def test_js_to_json_template_literal(self):
|
||||||
|
self.assertEqual(js_to_json('`Hello ${name}`', {'name': '"world"'}), '"Hello world"')
|
||||||
|
self.assertEqual(js_to_json('`${name}${name}`', {'name': '"X"'}), '"XX"')
|
||||||
|
self.assertEqual(js_to_json('`${name}${name}`', {'name': '5'}), '"55"')
|
||||||
|
self.assertEqual(js_to_json('`${name}"${name}"`', {'name': '5'}), '"5\\"5\\""')
|
||||||
|
self.assertEqual(js_to_json('`${name}`', {}), '"name"')
|
||||||
|
|
||||||
def test_extract_attributes(self):
|
def test_extract_attributes(self):
|
||||||
self.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'})
|
self.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'})
|
||||||
self.assertEqual(extract_attributes("<e x='y'>"), {'x': 'y'})
|
self.assertEqual(extract_attributes("<e x='y'>"), {'x': 'y'})
|
||||||
|
@ -1562,8 +1685,10 @@ Line 1
|
||||||
self.assertEqual(variadic(None), (None, ))
|
self.assertEqual(variadic(None), (None, ))
|
||||||
self.assertEqual(variadic('spam'), ('spam', ))
|
self.assertEqual(variadic('spam'), ('spam', ))
|
||||||
self.assertEqual(variadic('spam', allowed_types=dict), 'spam')
|
self.assertEqual(variadic('spam', allowed_types=dict), 'spam')
|
||||||
|
self.assertEqual(variadic('spam', allowed_types=[dict]), 'spam')
|
||||||
|
|
||||||
def test_traverse_obj(self):
|
def test_traverse_obj(self):
|
||||||
|
str = compat_str
|
||||||
_TEST_DATA = {
|
_TEST_DATA = {
|
||||||
100: 100,
|
100: 100,
|
||||||
1.2: 1.2,
|
1.2: 1.2,
|
||||||
|
@ -1581,6 +1706,11 @@ Line 1
|
||||||
'dict': {},
|
'dict': {},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# define a pukka Iterable
|
||||||
|
def iter_range(stop):
|
||||||
|
for from_ in range(stop):
|
||||||
|
yield from_
|
||||||
|
|
||||||
# Test base functionality
|
# Test base functionality
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, ('str',)), 'str',
|
self.assertEqual(traverse_obj(_TEST_DATA, ('str',)), 'str',
|
||||||
msg='allow tuple path')
|
msg='allow tuple path')
|
||||||
|
@ -1596,22 +1726,60 @@ Line 1
|
||||||
|
|
||||||
# Test Ellipsis behavior
|
# Test Ellipsis behavior
|
||||||
self.assertCountEqual(traverse_obj(_TEST_DATA, Ellipsis),
|
self.assertCountEqual(traverse_obj(_TEST_DATA, Ellipsis),
|
||||||
(item for item in _TEST_DATA.values() if item is not None),
|
(item for item in _TEST_DATA.values() if item not in (None, {})),
|
||||||
msg='`...` should give all values except `None`')
|
msg='`...` should give all non-discarded values')
|
||||||
self.assertCountEqual(traverse_obj(_TEST_DATA, ('urls', 0, Ellipsis)), _TEST_DATA['urls'][0].values(),
|
self.assertCountEqual(traverse_obj(_TEST_DATA, ('urls', 0, Ellipsis)), _TEST_DATA['urls'][0].values(),
|
||||||
msg='`...` selection for dicts should select all values')
|
msg='`...` selection for dicts should select all values')
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, Ellipsis, 'url')),
|
self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, Ellipsis, 'url')),
|
||||||
['https://www.example.com/0', 'https://www.example.com/1'],
|
['https://www.example.com/0', 'https://www.example.com/1'],
|
||||||
msg='nested `...` queries should work')
|
msg='nested `...` queries should work')
|
||||||
self.assertCountEqual(traverse_obj(_TEST_DATA, (Ellipsis, Ellipsis, 'index')), range(4),
|
self.assertCountEqual(traverse_obj(_TEST_DATA, (Ellipsis, Ellipsis, 'index')), iter_range(4),
|
||||||
msg='`...` query result should be flattened')
|
msg='`...` query result should be flattened')
|
||||||
|
self.assertEqual(traverse_obj(iter(range(4)), Ellipsis), list(range(4)),
|
||||||
|
msg='`...` should accept iterables')
|
||||||
|
|
||||||
# Test function as key
|
# Test function as key
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, lambda x, y: x == 'urls' and isinstance(y, list)),
|
self.assertEqual(traverse_obj(_TEST_DATA, lambda x, y: x == 'urls' and isinstance(y, list)),
|
||||||
[_TEST_DATA['urls']],
|
[_TEST_DATA['urls']],
|
||||||
msg='function as query key should perform a filter based on (key, value)')
|
msg='function as query key should perform a filter based on (key, value)')
|
||||||
self.assertCountEqual(traverse_obj(_TEST_DATA, lambda _, x: isinstance(x[0], compat_str)), {'str'},
|
self.assertCountEqual(traverse_obj(_TEST_DATA, lambda _, x: isinstance(x[0], str)), set(('str',)),
|
||||||
msg='exceptions in the query function should be caught')
|
msg='exceptions in the query function should be caught')
|
||||||
|
self.assertEqual(traverse_obj(iter(range(4)), lambda _, x: x % 2 == 0), [0, 2],
|
||||||
|
msg='function key should accept iterables')
|
||||||
|
if __debug__:
|
||||||
|
with self.assertRaises(Exception, msg='Wrong function signature should raise in debug'):
|
||||||
|
traverse_obj(_TEST_DATA, lambda a: Ellipsis)
|
||||||
|
with self.assertRaises(Exception, msg='Wrong function signature should raise in debug'):
|
||||||
|
traverse_obj(_TEST_DATA, lambda a, b, c: Ellipsis)
|
||||||
|
|
||||||
|
# Test set as key (transformation/type, like `expected_type`)
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, T(str.upper), )), ['STR'],
|
||||||
|
msg='Function in set should be a transformation')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, T(str))), ['str'],
|
||||||
|
msg='Type in set should be a type filter')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, T(dict)), _TEST_DATA,
|
||||||
|
msg='A single set should be wrapped into a path')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, T(str.upper))), ['STR'],
|
||||||
|
msg='Transformation function should not raise')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, T(str_or_none))),
|
||||||
|
[item for item in map(str_or_none, _TEST_DATA.values()) if item is not None],
|
||||||
|
msg='Function in set should be a transformation')
|
||||||
|
if __debug__:
|
||||||
|
with self.assertRaises(Exception, msg='Sets with length != 1 should raise in debug'):
|
||||||
|
traverse_obj(_TEST_DATA, set())
|
||||||
|
with self.assertRaises(Exception, msg='Sets with length != 1 should raise in debug'):
|
||||||
|
traverse_obj(_TEST_DATA, set((str.upper, str)))
|
||||||
|
|
||||||
|
# Test `slice` as a key
|
||||||
|
_SLICE_DATA = [0, 1, 2, 3, 4]
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, ('dict', slice(1))), None,
|
||||||
|
msg='slice on a dictionary should not throw')
|
||||||
|
self.assertEqual(traverse_obj(_SLICE_DATA, slice(1)), _SLICE_DATA[:1],
|
||||||
|
msg='slice key should apply slice to sequence')
|
||||||
|
self.assertEqual(traverse_obj(_SLICE_DATA, slice(1, 2)), _SLICE_DATA[1:2],
|
||||||
|
msg='slice key should apply slice to sequence')
|
||||||
|
self.assertEqual(traverse_obj(_SLICE_DATA, slice(1, 4, 2)), _SLICE_DATA[1:4:2],
|
||||||
|
msg='slice key should apply slice to sequence')
|
||||||
|
|
||||||
# Test alternative paths
|
# Test alternative paths
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, 'fail', 'str'), 'str',
|
self.assertEqual(traverse_obj(_TEST_DATA, 'fail', 'str'), 'str',
|
||||||
|
@ -1657,15 +1825,23 @@ Line 1
|
||||||
{0: ['https://www.example.com/1', 'https://www.example.com/0']},
|
{0: ['https://www.example.com/1', 'https://www.example.com/0']},
|
||||||
msg='triple nesting in dict path should be treated as branches')
|
msg='triple nesting in dict path should be treated as branches')
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, {0: 'fail'}), {},
|
self.assertEqual(traverse_obj(_TEST_DATA, {0: 'fail'}), {},
|
||||||
msg='remove `None` values when dict key')
|
msg='remove `None` values when top level dict key fails')
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, {0: 'fail'}, default=Ellipsis), {0: Ellipsis},
|
self.assertEqual(traverse_obj(_TEST_DATA, {0: 'fail'}, default=Ellipsis), {0: Ellipsis},
|
||||||
msg='do not remove `None` values if `default`')
|
msg='use `default` if key fails and `default`')
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, {0: 'dict'}), {0: {}},
|
self.assertEqual(traverse_obj(_TEST_DATA, {0: 'dict'}), {},
|
||||||
msg='do not remove empty values when dict key')
|
msg='remove empty values when dict key')
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, {0: 'dict'}, default=Ellipsis), {0: {}},
|
self.assertEqual(traverse_obj(_TEST_DATA, {0: 'dict'}, default=Ellipsis), {0: Ellipsis},
|
||||||
msg='do not remove empty values when dict key and a default')
|
msg='use `default` when dict key and a default')
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, {0: ('dict', Ellipsis)}), {0: []},
|
self.assertEqual(traverse_obj(_TEST_DATA, {0: {0: 'fail'}}), {},
|
||||||
msg='if branch in dict key not successful, return `[]`')
|
msg='remove empty values when nested dict key fails')
|
||||||
|
self.assertEqual(traverse_obj(None, {0: 'fail'}), {},
|
||||||
|
msg='default to dict if pruned')
|
||||||
|
self.assertEqual(traverse_obj(None, {0: 'fail'}, default=Ellipsis), {0: Ellipsis},
|
||||||
|
msg='default to dict if pruned and default is given')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, {0: {0: 'fail'}}, default=Ellipsis), {0: {0: Ellipsis}},
|
||||||
|
msg='use nested `default` when nested dict key fails and `default`')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, {0: ('dict', Ellipsis)}), {},
|
||||||
|
msg='remove key if branch in dict key not successful')
|
||||||
|
|
||||||
# Testing default parameter behavior
|
# Testing default parameter behavior
|
||||||
_DEFAULT_DATA = {'None': None, 'int': 0, 'list': []}
|
_DEFAULT_DATA = {'None': None, 'int': 0, 'list': []}
|
||||||
|
@ -1689,20 +1865,55 @@ Line 1
|
||||||
msg='if branched but not successful return `[]`, not `default`')
|
msg='if branched but not successful return `[]`, not `default`')
|
||||||
self.assertEqual(traverse_obj(_DEFAULT_DATA, ('list', Ellipsis)), [],
|
self.assertEqual(traverse_obj(_DEFAULT_DATA, ('list', Ellipsis)), [],
|
||||||
msg='if branched but object is empty return `[]`, not `default`')
|
msg='if branched but object is empty return `[]`, not `default`')
|
||||||
|
self.assertEqual(traverse_obj(None, Ellipsis), [],
|
||||||
|
msg='if branched but object is `None` return `[]`, not `default`')
|
||||||
|
self.assertEqual(traverse_obj({0: None}, (0, Ellipsis)), [],
|
||||||
|
msg='if branched but state is `None` return `[]`, not `default`')
|
||||||
|
|
||||||
|
branching_paths = [
|
||||||
|
('fail', Ellipsis),
|
||||||
|
(Ellipsis, 'fail'),
|
||||||
|
100 * ('fail',) + (Ellipsis,),
|
||||||
|
(Ellipsis,) + 100 * ('fail',),
|
||||||
|
]
|
||||||
|
for branching_path in branching_paths:
|
||||||
|
self.assertEqual(traverse_obj({}, branching_path), [],
|
||||||
|
msg='if branched but state is `None`, return `[]` (not `default`)')
|
||||||
|
self.assertEqual(traverse_obj({}, 'fail', branching_path), [],
|
||||||
|
msg='if branching in last alternative and previous did not match, return `[]` (not `default`)')
|
||||||
|
self.assertEqual(traverse_obj({0: 'x'}, 0, branching_path), 'x',
|
||||||
|
msg='if branching in last alternative and previous did match, return single value')
|
||||||
|
self.assertEqual(traverse_obj({0: 'x'}, branching_path, 0), 'x',
|
||||||
|
msg='if branching in first alternative and non-branching path does match, return single value')
|
||||||
|
self.assertEqual(traverse_obj({}, branching_path, 'fail'), None,
|
||||||
|
msg='if branching in first alternative and non-branching path does not match, return `default`')
|
||||||
|
|
||||||
# Testing expected_type behavior
|
# Testing expected_type behavior
|
||||||
_EXPECTED_TYPE_DATA = {'str': 'str', 'int': 0}
|
_EXPECTED_TYPE_DATA = {'str': 'str', 'int': 0}
|
||||||
self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=compat_str), 'str',
|
self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=str),
|
||||||
msg='accept matching `expected_type` type')
|
'str', msg='accept matching `expected_type` type')
|
||||||
self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=int), None,
|
self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=int),
|
||||||
msg='reject non matching `expected_type` type')
|
None, msg='reject non-matching `expected_type` type')
|
||||||
self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'int', expected_type=lambda x: compat_str(x)), '0',
|
self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'int', expected_type=lambda x: str(x)),
|
||||||
msg='transform type using type function')
|
'0', msg='transform type using type function')
|
||||||
self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str',
|
self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=lambda _: 1 / 0),
|
||||||
expected_type=lambda _: 1 / 0), None,
|
None, msg='wrap expected_type function in try_call')
|
||||||
msg='wrap expected_type function in try_call')
|
self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, Ellipsis, expected_type=str),
|
||||||
self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, Ellipsis, expected_type=compat_str), ['str'],
|
['str'], msg='eliminate items that expected_type fails on')
|
||||||
msg='eliminate items that expected_type fails on')
|
self.assertEqual(traverse_obj(_TEST_DATA, {0: 100, 1: 1.2}, expected_type=int),
|
||||||
|
{0: 100}, msg='type as expected_type should filter dict values')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, {0: 100, 1: 1.2, 2: 'None'}, expected_type=str_or_none),
|
||||||
|
{0: '100', 1: '1.2'}, msg='function as expected_type should transform dict values')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, ({0: 1.2}, 0, set((int_or_none,))), expected_type=int),
|
||||||
|
1, msg='expected_type should not filter non-final dict values')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, {0: {0: 100, 1: 'str'}}, expected_type=int),
|
||||||
|
{0: {0: 100}}, msg='expected_type should transform deep dict values')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, [({0: '...'}, {0: '...'})], expected_type=type(Ellipsis)),
|
||||||
|
[{0: Ellipsis}, {0: Ellipsis}], msg='expected_type should transform branched dict values')
|
||||||
|
self.assertEqual(traverse_obj({1: {3: 4}}, [(1, 2), 3], expected_type=int),
|
||||||
|
[4], msg='expected_type regression for type matching in tuple branching')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, ['data', Ellipsis], expected_type=int),
|
||||||
|
[], msg='expected_type regression for type matching in dict result')
|
||||||
|
|
||||||
# Test get_all behavior
|
# Test get_all behavior
|
||||||
_GET_ALL_DATA = {'key': [0, 1, 2]}
|
_GET_ALL_DATA = {'key': [0, 1, 2]}
|
||||||
|
@ -1747,14 +1958,23 @@ Line 1
|
||||||
_traverse_string=True), '.',
|
_traverse_string=True), '.',
|
||||||
msg='traverse into converted data if `traverse_string`')
|
msg='traverse into converted data if `traverse_string`')
|
||||||
self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', Ellipsis),
|
self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', Ellipsis),
|
||||||
_traverse_string=True), list('str'),
|
_traverse_string=True), 'str',
|
||||||
msg='`...` branching into string should result in list')
|
msg='`...` should result in string (same value) if `traverse_string`')
|
||||||
|
self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', slice(0, None, 2)),
|
||||||
|
_traverse_string=True), 'sr',
|
||||||
|
msg='`slice` should result in string if `traverse_string`')
|
||||||
|
self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', lambda i, v: i or v == 's'),
|
||||||
|
_traverse_string=True), 'str',
|
||||||
|
msg='function should result in string if `traverse_string`')
|
||||||
self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', (0, 2)),
|
self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', (0, 2)),
|
||||||
_traverse_string=True), ['s', 'r'],
|
_traverse_string=True), ['s', 'r'],
|
||||||
msg='branching into string should result in list')
|
msg='branching should result in list if `traverse_string`')
|
||||||
self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', lambda _, x: x),
|
self.assertEqual(traverse_obj({}, (0, Ellipsis), _traverse_string=True), [],
|
||||||
_traverse_string=True), list('str'),
|
msg='branching should result in list if `traverse_string`')
|
||||||
msg='function branching into string should result in list')
|
self.assertEqual(traverse_obj({}, (0, lambda x, y: True), _traverse_string=True), [],
|
||||||
|
msg='branching should result in list if `traverse_string`')
|
||||||
|
self.assertEqual(traverse_obj({}, (0, slice(1)), _traverse_string=True), [],
|
||||||
|
msg='branching should result in list if `traverse_string`')
|
||||||
|
|
||||||
# Test is_user_input behavior
|
# Test is_user_input behavior
|
||||||
_IS_USER_INPUT_DATA = {'range8': list(range(8))}
|
_IS_USER_INPUT_DATA = {'range8': list(range(8))}
|
||||||
|
@ -1791,6 +2011,8 @@ Line 1
|
||||||
msg='failing str key on a `re.Match` should return `default`')
|
msg='failing str key on a `re.Match` should return `default`')
|
||||||
self.assertEqual(traverse_obj(mobj, 8), None,
|
self.assertEqual(traverse_obj(mobj, 8), None,
|
||||||
msg='failing int key on a `re.Match` should return `default`')
|
msg='failing int key on a `re.Match` should return `default`')
|
||||||
|
self.assertEqual(traverse_obj(mobj, lambda k, _: k in (0, 'group')), ['0123', '3'],
|
||||||
|
msg='function on a `re.Match` should give group name as well')
|
||||||
|
|
||||||
def test_get_first(self):
|
def test_get_first(self):
|
||||||
self.assertEqual(get_first([{'a': None}, {'a': 'spam'}], 'a'), 'spam')
|
self.assertEqual(get_first([{'a': None}, {'a': 'spam'}], 'a'), 'spam')
|
||||||
|
|
|
@ -11,12 +11,11 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
from test.helper import get_params, try_rm
|
from test.helper import get_params, try_rm
|
||||||
|
|
||||||
|
|
||||||
import io
|
|
||||||
|
|
||||||
import xml.etree.ElementTree
|
import xml.etree.ElementTree
|
||||||
|
|
||||||
import youtube_dl.YoutubeDL
|
import youtube_dl.YoutubeDL
|
||||||
import youtube_dl.extractor
|
import youtube_dl.extractor
|
||||||
|
from youtube_dl.compat import compat_open as open
|
||||||
|
|
||||||
|
|
||||||
class YoutubeDL(youtube_dl.YoutubeDL):
|
class YoutubeDL(youtube_dl.YoutubeDL):
|
||||||
|
@ -51,7 +50,7 @@ class TestAnnotations(unittest.TestCase):
|
||||||
ydl.download([TEST_ID])
|
ydl.download([TEST_ID])
|
||||||
self.assertTrue(os.path.exists(ANNOTATIONS_FILE))
|
self.assertTrue(os.path.exists(ANNOTATIONS_FILE))
|
||||||
annoxml = None
|
annoxml = None
|
||||||
with io.open(ANNOTATIONS_FILE, 'r', encoding='utf-8') as annof:
|
with open(ANNOTATIONS_FILE, 'r', encoding='utf-8') as annof:
|
||||||
annoxml = xml.etree.ElementTree.parse(annof)
|
annoxml = xml.etree.ElementTree.parse(annof)
|
||||||
self.assertTrue(annoxml is not None, 'Failed to parse annotations XML')
|
self.assertTrue(annoxml is not None, 'Failed to parse annotations XML')
|
||||||
root = annoxml.getroot()
|
root = annoxml.getroot()
|
||||||
|
|
|
@ -8,11 +8,14 @@ import sys
|
||||||
import unittest
|
import unittest
|
||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
import io
|
|
||||||
import re
|
import re
|
||||||
import string
|
import string
|
||||||
|
|
||||||
from youtube_dl.compat import compat_str, compat_urlretrieve
|
from youtube_dl.compat import (
|
||||||
|
compat_open as open,
|
||||||
|
compat_str,
|
||||||
|
compat_urlretrieve,
|
||||||
|
)
|
||||||
|
|
||||||
from test.helper import FakeYDL
|
from test.helper import FakeYDL
|
||||||
from youtube_dl.extractor import YoutubeIE
|
from youtube_dl.extractor import YoutubeIE
|
||||||
|
@ -67,6 +70,10 @@ _SIG_TESTS = [
|
||||||
]
|
]
|
||||||
|
|
||||||
_NSIG_TESTS = [
|
_NSIG_TESTS = [
|
||||||
|
(
|
||||||
|
'https://www.youtube.com/s/player/7862ca1f/player_ias.vflset/en_US/base.js',
|
||||||
|
'X_LCxVDjAavgE5t', 'yxJ1dM6iz5ogUg',
|
||||||
|
),
|
||||||
(
|
(
|
||||||
'https://www.youtube.com/s/player/9216d1f7/player_ias.vflset/en_US/base.js',
|
'https://www.youtube.com/s/player/9216d1f7/player_ias.vflset/en_US/base.js',
|
||||||
'SLp9F5bwjAdhE9F-', 'gWnb9IK2DJ8Q1w',
|
'SLp9F5bwjAdhE9F-', 'gWnb9IK2DJ8Q1w',
|
||||||
|
@ -135,6 +142,22 @@ _NSIG_TESTS = [
|
||||||
'https://www.youtube.com/s/player/5a3b6271/player_ias.vflset/en_US/base.js',
|
'https://www.youtube.com/s/player/5a3b6271/player_ias.vflset/en_US/base.js',
|
||||||
'B2j7f_UPT4rfje85Lu_e', 'm5DmNymaGQ5RdQ',
|
'B2j7f_UPT4rfje85Lu_e', 'm5DmNymaGQ5RdQ',
|
||||||
),
|
),
|
||||||
|
(
|
||||||
|
'https://www.youtube.com/s/player/dac945fd/player_ias.vflset/en_US/base.js',
|
||||||
|
'o8BkRxXhuYsBCWi6RplPdP', '3Lx32v_hmzTm6A',
|
||||||
|
),
|
||||||
|
(
|
||||||
|
'https://www.youtube.com/s/player/6f20102c/player_ias.vflset/en_US/base.js',
|
||||||
|
'lE8DhoDmKqnmJJ', 'pJTTX6XyJP2BYw',
|
||||||
|
),
|
||||||
|
(
|
||||||
|
'https://www.youtube.com/s/player/cfa9e7cb/player_ias.vflset/en_US/base.js',
|
||||||
|
'qO0NiMtYQ7TeJnfFG2', 'k9cuJDHNS5O7kQ',
|
||||||
|
),
|
||||||
|
(
|
||||||
|
'https://www.youtube.com/s/player/b7910ca8/player_ias.vflset/en_US/base.js',
|
||||||
|
'_hXMCwMt9qE310D', 'LoZMgkkofRMCZQ',
|
||||||
|
),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
@ -188,7 +211,7 @@ def t_factory(name, sig_func, url_pattern):
|
||||||
|
|
||||||
if not os.path.exists(fn):
|
if not os.path.exists(fn):
|
||||||
compat_urlretrieve(url, fn)
|
compat_urlretrieve(url, fn)
|
||||||
with io.open(fn, encoding='utf-8') as testf:
|
with open(fn, encoding='utf-8') as testf:
|
||||||
jscode = testf.read()
|
jscode = testf.read()
|
||||||
self.assertEqual(sig_func(jscode, sig_input), expected_sig)
|
self.assertEqual(sig_func(jscode, sig_input), expected_sig)
|
||||||
|
|
||||||
|
|
|
@ -4,11 +4,9 @@
|
||||||
from __future__ import absolute_import, unicode_literals
|
from __future__ import absolute_import, unicode_literals
|
||||||
|
|
||||||
import collections
|
import collections
|
||||||
import contextlib
|
|
||||||
import copy
|
import copy
|
||||||
import datetime
|
import datetime
|
||||||
import errno
|
import errno
|
||||||
import fileinput
|
|
||||||
import io
|
import io
|
||||||
import itertools
|
import itertools
|
||||||
import json
|
import json
|
||||||
|
@ -26,25 +24,38 @@ import tokenize
|
||||||
import traceback
|
import traceback
|
||||||
import random
|
import random
|
||||||
|
|
||||||
|
try:
|
||||||
|
from ssl import OPENSSL_VERSION
|
||||||
|
except ImportError:
|
||||||
|
# Must be Python 2.6, should be built against 1.0.2
|
||||||
|
OPENSSL_VERSION = 'OpenSSL 1.0.2(?)'
|
||||||
from string import ascii_letters
|
from string import ascii_letters
|
||||||
|
|
||||||
from .compat import (
|
from .compat import (
|
||||||
compat_basestring,
|
compat_basestring,
|
||||||
compat_cookiejar,
|
compat_collections_chain_map as ChainMap,
|
||||||
|
compat_filter as filter,
|
||||||
compat_get_terminal_size,
|
compat_get_terminal_size,
|
||||||
compat_http_client,
|
compat_http_client,
|
||||||
|
compat_http_cookiejar_Cookie,
|
||||||
|
compat_http_cookies_SimpleCookie,
|
||||||
|
compat_integer_types,
|
||||||
compat_kwargs,
|
compat_kwargs,
|
||||||
|
compat_map as map,
|
||||||
compat_numeric_types,
|
compat_numeric_types,
|
||||||
|
compat_open as open,
|
||||||
compat_os_name,
|
compat_os_name,
|
||||||
compat_str,
|
compat_str,
|
||||||
compat_tokenize_tokenize,
|
compat_tokenize_tokenize,
|
||||||
compat_urllib_error,
|
compat_urllib_error,
|
||||||
|
compat_urllib_parse,
|
||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
compat_urllib_request_DataHandler,
|
compat_urllib_request_DataHandler,
|
||||||
)
|
)
|
||||||
from .utils import (
|
from .utils import (
|
||||||
age_restricted,
|
age_restricted,
|
||||||
args_to_str,
|
args_to_str,
|
||||||
|
bug_reports_message,
|
||||||
ContentTooShortError,
|
ContentTooShortError,
|
||||||
date_from_str,
|
date_from_str,
|
||||||
DateRange,
|
DateRange,
|
||||||
|
@ -62,7 +73,9 @@ from .utils import (
|
||||||
GeoRestrictedError,
|
GeoRestrictedError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
ISO3166Utils,
|
ISO3166Utils,
|
||||||
|
join_nonempty,
|
||||||
locked_file,
|
locked_file,
|
||||||
|
LazyList,
|
||||||
make_HTTPS_handler,
|
make_HTTPS_handler,
|
||||||
MaxDownloadsReached,
|
MaxDownloadsReached,
|
||||||
orderedSet,
|
orderedSet,
|
||||||
|
@ -85,6 +98,7 @@ from .utils import (
|
||||||
std_headers,
|
std_headers,
|
||||||
str_or_none,
|
str_or_none,
|
||||||
subtitles_filename,
|
subtitles_filename,
|
||||||
|
traverse_obj,
|
||||||
UnavailableVideoError,
|
UnavailableVideoError,
|
||||||
url_basename,
|
url_basename,
|
||||||
version_tuple,
|
version_tuple,
|
||||||
|
@ -94,6 +108,7 @@ from .utils import (
|
||||||
YoutubeDLCookieProcessor,
|
YoutubeDLCookieProcessor,
|
||||||
YoutubeDLHandler,
|
YoutubeDLHandler,
|
||||||
YoutubeDLRedirectHandler,
|
YoutubeDLRedirectHandler,
|
||||||
|
ytdl_is_updateable,
|
||||||
)
|
)
|
||||||
from .cache import Cache
|
from .cache import Cache
|
||||||
from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
|
from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
|
||||||
|
@ -363,6 +378,9 @@ class YoutubeDL(object):
|
||||||
self.params.update(params)
|
self.params.update(params)
|
||||||
self.cache = Cache(self)
|
self.cache = Cache(self)
|
||||||
|
|
||||||
|
self._header_cookies = []
|
||||||
|
self._load_cookies_from_headers(self.params.get('http_headers'))
|
||||||
|
|
||||||
def check_deprecated(param, option, suggestion):
|
def check_deprecated(param, option, suggestion):
|
||||||
if self.params.get(param) is not None:
|
if self.params.get(param) is not None:
|
||||||
self.report_warning(
|
self.report_warning(
|
||||||
|
@ -569,7 +587,7 @@ class YoutubeDL(object):
|
||||||
if self.params.get('cookiefile') is not None:
|
if self.params.get('cookiefile') is not None:
|
||||||
self.cookiejar.save(ignore_discard=True, ignore_expires=True)
|
self.cookiejar.save(ignore_discard=True, ignore_expires=True)
|
||||||
|
|
||||||
def trouble(self, message=None, tb=None):
|
def trouble(self, *args, **kwargs):
|
||||||
"""Determine action to take when a download problem appears.
|
"""Determine action to take when a download problem appears.
|
||||||
|
|
||||||
Depending on if the downloader has been configured to ignore
|
Depending on if the downloader has been configured to ignore
|
||||||
|
@ -578,6 +596,11 @@ class YoutubeDL(object):
|
||||||
|
|
||||||
tb, if given, is additional traceback information.
|
tb, if given, is additional traceback information.
|
||||||
"""
|
"""
|
||||||
|
# message=None, tb=None, is_error=True
|
||||||
|
message = args[0] if len(args) > 0 else kwargs.get('message', None)
|
||||||
|
tb = args[1] if len(args) > 1 else kwargs.get('tb', None)
|
||||||
|
is_error = args[2] if len(args) > 2 else kwargs.get('is_error', True)
|
||||||
|
|
||||||
if message is not None:
|
if message is not None:
|
||||||
self.to_stderr(message)
|
self.to_stderr(message)
|
||||||
if self.params.get('verbose'):
|
if self.params.get('verbose'):
|
||||||
|
@ -590,7 +613,10 @@ class YoutubeDL(object):
|
||||||
else:
|
else:
|
||||||
tb_data = traceback.format_list(traceback.extract_stack())
|
tb_data = traceback.format_list(traceback.extract_stack())
|
||||||
tb = ''.join(tb_data)
|
tb = ''.join(tb_data)
|
||||||
self.to_stderr(tb)
|
if tb:
|
||||||
|
self.to_stderr(tb)
|
||||||
|
if not is_error:
|
||||||
|
return
|
||||||
if not self.params.get('ignoreerrors', False):
|
if not self.params.get('ignoreerrors', False):
|
||||||
if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
|
if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
|
||||||
exc_info = sys.exc_info()[1].exc_info
|
exc_info = sys.exc_info()[1].exc_info
|
||||||
|
@ -599,11 +625,18 @@ class YoutubeDL(object):
|
||||||
raise DownloadError(message, exc_info)
|
raise DownloadError(message, exc_info)
|
||||||
self._download_retcode = 1
|
self._download_retcode = 1
|
||||||
|
|
||||||
def report_warning(self, message):
|
def report_warning(self, message, only_once=False, _cache={}):
|
||||||
'''
|
'''
|
||||||
Print the message to stderr, it will be prefixed with 'WARNING:'
|
Print the message to stderr, it will be prefixed with 'WARNING:'
|
||||||
If stderr is a tty file the 'WARNING:' will be colored
|
If stderr is a tty file the 'WARNING:' will be colored
|
||||||
'''
|
'''
|
||||||
|
if only_once:
|
||||||
|
m_hash = hash((self, message))
|
||||||
|
m_cnt = _cache.setdefault(m_hash, 0)
|
||||||
|
_cache[m_hash] = m_cnt + 1
|
||||||
|
if m_cnt > 0:
|
||||||
|
return
|
||||||
|
|
||||||
if self.params.get('logger') is not None:
|
if self.params.get('logger') is not None:
|
||||||
self.params['logger'].warning(message)
|
self.params['logger'].warning(message)
|
||||||
else:
|
else:
|
||||||
|
@ -616,7 +649,7 @@ class YoutubeDL(object):
|
||||||
warning_message = '%s %s' % (_msg_header, message)
|
warning_message = '%s %s' % (_msg_header, message)
|
||||||
self.to_stderr(warning_message)
|
self.to_stderr(warning_message)
|
||||||
|
|
||||||
def report_error(self, message, tb=None):
|
def report_error(self, message, *args, **kwargs):
|
||||||
'''
|
'''
|
||||||
Do the same as trouble, but prefixes the message with 'ERROR:', colored
|
Do the same as trouble, but prefixes the message with 'ERROR:', colored
|
||||||
in red if stderr is a tty file.
|
in red if stderr is a tty file.
|
||||||
|
@ -625,8 +658,18 @@ class YoutubeDL(object):
|
||||||
_msg_header = '\033[0;31mERROR:\033[0m'
|
_msg_header = '\033[0;31mERROR:\033[0m'
|
||||||
else:
|
else:
|
||||||
_msg_header = 'ERROR:'
|
_msg_header = 'ERROR:'
|
||||||
error_message = '%s %s' % (_msg_header, message)
|
kwargs['message'] = '%s %s' % (_msg_header, message)
|
||||||
self.trouble(error_message, tb)
|
self.trouble(*args, **kwargs)
|
||||||
|
|
||||||
|
def report_unscoped_cookies(self, *args, **kwargs):
|
||||||
|
# message=None, tb=False, is_error=False
|
||||||
|
if len(args) <= 2:
|
||||||
|
kwargs.setdefault('is_error', False)
|
||||||
|
if len(args) <= 0:
|
||||||
|
kwargs.setdefault(
|
||||||
|
'message',
|
||||||
|
'Unscoped cookies are not allowed: please specify some sort of scoping')
|
||||||
|
self.report_error(*args, **kwargs)
|
||||||
|
|
||||||
def report_file_already_downloaded(self, file_name):
|
def report_file_already_downloaded(self, file_name):
|
||||||
"""Report file has already been fully downloaded."""
|
"""Report file has already been fully downloaded."""
|
||||||
|
@ -822,7 +865,7 @@ class YoutubeDL(object):
|
||||||
msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
|
msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
|
||||||
self.report_error(msg)
|
self.report_error(msg)
|
||||||
except ExtractorError as e: # An error we somewhat expected
|
except ExtractorError as e: # An error we somewhat expected
|
||||||
self.report_error(compat_str(e), e.format_traceback())
|
self.report_error(compat_str(e), tb=e.format_traceback())
|
||||||
except MaxDownloadsReached:
|
except MaxDownloadsReached:
|
||||||
raise
|
raise
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
@ -832,8 +875,83 @@ class YoutubeDL(object):
|
||||||
raise
|
raise
|
||||||
return wrapper
|
return wrapper
|
||||||
|
|
||||||
|
def _remove_cookie_header(self, http_headers):
|
||||||
|
"""Filters out `Cookie` header from an `http_headers` dict
|
||||||
|
The `Cookie` header is removed to prevent leaks as a result of unscoped cookies.
|
||||||
|
See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj
|
||||||
|
|
||||||
|
@param http_headers An `http_headers` dict from which any `Cookie` header
|
||||||
|
should be removed, or None
|
||||||
|
"""
|
||||||
|
return dict(filter(lambda pair: pair[0].lower() != 'cookie', (http_headers or {}).items()))
|
||||||
|
|
||||||
|
def _load_cookies(self, data, **kwargs):
|
||||||
|
"""Loads cookies from a `Cookie` header
|
||||||
|
|
||||||
|
This tries to work around the security vulnerability of passing cookies to every domain.
|
||||||
|
|
||||||
|
@param data The Cookie header as a string to load the cookies from
|
||||||
|
@param autoscope If `False`, scope cookies using Set-Cookie syntax and error for cookie without domains
|
||||||
|
If `True`, save cookies for later to be stored in the jar with a limited scope
|
||||||
|
If a URL, save cookies in the jar with the domain of the URL
|
||||||
|
"""
|
||||||
|
# autoscope=True (kw-only)
|
||||||
|
autoscope = kwargs.get('autoscope', True)
|
||||||
|
|
||||||
|
for cookie in compat_http_cookies_SimpleCookie(data).values() if data else []:
|
||||||
|
if autoscope and any(cookie.values()):
|
||||||
|
raise ValueError('Invalid syntax in Cookie Header')
|
||||||
|
|
||||||
|
domain = cookie.get('domain') or ''
|
||||||
|
expiry = cookie.get('expires')
|
||||||
|
if expiry == '': # 0 is valid so we check for `''` explicitly
|
||||||
|
expiry = None
|
||||||
|
prepared_cookie = compat_http_cookiejar_Cookie(
|
||||||
|
cookie.get('version') or 0, cookie.key, cookie.value, None, False,
|
||||||
|
domain, True, True, cookie.get('path') or '', bool(cookie.get('path')),
|
||||||
|
bool(cookie.get('secure')), expiry, False, None, None, {})
|
||||||
|
|
||||||
|
if domain:
|
||||||
|
self.cookiejar.set_cookie(prepared_cookie)
|
||||||
|
elif autoscope is True:
|
||||||
|
self.report_warning(
|
||||||
|
'Passing cookies as a header is a potential security risk; '
|
||||||
|
'they will be scoped to the domain of the downloaded urls. '
|
||||||
|
'Please consider loading cookies from a file or browser instead.',
|
||||||
|
only_once=True)
|
||||||
|
self._header_cookies.append(prepared_cookie)
|
||||||
|
elif autoscope:
|
||||||
|
self.report_warning(
|
||||||
|
'The extractor result contains an unscoped cookie as an HTTP header. '
|
||||||
|
'If you are specifying an input URL, ' + bug_reports_message(),
|
||||||
|
only_once=True)
|
||||||
|
self._apply_header_cookies(autoscope, [prepared_cookie])
|
||||||
|
else:
|
||||||
|
self.report_unscoped_cookies()
|
||||||
|
|
||||||
|
def _load_cookies_from_headers(self, headers):
|
||||||
|
self._load_cookies(traverse_obj(headers, 'cookie', casesense=False))
|
||||||
|
|
||||||
|
def _apply_header_cookies(self, url, cookies=None):
|
||||||
|
"""This method applies stray header cookies to the provided url
|
||||||
|
|
||||||
|
This loads header cookies and scopes them to the domain provided in `url`.
|
||||||
|
While this is not ideal, it helps reduce the risk of them being sent to
|
||||||
|
an unintended destination.
|
||||||
|
"""
|
||||||
|
parsed = compat_urllib_parse.urlparse(url)
|
||||||
|
if not parsed.hostname:
|
||||||
|
return
|
||||||
|
|
||||||
|
for cookie in map(copy.copy, cookies or self._header_cookies):
|
||||||
|
cookie.domain = '.' + parsed.hostname
|
||||||
|
self.cookiejar.set_cookie(cookie)
|
||||||
|
|
||||||
@__handle_extraction_exceptions
|
@__handle_extraction_exceptions
|
||||||
def __extract_info(self, url, ie, download, extra_info, process):
|
def __extract_info(self, url, ie, download, extra_info, process):
|
||||||
|
# Compat with passing cookies in http headers
|
||||||
|
self._apply_header_cookies(url)
|
||||||
|
|
||||||
ie_result = ie.extract(url)
|
ie_result = ie.extract(url)
|
||||||
if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
|
if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
|
||||||
return
|
return
|
||||||
|
@ -859,7 +977,7 @@ class YoutubeDL(object):
|
||||||
|
|
||||||
def process_ie_result(self, ie_result, download=True, extra_info={}):
|
def process_ie_result(self, ie_result, download=True, extra_info={}):
|
||||||
"""
|
"""
|
||||||
Take the result of the ie(may be modified) and resolve all unresolved
|
Take the result of the ie (may be modified) and resolve all unresolved
|
||||||
references (URLs, playlist items).
|
references (URLs, playlist items).
|
||||||
|
|
||||||
It will also download the videos if 'download'.
|
It will also download the videos if 'download'.
|
||||||
|
@ -1386,17 +1504,16 @@ class YoutubeDL(object):
|
||||||
'abr': formats_info[1].get('abr'),
|
'abr': formats_info[1].get('abr'),
|
||||||
'ext': output_ext,
|
'ext': output_ext,
|
||||||
}
|
}
|
||||||
video_selector, audio_selector = map(_build_selector_function, selector.selector)
|
|
||||||
|
|
||||||
def selector_function(ctx):
|
def selector_function(ctx):
|
||||||
for pair in itertools.product(
|
selector_fn = lambda x: _build_selector_function(x)(ctx)
|
||||||
video_selector(copy.deepcopy(ctx)), audio_selector(copy.deepcopy(ctx))):
|
for pair in itertools.product(*map(selector_fn, selector.selector)):
|
||||||
yield _merge(pair)
|
yield _merge(pair)
|
||||||
|
|
||||||
filters = [self._build_format_filter(f) for f in selector.filters]
|
filters = [self._build_format_filter(f) for f in selector.filters]
|
||||||
|
|
||||||
def final_selector(ctx):
|
def final_selector(ctx):
|
||||||
ctx_copy = copy.deepcopy(ctx)
|
ctx_copy = dict(ctx)
|
||||||
for _filter in filters:
|
for _filter in filters:
|
||||||
ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
|
ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
|
||||||
return selector_function(ctx_copy)
|
return selector_function(ctx_copy)
|
||||||
|
@ -1431,23 +1548,45 @@ class YoutubeDL(object):
|
||||||
parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
|
parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
|
||||||
return _build_selector_function(parsed_selector)
|
return _build_selector_function(parsed_selector)
|
||||||
|
|
||||||
def _calc_headers(self, info_dict):
|
def _calc_headers(self, info_dict, load_cookies=False):
|
||||||
res = std_headers.copy()
|
if load_cookies: # For --load-info-json
|
||||||
|
# load cookies from http_headers in legacy info.json
|
||||||
|
self._load_cookies(traverse_obj(info_dict, ('http_headers', 'Cookie'), casesense=False),
|
||||||
|
autoscope=info_dict['url'])
|
||||||
|
# load scoped cookies from info.json
|
||||||
|
self._load_cookies(info_dict.get('cookies'), autoscope=False)
|
||||||
|
|
||||||
add_headers = info_dict.get('http_headers')
|
cookies = self.cookiejar.get_cookies_for_url(info_dict['url'])
|
||||||
if add_headers:
|
|
||||||
res.update(add_headers)
|
|
||||||
|
|
||||||
cookies = self._calc_cookies(info_dict)
|
|
||||||
if cookies:
|
if cookies:
|
||||||
res['Cookie'] = cookies
|
# Make a string like name1=val1; attr1=a_val1; ...name2=val2; ...
|
||||||
|
# By convention a cookie name can't be a well-known attribute name
|
||||||
|
# so this syntax is unambiguous and can be parsed by (eg) SimpleCookie
|
||||||
|
encoder = compat_http_cookies_SimpleCookie()
|
||||||
|
values = []
|
||||||
|
attributes = (('Domain', '='), ('Path', '='), ('Secure',), ('Expires', '='), ('Version', '='))
|
||||||
|
attributes = tuple([x[0].lower()] + list(x) for x in attributes)
|
||||||
|
for cookie in cookies:
|
||||||
|
_, value = encoder.value_encode(cookie.value)
|
||||||
|
# Py 2 '' --> '', Py 3 '' --> '""'
|
||||||
|
if value == '':
|
||||||
|
value = '""'
|
||||||
|
values.append('='.join((cookie.name, value)))
|
||||||
|
for attr in attributes:
|
||||||
|
value = getattr(cookie, attr[0], None)
|
||||||
|
if value:
|
||||||
|
values.append('%s%s' % (''.join(attr[1:]), value if len(attr) == 3 else ''))
|
||||||
|
info_dict['cookies'] = '; '.join(values)
|
||||||
|
|
||||||
|
res = std_headers.copy()
|
||||||
|
res.update(info_dict.get('http_headers') or {})
|
||||||
|
res = self._remove_cookie_header(res)
|
||||||
|
|
||||||
if 'X-Forwarded-For' not in res:
|
if 'X-Forwarded-For' not in res:
|
||||||
x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
|
x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
|
||||||
if x_forwarded_for_ip:
|
if x_forwarded_for_ip:
|
||||||
res['X-Forwarded-For'] = x_forwarded_for_ip
|
res['X-Forwarded-For'] = x_forwarded_for_ip
|
||||||
|
|
||||||
return res
|
return res or None
|
||||||
|
|
||||||
def _calc_cookies(self, info_dict):
|
def _calc_cookies(self, info_dict):
|
||||||
pr = sanitized_Request(info_dict['url'])
|
pr = sanitized_Request(info_dict['url'])
|
||||||
|
@ -1626,10 +1765,13 @@ class YoutubeDL(object):
|
||||||
format['protocol'] = determine_protocol(format)
|
format['protocol'] = determine_protocol(format)
|
||||||
# Add HTTP headers, so that external programs can use them from the
|
# Add HTTP headers, so that external programs can use them from the
|
||||||
# json output
|
# json output
|
||||||
full_format_info = info_dict.copy()
|
format['http_headers'] = self._calc_headers(ChainMap(format, info_dict), load_cookies=True)
|
||||||
full_format_info.update(format)
|
|
||||||
format['http_headers'] = self._calc_headers(full_format_info)
|
# Safeguard against old/insecure infojson when using --load-info-json
|
||||||
# Remove private housekeeping stuff
|
info_dict['http_headers'] = self._remove_cookie_header(
|
||||||
|
info_dict.get('http_headers') or {}) or None
|
||||||
|
|
||||||
|
# Remove private housekeeping stuff (copied to http_headers in _calc_headers())
|
||||||
if '__x_forwarded_for_ip' in info_dict:
|
if '__x_forwarded_for_ip' in info_dict:
|
||||||
del info_dict['__x_forwarded_for_ip']
|
del info_dict['__x_forwarded_for_ip']
|
||||||
|
|
||||||
|
@ -1772,7 +1914,7 @@ class YoutubeDL(object):
|
||||||
self.to_stdout(formatSeconds(info_dict['duration']))
|
self.to_stdout(formatSeconds(info_dict['duration']))
|
||||||
print_mandatory('format')
|
print_mandatory('format')
|
||||||
if self.params.get('forcejson', False):
|
if self.params.get('forcejson', False):
|
||||||
self.to_stdout(json.dumps(info_dict))
|
self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
|
||||||
|
|
||||||
def process_info(self, info_dict):
|
def process_info(self, info_dict):
|
||||||
"""Process a single resolved IE result."""
|
"""Process a single resolved IE result."""
|
||||||
|
@ -1832,7 +1974,7 @@ class YoutubeDL(object):
|
||||||
else:
|
else:
|
||||||
try:
|
try:
|
||||||
self.to_screen('[info] Writing video description to: ' + descfn)
|
self.to_screen('[info] Writing video description to: ' + descfn)
|
||||||
with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
|
with open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
|
||||||
descfile.write(info_dict['description'])
|
descfile.write(info_dict['description'])
|
||||||
except (OSError, IOError):
|
except (OSError, IOError):
|
||||||
self.report_error('Cannot write description file ' + descfn)
|
self.report_error('Cannot write description file ' + descfn)
|
||||||
|
@ -1847,7 +1989,7 @@ class YoutubeDL(object):
|
||||||
else:
|
else:
|
||||||
try:
|
try:
|
||||||
self.to_screen('[info] Writing video annotations to: ' + annofn)
|
self.to_screen('[info] Writing video annotations to: ' + annofn)
|
||||||
with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
|
with open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
|
||||||
annofile.write(info_dict['annotations'])
|
annofile.write(info_dict['annotations'])
|
||||||
except (KeyError, TypeError):
|
except (KeyError, TypeError):
|
||||||
self.report_warning('There are no annotations to write.')
|
self.report_warning('There are no annotations to write.')
|
||||||
|
@ -1874,7 +2016,7 @@ class YoutubeDL(object):
|
||||||
try:
|
try:
|
||||||
# Use newline='' to prevent conversion of newline characters
|
# Use newline='' to prevent conversion of newline characters
|
||||||
# See https://github.com/ytdl-org/youtube-dl/issues/10268
|
# See https://github.com/ytdl-org/youtube-dl/issues/10268
|
||||||
with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
|
with open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
|
||||||
subfile.write(sub_info['data'])
|
subfile.write(sub_info['data'])
|
||||||
except (OSError, IOError):
|
except (OSError, IOError):
|
||||||
self.report_error('Cannot write subtitles file ' + sub_filename)
|
self.report_error('Cannot write subtitles file ' + sub_filename)
|
||||||
|
@ -1883,24 +2025,16 @@ class YoutubeDL(object):
|
||||||
try:
|
try:
|
||||||
sub_data = ie._request_webpage(
|
sub_data = ie._request_webpage(
|
||||||
sub_info['url'], info_dict['id'], note=False).read()
|
sub_info['url'], info_dict['id'], note=False).read()
|
||||||
with io.open(encodeFilename(sub_filename), 'wb') as subfile:
|
with open(encodeFilename(sub_filename), 'wb') as subfile:
|
||||||
subfile.write(sub_data)
|
subfile.write(sub_data)
|
||||||
except (ExtractorError, IOError, OSError, ValueError) as err:
|
except (ExtractorError, IOError, OSError, ValueError) as err:
|
||||||
self.report_warning('Unable to download subtitle for "%s": %s' %
|
self.report_warning('Unable to download subtitle for "%s": %s' %
|
||||||
(sub_lang, error_to_compat_str(err)))
|
(sub_lang, error_to_compat_str(err)))
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if self.params.get('writeinfojson', False):
|
self._write_info_json(
|
||||||
infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
|
'video description', info_dict,
|
||||||
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
|
replace_extension(filename, 'info.json', info_dict.get('ext')))
|
||||||
self.to_screen('[info] Video description metadata is already present')
|
|
||||||
else:
|
|
||||||
self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
|
|
||||||
try:
|
|
||||||
write_json_file(self.filter_requested_info(info_dict), infofn)
|
|
||||||
except (OSError, IOError):
|
|
||||||
self.report_error('Cannot write metadata to JSON file ' + infofn)
|
|
||||||
return
|
|
||||||
|
|
||||||
self._write_thumbnails(info_dict, filename)
|
self._write_thumbnails(info_dict, filename)
|
||||||
|
|
||||||
|
@ -1921,7 +2055,11 @@ class YoutubeDL(object):
|
||||||
fd.add_progress_hook(ph)
|
fd.add_progress_hook(ph)
|
||||||
if self.params.get('verbose'):
|
if self.params.get('verbose'):
|
||||||
self.to_screen('[debug] Invoking downloader on %r' % info.get('url'))
|
self.to_screen('[debug] Invoking downloader on %r' % info.get('url'))
|
||||||
return fd.download(name, info)
|
|
||||||
|
new_info = dict((k, v) for k, v in info.items() if not k.startswith('__p'))
|
||||||
|
new_info['http_headers'] = self._calc_headers(new_info)
|
||||||
|
|
||||||
|
return fd.download(name, new_info)
|
||||||
|
|
||||||
if info_dict.get('requested_formats') is not None:
|
if info_dict.get('requested_formats') is not None:
|
||||||
downloaded = []
|
downloaded = []
|
||||||
|
@ -2086,16 +2224,13 @@ class YoutubeDL(object):
|
||||||
raise
|
raise
|
||||||
else:
|
else:
|
||||||
if self.params.get('dump_single_json', False):
|
if self.params.get('dump_single_json', False):
|
||||||
self.to_stdout(json.dumps(res))
|
self.to_stdout(json.dumps(self.sanitize_info(res)))
|
||||||
|
|
||||||
return self._download_retcode
|
return self._download_retcode
|
||||||
|
|
||||||
def download_with_info_file(self, info_filename):
|
def download_with_info_file(self, info_filename):
|
||||||
with contextlib.closing(fileinput.FileInput(
|
with open(info_filename, encoding='utf-8') as f:
|
||||||
[info_filename], mode='r',
|
info = self.filter_requested_info(json.load(f))
|
||||||
openhook=fileinput.hook_encoded('utf-8'))) as f:
|
|
||||||
# FileInput doesn't have a read method, we can't call json.load
|
|
||||||
info = self.filter_requested_info(json.loads('\n'.join(f)))
|
|
||||||
try:
|
try:
|
||||||
self.process_ie_result(info, download=True)
|
self.process_ie_result(info, download=True)
|
||||||
except DownloadError:
|
except DownloadError:
|
||||||
|
@ -2108,10 +2243,36 @@ class YoutubeDL(object):
|
||||||
return self._download_retcode
|
return self._download_retcode
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def filter_requested_info(info_dict):
|
def sanitize_info(info_dict, remove_private_keys=False):
|
||||||
return dict(
|
''' Sanitize the infodict for converting to json '''
|
||||||
(k, v) for k, v in info_dict.items()
|
if info_dict is None:
|
||||||
if k not in ['requested_formats', 'requested_subtitles'])
|
return info_dict
|
||||||
|
|
||||||
|
if remove_private_keys:
|
||||||
|
reject = lambda k, v: (v is None
|
||||||
|
or k.startswith('__')
|
||||||
|
or k in ('requested_formats',
|
||||||
|
'requested_subtitles'))
|
||||||
|
else:
|
||||||
|
reject = lambda k, v: False
|
||||||
|
|
||||||
|
def filter_fn(obj):
|
||||||
|
if isinstance(obj, dict):
|
||||||
|
return dict((k, filter_fn(v)) for k, v in obj.items() if not reject(k, v))
|
||||||
|
elif isinstance(obj, (list, tuple, set, LazyList)):
|
||||||
|
return list(map(filter_fn, obj))
|
||||||
|
elif obj is None or any(isinstance(obj, c)
|
||||||
|
for c in (compat_integer_types,
|
||||||
|
(compat_str, float, bool))):
|
||||||
|
return obj
|
||||||
|
else:
|
||||||
|
return repr(obj)
|
||||||
|
|
||||||
|
return filter_fn(info_dict)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def filter_requested_info(cls, info_dict):
|
||||||
|
return cls.sanitize_info(info_dict, True)
|
||||||
|
|
||||||
def post_process(self, filename, ie_info):
|
def post_process(self, filename, ie_info):
|
||||||
"""Run all the postprocessors on the given file."""
|
"""Run all the postprocessors on the given file."""
|
||||||
|
@ -2318,9 +2479,12 @@ class YoutubeDL(object):
|
||||||
self.get_encoding()))
|
self.get_encoding()))
|
||||||
write_string(encoding_str, encoding=None)
|
write_string(encoding_str, encoding=None)
|
||||||
|
|
||||||
self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
|
writeln_debug = lambda *s: self._write_string('[debug] %s\n' % (''.join(s), ))
|
||||||
|
writeln_debug('youtube-dl version ', __version__)
|
||||||
if _LAZY_LOADER:
|
if _LAZY_LOADER:
|
||||||
self._write_string('[debug] Lazy loading extractors enabled' + '\n')
|
writeln_debug('Lazy loading extractors enabled')
|
||||||
|
if ytdl_is_updateable():
|
||||||
|
writeln_debug('Single file build')
|
||||||
try:
|
try:
|
||||||
sp = subprocess.Popen(
|
sp = subprocess.Popen(
|
||||||
['git', 'rev-parse', '--short', 'HEAD'],
|
['git', 'rev-parse', '--short', 'HEAD'],
|
||||||
|
@ -2329,7 +2493,7 @@ class YoutubeDL(object):
|
||||||
out, err = process_communicate_or_kill(sp)
|
out, err = process_communicate_or_kill(sp)
|
||||||
out = out.decode().strip()
|
out = out.decode().strip()
|
||||||
if re.match('[0-9a-f]+', out):
|
if re.match('[0-9a-f]+', out):
|
||||||
self._write_string('[debug] Git HEAD: ' + out + '\n')
|
writeln_debug('Git HEAD: ', out)
|
||||||
except Exception:
|
except Exception:
|
||||||
try:
|
try:
|
||||||
sys.exc_clear()
|
sys.exc_clear()
|
||||||
|
@ -2342,9 +2506,22 @@ class YoutubeDL(object):
|
||||||
return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
|
return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
|
||||||
return impl_name
|
return impl_name
|
||||||
|
|
||||||
self._write_string('[debug] Python version %s (%s) - %s\n' % (
|
def libc_ver():
|
||||||
platform.python_version(), python_implementation(),
|
try:
|
||||||
platform_name()))
|
return platform.libc_ver()
|
||||||
|
except OSError: # We may not have access to the executable
|
||||||
|
return []
|
||||||
|
|
||||||
|
libc = join_nonempty(*libc_ver(), delim=' ')
|
||||||
|
writeln_debug('Python %s (%s %s %s) - %s - %s%s' % (
|
||||||
|
platform.python_version(),
|
||||||
|
python_implementation(),
|
||||||
|
platform.machine(),
|
||||||
|
platform.architecture()[0],
|
||||||
|
platform_name(),
|
||||||
|
OPENSSL_VERSION,
|
||||||
|
(' - %s' % (libc, )) if libc else ''
|
||||||
|
))
|
||||||
|
|
||||||
exe_versions = FFmpegPostProcessor.get_versions(self)
|
exe_versions = FFmpegPostProcessor.get_versions(self)
|
||||||
exe_versions['rtmpdump'] = rtmpdump_version()
|
exe_versions['rtmpdump'] = rtmpdump_version()
|
||||||
|
@ -2356,17 +2533,17 @@ class YoutubeDL(object):
|
||||||
)
|
)
|
||||||
if not exe_str:
|
if not exe_str:
|
||||||
exe_str = 'none'
|
exe_str = 'none'
|
||||||
self._write_string('[debug] exe versions: %s\n' % exe_str)
|
writeln_debug('exe versions: %s' % (exe_str, ))
|
||||||
|
|
||||||
proxy_map = {}
|
proxy_map = {}
|
||||||
for handler in self._opener.handlers:
|
for handler in self._opener.handlers:
|
||||||
if hasattr(handler, 'proxies'):
|
if hasattr(handler, 'proxies'):
|
||||||
proxy_map.update(handler.proxies)
|
proxy_map.update(handler.proxies)
|
||||||
self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
|
writeln_debug('Proxy map: ', compat_str(proxy_map))
|
||||||
|
|
||||||
if self.params.get('call_home', False):
|
if self.params.get('call_home', False):
|
||||||
ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
|
ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
|
||||||
self._write_string('[debug] Public IP address: %s\n' % ipaddr)
|
writeln_debug('Public IP address: %s' % (ipaddr, ))
|
||||||
latest_version = self.urlopen(
|
latest_version = self.urlopen(
|
||||||
'https://yt-dl.org/latest/version').read().decode('utf-8')
|
'https://yt-dl.org/latest/version').read().decode('utf-8')
|
||||||
if version_tuple(latest_version) > version_tuple(__version__):
|
if version_tuple(latest_version) > version_tuple(__version__):
|
||||||
|
@ -2383,7 +2560,7 @@ class YoutubeDL(object):
|
||||||
opts_proxy = self.params.get('proxy')
|
opts_proxy = self.params.get('proxy')
|
||||||
|
|
||||||
if opts_cookiefile is None:
|
if opts_cookiefile is None:
|
||||||
self.cookiejar = compat_cookiejar.CookieJar()
|
self.cookiejar = YoutubeDLCookieJar()
|
||||||
else:
|
else:
|
||||||
opts_cookiefile = expand_path(opts_cookiefile)
|
opts_cookiefile = expand_path(opts_cookiefile)
|
||||||
self.cookiejar = YoutubeDLCookieJar(opts_cookiefile)
|
self.cookiejar = YoutubeDLCookieJar(opts_cookiefile)
|
||||||
|
@ -2444,6 +2621,28 @@ class YoutubeDL(object):
|
||||||
encoding = preferredencoding()
|
encoding = preferredencoding()
|
||||||
return encoding
|
return encoding
|
||||||
|
|
||||||
|
def _write_info_json(self, label, info_dict, infofn, overwrite=None):
|
||||||
|
if not self.params.get('writeinfojson', False):
|
||||||
|
return False
|
||||||
|
|
||||||
|
def msg(fmt, lbl):
|
||||||
|
return fmt % (lbl + ' metadata',)
|
||||||
|
|
||||||
|
if overwrite is None:
|
||||||
|
overwrite = not self.params.get('nooverwrites', False)
|
||||||
|
|
||||||
|
if not overwrite and os.path.exists(encodeFilename(infofn)):
|
||||||
|
self.to_screen(msg('[info] %s is already present', label.title()))
|
||||||
|
return 'exists'
|
||||||
|
else:
|
||||||
|
self.to_screen(msg('[info] Writing %s as JSON to: ' + infofn, label))
|
||||||
|
try:
|
||||||
|
write_json_file(self.filter_requested_info(info_dict), infofn)
|
||||||
|
return True
|
||||||
|
except (OSError, IOError):
|
||||||
|
self.report_error(msg('Cannot write %s to JSON file ' + infofn, label))
|
||||||
|
return
|
||||||
|
|
||||||
def _write_thumbnails(self, info_dict, filename):
|
def _write_thumbnails(self, info_dict, filename):
|
||||||
if self.params.get('writethumbnail', False):
|
if self.params.get('writethumbnail', False):
|
||||||
thumbnails = info_dict.get('thumbnails')
|
thumbnails = info_dict.get('thumbnails')
|
||||||
|
|
|
@ -5,7 +5,6 @@ from __future__ import unicode_literals
|
||||||
|
|
||||||
__license__ = 'Public Domain'
|
__license__ = 'Public Domain'
|
||||||
|
|
||||||
import codecs
|
|
||||||
import io
|
import io
|
||||||
import os
|
import os
|
||||||
import random
|
import random
|
||||||
|
@ -17,6 +16,7 @@ from .options import (
|
||||||
)
|
)
|
||||||
from .compat import (
|
from .compat import (
|
||||||
compat_getpass,
|
compat_getpass,
|
||||||
|
compat_register_utf8,
|
||||||
compat_shlex_split,
|
compat_shlex_split,
|
||||||
workaround_optparse_bug9161,
|
workaround_optparse_bug9161,
|
||||||
)
|
)
|
||||||
|
@ -46,10 +46,8 @@ from .YoutubeDL import YoutubeDL
|
||||||
|
|
||||||
|
|
||||||
def _real_main(argv=None):
|
def _real_main(argv=None):
|
||||||
# Compatibility fixes for Windows
|
# Compatibility fix for Windows
|
||||||
if sys.platform == 'win32':
|
compat_register_utf8()
|
||||||
# https://github.com/ytdl-org/youtube-dl/issues/820
|
|
||||||
codecs.register(lambda name: codecs.lookup('utf-8') if name == 'cp65001' else None)
|
|
||||||
|
|
||||||
workaround_optparse_bug9161()
|
workaround_optparse_bug9161()
|
||||||
|
|
||||||
|
|
|
@ -1,14 +1,16 @@
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import errno
|
import errno
|
||||||
import io
|
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import shutil
|
import shutil
|
||||||
import traceback
|
import traceback
|
||||||
|
|
||||||
from .compat import compat_getenv
|
from .compat import (
|
||||||
|
compat_getenv,
|
||||||
|
compat_open as open,
|
||||||
|
)
|
||||||
from .utils import (
|
from .utils import (
|
||||||
error_to_compat_str,
|
error_to_compat_str,
|
||||||
expand_path,
|
expand_path,
|
||||||
|
@ -83,7 +85,7 @@ class Cache(object):
|
||||||
cache_fn = self._get_cache_fn(section, key, dtype)
|
cache_fn = self._get_cache_fn(section, key, dtype)
|
||||||
try:
|
try:
|
||||||
try:
|
try:
|
||||||
with io.open(cache_fn, 'r', encoding='utf-8') as cachef:
|
with open(cache_fn, 'r', encoding='utf-8') as cachef:
|
||||||
return self._validate(json.load(cachef), min_ver)
|
return self._validate(json.load(cachef), min_ver)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
try:
|
try:
|
||||||
|
|
|
@ -1663,5 +1663,5 @@ def casefold(s):
|
||||||
|
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
casefold
|
'casefold',
|
||||||
]
|
]
|
||||||
|
|
|
@ -1,10 +1,12 @@
|
||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
from __future__ import division
|
||||||
|
|
||||||
import base64
|
import base64
|
||||||
import binascii
|
import binascii
|
||||||
import collections
|
import collections
|
||||||
import ctypes
|
import ctypes
|
||||||
|
import datetime
|
||||||
import email
|
import email
|
||||||
import getpass
|
import getpass
|
||||||
import io
|
import io
|
||||||
|
@ -19,20 +21,30 @@ import socket
|
||||||
import struct
|
import struct
|
||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
|
import types
|
||||||
import xml.etree.ElementTree
|
import xml.etree.ElementTree
|
||||||
|
|
||||||
|
# naming convention
|
||||||
|
# 'compat_' + Python3_name.replace('.', '_')
|
||||||
|
# other aliases exist for convenience and/or legacy
|
||||||
|
|
||||||
# deal with critical unicode/str things first
|
# deal with critical unicode/str things first
|
||||||
try:
|
try:
|
||||||
# Python 2
|
# Python 2
|
||||||
compat_str, compat_basestring, compat_chr = (
|
compat_str, compat_basestring, compat_chr = (
|
||||||
unicode, basestring, unichr
|
unicode, basestring, unichr
|
||||||
)
|
)
|
||||||
from .casefold import casefold as compat_casefold
|
|
||||||
except NameError:
|
except NameError:
|
||||||
compat_str, compat_basestring, compat_chr = (
|
compat_str, compat_basestring, compat_chr = (
|
||||||
str, str, chr
|
str, (str, bytes), chr
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# casefold
|
||||||
|
try:
|
||||||
|
compat_str.casefold
|
||||||
compat_casefold = lambda s: s.casefold()
|
compat_casefold = lambda s: s.casefold()
|
||||||
|
except AttributeError:
|
||||||
|
from .casefold import casefold as compat_casefold
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import collections.abc as compat_collections_abc
|
import collections.abc as compat_collections_abc
|
||||||
|
@ -44,6 +56,22 @@ try:
|
||||||
except ImportError: # Python 2
|
except ImportError: # Python 2
|
||||||
import urllib2 as compat_urllib_request
|
import urllib2 as compat_urllib_request
|
||||||
|
|
||||||
|
# Also fix up lack of method arg in old Pythons
|
||||||
|
try:
|
||||||
|
_req = compat_urllib_request.Request
|
||||||
|
_req('http://127.0.0.1', method='GET')
|
||||||
|
except TypeError:
|
||||||
|
class _request(object):
|
||||||
|
def __new__(cls, url, *args, **kwargs):
|
||||||
|
method = kwargs.pop('method', None)
|
||||||
|
r = _req(url, *args, **kwargs)
|
||||||
|
if method:
|
||||||
|
r.get_method = types.MethodType(lambda _: method, r)
|
||||||
|
return r
|
||||||
|
|
||||||
|
compat_urllib_request.Request = _request
|
||||||
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import urllib.error as compat_urllib_error
|
import urllib.error as compat_urllib_error
|
||||||
except ImportError: # Python 2
|
except ImportError: # Python 2
|
||||||
|
@ -53,26 +81,32 @@ try:
|
||||||
import urllib.parse as compat_urllib_parse
|
import urllib.parse as compat_urllib_parse
|
||||||
except ImportError: # Python 2
|
except ImportError: # Python 2
|
||||||
import urllib as compat_urllib_parse
|
import urllib as compat_urllib_parse
|
||||||
|
import urlparse as _urlparse
|
||||||
|
for a in dir(_urlparse):
|
||||||
|
if not hasattr(compat_urllib_parse, a):
|
||||||
|
setattr(compat_urllib_parse, a, getattr(_urlparse, a))
|
||||||
|
del _urlparse
|
||||||
|
|
||||||
try:
|
# unfavoured aliases
|
||||||
from urllib.parse import urlparse as compat_urllib_parse_urlparse
|
compat_urlparse = compat_urllib_parse
|
||||||
except ImportError: # Python 2
|
compat_urllib_parse_urlparse = compat_urllib_parse.urlparse
|
||||||
from urlparse import urlparse as compat_urllib_parse_urlparse
|
|
||||||
|
|
||||||
try:
|
|
||||||
import urllib.parse as compat_urlparse
|
|
||||||
except ImportError: # Python 2
|
|
||||||
import urlparse as compat_urlparse
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import urllib.response as compat_urllib_response
|
import urllib.response as compat_urllib_response
|
||||||
except ImportError: # Python 2
|
except ImportError: # Python 2
|
||||||
import urllib as compat_urllib_response
|
import urllib as compat_urllib_response
|
||||||
|
|
||||||
|
try:
|
||||||
|
compat_urllib_response.addinfourl.status
|
||||||
|
except AttributeError:
|
||||||
|
# .getcode() is deprecated in Py 3.
|
||||||
|
compat_urllib_response.addinfourl.status = property(lambda self: self.getcode())
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import http.cookiejar as compat_cookiejar
|
import http.cookiejar as compat_cookiejar
|
||||||
except ImportError: # Python 2
|
except ImportError: # Python 2
|
||||||
import cookielib as compat_cookiejar
|
import cookielib as compat_cookiejar
|
||||||
|
compat_http_cookiejar = compat_cookiejar
|
||||||
|
|
||||||
if sys.version_info[0] == 2:
|
if sys.version_info[0] == 2:
|
||||||
class compat_cookiejar_Cookie(compat_cookiejar.Cookie):
|
class compat_cookiejar_Cookie(compat_cookiejar.Cookie):
|
||||||
|
@ -84,20 +118,35 @@ if sys.version_info[0] == 2:
|
||||||
compat_cookiejar.Cookie.__init__(self, version, name, value, *args, **kwargs)
|
compat_cookiejar.Cookie.__init__(self, version, name, value, *args, **kwargs)
|
||||||
else:
|
else:
|
||||||
compat_cookiejar_Cookie = compat_cookiejar.Cookie
|
compat_cookiejar_Cookie = compat_cookiejar.Cookie
|
||||||
|
compat_http_cookiejar_Cookie = compat_cookiejar_Cookie
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import http.cookies as compat_cookies
|
import http.cookies as compat_cookies
|
||||||
except ImportError: # Python 2
|
except ImportError: # Python 2
|
||||||
import Cookie as compat_cookies
|
import Cookie as compat_cookies
|
||||||
|
compat_http_cookies = compat_cookies
|
||||||
|
|
||||||
if sys.version_info[0] == 2:
|
if sys.version_info[0] == 2 or sys.version_info < (3, 3):
|
||||||
class compat_cookies_SimpleCookie(compat_cookies.SimpleCookie):
|
class compat_cookies_SimpleCookie(compat_cookies.SimpleCookie):
|
||||||
def load(self, rawdata):
|
def load(self, rawdata):
|
||||||
if isinstance(rawdata, compat_str):
|
must_have_value = 0
|
||||||
rawdata = str(rawdata)
|
if not isinstance(rawdata, dict):
|
||||||
return super(compat_cookies_SimpleCookie, self).load(rawdata)
|
if sys.version_info[:2] != (2, 7) or sys.platform.startswith('java'):
|
||||||
|
# attribute must have value for parsing
|
||||||
|
rawdata, must_have_value = re.subn(
|
||||||
|
r'(?i)(;\s*)(secure|httponly)(\s*(?:;|$))', r'\1\2=\2\3', rawdata)
|
||||||
|
if sys.version_info[0] == 2:
|
||||||
|
if isinstance(rawdata, compat_str):
|
||||||
|
rawdata = str(rawdata)
|
||||||
|
super(compat_cookies_SimpleCookie, self).load(rawdata)
|
||||||
|
if must_have_value > 0:
|
||||||
|
for morsel in self.values():
|
||||||
|
for attr in ('secure', 'httponly'):
|
||||||
|
if morsel.get(attr):
|
||||||
|
morsel[attr] = True
|
||||||
else:
|
else:
|
||||||
compat_cookies_SimpleCookie = compat_cookies.SimpleCookie
|
compat_cookies_SimpleCookie = compat_cookies.SimpleCookie
|
||||||
|
compat_http_cookies_SimpleCookie = compat_cookies_SimpleCookie
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import html.entities as compat_html_entities
|
import html.entities as compat_html_entities
|
||||||
|
@ -2346,21 +2395,29 @@ try:
|
||||||
import http.client as compat_http_client
|
import http.client as compat_http_client
|
||||||
except ImportError: # Python 2
|
except ImportError: # Python 2
|
||||||
import httplib as compat_http_client
|
import httplib as compat_http_client
|
||||||
|
try:
|
||||||
|
compat_http_client.HTTPResponse.getcode
|
||||||
|
except AttributeError:
|
||||||
|
# Py < 3.1
|
||||||
|
compat_http_client.HTTPResponse.getcode = lambda self: self.status
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from urllib.error import HTTPError as compat_HTTPError
|
from urllib.error import HTTPError as compat_HTTPError
|
||||||
except ImportError: # Python 2
|
except ImportError: # Python 2
|
||||||
from urllib2 import HTTPError as compat_HTTPError
|
from urllib2 import HTTPError as compat_HTTPError
|
||||||
|
compat_urllib_HTTPError = compat_HTTPError
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from urllib.request import urlretrieve as compat_urlretrieve
|
from urllib.request import urlretrieve as compat_urlretrieve
|
||||||
except ImportError: # Python 2
|
except ImportError: # Python 2
|
||||||
from urllib import urlretrieve as compat_urlretrieve
|
from urllib import urlretrieve as compat_urlretrieve
|
||||||
|
compat_urllib_request_urlretrieve = compat_urlretrieve
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from html.parser import HTMLParser as compat_HTMLParser
|
from html.parser import HTMLParser as compat_HTMLParser
|
||||||
except ImportError: # Python 2
|
except ImportError: # Python 2
|
||||||
from HTMLParser import HTMLParser as compat_HTMLParser
|
from HTMLParser import HTMLParser as compat_HTMLParser
|
||||||
|
compat_html_parser_HTMLParser = compat_HTMLParser
|
||||||
|
|
||||||
try: # Python 2
|
try: # Python 2
|
||||||
from HTMLParser import HTMLParseError as compat_HTMLParseError
|
from HTMLParser import HTMLParseError as compat_HTMLParseError
|
||||||
|
@ -2374,6 +2431,7 @@ except ImportError: # Python <3.4
|
||||||
# and uniform cross-version exception handling
|
# and uniform cross-version exception handling
|
||||||
class compat_HTMLParseError(Exception):
|
class compat_HTMLParseError(Exception):
|
||||||
pass
|
pass
|
||||||
|
compat_html_parser_HTMLParseError = compat_HTMLParseError
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from subprocess import DEVNULL
|
from subprocess import DEVNULL
|
||||||
|
@ -2390,6 +2448,8 @@ try:
|
||||||
from urllib.parse import unquote_to_bytes as compat_urllib_parse_unquote_to_bytes
|
from urllib.parse import unquote_to_bytes as compat_urllib_parse_unquote_to_bytes
|
||||||
from urllib.parse import unquote as compat_urllib_parse_unquote
|
from urllib.parse import unquote as compat_urllib_parse_unquote
|
||||||
from urllib.parse import unquote_plus as compat_urllib_parse_unquote_plus
|
from urllib.parse import unquote_plus as compat_urllib_parse_unquote_plus
|
||||||
|
from urllib.parse import urlencode as compat_urllib_parse_urlencode
|
||||||
|
from urllib.parse import parse_qs as compat_parse_qs
|
||||||
except ImportError: # Python 2
|
except ImportError: # Python 2
|
||||||
_asciire = (compat_urllib_parse._asciire if hasattr(compat_urllib_parse, '_asciire')
|
_asciire = (compat_urllib_parse._asciire if hasattr(compat_urllib_parse, '_asciire')
|
||||||
else re.compile(r'([\x00-\x7f]+)'))
|
else re.compile(r'([\x00-\x7f]+)'))
|
||||||
|
@ -2456,9 +2516,6 @@ except ImportError: # Python 2
|
||||||
string = string.replace('+', ' ')
|
string = string.replace('+', ' ')
|
||||||
return compat_urllib_parse_unquote(string, encoding, errors)
|
return compat_urllib_parse_unquote(string, encoding, errors)
|
||||||
|
|
||||||
try:
|
|
||||||
from urllib.parse import urlencode as compat_urllib_parse_urlencode
|
|
||||||
except ImportError: # Python 2
|
|
||||||
# Python 2 will choke in urlencode on mixture of byte and unicode strings.
|
# Python 2 will choke in urlencode on mixture of byte and unicode strings.
|
||||||
# Possible solutions are to either port it from python 3 with all
|
# Possible solutions are to either port it from python 3 with all
|
||||||
# the friends or manually ensure input query contains only byte strings.
|
# the friends or manually ensure input query contains only byte strings.
|
||||||
|
@ -2480,7 +2537,62 @@ except ImportError: # Python 2
|
||||||
def encode_list(l):
|
def encode_list(l):
|
||||||
return [encode_elem(e) for e in l]
|
return [encode_elem(e) for e in l]
|
||||||
|
|
||||||
return compat_urllib_parse.urlencode(encode_elem(query), doseq=doseq)
|
return compat_urllib_parse._urlencode(encode_elem(query), doseq=doseq)
|
||||||
|
|
||||||
|
# HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
|
||||||
|
# Python 2's version is apparently totally broken
|
||||||
|
def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
|
||||||
|
encoding='utf-8', errors='replace'):
|
||||||
|
qs, _coerce_result = qs, compat_str
|
||||||
|
pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
|
||||||
|
r = []
|
||||||
|
for name_value in pairs:
|
||||||
|
if not name_value and not strict_parsing:
|
||||||
|
continue
|
||||||
|
nv = name_value.split('=', 1)
|
||||||
|
if len(nv) != 2:
|
||||||
|
if strict_parsing:
|
||||||
|
raise ValueError('bad query field: %r' % (name_value,))
|
||||||
|
# Handle case of a control-name with no equal sign
|
||||||
|
if keep_blank_values:
|
||||||
|
nv.append('')
|
||||||
|
else:
|
||||||
|
continue
|
||||||
|
if len(nv[1]) or keep_blank_values:
|
||||||
|
name = nv[0].replace('+', ' ')
|
||||||
|
name = compat_urllib_parse_unquote(
|
||||||
|
name, encoding=encoding, errors=errors)
|
||||||
|
name = _coerce_result(name)
|
||||||
|
value = nv[1].replace('+', ' ')
|
||||||
|
value = compat_urllib_parse_unquote(
|
||||||
|
value, encoding=encoding, errors=errors)
|
||||||
|
value = _coerce_result(value)
|
||||||
|
r.append((name, value))
|
||||||
|
return r
|
||||||
|
|
||||||
|
def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
|
||||||
|
encoding='utf-8', errors='replace'):
|
||||||
|
parsed_result = {}
|
||||||
|
pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
|
||||||
|
encoding=encoding, errors=errors)
|
||||||
|
for name, value in pairs:
|
||||||
|
if name in parsed_result:
|
||||||
|
parsed_result[name].append(value)
|
||||||
|
else:
|
||||||
|
parsed_result[name] = [value]
|
||||||
|
return parsed_result
|
||||||
|
|
||||||
|
setattr(compat_urllib_parse, '_urlencode',
|
||||||
|
getattr(compat_urllib_parse, 'urlencode'))
|
||||||
|
for name, fix in (
|
||||||
|
('unquote_to_bytes', compat_urllib_parse_unquote_to_bytes),
|
||||||
|
('parse_unquote', compat_urllib_parse_unquote),
|
||||||
|
('unquote_plus', compat_urllib_parse_unquote_plus),
|
||||||
|
('urlencode', compat_urllib_parse_urlencode),
|
||||||
|
('parse_qs', compat_parse_qs)):
|
||||||
|
setattr(compat_urllib_parse, name, fix)
|
||||||
|
|
||||||
|
compat_urllib_parse_parse_qs = compat_parse_qs
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from urllib.request import DataHandler as compat_urllib_request_DataHandler
|
from urllib.request import DataHandler as compat_urllib_request_DataHandler
|
||||||
|
@ -2520,6 +2632,7 @@ try:
|
||||||
from xml.etree.ElementTree import ParseError as compat_xml_parse_error
|
from xml.etree.ElementTree import ParseError as compat_xml_parse_error
|
||||||
except ImportError: # Python 2.6
|
except ImportError: # Python 2.6
|
||||||
from xml.parsers.expat import ExpatError as compat_xml_parse_error
|
from xml.parsers.expat import ExpatError as compat_xml_parse_error
|
||||||
|
compat_xml_etree_ElementTree_ParseError = compat_xml_parse_error
|
||||||
|
|
||||||
etree = xml.etree.ElementTree
|
etree = xml.etree.ElementTree
|
||||||
|
|
||||||
|
@ -2533,10 +2646,11 @@ try:
|
||||||
# xml.etree.ElementTree.Element is a method in Python <=2.6 and
|
# xml.etree.ElementTree.Element is a method in Python <=2.6 and
|
||||||
# the following will crash with:
|
# the following will crash with:
|
||||||
# TypeError: isinstance() arg 2 must be a class, type, or tuple of classes and types
|
# TypeError: isinstance() arg 2 must be a class, type, or tuple of classes and types
|
||||||
isinstance(None, xml.etree.ElementTree.Element)
|
isinstance(None, etree.Element)
|
||||||
from xml.etree.ElementTree import Element as compat_etree_Element
|
from xml.etree.ElementTree import Element as compat_etree_Element
|
||||||
except TypeError: # Python <=2.6
|
except TypeError: # Python <=2.6
|
||||||
from xml.etree.ElementTree import _ElementInterface as compat_etree_Element
|
from xml.etree.ElementTree import _ElementInterface as compat_etree_Element
|
||||||
|
compat_xml_etree_ElementTree_Element = compat_etree_Element
|
||||||
|
|
||||||
if sys.version_info[0] >= 3:
|
if sys.version_info[0] >= 3:
|
||||||
def compat_etree_fromstring(text):
|
def compat_etree_fromstring(text):
|
||||||
|
@ -2592,6 +2706,7 @@ else:
|
||||||
if k == uri or v == prefix:
|
if k == uri or v == prefix:
|
||||||
del etree._namespace_map[k]
|
del etree._namespace_map[k]
|
||||||
etree._namespace_map[uri] = prefix
|
etree._namespace_map[uri] = prefix
|
||||||
|
compat_xml_etree_register_namespace = compat_etree_register_namespace
|
||||||
|
|
||||||
if sys.version_info < (2, 7):
|
if sys.version_info < (2, 7):
|
||||||
# Here comes the crazy part: In 2.6, if the xpath is a unicode,
|
# Here comes the crazy part: In 2.6, if the xpath is a unicode,
|
||||||
|
@ -2603,53 +2718,6 @@ if sys.version_info < (2, 7):
|
||||||
else:
|
else:
|
||||||
compat_xpath = lambda xpath: xpath
|
compat_xpath = lambda xpath: xpath
|
||||||
|
|
||||||
try:
|
|
||||||
from urllib.parse import parse_qs as compat_parse_qs
|
|
||||||
except ImportError: # Python 2
|
|
||||||
# HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
|
|
||||||
# Python 2's version is apparently totally broken
|
|
||||||
|
|
||||||
def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
|
|
||||||
encoding='utf-8', errors='replace'):
|
|
||||||
qs, _coerce_result = qs, compat_str
|
|
||||||
pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
|
|
||||||
r = []
|
|
||||||
for name_value in pairs:
|
|
||||||
if not name_value and not strict_parsing:
|
|
||||||
continue
|
|
||||||
nv = name_value.split('=', 1)
|
|
||||||
if len(nv) != 2:
|
|
||||||
if strict_parsing:
|
|
||||||
raise ValueError('bad query field: %r' % (name_value,))
|
|
||||||
# Handle case of a control-name with no equal sign
|
|
||||||
if keep_blank_values:
|
|
||||||
nv.append('')
|
|
||||||
else:
|
|
||||||
continue
|
|
||||||
if len(nv[1]) or keep_blank_values:
|
|
||||||
name = nv[0].replace('+', ' ')
|
|
||||||
name = compat_urllib_parse_unquote(
|
|
||||||
name, encoding=encoding, errors=errors)
|
|
||||||
name = _coerce_result(name)
|
|
||||||
value = nv[1].replace('+', ' ')
|
|
||||||
value = compat_urllib_parse_unquote(
|
|
||||||
value, encoding=encoding, errors=errors)
|
|
||||||
value = _coerce_result(value)
|
|
||||||
r.append((name, value))
|
|
||||||
return r
|
|
||||||
|
|
||||||
def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
|
|
||||||
encoding='utf-8', errors='replace'):
|
|
||||||
parsed_result = {}
|
|
||||||
pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
|
|
||||||
encoding=encoding, errors=errors)
|
|
||||||
for name, value in pairs:
|
|
||||||
if name in parsed_result:
|
|
||||||
parsed_result[name].append(value)
|
|
||||||
else:
|
|
||||||
parsed_result[name] = [value]
|
|
||||||
return parsed_result
|
|
||||||
|
|
||||||
|
|
||||||
compat_os_name = os._name if os.name == 'java' else os.name
|
compat_os_name = os._name if os.name == 'java' else os.name
|
||||||
|
|
||||||
|
@ -2774,6 +2842,8 @@ else:
|
||||||
else:
|
else:
|
||||||
compat_expanduser = os.path.expanduser
|
compat_expanduser = os.path.expanduser
|
||||||
|
|
||||||
|
compat_os_path_expanduser = compat_expanduser
|
||||||
|
|
||||||
|
|
||||||
if compat_os_name == 'nt' and sys.version_info < (3, 8):
|
if compat_os_name == 'nt' and sys.version_info < (3, 8):
|
||||||
# os.path.realpath on Windows does not follow symbolic links
|
# os.path.realpath on Windows does not follow symbolic links
|
||||||
|
@ -2785,6 +2855,8 @@ if compat_os_name == 'nt' and sys.version_info < (3, 8):
|
||||||
else:
|
else:
|
||||||
compat_realpath = os.path.realpath
|
compat_realpath = os.path.realpath
|
||||||
|
|
||||||
|
compat_os_path_realpath = compat_realpath
|
||||||
|
|
||||||
|
|
||||||
if sys.version_info < (3, 0):
|
if sys.version_info < (3, 0):
|
||||||
def compat_print(s):
|
def compat_print(s):
|
||||||
|
@ -2805,11 +2877,15 @@ if sys.version_info < (3, 0) and sys.platform == 'win32':
|
||||||
else:
|
else:
|
||||||
compat_getpass = getpass.getpass
|
compat_getpass = getpass.getpass
|
||||||
|
|
||||||
|
compat_getpass_getpass = compat_getpass
|
||||||
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
compat_input = raw_input
|
compat_input = raw_input
|
||||||
except NameError: # Python 3
|
except NameError: # Python 3
|
||||||
compat_input = input
|
compat_input = input
|
||||||
|
|
||||||
|
|
||||||
# Python < 2.6.5 require kwargs to be bytes
|
# Python < 2.6.5 require kwargs to be bytes
|
||||||
try:
|
try:
|
||||||
def _testfunc(x):
|
def _testfunc(x):
|
||||||
|
@ -2915,15 +2991,16 @@ else:
|
||||||
lines = _lines
|
lines = _lines
|
||||||
return _terminal_size(columns, lines)
|
return _terminal_size(columns, lines)
|
||||||
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
itertools.count(start=0, step=1)
|
itertools.count(start=0, step=1)
|
||||||
compat_itertools_count = itertools.count
|
compat_itertools_count = itertools.count
|
||||||
except TypeError: # Python 2.6
|
except TypeError: # Python 2.6
|
||||||
def compat_itertools_count(start=0, step=1):
|
def compat_itertools_count(start=0, step=1):
|
||||||
n = start
|
|
||||||
while True:
|
while True:
|
||||||
yield n
|
yield start
|
||||||
n += step
|
start += step
|
||||||
|
|
||||||
|
|
||||||
if sys.version_info >= (3, 0):
|
if sys.version_info >= (3, 0):
|
||||||
from tokenize import tokenize as compat_tokenize_tokenize
|
from tokenize import tokenize as compat_tokenize_tokenize
|
||||||
|
@ -3075,6 +3152,8 @@ if sys.version_info < (3, 3):
|
||||||
else:
|
else:
|
||||||
compat_b64decode = base64.b64decode
|
compat_b64decode = base64.b64decode
|
||||||
|
|
||||||
|
compat_base64_b64decode = compat_b64decode
|
||||||
|
|
||||||
|
|
||||||
if platform.python_implementation() == 'PyPy' and sys.pypy_version_info < (5, 4, 0):
|
if platform.python_implementation() == 'PyPy' and sys.pypy_version_info < (5, 4, 0):
|
||||||
# PyPy2 prior to version 5.4.0 expects byte strings as Windows function
|
# PyPy2 prior to version 5.4.0 expects byte strings as Windows function
|
||||||
|
@ -3094,30 +3173,95 @@ else:
|
||||||
return ctypes.WINFUNCTYPE(*args, **kwargs)
|
return ctypes.WINFUNCTYPE(*args, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
__all__ = [
|
if sys.version_info < (3, 0):
|
||||||
|
# open(file, mode='r', buffering=- 1, encoding=None, errors=None, newline=None, closefd=True) not: opener=None
|
||||||
|
def compat_open(file_, *args, **kwargs):
|
||||||
|
if len(args) > 6 or 'opener' in kwargs:
|
||||||
|
raise ValueError('open: unsupported argument "opener"')
|
||||||
|
return io.open(file_, *args, **kwargs)
|
||||||
|
else:
|
||||||
|
compat_open = open
|
||||||
|
|
||||||
|
|
||||||
|
# compat_register_utf8
|
||||||
|
def compat_register_utf8():
|
||||||
|
if sys.platform == 'win32':
|
||||||
|
# https://github.com/ytdl-org/youtube-dl/issues/820
|
||||||
|
from codecs import register, lookup
|
||||||
|
register(
|
||||||
|
lambda name: lookup('utf-8') if name == 'cp65001' else None)
|
||||||
|
|
||||||
|
|
||||||
|
# compat_datetime_timedelta_total_seconds
|
||||||
|
try:
|
||||||
|
compat_datetime_timedelta_total_seconds = datetime.timedelta.total_seconds
|
||||||
|
except AttributeError:
|
||||||
|
# Py 2.6
|
||||||
|
def compat_datetime_timedelta_total_seconds(td):
|
||||||
|
return (td.microseconds + (td.seconds + td.days * 24 * 3600) * 10**6) / 10**6
|
||||||
|
|
||||||
|
# optional decompression packages
|
||||||
|
# PyPi brotli package implements 'br' Content-Encoding
|
||||||
|
try:
|
||||||
|
import brotli as compat_brotli
|
||||||
|
except ImportError:
|
||||||
|
compat_brotli = None
|
||||||
|
# PyPi ncompress package implements 'compress' Content-Encoding
|
||||||
|
try:
|
||||||
|
import ncompress as compat_ncompress
|
||||||
|
except ImportError:
|
||||||
|
compat_ncompress = None
|
||||||
|
|
||||||
|
|
||||||
|
legacy = [
|
||||||
'compat_HTMLParseError',
|
'compat_HTMLParseError',
|
||||||
'compat_HTMLParser',
|
'compat_HTMLParser',
|
||||||
'compat_HTTPError',
|
'compat_HTTPError',
|
||||||
'compat_Struct',
|
|
||||||
'compat_b64decode',
|
'compat_b64decode',
|
||||||
'compat_basestring',
|
|
||||||
'compat_casefold',
|
|
||||||
'compat_chr',
|
|
||||||
'compat_collections_abc',
|
|
||||||
'compat_collections_chain_map',
|
|
||||||
'compat_cookiejar',
|
'compat_cookiejar',
|
||||||
'compat_cookiejar_Cookie',
|
'compat_cookiejar_Cookie',
|
||||||
'compat_cookies',
|
'compat_cookies',
|
||||||
'compat_cookies_SimpleCookie',
|
'compat_cookies_SimpleCookie',
|
||||||
'compat_ctypes_WINFUNCTYPE',
|
|
||||||
'compat_etree_Element',
|
'compat_etree_Element',
|
||||||
'compat_etree_fromstring',
|
|
||||||
'compat_etree_register_namespace',
|
'compat_etree_register_namespace',
|
||||||
'compat_expanduser',
|
'compat_expanduser',
|
||||||
|
'compat_getpass',
|
||||||
|
'compat_parse_qs',
|
||||||
|
'compat_realpath',
|
||||||
|
'compat_urllib_parse_parse_qs',
|
||||||
|
'compat_urllib_parse_unquote',
|
||||||
|
'compat_urllib_parse_unquote_plus',
|
||||||
|
'compat_urllib_parse_unquote_to_bytes',
|
||||||
|
'compat_urllib_parse_urlencode',
|
||||||
|
'compat_urllib_parse_urlparse',
|
||||||
|
'compat_urlparse',
|
||||||
|
'compat_urlretrieve',
|
||||||
|
'compat_xml_parse_error',
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
'compat_html_parser_HTMLParseError',
|
||||||
|
'compat_html_parser_HTMLParser',
|
||||||
|
'compat_Struct',
|
||||||
|
'compat_base64_b64decode',
|
||||||
|
'compat_basestring',
|
||||||
|
'compat_brotli',
|
||||||
|
'compat_casefold',
|
||||||
|
'compat_chr',
|
||||||
|
'compat_collections_abc',
|
||||||
|
'compat_collections_chain_map',
|
||||||
|
'compat_datetime_timedelta_total_seconds',
|
||||||
|
'compat_http_cookiejar',
|
||||||
|
'compat_http_cookiejar_Cookie',
|
||||||
|
'compat_http_cookies',
|
||||||
|
'compat_http_cookies_SimpleCookie',
|
||||||
|
'compat_ctypes_WINFUNCTYPE',
|
||||||
|
'compat_etree_fromstring',
|
||||||
'compat_filter',
|
'compat_filter',
|
||||||
'compat_get_terminal_size',
|
'compat_get_terminal_size',
|
||||||
'compat_getenv',
|
'compat_getenv',
|
||||||
'compat_getpass',
|
'compat_getpass_getpass',
|
||||||
'compat_html_entities',
|
'compat_html_entities',
|
||||||
'compat_html_entities_html5',
|
'compat_html_entities_html5',
|
||||||
'compat_http_client',
|
'compat_http_client',
|
||||||
|
@ -3128,14 +3272,17 @@ __all__ = [
|
||||||
'compat_itertools_zip_longest',
|
'compat_itertools_zip_longest',
|
||||||
'compat_kwargs',
|
'compat_kwargs',
|
||||||
'compat_map',
|
'compat_map',
|
||||||
|
'compat_ncompress',
|
||||||
'compat_numeric_types',
|
'compat_numeric_types',
|
||||||
|
'compat_open',
|
||||||
'compat_ord',
|
'compat_ord',
|
||||||
'compat_os_name',
|
'compat_os_name',
|
||||||
'compat_parse_qs',
|
'compat_os_path_expanduser',
|
||||||
|
'compat_os_path_realpath',
|
||||||
'compat_print',
|
'compat_print',
|
||||||
'compat_re_Match',
|
'compat_re_Match',
|
||||||
'compat_re_Pattern',
|
'compat_re_Pattern',
|
||||||
'compat_realpath',
|
'compat_register_utf8',
|
||||||
'compat_setenv',
|
'compat_setenv',
|
||||||
'compat_shlex_quote',
|
'compat_shlex_quote',
|
||||||
'compat_shlex_split',
|
'compat_shlex_split',
|
||||||
|
@ -3147,17 +3294,14 @@ __all__ = [
|
||||||
'compat_tokenize_tokenize',
|
'compat_tokenize_tokenize',
|
||||||
'compat_urllib_error',
|
'compat_urllib_error',
|
||||||
'compat_urllib_parse',
|
'compat_urllib_parse',
|
||||||
'compat_urllib_parse_unquote',
|
|
||||||
'compat_urllib_parse_unquote_plus',
|
|
||||||
'compat_urllib_parse_unquote_to_bytes',
|
|
||||||
'compat_urllib_parse_urlencode',
|
|
||||||
'compat_urllib_parse_urlparse',
|
|
||||||
'compat_urllib_request',
|
'compat_urllib_request',
|
||||||
'compat_urllib_request_DataHandler',
|
'compat_urllib_request_DataHandler',
|
||||||
'compat_urllib_response',
|
'compat_urllib_response',
|
||||||
'compat_urlparse',
|
'compat_urllib_request_urlretrieve',
|
||||||
'compat_urlretrieve',
|
'compat_urllib_HTTPError',
|
||||||
'compat_xml_parse_error',
|
'compat_xml_etree_ElementTree_Element',
|
||||||
|
'compat_xml_etree_ElementTree_ParseError',
|
||||||
|
'compat_xml_etree_register_namespace',
|
||||||
'compat_xpath',
|
'compat_xpath',
|
||||||
'compat_zip',
|
'compat_zip',
|
||||||
'workaround_optparse_bug9161',
|
'workaround_optparse_bug9161',
|
||||||
|
|
|
@ -88,17 +88,21 @@ class FileDownloader(object):
|
||||||
return '---.-%'
|
return '---.-%'
|
||||||
return '%6s' % ('%3.1f%%' % percent)
|
return '%6s' % ('%3.1f%%' % percent)
|
||||||
|
|
||||||
@staticmethod
|
@classmethod
|
||||||
def calc_eta(start, now, total, current):
|
def calc_eta(cls, start_or_rate, now_or_remaining, *args):
|
||||||
|
if len(args) < 2:
|
||||||
|
rate, remaining = (start_or_rate, now_or_remaining)
|
||||||
|
if None in (rate, remaining):
|
||||||
|
return None
|
||||||
|
return int(float(remaining) / rate)
|
||||||
|
start, now = (start_or_rate, now_or_remaining)
|
||||||
|
total, current = args[:2]
|
||||||
if total is None:
|
if total is None:
|
||||||
return None
|
return None
|
||||||
if now is None:
|
if now is None:
|
||||||
now = time.time()
|
now = time.time()
|
||||||
dif = now - start
|
rate = cls.calc_speed(start, now, current)
|
||||||
if current == 0 or dif < 0.001: # One millisecond
|
return rate and int((float(total) - float(current)) / rate)
|
||||||
return None
|
|
||||||
rate = float(current) / dif
|
|
||||||
return int((float(total) - float(current)) / rate)
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def format_eta(eta):
|
def format_eta(eta):
|
||||||
|
@ -123,6 +127,12 @@ class FileDownloader(object):
|
||||||
def format_retries(retries):
|
def format_retries(retries):
|
||||||
return 'inf' if retries == float('inf') else '%.0f' % retries
|
return 'inf' if retries == float('inf') else '%.0f' % retries
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def filesize_or_none(unencoded_filename):
|
||||||
|
fn = encodeFilename(unencoded_filename)
|
||||||
|
if os.path.isfile(fn):
|
||||||
|
return os.path.getsize(fn)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def best_block_size(elapsed_time, bytes):
|
def best_block_size(elapsed_time, bytes):
|
||||||
new_min = max(bytes / 2.0, 1.0)
|
new_min = max(bytes / 2.0, 1.0)
|
||||||
|
@ -329,6 +339,10 @@ class FileDownloader(object):
|
||||||
def download(self, filename, info_dict):
|
def download(self, filename, info_dict):
|
||||||
"""Download to a filename using the info from info_dict
|
"""Download to a filename using the info from info_dict
|
||||||
Return True on success and False otherwise
|
Return True on success and False otherwise
|
||||||
|
|
||||||
|
This method filters the `Cookie` header from the info_dict to prevent leaks.
|
||||||
|
Downloaders have their own way of handling cookies.
|
||||||
|
See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj
|
||||||
"""
|
"""
|
||||||
|
|
||||||
nooverwrites_and_exists = (
|
nooverwrites_and_exists = (
|
||||||
|
|
|
@ -1,5 +1,7 @@
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import itertools
|
||||||
|
|
||||||
from .fragment import FragmentFD
|
from .fragment import FragmentFD
|
||||||
from ..compat import compat_urllib_error
|
from ..compat import compat_urllib_error
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
@ -30,26 +32,23 @@ class DashSegmentsFD(FragmentFD):
|
||||||
fragment_retries = self.params.get('fragment_retries', 0)
|
fragment_retries = self.params.get('fragment_retries', 0)
|
||||||
skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
|
skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
|
||||||
|
|
||||||
frag_index = 0
|
for frag_index, fragment in enumerate(fragments, 1):
|
||||||
for i, fragment in enumerate(fragments):
|
|
||||||
frag_index += 1
|
|
||||||
if frag_index <= ctx['fragment_index']:
|
if frag_index <= ctx['fragment_index']:
|
||||||
continue
|
continue
|
||||||
# In DASH, the first segment contains necessary headers to
|
# In DASH, the first segment contains necessary headers to
|
||||||
# generate a valid MP4 file, so always abort for the first segment
|
# generate a valid MP4 file, so always abort for the first segment
|
||||||
fatal = i == 0 or not skip_unavailable_fragments
|
fatal = frag_index == 1 or not skip_unavailable_fragments
|
||||||
count = 0
|
fragment_url = fragment.get('url')
|
||||||
while count <= fragment_retries:
|
if not fragment_url:
|
||||||
|
assert fragment_base_url
|
||||||
|
fragment_url = urljoin(fragment_base_url, fragment['path'])
|
||||||
|
success = False
|
||||||
|
for count in itertools.count():
|
||||||
try:
|
try:
|
||||||
fragment_url = fragment.get('url')
|
|
||||||
if not fragment_url:
|
|
||||||
assert fragment_base_url
|
|
||||||
fragment_url = urljoin(fragment_base_url, fragment['path'])
|
|
||||||
success, frag_content = self._download_fragment(ctx, fragment_url, info_dict)
|
success, frag_content = self._download_fragment(ctx, fragment_url, info_dict)
|
||||||
if not success:
|
if not success:
|
||||||
return False
|
return False
|
||||||
self._append_fragment(ctx, frag_content)
|
self._append_fragment(ctx, frag_content)
|
||||||
break
|
|
||||||
except compat_urllib_error.HTTPError as err:
|
except compat_urllib_error.HTTPError as err:
|
||||||
# YouTube may often return 404 HTTP error for a fragment causing the
|
# YouTube may often return 404 HTTP error for a fragment causing the
|
||||||
# whole download to fail. However if the same fragment is immediately
|
# whole download to fail. However if the same fragment is immediately
|
||||||
|
@ -57,22 +56,21 @@ class DashSegmentsFD(FragmentFD):
|
||||||
# is usually enough) thus allowing to download the whole file successfully.
|
# is usually enough) thus allowing to download the whole file successfully.
|
||||||
# To be future-proof we will retry all fragments that fail with any
|
# To be future-proof we will retry all fragments that fail with any
|
||||||
# HTTP error.
|
# HTTP error.
|
||||||
count += 1
|
if count < fragment_retries:
|
||||||
if count <= fragment_retries:
|
self.report_retry_fragment(err, frag_index, count + 1, fragment_retries)
|
||||||
self.report_retry_fragment(err, frag_index, count, fragment_retries)
|
continue
|
||||||
except DownloadError:
|
except DownloadError:
|
||||||
# Don't retry fragment if error occurred during HTTP downloading
|
# Don't retry fragment if error occurred during HTTP downloading
|
||||||
# itself since it has own retry settings
|
# itself since it has its own retry settings
|
||||||
if not fatal:
|
if fatal:
|
||||||
self.report_skip_fragment(frag_index)
|
raise
|
||||||
break
|
break
|
||||||
raise
|
|
||||||
|
|
||||||
if count > fragment_retries:
|
if not success:
|
||||||
if not fatal:
|
if not fatal:
|
||||||
self.report_skip_fragment(frag_index)
|
self.report_skip_fragment(frag_index)
|
||||||
continue
|
continue
|
||||||
self.report_error('giving up after %s fragment retries' % fragment_retries)
|
self.report_error('giving up after %s fragment retries' % count)
|
||||||
return False
|
return False
|
||||||
|
|
||||||
self._finish_frag_download(ctx)
|
self._finish_frag_download(ctx)
|
||||||
|
|
|
@ -1,9 +1,10 @@
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import os.path
|
import os
|
||||||
import re
|
import re
|
||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
|
import tempfile
|
||||||
import time
|
import time
|
||||||
|
|
||||||
from .common import FileDownloader
|
from .common import FileDownloader
|
||||||
|
@ -23,6 +24,8 @@ from ..utils import (
|
||||||
check_executable,
|
check_executable,
|
||||||
is_outdated_version,
|
is_outdated_version,
|
||||||
process_communicate_or_kill,
|
process_communicate_or_kill,
|
||||||
|
T,
|
||||||
|
traverse_obj,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -30,6 +33,7 @@ class ExternalFD(FileDownloader):
|
||||||
def real_download(self, filename, info_dict):
|
def real_download(self, filename, info_dict):
|
||||||
self.report_destination(filename)
|
self.report_destination(filename)
|
||||||
tmpfilename = self.temp_name(filename)
|
tmpfilename = self.temp_name(filename)
|
||||||
|
self._cookies_tempfile = None
|
||||||
|
|
||||||
try:
|
try:
|
||||||
started = time.time()
|
started = time.time()
|
||||||
|
@ -42,6 +46,13 @@ class ExternalFD(FileDownloader):
|
||||||
# should take place
|
# should take place
|
||||||
retval = 0
|
retval = 0
|
||||||
self.to_screen('[%s] Interrupted by user' % self.get_basename())
|
self.to_screen('[%s] Interrupted by user' % self.get_basename())
|
||||||
|
finally:
|
||||||
|
if self._cookies_tempfile and os.path.isfile(self._cookies_tempfile):
|
||||||
|
try:
|
||||||
|
os.remove(self._cookies_tempfile)
|
||||||
|
except OSError:
|
||||||
|
self.report_warning(
|
||||||
|
'Unable to delete temporary cookies file "{0}"'.format(self._cookies_tempfile))
|
||||||
|
|
||||||
if retval == 0:
|
if retval == 0:
|
||||||
status = {
|
status = {
|
||||||
|
@ -97,6 +108,16 @@ class ExternalFD(FileDownloader):
|
||||||
def _configuration_args(self, default=[]):
|
def _configuration_args(self, default=[]):
|
||||||
return cli_configuration_args(self.params, 'external_downloader_args', default)
|
return cli_configuration_args(self.params, 'external_downloader_args', default)
|
||||||
|
|
||||||
|
def _write_cookies(self):
|
||||||
|
if not self.ydl.cookiejar.filename:
|
||||||
|
tmp_cookies = tempfile.NamedTemporaryFile(suffix='.cookies', delete=False)
|
||||||
|
tmp_cookies.close()
|
||||||
|
self._cookies_tempfile = tmp_cookies.name
|
||||||
|
self.to_screen('[download] Writing temporary cookies file to "{0}"'.format(self._cookies_tempfile))
|
||||||
|
# real_download resets _cookies_tempfile; if it's None, save() will write to cookiejar.filename
|
||||||
|
self.ydl.cookiejar.save(self._cookies_tempfile, ignore_discard=True, ignore_expires=True)
|
||||||
|
return self.ydl.cookiejar.filename or self._cookies_tempfile
|
||||||
|
|
||||||
def _call_downloader(self, tmpfilename, info_dict):
|
def _call_downloader(self, tmpfilename, info_dict):
|
||||||
""" Either overwrite this or implement _make_cmd """
|
""" Either overwrite this or implement _make_cmd """
|
||||||
cmd = [encodeArgument(a) for a in self._make_cmd(tmpfilename, info_dict)]
|
cmd = [encodeArgument(a) for a in self._make_cmd(tmpfilename, info_dict)]
|
||||||
|
@ -110,13 +131,21 @@ class ExternalFD(FileDownloader):
|
||||||
self.to_stderr(stderr.decode('utf-8', 'replace'))
|
self.to_stderr(stderr.decode('utf-8', 'replace'))
|
||||||
return p.returncode
|
return p.returncode
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _header_items(info_dict):
|
||||||
|
return traverse_obj(
|
||||||
|
info_dict, ('http_headers', T(dict.items), Ellipsis))
|
||||||
|
|
||||||
|
|
||||||
class CurlFD(ExternalFD):
|
class CurlFD(ExternalFD):
|
||||||
AVAILABLE_OPT = '-V'
|
AVAILABLE_OPT = '-V'
|
||||||
|
|
||||||
def _make_cmd(self, tmpfilename, info_dict):
|
def _make_cmd(self, tmpfilename, info_dict):
|
||||||
cmd = [self.exe, '--location', '-o', tmpfilename]
|
cmd = [self.exe, '--location', '-o', tmpfilename, '--compressed']
|
||||||
for key, val in info_dict['http_headers'].items():
|
cookie_header = self.ydl.cookiejar.get_cookie_header(info_dict['url'])
|
||||||
|
if cookie_header:
|
||||||
|
cmd += ['--cookie', cookie_header]
|
||||||
|
for key, val in self._header_items(info_dict):
|
||||||
cmd += ['--header', '%s: %s' % (key, val)]
|
cmd += ['--header', '%s: %s' % (key, val)]
|
||||||
cmd += self._bool_option('--continue-at', 'continuedl', '-', '0')
|
cmd += self._bool_option('--continue-at', 'continuedl', '-', '0')
|
||||||
cmd += self._valueless_option('--silent', 'noprogress')
|
cmd += self._valueless_option('--silent', 'noprogress')
|
||||||
|
@ -151,8 +180,11 @@ class AxelFD(ExternalFD):
|
||||||
|
|
||||||
def _make_cmd(self, tmpfilename, info_dict):
|
def _make_cmd(self, tmpfilename, info_dict):
|
||||||
cmd = [self.exe, '-o', tmpfilename]
|
cmd = [self.exe, '-o', tmpfilename]
|
||||||
for key, val in info_dict['http_headers'].items():
|
for key, val in self._header_items(info_dict):
|
||||||
cmd += ['-H', '%s: %s' % (key, val)]
|
cmd += ['-H', '%s: %s' % (key, val)]
|
||||||
|
cookie_header = self.ydl.cookiejar.get_cookie_header(info_dict['url'])
|
||||||
|
if cookie_header:
|
||||||
|
cmd += ['-H', 'Cookie: {0}'.format(cookie_header), '--max-redirect=0']
|
||||||
cmd += self._configuration_args()
|
cmd += self._configuration_args()
|
||||||
cmd += ['--', info_dict['url']]
|
cmd += ['--', info_dict['url']]
|
||||||
return cmd
|
return cmd
|
||||||
|
@ -162,8 +194,10 @@ class WgetFD(ExternalFD):
|
||||||
AVAILABLE_OPT = '--version'
|
AVAILABLE_OPT = '--version'
|
||||||
|
|
||||||
def _make_cmd(self, tmpfilename, info_dict):
|
def _make_cmd(self, tmpfilename, info_dict):
|
||||||
cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies']
|
cmd = [self.exe, '-O', tmpfilename, '-nv', '--compression=auto']
|
||||||
for key, val in info_dict['http_headers'].items():
|
if self.ydl.cookiejar.get_cookie_header(info_dict['url']):
|
||||||
|
cmd += ['--load-cookies', self._write_cookies()]
|
||||||
|
for key, val in self._header_items(info_dict):
|
||||||
cmd += ['--header', '%s: %s' % (key, val)]
|
cmd += ['--header', '%s: %s' % (key, val)]
|
||||||
cmd += self._option('--limit-rate', 'ratelimit')
|
cmd += self._option('--limit-rate', 'ratelimit')
|
||||||
retry = self._option('--tries', 'retries')
|
retry = self._option('--tries', 'retries')
|
||||||
|
@ -182,24 +216,121 @@ class WgetFD(ExternalFD):
|
||||||
class Aria2cFD(ExternalFD):
|
class Aria2cFD(ExternalFD):
|
||||||
AVAILABLE_OPT = '-v'
|
AVAILABLE_OPT = '-v'
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _aria2c_filename(fn):
|
||||||
|
return fn if os.path.isabs(fn) else os.path.join('.', fn)
|
||||||
|
|
||||||
def _make_cmd(self, tmpfilename, info_dict):
|
def _make_cmd(self, tmpfilename, info_dict):
|
||||||
cmd = [self.exe, '-c']
|
cmd = [self.exe, '-c',
|
||||||
cmd += self._configuration_args([
|
'--console-log-level=warn', '--summary-interval=0', '--download-result=hide',
|
||||||
'--min-split-size', '1M', '--max-connection-per-server', '4'])
|
'--http-accept-gzip=true', '--file-allocation=none', '-x16', '-j16', '-s16']
|
||||||
dn = os.path.dirname(tmpfilename)
|
if 'fragments' in info_dict:
|
||||||
if dn:
|
cmd += ['--allow-overwrite=true', '--allow-piece-length-change=true']
|
||||||
cmd += ['--dir', dn]
|
else:
|
||||||
cmd += ['--out', os.path.basename(tmpfilename)]
|
cmd += ['--min-split-size', '1M']
|
||||||
for key, val in info_dict['http_headers'].items():
|
|
||||||
|
if self.ydl.cookiejar.get_cookie_header(info_dict['url']):
|
||||||
|
cmd += ['--load-cookies={0}'.format(self._write_cookies())]
|
||||||
|
for key, val in self._header_items(info_dict):
|
||||||
cmd += ['--header', '%s: %s' % (key, val)]
|
cmd += ['--header', '%s: %s' % (key, val)]
|
||||||
|
cmd += self._configuration_args(['--max-connection-per-server', '4'])
|
||||||
|
cmd += ['--out', os.path.basename(tmpfilename)]
|
||||||
|
cmd += self._option('--max-overall-download-limit', 'ratelimit')
|
||||||
cmd += self._option('--interface', 'source_address')
|
cmd += self._option('--interface', 'source_address')
|
||||||
cmd += self._option('--all-proxy', 'proxy')
|
cmd += self._option('--all-proxy', 'proxy')
|
||||||
cmd += self._bool_option('--check-certificate', 'nocheckcertificate', 'false', 'true', '=')
|
cmd += self._bool_option('--check-certificate', 'nocheckcertificate', 'false', 'true', '=')
|
||||||
cmd += self._bool_option('--remote-time', 'updatetime', 'true', 'false', '=')
|
cmd += self._bool_option('--remote-time', 'updatetime', 'true', 'false', '=')
|
||||||
cmd += ['--', info_dict['url']]
|
cmd += self._bool_option('--show-console-readout', 'noprogress', 'false', 'true', '=')
|
||||||
|
cmd += self._configuration_args()
|
||||||
|
|
||||||
|
# aria2c strips out spaces from the beginning/end of filenames and paths.
|
||||||
|
# We work around this issue by adding a "./" to the beginning of the
|
||||||
|
# filename and relative path, and adding a "/" at the end of the path.
|
||||||
|
# See: https://github.com/yt-dlp/yt-dlp/issues/276
|
||||||
|
# https://github.com/ytdl-org/youtube-dl/issues/20312
|
||||||
|
# https://github.com/aria2/aria2/issues/1373
|
||||||
|
dn = os.path.dirname(tmpfilename)
|
||||||
|
if dn:
|
||||||
|
cmd += ['--dir', self._aria2c_filename(dn) + os.path.sep]
|
||||||
|
if 'fragments' not in info_dict:
|
||||||
|
cmd += ['--out', self._aria2c_filename(os.path.basename(tmpfilename))]
|
||||||
|
cmd += ['--auto-file-renaming=false']
|
||||||
|
if 'fragments' in info_dict:
|
||||||
|
cmd += ['--file-allocation=none', '--uri-selector=inorder']
|
||||||
|
url_list_file = '%s.frag.urls' % (tmpfilename, )
|
||||||
|
url_list = []
|
||||||
|
for frag_index, fragment in enumerate(info_dict['fragments']):
|
||||||
|
fragment_filename = '%s-Frag%d' % (os.path.basename(tmpfilename), frag_index)
|
||||||
|
url_list.append('%s\n\tout=%s' % (fragment['url'], self._aria2c_filename(fragment_filename)))
|
||||||
|
stream, _ = self.sanitize_open(url_list_file, 'wb')
|
||||||
|
stream.write('\n'.join(url_list).encode())
|
||||||
|
stream.close()
|
||||||
|
cmd += ['-i', self._aria2c_filename(url_list_file)]
|
||||||
|
else:
|
||||||
|
cmd += ['--', info_dict['url']]
|
||||||
return cmd
|
return cmd
|
||||||
|
|
||||||
|
|
||||||
|
class Aria2pFD(ExternalFD):
|
||||||
|
''' Aria2pFD class
|
||||||
|
This class support to use aria2p as downloader.
|
||||||
|
(Aria2p, a command-line tool and Python library to interact with an aria2c daemon process
|
||||||
|
through JSON-RPC.)
|
||||||
|
It can help you to get download progress more easily.
|
||||||
|
To use aria2p as downloader, you need to install aria2c and aria2p, aria2p can download with pip.
|
||||||
|
Then run aria2c in the background and enable with the --enable-rpc option.
|
||||||
|
'''
|
||||||
|
try:
|
||||||
|
import aria2p
|
||||||
|
__avail = True
|
||||||
|
except ImportError:
|
||||||
|
__avail = False
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def available(cls):
|
||||||
|
return cls.__avail
|
||||||
|
|
||||||
|
def _call_downloader(self, tmpfilename, info_dict):
|
||||||
|
aria2 = self.aria2p.API(
|
||||||
|
self.aria2p.Client(
|
||||||
|
host='http://localhost',
|
||||||
|
port=6800,
|
||||||
|
secret=''
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
options = {
|
||||||
|
'min-split-size': '1M',
|
||||||
|
'max-connection-per-server': 4,
|
||||||
|
'auto-file-renaming': 'false',
|
||||||
|
}
|
||||||
|
options['dir'] = os.path.dirname(tmpfilename) or os.path.abspath('.')
|
||||||
|
options['out'] = os.path.basename(tmpfilename)
|
||||||
|
if self.ydl.cookiejar.get_cookie_header(info_dict['url']):
|
||||||
|
options['load-cookies'] = self._write_cookies()
|
||||||
|
options['header'] = []
|
||||||
|
for key, val in self._header_items(info_dict):
|
||||||
|
options['header'].append('{0}: {1}'.format(key, val))
|
||||||
|
download = aria2.add_uris([info_dict['url']], options)
|
||||||
|
status = {
|
||||||
|
'status': 'downloading',
|
||||||
|
'tmpfilename': tmpfilename,
|
||||||
|
}
|
||||||
|
started = time.time()
|
||||||
|
while download.status in ['active', 'waiting']:
|
||||||
|
download = aria2.get_download(download.gid)
|
||||||
|
status.update({
|
||||||
|
'downloaded_bytes': download.completed_length,
|
||||||
|
'total_bytes': download.total_length,
|
||||||
|
'elapsed': time.time() - started,
|
||||||
|
'eta': download.eta.total_seconds(),
|
||||||
|
'speed': download.download_speed,
|
||||||
|
})
|
||||||
|
self._hook_progress(status)
|
||||||
|
time.sleep(.5)
|
||||||
|
return download.status != 'complete'
|
||||||
|
|
||||||
|
|
||||||
class HttpieFD(ExternalFD):
|
class HttpieFD(ExternalFD):
|
||||||
@classmethod
|
@classmethod
|
||||||
def available(cls):
|
def available(cls):
|
||||||
|
@ -207,15 +338,23 @@ class HttpieFD(ExternalFD):
|
||||||
|
|
||||||
def _make_cmd(self, tmpfilename, info_dict):
|
def _make_cmd(self, tmpfilename, info_dict):
|
||||||
cmd = ['http', '--download', '--output', tmpfilename, info_dict['url']]
|
cmd = ['http', '--download', '--output', tmpfilename, info_dict['url']]
|
||||||
for key, val in info_dict['http_headers'].items():
|
for key, val in self._header_items(info_dict):
|
||||||
cmd += ['%s:%s' % (key, val)]
|
cmd += ['%s:%s' % (key, val)]
|
||||||
|
|
||||||
|
# httpie 3.1.0+ removes the Cookie header on redirect, so this should be safe for now. [1]
|
||||||
|
# If we ever need cookie handling for redirects, we can export the cookiejar into a session. [2]
|
||||||
|
# 1: https://github.com/httpie/httpie/security/advisories/GHSA-9w4w-cpc8-h2fq
|
||||||
|
# 2: https://httpie.io/docs/cli/sessions
|
||||||
|
cookie_header = self.ydl.cookiejar.get_cookie_header(info_dict['url'])
|
||||||
|
if cookie_header:
|
||||||
|
cmd += ['Cookie:%s' % cookie_header]
|
||||||
return cmd
|
return cmd
|
||||||
|
|
||||||
|
|
||||||
class FFmpegFD(ExternalFD):
|
class FFmpegFD(ExternalFD):
|
||||||
@classmethod
|
@classmethod
|
||||||
def supports(cls, info_dict):
|
def supports(cls, info_dict):
|
||||||
return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps', 'm3u8', 'rtsp', 'rtmp', 'mms')
|
return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps', 'm3u8', 'rtsp', 'rtmp', 'mms', 'http_dash_segments')
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def available(cls):
|
def available(cls):
|
||||||
|
@ -254,7 +393,14 @@ class FFmpegFD(ExternalFD):
|
||||||
# if end_time:
|
# if end_time:
|
||||||
# args += ['-t', compat_str(end_time - start_time)]
|
# args += ['-t', compat_str(end_time - start_time)]
|
||||||
|
|
||||||
if info_dict['http_headers'] and re.match(r'^https?://', url):
|
cookies = self.ydl.cookiejar.get_cookies_for_url(url)
|
||||||
|
if cookies:
|
||||||
|
args.extend(['-cookies', ''.join(
|
||||||
|
'{0}={1}; path={2}; domain={3};\r\n'.format(
|
||||||
|
cookie.name, cookie.value, cookie.path, cookie.domain)
|
||||||
|
for cookie in cookies)])
|
||||||
|
|
||||||
|
if info_dict.get('http_headers') and re.match(r'^https?://', url):
|
||||||
# Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv:
|
# Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv:
|
||||||
# [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header.
|
# [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header.
|
||||||
headers = handle_youtubedl_headers(info_dict['http_headers'])
|
headers = handle_youtubedl_headers(info_dict['http_headers'])
|
||||||
|
|
|
@ -71,7 +71,7 @@ class FragmentFD(FileDownloader):
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def __do_ytdl_file(ctx):
|
def __do_ytdl_file(ctx):
|
||||||
return not ctx['live'] and not ctx['tmpfilename'] == '-'
|
return ctx['live'] is not True and ctx['tmpfilename'] != '-'
|
||||||
|
|
||||||
def _read_ytdl_file(self, ctx):
|
def _read_ytdl_file(self, ctx):
|
||||||
assert 'ytdl_corrupt' not in ctx
|
assert 'ytdl_corrupt' not in ctx
|
||||||
|
@ -101,6 +101,13 @@ class FragmentFD(FileDownloader):
|
||||||
'url': frag_url,
|
'url': frag_url,
|
||||||
'http_headers': headers or info_dict.get('http_headers'),
|
'http_headers': headers or info_dict.get('http_headers'),
|
||||||
}
|
}
|
||||||
|
frag_resume_len = 0
|
||||||
|
if ctx['dl'].params.get('continuedl', True):
|
||||||
|
frag_resume_len = self.filesize_or_none(
|
||||||
|
self.temp_name(fragment_filename))
|
||||||
|
fragment_info_dict['frag_resume_len'] = frag_resume_len
|
||||||
|
ctx['frag_resume_len'] = frag_resume_len or 0
|
||||||
|
|
||||||
success = ctx['dl'].download(fragment_filename, fragment_info_dict)
|
success = ctx['dl'].download(fragment_filename, fragment_info_dict)
|
||||||
if not success:
|
if not success:
|
||||||
return False, None
|
return False, None
|
||||||
|
@ -124,9 +131,7 @@ class FragmentFD(FileDownloader):
|
||||||
del ctx['fragment_filename_sanitized']
|
del ctx['fragment_filename_sanitized']
|
||||||
|
|
||||||
def _prepare_frag_download(self, ctx):
|
def _prepare_frag_download(self, ctx):
|
||||||
if 'live' not in ctx:
|
if not ctx.setdefault('live', False):
|
||||||
ctx['live'] = False
|
|
||||||
if not ctx['live']:
|
|
||||||
total_frags_str = '%d' % ctx['total_frags']
|
total_frags_str = '%d' % ctx['total_frags']
|
||||||
ad_frags = ctx.get('ad_frags', 0)
|
ad_frags = ctx.get('ad_frags', 0)
|
||||||
if ad_frags:
|
if ad_frags:
|
||||||
|
@ -136,10 +141,11 @@ class FragmentFD(FileDownloader):
|
||||||
self.to_screen(
|
self.to_screen(
|
||||||
'[%s] Total fragments: %s' % (self.FD_NAME, total_frags_str))
|
'[%s] Total fragments: %s' % (self.FD_NAME, total_frags_str))
|
||||||
self.report_destination(ctx['filename'])
|
self.report_destination(ctx['filename'])
|
||||||
|
continuedl = self.params.get('continuedl', True)
|
||||||
dl = HttpQuietDownloader(
|
dl = HttpQuietDownloader(
|
||||||
self.ydl,
|
self.ydl,
|
||||||
{
|
{
|
||||||
'continuedl': True,
|
'continuedl': continuedl,
|
||||||
'quiet': True,
|
'quiet': True,
|
||||||
'noprogress': True,
|
'noprogress': True,
|
||||||
'ratelimit': self.params.get('ratelimit'),
|
'ratelimit': self.params.get('ratelimit'),
|
||||||
|
@ -150,12 +156,11 @@ class FragmentFD(FileDownloader):
|
||||||
)
|
)
|
||||||
tmpfilename = self.temp_name(ctx['filename'])
|
tmpfilename = self.temp_name(ctx['filename'])
|
||||||
open_mode = 'wb'
|
open_mode = 'wb'
|
||||||
resume_len = 0
|
|
||||||
|
|
||||||
# Establish possible resume length
|
# Establish possible resume length
|
||||||
if os.path.isfile(encodeFilename(tmpfilename)):
|
resume_len = self.filesize_or_none(tmpfilename) or 0
|
||||||
|
if resume_len > 0:
|
||||||
open_mode = 'ab'
|
open_mode = 'ab'
|
||||||
resume_len = os.path.getsize(encodeFilename(tmpfilename))
|
|
||||||
|
|
||||||
# Should be initialized before ytdl file check
|
# Should be initialized before ytdl file check
|
||||||
ctx.update({
|
ctx.update({
|
||||||
|
@ -164,7 +169,8 @@ class FragmentFD(FileDownloader):
|
||||||
})
|
})
|
||||||
|
|
||||||
if self.__do_ytdl_file(ctx):
|
if self.__do_ytdl_file(ctx):
|
||||||
if os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename']))):
|
ytdl_file_exists = os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename'])))
|
||||||
|
if continuedl and ytdl_file_exists:
|
||||||
self._read_ytdl_file(ctx)
|
self._read_ytdl_file(ctx)
|
||||||
is_corrupt = ctx.get('ytdl_corrupt') is True
|
is_corrupt = ctx.get('ytdl_corrupt') is True
|
||||||
is_inconsistent = ctx['fragment_index'] > 0 and resume_len == 0
|
is_inconsistent = ctx['fragment_index'] > 0 and resume_len == 0
|
||||||
|
@ -178,7 +184,12 @@ class FragmentFD(FileDownloader):
|
||||||
if 'ytdl_corrupt' in ctx:
|
if 'ytdl_corrupt' in ctx:
|
||||||
del ctx['ytdl_corrupt']
|
del ctx['ytdl_corrupt']
|
||||||
self._write_ytdl_file(ctx)
|
self._write_ytdl_file(ctx)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
if not continuedl:
|
||||||
|
if ytdl_file_exists:
|
||||||
|
self._read_ytdl_file(ctx)
|
||||||
|
ctx['fragment_index'] = resume_len = 0
|
||||||
self._write_ytdl_file(ctx)
|
self._write_ytdl_file(ctx)
|
||||||
assert ctx['fragment_index'] == 0
|
assert ctx['fragment_index'] == 0
|
||||||
|
|
||||||
|
@ -209,6 +220,7 @@ class FragmentFD(FileDownloader):
|
||||||
start = time.time()
|
start = time.time()
|
||||||
ctx.update({
|
ctx.update({
|
||||||
'started': start,
|
'started': start,
|
||||||
|
'fragment_started': start,
|
||||||
# Amount of fragment's bytes downloaded by the time of the previous
|
# Amount of fragment's bytes downloaded by the time of the previous
|
||||||
# frag progress hook invocation
|
# frag progress hook invocation
|
||||||
'prev_frag_downloaded_bytes': 0,
|
'prev_frag_downloaded_bytes': 0,
|
||||||
|
@ -218,6 +230,9 @@ class FragmentFD(FileDownloader):
|
||||||
if s['status'] not in ('downloading', 'finished'):
|
if s['status'] not in ('downloading', 'finished'):
|
||||||
return
|
return
|
||||||
|
|
||||||
|
if not total_frags and ctx.get('fragment_count'):
|
||||||
|
state['fragment_count'] = ctx['fragment_count']
|
||||||
|
|
||||||
time_now = time.time()
|
time_now = time.time()
|
||||||
state['elapsed'] = time_now - start
|
state['elapsed'] = time_now - start
|
||||||
frag_total_bytes = s.get('total_bytes') or 0
|
frag_total_bytes = s.get('total_bytes') or 0
|
||||||
|
@ -232,16 +247,17 @@ class FragmentFD(FileDownloader):
|
||||||
ctx['fragment_index'] = state['fragment_index']
|
ctx['fragment_index'] = state['fragment_index']
|
||||||
state['downloaded_bytes'] += frag_total_bytes - ctx['prev_frag_downloaded_bytes']
|
state['downloaded_bytes'] += frag_total_bytes - ctx['prev_frag_downloaded_bytes']
|
||||||
ctx['complete_frags_downloaded_bytes'] = state['downloaded_bytes']
|
ctx['complete_frags_downloaded_bytes'] = state['downloaded_bytes']
|
||||||
|
ctx['speed'] = state['speed'] = self.calc_speed(
|
||||||
|
ctx['fragment_started'], time_now, frag_total_bytes)
|
||||||
|
ctx['fragment_started'] = time.time()
|
||||||
ctx['prev_frag_downloaded_bytes'] = 0
|
ctx['prev_frag_downloaded_bytes'] = 0
|
||||||
else:
|
else:
|
||||||
frag_downloaded_bytes = s['downloaded_bytes']
|
frag_downloaded_bytes = s['downloaded_bytes']
|
||||||
state['downloaded_bytes'] += frag_downloaded_bytes - ctx['prev_frag_downloaded_bytes']
|
state['downloaded_bytes'] += frag_downloaded_bytes - ctx['prev_frag_downloaded_bytes']
|
||||||
|
ctx['speed'] = state['speed'] = self.calc_speed(
|
||||||
|
ctx['fragment_started'], time_now, frag_downloaded_bytes - ctx['frag_resume_len'])
|
||||||
if not ctx['live']:
|
if not ctx['live']:
|
||||||
state['eta'] = self.calc_eta(
|
state['eta'] = self.calc_eta(state['speed'], estimated_size - state['downloaded_bytes'])
|
||||||
start, time_now, estimated_size - resume_len,
|
|
||||||
state['downloaded_bytes'] - resume_len)
|
|
||||||
state['speed'] = s.get('speed') or ctx.get('speed')
|
|
||||||
ctx['speed'] = state['speed']
|
|
||||||
ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes
|
ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes
|
||||||
self._hook_progress(state)
|
self._hook_progress(state)
|
||||||
|
|
||||||
|
@ -268,7 +284,7 @@ class FragmentFD(FileDownloader):
|
||||||
os.utime(ctx['filename'], (time.time(), filetime))
|
os.utime(ctx['filename'], (time.time(), filetime))
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
downloaded_bytes = os.path.getsize(encodeFilename(ctx['filename']))
|
downloaded_bytes = self.filesize_or_none(ctx['filename']) or 0
|
||||||
|
|
||||||
self._hook_progress({
|
self._hook_progress({
|
||||||
'downloaded_bytes': downloaded_bytes,
|
'downloaded_bytes': downloaded_bytes,
|
||||||
|
|
|
@ -58,9 +58,9 @@ class HttpFD(FileDownloader):
|
||||||
|
|
||||||
if self.params.get('continuedl', True):
|
if self.params.get('continuedl', True):
|
||||||
# Establish possible resume length
|
# Establish possible resume length
|
||||||
if os.path.isfile(encodeFilename(ctx.tmpfilename)):
|
ctx.resume_len = info_dict.get('frag_resume_len')
|
||||||
ctx.resume_len = os.path.getsize(
|
if ctx.resume_len is None:
|
||||||
encodeFilename(ctx.tmpfilename))
|
ctx.resume_len = self.filesize_or_none(ctx.tmpfilename) or 0
|
||||||
|
|
||||||
ctx.is_resume = ctx.resume_len > 0
|
ctx.is_resume = ctx.resume_len > 0
|
||||||
|
|
||||||
|
@ -115,9 +115,9 @@ class HttpFD(FileDownloader):
|
||||||
raise RetryDownload(err)
|
raise RetryDownload(err)
|
||||||
raise err
|
raise err
|
||||||
# When trying to resume, Content-Range HTTP header of response has to be checked
|
# When trying to resume, Content-Range HTTP header of response has to be checked
|
||||||
# to match the value of requested Range HTTP header. This is due to a webservers
|
# to match the value of requested Range HTTP header. This is due to webservers
|
||||||
# that don't support resuming and serve a whole file with no Content-Range
|
# that don't support resuming and serve a whole file with no Content-Range
|
||||||
# set in response despite of requested Range (see
|
# set in response despite requested Range (see
|
||||||
# https://github.com/ytdl-org/youtube-dl/issues/6057#issuecomment-126129799)
|
# https://github.com/ytdl-org/youtube-dl/issues/6057#issuecomment-126129799)
|
||||||
if has_range:
|
if has_range:
|
||||||
content_range = ctx.data.headers.get('Content-Range')
|
content_range = ctx.data.headers.get('Content-Range')
|
||||||
|
@ -141,7 +141,8 @@ class HttpFD(FileDownloader):
|
||||||
# Content-Range is either not present or invalid. Assuming remote webserver is
|
# Content-Range is either not present or invalid. Assuming remote webserver is
|
||||||
# trying to send the whole file, resume is not possible, so wiping the local file
|
# trying to send the whole file, resume is not possible, so wiping the local file
|
||||||
# and performing entire redownload
|
# and performing entire redownload
|
||||||
self.report_unable_to_resume()
|
if range_start > 0:
|
||||||
|
self.report_unable_to_resume()
|
||||||
ctx.resume_len = 0
|
ctx.resume_len = 0
|
||||||
ctx.open_mode = 'wb'
|
ctx.open_mode = 'wb'
|
||||||
ctx.data_len = int_or_none(ctx.data.info().get('Content-length', None))
|
ctx.data_len = int_or_none(ctx.data.info().get('Content-length', None))
|
||||||
|
@ -293,10 +294,7 @@ class HttpFD(FileDownloader):
|
||||||
|
|
||||||
# Progress message
|
# Progress message
|
||||||
speed = self.calc_speed(start, now, byte_counter - ctx.resume_len)
|
speed = self.calc_speed(start, now, byte_counter - ctx.resume_len)
|
||||||
if ctx.data_len is None:
|
eta = self.calc_eta(speed, ctx.data_len and (ctx.data_len - byte_counter))
|
||||||
eta = None
|
|
||||||
else:
|
|
||||||
eta = self.calc_eta(start, time.time(), ctx.data_len - ctx.resume_len, byte_counter - ctx.resume_len)
|
|
||||||
|
|
||||||
self._hook_progress({
|
self._hook_progress({
|
||||||
'status': 'downloading',
|
'status': 'downloading',
|
||||||
|
|
|
@ -8,6 +8,8 @@ from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
GeoRestrictedError,
|
GeoRestrictedError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
remove_start,
|
||||||
|
traverse_obj,
|
||||||
update_url_query,
|
update_url_query,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
)
|
)
|
||||||
|
@ -33,14 +35,17 @@ class AENetworksBaseIE(ThePlatformIE):
|
||||||
}
|
}
|
||||||
|
|
||||||
def _extract_aen_smil(self, smil_url, video_id, auth=None):
|
def _extract_aen_smil(self, smil_url, video_id, auth=None):
|
||||||
query = {'mbr': 'true'}
|
query = {
|
||||||
|
'mbr': 'true',
|
||||||
|
'formats': 'M3U+none,MPEG-DASH+none,MPEG4,MP3',
|
||||||
|
}
|
||||||
if auth:
|
if auth:
|
||||||
query['auth'] = auth
|
query['auth'] = auth
|
||||||
TP_SMIL_QUERY = [{
|
TP_SMIL_QUERY = [{
|
||||||
'assetTypes': 'high_video_ak',
|
'assetTypes': 'high_video_ak',
|
||||||
'switch': 'hls_high_ak'
|
'switch': 'hls_high_ak',
|
||||||
}, {
|
}, {
|
||||||
'assetTypes': 'high_video_s3'
|
'assetTypes': 'high_video_s3',
|
||||||
}, {
|
}, {
|
||||||
'assetTypes': 'high_video_s3',
|
'assetTypes': 'high_video_s3',
|
||||||
'switch': 'hls_high_fastly',
|
'switch': 'hls_high_fastly',
|
||||||
|
@ -75,7 +80,14 @@ class AENetworksBaseIE(ThePlatformIE):
|
||||||
requestor_id, brand = self._DOMAIN_MAP[domain]
|
requestor_id, brand = self._DOMAIN_MAP[domain]
|
||||||
result = self._download_json(
|
result = self._download_json(
|
||||||
'https://feeds.video.aetnd.com/api/v2/%s/videos' % brand,
|
'https://feeds.video.aetnd.com/api/v2/%s/videos' % brand,
|
||||||
filter_value, query={'filter[%s]' % filter_key: filter_value})['results'][0]
|
filter_value, query={'filter[%s]' % filter_key: filter_value})
|
||||||
|
result = traverse_obj(
|
||||||
|
result, ('results',
|
||||||
|
lambda k, v: k == 0 and v[filter_key] == filter_value),
|
||||||
|
get_all=False)
|
||||||
|
if not result:
|
||||||
|
raise ExtractorError('Show not found in A&E feed (too new?)', expected=True,
|
||||||
|
video_id=remove_start(filter_value, '/'))
|
||||||
title = result['title']
|
title = result['title']
|
||||||
video_id = result['id']
|
video_id = result['id']
|
||||||
media_url = result['publicUrl']
|
media_url = result['publicUrl']
|
||||||
|
@ -126,7 +138,7 @@ class AENetworksIE(AENetworksBaseIE):
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
'add_ie': ['ThePlatform'],
|
'add_ie': ['ThePlatform'],
|
||||||
'skip': 'This video is only available for users of participating TV providers.',
|
'skip': 'Geo-restricted - This content is not available in your location.'
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.aetv.com/shows/duck-dynasty/season-9/episode-1',
|
'url': 'http://www.aetv.com/shows/duck-dynasty/season-9/episode-1',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -143,6 +155,7 @@ class AENetworksIE(AENetworksBaseIE):
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
'add_ie': ['ThePlatform'],
|
'add_ie': ['ThePlatform'],
|
||||||
|
'skip': 'This video is only available for users of participating TV providers.',
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.fyi.tv/shows/tiny-house-nation/season-1/episode-8',
|
'url': 'http://www.fyi.tv/shows/tiny-house-nation/season-1/episode-8',
|
||||||
'only_matching': True
|
'only_matching': True
|
||||||
|
|
|
@ -15,7 +15,7 @@ from ..utils import (
|
||||||
|
|
||||||
|
|
||||||
class AmericasTestKitchenIE(InfoExtractor):
|
class AmericasTestKitchenIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?(?:americastestkitchen|cooks(?:country|illustrated))\.com/(?P<resource_type>episode|videos)/(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:www\.)?(?:americastestkitchen|cooks(?:country|illustrated))\.com/(?:cooks(?:country|illustrated)/)?(?P<resource_type>episode|videos)/(?P<id>\d+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.americastestkitchen.com/episode/582-weeknight-japanese-suppers',
|
'url': 'https://www.americastestkitchen.com/episode/582-weeknight-japanese-suppers',
|
||||||
'md5': 'b861c3e365ac38ad319cfd509c30577f',
|
'md5': 'b861c3e365ac38ad319cfd509c30577f',
|
||||||
|
@ -23,15 +23,20 @@ class AmericasTestKitchenIE(InfoExtractor):
|
||||||
'id': '5b400b9ee338f922cb06450c',
|
'id': '5b400b9ee338f922cb06450c',
|
||||||
'title': 'Japanese Suppers',
|
'title': 'Japanese Suppers',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
|
'display_id': 'weeknight-japanese-suppers',
|
||||||
'description': 'md5:64e606bfee910627efc4b5f050de92b3',
|
'description': 'md5:64e606bfee910627efc4b5f050de92b3',
|
||||||
'thumbnail': r're:^https?://',
|
'timestamp': 1523304000,
|
||||||
'timestamp': 1523318400,
|
'upload_date': '20180409',
|
||||||
'upload_date': '20180410',
|
'release_date': '20180409',
|
||||||
'release_date': '20180410',
|
|
||||||
'series': "America's Test Kitchen",
|
'series': "America's Test Kitchen",
|
||||||
|
'season': 'Season 18',
|
||||||
'season_number': 18,
|
'season_number': 18,
|
||||||
'episode': 'Japanese Suppers',
|
'episode': 'Japanese Suppers',
|
||||||
'episode_number': 15,
|
'episode_number': 15,
|
||||||
|
'duration': 1376,
|
||||||
|
'thumbnail': r're:^https?://',
|
||||||
|
'average_rating': 0,
|
||||||
|
'view_count': int,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
|
@ -44,15 +49,20 @@ class AmericasTestKitchenIE(InfoExtractor):
|
||||||
'id': '5fbe8c61bda2010001c6763b',
|
'id': '5fbe8c61bda2010001c6763b',
|
||||||
'title': 'Simple Chicken Dinner',
|
'title': 'Simple Chicken Dinner',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
|
'display_id': 'atktv_2103_simple-chicken-dinner_full-episode_web-mp4',
|
||||||
'description': 'md5:eb68737cc2fd4c26ca7db30139d109e7',
|
'description': 'md5:eb68737cc2fd4c26ca7db30139d109e7',
|
||||||
'thumbnail': r're:^https?://',
|
'timestamp': 1610737200,
|
||||||
'timestamp': 1610755200,
|
'upload_date': '20210115',
|
||||||
'upload_date': '20210116',
|
'release_date': '20210115',
|
||||||
'release_date': '20210116',
|
|
||||||
'series': "America's Test Kitchen",
|
'series': "America's Test Kitchen",
|
||||||
|
'season': 'Season 21',
|
||||||
'season_number': 21,
|
'season_number': 21,
|
||||||
'episode': 'Simple Chicken Dinner',
|
'episode': 'Simple Chicken Dinner',
|
||||||
'episode_number': 3,
|
'episode_number': 3,
|
||||||
|
'duration': 1397,
|
||||||
|
'thumbnail': r're:^https?://',
|
||||||
|
'view_count': int,
|
||||||
|
'average_rating': 0,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
|
@ -60,6 +70,12 @@ class AmericasTestKitchenIE(InfoExtractor):
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.americastestkitchen.com/videos/3420-pan-seared-salmon',
|
'url': 'https://www.americastestkitchen.com/videos/3420-pan-seared-salmon',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.americastestkitchen.com/cookscountry/episode/564-when-only-chocolate-will-do',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.americastestkitchen.com/cooksillustrated/videos/4478-beef-wellington',
|
||||||
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.cookscountry.com/episode/564-when-only-chocolate-will-do',
|
'url': 'https://www.cookscountry.com/episode/564-when-only-chocolate-will-do',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
@ -94,7 +110,7 @@ class AmericasTestKitchenIE(InfoExtractor):
|
||||||
|
|
||||||
|
|
||||||
class AmericasTestKitchenSeasonIE(InfoExtractor):
|
class AmericasTestKitchenSeasonIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?(?P<show>americastestkitchen|cookscountry)\.com/episodes/browse/season_(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:www\.)?(?P<show>americastestkitchen|(?P<cooks>cooks(?:country|illustrated)))\.com(?:(?:/(?P<show2>cooks(?:country|illustrated)))?(?:/?$|(?<!ated)(?<!ated\.com)/episodes/browse/season_(?P<season>\d+)))'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# ATK Season
|
# ATK Season
|
||||||
'url': 'https://www.americastestkitchen.com/episodes/browse/season_1',
|
'url': 'https://www.americastestkitchen.com/episodes/browse/season_1',
|
||||||
|
@ -105,48 +121,93 @@ class AmericasTestKitchenSeasonIE(InfoExtractor):
|
||||||
'playlist_count': 13,
|
'playlist_count': 13,
|
||||||
}, {
|
}, {
|
||||||
# Cooks Country Season
|
# Cooks Country Season
|
||||||
'url': 'https://www.cookscountry.com/episodes/browse/season_12',
|
'url': 'https://www.americastestkitchen.com/cookscountry/episodes/browse/season_12',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'season_12',
|
'id': 'season_12',
|
||||||
'title': 'Season 12',
|
'title': 'Season 12',
|
||||||
},
|
},
|
||||||
'playlist_count': 13,
|
'playlist_count': 13,
|
||||||
|
}, {
|
||||||
|
# America's Test Kitchen Series
|
||||||
|
'url': 'https://www.americastestkitchen.com/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'americastestkitchen',
|
||||||
|
'title': 'America\'s Test Kitchen',
|
||||||
|
},
|
||||||
|
'playlist_count': 558,
|
||||||
|
}, {
|
||||||
|
# Cooks Country Series
|
||||||
|
'url': 'https://www.americastestkitchen.com/cookscountry',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'cookscountry',
|
||||||
|
'title': 'Cook\'s Country',
|
||||||
|
},
|
||||||
|
'playlist_count': 199,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.americastestkitchen.com/cookscountry/',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.cookscountry.com/episodes/browse/season_12',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.cookscountry.com',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.americastestkitchen.com/cooksillustrated/',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.cooksillustrated.com',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
show_name, season_number = re.match(self._VALID_URL, url).groups()
|
match = re.match(self._VALID_URL, url).groupdict()
|
||||||
season_number = int(season_number)
|
show = match.get('show2')
|
||||||
|
show_path = ('/' + show) if show else ''
|
||||||
|
show = show or match['show']
|
||||||
|
season_number = int_or_none(match.get('season'))
|
||||||
|
|
||||||
slug = 'atk' if show_name == 'americastestkitchen' else 'cco'
|
slug, title = {
|
||||||
|
'americastestkitchen': ('atk', 'America\'s Test Kitchen'),
|
||||||
|
'cookscountry': ('cco', 'Cook\'s Country'),
|
||||||
|
'cooksillustrated': ('cio', 'Cook\'s Illustrated'),
|
||||||
|
}[show]
|
||||||
|
|
||||||
season = 'Season %d' % season_number
|
facet_filters = [
|
||||||
|
'search_document_klass:episode',
|
||||||
|
'search_show_slug:' + slug,
|
||||||
|
]
|
||||||
|
|
||||||
|
if season_number:
|
||||||
|
playlist_id = 'season_%d' % season_number
|
||||||
|
playlist_title = 'Season %d' % season_number
|
||||||
|
facet_filters.append('search_season_list:' + playlist_title)
|
||||||
|
else:
|
||||||
|
playlist_id = show
|
||||||
|
playlist_title = title
|
||||||
|
|
||||||
season_search = self._download_json(
|
season_search = self._download_json(
|
||||||
'https://y1fnzxui30-dsn.algolia.net/1/indexes/everest_search_%s_season_desc_production' % slug,
|
'https://y1fnzxui30-dsn.algolia.net/1/indexes/everest_search_%s_season_desc_production' % slug,
|
||||||
season, headers={
|
playlist_id, headers={
|
||||||
'Origin': 'https://www.%s.com' % show_name,
|
'Origin': 'https://www.americastestkitchen.com',
|
||||||
'X-Algolia-API-Key': '8d504d0099ed27c1b73708d22871d805',
|
'X-Algolia-API-Key': '8d504d0099ed27c1b73708d22871d805',
|
||||||
'X-Algolia-Application-Id': 'Y1FNZXUI30',
|
'X-Algolia-Application-Id': 'Y1FNZXUI30',
|
||||||
}, query={
|
}, query={
|
||||||
'facetFilters': json.dumps([
|
'facetFilters': json.dumps(facet_filters),
|
||||||
'search_season_list:' + season,
|
'attributesToRetrieve': 'description,search_%s_episode_number,search_document_date,search_url,title,search_atk_episode_season' % slug,
|
||||||
'search_document_klass:episode',
|
|
||||||
'search_show_slug:' + slug,
|
|
||||||
]),
|
|
||||||
'attributesToRetrieve': 'description,search_%s_episode_number,search_document_date,search_url,title' % slug,
|
|
||||||
'attributesToHighlight': '',
|
'attributesToHighlight': '',
|
||||||
'hitsPerPage': 1000,
|
'hitsPerPage': 1000,
|
||||||
})
|
})
|
||||||
|
|
||||||
def entries():
|
def entries():
|
||||||
for episode in (season_search.get('hits') or []):
|
for episode in (season_search.get('hits') or []):
|
||||||
search_url = episode.get('search_url')
|
search_url = episode.get('search_url') # always formatted like '/episode/123-title-of-episode'
|
||||||
if not search_url:
|
if not search_url:
|
||||||
continue
|
continue
|
||||||
yield {
|
yield {
|
||||||
'_type': 'url',
|
'_type': 'url',
|
||||||
'url': 'https://www.%s.com%s' % (show_name, search_url),
|
'url': 'https://www.americastestkitchen.com%s%s' % (show_path, search_url),
|
||||||
'id': try_get(episode, lambda e: e['objectID'].split('_')[-1]),
|
'id': try_get(episode, lambda e: e['objectID'].rsplit('_', 1)[-1]),
|
||||||
'title': episode.get('title'),
|
'title': episode.get('title'),
|
||||||
'description': episode.get('description'),
|
'description': episode.get('description'),
|
||||||
'timestamp': unified_timestamp(episode.get('search_document_date')),
|
'timestamp': unified_timestamp(episode.get('search_document_date')),
|
||||||
|
@ -156,4 +217,4 @@ class AmericasTestKitchenSeasonIE(InfoExtractor):
|
||||||
}
|
}
|
||||||
|
|
||||||
return self.playlist_result(
|
return self.playlist_result(
|
||||||
entries(), 'season_%d' % season_number, season)
|
entries(), playlist_id, playlist_title)
|
||||||
|
|
173
youtube_dl/extractor/blerp.py
Normal file
173
youtube_dl/extractor/blerp.py
Normal file
|
@ -0,0 +1,173 @@
|
||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import json
|
||||||
|
|
||||||
|
from ..utils import (
|
||||||
|
strip_or_none,
|
||||||
|
traverse_obj,
|
||||||
|
)
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class BlerpIE(InfoExtractor):
|
||||||
|
IE_NAME = 'blerp'
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?blerp\.com/soundbites/(?P<id>[0-9a-zA-Z]+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://blerp.com/soundbites/6320fe8745636cb4dd677a5a',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '6320fe8745636cb4dd677a5a',
|
||||||
|
'title': 'Samsung Galaxy S8 Over the Horizon Ringtone 2016',
|
||||||
|
'uploader': 'luminousaj',
|
||||||
|
'uploader_id': '5fb81e51aa66ae000c395478',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'tags': ['samsung', 'galaxy', 's8', 'over the horizon', '2016', 'ringtone'],
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'https://blerp.com/soundbites/5bc94ef4796001000498429f',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '5bc94ef4796001000498429f',
|
||||||
|
'title': 'Yee',
|
||||||
|
'uploader': '179617322678353920',
|
||||||
|
'uploader_id': '5ba99cf71386730004552c42',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'tags': ['YEE', 'YEET', 'wo ha haah catchy tune yee', 'yee']
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
|
||||||
|
_GRAPHQL_OPERATIONNAME = "webBitePageGetBite"
|
||||||
|
_GRAPHQL_QUERY = (
|
||||||
|
'''query webBitePageGetBite($_id: MongoID!) {
|
||||||
|
web {
|
||||||
|
biteById(_id: $_id) {
|
||||||
|
...bitePageFrag
|
||||||
|
__typename
|
||||||
|
}
|
||||||
|
__typename
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fragment bitePageFrag on Bite {
|
||||||
|
_id
|
||||||
|
title
|
||||||
|
userKeywords
|
||||||
|
keywords
|
||||||
|
color
|
||||||
|
visibility
|
||||||
|
isPremium
|
||||||
|
owned
|
||||||
|
price
|
||||||
|
extraReview
|
||||||
|
isAudioExists
|
||||||
|
image {
|
||||||
|
filename
|
||||||
|
original {
|
||||||
|
url
|
||||||
|
__typename
|
||||||
|
}
|
||||||
|
__typename
|
||||||
|
}
|
||||||
|
userReactions {
|
||||||
|
_id
|
||||||
|
reactions
|
||||||
|
createdAt
|
||||||
|
__typename
|
||||||
|
}
|
||||||
|
topReactions
|
||||||
|
totalSaveCount
|
||||||
|
saved
|
||||||
|
blerpLibraryType
|
||||||
|
license
|
||||||
|
licenseMetaData
|
||||||
|
playCount
|
||||||
|
totalShareCount
|
||||||
|
totalFavoriteCount
|
||||||
|
totalAddedToBoardCount
|
||||||
|
userCategory
|
||||||
|
userAudioQuality
|
||||||
|
audioCreationState
|
||||||
|
transcription
|
||||||
|
userTranscription
|
||||||
|
description
|
||||||
|
createdAt
|
||||||
|
updatedAt
|
||||||
|
author
|
||||||
|
listingType
|
||||||
|
ownerObject {
|
||||||
|
_id
|
||||||
|
username
|
||||||
|
profileImage {
|
||||||
|
filename
|
||||||
|
original {
|
||||||
|
url
|
||||||
|
__typename
|
||||||
|
}
|
||||||
|
__typename
|
||||||
|
}
|
||||||
|
__typename
|
||||||
|
}
|
||||||
|
transcription
|
||||||
|
favorited
|
||||||
|
visibility
|
||||||
|
isCurated
|
||||||
|
sourceUrl
|
||||||
|
audienceRating
|
||||||
|
strictAudienceRating
|
||||||
|
ownerId
|
||||||
|
reportObject {
|
||||||
|
reportedContentStatus
|
||||||
|
__typename
|
||||||
|
}
|
||||||
|
giphy {
|
||||||
|
mp4
|
||||||
|
gif
|
||||||
|
__typename
|
||||||
|
}
|
||||||
|
audio {
|
||||||
|
filename
|
||||||
|
original {
|
||||||
|
url
|
||||||
|
__typename
|
||||||
|
}
|
||||||
|
mp3 {
|
||||||
|
url
|
||||||
|
__typename
|
||||||
|
}
|
||||||
|
__typename
|
||||||
|
}
|
||||||
|
__typename
|
||||||
|
}
|
||||||
|
|
||||||
|
''')
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
audio_id = self._match_id(url)
|
||||||
|
|
||||||
|
data = {
|
||||||
|
'operationName': self._GRAPHQL_OPERATIONNAME,
|
||||||
|
'query': self._GRAPHQL_QUERY,
|
||||||
|
'variables': {
|
||||||
|
'_id': audio_id
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
headers = {
|
||||||
|
'Content-Type': 'application/json'
|
||||||
|
}
|
||||||
|
|
||||||
|
json_result = self._download_json('https://api.blerp.com/graphql',
|
||||||
|
audio_id, data=json.dumps(data).encode('utf-8'), headers=headers)
|
||||||
|
|
||||||
|
bite_json = json_result['data']['web']['biteById']
|
||||||
|
|
||||||
|
info_dict = {
|
||||||
|
'id': bite_json['_id'],
|
||||||
|
'url': bite_json['audio']['mp3']['url'],
|
||||||
|
'title': bite_json['title'],
|
||||||
|
'uploader': traverse_obj(bite_json, ('ownerObject', 'username'), expected_type=strip_or_none),
|
||||||
|
'uploader_id': traverse_obj(bite_json, ('ownerObject', '_id'), expected_type=strip_or_none),
|
||||||
|
'ext': 'mp3',
|
||||||
|
'tags': list(filter(None, map(strip_or_none, (traverse_obj(bite_json, 'userKeywords', expected_type=list) or []))) or None)
|
||||||
|
}
|
||||||
|
|
||||||
|
return info_dict
|
74
youtube_dl/extractor/callin.py
Normal file
74
youtube_dl/extractor/callin.py
Normal file
|
@ -0,0 +1,74 @@
|
||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
traverse_obj,
|
||||||
|
try_get,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class CallinIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?callin\.com/episode/(?:[^/#?-]+-)*(?P<id>[^/#?-]+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://www.callin.com/episode/fcc-commissioner-brendan-carr-on-elons-PrumRdSQJW',
|
||||||
|
'md5': '14ede27ee2c957b7e4db93140fc0745c',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'PrumRdSQJW',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'FCC Commissioner Brendan Carr on Elon’s Starlink',
|
||||||
|
'description': 'Or, why the government doesn’t like SpaceX',
|
||||||
|
'channel': 'The Pull Request',
|
||||||
|
'channel_url': 'https://callin.com/show/the-pull-request-ucnDJmEKAa',
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.callin.com/episode/episode-81-elites-melt-down-over-student-debt-lzxMidUnjA',
|
||||||
|
'md5': '16f704ddbf82a27e3930533b12062f07',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'lzxMidUnjA',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Episode 81- Elites MELT DOWN over Student Debt Victory? Rumble in NYC?',
|
||||||
|
'description': 'Let’s talk todays episode about the primary election shake up in NYC and the elites melting down over student debt cancelation.',
|
||||||
|
'channel': 'The DEBRIEF With Briahna Joy Gray',
|
||||||
|
'channel_url': 'https://callin.com/show/the-debrief-with-briahna-joy-gray-siiFDzGegm',
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _search_nextjs_data(self, webpage, video_id, transform_source=None, fatal=True, **kw):
|
||||||
|
return self._parse_json(
|
||||||
|
self._search_regex(
|
||||||
|
r'(?s)<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>([^<]+)</script>',
|
||||||
|
webpage, 'next.js data', fatal=fatal, **kw),
|
||||||
|
video_id, transform_source=transform_source, fatal=fatal)
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
next_data = self._search_nextjs_data(webpage, video_id)
|
||||||
|
episode = traverse_obj(next_data, ('props', 'pageProps', 'episode'), expected_type=dict)
|
||||||
|
if not episode:
|
||||||
|
raise ExtractorError('Failed to find episode data')
|
||||||
|
|
||||||
|
title = episode.get('title') or self._og_search_title(webpage)
|
||||||
|
description = episode.get('description') or self._og_search_description(webpage)
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
episode.get('m3u8'), video_id, 'mp4',
|
||||||
|
entry_protocol='m3u8_native', fatal=False))
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
channel = try_get(episode, lambda x: x['show']['title'], compat_str)
|
||||||
|
channel_url = try_get(episode, lambda x: x['show']['linkObj']['resourceUrl'], compat_str)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'formats': formats,
|
||||||
|
'channel': channel,
|
||||||
|
'channel_url': channel_url,
|
||||||
|
}
|
|
@ -3,7 +3,6 @@ from __future__ import unicode_literals
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
|
||||||
int_or_none,
|
int_or_none,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
)
|
)
|
||||||
|
@ -20,32 +19,11 @@ class CamModelsIE(InfoExtractor):
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
user_id = self._match_id(url)
|
user_id = self._match_id(url)
|
||||||
|
|
||||||
webpage = self._download_webpage(
|
|
||||||
url, user_id, headers=self.geo_verification_headers())
|
|
||||||
|
|
||||||
manifest_root = self._html_search_regex(
|
|
||||||
r'manifestUrlRoot=([^&\']+)', webpage, 'manifest', default=None)
|
|
||||||
|
|
||||||
if not manifest_root:
|
|
||||||
ERRORS = (
|
|
||||||
("I'm offline, but let's stay connected", 'This user is currently offline'),
|
|
||||||
('in a private show', 'This user is in a private show'),
|
|
||||||
('is currently performing LIVE', 'This model is currently performing live'),
|
|
||||||
)
|
|
||||||
for pattern, message in ERRORS:
|
|
||||||
if pattern in webpage:
|
|
||||||
error = message
|
|
||||||
expected = True
|
|
||||||
break
|
|
||||||
else:
|
|
||||||
error = 'Unable to find manifest URL root'
|
|
||||||
expected = False
|
|
||||||
raise ExtractorError(error, expected=expected)
|
|
||||||
|
|
||||||
manifest = self._download_json(
|
manifest = self._download_json(
|
||||||
'%s%s.json' % (manifest_root, user_id), user_id)
|
'https://manifest-server.naiadsystems.com/live/s:%s.json' % user_id, user_id)
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
|
thumbnails = []
|
||||||
for format_id, format_dict in manifest['formats'].items():
|
for format_id, format_dict in manifest['formats'].items():
|
||||||
if not isinstance(format_dict, dict):
|
if not isinstance(format_dict, dict):
|
||||||
continue
|
continue
|
||||||
|
@ -85,6 +63,13 @@ class CamModelsIE(InfoExtractor):
|
||||||
'preference': -1,
|
'preference': -1,
|
||||||
})
|
})
|
||||||
else:
|
else:
|
||||||
|
if format_id == 'jpeg':
|
||||||
|
thumbnails.append({
|
||||||
|
'url': f['url'],
|
||||||
|
'width': f['width'],
|
||||||
|
'height': f['height'],
|
||||||
|
'format_id': f['format_id'],
|
||||||
|
})
|
||||||
continue
|
continue
|
||||||
formats.append(f)
|
formats.append(f)
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
@ -92,6 +77,7 @@ class CamModelsIE(InfoExtractor):
|
||||||
return {
|
return {
|
||||||
'id': user_id,
|
'id': user_id,
|
||||||
'title': self._live_title(user_id),
|
'title': self._live_title(user_id),
|
||||||
|
'thumbnails': thumbnails,
|
||||||
'is_live': True,
|
'is_live': True,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'age_limit': 18
|
'age_limit': 18
|
||||||
|
|
69
youtube_dl/extractor/clipchamp.py
Normal file
69
youtube_dl/extractor/clipchamp.py
Normal file
|
@ -0,0 +1,69 @@
|
||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
merge_dicts,
|
||||||
|
T,
|
||||||
|
traverse_obj,
|
||||||
|
unified_timestamp,
|
||||||
|
url_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class ClipchampIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?clipchamp\.com/watch/(?P<id>[\w-]+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://clipchamp.com/watch/gRXZ4ZhdDaU',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'gRXZ4ZhdDaU',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Untitled video',
|
||||||
|
'uploader': 'Alexander Schwartz',
|
||||||
|
'timestamp': 1680805580,
|
||||||
|
'upload_date': '20230406',
|
||||||
|
'thumbnail': r're:^https?://.+\.jpg',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': 'm3u8',
|
||||||
|
'format': 'bestvideo',
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
|
_STREAM_URL_TMPL = 'https://%s.cloudflarestream.com/%s/manifest/video.%s'
|
||||||
|
_STREAM_URL_QUERY = {'parentOrigin': 'https://clipchamp.com'}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
data = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['video']
|
||||||
|
|
||||||
|
storage_location = data.get('storage_location')
|
||||||
|
if storage_location != 'cf_stream':
|
||||||
|
raise ExtractorError('Unsupported clip storage location "%s"' % (storage_location,))
|
||||||
|
|
||||||
|
path = data['download_url']
|
||||||
|
iframe = self._download_webpage(
|
||||||
|
'https://iframe.cloudflarestream.com/' + path, video_id, 'Downloading player iframe')
|
||||||
|
subdomain = self._search_regex(
|
||||||
|
r'''\bcustomer-domain-prefix\s*=\s*("|')(?P<sd>[\w-]+)\1''', iframe,
|
||||||
|
'subdomain', group='sd', fatal=False) or 'customer-2ut9yn3y6fta1yxe'
|
||||||
|
|
||||||
|
formats = self._extract_mpd_formats(
|
||||||
|
self._STREAM_URL_TMPL % (subdomain, path, 'mpd'), video_id,
|
||||||
|
query=self._STREAM_URL_QUERY, fatal=False, mpd_id='dash')
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
self._STREAM_URL_TMPL % (subdomain, path, 'm3u8'), video_id, 'mp4',
|
||||||
|
query=self._STREAM_URL_QUERY, fatal=False, m3u8_id='hls'))
|
||||||
|
|
||||||
|
return merge_dicts({
|
||||||
|
'id': video_id,
|
||||||
|
'formats': formats,
|
||||||
|
'uploader': ' '.join(traverse_obj(data, ('creator', ('first_name', 'last_name'), T(compat_str)))) or None,
|
||||||
|
}, traverse_obj(data, {
|
||||||
|
'title': ('project', 'project_name', T(compat_str)),
|
||||||
|
'timestamp': ('created_at', T(unified_timestamp)),
|
||||||
|
'thumbnail': ('thumbnail_url', T(url_or_none)),
|
||||||
|
}), rev=True)
|
|
@ -3,6 +3,7 @@ from __future__ import unicode_literals
|
||||||
|
|
||||||
import base64
|
import base64
|
||||||
import datetime
|
import datetime
|
||||||
|
import functools
|
||||||
import hashlib
|
import hashlib
|
||||||
import json
|
import json
|
||||||
import netrc
|
import netrc
|
||||||
|
@ -23,6 +24,8 @@ from ..compat import (
|
||||||
compat_getpass,
|
compat_getpass,
|
||||||
compat_integer_types,
|
compat_integer_types,
|
||||||
compat_http_client,
|
compat_http_client,
|
||||||
|
compat_map as map,
|
||||||
|
compat_open as open,
|
||||||
compat_os_name,
|
compat_os_name,
|
||||||
compat_str,
|
compat_str,
|
||||||
compat_urllib_error,
|
compat_urllib_error,
|
||||||
|
@ -31,6 +34,7 @@ from ..compat import (
|
||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
compat_xml_parse_error,
|
compat_xml_parse_error,
|
||||||
|
compat_zip as zip,
|
||||||
)
|
)
|
||||||
from ..downloader.f4m import (
|
from ..downloader.f4m import (
|
||||||
get_base_url,
|
get_base_url,
|
||||||
|
@ -70,6 +74,7 @@ from ..utils import (
|
||||||
str_or_none,
|
str_or_none,
|
||||||
str_to_int,
|
str_to_int,
|
||||||
strip_or_none,
|
strip_or_none,
|
||||||
|
traverse_obj,
|
||||||
try_get,
|
try_get,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
|
@ -79,6 +84,7 @@ from ..utils import (
|
||||||
urljoin,
|
urljoin,
|
||||||
url_basename,
|
url_basename,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
|
variadic,
|
||||||
xpath_element,
|
xpath_element,
|
||||||
xpath_text,
|
xpath_text,
|
||||||
xpath_with_ns,
|
xpath_with_ns,
|
||||||
|
@ -367,9 +373,22 @@ class InfoExtractor(object):
|
||||||
title, description etc.
|
title, description etc.
|
||||||
|
|
||||||
|
|
||||||
Subclasses of this one should re-define the _real_initialize() and
|
A subclass of InfoExtractor must be defined to handle each specific site (or
|
||||||
_real_extract() methods and define a _VALID_URL regexp.
|
several sites). Such a concrete subclass should be added to the list of
|
||||||
Probably, they should also be added to the list of extractors.
|
extractors. It should also:
|
||||||
|
* define its _VALID_URL attribute as a regexp, or a Sequence of alternative
|
||||||
|
regexps (but see below)
|
||||||
|
* re-define the _real_extract() method
|
||||||
|
* optionally re-define the _real_initialize() method.
|
||||||
|
|
||||||
|
An extractor subclass may also override suitable() if necessary, but the
|
||||||
|
function signature must be preserved and the function must import everything
|
||||||
|
it needs (except other extractors), so that lazy_extractors works correctly.
|
||||||
|
If the subclass's suitable() and _real_extract() functions avoid using
|
||||||
|
_VALID_URL, the subclass need not set that class attribute.
|
||||||
|
|
||||||
|
An abstract subclass of InfoExtractor may be used to simplify implementation
|
||||||
|
within an extractor module; it should not be added to the list of extractors.
|
||||||
|
|
||||||
_GEO_BYPASS attribute may be set to False in order to disable
|
_GEO_BYPASS attribute may be set to False in order to disable
|
||||||
geo restriction bypass mechanisms for a particular extractor.
|
geo restriction bypass mechanisms for a particular extractor.
|
||||||
|
@ -404,22 +423,33 @@ class InfoExtractor(object):
|
||||||
self._x_forwarded_for_ip = None
|
self._x_forwarded_for_ip = None
|
||||||
self.set_downloader(downloader)
|
self.set_downloader(downloader)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def __match_valid_url(cls, url):
|
||||||
|
# This does not use has/getattr intentionally - we want to know whether
|
||||||
|
# we have cached the regexp for cls, whereas getattr would also
|
||||||
|
# match its superclass
|
||||||
|
if '_VALID_URL_RE' not in cls.__dict__:
|
||||||
|
# _VALID_URL can now be a list/tuple of patterns
|
||||||
|
cls._VALID_URL_RE = tuple(map(re.compile, variadic(cls._VALID_URL)))
|
||||||
|
# 20% faster than next(filter(None, (p.match(url) for p in cls._VALID_URL_RE)), None) in 2.7
|
||||||
|
for p in cls._VALID_URL_RE:
|
||||||
|
p = p.match(url)
|
||||||
|
if p:
|
||||||
|
return p
|
||||||
|
|
||||||
|
# The public alias can safely be overridden, as in some back-ports
|
||||||
|
_match_valid_url = __match_valid_url
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def suitable(cls, url):
|
def suitable(cls, url):
|
||||||
"""Receives a URL and returns True if suitable for this IE."""
|
"""Receives a URL and returns True if suitable for this IE."""
|
||||||
|
# This function must import everything it needs (except other extractors),
|
||||||
# This does not use has/getattr intentionally - we want to know whether
|
# so that lazy_extractors works correctly
|
||||||
# we have cached the regexp for *this* class, whereas getattr would also
|
return cls.__match_valid_url(url) is not None
|
||||||
# match the superclass
|
|
||||||
if '_VALID_URL_RE' not in cls.__dict__:
|
|
||||||
cls._VALID_URL_RE = re.compile(cls._VALID_URL)
|
|
||||||
return cls._VALID_URL_RE.match(url) is not None
|
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def _match_id(cls, url):
|
def _match_id(cls, url):
|
||||||
if '_VALID_URL_RE' not in cls.__dict__:
|
m = cls.__match_valid_url(url)
|
||||||
cls._VALID_URL_RE = re.compile(cls._VALID_URL)
|
|
||||||
m = cls._VALID_URL_RE.match(url)
|
|
||||||
assert m
|
assert m
|
||||||
return compat_str(m.group('id'))
|
return compat_str(m.group('id'))
|
||||||
|
|
||||||
|
@ -1005,6 +1035,8 @@ class InfoExtractor(object):
|
||||||
if group is None:
|
if group is None:
|
||||||
# return the first matching group
|
# return the first matching group
|
||||||
return next(g for g in mobj.groups() if g is not None)
|
return next(g for g in mobj.groups() if g is not None)
|
||||||
|
elif isinstance(group, (list, tuple)):
|
||||||
|
return tuple(mobj.group(g) for g in group)
|
||||||
else:
|
else:
|
||||||
return mobj.group(group)
|
return mobj.group(group)
|
||||||
elif default is not NO_DEFAULT:
|
elif default is not NO_DEFAULT:
|
||||||
|
@ -1020,10 +1052,9 @@ class InfoExtractor(object):
|
||||||
Like _search_regex, but strips HTML tags and unescapes entities.
|
Like _search_regex, but strips HTML tags and unescapes entities.
|
||||||
"""
|
"""
|
||||||
res = self._search_regex(pattern, string, name, default, fatal, flags, group)
|
res = self._search_regex(pattern, string, name, default, fatal, flags, group)
|
||||||
if res:
|
if isinstance(res, tuple):
|
||||||
return clean_html(res).strip()
|
return tuple(map(clean_html, res))
|
||||||
else:
|
return clean_html(res)
|
||||||
return res
|
|
||||||
|
|
||||||
def _get_netrc_login_info(self, netrc_machine=None):
|
def _get_netrc_login_info(self, netrc_machine=None):
|
||||||
username = None
|
username = None
|
||||||
|
@ -1087,7 +1118,7 @@ class InfoExtractor(object):
|
||||||
# Helper functions for extracting OpenGraph info
|
# Helper functions for extracting OpenGraph info
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _og_regexes(prop):
|
def _og_regexes(prop):
|
||||||
content_re = r'content=(?:"([^"]+?)"|\'([^\']+?)\'|\s*([^\s"\'=<>`]+?))'
|
content_re = r'content=(?:"([^"]+?)"|\'([^\']+?)\'|\s*([^\s"\'=<>`]+?)(?=\s|/?>))'
|
||||||
property_re = (r'(?:name|property)=(?:\'og[:-]%(prop)s\'|"og[:-]%(prop)s"|\s*og[:-]%(prop)s\b)'
|
property_re = (r'(?:name|property)=(?:\'og[:-]%(prop)s\'|"og[:-]%(prop)s"|\s*og[:-]%(prop)s\b)'
|
||||||
% {'prop': re.escape(prop)})
|
% {'prop': re.escape(prop)})
|
||||||
template = r'<meta[^>]+?%s[^>]+?%s'
|
template = r'<meta[^>]+?%s[^>]+?%s'
|
||||||
|
@ -1348,6 +1379,44 @@ class InfoExtractor(object):
|
||||||
break
|
break
|
||||||
return dict((k, v) for k, v in info.items() if v is not None)
|
return dict((k, v) for k, v in info.items() if v is not None)
|
||||||
|
|
||||||
|
def _search_nextjs_data(self, webpage, video_id, **kw):
|
||||||
|
nkw = dict((k, v) for k, v in kw.items() if k in ('transform_source', 'fatal'))
|
||||||
|
kw.pop('transform_source', None)
|
||||||
|
next_data = self._search_regex(
|
||||||
|
r'''<script[^>]+\bid\s*=\s*('|")__NEXT_DATA__\1[^>]*>(?P<nd>[^<]+)</script>''',
|
||||||
|
webpage, 'next.js data', group='nd', **kw)
|
||||||
|
if not next_data:
|
||||||
|
return {}
|
||||||
|
return self._parse_json(next_data, video_id, **nkw)
|
||||||
|
|
||||||
|
def _search_nuxt_data(self, webpage, video_id, *args, **kwargs):
|
||||||
|
"""Parses Nuxt.js metadata. This works as long as the function __NUXT__ invokes is a pure function"""
|
||||||
|
|
||||||
|
# self, webpage, video_id, context_name='__NUXT__', *, fatal=True, traverse=('data', 0)
|
||||||
|
context_name = args[0] if len(args) > 0 else kwargs.get('context_name', '__NUXT__')
|
||||||
|
fatal = kwargs.get('fatal', True)
|
||||||
|
traverse = kwargs.get('traverse', ('data', 0))
|
||||||
|
|
||||||
|
re_ctx = re.escape(context_name)
|
||||||
|
|
||||||
|
FUNCTION_RE = (r'\(\s*function\s*\((?P<arg_keys>[\s\S]*?)\)\s*\{\s*'
|
||||||
|
r'return\s+(?P<js>\{[\s\S]*?})\s*;?\s*}\s*\((?P<arg_vals>[\s\S]*?)\)')
|
||||||
|
|
||||||
|
js, arg_keys, arg_vals = self._search_regex(
|
||||||
|
(p.format(re_ctx, FUNCTION_RE) for p in
|
||||||
|
(r'<script>\s*window\s*\.\s*{0}\s*=\s*{1}\s*\)\s*;?\s*</script>',
|
||||||
|
r'{0}\s*\([\s\S]*?{1}')),
|
||||||
|
webpage, context_name, group=('js', 'arg_keys', 'arg_vals'),
|
||||||
|
default=NO_DEFAULT if fatal else (None, None, None))
|
||||||
|
if js is None:
|
||||||
|
return {}
|
||||||
|
|
||||||
|
args = dict(zip(arg_keys.split(','), map(json.dumps, self._parse_json(
|
||||||
|
'[{0}]'.format(arg_vals), video_id, transform_source=js_to_json, fatal=fatal) or ())))
|
||||||
|
|
||||||
|
ret = self._parse_json(js, video_id, transform_source=functools.partial(js_to_json, vars=args), fatal=fatal)
|
||||||
|
return traverse_obj(ret, traverse) or {}
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _hidden_inputs(html):
|
def _hidden_inputs(html):
|
||||||
html = re.sub(r'<!--(?:(?!<!--).)*-->', '', html)
|
html = re.sub(r'<!--(?:(?!<!--).)*-->', '', html)
|
||||||
|
@ -2495,7 +2564,8 @@ class InfoExtractor(object):
|
||||||
return f
|
return f
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
def _media_formats(src, cur_media_type, type_info={}):
|
def _media_formats(src, cur_media_type, type_info=None):
|
||||||
|
type_info = type_info or {}
|
||||||
full_url = absolute_url(src)
|
full_url = absolute_url(src)
|
||||||
ext = type_info.get('ext') or determine_ext(full_url)
|
ext = type_info.get('ext') or determine_ext(full_url)
|
||||||
if ext == 'm3u8':
|
if ext == 'm3u8':
|
||||||
|
@ -2513,6 +2583,7 @@ class InfoExtractor(object):
|
||||||
formats = [{
|
formats = [{
|
||||||
'url': full_url,
|
'url': full_url,
|
||||||
'vcodec': 'none' if cur_media_type == 'audio' else None,
|
'vcodec': 'none' if cur_media_type == 'audio' else None,
|
||||||
|
'ext': ext,
|
||||||
}]
|
}]
|
||||||
return is_plain_url, formats
|
return is_plain_url, formats
|
||||||
|
|
||||||
|
@ -2521,7 +2592,7 @@ class InfoExtractor(object):
|
||||||
# so we will include them right here (see
|
# so we will include them right here (see
|
||||||
# https://www.ampproject.org/docs/reference/components/amp-video)
|
# https://www.ampproject.org/docs/reference/components/amp-video)
|
||||||
# For dl8-* tags see https://delight-vr.com/documentation/dl8-video/
|
# For dl8-* tags see https://delight-vr.com/documentation/dl8-video/
|
||||||
_MEDIA_TAG_NAME_RE = r'(?:(?:amp|dl8(?:-live)?)-)?(video|audio)'
|
_MEDIA_TAG_NAME_RE = r'(?:(?:amp|dl8(?:-live)?)-)?(video(?:-js)?|audio)'
|
||||||
media_tags = [(media_tag, media_tag_name, media_type, '')
|
media_tags = [(media_tag, media_tag_name, media_type, '')
|
||||||
for media_tag, media_tag_name, media_type
|
for media_tag, media_tag_name, media_type
|
||||||
in re.findall(r'(?s)(<(%s)[^>]*/>)' % _MEDIA_TAG_NAME_RE, webpage)]
|
in re.findall(r'(?s)(<(%s)[^>]*/>)' % _MEDIA_TAG_NAME_RE, webpage)]
|
||||||
|
@ -2539,7 +2610,8 @@ class InfoExtractor(object):
|
||||||
media_attributes = extract_attributes(media_tag)
|
media_attributes = extract_attributes(media_tag)
|
||||||
src = strip_or_none(media_attributes.get('src'))
|
src = strip_or_none(media_attributes.get('src'))
|
||||||
if src:
|
if src:
|
||||||
_, formats = _media_formats(src, media_type)
|
f = parse_content_type(media_attributes.get('type'))
|
||||||
|
_, formats = _media_formats(src, media_type, f)
|
||||||
media_info['formats'].extend(formats)
|
media_info['formats'].extend(formats)
|
||||||
media_info['thumbnail'] = absolute_url(media_attributes.get('poster'))
|
media_info['thumbnail'] = absolute_url(media_attributes.get('poster'))
|
||||||
if media_content:
|
if media_content:
|
||||||
|
|
204
youtube_dl/extractor/dlf.py
Normal file
204
youtube_dl/extractor/dlf.py
Normal file
|
@ -0,0 +1,204 @@
|
||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import (
|
||||||
|
compat_str,
|
||||||
|
)
|
||||||
|
from ..utils import (
|
||||||
|
determine_ext,
|
||||||
|
extract_attributes,
|
||||||
|
int_or_none,
|
||||||
|
merge_dicts,
|
||||||
|
traverse_obj,
|
||||||
|
url_or_none,
|
||||||
|
variadic,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class DLFBaseIE(InfoExtractor):
    """Shared URL base, button pattern and button parser for the DLF extractors."""

    _VALID_URL_BASE = r'https?://(?:www\.)?deutschlandfunk\.de/'
    _BUTTON_REGEX = r'(<button[^>]+alt="Anhören"[^>]+data-audio-diraid[^>]*>)'

    def _parse_button_attrs(self, button, audio_id=None):
        """Build an info dict from the attributes of one audio `<button>` tag.

        button   -- HTML text of the opening button tag (as matched by
                    `_BUTTON_REGEX`)
        audio_id -- ID to use for the result; when falsy, the tag's
                    `data-audio-diraid` attribute is used instead

        Returns the metadata read from the `data-audio*` attributes merged
        with the `id` and `formats` entries.
        """
        attrs = extract_attributes(button)
        audio_id = audio_id or attrs['data-audio-diraid']

        # first attribute holding a valid URL wins
        url = traverse_obj(
            attrs, 'data-audio-download-src', 'data-audio', 'data-audioreference',
            'data-audio-src', expected_type=url_or_none)

        if not url:
            # No usable URL: do not fabricate a format with 'url': None;
            # leave the list empty so that _sort_formats() below deals with
            # the missing formats instead of receiving a None URL
            formats = []
        else:
            ext = determine_ext(url)
            if ext == 'm3u8':
                formats = self._extract_m3u8_formats(url, audio_id, fatal=False)
            else:
                formats = [{'url': url, 'ext': ext, 'vcodec': 'none'}]
        self._sort_formats(formats)

        def traverse_attrs(path):
            # a trailing callable in the path spec is the expected_type
            # filter rather than a key
            path = list(variadic(path))
            t = path.pop() if callable(path[-1]) else None
            return traverse_obj(attrs, path, expected_type=t, get_all=False)

        def txt_or_none(v, default=None):
            # stripped text, or `default` for None/blank values
            return default if v is None else (compat_str(v).strip() or default)

        metadata = dict((k, traverse_attrs(v)) for k, v in {
            'title': (('data-audiotitle', 'data-audio-title', 'data-audio-download-tracking-title'), txt_or_none),
            'duration': (('data-audioduration', 'data-audio-duration'), int_or_none),
            'thumbnail': ('data-audioimage', url_or_none),
            'uploader': 'data-audio-producer',
            'series': 'data-audio-series',
            'channel': 'data-audio-origin-site-name',
            'webpage_url': ('data-audio-download-tracking-path', url_or_none),
        }.items())

        # metadata is passed first, exactly as the original
        # merge_dicts(*reversed([...])) call did
        return merge_dicts(metadata, {
            'id': audio_id,
            # 'extractor_key': DLFIE.ie_key(),
            # 'extractor': DLFIE.IE_NAME,
            'formats': formats,
        })
|
||||||
|
|
||||||
|
|
||||||
|
class DLFIE(DLFBaseIE):
    """Extractor for a single DLF audio page (`...-dlf-<8 hex digits>-100.html`)."""

    IE_NAME = 'dlf'
    _VALID_URL = DLFBaseIE._VALID_URL_BASE + r'[\w-]+-dlf-(?P<id>[\da-f]{8})-100\.html'
    _TESTS = [
        # Audio as an HLS stream
        {
            'url': 'https://www.deutschlandfunk.de/tanz-der-saiteninstrumente-das-wild-strings-trio-aus-slowenien-dlf-03a3eb19-100.html',
            'info_dict': {
                'id': '03a3eb19',
                'title': r're:Tanz der Saiteninstrumente [-/] Das Wild Strings Trio aus Slowenien',
                'ext': 'm4a',
                'duration': 3298,
                'thumbnail': 'https://assets.deutschlandfunk.de/FALLBACK-IMAGE-AUDIO/512x512.png?t=1603714364673',
                'uploader': 'Deutschlandfunk',
                'series': 'On Stage',
                'channel': 'deutschlandfunk',
            },
            'params': {
                'skip_download': 'm3u8',
            },
            'skip': 'This webpage no longer exists',
        },
        {
            'url': 'https://www.deutschlandfunk.de/russische-athleten-kehren-zurueck-auf-die-sportbuehne-ein-gefaehrlicher-tueroeffner-dlf-d9cc1856-100.html',
            'info_dict': {
                'id': 'd9cc1856',
                'title': 'Russische Athleten kehren zurück auf die Sportbühne: Ein gefährlicher Türöffner',
                'ext': 'mp3',
                'duration': 291,
                'thumbnail': 'https://assets.deutschlandfunk.de/FALLBACK-IMAGE-AUDIO/512x512.png?t=1603714364673',
                'uploader': 'Deutschlandfunk',
                'series': 'Kommentare und Themen der Woche',
                'channel': 'deutschlandfunk',
            },
        },
    ]

    def _real_extract(self, url):
        """Download the page and parse its first matching audio button."""
        audio_id = self._match_id(url)
        page = self._download_webpage(url, audio_id)
        button_tag = self._search_regex(self._BUTTON_REGEX, page, 'button')
        return self._parse_button_attrs(button_tag, audio_id)
|
||||||
|
|
||||||
|
|
||||||
|
class DLFCorpusIE(DLFBaseIE):
    """Playlist extractor for DLF pages that carry several audio buttons."""

    IE_NAME = 'dlf:corpus'
    IE_DESC = 'DLF Multi-feed Archives'
    _VALID_URL = DLFBaseIE._VALID_URL_BASE + r'(?P<id>(?![\w-]+-dlf-[\da-f]{8})[\w-]+-\d+)\.html'
    _TESTS = [
        # Recorded news broadcast with referrals to related broadcasts
        {
            'url': 'https://www.deutschlandfunk.de/fechten-russland-belarus-ukraine-protest-100.html',
            'info_dict': {
                'id': 'fechten-russland-belarus-ukraine-protest-100',
                'title': r're:Wiederzulassung als neutrale Athleten [-/] Was die Rückkehr russischer und belarussischer Sportler beim Fechten bedeutet',
                'description': 'md5:91340aab29c71aa7518ad5be13d1e8ad',
            },
            'playlist_mincount': 5,
            'playlist': [
                {
                    'info_dict': {
                        'id': '1fc5d64a',
                        'title': r're:Wiederzulassung als neutrale Athleten [-/] Was die Rückkehr russischer und belarussischer Sportler beim Fechten bedeutet',
                        'ext': 'mp3',
                        'duration': 252,
                        'thumbnail': 'https://assets.deutschlandfunk.de/aad16241-6b76-4a09-958b-96d0ee1d6f57/512x512.jpg?t=1679480020313',
                        'uploader': 'Deutschlandfunk',
                        'series': 'Sport',
                        'channel': 'deutschlandfunk',
                    },
                },
                {
                    'info_dict': {
                        'id': '2ada145f',
                        'title': r're:(?:Sportpolitik / )?Fechtverband votiert für Rückkehr russischer Athleten',
                        'ext': 'mp3',
                        'duration': 336,
                        'thumbnail': 'https://assets.deutschlandfunk.de/FILE_93982766f7317df30409b8a184ac044a/512x512.jpg?t=1678547581005',
                        'uploader': 'Deutschlandfunk',
                        'series': 'Deutschlandfunk Nova',
                        'channel': 'deutschlandfunk-nova',
                    },
                },
                {
                    'info_dict': {
                        'id': '5e55e8c9',
                        'title': r're:Wiederzulassung von Russland und Belarus [-/] "Herumlavieren" des Fechter-Bundes sorgt für Unverständnis',
                        'ext': 'mp3',
                        'duration': 187,
                        'thumbnail': 'https://assets.deutschlandfunk.de/a595989d-1ed1-4a2e-8370-b64d7f11d757/512x512.jpg?t=1679173825412',
                        'uploader': 'Deutschlandfunk',
                        'series': 'Sport am Samstag',
                        'channel': 'deutschlandfunk',
                    },
                },
                {
                    'info_dict': {
                        'id': '47e1a096',
                        'title': r're:Rückkehr Russlands im Fechten [-/] "Fassungslos, dass es einfach so passiert ist"',
                        'ext': 'mp3',
                        'duration': 602,
                        'thumbnail': 'https://assets.deutschlandfunk.de/da4c494a-21cc-48b4-9cc7-40e09fd442c2/512x512.jpg?t=1678562155770',
                        'uploader': 'Deutschlandfunk',
                        'series': 'Sport am Samstag',
                        'channel': 'deutschlandfunk',
                    },
                },
                {
                    'info_dict': {
                        'id': '5e55e8c9',
                        'title': r're:Wiederzulassung von Russland und Belarus [-/] "Herumlavieren" des Fechter-Bundes sorgt für Unverständnis',
                        'ext': 'mp3',
                        'duration': 187,
                        'thumbnail': 'https://assets.deutschlandfunk.de/a595989d-1ed1-4a2e-8370-b64d7f11d757/512x512.jpg?t=1679173825412',
                        'uploader': 'Deutschlandfunk',
                        'series': 'Sport am Samstag',
                        'channel': 'deutschlandfunk',
                    },
                },
            ],
        },
        # Podcast feed with tag buttons, playlist count fluctuates
        {
            'url': 'https://www.deutschlandfunk.de/kommentare-und-themen-der-woche-100.html',
            'info_dict': {
                'id': 'kommentare-und-themen-der-woche-100',
                'title': 'Meinung - Kommentare und Themen der Woche',
                'description': 'md5:2901bbd65cd2d45e116d399a099ce5d5',
            },
            'playlist_mincount': 10,
        },
        # Podcast feed with no description
        {
            'url': 'https://www.deutschlandfunk.de/podcast-tolle-idee-100.html',
            'info_dict': {
                'id': 'podcast-tolle-idee-100',
                'title': 'Wissenschaftspodcast - Tolle Idee! - Was wurde daraus?',
            },
            'playlist_mincount': 11,
        },
    ]

    def _real_extract(self, url):
        """Return a playlist with one entry per audio button found on the page."""
        playlist_id = self._match_id(url)
        page = self._download_webpage(url, playlist_id)

        # every matching button on the page becomes one playlist entry
        button_tags = re.findall(self._BUTTON_REGEX, page)
        entries = map(self._parse_button_attrs, button_tags)

        playlist_title = self._html_search_meta(
            ['og:title', 'twitter:title'], page, default=None)
        playlist_description = self._html_search_meta(
            ['description', 'og:description', 'twitter:description'], page, default=None)

        return self.playlist_result(
            entries, playlist_id, playlist_title, playlist_description)
|
|
@ -138,6 +138,7 @@ from .bleacherreport import (
|
||||||
BleacherReportIE,
|
BleacherReportIE,
|
||||||
BleacherReportCMSIE,
|
BleacherReportCMSIE,
|
||||||
)
|
)
|
||||||
|
from .blerp import BlerpIE
|
||||||
from .bloomberg import BloombergIE
|
from .bloomberg import BloombergIE
|
||||||
from .bokecc import BokeCCIE
|
from .bokecc import BokeCCIE
|
||||||
from .bongacams import BongaCamsIE
|
from .bongacams import BongaCamsIE
|
||||||
|
@ -158,6 +159,7 @@ from .businessinsider import BusinessInsiderIE
|
||||||
from .buzzfeed import BuzzFeedIE
|
from .buzzfeed import BuzzFeedIE
|
||||||
from .byutv import BYUtvIE
|
from .byutv import BYUtvIE
|
||||||
from .c56 import C56IE
|
from .c56 import C56IE
|
||||||
|
from .callin import CallinIE
|
||||||
from .camdemy import (
|
from .camdemy import (
|
||||||
CamdemyIE,
|
CamdemyIE,
|
||||||
CamdemyFolderIE
|
CamdemyFolderIE
|
||||||
|
@ -224,6 +226,7 @@ from .ciscolive import (
|
||||||
CiscoLiveSearchIE,
|
CiscoLiveSearchIE,
|
||||||
)
|
)
|
||||||
from .cjsw import CJSWIE
|
from .cjsw import CJSWIE
|
||||||
|
from .clipchamp import ClipchampIE
|
||||||
from .cliphunter import CliphunterIE
|
from .cliphunter import CliphunterIE
|
||||||
from .clippit import ClippitIE
|
from .clippit import ClippitIE
|
||||||
from .cliprs import ClipRsIE
|
from .cliprs import ClipRsIE
|
||||||
|
@ -293,6 +296,10 @@ from .dbtv import DBTVIE
|
||||||
from .dctp import DctpTvIE
|
from .dctp import DctpTvIE
|
||||||
from .deezer import DeezerPlaylistIE
|
from .deezer import DeezerPlaylistIE
|
||||||
from .democracynow import DemocracynowIE
|
from .democracynow import DemocracynowIE
|
||||||
|
from .dlf import (
|
||||||
|
DLFCorpusIE,
|
||||||
|
DLFIE,
|
||||||
|
)
|
||||||
from .dfb import DFBIE
|
from .dfb import DFBIE
|
||||||
from .dhm import DHMIE
|
from .dhm import DHMIE
|
||||||
from .digg import DiggIE
|
from .digg import DiggIE
|
||||||
|
@ -374,6 +381,8 @@ from .fc2 import (
|
||||||
FC2EmbedIE,
|
FC2EmbedIE,
|
||||||
)
|
)
|
||||||
from .fczenit import FczenitIE
|
from .fczenit import FczenitIE
|
||||||
|
from .filemoon import FileMoonIE
|
||||||
|
from .fifa import FifaIE
|
||||||
from .filmon import (
|
from .filmon import (
|
||||||
FilmOnIE,
|
FilmOnIE,
|
||||||
FilmOnChannelIE,
|
FilmOnChannelIE,
|
||||||
|
@ -440,6 +449,13 @@ from .gfycat import GfycatIE
|
||||||
from .giantbomb import GiantBombIE
|
from .giantbomb import GiantBombIE
|
||||||
from .giga import GigaIE
|
from .giga import GigaIE
|
||||||
from .glide import GlideIE
|
from .glide import GlideIE
|
||||||
|
from .globalplayer import (
|
||||||
|
GlobalPlayerLiveIE,
|
||||||
|
GlobalPlayerLivePlaylistIE,
|
||||||
|
GlobalPlayerAudioIE,
|
||||||
|
GlobalPlayerAudioEpisodeIE,
|
||||||
|
GlobalPlayerVideoIE
|
||||||
|
)
|
||||||
from .globo import (
|
from .globo import (
|
||||||
GloboIE,
|
GloboIE,
|
||||||
GloboArticleIE,
|
GloboArticleIE,
|
||||||
|
@ -553,6 +569,7 @@ from .khanacademy import (
|
||||||
from .kickstarter import KickStarterIE
|
from .kickstarter import KickStarterIE
|
||||||
from .kinja import KinjaEmbedIE
|
from .kinja import KinjaEmbedIE
|
||||||
from .kinopoisk import KinoPoiskIE
|
from .kinopoisk import KinoPoiskIE
|
||||||
|
from .kommunetv import KommunetvIE
|
||||||
from .konserthusetplay import KonserthusetPlayIE
|
from .konserthusetplay import KonserthusetPlayIE
|
||||||
from .krasview import KrasViewIE
|
from .krasview import KrasViewIE
|
||||||
from .kth import KTHIE
|
from .kth import KTHIE
|
||||||
|
@ -725,6 +742,7 @@ from .myvi import (
|
||||||
MyviIE,
|
MyviIE,
|
||||||
MyviEmbedIE,
|
MyviEmbedIE,
|
||||||
)
|
)
|
||||||
|
from .myvideoge import MyVideoGeIE
|
||||||
from .myvidster import MyVidsterIE
|
from .myvidster import MyVidsterIE
|
||||||
from .nationalgeographic import (
|
from .nationalgeographic import (
|
||||||
NationalGeographicVideoIE,
|
NationalGeographicVideoIE,
|
||||||
|
@ -969,6 +987,10 @@ from .pornhub import (
|
||||||
from .pornotube import PornotubeIE
|
from .pornotube import PornotubeIE
|
||||||
from .pornovoisines import PornoVoisinesIE
|
from .pornovoisines import PornoVoisinesIE
|
||||||
from .pornoxo import PornoXOIE
|
from .pornoxo import PornoXOIE
|
||||||
|
from .pr0gramm import (
|
||||||
|
Pr0grammIE,
|
||||||
|
Pr0grammStaticIE,
|
||||||
|
)
|
||||||
from .puhutv import (
|
from .puhutv import (
|
||||||
PuhuTVIE,
|
PuhuTVIE,
|
||||||
PuhuTVSerieIE,
|
PuhuTVSerieIE,
|
||||||
|
@ -1006,6 +1028,10 @@ from .raywenderlich import (
|
||||||
RayWenderlichIE,
|
RayWenderlichIE,
|
||||||
RayWenderlichCourseIE,
|
RayWenderlichCourseIE,
|
||||||
)
|
)
|
||||||
|
from .rbgtum import (
|
||||||
|
RbgTumIE,
|
||||||
|
RbgTumCourseIE,
|
||||||
|
)
|
||||||
from .rbmaradio import RBMARadioIE
|
from .rbmaradio import RBMARadioIE
|
||||||
from .rds import RDSIE
|
from .rds import RDSIE
|
||||||
from .redbulltv import (
|
from .redbulltv import (
|
||||||
|
@ -1061,6 +1087,10 @@ from .rutube import (
|
||||||
from .rutv import RUTVIE
|
from .rutv import RUTVIE
|
||||||
from .ruutu import RuutuIE
|
from .ruutu import RuutuIE
|
||||||
from .ruv import RuvIE
|
from .ruv import RuvIE
|
||||||
|
from .s4c import (
|
||||||
|
S4CIE,
|
||||||
|
S4CSeriesIE,
|
||||||
|
)
|
||||||
from .safari import (
|
from .safari import (
|
||||||
SafariIE,
|
SafariIE,
|
||||||
SafariApiIE,
|
SafariApiIE,
|
||||||
|
@ -1196,6 +1226,7 @@ from .storyfire import (
|
||||||
from .streamable import StreamableIE
|
from .streamable import StreamableIE
|
||||||
from .streamcloud import StreamcloudIE
|
from .streamcloud import StreamcloudIE
|
||||||
from .streamcz import StreamCZIE
|
from .streamcz import StreamCZIE
|
||||||
|
from .streamsb import StreamsbIE
|
||||||
from .streetvoice import StreetVoiceIE
|
from .streetvoice import StreetVoiceIE
|
||||||
from .stretchinternet import StretchInternetIE
|
from .stretchinternet import StretchInternetIE
|
||||||
from .stv import STVPlayerIE
|
from .stv import STVPlayerIE
|
||||||
|
@ -1554,6 +1585,7 @@ from .weibo import (
|
||||||
WeiboMobileIE
|
WeiboMobileIE
|
||||||
)
|
)
|
||||||
from .weiqitv import WeiqiTVIE
|
from .weiqitv import WeiqiTVIE
|
||||||
|
from .whyp import WhypIE
|
||||||
from .wistia import (
|
from .wistia import (
|
||||||
WistiaIE,
|
WistiaIE,
|
||||||
WistiaPlaylistIE,
|
WistiaPlaylistIE,
|
||||||
|
|
101
youtube_dl/extractor/fifa.py
Normal file
101
youtube_dl/extractor/fifa.py
Normal file
|
@ -0,0 +1,101 @@
|
||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
traverse_obj,
|
||||||
|
unified_timestamp,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Compatibility shim: when the imported InfoExtractor has no callable
# `_match_valid_url`, shadow it with a subclass that provides one.
if not callable(getattr(InfoExtractor, '_match_valid_url', None)):

    import re

    BaseInfoExtractor = InfoExtractor

    class InfoExtractor(BaseInfoExtractor):
        """InfoExtractor extended with a `_match_valid_url` classmethod."""

        @classmethod
        def _match_valid_url(cls, url):
            """Return the result of matching `url` against `cls._VALID_URL`."""
            pattern = cls._VALID_URL
            return re.match(pattern, url)
|
||||||
|
|
||||||
|
|
||||||
|
class FifaIE(InfoExtractor):
    """Extractor for FIFA+ watch pages (`www.fifa.com/fifaplus/<locale>/watch/...`)."""

    # dots in the host name are escaped so that they only match literal dots
    _VALID_URL = r'https?://www\.fifa\.com/fifaplus/(?P<locale>\w{2})/watch/([^#?]+/)?(?P<id>\w+)'
    _TESTS = [{
        'url': 'https://www.fifa.com/fifaplus/en/watch/7on10qPcnyLajDDU3ntg6y',
        'info_dict': {
            'id': '7on10qPcnyLajDDU3ntg6y',
            'title': 'Italy v France | Final | 2006 FIFA World Cup Germany™ | Full Match Replay',
            'description': 'md5:f4520d0ee80529c8ba4134a7d692ff8b',
            'ext': 'mp4',
            'categories': ['FIFA Tournaments'],
            'thumbnail': 'https://digitalhub.fifa.com/transform/135e2656-3a51-407b-8810-6c34bec5b59b/FMR_2006_Italy_France_Final_Hero',
            'duration': 8165,
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://www.fifa.com/fifaplus/pt/watch/1cg5r5Qt6Qt12ilkDgb1sV',
        'info_dict': {
            'id': '1cg5r5Qt6Qt12ilkDgb1sV',
            'title': 'Brazil v Germany | Semi-finals | 2014 FIFA World Cup Brazil™ | Extended Highlights',
            'description': 'md5:d908c74ee66322b804ae2e521b02a855',
            'ext': 'mp4',
            'categories': ['FIFA Tournaments', 'Highlights'],
            'thumbnail': 'https://digitalhub.fifa.com/transform/d8fe6f61-276d-4a73-a7fe-6878a35fd082/FIFAPLS_100EXTHL_2014BRAvGER_TMB',
            'duration': 902,
            'release_timestamp': 1404777600,
            'release_date': '20140708',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://www.fifa.com/fifaplus/fr/watch/3C6gQH9C2DLwzNx7BMRQdp',
        'info_dict': {
            'id': '3C6gQH9C2DLwzNx7BMRQdp',
            'title': 'Josimar goal against Northern Ireland | Classic Goals',
            'description': 'md5:cbe7e7bb52f603c9f1fe9a4780fe983b',
            'ext': 'mp4',
            'categories': ['FIFA Tournaments', 'Goal'],
            'duration': 28,
            'thumbnail': 'https://digitalhub.fifa.com/transform/f9301391-f8d9-48b5-823e-c093ac5e3e11/CG_MEN_1986_JOSIMAR',
        },
        'params': {'skip_download': 'm3u8'},
    }]

    def _real_extract(self, url):
        """Resolve the page's API host, fetch metadata and the HLS manifest.

        Returns an info dict with the m3u8 formats; the metadata fields are
        taken from the `videoDetails` API response when it is available.
        """
        video_id = self._match_valid_url(url).group('id')
        webpage = self._download_webpage(url, video_id)

        # the API base URL is taken from the page's preconnect <link>
        preconnect_link = self._search_regex(
            r'<link\b[^>]+\brel\s*=\s*"preconnect"[^>]+href\s*=\s*"([^"]+)"', webpage, 'Preconnect Link')

        # The details request is non-fatal, so its result may not be a dict
        # when the download fails; fall back to an empty dict instead of
        # crashing on the metadata lookups below
        video_details = self._download_json(
            '{0}/sections/videoDetails/{1}'.format(preconnect_link, video_id),
            video_id, 'Downloading Video Details', fatal=False) or {}

        preplay_parameters = self._download_json(
            '{0}/videoPlayerData/{1}'.format(preconnect_link, video_id),
            video_id, 'Downloading Preplay Parameters')['preplayParameters']

        content_data = self._download_json(
            # 1. query string is expected to be sent as-is
            # 2. `sig` must be appended
            # 3. if absent, the call appears to work but the manifest is bad (404)
            'https://content.uplynk.com/preplay/{contentId}/multiple.json?{queryStr}&sig={signature}'.format(**preplay_parameters),
            video_id, 'Downloading Content Data')

        # formats, subtitles = self._extract_m3u8_formats_and_subtitles(content_data['playURL'], video_id)
        formats, subtitles = self._extract_m3u8_formats(content_data['playURL'], video_id, ext='mp4', entry_protocol='m3u8_native'), None
        self._sort_formats(formats)

        return {
            'id': video_id,
            # use the page's OpenGraph title when the API gave no title
            'title': video_details.get('title') or self._og_search_title(webpage),
            'description': video_details.get('description'),
            'duration': int_or_none(video_details.get('duration')),
            'release_timestamp': unified_timestamp(video_details.get('dateOfRelease')),
            'categories': traverse_obj(video_details, (('videoCategory', 'videoSubcategory'),)),
            'thumbnail': traverse_obj(video_details, ('backgroundImage', 'src')),
            'formats': formats,
            'subtitles': subtitles,
        }
|
43
youtube_dl/extractor/filemoon.py
Normal file
43
youtube_dl/extractor/filemoon.py
Normal file
|
@ -0,0 +1,43 @@
|
||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re

from .common import InfoExtractor
from ..utils import (
    decode_packed_codes,
    ExtractorError,
    js_to_json,
)
|
||||||
|
|
||||||
|
|
||||||
|
class FileMoonIE(InfoExtractor):
    """Extractor for filemoon.sx pages whose player setup is shipped as packed JS."""

    _VALID_URL = r'https?://(?:www\.)?filemoon\.sx/./(?P<id>\w+)'
    _TEST = {
        'url': 'https://filemoon.sx/e/dw40rxrzruqz',
        'md5': '5a713742f57ac4aef29b74733e8dda01',
        'info_dict': {
            'id': 'dw40rxrzruqz',
            'title': 'dw40rxrzruqz',
            'ext': 'mp4'
        }
    }

    def _real_extract(self, url):
        """Unpack the last `eval(...)` script of the page and parse its sources.

        Raises ExtractorError when the page contains no such script.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)
        matches = re.findall(r'(?s)(eval.*?)</script>', webpage)
        if not matches:
            # report a proper extraction failure rather than an IndexError
            raise ExtractorError(
                'Unable to find packed player code', video_id=video_id)
        # the last match is the one that is unpacked
        packed = matches[-1]
        unpacked = decode_packed_codes(packed)
        jwplayer_sources = self._parse_json(
            self._search_regex(
                r'(?s)player\s*\.\s*setup\s*\(\s*\{\s*sources\s*:\s*(.*?])', unpacked, 'jwplayer sources'),
            video_id, transform_source=js_to_json)

        formats = self._parse_jwplayer_formats(jwplayer_sources, video_id)

        return {
            'id': video_id,
            'title': self._generic_title(url) or video_id,
            'formats': formats
        }
|
|
@ -2320,6 +2320,25 @@ class GenericIE(InfoExtractor):
|
||||||
'height': 720,
|
'height': 720,
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
# would like to use the yt-dl test video but searching for
|
||||||
|
# '"\'/\\ä↭𝕐' fails, so using an old vid from YouTube Korea
|
||||||
|
'note': 'Test default search',
|
||||||
|
'url': 'Shorts로 허락 필요없이 놀자! (BTS편)',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'usDGO4Zb-dc',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'YouTube Shorts로 허락 필요없이 놀자! (BTS편)',
|
||||||
|
'description': 'md5:96e31607eba81ab441567b5e289f4716',
|
||||||
|
'upload_date': '20211107',
|
||||||
|
'uploader': 'YouTube Korea',
|
||||||
|
'location': '대한민국',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'default_search': 'ytsearch',
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
'expected_warnings': ['uploader id'],
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
273
youtube_dl/extractor/globalplayer.py
Normal file
273
youtube_dl/extractor/globalplayer.py
Normal file
|
@ -0,0 +1,273 @@
|
||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
clean_html,
|
||||||
|
join_nonempty,
|
||||||
|
merge_dicts,
|
||||||
|
parse_duration,
|
||||||
|
str_or_none,
|
||||||
|
T,
|
||||||
|
traverse_obj,
|
||||||
|
unified_strdate,
|
||||||
|
unified_timestamp,
|
||||||
|
urlhandle_detect_ext,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class GlobalPlayerBaseIE(InfoExtractor):
    """Helpers shared by the Global Player extractors."""

    def _get_page_props(self, url, video_id):
        """Download `url` and return `props.pageProps` from `_search_nextjs_data`."""
        page = self._download_webpage(url, video_id)
        nextjs_data = self._search_nextjs_data(page, video_id)
        return nextjs_data['props']['pageProps']

    def _request_ext(self, url, video_id):
        """Request `url` and detect the extension from the response handle."""
        # Server rejects HEAD requests
        handle = self._request_webpage(
            url, video_id, note='Determining source extension')
        return urlhandle_detect_ext(handle)

    @staticmethod
    def _clean_desc(x):
        """Clean HTML from `x` and replace non-breaking spaces by plain spaces."""
        text = clean_html(x)
        if not text:
            # falsy results of clean_html are passed through unchanged
            return text
        return text.replace('\xa0', ' ')

    def _extract_audio(self, episode, series):
        """Merge the series-level and episode-level fields into one entry dict."""
        series_fields = traverse_obj(series, {
            'series': 'title',
            'series_id': 'id',
            'thumbnail': 'imageUrl',
            'uploader': 'itunesAuthor',  # podcasts only
        })
        episode_fields = traverse_obj(episode, {
            'id': 'id',
            'description': ('description', T(self._clean_desc)),
            'duration': ('duration', T(parse_duration)),
            'thumbnail': 'imageUrl',
            'url': 'streamUrl',
            'timestamp': (('pubDate', 'startDate'), T(unified_timestamp)),
            'title': 'title',
        }, get_all=False)

        return merge_dicts(
            {'vcodec': 'none'}, series_fields, episode_fields, rev=True)
|
||||||
|
|
||||||
|
|
||||||
|
class GlobalPlayerLiveIE(GlobalPlayerBaseIE):
    """Extractor for Global Player live station pages (`/live/<brand>/<region>`)."""

    _VALID_URL = r'https?://www\.globalplayer\.com/live/(?P<id>\w+)/\w+'
    _TESTS = [
        {
            'url': 'https://www.globalplayer.com/live/smoothchill/uk/',
            'info_dict': {
                'id': '2mx1E',
                'ext': 'aac',
                'display_id': 'smoothchill-uk',
                'title': 're:^Smooth Chill.+$',
                'thumbnail': 'https://herald.musicradio.com/media/f296ade8-50c9-4f60-911f-924e96873620.png',
                'description': 'Music To Chill To',
                # 'live_status': 'is_live',
                'is_live': True,
            },
        },
        {
            # national station
            'url': 'https://www.globalplayer.com/live/heart/uk/',
            'info_dict': {
                'id': '2mwx4',
                'ext': 'aac',
                'description': 'turn up the feel good!',
                'thumbnail': 'https://herald.musicradio.com/media/49b9e8cb-15bf-4bf2-8c28-a4850cc6b0f3.png',
                # 'live_status': 'is_live',
                'is_live': True,
                'title': 're:^Heart UK.+$',
                'display_id': 'heart-uk',
            },
        },
        {
            # regional variation
            'url': 'https://www.globalplayer.com/live/heart/london/',
            'info_dict': {
                'id': 'AMqg',
                'ext': 'aac',
                'thumbnail': 'https://herald.musicradio.com/media/49b9e8cb-15bf-4bf2-8c28-a4850cc6b0f3.png',
                'title': 're:^Heart London.+$',
                # 'live_status': 'is_live',
                'is_live': True,
                'display_id': 'heart-london',
                'description': 'turn up the feel good!',
            },
        },
    ]

    def _real_extract(self, url):
        """Build a live audio entry from the page's `station` properties."""
        video_id = self._match_id(url)
        station = self._get_page_props(url, video_id)['station']
        stream_url = station['streamUrl']

        # the values are computed in the same order as the fields they fill
        station_id = station['id']
        display_id = (
            join_nonempty('brandSlug', 'slug', from_dict=station)
            or station.get('legacyStationPrefix'))
        ext = self._request_ext(stream_url, video_id)
        station_name = traverse_obj(
            station, (('name', 'brandName'), T(str_or_none)),
            get_all=False)
        title_fields = {
            'title': self._live_title(station_name),
        }
        extra_fields = traverse_obj(station, {
            'description': 'tagline',
            'thumbnail': 'brandLogo',
        })

        return merge_dicts({
            'id': station_id,
            'display_id': display_id,
            'url': stream_url,
            'ext': ext,
            'vcodec': 'none',
            'is_live': True,
        }, title_fields, extra_fields, rev=True)
|
||||||
|
|
||||||
|
|
||||||
|
class GlobalPlayerLivePlaylistIE(GlobalPlayerBaseIE):
    """Extractor for Global Player "live playlist" pages (`/playlists/<id>`)."""

    _VALID_URL = r'https?://www\.globalplayer\.com/playlists/(?P<id>\w+)'
    _TESTS = [
        {
            # "live playlist"
            'url': 'https://www.globalplayer.com/playlists/8bLk/',
            'info_dict': {
                'id': '8bLk',
                'ext': 'aac',
                # 'live_status': 'is_live',
                'is_live': True,
                'description': r're:(?s).+\bclassical\b.+\bClassic FM Hall [oO]f Fame\b',
                'thumbnail': 'https://images.globalplayer.com/images/551379?width=450&signature=oMLPZIoi5_dBSHnTMREW0Xg76mA=',
                'title': 're:Classic FM Hall of Fame.+$',
            },
        },
    ]

    def _real_extract(self, url):
        """Build a live audio entry from the page's `playlistData` properties."""
        video_id = self._match_id(url)
        playlist_data = self._get_page_props(url, video_id)['playlistData']
        stream_url = playlist_data['streamUrl']

        stream_fields = {
            'id': video_id,
            'url': stream_url,
            'ext': self._request_ext(stream_url, video_id),
            'vcodec': 'none',
            'is_live': True,
        }
        meta_fields = traverse_obj(playlist_data, {
            'title': 'title',
            'description': ('description', T(self._clean_desc)),
            'thumbnail': 'image',
        })

        return merge_dicts(stream_fields, meta_fields, rev=True)
|
||||||
|
|
||||||
|
|
||||||
|
class GlobalPlayerAudioIE(GlobalPlayerBaseIE):
    """Playlist extractor for Global Player podcast and radio catch-up pages."""

    _VALID_URL = r'https?://www\.globalplayer\.com/(?:(?P<podcast>podcasts)/|catchup/\w+/\w+/)(?P<id>\w+)/?(?:$|[?#])'
    _TESTS = [
        {
            # podcast
            'url': 'https://www.globalplayer.com/podcasts/42KuaM/',
            'playlist_mincount': 5,
            'info_dict': {
                'id': '42KuaM',
                'title': 'Filthy Ritual',
                'thumbnail': 'md5:60286e7d12d795bd1bbc9efc6cee643e',
                'categories': ['Society & Culture', 'True Crime'],
                'uploader': 'Global',
                'description': r're:(?s).+\bscam\b.+?\bseries available now\b',
            },
        },
        {
            # radio catchup
            'url': 'https://www.globalplayer.com/catchup/lbc/uk/46vyD7z/',
            'playlist_mincount': 2,
            'info_dict': {
                'id': '46vyD7z',
                'description': 'Nick Ferrari At Breakfast is Leading Britain\'s Conversation.',
                'title': 'Nick Ferrari',
                'thumbnail': 'md5:4df24d8a226f5b2508efbcc6ae874ebf',
            },
        },
    ]

    def _real_extract(self, url):
        """Return a playlist with one entry per episode that has an ID and stream URL."""
        video_id, podcast = self._match_valid_url(url).group('id', 'podcast')
        props = self._get_page_props(url, video_id)
        # podcast pages and catch-up pages keep the series under different keys
        series = props['podcastInfo' if podcast else 'catchupInfo']

        episodes = traverse_obj(
            series, ('episodes', lambda _, v: v['id'] and v['streamUrl']))
        playlist_fields = {
            '_type': 'playlist',
            'id': video_id,
            'entries': [self._extract_audio(ep, series) for ep in episodes],
            'categories': traverse_obj(series, ('categories', Ellipsis, 'name')) or None,
        }
        series_fields = traverse_obj(series, {
            'description': ('description', T(self._clean_desc)),
            'thumbnail': 'imageUrl',
            'title': 'title',
            'uploader': 'itunesAuthor',  # podcasts only
        })

        return merge_dicts(playlist_fields, series_fields, rev=True)
|
||||||
|
|
||||||
|
|
||||||
|
class GlobalPlayerAudioEpisodeIE(GlobalPlayerBaseIE):
    # Single episode pages of a podcast or a radio catch-up show; the
    # `podcast` group is only set for /podcasts/ URLs
    _VALID_URL = r'https?://www\.globalplayer\.com/(?:(?P<podcast>podcasts)|catchup/\w+/\w+)/episodes/(?P<id>\w+)/?(?:$|[?#])'
    _TESTS = [{
        # podcast
        'url': 'https://www.globalplayer.com/podcasts/episodes/7DrfNnE/',
        'info_dict': {
            'id': '7DrfNnE',
            'ext': 'mp3',
            'title': 'Filthy Ritual - Trailer',
            'description': 'md5:1f1562fd0f01b4773b590984f94223e0',
            'thumbnail': 'md5:60286e7d12d795bd1bbc9efc6cee643e',
            'duration': 225.0,
            'timestamp': 1681254900,
            'series': 'Filthy Ritual',
            'series_id': '42KuaM',
            'upload_date': '20230411',
            'uploader': 'Global',
        },
    }, {
        # radio catchup
        'url': 'https://www.globalplayer.com/catchup/lbc/uk/episodes/2zGq26Vcv1fCWhddC4JAwETXWe/',
        'only_matching': True,
        # expired: refresh the details with a current show for a full test
        'info_dict': {
            'id': '2zGq26Vcv1fCWhddC4JAwETXWe',
            'ext': 'm4a',
            'timestamp': 1682056800,
            'series': 'Nick Ferrari',
            'thumbnail': 'md5:4df24d8a226f5b2508efbcc6ae874ebf',
            'upload_date': '20230421',
            'series_id': '46vyD7z',
            'description': 'Nick Ferrari At Breakfast is Leading Britain\'s Conversation.',
            'title': 'Nick Ferrari',
            'duration': 10800.0,
        },
    }]

    def _real_extract(self, url):
        """Extract one episode, passing its parent series record if present."""
        mobj = self._match_valid_url(url)
        episode_id, is_podcast = mobj.group('id'), mobj.group('podcast')
        page_props = self._get_page_props(url, episode_id)
        episode = page_props['podcastEpisode' if is_podcast else 'catchupEpisode']

        # the parent series is found under `podcast` or else `show`
        parent = traverse_obj(episode, 'podcast', 'show', expected_type=dict)
        return self._extract_audio(episode, parent or {})
|
||||||
|
|
||||||
|
|
||||||
|
class GlobalPlayerVideoIE(GlobalPlayerBaseIE):
    _VALID_URL = r'https?://www\.globalplayer\.com/videos/(?P<id>\w+)'
    _TESTS = [{
        'url': 'https://www.globalplayer.com/videos/2JsSZ7Gm2uP/',
        'info_dict': {
            'id': '2JsSZ7Gm2uP',
            'ext': 'mp4',
            'description': 'md5:6a9f063c67c42f218e42eee7d0298bfd',
            'thumbnail': 'md5:d4498af48e15aae4839ce77b97d39550',
            'upload_date': '20230420',
            'title': 'Treble Malakai Bayoh sings a sublime Handel aria at Classic FM Live',
        },
    }]

    def _real_extract(self, url):
        """Map the page's `videoData` property onto an info dict."""
        clip_id = self._match_id(url)
        video_data = self._get_page_props(url, clip_id)['videoData']

        clip_info = traverse_obj(video_data, {
            'url': 'url',
            'thumbnail': ('image', 'url'),
            'title': 'title',
            'upload_date': ('publish_date', T(unified_strdate)),
            'description': 'description',
        })
        return merge_dicts({'id': clip_id}, clip_info, rev=True)
|
|
@ -1,19 +1,29 @@
|
||||||
|
# coding: utf-8
|
||||||
|
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
|
compat_filter as filter,
|
||||||
|
compat_HTTPError,
|
||||||
compat_parse_qs,
|
compat_parse_qs,
|
||||||
compat_urllib_parse_urlparse,
|
compat_urlparse,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
HEADRequest,
|
|
||||||
determine_ext,
|
determine_ext,
|
||||||
|
error_to_compat_str,
|
||||||
|
extract_attributes,
|
||||||
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
merge_dicts,
|
||||||
|
orderedSet,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
strip_or_none,
|
strip_or_none,
|
||||||
try_get,
|
traverse_obj,
|
||||||
|
url_or_none,
|
||||||
|
urljoin,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -22,14 +32,102 @@ class IGNBaseIE(InfoExtractor):
|
||||||
return self._download_json(
|
return self._download_json(
|
||||||
'http://apis.ign.com/{0}/v3/{0}s/slug/{1}'.format(self._PAGE_TYPE, slug), slug)
|
'http://apis.ign.com/{0}/v3/{0}s/slug/{1}'.format(self._PAGE_TYPE, slug), slug)
|
||||||
|
|
||||||
|
def _checked_call_api(self, slug):
    """Call the API for `slug`, reporting HTTP 404 as an expected error.

    Any other failure is re-raised unchanged.
    """
    try:
        return self._call_api(slug)
    except ExtractorError as e:
        http_err = e.cause
        if not (isinstance(http_err, compat_HTTPError) and http_err.code == 404):
            raise
        # make sure the wrapped error carries some args
        http_err.args = http_err.args or [
            http_err.geturl(), http_err.getcode(), http_err.reason]
        raise ExtractorError(
            'Content not found: expired?', cause=http_err,
            expected=True)
|
||||||
|
|
||||||
|
def _extract_video_info(self, video, fatal=True):
    """Build an info dict from an IGN video record.

    video -- dict with `videoId` and, optionally, `refs`, `assets`,
             `system`, `thumbnails`, `tags` and `metadata`
    fatal -- when false, return None instead of failing if the record
             has no `videoId` or yields no formats

    Returns the info dict (without `display_id`), or None as above.
    Raises KeyError if `videoId` is missing and `fatal` is true.
    """
    try:
        video_id = video['videoId']
    except KeyError:
        # non-fatal callers may pass an empty record (`{}`): skip it
        # rather than aborting the whole extraction
        if fatal:
            raise
        return None

    formats = []
    refs = traverse_obj(video, 'refs', expected_type=dict) or {}

    m3u8_url = url_or_none(refs.get('m3uUrl'))
    if m3u8_url:
        formats.extend(self._extract_m3u8_formats(
            m3u8_url, video_id, 'mp4', 'm3u8_native',
            m3u8_id='hls', fatal=False))

    f4m_url = url_or_none(refs.get('f4mUrl'))
    if f4m_url:
        formats.extend(self._extract_f4m_formats(
            f4m_url, video_id, f4m_id='hds', fatal=False))

    # direct downloads; entries without a valid URL are ignored
    for asset in (video.get('assets') or []):
        asset_url = url_or_none(asset.get('url'))
        if not asset_url:
            continue
        formats.append({
            'url': asset_url,
            'tbr': int_or_none(asset.get('bitrate'), 1000),
            'fps': int_or_none(asset.get('frame_rate')),
            'height': int_or_none(asset.get('height')),
            'width': int_or_none(asset.get('width')),
        })

    mezzanine_url = traverse_obj(
        video, ('system', 'mezzanineUrl'), expected_type=url_or_none)
    if mezzanine_url:
        formats.append({
            'ext': determine_ext(mezzanine_url, 'mp4'),
            'format_id': 'mezzanine',
            'preference': 1,
            'url': mezzanine_url,
        })

    if not (formats or fatal):
        # nothing playable and the caller does not insist
        return None
    self._sort_formats(formats)

    thumbnails = traverse_obj(
        video, ('thumbnails', Ellipsis, {'url': 'url'}), expected_type=url_or_none)
    tags = traverse_obj(
        video, ('tags', Ellipsis, 'displayName'),
        expected_type=lambda x: x.strip() or None)

    metadata = traverse_obj(video, 'metadata', expected_type=dict) or {}
    # first non-blank of the candidate title fields
    title = traverse_obj(
        metadata, 'longTitle', 'title', 'name',
        expected_type=lambda x: x.strip() or None)

    return {
        'id': video_id,
        'title': title,
        'description': strip_or_none(metadata.get('description')),
        'timestamp': parse_iso8601(metadata.get('publishDate')),
        'duration': int_or_none(metadata.get('duration')),
        'thumbnails': thumbnails,
        'formats': formats,
        'tags': tags,
    }
|
||||||
|
|
||||||
|
# yt-dlp shim
@classmethod
def _extract_from_webpage(cls, url, webpage):
    """Yield a URL result for each distinct embed URL found in `webpage`."""
    found_urls = cls._extract_embed_urls(url, webpage) or []
    for found_url in orderedSet(found_urls, lazy=True):
        # no extractor is named when the class has `_VALID_URL = False`
        yield cls.url_result(found_url, cls if cls._VALID_URL is not False else None)
|
||||||
|
|
||||||
|
|
||||||
class IGNIE(IGNBaseIE):
|
class IGNIE(IGNBaseIE):
|
||||||
"""
|
"""
|
||||||
Extractor for some of the IGN sites, like www.ign.com, es.ign.com de.ign.com.
|
Extractor for some of the IGN sites, like www.ign.com, es.ign.com de.ign.com.
|
||||||
Some videos of it.ign.com are also supported
|
Some videos of it.ign.com are also supported
|
||||||
"""
|
"""
|
||||||
|
_VIDEO_PATH_RE = r'/(?:\d{4}/\d{2}/\d{2}/)?(?P<id>.+?)'
|
||||||
_VALID_URL = r'https?://(?:.+?\.ign|www\.pcmag)\.com/videos/(?:\d{4}/\d{2}/\d{2}/)?(?P<id>[^/?&#]+)'
|
_PLAYLIST_PATH_RE = r'(?:/?\?(?P<filt>[^&#]+))?'
|
||||||
|
_VALID_URL = (
|
||||||
|
r'https?://(?:.+?\.ign|www\.pcmag)\.com/videos(?:%s)'
|
||||||
|
% '|'.join((_VIDEO_PATH_RE + r'(?:[/?&#]|$)', _PLAYLIST_PATH_RE)))
|
||||||
IE_NAME = 'ign.com'
|
IE_NAME = 'ign.com'
|
||||||
_PAGE_TYPE = 'video'
|
_PAGE_TYPE = 'video'
|
||||||
|
|
||||||
|
@ -44,7 +142,10 @@ class IGNIE(IGNBaseIE):
|
||||||
'timestamp': 1370440800,
|
'timestamp': 1370440800,
|
||||||
'upload_date': '20130605',
|
'upload_date': '20130605',
|
||||||
'tags': 'count:9',
|
'tags': 'count:9',
|
||||||
}
|
},
|
||||||
|
'params': {
|
||||||
|
'nocheckcertificate': True,
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.pcmag.com/videos/2015/01/06/010615-whats-new-now-is-gogo-snooping-on-your-data',
|
'url': 'http://www.pcmag.com/videos/2015/01/06/010615-whats-new-now-is-gogo-snooping-on-your-data',
|
||||||
'md5': 'f1581a6fe8c5121be5b807684aeac3f6',
|
'md5': 'f1581a6fe8c5121be5b807684aeac3f6',
|
||||||
|
@ -56,86 +157,51 @@ class IGNIE(IGNBaseIE):
|
||||||
'timestamp': 1420571160,
|
'timestamp': 1420571160,
|
||||||
'upload_date': '20150106',
|
'upload_date': '20150106',
|
||||||
'tags': 'count:4',
|
'tags': 'count:4',
|
||||||
}
|
},
|
||||||
|
'skip': '404 Not Found',
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.ign.com/videos/is-a-resident-evil-4-remake-on-the-way-ign-daily-fix',
|
'url': 'https://www.ign.com/videos/is-a-resident-evil-4-remake-on-the-way-ign-daily-fix',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
@classmethod
def _extract_embed_urls(cls, url, webpage):
    """Return the absolute /videos/... links in the first content-feed grid.

    Only the first matching <section> is scanned; with no such section
    the result is empty.
    """
    grid_sections = re.findall(
        r'''(?s)<section\b[^>]+\bclass\s*=\s*['"](?:[\w-]+\s+)*?content-feed-grid(?!\B|-)[^>]+>(.+?)</section[^>]*>''',
        webpage)
    first_grid = grid_sections[0] if grid_sections else ''
    link_re = (
        r'''<a\b[^>]+\bhref\s*=\s*('|")(?P<path>/videos%s)\1'''
        % cls._VIDEO_PATH_RE)
    absolute_urls = (
        urljoin(url, link.group('path'))
        for link in re.finditer(link_re, first_grid))
    # discard links that could not be joined
    return filter(None, absolute_urls)
|
||||||
|
|
||||||
def _real_extract(self, url):
    """Dispatch to video or playlist extraction.

    A URL with a video slug (`id` group) is a single video; otherwise
    it is a listing, identified by its filter query or else 'all'.
    """
    mobj = re.match(self._VALID_URL, url)
    video_slug = mobj.group('id')
    if not video_slug:
        return self._extract_playlist(url, mobj.group('filt') or 'all')
    return self._extract_video(url, video_slug)
|
||||||
|
|
||||||
|
def _extract_playlist(self, url, display_id):
    """Return a playlist of URL results for the videos linked from the page."""
    webpage = self._download_webpage(url, display_id)
    video_urls = self._extract_embed_urls(url, webpage)
    entries = (
        self.url_result(video_url, ie=self.ie_key())
        for video_url in video_urls)
    return self.playlist_result(entries, playlist_id=display_id)
|
||||||
|
|
||||||
|
def _extract_video(self, url, display_id):
    """Fetch the API record for the video at `url` and build its info dict."""
    # NOTE: the slug passed in is replaced by a fresh match of the URL
    display_id = self._match_id(url)
    api_record = self._checked_call_api(display_id)
    video_info = self._extract_video_info(api_record)
    return merge_dicts({'display_id': display_id}, video_info)
|
|
||||||
|
|
||||||
|
|
||||||
class IGNVideoIE(InfoExtractor):
|
class IGNVideoIE(IGNBaseIE):
|
||||||
_VALID_URL = r'https?://.+?\.ign\.com/(?:[a-z]{2}/)?[^/]+/(?P<id>\d+)/(?:video|trailer)/'
|
_VALID_URL = r'https?://.+?\.ign\.com/(?:[a-z]{2}/)?[^/]+/(?P<id>\d+)/(?:video|trailer)/'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://me.ign.com/en/videos/112203/video/how-hitman-aims-to-be-different-than-every-other-s',
|
'url': 'http://me.ign.com/en/videos/112203/video/how-hitman-aims-to-be-different-than-every-other-s',
|
||||||
|
@ -147,7 +213,8 @@ class IGNVideoIE(InfoExtractor):
|
||||||
'description': 'Taking out assassination targets in Hitman has never been more stylish.',
|
'description': 'Taking out assassination targets in Hitman has never been more stylish.',
|
||||||
'timestamp': 1444665600,
|
'timestamp': 1444665600,
|
||||||
'upload_date': '20151012',
|
'upload_date': '20151012',
|
||||||
}
|
},
|
||||||
|
'expected_warnings': ['HTTP Error 400: Bad Request'],
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://me.ign.com/ar/angry-birds-2/106533/video/lrd-ldyy-lwl-lfylm-angry-birds',
|
'url': 'http://me.ign.com/ar/angry-birds-2/106533/video/lrd-ldyy-lwl-lfylm-angry-birds',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
@ -167,22 +234,38 @@ class IGNVideoIE(InfoExtractor):
|
||||||
|
|
||||||
def _real_extract(self, url):
    """Resolve a video page through its /embed variant.

    If the embed request ends at a URL carrying a `url` query parameter,
    that target is handed to IGNIE. Otherwise the embed page's
    `data-video-id` element is parsed; if there is none, the final URL
    is re-dispatched, unless it is the input URL (redirect loop).
    """
    video_id = self._match_id(url)
    # replace the last path component with `embed`
    url_parts = compat_urlparse.urlparse(url)
    embed_path = url_parts.path.rsplit('/', 1)[0] + '/embed'
    embed_url = compat_urlparse.urlunparse(url_parts._replace(path=embed_path))

    webpage, urlh = self._download_webpage_handle(embed_url, video_id)
    new_url = urlh.geturl()
    query = compat_parse_qs(compat_urlparse.urlparse(new_url).query)
    ign_url = query.get('url', [None])[-1]
    if ign_url:
        return self.url_result(ign_url, IGNIE.ie_key())

    video_tag = self._search_regex(r'(<div\b[^>]+\bdata-video-id\s*=\s*[^>]+>)', webpage, 'video element', fatal=False)
    if not video_tag:
        if new_url == url:
            raise ExtractorError('Redirect loop: ' + url)
        return self.url_result(new_url)

    tag_attrs = extract_attributes(video_tag)
    settings = self._parse_json(tag_attrs.get('data-settings') or '{}', video_id)
    video_info = self._extract_video_info(settings['video'])
    return merge_dicts({'display_id': video_id}, video_info)
|
||||||
|
|
||||||
|
|
||||||
class IGNArticleIE(IGNBaseIE):
|
class IGNArticleIE(IGNBaseIE):
|
||||||
_VALID_URL = r'https?://.+?\.ign\.com/(?:articles(?:/\d{4}/\d{2}/\d{2})?|(?:[a-z]{2}/)?feature/\d+)/(?P<id>[^/?&#]+)'
|
_VALID_URL = r'https?://.+?\.ign\.com/(?:articles(?:/\d{4}/\d{2}/\d{2})?|(?:[a-z]{2}/)?(?:[\w-]+/)*?feature/\d+)/(?P<id>[^/?&#]+)'
|
||||||
_PAGE_TYPE = 'article'
|
_PAGE_TYPE = 'article'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://me.ign.com/en/feature/15775/100-little-things-in-gta-5-that-will-blow-your-mind',
|
'url': 'http://me.ign.com/en/feature/15775/100-little-things-in-gta-5-that-will-blow-your-mind',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '524497489e4e8ff5848ece34',
|
'id': '72113',
|
||||||
'title': '100 Little Things in GTA 5 That Will Blow Your Mind',
|
'title': '100 Little Things in GTA 5 That Will Blow Your Mind',
|
||||||
},
|
},
|
||||||
'playlist': [
|
'playlist': [
|
||||||
|
@ -190,7 +273,7 @@ class IGNArticleIE(IGNBaseIE):
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '5ebbd138523268b93c9141af17bec937',
|
'id': '5ebbd138523268b93c9141af17bec937',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'GTA 5 Video Review',
|
'title': 'Grand Theft Auto V Video Review',
|
||||||
'description': 'Rockstar drops the mic on this generation of games. Watch our review of the masterly Grand Theft Auto V.',
|
'description': 'Rockstar drops the mic on this generation of games. Watch our review of the masterly Grand Theft Auto V.',
|
||||||
'timestamp': 1379339880,
|
'timestamp': 1379339880,
|
||||||
'upload_date': '20130916',
|
'upload_date': '20130916',
|
||||||
|
@ -200,7 +283,7 @@ class IGNArticleIE(IGNBaseIE):
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '638672ee848ae4ff108df2a296418ee2',
|
'id': '638672ee848ae4ff108df2a296418ee2',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': '26 Twisted Moments from GTA 5 in Slow Motion',
|
'title': 'GTA 5 In Slow Motion',
|
||||||
'description': 'The twisted beauty of GTA 5 in stunning slow motion.',
|
'description': 'The twisted beauty of GTA 5 in stunning slow motion.',
|
||||||
'timestamp': 1386878820,
|
'timestamp': 1386878820,
|
||||||
'upload_date': '20131212',
|
'upload_date': '20131212',
|
||||||
|
@ -208,16 +291,17 @@ class IGNArticleIE(IGNBaseIE):
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
'params': {
|
'params': {
|
||||||
'playlist_items': '2-3',
|
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
'expected_warnings': ['Backend fetch failed'],
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.ign.com/articles/2014/08/15/rewind-theater-wild-trailer-gamescom-2014?watch',
|
'url': 'http://www.ign.com/articles/2014/08/15/rewind-theater-wild-trailer-gamescom-2014?watch',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '53ee806780a81ec46e0790f8',
|
'id': '53ee806780a81ec46e0790f8',
|
||||||
'title': 'Rewind Theater - Wild Trailer Gamescom 2014',
|
'title': 'Rewind Theater - Wild Trailer Gamescom 2014',
|
||||||
},
|
},
|
||||||
'playlist_count': 2,
|
'playlist_count': 1,
|
||||||
|
'expected_warnings': ['Backend fetch failed'],
|
||||||
}, {
|
}, {
|
||||||
# videoId pattern
|
# videoId pattern
|
||||||
'url': 'http://www.ign.com/articles/2017/06/08/new-ducktales-short-donalds-birthday-doesnt-go-as-planned',
|
'url': 'http://www.ign.com/articles/2017/06/08/new-ducktales-short-donalds-birthday-doesnt-go-as-planned',
|
||||||
|
@ -240,18 +324,91 @@ class IGNArticleIE(IGNBaseIE):
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
def _checked_call_api(self, slug):
    """Call the API for `slug` with special handling of HTTP 404 and 503.

    404 becomes an expected 'Content not found' error; 503 is reported
    as a warning and None is returned; anything else is re-raised.
    """
    try:
        return self._call_api(slug)
    except ExtractorError as e:
        http_err = e.cause
        if not isinstance(http_err, compat_HTTPError):
            raise
        # make sure the wrapped error carries some args
        http_err.args = http_err.args or [
            http_err.geturl(), http_err.getcode(), http_err.reason]
        if http_err.code == 404:
            raise ExtractorError(
                'Content not found: expired?', cause=http_err,
                expected=True)
        if http_err.code == 503:
            self.report_warning(error_to_compat_str(http_err))
            return
        raise
|
||||||
|
|
||||||
|
def _search_nextjs_data(self, webpage, video_id, **kw):
    """Find and parse the JSON in the page's __NEXT_DATA__ script element.

    Keyword arguments:
    transform_source -- passed to the JSON parser only
    fatal            -- passed to both the search and the parser
    anything else    -- passed to the regex search only (eg, default)

    Returns the parsed data, or None if the element was not found and
    the search did not raise.
    """
    # route each keyword to the call that understands it: forwarding
    # the same **kw to both calls fails for anything except `fatal`
    transform_source = kw.pop('transform_source', None)
    fatal = kw.pop('fatal', True)
    json_text = self._search_regex(
        r'(?s)<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>([^<]+)</script>',
        webpage, 'next.js data', fatal=fatal, **kw)
    if json_text is None:
        # non-fatal miss: there is nothing to parse
        return None
    return self._parse_json(
        json_text, video_id, transform_source=transform_source, fatal=fatal)
|
||||||
|
|
||||||
def _real_extract(self, url):
    """Return a playlist of the videos attached to an article.

    If the API returns an article record, its media relation and
    embedded video markup are used. Otherwise the web page is scraped:
    flash player objects when the page has a `dable:item_id` meta tag,
    else the video players listed in the page's next.js data.
    """
    display_id = self._match_id(url)
    article = self._checked_call_api(display_id)

    if article:
        # obsolete ?
        def api_entries():
            media_url = traverse_obj(
                article, ('mediaRelations', 0, 'media', 'metadata', 'url'),
                expected_type=url_or_none)
            if media_url:
                yield self.url_result(media_url, IGNIE.ie_key())
            for content in (article.get('content') or []):
                for video_url in re.findall(r'(?:\[(?:ignvideo\s+url|youtube\s+clip_id)|<iframe[^>]+src)="([^"]+)"', content):
                    if url_or_none(video_url):
                        yield self.url_result(video_url)

        headline = traverse_obj(
            article, ('metadata', 'headline'),
            expected_type=lambda x: x.strip() or None)
        return self.playlist_result(
            api_entries(), article.get('articleId'), headline)

    webpage = self._download_webpage(url, display_id)

    playlist_id = self._html_search_meta('dable:item_id', webpage, default=None)
    if playlist_id:

        def page_entries():
            for player in re.finditer(
                    r'''(?s)<object\b[^>]+\bclass\s*=\s*("|')ign-videoplayer\1[^>]*>(?P<params>.+?)</object''',
                    webpage):
                param_tag = self._search_regex(
                    r'''(<param\b[^>]+\bname\s*=\s*("|')flashvars\2[^>]*>)''',
                    player.group('params'), 'flashvars', default='')
                flashvars = compat_parse_qs(extract_attributes(param_tag).get('value') or '')
                # the last `url` value wins
                v_url = url_or_none((flashvars.get('url') or [None])[-1])
                if v_url:
                    yield self.url_result(v_url)
    else:
        playlist_id = self._search_regex(
            r'''\bdata-post-id\s*=\s*("|')(?P<id>[\da-f]+)\1''',
            webpage, 'id', group='id', default=None)

        nextjs_data = self._search_nextjs_data(webpage, display_id)

        def page_entries():
            player_refs = traverse_obj(
                nextjs_data,
                ('props', 'apolloState', 'ROOT_QUERY', lambda k, _: k.startswith('videoPlayerProps('), '__ref'))
            for player_ref in player_refs:
                # skip promo links (which may not always be served, eg GH CI servers)
                promo = traverse_obj(
                    nextjs_data,
                    ('props', 'apolloState', player_ref.replace('PlayerProps', 'ModernContent')),
                    expected_type=dict)
                if promo:
                    continue
                video = traverse_obj(nextjs_data, ('props', 'apolloState', player_ref), expected_type=dict) or {}
                info = self._extract_video_info(video, fatal=False)
                if info:
                    yield merge_dicts({
                        'display_id': display_id,
                    }, info)

    # page title without the trailing ' - IGN'
    page_title = re.sub(r'\s+-\s+IGN\s*$', '', self._og_search_title(webpage, default='')) or None
    return self.playlist_result(
        page_entries(), playlist_id or display_id, page_title)
|
||||||
|
|
|
@ -3,123 +3,266 @@ from __future__ import unicode_literals
|
||||||
|
|
||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
|
import sys
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from .brightcove import BrightcoveNewIE
|
from .brightcove import BrightcoveNewIE
|
||||||
|
from ..compat import (
|
||||||
|
compat_HTTPError,
|
||||||
|
compat_integer_types,
|
||||||
|
compat_kwargs,
|
||||||
|
compat_urlparse,
|
||||||
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
clean_html,
|
clean_html,
|
||||||
determine_ext,
|
determine_ext,
|
||||||
|
error_to_compat_str,
|
||||||
extract_attributes,
|
extract_attributes,
|
||||||
get_element_by_class,
|
ExtractorError,
|
||||||
JSON_LD_RE,
|
get_element_by_attribute,
|
||||||
|
int_or_none,
|
||||||
merge_dicts,
|
merge_dicts,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
|
parse_iso8601,
|
||||||
|
remove_start,
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
|
strip_or_none,
|
||||||
|
traverse_obj,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
|
urljoin,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class ITVIE(InfoExtractor):
|
class ITVBaseIE(InfoExtractor):
    # Shared helpers for the ITV extractors: next.js data lookup and a
    # page download that uses a plain User-Agent and detects geo-blocking

    def _search_nextjs_data(self, webpage, video_id, **kw):
        """Find and parse the JSON in the page's __NEXT_DATA__ script element.

        `transform_source` goes to the JSON parser, `fatal` to both the
        search and the parser, and other keywords to the search only.
        Returns None if the element was not found and the search did
        not raise.
        """
        transform_source = kw.pop('transform_source', None)
        fatal = kw.pop('fatal', True)
        json_text = self._search_regex(
            r'''<script\b[^>]+\bid=('|")__NEXT_DATA__\1[^>]*>(?P<js>[^<]+)</script>''',
            webpage, 'next.js data', group='js', fatal=fatal, **kw)
        if json_text is None:
            # non-fatal miss: there is nothing to parse
            return None
        return self._parse_json(
            json_text, video_id, transform_source=transform_source, fatal=fatal)

    def __handle_request_webpage_error(self, err, video_id=None, errnote=None, fatal=True):
        """Raise (fatal) or warn about (non-fatal) a page download error.

        Returns False when not raising; `errnote=False` suppresses both
        the error and the warning.
        """
        if errnote is False:
            return False
        if errnote is None:
            errnote = 'Unable to download webpage'

        errmsg = '%s: %s' % (errnote, error_to_compat_str(err))
        if fatal:
            raise ExtractorError(errmsg, sys.exc_info()[2], cause=err, video_id=video_id)
        else:
            self._downloader.report_warning(errmsg)
            return False

    @staticmethod
    def _vanilla_ua_header():
        """Return the minimal User-Agent header used for ITV pages."""
        return {'User-Agent': 'Mozilla/5.0'}

    def _download_webpage_handle(self, url, video_id, *args, **kwargs):
        """Download a page as the base class does, with ITV specifics.

        Specialised to (a) use vanilla UA (b) detect geo-block: HTTP 403
        is always accepted from the server so that the response body can
        be checked for the geo-block message; any other 403 is then
        handled as a download error according to `fatal`.
        """
        params = self._downloader.params
        nkwargs = {}
        if (
                'user_agent' not in params
                and not any(re.match(r'(?i)user-agent\s*:', h)
                            for h in (params.get('headers') or []))
                and 'User-Agent' not in (kwargs.get('headers') or {})):

            # add the UA to a copy of any headers supplied by the caller,
            # keeping those headers and leaving the caller's dict alone
            headers = dict(kwargs.get('headers') or {})
            headers.update(self._vanilla_ua_header())
            kwargs['headers'] = headers
            nkwargs = kwargs
        if kwargs.get('expected_status') is not None:
            exp = kwargs['expected_status']
            if isinstance(exp, compat_integer_types):
                exp = [exp]
            # other kinds of expected_status (eg, a callable) are left as is
            if isinstance(exp, (list, tuple)) and 403 not in exp:
                kwargs['expected_status'] = [403]
                kwargs['expected_status'].extend(exp)
                nkwargs = kwargs
        else:
            kwargs['expected_status'] = 403
            nkwargs = kwargs

        if nkwargs:
            kwargs = compat_kwargs(kwargs)

        ret = super(ITVBaseIE, self)._download_webpage_handle(url, video_id, *args, **kwargs)
        if ret is False:
            return ret
        webpage, urlh = ret

        if urlh.getcode() == 403:
            # geo-block error is like this, with an unnecessary 'Of':
            # '{\n "Message" : "Request Originated Outside Of Allowed Geographic Region",\
            # \n "TransactionId" : "oas-magni-475082-xbYF0W"\n}'
            if '"Request Originated Outside Of Allowed Geographic Region"' in webpage:
                self.raise_geo_restricted(countries=['GB'])
            # an omitted `fatal` means fatal, as for any other download
            ret = self.__handle_request_webpage_error(
                compat_HTTPError(urlh.geturl(), 403, 'HTTP Error 403: Forbidden', urlh.headers, urlh),
                video_id=video_id, fatal=kwargs.get('fatal', True))

        return ret
|
||||||
|
|
||||||
|
|
||||||
|
class ITVIE(ITVBaseIE):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?itv\.com/(?:(?P<w>watch)|hub)/[^/]+/(?(w)[\w-]+/)(?P<id>\w+)'
|
||||||
|
_IE_DESC = 'ITVX'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
'note': 'Hub URLs redirect to ITVX',
|
||||||
'url': 'https://www.itv.com/hub/liar/2a4547a0012',
|
'url': 'https://www.itv.com/hub/liar/2a4547a0012',
|
||||||
'info_dict': {
|
'only_matching': True,
|
||||||
'id': '2a4547a0012',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Liar - Series 2 - Episode 6',
|
|
||||||
'description': 'md5:d0f91536569dec79ea184f0a44cca089',
|
|
||||||
'series': 'Liar',
|
|
||||||
'season_number': 2,
|
|
||||||
'episode_number': 6,
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
# m3u8 download
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
}, {
|
}, {
|
||||||
# unavailable via data-playlist-url
|
'note': 'Hub page unavailable via data-playlist-url (404 now)',
|
||||||
'url': 'https://www.itv.com/hub/through-the-keyhole/2a2271a0033',
|
'url': 'https://www.itv.com/hub/through-the-keyhole/2a2271a0033',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
# InvalidVodcrid
|
'note': 'Hub page with InvalidVodcrid (404 now)',
|
||||||
'url': 'https://www.itv.com/hub/james-martins-saturday-morning/2a5159a0034',
|
'url': 'https://www.itv.com/hub/james-martins-saturday-morning/2a5159a0034',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
# ContentUnavailable
|
'note': 'Hub page with ContentUnavailable (404 now)',
|
||||||
'url': 'https://www.itv.com/hub/whos-doing-the-dishes/2a2898a0024',
|
'url': 'https://www.itv.com/hub/whos-doing-the-dishes/2a2898a0024',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}, {
|
||||||
|
'note': 'ITVX, or itvX, show',
|
||||||
|
'url': 'https://www.itv.com/watch/vera/1a7314/1a7314a0014',
|
||||||
|
'md5': 'bd0ad666b2c058fffe7d036785880064',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1a7314a0014',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Vera - Series 3 - Episode 4 - Prodigal Son',
|
||||||
|
'description': 'Vera and her team investigate the fatal stabbing of an ex-Met police officer outside a busy Newcastle nightclub - but there aren\'t many clues.',
|
||||||
|
'timestamp': 1653591600,
|
||||||
|
'upload_date': '20220526',
|
||||||
|
'uploader': 'ITVX',
|
||||||
|
'thumbnail': r're:https://\w+\.itv\.com/images/(?:\w+/)+\d+x\d+\?',
|
||||||
|
'duration': 5340.8,
|
||||||
|
'age_limit': 16,
|
||||||
|
'series': 'Vera',
|
||||||
|
'series_number': 3,
|
||||||
|
'episode': 'Prodigal Son',
|
||||||
|
'episode_number': 4,
|
||||||
|
'channel': 'ITV3',
|
||||||
|
'categories': list,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
# 'skip_download': True,
|
||||||
|
},
|
||||||
|
'skip': 'only available in UK',
|
||||||
|
}, {
|
||||||
|
'note': 'Latest ITV news bulletin: details change daily',
|
||||||
|
'url': 'https://www.itv.com/watch/news/varies-but-is-not-checked/6js5d0f',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '6js5d0f',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': r're:The latest ITV News headlines - \S.+',
|
||||||
|
'description': r'''re:.* today's top stories from the ITV News team.$''',
|
||||||
|
'timestamp': int,
|
||||||
|
'upload_date': r're:2\d\d\d(?:0[1-9]|1[0-2])(?:[012][1-9]|3[01])',
|
||||||
|
'uploader': 'ITVX',
|
||||||
|
'thumbnail': r're:https://images\.ctfassets\.net/(?:\w+/)+[\w.]+\.(?:jpg|png)',
|
||||||
|
'duration': float,
|
||||||
|
'age_limit': None,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# variable download
|
||||||
|
# 'skip_download': True,
|
||||||
|
},
|
||||||
|
'skip': 'only available in UK',
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
def _og_extract(self, webpage, require_title=False):
    """Collect basic metadata from the Open Graph tags of `webpage`.

    The title lookup is fatal only when `require_title` is set; the
    description, thumbnail and site name lookups are given default=None.
    Returns a dict with the keys title, description, thumbnail, uploader.
    """
    og_info = {}
    og_info['title'] = self._og_search_title(webpage, fatal=require_title)
    og_info['description'] = self._og_search_description(webpage, default=None)
    og_info['thumbnail'] = self._og_search_thumbnail(webpage, default=None)
    # og:site_name is reported as the uploader
    og_info['uploader'] = self._og_search_property('site_name', webpage, default=None)
    return og_info
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, video_id)
|
|
||||||
params = extract_attributes(self._search_regex(
|
|
||||||
r'(?s)(<[^>]+id="video"[^>]*>)', webpage, 'params'))
|
|
||||||
|
|
||||||
ios_playlist_url = params.get('data-video-playlist') or params['data-video-id']
|
webpage = self._download_webpage(url, video_id)
|
||||||
hmac = params['data-video-hmac']
|
|
||||||
|
# now quite different params!
|
||||||
|
params = extract_attributes(self._search_regex(
|
||||||
|
r'''(<[^>]+\b(?:class|data-testid)\s*=\s*("|')genie-container\2[^>]*>)''',
|
||||||
|
webpage, 'params'))
|
||||||
|
|
||||||
|
ios_playlist_url = traverse_obj(
|
||||||
|
params, 'data-video-id', 'data-video-playlist',
|
||||||
|
get_all=False, expected_type=url_or_none)
|
||||||
|
|
||||||
headers = self.geo_verification_headers()
|
headers = self.geo_verification_headers()
|
||||||
headers.update({
|
headers.update({
|
||||||
'Accept': 'application/vnd.itv.vod.playlist.v2+json',
|
'Accept': 'application/vnd.itv.vod.playlist.v2+json',
|
||||||
'Content-Type': 'application/json',
|
'Content-Type': 'application/json',
|
||||||
'hmac': hmac.upper(),
|
|
||||||
})
|
})
|
||||||
ios_playlist = self._download_json(
|
ios_playlist = self._download_json(
|
||||||
ios_playlist_url, video_id, data=json.dumps({
|
ios_playlist_url, video_id, data=json.dumps({
|
||||||
'user': {
|
'user': {
|
||||||
'itvUserId': '',
|
|
||||||
'entitlements': [],
|
'entitlements': [],
|
||||||
'token': ''
|
|
||||||
},
|
},
|
||||||
'device': {
|
'device': {
|
||||||
'manufacturer': 'Safari',
|
'manufacturer': 'Mobile Safari',
|
||||||
'model': '5',
|
'model': '5.1',
|
||||||
'os': {
|
'os': {
|
||||||
'name': 'Windows NT',
|
'name': 'iOS',
|
||||||
'version': '6.1',
|
'version': '5.0',
|
||||||
'type': 'desktop'
|
'type': ' mobile'
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
'client': {
|
'client': {
|
||||||
'version': '4.1',
|
'version': '4.1',
|
||||||
'id': 'browser'
|
'id': 'browser',
|
||||||
|
'supportsAdPods': True,
|
||||||
|
'service': 'itv.x',
|
||||||
|
'appversion': '2.43.28',
|
||||||
},
|
},
|
||||||
'variantAvailability': {
|
'variantAvailability': {
|
||||||
|
'player': 'hls',
|
||||||
'featureset': {
|
'featureset': {
|
||||||
'min': ['hls', 'aes', 'outband-webvtt'],
|
'min': ['hls', 'aes', 'outband-webvtt'],
|
||||||
'max': ['hls', 'aes', 'outband-webvtt']
|
'max': ['hls', 'aes', 'outband-webvtt']
|
||||||
},
|
},
|
||||||
'platformTag': 'dotcom'
|
'platformTag': 'mobile'
|
||||||
}
|
}
|
||||||
}).encode(), headers=headers)
|
}).encode(), headers=headers)
|
||||||
video_data = ios_playlist['Playlist']['Video']
|
video_data = ios_playlist['Playlist']['Video']
|
||||||
ios_base_url = video_data.get('Base')
|
ios_base_url = traverse_obj(video_data, 'Base', expected_type=url_or_none)
|
||||||
|
|
||||||
|
media_url = (
|
||||||
|
(lambda u: url_or_none(urljoin(ios_base_url, u)))
|
||||||
|
if ios_base_url else url_or_none)
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for media_file in (video_data.get('MediaFiles') or []):
|
for media_file in traverse_obj(video_data, 'MediaFiles', expected_type=list) or []:
|
||||||
href = media_file.get('Href')
|
href = traverse_obj(media_file, 'Href', expected_type=media_url)
|
||||||
if not href:
|
if not href:
|
||||||
continue
|
continue
|
||||||
if ios_base_url:
|
|
||||||
href = ios_base_url + href
|
|
||||||
ext = determine_ext(href)
|
ext = determine_ext(href)
|
||||||
if ext == 'm3u8':
|
if ext == 'm3u8':
|
||||||
formats.extend(self._extract_m3u8_formats(
|
formats.extend(self._extract_m3u8_formats(
|
||||||
href, video_id, 'mp4', entry_protocol='m3u8_native',
|
href, video_id, 'mp4', entry_protocol='m3u8',
|
||||||
m3u8_id='hls', fatal=False))
|
m3u8_id='hls', fatal=False))
|
||||||
|
|
||||||
else:
|
else:
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': href,
|
'url': href,
|
||||||
})
|
})
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
for f in formats:
|
||||||
|
f.setdefault('http_headers', {})
|
||||||
|
f['http_headers'].update(self._vanilla_ua_header())
|
||||||
|
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
subs = video_data.get('Subtitles') or []
|
for sub in traverse_obj(video_data, 'Subtitles', expected_type=list) or []:
|
||||||
for sub in subs:
|
href = traverse_obj(sub, 'Href', expected_type=url_or_none)
|
||||||
if not isinstance(sub, dict):
|
|
||||||
continue
|
|
||||||
href = url_or_none(sub.get('Href'))
|
|
||||||
if not href:
|
if not href:
|
||||||
continue
|
continue
|
||||||
subtitles.setdefault('en', []).append({
|
subtitles.setdefault('en', []).append({
|
||||||
|
@ -127,59 +270,132 @@ class ITVIE(InfoExtractor):
|
||||||
'ext': determine_ext(href, 'vtt'),
|
'ext': determine_ext(href, 'vtt'),
|
||||||
})
|
})
|
||||||
|
|
||||||
info = self._search_json_ld(webpage, video_id, default={})
|
next_data = self._search_nextjs_data(webpage, video_id, fatal=False, default='{}')
|
||||||
if not info:
|
video_data.update(traverse_obj(next_data, ('props', 'pageProps', ('title', 'episode')), expected_type=dict)[0] or {})
|
||||||
json_ld = self._parse_json(self._search_regex(
|
title = traverse_obj(video_data, 'headerTitle', 'episodeTitle')
|
||||||
JSON_LD_RE, webpage, 'JSON-LD', '{}',
|
info = self._og_extract(webpage, require_title=not title)
|
||||||
group='json_ld'), video_id, fatal=False)
|
tn = info.pop('thumbnail', None)
|
||||||
if json_ld and json_ld.get('@type') == 'BreadcrumbList':
|
if tn:
|
||||||
for ile in (json_ld.get('itemListElement:') or []):
|
info['thumbnails'] = [{'url': tn}]
|
||||||
item = ile.get('item:') or {}
|
|
||||||
if item.get('@type') == 'TVEpisode':
|
# num. episode title
|
||||||
item['@context'] = 'http://schema.org'
|
num_ep_title = video_data.get('numberedEpisodeTitle')
|
||||||
info = self._json_ld(item, video_id, fatal=False) or {}
|
if not num_ep_title:
|
||||||
break
|
num_ep_title = clean_html(get_element_by_attribute('data-testid', 'episode-hero-description-strong', webpage))
|
||||||
|
num_ep_title = num_ep_title and num_ep_title.rstrip(' -')
|
||||||
|
ep_title = strip_or_none(
|
||||||
|
video_data.get('episodeTitle')
|
||||||
|
or (num_ep_title.split('.', 1)[-1] if num_ep_title else None))
|
||||||
|
title = title or re.sub(r'\s+-\s+ITVX$', '', info['title'])
|
||||||
|
if ep_title and ep_title != title:
|
||||||
|
title = title + ' - ' + ep_title
|
||||||
|
|
||||||
|
def get_thumbnails():
|
||||||
|
tns = []
|
||||||
|
for w, x in (traverse_obj(video_data, ('imagePresets'), expected_type=dict) or {}).items():
|
||||||
|
if isinstance(x, dict):
|
||||||
|
for y, z in x.items():
|
||||||
|
tns.append({'id': w + '_' + y, 'url': z})
|
||||||
|
return tns or None
|
||||||
|
|
||||||
|
video_str = lambda *x: traverse_obj(
|
||||||
|
video_data, *x, get_all=False, expected_type=strip_or_none)
|
||||||
|
|
||||||
return merge_dicts({
|
return merge_dicts({
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': self._html_search_meta(['og:title', 'twitter:title'], webpage),
|
'title': title,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
'duration': parse_duration(video_data.get('Duration')),
|
# parsing hh:mm:ss:nnn not yet patched
|
||||||
'description': clean_html(get_element_by_class('episode-info__synopsis', webpage)),
|
'duration': parse_duration(re.sub(r'(\d{2})(:)(\d{3}$)', r'\1.\3', video_data.get('Duration') or '')),
|
||||||
|
'description': video_str('synopsis'),
|
||||||
|
'timestamp': traverse_obj(video_data, 'broadcastDateTime', 'dateTime', expected_type=parse_iso8601),
|
||||||
|
'thumbnails': get_thumbnails(),
|
||||||
|
'series': video_str('showTitle', 'programmeTitle'),
|
||||||
|
'series_number': int_or_none(video_data.get('seriesNumber')),
|
||||||
|
'episode': ep_title,
|
||||||
|
'episode_number': int_or_none((num_ep_title or '').split('.')[0]),
|
||||||
|
'channel': video_str('channel'),
|
||||||
|
'categories': traverse_obj(video_data, ('categories', 'formatted'), expected_type=list),
|
||||||
|
'age_limit': {False: 16, True: 0}.get(video_data.get('isChildrenCategory')),
|
||||||
}, info)
|
}, info)
|
||||||
|
|
||||||
|
|
||||||
class ITVBTCCIE(InfoExtractor):
|
class ITVBTCCIE(ITVBaseIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?itv\.com/btcc/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
_VALID_URL = r'https?://(?:www\.)?itv\.com/(?!(?:watch|hub)/)(?:[^/]+/)+(?P<id>[^/?#&]+)'
|
||||||
_TEST = {
|
_IE_DESC = 'ITV articles: News, British Touring Car Championship'
|
||||||
'url': 'http://www.itv.com/btcc/races/btcc-2018-all-the-action-from-brands-hatch',
|
_TESTS = [{
|
||||||
|
'note': 'British Touring Car Championship',
|
||||||
|
'url': 'https://www.itv.com/btcc/articles/btcc-2018-all-the-action-from-brands-hatch',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'btcc-2018-all-the-action-from-brands-hatch',
|
'id': 'btcc-2018-all-the-action-from-brands-hatch',
|
||||||
'title': 'BTCC 2018: All the action from Brands Hatch',
|
'title': 'BTCC 2018: All the action from Brands Hatch',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 9,
|
'playlist_mincount': 9,
|
||||||
}
|
}, {
|
||||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1582188683001/HkiHLnNRx_default/index.html?videoId=%s'
|
'note': 'redirects to /btcc/articles/...',
|
||||||
|
'url': 'http://www.itv.com/btcc/races/btcc-2018-all-the-action-from-brands-hatch',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'note': 'news article',
|
||||||
|
'url': 'https://www.itv.com/news/wales/2020-07-23/sean-fletcher-shows-off-wales-coastline-in-new-itv-series-as-british-tourists-opt-for-staycations',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'sean-fletcher-shows-off-wales-coastline-in-new-itv-series-as-british-tourists-opt-for-staycations',
|
||||||
|
'title': '''Sean Fletcher on why Wales' coastline should be your 'staycation' destination | ITV News''',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 1,
|
||||||
|
}]
|
||||||
|
|
||||||
|
# should really be a class var of the BC IE
|
||||||
|
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s'
|
||||||
|
BRIGHTCOVE_ACCOUNT = '1582188683001'
|
||||||
|
BRIGHTCOVE_PLAYER = 'HkiHLnNRx'
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
playlist_id = self._match_id(url)
|
playlist_id = self._match_id(url)
|
||||||
|
|
||||||
webpage = self._download_webpage(url, playlist_id)
|
webpage, urlh = self._download_webpage_handle(url, playlist_id)
|
||||||
|
link = compat_urlparse.urlparse(urlh.geturl()).path.strip('/')
|
||||||
|
|
||||||
entries = [
|
next_data = self._search_nextjs_data(webpage, playlist_id, fatal=False, default='{}')
|
||||||
self.url_result(
|
path_prefix = compat_urlparse.urlparse(next_data.get('assetPrefix') or '').path.strip('/')
|
||||||
smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % video_id, {
|
link = remove_start(link, path_prefix).strip('/')
|
||||||
# ITV does not like some GB IP ranges, so here are some
|
|
||||||
# IP blocks it accepts
|
content = traverse_obj(
|
||||||
'geo_ip_blocks': [
|
next_data, ('props', 'pageProps', Ellipsis),
|
||||||
'193.113.0.0/16', '54.36.162.0/23', '159.65.16.0/21'
|
expected_type=lambda x: x if x['link'] == link else None,
|
||||||
],
|
get_all=False, default={})
|
||||||
'referrer': url,
|
content = traverse_obj(
|
||||||
}),
|
content, ('body', 'content', Ellipsis, 'data'),
|
||||||
ie=BrightcoveNewIE.ie_key(), video_id=video_id)
|
expected_type=lambda x: x if x.get('name') == 'Brightcove' or x.get('type') == 'Brightcove' else None)
|
||||||
for video_id in re.findall(r'data-video-id=["\'](\d+)', webpage)]
|
|
||||||
|
contraband = {
|
||||||
|
# ITV does not like some GB IP ranges, so here are some
|
||||||
|
# IP blocks it accepts
|
||||||
|
'geo_ip_blocks': [
|
||||||
|
'193.113.0.0/16', '54.36.162.0/23', '159.65.16.0/21'
|
||||||
|
],
|
||||||
|
'referrer': urlh.geturl(),
|
||||||
|
}
|
||||||
|
|
||||||
|
def entries():
|
||||||
|
|
||||||
|
for data in content or []:
|
||||||
|
video_id = data.get('id')
|
||||||
|
if not video_id:
|
||||||
|
continue
|
||||||
|
account = data.get('accountId') or self.BRIGHTCOVE_ACCOUNT
|
||||||
|
player = data.get('playerId') or self.BRIGHTCOVE_PLAYER
|
||||||
|
yield self.url_result(
|
||||||
|
smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % (account, player, video_id), contraband),
|
||||||
|
ie=BrightcoveNewIE.ie_key(), video_id=video_id)
|
||||||
|
|
||||||
|
# obsolete ?
|
||||||
|
for video_id in re.findall(r'''data-video-id=["'](\d+)''', webpage):
|
||||||
|
yield self.url_result(
|
||||||
|
smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % (self.BRIGHTCOVE_ACCOUNT, self.BRIGHTCOVE_PLAYER, video_id), contraband),
|
||||||
|
ie=BrightcoveNewIE.ie_key(), video_id=video_id)
|
||||||
|
|
||||||
title = self._og_search_title(webpage, fatal=False)
|
title = self._og_search_title(webpage, fatal=False)
|
||||||
|
|
||||||
return self.playlist_result(entries, playlist_id, title)
|
return self.playlist_result(entries(), playlist_id, title)
|
||||||
|
|
35
youtube_dl/extractor/kommunetv.py
Normal file
35
youtube_dl/extractor/kommunetv.py
Normal file
|
@ -0,0 +1,35 @@
|
||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import update_url
|
||||||
|
|
||||||
|
|
||||||
|
class KommunetvIE(InfoExtractor):
    """Extractor for archived recordings on <subdomain>.kommunetv.no."""

    # Dots are escaped so that they only match the literal dots of the host
    _VALID_URL = r'https://(\w+)\.kommunetv\.no/archive/(?P<id>\w+)'
    _TEST = {
        'url': 'https://oslo.kommunetv.no/archive/921',
        'md5': '5f102be308ee759be1e12b63d5da4bbc',
        'info_dict': {
            'id': '921',
            'title': 'Bystyremøte',
            'ext': 'mp4'
        }
    }

    def _real_extract(self, url):
        """Fetch the stream JSON for the archive id and build the info dict.

        Returns a dict with `id`, `title` and the HLS `formats`.
        Raises whatever `_download_json` raises on failure, and
        KeyError/IndexError if the JSON lacks the expected structure.
        """
        video_id = self._match_id(url)
        headers = {
            'Accept': 'application/json'
        }
        # NOTE(review): the API host is fixed to the Oslo instance although
        # _VALID_URL accepts any subdomain - confirm whether other
        # municipalities need their own host here
        data = self._download_json(
            'https://oslo.kommunetv.no/api/streams?streamType=1&id=%s' % video_id,
            video_id, headers=headers)
        title = data['stream']['title']
        playlist_file = data['playlist'][0]['playlist'][0]['file']
        # the manifest is requested without the query and fragment parts
        m3u8_url = update_url(playlist_file, query=None, fragment=None)
        formats = self._extract_m3u8_formats(
            m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native',
            m3u8_id='hls', fatal=False)
        self._sort_formats(formats)
        return {
            'id': video_id,
            'formats': formats,
            'title': title
        }
|
87
youtube_dl/extractor/myvideoge.py
Normal file
87
youtube_dl/extractor/myvideoge.py
Normal file
|
@ -0,0 +1,87 @@
|
||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
clean_html,
|
||||||
|
get_element_by_id,
|
||||||
|
get_element_by_class,
|
||||||
|
int_or_none,
|
||||||
|
js_to_json,
|
||||||
|
MONTH_NAMES,
|
||||||
|
qualities,
|
||||||
|
unified_strdate,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class MyVideoGeIE(InfoExtractor):
    """Extractor for single videos on myvideo.ge (/v/<numeric id> URLs)."""

    _VALID_URL = r'https?://(?:www\.)?myvideo\.ge/v/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'https://www.myvideo.ge/v/3941048',
        'md5': '8c192a7d2b15454ba4f29dc9c9a52ea9',
        'info_dict': {
            'id': '3941048',
            'ext': 'mp4',
            'title': 'The best prikol',
            'upload_date': '20200611',
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'chixa33',
            'description': 'md5:5b067801318e33c2e6eea4ab90b1fdd3',
        },
        # working from local dev system
        'skip': 'site blocks CI servers',
    }
    # Georgian month names; the list index is used to look up the
    # corresponding entry of MONTH_NAMES['en']
    _MONTH_NAMES_KA = ['იანვარი', 'თებერვალი', 'მარტი', 'აპრილი', 'მაისი', 'ივნისი', 'ივლისი', 'აგვისტო', 'სექტემბერი', 'ოქტომბერი', 'ნოემბერი', 'დეკემბერი']

    _quality = staticmethod(qualities(('SD', 'HD')))

    @classmethod
    def _translate_ka_month(cls, m):
        """Map a matched Georgian month name to its English name.

        `m` is a regex match object. The matched text is returned unchanged
        if neither it nor its lower-cased form is a known month name.
        """
        name = m.group(0)
        # the match may differ in case from the list entry once the
        # pattern is applied case-insensitively
        for candidate in (name, name.lower()):
            if candidate in cls._MONTH_NAMES_KA:
                return MONTH_NAMES['en'][cls._MONTH_NAMES_KA.index(candidate)]
        return name

    def _real_extract(self, url):
        """Download the video page and return its info dict.

        The title is required (the last fallback regex is fatal); the
        JWPlayer sources and the remaining metadata are optional.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        title = (
            self._og_search_title(webpage, default=None)
            or clean_html(get_element_by_class('my_video_title', webpage))
            or self._html_search_regex(r'<title\b[^>]*>([^<]+)</title\b', webpage, 'title'))

        jwplayer_sources = self._parse_json(
            self._search_regex(
                r'''(?s)jwplayer\s*\(\s*['"]mvplayer['"]\s*\)\s*\.\s*setup\s*\(.*?\bsources\s*:\s*(\[.*?])\s*[,});]''', webpage, 'jwplayer sources', fatal=False)
            or '',
            video_id, transform_source=js_to_json, fatal=False)

        formats = self._parse_jwplayer_formats(jwplayer_sources or [], video_id)
        for f in formats or []:
            # .get(): a format without a format_id must not abort extraction
            f['preference'] = self._quality(f.get('format_id'))
        self._sort_formats(formats)

        description = (
            # default=None so that the fallbacks below are reached quietly
            self._og_search_description(webpage, default=None)
            or get_element_by_id('long_desc_holder', webpage)
            or self._html_search_meta('description', webpage))

        uploader = self._search_regex(r'<a[^>]+class="mv_user_name"[^>]*>([^<]+)<', webpage, 'uploader', fatal=False)

        upload_date = get_element_by_class('mv_vid_upl_date', webpage)
        # as ka locale may not be present roll a local date conversion
        if upload_date:
            upload_date = unified_strdate(
                # translate any ka month to an en one; re.I has to be passed
                # as `flags`, since the 4th positional argument of re.sub()
                # is `count`
                re.sub('|'.join(self._MONTH_NAMES_KA),
                       self._translate_ka_month,
                       upload_date, flags=re.I))
        else:
            upload_date = None

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'uploader': uploader,
            'formats': formats,
            'thumbnail': self._og_search_thumbnail(webpage),
            'upload_date': upload_date,
            'view_count': int_or_none(get_element_by_class('mv_vid_views', webpage)),
            'like_count': int_or_none(get_element_by_id('likes_count', webpage)),
            'dislike_count': int_or_none(get_element_by_id('dislikes_count', webpage)),
        }
|
|
@ -7,6 +7,7 @@ import subprocess
|
||||||
import tempfile
|
import tempfile
|
||||||
|
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
|
compat_open as open,
|
||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
compat_kwargs,
|
compat_kwargs,
|
||||||
)
|
)
|
||||||
|
|
105
youtube_dl/extractor/pr0gramm.py
Normal file
105
youtube_dl/extractor/pr0gramm.py
Normal file
|
@ -0,0 +1,105 @@
|
||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
import re
|
||||||
|
from ..utils import (
|
||||||
|
merge_dicts,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class Pr0grammStaticIE(InfoExtractor):
    """Extractor for the /static/<id> pages of pr0gramm.com."""

    # Possible urls:
    # https://pr0gramm.com/static/5466437
    _VALID_URL = r'https?://pr0gramm\.com/static/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'https://pr0gramm.com/static/5466437',
        'md5': '52fa540d70d3edc286846f8ca85938aa',
        'info_dict': {
            'id': '5466437',
            'ext': 'mp4',
            'title': 'pr0gramm-5466437 by g11st',
            'uploader': 'g11st',
            'upload_date': '20221221',
        }
    }

    def _real_extract(self, url):
        """Extract the HTML5 media entry, uploader and upload date of a post.

        Returns the first HTML5 media entry of the page merged with `id`,
        `title`, `uploader` and `upload_date`.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Fetch media sources
        entries = self._parse_html5_media_entries(url, webpage, video_id)
        # an empty result must reach the "no formats" check below rather
        # than fail with an IndexError
        media_info = entries[0] if entries else {}

        # this raises if there are no formats
        self._sort_formats(media_info.get('formats') or [])

        # Fetch author; not fatal, as the title below copes with a missing one
        uploader = self._html_search_regex(
            r'by\W+([\w-]+)\W+', webpage, 'uploader', fatal=False)

        # Fetch approx upload date from the media path, e.g.
        # (//img.pr0gramm.com/2022/12/21/62ae8aa5e2da0ebf.mp4)
        # Up to a day of accuracy should suffice...
        upload_date = None
        m = re.search(
            r'//img\.pr0gramm\.com/(?P<year>\d{4})/(?P<mon>\d{2})/(?P<day>\d{2})/\w+\.\w{,4}',
            webpage)
        if m:
            # fixed-width groups keep the result in YYYYMMDD form
            upload_date = ''.join(m.group('year', 'mon', 'day'))

        return merge_dicts({
            'id': video_id,
            'title': 'pr0gramm-%s%s' % (video_id, (' by ' + uploader) if uploader else ''),
            'uploader': uploader,
            'upload_date': upload_date
        }, media_info)
|
||||||
|
|
||||||
|
|
||||||
|
# This extractor is for the primary url (used for sharing, and appears in the
|
||||||
|
# location bar) Since this page loads the DOM via JS, yt-dl can't find any
|
||||||
|
# video information here. So let's redirect to a compatibility version of
|
||||||
|
# the site, which does contain the <video>-element by itself, without requiring
|
||||||
|
# JS to be run.
|
||||||
|
class Pr0grammIE(InfoExtractor):
    """Extractor for regular pr0gramm.com post URLs.

    The numeric post id is taken from the URL and extraction is delegated
    to Pr0grammStaticIE through the corresponding /static/<id> URL.
    """

    # Possible urls:
    # https://pr0gramm.com/new/546637
    # https://pr0gramm.com/new/video/546637
    # https://pr0gramm.com/top/546637
    # https://pr0gramm.com/top/video/546637
    # https://pr0gramm.com/user/g11st/uploads/5466437
    # https://pr0gramm.com/user/froschler/dafur-ist-man-hier/5091290
    # https://pr0gramm.com/user/froschler/reinziehen-1elf/5232030
    # https://pr0gramm.com/user/froschler/1elf/5232030
    # https://pr0gramm.com/new/5495710:comment62621020 <- this is not the id!
    # https://pr0gramm.com/top/fruher war alles damals/5498175

    # the id may be followed by ':comment...', a query, a fragment or nothing
    _VALID_URL = r'https?://pr0gramm\.com/(?!static/\d+).+?/(?P<id>\d+)(?:[:?#]|$)'
    _TEST = {
        'url': 'https://pr0gramm.com/new/video/5466437',
        'info_dict': {
            'id': '5466437',
            'ext': 'mp4',
            'title': 'pr0gramm-5466437 by g11st',
            'uploader': 'g11st',
            'upload_date': '20221221',
        }
    }

    def _real_extract(self, url):
        """Return a url_result pointing at the /static/ page of the post."""
        video_id = self._match_id(url)

        return self.url_result(
            'https://pr0gramm.com/static/' + video_id,
            video_id=video_id,
            ie=Pr0grammStaticIE.ie_key())
|
97
youtube_dl/extractor/rbgtum.py
Normal file
97
youtube_dl/extractor/rbgtum.py
Normal file
|
@ -0,0 +1,97 @@
|
||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class RbgTumIE(InfoExtractor):
    """Extractor for single lecture recordings on live.rbg.tum.de (/w/... URLs)."""

    _VALID_URL = r'https://live\.rbg\.tum\.de/w/(?P<id>.+)'
    _TESTS = [{
        # Combined view
        'url': 'https://live.rbg.tum.de/w/cpp/22128',
        'md5': '53a5e7b3e07128e33bbf36687fe1c08f',
        'info_dict': {
            'id': 'cpp/22128',
            'ext': 'mp4',
            'title': 'Lecture: October 18. 2022',
            'series': 'Concepts of C++ programming (IN2377)',
        }
    }, {
        # Presentation only
        'url': 'https://live.rbg.tum.de/w/I2DL/12349/PRES',
        'md5': '36c584272179f3e56b0db5d880639cba',
        'info_dict': {
            'id': 'I2DL/12349/PRES',
            'ext': 'mp4',
            'title': 'Lecture 3: Introduction to Neural Networks',
            'series': 'Introduction to Deep Learning (IN2346)',
        }
    }, {
        # Camera only
        'url': 'https://live.rbg.tum.de/w/fvv-info/16130/CAM',
        'md5': 'e04189d92ff2f56aedf5cede65d37aad',
        'info_dict': {
            'id': 'fvv-info/16130/CAM',
            'ext': 'mp4',
            'title': 'Fachschaftsvollversammlung',
            'series': 'Fachschaftsvollversammlung Informatik',
        }
    }, ]

    def _real_extract(self, url):
        """Scrape the lecture page for its HLS manifest, title and series.

        All three page lookups and the manifest download are fatal.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        m3u8 = self._html_search_regex(r'(https://.+?\.m3u8)', webpage, 'm3u8')
        # non-greedy capture: with (?s) a greedy group would run on to the
        # last </h1> of the page
        lecture_title = self._html_search_regex(
            r'(?si)<h1\b[^>]*>(.*?)</h1>', webpage, 'title')
        lecture_series_title = self._html_search_regex(
            r'(?s)<title\b[^>]*>\s*(?:TUM-Live\s\|\s?)?([^:]+):?.*?</title>', webpage, 'series')

        formats = self._extract_m3u8_formats(
            m3u8, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls')
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': lecture_title,
            'series': lecture_series_title,
            'formats': formats,
        }
|
||||||
|
|
||||||
|
|
||||||
|
class RbgTumCourseIE(InfoExtractor):
    """Playlist extractor for course pages on live.rbg.tum.de (/course/... URLs)."""

    _VALID_URL = r'https://live\.rbg\.tum\.de/course/(?P<id>.+)'
    _TESTS = [{
        'url': 'https://live.rbg.tum.de/course/2022/S/fpv',
        'info_dict': {
            'title': 'Funktionale Programmierung und Verifikation (IN0003)',
            'id': '2022/S/fpv',
        },
        'params': {
            'noplaylist': False,
        },
        'playlist_count': 13,
    }, {
        'url': 'https://live.rbg.tum.de/course/2022/W/set',
        'info_dict': {
            'title': 'SET FSMPIC',
            'id': '2022/W/set',
        },
        'params': {
            'noplaylist': False,
        },
        'playlist_count': 6,
    }, ]

    def _real_extract(self, url):
        """Return a playlist of the lecture links found on the course page.

        Links whose path ends in /cam, /pres or /chat (any case) are
        left out; every remaining /w/ link is passed on to RbgTumIE.
        """
        course_id = self._match_id(url)
        webpage = self._download_webpage(url, course_id)

        # non-greedy capture: with (?s) a greedy group would run on to the
        # last </h1> of the page
        lecture_series_title = self._html_search_regex(
            r'(?si)<h1\b[^>]*>(.*?)</h1>', webpage, 'title')

        # [^"]+ keeps the match inside one attribute value; a greedy .+
        # could span several attributes or links on the same line
        lecture_urls = [
            self.url_result('https://live.rbg.tum.de/w/' + lecture_url, ie=RbgTumIE.ie_key())
            for lecture_url in re.findall(
                r'(?i)href="/w/([^"]+)(?<!/cam)(?<!/pres)(?<!/chat)"', webpage)]

        return self.playlist_result(lecture_urls, course_id, lecture_series_title)
|
124
youtube_dl/extractor/s4c.py
Normal file
124
youtube_dl/extractor/s4c.py
Normal file
|
@ -0,0 +1,124 @@
|
||||||
|
# coding: utf-8
|
||||||
|
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from functools import partial as partial_f
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
float_or_none,
|
||||||
|
merge_dicts,
|
||||||
|
T,
|
||||||
|
traverse_obj,
|
||||||
|
txt_or_none,
|
||||||
|
url_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class S4CIE(InfoExtractor):
    """Extractor for single programmes on S4C Clic (/clic/programme/<id>)."""

    _VALID_URL = r'https?://(?:www\.)?s4c\.cymru/clic/programme/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.s4c.cymru/clic/programme/861362209',
        'info_dict': {
            'id': '861362209',
            'ext': 'mp4',
            'title': 'Y Swn',
            'description': 'md5:f7681a30e4955b250b3224aa9fe70cf0',
            'duration': 5340,
            'thumbnail': 'https://www.s4c.cymru/amg/1920x1080/Y_Swn_2023S4C_099_ii.jpg',
        },
    }, {
        'url': 'https://www.s4c.cymru/clic/programme/856636948',
        'info_dict': {
            'id': '856636948',
            'ext': 'mp4',
            'title': 'Am Dro',
            'duration': 2880,
            'description': 'md5:100d8686fc9a632a0cb2db52a3433ffe',
            'thumbnail': 'https://www.s4c.cymru/amg/1920x1080/Am_Dro_2022-23S4C_P6_4005.jpg',
        },
    }]

    def _real_extract(self, url):
        """Query the programme details, player config and streaming URL APIs.

        The programme details request is optional (fatal=False); the
        player configuration and the streaming URL requests are required.
        """
        video_id = self._match_id(url)

        # optional metadata: title, description, duration
        details_query = {
            'lang': 'e',
            'programme_id': video_id,
        }
        details = self._download_json(
            'https://www.s4c.cymru/df/full_prog_details',
            video_id, query=details_query, fatal=False)

        config_query = {
            'programme_id': video_id,
            'signed': '0',
            'lang': 'en',
            'mode': 'od',
            'appId': 'clic',
            'streamName': '',
        }
        player_config = self._download_json(
            'https://player-api.s4c-cdn.co.uk/player-configuration/prod',
            video_id, query=config_query,
            note='Downloading player config JSON')

        # the file name from the player config selects the stream
        streaming_query = {
            'mode': 'od',
            'application': 'clic',
            'region': 'WW',
            'extra': 'false',
            'thirdParty': 'false',
            'filename': player_config['filename'],
        }
        streaming_urls = self._download_json(
            'https://player-api.s4c-cdn.co.uk/streaming-urls/prod',
            video_id, query=streaming_query,
            note='Downloading streaming urls JSON')
        m3u8_url = streaming_urls['hls']

        formats = self._extract_m3u8_formats(
            m3u8_url, video_id, 'mp4', m3u8_id='hls', entry_protocol='m3u8_native')
        self._sort_formats(formats)

        # each subtitle entry is keyed by strings: '0' holds the URL,
        # '1' is used as the name and '3' as the language (default 'en')
        subtitles = {}
        subtitle_entries = traverse_obj(
            player_config, ('subtitles', lambda _, v: url_or_none(v['0'])))
        for entry in subtitle_entries:
            language = entry.get('3', 'en')
            subtitles.setdefault(language, []).append({
                'url': entry['0'],
                'name': entry.get('1'),
            })

        base_info = {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
            'thumbnail': url_or_none(player_config.get('poster')),
        }
        programme_info = traverse_obj(details, ('full_prog_details', 0, {
            'title': (('programme_title', 'series_title'), T(txt_or_none)),
            'description': ('full_billing', T(txt_or_none)),
            'duration': ('duration', T(partial_f(float_or_none, invscale=60))),
        }), get_all=False)
        return merge_dicts(base_info, programme_info, rev=True)
|
||||||
|
|
||||||
|
|
||||||
|
class S4CSeriesIE(InfoExtractor):
    """Playlist extractor for series on S4C Clic (/clic/series/<id>)."""

    _VALID_URL = r'https?://(?:www\.)?s4c\.cymru/clic/series/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.s4c.cymru/clic/series/864982911',
        'playlist_mincount': 6,
        'info_dict': {
            'id': '864982911',
            'title': 'Iaith ar Daith',
        },
    }, {
        'url': 'https://www.s4c.cymru/clic/series/866852587',
        'playlist_mincount': 8,
        'info_dict': {
            'id': '866852587',
            'title': 'FFIT Cymru',
        },
    }]

    def _real_extract(self, url):
        """Return a playlist with one S4CIE entry per programme id in the series JSON."""
        series_id = self._match_id(url)
        series_query = {
            'lang': 'e',
            'series_id': series_id,
            'show_prog_in_series': 'Y'
        }
        series_details = self._download_json(
            'https://www.s4c.cymru/df/series_details', series_id,
            query=series_query, note='Downloading series details JSON')

        episode_ids = traverse_obj(
            series_details, ('other_progs_in_series', Ellipsis, 'id'))
        # entries are produced lazily, one url_result per programme id
        entries = (
            self.url_result('https://www.s4c.cymru/clic/programme/' + episode_id, S4CIE, episode_id)
            for episode_id in episode_ids)
        series_title = traverse_obj(
            series_details, ('full_prog_details', 0, 'series_title', T(txt_or_none)))

        return self.playlist_result(
            entries, playlist_id=series_id, playlist_title=series_title)
|
61
youtube_dl/extractor/streamsb.py
Normal file
61
youtube_dl/extractor/streamsb.py
Normal file
|
@ -0,0 +1,61 @@
|
||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import binascii
|
||||||
|
import random
|
||||||
|
import re
|
||||||
|
import string
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import urljoin, url_basename
|
||||||
|
|
||||||
|
|
||||||
|
def to_ascii_hex(str1):
|
||||||
|
return binascii.hexlify(str1.encode('utf-8')).decode('ascii')
|
||||||
|
|
||||||
|
|
||||||
|
def generate_random_string(length):
|
||||||
|
return ''.join(random.choice(string.ascii_letters + string.digits) for _ in range(length))
|
||||||
|
|
||||||
|
|
||||||
|
class StreamsbIE(InfoExtractor):
|
||||||
|
_DOMAINS = ('viewsb.com', )
|
||||||
|
_VALID_URL = r'https://(?P<domain>%s)/(?P<id>.+)' % '|'.join(_DOMAINS)
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://viewsb.com/dxfvlu4qanjx',
|
||||||
|
'md5': '488d111a63415369bf90ea83adc8a325',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'dxfvlu4qanjx',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Sintel'
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
domain, video_id = re.match(self._VALID_URL, url).group('domain', 'id')
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
iframe_rel_url = self._search_regex(r'''(?i)<iframe\b[^>]+\bsrc\s*=\s*('|")(?P<path>/.*\.html)\1''', webpage, 'iframe', group='path')
|
||||||
|
iframe_url = urljoin('https://' + domain, iframe_rel_url)
|
||||||
|
|
||||||
|
iframe_data = self._download_webpage(iframe_url, video_id)
|
||||||
|
app_version = self._search_regex(r'''<script\b[^>]+\bsrc\s*=\s*["|'].*/app\.min\.(\d+)\.js''', iframe_data, 'app version', fatal=False) or '50'
|
||||||
|
|
||||||
|
video_code = url_basename(iframe_url).rsplit('.')[0]
|
||||||
|
|
||||||
|
length = 12
|
||||||
|
req = '||'.join((generate_random_string(length), video_code, generate_random_string(length), 'streamsb'))
|
||||||
|
ereq = 'https://{0}/sources{1}/{2}'.format(domain, app_version, to_ascii_hex(req))
|
||||||
|
|
||||||
|
video_data = self._download_webpage(ereq, video_id, headers={
|
||||||
|
'Referer': iframe_url,
|
||||||
|
'watchsb': 'sbstream',
|
||||||
|
})
|
||||||
|
player_data = self._parse_json(video_data, video_id)
|
||||||
|
title = player_data['stream_data']['title']
|
||||||
|
formats = self._extract_m3u8_formats(player_data['stream_data']['file'], video_id, ext='mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'formats': formats,
|
||||||
|
'title': title,
|
||||||
|
}
|
|
@ -261,27 +261,33 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||||
|
|
||||||
# _VALID_URL matches Vimeo URLs
|
# _VALID_URL matches Vimeo URLs
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
https?://
|
https?://
|
||||||
(?:
|
(?:
|
||||||
(?:
|
(?:
|
||||||
www|
|
www|
|
||||||
player
|
player
|
||||||
)
|
)
|
||||||
\.
|
\.
|
||||||
)?
|
)?
|
||||||
vimeo(?:pro)?\.com/
|
vimeo(?:pro)?\.com/
|
||||||
(?!(?:channels|album|showcase)/[^/?#]+/?(?:$|[?#])|[^/]+/review/|ondemand/)
|
(?:
|
||||||
(?:.*?/)??
|
(?P<u>user)|
|
||||||
(?:
|
(?!(?:channels|album|showcase)/[^/?#]+/?(?:$|[?#])|[^/]+/review/|ondemand/)
|
||||||
(?:
|
(?:.*?/)??
|
||||||
play_redirect_hls|
|
(?P<q>
|
||||||
moogaloop\.swf)\?clip_id=
|
(?:
|
||||||
)?
|
play_redirect_hls|
|
||||||
(?:videos?/)?
|
moogaloop\.swf)\?clip_id=
|
||||||
(?P<id>[0-9]+)
|
)?
|
||||||
(?:/(?P<unlisted_hash>[\da-f]{10}))?
|
(?:videos?/)?
|
||||||
/?(?:[?&].*)?(?:[#].*)?$
|
)
|
||||||
'''
|
(?P<id>[0-9]+)
|
||||||
|
(?(u)
|
||||||
|
/(?!videos|likes)[^/?#]+/?|
|
||||||
|
(?(q)|/(?P<unlisted_hash>[\da-f]{10}))?
|
||||||
|
)
|
||||||
|
(?:(?(q)[&]|(?(u)|/?)[?]).+?)?(?:[#].*)?$
|
||||||
|
'''
|
||||||
IE_NAME = 'vimeo'
|
IE_NAME = 'vimeo'
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
|
@ -539,7 +545,12 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
}
|
},
|
||||||
|
{
|
||||||
|
# user playlist alias -> https://vimeo.com/258705797
|
||||||
|
'url': 'https://vimeo.com/user26785108/newspiritualguide',
|
||||||
|
'only_matching': True,
|
||||||
|
},
|
||||||
# https://gettingthingsdone.com/workflowmap/
|
# https://gettingthingsdone.com/workflowmap/
|
||||||
# vimeo embed with check-password page protected by Referer header
|
# vimeo embed with check-password page protected by Referer header
|
||||||
]
|
]
|
||||||
|
@ -663,7 +674,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||||
|
|
||||||
if '//player.vimeo.com/video/' in url:
|
if '//player.vimeo.com/video/' in url:
|
||||||
config = self._parse_json(self._search_regex(
|
config = self._parse_json(self._search_regex(
|
||||||
r'\b(?:playerC|c)onfig\s*=\s*({.+?})\s*;', webpage, 'info section'), video_id)
|
r'(?s)\b(?:playerC|c)onfig\s*=\s*({.+?})\s*[;\n]', webpage, 'info section'), video_id)
|
||||||
if config.get('view') == 4:
|
if config.get('view') == 4:
|
||||||
config = self._verify_player_video_password(
|
config = self._verify_player_video_password(
|
||||||
redirect_url, video_id, headers)
|
redirect_url, video_id, headers)
|
||||||
|
|
55
youtube_dl/extractor/whyp.py
Normal file
55
youtube_dl/extractor/whyp.py
Normal file
|
@ -0,0 +1,55 @@
|
||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
float_or_none,
|
||||||
|
merge_dicts,
|
||||||
|
str_or_none,
|
||||||
|
T,
|
||||||
|
traverse_obj,
|
||||||
|
url_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class WhypIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?whyp\.it/tracks/(?P<id>\d+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://www.whyp.it/tracks/18337/home-page-example-track-b4kq7',
|
||||||
|
'md5': 'c1187b42ebf8605284e3dc92aeb33d16',
|
||||||
|
'info_dict': {
|
||||||
|
'url': 'https://cdn.whyp.it/50eb17cc-e9ff-4e18-b89b-dc9206a95cb1.mp3',
|
||||||
|
'id': '18337',
|
||||||
|
'title': 'Home Page Example Track',
|
||||||
|
'description': r're:(?s).+\bexample track\b',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'duration': 52.82,
|
||||||
|
'uploader': 'Brad',
|
||||||
|
'uploader_id': '1',
|
||||||
|
'thumbnail': 'https://cdn.whyp.it/a537bb36-3373-4c61-96c8-27fc1b2f427a.jpg',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.whyp.it/tracks/18337',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
unique_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, unique_id)
|
||||||
|
data = self._search_nuxt_data(webpage, unique_id)['rawTrack']
|
||||||
|
|
||||||
|
return merge_dicts({
|
||||||
|
'url': data['audio_url'],
|
||||||
|
'id': unique_id,
|
||||||
|
}, traverse_obj(data, {
|
||||||
|
'title': 'title',
|
||||||
|
'description': 'description',
|
||||||
|
'duration': ('duration', T(float_or_none)),
|
||||||
|
'uploader': ('user', 'username'),
|
||||||
|
'uploader_id': ('user', 'id', T(str_or_none)),
|
||||||
|
'thumbnail': ('artwork_url', T(url_or_none)),
|
||||||
|
}), {
|
||||||
|
'ext': 'mp3',
|
||||||
|
'vcodec': 'none',
|
||||||
|
'http_headers': {'Referer': 'https://whyp.it/'},
|
||||||
|
}, rev=True)
|
|
@ -24,7 +24,7 @@ from ..utils import (
|
||||||
|
|
||||||
|
|
||||||
class XHamsterIE(InfoExtractor):
|
class XHamsterIE(InfoExtractor):
|
||||||
_DOMAINS = r'(?:xhamster\.(?:com|one|desi)|xhms\.pro|xhamster\d+\.com|xhday\.com)'
|
_DOMAINS = r'(?:xhamster\.(?:com|one|desi)|xhms\.pro|xhamster\d+\.com|xhday\.com|xhvid\.com)'
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
https?://
|
https?://
|
||||||
(?:.+?\.)?%s/
|
(?:.+?\.)?%s/
|
||||||
|
@ -123,6 +123,9 @@ class XHamsterIE(InfoExtractor):
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://xhday.com/videos/strapless-threesome-xhh7yVf',
|
'url': 'https://xhday.com/videos/strapless-threesome-xhh7yVf',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://xhvid.com/videos/lk-mm-xhc6wn6',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -433,6 +436,9 @@ class XHamsterUserIE(InfoExtractor):
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://xhday.com/users/mobhunter',
|
'url': 'https://xhday.com/users/mobhunter',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://xhvid.com/users/pelushe21',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _entries(self, user_id):
|
def _entries(self, user_id):
|
||||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -2,30 +2,68 @@ from __future__ import unicode_literals
|
||||||
|
|
||||||
import itertools
|
import itertools
|
||||||
import json
|
import json
|
||||||
import math
|
|
||||||
import operator
|
import operator
|
||||||
import re
|
import re
|
||||||
|
|
||||||
|
from functools import update_wrapper
|
||||||
|
|
||||||
from .utils import (
|
from .utils import (
|
||||||
error_to_compat_str,
|
error_to_compat_str,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
js_to_json,
|
js_to_json,
|
||||||
remove_quotes,
|
remove_quotes,
|
||||||
unified_timestamp,
|
unified_timestamp,
|
||||||
|
variadic,
|
||||||
)
|
)
|
||||||
from .compat import (
|
from .compat import (
|
||||||
compat_basestring,
|
compat_basestring,
|
||||||
|
compat_chr,
|
||||||
compat_collections_chain_map as ChainMap,
|
compat_collections_chain_map as ChainMap,
|
||||||
compat_itertools_zip_longest as zip_longest,
|
compat_itertools_zip_longest as zip_longest,
|
||||||
compat_str,
|
compat_str,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# name JS functions
|
||||||
|
class function_with_repr(object):
|
||||||
|
# from yt_dlp/utils.py, but in this module
|
||||||
|
# repr_ is always set
|
||||||
|
def __init__(self, func, repr_):
|
||||||
|
update_wrapper(self, func)
|
||||||
|
self.func, self.__repr = func, repr_
|
||||||
|
|
||||||
|
def __call__(self, *args, **kwargs):
|
||||||
|
return self.func(*args, **kwargs)
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
return self.__repr
|
||||||
|
|
||||||
|
|
||||||
|
# name JS operators
|
||||||
|
def wraps_op(op):
|
||||||
|
|
||||||
|
def update_and_rename_wrapper(w):
|
||||||
|
f = update_wrapper(w, op)
|
||||||
|
# fn names are str in both Py 2/3
|
||||||
|
f.__name__ = str('JS_') + f.__name__
|
||||||
|
return f
|
||||||
|
|
||||||
|
return update_and_rename_wrapper
|
||||||
|
|
||||||
|
|
||||||
|
# NB In principle NaN cannot be checked by membership.
|
||||||
|
# Here all NaN values are actually this one, so _NaN is _NaN,
|
||||||
|
# although _NaN != _NaN.
|
||||||
|
|
||||||
|
_NaN = float('nan')
|
||||||
|
|
||||||
|
|
||||||
def _js_bit_op(op):
|
def _js_bit_op(op):
|
||||||
|
|
||||||
def zeroise(x):
|
def zeroise(x):
|
||||||
return 0 if x in (None, JS_Undefined) else x
|
return 0 if x in (None, JS_Undefined, _NaN) else x
|
||||||
|
|
||||||
|
@wraps_op(op)
|
||||||
def wrapped(a, b):
|
def wrapped(a, b):
|
||||||
return op(zeroise(a), zeroise(b)) & 0xffffffff
|
return op(zeroise(a), zeroise(b)) & 0xffffffff
|
||||||
|
|
||||||
|
@ -34,23 +72,24 @@ def _js_bit_op(op):
|
||||||
|
|
||||||
def _js_arith_op(op):
|
def _js_arith_op(op):
|
||||||
|
|
||||||
|
@wraps_op(op)
|
||||||
def wrapped(a, b):
|
def wrapped(a, b):
|
||||||
if JS_Undefined in (a, b):
|
if JS_Undefined in (a, b):
|
||||||
return float('nan')
|
return _NaN
|
||||||
return op(a or 0, b or 0)
|
return op(a or 0, b or 0)
|
||||||
|
|
||||||
return wrapped
|
return wrapped
|
||||||
|
|
||||||
|
|
||||||
def _js_div(a, b):
|
def _js_div(a, b):
|
||||||
if JS_Undefined in (a, b) or not (a and b):
|
if JS_Undefined in (a, b) or not (a or b):
|
||||||
return float('nan')
|
return _NaN
|
||||||
return operator.truediv(a or 0, b) if b else float('inf')
|
return operator.truediv(a or 0, b) if b else float('inf')
|
||||||
|
|
||||||
|
|
||||||
def _js_mod(a, b):
|
def _js_mod(a, b):
|
||||||
if JS_Undefined in (a, b) or not b:
|
if JS_Undefined in (a, b) or not b:
|
||||||
return float('nan')
|
return _NaN
|
||||||
return (a or 0) % b
|
return (a or 0) % b
|
||||||
|
|
||||||
|
|
||||||
|
@ -58,12 +97,13 @@ def _js_exp(a, b):
|
||||||
if not b:
|
if not b:
|
||||||
return 1 # even 0 ** 0 !!
|
return 1 # even 0 ** 0 !!
|
||||||
elif JS_Undefined in (a, b):
|
elif JS_Undefined in (a, b):
|
||||||
return float('nan')
|
return _NaN
|
||||||
return (a or 0) ** b
|
return (a or 0) ** b
|
||||||
|
|
||||||
|
|
||||||
def _js_eq_op(op):
|
def _js_eq_op(op):
|
||||||
|
|
||||||
|
@wraps_op(op)
|
||||||
def wrapped(a, b):
|
def wrapped(a, b):
|
||||||
if set((a, b)) <= set((None, JS_Undefined)):
|
if set((a, b)) <= set((None, JS_Undefined)):
|
||||||
return op(a, a)
|
return op(a, a)
|
||||||
|
@ -74,6 +114,7 @@ def _js_eq_op(op):
|
||||||
|
|
||||||
def _js_comp_op(op):
|
def _js_comp_op(op):
|
||||||
|
|
||||||
|
@wraps_op(op)
|
||||||
def wrapped(a, b):
|
def wrapped(a, b):
|
||||||
if JS_Undefined in (a, b):
|
if JS_Undefined in (a, b):
|
||||||
return False
|
return False
|
||||||
|
@ -88,13 +129,8 @@ def _js_comp_op(op):
|
||||||
|
|
||||||
def _js_ternary(cndn, if_true=True, if_false=False):
|
def _js_ternary(cndn, if_true=True, if_false=False):
|
||||||
"""Simulate JS's ternary operator (cndn?if_true:if_false)"""
|
"""Simulate JS's ternary operator (cndn?if_true:if_false)"""
|
||||||
if cndn in (False, None, 0, '', JS_Undefined):
|
if cndn in (False, None, 0, '', JS_Undefined, _NaN):
|
||||||
return if_false
|
return if_false
|
||||||
try:
|
|
||||||
if math.isnan(cndn): # NB: NaN cannot be checked by membership
|
|
||||||
return if_false
|
|
||||||
except TypeError:
|
|
||||||
pass
|
|
||||||
return if_true
|
return if_true
|
||||||
|
|
||||||
|
|
||||||
|
@ -187,19 +223,6 @@ class LocalNameSpace(ChainMap):
|
||||||
class JSInterpreter(object):
|
class JSInterpreter(object):
|
||||||
__named_object_counter = 0
|
__named_object_counter = 0
|
||||||
|
|
||||||
_RE_FLAGS = {
|
|
||||||
# special knowledge: Python's re flags are bitmask values, current max 128
|
|
||||||
# invent new bitmask values well above that for literal parsing
|
|
||||||
# TODO: new pattern class to execute matches with these flags
|
|
||||||
'd': 1024, # Generate indices for substring matches
|
|
||||||
'g': 2048, # Global search
|
|
||||||
'i': re.I, # Case-insensitive search
|
|
||||||
'm': re.M, # Multi-line search
|
|
||||||
's': re.S, # Allows . to match newline characters
|
|
||||||
'u': re.U, # Treat a pattern as a sequence of unicode code points
|
|
||||||
'y': 4096, # Perform a "sticky" search that matches starting at the current position in the target string
|
|
||||||
}
|
|
||||||
|
|
||||||
_OBJ_NAME = '__youtube_dl_jsinterp_obj'
|
_OBJ_NAME = '__youtube_dl_jsinterp_obj'
|
||||||
|
|
||||||
OP_CHARS = None
|
OP_CHARS = None
|
||||||
|
@ -217,9 +240,72 @@ class JSInterpreter(object):
|
||||||
msg = '{0} in: {1!r:.100}'.format(msg.rstrip(), expr)
|
msg = '{0} in: {1!r:.100}'.format(msg.rstrip(), expr)
|
||||||
super(JSInterpreter.Exception, self).__init__(msg, *args, **kwargs)
|
super(JSInterpreter.Exception, self).__init__(msg, *args, **kwargs)
|
||||||
|
|
||||||
|
class JS_RegExp(object):
|
||||||
|
RE_FLAGS = {
|
||||||
|
# special knowledge: Python's re flags are bitmask values, current max 128
|
||||||
|
# invent new bitmask values well above that for literal parsing
|
||||||
|
# TODO: execute matches with these flags (remaining: d, y)
|
||||||
|
'd': 1024, # Generate indices for substring matches
|
||||||
|
'g': 2048, # Global search
|
||||||
|
'i': re.I, # Case-insensitive search
|
||||||
|
'm': re.M, # Multi-line search
|
||||||
|
's': re.S, # Allows . to match newline characters
|
||||||
|
'u': re.U, # Treat a pattern as a sequence of unicode code points
|
||||||
|
'y': 4096, # Perform a "sticky" search that matches starting at the current position in the target string
|
||||||
|
}
|
||||||
|
|
||||||
|
def __init__(self, pattern_txt, flags=0):
|
||||||
|
if isinstance(flags, compat_str):
|
||||||
|
flags, _ = self.regex_flags(flags)
|
||||||
|
# First, avoid https://github.com/python/cpython/issues/74534
|
||||||
|
self.__self = None
|
||||||
|
self.__pattern_txt = pattern_txt.replace('[[', r'[\[')
|
||||||
|
self.__flags = flags
|
||||||
|
|
||||||
|
def __instantiate(self):
|
||||||
|
if self.__self:
|
||||||
|
return
|
||||||
|
self.__self = re.compile(self.__pattern_txt, self.__flags)
|
||||||
|
# Thx: https://stackoverflow.com/questions/44773522/setattr-on-python2-sre-sre-pattern
|
||||||
|
for name in dir(self.__self):
|
||||||
|
# Only these? Obviously __class__, __init__.
|
||||||
|
# PyPy creates a __weakref__ attribute with value None
|
||||||
|
# that can't be setattr'd but also can't need to be copied.
|
||||||
|
if name in ('__class__', '__init__', '__weakref__'):
|
||||||
|
continue
|
||||||
|
setattr(self, name, getattr(self.__self, name))
|
||||||
|
|
||||||
|
def __getattr__(self, name):
|
||||||
|
self.__instantiate()
|
||||||
|
# make Py 2.6 conform to its lying documentation
|
||||||
|
if name == 'flags':
|
||||||
|
self.flags = self.__flags
|
||||||
|
return self.flags
|
||||||
|
elif name == 'pattern':
|
||||||
|
self.pattern = self.__pattern_txt
|
||||||
|
return self.pattern
|
||||||
|
elif hasattr(self.__self, name):
|
||||||
|
v = getattr(self.__self, name)
|
||||||
|
setattr(self, name, v)
|
||||||
|
return v
|
||||||
|
elif name in ('groupindex', 'groups'):
|
||||||
|
return 0 if name == 'groupindex' else {}
|
||||||
|
raise AttributeError('{0} has no attribute named {1}'.format(self, name))
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def regex_flags(cls, expr):
|
||||||
|
flags = 0
|
||||||
|
if not expr:
|
||||||
|
return flags, expr
|
||||||
|
for idx, ch in enumerate(expr):
|
||||||
|
if ch not in cls.RE_FLAGS:
|
||||||
|
break
|
||||||
|
flags |= cls.RE_FLAGS[ch]
|
||||||
|
return flags, expr[idx + 1:]
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def __op_chars(cls):
|
def __op_chars(cls):
|
||||||
op_chars = set(';,')
|
op_chars = set(';,[')
|
||||||
for op in cls._all_operators():
|
for op in cls._all_operators():
|
||||||
for c in op[0]:
|
for c in op[0]:
|
||||||
op_chars.add(c)
|
op_chars.add(c)
|
||||||
|
@ -228,39 +314,30 @@ class JSInterpreter(object):
|
||||||
def _named_object(self, namespace, obj):
|
def _named_object(self, namespace, obj):
|
||||||
self.__named_object_counter += 1
|
self.__named_object_counter += 1
|
||||||
name = '%s%d' % (self._OBJ_NAME, self.__named_object_counter)
|
name = '%s%d' % (self._OBJ_NAME, self.__named_object_counter)
|
||||||
|
if callable(obj) and not isinstance(obj, function_with_repr):
|
||||||
|
obj = function_with_repr(obj, 'F<%s>' % (self.__named_object_counter, ))
|
||||||
namespace[name] = obj
|
namespace[name] = obj
|
||||||
return name
|
return name
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def _regex_flags(cls, expr):
|
|
||||||
flags = 0
|
|
||||||
if not expr:
|
|
||||||
return flags, expr
|
|
||||||
for idx, ch in enumerate(expr):
|
|
||||||
if ch not in cls._RE_FLAGS:
|
|
||||||
break
|
|
||||||
flags |= cls._RE_FLAGS[ch]
|
|
||||||
return flags, expr[idx + 1:]
|
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def _separate(cls, expr, delim=',', max_split=None, skip_delims=None):
|
def _separate(cls, expr, delim=',', max_split=None, skip_delims=None):
|
||||||
if not expr:
|
if not expr:
|
||||||
return
|
return
|
||||||
# collections.Counter() is ~10% slower in both 2.7 and 3.9
|
# collections.Counter() is ~10% slower in both 2.7 and 3.9
|
||||||
counters = {k: 0 for k in _MATCHING_PARENS.values()}
|
counters = dict((k, 0) for k in _MATCHING_PARENS.values())
|
||||||
start, splits, pos, delim_len = 0, 0, 0, len(delim) - 1
|
start, splits, pos, delim_len = 0, 0, 0, len(delim) - 1
|
||||||
in_quote, escaping, skipping = None, False, 0
|
in_quote, escaping, skipping = None, False, 0
|
||||||
after_op, in_regex_char_group, skip_re = True, False, 0
|
after_op, in_regex_char_group = True, False
|
||||||
|
|
||||||
for idx, char in enumerate(expr):
|
for idx, char in enumerate(expr):
|
||||||
if skip_re > 0:
|
paren_delta = 0
|
||||||
skip_re -= 1
|
|
||||||
continue
|
|
||||||
if not in_quote:
|
if not in_quote:
|
||||||
if char in _MATCHING_PARENS:
|
if char in _MATCHING_PARENS:
|
||||||
counters[_MATCHING_PARENS[char]] += 1
|
counters[_MATCHING_PARENS[char]] += 1
|
||||||
|
paren_delta = 1
|
||||||
elif char in counters:
|
elif char in counters:
|
||||||
counters[char] -= 1
|
counters[char] -= 1
|
||||||
|
paren_delta = -1
|
||||||
if not escaping:
|
if not escaping:
|
||||||
if char in _QUOTES and in_quote in (char, None):
|
if char in _QUOTES and in_quote in (char, None):
|
||||||
if in_quote or after_op or char != '/':
|
if in_quote or after_op or char != '/':
|
||||||
|
@ -268,7 +345,7 @@ class JSInterpreter(object):
|
||||||
elif in_quote == '/' and char in '[]':
|
elif in_quote == '/' and char in '[]':
|
||||||
in_regex_char_group = char == '['
|
in_regex_char_group = char == '['
|
||||||
escaping = not escaping and in_quote and char == '\\'
|
escaping = not escaping and in_quote and char == '\\'
|
||||||
after_op = not in_quote and (char in cls.OP_CHARS or char == '[' or (char.isspace() and after_op))
|
after_op = not in_quote and (char in cls.OP_CHARS or paren_delta > 0 or (after_op and char.isspace()))
|
||||||
|
|
||||||
if char != delim[pos] or any(counters.values()) or in_quote:
|
if char != delim[pos] or any(counters.values()) or in_quote:
|
||||||
pos = skipping = 0
|
pos = skipping = 0
|
||||||
|
@ -278,7 +355,7 @@ class JSInterpreter(object):
|
||||||
continue
|
continue
|
||||||
elif pos == 0 and skip_delims:
|
elif pos == 0 and skip_delims:
|
||||||
here = expr[idx:]
|
here = expr[idx:]
|
||||||
for s in skip_delims if isinstance(skip_delims, (list, tuple)) else [skip_delims]:
|
for s in variadic(skip_delims):
|
||||||
if here.startswith(s) and s:
|
if here.startswith(s) and s:
|
||||||
skipping = len(s) - 1
|
skipping = len(s) - 1
|
||||||
break
|
break
|
||||||
|
@ -301,7 +378,7 @@ class JSInterpreter(object):
|
||||||
separated = list(cls._separate(expr, delim, 1))
|
separated = list(cls._separate(expr, delim, 1))
|
||||||
|
|
||||||
if len(separated) < 2:
|
if len(separated) < 2:
|
||||||
raise cls.Exception('No terminating paren {delim} in {expr:.100}'.format(**locals()))
|
raise cls.Exception('No terminating paren {delim} in {expr!r:.5500}'.format(**locals()))
|
||||||
return separated[0][1:].strip(), separated[1].strip()
|
return separated[0][1:].strip(), separated[1].strip()
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
@ -326,9 +403,10 @@ class JSInterpreter(object):
|
||||||
return right_val
|
return right_val
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
# print('Eval:', opfunc.__name__, left_val, right_val)
|
||||||
return opfunc(left_val, right_val)
|
return opfunc(left_val, right_val)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise self.Exception('Failed to evaluate {left_val!r} {op} {right_val!r}'.format(**locals()), expr, cause=e)
|
raise self.Exception('Failed to evaluate {left_val!r:.50} {op} {right_val!r:.50}'.format(**locals()), expr, cause=e)
|
||||||
|
|
||||||
def _index(self, obj, idx, allow_undefined=False):
|
def _index(self, obj, idx, allow_undefined=False):
|
||||||
if idx == 'length':
|
if idx == 'length':
|
||||||
|
@ -338,7 +416,7 @@ class JSInterpreter(object):
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
if allow_undefined:
|
if allow_undefined:
|
||||||
return JS_Undefined
|
return JS_Undefined
|
||||||
raise self.Exception('Cannot get index {idx}'.format(**locals()), expr=repr(obj), cause=e)
|
raise self.Exception('Cannot get index {idx:.100}'.format(**locals()), expr=repr(obj), cause=e)
|
||||||
|
|
||||||
def _dump(self, obj, namespace):
|
def _dump(self, obj, namespace):
|
||||||
try:
|
try:
|
||||||
|
@ -346,12 +424,28 @@ class JSInterpreter(object):
|
||||||
except TypeError:
|
except TypeError:
|
||||||
return self._named_object(namespace, obj)
|
return self._named_object(namespace, obj)
|
||||||
|
|
||||||
|
# used below
|
||||||
|
_VAR_RET_THROW_RE = re.compile(r'''(?x)
|
||||||
|
(?P<var>(?:var|const|let)\s)|return(?:\s+|(?=["'])|$)|(?P<throw>throw\s+)
|
||||||
|
''')
|
||||||
|
_COMPOUND_RE = re.compile(r'''(?x)
|
||||||
|
(?P<try>try)\s*\{|
|
||||||
|
(?P<if>if)\s*\(|
|
||||||
|
(?P<switch>switch)\s*\(|
|
||||||
|
(?P<for>for)\s*\(|
|
||||||
|
(?P<while>while)\s*\(
|
||||||
|
''')
|
||||||
|
_FINALLY_RE = re.compile(r'finally\s*\{')
|
||||||
|
_SWITCH_RE = re.compile(r'switch\s*\(')
|
||||||
|
|
||||||
def interpret_statement(self, stmt, local_vars, allow_recursion=100):
|
def interpret_statement(self, stmt, local_vars, allow_recursion=100):
|
||||||
if allow_recursion < 0:
|
if allow_recursion < 0:
|
||||||
raise self.Exception('Recursion limit reached')
|
raise self.Exception('Recursion limit reached')
|
||||||
allow_recursion -= 1
|
allow_recursion -= 1
|
||||||
|
|
||||||
|
# print('At: ' + stmt[:60])
|
||||||
should_return = False
|
should_return = False
|
||||||
|
# fails on (eg) if (...) stmt1; else stmt2;
|
||||||
sub_statements = list(self._separate(stmt, ';')) or ['']
|
sub_statements = list(self._separate(stmt, ';')) or ['']
|
||||||
expr = stmt = sub_statements.pop().strip()
|
expr = stmt = sub_statements.pop().strip()
|
||||||
for sub_stmt in sub_statements:
|
for sub_stmt in sub_statements:
|
||||||
|
@ -359,7 +453,7 @@ class JSInterpreter(object):
|
||||||
if should_return:
|
if should_return:
|
||||||
return ret, should_return
|
return ret, should_return
|
||||||
|
|
||||||
m = re.match(r'(?P<var>(?:var|const|let)\s)|return(?:\s+|(?=["\'])|$)|(?P<throw>throw\s+)', stmt)
|
m = self._VAR_RET_THROW_RE.match(stmt)
|
||||||
if m:
|
if m:
|
||||||
expr = stmt[len(m.group(0)):].strip()
|
expr = stmt[len(m.group(0)):].strip()
|
||||||
if m.group('throw'):
|
if m.group('throw'):
|
||||||
|
@ -371,25 +465,30 @@ class JSInterpreter(object):
|
||||||
if expr[0] in _QUOTES:
|
if expr[0] in _QUOTES:
|
||||||
inner, outer = self._separate(expr, expr[0], 1)
|
inner, outer = self._separate(expr, expr[0], 1)
|
||||||
if expr[0] == '/':
|
if expr[0] == '/':
|
||||||
flags, outer = self._regex_flags(outer)
|
flags, outer = self.JS_RegExp.regex_flags(outer)
|
||||||
inner = re.compile(inner[1:], flags=flags) # , strict=True))
|
inner = self.JS_RegExp(inner[1:], flags=flags)
|
||||||
else:
|
else:
|
||||||
inner = json.loads(js_to_json(inner + expr[0])) # , strict=True))
|
inner = json.loads(js_to_json(inner + expr[0])) # , strict=True))
|
||||||
if not outer:
|
if not outer:
|
||||||
return inner, should_return
|
return inner, should_return
|
||||||
expr = self._named_object(local_vars, inner) + outer
|
expr = self._named_object(local_vars, inner) + outer
|
||||||
|
|
||||||
if expr.startswith('new '):
|
new_kw, _, obj = expr.partition('new ')
|
||||||
obj = expr[4:]
|
if not new_kw:
|
||||||
if obj.startswith('Date('):
|
for klass, konstr in (('Date', lambda x: int(unified_timestamp(x, False) * 1000)),
|
||||||
left, right = self._separate_at_paren(obj[4:])
|
('RegExp', self.JS_RegExp),
|
||||||
expr = unified_timestamp(
|
('Error', self.Exception)):
|
||||||
self.interpret_expression(left, local_vars, allow_recursion), False)
|
if not obj.startswith(klass + '('):
|
||||||
if not expr:
|
continue
|
||||||
raise self.Exception('Failed to parse date {left!r}'.format(**locals()), expr=expr)
|
left, right = self._separate_at_paren(obj[len(klass):])
|
||||||
expr = self._dump(int(expr * 1000), local_vars) + right
|
argvals = self.interpret_iter(left, local_vars, allow_recursion)
|
||||||
|
expr = konstr(*argvals)
|
||||||
|
if expr is None:
|
||||||
|
raise self.Exception('Failed to parse {klass} {left!r:.100}'.format(**locals()), expr=expr)
|
||||||
|
expr = self._dump(expr, local_vars) + right
|
||||||
|
break
|
||||||
else:
|
else:
|
||||||
raise self.Exception('Unsupported object {obj}'.format(**locals()), expr=expr)
|
raise self.Exception('Unsupported object {obj:.100}'.format(**locals()), expr=expr)
|
||||||
|
|
||||||
if expr.startswith('void '):
|
if expr.startswith('void '):
|
||||||
left = self.interpret_expression(expr[5:], local_vars, allow_recursion)
|
left = self.interpret_expression(expr[5:], local_vars, allow_recursion)
|
||||||
|
@ -412,8 +511,15 @@ class JSInterpreter(object):
|
||||||
expr = self._dump(inner, local_vars) + outer
|
expr = self._dump(inner, local_vars) + outer
|
||||||
|
|
||||||
if expr.startswith('('):
|
if expr.startswith('('):
|
||||||
inner, outer = self._separate_at_paren(expr)
|
|
||||||
inner, should_abort = self.interpret_statement(inner, local_vars, allow_recursion)
|
m = re.match(r'\((?P<d>[a-z])%(?P<e>[a-z])\.length\+(?P=e)\.length\)%(?P=e)\.length', expr)
|
||||||
|
if m:
|
||||||
|
# short-cut eval of frequently used `(d%e.length+e.length)%e.length`, worth ~6% on `pytest -k test_nsig`
|
||||||
|
outer = None
|
||||||
|
inner, should_abort = self._offset_e_by_d(m.group('d'), m.group('e'), local_vars)
|
||||||
|
else:
|
||||||
|
inner, outer = self._separate_at_paren(expr)
|
||||||
|
inner, should_abort = self.interpret_statement(inner, local_vars, allow_recursion)
|
||||||
if not outer or should_abort:
|
if not outer or should_abort:
|
||||||
return inner, should_abort or should_return
|
return inner, should_abort or should_return
|
||||||
else:
|
else:
|
||||||
|
@ -426,13 +532,43 @@ class JSInterpreter(object):
|
||||||
for item in self._separate(inner)])
|
for item in self._separate(inner)])
|
||||||
expr = name + outer
|
expr = name + outer
|
||||||
|
|
||||||
m = re.match(r'''(?x)
|
m = self._COMPOUND_RE.match(expr)
|
||||||
(?P<try>try)\s*\{|
|
|
||||||
(?P<switch>switch)\s*\(|
|
|
||||||
(?P<for>for)\s*\(
|
|
||||||
''', expr)
|
|
||||||
md = m.groupdict() if m else {}
|
md = m.groupdict() if m else {}
|
||||||
if md.get('try'):
|
if md.get('if'):
|
||||||
|
cndn, expr = self._separate_at_paren(expr[m.end() - 1:])
|
||||||
|
if expr.startswith('{'):
|
||||||
|
if_expr, expr = self._separate_at_paren(expr)
|
||||||
|
else:
|
||||||
|
# may lose ... else ... because of ll.368-374
|
||||||
|
if_expr, expr = self._separate_at_paren(expr, delim=';')
|
||||||
|
else_expr = None
|
||||||
|
m = re.match(r'else\s*(?P<block>\{)?', expr)
|
||||||
|
if m:
|
||||||
|
if m.group('block'):
|
||||||
|
else_expr, expr = self._separate_at_paren(expr[m.end() - 1:])
|
||||||
|
else:
|
||||||
|
# handle subset ... else if (...) {...} else ...
|
||||||
|
# TODO: make interpret_statement do this properly, if possible
|
||||||
|
exprs = list(self._separate(expr[m.end():], delim='}', max_split=2))
|
||||||
|
if len(exprs) > 1:
|
||||||
|
if re.match(r'\s*if\s*\(', exprs[0]) and re.match(r'\s*else\b', exprs[1]):
|
||||||
|
else_expr = exprs[0] + '}' + exprs[1]
|
||||||
|
expr = (exprs[2] + '}') if len(exprs) == 3 else None
|
||||||
|
else:
|
||||||
|
else_expr = exprs[0]
|
||||||
|
exprs.append('')
|
||||||
|
expr = '}'.join(exprs[1:])
|
||||||
|
else:
|
||||||
|
else_expr = exprs[0]
|
||||||
|
expr = None
|
||||||
|
else_expr = else_expr.lstrip() + '}'
|
||||||
|
cndn = _js_ternary(self.interpret_expression(cndn, local_vars, allow_recursion))
|
||||||
|
ret, should_abort = self.interpret_statement(
|
||||||
|
if_expr if cndn else else_expr, local_vars, allow_recursion)
|
||||||
|
if should_abort:
|
||||||
|
return ret, True
|
||||||
|
|
||||||
|
elif md.get('try'):
|
||||||
try_expr, expr = self._separate_at_paren(expr[m.end() - 1:])
|
try_expr, expr = self._separate_at_paren(expr[m.end() - 1:])
|
||||||
err = None
|
err = None
|
||||||
try:
|
try:
|
||||||
|
@ -455,7 +591,7 @@ class JSInterpreter(object):
|
||||||
err = None
|
err = None
|
||||||
pending = self.interpret_statement(sub_expr, catch_vars, allow_recursion)
|
pending = self.interpret_statement(sub_expr, catch_vars, allow_recursion)
|
||||||
|
|
||||||
m = re.match(r'finally\s*\{', expr)
|
m = self._FINALLY_RE.match(expr)
|
||||||
if m:
|
if m:
|
||||||
sub_expr, expr = self._separate_at_paren(expr[m.end() - 1:])
|
sub_expr, expr = self._separate_at_paren(expr[m.end() - 1:])
|
||||||
ret, should_abort = self.interpret_statement(sub_expr, local_vars, allow_recursion)
|
ret, should_abort = self.interpret_statement(sub_expr, local_vars, allow_recursion)
|
||||||
|
@ -469,23 +605,24 @@ class JSInterpreter(object):
|
||||||
if err:
|
if err:
|
||||||
raise err
|
raise err
|
||||||
|
|
||||||
elif md.get('for'):
|
elif md.get('for') or md.get('while'):
|
||||||
constructor, remaining = self._separate_at_paren(expr[m.end() - 1:])
|
init_or_cond, remaining = self._separate_at_paren(expr[m.end() - 1:])
|
||||||
if remaining.startswith('{'):
|
if remaining.startswith('{'):
|
||||||
body, expr = self._separate_at_paren(remaining)
|
body, expr = self._separate_at_paren(remaining)
|
||||||
else:
|
else:
|
||||||
switch_m = re.match(r'switch\s*\(', remaining) # FIXME
|
switch_m = self._SWITCH_RE.match(remaining) # FIXME
|
||||||
if switch_m:
|
if switch_m:
|
||||||
switch_val, remaining = self._separate_at_paren(remaining[switch_m.end() - 1:])
|
switch_val, remaining = self._separate_at_paren(remaining[switch_m.end() - 1:])
|
||||||
body, expr = self._separate_at_paren(remaining, '}')
|
body, expr = self._separate_at_paren(remaining, '}')
|
||||||
body = 'switch(%s){%s}' % (switch_val, body)
|
body = 'switch(%s){%s}' % (switch_val, body)
|
||||||
else:
|
else:
|
||||||
body, expr = remaining, ''
|
body, expr = remaining, ''
|
||||||
start, cndn, increment = self._separate(constructor, ';')
|
if md.get('for'):
|
||||||
self.interpret_expression(start, local_vars, allow_recursion)
|
start, cndn, increment = self._separate(init_or_cond, ';')
|
||||||
while True:
|
self.interpret_expression(start, local_vars, allow_recursion)
|
||||||
if not _js_ternary(self.interpret_expression(cndn, local_vars, allow_recursion)):
|
else:
|
||||||
break
|
cndn, increment = init_or_cond, None
|
||||||
|
while _js_ternary(self.interpret_expression(cndn, local_vars, allow_recursion)):
|
||||||
try:
|
try:
|
||||||
ret, should_abort = self.interpret_statement(body, local_vars, allow_recursion)
|
ret, should_abort = self.interpret_statement(body, local_vars, allow_recursion)
|
||||||
if should_abort:
|
if should_abort:
|
||||||
|
@ -494,7 +631,8 @@ class JSInterpreter(object):
|
||||||
break
|
break
|
||||||
except JS_Continue:
|
except JS_Continue:
|
||||||
pass
|
pass
|
||||||
self.interpret_expression(increment, local_vars, allow_recursion)
|
if increment:
|
||||||
|
self.interpret_expression(increment, local_vars, allow_recursion)
|
||||||
|
|
||||||
elif md.get('switch'):
|
elif md.get('switch'):
|
||||||
switch_val, remaining = self._separate_at_paren(expr[m.end() - 1:])
|
switch_val, remaining = self._separate_at_paren(expr[m.end() - 1:])
|
||||||
|
@ -593,7 +731,7 @@ class JSInterpreter(object):
|
||||||
elif expr == 'undefined':
|
elif expr == 'undefined':
|
||||||
return JS_Undefined, should_return
|
return JS_Undefined, should_return
|
||||||
elif expr == 'NaN':
|
elif expr == 'NaN':
|
||||||
return float('NaN'), should_return
|
return _NaN, should_return
|
||||||
|
|
||||||
elif md.get('return'):
|
elif md.get('return'):
|
||||||
return local_vars[m.group('name')], should_return
|
return local_vars[m.group('name')], should_return
|
||||||
|
@ -620,9 +758,24 @@ class JSInterpreter(object):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
right_expr = separated.pop()
|
right_expr = separated.pop()
|
||||||
while op == '-' and len(separated) > 1 and not separated[-1].strip():
|
# handle operators that are both unary and binary, minimal BODMAS
|
||||||
right_expr = '-' + right_expr
|
if op in ('+', '-'):
|
||||||
separated.pop()
|
undone = 0
|
||||||
|
while len(separated) > 1 and not separated[-1].strip():
|
||||||
|
undone += 1
|
||||||
|
separated.pop()
|
||||||
|
if op == '-' and undone % 2 != 0:
|
||||||
|
right_expr = op + right_expr
|
||||||
|
left_val = separated[-1]
|
||||||
|
for dm_op in ('*', '%', '/', '**'):
|
||||||
|
bodmas = tuple(self._separate(left_val, dm_op, skip_delims=skip_delim))
|
||||||
|
if len(bodmas) > 1 and not bodmas[-1].strip():
|
||||||
|
expr = op.join(separated) + op + right_expr
|
||||||
|
right_expr = None
|
||||||
|
break
|
||||||
|
if right_expr is None:
|
||||||
|
continue
|
||||||
|
|
||||||
left_val = self.interpret_expression(op.join(separated), local_vars, allow_recursion)
|
left_val = self.interpret_expression(op.join(separated), local_vars, allow_recursion)
|
||||||
return self._operator(op, left_val, right_expr, expr, local_vars, allow_recursion), should_return
|
return self._operator(op, left_val, right_expr, expr, local_vars, allow_recursion), should_return
|
||||||
|
|
||||||
|
@ -640,7 +793,7 @@ class JSInterpreter(object):
|
||||||
""" assert, but without risk of getting optimized out """
|
""" assert, but without risk of getting optimized out """
|
||||||
if not cndn:
|
if not cndn:
|
||||||
memb = member
|
memb = member
|
||||||
raise self.Exception('{member} {msg}'.format(**locals()), expr=expr)
|
raise self.Exception('{memb} {msg}'.format(**locals()), expr=expr)
|
||||||
|
|
||||||
def eval_method():
|
def eval_method():
|
||||||
if (variable, member) == ('console', 'debug'):
|
if (variable, member) == ('console', 'debug'):
|
||||||
|
@ -676,7 +829,7 @@ class JSInterpreter(object):
|
||||||
if obj == compat_str:
|
if obj == compat_str:
|
||||||
if member == 'fromCharCode':
|
if member == 'fromCharCode':
|
||||||
assertion(argvals, 'takes one or more arguments')
|
assertion(argvals, 'takes one or more arguments')
|
||||||
return ''.join(map(chr, argvals))
|
return ''.join(map(compat_chr, argvals))
|
||||||
raise self.Exception('Unsupported string method ' + member, expr=expr)
|
raise self.Exception('Unsupported string method ' + member, expr=expr)
|
||||||
elif obj == float:
|
elif obj == float:
|
||||||
if member == 'pow':
|
if member == 'pow':
|
||||||
|
@ -749,6 +902,17 @@ class JSInterpreter(object):
|
||||||
if idx >= len(obj):
|
if idx >= len(obj):
|
||||||
return None
|
return None
|
||||||
return ord(obj[idx])
|
return ord(obj[idx])
|
||||||
|
elif member in ('replace', 'replaceAll'):
|
||||||
|
assertion(isinstance(obj, compat_str), 'must be applied on a string')
|
||||||
|
assertion(len(argvals) == 2, 'takes exactly two arguments')
|
||||||
|
# TODO: argvals[1] callable, other Py vs JS edge cases
|
||||||
|
if isinstance(argvals[0], self.JS_RegExp):
|
||||||
|
count = 0 if argvals[0].flags & self.JS_RegExp.RE_FLAGS['g'] else 1
|
||||||
|
assertion(member != 'replaceAll' or count == 0,
|
||||||
|
'replaceAll must be called with a global RegExp')
|
||||||
|
return argvals[0].sub(argvals[1], obj, count=count)
|
||||||
|
count = ('replaceAll', 'replace').index(member)
|
||||||
|
return re.sub(re.escape(argvals[0]), argvals[1], obj, count=count)
|
||||||
|
|
||||||
idx = int(member) if isinstance(obj, list) else member
|
idx = int(member) if isinstance(obj, list) else member
|
||||||
return obj[idx](argvals, allow_recursion=allow_recursion)
|
return obj[idx](argvals, allow_recursion=allow_recursion)
|
||||||
|
@ -780,19 +944,26 @@ class JSInterpreter(object):
|
||||||
raise self.Exception('Cannot return from an expression', expr)
|
raise self.Exception('Cannot return from an expression', expr)
|
||||||
return ret
|
return ret
|
||||||
|
|
||||||
|
def interpret_iter(self, list_txt, local_vars, allow_recursion):
|
||||||
|
for v in self._separate(list_txt):
|
||||||
|
yield self.interpret_expression(v, local_vars, allow_recursion)
|
||||||
|
|
||||||
def extract_object(self, objname):
|
def extract_object(self, objname):
|
||||||
_FUNC_NAME_RE = r'''(?:[a-zA-Z$0-9]+|"[a-zA-Z$0-9]+"|'[a-zA-Z$0-9]+')'''
|
_FUNC_NAME_RE = r'''(?:[a-zA-Z$0-9]+|"[a-zA-Z$0-9]+"|'[a-zA-Z$0-9]+')'''
|
||||||
obj = {}
|
obj = {}
|
||||||
obj_m = re.search(
|
fields = None
|
||||||
r'''(?x)
|
for obj_m in re.finditer(
|
||||||
(?<!this\.)%s\s*=\s*{\s*
|
r'''(?xs)
|
||||||
(?P<fields>(%s\s*:\s*function\s*\(.*?\)\s*{.*?}(?:,\s*)?)*)
|
{0}\s*\.\s*{1}|{1}\s*=\s*\{{\s*
|
||||||
}\s*;
|
(?P<fields>({2}\s*:\s*function\s*\(.*?\)\s*\{{.*?}}(?:,\s*)?)*)
|
||||||
''' % (re.escape(objname), _FUNC_NAME_RE),
|
}}\s*;
|
||||||
self.code)
|
'''.format(_NAME_RE, re.escape(objname), _FUNC_NAME_RE),
|
||||||
if not obj_m:
|
self.code):
|
||||||
|
fields = obj_m.group('fields')
|
||||||
|
if fields:
|
||||||
|
break
|
||||||
|
else:
|
||||||
raise self.Exception('Could not find object ' + objname)
|
raise self.Exception('Could not find object ' + objname)
|
||||||
fields = obj_m.group('fields')
|
|
||||||
# Currently, it only supports function definitions
|
# Currently, it only supports function definitions
|
||||||
fields_m = re.finditer(
|
fields_m = re.finditer(
|
||||||
r'''(?x)
|
r'''(?x)
|
||||||
|
@ -805,6 +976,17 @@ class JSInterpreter(object):
|
||||||
|
|
||||||
return obj
|
return obj
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _offset_e_by_d(d, e, local_vars):
|
||||||
|
""" Short-cut eval: (d%e.length+e.length)%e.length """
|
||||||
|
try:
|
||||||
|
d = local_vars[d]
|
||||||
|
e = local_vars[e]
|
||||||
|
e = len(e)
|
||||||
|
return _js_mod(_js_mod(d, e) + e, e), False
|
||||||
|
except Exception:
|
||||||
|
return None, True
|
||||||
|
|
||||||
def extract_function_code(self, funcname):
|
def extract_function_code(self, funcname):
|
||||||
""" @returns argnames, code """
|
""" @returns argnames, code """
|
||||||
func_m = re.search(
|
func_m = re.search(
|
||||||
|
@ -817,13 +999,15 @@ class JSInterpreter(object):
|
||||||
\((?P<args>[^)]*)\)\s*
|
\((?P<args>[^)]*)\)\s*
|
||||||
(?P<code>{.+})''' % {'name': re.escape(funcname)},
|
(?P<code>{.+})''' % {'name': re.escape(funcname)},
|
||||||
self.code)
|
self.code)
|
||||||
code, _ = self._separate_at_paren(func_m.group('code')) # refine the match
|
|
||||||
if func_m is None:
|
if func_m is None:
|
||||||
raise self.Exception('Could not find JS function "{funcname}"'.format(**locals()))
|
raise self.Exception('Could not find JS function "{funcname}"'.format(**locals()))
|
||||||
|
code, _ = self._separate_at_paren(func_m.group('code')) # refine the match
|
||||||
return self.build_arglist(func_m.group('args')), code
|
return self.build_arglist(func_m.group('args')), code
|
||||||
|
|
||||||
def extract_function(self, funcname):
|
def extract_function(self, funcname):
|
||||||
return self.extract_function_from_code(*self.extract_function_code(funcname))
|
return function_with_repr(
|
||||||
|
self.extract_function_from_code(*self.extract_function_code(funcname)),
|
||||||
|
'F<%s>' % (funcname, ))
|
||||||
|
|
||||||
def extract_function_from_code(self, argnames, code, *global_stack):
|
def extract_function_from_code(self, argnames, code, *global_stack):
|
||||||
local_vars = {}
|
local_vars = {}
|
||||||
|
|
|
@ -11,6 +11,7 @@ from .compat import (
|
||||||
compat_get_terminal_size,
|
compat_get_terminal_size,
|
||||||
compat_getenv,
|
compat_getenv,
|
||||||
compat_kwargs,
|
compat_kwargs,
|
||||||
|
compat_open as open,
|
||||||
compat_shlex_split,
|
compat_shlex_split,
|
||||||
)
|
)
|
||||||
from .utils import (
|
from .utils import (
|
||||||
|
@ -41,14 +42,11 @@ def _hide_login_info(opts):
|
||||||
def parseOpts(overrideArguments=None):
|
def parseOpts(overrideArguments=None):
|
||||||
def _readOptions(filename_bytes, default=[]):
|
def _readOptions(filename_bytes, default=[]):
|
||||||
try:
|
try:
|
||||||
optionf = open(filename_bytes)
|
optionf = open(filename_bytes, encoding=preferredencoding())
|
||||||
except IOError:
|
except IOError:
|
||||||
return default # silently skip if file is not present
|
return default # silently skip if file is not present
|
||||||
try:
|
try:
|
||||||
# FIXME: https://github.com/ytdl-org/youtube-dl/commit/dfe5fa49aed02cf36ba9f743b11b0903554b5e56
|
|
||||||
contents = optionf.read()
|
contents = optionf.read()
|
||||||
if sys.version_info < (3,):
|
|
||||||
contents = contents.decode(preferredencoding())
|
|
||||||
res = compat_shlex_split(contents, comments=True)
|
res = compat_shlex_split(contents, comments=True)
|
||||||
finally:
|
finally:
|
||||||
optionf.close()
|
optionf.close()
|
||||||
|
@ -546,12 +544,14 @@ def parseOpts(overrideArguments=None):
|
||||||
workarounds.add_option(
|
workarounds.add_option(
|
||||||
'--referer',
|
'--referer',
|
||||||
metavar='URL', dest='referer', default=None,
|
metavar='URL', dest='referer', default=None,
|
||||||
help='Specify a custom referer, use if the video access is restricted to one domain',
|
help='Specify a custom Referer: use if the video access is restricted to one domain',
|
||||||
)
|
)
|
||||||
workarounds.add_option(
|
workarounds.add_option(
|
||||||
'--add-header',
|
'--add-header',
|
||||||
metavar='FIELD:VALUE', dest='headers', action='append',
|
metavar='FIELD:VALUE', dest='headers', action='append',
|
||||||
help='Specify a custom HTTP header and its value, separated by a colon \':\'. You can use this option multiple times',
|
help=('Specify a custom HTTP header and its value, separated by a colon \':\'. You can use this option multiple times. '
|
||||||
|
'NB Use --cookies rather than adding a Cookie header if its contents may be sensitive; '
|
||||||
|
'data from a Cookie header will be sent to all domains, not just the one intended')
|
||||||
)
|
)
|
||||||
workarounds.add_option(
|
workarounds.add_option(
|
||||||
'--bidi-workaround',
|
'--bidi-workaround',
|
||||||
|
@ -733,9 +733,13 @@ def parseOpts(overrideArguments=None):
|
||||||
'--no-part',
|
'--no-part',
|
||||||
action='store_true', dest='nopart', default=False,
|
action='store_true', dest='nopart', default=False,
|
||||||
help='Do not use .part files - write directly into output file')
|
help='Do not use .part files - write directly into output file')
|
||||||
|
filesystem.add_option(
|
||||||
|
'--mtime',
|
||||||
|
action='store_true', dest='updatetime', default=True,
|
||||||
|
help='Use the Last-modified header to set the file modification time (default)')
|
||||||
filesystem.add_option(
|
filesystem.add_option(
|
||||||
'--no-mtime',
|
'--no-mtime',
|
||||||
action='store_false', dest='updatetime', default=True,
|
action='store_false', dest='updatetime',
|
||||||
help='Do not use the Last-modified header to set the file modification time')
|
help='Do not use the Last-modified header to set the file modification time')
|
||||||
filesystem.add_option(
|
filesystem.add_option(
|
||||||
'--write-description',
|
'--write-description',
|
||||||
|
|
|
@ -18,6 +18,8 @@ from ..utils import (
|
||||||
shell_quote,
|
shell_quote,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
from ..compat import compat_open as open
|
||||||
|
|
||||||
|
|
||||||
class EmbedThumbnailPPError(PostProcessingError):
|
class EmbedThumbnailPPError(PostProcessingError):
|
||||||
pass
|
pass
|
||||||
|
|
|
@ -1,6 +1,5 @@
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import io
|
|
||||||
import os
|
import os
|
||||||
import subprocess
|
import subprocess
|
||||||
import time
|
import time
|
||||||
|
@ -9,6 +8,7 @@ import re
|
||||||
|
|
||||||
from .common import AudioConversionError, PostProcessor
|
from .common import AudioConversionError, PostProcessor
|
||||||
|
|
||||||
|
from ..compat import compat_open as open
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
encodeArgument,
|
encodeArgument,
|
||||||
encodeFilename,
|
encodeFilename,
|
||||||
|
@ -493,7 +493,7 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
|
||||||
chapters = info.get('chapters', [])
|
chapters = info.get('chapters', [])
|
||||||
if chapters:
|
if chapters:
|
||||||
metadata_filename = replace_extension(filename, 'meta')
|
metadata_filename = replace_extension(filename, 'meta')
|
||||||
with io.open(metadata_filename, 'wt', encoding='utf-8') as f:
|
with open(metadata_filename, 'w', encoding='utf-8') as f:
|
||||||
def ffmpeg_escape(text):
|
def ffmpeg_escape(text):
|
||||||
return re.sub(r'(=|;|#|\\|\n)', r'\\\1', text)
|
return re.sub(r'(=|;|#|\\|\n)', r'\\\1', text)
|
||||||
|
|
||||||
|
@ -636,7 +636,7 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
|
||||||
with open(dfxp_file, 'rb') as f:
|
with open(dfxp_file, 'rb') as f:
|
||||||
srt_data = dfxp2srt(f.read())
|
srt_data = dfxp2srt(f.read())
|
||||||
|
|
||||||
with io.open(srt_file, 'wt', encoding='utf-8') as f:
|
with open(srt_file, 'w', encoding='utf-8') as f:
|
||||||
f.write(srt_data)
|
f.write(srt_data)
|
||||||
old_file = srt_file
|
old_file = srt_file
|
||||||
|
|
||||||
|
@ -652,7 +652,7 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
|
||||||
|
|
||||||
self.run_ffmpeg(old_file, new_file, ['-f', new_format])
|
self.run_ffmpeg(old_file, new_file, ['-f', new_format])
|
||||||
|
|
||||||
with io.open(new_file, 'rt', encoding='utf-8') as f:
|
with open(new_file, 'r', encoding='utf-8') as f:
|
||||||
subs[lang] = {
|
subs[lang] = {
|
||||||
'ext': new_ext,
|
'ext': new_ext,
|
||||||
'data': f.read(),
|
'data': f.read(),
|
||||||
|
|
|
@ -727,7 +727,7 @@ class SWFInterpreter(object):
|
||||||
stack.append(res)
|
stack.append(res)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
assert isinstance(obj, (dict, _ScopeDict)),\
|
assert isinstance(obj, (dict, _ScopeDict)), \
|
||||||
'Accessing member %r on %r' % (pname, obj)
|
'Accessing member %r on %r' % (pname, obj)
|
||||||
res = obj.get(pname, undefined)
|
res = obj.get(pname, undefined)
|
||||||
stack.append(res)
|
stack.append(res)
|
||||||
|
|
|
@ -1,6 +1,5 @@
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import io
|
|
||||||
import json
|
import json
|
||||||
import traceback
|
import traceback
|
||||||
import hashlib
|
import hashlib
|
||||||
|
@ -9,7 +8,10 @@ import subprocess
|
||||||
import sys
|
import sys
|
||||||
from zipimport import zipimporter
|
from zipimport import zipimporter
|
||||||
|
|
||||||
from .compat import compat_realpath
|
from .compat import (
|
||||||
|
compat_open as open,
|
||||||
|
compat_realpath,
|
||||||
|
)
|
||||||
from .utils import encode_compat_str
|
from .utils import encode_compat_str
|
||||||
|
|
||||||
from .version import __version__
|
from .version import __version__
|
||||||
|
@ -127,7 +129,7 @@ def update_self(to_screen, verbose, opener):
|
||||||
|
|
||||||
try:
|
try:
|
||||||
bat = os.path.join(directory, 'youtube-dl-updater.bat')
|
bat = os.path.join(directory, 'youtube-dl-updater.bat')
|
||||||
with io.open(bat, 'w') as batfile:
|
with open(bat, 'w') as batfile:
|
||||||
batfile.write('''
|
batfile.write('''
|
||||||
@echo off
|
@echo off
|
||||||
echo Waiting for file handle to be closed ...
|
echo Waiting for file handle to be closed ...
|
||||||
|
|
File diff suppressed because it is too large
Load diff
Loading…
Reference in a new issue