Merge remote-tracking branch 'origin/master' into IE_cleanup

Conflicts:
	youtube_dl/FileDownloader.py
This commit is contained in:
Filippo Valsorda 2012-11-27 23:20:32 +01:00
commit c63cc10ffa
12 changed files with 346 additions and 102 deletions

1
.gitignore vendored
View file

@ -3,3 +3,4 @@
*~ *~
wine-py2exe/ wine-py2exe/
py2exe.log py2exe.log
*.kate-swp

View file

@ -1 +1 @@
2012.11.28 2012.11.29

View file

@ -1,4 +1,4 @@
% youtube-dl(1) % YOUTUBE-DL(1)
# NAME # NAME
youtube-dl youtube-dl
@ -20,6 +20,11 @@ which means you can modify it, redistribute it or use it however you like.
-i, --ignore-errors continue on download errors -i, --ignore-errors continue on download errors
-r, --rate-limit LIMIT download rate limit (e.g. 50k or 44.6m) -r, --rate-limit LIMIT download rate limit (e.g. 50k or 44.6m)
-R, --retries RETRIES number of retries (default is 10) -R, --retries RETRIES number of retries (default is 10)
--buffer-size SIZE size of download buffer (e.g. 1024 or 16k) (default
is 1024)
--no-resize-buffer do not automatically adjust the buffer size. By
default, the buffer size is automatically resized
from an initial value of SIZE.
--dump-user-agent display the current browser identification --dump-user-agent display the current browser identification
--user-agent UA specify a custom user agent --user-agent UA specify a custom user agent
--list-extractors List all supported extractors and the URLs they --list-extractors List all supported extractors and the URLs they
@ -108,6 +113,28 @@ which means you can modify it, redistribute it or use it however you like.
You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.local/config/youtube-dl.conf`. You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.local/config/youtube-dl.conf`.
# OUTPUT TEMPLATE
The `-o` option allows users to indicate a template for the output file names. The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "http://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences have the format `%(NAME)s`. To clarify, that is a percent symbol followed by a name in parentheses, followed by a lowercase S. Allowed names are:
- `id`: The sequence will be replaced by the video identifier.
- `url`: The sequence will be replaced by the video URL.
- `uploader`: The sequence will be replaced by the nickname of the person who uploaded the video.
- `upload_date`: The sequence will be replaced by the upload date in YYYYMMDD format.
- `title`: The sequence will be replaced by the video title.
- `ext`: The sequence will be replaced by the appropriate extension (like flv or mp4).
- `epoch`: The sequence will be replaced by the Unix epoch when creating the file.
- `autonumber`: The sequence will be replaced by a five-digit number that will be increased with each download, starting at zero.
The current default template is `%(id)s.%(ext)s`, but that will be switched to `%(title)s-%(id)s.%(ext)s` (which can be requested with `-t` at the moment).
In some cases, you don't want special characters such as 中, spaces, or & in the filename, for example when transferring the downloaded file to a Windows system or passing the filename through an 8bit-unsafe channel. In these cases, add the `--restrict-filenames` flag to get a shorter title:
$ youtube-dl --get-filename -o "%(title)s.%(ext)s" BaW_jenozKc
youtube-dl test video ''_ä↭𝕐.mp4 # All kinds of weird characters
$ youtube-dl --get-filename -o "%(title)s.%(ext)s" BaW_jenozKc --restrict-filenames
youtube-dl_test_video_.mp4 # A simple file name
# FAQ # FAQ
### Can you please put the -b option back? ### Can you please put the -b option back?

View file

@ -56,7 +56,7 @@ class TestUtil(unittest.TestCase):
self.assertEqual(sanitize_filename(u'aäb中国的c', restricted=True), u'a_b_c') self.assertEqual(sanitize_filename(u'aäb中国的c', restricted=True), u'a_b_c')
self.assertTrue(sanitize_filename(u'ö', restricted=True) != u'') # No empty filename self.assertTrue(sanitize_filename(u'ö', restricted=True) != u'') # No empty filename
forbidden = u'"\0\\/&: \'\t\n' forbidden = u'"\0\\/&!: \'\t\n'
for fc in forbidden: for fc in forbidden:
for fbc in forbidden: for fbc in forbidden:
self.assertTrue(fbc not in sanitize_filename(fc, restricted=True)) self.assertTrue(fbc not in sanitize_filename(fc, restricted=True))

Binary file not shown.

View file

@ -1,4 +1,4 @@
.TH youtube-dl 1 "" .TH YOUTUBE-DL 1 ""
.SH NAME .SH NAME
.PP .PP
youtube-dl youtube-dl
@ -24,6 +24,11 @@ redistribute it or use it however you like.
-i,\ --ignore-errors\ \ \ \ \ \ continue\ on\ download\ errors -i,\ --ignore-errors\ \ \ \ \ \ continue\ on\ download\ errors
-r,\ --rate-limit\ LIMIT\ \ \ download\ rate\ limit\ (e.g.\ 50k\ or\ 44.6m) -r,\ --rate-limit\ LIMIT\ \ \ download\ rate\ limit\ (e.g.\ 50k\ or\ 44.6m)
-R,\ --retries\ RETRIES\ \ \ \ number\ of\ retries\ (default\ is\ 10) -R,\ --retries\ RETRIES\ \ \ \ number\ of\ retries\ (default\ is\ 10)
--buffer-size\ SIZE\ \ \ \ \ \ \ size\ of\ download\ buffer\ (e.g.\ 1024\ or\ 16k)\ (default
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ is\ 1024)
--no-resize-buffer\ \ \ \ \ \ \ do\ not\ automatically\ adjust\ the\ buffer\ size.\ By
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ default,\ the\ buffer\ size\ is\ automatically\ resized
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ from\ an\ initial\ value\ of\ SIZE.
--dump-user-agent\ \ \ \ \ \ \ \ display\ the\ current\ browser\ identification --dump-user-agent\ \ \ \ \ \ \ \ display\ the\ current\ browser\ identification
--user-agent\ UA\ \ \ \ \ \ \ \ \ \ specify\ a\ custom\ user\ agent --user-agent\ UA\ \ \ \ \ \ \ \ \ \ specify\ a\ custom\ user\ agent
--list-extractors\ \ \ \ \ \ \ \ List\ all\ supported\ extractors\ and\ the\ URLs\ they --list-extractors\ \ \ \ \ \ \ \ List\ all\ supported\ extractors\ and\ the\ URLs\ they
@ -139,6 +144,59 @@ You can configure youtube-dl by placing default arguments (such as
\f[C]--extract-audio\ --no-mtime\f[] to always extract the audio and not \f[C]--extract-audio\ --no-mtime\f[] to always extract the audio and not
copy the mtime) into \f[C]/etc/youtube-dl.conf\f[] and/or copy the mtime) into \f[C]/etc/youtube-dl.conf\f[] and/or
\f[C]~/.local/config/youtube-dl.conf\f[]. \f[C]~/.local/config/youtube-dl.conf\f[].
.SH OUTPUT TEMPLATE
.PP
The \f[C]-o\f[] option allows users to indicate a template for the
output file names.
The basic usage is not to set any template arguments when downloading a
single file, like in
\f[C]youtube-dl\ -o\ funny_video.flv\ "http://some/video"\f[].
However, it may contain special sequences that will be replaced when
downloading each video.
The special sequences have the format \f[C]%(NAME)s\f[].
To clarify, that is a percent symbol followed by a name in parentheses,
followed by a lowercase S.
Allowed names are:
.IP \[bu] 2
\f[C]id\f[]: The sequence will be replaced by the video identifier.
.IP \[bu] 2
\f[C]url\f[]: The sequence will be replaced by the video URL.
.IP \[bu] 2
\f[C]uploader\f[]: The sequence will be replaced by the nickname of the
person who uploaded the video.
.IP \[bu] 2
\f[C]upload_date\f[]: The sequence will be replaced by the upload date
in YYYYMMDD format.
.IP \[bu] 2
\f[C]title\f[]: The sequence will be replaced by the video title.
.IP \[bu] 2
\f[C]ext\f[]: The sequence will be replaced by the appropriate extension
(like flv or mp4).
.IP \[bu] 2
\f[C]epoch\f[]: The sequence will be replaced by the Unix epoch when
creating the file.
.IP \[bu] 2
\f[C]autonumber\f[]: The sequence will be replaced by a five-digit
number that will be increased with each download, starting at zero.
.PP
The current default template is \f[C]%(id)s.%(ext)s\f[], but that will
be switched to \f[C]%(title)s-%(id)s.%(ext)s\f[] (which can be
requested with \f[C]-t\f[] at the moment).
.PP
In some cases, you don\[aq]t want special characters such as 中, spaces,
or & in the filename, for example when transferring the downloaded file
to a Windows system or passing the filename through an 8bit-unsafe
channel.
In these cases, add the \f[C]--restrict-filenames\f[] flag to get a
shorter title:
.IP
.nf
\f[C]
$\ youtube-dl\ --get-filename\ -o\ "%(title)s.%(ext)s"\ BaW_jenozKc
youtube-dl\ test\ video\ \[aq]\[aq]_ä↭𝕐.mp4\ \ \ \ #\ All\ kinds\ of\ weird\ characters
$\ youtube-dl\ --get-filename\ -o\ "%(title)s.%(ext)s"\ BaW_jenozKc\ --restrict-filenames
youtube-dl_test_video_.mp4\ \ \ \ \ \ \ \ \ \ #\ A\ simple\ file\ name
\f[]
.fi
.SH FAQ .SH FAQ
.SS Can you please put the -b option back? .SS Can you please put the -b option back?
.PP .PP

View file

@ -3,7 +3,7 @@ __youtube-dl()
local cur prev opts local cur prev opts
COMPREPLY=() COMPREPLY=()
cur="${COMP_WORDS[COMP_CWORD]}" cur="${COMP_WORDS[COMP_CWORD]}"
opts="--all-formats --audio-format --audio-quality --auto-number --batch-file --console-title --continue --cookies --dump-user-agent --extract-audio --format --get-description --get-filename --get-format --get-thumbnail --get-title --get-url --help --id --ignore-errors --keep-video --list-extractors --list-formats --literal --match-title --max-downloads --max-quality --netrc --no-continue --no-mtime --no-overwrites --no-part --no-progress --output --password --playlist-end --playlist-start --prefer-free-formats --quiet --rate-limit --reject-title --restrict-filenames --retries --simulate --skip-download --srt-lang --title --update --user-agent --username --verbose --version --write-description --write-info-json --write-srt" opts="--all-formats --audio-format --audio-quality --auto-number --batch-file --buffer-size --console-title --continue --cookies --dump-user-agent --extract-audio --format --get-description --get-filename --get-format --get-thumbnail --get-title --get-url --help --id --ignore-errors --keep-video --list-extractors --list-formats --literal --match-title --max-downloads --max-quality --netrc --no-continue --no-mtime --no-overwrites --no-part --no-progress --no-resize-buffer --output --password --playlist-end --playlist-start --prefer-free-formats --quiet --rate-limit --reject-title --restrict-filenames --retries --simulate --skip-download --srt-lang --title --update --user-agent --username --verbose --version --write-description --write-info-json --write-srt"
if [[ ${cur} == * ]] ; then if [[ ${cur} == * ]] ; then
COMPREPLY=( $(compgen -W "${opts}" -- ${cur}) ) COMPREPLY=( $(compgen -W "${opts}" -- ${cur}) )

BIN
youtube-dl.exe Executable file → Normal file

Binary file not shown.

View file

@ -62,6 +62,8 @@ class FileDownloader(object):
ratelimit: Download speed limit, in bytes/sec. ratelimit: Download speed limit, in bytes/sec.
nooverwrites: Prevent overwriting files. nooverwrites: Prevent overwriting files.
retries: Number of times to retry for HTTP error 5xx retries: Number of times to retry for HTTP error 5xx
buffersize: Size of download buffer in bytes.
noresizebuffer: Do not automatically resize the download buffer.
continuedl: Try to continue downloads if possible. continuedl: Try to continue downloads if possible.
noprogress: Do not print the progress bar. noprogress: Do not print the progress bar.
playliststart: Playlist item to start at. playliststart: Playlist item to start at.
@ -106,7 +108,7 @@ class FileDownloader(object):
if bytes == 0.0: if bytes == 0.0:
exponent = 0 exponent = 0
else: else:
exponent = long(math.log(bytes, 1024.0)) exponent = int(math.log(bytes, 1024.0))
suffix = 'bkMGTPEZY'[exponent] suffix = 'bkMGTPEZY'[exponent]
converted = float(bytes) / float(1024 ** exponent) converted = float(bytes) / float(1024 ** exponent)
return '%.2f%s' % (converted, suffix) return '%.2f%s' % (converted, suffix)
@ -125,7 +127,7 @@ class FileDownloader(object):
if current == 0 or dif < 0.001: # One millisecond if current == 0 or dif < 0.001: # One millisecond
return '--:--' return '--:--'
rate = float(current) / dif rate = float(current) / dif
eta = long((float(total) - float(current)) / rate) eta = int((float(total) - float(current)) / rate)
(eta_mins, eta_secs) = divmod(eta, 60) (eta_mins, eta_secs) = divmod(eta, 60)
if eta_mins > 99: if eta_mins > 99:
return '--:--' return '--:--'
@ -177,7 +179,7 @@ class FileDownloader(object):
if not self.params.get('quiet', False): if not self.params.get('quiet', False):
terminator = [u'\n', u''][skip_eol] terminator = [u'\n', u''][skip_eol]
output = message + terminator output = message + terminator
if 'b' not in self._screen_file.mode or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
output = output.encode(preferredencoding(), 'ignore') output = output.encode(preferredencoding(), 'ignore')
self._screen_file.write(output) self._screen_file.write(output)
self._screen_file.flush() self._screen_file.flush()
@ -325,9 +327,13 @@ class FileDownloader(object):
"""Generate the output filename.""" """Generate the output filename."""
try: try:
template_dict = dict(info_dict) template_dict = dict(info_dict)
template_dict['epoch'] = unicode(int(time.time()))
template_dict['autonumber'] = unicode('%05d' % self._num_downloads) template_dict['epoch'] = int(time.time())
template_dict['autonumber'] = u'%05d' % self._num_downloads
template_dict = dict((key, u'NA' if val is None else val) for key, val in template_dict.items()) template_dict = dict((key, u'NA' if val is None else val) for key, val in template_dict.items())
template_dict = dict((k, sanitize_filename(u(v), self.params.get('restrictfilenames'))) for k,v in template_dict.items())
filename = self.params['outtmpl'] % template_dict filename = self.params['outtmpl'] % template_dict
return filename return filename
except (ValueError, KeyError), err: except (ValueError, KeyError), err:
@ -370,7 +376,6 @@ class FileDownloader(object):
raise MaxDownloadsReached() raise MaxDownloadsReached()
filename = self.prepare_filename(info_dict) filename = self.prepare_filename(info_dict)
filename = sanitize_filename(filename, self.params.get('restrictfilenames'))
# Forced printings # Forced printings
if self.params.get('forcetitle', False): if self.params.get('forcetitle', False):
@ -398,7 +403,7 @@ class FileDownloader(object):
if dn != '' and not os.path.exists(dn): # dn is already encoded if dn != '' and not os.path.exists(dn): # dn is already encoded
os.makedirs(dn) os.makedirs(dn)
except (OSError, IOError), err: except (OSError, IOError), err:
self.trouble(u'ERROR: unable to create directory ' + unicode(err)) self.trouble(u'ERROR: unable to create directory ' + u(err))
return return
if self.params.get('writedescription', False): if self.params.get('writedescription', False):
@ -623,7 +628,7 @@ class FileDownloader(object):
else: else:
# Examine the reported length # Examine the reported length
if (content_length is not None and if (content_length is not None and
(resume_len - 100 < long(content_length) < resume_len + 100)): (resume_len - 100 < int(content_length) < resume_len + 100)):
# The file had already been fully downloaded. # The file had already been fully downloaded.
# Explanation to the above condition: in issue #175 it was revealed that # Explanation to the above condition: in issue #175 it was revealed that
# YouTube sometimes adds or removes a few bytes from the end of the file, # YouTube sometimes adds or removes a few bytes from the end of the file,
@ -650,10 +655,10 @@ class FileDownloader(object):
data_len = data.info().get('Content-length', None) data_len = data.info().get('Content-length', None)
if data_len is not None: if data_len is not None:
data_len = long(data_len) + resume_len data_len = int(data_len) + resume_len
data_len_str = self.format_bytes(data_len) data_len_str = self.format_bytes(data_len)
byte_counter = 0 + resume_len byte_counter = 0 + resume_len
block_size = 1024 block_size = self.params.get('buffersize', 1024)
start = time.time() start = time.time()
while True: while True:
# Download and write # Download and write
@ -679,7 +684,8 @@ class FileDownloader(object):
except (IOError, OSError), err: except (IOError, OSError), err:
self.trouble(u'\nERROR: unable to write data: %s' % str(err)) self.trouble(u'\nERROR: unable to write data: %s' % str(err))
return False return False
block_size = self.best_block_size(after - before, len(data_block)) if not self.params.get('noresizebuffer', False):
block_size = self.best_block_size(after - before, len(data_block))
# Progress message # Progress message
speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len) speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len)
@ -699,7 +705,7 @@ class FileDownloader(object):
stream.close() stream.close()
self.report_finish() self.report_finish()
if data_len is not None and byte_counter != data_len: if data_len is not None and byte_counter != data_len:
raise ContentTooShortError(byte_counter, long(data_len)) raise ContentTooShortError(byte_counter, int(data_len))
self.try_rename(tmpfilename, filename) self.try_rename(tmpfilename, filename)
# Update file modification time # Update file modification time

View file

@ -253,7 +253,7 @@ class YoutubeIE(InfoExtractor):
else: else:
raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE) raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
except (IOError, netrc.NetrcParseError), err: except (IOError, netrc.NetrcParseError), err:
self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % compat_str(err)) self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % u(err))
return return
# Set language # Set language
@ -262,7 +262,7 @@ class YoutubeIE(InfoExtractor):
self.report_lang() self.report_lang()
urllib2.urlopen(request).read() urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.to_stderr(u'WARNING: unable to set language: %s' % compat_str(err)) self._downloader.to_stderr(u'WARNING: unable to set language: %s' % u(err))
return return
# No authentication to be performed # No authentication to be performed
@ -285,7 +285,7 @@ class YoutubeIE(InfoExtractor):
self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password') self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
return return
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.to_stderr(u'WARNING: unable to log in: %s' % compat_str(err)) self._downloader.to_stderr(u'WARNING: unable to log in: %s' % u(err))
return return
# Confirm age # Confirm age
@ -298,7 +298,7 @@ class YoutubeIE(InfoExtractor):
self.report_age_confirmation() self.report_age_confirmation()
age_results = urllib2.urlopen(request).read() age_results = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to confirm age: %s' % compat_str(err)) self._downloader.trouble(u'ERROR: unable to confirm age: %s' % u(err))
return return
def _real_extract(self, url): def _real_extract(self, url):
@ -320,7 +320,7 @@ class YoutubeIE(InfoExtractor):
try: try:
video_webpage = urllib2.urlopen(request).read() video_webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err)) self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % u(err))
return return
# Attempt to extract SWF player URL # Attempt to extract SWF player URL
@ -342,7 +342,7 @@ class YoutubeIE(InfoExtractor):
if 'token' in video_info: if 'token' in video_info:
break break
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % compat_str(err)) self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % u(err))
return return
if 'token' not in video_info: if 'token' not in video_info:
if 'reason' in video_info: if 'reason' in video_info:
@ -405,7 +405,7 @@ class YoutubeIE(InfoExtractor):
try: try:
srt_list = urllib2.urlopen(request).read() srt_list = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
raise Trouble(u'WARNING: unable to download video subtitles: %s' % compat_str(err)) raise Trouble(u'WARNING: unable to download video subtitles: %s' % u(err))
srt_lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', srt_list) srt_lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', srt_list)
srt_lang_list = dict((l[1], l[0]) for l in srt_lang_list) srt_lang_list = dict((l[1], l[0]) for l in srt_lang_list)
if not srt_lang_list: if not srt_lang_list:
@ -422,7 +422,7 @@ class YoutubeIE(InfoExtractor):
try: try:
srt_xml = urllib2.urlopen(request).read() srt_xml = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
raise Trouble(u'WARNING: unable to download video subtitles: %s' % compat_str(err)) raise Trouble(u'WARNING: unable to download video subtitles: %s' % u(err))
if not srt_xml: if not srt_xml:
raise Trouble(u'WARNING: unable to download video subtitles') raise Trouble(u'WARNING: unable to download video subtitles')
video_subtitles = self._closed_captions_xml_to_srt(srt_xml.decode('utf-8')) video_subtitles = self._closed_captions_xml_to_srt(srt_xml.decode('utf-8'))
@ -544,7 +544,7 @@ class MetacafeIE(InfoExtractor):
self.report_disclaimer() self.report_disclaimer()
disclaimer = urllib2.urlopen(request).read() disclaimer = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % compat_str(err)) self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % u(err))
return return
# Confirm age # Confirm age
@ -557,7 +557,7 @@ class MetacafeIE(InfoExtractor):
self.report_age_confirmation() self.report_age_confirmation()
disclaimer = urllib2.urlopen(request).read() disclaimer = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to confirm age: %s' % compat_str(err)) self._downloader.trouble(u'ERROR: unable to confirm age: %s' % u(err))
return return
def _real_extract(self, url): def _real_extract(self, url):
@ -581,7 +581,7 @@ class MetacafeIE(InfoExtractor):
self.report_download_webpage(video_id) self.report_download_webpage(video_id)
webpage = urllib2.urlopen(request).read() webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % compat_str(err)) self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % u(err))
return return
# Extract URL, uploader and title from webpage # Extract URL, uploader and title from webpage
@ -672,7 +672,7 @@ class DailymotionIE(InfoExtractor):
self.report_download_webpage(video_id) self.report_download_webpage(video_id)
webpage = urllib2.urlopen(request).read() webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % compat_str(err)) self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % u(err))
return return
# Extract URL, uploader and title from webpage # Extract URL, uploader and title from webpage
@ -768,7 +768,7 @@ class GoogleIE(InfoExtractor):
self.report_download_webpage(video_id) self.report_download_webpage(video_id)
webpage = urllib2.urlopen(request).read() webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err)) self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % u(err))
return return
# Extract URL, uploader, and title from webpage # Extract URL, uploader, and title from webpage
@ -807,7 +807,7 @@ class GoogleIE(InfoExtractor):
try: try:
webpage = urllib2.urlopen(request).read() webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err)) self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % u(err))
return return
mobj = re.search(r'<img class=thumbnail-img (?:.* )?src=(http.*)>', webpage) mobj = re.search(r'<img class=thumbnail-img (?:.* )?src=(http.*)>', webpage)
if mobj is None: if mobj is None:
@ -861,7 +861,7 @@ class PhotobucketIE(InfoExtractor):
self.report_download_webpage(video_id) self.report_download_webpage(video_id)
webpage = urllib2.urlopen(request).read() webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err)) self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % u(err))
return return
# Extract URL, uploader, and title from webpage # Extract URL, uploader, and title from webpage
@ -929,7 +929,7 @@ class YahooIE(InfoExtractor):
try: try:
webpage = urllib2.urlopen(request).read() webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err)) self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % u(err))
return return
mobj = re.search(r'\("id", "([0-9]+)"\);', webpage) mobj = re.search(r'\("id", "([0-9]+)"\);', webpage)
@ -953,7 +953,7 @@ class YahooIE(InfoExtractor):
self.report_download_webpage(video_id) self.report_download_webpage(video_id)
webpage = urllib2.urlopen(request).read() webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err)) self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % u(err))
return return
# Extract uploader and title from webpage # Extract uploader and title from webpage
@ -1011,7 +1011,7 @@ class YahooIE(InfoExtractor):
self.report_download_webpage(video_id) self.report_download_webpage(video_id)
webpage = urllib2.urlopen(request).read() webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err)) self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % u(err))
return return
# Extract media URL from playlist XML # Extract media URL from playlist XML
@ -1067,7 +1067,7 @@ class VimeoIE(InfoExtractor):
self.report_download_webpage(video_id) self.report_download_webpage(video_id)
webpage = urllib2.urlopen(request).read() webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err)) self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % u(err))
return return
# Now we begin extracting as much information as we can from what we # Now we begin extracting as much information as we can from what we
@ -1147,6 +1147,143 @@ class VimeoIE(InfoExtractor):
}] }]
class ArteTvIE(InfoExtractor):
    """arte.tv information extractor.

    Handles URLs under videos.arte.tv/(fr|de)/videos/.  Pages whose last
    path component matches _LIVE_URL are treated as live streams; every
    other page is treated as an on-demand ("Plus7") video.

    Errors are reported through self._downloader.trouble(); after an error
    has been reported the helpers return None instead of continuing, so a
    failed download or a non-matching page never cascades into a
    TypeError/AttributeError further down.
    """

    _VALID_URL = r'(?:http://)?videos\.arte\.tv/(?:fr|de)/videos/.*'
    _LIVE_URL = r'index-[0-9]+\.html$'

    IE_NAME = u'arte.tv'

    def __init__(self, downloader=None):
        InfoExtractor.__init__(self, downloader)

    def report_download_webpage(self, video_id):
        """Report webpage download."""
        self._downloader.to_screen(u'[arte.tv] %s: Downloading webpage' % video_id)

    def report_extraction(self, video_id):
        """Report information extraction."""
        self._downloader.to_screen(u'[arte.tv] %s: Extracting information' % video_id)

    def fetch_webpage(self, url):
        """Download url and return the response body.

        Returns None (after reporting the problem via trouble()) when the
        request fails or the URL is rejected with a ValueError.
        """
        # NOTE(review): this bumps the download counter once per fetched
        # page, i.e. several times per video -- confirm this is intended.
        self._downloader.increment_downloads()
        request = urllib2.Request(url)
        try:
            self.report_download_webpage(url)
            webpage = urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
            self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % u(err))
            return None
        except ValueError:
            self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
            return None
        return webpage

    def grep_webpage(self, url, regex, regexFlags, matchTuples):
        """Fetch url, search it with regex and collect named groups.

        matchTuples is a sequence of (group_index, key, error_message)
        triples.  For each triple the matched group is stored under key in
        the returned dict; if the group did not participate in the match,
        error_message is reported and None is returned.

        Returns None as well when the page could not be fetched or the
        regex does not match at all.
        """
        page = self.fetch_webpage(url)
        if page is None:
            # fetch_webpage() has already reported the failure; searching
            # None would only raise a TypeError.
            return None

        mobj = re.search(regex, page, regexFlags)
        if mobj is None:
            self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
            return None

        info = {}
        for (i, key, err) in matchTuples:
            if mobj.group(i) is None:
                self._downloader.trouble(err)
                return None
            info[key] = mobj.group(i)
        return info

    def extractLiveStream(self, url):
        """Resolve the RTMP location of a live stream page.

        Returns None in every case: the resolved location is computed but
        not handed back to the caller.
        """
        video_lang = url.split('/')[-4]
        info = self.grep_webpage(
            url,
            r'src="(.*?/videothek_js.*?\.js)',
            0,
            [
                (1, 'url', u'ERROR: Invalid URL: %s' % url)
            ]
        )
        if info is None:
            return
        http_host = url.split('/')[2]
        next_url = 'http://%s%s' % (http_host, urllib.unquote(info.get('url')))
        info = self.grep_webpage(
            next_url,
            r'(s_artestras_scst_geoFRDE_' + video_lang + r".*?)'.*?" +
                r'(http://.*?\.swf).*?' +
                r"(rtmp://.*?)'",
            re.DOTALL,
            [
                (1, 'path', u'ERROR: could not extract video path: %s' % url),
                (2, 'player', u'ERROR: could not extract video player: %s' % url),
                (3, 'url', u'ERROR: could not extract video url: %s' % url)
            ]
        )
        if info is None:
            return
        # NOTE(review): the stream URL is built but never returned, so live
        # streams are resolved without being downloaded -- confirm whether
        # an info dict should be returned here.
        video_url = u'%s/%s' % (info.get('url'), info.get('path'))

    def extractPlus7Stream(self, url):
        """Follow the reference chain of an on-demand page.

        Three pages are fetched in turn (player page -> video reference
        file -> per-language description).  Returns the info dict for the
        HD stream, or None if any step failed (the failure has then been
        reported already).
        """
        video_lang = url.split('/')[-3]
        info = self.grep_webpage(
            url,
            r'param name="movie".*?videorefFileUrl=(http[^\'"&]*)',
            0,
            [
                (1, 'url', u'ERROR: Invalid URL: %s' % url)
            ]
        )
        if info is None:
            return None
        next_url = urllib.unquote(info.get('url'))
        info = self.grep_webpage(
            next_url,
            r'<video lang="%s" ref="(http[^\'"&]*)' % video_lang,
            0,
            [
                (1, 'url', u'ERROR: Could not find <video> tag: %s' % url)
            ]
        )
        if info is None:
            return None
        next_url = urllib.unquote(info.get('url'))

        info = self.grep_webpage(
            next_url,
            r'<video id="(.*?)".*?>.*?' +
                '<name>(.*?)</name>.*?' +
                '<dateVideo>(.*?)</dateVideo>.*?' +
                '<url quality="hd">(.*?)</url>',
            re.DOTALL,
            [
                (1, 'id', u'ERROR: could not extract video id: %s' % url),
                (2, 'title', u'ERROR: could not extract video title: %s' % url),
                (3, 'date', u'ERROR: could not extract video date: %s' % url),
                (4, 'url', u'ERROR: could not extract video url: %s' % url)
            ]
        )
        if info is None:
            return None

        return {
            'id': info.get('id'),
            'url': urllib.unquote(info.get('url')),
            'uploader': u'arte.tv',
            'upload_date': info.get('date'),
            'title': info.get('title'),
            'ext': u'mp4',
            'format': u'NA',
            'player_url': None,
        }

    def _real_extract(self, url):
        """Extract video information for url.

        Returns a one-element list with the info dict for on-demand
        videos, and None for live stream pages or when extraction failed.
        """
        video_id = url.split('/')[-1]
        self.report_extraction(video_id)

        if re.search(self._LIVE_URL, video_id) is not None:
            self.extractLiveStream(url)
            return

        info = self.extractPlus7Stream(url)
        if info is None:
            # Failure already reported; do not hand [None] to the caller.
            return
        return [info]
class GenericIE(InfoExtractor): class GenericIE(InfoExtractor):
"""Generic last-resort information extractor.""" """Generic last-resort information extractor."""
@ -1232,7 +1369,7 @@ class GenericIE(InfoExtractor):
self.report_download_webpage(video_id) self.report_download_webpage(video_id)
webpage = urllib2.urlopen(request).read() webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err)) self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % u(err))
return return
except ValueError, err: except ValueError, err:
# since this is the last-resort InfoExtractor, if # since this is the last-resort InfoExtractor, if
@ -1324,7 +1461,7 @@ class YoutubeSearchIE(InfoExtractor):
return return
else: else:
try: try:
n = long(prefix) n = int(prefix)
if n <= 0: if n <= 0:
self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query)) self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
return return
@ -1351,7 +1488,7 @@ class YoutubeSearchIE(InfoExtractor):
try: try:
data = urllib2.urlopen(request).read() data = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download API page: %s' % compat_str(err)) self._downloader.trouble(u'ERROR: unable to download API page: %s' % u(err))
return return
api_response = json.loads(data)['data'] api_response = json.loads(data)['data']
@ -1402,7 +1539,7 @@ class GoogleSearchIE(InfoExtractor):
return return
else: else:
try: try:
n = long(prefix) n = int(prefix)
if n <= 0: if n <= 0:
self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query)) self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
return return
@ -1428,7 +1565,7 @@ class GoogleSearchIE(InfoExtractor):
try: try:
page = urllib2.urlopen(request).read() page = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err)) self._downloader.trouble(u'ERROR: unable to download webpage: %s' % u(err))
return return
# Extract video identifiers # Extract video identifiers
@ -1484,7 +1621,7 @@ class YahooSearchIE(InfoExtractor):
return return
else: else:
try: try:
n = long(prefix) n = int(prefix)
if n <= 0: if n <= 0:
self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query)) self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
return return
@ -1511,7 +1648,7 @@ class YahooSearchIE(InfoExtractor):
try: try:
page = urllib2.urlopen(request).read() page = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err)) self._downloader.trouble(u'ERROR: unable to download webpage: %s' % u(err))
return return
# Extract video identifiers # Extract video identifiers
@ -1581,7 +1718,7 @@ class YoutubePlaylistIE(InfoExtractor):
try: try:
page = urllib2.urlopen(request).read() page = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err)) self._downloader.trouble(u'ERROR: unable to download webpage: %s' % u(err))
return return
# Extract video identifiers # Extract video identifiers
@ -1638,7 +1775,7 @@ class YoutubeChannelIE(InfoExtractor):
try: try:
page = urllib2.urlopen(request).read() page = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err)) self._downloader.trouble(u'ERROR: unable to download webpage: %s' % u(err))
return return
# Extract video identifiers # Extract video identifiers
@ -1701,7 +1838,7 @@ class YoutubeUserIE(InfoExtractor):
try: try:
page = urllib2.urlopen(request).read() page = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err)) self._downloader.trouble(u'ERROR: unable to download webpage: %s' % u(err))
return return
# Extract video identifiers # Extract video identifiers
@ -1773,7 +1910,7 @@ class BlipTVUserIE(InfoExtractor):
mobj = re.search(r'data-users-id="([^"]+)"', page) mobj = re.search(r'data-users-id="([^"]+)"', page)
page_base = page_base % mobj.group(1) page_base = page_base % mobj.group(1)
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err)) self._downloader.trouble(u'ERROR: unable to download webpage: %s' % u(err))
return return
@ -1861,7 +1998,7 @@ class DepositFilesIE(InfoExtractor):
self.report_download_webpage(file_id) self.report_download_webpage(file_id)
webpage = urllib2.urlopen(request).read() webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: Unable to retrieve file webpage: %s' % compat_str(err)) self._downloader.trouble(u'ERROR: Unable to retrieve file webpage: %s' % u(err))
return return
# Search for the real file URL # Search for the real file URL
@ -1977,7 +2114,7 @@ class FacebookIE(InfoExtractor):
else: else:
raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE) raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
except (IOError, netrc.NetrcParseError), err: except (IOError, netrc.NetrcParseError), err:
self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % compat_str(err)) self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % u(err))
return return
if useremail is None: if useremail is None:
@ -1997,7 +2134,7 @@ class FacebookIE(InfoExtractor):
self._downloader.to_stderr(u'WARNING: unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.') self._downloader.to_stderr(u'WARNING: unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.')
return return
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.to_stderr(u'WARNING: unable to log in: %s' % compat_str(err)) self._downloader.to_stderr(u'WARNING: unable to log in: %s' % u(err))
return return
def _real_extract(self, url): def _real_extract(self, url):
@ -2014,7 +2151,7 @@ class FacebookIE(InfoExtractor):
page = urllib2.urlopen(request) page = urllib2.urlopen(request)
video_webpage = page.read() video_webpage = page.read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err)) self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % u(err))
return return
# Start extracting information # Start extracting information
@ -2149,13 +2286,13 @@ class BlipTVIE(InfoExtractor):
'urlhandle': urlh 'urlhandle': urlh
} }
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % compat_str(err)) self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % u(err))
return return
if info is None: # Regular URL if info is None: # Regular URL
try: try:
json_code = urlh.read() json_code = urlh.read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to read video info webpage: %s' % compat_str(err)) self._downloader.trouble(u'ERROR: unable to read video info webpage: %s' % u(err))
return return
try: try:
@ -2223,7 +2360,7 @@ class MyVideoIE(InfoExtractor):
self.report_download_webpage(video_id) self.report_download_webpage(video_id)
webpage = urllib2.urlopen(request).read() webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err)) self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % u(err))
return return
self.report_extraction(video_id) self.report_extraction(video_id)
@ -2320,7 +2457,7 @@ class ComedyCentralIE(InfoExtractor):
htmlHandle = urllib2.urlopen(req) htmlHandle = urllib2.urlopen(req)
html = htmlHandle.read() html = htmlHandle.read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download webpage: %s' % unicode(err)) self._downloader.trouble(u'ERROR: unable to download webpage: %s' % u(err))
return return
if dlNewest: if dlNewest:
url = htmlHandle.geturl() url = htmlHandle.geturl()
@ -2353,7 +2490,7 @@ class ComedyCentralIE(InfoExtractor):
urlHandle = urllib2.urlopen(playerUrl_raw) urlHandle = urllib2.urlopen(playerUrl_raw)
playerUrl = urlHandle.geturl() playerUrl = urlHandle.geturl()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to find out player URL: ' + unicode(err)) self._downloader.trouble(u'ERROR: unable to find out player URL: ' + u(err))
return return
uri = mMovieParams[0][1] uri = mMovieParams[0][1]
@ -2362,7 +2499,7 @@ class ComedyCentralIE(InfoExtractor):
try: try:
indexXml = urllib2.urlopen(indexUrl).read() indexXml = urllib2.urlopen(indexUrl).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download episode index: ' + unicode(err)) self._downloader.trouble(u'ERROR: unable to download episode index: ' + u(err))
return return
results = [] results = []
@ -2383,7 +2520,7 @@ class ComedyCentralIE(InfoExtractor):
try: try:
configXml = urllib2.urlopen(configReq).read() configXml = urllib2.urlopen(configReq).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download webpage: %s' % unicode(err)) self._downloader.trouble(u'ERROR: unable to download webpage: %s' % u(err))
return return
cdoc = xml.etree.ElementTree.fromstring(configXml) cdoc = xml.etree.ElementTree.fromstring(configXml)
@ -2466,7 +2603,7 @@ class EscapistIE(InfoExtractor):
m = re.match(r'text/html; charset="?([^"]+)"?', webPage.headers['Content-Type']) m = re.match(r'text/html; charset="?([^"]+)"?', webPage.headers['Content-Type'])
webPage = webPageBytes.decode(m.group(1) if m else 'utf-8') webPage = webPageBytes.decode(m.group(1) if m else 'utf-8')
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download webpage: ' + unicode(err)) self._downloader.trouble(u'ERROR: unable to download webpage: ' + u(err))
return return
descMatch = re.search('<meta name="description" content="([^"]*)"', webPage) descMatch = re.search('<meta name="description" content="([^"]*)"', webPage)
@ -2482,7 +2619,7 @@ class EscapistIE(InfoExtractor):
try: try:
configJSON = urllib2.urlopen(configUrl).read() configJSON = urllib2.urlopen(configUrl).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download configuration: ' + unicode(err)) self._downloader.trouble(u'ERROR: unable to download configuration: ' + u(err))
return return
# Technically, it's JavaScript, not JSON # Technically, it's JavaScript, not JSON
@ -2491,7 +2628,7 @@ class EscapistIE(InfoExtractor):
try: try:
config = json.loads(configJSON) config = json.loads(configJSON)
except (ValueError,), err: except (ValueError,), err:
self._downloader.trouble(u'ERROR: Invalid JSON in configuration file: ' + unicode(err)) self._downloader.trouble(u'ERROR: Invalid JSON in configuration file: ' + u(err))
return return
playlist = config['playlist'] playlist = config['playlist']
@ -2538,7 +2675,7 @@ class CollegeHumorIE(InfoExtractor):
try: try:
webpage = urllib2.urlopen(request).read() webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err)) self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % u(err))
return return
m = re.search(r'id="video:(?P<internalvideoid>[0-9]+)"', webpage) m = re.search(r'id="video:(?P<internalvideoid>[0-9]+)"', webpage)
@ -2559,7 +2696,7 @@ class CollegeHumorIE(InfoExtractor):
try: try:
metaXml = urllib2.urlopen(xmlUrl).read() metaXml = urllib2.urlopen(xmlUrl).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download video info XML: %s' % compat_str(err)) self._downloader.trouble(u'ERROR: unable to download video info XML: %s' % u(err))
return return
mdoc = xml.etree.ElementTree.fromstring(metaXml) mdoc = xml.etree.ElementTree.fromstring(metaXml)
@ -2604,7 +2741,7 @@ class XVideosIE(InfoExtractor):
try: try:
webpage = urllib2.urlopen(request).read() webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err)) self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % u(err))
return return
self.report_extraction(video_id) self.report_extraction(video_id)
@ -2688,7 +2825,7 @@ class SoundcloudIE(InfoExtractor):
try: try:
webpage = urllib2.urlopen(request).read() webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err)) self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % u(err))
return return
self.report_extraction('%s/%s' % (uploader, slug_title)) self.report_extraction('%s/%s' % (uploader, slug_title))
@ -2723,7 +2860,7 @@ class SoundcloudIE(InfoExtractor):
try: try:
upload_date = datetime.datetime.strptime(mobj.group(1), '%B %d, %Y %H:%M').strftime('%Y%m%d') upload_date = datetime.datetime.strptime(mobj.group(1), '%B %d, %Y %H:%M').strftime('%Y%m%d')
except Exception, e: except Exception, e:
self._downloader.to_stderr(compat_str(e)) self._downloader.to_stderr(u(e))
# for soundcloud, a request to a cross domain is required for cookies # for soundcloud, a request to a cross domain is required for cookies
request = urllib2.Request('http://media.soundcloud.com/crossdomain.xml', std_headers) request = urllib2.Request('http://media.soundcloud.com/crossdomain.xml', std_headers)
@ -2765,7 +2902,7 @@ class InfoQIE(InfoExtractor):
try: try:
webpage = urllib2.urlopen(request).read() webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err)) self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % u(err))
return return
self.report_extraction(url) self.report_extraction(url)
@ -2877,7 +3014,7 @@ class MixcloudIE(InfoExtractor):
self.report_download_json(file_url) self.report_download_json(file_url)
jsonData = urllib2.urlopen(request).read() jsonData = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: Unable to retrieve file: %s' % compat_str(err)) self._downloader.trouble(u'ERROR: Unable to retrieve file: %s' % u(err))
return return
# parse JSON # parse JSON
@ -2956,7 +3093,7 @@ class StanfordOpenClassroomIE(InfoExtractor):
try: try:
metaXml = urllib2.urlopen(xmlUrl).read() metaXml = urllib2.urlopen(xmlUrl).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download video info XML: %s' % unicode(err)) self._downloader.trouble(u'ERROR: unable to download video info XML: %s' % u(err))
return return
mdoc = xml.etree.ElementTree.fromstring(metaXml) mdoc = xml.etree.ElementTree.fromstring(metaXml)
try: try:
@ -2980,7 +3117,7 @@ class StanfordOpenClassroomIE(InfoExtractor):
try: try:
coursepage = urllib2.urlopen(url).read() coursepage = urllib2.urlopen(url).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download course info page: ' + unicode(err)) self._downloader.trouble(u'ERROR: unable to download course info page: ' + u(err))
return return
m = re.search('<h1>([^<]+)</h1>', coursepage) m = re.search('<h1>([^<]+)</h1>', coursepage)
@ -3019,7 +3156,7 @@ class StanfordOpenClassroomIE(InfoExtractor):
try: try:
rootpage = urllib2.urlopen(rootURL).read() rootpage = urllib2.urlopen(rootURL).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download course info page: ' + unicode(err)) self._downloader.trouble(u'ERROR: unable to download course info page: ' + u(err))
return return
info['title'] = info['id'] info['title'] = info['id']
@ -3066,7 +3203,7 @@ class MTVIE(InfoExtractor):
try: try:
webpage = urllib2.urlopen(request).read() webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err)) self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % u(err))
return return
mobj = re.search(r'<meta name="mtv_vt" content="([^"]+)"/>', webpage) mobj = re.search(r'<meta name="mtv_vt" content="([^"]+)"/>', webpage)
@ -3099,7 +3236,7 @@ class MTVIE(InfoExtractor):
try: try:
metadataXml = urllib2.urlopen(request).read() metadataXml = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download video metadata: %s' % compat_str(err)) self._downloader.trouble(u'ERROR: unable to download video metadata: %s' % u(err))
return return
mdoc = xml.etree.ElementTree.fromstring(metadataXml) mdoc = xml.etree.ElementTree.fromstring(metadataXml)
@ -3187,7 +3324,7 @@ class YoukuIE(InfoExtractor):
self.report_download_webpage(video_id) self.report_download_webpage(video_id)
jsondata = urllib2.urlopen(request).read() jsondata = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error) as err: except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err)) self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % u(err))
return return
self.report_extraction(video_id) self.report_extraction(video_id)
@ -3361,7 +3498,7 @@ class GooglePlusIE(InfoExtractor):
try: try:
webpage = urllib2.urlopen(request).read() webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: Unable to retrieve entry webpage: %s' % compat_str(err)) self._downloader.trouble(u'ERROR: Unable to retrieve entry webpage: %s' % u(err))
return return
# Extract update date # Extract update date
@ -3403,7 +3540,7 @@ class GooglePlusIE(InfoExtractor):
try: try:
webpage = urllib2.urlopen(request).read() webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err)) self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % u(err))
return return
self.report_extract_vid_page(video_page) self.report_extract_vid_page(video_page)

View file

@ -18,10 +18,11 @@ __authors__ = (
'Ori Avtalion', 'Ori Avtalion',
'shizeeg', 'shizeeg',
'Filippo Valsorda', 'Filippo Valsorda',
'Christian Albrecht',
) )
__license__ = 'Public Domain' __license__ = 'Public Domain'
__version__ = '2012.11.28' __version__ = '2012.11.29'
UPDATE_URL = 'https://raw.github.com/rg3/youtube-dl/master/youtube-dl' UPDATE_URL = 'https://raw.github.com/rg3/youtube-dl/master/youtube-dl'
UPDATE_URL_VERSION = 'https://raw.github.com/rg3/youtube-dl/master/LATEST_VERSION' UPDATE_URL_VERSION = 'https://raw.github.com/rg3/youtube-dl/master/LATEST_VERSION'
@ -126,9 +127,12 @@ def parseOpts():
opts = [] opts = []
if option._short_opts: opts.append(option._short_opts[0]) if option._short_opts:
if option._long_opts: opts.append(option._long_opts[0]) opts.append(option._short_opts[0])
if len(opts) > 1: opts.insert(1, ', ') if option._long_opts:
opts.append(option._long_opts[0])
if len(opts) > 1:
opts.insert(1, ', ')
if option.takes_value(): opts.append(' %s' % option.metavar) if option.takes_value(): opts.append(' %s' % option.metavar)
@ -187,6 +191,11 @@ def parseOpts():
dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)') dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)')
general.add_option('-R', '--retries', general.add_option('-R', '--retries',
dest='retries', metavar='RETRIES', help='number of retries (default is %default)', default=10) dest='retries', metavar='RETRIES', help='number of retries (default is %default)', default=10)
general.add_option('--buffer-size',
dest='buffersize', metavar='SIZE', help='size of download buffer (e.g. 1024 or 16k) (default is %default)', default="1024")
general.add_option('--no-resize-buffer',
action='store_true', dest='noresizebuffer',
help='do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.', default=False)
general.add_option('--dump-user-agent', general.add_option('--dump-user-agent',
action='store_true', dest='dump_user_agent', action='store_true', dest='dump_user_agent',
help='display the current browser identification', default=False) help='display the current browser identification', default=False)
@ -362,7 +371,7 @@ def gen_extractors():
YoukuIE(), YoukuIE(),
XNXXIE(), XNXXIE(),
GooglePlusIE(), GooglePlusIE(),
ArteTvIE(),
GenericIE() GenericIE()
] ]
@ -440,9 +449,14 @@ def _real_main():
opts.ratelimit = numeric_limit opts.ratelimit = numeric_limit
if opts.retries is not None: if opts.retries is not None:
try: try:
opts.retries = long(opts.retries) opts.retries = int(opts.retries)
except (TypeError, ValueError), err: except (TypeError, ValueError), err:
parser.error(u'invalid retry count specified') parser.error(u'invalid retry count specified')
if opts.buffersize is not None:
numeric_buffersize = FileDownloader.parse_bytes(opts.buffersize)
if numeric_buffersize is None:
parser.error(u'invalid buffer size specified')
opts.buffersize = numeric_buffersize
try: try:
opts.playliststart = int(opts.playliststart) opts.playliststart = int(opts.playliststart)
if opts.playliststart <= 0: if opts.playliststart <= 0:
@ -493,6 +507,8 @@ def _real_main():
'ratelimit': opts.ratelimit, 'ratelimit': opts.ratelimit,
'nooverwrites': opts.nooverwrites, 'nooverwrites': opts.nooverwrites,
'retries': opts.retries, 'retries': opts.retries,
'buffersize': opts.buffersize,
'noresizebuffer': opts.noresizebuffer,
'continuedl': opts.continue_dl, 'continuedl': opts.continue_dl,
'noprogress': opts.noprogress, 'noprogress': opts.noprogress,
'playliststart': opts.playliststart, 'playliststart': opts.playliststart,

View file

@ -27,9 +27,9 @@ std_headers = {
} }
try: try:
compat_str = unicode # Python 2 u = unicode # Python 2
except NameError: except NameError:
compat_str = str u = str
def preferredencoding(): def preferredencoding():
"""Get preferred encoding. """Get preferred encoding.
@ -37,19 +37,17 @@ def preferredencoding():
Returns the best encoding scheme for the system, based on Returns the best encoding scheme for the system, based on
locale.getpreferredencoding() and some further tweaks. locale.getpreferredencoding() and some further tweaks.
""" """
def yield_preferredencoding(): try:
try: pref = locale.getpreferredencoding()
pref = locale.getpreferredencoding() u'TEST'.encode(pref)
u'TEST'.encode(pref) except:
except: pref = 'UTF-8'
pref = 'UTF-8'
while True: return pref
yield pref
return yield_preferredencoding().next()
def htmlentity_transform(matchobj): def htmlentity_transform(matchobj):
"""Transforms an HTML entity to a Unicode character. """Transforms an HTML entity to a character.
This function receives a match object and is intended to be used with This function receives a match object and is intended to be used with
the re.sub() function. the re.sub() function.
@ -60,7 +58,6 @@ def htmlentity_transform(matchobj):
if entity in htmlentitydefs.name2codepoint: if entity in htmlentitydefs.name2codepoint:
return unichr(htmlentitydefs.name2codepoint[entity]) return unichr(htmlentitydefs.name2codepoint[entity])
# Unicode character
mobj = re.match(ur'(?u)#(x?\d+)', entity) mobj = re.match(ur'(?u)#(x?\d+)', entity)
if mobj is not None: if mobj is not None:
numstr = mobj.group(1) numstr = mobj.group(1)
@ -69,7 +66,7 @@ def htmlentity_transform(matchobj):
numstr = u'0%s' % numstr numstr = u'0%s' % numstr
else: else:
base = 10 base = 10
return unichr(long(numstr, base)) return unichr(int(numstr, base))
# Unknown entity in name, return its literal representation # Unknown entity in name, return its literal representation
return (u'&%s;' % entity) return (u'&%s;' % entity)
@ -128,8 +125,10 @@ class IDParser(HTMLParser.HTMLParser):
handle_decl = handle_pi = unknown_decl = find_startpos handle_decl = handle_pi = unknown_decl = find_startpos
def get_result(self): def get_result(self):
if self.result == None: return None if self.result is None:
if len(self.result) != 3: return None return None
if len(self.result) != 3:
return None
lines = self.html.split('\n') lines = self.html.split('\n')
lines = lines[self.result[1][0]-1:self.result[2][0]] lines = lines[self.result[1][0]-1:self.result[2][0]]
lines[0] = lines[0][self.result[1][1]:] lines[0] = lines[0][self.result[1][1]:]
@ -208,7 +207,7 @@ def sanitize_filename(s, restricted=False):
return '_-' if restricted else ' -' return '_-' if restricted else ' -'
elif char in '\\/|*<>': elif char in '\\/|*<>':
return '_' return '_'
if restricted and (char in '&\'' or char.isspace()): if restricted and (char in '!&\'' or char.isspace()):
return '_' return '_'
if restricted and ord(char) > 127: if restricted and ord(char) > 127:
return '_' return '_'
@ -235,7 +234,7 @@ def orderedSet(iterable):
def unescapeHTML(s): def unescapeHTML(s):
""" """
@param s a string (of type unicode) @param s a string
""" """
assert type(s) == type(u'') assert type(s) == type(u'')
@ -244,7 +243,7 @@ def unescapeHTML(s):
def encodeFilename(s): def encodeFilename(s):
""" """
@param s The name of the file (of type unicode) @param s The name of the file
""" """
assert type(s) == type(u'') assert type(s) == type(u'')