Merge remote-tracking branch 'upstream/master'

This commit is contained in:
Allan Zhou 2013-08-23 15:33:36 -07:00
commit 39c6f507df
14 changed files with 381 additions and 135 deletions

View file

@ -120,18 +120,20 @@ which means you can modify it, redistribute it or use it however you like.
--max-quality FORMAT highest quality format to download --max-quality FORMAT highest quality format to download
-F, --list-formats list all available formats (currently youtube -F, --list-formats list all available formats (currently youtube
only) only)
## Subtitle Options:
--write-sub write subtitle file (currently youtube only) --write-sub write subtitle file (currently youtube only)
--write-auto-sub write automatic subtitle file (currently youtube --write-auto-sub write automatic subtitle file (currently youtube
only) only)
--only-sub [deprecated] alias of --skip-download --only-sub [deprecated] alias of --skip-download
--all-subs downloads all the available subtitles of the --all-subs downloads all the available subtitles of the
video (currently youtube only) video
--list-subs lists all available subtitles for the video --list-subs lists all available subtitles for the video
(currently youtube only) --sub-format FORMAT subtitle format (default=srt) ([sbv/vtt] youtube
--sub-format FORMAT subtitle format [srt/sbv/vtt] (default=srt) only)
(currently youtube only) --sub-lang LANGS languages of the subtitles to download (optional)
--sub-lang LANG language of the subtitles to download (optional) separated by commas, use IETF language tags like
use IETF language tags like 'en' 'en,pt'
## Authentication Options: ## Authentication Options:
-u, --username USERNAME account username -u, --username USERNAME account username
@ -153,6 +155,8 @@ which means you can modify it, redistribute it or use it however you like.
processing; the video is erased by default processing; the video is erased by default
--no-post-overwrites do not overwrite post-processed files; the post- --no-post-overwrites do not overwrite post-processed files; the post-
processed files are overwritten by default processed files are overwritten by default
--embed-subs embed subtitles in the video (only for mp4
videos)
# CONFIGURATION # CONFIGURATION

View file

@ -6,28 +6,32 @@ import hashlib
import urllib.request import urllib.request
if len(sys.argv) <= 1: if len(sys.argv) <= 1:
print('Specify the version number as parameter') print('Specify the version number as parameter')
sys.exit() sys.exit()
version = sys.argv[1] version = sys.argv[1]
with open('update/LATEST_VERSION', 'w') as f: with open('update/LATEST_VERSION', 'w') as f:
f.write(version) f.write(version)
versions_info = json.load(open('update/versions.json')) versions_info = json.load(open('update/versions.json'))
if 'signature' in versions_info: if 'signature' in versions_info:
del versions_info['signature'] del versions_info['signature']
new_version = {} new_version = {}
filenames = {'bin': 'youtube-dl', 'exe': 'youtube-dl.exe', 'tar': 'youtube-dl-%s.tar.gz' % version} filenames = {
'bin': 'youtube-dl',
'exe': 'youtube-dl.exe',
'tar': 'youtube-dl-%s.tar.gz' % version}
for key, filename in filenames.items(): for key, filename in filenames.items():
print('Downloading and checksumming %s...' %filename) print('Downloading and checksumming %s...' % filename)
url = 'http://youtube-dl.org/downloads/%s/%s' % (version, filename) url = 'https://yt-dl.org/downloads/%s/%s' % (version, filename)
data = urllib.request.urlopen(url).read() data = urllib.request.urlopen(url).read()
sha256sum = hashlib.sha256(data).hexdigest() sha256sum = hashlib.sha256(data).hexdigest()
new_version[key] = (url, sha256sum) new_version[key] = (url, sha256sum)
versions_info['versions'][version] = new_version versions_info['versions'][version] = new_version
versions_info['latest'] = version versions_info['latest'] = version
json.dump(versions_info, open('update/versions.json', 'w'), indent=4, sort_keys=True) with open('update/versions.json', 'w') as jsonf:
json.dump(versions_info, jsonf, indent=4, sort_keys=True)

View file

@ -22,7 +22,7 @@ entry_template=textwrap.dedent("""
<atom:link href="http://rg3.github.io/youtube-dl" /> <atom:link href="http://rg3.github.io/youtube-dl" />
<atom:content type="xhtml"> <atom:content type="xhtml">
<div xmlns="http://www.w3.org/1999/xhtml"> <div xmlns="http://www.w3.org/1999/xhtml">
Downloads available at <a href="http://youtube-dl.org/downloads/@VERSION@/">http://youtube-dl.org/downloads/@VERSION@/</a> Downloads available at <a href="https://yt-dl.org/downloads/@VERSION@/">https://yt-dl.org/downloads/@VERSION@/</a>
</div> </div>
</atom:content> </atom:content>
<atom:author> <atom:author>
@ -54,4 +54,3 @@ atom_template = atom_template.replace('@ENTRIES@', entries_str)
with open('update/releases.atom','w',encoding='utf-8') as atom_file: with open('update/releases.atom','w',encoding='utf-8') as atom_file:
atom_file.write(atom_template) atom_file.write(atom_template)

View file

@ -67,7 +67,7 @@ RELEASE_FILES="youtube-dl youtube-dl.exe youtube-dl-$version.tar.gz"
(cd build/$version/ && sha512sum $RELEASE_FILES > SHA2-512SUMS) (cd build/$version/ && sha512sum $RELEASE_FILES > SHA2-512SUMS)
git checkout HEAD -- youtube-dl youtube-dl.exe git checkout HEAD -- youtube-dl youtube-dl.exe
/bin/echo -e "\n### Signing and uploading the new binaries to youtube-dl.org..." /bin/echo -e "\n### Signing and uploading the new binaries to yt-dl.org ..."
for f in $RELEASE_FILES; do gpg --detach-sig "build/$version/$f"; done for f in $RELEASE_FILES; do gpg --detach-sig "build/$version/$f"; done
scp -r "build/$version" ytdl@yt-dl.org:html/tmp/ scp -r "build/$version" ytdl@yt-dl.org:html/tmp/
ssh ytdl@yt-dl.org "mv html/tmp/$version html/downloads/" ssh ytdl@yt-dl.org "mv html/tmp/$version html/downloads/"

View file

@ -20,9 +20,9 @@ tests = [
# 87 # 87
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<", ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<",
"uioplkjhgfdsazxcvbnm1t34567890QWE2TYUIOPLKJHGFDSAZXCVeNM!@#$^&*()_-+={[]}|:;?/>.<"), "uioplkjhgfdsazxcvbnm1t34567890QWE2TYUIOPLKJHGFDSAZXCVeNM!@#$^&*()_-+={[]}|:;?/>.<"),
# 86 # 86 - vflh9ybst 2013/08/23
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<", ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<",
"yuioplkjhgfdsazecvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<"), "yuioplkjhgfdsazxcvbnm1234567890QWERrYUIOPLKqHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<"),
# 85 # 85
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<", ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<",
".>/?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ0q876543r1mnbvcx9asdfghjklpoiuyt2"), ".>/?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ0q876543r1mnbvcx9asdfghjklpoiuyt2"),
@ -32,12 +32,15 @@ tests = [
# 83 # 83
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<", ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<",
".>/?;}[{=+_)(*&^%<#!MNBVCXZASPFGHJKLwOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytreq"), ".>/?;}[{=+_)(*&^%<#!MNBVCXZASPFGHJKLwOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytreq"),
# 82 # 82 - vflZK4ZYR 2013/08/23
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.<", ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.<",
"wertyuioplkjhgfdsaqxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&z(-+={[};?/>.<"), "wertyuioplkjhgfdsaqxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&z(-+={[};?/>.<"),
# 81 - vflLC8JvQ 2013/07/25 # 81 - vflLC8JvQ 2013/07/25
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.", ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.",
"C>/?;}[{=+-(*&^%$#@!MNBVYXZASDFGHKLPOIU.TREWQ0q87659321mnbvcxzasdfghjkl4oiuytrewp"), "C>/?;}[{=+-(*&^%$#@!MNBVYXZASDFGHKLPOIU.TREWQ0q87659321mnbvcxzasdfghjkl4oiuytrewp"),
# 80 - vflZK4ZYR 2013/08/23 (sporadic)
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>",
"wertyuioplkjhgfdsaqxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&z(-+={[};?/>"),
# 79 - vflLC8JvQ 2013/07/25 (sporadic) # 79 - vflLC8JvQ 2013/07/25 (sporadic)
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/", ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/",
"Z?;}[{=+-(*&^%$#@!MNBVCXRASDFGHKLPOIUYT/EWQ0q87659321mnbvcxzasdfghjkl4oiuytrewp"), "Z?;}[{=+-(*&^%$#@!MNBVCXRASDFGHKLPOIUYT/EWQ0q87659321mnbvcxzasdfghjkl4oiuytrewp"),

View file

@ -35,47 +35,47 @@ class TestYoutubeSubtitles(unittest.TestCase):
DL.params['writesubtitles'] = True DL.params['writesubtitles'] = True
IE = YoutubeIE(DL) IE = YoutubeIE(DL)
info_dict = IE.extract('QRS8MkLhQmM') info_dict = IE.extract('QRS8MkLhQmM')
sub = info_dict[0]['subtitles'][0] sub = info_dict[0]['subtitles']['en']
self.assertEqual(md5(sub[2]), '4cd9278a35ba2305f47354ee13472260') self.assertEqual(md5(sub), '4cd9278a35ba2305f47354ee13472260')
def test_youtube_subtitles_it(self): def test_youtube_subtitles_it(self):
DL = FakeYDL() DL = FakeYDL()
DL.params['writesubtitles'] = True DL.params['writesubtitles'] = True
DL.params['subtitleslang'] = 'it' DL.params['subtitleslangs'] = ['it']
IE = YoutubeIE(DL) IE = YoutubeIE(DL)
info_dict = IE.extract('QRS8MkLhQmM') info_dict = IE.extract('QRS8MkLhQmM')
sub = info_dict[0]['subtitles'][0] sub = info_dict[0]['subtitles']['it']
self.assertEqual(md5(sub[2]), '164a51f16f260476a05b50fe4c2f161d') self.assertEqual(md5(sub), '164a51f16f260476a05b50fe4c2f161d')
def test_youtube_onlysubtitles(self): def test_youtube_onlysubtitles(self):
DL = FakeYDL() DL = FakeYDL()
DL.params['writesubtitles'] = True DL.params['writesubtitles'] = True
DL.params['onlysubtitles'] = True DL.params['onlysubtitles'] = True
IE = YoutubeIE(DL) IE = YoutubeIE(DL)
info_dict = IE.extract('QRS8MkLhQmM') info_dict = IE.extract('QRS8MkLhQmM')
sub = info_dict[0]['subtitles'][0] sub = info_dict[0]['subtitles']['en']
self.assertEqual(md5(sub[2]), '4cd9278a35ba2305f47354ee13472260') self.assertEqual(md5(sub), '4cd9278a35ba2305f47354ee13472260')
def test_youtube_allsubtitles(self): def test_youtube_allsubtitles(self):
DL = FakeYDL() DL = FakeYDL()
DL.params['allsubtitles'] = True DL.params['allsubtitles'] = True
IE = YoutubeIE(DL) IE = YoutubeIE(DL)
info_dict = IE.extract('QRS8MkLhQmM') info_dict = IE.extract('QRS8MkLhQmM')
subtitles = info_dict[0]['subtitles'] subtitles = info_dict[0]['subtitles']
self.assertEqual(len(subtitles), 13) self.assertEqual(len(subtitles.keys()), 13)
def test_youtube_subtitles_sbv_format(self): def test_youtube_subtitles_sbv_format(self):
DL = FakeYDL() DL = FakeYDL()
DL.params['writesubtitles'] = True DL.params['writesubtitles'] = True
DL.params['subtitlesformat'] = 'sbv' DL.params['subtitlesformat'] = 'sbv'
IE = YoutubeIE(DL) IE = YoutubeIE(DL)
info_dict = IE.extract('QRS8MkLhQmM') info_dict = IE.extract('QRS8MkLhQmM')
sub = info_dict[0]['subtitles'][0] sub = info_dict[0]['subtitles']['en']
self.assertEqual(md5(sub[2]), '13aeaa0c245a8bed9a451cb643e3ad8b') self.assertEqual(md5(sub), '13aeaa0c245a8bed9a451cb643e3ad8b')
def test_youtube_subtitles_vtt_format(self): def test_youtube_subtitles_vtt_format(self):
DL = FakeYDL() DL = FakeYDL()
DL.params['writesubtitles'] = True DL.params['writesubtitles'] = True
DL.params['subtitlesformat'] = 'vtt' DL.params['subtitlesformat'] = 'vtt'
IE = YoutubeIE(DL) IE = YoutubeIE(DL)
info_dict = IE.extract('QRS8MkLhQmM') info_dict = IE.extract('QRS8MkLhQmM')
sub = info_dict[0]['subtitles'][0] sub = info_dict[0]['subtitles']['en']
self.assertEqual(md5(sub[2]), '356cdc577fde0c6783b9b822e7206ff7') self.assertEqual(md5(sub), '356cdc577fde0c6783b9b822e7206ff7')
def test_youtube_list_subtitles(self): def test_youtube_list_subtitles(self):
DL = FakeYDL() DL = FakeYDL()
DL.params['listsubtitles'] = True DL.params['listsubtitles'] = True
@ -85,11 +85,20 @@ class TestYoutubeSubtitles(unittest.TestCase):
def test_youtube_automatic_captions(self): def test_youtube_automatic_captions(self):
DL = FakeYDL() DL = FakeYDL()
DL.params['writeautomaticsub'] = True DL.params['writeautomaticsub'] = True
DL.params['subtitleslang'] = 'it' DL.params['subtitleslangs'] = ['it']
IE = YoutubeIE(DL) IE = YoutubeIE(DL)
info_dict = IE.extract('8YoUxe5ncPo') info_dict = IE.extract('8YoUxe5ncPo')
sub = info_dict[0]['subtitles'][0] sub = info_dict[0]['subtitles']['it']
self.assertTrue(sub[2] is not None) self.assertTrue(sub is not None)
def test_youtube_multiple_langs(self):
DL = FakeYDL()
DL.params['writesubtitles'] = True
langs = ['it', 'fr', 'de']
DL.params['subtitleslangs'] = langs
IE = YoutubeIE(DL)
subtitles = IE.extract('QRS8MkLhQmM')[0]['subtitles']
for lang in langs:
self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View file

@ -71,12 +71,17 @@ class FFmpegPostProcessor(PostProcessor):
programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe'] programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe']
return dict((program, executable(program)) for program in programs) return dict((program, executable(program)) for program in programs)
def run_ffmpeg(self, path, out_path, opts): def run_ffmpeg_multiple_files(self, input_paths, out_path, opts):
if not self._exes['ffmpeg'] and not self._exes['avconv']: if not self._exes['ffmpeg'] and not self._exes['avconv']:
raise FFmpegPostProcessorError(u'ffmpeg or avconv not found. Please install one.') raise FFmpegPostProcessorError(u'ffmpeg or avconv not found. Please install one.')
cmd = ([self._exes['avconv'] or self._exes['ffmpeg'], '-y', '-i', encodeFilename(path)]
files_cmd = []
for path in input_paths:
files_cmd.extend(['-i', encodeFilename(path)])
cmd = ([self._exes['avconv'] or self._exes['ffmpeg'], '-y'] + files_cmd
+ opts + + opts +
[encodeFilename(self._ffmpeg_filename_argument(out_path))]) [encodeFilename(self._ffmpeg_filename_argument(out_path))])
p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
stdout,stderr = p.communicate() stdout,stderr = p.communicate()
if p.returncode != 0: if p.returncode != 0:
@ -84,6 +89,9 @@ class FFmpegPostProcessor(PostProcessor):
msg = stderr.strip().split('\n')[-1] msg = stderr.strip().split('\n')[-1]
raise FFmpegPostProcessorError(msg) raise FFmpegPostProcessorError(msg)
def run_ffmpeg(self, path, out_path, opts):
self.run_ffmpeg_multiple_files([path], out_path, opts)
def _ffmpeg_filename_argument(self, fn): def _ffmpeg_filename_argument(self, fn):
# ffmpeg broke --, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details # ffmpeg broke --, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details
if fn.startswith(u'-'): if fn.startswith(u'-'):
@ -232,3 +240,227 @@ class FFmpegVideoConvertor(FFmpegPostProcessor):
information['format'] = self._preferedformat information['format'] = self._preferedformat
information['ext'] = self._preferedformat information['ext'] = self._preferedformat
return False,information return False,information
class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
# See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
_lang_map = {
'aa': 'aar',
'ab': 'abk',
'ae': 'ave',
'af': 'afr',
'ak': 'aka',
'am': 'amh',
'an': 'arg',
'ar': 'ara',
'as': 'asm',
'av': 'ava',
'ay': 'aym',
'az': 'aze',
'ba': 'bak',
'be': 'bel',
'bg': 'bul',
'bh': 'bih',
'bi': 'bis',
'bm': 'bam',
'bn': 'ben',
'bo': 'bod',
'br': 'bre',
'bs': 'bos',
'ca': 'cat',
'ce': 'che',
'ch': 'cha',
'co': 'cos',
'cr': 'cre',
'cs': 'ces',
'cu': 'chu',
'cv': 'chv',
'cy': 'cym',
'da': 'dan',
'de': 'deu',
'dv': 'div',
'dz': 'dzo',
'ee': 'ewe',
'el': 'ell',
'en': 'eng',
'eo': 'epo',
'es': 'spa',
'et': 'est',
'eu': 'eus',
'fa': 'fas',
'ff': 'ful',
'fi': 'fin',
'fj': 'fij',
'fo': 'fao',
'fr': 'fra',
'fy': 'fry',
'ga': 'gle',
'gd': 'gla',
'gl': 'glg',
'gn': 'grn',
'gu': 'guj',
'gv': 'glv',
'ha': 'hau',
'he': 'heb',
'hi': 'hin',
'ho': 'hmo',
'hr': 'hrv',
'ht': 'hat',
'hu': 'hun',
'hy': 'hye',
'hz': 'her',
'ia': 'ina',
'id': 'ind',
'ie': 'ile',
'ig': 'ibo',
'ii': 'iii',
'ik': 'ipk',
'io': 'ido',
'is': 'isl',
'it': 'ita',
'iu': 'iku',
'ja': 'jpn',
'jv': 'jav',
'ka': 'kat',
'kg': 'kon',
'ki': 'kik',
'kj': 'kua',
'kk': 'kaz',
'kl': 'kal',
'km': 'khm',
'kn': 'kan',
'ko': 'kor',
'kr': 'kau',
'ks': 'kas',
'ku': 'kur',
'kv': 'kom',
'kw': 'cor',
'ky': 'kir',
'la': 'lat',
'lb': 'ltz',
'lg': 'lug',
'li': 'lim',
'ln': 'lin',
'lo': 'lao',
'lt': 'lit',
'lu': 'lub',
'lv': 'lav',
'mg': 'mlg',
'mh': 'mah',
'mi': 'mri',
'mk': 'mkd',
'ml': 'mal',
'mn': 'mon',
'mr': 'mar',
'ms': 'msa',
'mt': 'mlt',
'my': 'mya',
'na': 'nau',
'nb': 'nob',
'nd': 'nde',
'ne': 'nep',
'ng': 'ndo',
'nl': 'nld',
'nn': 'nno',
'no': 'nor',
'nr': 'nbl',
'nv': 'nav',
'ny': 'nya',
'oc': 'oci',
'oj': 'oji',
'om': 'orm',
'or': 'ori',
'os': 'oss',
'pa': 'pan',
'pi': 'pli',
'pl': 'pol',
'ps': 'pus',
'pt': 'por',
'qu': 'que',
'rm': 'roh',
'rn': 'run',
'ro': 'ron',
'ru': 'rus',
'rw': 'kin',
'sa': 'san',
'sc': 'srd',
'sd': 'snd',
'se': 'sme',
'sg': 'sag',
'si': 'sin',
'sk': 'slk',
'sl': 'slv',
'sm': 'smo',
'sn': 'sna',
'so': 'som',
'sq': 'sqi',
'sr': 'srp',
'ss': 'ssw',
'st': 'sot',
'su': 'sun',
'sv': 'swe',
'sw': 'swa',
'ta': 'tam',
'te': 'tel',
'tg': 'tgk',
'th': 'tha',
'ti': 'tir',
'tk': 'tuk',
'tl': 'tgl',
'tn': 'tsn',
'to': 'ton',
'tr': 'tur',
'ts': 'tso',
'tt': 'tat',
'tw': 'twi',
'ty': 'tah',
'ug': 'uig',
'uk': 'ukr',
'ur': 'urd',
'uz': 'uzb',
've': 'ven',
'vi': 'vie',
'vo': 'vol',
'wa': 'wln',
'wo': 'wol',
'xh': 'xho',
'yi': 'yid',
'yo': 'yor',
'za': 'zha',
'zh': 'zho',
'zu': 'zul',
}
def __init__(self, downloader=None, subtitlesformat='srt'):
super(FFmpegEmbedSubtitlePP, self).__init__(downloader)
self._subformat = subtitlesformat
@classmethod
def _conver_lang_code(cls, code):
"""Convert language code from ISO 639-1 to ISO 639-2/T"""
return cls._lang_map.get(code[:2])
def run(self, information):
if information['ext'] != u'mp4':
self._downloader.to_screen(u'[ffmpeg] Subtitles can only be embedded in mp4 files')
return True, information
sub_langs = [key for key in information['subtitles']]
filename = information['filepath']
input_files = [filename] + [subtitles_filename(filename, lang, self._subformat) for lang in sub_langs]
opts = ['-map', '0:0', '-map', '0:1', '-c:v', 'copy', '-c:a', 'copy']
for (i, lang) in enumerate(sub_langs):
opts.extend(['-map', '%d:0' % (i+1), '-c:s:%d' % i, 'mov_text'])
lang_code = self._conver_lang_code(lang)
if lang_code is not None:
opts.extend(['-metadata:s:s:%d' % i, 'language=%s' % lang_code])
opts.extend(['-f', 'mp4'])
temp_filename = filename + u'.temp'
self._downloader.to_screen(u'[ffmpeg] Embedding subtitles in \'%s\'' % filename)
self.run_ffmpeg_multiple_files(input_files, temp_filename, opts)
os.remove(encodeFilename(filename))
os.rename(encodeFilename(temp_filename), encodeFilename(filename))
return True, information

View file

@ -76,7 +76,7 @@ class YoutubeDL(object):
allsubtitles: Downloads all the subtitles of the video allsubtitles: Downloads all the subtitles of the video
listsubtitles: Lists all available subtitles for the video listsubtitles: Lists all available subtitles for the video
subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt) subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
subtitleslang: Language of the subtitles to download subtitleslangs: List of languages of the subtitles to download
keepvideo: Keep the video file after post-processing keepvideo: Keep the video file after post-processing
daterange: A DateRange object, download only if the upload_date is in the range. daterange: A DateRange object, download only if the upload_date is in the range.
skip_download: Skip the actual download of the video file skip_download: Skip the actual download of the video file
@ -483,41 +483,28 @@ class YoutubeDL(object):
self.report_error(u'Cannot write description file ' + descfn) self.report_error(u'Cannot write description file ' + descfn)
return return
if (self.params.get('writesubtitles', False) or self.params.get('writeautomaticsub')) and 'subtitles' in info_dict and info_dict['subtitles']: subtitles_are_requested = any([self.params.get('writesubtitles', False),
self.params.get('writeautomaticsub'),
self.params.get('allsubtitles', False)])
if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
# subtitles download errors are already managed as troubles in relevant IE # subtitles download errors are already managed as troubles in relevant IE
# that way it will silently go on when used with unsupporting IE # that way it will silently go on when used with unsupporting IE
subtitle = info_dict['subtitles'][0] subtitles = info_dict['subtitles']
(sub_error, sub_lang, sub) = subtitle
sub_format = self.params.get('subtitlesformat') sub_format = self.params.get('subtitlesformat')
if sub_error: for sub_lang in subtitles.keys():
self.report_warning("Some error while getting the subtitles") sub = subtitles[sub_lang]
else: if sub is None:
continue
try: try:
sub_filename = filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format sub_filename = subtitles_filename(filename, sub_lang, sub_format)
self.report_writesubtitles(sub_filename) self.report_writesubtitles(sub_filename)
with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile: with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
subfile.write(sub) subfile.write(sub)
except (OSError, IOError): except (OSError, IOError):
self.report_error(u'Cannot write subtitles file ' + descfn) self.report_error(u'Cannot write subtitles file ' + descfn)
return return
if self.params.get('allsubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']:
subtitles = info_dict['subtitles']
sub_format = self.params.get('subtitlesformat')
for subtitle in subtitles:
(sub_error, sub_lang, sub) = subtitle
if sub_error:
self.report_warning("Some error while getting the subtitles")
else:
try:
sub_filename = filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format
self.report_writesubtitles(sub_filename)
with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
subfile.write(sub)
except (OSError, IOError):
self.report_error(u'Cannot write subtitles file ' + descfn)
return
if self.params.get('writeinfojson', False): if self.params.get('writeinfojson', False):
infofn = filename + u'.info.json' infofn = filename + u'.info.json'
self.report_writeinfojson(infofn) self.report_writeinfojson(infofn)

View file

@ -83,6 +83,9 @@ def parseOpts(overrideArguments=None):
return "".join(opts) return "".join(opts)
def _comma_separated_values_options_callback(option, opt_str, value, parser):
setattr(parser.values, option.dest, value.split(','))
def _find_term_columns(): def _find_term_columns():
columns = os.environ.get('COLUMNS', None) columns = os.environ.get('COLUMNS', None)
if columns: if columns:
@ -206,9 +209,10 @@ def parseOpts(overrideArguments=None):
subtitles.add_option('--sub-format', subtitles.add_option('--sub-format',
action='store', dest='subtitlesformat', metavar='FORMAT', action='store', dest='subtitlesformat', metavar='FORMAT',
help='subtitle format (default=srt) ([sbv/vtt] youtube only)', default='srt') help='subtitle format (default=srt) ([sbv/vtt] youtube only)', default='srt')
subtitles.add_option('--sub-lang', '--srt-lang', subtitles.add_option('--sub-lang', '--sub-langs', '--srt-lang',
action='store', dest='subtitleslang', metavar='LANG', action='callback', dest='subtitleslang', metavar='LANGS', type='str',
help='language of the subtitles to download (optional) use IETF language tags like \'en\'') default=[], callback=_comma_separated_values_options_callback,
help='languages of the subtitles to download (optional) separated by commas, use IETF language tags like \'en,pt\'')
downloader.add_option('-r', '--rate-limit', downloader.add_option('-r', '--rate-limit',
dest='ratelimit', metavar='LIMIT', help='maximum download rate (e.g. 50k or 44.6m)') dest='ratelimit', metavar='LIMIT', help='maximum download rate (e.g. 50k or 44.6m)')
@ -323,6 +327,8 @@ def parseOpts(overrideArguments=None):
help='keeps the video file on disk after the post-processing; the video is erased by default') help='keeps the video file on disk after the post-processing; the video is erased by default')
postproc.add_option('--no-post-overwrites', action='store_true', dest='nopostoverwrites', default=False, postproc.add_option('--no-post-overwrites', action='store_true', dest='nopostoverwrites', default=False,
help='do not overwrite post-processed files; the post-processed files are overwritten by default') help='do not overwrite post-processed files; the post-processed files are overwritten by default')
postproc.add_option('--embed-subs', action='store_true', dest='embedsubtitles', default=False,
help='embed subtitles in the video (only for mp4 videos)')
parser.add_option_group(general) parser.add_option_group(general)
@ -571,7 +577,7 @@ def _real_main(argv=None):
'allsubtitles': opts.allsubtitles, 'allsubtitles': opts.allsubtitles,
'listsubtitles': opts.listsubtitles, 'listsubtitles': opts.listsubtitles,
'subtitlesformat': opts.subtitlesformat, 'subtitlesformat': opts.subtitlesformat,
'subtitleslang': opts.subtitleslang, 'subtitleslangs': opts.subtitleslang,
'matchtitle': decodeOption(opts.matchtitle), 'matchtitle': decodeOption(opts.matchtitle),
'rejecttitle': decodeOption(opts.rejecttitle), 'rejecttitle': decodeOption(opts.rejecttitle),
'max_downloads': opts.max_downloads, 'max_downloads': opts.max_downloads,
@ -611,6 +617,8 @@ def _real_main(argv=None):
ydl.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, nopostoverwrites=opts.nopostoverwrites)) ydl.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, nopostoverwrites=opts.nopostoverwrites))
if opts.recodevideo: if opts.recodevideo:
ydl.add_post_processor(FFmpegVideoConvertor(preferedformat=opts.recodevideo)) ydl.add_post_processor(FFmpegVideoConvertor(preferedformat=opts.recodevideo))
if opts.embedsubtitles:
ydl.add_post_processor(FFmpegEmbedSubtitlePP(subtitlesformat=opts.subtitlesformat))
# Update version # Update version
if opts.update_self: if opts.update_self:

View file

@ -47,7 +47,8 @@ class InfoExtractor(object):
uploader_id: Nickname or id of the video uploader. uploader_id: Nickname or id of the video uploader.
location: Physical location of the video. location: Physical location of the video.
player_url: SWF Player URL (used for rtmpdump). player_url: SWF Player URL (used for rtmpdump).
subtitles: The subtitle file contents. subtitles: The subtitle file contents as a dictionary in the format
{language: subtitles}.
view_count: How many users have watched the video on the platform. view_count: How many users have watched the video on the platform.
urlhandle: [internal] The urlHandle to be used to download the file, urlhandle: [internal] The urlHandle to be used to download the file,
like returned by urllib.request.urlopen like returned by urllib.request.urlopen

View file

@ -3,7 +3,8 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
compat_urllib_parse, compat_urllib_parse,
unescapeHTML,
determine_ext,
ExtractorError, ExtractorError,
) )
@ -36,15 +37,16 @@ class XHamsterIE(InfoExtractor):
video_url = compat_urllib_parse.unquote(mobj.group('file')) video_url = compat_urllib_parse.unquote(mobj.group('file'))
else: else:
video_url = mobj.group('server')+'/key='+mobj.group('file') video_url = mobj.group('server')+'/key='+mobj.group('file')
video_extension = video_url.split('.')[-1]
video_title = self._html_search_regex(r'<title>(?P<title>.+?) - xHamster\.com</title>', video_title = self._html_search_regex(r'<title>(?P<title>.+?) - xHamster\.com</title>',
webpage, u'title') webpage, u'title')
# Can't see the description anywhere in the UI # Only a few videos have an description
# video_description = self._html_search_regex(r'<span>Description: </span>(?P<description>[^<]+)', mobj = re.search('<span>Description: </span>(?P<description>[^<]+)', webpage)
# webpage, u'description', fatal=False) if mobj:
# if video_description: video_description = unescapeHTML(video_description) video_description = unescapeHTML(mobj.group('description'))
else:
video_description = None
mobj = re.search(r'hint=\'(?P<upload_date_Y>[0-9]{4})-(?P<upload_date_m>[0-9]{2})-(?P<upload_date_d>[0-9]{2}) [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{3,4}\'', webpage) mobj = re.search(r'hint=\'(?P<upload_date_Y>[0-9]{4})-(?P<upload_date_m>[0-9]{2})-(?P<upload_date_d>[0-9]{2}) [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{3,4}\'', webpage)
if mobj: if mobj:
@ -62,9 +64,9 @@ class XHamsterIE(InfoExtractor):
return [{ return [{
'id': video_id, 'id': video_id,
'url': video_url, 'url': video_url,
'ext': video_extension, 'ext': determine_ext(video_url),
'title': video_title, 'title': video_title,
# 'description': video_description, 'description': video_description,
'upload_date': video_upload_date, 'upload_date': video_upload_date,
'uploader_id': video_uploader_id, 'uploader_id': video_uploader_id,
'thumbnail': video_thumbnail 'thumbnail': video_thumbnail

View file

@ -423,7 +423,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
elif len(s) == 87: elif len(s) == 87:
return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:] return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
elif len(s) == 86: elif len(s) == 86:
return s[5:20] + s[2] + s[21:] return s[5:40] + s[3] + s[41:48] + s[0] + s[49:86]
elif len(s) == 85: elif len(s) == 85:
return s[83:34:-1] + s[0] + s[33:27:-1] + s[3] + s[26:19:-1] + s[34] + s[18:3:-1] + s[27] return s[83:34:-1] + s[0] + s[33:27:-1] + s[3] + s[26:19:-1] + s[34] + s[18:3:-1] + s[27]
elif len(s) == 84: elif len(s) == 84:
@ -434,6 +434,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
return s[1:19] + s[0] + s[20:68] + s[19] + s[69:82] return s[1:19] + s[0] + s[20:68] + s[19] + s[69:82]
elif len(s) == 81: elif len(s) == 81:
return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9] return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
elif len(s) == 80:
return s[1:19] + s[0] + s[20:68] + s[19] + s[69:80]
elif len(s) == 79: elif len(s) == 79:
return s[54] + s[77:54:-1] + s[39] + s[53:39:-1] + s[78] + s[38:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9] return s[54] + s[77:54:-1] + s[39] + s[53:39:-1] + s[78] + s[38:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
@ -456,11 +458,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
try: try:
sub_list = compat_urllib_request.urlopen(request).read().decode('utf-8') sub_list = compat_urllib_request.urlopen(request).read().decode('utf-8')
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
return (u'unable to download video subtitles: %s' % compat_str(err), None) self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
return {}
sub_lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list) sub_lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
sub_lang_list = dict((l[1], l[0]) for l in sub_lang_list) sub_lang_list = dict((l[1], l[0]) for l in sub_lang_list)
if not sub_lang_list: if not sub_lang_list:
return (u'video doesn\'t have subtitles', None) self._downloader.report_warning(u'video doesn\'t have subtitles')
return {}
return sub_lang_list return sub_lang_list
def _list_available_subtitles(self, video_id): def _list_available_subtitles(self, video_id):
@ -469,8 +473,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _request_subtitle(self, sub_lang, sub_name, video_id, format): def _request_subtitle(self, sub_lang, sub_name, video_id, format):
""" """
Return tuple: Return the subtitle as a string or None if they are not found
(error_message, sub_lang, sub)
""" """
self.report_video_subtitles_request(video_id, sub_lang, format) self.report_video_subtitles_request(video_id, sub_lang, format)
params = compat_urllib_parse.urlencode({ params = compat_urllib_parse.urlencode({
@ -483,21 +486,24 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
try: try:
sub = compat_urllib_request.urlopen(url).read().decode('utf-8') sub = compat_urllib_request.urlopen(url).read().decode('utf-8')
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
return (u'unable to download video subtitles: %s' % compat_str(err), None, None) self._downloader.report_warning(u'unable to download video subtitles for %s: %s' % (sub_lang, compat_str(err)))
return
if not sub: if not sub:
return (u'Did not fetch video subtitles', None, None) self._downloader.report_warning(u'Did not fetch video subtitles')
return (None, sub_lang, sub) return
return sub
def _request_automatic_caption(self, video_id, webpage): def _request_automatic_caption(self, video_id, webpage):
"""We need the webpage for getting the captions url, pass it as an """We need the webpage for getting the captions url, pass it as an
argument to speed up the process.""" argument to speed up the process."""
sub_lang = self._downloader.params.get('subtitleslang') or 'en' sub_lang = (self._downloader.params.get('subtitleslangs') or ['en'])[0]
sub_format = self._downloader.params.get('subtitlesformat') sub_format = self._downloader.params.get('subtitlesformat')
self.to_screen(u'%s: Looking for automatic captions' % video_id) self.to_screen(u'%s: Looking for automatic captions' % video_id)
mobj = re.search(r';ytplayer.config = ({.*?});', webpage) mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
err_msg = u'Couldn\'t find automatic captions for "%s"' % sub_lang err_msg = u'Couldn\'t find automatic captions for "%s"' % sub_lang
if mobj is None: if mobj is None:
return [(err_msg, None, None)] self._downloader.report_warning(err_msg)
return {}
player_config = json.loads(mobj.group(1)) player_config = json.loads(mobj.group(1))
try: try:
args = player_config[u'args'] args = player_config[u'args']
@ -512,40 +518,43 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
}) })
subtitles_url = caption_url + '&' + params subtitles_url = caption_url + '&' + params
sub = self._download_webpage(subtitles_url, video_id, u'Downloading automatic captions') sub = self._download_webpage(subtitles_url, video_id, u'Downloading automatic captions')
return [(None, sub_lang, sub)] return {sub_lang: sub}
except KeyError: # An extractor error can be raise by the download process if there are
return [(err_msg, None, None)] # no automatic captions but there are subtitles
except (KeyError, ExtractorError):
self._downloader.report_warning(err_msg)
return {}
def _extract_subtitle(self, video_id): def _extract_subtitles(self, video_id):
""" """
Return a list with a tuple: Return a dictionary: {language: subtitles} or {} if the subtitles
[(error_message, sub_lang, sub)] couldn't be found
""" """
sub_lang_list = self._get_available_subtitles(video_id) available_subs_list = self._get_available_subtitles(video_id)
sub_format = self._downloader.params.get('subtitlesformat') sub_format = self._downloader.params.get('subtitlesformat')
if isinstance(sub_lang_list,tuple): #There was some error, it didn't get the available subtitles if not available_subs_list: #There was some error, it didn't get the available subtitles
return [(sub_lang_list[0], None, None)] return {}
if self._downloader.params.get('subtitleslang', False): if self._downloader.params.get('allsubtitles', False):
sub_lang = self._downloader.params.get('subtitleslang') sub_lang_list = available_subs_list
elif 'en' in sub_lang_list:
sub_lang = 'en'
else: else:
sub_lang = list(sub_lang_list.keys())[0] if self._downloader.params.get('subtitleslangs', False):
if not sub_lang in sub_lang_list: reqested_langs = self._downloader.params.get('subtitleslangs')
return [(u'no closed captions found in the specified language "%s"' % sub_lang, None, None)] elif 'en' in available_subs_list:
reqested_langs = ['en']
else:
reqested_langs = [list(available_subs_list.keys())[0]]
subtitle = self._request_subtitle(sub_lang, sub_lang_list[sub_lang].encode('utf-8'), video_id, sub_format) sub_lang_list = {}
return [subtitle] for sub_lang in reqested_langs:
if not sub_lang in available_subs_list:
def _extract_all_subtitles(self, video_id): self._downloader.report_warning(u'no closed captions found in the specified language "%s"' % sub_lang)
sub_lang_list = self._get_available_subtitles(video_id) continue
sub_format = self._downloader.params.get('subtitlesformat') sub_lang_list[sub_lang] = available_subs_list[sub_lang]
if isinstance(sub_lang_list,tuple): #There was some error, it didn't get the available subtitles subtitles = {}
return [(sub_lang_list[0], None, None)]
subtitles = []
for sub_lang in sub_lang_list: for sub_lang in sub_lang_list:
subtitle = self._request_subtitle(sub_lang, sub_lang_list[sub_lang].encode('utf-8'), video_id, sub_format) subtitle = self._request_subtitle(sub_lang, sub_lang_list[sub_lang].encode('utf-8'), video_id, sub_format)
subtitles.append(subtitle) if subtitle:
subtitles[sub_lang] = subtitle
return subtitles return subtitles
def _print_formats(self, formats): def _print_formats(self, formats):
@ -736,25 +745,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# subtitles # subtitles
video_subtitles = None video_subtitles = None
if self._downloader.params.get('writesubtitles', False): if self._downloader.params.get('writesubtitles', False) or self._downloader.params.get('allsubtitles', False):
video_subtitles = self._extract_subtitle(video_id) video_subtitles = self._extract_subtitles(video_id)
if video_subtitles: elif self._downloader.params.get('writeautomaticsub', False):
(sub_error, sub_lang, sub) = video_subtitles[0]
if sub_error:
self._downloader.report_warning(sub_error)
if self._downloader.params.get('writeautomaticsub', False):
video_subtitles = self._request_automatic_caption(video_id, video_webpage) video_subtitles = self._request_automatic_caption(video_id, video_webpage)
(sub_error, sub_lang, sub) = video_subtitles[0]
if sub_error:
self._downloader.report_warning(sub_error)
if self._downloader.params.get('allsubtitles', False):
video_subtitles = self._extract_all_subtitles(video_id)
for video_subtitle in video_subtitles:
(sub_error, sub_lang, sub) = video_subtitle
if sub_error:
self._downloader.report_warning(sub_error)
if self._downloader.params.get('listsubtitles', False): if self._downloader.params.get('listsubtitles', False):
self._list_available_subtitles(video_id) self._list_available_subtitles(video_id)

View file

@ -657,6 +657,9 @@ def determine_ext(url, default_ext=u'unknown_video'):
else: else:
return default_ext return default_ext
def subtitles_filename(filename, sub_lang, sub_format):
return filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format
def date_from_str(date_str): def date_from_str(date_str):
""" """
Return a datetime object from a string in the format YYYYMMDD or Return a datetime object from a string in the format YYYYMMDD or

View file

@ -1,2 +1,2 @@
__version__ = '2013.08.22' __version__ = '2013.08.23'