This commit is contained in:
parent
f9e6aa1dcf
commit
4ef1fc9707
1 changed file with 14 additions and 18 deletions
|
@@ -1322,17 +1322,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
return self._parse_json(
|
return self._parse_json(
|
||||||
uppercase_escape(config), video_id, fatal=False)
|
uppercase_escape(config), video_id, fatal=False)
|
||||||
|
|
||||||
def _get_automatic_captions(self, video_id, webpage):
|
def _get_automatic_captions(self, video_id, player_response, player_config):
|
||||||
"""We need the webpage for getting the captions url, pass it as an
|
"""We need the webpage for getting the captions url, pass it as an
|
||||||
argument to speed up the process."""
|
argument to speed up the process."""
|
||||||
self.to_screen('%s: Looking for automatic captions' % video_id)
|
self.to_screen('%s: Looking for automatic captions' % video_id)
|
||||||
player_config = self._get_ytplayer_config(video_id, webpage)
|
|
||||||
err_msg = 'Couldn\'t find automatic captions for %s' % video_id
|
err_msg = 'Couldn\'t find automatic captions for %s' % video_id
|
||||||
if not player_config:
|
if not (player_response or player_config):
|
||||||
self._downloader.report_warning(err_msg)
|
self._downloader.report_warning(err_msg)
|
||||||
return {}
|
return {}
|
||||||
try:
|
try:
|
||||||
args = player_config['args']
|
args = player_config.get('args') if player_config else {}
|
||||||
caption_url = args.get('ttsurl')
|
caption_url = args.get('ttsurl')
|
||||||
if caption_url:
|
if caption_url:
|
||||||
timestamp = args['timestamp']
|
timestamp = args['timestamp']
|
||||||
|
@@ -1391,19 +1390,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
return captions
|
return captions
|
||||||
|
|
||||||
# New captions format as of 22.06.2017
|
# New captions format as of 22.06.2017
|
||||||
player_response = args.get('player_response')
|
if player_response:
|
||||||
if player_response and isinstance(player_response, compat_str):
|
renderer = player_response['captions']['playerCaptionsTracklistRenderer']
|
||||||
player_response = self._parse_json(
|
base_url = renderer['captionTracks'][0]['baseUrl']
|
||||||
player_response, video_id, fatal=False)
|
sub_lang_list = []
|
||||||
if player_response:
|
for lang in renderer['translationLanguages']:
|
||||||
renderer = player_response['captions']['playerCaptionsTracklistRenderer']
|
lang_code = lang.get('languageCode')
|
||||||
base_url = renderer['captionTracks'][0]['baseUrl']
|
if lang_code:
|
||||||
sub_lang_list = []
|
sub_lang_list.append(lang_code)
|
||||||
for lang in renderer['translationLanguages']:
|
return make_captions(base_url, sub_lang_list)
|
||||||
lang_code = lang.get('languageCode')
|
|
||||||
if lang_code:
|
|
||||||
sub_lang_list.append(lang_code)
|
|
||||||
return make_captions(base_url, sub_lang_list)
|
|
||||||
|
|
||||||
# Some videos don't provide ttsurl but rather caption_tracks and
|
# Some videos don't provide ttsurl but rather caption_tracks and
|
||||||
# caption_translation_languages (e.g. 20LmZk1hakA)
|
# caption_translation_languages (e.g. 20LmZk1hakA)
|
||||||
|
@@ -1652,6 +1647,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
# Get video info
|
# Get video info
|
||||||
video_info = {}
|
video_info = {}
|
||||||
embed_webpage = None
|
embed_webpage = None
|
||||||
|
ytplayer_config = None
|
||||||
|
|
||||||
if re.search(r'["\']status["\']\s*:\s*["\']LOGIN_REQUIRED', video_webpage) is not None:
|
if re.search(r'["\']status["\']\s*:\s*["\']LOGIN_REQUIRED', video_webpage) is not None:
|
||||||
age_gate = True
|
age_gate = True
|
||||||
|
@@ -2276,7 +2272,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
|
|
||||||
# subtitles
|
# subtitles
|
||||||
video_subtitles = self.extract_subtitles(video_id, video_webpage)
|
video_subtitles = self.extract_subtitles(video_id, video_webpage)
|
||||||
automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
|
automatic_captions = self.extract_automatic_captions(video_id, player_response, ytplayer_config)
|
||||||
|
|
||||||
video_duration = try_get(
|
video_duration = try_get(
|
||||||
video_info, lambda x: int_or_none(x['length_seconds'][0]))
|
video_info, lambda x: int_or_none(x['length_seconds'][0]))
|
||||||
|
|
Loading…
Reference in a new issue