[youtube:tab] Pass innertube context and x-goog-visitor-id header along with continuation requests (closes #28702)
This commit is contained in:
parent
27e5a4464d
commit
1b0a13f33c
1 changed file with 27 additions and 15 deletions
|
@@ -306,7 +306,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||||
return self._parse_json(
|
return self._parse_json(
|
||||||
self._search_regex(
|
self._search_regex(
|
||||||
r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
|
r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
|
||||||
default='{}'), video_id, fatal=False)
|
default='{}'), video_id, fatal=False) or {}
|
||||||
|
|
||||||
def _extract_video(self, renderer):
|
def _extract_video(self, renderer):
|
||||||
video_id = renderer['videoId']
|
video_id = renderer['videoId']
|
||||||
|
@@ -2475,7 +2475,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
||||||
ctp = continuation_ep.get('clickTrackingParams')
|
ctp = continuation_ep.get('clickTrackingParams')
|
||||||
return YoutubeTabIE._build_continuation_query(continuation, ctp)
|
return YoutubeTabIE._build_continuation_query(continuation, ctp)
|
||||||
|
|
||||||
def _entries(self, tab, identity_token):
|
def _entries(self, tab, item_id, webpage):
|
||||||
tab_content = try_get(tab, lambda x: x['content'], dict)
|
tab_content = try_get(tab, lambda x: x['content'], dict)
|
||||||
if not tab_content:
|
if not tab_content:
|
||||||
return
|
return
|
||||||
|
@@ -2535,26 +2535,37 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
||||||
yield entry
|
yield entry
|
||||||
continuation = self._extract_continuation(rich_grid_renderer)
|
continuation = self._extract_continuation(rich_grid_renderer)
|
||||||
|
|
||||||
|
ytcfg = self._extract_ytcfg(item_id, webpage)
|
||||||
|
client_version = try_get(
|
||||||
|
ytcfg, lambda x: x['INNERTUBE_CLIENT_VERSION'], compat_str) or '2.20210407.08.00'
|
||||||
|
|
||||||
headers = {
|
headers = {
|
||||||
'x-youtube-client-name': '1',
|
'x-youtube-client-name': '1',
|
||||||
'x-youtube-client-version': '2.20201112.04.01',
|
'x-youtube-client-version': client_version,
|
||||||
'content-type': 'application/json',
|
'content-type': 'application/json',
|
||||||
}
|
}
|
||||||
|
|
||||||
|
context = try_get(ytcfg, lambda x: x['INNERTUBE_CONTEXT'], dict) or {
|
||||||
|
'client': {
|
||||||
|
'clientName': 'WEB',
|
||||||
|
'clientVersion': client_version,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
visitor_data = try_get(context, lambda x: x['client']['visitorData'], compat_str)
|
||||||
|
|
||||||
|
identity_token = self._extract_identity_token(ytcfg, webpage)
|
||||||
if identity_token:
|
if identity_token:
|
||||||
headers['x-youtube-identity-token'] = identity_token
|
headers['x-youtube-identity-token'] = identity_token
|
||||||
|
|
||||||
data = {
|
data = {
|
||||||
'context': {
|
'context': context,
|
||||||
'client': {
|
|
||||||
'clientName': 'WEB',
|
|
||||||
'clientVersion': '2.20201021.03.00',
|
|
||||||
}
|
|
||||||
},
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for page_num in itertools.count(1):
|
for page_num in itertools.count(1):
|
||||||
if not continuation:
|
if not continuation:
|
||||||
break
|
break
|
||||||
|
if visitor_data:
|
||||||
|
headers['x-goog-visitor-id'] = visitor_data
|
||||||
data['continuation'] = continuation['continuation']
|
data['continuation'] = continuation['continuation']
|
||||||
data['clickTracking'] = {
|
data['clickTracking'] = {
|
||||||
'clickTrackingParams': continuation['itct']
|
'clickTrackingParams': continuation['itct']
|
||||||
|
@@ -2579,6 +2590,9 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
||||||
if not response:
|
if not response:
|
||||||
break
|
break
|
||||||
|
|
||||||
|
visitor_data = try_get(
|
||||||
|
response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data
|
||||||
|
|
||||||
continuation_contents = try_get(
|
continuation_contents = try_get(
|
||||||
response, lambda x: x['continuationContents'], dict)
|
response, lambda x: x['continuationContents'], dict)
|
||||||
if continuation_contents:
|
if continuation_contents:
|
||||||
|
@@ -2687,7 +2701,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
||||||
alerts.append(text)
|
alerts.append(text)
|
||||||
return '\n'.join(alerts)
|
return '\n'.join(alerts)
|
||||||
|
|
||||||
def _extract_from_tabs(self, item_id, webpage, data, tabs, identity_token):
|
def _extract_from_tabs(self, item_id, webpage, data, tabs):
|
||||||
selected_tab = self._extract_selected_tab(tabs)
|
selected_tab = self._extract_selected_tab(tabs)
|
||||||
renderer = try_get(
|
renderer = try_get(
|
||||||
data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
|
data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
|
||||||
|
@@ -2712,7 +2726,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
||||||
if renderer:
|
if renderer:
|
||||||
title = try_get(renderer, lambda x: x['hashtag']['simpleText'])
|
title = try_get(renderer, lambda x: x['hashtag']['simpleText'])
|
||||||
playlist = self.playlist_result(
|
playlist = self.playlist_result(
|
||||||
self._entries(selected_tab, identity_token),
|
self._entries(selected_tab, item_id, webpage),
|
||||||
playlist_id=playlist_id, playlist_title=title,
|
playlist_id=playlist_id, playlist_title=title,
|
||||||
playlist_description=description)
|
playlist_description=description)
|
||||||
playlist.update(self._extract_uploader(data))
|
playlist.update(self._extract_uploader(data))
|
||||||
|
@@ -2736,8 +2750,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
||||||
self._playlist_entries(playlist), playlist_id=playlist_id,
|
self._playlist_entries(playlist), playlist_id=playlist_id,
|
||||||
playlist_title=title)
|
playlist_title=title)
|
||||||
|
|
||||||
def _extract_identity_token(self, webpage, item_id):
|
def _extract_identity_token(self, ytcfg, webpage):
|
||||||
ytcfg = self._extract_ytcfg(item_id, webpage)
|
|
||||||
if ytcfg:
|
if ytcfg:
|
||||||
token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
|
token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
|
||||||
if token:
|
if token:
|
||||||
|
@@ -2760,12 +2773,11 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
||||||
return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
|
return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
|
||||||
self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
|
self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
|
||||||
webpage = self._download_webpage(url, item_id)
|
webpage = self._download_webpage(url, item_id)
|
||||||
identity_token = self._extract_identity_token(webpage, item_id)
|
|
||||||
data = self._extract_yt_initial_data(item_id, webpage)
|
data = self._extract_yt_initial_data(item_id, webpage)
|
||||||
tabs = try_get(
|
tabs = try_get(
|
||||||
data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
|
data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
|
||||||
if tabs:
|
if tabs:
|
||||||
return self._extract_from_tabs(item_id, webpage, data, tabs, identity_token)
|
return self._extract_from_tabs(item_id, webpage, data, tabs)
|
||||||
playlist = try_get(
|
playlist = try_get(
|
||||||
data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
|
data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
|
||||||
if playlist:
|
if playlist:
|
||||||
|
|
Loading…
Reference in a new issue