[youtube:tab] Improve grid extraction (closes #28725)

This commit is contained in:
Sergey M․ 2021-04-17 01:05:44 +07:00
parent ea87ed8394
commit 7c52395479
No known key found for this signature in database
GPG key ID: 2C393E0F18A9236D

View file

@@ -2320,9 +2320,12 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
@staticmethod @staticmethod
def _extract_grid_item_renderer(item): def _extract_grid_item_renderer(item):
for item_kind in ('Playlist', 'Video', 'Channel', 'Show'): assert isinstance(item, dict)
renderer = item.get('grid%sRenderer' % item_kind) for key, renderer in item.items():
if renderer: if not key.startswith('grid') or not key.endswith('Renderer'):
continue
if not isinstance(renderer, dict):
continue
return renderer return renderer
def _grid_entries(self, grid_renderer): def _grid_entries(self, grid_renderer):
@@ -2333,7 +2336,8 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
if not isinstance(renderer, dict): if not isinstance(renderer, dict):
continue continue
title = try_get( title = try_get(
renderer, lambda x: x['title']['runs'][0]['text'], compat_str) renderer, (lambda x: x['title']['runs'][0]['text'],
lambda x: x['title']['simpleText']), compat_str)
# playlist # playlist
playlist_id = renderer.get('playlistId') playlist_id = renderer.get('playlistId')
if playlist_id: if playlist_id:
@@ -2341,10 +2345,12 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
'https://www.youtube.com/playlist?list=%s' % playlist_id, 'https://www.youtube.com/playlist?list=%s' % playlist_id,
ie=YoutubeTabIE.ie_key(), video_id=playlist_id, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
video_title=title) video_title=title)
continue
# video # video
video_id = renderer.get('videoId') video_id = renderer.get('videoId')
if video_id: if video_id:
yield self._extract_video(renderer) yield self._extract_video(renderer)
continue
# channel # channel
channel_id = renderer.get('channelId') channel_id = renderer.get('channelId')
if channel_id: if channel_id:
@@ -2353,19 +2359,17 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
yield self.url_result( yield self.url_result(
'https://www.youtube.com/channel/%s' % channel_id, 'https://www.youtube.com/channel/%s' % channel_id,
ie=YoutubeTabIE.ie_key(), video_title=title) ie=YoutubeTabIE.ie_key(), video_title=title)
# show continue
if playlist_id is None: # needs to check for playlist_id, or non-series playlists are recognized twice # generic endpoint URL support
show_playlist_url = try_get( ep_url = urljoin('https://www.youtube.com/', try_get(
renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'], renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
compat_str) compat_str))
if show_playlist_url: if ep_url:
playlist_id = self._search_regex(r'/playlist\?list=([0-9a-zA-Z-_]+)', show_playlist_url, for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
'playlist id', default=None) if ie.suitable(ep_url):
if playlist_id:
title = try_get(renderer, lambda x: x['title']['simpleText'], compat_str)
yield self.url_result( yield self.url_result(
"https://www.youtube.com/playlist?list=%s" % playlist_id, ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
ie=YoutubeTabIE.ie_key(), video_id=playlist_id, video_title=title) break
def _shelf_entries_from_content(self, shelf_renderer): def _shelf_entries_from_content(self, shelf_renderer):
content = shelf_renderer.get('content') content = shelf_renderer.get('content')