From 132aece1ed824aafd849ef63dcb44c835eab6ee9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 1 Dec 2020 04:44:10 +0700 Subject: [PATCH] [youtube:tab] Extract channels only from channels tab (closes #27266) --- youtube_dl/extractor/youtube.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 6fdc379cd..b796f58b2 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -2796,12 +2796,17 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): # TODO pass - def _shelf_entries(self, shelf_renderer): + def _shelf_entries(self, shelf_renderer, skip_channels=False): ep = try_get( shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'], compat_str) shelf_url = urljoin('https://www.youtube.com', ep) if shelf_url: + # Skipping links to other channels; note that checking for + # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL + # will not work + if skip_channels and '/channels?' 
in shelf_url: + return title = try_get( shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str) yield self.url_result(shelf_url, video_title=title) @@ -2912,9 +2917,13 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): } def _entries(self, tab, identity_token): - slr_renderer = try_get(tab, lambda x: x['sectionListRenderer'], dict) + tab_content = try_get(tab, lambda x: x['content'], dict) + if not tab_content: + return + slr_renderer = try_get(tab_content, lambda x: x['sectionListRenderer'], dict) if not slr_renderer: return + is_channels_tab = tab.get('title') == 'Channels' continuation = None slr_contents = try_get(slr_renderer, lambda x: x['contents'], list) or [] for slr_content in slr_contents: @@ -2941,7 +2950,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): continue renderer = isr_content.get('shelfRenderer') if renderer: - for entry in self._shelf_entries(renderer): + for entry in self._shelf_entries(renderer, not is_channels_tab): yield entry continue renderer = isr_content.get('backstagePostThreadRenderer') @@ -3071,7 +3080,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): description = None playlist_id = item_id playlist = self.playlist_result( - self._entries(selected_tab['content'], identity_token), + self._entries(selected_tab, identity_token), playlist_id=playlist_id, playlist_title=title, playlist_description=description) playlist.update(self._extract_uploader(data))