[youtube] Improve yt initial data extraction (closes #27093)
This commit is contained in:
parent
86f2fa1590
commit
b31b5f4434
1 changed file with 20 additions and 2 deletions
|
@ -283,6 +283,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
|
||||||
|
|
||||||
def _call_api(self, ep, query, video_id):
|
def _call_api(self, ep, query, video_id):
|
||||||
data = self._DEFAULT_API_DATA.copy()
|
data = self._DEFAULT_API_DATA.copy()
|
||||||
data.update(query)
|
data.update(query)
|
||||||
|
@ -299,8 +301,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||||
def _extract_yt_initial_data(self, video_id, webpage):
|
def _extract_yt_initial_data(self, video_id, webpage):
|
||||||
return self._parse_json(
|
return self._parse_json(
|
||||||
self._search_regex(
|
self._search_regex(
|
||||||
r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;',
|
(r'%s\s*\n' % self._YT_INITIAL_DATA_RE,
|
||||||
webpage, 'yt initial data'),
|
self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
|
||||||
video_id)
|
video_id)
|
||||||
|
|
||||||
|
|
||||||
|
@ -1066,6 +1068,22 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
# with '};' inside yt initial data (see https://github.com/ytdl-org/youtube-dl/issues/27093)
|
||||||
|
'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'CHqg6qOn4no',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Part 77 Sort a list of simple types in c#',
|
||||||
|
'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
|
||||||
|
'upload_date': '20130831',
|
||||||
|
'uploader_id': 'kudvenkat',
|
||||||
|
'uploader': 'kudvenkat',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
|
|
Loading…
Reference in a new issue