[youtube] improve music metadata and license extraction (closes #26013)
This commit is contained in:
parent
f4415faa46
commit
01c92973dd
1 changed file with 29 additions and 1 deletion
|
@ -2162,7 +2162,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
# Youtube Music Auto-generated description
|
# Youtube Music Auto-generated description
|
||||||
release_date = release_year = None
|
release_date = release_year = None
|
||||||
if video_description:
|
if video_description:
|
||||||
mobj = re.search(r'(?s)Provided to YouTube by [^\n]+\n+(?P<track>[^·]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?', video_description)
|
mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
|
||||||
if mobj:
|
if mobj:
|
||||||
if not track:
|
if not track:
|
||||||
track = mobj.group('track').strip()
|
track = mobj.group('track').strip()
|
||||||
|
@ -2179,6 +2179,34 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
if release_year:
|
if release_year:
|
||||||
release_year = int(release_year)
|
release_year = int(release_year)
|
||||||
|
|
||||||
|
yt_initial_data = self._extract_yt_initial_data(video_id, video_webpage)
|
||||||
|
contents = try_get(yt_initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
|
||||||
|
for content in contents:
|
||||||
|
rows = try_get(content, lambda x: x['videoSecondaryInfoRenderer']['metadataRowContainer']['metadataRowContainerRenderer']['rows'], list) or []
|
||||||
|
multiple_songs = False
|
||||||
|
for row in rows:
|
||||||
|
if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
|
||||||
|
multiple_songs = True
|
||||||
|
break
|
||||||
|
for row in rows:
|
||||||
|
mrr = row.get('metadataRowRenderer') or {}
|
||||||
|
mrr_title = try_get(
|
||||||
|
mrr, lambda x: x['title']['simpleText'], compat_str)
|
||||||
|
mrr_contents = try_get(
|
||||||
|
mrr, lambda x: x['contents'][0], dict) or {}
|
||||||
|
mrr_contents_text = try_get(mrr_contents, [lambda x: x['simpleText'], lambda x: x['runs'][0]['text']], compat_str)
|
||||||
|
if not (mrr_title and mrr_contents_text):
|
||||||
|
continue
|
||||||
|
if mrr_title == 'License':
|
||||||
|
video_license = mrr_contents_text
|
||||||
|
elif not multiple_songs:
|
||||||
|
if mrr_title == 'Album':
|
||||||
|
album = mrr_contents_text
|
||||||
|
elif mrr_title == 'Artist':
|
||||||
|
artist = mrr_contents_text
|
||||||
|
elif mrr_title == 'Song':
|
||||||
|
track = mrr_contents_text
|
||||||
|
|
||||||
m_episode = re.search(
|
m_episode = re.search(
|
||||||
r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
|
r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
|
||||||
video_webpage)
|
video_webpage)
|
||||||
|
|
Loading…
Reference in a new issue