[youtube:feed] Check each 'load more' portion for unique video ids
This commit is contained in:
parent
25f14e9f93
commit
62c95fd5fc
1 changed file with 8 additions and 2 deletions
|
@@ -1621,10 +1621,16 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
|
||||||
# for the video ids doesn't contain an index
|
# for the video ids doesn't contain an index
|
||||||
ids = []
|
ids = []
|
||||||
more_widget_html = content_html = page
|
more_widget_html = content_html = page
|
||||||
|
|
||||||
for page_num in itertools.count(1):
|
for page_num in itertools.count(1):
|
||||||
matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)
|
matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)
|
||||||
new_ids = orderedSet(matches)
|
|
||||||
|
# 'recommended' feed has infinite 'load more' and each new portion spins
|
||||||
|
# the same videos in (sometimes) slightly different order, so we'll check
|
||||||
|
# for uniqueness and break when the portion has no new videos
|
||||||
|
new_ids = filter(lambda video_id: video_id not in ids, orderedSet(matches))
|
||||||
|
if not new_ids:
|
||||||
|
break
|
||||||
|
|
||||||
ids.extend(new_ids)
|
ids.extend(new_ids)
|
||||||
|
|
||||||
mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
|
mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
|
||||||
|
|
Loading…
Reference in a new issue