[youtube] Separate methods for embeds extraction
This commit is contained in:
parent
c5c9bf0c12
commit
66c9fa36c1
2 changed files with 41 additions and 29 deletions
|
@ -2243,36 +2243,11 @@ class GenericIE(InfoExtractor):
|
||||||
if vid_me_embed_url is not None:
|
if vid_me_embed_url is not None:
|
||||||
return self.url_result(vid_me_embed_url, 'Vidme')
|
return self.url_result(vid_me_embed_url, 'Vidme')
|
||||||
|
|
||||||
# Look for embedded YouTube player
|
# Look for YouTube embeds
|
||||||
matches = re.findall(r'''(?x)
|
youtube_urls = YoutubeIE._extract_urls(webpage)
|
||||||
(?:
|
if youtube_urls:
|
||||||
<iframe[^>]+?src=|
|
|
||||||
data-video-url=|
|
|
||||||
<embed[^>]+?src=|
|
|
||||||
embedSWF\(?:\s*|
|
|
||||||
<object[^>]+data=|
|
|
||||||
new\s+SWFObject\(
|
|
||||||
)
|
|
||||||
(["\'])
|
|
||||||
(?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
|
|
||||||
(?:embed|v|p)/.+?)
|
|
||||||
\1''', webpage)
|
|
||||||
if matches:
|
|
||||||
return self.playlist_from_matches(
|
return self.playlist_from_matches(
|
||||||
matches, video_id, video_title, lambda m: unescapeHTML(m[1]))
|
youtube_urls, video_id, video_title, ie=YoutubeIE.ie_key())
|
||||||
|
|
||||||
# Look for lazyYT YouTube embed
|
|
||||||
matches = re.findall(
|
|
||||||
r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
|
|
||||||
if matches:
|
|
||||||
return self.playlist_from_matches(matches, video_id, video_title, lambda m: unescapeHTML(m))
|
|
||||||
|
|
||||||
# Look for Wordpress "YouTube Video Importer" plugin
|
|
||||||
matches = re.findall(r'''(?x)<div[^>]+
|
|
||||||
class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
|
|
||||||
data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
|
|
||||||
if matches:
|
|
||||||
return self.playlist_from_matches(matches, video_id, video_title, lambda m: m[-1])
|
|
||||||
|
|
||||||
matches = DailymotionIE._extract_urls(webpage)
|
matches = DailymotionIE._extract_urls(webpage)
|
||||||
if matches:
|
if matches:
|
||||||
|
|
|
@ -1374,6 +1374,43 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
playback_url, video_id, 'Marking watched',
|
playback_url, video_id, 'Marking watched',
|
||||||
'Unable to mark watched', fatal=False)
|
'Unable to mark watched', fatal=False)
|
||||||
|
|
||||||
|
@staticmethod
def _extract_urls(webpage):
    """Collect every YouTube embed referenced in an HTML page.

    Three kinds of markup are scanned, and their results are concatenated
    in this order:

    1. Player embeds: a quoted youtube.com / youtube-nocookie.com URL of
       the form /embed/, /v/ or /p/ that follows an iframe/embed src, an
       object data attribute, a data-video-url attribute, or an SWFObject
       call. The HTML-unescaped URL is returned.
    2. lazyYT placeholders: the HTML-unescaped value of the
       data-youtube-id attribute.
    3. Wordpress "YouTube Video Importer" players: the raw value of the
       data-video_id attribute.

    Entries from (2) and (3) are bare attribute values, not URLs.

    :param webpage: HTML source of the page, as a string.
    :return: list of strings, empty when nothing matched. Duplicates are
        not removed.
    """
    # Embedded YouTube player.
    # The embedSWF alternative used to require a literal ':' after an
    # optional '(' -- so the ordinary call form embedSWF("//www.youtube...")
    # never matched. Both characters are optional now, which keeps every
    # previously accepted spelling and additionally accepts the plain call.
    entries = [
        unescapeHTML(mobj.group('url'))
        for mobj in re.finditer(r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(?:?\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
            (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
            (?:embed|v|p)/.+?)
        \1''', webpage)]

    # lazyYT YouTube embed
    entries.extend(
        unescapeHTML(video_id)
        for video_id in re.findall(
            r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))

    # Wordpress "YouTube Video Importer" plugin
    matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # Each match is a (q1, q2, video_id) tuple; only the last group is wanted.
    entries.extend(m[-1] for m in matches)

    return entries
|
||||||
|
|
||||||
|
@staticmethod
def _extract_url(webpage):
    """Return the first YouTube embed found in *webpage*, or None.

    Thin convenience wrapper over ``YoutubeIE._extract_urls`` for callers
    that only care about a single result.
    """
    found = YoutubeIE._extract_urls(webpage)
    if not found:
        return None
    return found[0]
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def extract_id(cls, url):
|
def extract_id(cls, url):
|
||||||
mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
|
mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
|
||||||
|
|
Loading…
Reference in a new issue