[ted] Fix playlist extraction (closes #20844)
This commit is contained in:
parent
4831ef7fe4
commit
c2ee6fa66a
1 changed file with 16 additions and 14 deletions
|
@@ -5,8 +5,12 @@ import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
|
||||||
from ..compat import compat_str
|
from ..compat import (
|
||||||
|
compat_str,
|
||||||
|
compat_urlparse
|
||||||
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
extract_attributes,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
try_get,
|
try_get,
|
||||||
|
@@ -20,7 +24,7 @@ class TEDIE(InfoExtractor):
|
||||||
(?P<proto>https?://)
|
(?P<proto>https?://)
|
||||||
(?P<type>www|embed(?:-ssl)?)(?P<urlmain>\.ted\.com/
|
(?P<type>www|embed(?:-ssl)?)(?P<urlmain>\.ted\.com/
|
||||||
(
|
(
|
||||||
(?P<type_playlist>playlists(?:/\d+)?) # We have a playlist
|
(?P<type_playlist>playlists(?:/(?P<playlist_id>\d+))?) # We have a playlist
|
||||||
|
|
|
|
||||||
((?P<type_talk>talks)) # We have a simple talk
|
((?P<type_talk>talks)) # We have a simple talk
|
||||||
|
|
|
|
||||||
|
@@ -84,6 +88,7 @@ class TEDIE(InfoExtractor):
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '10',
|
'id': '10',
|
||||||
'title': 'Who are the hackers?',
|
'title': 'Who are the hackers?',
|
||||||
|
'description': 'md5:49a0dbe8fb76d81a0e64b4a80af7f15a'
|
||||||
},
|
},
|
||||||
'playlist_mincount': 6,
|
'playlist_mincount': 6,
|
||||||
}, {
|
}, {
|
||||||
|
@@ -150,22 +155,19 @@ class TEDIE(InfoExtractor):
|
||||||
|
|
||||||
webpage = self._download_webpage(url, name,
|
webpage = self._download_webpage(url, name,
|
||||||
'Downloading playlist webpage')
|
'Downloading playlist webpage')
|
||||||
info = self._extract_info(webpage)
|
|
||||||
|
|
||||||
playlist_info = try_get(
|
playlist_entries = []
|
||||||
info, lambda x: x['__INITIAL_DATA__']['playlist'],
|
for entry in re.findall(r'(?s)<[^>]+data-ga-context="playlist"[^>]*>', webpage):
|
||||||
dict) or info['playlist']
|
attrs = extract_attributes(entry)
|
||||||
|
entry_url = compat_urlparse.urljoin(url, attrs['href'])
|
||||||
|
playlist_entries.append(self.url_result(entry_url, self.ie_key()))
|
||||||
|
|
||||||
playlist_entries = [
|
final_url = self._og_search_url(webpage)
|
||||||
self.url_result('http://www.ted.com/talks/' + talk['slug'], self.ie_key())
|
|
||||||
for talk in try_get(
|
|
||||||
info, lambda x: x['__INITIAL_DATA__']['talks'],
|
|
||||||
dict) or info['talks']
|
|
||||||
]
|
|
||||||
return self.playlist_result(
|
return self.playlist_result(
|
||||||
playlist_entries,
|
playlist_entries,
|
||||||
playlist_id=compat_str(playlist_info['id']),
|
playlist_id=re.match(self._VALID_URL, final_url, re.VERBOSE).group('playlist_id'),
|
||||||
playlist_title=playlist_info['title'])
|
playlist_title=self._og_search_title(webpage),
|
||||||
|
playlist_description=self._og_search_description(webpage))
|
||||||
|
|
||||||
def _talk_info(self, url, video_name):
|
def _talk_info(self, url, video_name):
|
||||||
webpage = self._download_webpage(url, video_name)
|
webpage = self._download_webpage(url, video_name)
|
||||||
|
|
Loading…
Reference in a new issue