Merge branch 'akretz-master'

This commit is contained in:
Sergey M․ 2014-12-07 01:47:09 +06:00
commit 4542535f94
2 changed files with 55 additions and 7 deletions
youtube_dl/extractor

View file

@ -439,7 +439,7 @@ class InfoExtractor(object):
return video_info return video_info
@staticmethod @staticmethod
def playlist_result(entries, playlist_id=None, playlist_title=None): def playlist_result(entries, playlist_id=None, playlist_title=None, playlist_description=None):
"""Returns a playlist""" """Returns a playlist"""
video_info = {'_type': 'playlist', video_info = {'_type': 'playlist',
'entries': entries} 'entries': entries}
@ -447,6 +447,8 @@ class InfoExtractor(object):
video_info['id'] = playlist_id video_info['id'] = playlist_id
if playlist_title: if playlist_title:
video_info['title'] = playlist_title video_info['title'] = playlist_title
if playlist_description:
video_info['description'] = playlist_description
return video_info return video_info
def _search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0, group=None): def _search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0, group=None):

View file

@ -85,7 +85,7 @@ class ProSiebenSat1IE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'Im Interview: Kai Wiesinger', 'title': 'Im Interview: Kai Wiesinger',
'description': 'md5:e4e5370652ec63b95023e914190b4eb9', 'description': 'md5:e4e5370652ec63b95023e914190b4eb9',
'upload_date': '20140225', 'upload_date': '20140203',
'duration': 522.56, 'duration': 522.56,
}, },
'params': { 'params': {
@ -100,7 +100,7 @@ class ProSiebenSat1IE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'Jagd auf Fertigkost im Elsthal - Teil 2', 'title': 'Jagd auf Fertigkost im Elsthal - Teil 2',
'description': 'md5:2669cde3febe9bce13904f701e774eb6', 'description': 'md5:2669cde3febe9bce13904f701e774eb6',
'upload_date': '20140225', 'upload_date': '20141014',
'duration': 2410.44, 'duration': 2410.44,
}, },
'params': { 'params': {
@ -152,12 +152,22 @@ class ProSiebenSat1IE(InfoExtractor):
'skip_download': True, 'skip_download': True,
}, },
}, },
{
'url': 'http://www.prosieben.de/tv/joko-gegen-klaas/videos/playlists/episode-8-ganze-folge-playlist',
'info_dict': {
'id': '439664',
'title': 'Episode 8 - Ganze Folge - Playlist',
'description': 'md5:63b8963e71f481782aeea877658dec84',
},
'playlist_count': 2,
},
] ]
_CLIPID_REGEXES = [ _CLIPID_REGEXES = [
r'"clip_id"\s*:\s+"(\d+)"', r'"clip_id"\s*:\s+"(\d+)"',
r'clipid: "(\d+)"', r'clipid: "(\d+)"',
r'clip[iI]d=(\d+)', r'clip[iI]d=(\d+)',
r"'itemImageUrl'\s*:\s*'/dynamic/thumbnails/full/\d+/(\d+)",
] ]
_TITLE_REGEXES = [ _TITLE_REGEXES = [
r'<h2 class="subtitle" itemprop="name">\s*(.+?)</h2>', r'<h2 class="subtitle" itemprop="name">\s*(.+?)</h2>',
@ -178,11 +188,19 @@ class ProSiebenSat1IE(InfoExtractor):
r'<span style="padding-left: 4px;line-height:20px; color:#404040">(\d{2}\.\d{2}\.\d{4})</span>', r'<span style="padding-left: 4px;line-height:20px; color:#404040">(\d{2}\.\d{2}\.\d{4})</span>',
r'(\d{2}\.\d{2}\.\d{4}) \| \d{2}:\d{2} Min<br/>', r'(\d{2}\.\d{2}\.\d{4}) \| \d{2}:\d{2} Min<br/>',
] ]
_PAGE_TYPE_REGEXES = [
r'<meta name="page_type" content="([^"]+)">',
r"'itemType'\s*:\s*'([^']*)'",
]
_PLAYLIST_ID_REGEXES = [
r'content[iI]d=(\d+)',
r"'itemId'\s*:\s*'([^']*)'",
]
_PLAYLIST_CLIP_REGEXES = [
r'(?s)data-qvt=.+?<a href="([^"]+)"',
]
def _real_extract(self, url): def _extract_clip(self, url, webpage):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
clip_id = self._html_search_regex(self._CLIPID_REGEXES, webpage, 'clip id') clip_id = self._html_search_regex(self._CLIPID_REGEXES, webpage, 'clip id')
access_token = 'testclient' access_token = 'testclient'
@ -281,3 +299,31 @@ class ProSiebenSat1IE(InfoExtractor):
'duration': duration, 'duration': duration,
'formats': formats, 'formats': formats,
} }
def _extract_playlist(self, url, webpage):
playlist_id = self._html_search_regex(
self._PLAYLIST_ID_REGEXES, webpage, 'playlist id')
for regex in self._PLAYLIST_CLIP_REGEXES:
playlist_clips = re.findall(regex, webpage)
if playlist_clips:
title = self._html_search_regex(
self._TITLE_REGEXES, webpage, 'title')
description = self._html_search_regex(
self._DESCRIPTION_REGEXES, webpage, 'description', fatal=False)
entries = [
self.url_result(
re.match('(.+?//.+?)/', url).group(1) + clip_path,
'ProSiebenSat1')
for clip_path in playlist_clips]
return self.playlist_result(entries, playlist_id, title, description)
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
page_type = self._search_regex(
self._PAGE_TYPE_REGEXES, webpage,
'page type', default='clip').lower()
if page_type == 'clip':
return self._extract_clip(url, webpage)
elif page_type == 'playlist':
return self._extract_playlist(url, webpage)