[googledrive] Modernize

This commit is contained in:
remitamine 2015-12-21 03:05:34 +01:00
parent 8e92d21ebf
commit 5b251628e9
3 changed files with 54 additions and 101 deletions

View file

@ -209,10 +209,7 @@ from .globo import GloboIE
from .godtube import GodTubeIE from .godtube import GodTubeIE
from .goldenmoustache import GoldenMoustacheIE from .goldenmoustache import GoldenMoustacheIE
from .golem import GolemIE from .golem import GolemIE
from .googledrive import ( from .googledrive import GoogleDriveIE
GoogleDriveEmbedIE,
GoogleDriveIE,
)
from .googleplus import GooglePlusIE from .googleplus import GooglePlusIE
from .googlesearch import GoogleSearchIE from .googlesearch import GoogleSearchIE
from .gorillavid import GorillaVidIE from .gorillavid import GorillaVidIE

View file

@ -48,7 +48,7 @@ from .vimeo import VimeoIE
from .dailymotion import DailymotionCloudIE from .dailymotion import DailymotionCloudIE
from .onionstudios import OnionStudiosIE from .onionstudios import OnionStudiosIE
from .snagfilms import SnagFilmsEmbedIE from .snagfilms import SnagFilmsEmbedIE
from .googledrive import GoogleDriveEmbedIE from .googledrive import GoogleDriveIE
class GenericIE(InfoExtractor): class GenericIE(InfoExtractor):
@ -1601,7 +1601,7 @@ class GenericIE(InfoExtractor):
return self.url_result(nbc_sports_url, 'NBCSportsVPlayer') return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
# Look for Google Drive embeds # Look for Google Drive embeds
google_drive_url = GoogleDriveEmbedIE._extract_url(webpage) google_drive_url = GoogleDriveIE._extract_url(webpage)
if google_drive_url: if google_drive_url:
return self.url_result(google_drive_url, 'GoogleDrive') return self.url_result(google_drive_url, 'GoogleDrive')

View file

@ -1,132 +1,88 @@
from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
RegexNotFoundError,
ExtractorError, ExtractorError,
int_or_none,
) )
class GoogleDriveEmbedIE(InfoExtractor):
_VALID_URL = r'https?://(?:video\.google\.com/get_player\?.*?docid=|(?:docs|drive)\.google\.com/file/d/)(?P<id>[a-zA-Z0-9_-]{28})' class GoogleDriveIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/)|video\.google\.com/get_player\?.*?docid=)(?P<id>[a-zA-Z0-9_-]{28})'
_TEST = { _TEST = {
'url': 'https://docs.google.com/file/d/0B8KB9DRosYGKMXNoeWxqa3JYclE/preview', 'url': 'https://drive.google.com/file/d/0ByeS4oOUV-49Zzh4R1J6R09zazQ/edit?pli=1',
'md5': '881f7700aec4f538571fa1e0eed4a7b6',
'info_dict': { 'info_dict': {
'id': '0B8KB9DRosYGKMXNoeWxqa3JYclE', 'id': '0ByeS4oOUV-49Zzh4R1J6R09zazQ',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Jimmy Fallon Sings Since You\'ve Been Gone.wmv', 'title': 'Big Buck Bunny.mp4',
'duration': 46,
} }
} }
_FORMATS_EXT = {
'5': 'flv',
'6': 'flv',
'13': '3gp',
'17': '3gp',
'18': 'mp4',
'22': 'mp4',
'34': 'flv',
'35': 'flv',
'36': '3gp',
'37': 'mp4',
'38': 'mp4',
'43': 'webm',
'44': 'webm',
'45': 'webm',
'46': 'webm',
'59': 'mp4',
}
@staticmethod @staticmethod
def _extract_url(webpage): def _extract_url(webpage):
mobj = re.search( mobj = re.search(
r'<iframe src="https?://(?:video\.google\.com/get_player\?.*?docid=|(?:docs|drive)\.google\.com/file/d/)(?P<id>[a-zA-Z0-9_-]{28})', r'<iframe[^>]+src="https?://(?:video\.google\.com/get_player\?.*?docid=|(?:docs|drive)\.google\.com/file/d/)(?P<id>[a-zA-Z0-9_-]{28})',
webpage) webpage)
if mobj: if mobj:
return 'https://drive.google.com/file/d/%s' % mobj.group('id') return 'https://drive.google.com/file/d/%s' % mobj.group('id')
def _real_extract(self, url):
video_id = self._match_id(url)
return {
'_type': 'url',
'ie_key': 'GoogleDrive',
'url': 'https://drive.google.com/file/d/%s' % video_id
}
class GoogleDriveIE(InfoExtractor):
_VALID_URL = r'https?://(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/)(?P<id>[a-zA-Z0-9_-]{28})'
_TEST = {
'url': 'https://drive.google.com/file/d/0ByeS4oOUV-49Zzh4R1J6R09zazQ/edit?pli=1',
'info_dict': {
'id': '0ByeS4oOUV-49Zzh4R1J6R09zazQ',
'ext': 'mp4',
'title': 'Big Buck Bunny.mp4',
}
}
_formats = {
'5': {'ext': 'flv'},
'6': {'ext': 'flv'},
'13': {'ext': '3gp'},
'17': {'ext': '3gp'},
'18': {'ext': 'mp4'},
'22': {'ext': 'mp4'},
'34': {'ext': 'flv'},
'35': {'ext': 'flv'},
'36': {'ext': '3gp'},
'37': {'ext': 'mp4'},
'38': {'ext': 'mp4'},
'43': {'ext': 'webm'},
'44': {'ext': 'webm'},
'45': {'ext': 'webm'},
'46': {'ext': 'webm'},
'59': {'ext': 'mp4'}
}
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage( webpage = self._download_webpage(
'http://docs.google.com/file/d/' + video_id, video_id, encoding='unicode_escape' 'http://docs.google.com/file/d/%s' % video_id, video_id, encoding='unicode_escape')
)
try: reason = self._search_regex(r'"reason"\s*,\s*"([^"]+)', webpage, 'reason', default=None)
title = self._html_search_regex( if reason:
r'"title"\s*,\s*"([^"]+)', raise ExtractorError(reason)
webpage,
'title' title = self._search_regex(r'"title"\s*,\s*"([^"]+)', webpage, 'title')
) duration = int_or_none(self._search_regex(
fmt_stream_map = self._html_search_regex( r'"length_seconds"\s*,\s*"([^"]+)', webpage, 'length seconds', default=None))
r'"fmt_stream_map"\s*,\s*"([^"]+)', fmt_stream_map = self._search_regex(
webpage, r'"fmt_stream_map"\s*,\s*"([^"]+)', webpage, 'fmt stream map').split(',')
'fmt_stream_map' fmt_list = self._search_regex(r'"fmt_list"\s*,\s*"([^"]+)', webpage, 'fmt_list').split(',')
)
fmt_list = self._html_search_regex(
r'"fmt_list"\s*,\s*"([^"]+)',
webpage,
'fmt_list'
)
# timestamp = self._html_search_regex(
# r'"timestamp"\s*,\s*"([^"]+)',
# webpage,
# 'timestamp'
# )
length_seconds = self._html_search_regex(
r'"length_seconds"\s*,\s*"([^"]+)',
webpage,
'length_seconds'
)
except RegexNotFoundError:
try:
reason = self._html_search_regex(
r'"reason","([^"]+)',
webpage,
'reason'
)
raise ExtractorError(reason)
return
except RegexNotFoundError:
raise ExtractorError('not a video')
return
fmt_stream_map = fmt_stream_map.split(',')
fmt_list = fmt_list.split(',')
formats = [] formats = []
for i in range(len(fmt_stream_map)): for fmt, fmt_stream in zip(fmt_list, fmt_stream_map):
fmt_id, fmt_url = fmt_stream_map[i].split('|') fmt_id, fmt_url = fmt_stream.split('|')
resolution = fmt_list[i].split('/')[1] resolution = fmt.split('/')[1]
width, height = resolution.split('x') width, height = resolution.split('x')
formats.append({ formats.append({
'url': fmt_url, 'url': fmt_url,
'format_id': fmt_id, 'format_id': fmt_id,
'resolution': resolution, 'resolution': resolution,
'width': int(width), 'width': int_or_none(width),
'height': int(height), 'height': int_or_none(height),
'ext': self._formats[fmt_id]['ext'] 'ext': self._FORMATS_EXT[fmt_id],
}) })
self._sort_formats(formats) self._sort_formats(formats)
return { return {
'id': video_id, 'id': video_id,
'title': title, 'title': title,
# 'timestamp': int(timestamp), 'thumbnail': self._og_search_thumbnail(webpage),
'duration': int(length_seconds), 'duration': duration,
'formats': formats 'formats': formats,
} }