from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    parse_duration,
    unified_strdate,
)


class HuffPostIE(InfoExtractor):
    """Information extractor for live.huffingtonpost.com segments.

    Accepts both the regular segment pages (``/r/segment/<slug>/<id>``) and
    the embed player URLs (``/HPLEmbedPlayer/?segmentId=<id>``).
    """

    IE_DESC = 'Huffington Post'
    # Verbose-mode regex; the hexadecimal segment id is captured as ``id``.
    _VALID_URL = r'''(?x)
        https?://(embed\.)?live\.huffingtonpost\.com/
        (?:
            r/segment/[^/]+/|
            HPLEmbedPlayer/\?segmentId=
        )
        (?P<id>[0-9a-f]+)'''

    _TEST = {
        'url': 'http://live.huffingtonpost.com/r/segment/legalese-it/52dd3e4b02a7602131000677',
        'file': '52dd3e4b02a7602131000677.mp4',
        'md5': 'TODO',
        'info_dict': {
            'title': 'TODO',
            'description': 'TODO',
            'duration': 1549,
        }
    }

    def _real_extract(self, url):
        """Build the info dict for the segment referenced by *url*.

        The segment id is taken from the URL, the segment description is
        downloaded as JSON from the embed API, and the title, duration,
        upload date, thumbnails and formats are read from its ``data``
        object.

        Returns a dict with the keys ``id``, ``title``, ``formats``,
        ``duration``, ``upload_date`` and ``thumbnails``.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        api_url = 'http://embed.live.huffingtonpost.com/api/segments/%s.json' % video_id
        data = self._download_json(api_url, video_id)['data']

        video_title = data['title']
        duration = parse_duration(data['running_time'])
        upload_date = unified_strdate(data['schedule']['started_at'])

        thumbnails = []
        for image_url in data['images'].values():
            # Skip empty entries: re.match() raises TypeError on a
            # non-string value such as None.
            if not image_url:
                continue
            # Only keep images whose file name carries a "-<W>x<H>." marker,
            # which is reported as the thumbnail resolution.
            m = re.match(r'.*-([0-9]+x[0-9]+)\.', image_url)
            if not m:
                continue
            thumbnails.append({
                'url': image_url,
                'resolution': m.group(1),
            })

        # One format per entry of the "live" sources mapping; sources whose
        # key starts with "audio/" are flagged with vcodec 'none'.
        formats = [{
            'format': key,
            'format_id': key.replace('/', '.'),
            'ext': 'mp4',
            'url': src_url,
            'vcodec': 'none' if key.startswith('audio/') else None,
        } for key, src_url in data['sources']['live'].items()]
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': video_title,
            'formats': formats,
            'duration': duration,
            'upload_date': upload_date,
            'thumbnails': thumbnails,
        }