From 133b1886fc6721090ea5d3be3d382626e2602b48 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 4 Jan 2016 02:33:08 +0600 Subject: [PATCH] [20min] Improve (Closes #8110) --- youtube_dl/extractor/__init__.py | 2 +- youtube_dl/extractor/min20.py | 56 ------------------------ youtube_dl/extractor/twentymin.py | 73 +++++++++++++++++++++++++++++++ 3 files changed, 74 insertions(+), 57 deletions(-) delete mode 100644 youtube_dl/extractor/min20.py create mode 100644 youtube_dl/extractor/twentymin.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 64e1fd334..625b0bf16 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -364,7 +364,6 @@ from .mdr import MDRIE from .metacafe import MetacafeIE from .metacritic import MetacriticIE from .mgoon import MgoonIE -from .min20 import Min20IE from .minhateca import MinhatecaIE from .ministrygrid import MinistryGridIE from .miomio import MioMioIE @@ -747,6 +746,7 @@ from .tvp import TvpIE, TvpSeriesIE from .tvplay import TVPlayIE from .tweakers import TweakersIE from .twentyfourvideo import TwentyFourVideoIE +from .twentymin import TwentyMinutenIE from .twentytwotracks import ( TwentyTwoTracksIE, TwentyTwoTracksGenreIE diff --git a/youtube_dl/extractor/min20.py b/youtube_dl/extractor/min20.py deleted file mode 100644 index 23aead19d..000000000 --- a/youtube_dl/extractor/min20.py +++ /dev/null @@ -1,56 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor - - -class Min20IE(InfoExtractor): - _VALID_URL = r'http://www\.20min\.ch/(videotv/\?vid=(?P<video_id>[0-9]+)|.+?-(?P<page_id>[0-9]+)$)' - _TESTS = [{ - 'url': 'http://www.20min.ch/schweiz/news/story/-Wir-muessen-mutig-nach-vorne-schauen--22050469', - 'md5': 'cd4cbb99b94130cff423e967cd275e5e', - 'info_dict': { - 'id': '22050469', - 'ext': 'flv', - 'title': '«Wir müssen mutig nach vorne schauen»', - 'description': 'Kein Land sei innovativer als 
die Schweiz, sagte Johann Schneider-Ammann in seiner Neujahrsansprache. Das Land müsse aber seine Hausaufgaben machen.', - 'thumbnail': 'http://www.20min.ch/images/content/2/2/0/22050469/10/teaserbreit.jpg' - } - }, { - 'url': 'http://www.20min.ch/videotv/?vid=469148&cid=2', - 'md5': 'b52d6bc6ea6398e6a38f12cfd418149c', - 'info_dict': { - 'id': '469148', - 'ext': 'flv', - 'title': '85 000 Franken für 15 perfekte Minuten', - 'description': 'Was die Besucher vom Silvesterzauber erwarten können. (Video: Alice Grosjean/Murat Temel)', - 'thumbnail': 'http://thumbnails.20min-tv.ch/server063/469148/frame-72-469148.jpg' - } - }] - - # location of the flv videos, can't be extracted from the web page - _BASE_URL = "http://flv-rr.20min-tv.ch/videos/" - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('page_id') - if video_id is None: - # URL from the videoportal - video_id = mobj.group('video_id') - webpage = self._download_webpage(url, video_id) - title = self._html_search_regex(r'

<h1>.*<span>(.+?)</span></h1>

', webpage, 'title') - flash_id = self._search_regex(r"so\.addVariable\(\"file1\",\"([0-9]+)\"\)", webpage, 'flash_id') - - description = self._html_search_regex(r'', webpage, 'description') - thumbnail = self._html_search_regex(r'', webpage, 'thumbnail') - url = self._BASE_URL + flash_id + "m.flv" - - return { - 'id': video_id, - 'url': url, - 'title': title, - 'description': description, - 'thumbnail': thumbnail - } diff --git a/youtube_dl/extractor/twentymin.py b/youtube_dl/extractor/twentymin.py new file mode 100644 index 000000000..ca7d953b8 --- /dev/null +++ b/youtube_dl/extractor/twentymin.py @@ -0,0 +1,73 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import remove_end + + +class TwentyMinutenIE(InfoExtractor): + IE_NAME = '20min' + _VALID_URL = r'https?://(?:www\.)?20min\.ch/(?:videotv/*\?.*\bvid=(?P<id>\d+)|(?:[^/]+/)*(?P<display_id>[^/#?]+))' + _TESTS = [{ + # regular video + 'url': 'http://www.20min.ch/videotv/?vid=469148&cid=2', + 'md5': 'b52d6bc6ea6398e6a38f12cfd418149c', + 'info_dict': { + 'id': '469148', + 'ext': 'flv', + 'title': '85 000 Franken für 15 perfekte Minuten', + 'description': 'Was die Besucher vom Silvesterzauber erwarten können. (Video: Alice Grosjean/Murat Temel)', + 'thumbnail': 'http://thumbnails.20min-tv.ch/server063/469148/frame-72-469148.jpg' + } + }, { + # news article with video + 'url': 'http://www.20min.ch/schweiz/news/story/-Wir-muessen-mutig-nach-vorne-schauen--22050469', + 'md5': 'cd4cbb99b94130cff423e967cd275e5e', + 'info_dict': { + 'id': '469408', + 'display_id': '-Wir-muessen-mutig-nach-vorne-schauen--22050469', + 'ext': 'flv', + 'title': '«Wir müssen mutig nach vorne schauen»', + 'description': 'Kein Land sei innovativer als die Schweiz, sagte Johann Schneider-Ammann in seiner Neujahrsansprache. 
Das Land müsse aber seine Hausaufgaben machen.', + 'thumbnail': 'http://www.20min.ch/images/content/2/2/0/22050469/10/teaserbreit.jpg' + } + }, { + 'url': 'http://www.20min.ch/videotv/?cid=44&vid=468738', + 'only_matching': True, + }, { + 'url': 'http://www.20min.ch/ro/sortir/cinema/story/Grandir-au-bahut--c-est-dur-18927411', + 'only_matching': True, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + display_id = mobj.group('display_id') or video_id + + webpage = self._download_webpage(url, display_id) + + title = self._html_search_regex( + r'

<h1>.*?<span>(.+?)</span></h1>

', + webpage, 'title', default=None) + if not title: + title = remove_end(re.sub( + r'^20 [Mm]inuten.*? -', '', self._og_search_title(webpage)), ' - News') + + if not video_id: + video_id = self._search_regex( + r'"file\d?"\s*,\s*\"(\d+)', webpage, 'video id') + + description = self._html_search_meta( + 'description', webpage, 'description') + thumbnail = self._og_search_thumbnail(webpage) + + return { + 'id': video_id, + 'display_id': display_id, + 'url': 'http://speed.20min-tv.ch/%sm.flv' % video_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + }