Add an extractor for Szombathelyi TV

2013-10-14 13:07:47 +02:00 · 2013-10-14 13:07:47 +02:00 · f9b3d7af47
commit f9b3d7af47
parent ea62a2da46
2 changed files with 42 additions and 0 deletions
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -112,6 +112,7 @@ from .spiegel import SpiegelIE
 from .stanfordoc import StanfordOpenClassroomIE
 from .statigram import StatigramIE
 from .steam import SteamIE
 from .sztvhu import SztvHuIE
 from .teamcoco import TeamcocoIE
 from .ted import TEDIE
 from .tf1 import TF1IE
--- a/youtube_dl/extractor/sztvhu.py
+++ b/youtube_dl/extractor/sztvhu.py
@@ -0,0 +1,41 @@
 # -*- coding: utf-8 -*-
 import re
 from .common import InfoExtractor
 from ..utils import determine_ext
 class SztvHuIE(InfoExtractor):
     """Information extractor for Szombathelyi TV (sztv.hu, tvszombathely.hu).

     The part of the page URL after the first path segment (the ``name``
     group of ``_VALID_URL``) is used as the video id.
     """

     _VALID_URL = r'(?:http://)?(?:(?:www\.)?sztv\.hu|www\.tvszombathely\.hu)/([^/]+)/(?P<name>.+)'
     _TEST = {
         u'url': u'http://sztv.hu/hirek/cserkeszek-nepszerusitettek-a-kornyezettudatos-eletmodot-a-savaria-teren-20130909',
         u'file': u'130909zoldnap.mp4',
         u'md5': u'0047eacedc0afd1ceeac99e69173a07e',
         u'info_dict': {
             u"title": u"Cserkészek népszerűsítették a környezettudatos életmódot a Savaria téren",
             u"description": u'A zöld nap játékos ismeretterjesztő programjait a Magyar Cserkész Szövetség szervezte, akik az ország nyolc városában adják át tudásukat az érdeklődőknek. A PET...',
         }
     }

     def _real_extract(self, url):
         """Download the page at *url* and build the video info dict.

         :param url: page URL; it is matched against ``_VALID_URL``.
         :return: dict with the keys ``id``, ``url``, ``title``, ``ext``,
             ``description`` and ``thumbnail``.
         """
         mobj = re.match(self._VALID_URL, url)
         name = mobj.group('name')
         webpage = self._download_webpage(url, name)

         # The page's `file: "..."` entry starts with three characters and a
         # colon (presumably a format prefix such as "mp4:" -- TODO confirm);
         # only the part after the colon is captured.
         video_file = self._search_regex(
             r'file: "...:(.*?)",', webpage, 'video file')

         # The meta title carries up to two trailing " - "-separated parts
         # after the real title; keep only the leading part.
         meta_title = self._html_search_regex(
             r'<meta name="title" content="([^"]*)"', webpage, 'video title')
         title = meta_title.rsplit(' - ', 2)[0]

         # The description is optional metadata: a page without this exact
         # meta tag must not abort an otherwise successful extraction.
         description = self._html_search_regex(
             r'<meta name="description" content="([^"]*)"/>',
             webpage, 'video description', fatal=False)
         thumbnail = self._og_search_thumbnail(webpage)

         video_url = 'http://media.sztv.hu/vod/' + video_file
         return {
             'id': name,
             'url': video_url,
             'title': title,
             'ext': determine_ext(video_url),
             'description': description,
             'thumbnail': thumbnail,
         }