Add various anime sites (Closes #4554)

2015-01-04 02:05:26 +01:00 · 2015-01-04 02:05:26 +01:00 · b68ff25917
commit b68ff25917
parent 19b05d886e
6 changed files with 355 additions and 0 deletions
--- a/1
+++ b/1
@ -98,3 +98,4 @@ Will Glynn
 Max Reimann
 Cédric Luthi
 Thijs Vermeir
 Joel Leclerc
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@ -164,6 +164,10 @@ from .globo import GloboIE
 from .godtube import GodTubeIE
 from .goldenmoustache import GoldenMoustacheIE
 from .golem import GolemIE
 from .gogoanime import (
    GoGoAnimeIE,
    GoGoAnimeSearchIE
 )
 from .googleplus import GooglePlusIE
 from .googlesearch import GoogleSearchIE
 from .gorillavid import GorillaVidIE
@ -313,6 +317,16 @@ from .phoenix import PhoenixIE
 from .photobucket import PhotobucketIE
 from .planetaplay import PlanetaPlayIE
 from .played import PlayedIE
 from .play44 import (
    Play44IE,
    ByZooIE,
    Video44IE,
    VideoWingIE,
    PlayPandaIE,
    VideoZooIE,
    PlayBBIE,
    EasyVideoIE
 )
 from .playfm import PlayFMIE
 from .playvid import PlayvidIE
 from .podomatic import PodomaticIE
@ -373,6 +387,10 @@ from .smotri import (
 from .snotr import SnotrIE
 from .sockshare import SockshareIE
 from .sohu import SohuIE
 from .soulanime import (
    SoulAnimeWatchingIE,
    SoulAnimeSeriesIE
 )
 from .soundcloud import (
    SoundcloudIE,
    SoundcloudSetIE,
@ -467,6 +485,7 @@ from .viddler import ViddlerIE
 from .videobam import VideoBamIE
 from .videodetective import VideoDetectiveIE
 from .videolecturesnet import VideoLecturesNetIE
 from .videofun import VideoFunIE
 from .videofyme import VideofyMeIE
 from .videomega import VideoMegaIE
 from .videopremium import VideoPremiumIE
--- a/youtube_dl/extractor/gogoanime.py
+++ b/youtube_dl/extractor/gogoanime.py
@ -0,0 +1,76 @@
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from ..utils import (
    ExtractorError,
    compat_urllib_parse,
    get_element_by_attribute,
    unescapeHTML
 )
 class GoGoAnimeIE(InfoExtractor):
    """Extractor for gogoanime.com post pages.

    A post embeds one or more third-party players through <iframe> tags.
    The extractor hands those iframe URLs over to the matching extractors,
    either as a playlist or as a single URL result.
    """
    IE_NAME = 'gogoanime'
    IE_DESC = 'GoGoAnime'
    # The dots of the host name are escaped so that they only match
    # literal dots instead of any character.
    _VALID_URL = r'http://www\.gogoanime\.com/(?P<id>[A-Za-z0-9-]+)'
    _TEST = {
        'url': 'http://www.gogoanime.com/mahou-shoujo-madoka-magica-movie-1',
        'info_dict': {
            'id': 'mahou-shoujo-madoka-magica-movie-1'
        },
        'playlist_count': 3
    }
    def _real_extract(self, url):
        """Return a playlist or a single URL result for the post at url.

        Raises ExtractorError when the page reports that it does not exist,
        when the post content cannot be located, or when a single-video
        page contains no usable iframe.
        """
        video_id = self._match_id(url)
        page = self._download_webpage(url, video_id)
        if 'Oops! Page Not Found</font>' in page:
            raise ExtractorError('Video does not exist', expected=True)
        content = get_element_by_attribute("class", "postcontent", page)
        # Without this guard a missing element would be passed on to
        # re.findall, which fails on anything that is not a string.
        if content is None:
            raise ExtractorError('Unable to find the post content')
        vids = re.findall(r'<iframe[^>]*?src=[\'"](h[^\'"]+)[\'"]', content)
        # videofun embeds are skipped; everything else is unquoted and
        # HTML-unescaped before being passed on.
        vids = [
            unescapeHTML(compat_urllib_parse.unquote(x))
            for x in vids if 'videofun' not in x]
        # Post content that opens with an iframe directly followed by a
        # <br /> is returned as a playlist of all embedded players.
        if re.search(r'<div class="postcontent">[^<]*<p><iframe src=[\'"][^>]+></iframe><br />', page):
            return self.playlist_result([self.url_result(vid) for vid in vids], video_id)
        # Only the single-video path indexes into the list, so only this
        # path needs at least one remaining iframe.
        if not vids:
            raise ExtractorError('Unable to find any embedded video')
        title = self._html_search_regex(
            r'<div class="postdesc">[^<]*<h1>([^<]+)</h1>', page, 'title')
        return {
            '_type': 'url',
            'id': video_id,
            'url': vids[0],
            'title': title,
        }
 class GoGoAnimeSearchIE(InfoExtractor):
    """Extractor turning a gogoanime.com search URL into a playlist.

    The value of the ``s`` query parameter is used as the playlist id and
    every post link found on the result page becomes one playlist entry.
    """
    IE_NAME = "gogoanime:search"
    IE_DESC = "GoGoAnime Search"
    _VALID_URL = r"http://www\.gogoanime\.com/.*\?s=(?P<id>[^&]*)"
    _TEST = {
        "url": "http://www.gogoanime.com/?s=bokusatsu",
        "info_dict": {
            "id": "bokusatsu",
        },
        "playlist_count": 6,
    }
    def _real_extract(self, url):
        """Download the result page and return its post links as a playlist."""
        query = self._match_id(url)
        results_page = self._download_webpage(url, query)
        entries = []
        # Each hit is the href of the first link inside a "postlist" block.
        for post_url in re.findall(
                r'<div class="postlist">[^<]*<p[^>]*>[^<]*<a href="(?P<url>[^"]+)"',
                results_page):
            entries.append(self.url_result(post_url))
        return self.playlist_result(entries, query)
--- a/youtube_dl/extractor/play44.py
+++ b/youtube_dl/extractor/play44.py
@ -0,0 +1,149 @@
 from __future__ import unicode_literals
 from .common import InfoExtractor
 from ..utils import (
    compat_urllib_parse
 )
 class Play44IE(InfoExtractor):
    """Extractor for play44.net embed pages.

    The embed page assigns the percent-encoded media URL to a ``_url``
    JavaScript variable.  The file name of that URL, without its extension,
    is used both as the id and as the title of the result.
    """
    _VALID_URL = r'http://[w.]*play44\.net/embed\.php[^/]*/(?P<id>.+)'
    _TESTS = [{
        'url': 'http://play44.net/embed.php?w=600&h=438&vid=M/mahou-shoujo-madoka-magica-07.flv',
        'md5': 'e37e99d665f503dd2db952f7c4dba9e6',
        'info_dict': {
            'id': 'mahou-shoujo-madoka-magica-07',
            'ext': 'flv',
            'title': 'mahou-shoujo-madoka-magica-07',
        }
    }]
    def _real_extract(self, url):
        """Return the info dict (id, url, title) for the embed page at url."""
        video_id = self._match_id(url)
        page = self._download_webpage(url, video_id)
        video_url = compat_urllib_parse.unquote(self._html_search_regex(
            r'_url = "(https?://[^"]+?)";', page, 'url'))
        # The trailing dot is escaped: as a wildcard it matched any
        # character, so a '/' later in the URL (e.g. in a query value) made
        # the pattern settle on that last segment and cut off its final
        # character instead of finding the real file name.
        title = self._search_regex(r'.*/(?P<title>[^.]*)\.', video_url, 'title')
        return {
            'id': title,
            'url': video_url,
            'title': title,
        }
 class ByZooIE(Play44IE):
    """byzoo.org embed pages; extraction is inherited from Play44IE."""
    _VALID_URL = r"http://[w.]*byzoo\.org/embed\.php[^/]*/(?P<id>.+)"
    _TESTS = [
        {
            "url": "http://byzoo.org/embed.php?w=600&h=438&vid=at/nw/mahou_shoujo_madoka_magica_movie_3_-_part1.mp4",
            "md5": "455c83dabe2cd9fd74a87612b01fe017",
            "info_dict": {
                "id": "mahou_shoujo_madoka_magica_movie_3_-_part1",
                "ext": "mp4",
                "title": "mahou_shoujo_madoka_magica_movie_3_-_part1",
            },
        },
    ]
 class Video44IE(Play44IE):
    """video44.net embed pages; extraction is inherited from Play44IE."""
    # The id is the value of the "file" parameter up to the first '.' or
    # '&'.  The previous group, "[^&].", captured exactly two characters.
    _VALID_URL = r'http://[w.]*video44\.net/.*file=(?P<id>[^&.]+)'
    _TESTS = [{
        'url': 'http://www.video44.net/gogo/?w=600&h=438&file=chaoshead-12.flv&sv=1',
        'md5': '43eaec6d0beb10e8d42459b9f108aff3',
        'info_dict': {
            'id': 'chaoshead-12',
            'ext': 'mp4',
            'title': 'chaoshead-12',
        }
    }]
 class VideoWingIE(Play44IE):
    """videowing embed pages; extraction is inherited from Play44IE.

    Two URL shapes are accepted: a ``video=`` query parameter holding a
    file path, and an ``embed/<token>`` path.
    """
    _VALID_URL = r'''(?x)
        http://[w.]*videowing\.[^/]*/
        (?:
            .*video=/*
            |embed/
        )
        (?P<id>[^&?.]+)
    '''
    _TESTS = [
        {
            # File path passed through the "video" query parameter.
            "url": "http://videowing.me/embed?w=718&h=438&video=ongoing/boku_wa_tomodachi_ga_sukunai_-_05.mp4",
            "md5": "4ed320e353ed26c742c4f12a9c210b60",
            "info_dict": {
                "id": "boku_wa_tomodachi_ga_sukunai_-_05",
                "ext": "mp4",
                "title": "boku_wa_tomodachi_ga_sukunai_-_05",
            },
        },
        {
            # Token directly after the "embed/" path component.
            "url": "http://videowing.me/embed/a8d6a39522df066bd734a69f2334497e?w=600&h=438",
            "md5": "33fdd71581357018c226f95c5cedcfd7",
            "info_dict": {
                "id": "mahoushoujomadokamagicamovie1part1",
                "ext": "flv",
                "title": "mahoushoujomadokamagicamovie1part1",
            },
        },
    ]
 class PlayPandaIE(Play44IE):
    """playpanda embed pages; extraction is inherited from Play44IE."""
    # The id is the value of the "vid" parameter (leading slashes dropped)
    # up to the first '.' or '&'.  The previous group, "[^&].", captured
    # exactly two characters.
    _VALID_URL = r'http://[w.]*playpanda\.[^/]*/.*vid=/*(?P<id>[^&.]+)'
    # No 'description' is expected: the inherited _real_extract only
    # returns 'id', 'url' and 'title'.
    _TESTS = [{
        'url': 'http://playpanda.net/embed.php?w=718&h=438&vid=at/nw/boku_wa_tomodachi_ga_sukunai_-_05.mp4',
        'md5': '4ed320e353ed26c742c4f12a9c210b60',
        'info_dict': {
            'id': 'boku_wa_tomodachi_ga_sukunai_-_05',
            'ext': 'mp4',
            'title': 'boku_wa_tomodachi_ga_sukunai_-_05',
        }
    }]
 class VideoZooIE(Play44IE):
    """videozoo embed pages; extraction is inherited from Play44IE."""
    # The id is the value of the "vid" parameter (leading slashes dropped)
    # up to the first '.' or '&'.  The previous group, "[^&].", captured
    # exactly two characters.
    _VALID_URL = r'http://[w.]*videozoo\.[^/]*/.*vid=/*(?P<id>[^&.]+)'
    _TESTS = [{
        'url': 'http://videozoo.me/embed.php?w=718&h=438&vid=at/nw/boku_wa_tomodachi_ga_sukunai_-_05.mp4',
        'md5': '4ed320e353ed26c742c4f12a9c210b60',
        'info_dict': {
            'id': 'boku_wa_tomodachi_ga_sukunai_-_05',
            'ext': 'mp4',
            'title': 'boku_wa_tomodachi_ga_sukunai_-_05',
        }
    }]
 class PlayBBIE(Play44IE):
    """playbb embed pages; extraction is inherited from Play44IE."""
    # The id is the value of the "vid" parameter (leading slashes dropped)
    # up to the first '.' or '&'.  The previous group, "[^&].", captured
    # exactly two characters.
    _VALID_URL = r'http://[w.]*playbb\.[^/]*/.*vid=/*(?P<id>[^&.]+)'
    _TESTS = [{
        'url': 'http://playbb.me/embed.php?w=718&h=438&vid=at/nw/boku_wa_tomodachi_ga_sukunai_-_05.mp4',
        'md5': '4ed320e353ed26c742c4f12a9c210b60',
        'info_dict': {
            'id': 'boku_wa_tomodachi_ga_sukunai_-_05',
            'ext': 'mp4',
            'title': 'boku_wa_tomodachi_ga_sukunai_-_05',
        }
    }]
 class EasyVideoIE(Play44IE):
    """easyvideo embed pages; extraction is inherited from Play44IE."""
    # The id is the value of the "file" parameter (leading slashes dropped)
    # up to the first '.' or '&'.
    _VALID_URL = r"http://[w.]*easyvideo\.[^/]*/.*file=/*(?P<id>[^&.]+)"
    _TESTS = [
        {
            "url": "http://easyvideo.me/gogo/?w=718&h=438&file=bokuwatomodachigasukunai-04.flv&sv=1",
            "md5": "26178b57629b7650106d72b191137176",
            "info_dict": {
                "id": "bokuwatomodachigasukunai-04",
                "ext": "mp4",
                "title": "bokuwatomodachigasukunai-04",
            },
            "skip": "Blocked in Germany",
        },
    ]
--- a/youtube_dl/extractor/soulanime.py
+++ b/youtube_dl/extractor/soulanime.py
@ -0,0 +1,74 @@
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 class SoulAnimeWatchingIE(InfoExtractor):
    """Extractor for single episode ("watching") pages of soul-anime.

    The page carries a download link relative to the site root.  The
    extractor opens that link to read the media type from the response
    headers and reports the URL slug as id, title and description.
    """
    IE_NAME = "soulanime:watching"
    IE_DESC = "SoulAnime video"
    _TEST = {
        'url': 'http://www.soul-anime.net/watching/seirei-tsukai-no-blade-dance-episode-9/',
        'md5': '05fae04abf72298098b528e98abf4298',
        'info_dict': {
            'id': 'seirei-tsukai-no-blade-dance-episode-9',
            'ext': 'mp4',
            'title': 'seirei-tsukai-no-blade-dance-episode-9',
            'description': 'seirei-tsukai-no-blade-dance-episode-9'
        }
    }
    _VALID_URL = r'http://[w.]*soul-anime\.(?P<domain>[^/]+)/watch[^/]*/(?P<id>[^/]+)'
    def _real_extract(self, url):
        """Return the info dict for the episode page at url.

        The 'ext' key is only present when the response carries a
        Content-Type of the form "type/subtype".
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        domain = mobj.group('domain')
        page = self._download_webpage(url, video_id)
        video_url_encoded = self._html_search_regex(
            r'<div id="download">[^<]*<a href="(?P<url>[^"]+)"', page, 'url')
        # The link is site-relative, so it is re-rooted on the same
        # top-level domain the page was requested from.
        video_url = "http://www.soul-anime." + domain + video_url_encoded
        vid = self._request_webpage(video_url, video_id)
        # gettype() only exists on the Python 2 message class; reading the
        # header through get() works on Python 2 and Python 3 alike.
        content_type = vid.info().get('Content-Type') or ''
        # Drop parameters such as "; charset=..." and normalise the case.
        mime_type = content_type.split(';')[0].strip().lower()
        info = {
            'id': video_id,
            'url': video_url,
            'title': video_id,
            'description': video_id
        }
        # A missing or malformed header no longer raises an IndexError; the
        # extension is simply not reported in that case.
        if '/' in mime_type:
            info['ext'] = mime_type.split('/')[1]
        return info
 class SoulAnimeSeriesIE(InfoExtractor):
    """Extractor turning a soul-anime series page into a playlist.

    Every <option> of the series page whose value is a "/watch..." path
    becomes one playlist entry on the same top-level domain.
    """
    IE_NAME = "soulanime:series"
    IE_DESC = "SoulAnime Series"
    _VALID_URL = r'http://[w.]*soul-anime\.(?P<domain>[^/]+)/anime./(?P<id>[^/]+)'
    _EPISODE_REGEX = r'<option value="(/watch[^/]*/[^"]+)">[^<]*</option>'
    _TEST = {
        "url": "http://www.soul-anime.net/anime1/black-rock-shooter-tv/",
        "info_dict": {
            "id": "black-rock-shooter-tv",
        },
        "playlist_count": 8,
    }
    def _real_extract(self, url):
        """Download the series page and return its episodes as a playlist."""
        url_match = re.match(self._VALID_URL, url)
        series_id = url_match.group('id')
        # Episode paths are site-relative; prefix them with the site root
        # on the domain the series page was requested from.
        site_root = "http://www.soul-anime." + url_match.group('domain')
        page = self._download_webpage(url, series_id, "Downloading series page")
        entries = []
        for episode_path in re.findall(self._EPISODE_REGEX, page):
            entries.append(self.url_result(site_root + episode_path))
        return self.playlist_result(entries, series_id)
--- a/youtube_dl/extractor/videofun.py
+++ b/youtube_dl/extractor/videofun.py
@ -0,0 +1,36 @@
 from __future__ import unicode_literals
 from .common import InfoExtractor
 from ..utils import (
    compat_urllib_parse
 )
 class VideoFunIE(InfoExtractor):
    """Extractor for videofun.me embed pages.

    The embed page holds a percent-encoded gateway URL.  The file name of
    that URL, without its extension, is used as both id and title.
    """
    _VALID_URL = r"http://[w.]*videofun\.me/embed/(?P<id>[0-9a-f]+)"
    _TEST = {
        "url": "http://videofun.me/embed/8267659be070860af600fee7deadbcdb?w=600&h=438",
        "md5": "e37e99d665f503dd2db952f7c4dba9e6",
        "info_dict": {
            "id": "Mahou-Shoujo-Madoka-Magica-07",
            "ext": "flv",
            "title": "Mahou-Shoujo-Madoka-Magica-07",
        },
    }
    def _real_extract(self, url):
        """Return the info dict (id, url, title) for the embed page at url."""
        embed_id = self._match_id(url)
        embed_page = self._download_webpage(
            url, embed_id, 'Downloading video page')
        # The gateway URL is stored percent-encoded inside the page.
        media_url = compat_urllib_parse.unquote(self._html_search_regex(
            r'url: "(http://gateway\.videofun\.me[^"]+)"', embed_page, 'video url'))
        # Text between the last usable '/' and the following '.'.
        name = self._html_search_regex(r'.*/([^.]*)\.', media_url, 'title')
        return {
            'id': name,
            'url': media_url,
            'title': name,
        }