From a229909fa616c8b2820ea3b254861f07bedcd376 Mon Sep 17 00:00:00 2001
From: Naglis Jonaitis <njonaitis@gmail.com>
Date: Sun, 3 Aug 2014 21:24:44 +0300
Subject: [PATCH 1/7] [jove] Add new extractor. Closes #3177

---
 youtube_dl/extractor/__init__.py |  1 +
 youtube_dl/extractor/jove.py     | 64 ++++++++++++++++++++++++++++++++
 2 files changed, 65 insertions(+)
 create mode 100644 youtube_dl/extractor/jove.py
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 2bf8bc5e8..c80a1bd48 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -144,6 +144,7 @@ from .ivi import (
 from .izlesene import IzleseneIE
 from .jadorecettepub import JadoreCettePubIE
 from .jeuxvideo import JeuxVideoIE
+from .jove import JoveIE
 from .jukebox import JukeboxIE
 from .justintv import JustinTVIE
 from .jpopsukitv import JpopsukiIE
diff --git a/youtube_dl/extractor/jove.py b/youtube_dl/extractor/jove.py
new file mode 100644
index 000000000..a7110b683
--- /dev/null
+++ b/youtube_dl/extractor/jove.py
@@ -0,0 +1,64 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+from datetime import datetime
+
+from .common import InfoExtractor
+from ..utils import determine_ext, ExtractorError
+
+
+class JoveIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?jove\.com/video/(?P<id>[0-9]+)'
+    _CHAPTERS_URL = 'http://www.jove.com/video-chapters?videoid={video_id:}'
+    _TEST = {
+        'url': 'http://www.jove.com/video/2744/electrode-positioning-montage-transcranial-direct-current',
+        'md5': '93723888d82dbd6ba8b3d7d0cd65dd2b',
+        'info_dict': {
+            'id': '2744',
+            'ext': 'mp4',
+            'title': 'Electrode Positioning and Montage in Transcranial Direct Current Stimulation',
+            'description': 'Transcranial direct current stimulation (tDCS) is an established technique to modulate cortical excitability1,2. It has been ...',
+            'thumbnail': 're:^https?://.*\.png$',
+            'upload_date': '20110523',
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+        title = self._html_search_meta('citation_title', webpage, 'title')
+        thumbnail = self._og_search_thumbnail(webpage)
+        description = self._html_search_meta(
+            'description', webpage, 'description', fatal=False)
+        publish_date = self._html_search_meta(
+            'citation_publication_date', webpage, 'publish date', fatal=False)
+        if publish_date:
+            publish_date = datetime.strptime(publish_date,
+                                             '%Y/%m/%d').strftime('%Y%m%d')
+
+        # Not the same as video_id.
+        chapters_id = self._html_search_regex(
+            r'/video-chapters\?videoid=([0-9]+)', webpage, 'chapters id')
+        chapters_xml = self._download_xml(
+            self._CHAPTERS_URL.format(video_id=chapters_id),
+            video_id, note='Downloading chapter XML',
+            errnote='Failed to download chapter XML'
+        )
+        video_url = chapters_xml.attrib.get('video')
+        if not video_url:
+            raise ExtractorError('Failed to get the video URL')
+
+        ext = determine_ext(video_url)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'url': video_url,
+            'ext': ext,
+            'thumbnail': thumbnail,
+            'description': description,
+            'upload_date': publish_date,
+        }

From c1d293cfa68031e0ec1a4190041f280d22c2b026 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sun, 17 Aug 2014 02:07:04 +0700
Subject: [PATCH 2/7] [dfb] Fix f4m manifest URL

---
 youtube_dl/extractor/dfb.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/dfb.py b/youtube_dl/extractor/dfb.py
index cb8e06822..8049779b0 100644
--- a/youtube_dl/extractor/dfb.py
+++ b/youtube_dl/extractor/dfb.py
@@ -30,7 +30,7 @@ class DFBIE(InfoExtractor):
             video_id)
         video_info = player_info.find('video')
 
-        f4m_info = self._download_xml(video_info.find('url').text, video_id)
+        f4m_info = self._download_xml(self._proto_relative_url(video_info.find('url').text.strip()), video_id)
         token_el = f4m_info.find('token')
         manifest_url = token_el.attrib['url'] + '?' + 'hdnea=' + token_el.attrib['auth'] + '&hdcore=3.2.0'
 

From 4d9bd478f9d846291c722da002266504fce824ba Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Mon, 18 Aug 2014 19:20:53 +0700
Subject: [PATCH 3/7] [pbs] Extract coveplayerid (Closes #3522)

---
 youtube_dl/extractor/pbs.py | 42 +++++++++++++++++++++++++------------
 1 file changed, 29 insertions(+), 13 deletions(-)

diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py
index ec95d0704..09bee6561 100644
--- a/youtube_dl/extractor/pbs.py
+++ b/youtube_dl/extractor/pbs.py
@@ -20,17 +20,30 @@ class PBSIE(InfoExtractor):
         )
     '''
 
-    _TEST = {
-        'url': 'http://www.pbs.org/tpt/constitution-usa-peter-sagal/watch/a-more-perfect-union/',
-        'md5': 'ce1888486f0908d555a8093cac9a7362',
-        'info_dict': {
-            'id': '2365006249',
-            'ext': 'mp4',
-            'title': 'A More Perfect Union',
-            'description': 'md5:ba0c207295339c8d6eced00b7c363c6a',
-            'duration': 3190,
+    _TESTS = [
+        {
+            'url': 'http://www.pbs.org/tpt/constitution-usa-peter-sagal/watch/a-more-perfect-union/',
+            'md5': 'ce1888486f0908d555a8093cac9a7362',
+            'info_dict': {
+                'id': '2365006249',
+                'ext': 'mp4',
+                'title': 'A More Perfect Union',
+                'description': 'md5:ba0c207295339c8d6eced00b7c363c6a',
+                'duration': 3190,
+            },
         },
-    }
+        {
+            'url': 'http://www.pbs.org/newshour/bb/education-jan-june12-cyberschools_02-23/',
+            'md5': 'b19856d7f5351b17a5ab1dc6a64be633',
+            'info_dict': {
+                'id': '2201174722',
+                'ext': 'mp4',
+                'title': 'Cyber Schools Gain Popularity, but Quality Questions Persist',
+                'description': 'md5:5871c15cba347c1b3d28ac47a73c7c28',
+                'duration': 801,
+            },
+        },
+    ]
 
     def _extract_ids(self, url):
         mobj = re.match(self._VALID_URL, url)
@@ -40,10 +53,13 @@ class PBSIE(InfoExtractor):
         if presumptive_id:
             webpage = self._download_webpage(url, display_id)
 
-            # frontline video embed
+            MEDIA_ID_REGEXES = [
+                r"div\s*:\s*'videoembed'\s*,\s*mediaid\s*:\s*'(\d+)'",  # frontline video embed
+                r'class="coveplayerid">([^<]+)<',                       # coveplayer
+            ]
+
             media_id = self._search_regex(
-                r"div\s*:\s*'videoembed'\s*,\s*mediaid\s*:\s*'(\d+)'",
-                webpage, 'frontline video ID', fatal=False, default=None)
+                MEDIA_ID_REGEXES, webpage, 'media ID', fatal=False, default=None)
             if media_id:
                 return media_id, presumptive_id
 

From cd6b48365e29400044b8852226bed41490ef6e2f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Mon, 18 Aug 2014 19:24:18 +0700
Subject: [PATCH 4/7] [pbs] Add frontline video test

---
 youtube_dl/extractor/pbs.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py
index 09bee6561..dee4af6f1 100644
--- a/youtube_dl/extractor/pbs.py
+++ b/youtube_dl/extractor/pbs.py
@@ -32,6 +32,17 @@ class PBSIE(InfoExtractor):
                 'duration': 3190,
             },
         },
+        {
+            'url': 'http://www.pbs.org/wgbh/pages/frontline/losing-iraq/',
+            'md5': '143c98aa54a346738a3d78f54c925321',
+            'info_dict': {
+                'id': '2365297690',
+                'ext': 'mp4',
+                'title': 'Losing Iraq',
+                'description': 'md5:f5bfbefadf421e8bb8647602011caf8e',
+                'duration': 5050,
+            },
+        },
         {
             'url': 'http://www.pbs.org/newshour/bb/education-jan-june12-cyberschools_02-23/',
             'md5': 'b19856d7f5351b17a5ab1dc6a64be633',

From 6493f5d7045c90b2a6820b2cda58553d3a2fce8f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
 <jaime.marquinez.ferrandiz@gmail.com>
Date: Mon, 18 Aug 2014 15:39:35 +0200
Subject: [PATCH 5/7] [rtlnl] Add extractor for rtlxl.nl (closes #3523)

---
 youtube_dl/extractor/__init__.py |  1 +
 youtube_dl/extractor/rtlnl.py    | 52 ++++++++++++++++++++++++++++++++
 2 files changed, 53 insertions(+)
 create mode 100644 youtube_dl/extractor/rtlnl.py

diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 27602e0c0..d6056ce8d 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -252,6 +252,7 @@ from .ro220 import Ro220IE
 from .rottentomatoes import RottenTomatoesIE
 from .roxwel import RoxwelIE
 from .rtbf import RTBFIE
+from .rtlnl import RtlXlIE
 from .rtlnow import RTLnowIE
 from .rts import RTSIE
 from .rtve import RTVEALaCartaIE
diff --git a/youtube_dl/extractor/rtlnl.py b/youtube_dl/extractor/rtlnl.py
new file mode 100644
index 000000000..14928cd62
--- /dev/null
+++ b/youtube_dl/extractor/rtlnl.py
@@ -0,0 +1,52 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class RtlXlIE(InfoExtractor):
+    IE_NAME = 'rtlxl.nl'
+    _VALID_URL = r'https?://www\.rtlxl\.nl/#!/[^/]+/(?P<uuid>[^/?]+)'
+
+    _TEST = {
+        'url': 'http://www.rtlxl.nl/#!/rtl-nieuws-132237/6e4203a6-0a5e-3596-8424-c599a59e0677',
+        'info_dict': {
+            'id': '6e4203a6-0a5e-3596-8424-c599a59e0677',
+            'ext': 'flv',
+            'title': 'RTL Nieuws - Laat',
+            'description': 'Dagelijks het laatste nieuws uit binnen- en '
+                'buitenland. Voor nog meer nieuws kunt u ook gebruikmaken van '
+                'onze mobiele apps.',
+            'timestamp': 1408051800,
+            'upload_date': '20140814',
+        },
+        'params': {
+            # We download the first bytes of the first fragment, it can't be
+            # processed by the f4m downloader because it isn't complete
+            'skip_download': True,
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        uuid = mobj.group('uuid')
+
+        info = self._download_json(
+            'http://www.rtl.nl/system/s4m/vfd/version=2/uuid=%s/fmt=flash/' % uuid,
+            uuid)
+        # Only the first 'material' and 'episodes' entries are used
+        material = info['material'][0]
+        episode_info = info['episodes'][0]
+
+        f4m_url = 'http://manifest.us.rtl.nl' + material['videopath']
+        progname = info['abstracts'][0]['name']
+        subtitle = material['title'] or episode_info['name']  # episode name is the fallback
+
+        return {
+            'id': uuid,
+            'title': '%s - %s' % (progname, subtitle),
+            'formats': self._extract_f4m_formats(f4m_url, uuid),
+            'timestamp': material['original_date'],
+            'description': episode_info['synopsis'],
+        }

From 938dd254e557612b8a6e1f0a2dd1689fca30b9ed Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
 <jaime.marquinez.ferrandiz@gmail.com>
Date: Mon, 18 Aug 2014 22:43:35 +0200
Subject: [PATCH 6/7] [mitele] Add extractor for mitele.es

---
 youtube_dl/extractor/__init__.py |  1 +
 youtube_dl/extractor/mitele.py   | 60 ++++++++++++++++++++++++++++++++
 2 files changed, 61 insertions(+)
 create mode 100644 youtube_dl/extractor/mitele.py

diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index d6056ce8d..6780f7f99 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -178,6 +178,7 @@ from .mdr import MDRIE
 from .metacafe import MetacafeIE
 from .metacritic import MetacriticIE
 from .mit import TechTVMITIE, MITIE, OCWMITIE
+from .mitele import MiTeleIE
 from .mixcloud import MixcloudIE
 from .mlb import MLBIE
 from .mpora import MporaIE
diff --git a/youtube_dl/extractor/mitele.py b/youtube_dl/extractor/mitele.py
new file mode 100644
index 000000000..979f3d692
--- /dev/null
+++ b/youtube_dl/extractor/mitele.py
@@ -0,0 +1,60 @@
+from __future__ import unicode_literals
+
+import re
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urllib_parse,
+    get_element_by_attribute,
+    parse_duration,
+    strip_jsonp,
+)
+
+
+class MiTeleIE(InfoExtractor):
+    IE_NAME = 'mitele.es'
+    _VALID_URL = r'http://www\.mitele\.es/[^/]+/[^/]+/[^/]+/(?P<episode>[^/]+)/'
+
+    _TEST = {
+        'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/',
+        'md5': '6a75fe9d0d3275bead0cb683c616fddb',
+        'info_dict': {
+            'id': '0fce117d',
+            'ext': 'mp4',
+            'title': 'Programa 144 - Tor, la web invisible',
+            'description': 'md5:3b6fce7eaa41b2d97358726378d9369f',
+            'display_id': 'programa-144',
+            'duration': 2913,
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        episode = mobj.group('episode')
+        webpage = self._download_webpage(url, episode)
+        embed_data_json = self._search_regex(
+            r'MSV\.embedData\[.*?\]\s*=\s*({.*?});', webpage, 'embed data',
+            flags=re.DOTALL
+        ).replace('\'', '"')
+        embed_data = json.loads(embed_data_json)
+
+        info_url = embed_data['flashvars']['host']
+        info_el = self._download_xml(info_url, episode).find('./video/info')
+
+        video_link = info_el.find('videoUrl/link').text
+        token_query = compat_urllib_parse.urlencode({'id': video_link})
+        token_info = self._download_json(
+            'http://token.mitele.es/?' + token_query, episode,
+            transform_source=strip_jsonp
+        )
+
+        return {
+            'id': embed_data['videoId'],
+            'display_id': episode,
+            'title': info_el.find('title').text,
+            'url': token_info['tokenizedUrl'],
+            'description': get_element_by_attribute('class', 'text', webpage),
+            'thumbnail': info_el.find('thumb').text,
+            'duration': parse_duration(info_el.find('duration').text),
+        }

From fe556f1b0cfd5782ec379a731f4b8879f2a352a3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Tue, 19 Aug 2014 20:02:08 +0700
Subject: [PATCH 7/7] [jove] Simplify, extract full description and add test
 for video that requires subscription

---
 youtube_dl/extractor/jove.py | 76 ++++++++++++++++++++++--------------
 youtube_dl/utils.py          |  1 +
 2 files changed, 47 insertions(+), 30 deletions(-)

diff --git a/youtube_dl/extractor/jove.py b/youtube_dl/extractor/jove.py
index a7110b683..cf73cd753 100644
--- a/youtube_dl/extractor/jove.py
+++ b/youtube_dl/extractor/jove.py
@@ -1,64 +1,80 @@
-# coding: utf-8
 from __future__ import unicode_literals
 
 import re
-from datetime import datetime
 
 from .common import InfoExtractor
-from ..utils import determine_ext, ExtractorError
+from ..utils import (
+    ExtractorError,
+    unified_strdate
+)
 
 
 class JoveIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?jove\.com/video/(?P<id>[0-9]+)'
     _CHAPTERS_URL = 'http://www.jove.com/video-chapters?videoid={video_id:}'
-    _TEST = {
-        'url': 'http://www.jove.com/video/2744/electrode-positioning-montage-transcranial-direct-current',
-        'md5': '93723888d82dbd6ba8b3d7d0cd65dd2b',
-        'info_dict': {
-            'id': '2744',
-            'ext': 'mp4',
-            'title': 'Electrode Positioning and Montage in Transcranial Direct Current Stimulation',
-            'description': 'Transcranial direct current stimulation (tDCS) is an established technique to modulate cortical excitability1,2. It has been ...',
-            'thumbnail': 're:^https?://.*\.png$',
-            'upload_date': '20110523',
-        }
-    }
+    _TESTS = [
+        {
+            'url': 'http://www.jove.com/video/2744/electrode-positioning-montage-transcranial-direct-current',
+            'md5': '93723888d82dbd6ba8b3d7d0cd65dd2b',
+            'info_dict': {
+                'id': '2744',
+                'ext': 'mp4',
+                'title': 'Electrode Positioning and Montage in Transcranial Direct Current Stimulation',
+                'description': 'md5:015dd4509649c0908bc27f049e0262c6',
+                'thumbnail': r're:^https?://.*\.png$',  # raw string: '\.' is a regex escape
+                'upload_date': '20110523',
+            }
+        },
+        {
+            'url': 'http://www.jove.com/video/51796/culturing-caenorhabditis-elegans-axenic-liquid-media-creation',
+            'md5': '914aeb356f416811d911996434811beb',
+            'info_dict': {
+                'id': '51796',
+                'ext': 'mp4',
+                'title': 'Culturing Caenorhabditis elegans in Axenic Liquid Media and Creation of Transgenic Worms by Microparticle Bombardment',
+                'description': 'md5:35ff029261900583970c4023b70f1dc9',
+                'thumbnail': r're:^https?://.*\.png$',  # raw string: '\.' is a regex escape
+                'upload_date': '20140802',
+            }
+        },
+
+    ]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
 
         webpage = self._download_webpage(url, video_id)
-        title = self._html_search_meta('citation_title', webpage, 'title')
-        thumbnail = self._og_search_thumbnail(webpage)
-        description = self._html_search_meta(
-            'description', webpage, 'description', fatal=False)
-        publish_date = self._html_search_meta(
-            'citation_publication_date', webpage, 'publish date', fatal=False)
-        if publish_date:
-            publish_date = datetime.strptime(publish_date,
-                                             '%Y/%m/%d').strftime('%Y%m%d')
 
-        # Not the same as video_id.
         chapters_id = self._html_search_regex(
             r'/video-chapters\?videoid=([0-9]+)', webpage, 'chapters id')
+
         chapters_xml = self._download_xml(
             self._CHAPTERS_URL.format(video_id=chapters_id),
-            video_id, note='Downloading chapter XML',
-            errnote='Failed to download chapter XML'
-        )
+            video_id, note='Downloading chapters XML',
+            errnote='Failed to download chapters XML')
+
         video_url = chapters_xml.attrib.get('video')
         if not video_url:
             raise ExtractorError('Failed to get the video URL')
 
-        ext = determine_ext(video_url)
+        title = self._html_search_meta('citation_title', webpage, 'title')
+        thumbnail = self._og_search_thumbnail(webpage)
+        description = self._html_search_regex(
+            r'<div id="section_body_summary"><p class="jove_content">(.+?)</p>',
+            webpage, 'description', fatal=False)
+        publish_date = unified_strdate(self._html_search_meta(
+            'citation_publication_date', webpage, 'publish date', fatal=False))
+        comment_count = self._html_search_regex(
+            r'<meta name="num_comments" content="(\d+) Comments?"',
+            webpage, 'comment count', fatal=False)
 
         return {
             'id': video_id,
             'title': title,
             'url': video_url,
-            'ext': ext,
             'thumbnail': thumbnail,
             'description': description,
             'upload_date': publish_date,
+            'comment_count': int(comment_count) if comment_count else None,  # number, not the matched str
         }
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 65b492fb3..1081a9368 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -827,6 +827,7 @@ def unified_strdate(date_str):
         '%b %dnd %Y %I:%M%p',
         '%b %dth %Y %I:%M%p',
         '%Y-%m-%d',
+        '%Y/%m/%d',
         '%d.%m.%Y',
         '%d/%m/%Y',
         '%Y/%m/%d %H:%M:%S',