[generic] Extract RSS video itunes metadata

This commit is contained in:
Sergey M․ 2020-12-06 23:08:03 +07:00
parent 5e822c2526
commit f2c704e112
No known key found for this signature in database
GPG key ID: 2C393E0F18A9236D

View file

@ -20,12 +20,14 @@ from ..utils import (
ExtractorError, ExtractorError,
float_or_none, float_or_none,
HEADRequest, HEADRequest,
int_or_none,
is_html, is_html,
js_to_json, js_to_json,
KNOWN_EXTENSIONS, KNOWN_EXTENSIONS,
merge_dicts, merge_dicts,
mimetype2ext, mimetype2ext,
orderedSet, orderedSet,
parse_duration,
sanitized_Request, sanitized_Request,
smuggle_url, smuggle_url,
unescapeHTML, unescapeHTML,
@ -33,7 +35,9 @@ from ..utils import (
unified_timestamp, unified_timestamp,
unsmuggle_url, unsmuggle_url,
UnsupportedError, UnsupportedError,
url_or_none,
xpath_text, xpath_text,
xpath_with_ns,
) )
from .commonprotocols import RtmpIE from .commonprotocols import RtmpIE
from .brightcove import ( from .brightcove import (
@ -206,10 +210,12 @@ class GenericIE(InfoExtractor):
'playlist': [{ 'playlist': [{
'info_dict': { 'info_dict': {
'ext': 'mov', 'ext': 'mov',
'id': 'pdv_maddow_netcast_mov-12-03-2020-223726', 'id': 'pdv_maddow_netcast_mov-12-04-2020-224335',
'title': 'MSNBC Rachel Maddow (video) - 12-03-2020-223726', 'title': 're:MSNBC Rachel Maddow',
'description': 're:.*her unique approach to storytelling.*', 'description': 're:.*her unique approach to storytelling.*',
'upload_date': '20201204', 'timestamp': int,
'upload_date': compat_str,
'duration': float,
}, },
}], }],
}, },
@ -2189,6 +2195,10 @@ class GenericIE(InfoExtractor):
playlist_desc_el = doc.find('./channel/description') playlist_desc_el = doc.find('./channel/description')
playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
NS_MAP = {
'itunes': 'http://www.itunes.com/dtds/podcast-1.0.dtd',
}
entries = [] entries = []
for it in doc.findall('./channel/item'): for it in doc.findall('./channel/item'):
next_url = None next_url = None
@ -2204,6 +2214,20 @@ class GenericIE(InfoExtractor):
if not next_url: if not next_url:
continue continue
def itunes(key):
return xpath_text(
it, xpath_with_ns('./itunes:%s' % key, NS_MAP),
default=None)
duration = itunes('duration')
explicit = itunes('explicit')
if explicit == 'true':
age_limit = 18
elif explicit == 'false':
age_limit = 0
else:
age_limit = None
entries.append({ entries.append({
'_type': 'url_transparent', '_type': 'url_transparent',
'url': next_url, 'url': next_url,
@ -2211,6 +2235,12 @@ class GenericIE(InfoExtractor):
'description': xpath_text(it, 'description', default=None), 'description': xpath_text(it, 'description', default=None),
'timestamp': unified_timestamp( 'timestamp': unified_timestamp(
xpath_text(it, 'pubDate', default=None)), xpath_text(it, 'pubDate', default=None)),
'duration': int_or_none(duration) or parse_duration(duration),
'thumbnail': url_or_none(itunes('image')),
'episode': itunes('title'),
'episode_number': int_or_none(itunes('episode')),
'season_number': int_or_none(itunes('season')),
'age_limit': age_limit,
}) })
return { return {