[generic] Extract RSS video itunes metadata
This commit is contained in:
parent
5e822c2526
commit
f2c704e112
1 changed file with 33 additions and 3 deletions
|
@ -20,12 +20,14 @@ from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
HEADRequest,
|
HEADRequest,
|
||||||
|
int_or_none,
|
||||||
is_html,
|
is_html,
|
||||||
js_to_json,
|
js_to_json,
|
||||||
KNOWN_EXTENSIONS,
|
KNOWN_EXTENSIONS,
|
||||||
merge_dicts,
|
merge_dicts,
|
||||||
mimetype2ext,
|
mimetype2ext,
|
||||||
orderedSet,
|
orderedSet,
|
||||||
|
parse_duration,
|
||||||
sanitized_Request,
|
sanitized_Request,
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
|
@ -33,7 +35,9 @@ from ..utils import (
|
||||||
unified_timestamp,
|
unified_timestamp,
|
||||||
unsmuggle_url,
|
unsmuggle_url,
|
||||||
UnsupportedError,
|
UnsupportedError,
|
||||||
|
url_or_none,
|
||||||
xpath_text,
|
xpath_text,
|
||||||
|
xpath_with_ns,
|
||||||
)
|
)
|
||||||
from .commonprotocols import RtmpIE
|
from .commonprotocols import RtmpIE
|
||||||
from .brightcove import (
|
from .brightcove import (
|
||||||
|
@ -206,10 +210,12 @@ class GenericIE(InfoExtractor):
|
||||||
'playlist': [{
|
'playlist': [{
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'ext': 'mov',
|
'ext': 'mov',
|
||||||
'id': 'pdv_maddow_netcast_mov-12-03-2020-223726',
|
'id': 'pdv_maddow_netcast_mov-12-04-2020-224335',
|
||||||
'title': 'MSNBC Rachel Maddow (video) - 12-03-2020-223726',
|
'title': 're:MSNBC Rachel Maddow',
|
||||||
'description': 're:.*her unique approach to storytelling.*',
|
'description': 're:.*her unique approach to storytelling.*',
|
||||||
'upload_date': '20201204',
|
'timestamp': int,
|
||||||
|
'upload_date': compat_str,
|
||||||
|
'duration': float,
|
||||||
},
|
},
|
||||||
}],
|
}],
|
||||||
},
|
},
|
||||||
|
@ -2189,6 +2195,10 @@ class GenericIE(InfoExtractor):
|
||||||
playlist_desc_el = doc.find('./channel/description')
|
playlist_desc_el = doc.find('./channel/description')
|
||||||
playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
|
playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
|
||||||
|
|
||||||
|
NS_MAP = {
|
||||||
|
'itunes': 'http://www.itunes.com/dtds/podcast-1.0.dtd',
|
||||||
|
}
|
||||||
|
|
||||||
entries = []
|
entries = []
|
||||||
for it in doc.findall('./channel/item'):
|
for it in doc.findall('./channel/item'):
|
||||||
next_url = None
|
next_url = None
|
||||||
|
@ -2204,6 +2214,20 @@ class GenericIE(InfoExtractor):
|
||||||
if not next_url:
|
if not next_url:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
def itunes(key):
|
||||||
|
return xpath_text(
|
||||||
|
it, xpath_with_ns('./itunes:%s' % key, NS_MAP),
|
||||||
|
default=None)
|
||||||
|
|
||||||
|
duration = itunes('duration')
|
||||||
|
explicit = itunes('explicit')
|
||||||
|
if explicit == 'true':
|
||||||
|
age_limit = 18
|
||||||
|
elif explicit == 'false':
|
||||||
|
age_limit = 0
|
||||||
|
else:
|
||||||
|
age_limit = None
|
||||||
|
|
||||||
entries.append({
|
entries.append({
|
||||||
'_type': 'url_transparent',
|
'_type': 'url_transparent',
|
||||||
'url': next_url,
|
'url': next_url,
|
||||||
|
@ -2211,6 +2235,12 @@ class GenericIE(InfoExtractor):
|
||||||
'description': xpath_text(it, 'description', default=None),
|
'description': xpath_text(it, 'description', default=None),
|
||||||
'timestamp': unified_timestamp(
|
'timestamp': unified_timestamp(
|
||||||
xpath_text(it, 'pubDate', default=None)),
|
xpath_text(it, 'pubDate', default=None)),
|
||||||
|
'duration': int_or_none(duration) or parse_duration(duration),
|
||||||
|
'thumbnail': url_or_none(itunes('image')),
|
||||||
|
'episode': itunes('title'),
|
||||||
|
'episode_number': int_or_none(itunes('episode')),
|
||||||
|
'season_number': int_or_none(itunes('season')),
|
||||||
|
'age_limit': age_limit,
|
||||||
})
|
})
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
|
Loading…
Reference in a new issue