[extractor/common] keep support for non standard JSON-LD VideoObject author values
This commit is contained in:
parent
3ae9c0f410
commit
6beb1ac65b
1 changed files with 6 additions and 2 deletions
|
@ -70,7 +70,6 @@ from ..utils import (
|
||||||
str_or_none,
|
str_or_none,
|
||||||
str_to_int,
|
str_to_int,
|
||||||
strip_or_none,
|
strip_or_none,
|
||||||
try_get,
|
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
unified_timestamp,
|
unified_timestamp,
|
||||||
|
@ -1276,6 +1275,7 @@ class InfoExtractor(object):
|
||||||
|
|
||||||
def extract_video_object(e):
|
def extract_video_object(e):
|
||||||
assert e['@type'] == 'VideoObject'
|
assert e['@type'] == 'VideoObject'
|
||||||
|
author = e.get('author')
|
||||||
info.update({
|
info.update({
|
||||||
'url': url_or_none(e.get('contentUrl')),
|
'url': url_or_none(e.get('contentUrl')),
|
||||||
'title': unescapeHTML(e.get('name')),
|
'title': unescapeHTML(e.get('name')),
|
||||||
|
@ -1283,7 +1283,11 @@ class InfoExtractor(object):
|
||||||
'thumbnail': url_or_none(e.get('thumbnailUrl') or e.get('thumbnailURL')),
|
'thumbnail': url_or_none(e.get('thumbnailUrl') or e.get('thumbnailURL')),
|
||||||
'duration': parse_duration(e.get('duration')),
|
'duration': parse_duration(e.get('duration')),
|
||||||
'timestamp': unified_timestamp(e.get('uploadDate')),
|
'timestamp': unified_timestamp(e.get('uploadDate')),
|
||||||
'uploader': try_get(e, lambda x: x['author']['name'], compat_str),
|
# author can be an instance of 'Organization' or 'Person' types.
|
||||||
|
# both types can have 'name' property(inherited from 'Thing' type). [1]
|
||||||
|
# however some websites are using 'Text' type instead.
|
||||||
|
# 1. https://schema.org/VideoObject
|
||||||
|
'uploader': author.get('name') if isinstance(author, dict) else author if isinstance(author, compat_str) else None,
|
||||||
'filesize': float_or_none(e.get('contentSize')),
|
'filesize': float_or_none(e.get('contentSize')),
|
||||||
'tbr': int_or_none(e.get('bitrate')),
|
'tbr': int_or_none(e.get('bitrate')),
|
||||||
'width': int_or_none(e.get('width')),
|
'width': int_or_none(e.get('width')),
|
||||||
|
|
Loading…
Reference in a new issue