[extractor/common] Extract more metadata for VideoObject in _json_ld
This commit is contained in:
parent
2de624fdd5
commit
6b3a3098b5
1 changed file with 9 additions and 2 deletions
|
@@ -44,6 +44,7 @@ from ..utils import (
|
||||||
sanitized_Request,
|
sanitized_Request,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
|
unified_timestamp,
|
||||||
url_basename,
|
url_basename,
|
||||||
xpath_element,
|
xpath_element,
|
||||||
xpath_text,
|
xpath_text,
|
||||||
|
@@ -840,10 +841,16 @@ class InfoExtractor(object):
|
||||||
})
|
})
|
||||||
elif item_type == 'VideoObject':
|
elif item_type == 'VideoObject':
|
||||||
info.update({
|
info.update({
|
||||||
|
'url': json_ld.get('contentUrl'),
|
||||||
'title': unescapeHTML(json_ld.get('name')),
|
'title': unescapeHTML(json_ld.get('name')),
|
||||||
'description': unescapeHTML(json_ld.get('description')),
|
'description': unescapeHTML(json_ld.get('description')),
|
||||||
'upload_date': unified_strdate(json_ld.get('upload_date')),
|
'thumbnail': json_ld.get('thumbnailUrl'),
|
||||||
'url': unescapeHTML(json_ld.get('contentUrl')),
|
'duration': parse_duration(json_ld.get('duration')),
|
||||||
|
'timestamp': unified_timestamp(json_ld.get('uploadDate')),
|
||||||
|
'filesize': float_or_none(json_ld.get('contentSize')),
|
||||||
|
'tbr': int_or_none(json_ld.get('bitrate')),
|
||||||
|
'width': int_or_none(json_ld.get('width')),
|
||||||
|
'height': int_or_none(json_ld.get('height')),
|
||||||
})
|
})
|
||||||
return dict((k, v) for k, v in info.items() if v is not None)
|
return dict((k, v) for k, v in info.items() if v is not None)
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue