[cspan] Extract info from jwplayer data (closes #3672, closes #3734, closes #10638, closes #13030, closes #18806, closes #23148, closes #24461, closes #26171, closes #26800, closes #27263)
This commit is contained in:
parent
be19ae11fd
commit
3e4e338133
1 changed file with 23 additions and 2 deletions
|
@@ -10,6 +10,8 @@ from ..utils import (
|
||||||
find_xpath_attr,
|
find_xpath_attr,
|
||||||
get_element_by_class,
|
get_element_by_class,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
js_to_json,
|
||||||
|
merge_dicts,
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
)
|
)
|
||||||
|
@@ -98,6 +100,26 @@ class CSpanIE(InfoExtractor):
|
||||||
bc_attr['data-bcid'])
|
bc_attr['data-bcid'])
|
||||||
return self.url_result(smuggle_url(bc_url, {'source_url': url}))
|
return self.url_result(smuggle_url(bc_url, {'source_url': url}))
|
||||||
|
|
||||||
|
def add_referer(formats):
|
||||||
|
for f in formats:
|
||||||
|
f.setdefault('http_headers', {})['Referer'] = url
|
||||||
|
|
||||||
|
# As of 01.12.2020 this path looks to cover all cases making the rest
|
||||||
|
# of the code unnecessary
|
||||||
|
jwsetup = self._parse_json(
|
||||||
|
self._search_regex(
|
||||||
|
r'(?s)jwsetup\s*=\s*({.+?})\s*;', webpage, 'jwsetup',
|
||||||
|
default='{}'),
|
||||||
|
video_id, transform_source=js_to_json, fatal=False)
|
||||||
|
if jwsetup:
|
||||||
|
info = self._parse_jwplayer_data(
|
||||||
|
jwsetup, video_id, require_title=False, m3u8_id='hls',
|
||||||
|
base_url=url)
|
||||||
|
add_referer(info['formats'])
|
||||||
|
ld_info = self._search_json_ld(webpage, video_id, default={})
|
||||||
|
return merge_dicts(info, ld_info)
|
||||||
|
|
||||||
|
# Obsolete
|
||||||
# We first look for clipid, because clipprog always appears before
|
# We first look for clipid, because clipprog always appears before
|
||||||
patterns = [r'id=\'clip(%s)\'\s*value=\'([0-9]+)\'' % t for t in ('id', 'prog')]
|
patterns = [r'id=\'clip(%s)\'\s*value=\'([0-9]+)\'' % t for t in ('id', 'prog')]
|
||||||
results = list(filter(None, (re.search(p, webpage) for p in patterns)))
|
results = list(filter(None, (re.search(p, webpage) for p in patterns)))
|
||||||
|
@@ -165,8 +187,7 @@ class CSpanIE(InfoExtractor):
|
||||||
formats = self._extract_m3u8_formats(
|
formats = self._extract_m3u8_formats(
|
||||||
path, video_id, 'mp4', entry_protocol='m3u8_native',
|
path, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||||
m3u8_id='hls') if determine_ext(path) == 'm3u8' else [{'url': path, }]
|
m3u8_id='hls') if determine_ext(path) == 'm3u8' else [{'url': path, }]
|
||||||
for f in formats:
|
add_referer(formats)
|
||||||
f.setdefault('http_headers', {})['Referer'] = url
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
entries.append({
|
entries.append({
|
||||||
'id': '%s_%d' % (video_id, partnum + 1),
|
'id': '%s_%d' % (video_id, partnum + 1),
|
||||||
|
|
Loading…
Reference in a new issue