[generic] Improve KVS (etc) extraction
* detect kt_player('kt_player', 'https://.../kt_player.swf?v=5...
* detect age limit if 18 USC 2257 is mentioned
* test with shooshtime.com

Partially resolves #31332.
This commit is contained in:
parent
1a4fbe8462
commit
fc2beab0e7
1 changed file with 22 additions and 7 deletions
|
@@ -36,6 +36,7 @@ from ..utils import (
|
||||||
unsmuggle_url,
|
unsmuggle_url,
|
||||||
UnsupportedError,
|
UnsupportedError,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
|
urljoin,
|
||||||
xpath_attr,
|
xpath_attr,
|
||||||
xpath_text,
|
xpath_text,
|
||||||
xpath_with_ns,
|
xpath_with_ns,
|
||||||
|
@@ -2308,6 +2309,17 @@ class GenericIE(InfoExtractor):
|
||||||
'height': 720,
|
'height': 720,
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://shooshtime.com/videos/284002/just-out-of-the-shower-joi/',
|
||||||
|
'md5': 'e2f0a4c329f7986280b7328e24036d60',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '284002',
|
||||||
|
'display_id': 'just-out-of-the-shower-joi',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Just Out Of The Shower JOI - Shooshtime',
|
||||||
|
'height': 720,
|
||||||
|
'age_limit': 18,
|
||||||
|
},
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@@ -2477,7 +2489,7 @@ class GenericIE(InfoExtractor):
|
||||||
format_id = flashvars.get(key + '_text', key)
|
format_id = flashvars.get(key + '_text', key)
|
||||||
formats.append(merge_dicts(
|
formats.append(merge_dicts(
|
||||||
parse_resolution(format_id) or parse_resolution(flashvars[key]), {
|
parse_resolution(format_id) or parse_resolution(flashvars[key]), {
|
||||||
'url': getrealurl(flashvars[key], flashvars['license_code']),
|
'url': urljoin(url, getrealurl(flashvars[key], flashvars['license_code'])),
|
||||||
'format_id': format_id,
|
'format_id': format_id,
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'http_headers': {'Referer': url},
|
'http_headers': {'Referer': url},
|
||||||
|
@@ -2704,6 +2716,7 @@ class GenericIE(InfoExtractor):
|
||||||
AGE_LIMIT_MARKERS = [
|
AGE_LIMIT_MARKERS = [
|
||||||
r'Proudly Labeled <a href="http://www\.rtalabel\.org/" title="Restricted to Adults">RTA</a>',
|
r'Proudly Labeled <a href="http://www\.rtalabel\.org/" title="Restricted to Adults">RTA</a>',
|
||||||
r'>[^<]*you acknowledge you are at least (\d+) years old',
|
r'>[^<]*you acknowledge you are at least (\d+) years old',
|
||||||
|
r'>\s*(?:18\s+U(?:\.S\.C\.|SC)\s+)?(?:§+\s*)?2257\b',
|
||||||
]
|
]
|
||||||
for marker in AGE_LIMIT_MARKERS:
|
for marker in AGE_LIMIT_MARKERS:
|
||||||
m = re.search(marker, webpage)
|
m = re.search(marker, webpage)
|
||||||
|
@@ -3559,13 +3572,15 @@ class GenericIE(InfoExtractor):
|
||||||
return info_dict
|
return info_dict
|
||||||
|
|
||||||
# Look for generic KVS player (before ld+json for tests)
|
# Look for generic KVS player (before ld+json for tests)
|
||||||
found = re.search(
|
found = self._search_regex(
|
||||||
r'<script\b[^>]+?\bsrc\s*=\s*(["\'])https?://(?:\S+?/)+kt_player\.js\?v=(?P<ver>(?P<maj_ver>\d+)(\.\d+)+)\1[^>]*>',
|
(r'<script\b[^>]+?\bsrc\s*=\s*(["\'])https?://(?:\S+?/)+kt_player\.js\?v=(?P<ver>\d+(?:\.\d+)+)\1[^>]*>',
|
||||||
webpage)
|
# kt_player('kt_player', 'https://i.shoosh.co/player/kt_player.swf?v=5.5.1', ...
|
||||||
|
r'kt_player\s*\(\s*(["\'])(?:(?!\1)[\w\W])+\1\s*,\s*(["\'])https?://(?:\S+?/)+kt_player\.swf\?v=(?P<ver>\d+(?:\.\d+)+)\2\s*,',
|
||||||
|
), webpage, 'KVS player', group='ver', default=False)
|
||||||
if found:
|
if found:
|
||||||
self.report_extraction('KVS Player')
|
self.report_extraction('%s: KVS Player' % (video_id, ))
|
||||||
if found.group('maj_ver') not in ('4', '5', '6'):
|
if found.split('.')[0] not in ('4', '5', '6'):
|
||||||
self.report_warning('Untested major version (%s) in player engine - download may fail.' % (found.group('ver'), ))
|
self.report_warning('Untested major version (%s) in player engine - download may fail.' % (found, ))
|
||||||
return merge_dicts(
|
return merge_dicts(
|
||||||
self._extract_kvs(url, webpage, video_id),
|
self._extract_kvs(url, webpage, video_id),
|
||||||
info_dict)
|
info_dict)
|
||||||
|
|
Loading…
Reference in a new issue