[YouTube] Refresh compat/utils usage
* import parse_qs()
* import parse_qs in lazy_extractors (clears old TODO)
* clean up old compiled lazy_extractors for Py2
* use update_url()
This commit is contained in:
parent
4e04f10499
commit
bafb6dec72
3 changed files with 39 additions and 44 deletions
|
@ -13,6 +13,11 @@ sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))
|
||||||
lazy_extractors_filename = sys.argv[1]
|
lazy_extractors_filename = sys.argv[1]
|
||||||
if os.path.exists(lazy_extractors_filename):
|
if os.path.exists(lazy_extractors_filename):
|
||||||
os.remove(lazy_extractors_filename)
|
os.remove(lazy_extractors_filename)
|
||||||
|
# Py2: may be confused by leftover lazy_extractors.pyc
|
||||||
|
try:
|
||||||
|
os.remove(lazy_extractors_filename + 'c')
|
||||||
|
except OSError:
|
||||||
|
pass
|
||||||
|
|
||||||
from youtube_dl.extractor import _ALL_CLASSES
|
from youtube_dl.extractor import _ALL_CLASSES
|
||||||
from youtube_dl.extractor.common import InfoExtractor, SearchInfoExtractor
|
from youtube_dl.extractor.common import InfoExtractor, SearchInfoExtractor
|
||||||
|
@ -22,7 +27,10 @@ with open('devscripts/lazy_load_template.py', 'rt') as f:
|
||||||
|
|
||||||
module_contents = [
|
module_contents = [
|
||||||
module_template + '\n' + getsource(InfoExtractor.suitable) + '\n',
|
module_template + '\n' + getsource(InfoExtractor.suitable) + '\n',
|
||||||
'class LazyLoadSearchExtractor(LazyLoadExtractor):\n pass\n']
|
'class LazyLoadSearchExtractor(LazyLoadExtractor):\n pass\n',
|
||||||
|
# needed for suitable() methods of Youtube extractor (see #28780)
|
||||||
|
'from youtube_dl.utils import parse_qs\n',
|
||||||
|
]
|
||||||
|
|
||||||
ie_template = '''
|
ie_template = '''
|
||||||
class {name}({bases}):
|
class {name}({bases}):
|
||||||
|
|
|
@ -40,14 +40,16 @@ class TestExecution(unittest.TestCase):
|
||||||
self.assertFalse(stderr)
|
self.assertFalse(stderr)
|
||||||
|
|
||||||
def test_lazy_extractors(self):
|
def test_lazy_extractors(self):
|
||||||
|
lazy_extractors = 'youtube_dl/extractor/lazy_extractors.py'
|
||||||
try:
|
try:
|
||||||
subprocess.check_call([sys.executable, 'devscripts/make_lazy_extractors.py', 'youtube_dl/extractor/lazy_extractors.py'], cwd=rootDir, stdout=_DEV_NULL)
|
subprocess.check_call([sys.executable, 'devscripts/make_lazy_extractors.py', lazy_extractors], cwd=rootDir, stdout=_DEV_NULL)
|
||||||
subprocess.check_call([sys.executable, 'test/test_all_urls.py'], cwd=rootDir, stdout=_DEV_NULL)
|
subprocess.check_call([sys.executable, 'test/test_all_urls.py'], cwd=rootDir, stdout=_DEV_NULL)
|
||||||
finally:
|
finally:
|
||||||
try:
|
for x in ['', 'c'] if sys.version_info[0] < 3 else ['']:
|
||||||
os.remove('youtube_dl/extractor/lazy_extractors.py')
|
try:
|
||||||
except (IOError, OSError):
|
os.remove(lazy_extractors + x)
|
||||||
pass
|
except (IOError, OSError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
|
@ -14,12 +14,11 @@ from ..compat import (
|
||||||
compat_chr,
|
compat_chr,
|
||||||
compat_HTTPError,
|
compat_HTTPError,
|
||||||
compat_map as map,
|
compat_map as map,
|
||||||
compat_parse_qs,
|
|
||||||
compat_str,
|
compat_str,
|
||||||
|
compat_urllib_parse,
|
||||||
|
compat_urllib_parse_parse_qs as compat_parse_qs,
|
||||||
compat_urllib_parse_unquote_plus,
|
compat_urllib_parse_unquote_plus,
|
||||||
compat_urllib_parse_urlencode,
|
|
||||||
compat_urllib_parse_urlparse,
|
compat_urllib_parse_urlparse,
|
||||||
compat_urlparse,
|
|
||||||
)
|
)
|
||||||
from ..jsinterp import JSInterpreter
|
from ..jsinterp import JSInterpreter
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
@ -33,6 +32,7 @@ from ..utils import (
|
||||||
mimetype2ext,
|
mimetype2ext,
|
||||||
parse_codecs,
|
parse_codecs,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
|
parse_qs,
|
||||||
qualities,
|
qualities,
|
||||||
remove_start,
|
remove_start,
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
|
@ -50,10 +50,6 @@ from ..utils import (
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def parse_qs(url):
|
|
||||||
return compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
|
|
||||||
|
|
||||||
|
|
||||||
class YoutubeBaseInfoExtractor(InfoExtractor):
|
class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||||
"""Provide base functions for Youtube extractors"""
|
"""Provide base functions for Youtube extractors"""
|
||||||
_LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
|
_LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
|
||||||
|
@ -636,6 +632,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
|
'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
|
||||||
'duration': 142,
|
'duration': 142,
|
||||||
'uploader': 'The Witcher',
|
'uploader': 'The Witcher',
|
||||||
|
'uploader_id': 'WitcherGame',
|
||||||
|
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
|
||||||
'upload_date': '20140605',
|
'upload_date': '20140605',
|
||||||
'thumbnail': 'https://i.ytimg.com/vi/HtVdAasjOgU/maxresdefault.jpg',
|
'thumbnail': 'https://i.ytimg.com/vi/HtVdAasjOgU/maxresdefault.jpg',
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
|
@ -671,7 +669,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'note': 'Age-gated video embedable only with clientScreen=EMBED',
|
'note': 'Age-gated video embeddable only with clientScreen=EMBED',
|
||||||
'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
|
'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'Tq92D6wQ1mg',
|
'id': 'Tq92D6wQ1mg',
|
||||||
|
@ -1392,11 +1390,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def suitable(cls, url):
|
def suitable(cls, url):
|
||||||
# Hack for lazy extractors until more generic solution is implemented
|
if parse_qs(url).get('list', [None])[0]:
|
||||||
# (see #28780)
|
|
||||||
from .youtube import parse_qs
|
|
||||||
qs = parse_qs(url)
|
|
||||||
if qs.get('list', [None])[0]:
|
|
||||||
return False
|
return False
|
||||||
return super(YoutubeIE, cls).suitable(url)
|
return super(YoutubeIE, cls).suitable(url)
|
||||||
|
|
||||||
|
@ -1546,7 +1540,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
if player_url.startswith('//'):
|
if player_url.startswith('//'):
|
||||||
player_url = 'https:' + player_url
|
player_url = 'https:' + player_url
|
||||||
elif not re.match(r'https?://', player_url):
|
elif not re.match(r'https?://', player_url):
|
||||||
player_url = compat_urlparse.urljoin(
|
player_url = compat_urllib_parse.urljoin(
|
||||||
'https://www.youtube.com', player_url)
|
'https://www.youtube.com', player_url)
|
||||||
return player_url
|
return player_url
|
||||||
|
|
||||||
|
@ -1628,9 +1622,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
|
|
||||||
def _unthrottle_format_urls(self, video_id, player_url, formats):
|
def _unthrottle_format_urls(self, video_id, player_url, formats):
|
||||||
for fmt in formats:
|
for fmt in formats:
|
||||||
parsed_fmt_url = compat_urlparse.urlparse(fmt['url'])
|
parsed_fmt_url = compat_urllib_parse.urlparse(fmt['url'])
|
||||||
qs = compat_urlparse.parse_qs(parsed_fmt_url.query)
|
n_param = compat_parse_qs(parsed_fmt_url.query).get('n')
|
||||||
n_param = qs.get('n')
|
|
||||||
if not n_param:
|
if not n_param:
|
||||||
continue
|
continue
|
||||||
n_param = n_param[-1]
|
n_param = n_param[-1]
|
||||||
|
@ -1638,9 +1631,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
if n_response is None:
|
if n_response is None:
|
||||||
# give up if descrambling failed
|
# give up if descrambling failed
|
||||||
break
|
break
|
||||||
qs['n'] = [n_response]
|
fmt['url'] = update_url(
|
||||||
fmt['url'] = compat_urlparse.urlunparse(
|
parsed_fmt_url, query_update={'n': [n_response]})
|
||||||
parsed_fmt_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
|
|
||||||
|
|
||||||
# from yt-dlp, with tweaks
|
# from yt-dlp, with tweaks
|
||||||
def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
|
def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
|
||||||
|
@ -1669,20 +1661,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']))
|
lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']))
|
||||||
if not playback_url:
|
if not playback_url:
|
||||||
return
|
return
|
||||||
parsed_playback_url = compat_urlparse.urlparse(playback_url)
|
|
||||||
qs = compat_urlparse.parse_qs(parsed_playback_url.query)
|
|
||||||
|
|
||||||
# cpn generation algorithm is reverse engineered from base.js.
|
# cpn generation algorithm is reverse engineered from base.js.
|
||||||
# In fact it works even with dummy cpn.
|
# In fact it works even with dummy cpn.
|
||||||
CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
|
CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
|
||||||
cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
|
cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
|
||||||
|
|
||||||
qs.update({
|
playback_url = update_url(
|
||||||
'ver': ['2'],
|
playback_url, query_update={
|
||||||
'cpn': [cpn],
|
'ver': ['2'],
|
||||||
})
|
'cpn': [cpn],
|
||||||
playback_url = compat_urlparse.urlunparse(
|
})
|
||||||
parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
|
|
||||||
|
|
||||||
self._download_webpage(
|
self._download_webpage(
|
||||||
playback_url, video_id, 'Marking watched',
|
playback_url, video_id, 'Marking watched',
|
||||||
|
@ -2075,9 +2064,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
|
|
||||||
thumbnails = []
|
thumbnails = []
|
||||||
for container in (video_details, microformat):
|
for container in (video_details, microformat):
|
||||||
for thumbnail in (try_get(
|
for thumbnail in try_get(
|
||||||
container,
|
container,
|
||||||
lambda x: x['thumbnail']['thumbnails'], list) or []):
|
lambda x: x['thumbnail']['thumbnails'], list) or []:
|
||||||
thumbnail_url = url_or_none(thumbnail.get('url'))
|
thumbnail_url = url_or_none(thumbnail.get('url'))
|
||||||
if not thumbnail_url:
|
if not thumbnail_url:
|
||||||
continue
|
continue
|
||||||
|
@ -3287,11 +3276,7 @@ class YoutubePlaylistIE(InfoExtractor):
|
||||||
def suitable(cls, url):
|
def suitable(cls, url):
|
||||||
if YoutubeTabIE.suitable(url):
|
if YoutubeTabIE.suitable(url):
|
||||||
return False
|
return False
|
||||||
# Hack for lazy extractors until more generic solution is implemented
|
if parse_qs(url).get('v', [None])[0]:
|
||||||
# (see #28780)
|
|
||||||
from .youtube import parse_qs
|
|
||||||
qs = parse_qs(url)
|
|
||||||
if qs.get('v', [None])[0]:
|
|
||||||
return False
|
return False
|
||||||
return super(YoutubePlaylistIE, cls).suitable(url)
|
return super(YoutubePlaylistIE, cls).suitable(url)
|
||||||
|
|
||||||
|
@ -3430,9 +3415,9 @@ class YoutubeSearchURLIE(YoutubeBaseInfoExtractor):
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
|
qs = parse_qs(url)
|
||||||
query = (qs.get('search_query') or qs.get('q'))[0]
|
query = (qs.get('search_query') or qs.get('q'))[-1]
|
||||||
params = qs.get('sp', ('',))[0]
|
params = qs.get('sp', ('',))[-1]
|
||||||
return self.playlist_result(self._search_results(query, params), query, query)
|
return self.playlist_result(self._search_results(query, params), query, query)
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue