[youtube] Correct invalid JSON (Fixes #2353)
This commit is contained in:
parent
1afe753462
commit
81c2f20b53
3 changed files with 15 additions and 6 deletions
|
@ -271,8 +271,11 @@ class InfoExtractor(object):
|
||||||
|
|
||||||
def _download_json(self, url_or_request, video_id,
|
def _download_json(self, url_or_request, video_id,
|
||||||
note=u'Downloading JSON metadata',
|
note=u'Downloading JSON metadata',
|
||||||
errnote=u'Unable to download JSON metadata'):
|
errnote=u'Unable to download JSON metadata',
|
||||||
|
transform_source=None):
|
||||||
json_string = self._download_webpage(url_or_request, video_id, note, errnote)
|
json_string = self._download_webpage(url_or_request, video_id, note, errnote)
|
||||||
|
if transform_source:
|
||||||
|
json_string = transform_source(json_string)
|
||||||
try:
|
try:
|
||||||
return json.loads(json_string)
|
return json.loads(json_string)
|
||||||
except ValueError as ve:
|
except ValueError as ve:
|
||||||
|
|
|
@ -34,6 +34,7 @@ from ..utils import (
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
orderedSet,
|
orderedSet,
|
||||||
write_json_file,
|
write_json_file,
|
||||||
|
uppercase_escape,
|
||||||
)
|
)
|
||||||
|
|
||||||
class YoutubeBaseInfoExtractor(InfoExtractor):
|
class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||||
|
@ -1590,11 +1591,10 @@ class YoutubeChannelIE(InfoExtractor):
|
||||||
# Download all channel pages using the json-based channel_ajax query
|
# Download all channel pages using the json-based channel_ajax query
|
||||||
for pagenum in itertools.count(1):
|
for pagenum in itertools.count(1):
|
||||||
url = self._MORE_PAGES_URL % (pagenum, channel_id)
|
url = self._MORE_PAGES_URL % (pagenum, channel_id)
|
||||||
page = self._download_webpage(url, channel_id,
|
page = self._download_json(
|
||||||
u'Downloading page #%s' % pagenum)
|
url, channel_id, note=u'Downloading page #%s' % pagenum,
|
||||||
|
transform_source=uppercase_escape)
|
||||||
page = json.loads(page)
|
|
||||||
|
|
||||||
ids_in_page = self.extract_videos_from_page(page['content_html'])
|
ids_in_page = self.extract_videos_from_page(page['content_html'])
|
||||||
video_ids.extend(ids_in_page)
|
video_ids.extend(ids_in_page)
|
||||||
|
|
||||||
|
|
|
@ -1214,3 +1214,9 @@ class PagedList(object):
|
||||||
if end == nextfirstid:
|
if end == nextfirstid:
|
||||||
break
|
break
|
||||||
return res
|
return res
|
||||||
|
|
||||||
|
|
||||||
|
def uppercase_escape(s):
|
||||||
|
return re.sub(
|
||||||
|
r'\\U([0-9a-fA-F]{8})',
|
||||||
|
lambda m: compat_chr(int(m.group(1), base=16)), s)
|
||||||
|
|
Loading…
Reference in a new issue