Correct XML ampersand fixup
This commit is contained in:
parent
b853d2e155
commit
5aafe895fc
5 changed files with 25 additions and 10 deletions
|
@ -16,6 +16,7 @@ from youtube_dl.utils import (
|
||||||
DateRange,
|
DateRange,
|
||||||
encodeFilename,
|
encodeFilename,
|
||||||
find_xpath_attr,
|
find_xpath_attr,
|
||||||
|
fix_xml_ampersands,
|
||||||
get_meta_content,
|
get_meta_content,
|
||||||
orderedSet,
|
orderedSet,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
|
@ -200,5 +201,18 @@ class TestUtil(unittest.TestCase):
|
||||||
self.assertEqual(parse_duration('9:12:43'), 33163)
|
self.assertEqual(parse_duration('9:12:43'), 33163)
|
||||||
self.assertEqual(parse_duration('x:y'), None)
|
self.assertEqual(parse_duration('x:y'), None)
|
||||||
|
|
||||||
|
def test_fix_xml_ampersands(self):
|
||||||
|
self.assertEqual(
|
||||||
|
fix_xml_ampersands('"&x=y&z=a'), '"&amp;x=y&amp;z=a')
|
||||||
|
self.assertEqual(
|
||||||
|
fix_xml_ampersands('"&amp;x=y&wrong;&z=a'),
|
||||||
|
'"&amp;x=y&amp;wrong;&amp;z=a')
|
||||||
|
self.assertEqual(
|
||||||
|
fix_xml_ampersands('&amp;&apos;&gt;&lt;&quot;'),
|
||||||
|
'&amp;&apos;&gt;&lt;&quot;')
|
||||||
|
self.assertEqual(
|
||||||
|
fix_xml_ampersands('&#1234;&#x1abC;'), '&#1234;&#x1abC;')
|
||||||
|
self.assertEqual(fix_xml_ampersands('&#&#'), '&amp;#&amp;#')
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|
|
@ -3,7 +3,7 @@ import re
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
find_xpath_attr,
|
find_xpath_attr,
|
||||||
fix_xml_all_ampersand,
|
fix_xml_ampersands
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -33,7 +33,7 @@ class ClipsyndicateIE(InfoExtractor):
|
||||||
pdoc = self._download_xml(
|
pdoc = self._download_xml(
|
||||||
'http://eplayer.clipsyndicate.com/osmf/playlist?%s' % flvars,
|
'http://eplayer.clipsyndicate.com/osmf/playlist?%s' % flvars,
|
||||||
video_id, u'Downloading video info',
|
video_id, u'Downloading video info',
|
||||||
transform_source=fix_xml_all_ampersand)
|
transform_source=fix_xml_ampersands)
|
||||||
|
|
||||||
track_doc = pdoc.find('trackList/track')
|
track_doc = pdoc.find('trackList/track')
|
||||||
def find_param(name):
|
def find_param(name):
|
||||||
|
|
|
@ -4,7 +4,7 @@ import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
fix_xml_all_ampersand,
|
fix_xml_ampersands,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -27,7 +27,7 @@ class MetacriticIE(InfoExtractor):
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
# The xml is not well formatted, there are raw '&'
|
# The xml is not well formatted, there are raw '&'
|
||||||
info = self._download_xml('http://www.metacritic.com/video_data?video=' + video_id,
|
info = self._download_xml('http://www.metacritic.com/video_data?video=' + video_id,
|
||||||
video_id, 'Downloading info xml', transform_source=fix_xml_all_ampersand)
|
video_id, 'Downloading info xml', transform_source=fix_xml_ampersands)
|
||||||
|
|
||||||
clip = next(c for c in info.findall('playList/clip') if c.find('id').text == video_id)
|
clip = next(c for c in info.findall('playList/clip') if c.find('id').text == video_id)
|
||||||
formats = []
|
formats = []
|
||||||
|
|
|
@ -5,6 +5,7 @@ from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
fix_xml_ampersands,
|
||||||
)
|
)
|
||||||
|
|
||||||
def _media_xml_tag(tag):
|
def _media_xml_tag(tag):
|
||||||
|
@ -83,12 +84,9 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
||||||
video_id = self._id_from_uri(uri)
|
video_id = self._id_from_uri(uri)
|
||||||
data = compat_urllib_parse.urlencode({'uri': uri})
|
data = compat_urllib_parse.urlencode({'uri': uri})
|
||||||
|
|
||||||
def fix_ampersand(s):
|
|
||||||
""" Fix unencoded ampersand in XML """
|
|
||||||
return s.replace(u'& ', '&amp; ')
|
|
||||||
idoc = self._download_xml(
|
idoc = self._download_xml(
|
||||||
self._FEED_URL + '?' + data, video_id,
|
self._FEED_URL + '?' + data, video_id,
|
||||||
u'Downloading info', transform_source=fix_ampersand)
|
u'Downloading info', transform_source=fix_xml_ampersands)
|
||||||
return [self._get_video_info(item) for item in idoc.findall('.//item')]
|
return [self._get_video_info(item) for item in idoc.findall('.//item')]
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1092,9 +1092,12 @@ def month_by_name(name):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
def fix_xml_all_ampersand(xml_str):
|
def fix_xml_ampersands(xml_str):
|
||||||
"""Replace all the '&' by '&amp;' in XML"""
|
"""Replace all the '&' by '&amp;' in XML"""
|
||||||
return xml_str.replace(u'&', u'&amp;')
|
return re.sub(
|
||||||
|
r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
|
||||||
|
u'&amp;',
|
||||||
|
xml_str)
|
||||||
|
|
||||||
|
|
||||||
def setproctitle(title):
|
def setproctitle(title):
|
||||||
|
|
Loading…
Reference in a new issue