Allow non-ASCII characters in simplified titles (Closes #220)

This commit is contained in:
Philipp Hagemeister 2011-11-21 21:50:39 +01:00
parent e092418d8b
commit af8e8d63f9
2 changed files with 9 additions and 6 deletions

View file

@@ -16,13 +16,14 @@ def test_simplify_title():
assert u'/' not in youtube_dl._simplify_title(u'abc/de') assert u'/' not in youtube_dl._simplify_title(u'abc/de')
assert u'abc' in youtube_dl._simplify_title(u'abc/de') assert u'abc' in youtube_dl._simplify_title(u'abc/de')
assert u'de' in youtube_dl._simplify_title(u'abc/de') assert u'de' in youtube_dl._simplify_title(u'abc/de')
assert u'/' not in youtube_dl._simplify_title(u'abc/de///')
assert u'\\' not in youtube_dl._simplify_title(u'abc\\de') assert u'\\' not in youtube_dl._simplify_title(u'abc\\de')
assert u'abc' in youtube_dl._simplify_title(u'abc\\de') assert u'abc' in youtube_dl._simplify_title(u'abc\\de')
assert u'de' in youtube_dl._simplify_title(u'abc\\de') assert u'de' in youtube_dl._simplify_title(u'abc\\de')
# TODO: Fix #220 assert youtube_dl._simplify_title(u'ä') == u'ä'
#assert youtube_dl._simplify_title(u'ä') == u'ä' assert youtube_dl._simplify_title(u'кириллица') == u'кириллица'
# Strip underlines # Strip underlines
assert youtube_dl._simplify_title(u'\'a_') == u'a' assert youtube_dl._simplify_title(u'\'a_') == u'a'

View file

@@ -278,7 +278,8 @@ def timeconvert(timestr):
return timestamp return timestamp
def _simplify_title(title): def _simplify_title(title):
return re.sub(ur'[^\w\d_\-]+', u'_', title).strip(u'_') expr = re.compile(ur'[^\w\d_\-]+', flags=re.UNICODE)
return expr.sub(u'_', title).strip(u'_')
class DownloadError(Exception): class DownloadError(Exception):
"""Download Error exception. """Download Error exception.
@@ -2937,6 +2938,7 @@ class BlipTVIE(InfoExtractor):
if urlh.headers.get('Content-Type', '').startswith('video/'): # Direct download if urlh.headers.get('Content-Type', '').startswith('video/'): # Direct download
basename = url.split('/')[-1] basename = url.split('/')[-1]
title,ext = os.path.splitext(basename) title,ext = os.path.splitext(basename)
title = title.decode('UTF-8')
ext = ext.replace('.', '') ext = ext.replace('.', '')
self.report_direct_download(title) self.report_direct_download(title)
info = { info = {
@@ -3089,9 +3091,9 @@ class ComedyCentralIE(InfoExtractor):
if mobj.group('shortname'): if mobj.group('shortname'):
if mobj.group('shortname') in ('tds', 'thedailyshow'): if mobj.group('shortname') in ('tds', 'thedailyshow'):
url = 'http://www.thedailyshow.com/full-episodes/' url = u'http://www.thedailyshow.com/full-episodes/'
else: else:
url = 'http://www.colbertnation.com/full-episodes/' url = u'http://www.colbertnation.com/full-episodes/'
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
assert mobj is not None assert mobj is not None
@@ -3177,7 +3179,7 @@ class ComedyCentralIE(InfoExtractor):
self._downloader.increment_downloads() self._downloader.increment_downloads()
effTitle = showId + '-' + epTitle effTitle = showId + u'-' + epTitle
info = { info = {
'id': shortMediaId, 'id': shortMediaId,
'url': video_url, 'url': video_url,