[utils] add a function to clean podcast URLs
This commit is contained in:
parent
0889eb33e0
commit
e88c9ef62a
2 changed files with 22 additions and 0 deletions
|
@ -21,6 +21,7 @@ from youtube_dl.utils import (
|
||||||
encode_base_n,
|
encode_base_n,
|
||||||
caesar,
|
caesar,
|
||||||
clean_html,
|
clean_html,
|
||||||
|
clean_podcast_url,
|
||||||
date_from_str,
|
date_from_str,
|
||||||
DateRange,
|
DateRange,
|
||||||
detect_exe_version,
|
detect_exe_version,
|
||||||
|
@ -1470,6 +1471,10 @@ Line 1
|
||||||
self.assertEqual(get_elements_by_attribute('class', 'foo', html), [])
|
self.assertEqual(get_elements_by_attribute('class', 'foo', html), [])
|
||||||
self.assertEqual(get_elements_by_attribute('class', 'no-such-foo', html), [])
|
self.assertEqual(get_elements_by_attribute('class', 'no-such-foo', html), [])
|
||||||
|
|
||||||
|
def test_clean_podcast_url(self):
    """Check that tracking/redirect prefixes are stripped from podcast URLs."""
    # A podtrac redirect followed by a chartable tracking prefix: both are removed.
    self.assertEqual(
        clean_podcast_url('https://www.podtrac.com/pts/redirect.mp3/chtbl.com/track/5899E/traffic.megaphone.fm/HSW7835899191.mp3'),
        'https://traffic.megaphone.fm/HSW7835899191.mp3')
    # A play.podtrac.com prefix with its one-segment identifier is removed.
    self.assertEqual(
        clean_podcast_url('https://play.podtrac.com/npr-344098539/edge1.pod.npr.org/anon.npr-podcasts/podcast/npr/waitwait/2020/10/20201003_waitwait_wwdtmpodcast201003-015621a5-f035-4eca-a9a1-7c118d90bc3c.mp3'),
        'https://edge1.pod.npr.org/anon.npr-podcasts/podcast/npr/waitwait/2020/10/20201003_waitwait_wwdtmpodcast201003-015621a5-f035-4eca-a9a1-7c118d90bc3c.mp3')
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|
|
@ -5706,3 +5706,20 @@ def random_birthday(year_field, month_field, day_field):
|
||||||
month_field: str(random_date.month),
|
month_field: str(random_date.month),
|
||||||
day_field: str(random_date.day),
|
day_field: str(random_date.day),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def clean_podcast_url(url):
    """Return *url* with known podcast tracking/redirect prefixes removed.

    Every occurrence of a recognised prefix (host, plus the extra path
    segment some services require, plus the trailing slash) is deleted, so
    chained prefixes are all stripped in one call.  A URL that contains no
    recognised prefix is returned unchanged.

    Example:
        'https://www.podtrac.com/pts/redirect.mp3/chtbl.com/track/5899E/traffic.megaphone.fm/x.mp3'
        becomes 'https://traffic.megaphone.fm/x.mp3'.
    """
    # Verbose-mode pattern: whitespace is ignored and '#' starts a comment,
    # which lets each prefix carry a link to the service's documentation.
    return re.sub(r'''(?x)
        (?:
            (?:
                chtbl\.com/track|
                media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
                play\.podtrac\.com
            )/[^/]+|                 # these services are followed by one identifier segment
            (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
            flex\.acast\.com|
            pd(?:
                cn\.co| # https://podcorn.com/analytics-prefix/
                st\.fm # https://podsights.com/docs/
            )/e
        )/''', '', url)
|
||||||
|
|
Loading…
Reference in a new issue