[utils] add a function to clean podcast URLs
This commit is contained in:
parent
0889eb33e0
commit
e88c9ef62a
2 changed files with 22 additions and 0 deletions
|
@ -21,6 +21,7 @@ from youtube_dl.utils import (
|
|||
encode_base_n,
|
||||
caesar,
|
||||
clean_html,
|
||||
clean_podcast_url,
|
||||
date_from_str,
|
||||
DateRange,
|
||||
detect_exe_version,
|
||||
|
@ -1470,6 +1471,10 @@ Line 1
|
|||
self.assertEqual(get_elements_by_attribute('class', 'foo', html), [])
|
||||
self.assertEqual(get_elements_by_attribute('class', 'no-such-foo', html), [])
|
||||
|
||||
def test_clean_podcast_url(self):
|
||||
self.assertEqual(clean_podcast_url('https://www.podtrac.com/pts/redirect.mp3/chtbl.com/track/5899E/traffic.megaphone.fm/HSW7835899191.mp3'), 'https://traffic.megaphone.fm/HSW7835899191.mp3')
|
||||
self.assertEqual(clean_podcast_url('https://play.podtrac.com/npr-344098539/edge1.pod.npr.org/anon.npr-podcasts/podcast/npr/waitwait/2020/10/20201003_waitwait_wwdtmpodcast201003-015621a5-f035-4eca-a9a1-7c118d90bc3c.mp3'), 'https://edge1.pod.npr.org/anon.npr-podcasts/podcast/npr/waitwait/2020/10/20201003_waitwait_wwdtmpodcast201003-015621a5-f035-4eca-a9a1-7c118d90bc3c.mp3')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
|
|
@ -5706,3 +5706,20 @@ def random_birthday(year_field, month_field, day_field):
|
|||
month_field: str(random_date.month),
|
||||
day_field: str(random_date.day),
|
||||
}
|
||||
|
||||
|
||||
def clean_podcast_url(url):
|
||||
return re.sub(r'''(?x)
|
||||
(?:
|
||||
(?:
|
||||
chtbl\.com/track|
|
||||
media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
|
||||
play\.podtrac\.com
|
||||
)/[^/]+|
|
||||
(?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
|
||||
flex\.acast\.com|
|
||||
pd(?:
|
||||
cn\.co| # https://podcorn.com/analytics-prefix/
|
||||
st\.fm # https://podsights.com/docs/
|
||||
)/e
|
||||
)/''', '', url)
|
||||
|
|
Loading…
Reference in a new issue