showtify/showtify.py

#!/usr/bin/env python3

# Showtify: list Spotify playlists and albums
# Copyright © 2019 Midgard
#
# This program is free software: you can redistribute it and/or modify it under the terms of the
# GNU General Public License as published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License along with this program.
# If not, see <http://www.gnu.org/licenses/>.


import requests
import re
from html import unescape


MY_COUNTRY_CODE = "BE"
MY_COUNTRY_NAME = "Belgium"


def search_all_meta(prop, value, page_content):
	"""Return the content of every matching ``<meta property="..." content="..." />`` tag.

	Args:
		prop: Literal name of the meta property, e.g. ``"music:song"``.
		value: Regular expression the content attribute must match. It becomes the only
			capturing group of the search pattern, so it should not contain capturing
			groups of its own.
		page_content: HTML source to search.

	Returns:
		A list with the HTML-unescaped content of each matching tag, in document order.
		The list is empty when nothing matches.
	"""
	# prop is a literal name, so escape it; value is deliberately used as a regex.
	pattern = r'<meta property="{}" content="({})" />'.format(re.escape(prop), value)
	# Return a real list rather than a lazy map(): a map object is always truthy and is
	# exhausted after one pass, which breaks emptiness and membership tests on the result.
	return [unescape(content) for content in re.findall(pattern, page_content)]


def search_meta(prop, value, page_content):
	"""Return the content of the first matching ``<meta property="..." content="..." />`` tag.

	Args:
		prop: Literal name of the meta property, e.g. ``"og:title"``.
		value: Regular expression the content attribute must match.
		page_content: HTML source to search.

	Returns:
		The HTML-unescaped content of the first matching tag, or None when no tag matches.
	"""
	# prop is a literal name, so escape it; otherwise a "." in it would match any character.
	pattern = r'<meta property="{}" content="({})" />'.format(re.escape(prop), value)
	match = re.search(pattern, page_content)
	if match is None:
		return None
	return unescape(match.group(1))


def str_exact_len(s, length, align="<"):
	"""Return `s` cut or padded so that it is exactly `length` characters long.

	Strings that are already long enough are truncated on the right; shorter ones are
	padded according to `align`, which is used as the alignment part of a format spec
	("<" pads on the right, ">" pads on the left).
	"""
	if len(s) >= length:
		return s[:length]

	# Build a template such as "{:<10}" first, then apply it to the string.
	template = "{{:{}{}}}".format(align, length)
	return template.format(s)


def format_duration(total_seconds):
	"""Render a number of seconds as "m:ss", "h:mm:ss" or "d days, hh:mm:ss".

	The shortest form that can represent the duration is chosen.
	"""
	rest_minutes, seconds = divmod(total_seconds, 60)
	rest_hours, minutes = divmod(rest_minutes, 60)
	days, hours = divmod(rest_hours, 24)

	if days > 0:
		template = "{days} days, {hours:02}:{minutes:02}:{seconds:02}"
	elif hours > 0:
		template = "{hours}:{minutes:02}:{seconds:02}"
	else:
		template = "{minutes}:{seconds:02}"

	return template.format(days=days, hours=hours, minutes=minutes, seconds=seconds)


def format_tracks(tracks, max_title_w=50, max_album_w=50, max_artist_w=50):
	"""Format tracks as an aligned table, one "duration title by artist from album" line each.

	Every text column is as wide as its longest value, capped at the matching `max_*_w`
	argument; longer values are truncated. Durations are right-aligned and never truncated.
	Returns an empty string when there are no tracks.
	"""
	if not tracks:
		return ""

	title_w = min(max_title_w, max(len(track.title) for track in tracks))
	album_w = min(max_album_w, max(len(track.album) for track in tracks))
	artist_w = min(max_artist_w, max(len(track.artist) for track in tracks))

	# Format each duration once; the strings are needed for the column width and the rows.
	durations = [format_duration(track.duration) for track in tracks]
	duration_w = max(len(duration) for duration in durations)

	lines = []
	for track, duration in zip(tracks, durations):
		lines.append("{duration} {title} by {artist} from {album}".format(
			duration=str_exact_len(duration, duration_w, align=">"),
			title=str_exact_len(track.title, title_w),
			artist=str_exact_len(track.artist, artist_w),
			album=str_exact_len(track.album, album_w),
		))
	return "\n".join(lines)


class AlbumMetadata:
	"""Metadata of an album or playlist, scraped from its Spotify web page.

	Attributes:
		title: Title of the item, or None when the page did not provide one.
		allowed_countries: List of country codes the item is restricted to. It is empty
			when the page lists no restriction, and None if None was passed in.
		tracks: The item's TrackMetadata objects, in page order.
		total_duration: Sum of the known track durations, in seconds.
	"""

	def __init__(self, title, allowed_countries, tracks, total_duration):
		self.title = title
		# Materialise the country codes. A lazy iterator (such as a map object) is always
		# truthy and is used up by the first membership test, which would make __str__
		# report a restriction for unrestricted items and change its answer between calls.
		self.allowed_countries = None if allowed_countries is None else list(allowed_countries)
		self.tracks = tracks
		self.total_duration = total_duration

	def __str__(self):
		header = "{title} ({dur})\n".format(title=self.title, dur=format_duration(self.total_duration))

		# Only mention availability when the page lists restrictions and ours is not among them.
		restricted = bool(self.allowed_countries) and MY_COUNTRY_CODE not in self.allowed_countries
		notice = "\nNot available in {}".format(MY_COUNTRY_NAME) if restricted else ""

		return header + format_tracks(self.tracks) + notice

	@classmethod
	def from_page_content(cls, page_content):
		"""Build the metadata from the HTML of an album or playlist page.

		Every track listed on the page is fetched over the network to obtain its details.
		"""
		title = search_meta("twitter:title", r'[^"]+', page_content)
		allowed_countries = search_all_meta("og:restrictions:country:allowed", "[a-zA-Z]+", page_content)

		tracks = [
			TrackMetadata.from_url(track_url)
			for track_url in search_all_meta("music:song", r'https://open\.spotify\.com/track/[a-zA-Z0-9]+', page_content)
		]
		# Tracks without a known duration do not contribute to the total.
		total_duration = sum(t.duration for t in tracks if t.duration is not None)

		return cls(title, allowed_countries, tracks, total_duration)

	@classmethod
	def from_url(cls, url):
		"""Download the page at `url` and build the metadata from its content."""
		return cls.from_page_content(requests.get(url).text)


class TrackMetadata:
	"""Metadata of a single track, scraped from its Spotify web page.

	Attributes:
		title: Track title, or None when the page did not provide one.
		artist: Artist name, or None when the page did not provide one.
		duration: Track length in seconds, or None when the page did not provide one.
		album: Title of the album the track belongs to, or None when unknown.
	"""

	def __init__(self, title, artist, duration, album):
		self.title = title
		self.artist = artist
		self.duration = duration
		self.album = album

	def __str__(self):
		# format_duration() cannot handle a missing duration, so show a placeholder instead.
		duration = "?:??" if self.duration is None else format_duration(self.duration)
		return "{} {} by {} from {}".format(duration, self.title, self.artist, self.album)

	@classmethod
	def from_page_content(cls, page_content):
		"""Build the metadata from the HTML of a track page.

		The album page linked from the track page is fetched over the network to obtain
		the album title. Any piece of metadata that is missing from the page becomes None.
		"""
		title = search_meta("og:title", r'[^"]+', page_content)
		artist = search_meta("twitter:audio:artist_name", r'[^"]+', page_content)

		# int(None) would raise TypeError; a missing duration is reported as None instead.
		raw_duration = search_meta("music:duration", r'[0-9]+', page_content)
		duration = int(raw_duration) if raw_duration is not None else None

		album_url = search_meta("music:album", r'https://open\.spotify\.com/album/[a-zA-Z0-9]+', page_content)
		# Without an album link there is nothing to download.
		album = album_title(requests.get(album_url).text) if album_url is not None else None

		return cls(title, artist, duration, album)

	@classmethod
	def from_url(cls, url):
		"""Download the page at `url` and build the metadata from its content."""
		return cls.from_page_content(requests.get(url).text)


def album_title(page_content):
	"""Return the "og:title" of an album page, or None if the page has no such tag."""
	title = search_meta("og:title", r'[^"]+', page_content)
	return title


# Maps URL patterns to the class that can scrape that kind of page. The patterns are
# matched against the whole URL, in this order. Playlists are handled by AlbumMetadata.
# The dots in the host name are escaped so that only open.spotify.com itself matches.
METADATA_CLASS_FOR_URL = [
	(r"https://open\.spotify\.com/track/.*",            TrackMetadata),
	(r"https://open\.spotify\.com/album/.*",            AlbumMetadata),
	(r"https://open\.spotify\.com/playlist/.*",         AlbumMetadata),
	(r"https://open\.spotify\.com/user/.*/playlist/.*", AlbumMetadata),
]


def metadata_for_url(url):
	"""Fetch the metadata for `url` using the first class whose URL pattern matches.

	Returns None when no pattern in METADATA_CLASS_FOR_URL matches the whole URL.
	"""
	candidates = (
		metadata_cls
		for pattern, metadata_cls in METADATA_CLASS_FOR_URL
		if re.fullmatch(pattern, url)
	)
	metadata_cls = next(candidates, None)
	if metadata_cls is None:
		return None
	return metadata_cls.from_url(url)


def main(argv):
	"""Command-line entry point: print the metadata for the URL given in ``argv[1]``.

	Args:
		argv: Command-line arguments, with the program name at index 0.

	Returns:
		1 when no URL argument was given; otherwise None, which ``sys.exit`` treats
		as success.
	"""
	# Imported locally so that main() does not depend on a module-level `sys` that
	# only exists when this file is run as a script.
	import sys

	if len(argv) <= 1:
		print("Usage: showtify.py <url>", file=sys.stderr)
		return 1
	url = argv[1]

	metadata = metadata_for_url(url)
	print(metadata if metadata is not None else "Unsupported URL or item not found")


# Script entry point: run main() with the command-line arguments and use its return
# value as the process exit status.
if __name__ == '__main__':
	# This import only happens when the file is executed as a script, not when it is
	# imported as a module.
	import sys
	sys.exit(main(sys.argv))