#!/usr/bin/env python3
"""Helpers for sanitizing file names and replacing their extension."""

import re
from typing import Optional

from slugify import slugify

# Characters permitted in a file name, written as the inside of a regex
# character class so it can be embedded in both patterns below.
_REGEX_PART_ALLOWED_FILE_NAME_CHARACTERS = r"-_a-zA-Z0-9 ()."
# One or more consecutive characters that are NOT permitted.
_DISALLOWED_FILE_NAME_CHARACTERS = re.compile(rf"[^{_REGEX_PART_ALLOWED_FILE_NAME_CHARACTERS}]+")
# A non-empty name consisting only of permitted characters.
ALLOWED_FILE_NAMES = re.compile(rf"^[{_REGEX_PART_ALLOWED_FILE_NAME_CHARACTERS}]+$")

# Splits a file name into a stem (group 1) and an optional trailing extension.
# Compiled once at import time instead of being rebuilt on every call.
_STEM_AND_EXTENSION = re.compile(
    r"""
    ^
    (.*?)  # Match anything, non-greedily because otherwise it would also match extension
    (?:
        (?:\.tar)?  # Optionally match .tar
        \.[^.]{0,5}
    )?  # Optionally match an extension (0 to 5 long)
    $
    """,
    flags=re.VERBOSE,
)


def set_extension(file_name: str, extension: str) -> str:
    """Return *file_name* with its extension replaced by *extension*.

    A trailing ``.xyz`` part of at most five characters (optionally preceded
    by ``.tar``, as in ``.tar.gz``) is treated as the existing extension and
    removed. Longer suffixes are considered part of the name and kept.
    *extension* is appended verbatim, so it must include its leading dot.

    >>> set_extension("archive.tar.gz", ".zip")
    'archive.zip'
    >>> set_extension("notes", ".txt")
    'notes.txt'
    >>> set_extension("photo.original", ".png")
    'photo.original.png'
    """
    m = _STEM_AND_EXTENSION.fullmatch(file_name)
    if m is None:
        # "." does not match a newline, so names containing one never match;
        # fall back to plain concatenation.
        return file_name + extension
    return m.group(1) + extension


def sanitize(file_name: str) -> Optional[str]:
    """Return a safe version of *file_name*, or ``None`` if nothing usable remains.

    Leading dots are removed. A name made up solely of allowed characters is
    returned as is; otherwise every run of disallowed characters is replaced
    via ``slugify`` (case is preserved).
    """
    stripped = file_name.lstrip(".")
    if not stripped:
        return None

    if ALLOWED_FILE_NAMES.fullmatch(stripped):
        return stripped

    slug = slugify(
        stripped,
        lowercase=False,
        regex_pattern=_DISALLOWED_FILE_NAME_CHARACTERS,
    )
    # slugify trims separators from both ends of its result, which can expose
    # a leading dot again (e.g. "?.hidden") or leave an empty string (e.g.
    # "???"). Apply the same rules as above so the result is never dot-led
    # or empty.
    return slug.lstrip(".") or None