40 lines
1,015 B
Python
40 lines
1,015 B
Python
#!/usr/bin/env python3
|
|
|
|
import re
|
|
from slugify import slugify
|
|
|
|
|
|
# Characters permitted in a file name, written as the inside of a regex
# character class. The hyphen comes first so that it is taken literally
# instead of forming a range.
_REGEX_PART_ALLOWED_FILE_NAME_CHARACTERS = r"-_a-zA-Z0-9 ()."

# Matches one or more consecutive characters that are NOT in the allowed set.
_DISALLOWED_FILE_NAME_CHARACTERS = re.compile(
    rf"[^{_REGEX_PART_ALLOWED_FILE_NAME_CHARACTERS}]+"
)

# Matches a non-empty name made up exclusively of allowed characters.
ALLOWED_FILE_NAMES = re.compile(
    rf"^[{_REGEX_PART_ALLOWED_FILE_NAME_CHARACTERS}]+$"
)
|
|
|
|
|
|
def set_extension(file_name, extension):
    """Return *file_name* with its trailing extension swapped for *extension*.

    The trailing extension is a dot followed by at most five non-dot
    characters, optionally preceded by ``.tar`` (so ``.tar.gz`` is removed
    as a single unit). Anything longer is not treated as an extension and
    is kept as part of the name.

    *extension* is appended verbatim; no dot is inserted for the caller.
    If the pattern does not match the name at all, *extension* is simply
    appended to the unchanged name.
    """
    pattern = r"""
        ^
        (.*?) # Match anything, non-greedily because otherwise it would also match extension
        (?:
            (?:\.tar)? # Optionally match .tar
            \.[^.]{0,5}
        )? # Optionally match an extension (0 to 5 long)
        $
    """
    matched = re.compile(pattern, re.VERBOSE).fullmatch(file_name)
    # Group 1 is the name without the recognised extension.
    stem = file_name if matched is None else matched.group(1)
    return stem + extension
|
|
|
|
|
|
|
|
def sanitize(file_name):
    """Return a cleaned-up version of *file_name*, or ``None`` if nothing is left.

    Leading dots are removed first. A name that then consists only of
    allowed characters (see ``ALLOWED_FILE_NAMES``) is returned as is.
    Any other name is passed through ``slugify`` with case preserved and
    ``_DISALLOWED_FILE_NAME_CHARACTERS`` as the pattern of characters to
    replace; leading dots are stripped from that result as well.

    Returns:
        The sanitized name, or ``None`` when the name is empty after
        cleaning.
    """
    # Strip dots
    file_name = file_name.lstrip(".")
    if not file_name:
        return None

    if ALLOWED_FILE_NAMES.fullmatch(file_name):
        return file_name

    slug = slugify(
        file_name,
        lowercase=False,
        regex_pattern=_DISALLOWED_FILE_NAME_CHARACTERS,
    )
    # NOTE(review): python-slugify appears to trim the "-" separators it
    # inserts, which can expose new leading dots (e.g. "/.." -> "..") or
    # leave an empty string -- confirm against the installed version.
    # Re-apply the dot stripping and report an empty result as None, the
    # same way an empty input is reported above.
    slug = slug.lstrip(".")
    return slug or None
|