Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 34 additions & 8 deletions slugify/slugify.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,10 +105,32 @@ def slugify(
:return (str):
"""

# user-specific replacements
# determine the disallowed-char pattern early so both replacement
# passes can clean up non-word characters they introduce.
if allow_unicode:
_pattern = regex_pattern or DISALLOWED_UNICODE_CHARS_PATTERN
else:
_pattern = regex_pattern or DISALLOWED_CHARS_PATTERN

# user-specific replacements (pass 1 -- before normalization)
if replacements:
for old, new in replacements:
if old:
# Skip replacements that form a self-referential cycle.
# Direct: old appears in `new` (e.g. 'a' → 'aa').
# Indirect: slugifying `new` recreates `old` (e.g. '-'
# → '$x$', because '$' → '-' produces '-x-' which contains
# '-'); such rules would grow every time slugify is called
# again on its own output.
_cleaned = re.sub(_pattern, DEFAULT_SEPARATOR, new)
_cleaned = DUPLICATE_DASH_PATTERN.sub(DEFAULT_SEPARATOR, _cleaned)
if old in new or old in _cleaned:
continue
text = text.replace(old, new)
# Clean up any non-word characters introduced by pass-1 replacements
# so they don't interfere with the subsequent normalization or the pass-2 replacements.
text = re.sub(_pattern, DEFAULT_SEPARATOR, text)
text = DUPLICATE_DASH_PATTERN.sub(DEFAULT_SEPARATOR, text).strip(DEFAULT_SEPARATOR)

# ensure text is unicode
if not isinstance(text, str):
Expand Down Expand Up @@ -163,12 +185,7 @@ def slugify(
text = NUMBERS_PATTERN.sub('', text)

# replace all other unwanted characters
if allow_unicode:
pattern = regex_pattern or DISALLOWED_UNICODE_CHARS_PATTERN
else:
pattern = regex_pattern or DISALLOWED_CHARS_PATTERN

text = re.sub(pattern, DEFAULT_SEPARATOR, text)
text = re.sub(_pattern, DEFAULT_SEPARATOR, text)

# remove redundant
text = DUPLICATE_DASH_PATTERN.sub(DEFAULT_SEPARATOR, text).strip(DEFAULT_SEPARATOR)
Expand All @@ -182,10 +199,19 @@ def slugify(
words = [w for w in text.split(DEFAULT_SEPARATOR) if w not in stopwords]
text = DEFAULT_SEPARATOR.join(words)

# finalize user-specific replacements
# finalize user-specific replacements (pass 2)
if replacements:
for old, new in replacements:
if old:
_cleaned = re.sub(_pattern, DEFAULT_SEPARATOR, new)
_cleaned = DUPLICATE_DASH_PATTERN.sub(DEFAULT_SEPARATOR, _cleaned)
if old in new or old in _cleaned:
continue
text = text.replace(old, new)
# Clean up any non-word characters introduced by pass-2 replacements
# so that slugify(slugify(x)) == slugify(x).
text = re.sub(_pattern, DEFAULT_SEPARATOR, text)
text = DUPLICATE_DASH_PATTERN.sub(DEFAULT_SEPARATOR, text).strip(DEFAULT_SEPARATOR)

# smart truncate if requested
if max_length > 0:
Expand Down
23 changes: 23 additions & 0 deletions test.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,29 @@ def test_replacements_german_umlaut_custom(self):
r = slugify(txt, replacements=[['Ü', 'UE'], ['ü', 'ue']])
self.assertEqual(r, "ueber-ueber-german-umlaut")

def test_replacements_idempotent(self):
    """Re-slugifying already slugified text with the same replacements changes nothing."""
    # Replacement value made up of non-word characters.
    dollar_rule = [['-', '$x$']]
    once = slugify('a.b', replacements=dollar_rule)
    twice = slugify(once, replacements=dollar_rule)
    self.assertEqual(twice, once)

    # Directly self-referential rule: the old text occurs inside the new text.
    growing_rule = [['a', 'aa']]
    once = slugify('a', replacements=growing_rule)
    twice = slugify(once, replacements=growing_rule)
    self.assertEqual(twice, once)

    # Indirectly self-referential rule: the new text contains a character
    # that slugification itself turns back into the old text.
    indirect_rule = [['-', '$iqt']]
    once = slugify('hello world', replacements=indirect_rule)
    twice = slugify(once, replacements=indirect_rule)
    self.assertEqual(twice, once)

    # Ordinary, non-cyclic rules must keep producing the usual result.
    word_rules = [['|', 'or'], ['%', 'percent']]
    once = slugify('10 | 20 %', replacements=word_rules)
    twice = slugify(once, replacements=word_rules)
    self.assertEqual(twice, "10-or-20-percent")

def test_pre_translation(self):
self.assertEqual(PRE_TRANSLATIONS, [('Ю', 'U'), ('Щ', 'Sch'), ('У', 'Y'), ('Х', 'H'), ('Я', 'Ya'), ('Ё', 'E'), ('ё', 'e'), ('я', 'ya'), ('х', 'h'), ('у', 'y'), ('щ', 'sch'), ('ю', 'u'), ('Ü', 'Ue'), ('Ö', 'Oe'), ('Ä', 'Ae'), ('ä', 'ae'), ('ö', 'oe'), ('ü', 'ue'), ('Ϋ́', 'Y'), ('Ϋ', 'Y'), ('Ύ', 'Y'), ('Υ', 'Y'), ('Χ', 'Ch'), ('χ', 'ch'), ('Ξ', 'X'), ('ϒ', 'Y'), ('υ', 'y'), ('ύ', 'y'), ('ϋ', 'y'), ('ΰ', 'y')])

Expand Down