From c458ad4a72015c552c87c2653c9c8190ef8a88c5 Mon Sep 17 00:00:00 2001
From: Vincent Gao <gaobing1230@gmail.com>
Date: Wed, 24 Jun 2026 09:23:27 +0200
Subject: [PATCH] fix: ensure slugify is idempotent with replacement rules

User replacements can break slugify(slugify(x)) == slugify(x) in
two ways:

1. Direct self-reference: old appears in new (e.g. a -> aa),
   causing compound growth on re-invocation.

2. Indirect self-reference: new contains non-word characters that
   slugification turns into old (e.g. dash -> dollar-x-dollar: the
   dollar chars become dashes, giving dash-x-dash, which contains
   dash and so triggers the pass-1 replacement again on the next
   call).

Fix:
- Skip a replacement rule in both passes when old is a substring of
  new (direct) or of the slugified form of new (indirect).
- Run the disallowed-char pattern + dedup + strip after each of the
  two replacement passes so non-word characters from replacements
  do not break idempotence.

Non-cyclic replacements (e.g. the pipe character -> "or", the
percent sign -> "percent") are still applied.
---
 slugify/slugify.py | 42 ++++++++++++++++++++++++++++++++++--------
 test.py            | 23 +++++++++++++++++++++++
 2 files changed, 57 insertions(+), 8 deletions(-)

diff --git a/slugify/slugify.py b/slugify/slugify.py
index 9b5f27f..61e62b5 100644
--- a/slugify/slugify.py
+++ b/slugify/slugify.py
@@ -105,10 +105,32 @@ def slugify(
     :return (str):
     """
 
-    # user-specific replacements
+    # determine the disallowed-char pattern early so both replacement
+    # passes can clean up non-word characters they introduce.
+    if allow_unicode:
+        _pattern = regex_pattern or DISALLOWED_UNICODE_CHARS_PATTERN
+    else:
+        _pattern = regex_pattern or DISALLOWED_CHARS_PATTERN
+
+    # user-specific replacements (pass 1 -- before normalization)
     if replacements:
         for old, new in replacements:
+            if old:
+                # Skip replacements that form a self-referential cycle.
+                # Direct:  old appears in `new` (e.g. 'a' → 'aa').
+                # Indirect: slugifying `new` recreates `old` (e.g. '-'
+                # → '$x$', because '$' → '-' produces '-x-' which contains
+                # '-'); such rules would grow every time slugify is called
+                # again on its own output.
+                _cleaned = re.sub(_pattern, DEFAULT_SEPARATOR, new)
+                _cleaned = DUPLICATE_DASH_PATTERN.sub(DEFAULT_SEPARATOR, _cleaned)
+                if old in new or old in _cleaned:
+                    continue
             text = text.replace(old, new)
+        # Replace disallowed characters (including any introduced by pass-1
+        # replacements) so they don't interfere with normalization or pass 2.
+        text = re.sub(_pattern, DEFAULT_SEPARATOR, text)
+        text = DUPLICATE_DASH_PATTERN.sub(DEFAULT_SEPARATOR, text).strip(DEFAULT_SEPARATOR)
 
     # ensure text is unicode
     if not isinstance(text, str):
@@ -163,12 +185,7 @@ def slugify(
     text = NUMBERS_PATTERN.sub('', text)
 
     # replace all other unwanted characters
-    if allow_unicode:
-        pattern = regex_pattern or DISALLOWED_UNICODE_CHARS_PATTERN
-    else:
-        pattern = regex_pattern or DISALLOWED_CHARS_PATTERN
-
-    text = re.sub(pattern, DEFAULT_SEPARATOR, text)
+    text = re.sub(_pattern, DEFAULT_SEPARATOR, text)
 
     # remove redundant
     text = DUPLICATE_DASH_PATTERN.sub(DEFAULT_SEPARATOR, text).strip(DEFAULT_SEPARATOR)
@@ -182,10 +199,19 @@ def slugify(
             words = [w for w in text.split(DEFAULT_SEPARATOR) if w not in stopwords]
         text = DEFAULT_SEPARATOR.join(words)
 
-    # finalize user-specific replacements
+    # finalize user-specific replacements (pass 2)
     if replacements:
         for old, new in replacements:
+            if old:
+                _cleaned = re.sub(_pattern, DEFAULT_SEPARATOR, new)
+                _cleaned = DUPLICATE_DASH_PATTERN.sub(DEFAULT_SEPARATOR, _cleaned)
+                if old in new or old in _cleaned:
+                    continue
             text = text.replace(old, new)
+        # Clean up any non-word characters introduced by pass-2 replacements
+        # so that slugify(slugify(x)) == slugify(x).
+        text = re.sub(_pattern, DEFAULT_SEPARATOR, text)
+        text = DUPLICATE_DASH_PATTERN.sub(DEFAULT_SEPARATOR, text).strip(DEFAULT_SEPARATOR)
 
     # smart truncate if requested
     if max_length > 0:
diff --git a/test.py b/test.py
index fcec4b6..689be8f 100644
--- a/test.py
+++ b/test.py
@@ -237,6 +237,29 @@ def test_replacements_german_umlaut_custom(self):
         r = slugify(txt, replacements=[['Ü', 'UE'], ['ü', 'ue']])
         self.assertEqual(r, "ueber-ueber-german-umlaut")
 
+    def test_replacements_idempotent(self):
+        """slugify(slugify(x)) == slugify(x) with replacements."""
+        # Non-word chars in replacement 'new' values
+        self.assertEqual(
+            slugify(slugify('a.b', replacements=[['-', '$x$']]),
+                    replacements=[['-', '$x$']]),
+            slugify('a.b', replacements=[['-', '$x$']]))
+        # Self-referential replacement (direct: old in new)
+        self.assertEqual(
+            slugify(slugify('a', replacements=[['a', 'aa']]),
+                    replacements=[['a', 'aa']]),
+            slugify('a', replacements=[['a', 'aa']]))
+        # Self-referential through slugification (indirect)
+        self.assertEqual(
+            slugify(slugify('hello world', replacements=[['-', '$iqt']]),
+                    replacements=[['-', '$iqt']]),
+            slugify('hello world', replacements=[['-', '$iqt']]))
+        # Non-cyclic replacements still work
+        self.assertEqual(
+            slugify(slugify('10 | 20 %', replacements=[['|', 'or'], ['%', 'percent']]),
+                    replacements=[['|', 'or'], ['%', 'percent']]),
+            "10-or-20-percent")
+
     def test_pre_translation(self):
         self.assertEqual(PRE_TRANSLATIONS, [('Ю', 'U'), ('Щ', 'Sch'), ('У', 'Y'), ('Х', 'H'), ('Я', 'Ya'), ('Ё', 'E'), ('ё', 'e'), ('я', 'ya'), ('х', 'h'), ('у', 'y'), ('щ', 'sch'), ('ю', 'u'), ('Ü', 'Ue'), ('Ö', 'Oe'), ('Ä', 'Ae'), ('ä', 'ae'), ('ö', 'oe'), ('ü', 'ue'), ('Ϋ́', 'Y'), ('Ϋ', 'Y'), ('Ύ', 'Y'), ('Υ', 'Y'), ('Χ', 'Ch'), ('χ', 'ch'), ('Ξ', 'X'), ('ϒ', 'Y'), ('υ', 'y'), ('ύ', 'y'), ('ϋ', 'y'), ('ΰ', 'y')])