From 1fa8f852a06c9badeeb768049c26996ae72b74e1 Mon Sep 17 00:00:00 2001
From: Tim Treis <tim.treis@stud.uni-heidelberg.de>
Date: Sun, 18 May 2025 16:41:51 +0100
Subject: [PATCH 01/13] added helper func

---
 src/spatialdata/_core/validation.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/spatialdata/_core/validation.py b/src/spatialdata/_core/validation.py
index d10ba3572..eb5eda36c 100644
--- a/src/spatialdata/_core/validation.py
+++ b/src/spatialdata/_core/validation.py
@@ -379,5 +379,8 @@ def __exit__(
             return False
         # Exceptions were collected that we want to raise as a combined validation error.
         if self._collector.errors:
-            raise ValidationError(title=self._message, errors=self._collector.errors)
+            raise ValidationError(
+                title=self._message + "\nTo fix, run `spatialdata.utils.sanitize_table(adata)`.",
+                errors=self._collector.errors
+            )
         return True

From d5cd64fa2b418b25798239c49c5e1d337374c16e Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sun, 18 May 2025 15:43:00 +0000
Subject: [PATCH 02/13] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 src/spatialdata/_core/validation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/spatialdata/_core/validation.py b/src/spatialdata/_core/validation.py
index eb5eda36c..8c038a4ae 100644
--- a/src/spatialdata/_core/validation.py
+++ b/src/spatialdata/_core/validation.py
@@ -381,6 +381,6 @@ def __exit__(
         if self._collector.errors:
             raise ValidationError(
                 title=self._message + "\nTo fix, run `spatialdata.utils.sanitize_table(adata)`.",
-                errors=self._collector.errors
+                errors=self._collector.errors,
             )
         return True

From 8c3b792e91e57d032c8c5aecb9dd64a4d4af0878 Mon Sep 17 00:00:00 2001
From: Tim Treis <tim.treis@stud.uni-heidelberg.de>
Date: Sun, 18 May 2025 16:48:00 +0100
Subject: [PATCH 03/13] tests

---
 src/spatialdata/_utils.py    |   2 +-
 tests/utils/test_sanitize.py | 212 +++++++++++++++++++++++++++++++++++
 2 files changed, 213 insertions(+), 1 deletion(-)
 create mode 100644 tests/utils/test_sanitize.py

diff --git a/src/spatialdata/_utils.py b/src/spatialdata/_utils.py
index 61f5a52c7..d64ab59e1 100644
--- a/src/spatialdata/_utils.py
+++ b/src/spatialdata/_utils.py
@@ -345,4 +345,4 @@ def _check_match_length_channels_c_dim(
             f"The number of channel names `{len(c_coords)}` does not match the length of dimension 'c'"
             f" with length {c_length}."
         )
-    return c_coords
+    return c_coords
\ No newline at end of file
diff --git a/tests/utils/test_sanitize.py b/tests/utils/test_sanitize.py
new file mode 100644
index 000000000..b567cc53d
--- /dev/null
+++ b/tests/utils/test_sanitize.py
@@ -0,0 +1,212 @@
+from __future__ import annotations
+
+import numpy as np
+import pandas as pd
+import pytest
+from anndata import AnnData
+
+from spatialdata import SpatialData
+from spatialdata._utils import sanitize_name, sanitize_table
+
+
+@pytest.fixture
+def invalid_table() -> AnnData:
+    """AnnData with invalid obs column names to test basic sanitization."""
+    return AnnData(
+        obs=pd.DataFrame(
+            {
+                "@invalid#": [1, 2],
+                "valid_name": [3, 4],
+                "__private": [5, 6],
+            }
+        )
+    )
+
+
+@pytest.fixture
+def invalid_table_with_index() -> AnnData:
+    """AnnData with a name requiring whitespace→underscore and a dataframe index column."""
+    return AnnData(
+        obs=pd.DataFrame(
+            {
+                "invalid name": [1, 2],
+                "_index": [3, 4],
+            }
+        )
+    )
+
+
+@pytest.fixture
+def sdata_sanitized_tables(invalid_table, invalid_table_with_index) -> SpatialData:
+    """SpatialData built from sanitized copies of the invalid tables."""
+    table1 = invalid_table.copy()
+    table2 = invalid_table_with_index.copy()
+    sanitize_table(table1)
+    sanitize_table(table2)
+    return SpatialData(tables={"table1": table1, "table2": table2})
+
+
+# -----------------------------------------------------------------------------
+# sanitize_name tests
+# -----------------------------------------------------------------------------
+
+
+@pytest.mark.parametrize(
+    "raw,expected",
+    [
+        ("valid_name", "valid_name"),
+        ("valid-name", "valid-name"),
+        ("valid.name", "valid.name"),
+        ("invalid@name", "invalid_name"),
+        ("invalid#name", "invalid_name"),
+        ("invalid name", "invalid_name"),
+        ("", "unnamed"),
+        (".", "unnamed"),
+        ("..", "unnamed"),
+        ("__private", "private"),
+    ],
+)
+def test_sanitize_name_strips_special_chars(raw, expected):
+    assert sanitize_name(raw) == expected
+
+
+@pytest.mark.parametrize(
+    "raw,is_df_col,expected",
+    [
+        ("_index", True, "index"),
+        ("_index", False, "index"),
+        ("valid@column", True, "valid_column"),
+        ("__private", True, "private"),
+    ],
+)
+def test_sanitize_name_dataframe_column(raw, is_df_col, expected):
+    assert sanitize_name(raw, is_dataframe_column=is_df_col) == expected
+
+
+# -----------------------------------------------------------------------------
+# sanitize_table basic behaviors
+# -----------------------------------------------------------------------------
+
+
+def test_sanitize_table_basic_columns(invalid_table, invalid_table_with_index):
+    ad1 = sanitize_table(invalid_table, inplace=False)
+    assert isinstance(ad1, AnnData)
+    assert list(ad1.obs.columns) == ["invalid_", "valid_name", "private"]
+
+    ad2 = sanitize_table(invalid_table_with_index, inplace=False)
+    assert list(ad2.obs.columns) == ["invalid_name", "index"]
+
+    # original fixture remains unchanged
+    assert list(invalid_table.obs.columns) == ["@invalid#", "valid_name", "__private"]
+
+
+def test_sanitize_table_inplace_copy(invalid_table):
+    ad = invalid_table.copy()
+    sanitize_table(ad)  # inplace=True is now default
+    assert list(ad.obs.columns) == ["invalid_", "valid_name", "private"]
+
+
+def test_sanitize_table_case_insensitive_collisions():
+    obs = pd.DataFrame(
+        {
+            "Column1": [1, 2],
+            "column1": [3, 4],
+            "COLUMN1": [5, 6],
+        }
+    )
+    ad = AnnData(obs=obs)
+    sanitized = sanitize_table(ad, inplace=False)
+    cols = list(sanitized.obs.columns)
+    assert sorted(cols) == sorted(["Column1", "column1_1", "COLUMN1_2"])
+
+
+def test_sanitize_table_whitespace_collision():
+    """Ensure 'a b' → 'a_b' doesn't collide silently with existing 'a_b'."""
+    obs = pd.DataFrame({"a b": [1], "a_b": [2]})
+    ad = AnnData(obs=obs)
+    sanitized = sanitize_table(ad, inplace=False)
+    cols = list(sanitized.obs.columns)
+    assert "a_b" in cols
+    assert "a_b_1" in cols
+
+
+# -----------------------------------------------------------------------------
+# sanitize_table attribute‐specific tests
+# -----------------------------------------------------------------------------
+
+
+def test_sanitize_table_obs_and_obs_columns():
+    ad = AnnData(obs=pd.DataFrame({"@col": [1, 2]}))
+    sanitized = sanitize_table(ad, inplace=False)
+    assert list(sanitized.obs.columns) == ["col"]
+
+
+def test_sanitize_table_obsm_and_obsp():
+    ad = AnnData(obs=pd.DataFrame({"@col": [1, 2]}))
+    ad.obsm["@col"] = np.array([[1, 2], [3, 4]])
+    ad.obsp["bad name"] = np.array([[1, 2], [3, 4]])
+    sanitized = sanitize_table(ad, inplace=False)
+    assert list(sanitized.obsm.keys()) == ["col"]
+    assert list(sanitized.obsp.keys()) == ["bad_name"]
+
+
+def test_sanitize_table_varm_and_varp():
+    ad = AnnData(obs=pd.DataFrame({"x": [1, 2]}), var=pd.DataFrame(index=["v1", "v2"]))
+    ad.varm["__priv"] = np.array([[1, 2], [3, 4]])
+    ad.varp["_index"] = np.array([[1, 2], [3, 4]])
+    sanitized = sanitize_table(ad, inplace=False)
+    assert list(sanitized.varm.keys()) == ["priv"]
+    assert list(sanitized.varp.keys()) == ["index"]
+
+
+def test_sanitize_table_uns_and_layers():
+    ad = AnnData(obs=pd.DataFrame({"x": [1, 2]}), var=pd.DataFrame(index=["v1", "v2"]))
+    ad.uns["bad@key"] = "val"
+    ad.layers["bad#layer"] = np.array([[0, 1], [1, 0]])
+    sanitized = sanitize_table(ad, inplace=False)
+    assert list(sanitized.uns.keys()) == ["bad_key"]
+    assert list(sanitized.layers.keys()) == ["bad_layer"]
+
+
+def test_sanitize_table_empty_returns_empty():
+    ad = AnnData()
+    sanitized = sanitize_table(ad, inplace=False)
+    assert isinstance(sanitized, AnnData)
+    assert sanitized.obs.empty
+    assert sanitized.var.empty
+
+
+def test_sanitize_table_preserves_underlying_data():
+    ad = AnnData(obs=pd.DataFrame({"@invalid#": [1, 2], "valid": [3, 4]}))
+    ad.obsm["@invalid#"] = np.array([[1, 2], [3, 4]])
+    ad.uns["invalid@key"] = "value"
+    sanitized = sanitize_table(ad, inplace=False)
+    assert sanitized.obs["invalid_"].tolist() == [1, 2]
+    assert sanitized.obs["valid"].tolist() == [3, 4]
+    assert np.array_equal(sanitized.obsm["invalid_"], np.array([[1, 2], [3, 4]]))
+    assert sanitized.uns["invalid_key"] == "value"
+
+
+# -----------------------------------------------------------------------------
+# SpatialData integration
+# -----------------------------------------------------------------------------
+
+
+def test_sanitize_table_in_spatialdata_sanitized_fixture(sdata_sanitized_tables):
+    t1 = sdata_sanitized_tables.tables["table1"]
+    t2 = sdata_sanitized_tables.tables["table2"]
+    assert list(t1.obs.columns) == ["invalid_", "valid_name", "private"]
+    assert list(t2.obs.columns) == ["invalid_name", "index"]
+
+
+def test_spatialdata_retains_other_elements(full_sdata, sdata_sanitized_tables):
+    # Add another sanitized table into an existing full_sdata
+    tbl = AnnData(obs=pd.DataFrame({"@foo#": [1, 2], "bar": [3, 4]}))
+    sanitize_table(tbl)
+    full_sdata.tables["new_table"] = tbl
+
+    # Verify columns and presence of other SpatialData attributes
+    assert list(full_sdata.tables["new_table"].obs.columns) == ["foo_", "bar"]
+    assert "image2d" in full_sdata.images
+    assert "labels2d" in full_sdata.labels
+    assert "points_0" in full_sdata.points

From fd2ef3411c427e8d6ccc892f5f88c8b4ca60a7fe Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sun, 18 May 2025 15:49:55 +0000
Subject: [PATCH 04/13] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 src/spatialdata/_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/spatialdata/_utils.py b/src/spatialdata/_utils.py
index d64ab59e1..61f5a52c7 100644
--- a/src/spatialdata/_utils.py
+++ b/src/spatialdata/_utils.py
@@ -345,4 +345,4 @@ def _check_match_length_channels_c_dim(
             f"The number of channel names `{len(c_coords)}` does not match the length of dimension 'c'"
             f" with length {c_length}."
         )
-    return c_coords
\ No newline at end of file
+    return c_coords

From e95cebbdb58c530f8949ee59a83da67ed7c3d1b0 Mon Sep 17 00:00:00 2001
From: Tim Treis <tim.treis@stud.uni-heidelberg.de>
Date: Sun, 18 May 2025 16:50:29 +0100
Subject: [PATCH 05/13] fix

---
 src/spatialdata/_utils.py | 132 +++++++++++++++++++++++++++++++++++++-
 1 file changed, 131 insertions(+), 1 deletion(-)

diff --git a/src/spatialdata/_utils.py b/src/spatialdata/_utils.py
index d64ab59e1..0f11248b6 100644
--- a/src/spatialdata/_utils.py
+++ b/src/spatialdata/_utils.py
@@ -345,4 +345,134 @@ def _check_match_length_channels_c_dim(
             f"The number of channel names `{len(c_coords)}` does not match the length of dimension 'c'"
             f" with length {c_length}."
         )
-    return c_coords
\ No newline at end of file
+    return c_coords
+
+
+def sanitize_name(name: str, is_dataframe_column: bool = False) -> str:
+    """
+    Sanitize a name to comply with SpatialData naming rules.
+    This function converts invalid names into valid ones by:
+    1. Converting to string if not already
+    2. Removing invalid characters
+    3. Handling special cases like "__" prefix
+    4. Ensuring the name is not empty
+    5. Handling special cases for dataframe columns
+    Parameters
+    ----------
+    name
+        The name to sanitize
+    is_dataframe_column
+        Whether this name is for a dataframe column (additional restrictions apply)
+    Returns
+    -------
+    A sanitized version of the name that complies with SpatialData naming rules.
+    Examples
+    --------
+    >>> sanitize_name("my@invalid#name")
+    'my_invalid_name'
+    >>> sanitize_name("__private")
+    'private'
+    >>> sanitize_name("_index", is_dataframe_column=True)
+    'index'
+    """
+    # Convert to string if not already
+    name = str(name)
+
+    # Handle empty string case
+    if not name:
+        return "unnamed"
+
+    # Handle special cases
+    if name == "." or name == "..":
+        return "unnamed"
+
+    # Remove "__" prefix if present
+    if name.startswith("__"):
+        name = name[2:]
+
+    # Replace invalid characters with underscore
+    # Keep only alphanumeric, underscore, dot, and hyphen
+    sanitized = ""
+    for char in name:
+        if char.isalnum() or char in "_-.":
+            sanitized += char
+        else:
+            sanitized += "_"
+
+    # Remove leading underscores but keep trailing ones
+    sanitized = sanitized.lstrip("_")
+
+    # Ensure we don't end up with an empty string after sanitization
+    if not sanitized:
+        return "unnamed"
+
+    return sanitized
+
+
+def sanitize_table(data: AnnData, inplace: bool = True) -> AnnData | None:
+    """
+    Sanitize all keys in an AnnData table to comply with SpatialData naming rules.
+    This function sanitizes all keys in obs, var, obsm, obsp, varm, varp, uns, and layers
+    while maintaining case-insensitive uniqueness. It can either modify the table in-place
+    or return a new sanitized copy.
+    Parameters
+    ----------
+    data
+        The AnnData table to sanitize
+    inplace
+        Whether to modify the table in-place or return a new copy
+    Returns
+    -------
+    If inplace is False, returns a new AnnData object with sanitized keys.
+    If inplace is True, returns None as the original object is modified.
+    Examples
+    --------
+    >>> import anndata as ad
+    >>> adata = ad.AnnData(obs=pd.DataFrame({"@invalid#": [1, 2]}))
+    >>> # Create a new sanitized copy
+    >>> sanitized = sanitize_table(adata)
+    >>> print(sanitized.obs.columns)
+    Index(['invalid_'], dtype='object')
+    >>> # Or modify in-place
+    >>> sanitize_table(adata, inplace=True)
+    >>> print(adata.obs.columns)
+    Index(['invalid_'], dtype='object')
+    """
+    import copy
+    from collections import defaultdict
+
+    # Create a deep copy if not modifying in-place
+    sanitized = data if inplace else copy.deepcopy(data)
+
+    # Track used names to maintain case-insensitive uniqueness
+    used_names: dict[str, set[str]] = defaultdict(set)
+
+    def get_unique_name(name: str, attr: str, is_dataframe_column: bool = False) -> str:
+        base_name = sanitize_name(name, is_dataframe_column)
+        normalized_base = base_name.lower()
+
+        # If this exact name is already used, add a number
+        if normalized_base in {n.lower() for n in used_names[attr]}:
+            counter = 1
+            while f"{base_name}_{counter}".lower() in {n.lower() for n in used_names[attr]}:
+                counter += 1
+            base_name = f"{base_name}_{counter}"
+
+        used_names[attr].add(base_name)
+        return base_name
+
+    # Handle obs and var (dataframe columns)
+    for attr in ("obs", "var"):
+        df = getattr(sanitized, attr)
+        new_columns = {old: get_unique_name(old, attr, is_dataframe_column=True) for old in df.columns}
+        df.rename(columns=new_columns, inplace=True)
+
+    # Handle other attributes
+    for attr in ("obsm", "obsp", "varm", "varp", "uns", "layers"):
+        d = getattr(sanitized, attr)
+        new_keys = {old: get_unique_name(old, attr) for old in d}
+        # Create new dictionary with sanitized keys
+        new_dict = {new_keys[old]: value for old, value in d.items()}
+        setattr(sanitized, attr, new_dict)
+
+    return None if inplace else sanitized
\ No newline at end of file

From 6ce065dd7d79155b53a773d191ec0ddf6c32a662 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sun, 18 May 2025 15:52:06 +0000
Subject: [PATCH 06/13] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 src/spatialdata/_utils.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/spatialdata/_utils.py b/src/spatialdata/_utils.py
index 0f11248b6..38b246254 100644
--- a/src/spatialdata/_utils.py
+++ b/src/spatialdata/_utils.py
@@ -357,15 +357,18 @@ def sanitize_name(name: str, is_dataframe_column: bool = False) -> str:
     3. Handling special cases like "__" prefix
     4. Ensuring the name is not empty
     5. Handling special cases for dataframe columns
+
     Parameters
     ----------
     name
         The name to sanitize
     is_dataframe_column
         Whether this name is for a dataframe column (additional restrictions apply)
+
     Returns
     -------
     A sanitized version of the name that complies with SpatialData naming rules.
+
     Examples
     --------
     >>> sanitize_name("my@invalid#name")
@@ -415,16 +418,19 @@ def sanitize_table(data: AnnData, inplace: bool = True) -> AnnData | None:
     This function sanitizes all keys in obs, var, obsm, obsp, varm, varp, uns, and layers
     while maintaining case-insensitive uniqueness. It can either modify the table in-place
     or return a new sanitized copy.
+
     Parameters
     ----------
     data
         The AnnData table to sanitize
     inplace
         Whether to modify the table in-place or return a new copy
+
     Returns
     -------
     If inplace is False, returns a new AnnData object with sanitized keys.
     If inplace is True, returns None as the original object is modified.
+
     Examples
     --------
     >>> import anndata as ad
@@ -475,4 +481,4 @@ def get_unique_name(name: str, attr: str, is_dataframe_column: bool = False) ->
         new_dict = {new_keys[old]: value for old, value in d.items()}
         setattr(sanitized, attr, new_dict)
 
-    return None if inplace else sanitized
\ No newline at end of file
+    return None if inplace else sanitized

From f25165a56cf68912e15c9f94f8eb63b3fbdeaebd Mon Sep 17 00:00:00 2001
From: Tim Treis <tim.treis@stud.uni-heidelberg.de>
Date: Sun, 18 May 2025 17:07:05 +0100
Subject: [PATCH 07/13] mypy

---
 src/spatialdata/_utils.py | 19 +++++--------------
 1 file changed, 5 insertions(+), 14 deletions(-)

diff --git a/src/spatialdata/_utils.py b/src/spatialdata/_utils.py
index 38b246254..239fffed1 100644
--- a/src/spatialdata/_utils.py
+++ b/src/spatialdata/_utils.py
@@ -351,6 +351,7 @@ def _check_match_length_channels_c_dim(
 def sanitize_name(name: str, is_dataframe_column: bool = False) -> str:
     """
     Sanitize a name to comply with SpatialData naming rules.
+
     This function converts invalid names into valid ones by:
     1. Converting to string if not already
     2. Removing invalid characters
@@ -386,35 +387,25 @@ def sanitize_name(name: str, is_dataframe_column: bool = False) -> str:
         return "unnamed"
 
     # Handle special cases
-    if name == "." or name == "..":
+    if name in {".", ".."}:
         return "unnamed"
 
     # Remove "__" prefix if present
     if name.startswith("__"):
         name = name[2:]
 
-    # Replace invalid characters with underscore
-    # Keep only alphanumeric, underscore, dot, and hyphen
-    sanitized = ""
-    for char in name:
-        if char.isalnum() or char in "_-.":
-            sanitized += char
-        else:
-            sanitized += "_"
-
+    sanitized = "".join(char if char.isalnum() or char in "_-." else "_" for char in name)
     # Remove leading underscores but keep trailing ones
     sanitized = sanitized.lstrip("_")
 
     # Ensure we don't end up with an empty string after sanitization
-    if not sanitized:
-        return "unnamed"
-
-    return sanitized
+    return sanitized or "unnamed"
 
 
 def sanitize_table(data: AnnData, inplace: bool = True) -> AnnData | None:
     """
     Sanitize all keys in an AnnData table to comply with SpatialData naming rules.
+
     This function sanitizes all keys in obs, var, obsm, obsp, varm, varp, uns, and layers
     while maintaining case-insensitive uniqueness. It can either modify the table in-place
     or return a new sanitized copy.

From 9f221857e11668c734533c9a26351c703771a81f Mon Sep 17 00:00:00 2001
From: Tim Treis <tim.treis@stud.uni-heidelberg.de>
Date: Sun, 18 May 2025 17:19:49 +0100
Subject: [PATCH 08/13] Update src/spatialdata/_utils.py

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 src/spatialdata/_utils.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/spatialdata/_utils.py b/src/spatialdata/_utils.py
index 239fffed1..487ffdf36 100644
--- a/src/spatialdata/_utils.py
+++ b/src/spatialdata/_utils.py
@@ -443,19 +443,21 @@ def sanitize_table(data: AnnData, inplace: bool = True) -> AnnData | None:
 
     # Track used names to maintain case-insensitive uniqueness
     used_names: dict[str, set[str]] = defaultdict(set)
+    used_names_lower: dict[str, set[str]] = defaultdict(set)
 
     def get_unique_name(name: str, attr: str, is_dataframe_column: bool = False) -> str:
         base_name = sanitize_name(name, is_dataframe_column)
         normalized_base = base_name.lower()
 
         # If this exact name is already used, add a number
-        if normalized_base in {n.lower() for n in used_names[attr]}:
+        if normalized_base in used_names_lower[attr]:
             counter = 1
-            while f"{base_name}_{counter}".lower() in {n.lower() for n in used_names[attr]}:
+            while f"{base_name}_{counter}".lower() in used_names_lower[attr]:
                 counter += 1
             base_name = f"{base_name}_{counter}"
 
         used_names[attr].add(base_name)
+        used_names_lower[attr].add(base_name.lower())
         return base_name
 
     # Handle obs and var (dataframe columns)

From 8221d5d66c9e61713fec9a5e8a524e4b7b2cce9a Mon Sep 17 00:00:00 2001
From: Luca Marconato <m.lucalmer@gmail.com>
Date: Sun, 25 May 2025 19:01:26 -0400
Subject: [PATCH 09/13] fix sanitize edge case; add to docs

---
 docs/api/operations.md             |   2 +
 src/spatialdata/__init__.py        |   3 +
 src/spatialdata/_core/_utils.py    | 141 +++++++++++++++++++++++++++++
 src/spatialdata/_utils.py          | 129 --------------------------
 src/spatialdata/models/__init__.py |   1 +
 tests/utils/test_sanitize.py       |  31 ++++---
 6 files changed, 164 insertions(+), 143 deletions(-)

diff --git a/docs/api/operations.md b/docs/api/operations.md
index 937b8dbca..3eb2a5a6c 100644
--- a/docs/api/operations.md
+++ b/docs/api/operations.md
@@ -29,4 +29,6 @@ Operations on `SpatialData` objects.
 .. autofunction:: are_extents_equal
 .. autofunction:: deepcopy
 .. autofunction:: get_pyramid_levels
+.. autofunction:: sanitize_name
+.. autofunction:: sanitize_table
 ```
diff --git a/src/spatialdata/__init__.py b/src/spatialdata/__init__.py
index 9ddfea32d..0b68391ad 100644
--- a/src/spatialdata/__init__.py
+++ b/src/spatialdata/__init__.py
@@ -53,10 +53,13 @@
     "relabel_sequential",
     "map_raster",
     "deepcopy",
+    "sanitize_table",
+    "sanitize_name",
 ]
 
 from spatialdata import dataloader, datasets, models, transformations
 from spatialdata._core._deepcopy import deepcopy
+from spatialdata._core._utils import sanitize_name, sanitize_table
 from spatialdata._core.centroids import get_centroids
 from spatialdata._core.concatenate import concatenate
 from spatialdata._core.data_extent import are_extents_equal, get_extent
diff --git a/src/spatialdata/_core/_utils.py b/src/spatialdata/_core/_utils.py
index dd27e9c8d..12e794374 100644
--- a/src/spatialdata/_core/_utils.py
+++ b/src/spatialdata/_core/_utils.py
@@ -1,5 +1,9 @@
+from __future__ import annotations
+
 from collections.abc import Iterable
 
+from anndata import AnnData
+
 from spatialdata._core.spatialdata import SpatialData
 
 
@@ -25,3 +29,140 @@ def _find_common_table_keys(sdatas: Iterable[SpatialData]) -> set[str]:
             common_keys.intersection_update(sdata.tables.keys())
 
     return common_keys
+
+
+def sanitize_name(name: str, is_dataframe_column: bool = False) -> str:
+    """
+    Sanitize a name to comply with SpatialData naming rules.
+
+    This function converts invalid names into valid ones by:
+    1. Converting to string if not already
+    2. Replacing invalid characters with underscores
+    3. Handling special cases like "__" prefix
+    4. Ensuring the name is not empty
+    5. Handling special cases for dataframe columns
+
+    See a discussion on the naming rules, and how to avoid naming collisions, here:
+    https://github.com/scverse/spatialdata/discussions/707
+
+    Parameters
+    ----------
+    name
+        The name to sanitize
+    is_dataframe_column
+        Whether this name is for a dataframe column (additional restrictions apply)
+
+    Returns
+    -------
+    A sanitized version of the name that complies with SpatialData naming rules. If a
+    sanitized name cannot be generated, it returns "unnamed".
+
+    Examples
+    --------
+    >>> sanitize_name("my@invalid#name")
+    'my_invalid_name'
+    >>> sanitize_name("__private")
+    '_private'
+    >>> sanitize_name("_index", is_dataframe_column=True)
+    'index'
+    """
+    # Convert to string if not already
+    name = str(name)
+
+    # Handle empty string case
+    if not name:
+        return "unnamed"
+
+    # Handle special cases
+    if name in {".", ".."}:
+        return "unnamed"
+
+    sanitized = "".join(char if char.isalnum() or char in "_-." else "_" for char in name)
+
+    # remove double underscores if found as a prefix
+    while sanitized.startswith("__"):
+        sanitized = sanitized[1:]
+
+    if is_dataframe_column and sanitized == "_index":
+        return "index"
+
+    # Ensure we don't end up with an empty string after sanitization
+    return sanitized or "unnamed"
+
+
+def sanitize_table(data: AnnData, inplace: bool = True) -> AnnData | None:
+    """
+    Sanitize all keys in an AnnData table to comply with SpatialData naming rules.
+
+    This function sanitizes all keys in obs, var, obsm, obsp, varm, varp, uns, and layers
+    while maintaining case-insensitive uniqueness. It can either modify the table in-place
+    or return a new sanitized copy.
+
+    See a discussion on the naming rules here:
+    https://github.com/scverse/spatialdata/discussions/707
+
+    Parameters
+    ----------
+    data
+        The AnnData table to sanitize
+    inplace
+        Whether to modify the table in-place or return a new copy
+
+    Returns
+    -------
+    If inplace is False, returns a new AnnData object with sanitized keys.
+    If inplace is True, returns None as the original object is modified.
+
+    Examples
+    --------
+    >>> import anndata as ad
+    >>> adata = ad.AnnData(obs=pd.DataFrame({"@invalid#": [1, 2]}))
+    >>> # Create a new sanitized copy
+    >>> sanitized = sanitize_table(adata, inplace=False)
+    >>> print(sanitized.obs.columns)
+    Index(['_invalid_'], dtype='object')
+    >>> # Or modify in-place
+    >>> sanitize_table(adata, inplace=True)
+    >>> print(adata.obs.columns)
+    Index(['_invalid_'], dtype='object')
+    """
+    import copy
+    from collections import defaultdict
+
+    # Create a deep copy if not modifying in-place
+    sanitized = data if inplace else copy.deepcopy(data)
+
+    # Track used names to maintain case-insensitive uniqueness
+    used_names: dict[str, set[str]] = defaultdict(set)
+    used_names_lower: dict[str, set[str]] = defaultdict(set)
+
+    def get_unique_name(name: str, attr: str, is_dataframe_column: bool = False) -> str:
+        base_name = sanitize_name(name, is_dataframe_column)
+        normalized_base = base_name.lower()
+
+        # If this exact name is already used, add a number
+        if normalized_base in used_names_lower[attr]:
+            counter = 1
+            while f"{base_name}_{counter}".lower() in used_names_lower[attr]:
+                counter += 1
+            base_name = f"{base_name}_{counter}"
+
+        used_names[attr].add(base_name)
+        used_names_lower[attr].add(base_name.lower())
+        return base_name
+
+    # Handle obs and var (dataframe columns)
+    for attr in ("obs", "var"):
+        df = getattr(sanitized, attr)
+        new_columns = {old: get_unique_name(old, attr, is_dataframe_column=True) for old in df.columns}
+        df.rename(columns=new_columns, inplace=True)
+
+    # Handle other attributes
+    for attr in ("obsm", "obsp", "varm", "varp", "uns", "layers"):
+        d = getattr(sanitized, attr)
+        new_keys = {old: get_unique_name(old, attr) for old in d}
+        # Create new dictionary with sanitized keys
+        new_dict = {new_keys[old]: value for old, value in d.items()}
+        setattr(sanitized, attr, new_dict)
+
+    return None if inplace else sanitized
diff --git a/src/spatialdata/_utils.py b/src/spatialdata/_utils.py
index 487ffdf36..61f5a52c7 100644
--- a/src/spatialdata/_utils.py
+++ b/src/spatialdata/_utils.py
@@ -346,132 +346,3 @@ def _check_match_length_channels_c_dim(
             f" with length {c_length}."
         )
     return c_coords
-
-
-def sanitize_name(name: str, is_dataframe_column: bool = False) -> str:
-    """
-    Sanitize a name to comply with SpatialData naming rules.
-
-    This function converts invalid names into valid ones by:
-    1. Converting to string if not already
-    2. Removing invalid characters
-    3. Handling special cases like "__" prefix
-    4. Ensuring the name is not empty
-    5. Handling special cases for dataframe columns
-
-    Parameters
-    ----------
-    name
-        The name to sanitize
-    is_dataframe_column
-        Whether this name is for a dataframe column (additional restrictions apply)
-
-    Returns
-    -------
-    A sanitized version of the name that complies with SpatialData naming rules.
-
-    Examples
-    --------
-    >>> sanitize_name("my@invalid#name")
-    'my_invalid_name'
-    >>> sanitize_name("__private")
-    'private'
-    >>> sanitize_name("_index", is_dataframe_column=True)
-    'index'
-    """
-    # Convert to string if not already
-    name = str(name)
-
-    # Handle empty string case
-    if not name:
-        return "unnamed"
-
-    # Handle special cases
-    if name in {".", ".."}:
-        return "unnamed"
-
-    # Remove "__" prefix if present
-    if name.startswith("__"):
-        name = name[2:]
-
-    sanitized = "".join(char if char.isalnum() or char in "_-." else "_" for char in name)
-    # Remove leading underscores but keep trailing ones
-    sanitized = sanitized.lstrip("_")
-
-    # Ensure we don't end up with an empty string after sanitization
-    return sanitized or "unnamed"
-
-
-def sanitize_table(data: AnnData, inplace: bool = True) -> AnnData | None:
-    """
-    Sanitize all keys in an AnnData table to comply with SpatialData naming rules.
-
-    This function sanitizes all keys in obs, var, obsm, obsp, varm, varp, uns, and layers
-    while maintaining case-insensitive uniqueness. It can either modify the table in-place
-    or return a new sanitized copy.
-
-    Parameters
-    ----------
-    data
-        The AnnData table to sanitize
-    inplace
-        Whether to modify the table in-place or return a new copy
-
-    Returns
-    -------
-    If inplace is False, returns a new AnnData object with sanitized keys.
-    If inplace is True, returns None as the original object is modified.
-
-    Examples
-    --------
-    >>> import anndata as ad
-    >>> adata = ad.AnnData(obs=pd.DataFrame({"@invalid#": [1, 2]}))
-    >>> # Create a new sanitized copy
-    >>> sanitized = sanitize_table(adata)
-    >>> print(sanitized.obs.columns)
-    Index(['invalid_'], dtype='object')
-    >>> # Or modify in-place
-    >>> sanitize_table(adata, inplace=True)
-    >>> print(adata.obs.columns)
-    Index(['invalid_'], dtype='object')
-    """
-    import copy
-    from collections import defaultdict
-
-    # Create a deep copy if not modifying in-place
-    sanitized = data if inplace else copy.deepcopy(data)
-
-    # Track used names to maintain case-insensitive uniqueness
-    used_names: dict[str, set[str]] = defaultdict(set)
-    used_names_lower: dict[str, set[str]] = defaultdict(set)
-
-    def get_unique_name(name: str, attr: str, is_dataframe_column: bool = False) -> str:
-        base_name = sanitize_name(name, is_dataframe_column)
-        normalized_base = base_name.lower()
-
-        # If this exact name is already used, add a number
-        if normalized_base in used_names_lower[attr]:
-            counter = 1
-            while f"{base_name}_{counter}".lower() in used_names_lower[attr]:
-                counter += 1
-            base_name = f"{base_name}_{counter}"
-
-        used_names[attr].add(base_name)
-        used_names_lower[attr].add(base_name.lower())
-        return base_name
-
-    # Handle obs and var (dataframe columns)
-    for attr in ("obs", "var"):
-        df = getattr(sanitized, attr)
-        new_columns = {old: get_unique_name(old, attr, is_dataframe_column=True) for old in df.columns}
-        df.rename(columns=new_columns, inplace=True)
-
-    # Handle other attributes
-    for attr in ("obsm", "obsp", "varm", "varp", "uns", "layers"):
-        d = getattr(sanitized, attr)
-        new_keys = {old: get_unique_name(old, attr) for old in d}
-        # Create new dictionary with sanitized keys
-        new_dict = {new_keys[old]: value for old, value in d.items()}
-        setattr(sanitized, attr, new_dict)
-
-    return None if inplace else sanitized
diff --git a/src/spatialdata/models/__init__.py b/src/spatialdata/models/__init__.py
index 3c86fa0ec..d040514ac 100644
--- a/src/spatialdata/models/__init__.py
+++ b/src/spatialdata/models/__init__.py
@@ -55,4 +55,5 @@
     "set_channel_names",
     "force_2d",
     "RasterSchema",
+    "sani",
 ]
diff --git a/tests/utils/test_sanitize.py b/tests/utils/test_sanitize.py
index b567cc53d..23c374cae 100644
--- a/tests/utils/test_sanitize.py
+++ b/tests/utils/test_sanitize.py
@@ -6,7 +6,7 @@
 from anndata import AnnData
 
 from spatialdata import SpatialData
-from spatialdata._utils import sanitize_name, sanitize_table
+from spatialdata._core._utils import sanitize_name, sanitize_table
 
 
 @pytest.fixture
@@ -63,7 +63,10 @@ def sdata_sanitized_tables(invalid_table, invalid_table_with_index) -> SpatialDa
         ("", "unnamed"),
         (".", "unnamed"),
         ("..", "unnamed"),
-        ("__private", "private"),
+        ("__", "_"),
+        ("___", "_"),
+        ("____#@$@", "_"),
+        ("__private", "_private"),
     ],
 )
 def test_sanitize_name_strips_special_chars(raw, expected):
@@ -74,9 +77,9 @@ def test_sanitize_name_strips_special_chars(raw, expected):
     "raw,is_df_col,expected",
     [
         ("_index", True, "index"),
-        ("_index", False, "index"),
+        ("_index", False, "_index"),
         ("valid@column", True, "valid_column"),
-        ("__private", True, "private"),
+        ("__private", True, "_private"),
     ],
 )
 def test_sanitize_name_dataframe_column(raw, is_df_col, expected):
@@ -91,7 +94,7 @@ def test_sanitize_name_dataframe_column(raw, is_df_col, expected):
 def test_sanitize_table_basic_columns(invalid_table, invalid_table_with_index):
     ad1 = sanitize_table(invalid_table, inplace=False)
     assert isinstance(ad1, AnnData)
-    assert list(ad1.obs.columns) == ["invalid_", "valid_name", "private"]
+    assert list(ad1.obs.columns) == ["_invalid_", "valid_name", "_private"]
 
     ad2 = sanitize_table(invalid_table_with_index, inplace=False)
     assert list(ad2.obs.columns) == ["invalid_name", "index"]
@@ -103,7 +106,7 @@ def test_sanitize_table_basic_columns(invalid_table, invalid_table_with_index):
 def test_sanitize_table_inplace_copy(invalid_table):
     ad = invalid_table.copy()
     sanitize_table(ad)  # inplace=True is now default
-    assert list(ad.obs.columns) == ["invalid_", "valid_name", "private"]
+    assert list(ad.obs.columns) == ["_invalid_", "valid_name", "_private"]
 
 
 def test_sanitize_table_case_insensitive_collisions():
@@ -138,7 +141,7 @@ def test_sanitize_table_whitespace_collision():
 def test_sanitize_table_obs_and_obs_columns():
     ad = AnnData(obs=pd.DataFrame({"@col": [1, 2]}))
     sanitized = sanitize_table(ad, inplace=False)
-    assert list(sanitized.obs.columns) == ["col"]
+    assert list(sanitized.obs.columns) == ["_col"]
 
 
 def test_sanitize_table_obsm_and_obsp():
@@ -146,7 +149,7 @@ def test_sanitize_table_obsm_and_obsp():
     ad.obsm["@col"] = np.array([[1, 2], [3, 4]])
     ad.obsp["bad name"] = np.array([[1, 2], [3, 4]])
     sanitized = sanitize_table(ad, inplace=False)
-    assert list(sanitized.obsm.keys()) == ["col"]
+    assert list(sanitized.obsm.keys()) == ["_col"]
     assert list(sanitized.obsp.keys()) == ["bad_name"]
 
 
@@ -155,8 +158,8 @@ def test_sanitize_table_varm_and_varp():
     ad.varm["__priv"] = np.array([[1, 2], [3, 4]])
     ad.varp["_index"] = np.array([[1, 2], [3, 4]])
     sanitized = sanitize_table(ad, inplace=False)
-    assert list(sanitized.varm.keys()) == ["priv"]
-    assert list(sanitized.varp.keys()) == ["index"]
+    assert list(sanitized.varm.keys()) == ["_priv"]
+    assert list(sanitized.varp.keys()) == ["_index"]
 
 
 def test_sanitize_table_uns_and_layers():
@@ -181,9 +184,9 @@ def test_sanitize_table_preserves_underlying_data():
     ad.obsm["@invalid#"] = np.array([[1, 2], [3, 4]])
     ad.uns["invalid@key"] = "value"
     sanitized = sanitize_table(ad, inplace=False)
-    assert sanitized.obs["invalid_"].tolist() == [1, 2]
+    assert sanitized.obs["_invalid_"].tolist() == [1, 2]
     assert sanitized.obs["valid"].tolist() == [3, 4]
-    assert np.array_equal(sanitized.obsm["invalid_"], np.array([[1, 2], [3, 4]]))
+    assert np.array_equal(sanitized.obsm["_invalid_"], np.array([[1, 2], [3, 4]]))
     assert sanitized.uns["invalid_key"] == "value"
 
 
@@ -195,7 +198,7 @@ def test_sanitize_table_preserves_underlying_data():
 def test_sanitize_table_in_spatialdata_sanitized_fixture(sdata_sanitized_tables):
     t1 = sdata_sanitized_tables.tables["table1"]
     t2 = sdata_sanitized_tables.tables["table2"]
-    assert list(t1.obs.columns) == ["invalid_", "valid_name", "private"]
+    assert list(t1.obs.columns) == ["_invalid_", "valid_name", "_private"]
     assert list(t2.obs.columns) == ["invalid_name", "index"]
 
 
@@ -206,7 +209,7 @@ def test_spatialdata_retains_other_elements(full_sdata, sdata_sanitized_tables):
     full_sdata.tables["new_table"] = tbl
 
     # Verify columns and presence of other SpatialData attributes
-    assert list(full_sdata.tables["new_table"].obs.columns) == ["foo_", "bar"]
+    assert list(full_sdata.tables["new_table"].obs.columns) == ["_foo_", "bar"]
     assert "image2d" in full_sdata.images
     assert "labels2d" in full_sdata.labels
     assert "points_0" in full_sdata.points

From 1ec773b541e26bc0a819147afd4bfffe8ad493b1 Mon Sep 17 00:00:00 2001
From: Wouter-Michiel Vierdag <w-mv@hotmail.com>
Date: Tue, 27 May 2025 17:18:24 +0200
Subject: [PATCH 10/13] Apply suggestions from code review

---
 src/spatialdata/_core/_utils.py    |  1 -
 src/spatialdata/models/__init__.py |  1 -
 tests/utils/test_sanitize.py       | 17 +----------------
 3 files changed, 1 insertion(+), 18 deletions(-)

diff --git a/src/spatialdata/_core/_utils.py b/src/spatialdata/_core/_utils.py
index 12e794374..ce1203438 100644
--- a/src/spatialdata/_core/_utils.py
+++ b/src/spatialdata/_core/_utils.py
@@ -133,7 +133,6 @@ def sanitize_table(data: AnnData, inplace: bool = True) -> AnnData | None:
     sanitized = data if inplace else copy.deepcopy(data)
 
     # Track used names to maintain case-insensitive uniqueness
-    used_names: dict[str, set[str]] = defaultdict(set)
     used_names_lower: dict[str, set[str]] = defaultdict(set)
 
     def get_unique_name(name: str, attr: str, is_dataframe_column: bool = False) -> str:
diff --git a/src/spatialdata/models/__init__.py b/src/spatialdata/models/__init__.py
index d040514ac..3c86fa0ec 100644
--- a/src/spatialdata/models/__init__.py
+++ b/src/spatialdata/models/__init__.py
@@ -55,5 +55,4 @@
     "set_channel_names",
     "force_2d",
     "RasterSchema",
-    "sani",
 ]
diff --git a/tests/utils/test_sanitize.py b/tests/utils/test_sanitize.py
index 23c374cae..fb7cd2024 100644
--- a/tests/utils/test_sanitize.py
+++ b/tests/utils/test_sanitize.py
@@ -34,18 +34,6 @@ def invalid_table_with_index() -> AnnData:
             }
         )
     )
-
-
-@pytest.fixture
-def sdata_sanitized_tables(invalid_table, invalid_table_with_index) -> SpatialData:
-    """SpatialData built from sanitized copies of the invalid tables."""
-    table1 = invalid_table.copy()
-    table2 = invalid_table_with_index.copy()
-    sanitize_table(table1)
-    sanitize_table(table2)
-    return SpatialData(tables={"table1": table1, "table2": table2})
-
-
 # -----------------------------------------------------------------------------
 # sanitize_name tests
 # -----------------------------------------------------------------------------
@@ -202,7 +190,7 @@ def test_sanitize_table_in_spatialdata_sanitized_fixture(sdata_sanitized_tables)
     assert list(t2.obs.columns) == ["invalid_name", "index"]
 
 
-def test_spatialdata_retains_other_elements(full_sdata, sdata_sanitized_tables):
+def test_spatialdata_retains_other_elements(full_sdata):
     # Add another sanitized table into an existing full_sdata
     tbl = AnnData(obs=pd.DataFrame({"@foo#": [1, 2], "bar": [3, 4]}))
     sanitize_table(tbl)
@@ -210,6 +198,3 @@ def test_spatialdata_retains_other_elements(full_sdata, sdata_sanitized_tables):
 
     # Verify columns and presence of other SpatialData attributes
     assert list(full_sdata.tables["new_table"].obs.columns) == ["_foo_", "bar"]
-    assert "image2d" in full_sdata.images
-    assert "labels2d" in full_sdata.labels
-    assert "points_0" in full_sdata.points

From 38bb5546ff929ddc1deb73fab77f7e31c875f8fd Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 27 May 2025 15:18:39 +0000
Subject: [PATCH 11/13] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 tests/utils/test_sanitize.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/utils/test_sanitize.py b/tests/utils/test_sanitize.py
index fb7cd2024..85953ca25 100644
--- a/tests/utils/test_sanitize.py
+++ b/tests/utils/test_sanitize.py
@@ -5,7 +5,6 @@
 import pytest
 from anndata import AnnData
 
-from spatialdata import SpatialData
 from spatialdata._core._utils import sanitize_name, sanitize_table
 
 
@@ -34,6 +33,8 @@ def invalid_table_with_index() -> AnnData:
             }
         )
     )
+
+
 # -----------------------------------------------------------------------------
 # sanitize_name tests
 # -----------------------------------------------------------------------------

From ad1c573120e6b4481c052360e60a0e4bccdac65e Mon Sep 17 00:00:00 2001
From: Wouter-Michiel Vierdag <michiel.vierdag@embl.de>
Date: Tue, 27 May 2025 17:20:16 +0200
Subject: [PATCH 12/13] move sanitize_table calls into test directly

---
 tests/utils/test_sanitize.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/tests/utils/test_sanitize.py b/tests/utils/test_sanitize.py
index fb7cd2024..b4999a48a 100644
--- a/tests/utils/test_sanitize.py
+++ b/tests/utils/test_sanitize.py
@@ -34,6 +34,8 @@ def invalid_table_with_index() -> AnnData:
             }
         )
     )
+
+
 # -----------------------------------------------------------------------------
 # sanitize_name tests
 # -----------------------------------------------------------------------------
@@ -183,7 +185,13 @@ def test_sanitize_table_preserves_underlying_data():
 # -----------------------------------------------------------------------------
 
 
-def test_sanitize_table_in_spatialdata_sanitized_fixture(sdata_sanitized_tables):
+def test_sanitize_table_in_spatialdata_sanitized_fixture(invalid_table, invalid_table_with_index):
+    table1 = invalid_table.copy()
+    table2 = invalid_table_with_index.copy()
+    sanitize_table(table1)
+    sanitize_table(table2)
+    sdata_sanitized_tables = SpatialData(tables={"table1": table1, "table2": table2})
+
     t1 = sdata_sanitized_tables.tables["table1"]
     t2 = sdata_sanitized_tables.tables["table2"]
     assert list(t1.obs.columns) == ["_invalid_", "valid_name", "_private"]

From b994960e0fb21856114785d8cdeb8977a92ce809 Mon Sep 17 00:00:00 2001
From: Wouter-Michiel Vierdag <michiel.vierdag@embl.de>
Date: Tue, 27 May 2025 17:25:23 +0200
Subject: [PATCH 13/13] remove unnecessary used_names

---
 src/spatialdata/_core/_utils.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/spatialdata/_core/_utils.py b/src/spatialdata/_core/_utils.py
index ce1203438..a55815655 100644
--- a/src/spatialdata/_core/_utils.py
+++ b/src/spatialdata/_core/_utils.py
@@ -146,7 +146,6 @@ def get_unique_name(name: str, attr: str, is_dataframe_column: bool = False) ->
                 counter += 1
             base_name = f"{base_name}_{counter}"
 
-        used_names[attr].add(base_name)
         used_names_lower[attr].add(base_name.lower())
         return base_name