From 5aa32bfa71695cf3c784e36c2750ee6882e628cc Mon Sep 17 00:00:00 2001 From: Quentin Blampey Date: Wed, 2 Apr 2025 15:39:23 +0200 Subject: [PATCH 1/4] account for the padding shift during vectorization + use multipolygon --- src/spatialdata/_core/operations/vectorize.py | 16 ++++++------ tests/core/operations/test_vectorize.py | 25 +++++++++++++++++-- 2 files changed, 31 insertions(+), 10 deletions(-) diff --git a/src/spatialdata/_core/operations/vectorize.py b/src/spatialdata/_core/operations/vectorize.py index c34ce5257..1236676b4 100644 --- a/src/spatialdata/_core/operations/vectorize.py +++ b/src/spatialdata/_core/operations/vectorize.py @@ -227,30 +227,30 @@ def _vectorize_chunk(chunk: np.ndarray, yoff: int, xoff: int) -> GeoDataFrame: return ShapesModel.parse(gdf, transformations=transformations.copy()) -def _region_props_to_polygons(region_props: RegionProperties) -> list[Polygon]: +def _region_props_to_polygon(region_props: RegionProperties) -> MultiPolygon: mask = np.pad(region_props.image, 1) contours = skimage.measure.find_contours(mask, 0.5) # shapes with <= 3 vertices, i.e. lines, can't be converted into a polygon polygons = [Polygon(contour[:, [1, 0]]) for contour in contours if contour.shape[0] >= 4] + polygon = MultiPolygon(polygons) if len(polygons) > 1 else polygons[0] yoff, xoff, *_ = region_props.bbox - return [shapely.affinity.translate(poly, xoff, yoff) for poly in polygons] + return shapely.affinity.translate(polygon, xoff - 1, yoff - 1) # account for the padding def _vectorize_mask( mask: np.ndarray, # type: ignore[type-arg] ) -> GeoDataFrame: if mask.max() == 0: - return GeoDataFrame(geometry=[]) + return GeoDataFrame({"label": []}, geometry=[]) regions = skimage.measure.regionprops(mask) - polygons_list = [_region_props_to_polygons(region) for region in regions] - geoms = [poly for polygons in polygons_list for poly in polygons] - labels = [region.label for i, region in enumerate(regions) for _ in range(len(polygons_list[i]))] - - return GeoDataFrame({"label": labels}, geometry=geoms) + return GeoDataFrame( + {"label": [region.label for region in regions]}, + geometry=[_region_props_to_polygon(region) for region in regions], + ) def _dissolve_on_overlaps(label: int, group: GeoDataFrame) -> GeoDataFrame: diff --git a/tests/core/operations/test_vectorize.py b/tests/core/operations/test_vectorize.py index 2a867aba6..36abccbdf 100644 --- a/tests/core/operations/test_vectorize.py +++ b/tests/core/operations/test_vectorize.py @@ -3,9 +3,11 @@ import numpy as np import pytest from geopandas import GeoDataFrame -from shapely import MultiPoint, Point +from shapely import MultiPoint, Point, Polygon +from shapely.ops import unary_union +from skimage.draw import polygon -from spatialdata._core.operations.vectorize import to_circles, to_polygons +from spatialdata._core.operations.vectorize import _vectorize_mask, to_circles, to_polygons from spatialdata.datasets import blobs from spatialdata.models.models import ShapesModel from spatialdata.testing import assert_elements_are_identical @@ -139,3 +141,22 @@ def test_invalid_geodataframe_to_polygons() -> None: gdf = GeoDataFrame(geometry=[MultiPoint([[0, 0], [1, 1]])]) with pytest.raises(RuntimeError, match="Unsupported geometry type"): to_polygons(gdf) + + +def test_vectorize_mask_almost_invertible() -> None: + cell = Polygon([[10, 10], [30, 40], [90, 50], [100, 20]]) + image_shape = (70, 120) + + rasterized_image = np.zeros(image_shape, dtype=np.int8) + x, y = cell.exterior.coords.xy + rr, cc = polygon(y, x, image_shape) + rasterized_image[rr, cc] = 1 + + new_cell = _vectorize_mask(rasterized_image) + new_cell = unary_union(new_cell.geometry) + + assert new_cell.intersection(cell).area / new_cell.union(cell).area > 0.97 + +def test_label_column_vectorize_mask() -> None: + assert "label" in _vectorize_mask(np.array([0])) + assert "label" in _vectorize_mask(np.array([[0, 1], [1, 1]])) From e7de1e3aaf226d418d55b70ce51bca035e1383bf Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 2 Apr 2025 13:43:41 +0000 Subject: [PATCH 2/4] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/core/operations/test_vectorize.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/core/operations/test_vectorize.py b/tests/core/operations/test_vectorize.py index 36abccbdf..07ce9d370 100644 --- a/tests/core/operations/test_vectorize.py +++ b/tests/core/operations/test_vectorize.py @@ -157,6 +157,7 @@ def test_vectorize_mask_almost_invertible() -> None: assert new_cell.intersection(cell).area / new_cell.union(cell).area > 0.97 + def test_label_column_vectorize_mask() -> None: assert "label" in _vectorize_mask(np.array([0])) assert "label" in _vectorize_mask(np.array([[0, 1], [1, 1]])) From 527be66440833a7c35a2f359a4d18650cb3f12f2 Mon Sep 17 00:00:00 2001 From: Quentin Blampey Date: Wed, 2 Apr 2025 15:48:32 +0200 Subject: [PATCH 3/4] use correct typing MultiPolygon | Polygon + assert polygons --- src/spatialdata/_core/operations/vectorize.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/spatialdata/_core/operations/vectorize.py b/src/spatialdata/_core/operations/vectorize.py index 1236676b4..5b1740b22 100644 --- a/src/spatialdata/_core/operations/vectorize.py +++ b/src/spatialdata/_core/operations/vectorize.py @@ -227,12 +227,15 @@ def _vectorize_chunk(chunk: np.ndarray, yoff: int, xoff: int) -> GeoDataFrame: return ShapesModel.parse(gdf, transformations=transformations.copy()) -def _region_props_to_polygon(region_props: RegionProperties) -> MultiPolygon: +def _region_props_to_polygon(region_props: RegionProperties) -> MultiPolygon | Polygon: mask = np.pad(region_props.image, 1) contours = skimage.measure.find_contours(mask, 0.5) # shapes with <= 3 vertices, i.e. lines, can't be converted into a polygon polygons = [Polygon(contour[:, [1, 0]]) for contour in contours if contour.shape[0] >= 4] + + assert len(polygons) > 0, "No valid polygon found in the region properties." + polygon = MultiPolygon(polygons) if len(polygons) > 1 else polygons[0] yoff, xoff, *_ = region_props.bbox From ae6e571915dd6a03108596c9868dd25f359cb109 Mon Sep 17 00:00:00 2001 From: Luca Marconato Date: Mon, 5 May 2025 18:30:38 -0400 Subject: [PATCH 4/4] stricter check _region_props_to_polygon() --- src/spatialdata/_core/operations/vectorize.py | 9 +++++---- tests/core/operations/test_vectorize.py | 16 +++++++++++++--- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/src/spatialdata/_core/operations/vectorize.py b/src/spatialdata/_core/operations/vectorize.py index 5b1740b22..53f1f7e96 100644 --- a/src/spatialdata/_core/operations/vectorize.py +++ b/src/spatialdata/_core/operations/vectorize.py @@ -231,10 +231,11 @@ def _region_props_to_polygon(region_props: RegionProperties) -> MultiPolygon | P mask = np.pad(region_props.image, 1) contours = skimage.measure.find_contours(mask, 0.5) - # shapes with <= 3 vertices, i.e. lines, can't be converted into a polygon - polygons = [Polygon(contour[:, [1, 0]]) for contour in contours if contour.shape[0] >= 4] - - assert len(polygons) > 0, "No valid polygon found in the region properties." + invalid_polygons = any(contour.shape[0] <= 3 for contour in contours) + if invalid_polygons: + # this should not happen because even a single pixel should can be converted to a polygon + raise RuntimeError("Invalid polygon found in the region properties.") + polygons = [Polygon(contour[:, [1, 0]]) for contour in contours] polygon = MultiPolygon(polygons) if len(polygons) > 1 else polygons[0] diff --git a/tests/core/operations/test_vectorize.py b/tests/core/operations/test_vectorize.py index 07ce9d370..cf5e2794c 100644 --- a/tests/core/operations/test_vectorize.py +++ b/tests/core/operations/test_vectorize.py @@ -7,7 +7,11 @@ from shapely.ops import unary_union from skimage.draw import polygon -from spatialdata._core.operations.vectorize import _vectorize_mask, to_circles, to_polygons +from spatialdata._core.operations.vectorize import ( + _vectorize_mask, + to_circles, + to_polygons, +) from spatialdata.datasets import blobs from spatialdata.models.models import ShapesModel from spatialdata.testing import assert_elements_are_identical @@ -76,7 +80,10 @@ def test_polygons_to_circles() -> None: new_circles = to_circles(element) data = { - "geometry": [Point(315.8120722406787, 220.18894606643332), Point(270.1386975678398, 417.8747936281634)], + "geometry": [ + Point(315.8120722406787, 220.18894606643332), + Point(270.1386975678398, 417.8747936281634), + ], "radius": [16.608781, 17.541365], } expected = ShapesModel.parse(GeoDataFrame(data, geometry="geometry")) @@ -89,7 +96,10 @@ def test_multipolygons_to_circles() -> None: new_circles = to_circles(element) data = { - "geometry": [Point(340.37951022629096, 250.76310705786318), Point(337.1680699150594, 316.39984581697314)], + "geometry": [ + Point(340.37951022629096, 250.76310705786318), + Point(337.1680699150594, 316.39984581697314), + ], "radius": [23.488363, 19.059285], } expected = ShapesModel.parse(GeoDataFrame(data, geometry="geometry"))