diff --git a/src/spatialdata/_core/operations/vectorize.py b/src/spatialdata/_core/operations/vectorize.py index c34ce5257..53f1f7e96 100644 --- a/src/spatialdata/_core/operations/vectorize.py +++ b/src/spatialdata/_core/operations/vectorize.py @@ -227,30 +227,34 @@ def _vectorize_chunk(chunk: np.ndarray, yoff: int, xoff: int) -> GeoDataFrame: return ShapesModel.parse(gdf, transformations=transformations.copy()) -def _region_props_to_polygons(region_props: RegionProperties) -> list[Polygon]: +def _region_props_to_polygon(region_props: RegionProperties) -> MultiPolygon | Polygon: mask = np.pad(region_props.image, 1) contours = skimage.measure.find_contours(mask, 0.5) - # shapes with <= 3 vertices, i.e. lines, can't be converted into a polygon - polygons = [Polygon(contour[:, [1, 0]]) for contour in contours if contour.shape[0] >= 4] + invalid_polygons = any(contour.shape[0] <= 3 for contour in contours) + if invalid_polygons: + # this should not happen because even a single pixel should can be converted to a polygon + raise RuntimeError("Invalid polygon found in the region properties.") + polygons = [Polygon(contour[:, [1, 0]]) for contour in contours] + + polygon = MultiPolygon(polygons) if len(polygons) > 1 else polygons[0] yoff, xoff, *_ = region_props.bbox - return [shapely.affinity.translate(poly, xoff, yoff) for poly in polygons] + return shapely.affinity.translate(polygon, xoff - 1, yoff - 1) # account for the padding def _vectorize_mask( mask: np.ndarray, # type: ignore[type-arg] ) -> GeoDataFrame: if mask.max() == 0: - return GeoDataFrame(geometry=[]) + return GeoDataFrame({"label": []}, geometry=[]) regions = skimage.measure.regionprops(mask) - polygons_list = [_region_props_to_polygons(region) for region in regions] - geoms = [poly for polygons in polygons_list for poly in polygons] - labels = [region.label for i, region in enumerate(regions) for _ in range(len(polygons_list[i]))] - - return GeoDataFrame({"label": labels}, geometry=geoms) + return GeoDataFrame( + {"label": [region.label for region in regions]}, + geometry=[_region_props_to_polygon(region) for region in regions], + ) def _dissolve_on_overlaps(label: int, group: GeoDataFrame) -> GeoDataFrame: diff --git a/tests/core/operations/test_vectorize.py b/tests/core/operations/test_vectorize.py index 2a867aba6..cf5e2794c 100644 --- a/tests/core/operations/test_vectorize.py +++ b/tests/core/operations/test_vectorize.py @@ -3,9 +3,15 @@ import numpy as np import pytest from geopandas import GeoDataFrame -from shapely import MultiPoint, Point - -from spatialdata._core.operations.vectorize import to_circles, to_polygons +from shapely import MultiPoint, Point, Polygon +from shapely.ops import unary_union +from skimage.draw import polygon + +from spatialdata._core.operations.vectorize import ( + _vectorize_mask, + to_circles, + to_polygons, +) from spatialdata.datasets import blobs from spatialdata.models.models import ShapesModel from spatialdata.testing import assert_elements_are_identical @@ -74,7 +80,10 @@ def test_polygons_to_circles() -> None: new_circles = to_circles(element) data = { - "geometry": [Point(315.8120722406787, 220.18894606643332), Point(270.1386975678398, 417.8747936281634)], + "geometry": [ + Point(315.8120722406787, 220.18894606643332), + Point(270.1386975678398, 417.8747936281634), + ], "radius": [16.608781, 17.541365], } expected = ShapesModel.parse(GeoDataFrame(data, geometry="geometry")) @@ -87,7 +96,10 @@ def test_multipolygons_to_circles() -> None: new_circles = to_circles(element) data = { - "geometry": [Point(340.37951022629096, 250.76310705786318), Point(337.1680699150594, 316.39984581697314)], + "geometry": [ + Point(340.37951022629096, 250.76310705786318), + Point(337.1680699150594, 316.39984581697314), + ], "radius": [23.488363, 19.059285], } expected = ShapesModel.parse(GeoDataFrame(data, geometry="geometry")) @@ -139,3 +151,23 @@ def test_invalid_geodataframe_to_polygons() -> None: gdf = GeoDataFrame(geometry=[MultiPoint([[0, 0], [1, 1]])]) with pytest.raises(RuntimeError, match="Unsupported geometry type"): to_polygons(gdf) + + +def test_vectorize_mask_almost_invertible() -> None: + cell = Polygon([[10, 10], [30, 40], [90, 50], [100, 20]]) + image_shape = (70, 120) + + rasterized_image = np.zeros(image_shape, dtype=np.int8) + x, y = cell.exterior.coords.xy + rr, cc = polygon(y, x, image_shape) + rasterized_image[rr, cc] = 1 + + new_cell = _vectorize_mask(rasterized_image) + new_cell = unary_union(new_cell.geometry) + + assert new_cell.intersection(cell).area / new_cell.union(cell).area > 0.97 + + +def test_label_column_vectorize_mask() -> None: + assert "label" in _vectorize_mask(np.array([0])) + assert "label" in _vectorize_mask(np.array([[0, 1], [1, 1]]))