From 409900de1ed1b60778e9f1a0712571e102c0eb5b Mon Sep 17 00:00:00 2001 From: Matthias Fabian Meyer-Bender Date: Mon, 30 Mar 2026 11:46:26 +0000 Subject: [PATCH 01/11] Removed unnecessary compute() call --- src/spatialdata/_core/query/spatial_query.py | 57 +++++++------------- 1 file changed, 20 insertions(+), 37 deletions(-) diff --git a/src/spatialdata/_core/query/spatial_query.py b/src/spatialdata/_core/query/spatial_query.py index c29e3bc0e..01445df75 100644 --- a/src/spatialdata/_core/query/spatial_query.py +++ b/src/spatialdata/_core/query/spatial_query.py @@ -386,41 +386,21 @@ def _bounding_box_mask_points( axes: tuple[str, ...], min_coordinate: list[Number] | ArrayLike, max_coordinate: list[Number] | ArrayLike, + points_df: pd.DataFrame | None = None, # <-- new parameter ) -> list[ArrayLike]: - """Compute a mask that is true for the points inside axis-aligned bounding boxes. - - Parameters - ---------- - points - The points element to perform the query on. - axes - The axes that min_coordinate and max_coordinate refer to. - min_coordinate - PLACEHOLDER - The upper left hand corners of the bounding boxes (i.e., minimum coordinates along all dimensions). - Shape: (n_boxes, n_axes) or (n_axes,) for a single box. - {min_coordinate_docs} - max_coordinate - The lower right hand corners of the bounding boxes (i.e., the maximum coordinates along all dimensions). - Shape: (n_boxes, n_axes) or (n_axes,) for a single box. - {max_coordinate_docs} - - Returns - ------- - The masks for the points inside the bounding boxes. - """ + # TODO: add docstring back element_axes = get_axes_names(points) - min_coordinate = _parse_list_into_array(min_coordinate) max_coordinate = _parse_list_into_array(max_coordinate) - - # Ensure min_coordinate and max_coordinate are 2D arrays min_coordinate = min_coordinate[np.newaxis, :] if min_coordinate.ndim == 1 else min_coordinate max_coordinate = max_coordinate[np.newaxis, :] if max_coordinate.ndim == 1 else max_coordinate + # Compute once here only if the caller hasn't already done so + if points_df is None: + points_df = points.compute() + n_boxes = min_coordinate.shape[0] in_bounding_box_masks = [] - for box in range(n_boxes): box_masks = [] for axis_index, axis_name in enumerate(axes): @@ -428,7 +408,8 @@ def _bounding_box_mask_points( continue min_value = min_coordinate[box, axis_index] max_value = max_coordinate[box, axis_index] - box_masks.append(points[axis_name].gt(min_value).compute() & points[axis_name].lt(max_value).compute()) + col = points_df[axis_name].values # <-- numpy array, no Dask + box_masks.append((col > min_value) & (col < max_value)) bounding_box_mask = np.stack(box_masks, axis=-1) in_bounding_box_masks.append(np.all(bounding_box_mask, axis=1)) return in_bounding_box_masks @@ -663,19 +644,19 @@ def _( max_coordinate_intrinsic = max_coordinate_intrinsic.data # get the points in the intrinsic coordinate bounding box + points_pd = points.compute() # <-- moved up, single materialization in_intrinsic_bounding_box = _bounding_box_mask_points( points=points, axes=intrinsic_axes, min_coordinate=min_coordinate_intrinsic, max_coordinate=max_coordinate_intrinsic, + points_df=points_pd, # <-- pass it in ) if not (len_df := len(in_intrinsic_bounding_box)) == (len_bb := len(min_coordinate)): - raise ValueError( - f"Length of list of dataframes `{len_df}` is not equal to the number of bounding boxes axes `{len_bb}`." - ) + raise ValueError(...) points_in_intrinsic_bounding_box: list[DaskDataFrame | None] = [] - points_pd = points.compute() + attrs = points.attrs.copy() for mask_np in in_intrinsic_bounding_box: if mask_np.sum() == 0: @@ -715,22 +696,24 @@ def _( points_query_coordinate_system = transform( p, to_coordinate_system=target_coordinate_system, maintain_positioning=False ) - - # get a mask for the points in the bounding box + # Materialize once; reuse for both the mask and the final slice + transformed_pd = points_query_coordinate_system.compute() bounding_box_mask = _bounding_box_mask_points( points=points_query_coordinate_system, axes=axes, - min_coordinate=min_c, # type: ignore[arg-type] - max_coordinate=max_c, # type: ignore[arg-type] + min_coordinate=min_c, + max_coordinate=max_c, + points_df=transformed_pd, # <-- pass it in ) if len(bounding_box_mask) != 1: raise ValueError(f"Expected a single mask, got {len(bounding_box_mask)} masks. Please report this bug.") bounding_box_indices = np.where(bounding_box_mask[0])[0] - if len(bounding_box_indices) == 0: output.append(None) else: - points_df = p.compute().iloc[bounding_box_indices] + # Use the already-materialized intrinsic-space frame for the final result, + # not the transformed one (we want to return data in intrinsic coordinates) + points_df = points_pd[mask_np].iloc[bounding_box_indices] # no .compute() old_transformations = get_transformation(p, get_all=True) assert isinstance(old_transformations, dict) feature_key = p.attrs.get(ATTRS_KEY, {}).get(PointsModel.FEATURE_KEY) From 054990f2e42bd25a802b513657e417454f684589 Mon Sep 17 00:00:00 2001 From: Matthias Fabian Meyer-Bender Date: Mon, 30 Mar 2026 11:50:19 +0000 Subject: [PATCH 02/11] Using the R-tree for spatial querying of shapes --- src/spatialdata/_core/query/spatial_query.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/spatialdata/_core/query/spatial_query.py b/src/spatialdata/_core/query/spatial_query.py index 01445df75..3556b43bb 100644 --- a/src/spatialdata/_core/query/spatial_query.py +++ b/src/spatialdata/_core/query/spatial_query.py @@ -774,8 +774,8 @@ def _( ) for box_corners in intrinsic_bounding_box_corners: bounding_box_non_axes_aligned = Polygon(box_corners.data) - indices = polygons.geometry.intersects(bounding_box_non_axes_aligned) - queried = polygons[indices] + candidate_idx = polygons.sindex.query(bounding_box_non_axes_aligned, predicate="intersects") + queried = polygons.iloc[candidate_idx] if len(queried) == 0: queried_polygon = None else: From cd454dccd9039fa9740cb37ee28cc6445da3a559 Mon Sep 17 00:00:00 2001 From: Matthias Fabian Meyer-Bender Date: Mon, 30 Mar 2026 13:28:19 +0000 Subject: [PATCH 03/11] Cleanup --- src/spatialdata/_core/query/spatial_query.py | 42 +++++++++++++++----- 1 file changed, 33 insertions(+), 9 deletions(-) diff --git a/src/spatialdata/_core/query/spatial_query.py b/src/spatialdata/_core/query/spatial_query.py index 3556b43bb..371b4642a 100644 --- a/src/spatialdata/_core/query/spatial_query.py +++ b/src/spatialdata/_core/query/spatial_query.py @@ -265,7 +265,7 @@ def _adjust_bounding_box_to_real_axes( return axes_bb, min_coordinate, max_coordinate -def _get_case_of_bounding_box_query( +def _get_case_ofmaskquery( m_without_c_linear: ArrayLike, input_axes_without_c: tuple[str, ...], output_axes_without_c: tuple[str, ...], @@ -386,9 +386,33 @@ def _bounding_box_mask_points( axes: tuple[str, ...], min_coordinate: list[Number] | ArrayLike, max_coordinate: list[Number] | ArrayLike, - points_df: pd.DataFrame | None = None, # <-- new parameter + points_df: pd.DataFrame | None = None, ) -> list[ArrayLike]: - # TODO: add docstring back + """Compute a mask that is true for the points inside axis-aligned bounding boxes. + + Parameters + ---------- + points + The points element to perform the query on. + axes + The axes that min_coordinate and max_coordinate refer to. + min_coordinate + PLACEHOLDER + The upper left hand corners of the bounding boxes (i.e., minimum coordinates along all dimensions). + Shape: (n_boxes, n_axes) or (n_axes,) for a single box. + {min_coordinate_docs} + max_coordinate + The lower right hand corners of the bounding boxes (i.e., the maximum coordinates along all dimensions). + Shape: (n_boxes, n_axes) or (n_axes,) for a single box. + {max_coordinate_docs} + points_df + A pre-computed pandas dataframe. Useful if the points_df has already been materialized, otherwise the methods simply + calls .compute() on the dask data frame + + Returns + ------- + The masks for the points inside the bounding boxes. + """ element_axes = get_axes_names(points) min_coordinate = _parse_list_into_array(min_coordinate) max_coordinate = _parse_list_into_array(max_coordinate) @@ -408,7 +432,7 @@ def _bounding_box_mask_points( continue min_value = min_coordinate[box, axis_index] max_value = max_coordinate[box, axis_index] - col = points_df[axis_name].values # <-- numpy array, no Dask + col = points_df[axis_name].values box_masks.append((col > min_value) & (col < max_value)) bounding_box_mask = np.stack(box_masks, axis=-1) in_bounding_box_masks.append(np.all(bounding_box_mask, axis=1)) @@ -644,17 +668,17 @@ def _( max_coordinate_intrinsic = max_coordinate_intrinsic.data # get the points in the intrinsic coordinate bounding box - points_pd = points.compute() # <-- moved up, single materialization + points_pd = points.compute() in_intrinsic_bounding_box = _bounding_box_mask_points( points=points, axes=intrinsic_axes, min_coordinate=min_coordinate_intrinsic, max_coordinate=max_coordinate_intrinsic, - points_df=points_pd, # <-- pass it in + points_df=points_pd, ) if not (len_df := len(in_intrinsic_bounding_box)) == (len_bb := len(min_coordinate)): - raise ValueError(...) + raise ValueError(f"Length of list of dataframes `{len_df}` is not equal to the number of bounding boxes axes `{len_bb}`.") points_in_intrinsic_bounding_box: list[DaskDataFrame | None] = [] attrs = points.attrs.copy() @@ -703,7 +727,7 @@ def _( axes=axes, min_coordinate=min_c, max_coordinate=max_c, - points_df=transformed_pd, # <-- pass it in + points_df=transformed_pd, ) if len(bounding_box_mask) != 1: raise ValueError(f"Expected a single mask, got {len(bounding_box_mask)} masks. Please report this bug.") @@ -713,7 +737,7 @@ def _( else: # Use the already-materialized intrinsic-space frame for the final result, # not the transformed one (we want to return data in intrinsic coordinates) - points_df = points_pd[mask_np].iloc[bounding_box_indices] # no .compute() + points_df = points_pd[mask_np].iloc[bounding_box_indices] old_transformations = get_transformation(p, get_all=True) assert isinstance(old_transformations, dict) feature_key = p.attrs.get(ATTRS_KEY, {}).get(PointsModel.FEATURE_KEY) From 8d9844012297ba5582326dd82cd03a69d580048a Mon Sep 17 00:00:00 2001 From: Matthias Fabian Meyer-Bender Date: Mon, 30 Mar 2026 13:48:48 +0000 Subject: [PATCH 04/11] Cleanup --- src/spatialdata/_core/query/spatial_query.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/spatialdata/_core/query/spatial_query.py b/src/spatialdata/_core/query/spatial_query.py index 371b4642a..ff0f4ebc5 100644 --- a/src/spatialdata/_core/query/spatial_query.py +++ b/src/spatialdata/_core/query/spatial_query.py @@ -265,7 +265,7 @@ def _adjust_bounding_box_to_real_axes( return axes_bb, min_coordinate, max_coordinate -def _get_case_ofmaskquery( +def _get_case_of_bounding_box_query( m_without_c_linear: ArrayLike, input_axes_without_c: tuple[str, ...], output_axes_without_c: tuple[str, ...], From e0f0ba79a90e2b88fa566c4c64cbb1a1e87cf215 Mon Sep 17 00:00:00 2001 From: Matthias Fabian Meyer-Bender Date: Mon, 30 Mar 2026 13:51:04 +0000 Subject: [PATCH 05/11] Cleanup --- src/spatialdata/_core/query/spatial_query.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/spatialdata/_core/query/spatial_query.py b/src/spatialdata/_core/query/spatial_query.py index ff0f4ebc5..58fbdae56 100644 --- a/src/spatialdata/_core/query/spatial_query.py +++ b/src/spatialdata/_core/query/spatial_query.py @@ -725,8 +725,8 @@ def _( bounding_box_mask = _bounding_box_mask_points( points=points_query_coordinate_system, axes=axes, - min_coordinate=min_c, - max_coordinate=max_c, + min_coordinate=min_c, # type: ignore[arg-type] + max_coordinate=max_c, # type: ignore[arg-type] points_df=transformed_pd, ) if len(bounding_box_mask) != 1: From be2db1b0704b3f52c6b39f41258f936f60aac97b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 30 Mar 2026 13:53:58 +0000 Subject: [PATCH 06/11] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/spatialdata/_core/query/spatial_query.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/spatialdata/_core/query/spatial_query.py b/src/spatialdata/_core/query/spatial_query.py index 58fbdae56..85e868a58 100644 --- a/src/spatialdata/_core/query/spatial_query.py +++ b/src/spatialdata/_core/query/spatial_query.py @@ -406,7 +406,7 @@ def _bounding_box_mask_points( Shape: (n_boxes, n_axes) or (n_axes,) for a single box. {max_coordinate_docs} points_df - A pre-computed pandas dataframe. Useful if the points_df has already been materialized, otherwise the methods simply + A pre-computed pandas dataframe. Useful if the points_df has already been materialized, otherwise the methods simply calls .compute() on the dask data frame Returns @@ -678,7 +678,9 @@ def _( ) if not (len_df := len(in_intrinsic_bounding_box)) == (len_bb := len(min_coordinate)): - raise ValueError(f"Length of list of dataframes `{len_df}` is not equal to the number of bounding boxes axes `{len_bb}`.") + raise ValueError( + f"Length of list of dataframes `{len_df}` is not equal to the number of bounding boxes axes `{len_bb}`." + ) points_in_intrinsic_bounding_box: list[DaskDataFrame | None] = [] attrs = points.attrs.copy() From 80402885e2ca0966ae6070275a0d9c67ebc69971 Mon Sep 17 00:00:00 2001 From: dschaub95 Date: Tue, 31 Mar 2026 11:14:43 +0200 Subject: [PATCH 07/11] Optimize transformed point bounding box queries and add multi-box coverage --- src/spatialdata/_core/query/spatial_query.py | 149 ++++++++----------- tests/core/query/test_spatial_query.py | 29 ++++ 2 files changed, 88 insertions(+), 90 deletions(-) diff --git a/src/spatialdata/_core/query/spatial_query.py b/src/spatialdata/_core/query/spatial_query.py index 85e868a58..4658aed73 100644 --- a/src/spatialdata/_core/query/spatial_query.py +++ b/src/spatialdata/_core/query/spatial_query.py @@ -9,6 +9,7 @@ import dask.dataframe as dd import numpy as np +import pandas as pd from dask.dataframe import DataFrame as DaskDataFrame from geopandas import GeoDataFrame from shapely.geometry import MultiPolygon, Point, Polygon @@ -635,7 +636,6 @@ def _( max_coordinate: list[Number] | ArrayLike, target_coordinate_system: str, ) -> DaskDataFrame | list[DaskDataFrame] | None: - from spatialdata import transform from spatialdata.transformations import get_transformation min_coordinate = _parse_list_into_array(min_coordinate) @@ -653,104 +653,73 @@ def _( max_coordinate=max_coordinate, ) - # get the four corners of the bounding box (2D case), or the 8 corners of the "3D bounding box" (3D case) - (intrinsic_bounding_box_corners, intrinsic_axes) = _get_bounding_box_corners_in_intrinsic_coordinates( - element=points, - axes=axes, - min_coordinate=min_coordinate, - max_coordinate=max_coordinate, - target_coordinate_system=target_coordinate_system, + m_without_c, input_axes_without_c, output_axes_without_c = _get_axes_of_tranformation( + points, target_coordinate_system ) - min_coordinate_intrinsic = intrinsic_bounding_box_corners.min(dim="corner") - max_coordinate_intrinsic = intrinsic_bounding_box_corners.max(dim="corner") - - min_coordinate_intrinsic = min_coordinate_intrinsic.data - max_coordinate_intrinsic = max_coordinate_intrinsic.data + m_without_c_linear = m_without_c[:-1, :-1] + _ = _get_case_of_bounding_box_query( + m_without_c_linear, + input_axes_without_c, + output_axes_without_c, + ) + axes_adjusted, min_coordinate_adjusted, max_coordinate_adjusted = _adjust_bounding_box_to_real_axes( + axes, + min_coordinate, + max_coordinate, + output_axes_without_c, + ) + if axes_adjusted != output_axes_without_c: + raise RuntimeError("This should not happen") - # get the points in the intrinsic coordinate bounding box points_pd = points.compute() - in_intrinsic_bounding_box = _bounding_box_mask_points( - points=points, - axes=intrinsic_axes, - min_coordinate=min_coordinate_intrinsic, - max_coordinate=max_coordinate_intrinsic, - points_df=points_pd, + is_identity_transform = input_axes_without_c == output_axes_without_c and np.allclose( + m_without_c, np.eye(m_without_c.shape[0]) ) - - if not (len_df := len(in_intrinsic_bounding_box)) == (len_bb := len(min_coordinate)): - raise ValueError( - f"Length of list of dataframes `{len_df}` is not equal to the number of bounding boxes axes `{len_bb}`." + if is_identity_transform: + bounding_box_masks = _bounding_box_mask_points( + points=points, + axes=axes, + min_coordinate=min_coordinate, + max_coordinate=max_coordinate, + points_df=points_pd, ) - points_in_intrinsic_bounding_box: list[DaskDataFrame | None] = [] - - attrs = points.attrs.copy() - for mask_np in in_intrinsic_bounding_box: - if mask_np.sum() == 0: - points_in_intrinsic_bounding_box.append(None) - else: - # TODO there is a problem when mixing dask dataframe graph with dask array graph. Need to compute for now. - # we can't compute either mask or points as when we calculate either one of them - # test_query_points_multiple_partitions will fail as the mask will be used to index each partition. - # However, if we compute and then create the dask array again we get the mixed dask graph problem. - filtered_pd = points_pd[mask_np] - points_filtered = dd.from_pandas(filtered_pd, npartitions=points.npartitions) - points_filtered.attrs.update(attrs) - points_in_intrinsic_bounding_box.append(points_filtered) - if len(points_in_intrinsic_bounding_box) == 0: - return None + else: + query_coordinates = points_pd.loc[:, list(input_axes_without_c)].to_numpy(copy=False) + query_coordinates = query_coordinates @ m_without_c[:-1, :-1].T + m_without_c[:-1, -1] + + bounding_box_masks = [] + for box_index in range(min_coordinate_adjusted.shape[0]): + bounding_box_mask = np.ones(len(points_pd), dtype=bool) + for axis_index in range(len(output_axes_without_c)): + min_value = min_coordinate_adjusted[box_index, axis_index] + max_value = max_coordinate_adjusted[box_index, axis_index] + column = query_coordinates[:, axis_index] + bounding_box_mask &= (column > min_value) & (column < max_value) + bounding_box_masks.append(bounding_box_mask) + + if not (len_df := len(bounding_box_masks)) == (len_bb := len(min_coordinate)): + raise ValueError(f"Length of list of masks `{len_df}` is not equal to the number of bounding boxes `{len_bb}`.") + + old_transformations = get_transformation(points, get_all=True) + assert isinstance(old_transformations, dict) + feature_key = points.attrs.get(ATTRS_KEY, {}).get(PointsModel.FEATURE_KEY) - # assert that the number of queried points is correct - assert len(points_in_intrinsic_bounding_box) == len(min_coordinate) - - # # we have to reset the index since we have subset - # # https://stackoverflow.com/questions/61395351/how-to-reset-index-on-concatenated-dataframe-in-dask - # points_in_intrinsic_bounding_box = points_in_intrinsic_bounding_box.assign(idx=1) - # points_in_intrinsic_bounding_box = points_in_intrinsic_bounding_box.set_index( - # points_in_intrinsic_bounding_box.idx.cumsum() - 1 - # ) - # points_in_intrinsic_bounding_box = points_in_intrinsic_bounding_box.map_partitions( - # lambda df: df.rename(index={"idx": None}) - # ) - # points_in_intrinsic_bounding_box = points_in_intrinsic_bounding_box.drop(columns=["idx"]) - - # transform the element to the query coordinate system output: list[DaskDataFrame | None] = [] - for p, min_c, max_c in zip(points_in_intrinsic_bounding_box, min_coordinate, max_coordinate, strict=True): - if p is None: + for mask_np in bounding_box_masks: + bounding_box_indices = np.flatnonzero(mask_np) + if len(bounding_box_indices) == 0: output.append(None) - else: - points_query_coordinate_system = transform( - p, to_coordinate_system=target_coordinate_system, maintain_positioning=False - ) - # Materialize once; reuse for both the mask and the final slice - transformed_pd = points_query_coordinate_system.compute() - bounding_box_mask = _bounding_box_mask_points( - points=points_query_coordinate_system, - axes=axes, - min_coordinate=min_c, # type: ignore[arg-type] - max_coordinate=max_c, # type: ignore[arg-type] - points_df=transformed_pd, + continue + + # The exact mask is computed in the query coordinate system, but the returned points must stay intrinsic. + queried_points = points_pd.iloc[bounding_box_indices] + output.append( + PointsModel.parse( + dd.from_pandas(queried_points, npartitions=1), + transformations=old_transformations.copy(), + feature_key=feature_key, ) - if len(bounding_box_mask) != 1: - raise ValueError(f"Expected a single mask, got {len(bounding_box_mask)} masks. Please report this bug.") - bounding_box_indices = np.where(bounding_box_mask[0])[0] - if len(bounding_box_indices) == 0: - output.append(None) - else: - # Use the already-materialized intrinsic-space frame for the final result, - # not the transformed one (we want to return data in intrinsic coordinates) - points_df = points_pd[mask_np].iloc[bounding_box_indices] - old_transformations = get_transformation(p, get_all=True) - assert isinstance(old_transformations, dict) - feature_key = p.attrs.get(ATTRS_KEY, {}).get(PointsModel.FEATURE_KEY) - - output.append( - PointsModel.parse( - dd.from_pandas(points_df, npartitions=1), - transformations=old_transformations.copy(), - feature_key=feature_key, - ) - ) + ) if len(output) == 0: return None if len(output) == 1: diff --git a/tests/core/query/test_spatial_query.py b/tests/core/query/test_spatial_query.py index 7dadc6f63..8940ed904 100644 --- a/tests/core/query/test_spatial_query.py +++ b/tests/core/query/test_spatial_query.py @@ -676,6 +676,35 @@ def _query(p: DaskDataFrame) -> DaskDataFrame: assert np.array_equal(q0.index.compute(), q1.index.compute()) +def test_query_points_multiple_boxes_in_transformed_coordinate_system(): + from spatialdata.transformations import Affine + + points_element = _make_points(np.array([[10, 10], [20, 30], [20, 30], [40, 50]])) + set_transformation( + points_element, + transformation=Affine( + np.array([[1, 0, 100], [0, 1, -50], [0, 0, 1]]), + input_axes=("x", "y"), + output_axes=("x", "y"), + ), + to_coordinate_system="aligned", + ) + + points_result = bounding_box_query( + points_element, + axes=("x", "y"), + min_coordinate=np.array([[118, -22], [138, -2], [200, 200]]), + max_coordinate=np.array([[122, -18], [142, 2], [210, 210]]), + target_coordinate_system="aligned", + ) + + np.testing.assert_allclose(points_result[0]["x"].compute(), [20, 20]) + np.testing.assert_allclose(points_result[0]["y"].compute(), [30, 30]) + np.testing.assert_allclose(points_result[1]["x"].compute(), [40]) + np.testing.assert_allclose(points_result[1]["y"].compute(), [50]) + assert points_result[2] is None + + @pytest.mark.parametrize("with_polygon_query", [True, False]) @pytest.mark.parametrize( "name", From 6853ec826c0fc89c984f2ccbc4a4ae8fcbb48c8a Mon Sep 17 00:00:00 2001 From: dschaub95 Date: Tue, 31 Mar 2026 11:20:11 +0200 Subject: [PATCH 08/11] add comments --- src/spatialdata/_core/query/spatial_query.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/spatialdata/_core/query/spatial_query.py b/src/spatialdata/_core/query/spatial_query.py index 4658aed73..32ec704d5 100644 --- a/src/spatialdata/_core/query/spatial_query.py +++ b/src/spatialdata/_core/query/spatial_query.py @@ -671,10 +671,12 @@ def _( if axes_adjusted != output_axes_without_c: raise RuntimeError("This should not happen") + # materialize the points in the intrinsic coordinate system once points_pd = points.compute() is_identity_transform = input_axes_without_c == output_axes_without_c and np.allclose( m_without_c, np.eye(m_without_c.shape[0]) ) + # if the transform is identity, we can save extra for the affine transformation if is_identity_transform: bounding_box_masks = _bounding_box_mask_points( points=points, From 8b44d2ec9935b1b48b6d97b7dae90039e33a0678 Mon Sep 17 00:00:00 2001 From: Matthias Fabian Meyer-Bender Date: Tue, 31 Mar 2026 13:47:38 +0200 Subject: [PATCH 09/11] Used spatial indexing also for polygon query --- src/spatialdata/_core/query/spatial_query.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/src/spatialdata/_core/query/spatial_query.py b/src/spatialdata/_core/query/spatial_query.py index 32ec704d5..34250b592 100644 --- a/src/spatialdata/_core/query/spatial_query.py +++ b/src/spatialdata/_core/query/spatial_query.py @@ -929,17 +929,22 @@ def _( assert np.all(element[OLD_INDEX] == buffered.index) else: buffered[OLD_INDEX] = buffered.index - indices = buffered.geometry.apply(lambda x: x.intersects(polygon)) - if np.sum(indices) == 0: + + # Use sindex for fast candidate pre-filtering, then exact intersection check + # only on the (typically small) candidate set — same pattern as bounding_box_query. + candidate_idx = buffered.sindex.query(polygon, predicate="intersects") + if len(candidate_idx) == 0: + del buffered[OLD_INDEX] return None - queried_shapes = element[indices] - queried_shapes.index = buffered[indices][OLD_INDEX] + + queried_shapes = element.iloc[candidate_idx].copy() + queried_shapes.index = buffered.iloc[candidate_idx][OLD_INDEX] queried_shapes.index.name = None if clip: if isinstance(element.geometry.iloc[0], Point): - queried_shapes = buffered[indices] - queried_shapes.index = buffered[indices][OLD_INDEX] + queried_shapes = buffered.iloc[candidate_idx].copy() + queried_shapes.index = buffered.iloc[candidate_idx][OLD_INDEX] queried_shapes.index.name = None queried_shapes = queried_shapes.clip(polygon_gdf, keep_geom_type=True) From 394f998e51d7ffe873f214889152a7c102f3f2a1 Mon Sep 17 00:00:00 2001 From: Matthias Fabian Meyer-Bender Date: Wed, 1 Apr 2026 09:39:51 +0200 Subject: [PATCH 10/11] sped up querying for scaling transformation and removed warning about performance issues --- src/spatialdata/_core/query/spatial_query.py | 54 +++++++++++++++----- 1 file changed, 40 insertions(+), 14 deletions(-) diff --git a/src/spatialdata/_core/query/spatial_query.py b/src/spatialdata/_core/query/spatial_query.py index 34250b592..8b224ad9b 100644 --- a/src/spatialdata/_core/query/spatial_query.py +++ b/src/spatialdata/_core/query/spatial_query.py @@ -79,7 +79,7 @@ def _get_bounding_box_corners_in_intrinsic_coordinates( # compute the output axes of the transformation, remove c from input and output axes, return the matrix without c # and then build an affine transformation from that - m_without_c, input_axes_without_c, output_axes_without_c = _get_axes_of_tranformation( + m_without_c, input_axes_without_c, output_axes_without_c = _get_axes_of_transformation( element, target_coordinate_system ) spatial_transform = Affine(m_without_c, input_axes=input_axes_without_c, output_axes=output_axes_without_c) @@ -143,7 +143,7 @@ def _get_polygon_in_intrinsic_coordinates( polygon_gdf = ShapesModel.parse(GeoDataFrame(geometry=[polygon])) - m_without_c, input_axes_without_c, output_axes_without_c = _get_axes_of_tranformation( + m_without_c, input_axes_without_c, output_axes_without_c = _get_axes_of_transformation( element, target_coordinate_system ) spatial_transform = Affine(m_without_c, input_axes=input_axes_without_c, output_axes=output_axes_without_c) @@ -187,7 +187,7 @@ def _get_polygon_in_intrinsic_coordinates( return transform(polygon_gdf, to_coordinate_system="inverse") -def _get_axes_of_tranformation( +def _get_axes_of_transformation( element: SpatialElement, target_coordinate_system: str ) -> tuple[ArrayLike, tuple[str, ...], tuple[str, ...]]: """ @@ -322,6 +322,11 @@ def _get_case_of_bounding_box_query( return case +def _is_scaling_transform(m_linear: np.ndarray) -> bool: + """True when the linear part is a diagonal (pure scaling) matrix.""" + return np.allclose(m_linear, np.diag(np.diagonal(m_linear))) + + @dataclass(frozen=True) class BaseSpatialRequest: """Base class for spatial queries.""" @@ -520,16 +525,6 @@ def _( min_coordinate = _parse_list_into_array(min_coordinate) max_coordinate = _parse_list_into_array(max_coordinate) new_elements = {} - if sdata.points: - warnings.warn( - ( - "The object has `points` element. Depending on the number of points, querying MAY suffer from " - "performance issues. Please consider filtering the object before calling this function by calling the " - "`subset()` method of `SpatialData`." - ), - UserWarning, - stacklevel=2, - ) for element_type in ["points", "images", "labels", "shapes"]: elements = getattr(sdata, element_type) queried_elements = _dict_query_dispatcher( @@ -653,7 +648,7 @@ def _( max_coordinate=max_coordinate, ) - m_without_c, input_axes_without_c, output_axes_without_c = _get_axes_of_tranformation( + m_without_c, input_axes_without_c, output_axes_without_c = _get_axes_of_transformation( points, target_coordinate_system ) m_without_c_linear = m_without_c[:-1, :-1] @@ -673,9 +668,18 @@ def _( # materialize the points in the intrinsic coordinate system once points_pd = points.compute() + + # checking the type of the transformation + # in the case of an identity or scaling transform, we can skip the whole + # projection into intrinsic space and reprojection into the global coordinate system is_identity_transform = input_axes_without_c == output_axes_without_c and np.allclose( m_without_c, np.eye(m_without_c.shape[0]) ) + is_scaling_transform = ( + input_axes_without_c == output_axes_without_c + and _is_scaling_transform(m_without_c_linear) + ) + # if the transform is identity, we can save extra for the affine transformation if is_identity_transform: bounding_box_masks = _bounding_box_mask_points( @@ -685,6 +689,28 @@ def _( max_coordinate=max_coordinate, points_df=points_pd, ) + elif is_scaling_transform: + # Pull scale factors from the diagonal and the translation from the last column + scales = np.diagonal(m_without_c_linear) # shape: (n_axes,) + translation = m_without_c[:-1, -1] # shape: (n_axes,) + + # Invert the affine: x_intrinsic = (x_output - translation) / scale + min_intrinsic = (min_coordinate_adjusted - translation) / scales + max_intrinsic = (max_coordinate_adjusted - translation) / scales + + # Ensure min < max after inversion (negative scale flips the interval) + min_intrinsic, max_intrinsic = ( + np.minimum(min_intrinsic, max_intrinsic), + np.maximum(min_intrinsic, max_intrinsic), + ) + + bounding_box_masks = _bounding_box_mask_points( + points=points, + axes=tuple(input_axes_without_c), + min_coordinate=min_intrinsic, + max_coordinate=max_intrinsic, + points_df=points_pd, + ) else: query_coordinates = points_pd.loc[:, list(input_axes_without_c)].to_numpy(copy=False) query_coordinates = query_coordinates @ m_without_c[:-1, :-1].T + m_without_c[:-1, -1] From 81f99aa4eace662c5970be876944b571230c29fb Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 1 Apr 2026 07:49:11 +0000 Subject: [PATCH 11/11] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/spatialdata/_core/query/spatial_query.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/src/spatialdata/_core/query/spatial_query.py b/src/spatialdata/_core/query/spatial_query.py index 8b224ad9b..2608a0d31 100644 --- a/src/spatialdata/_core/query/spatial_query.py +++ b/src/spatialdata/_core/query/spatial_query.py @@ -1,6 +1,5 @@ from __future__ import annotations -import warnings from abc import abstractmethod from collections.abc import Callable, Mapping from dataclasses import dataclass @@ -668,18 +667,15 @@ def _( # materialize the points in the intrinsic coordinate system once points_pd = points.compute() - + # checking the type of the transformation # in the case of an identity or scaling transform, we can skip the whole # projection into intrinsic space and reprojection into the global coordinate system is_identity_transform = input_axes_without_c == output_axes_without_c and np.allclose( m_without_c, np.eye(m_without_c.shape[0]) ) - is_scaling_transform = ( - input_axes_without_c == output_axes_without_c - and _is_scaling_transform(m_without_c_linear) - ) - + is_scaling_transform = input_axes_without_c == output_axes_without_c and _is_scaling_transform(m_without_c_linear) + # if the transform is identity, we can save extra for the affine transformation if is_identity_transform: bounding_box_masks = _bounding_box_mask_points( @@ -691,8 +687,8 @@ def _( ) elif is_scaling_transform: # Pull scale factors from the diagonal and the translation from the last column - scales = np.diagonal(m_without_c_linear) # shape: (n_axes,) - translation = m_without_c[:-1, -1] # shape: (n_axes,) + scales = np.diagonal(m_without_c_linear) # shape: (n_axes,) + translation = m_without_c[:-1, -1] # shape: (n_axes,) # Invert the affine: x_intrinsic = (x_output - translation) / scale min_intrinsic = (min_coordinate_adjusted - translation) / scales