From 3ac0e813b072a4a967c4d150b71d60d4788834cc Mon Sep 17 00:00:00 2001 From: Alfonso Subiotto Marques Date: Tue, 31 Mar 2026 16:17:25 +0200 Subject: [PATCH] fix[vortex-array]: fix overflow on FSL element take indices The take index types used on an FSL might be valid narrow types. This type was used as indices to take the inner elements, which could result in an overflow with long enough lists. Signed-off-by: Alfonso Subiotto Marques --- .../arrays/fixed_size_list/compute/take.rs | 27 ++++++++------- .../src/arrays/fixed_size_list/tests/take.rs | 33 +++++++++++++++++++ 2 files changed, 49 insertions(+), 11 deletions(-) diff --git a/vortex-array/src/arrays/fixed_size_list/compute/take.rs b/vortex-array/src/arrays/fixed_size_list/compute/take.rs index f69ef5840e1..b38d10fc2bb 100644 --- a/vortex-array/src/arrays/fixed_size_list/compute/take.rs +++ b/vortex-array/src/arrays/fixed_size_list/compute/take.rs @@ -17,6 +17,7 @@ use crate::arrays::dict::TakeExecute; use crate::dtype::IntegerPType; use crate::executor::ExecutionCtx; use crate::match_each_integer_ptype; +use crate::match_smallest_offset_type; use crate::validity::Validity; use crate::vtable::ValidityHelper; @@ -26,20 +27,24 @@ use crate::vtable::ValidityHelper; /// that elements start at offset 0 and be perfectly packed without gaps. We expand list indices /// into element indices and push them down to the child elements array. impl TakeExecute for FixedSizeList { + #[expect(clippy::cognitive_complexity)] fn take( array: &FixedSizeListArray, indices: &ArrayRef, ctx: &mut ExecutionCtx, ) -> VortexResult> { + let max_element_idx = array.elements().len(); match_each_integer_ptype!(indices.dtype().as_ptype(), |I| { - take_with_indices::(array, indices, ctx) + match_smallest_offset_type!(max_element_idx, |E| { + take_with_indices::(array, indices, ctx) + }) }) .map(Some) } } /// Dispatches to the appropriate take implementation based on list size and nullability. -fn take_with_indices( +fn take_with_indices( array: &FixedSizeListArray, indices: &ArrayRef, ctx: &mut ExecutionCtx, @@ -74,15 +79,15 @@ fn take_with_indices( } else { // The result's nullability is the union of the input nullabilities. if array.dtype().is_nullable() || indices_array.dtype().is_nullable() { - take_nullable_fsl::(array, &indices_array) + take_nullable_fsl::(array, &indices_array) } else { - take_non_nullable_fsl::(array, &indices_array) + take_non_nullable_fsl::(array, &indices_array) } } } /// Takes from an array when both the array and indices are non-nullable. -fn take_non_nullable_fsl( +fn take_non_nullable_fsl( array: &FixedSizeListArray, indices_array: &PrimitiveArray, ) -> VortexResult { @@ -91,7 +96,7 @@ fn take_non_nullable_fsl( let new_len = indices.len(); // Build the element indices directly without validity tracking. - let mut elements_indices = BufferMut::::with_capacity(new_len * list_size); + let mut elements_indices = BufferMut::::with_capacity(new_len * list_size); // Build the element indices for each list. for data_idx in indices { @@ -106,7 +111,7 @@ fn take_non_nullable_fsl( for i in list_start..list_end { // SAFETY: We've allocated enough space for enough indices for all `new_len` lists (that each consist of `list_size = list_end - list_start` elements), so we know we have enough capacity. unsafe { - elements_indices.push_unchecked(I::from_usize(i).vortex_expect("i < list_end")) + elements_indices.push_unchecked(E::from_usize(i).vortex_expect("i < list_end")) }; } } @@ -131,7 +136,7 @@ fn take_non_nullable_fsl( } /// Takes from an array when either the array or indices are nullable. -fn take_nullable_fsl( +fn take_nullable_fsl( array: &FixedSizeListArray, indices_array: &PrimitiveArray, ) -> VortexResult { @@ -144,7 +149,7 @@ fn take_nullable_fsl( // We must use placeholder zeros for null lists to maintain the array length without // propagating nullability to the element array's take operation. - let mut elements_indices = BufferMut::::with_capacity(new_len * list_size); + let mut elements_indices = BufferMut::::with_capacity(new_len * list_size); let mut new_validity_builder = BitBufferMut::with_capacity(new_len); // Build the element indices while tracking which lists are null. @@ -159,7 +164,7 @@ fn take_nullable_fsl( if !is_index_valid || !array_validity.value(data_idx) { // Append placeholder zeros for null lists. These will be masked by the validity array. // We cannot use append_nulls here as explained above. - unsafe { elements_indices.push_n_unchecked(I::zero(), list_size) }; + unsafe { elements_indices.push_n_unchecked(E::zero(), list_size) }; new_validity_builder.append(false); } else { // Append the actual element indices for this list. @@ -170,7 +175,7 @@ fn take_nullable_fsl( for i in list_start..list_end { // SAFETY: We've allocated enough space for enough indices for all `new_len` lists (that each consist of `list_size = list_end - list_start` elements), so we know we have enough capacity. unsafe { - elements_indices.push_unchecked(I::from_usize(i).vortex_expect("i < list_end")) + elements_indices.push_unchecked(E::from_usize(i).vortex_expect("i < list_end")) }; } diff --git a/vortex-array/src/arrays/fixed_size_list/tests/take.rs b/vortex-array/src/arrays/fixed_size_list/tests/take.rs index 50a0371a58a..5c085bd304f 100644 --- a/vortex-array/src/arrays/fixed_size_list/tests/take.rs +++ b/vortex-array/src/arrays/fixed_size_list/tests/take.rs @@ -9,6 +9,7 @@ use super::common::create_empty_fsl; use super::common::create_large_fsl; use super::common::create_nullable_fsl; use super::common::create_single_element_fsl; +use crate::ArrayRef; use crate::DynArray; use crate::IntoArray; use crate::arrays::FixedSizeListArray; @@ -130,6 +131,38 @@ fn test_take_fsl_with_null_indices_preserves_elements() { assert_arrays_eq!(expected, result); } +// Element index overflow: with u8 indices and list_size=16, data_idx=16 produces element index +// 16*16=256 which overflows u8. The take kernel must widen the element index type. +#[rstest] +#[case::non_nullable( + FixedSizeListArray::new( + buffer![0u8; 320].into_array(), 16, Validity::NonNullable, 20, + ), + buffer![0u8, 16, 19].into_array(), + FixedSizeListArray::new( + buffer![0u8; 48].into_array(), 16, Validity::NonNullable, 3, + ), +)] +#[case::nullable( + FixedSizeListArray::new( + buffer![0u8; 320].into_array(), 16, + Validity::from_iter((0..20).map(|i| i != 5)), 20, + ), + buffer![0u8, 16, 5].into_array(), + FixedSizeListArray::new( + buffer![0u8; 48].into_array(), 16, + Validity::from_iter([true, true, false]), 3, + ), +)] +fn test_element_index_overflow( + #[case] fsl: FixedSizeListArray, + #[case] indices: ArrayRef, + #[case] expected: FixedSizeListArray, +) { + let result = fsl.take(indices.to_array()).unwrap(); + assert_arrays_eq!(expected, result); +} + // Parameterized test for nullable array scenarios that are specific to FSL's implementation. #[rstest] #[case::nullable_mixed_elements(