vortex-data · connortsui20 · Mar 31, 2026
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/vortex-btrblocks/Cargo.toml b/vortex-btrblocks/Cargo.toml
@@ -64,15 +64,5 @@ name = "compress_listview"
 harness = false
 test = false
 
-[[bench]]
-name = "dict_encode"
-harness = false
-test = false
-
-[[bench]]
-name = "stats_calc"
-harness = false
-test = false
-
 [package.metadata.cargo-machete]
 ignored = ["getrandom_v03"]
diff --git a/vortex-btrblocks/public-api.lock b/vortex-btrblocks/public-api.lock
diff --git a/vortex-btrblocks/src/lib.rs b/vortex-btrblocks/src/lib.rs
@@ -68,13 +68,11 @@ pub use builder::default_excluded;
 pub use canonical_compressor::BtrBlocksCompressor;
 pub use schemes::patches::compress_patches;
 pub use vortex_compressor::CascadingCompressor;
-pub use vortex_compressor::builtins::integer_dictionary_encode;
 pub use vortex_compressor::ctx::CompressorContext;
 pub use vortex_compressor::ctx::MAX_CASCADE;
 pub use vortex_compressor::scheme::Scheme;
 pub use vortex_compressor::scheme::SchemeExt;
 pub use vortex_compressor::scheme::SchemeId;
-pub use vortex_compressor::scheme::estimate_compression_ratio_with_sampling;
 pub use vortex_compressor::stats::ArrayAndStats;
 pub use vortex_compressor::stats::BoolStats;
 pub use vortex_compressor::stats::FloatStats;

diff --git a/vortex-btrblocks/src/schemes/decimal.rs b/vortex-btrblocks/src/schemes/decimal.rs
@@ -10,6 +10,7 @@ use vortex_array::ToCanonical;
 use vortex_array::arrays::PrimitiveArray;
 use vortex_array::arrays::decimal::narrowed_decimal;
 use vortex_array::dtype::DecimalType;
+use vortex_compressor::estimate::CompressionEstimate;
 use vortex_decimal_byte_parts::DecimalBytePartsArray;
 use vortex_error::VortexResult;
 
@@ -42,12 +43,11 @@ impl Scheme for DecimalScheme {
 
     fn expected_compression_ratio(
         &self,
-        _compressor: &CascadingCompressor,
         _data: &mut ArrayAndStats,
         _ctx: CompressorContext,
-    ) -> VortexResult<f64> {
+    ) -> CompressionEstimate {
         // Decimal compression is almost always beneficial (narrowing + primitive compression).
-        Ok(f64::MAX)
+        CompressionEstimate::AlwaysUse
     }
 
     fn compress(

diff --git a/vortex-btrblocks/src/schemes/float.rs b/vortex-btrblocks/src/schemes/float.rs
@@ -11,6 +11,7 @@ use vortex_array::Canonical;
 use vortex_array::IntoArray;
 use vortex_array::ToCanonical;
 use vortex_array::dtype::PType;
+use vortex_compressor::estimate::CompressionEstimate;
 use vortex_compressor::scheme::ChildSelection;
 use vortex_compressor::scheme::DescendantExclusion;
 use vortex_error::VortexResult;
@@ -25,7 +26,6 @@ use crate::CompressorContext;
 use crate::Scheme;
 use crate::SchemeExt;
 use crate::compress_patches;
-use crate::estimate_compression_ratio_with_sampling;
 
 /// ALP (Adaptive Lossless floating-Point) encoding.
 #[derive(Debug, Copy, Clone, PartialEq, Eq)]
@@ -70,22 +70,21 @@ impl Scheme for ALPScheme {
 
     fn expected_compression_ratio(
         &self,
-        compressor: &CascadingCompressor,
         data: &mut ArrayAndStats,
         ctx: CompressorContext,
-    ) -> VortexResult<f64> {
+    ) -> CompressionEstimate {
         // ALP encodes floats as integers. Without integer compression afterward, the encoded ints
         // are the same size.
         if ctx.finished_cascading() {
-            return Ok(0.0);
+            return CompressionEstimate::Skip;
         }
 
         // We don't support ALP for f16.
-        if data.float_stats().source().ptype() == PType::F16 {
-            return Ok(0.0);
+        if data.array_as_primitive().ptype() == PType::F16 {
+            return CompressionEstimate::Skip;
         }
 
-        estimate_compression_ratio_with_sampling(self, compressor, data.array(), ctx)
+        CompressionEstimate::Sample
     }
 
     fn compress(
@@ -94,9 +93,7 @@ impl Scheme for ALPScheme {
         data: &mut ArrayAndStats,
         ctx: CompressorContext,
     ) -> VortexResult<ArrayRef> {
-        let stats = data.float_stats();
-
-        let alp_encoded = alp_encode(&stats.source().to_primitive(), None)?;
+        let alp_encoded = alp_encode(data.array_as_primitive(), None)?;
 
         // Compress the ALP ints.
         let compressed_alp_ints =
@@ -121,15 +118,15 @@ impl Scheme for ALPRDScheme {
 
     fn expected_compression_ratio(
         &self,
-        compressor: &CascadingCompressor,
         data: &mut ArrayAndStats,
-        ctx: CompressorContext,
-    ) -> VortexResult<f64> {
-        if data.float_stats().source().ptype() == PType::F16 {
-            return Ok(0.0);
+        _ctx: CompressorContext,
+    ) -> CompressionEstimate {
+        // We don't support ALPRD for f16.
+        if data.array_as_primitive().ptype() == PType::F16 {
+            return CompressionEstimate::Skip;
         }
 
-        estimate_compression_ratio_with_sampling(self, compressor, data.array(), ctx)
+        CompressionEstimate::Sample
     }
 
     fn compress(
@@ -138,15 +135,15 @@ impl Scheme for ALPRDScheme {
         data: &mut ArrayAndStats,
         _ctx: CompressorContext,
     ) -> VortexResult<ArrayRef> {
-        let stats = data.float_stats();
+        let primitive_array = data.array_as_primitive();
 
-        let encoder = match stats.source().ptype() {
-            PType::F32 => RDEncoder::new(stats.source().as_slice::<f32>()),
-            PType::F64 => RDEncoder::new(stats.source().as_slice::<f64>()),
+        let encoder = match primitive_array.ptype() {
+            PType::F32 => RDEncoder::new(primitive_array.as_slice::<f32>()),
+            PType::F64 => RDEncoder::new(primitive_array.as_slice::<f64>()),
             ptype => vortex_panic!("cannot ALPRD compress ptype {ptype}"),
         };
 
-        let mut alp_rd = encoder.encode(stats.source());
+        let mut alp_rd = encoder.encode(primitive_array);
 
         let patches = alp_rd
             .left_parts_patches()
@@ -182,24 +179,25 @@ impl Scheme for NullDominatedSparseScheme {
 
     fn expected_compression_ratio(
         &self,
-        _compressor: &CascadingCompressor,
         data: &mut ArrayAndStats,
         _ctx: CompressorContext,
-    ) -> VortexResult<f64> {
+    ) -> CompressionEstimate {
+        let len = data.array_len() as f64;
         let stats = data.float_stats();
+        let value_count = stats.value_count();
 
-        if stats.value_count() == 0 {
-            // All nulls should use ConstantScheme instead of this.
-            return Ok(0.0);
+        // All-null arrays should be compressed as constant instead anyway.
+        if value_count == 0 {
+            return CompressionEstimate::Skip;
         }
 
         // If the majority (90%) of values is null, this will compress well.
-        if stats.null_count() as f64 / stats.source().len() as f64 > 0.9 {
-            return Ok(stats.source().len() as f64 / stats.value_count() as f64);
+        if stats.null_count() as f64 / len > 0.9 {
+            return CompressionEstimate::Ratio(len / value_count as f64);
         }
 
         // Otherwise we don't go this route.
-        Ok(0.0)
+        CompressionEstimate::Skip
     }
 
     fn compress(
@@ -208,10 +206,8 @@ impl Scheme for NullDominatedSparseScheme {
         data: &mut ArrayAndStats,
         ctx: CompressorContext,
     ) -> VortexResult<ArrayRef> {
-        let stats = data.float_stats();
-
         // We pass None as we only run this pathway for NULL-dominated float arrays.
-        let sparse_encoded = SparseArray::encode(&stats.source().clone().into_array(), None)?;
+        let sparse_encoded = SparseArray::encode(data.array(), None)?;
 
         if let Some(sparse) = sparse_encoded.as_opt::<Sparse>() {
             let indices = sparse.patches().indices().to_primitive().narrow()?;
@@ -241,15 +237,22 @@ impl Scheme for PcoScheme {
         is_float_primitive(canonical)
     }
 
+    fn expected_compression_ratio(
+        &self,
+        _data: &mut ArrayAndStats,
+        _ctx: CompressorContext,
+    ) -> CompressionEstimate {
+        CompressionEstimate::Sample
+    }
+
     fn compress(
         &self,
         _compressor: &CascadingCompressor,
         data: &mut ArrayAndStats,
         _ctx: CompressorContext,
     ) -> VortexResult<ArrayRef> {
-        let stats = data.float_stats();
         Ok(vortex_pco::PcoArray::from_primitive(
-            stats.source(),
+            data.array_as_primitive(),
             pco::DEFAULT_COMPRESSION_LEVEL,
             8192,
         )?
@@ -401,7 +404,8 @@ mod scheme_selection_tests {
         let array = PrimitiveArray::new(Buffer::copy_from(&values), Validity::NonNullable);
         let btr = BtrBlocksCompressor::default();
         let compressed = btr.compress(&array.into_array())?;
-        assert!(compressed.is::<Dict>());
+        assert!(compressed.is::<ALP>());
+        assert!(compressed.children()[0].is::<Dict>());
         Ok(())
     }