diff --git a/Cargo.lock b/Cargo.lock index ccc1ff46344..79b5859d299 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -10321,6 +10321,7 @@ dependencies = [ name = "vortex-compressor" version = "0.1.0" dependencies = [ + "codspeed-divan-compat", "itertools 0.14.0", "num-traits", "parking_lot", diff --git a/vortex-btrblocks/Cargo.toml b/vortex-btrblocks/Cargo.toml index 9bbd2430f09..20d7fa1c8d9 100644 --- a/vortex-btrblocks/Cargo.toml +++ b/vortex-btrblocks/Cargo.toml @@ -64,15 +64,5 @@ name = "compress_listview" harness = false test = false -[[bench]] -name = "dict_encode" -harness = false -test = false - -[[bench]] -name = "stats_calc" -harness = false -test = false - [package.metadata.cargo-machete] ignored = ["getrandom_v03"] diff --git a/vortex-btrblocks/public-api.lock b/vortex-btrblocks/public-api.lock index eb9643db354..f5cacba59c4 100644 --- a/vortex-btrblocks/public-api.lock +++ b/vortex-btrblocks/public-api.lock @@ -24,10 +24,6 @@ pub use vortex_btrblocks::SchemeId pub use vortex_btrblocks::StringStats -pub use vortex_btrblocks::estimate_compression_ratio_with_sampling - -pub use vortex_btrblocks::integer_dictionary_encode - pub mod vortex_btrblocks::schemes pub mod vortex_btrblocks::schemes::bool @@ -62,7 +58,7 @@ impl vortex_compressor::scheme::Scheme for vortex_btrblocks::schemes::decimal::D pub fn vortex_btrblocks::schemes::decimal::DecimalScheme::compress(&self, compressor: &vortex_compressor::compressor::CascadingCompressor, data: &mut vortex_compressor::stats::cache::ArrayAndStats, ctx: vortex_compressor::ctx::CompressorContext) -> vortex_error::VortexResult -pub fn vortex_btrblocks::schemes::decimal::DecimalScheme::expected_compression_ratio(&self, _compressor: &vortex_compressor::compressor::CascadingCompressor, _data: &mut vortex_compressor::stats::cache::ArrayAndStats, _ctx: vortex_compressor::ctx::CompressorContext) -> vortex_error::VortexResult +pub fn vortex_btrblocks::schemes::decimal::DecimalScheme::expected_compression_ratio(&self, _data: &mut 
vortex_compressor::stats::cache::ArrayAndStats, _ctx: vortex_compressor::ctx::CompressorContext) -> vortex_compressor::estimate::CompressionEstimate pub fn vortex_btrblocks::schemes::decimal::DecimalScheme::matches(&self, canonical: &vortex_array::canonical::Canonical) -> bool @@ -104,7 +100,7 @@ impl vortex_compressor::scheme::Scheme for vortex_btrblocks::schemes::float::ALP pub fn vortex_btrblocks::schemes::float::ALPRDScheme::compress(&self, _compressor: &vortex_compressor::compressor::CascadingCompressor, data: &mut vortex_compressor::stats::cache::ArrayAndStats, _ctx: vortex_compressor::ctx::CompressorContext) -> vortex_error::VortexResult -pub fn vortex_btrblocks::schemes::float::ALPRDScheme::expected_compression_ratio(&self, compressor: &vortex_compressor::compressor::CascadingCompressor, data: &mut vortex_compressor::stats::cache::ArrayAndStats, ctx: vortex_compressor::ctx::CompressorContext) -> vortex_error::VortexResult +pub fn vortex_btrblocks::schemes::float::ALPRDScheme::expected_compression_ratio(&self, data: &mut vortex_compressor::stats::cache::ArrayAndStats, _ctx: vortex_compressor::ctx::CompressorContext) -> vortex_compressor::estimate::CompressionEstimate pub fn vortex_btrblocks::schemes::float::ALPRDScheme::matches(&self, canonical: &vortex_array::canonical::Canonical) -> bool @@ -134,7 +130,7 @@ impl vortex_compressor::scheme::Scheme for vortex_btrblocks::schemes::float::ALP pub fn vortex_btrblocks::schemes::float::ALPScheme::compress(&self, compressor: &vortex_compressor::compressor::CascadingCompressor, data: &mut vortex_compressor::stats::cache::ArrayAndStats, ctx: vortex_compressor::ctx::CompressorContext) -> vortex_error::VortexResult -pub fn vortex_btrblocks::schemes::float::ALPScheme::expected_compression_ratio(&self, compressor: &vortex_compressor::compressor::CascadingCompressor, data: &mut vortex_compressor::stats::cache::ArrayAndStats, ctx: vortex_compressor::ctx::CompressorContext) -> vortex_error::VortexResult +pub fn 
vortex_btrblocks::schemes::float::ALPScheme::expected_compression_ratio(&self, data: &mut vortex_compressor::stats::cache::ArrayAndStats, ctx: vortex_compressor::ctx::CompressorContext) -> vortex_compressor::estimate::CompressionEstimate pub fn vortex_btrblocks::schemes::float::ALPScheme::matches(&self, canonical: &vortex_array::canonical::Canonical) -> bool @@ -168,7 +164,7 @@ pub fn vortex_btrblocks::schemes::float::NullDominatedSparseScheme::compress(&se pub fn vortex_btrblocks::schemes::float::NullDominatedSparseScheme::descendant_exclusions(&self) -> alloc::vec::Vec -pub fn vortex_btrblocks::schemes::float::NullDominatedSparseScheme::expected_compression_ratio(&self, _compressor: &vortex_compressor::compressor::CascadingCompressor, data: &mut vortex_compressor::stats::cache::ArrayAndStats, _ctx: vortex_compressor::ctx::CompressorContext) -> vortex_error::VortexResult +pub fn vortex_btrblocks::schemes::float::NullDominatedSparseScheme::expected_compression_ratio(&self, data: &mut vortex_compressor::stats::cache::ArrayAndStats, _ctx: vortex_compressor::ctx::CompressorContext) -> vortex_compressor::estimate::CompressionEstimate pub fn vortex_btrblocks::schemes::float::NullDominatedSparseScheme::matches(&self, canonical: &vortex_array::canonical::Canonical) -> bool @@ -200,6 +196,8 @@ impl vortex_compressor::scheme::Scheme for vortex_btrblocks::schemes::float::Pco pub fn vortex_btrblocks::schemes::float::PcoScheme::compress(&self, _compressor: &vortex_compressor::compressor::CascadingCompressor, data: &mut vortex_compressor::stats::cache::ArrayAndStats, _ctx: vortex_compressor::ctx::CompressorContext) -> vortex_error::VortexResult +pub fn vortex_btrblocks::schemes::float::PcoScheme::expected_compression_ratio(&self, _data: &mut vortex_compressor::stats::cache::ArrayAndStats, _ctx: vortex_compressor::ctx::CompressorContext) -> vortex_compressor::estimate::CompressionEstimate + pub fn vortex_btrblocks::schemes::float::PcoScheme::matches(&self, canonical: 
&vortex_array::canonical::Canonical) -> bool pub fn vortex_btrblocks::schemes::float::PcoScheme::scheme_name(&self) -> &'static str @@ -240,7 +238,7 @@ impl vortex_compressor::scheme::Scheme for vortex_btrblocks::schemes::integer::B pub fn vortex_btrblocks::schemes::integer::BitPackingScheme::compress(&self, _compressor: &vortex_compressor::compressor::CascadingCompressor, data: &mut vortex_compressor::stats::cache::ArrayAndStats, _ctx: vortex_compressor::ctx::CompressorContext) -> vortex_error::VortexResult -pub fn vortex_btrblocks::schemes::integer::BitPackingScheme::expected_compression_ratio(&self, compressor: &vortex_compressor::compressor::CascadingCompressor, data: &mut vortex_compressor::stats::cache::ArrayAndStats, ctx: vortex_compressor::ctx::CompressorContext) -> vortex_error::VortexResult +pub fn vortex_btrblocks::schemes::integer::BitPackingScheme::expected_compression_ratio(&self, data: &mut vortex_compressor::stats::cache::ArrayAndStats, _ctx: vortex_compressor::ctx::CompressorContext) -> vortex_compressor::estimate::CompressionEstimate pub fn vortex_btrblocks::schemes::integer::BitPackingScheme::matches(&self, canonical: &vortex_array::canonical::Canonical) -> bool @@ -272,7 +270,7 @@ pub fn vortex_btrblocks::schemes::integer::FoRScheme::ancestor_exclusions(&self) pub fn vortex_btrblocks::schemes::integer::FoRScheme::compress(&self, compressor: &vortex_compressor::compressor::CascadingCompressor, data: &mut vortex_compressor::stats::cache::ArrayAndStats, ctx: vortex_compressor::ctx::CompressorContext) -> vortex_error::VortexResult -pub fn vortex_btrblocks::schemes::integer::FoRScheme::expected_compression_ratio(&self, _compressor: &vortex_compressor::compressor::CascadingCompressor, data: &mut vortex_compressor::stats::cache::ArrayAndStats, ctx: vortex_compressor::ctx::CompressorContext) -> vortex_error::VortexResult +pub fn vortex_btrblocks::schemes::integer::FoRScheme::expected_compression_ratio(&self, data: &mut 
vortex_compressor::stats::cache::ArrayAndStats, ctx: vortex_compressor::ctx::CompressorContext) -> vortex_compressor::estimate::CompressionEstimate pub fn vortex_btrblocks::schemes::integer::FoRScheme::matches(&self, canonical: &vortex_array::canonical::Canonical) -> bool @@ -302,7 +300,7 @@ impl vortex_compressor::scheme::Scheme for vortex_btrblocks::schemes::integer::P pub fn vortex_btrblocks::schemes::integer::PcoScheme::compress(&self, _compressor: &vortex_compressor::compressor::CascadingCompressor, data: &mut vortex_compressor::stats::cache::ArrayAndStats, _ctx: vortex_compressor::ctx::CompressorContext) -> vortex_error::VortexResult -pub fn vortex_btrblocks::schemes::integer::PcoScheme::expected_compression_ratio(&self, compressor: &vortex_compressor::compressor::CascadingCompressor, data: &mut vortex_compressor::stats::cache::ArrayAndStats, ctx: vortex_compressor::ctx::CompressorContext) -> vortex_error::VortexResult +pub fn vortex_btrblocks::schemes::integer::PcoScheme::expected_compression_ratio(&self, data: &mut vortex_compressor::stats::cache::ArrayAndStats, _ctx: vortex_compressor::ctx::CompressorContext) -> vortex_compressor::estimate::CompressionEstimate pub fn vortex_btrblocks::schemes::integer::PcoScheme::matches(&self, canonical: &vortex_array::canonical::Canonical) -> bool @@ -336,7 +334,7 @@ pub fn vortex_btrblocks::schemes::integer::RunEndScheme::compress(&self, compres pub fn vortex_btrblocks::schemes::integer::RunEndScheme::descendant_exclusions(&self) -> alloc::vec::Vec -pub fn vortex_btrblocks::schemes::integer::RunEndScheme::expected_compression_ratio(&self, compressor: &vortex_compressor::compressor::CascadingCompressor, data: &mut vortex_compressor::stats::cache::ArrayAndStats, ctx: vortex_compressor::ctx::CompressorContext) -> vortex_error::VortexResult +pub fn vortex_btrblocks::schemes::integer::RunEndScheme::expected_compression_ratio(&self, data: &mut vortex_compressor::stats::cache::ArrayAndStats, _ctx: 
vortex_compressor::ctx::CompressorContext) -> vortex_compressor::estimate::CompressionEstimate pub fn vortex_btrblocks::schemes::integer::RunEndScheme::matches(&self, canonical: &vortex_array::canonical::Canonical) -> bool @@ -370,7 +368,7 @@ pub fn vortex_btrblocks::schemes::integer::SequenceScheme::ancestor_exclusions(& pub fn vortex_btrblocks::schemes::integer::SequenceScheme::compress(&self, _compressor: &vortex_compressor::compressor::CascadingCompressor, data: &mut vortex_compressor::stats::cache::ArrayAndStats, _ctx: vortex_compressor::ctx::CompressorContext) -> vortex_error::VortexResult -pub fn vortex_btrblocks::schemes::integer::SequenceScheme::expected_compression_ratio(&self, _compressor: &vortex_compressor::compressor::CascadingCompressor, data: &mut vortex_compressor::stats::cache::ArrayAndStats, _ctx: vortex_compressor::ctx::CompressorContext) -> vortex_error::VortexResult +pub fn vortex_btrblocks::schemes::integer::SequenceScheme::expected_compression_ratio(&self, data: &mut vortex_compressor::stats::cache::ArrayAndStats, ctx: vortex_compressor::ctx::CompressorContext) -> vortex_compressor::estimate::CompressionEstimate pub fn vortex_btrblocks::schemes::integer::SequenceScheme::matches(&self, canonical: &vortex_array::canonical::Canonical) -> bool @@ -402,7 +400,7 @@ pub fn vortex_btrblocks::schemes::integer::SparseScheme::compress(&self, compres pub fn vortex_btrblocks::schemes::integer::SparseScheme::descendant_exclusions(&self) -> alloc::vec::Vec -pub fn vortex_btrblocks::schemes::integer::SparseScheme::expected_compression_ratio(&self, _compressor: &vortex_compressor::compressor::CascadingCompressor, data: &mut vortex_compressor::stats::cache::ArrayAndStats, _ctx: vortex_compressor::ctx::CompressorContext) -> vortex_error::VortexResult +pub fn vortex_btrblocks::schemes::integer::SparseScheme::expected_compression_ratio(&self, data: &mut vortex_compressor::stats::cache::ArrayAndStats, _ctx: vortex_compressor::ctx::CompressorContext) -> 
vortex_compressor::estimate::CompressionEstimate pub fn vortex_btrblocks::schemes::integer::SparseScheme::matches(&self, canonical: &vortex_array::canonical::Canonical) -> bool @@ -440,7 +438,7 @@ pub fn vortex_btrblocks::schemes::integer::ZigZagScheme::compress(&self, compres pub fn vortex_btrblocks::schemes::integer::ZigZagScheme::descendant_exclusions(&self) -> alloc::vec::Vec -pub fn vortex_btrblocks::schemes::integer::ZigZagScheme::expected_compression_ratio(&self, compressor: &vortex_compressor::compressor::CascadingCompressor, data: &mut vortex_compressor::stats::cache::ArrayAndStats, ctx: vortex_compressor::ctx::CompressorContext) -> vortex_error::VortexResult +pub fn vortex_btrblocks::schemes::integer::ZigZagScheme::expected_compression_ratio(&self, data: &mut vortex_compressor::stats::cache::ArrayAndStats, ctx: vortex_compressor::ctx::CompressorContext) -> vortex_compressor::estimate::CompressionEstimate pub fn vortex_btrblocks::schemes::integer::ZigZagScheme::matches(&self, canonical: &vortex_array::canonical::Canonical) -> bool @@ -484,6 +482,8 @@ impl vortex_compressor::scheme::Scheme for vortex_btrblocks::schemes::string::FS pub fn vortex_btrblocks::schemes::string::FSSTScheme::compress(&self, compressor: &vortex_compressor::compressor::CascadingCompressor, data: &mut vortex_compressor::stats::cache::ArrayAndStats, ctx: vortex_compressor::ctx::CompressorContext) -> vortex_error::VortexResult +pub fn vortex_btrblocks::schemes::string::FSSTScheme::expected_compression_ratio(&self, _data: &mut vortex_compressor::stats::cache::ArrayAndStats, _ctx: vortex_compressor::ctx::CompressorContext) -> vortex_compressor::estimate::CompressionEstimate + pub fn vortex_btrblocks::schemes::string::FSSTScheme::matches(&self, canonical: &vortex_array::canonical::Canonical) -> bool pub fn vortex_btrblocks::schemes::string::FSSTScheme::num_children(&self) -> usize @@ -516,7 +516,7 @@ pub fn vortex_btrblocks::schemes::string::NullDominatedSparseScheme::compress(&s pub fn 
vortex_btrblocks::schemes::string::NullDominatedSparseScheme::descendant_exclusions(&self) -> alloc::vec::Vec -pub fn vortex_btrblocks::schemes::string::NullDominatedSparseScheme::expected_compression_ratio(&self, _compressor: &vortex_compressor::compressor::CascadingCompressor, data: &mut vortex_compressor::stats::cache::ArrayAndStats, _ctx: vortex_compressor::ctx::CompressorContext) -> vortex_error::VortexResult +pub fn vortex_btrblocks::schemes::string::NullDominatedSparseScheme::expected_compression_ratio(&self, data: &mut vortex_compressor::stats::cache::ArrayAndStats, _ctx: vortex_compressor::ctx::CompressorContext) -> vortex_compressor::estimate::CompressionEstimate pub fn vortex_btrblocks::schemes::string::NullDominatedSparseScheme::matches(&self, canonical: &vortex_array::canonical::Canonical) -> bool @@ -548,6 +548,8 @@ impl vortex_compressor::scheme::Scheme for vortex_btrblocks::schemes::string::Zs pub fn vortex_btrblocks::schemes::string::ZstdScheme::compress(&self, _compressor: &vortex_compressor::compressor::CascadingCompressor, data: &mut vortex_compressor::stats::cache::ArrayAndStats, _ctx: vortex_compressor::ctx::CompressorContext) -> vortex_error::VortexResult +pub fn vortex_btrblocks::schemes::string::ZstdScheme::expected_compression_ratio(&self, _data: &mut vortex_compressor::stats::cache::ArrayAndStats, _ctx: vortex_compressor::ctx::CompressorContext) -> vortex_compressor::estimate::CompressionEstimate + pub fn vortex_btrblocks::schemes::string::ZstdScheme::matches(&self, canonical: &vortex_array::canonical::Canonical) -> bool pub fn vortex_btrblocks::schemes::string::ZstdScheme::scheme_name(&self) -> &'static str @@ -578,9 +580,7 @@ impl vortex_compressor::scheme::Scheme for vortex_btrblocks::schemes::temporal:: pub fn vortex_btrblocks::schemes::temporal::TemporalScheme::compress(&self, compressor: &vortex_compressor::compressor::CascadingCompressor, data: &mut vortex_compressor::stats::cache::ArrayAndStats, ctx: 
vortex_compressor::ctx::CompressorContext) -> vortex_error::VortexResult -pub fn vortex_btrblocks::schemes::temporal::TemporalScheme::detects_constant(&self) -> bool - -pub fn vortex_btrblocks::schemes::temporal::TemporalScheme::expected_compression_ratio(&self, _compressor: &vortex_compressor::compressor::CascadingCompressor, _data: &mut vortex_compressor::stats::cache::ArrayAndStats, _ctx: vortex_compressor::ctx::CompressorContext) -> vortex_error::VortexResult +pub fn vortex_btrblocks::schemes::temporal::TemporalScheme::expected_compression_ratio(&self, _data: &mut vortex_compressor::stats::cache::ArrayAndStats, _ctx: vortex_compressor::ctx::CompressorContext) -> vortex_compressor::estimate::CompressionEstimate pub fn vortex_btrblocks::schemes::temporal::TemporalScheme::matches(&self, canonical: &vortex_array::canonical::Canonical) -> bool diff --git a/vortex-btrblocks/src/lib.rs b/vortex-btrblocks/src/lib.rs index 1ae23251a1c..acd3a03de10 100644 --- a/vortex-btrblocks/src/lib.rs +++ b/vortex-btrblocks/src/lib.rs @@ -68,13 +68,11 @@ pub use builder::default_excluded; pub use canonical_compressor::BtrBlocksCompressor; pub use schemes::patches::compress_patches; pub use vortex_compressor::CascadingCompressor; -pub use vortex_compressor::builtins::integer_dictionary_encode; pub use vortex_compressor::ctx::CompressorContext; pub use vortex_compressor::ctx::MAX_CASCADE; pub use vortex_compressor::scheme::Scheme; pub use vortex_compressor::scheme::SchemeExt; pub use vortex_compressor::scheme::SchemeId; -pub use vortex_compressor::scheme::estimate_compression_ratio_with_sampling; pub use vortex_compressor::stats::ArrayAndStats; pub use vortex_compressor::stats::BoolStats; pub use vortex_compressor::stats::FloatStats; diff --git a/vortex-btrblocks/src/schemes/decimal.rs b/vortex-btrblocks/src/schemes/decimal.rs index 8fd21aa75cd..26e73186a92 100644 --- a/vortex-btrblocks/src/schemes/decimal.rs +++ b/vortex-btrblocks/src/schemes/decimal.rs @@ -10,6 +10,7 @@ use 
vortex_array::ToCanonical; use vortex_array::arrays::PrimitiveArray; use vortex_array::arrays::decimal::narrowed_decimal; use vortex_array::dtype::DecimalType; +use vortex_compressor::estimate::CompressionEstimate; use vortex_decimal_byte_parts::DecimalBytePartsArray; use vortex_error::VortexResult; @@ -42,12 +43,11 @@ impl Scheme for DecimalScheme { fn expected_compression_ratio( &self, - _compressor: &CascadingCompressor, _data: &mut ArrayAndStats, _ctx: CompressorContext, - ) -> VortexResult { + ) -> CompressionEstimate { // Decimal compression is almost always beneficial (narrowing + primitive compression). - Ok(f64::MAX) + CompressionEstimate::AlwaysUse } fn compress( diff --git a/vortex-btrblocks/src/schemes/float.rs b/vortex-btrblocks/src/schemes/float.rs index 0f5622cea3f..6be41d2039d 100644 --- a/vortex-btrblocks/src/schemes/float.rs +++ b/vortex-btrblocks/src/schemes/float.rs @@ -11,6 +11,7 @@ use vortex_array::Canonical; use vortex_array::IntoArray; use vortex_array::ToCanonical; use vortex_array::dtype::PType; +use vortex_compressor::estimate::CompressionEstimate; use vortex_compressor::scheme::ChildSelection; use vortex_compressor::scheme::DescendantExclusion; use vortex_error::VortexResult; @@ -25,7 +26,6 @@ use crate::CompressorContext; use crate::Scheme; use crate::SchemeExt; use crate::compress_patches; -use crate::estimate_compression_ratio_with_sampling; /// ALP (Adaptive Lossless floating-Point) encoding. #[derive(Debug, Copy, Clone, PartialEq, Eq)] @@ -70,22 +70,21 @@ impl Scheme for ALPScheme { fn expected_compression_ratio( &self, - compressor: &CascadingCompressor, data: &mut ArrayAndStats, ctx: CompressorContext, - ) -> VortexResult { + ) -> CompressionEstimate { // ALP encodes floats as integers. Without integer compression afterward, the encoded ints // are the same size. if ctx.finished_cascading() { - return Ok(0.0); + return CompressionEstimate::Skip; } // We don't support ALP for f16. 
- if data.float_stats().source().ptype() == PType::F16 { - return Ok(0.0); + if data.array_as_primitive().ptype() == PType::F16 { + return CompressionEstimate::Skip; } - estimate_compression_ratio_with_sampling(self, compressor, data.array(), ctx) + CompressionEstimate::Sample } fn compress( @@ -94,9 +93,7 @@ impl Scheme for ALPScheme { data: &mut ArrayAndStats, ctx: CompressorContext, ) -> VortexResult { - let stats = data.float_stats(); - - let alp_encoded = alp_encode(&stats.source().to_primitive(), None)?; + let alp_encoded = alp_encode(data.array_as_primitive(), None)?; // Compress the ALP ints. let compressed_alp_ints = @@ -121,15 +118,15 @@ impl Scheme for ALPRDScheme { fn expected_compression_ratio( &self, - compressor: &CascadingCompressor, data: &mut ArrayAndStats, - ctx: CompressorContext, - ) -> VortexResult { - if data.float_stats().source().ptype() == PType::F16 { - return Ok(0.0); + _ctx: CompressorContext, + ) -> CompressionEstimate { + // We don't support ALPRD for f16. 
+ if data.array_as_primitive().ptype() == PType::F16 { + return CompressionEstimate::Skip; } - estimate_compression_ratio_with_sampling(self, compressor, data.array(), ctx) + CompressionEstimate::Sample } fn compress( @@ -138,15 +135,15 @@ impl Scheme for ALPRDScheme { data: &mut ArrayAndStats, _ctx: CompressorContext, ) -> VortexResult { - let stats = data.float_stats(); + let primitive_array = data.array_as_primitive(); - let encoder = match stats.source().ptype() { - PType::F32 => RDEncoder::new(stats.source().as_slice::()), - PType::F64 => RDEncoder::new(stats.source().as_slice::()), + let encoder = match primitive_array.ptype() { + PType::F32 => RDEncoder::new(primitive_array.as_slice::()), + PType::F64 => RDEncoder::new(primitive_array.as_slice::()), ptype => vortex_panic!("cannot ALPRD compress ptype {ptype}"), }; - let mut alp_rd = encoder.encode(stats.source()); + let mut alp_rd = encoder.encode(primitive_array); let patches = alp_rd .left_parts_patches() @@ -182,24 +179,25 @@ impl Scheme for NullDominatedSparseScheme { fn expected_compression_ratio( &self, - _compressor: &CascadingCompressor, data: &mut ArrayAndStats, _ctx: CompressorContext, - ) -> VortexResult { + ) -> CompressionEstimate { + let len = data.array_len() as f64; let stats = data.float_stats(); + let value_count = stats.value_count(); - if stats.value_count() == 0 { - // All nulls should use ConstantScheme instead of this. - return Ok(0.0); + // All-null arrays should be compressed as constant instead anyways. + if value_count == 0 { + return CompressionEstimate::Skip; } // If the majority (90%) of values is null, this will compress well. - if stats.null_count() as f64 / stats.source().len() as f64 > 0.9 { - return Ok(stats.source().len() as f64 / stats.value_count() as f64); + if stats.null_count() as f64 / len > 0.9 { + return CompressionEstimate::Ratio(len / value_count as f64); } // Otherwise we don't go this route. 
- Ok(0.0) + CompressionEstimate::Skip } fn compress( @@ -208,10 +206,8 @@ impl Scheme for NullDominatedSparseScheme { data: &mut ArrayAndStats, ctx: CompressorContext, ) -> VortexResult { - let stats = data.float_stats(); - // We pass None as we only run this pathway for NULL-dominated float arrays. - let sparse_encoded = SparseArray::encode(&stats.source().clone().into_array(), None)?; + let sparse_encoded = SparseArray::encode(data.array(), None)?; if let Some(sparse) = sparse_encoded.as_opt::() { let indices = sparse.patches().indices().to_primitive().narrow()?; @@ -241,15 +237,22 @@ impl Scheme for PcoScheme { is_float_primitive(canonical) } + fn expected_compression_ratio( + &self, + _data: &mut ArrayAndStats, + _ctx: CompressorContext, + ) -> CompressionEstimate { + CompressionEstimate::Sample + } + fn compress( &self, _compressor: &CascadingCompressor, data: &mut ArrayAndStats, _ctx: CompressorContext, ) -> VortexResult { - let stats = data.float_stats(); Ok(vortex_pco::PcoArray::from_primitive( - stats.source(), + data.array_as_primitive(), pco::DEFAULT_COMPRESSION_LEVEL, 8192, )? 
@@ -401,7 +404,8 @@ mod scheme_selection_tests { let array = PrimitiveArray::new(Buffer::copy_from(&values), Validity::NonNullable); let btr = BtrBlocksCompressor::default(); let compressed = btr.compress(&array.into_array())?; - assert!(compressed.is::()); + assert!(compressed.is::()); + assert!(compressed.children()[0].is::()); Ok(()) } diff --git a/vortex-btrblocks/src/schemes/integer.rs b/vortex-btrblocks/src/schemes/integer.rs index e3eb7b7649b..965d4049d71 100644 --- a/vortex-btrblocks/src/schemes/integer.rs +++ b/vortex-btrblocks/src/schemes/integer.rs @@ -11,6 +11,7 @@ use vortex_array::arrays::ConstantArray; use vortex_array::scalar::Scalar; use vortex_compressor::builtins::FloatDictScheme; use vortex_compressor::builtins::StringDictScheme; +use vortex_compressor::estimate::CompressionEstimate; use vortex_compressor::scheme::AncestorExclusion; use vortex_compressor::scheme::ChildSelection; use vortex_compressor::scheme::DescendantExclusion; @@ -37,7 +38,6 @@ use crate::GenerateStatsOptions; use crate::Scheme; use crate::SchemeExt; use crate::compress_patches; -use crate::estimate_compression_ratio_with_sampling; /// Frame of Reference encoding. #[derive(Debug, Copy, Clone, PartialEq, Eq)] @@ -108,33 +108,27 @@ impl Scheme for FoRScheme { fn expected_compression_ratio( &self, - _compressor: &CascadingCompressor, data: &mut ArrayAndStats, ctx: CompressorContext, - ) -> VortexResult { + ) -> CompressionEstimate { // FoR only subtracts the min. Without further compression (e.g. BitPacking), the output is // the same size. if ctx.finished_cascading() { - return Ok(0.0); + return CompressionEstimate::Skip; } let stats = data.integer_stats(); - // All-null cannot be FOR compressed. - if stats.value_count() == 0 { - return Ok(0.0); - } - // Only apply when the min is not already zero. if stats.erased().min_is_zero() { - return Ok(0.0); + return CompressionEstimate::Skip; } // Difference between max and min. 
let for_bitwidth = match stats.erased().max_minus_min().checked_ilog2() { Some(l) => l + 1, - // If max-min == 0, the we should compress as a constant array. - None => return Ok(0.0), + // If max-min == 0, the we should be compressing this as a constant array. + None => return CompressionEstimate::Skip, }; // If BitPacking can be applied (only non-negative values) and FoR doesn't reduce bit width @@ -148,18 +142,18 @@ impl Scheme for FoRScheme { { let bitpack_bitwidth = max_log + 1; if for_bitwidth >= bitpack_bitwidth { - return Ok(0.0); + return CompressionEstimate::Skip; } } - let full_width: u32 = stats - .source() + let full_width: u32 = data + .array_as_primitive() .ptype() .bit_width() .try_into() .vortex_expect("bit width must fit in u32"); - Ok(full_width as f64 / for_bitwidth as f64) + CompressionEstimate::Ratio(full_width as f64 / for_bitwidth as f64) } fn compress( @@ -177,7 +171,7 @@ impl Scheme for FoRScheme { // NOTE: we could delegate in the future if we had another downstream codec that performs // as well. let leaf_ctx = ctx.clone().as_leaf(); - let mut biased_data = ArrayAndStats::new(biased.into_array(), ctx.stats_options()); + let mut biased_data = ArrayAndStats::new(biased.into_array(), ctx.merged_stats_options()); let compressed = BitPackingScheme.compress(compressor, &mut biased_data, leaf_ctx)?; // TODO(connor): This should really be `new_unchecked`. @@ -245,30 +239,23 @@ impl Scheme for ZigZagScheme { fn expected_compression_ratio( &self, - compressor: &CascadingCompressor, data: &mut ArrayAndStats, ctx: CompressorContext, - ) -> VortexResult { + ) -> CompressionEstimate { // ZigZag only transforms negative values to positive. Without further compression, // the output is the same size. if ctx.finished_cascading() { - return Ok(0.0); + return CompressionEstimate::Skip; } let stats = data.integer_stats(); - // Don't try and compress all-null arrays. 
- if stats.value_count() == 0 { - return Ok(0.0); - } - // ZigZag is only useful when there are negative values. if !stats.erased().min_is_negative() { - return Ok(0.0); + return CompressionEstimate::Skip; } - // Run compression on a sample to see how it performs. - estimate_compression_ratio_with_sampling(self, compressor, data.array(), ctx) + CompressionEstimate::Sample } fn compress( @@ -277,10 +264,8 @@ impl Scheme for ZigZagScheme { data: &mut ArrayAndStats, ctx: CompressorContext, ) -> VortexResult { - let stats = data.integer_stats(); - // Zigzag encode the values, then recursively compress the inner values. - let zag = zigzag_encode(stats.source().clone())?; + let zag = zigzag_encode(data.array_as_primitive().clone())?; let encoded = zag.encoded().to_primitive(); let compressed = compressor.compress_child(&encoded.into_array(), &ctx, self.id(), 0)?; @@ -302,23 +287,17 @@ impl Scheme for BitPackingScheme { fn expected_compression_ratio( &self, - compressor: &CascadingCompressor, data: &mut ArrayAndStats, - ctx: CompressorContext, - ) -> VortexResult { + _ctx: CompressorContext, + ) -> CompressionEstimate { let stats = data.integer_stats(); // BitPacking only works for non-negative values. if stats.erased().min_is_negative() { - return Ok(0.0); - } - - // Don't compress all-null arrays. 
- if stats.value_count() == 0 { - return Ok(0.0); + return CompressionEstimate::Skip; } - estimate_compression_ratio_with_sampling(self, compressor, data.array(), ctx) + CompressionEstimate::Sample } fn compress( @@ -327,15 +306,18 @@ impl Scheme for BitPackingScheme { data: &mut ArrayAndStats, _ctx: CompressorContext, ) -> VortexResult { - let stats = data.integer_stats(); + let primitive_array = data.array_as_primitive(); + + let histogram = bit_width_histogram(primitive_array)?; + let bw = find_best_bit_width(primitive_array.ptype(), &histogram)?; - let histogram = bit_width_histogram(stats.source())?; - let bw = find_best_bit_width(stats.source().ptype(), &histogram)?; // If best bw is determined to be the current bit-width, return the original array. - if bw as usize == stats.source().ptype().bit_width() { - return Ok(stats.source().clone().into_array()); + if bw as usize == primitive_array.ptype().bit_width() { + return Ok(primitive_array.clone().into_array()); } - let mut packed = bitpack_encode(stats.source(), bw, Some(&histogram))?; + + // Otherwise we can bitpack the array. + let mut packed = bitpack_encode(primitive_array, bw, Some(&histogram))?; let patches = packed.patches().map(compress_patches).transpose()?; packed.replace_patches(patches); @@ -389,42 +371,44 @@ impl Scheme for SparseScheme { fn expected_compression_ratio( &self, - _compressor: &CascadingCompressor, data: &mut ArrayAndStats, _ctx: CompressorContext, - ) -> VortexResult { + ) -> CompressionEstimate { + let len = data.array_len() as f64; let stats = data.integer_stats(); + let value_count = stats.value_count(); - if stats.value_count() == 0 { - // All nulls should use ConstantScheme. - return Ok(0.0); + // All-null arrays should be compressed as constant instead anyways. + if value_count == 0 { + return CompressionEstimate::Skip; } - // If the majority is null, will compress well. 
- if stats.null_count() as f64 / stats.source().len() as f64 > 0.9 { - return Ok(stats.source().len() as f64 / stats.value_count() as f64); + // If the majority (90%) of values is null, this will compress well. + if stats.null_count() as f64 / len > 0.9 { + return CompressionEstimate::Ratio(len / value_count as f64); } - // See if the top value accounts for >= 90% of the set values. - let (_, top_count) = stats + let (_, most_frequent_count) = stats .erased() .most_frequent_value_and_count() .vortex_expect( "this must be present since `SparseScheme` declared that we need distinct values", ); - if top_count == stats.value_count() { - // top_value is the only value, should use ConstantScheme instead. - return Ok(0.0); + // If the most frequent value is the only value, we should compress as constant instead. + if most_frequent_count == value_count { + return CompressionEstimate::Skip; } + debug_assert!(value_count > most_frequent_count); - let freq = top_count as f64 / stats.value_count() as f64; - if freq >= 0.9 { - // We only store the positions of the non-top values. - return Ok(stats.value_count() as f64 / (stats.value_count() - top_count) as f64); + // See if the most frequent value accounts for >= 90% of the set values. + let freq = most_frequent_count as f64 / value_count as f64; + if freq < 0.9 { + return CompressionEstimate::Skip; } - Ok(0.0) + // We only store the positions of the non-top values. + CompressionEstimate::Ratio(value_count as f64 / (value_count - most_frequent_count) as f64) } fn compress( @@ -433,33 +417,37 @@ impl Scheme for SparseScheme { data: &mut ArrayAndStats, ctx: CompressorContext, ) -> VortexResult { - let stats = data.integer_stats(); + let len = data.array_len(); + // TODO(connor): Fight the borrow checker (needs interior mutability)! 
+ let stats = data.integer_stats().clone(); + let array = data.array(); - let (top_pvalue, top_count) = stats + let (most_frequent_value, most_frequent_count) = stats .erased() .most_frequent_value_and_count() .vortex_expect( "this must be present since `SparseScheme` declared that we need distinct values", ); - if top_count as usize == stats.source().len() { - // top_value is the only value, use ConstantScheme. + + if most_frequent_count as usize == len { + // If the most frequent value is the only value, we should compress as constant instead. return Ok(ConstantArray::new( Scalar::primitive_value( - top_pvalue, - top_pvalue.ptype(), - stats.source().dtype().nullability(), + most_frequent_value, + most_frequent_value.ptype(), + array.dtype().nullability(), ), - stats.source().len(), + len, ) .into_array()); } let sparse_encoded = SparseArray::encode( - &stats.source().clone().into_array(), + array, Some(Scalar::primitive_value( - top_pvalue, - top_pvalue.ptype(), - stats.source().dtype().nullability(), + most_frequent_value, + most_frequent_value.ptype(), + array.dtype().nullability(), )), )?; @@ -547,19 +535,15 @@ impl Scheme for RunEndScheme { fn expected_compression_ratio( &self, - compressor: &CascadingCompressor, data: &mut ArrayAndStats, - ctx: CompressorContext, - ) -> VortexResult { - let stats = data.integer_stats(); - + _ctx: CompressorContext, + ) -> CompressionEstimate { // If the run length is below the threshold, drop it. - if stats.average_run_length() < RUN_END_THRESHOLD { - return Ok(0.0); + if data.integer_stats().average_run_length() < RUN_END_THRESHOLD { + return CompressionEstimate::Skip; } - // Run compression on a sample, see how it performs. 
- estimate_compression_ratio_with_sampling(self, compressor, data.array(), ctx) + CompressionEstimate::Sample } fn compress( @@ -568,10 +552,8 @@ impl Scheme for RunEndScheme { data: &mut ArrayAndStats, ctx: CompressorContext, ) -> VortexResult { - let stats = data.integer_stats(); - // Run-end encode the ends. - let (ends, values) = runend_encode(stats.source()); + let (ends, values) = runend_encode(data.array_as_primitive()); let compressed_values = compressor.compress_child(&values.to_primitive().into_array(), &ctx, self.id(), 0)?; @@ -580,15 +562,10 @@ impl Scheme for RunEndScheme { compressor.compress_child(&ends.to_primitive().into_array(), &ctx, self.id(), 1)?; // SAFETY: compression doesn't affect invariants. - unsafe { - Ok(RunEndArray::new_unchecked( - compressed_ends, - compressed_values, - 0, - stats.source().len(), - ) - .into_array()) - } + Ok(unsafe { + RunEndArray::new_unchecked(compressed_ends, compressed_values, 0, data.array_len()) + .into_array() + }) } } @@ -623,35 +600,45 @@ impl Scheme for SequenceScheme { fn expected_compression_ratio( &self, - _compressor: &CascadingCompressor, data: &mut ArrayAndStats, - _ctx: CompressorContext, - ) -> VortexResult { + ctx: CompressorContext, + ) -> CompressionEstimate { + // It is pointless checking if a sample is a sequence since it will not correspond to the + // entire array. + if ctx.is_sample() { + return CompressionEstimate::Skip; + } + let stats = data.integer_stats(); + // `SequenceArray` does not support nulls. if stats.null_count() > 0 { - return Ok(0.0); + return CompressionEstimate::Skip; } - // TODO(connor): Why do we sequence encode the whole thing and then throw it away? And then - // why do we divide the ratio by 2??? - // If the distinct_values_count was computed, and not all values are unique, then this // cannot be encoded as a sequence array. if stats .distinct_count() - // TODO(connor): Shouldn't this be `is_none_or`??? Why do things fail if not this? 
- .is_some_and(|count| count as usize != stats.source().len()) + .is_some_and(|count| count as usize != data.array_len()) { - return Ok(0.0); + return CompressionEstimate::Skip; } - // TODO(connor): Why divide by 2??? - // Since two values are required to store base and multiplier the compression ratio is - // divided by 2. - Ok(sequence_encode(stats.source())? - .map(|_| stats.source().len() as f64 / 2.0) - .unwrap_or(0.0)) + // TODO(connor): Why do we sequence encode the whole thing and then throw it away? And then + // why do we divide the ratio by 2??? + + CompressionEstimate::Estimate(Box::new(|_compressor, data, _ctx| { + let Some(encoded) = sequence_encode(data.array_as_primitive())? else { + // If we are unable to sequence encode this array, make sure we skip. + return Ok(CompressionEstimate::Skip); + }; + + // TODO(connor): This doesn't really make sense? + // Since two values are required to store base and multiplier the compression ratio is + // divided by 2. + Ok(CompressionEstimate::Ratio(encoded.len() as f64 / 2.0)) + })) } fn compress( @@ -665,7 +652,8 @@ impl Scheme for SequenceScheme { if stats.null_count() > 0 { vortex_bail!("sequence encoding does not support nulls"); } - sequence_encode(stats.source())?.ok_or_else(|| vortex_err!("cannot sequence encode array")) + sequence_encode(data.array_as_primitive())? + .ok_or_else(|| vortex_err!("cannot sequence encode array")) } } @@ -681,21 +669,17 @@ impl Scheme for PcoScheme { fn expected_compression_ratio( &self, - compressor: &CascadingCompressor, data: &mut ArrayAndStats, - ctx: CompressorContext, - ) -> VortexResult { - let stats = data.integer_stats(); + _ctx: CompressorContext, + ) -> CompressionEstimate { + use vortex_array::dtype::PType; // Pco does not support I8 or U8. 
- if matches!( - stats.source().ptype(), - vortex_array::dtype::PType::I8 | vortex_array::dtype::PType::U8 - ) { - return Ok(0.0); + if matches!(data.array_as_primitive().ptype(), PType::I8 | PType::U8) { + return CompressionEstimate::Skip; } - estimate_compression_ratio_with_sampling(self, compressor, data.array(), ctx) + CompressionEstimate::Sample } fn compress( @@ -704,10 +688,8 @@ impl Scheme for PcoScheme { data: &mut ArrayAndStats, _ctx: CompressorContext, ) -> VortexResult { - let stats = data.integer_stats(); - Ok(vortex_pco::PcoArray::from_primitive( - stats.source(), + data.array_as_primitive(), pco::DEFAULT_COMPRESSION_LEVEL, 8192, )? @@ -725,7 +707,9 @@ mod tests { use rand::rngs::StdRng; use vortex_array::DynArray; use vortex_array::IntoArray; + use vortex_array::arrays::Constant; use vortex_array::arrays::Dict; + use vortex_array::arrays::Masked; use vortex_array::arrays::PrimitiveArray; use vortex_array::assert_arrays_eq; use vortex_array::validity::Validity; @@ -736,7 +720,6 @@ mod tests { use vortex_error::VortexResult; use vortex_fastlanes::RLE; use vortex_sequence::Sequence; - use vortex_sparse::Sparse; use crate::BtrBlocksCompressor; use crate::schemes::rle::RLE_INTEGER_SCHEME; @@ -779,7 +762,7 @@ mod tests { } #[test] - fn sparse_mostly_nulls() -> VortexResult<()> { + fn constant_mostly_nulls() -> VortexResult<()> { let array = PrimitiveArray::new( buffer![189u8, 189, 189, 189, 189, 189, 189, 189, 189, 0, 46], Validity::from_iter(vec![ @@ -791,7 +774,9 @@ mod tests { let btr = BtrBlocksCompressor::default(); let compressed = btr.compress(&array.into_array())?; - assert!(compressed.is::()); + + assert!(compressed.is::()); + assert!(compressed.children()[0].is::()); let decoded = compressed.clone(); let expected = diff --git a/vortex-btrblocks/src/schemes/rle.rs b/vortex-btrblocks/src/schemes/rle.rs index 8a34f21e532..1e5a1624068 100644 --- a/vortex-btrblocks/src/schemes/rle.rs +++ b/vortex-btrblocks/src/schemes/rle.rs @@ -13,6 +13,7 @@ use 
vortex_compressor::builtins::FloatDictScheme; use vortex_compressor::builtins::StringDictScheme; use vortex_compressor::builtins::is_float_primitive; use vortex_compressor::builtins::is_integer_primitive; +use vortex_compressor::estimate::CompressionEstimate; use vortex_compressor::scheme::AncestorExclusion; use vortex_compressor::scheme::ChildSelection; use vortex_compressor::scheme::DescendantExclusion; @@ -28,7 +29,6 @@ use crate::CascadingCompressor; use crate::CompressorContext; use crate::Scheme; use crate::SchemeExt; -use crate::estimate_compression_ratio_with_sampling; use crate::schemes::integer::IntDictScheme; use crate::schemes::integer::SparseScheme; @@ -64,7 +64,7 @@ pub trait RLEConfig: Debug + Send + Sync + 'static { fn matches(canonical: &Canonical) -> bool; /// Generates statistics for the given array. - fn generate_stats(array: &ArrayRef) -> Self::Stats; + fn generate_stats(array: &PrimitiveArray) -> Self::Stats; } impl RLEConfig for IntRLEConfig { @@ -76,8 +76,8 @@ impl RLEConfig for IntRLEConfig { is_integer_primitive(canonical) } - fn generate_stats(array: &ArrayRef) -> IntegerStats { - IntegerStats::generate(&array.to_primitive()) + fn generate_stats(array: &PrimitiveArray) -> IntegerStats { + IntegerStats::generate(array) } } @@ -90,47 +90,28 @@ impl RLEConfig for FloatRLEConfig { is_float_primitive(canonical) } - fn generate_stats(array: &ArrayRef) -> FloatStats { - FloatStats::generate(&array.to_primitive()) + fn generate_stats(array: &PrimitiveArray) -> FloatStats { + FloatStats::generate(array) } } +// TODO(connor): This is completely unnecessary now. /// Trait for accessing RLE-specific statistics. pub trait RLEStats { - /// Returns the number of non-null values. - fn value_count(&self) -> u32; /// Returns the average run length. fn average_run_length(&self) -> u32; - /// Returns the underlying source array. 
- fn source(&self) -> &PrimitiveArray; } impl RLEStats for IntegerStats { - fn value_count(&self) -> u32 { - self.value_count() - } - fn average_run_length(&self) -> u32 { self.average_run_length() } - - fn source(&self) -> &PrimitiveArray { - self.source() - } } impl RLEStats for FloatStats { - fn value_count(&self) -> u32 { - FloatStats::value_count(self) - } - fn average_run_length(&self) -> u32 { FloatStats::average_run_length(self) } - - fn source(&self) -> &PrimitiveArray { - FloatStats::source(self) - } } /// RLE scheme that is generic over a configuration type. @@ -207,26 +188,24 @@ impl Scheme for RLEScheme { fn expected_compression_ratio( &self, - compressor: &CascadingCompressor, data: &mut ArrayAndStats, ctx: CompressorContext, - ) -> VortexResult { + ) -> CompressionEstimate { // RLE is only useful when we cascade it with another encoding. - let array = data.array().clone(); - let stats = data.get_or_insert_with::(|| C::generate_stats(&array)); - - // Don't compress all-null or empty arrays. - if stats.value_count() == 0 { - return Ok(0.0); + if ctx.finished_cascading() { + return CompressionEstimate::Skip; } + // TODO(connor): Fight the borrow checker (needs interior mutability)! + let array = data.array_as_primitive().clone(); + let stats = data.get_or_insert_with::(|| C::generate_stats(&array)); + // Check whether RLE is a good fit, based on the average run length. if stats.average_run_length() < RUN_LENGTH_THRESHOLD { - return Ok(0.0); + return CompressionEstimate::Skip; } - // Run compression on a sample to see how it performs. 
- estimate_compression_ratio_with_sampling(self, compressor, data.array(), ctx) + CompressionEstimate::Sample } fn compress( @@ -235,9 +214,8 @@ impl Scheme for RLEScheme { data: &mut ArrayAndStats, ctx: CompressorContext, ) -> VortexResult { - let array = data.array().clone(); - let stats = data.get_or_insert_with::(|| C::generate_stats(&array)); - let rle_array = RLEArray::encode(RLEStats::source(stats))?; + let array = data.array_as_primitive(); + let rle_array = RLEArray::encode(array)?; let compressed_values = compressor.compress_child( &rle_array.values().to_primitive().into_array(), diff --git a/vortex-btrblocks/src/schemes/string.rs b/vortex-btrblocks/src/schemes/string.rs index fbcb771e9b5..ca9f8acabed 100644 --- a/vortex-btrblocks/src/schemes/string.rs +++ b/vortex-btrblocks/src/schemes/string.rs @@ -8,6 +8,7 @@ use vortex_array::Canonical; use vortex_array::IntoArray; use vortex_array::ToCanonical; use vortex_array::arrays::VarBinArray; +use vortex_compressor::estimate::CompressionEstimate; use vortex_compressor::scheme::ChildSelection; use vortex_compressor::scheme::DescendantExclusion; use vortex_error::VortexResult; @@ -65,18 +66,23 @@ impl Scheme for FSSTScheme { 2 } + fn expected_compression_ratio( + &self, + _data: &mut ArrayAndStats, + _ctx: CompressorContext, + ) -> CompressionEstimate { + CompressionEstimate::Sample + } + fn compress( &self, compressor: &CascadingCompressor, data: &mut ArrayAndStats, ctx: CompressorContext, ) -> VortexResult { - let stats = data.string_stats(); - - let fsst = { - let compressor_fsst = fsst_train_compressor(stats.source()); - fsst_compress(stats.source(), &compressor_fsst) - }; + let utf8 = data.array_as_utf8(); + let compressor_fsst = fsst_train_compressor(utf8); + let fsst = fsst_compress(utf8, &compressor_fsst); let compressed_original_lengths = compressor.compress_child( &fsst @@ -144,24 +150,25 @@ impl Scheme for NullDominatedSparseScheme { fn expected_compression_ratio( &self, - _compressor: 
&CascadingCompressor, data: &mut ArrayAndStats, _ctx: CompressorContext, - ) -> VortexResult { + ) -> CompressionEstimate { + let len = data.array_len() as f64; let stats = data.string_stats(); + let value_count = stats.value_count(); - if stats.value_count() == 0 { - // All nulls should use ConstantScheme. - return Ok(0.0); + // All-null arrays should be compressed as constant instead anyways. + if value_count == 0 { + return CompressionEstimate::Skip; } - // If the majority is null, will compress well. - if stats.null_count() as f64 / stats.source().len() as f64 > 0.9 { - return Ok(stats.source().len() as f64 / stats.value_count() as f64); + // If the majority (90%) of values is null, this will compress well. + if stats.null_count() as f64 / len > 0.9 { + return CompressionEstimate::Ratio(len / value_count as f64); } // Otherwise we don't go this route. - Ok(0.0) + CompressionEstimate::Skip } fn compress( @@ -170,10 +177,8 @@ impl Scheme for NullDominatedSparseScheme { data: &mut ArrayAndStats, ctx: CompressorContext, ) -> VortexResult { - let stats = data.string_stats(); - // We pass None as we only run this pathway for NULL-dominated string arrays. - let sparse_encoded = SparseArray::encode(&stats.source().clone().into_array(), None)?; + let sparse_encoded = SparseArray::encode(data.array(), None)?; if let Some(sparse) = sparse_encoded.as_opt::() { // Compress the indices only (not the values for strings). 
@@ -204,15 +209,21 @@ impl Scheme for ZstdScheme { is_utf8_string(canonical) } + fn expected_compression_ratio( + &self, + _data: &mut ArrayAndStats, + _ctx: CompressorContext, + ) -> CompressionEstimate { + CompressionEstimate::Sample + } + fn compress( &self, _compressor: &CascadingCompressor, data: &mut ArrayAndStats, _ctx: CompressorContext, ) -> VortexResult { - let stats = data.string_stats(); - - let compacted = stats.source().compact_buffers()?; + let compacted = data.array_as_utf8().compact_buffers()?; Ok( vortex_zstd::ZstdArray::from_var_bin_view_without_dict(&compacted, 3, 8192)? .into_array(), @@ -230,18 +241,21 @@ impl Scheme for ZstdBuffersScheme { is_utf8_string(canonical) } + fn expected_compression_ratio( + &self, + _data: &mut ArrayAndStats, + _ctx: CompressorContext, + ) -> CompressionEstimate { + CompressionEstimate::Sample + } + fn compress( &self, _compressor: &CascadingCompressor, data: &mut ArrayAndStats, _ctx: CompressorContext, ) -> VortexResult { - let stats = data.string_stats(); - - Ok( - vortex_zstd::ZstdBuffersArray::compress(&stats.source().clone().into_array(), 3)? 
- .into_array(), - ) + Ok(vortex_zstd::ZstdBuffersArray::compress(data.array(), 3)?.into_array()) } } diff --git a/vortex-btrblocks/src/schemes/temporal.rs b/vortex-btrblocks/src/schemes/temporal.rs index f1ecb158d96..f934b230845 100644 --- a/vortex-btrblocks/src/schemes/temporal.rs +++ b/vortex-btrblocks/src/schemes/temporal.rs @@ -13,6 +13,7 @@ use vortex_array::arrays::TemporalArray; use vortex_array::dtype::extension::Matcher; use vortex_array::extension::datetime::AnyTemporal; use vortex_array::extension::datetime::TemporalMetadata; +use vortex_compressor::estimate::CompressionEstimate; use vortex_datetime_parts::DateTimePartsArray; use vortex_datetime_parts::TemporalParts; use vortex_datetime_parts::split_temporal; @@ -49,10 +50,6 @@ impl Scheme for TemporalScheme { ) } - fn detects_constant(&self) -> bool { - true - } - /// Children: days=0, seconds=1, subseconds=2. fn num_children(&self) -> usize { 3 @@ -60,13 +57,11 @@ impl Scheme for TemporalScheme { fn expected_compression_ratio( &self, - _compressor: &CascadingCompressor, _data: &mut ArrayAndStats, _ctx: CompressorContext, - ) -> VortexResult { + ) -> CompressionEstimate { // Temporal compression (splitting into parts) is almost always beneficial. - // Return a moderate ratio to ensure this scheme is selected. 
- Ok(f64::MAX) + CompressionEstimate::AlwaysUse } fn compress( diff --git a/vortex-compressor/Cargo.toml b/vortex-compressor/Cargo.toml index 260c9c531f5..da9bd07889c 100644 --- a/vortex-compressor/Cargo.toml +++ b/vortex-compressor/Cargo.toml @@ -27,8 +27,19 @@ vortex-mask = { workspace = true } vortex-utils = { workspace = true } [dev-dependencies] +divan = { workspace = true } rstest = { workspace = true } vortex-array = { workspace = true, features = ["_test-harness"] } [lints] workspace = true + +[[bench]] +name = "dict_encode" +harness = false +test = false + +[[bench]] +name = "stats_calc" +harness = false +test = false diff --git a/vortex-btrblocks/benches/dict_encode.rs b/vortex-compressor/benches/dict_encode.rs similarity index 81% rename from vortex-btrblocks/benches/dict_encode.rs rename to vortex-compressor/benches/dict_encode.rs index 8d7c6fc6297..52f5329af47 100644 --- a/vortex-btrblocks/benches/dict_encode.rs +++ b/vortex-compressor/benches/dict_encode.rs @@ -9,9 +9,9 @@ use vortex_array::arrays::BoolArray; use vortex_array::arrays::PrimitiveArray; use vortex_array::builders::dict::dict_encode; use vortex_array::validity::Validity; -use vortex_btrblocks::IntegerStats; -use vortex_btrblocks::integer_dictionary_encode; use vortex_buffer::BufferMut; +use vortex_compressor::builtins::integer_dictionary_encode; +use vortex_compressor::stats::IntegerStats; fn make_array() -> PrimitiveArray { let values: BufferMut = (0..50).cycle().take(64_000).collect(); @@ -39,10 +39,11 @@ fn encode_generic(bencher: Bencher) { #[cfg(not(codspeed))] #[divan::bench] fn encode_specialized(bencher: Bencher) { - let stats = IntegerStats::generate(&make_array()); + let array = make_array(); + let stats = IntegerStats::generate(&array); bencher .with_inputs(|| &stats) - .bench_refs(|stats| integer_dictionary_encode(stats)); + .bench_refs(|stats| integer_dictionary_encode(&array, stats)); } fn main() { diff --git a/vortex-btrblocks/benches/stats_calc.rs 
b/vortex-compressor/benches/stats_calc.rs similarity index 96% rename from vortex-btrblocks/benches/stats_calc.rs rename to vortex-compressor/benches/stats_calc.rs index b3070598d6b..5675c8de434 100644 --- a/vortex-btrblocks/benches/stats_calc.rs +++ b/vortex-compressor/benches/stats_calc.rs @@ -10,10 +10,10 @@ mod benchmarks { use divan::Bencher; use vortex_array::arrays::PrimitiveArray; use vortex_array::validity::Validity; - use vortex_btrblocks::GenerateStatsOptions; - use vortex_btrblocks::IntegerStats; use vortex_buffer::Buffer; use vortex_buffer::BufferMut; + use vortex_compressor::stats::GenerateStatsOptions; + use vortex_compressor::stats::IntegerStats; fn generate_dataset(max_run: u32, distinct: u32) -> Buffer { let mut output = BufferMut::with_capacity(64_000); diff --git a/vortex-compressor/public-api.lock b/vortex-compressor/public-api.lock index a2e1dd47677..f9332cb090b 100644 --- a/vortex-compressor/public-api.lock +++ b/vortex-compressor/public-api.lock @@ -30,9 +30,7 @@ pub fn vortex_compressor::builtins::BoolConstantScheme::compress(&self, _compres pub fn vortex_compressor::builtins::BoolConstantScheme::descendant_exclusions(&self) -> alloc::vec::Vec -pub fn vortex_compressor::builtins::BoolConstantScheme::detects_constant(&self) -> bool - -pub fn vortex_compressor::builtins::BoolConstantScheme::expected_compression_ratio(&self, _compressor: &vortex_compressor::CascadingCompressor, data: &mut vortex_compressor::stats::ArrayAndStats, ctx: vortex_compressor::ctx::CompressorContext) -> vortex_error::VortexResult +pub fn vortex_compressor::builtins::BoolConstantScheme::expected_compression_ratio(&self, data: &mut vortex_compressor::stats::ArrayAndStats, ctx: vortex_compressor::ctx::CompressorContext) -> vortex_compressor::estimate::CompressionEstimate pub fn vortex_compressor::builtins::BoolConstantScheme::matches(&self, canonical: &vortex_array::canonical::Canonical) -> bool @@ -70,9 +68,7 @@ pub fn 
vortex_compressor::builtins::FloatConstantScheme::compress(&self, _compre pub fn vortex_compressor::builtins::FloatConstantScheme::descendant_exclusions(&self) -> alloc::vec::Vec -pub fn vortex_compressor::builtins::FloatConstantScheme::detects_constant(&self) -> bool - -pub fn vortex_compressor::builtins::FloatConstantScheme::expected_compression_ratio(&self, _compressor: &vortex_compressor::CascadingCompressor, data: &mut vortex_compressor::stats::ArrayAndStats, ctx: vortex_compressor::ctx::CompressorContext) -> vortex_error::VortexResult +pub fn vortex_compressor::builtins::FloatConstantScheme::expected_compression_ratio(&self, data: &mut vortex_compressor::stats::ArrayAndStats, ctx: vortex_compressor::ctx::CompressorContext) -> vortex_compressor::estimate::CompressionEstimate pub fn vortex_compressor::builtins::FloatConstantScheme::matches(&self, canonical: &vortex_array::canonical::Canonical) -> bool @@ -110,9 +106,7 @@ pub fn vortex_compressor::builtins::FloatDictScheme::compress(&self, compressor: pub fn vortex_compressor::builtins::FloatDictScheme::descendant_exclusions(&self) -> alloc::vec::Vec -pub fn vortex_compressor::builtins::FloatDictScheme::detects_constant(&self) -> bool - -pub fn vortex_compressor::builtins::FloatDictScheme::expected_compression_ratio(&self, compressor: &vortex_compressor::CascadingCompressor, data: &mut vortex_compressor::stats::ArrayAndStats, ctx: vortex_compressor::ctx::CompressorContext) -> vortex_error::VortexResult +pub fn vortex_compressor::builtins::FloatDictScheme::expected_compression_ratio(&self, data: &mut vortex_compressor::stats::ArrayAndStats, _ctx: vortex_compressor::ctx::CompressorContext) -> vortex_compressor::estimate::CompressionEstimate pub fn vortex_compressor::builtins::FloatDictScheme::matches(&self, canonical: &vortex_array::canonical::Canonical) -> bool @@ -150,9 +144,7 @@ pub fn vortex_compressor::builtins::IntConstantScheme::compress(&self, _compress pub fn 
vortex_compressor::builtins::IntConstantScheme::descendant_exclusions(&self) -> alloc::vec::Vec -pub fn vortex_compressor::builtins::IntConstantScheme::detects_constant(&self) -> bool - -pub fn vortex_compressor::builtins::IntConstantScheme::expected_compression_ratio(&self, _compressor: &vortex_compressor::CascadingCompressor, data: &mut vortex_compressor::stats::ArrayAndStats, ctx: vortex_compressor::ctx::CompressorContext) -> vortex_error::VortexResult +pub fn vortex_compressor::builtins::IntConstantScheme::expected_compression_ratio(&self, data: &mut vortex_compressor::stats::ArrayAndStats, ctx: vortex_compressor::ctx::CompressorContext) -> vortex_compressor::estimate::CompressionEstimate pub fn vortex_compressor::builtins::IntConstantScheme::matches(&self, canonical: &vortex_array::canonical::Canonical) -> bool @@ -190,9 +182,7 @@ pub fn vortex_compressor::builtins::IntDictScheme::compress(&self, compressor: & pub fn vortex_compressor::builtins::IntDictScheme::descendant_exclusions(&self) -> alloc::vec::Vec -pub fn vortex_compressor::builtins::IntDictScheme::detects_constant(&self) -> bool - -pub fn vortex_compressor::builtins::IntDictScheme::expected_compression_ratio(&self, _compressor: &vortex_compressor::CascadingCompressor, data: &mut vortex_compressor::stats::ArrayAndStats, _ctx: vortex_compressor::ctx::CompressorContext) -> vortex_error::VortexResult +pub fn vortex_compressor::builtins::IntDictScheme::expected_compression_ratio(&self, data: &mut vortex_compressor::stats::ArrayAndStats, _ctx: vortex_compressor::ctx::CompressorContext) -> vortex_compressor::estimate::CompressionEstimate pub fn vortex_compressor::builtins::IntDictScheme::matches(&self, canonical: &vortex_array::canonical::Canonical) -> bool @@ -230,9 +220,7 @@ pub fn vortex_compressor::builtins::StringConstantScheme::compress(&self, _compr pub fn vortex_compressor::builtins::StringConstantScheme::descendant_exclusions(&self) -> alloc::vec::Vec -pub fn 
vortex_compressor::builtins::StringConstantScheme::detects_constant(&self) -> bool - -pub fn vortex_compressor::builtins::StringConstantScheme::expected_compression_ratio(&self, compressor: &vortex_compressor::CascadingCompressor, data: &mut vortex_compressor::stats::ArrayAndStats, ctx: vortex_compressor::ctx::CompressorContext) -> vortex_error::VortexResult +pub fn vortex_compressor::builtins::StringConstantScheme::expected_compression_ratio(&self, data: &mut vortex_compressor::stats::ArrayAndStats, ctx: vortex_compressor::ctx::CompressorContext) -> vortex_compressor::estimate::CompressionEstimate pub fn vortex_compressor::builtins::StringConstantScheme::matches(&self, canonical: &vortex_array::canonical::Canonical) -> bool @@ -270,9 +258,7 @@ pub fn vortex_compressor::builtins::StringDictScheme::compress(&self, compressor pub fn vortex_compressor::builtins::StringDictScheme::descendant_exclusions(&self) -> alloc::vec::Vec -pub fn vortex_compressor::builtins::StringDictScheme::detects_constant(&self) -> bool - -pub fn vortex_compressor::builtins::StringDictScheme::expected_compression_ratio(&self, compressor: &vortex_compressor::CascadingCompressor, data: &mut vortex_compressor::stats::ArrayAndStats, ctx: vortex_compressor::ctx::CompressorContext) -> vortex_error::VortexResult +pub fn vortex_compressor::builtins::StringDictScheme::expected_compression_ratio(&self, data: &mut vortex_compressor::stats::ArrayAndStats, _ctx: vortex_compressor::ctx::CompressorContext) -> vortex_compressor::estimate::CompressionEstimate pub fn vortex_compressor::builtins::StringDictScheme::matches(&self, canonical: &vortex_array::canonical::Canonical) -> bool @@ -282,11 +268,9 @@ pub fn vortex_compressor::builtins::StringDictScheme::scheme_name(&self) -> &'st pub fn vortex_compressor::builtins::StringDictScheme::stats_options(&self) -> vortex_compressor::stats::GenerateStatsOptions -pub fn vortex_compressor::builtins::float_dictionary_encode(stats: &vortex_compressor::stats::FloatStats) 
-> vortex_array::arrays::dict::array::DictArray +pub fn vortex_compressor::builtins::float_dictionary_encode(array: &vortex_array::arrays::primitive::array::PrimitiveArray, stats: &vortex_compressor::stats::FloatStats) -> vortex_array::arrays::dict::array::DictArray -pub fn vortex_compressor::builtins::integer_dictionary_encode(stats: &vortex_compressor::stats::IntegerStats) -> vortex_array::arrays::dict::array::DictArray - -pub fn vortex_compressor::builtins::is_bool(canonical: &vortex_array::canonical::Canonical) -> bool +pub fn vortex_compressor::builtins::integer_dictionary_encode(array: &vortex_array::arrays::primitive::array::PrimitiveArray, stats: &vortex_compressor::stats::IntegerStats) -> vortex_array::arrays::dict::array::DictArray pub fn vortex_compressor::builtins::is_float_primitive(canonical: &vortex_array::canonical::Canonical) -> bool @@ -302,17 +286,13 @@ impl vortex_compressor::ctx::CompressorContext pub fn vortex_compressor::ctx::CompressorContext::as_leaf(self) -> Self -pub fn vortex_compressor::ctx::CompressorContext::as_sample(self) -> Self - pub fn vortex_compressor::ctx::CompressorContext::cascade_history(&self) -> &[(vortex_compressor::scheme::SchemeId, usize)] pub fn vortex_compressor::ctx::CompressorContext::finished_cascading(&self) -> bool pub fn vortex_compressor::ctx::CompressorContext::is_sample(&self) -> bool -pub fn vortex_compressor::ctx::CompressorContext::stats_options(&self) -> vortex_compressor::stats::GenerateStatsOptions - -pub fn vortex_compressor::ctx::CompressorContext::with_stats_options(self, opts: vortex_compressor::stats::GenerateStatsOptions) -> Self +pub fn vortex_compressor::ctx::CompressorContext::merged_stats_options(&self) -> vortex_compressor::stats::GenerateStatsOptions impl core::clone::Clone for vortex_compressor::ctx::CompressorContext @@ -324,6 +304,26 @@ pub fn vortex_compressor::ctx::CompressorContext::fmt(&self, f: &mut core::fmt:: pub const vortex_compressor::ctx::MAX_CASCADE: usize +pub mod 
vortex_compressor::estimate + +pub enum vortex_compressor::estimate::CompressionEstimate + +pub vortex_compressor::estimate::CompressionEstimate::AlwaysUse + +pub vortex_compressor::estimate::CompressionEstimate::Estimate(alloc::boxed::Box) + +pub vortex_compressor::estimate::CompressionEstimate::Ratio(f64) + +pub vortex_compressor::estimate::CompressionEstimate::Sample + +pub vortex_compressor::estimate::CompressionEstimate::Skip + +impl core::fmt::Debug for vortex_compressor::estimate::CompressionEstimate + +pub fn vortex_compressor::estimate::CompressionEstimate::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +pub type vortex_compressor::estimate::EstimateFn = (dyn core::ops::function::FnOnce(&vortex_compressor::CascadingCompressor, &mut vortex_compressor::stats::ArrayAndStats, vortex_compressor::ctx::CompressorContext) -> vortex_error::VortexResult + core::marker::Send + core::marker::Sync) + pub mod vortex_compressor::scheme pub enum vortex_compressor::scheme::ChildSelection @@ -416,9 +416,7 @@ pub fn vortex_compressor::scheme::Scheme::compress(&self, compressor: &vortex_co pub fn vortex_compressor::scheme::Scheme::descendant_exclusions(&self) -> alloc::vec::Vec -pub fn vortex_compressor::scheme::Scheme::detects_constant(&self) -> bool - -pub fn vortex_compressor::scheme::Scheme::expected_compression_ratio(&self, compressor: &vortex_compressor::CascadingCompressor, data: &mut vortex_compressor::stats::ArrayAndStats, ctx: vortex_compressor::ctx::CompressorContext) -> vortex_error::VortexResult +pub fn vortex_compressor::scheme::Scheme::expected_compression_ratio(&self, _data: &mut vortex_compressor::stats::ArrayAndStats, _ctx: vortex_compressor::ctx::CompressorContext) -> vortex_compressor::estimate::CompressionEstimate pub fn vortex_compressor::scheme::Scheme::matches(&self, canonical: &vortex_array::canonical::Canonical) -> bool @@ -436,9 +434,7 @@ pub fn vortex_compressor::builtins::BoolConstantScheme::compress(&self, _compres pub fn 
vortex_compressor::builtins::BoolConstantScheme::descendant_exclusions(&self) -> alloc::vec::Vec -pub fn vortex_compressor::builtins::BoolConstantScheme::detects_constant(&self) -> bool - -pub fn vortex_compressor::builtins::BoolConstantScheme::expected_compression_ratio(&self, _compressor: &vortex_compressor::CascadingCompressor, data: &mut vortex_compressor::stats::ArrayAndStats, ctx: vortex_compressor::ctx::CompressorContext) -> vortex_error::VortexResult +pub fn vortex_compressor::builtins::BoolConstantScheme::expected_compression_ratio(&self, data: &mut vortex_compressor::stats::ArrayAndStats, ctx: vortex_compressor::ctx::CompressorContext) -> vortex_compressor::estimate::CompressionEstimate pub fn vortex_compressor::builtins::BoolConstantScheme::matches(&self, canonical: &vortex_array::canonical::Canonical) -> bool @@ -456,9 +452,7 @@ pub fn vortex_compressor::builtins::FloatConstantScheme::compress(&self, _compre pub fn vortex_compressor::builtins::FloatConstantScheme::descendant_exclusions(&self) -> alloc::vec::Vec -pub fn vortex_compressor::builtins::FloatConstantScheme::detects_constant(&self) -> bool - -pub fn vortex_compressor::builtins::FloatConstantScheme::expected_compression_ratio(&self, _compressor: &vortex_compressor::CascadingCompressor, data: &mut vortex_compressor::stats::ArrayAndStats, ctx: vortex_compressor::ctx::CompressorContext) -> vortex_error::VortexResult +pub fn vortex_compressor::builtins::FloatConstantScheme::expected_compression_ratio(&self, data: &mut vortex_compressor::stats::ArrayAndStats, ctx: vortex_compressor::ctx::CompressorContext) -> vortex_compressor::estimate::CompressionEstimate pub fn vortex_compressor::builtins::FloatConstantScheme::matches(&self, canonical: &vortex_array::canonical::Canonical) -> bool @@ -476,9 +470,7 @@ pub fn vortex_compressor::builtins::FloatDictScheme::compress(&self, compressor: pub fn vortex_compressor::builtins::FloatDictScheme::descendant_exclusions(&self) -> alloc::vec::Vec -pub fn 
vortex_compressor::builtins::FloatDictScheme::detects_constant(&self) -> bool - -pub fn vortex_compressor::builtins::FloatDictScheme::expected_compression_ratio(&self, compressor: &vortex_compressor::CascadingCompressor, data: &mut vortex_compressor::stats::ArrayAndStats, ctx: vortex_compressor::ctx::CompressorContext) -> vortex_error::VortexResult +pub fn vortex_compressor::builtins::FloatDictScheme::expected_compression_ratio(&self, data: &mut vortex_compressor::stats::ArrayAndStats, _ctx: vortex_compressor::ctx::CompressorContext) -> vortex_compressor::estimate::CompressionEstimate pub fn vortex_compressor::builtins::FloatDictScheme::matches(&self, canonical: &vortex_array::canonical::Canonical) -> bool @@ -496,9 +488,7 @@ pub fn vortex_compressor::builtins::IntConstantScheme::compress(&self, _compress pub fn vortex_compressor::builtins::IntConstantScheme::descendant_exclusions(&self) -> alloc::vec::Vec -pub fn vortex_compressor::builtins::IntConstantScheme::detects_constant(&self) -> bool - -pub fn vortex_compressor::builtins::IntConstantScheme::expected_compression_ratio(&self, _compressor: &vortex_compressor::CascadingCompressor, data: &mut vortex_compressor::stats::ArrayAndStats, ctx: vortex_compressor::ctx::CompressorContext) -> vortex_error::VortexResult +pub fn vortex_compressor::builtins::IntConstantScheme::expected_compression_ratio(&self, data: &mut vortex_compressor::stats::ArrayAndStats, ctx: vortex_compressor::ctx::CompressorContext) -> vortex_compressor::estimate::CompressionEstimate pub fn vortex_compressor::builtins::IntConstantScheme::matches(&self, canonical: &vortex_array::canonical::Canonical) -> bool @@ -516,9 +506,7 @@ pub fn vortex_compressor::builtins::IntDictScheme::compress(&self, compressor: & pub fn vortex_compressor::builtins::IntDictScheme::descendant_exclusions(&self) -> alloc::vec::Vec -pub fn vortex_compressor::builtins::IntDictScheme::detects_constant(&self) -> bool - -pub fn 
vortex_compressor::builtins::IntDictScheme::expected_compression_ratio(&self, _compressor: &vortex_compressor::CascadingCompressor, data: &mut vortex_compressor::stats::ArrayAndStats, _ctx: vortex_compressor::ctx::CompressorContext) -> vortex_error::VortexResult +pub fn vortex_compressor::builtins::IntDictScheme::expected_compression_ratio(&self, data: &mut vortex_compressor::stats::ArrayAndStats, _ctx: vortex_compressor::ctx::CompressorContext) -> vortex_compressor::estimate::CompressionEstimate pub fn vortex_compressor::builtins::IntDictScheme::matches(&self, canonical: &vortex_array::canonical::Canonical) -> bool @@ -536,9 +524,7 @@ pub fn vortex_compressor::builtins::StringConstantScheme::compress(&self, _compr pub fn vortex_compressor::builtins::StringConstantScheme::descendant_exclusions(&self) -> alloc::vec::Vec -pub fn vortex_compressor::builtins::StringConstantScheme::detects_constant(&self) -> bool - -pub fn vortex_compressor::builtins::StringConstantScheme::expected_compression_ratio(&self, compressor: &vortex_compressor::CascadingCompressor, data: &mut vortex_compressor::stats::ArrayAndStats, ctx: vortex_compressor::ctx::CompressorContext) -> vortex_error::VortexResult +pub fn vortex_compressor::builtins::StringConstantScheme::expected_compression_ratio(&self, data: &mut vortex_compressor::stats::ArrayAndStats, ctx: vortex_compressor::ctx::CompressorContext) -> vortex_compressor::estimate::CompressionEstimate pub fn vortex_compressor::builtins::StringConstantScheme::matches(&self, canonical: &vortex_array::canonical::Canonical) -> bool @@ -556,9 +542,7 @@ pub fn vortex_compressor::builtins::StringDictScheme::compress(&self, compressor pub fn vortex_compressor::builtins::StringDictScheme::descendant_exclusions(&self) -> alloc::vec::Vec -pub fn vortex_compressor::builtins::StringDictScheme::detects_constant(&self) -> bool - -pub fn vortex_compressor::builtins::StringDictScheme::expected_compression_ratio(&self, compressor: 
&vortex_compressor::CascadingCompressor, data: &mut vortex_compressor::stats::ArrayAndStats, ctx: vortex_compressor::ctx::CompressorContext) -> vortex_error::VortexResult +pub fn vortex_compressor::builtins::StringDictScheme::expected_compression_ratio(&self, data: &mut vortex_compressor::stats::ArrayAndStats, _ctx: vortex_compressor::ctx::CompressorContext) -> vortex_compressor::estimate::CompressionEstimate pub fn vortex_compressor::builtins::StringDictScheme::matches(&self, canonical: &vortex_array::canonical::Canonical) -> bool @@ -576,8 +560,6 @@ impl vortex_compres pub fn T::id(&self) -> vortex_compressor::scheme::SchemeId -pub fn vortex_compressor::scheme::estimate_compression_ratio_with_sampling(scheme: &S, compressor: &vortex_compressor::CascadingCompressor, array: &vortex_array::array::ArrayRef, ctx: vortex_compressor::ctx::CompressorContext) -> vortex_error::VortexResult - pub mod vortex_compressor::stats pub enum vortex_compressor::stats::FloatErasedStats @@ -686,6 +668,12 @@ impl vortex_compressor::stats::ArrayAndStats pub fn vortex_compressor::stats::ArrayAndStats::array(&self) -> &vortex_array::array::ArrayRef +pub fn vortex_compressor::stats::ArrayAndStats::array_as_primitive(&self) -> &vortex_array::arrays::primitive::array::PrimitiveArray + +pub fn vortex_compressor::stats::ArrayAndStats::array_as_utf8(&self) -> &vortex_array::arrays::varbinview::array::VarBinViewArray + +pub fn vortex_compressor::stats::ArrayAndStats::array_len(&self) -> usize + pub fn vortex_compressor::stats::ArrayAndStats::bool_stats(&mut self) -> &vortex_compressor::stats::BoolStats pub fn vortex_compressor::stats::ArrayAndStats::float_stats(&mut self) -> &vortex_compressor::stats::FloatStats @@ -710,8 +698,6 @@ pub fn vortex_compressor::stats::BoolStats::is_constant(&self) -> bool pub fn vortex_compressor::stats::BoolStats::null_count(&self) -> u32 -pub fn vortex_compressor::stats::BoolStats::source(&self) -> &vortex_array::arrays::bool::array::BoolArray - pub fn 
vortex_compressor::stats::BoolStats::true_count(&self) -> u32 pub fn vortex_compressor::stats::BoolStats::value_count(&self) -> u32 @@ -752,8 +738,6 @@ pub fn vortex_compressor::stats::FloatStats::generate_opts(input: &vortex_array: pub fn vortex_compressor::stats::FloatStats::null_count(&self) -> u32 -pub fn vortex_compressor::stats::FloatStats::source(&self) -> &vortex_array::arrays::primitive::array::PrimitiveArray - pub fn vortex_compressor::stats::FloatStats::value_count(&self) -> u32 impl vortex_compressor::stats::FloatStats @@ -844,8 +828,6 @@ pub fn vortex_compressor::stats::IntegerStats::generate_opts(input: &vortex_arra pub fn vortex_compressor::stats::IntegerStats::null_count(&self) -> u32 -pub fn vortex_compressor::stats::IntegerStats::source(&self) -> &vortex_array::arrays::primitive::array::PrimitiveArray - pub fn vortex_compressor::stats::IntegerStats::value_count(&self) -> u32 impl vortex_compressor::stats::IntegerStats @@ -920,8 +902,6 @@ pub fn vortex_compressor::stats::StringStats::generate_opts(input: &vortex_array pub fn vortex_compressor::stats::StringStats::null_count(&self) -> u32 -pub fn vortex_compressor::stats::StringStats::source(&self) -> &vortex_array::arrays::varbinview::array::VarBinViewArray - pub fn vortex_compressor::stats::StringStats::value_count(&self) -> u32 impl core::clone::Clone for vortex_compressor::stats::StringStats diff --git a/vortex-compressor/src/builtins/constant.rs b/vortex-compressor/src/builtins/constant.rs deleted file mode 100644 index 53300d49703..00000000000 --- a/vortex-compressor/src/builtins/constant.rs +++ /dev/null @@ -1,265 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - -//! Constant encoding schemes for integer, float, and string arrays. 
- -use vortex_array::ArrayRef; -use vortex_array::Canonical; -use vortex_array::IntoArray; -use vortex_array::aggregate_fn::fns::is_constant::is_constant; -use vortex_array::arrays::ConstantArray; -use vortex_array::arrays::MaskedArray; -use vortex_array::arrays::PrimitiveArray; -use vortex_array::scalar::Scalar; -use vortex_error::VortexResult; - -use super::is_bool; -use super::is_float_primitive; -use super::is_integer_primitive; -use super::is_utf8_string; -use crate::CascadingCompressor; -use crate::ctx::CompressorContext; -use crate::scheme::Scheme; -use crate::stats::ArrayAndStats; - -/// Constant encoding for bool arrays where all valid values are the same. -#[derive(Debug, Copy, Clone, PartialEq, Eq)] -pub struct BoolConstantScheme; - -impl Scheme for BoolConstantScheme { - fn scheme_name(&self) -> &'static str { - "vortex.bool.constant" - } - - fn matches(&self, canonical: &Canonical) -> bool { - is_bool(canonical) - } - - fn detects_constant(&self) -> bool { - true - } - - fn expected_compression_ratio( - &self, - _compressor: &CascadingCompressor, - data: &mut ArrayAndStats, - ctx: CompressorContext, - ) -> VortexResult { - if ctx.is_sample() { - return Ok(0.0); - } - - let stats = data.bool_stats(); - - // Only compress non-nullable or all-valid nullable arrays. - if stats.source().dtype().is_nullable() && stats.null_count() > 0 { - return Ok(0.0); - } - - if !stats.is_constant() { - return Ok(0.0); - } - - Ok(stats.value_count() as f64) - } - - fn compress( - &self, - _compressor: &CascadingCompressor, - data: &mut ArrayAndStats, - _ctx: CompressorContext, - ) -> VortexResult { - let stats = data.bool_stats(); - Ok(ConstantArray::new(stats.source().scalar_at(0)?, stats.source().len()).into_array()) - } -} - -/// Constant encoding for integer arrays with a single distinct value. 
-#[derive(Debug, Copy, Clone, PartialEq, Eq)] -pub struct IntConstantScheme; - -impl Scheme for IntConstantScheme { - fn scheme_name(&self) -> &'static str { - "vortex.int.constant" - } - - fn matches(&self, canonical: &Canonical) -> bool { - is_integer_primitive(canonical) - } - - fn detects_constant(&self) -> bool { - true - } - - fn expected_compression_ratio( - &self, - _compressor: &CascadingCompressor, - data: &mut ArrayAndStats, - ctx: CompressorContext, - ) -> VortexResult { - if ctx.is_sample() { - return Ok(0.0); - } - - let stats = data.integer_stats(); - - if stats.distinct_count().is_none_or(|count| count > 1) { - return Ok(0.0); - } - - Ok(stats.value_count() as f64) - } - - fn compress( - &self, - _compressor: &CascadingCompressor, - data: &mut ArrayAndStats, - _ctx: CompressorContext, - ) -> VortexResult { - let source = data.integer_stats().source().clone(); - compress_constant_primitive(&source) - } -} - -/// Constant encoding for float arrays with a single distinct value. 
-#[derive(Debug, Copy, Clone, PartialEq, Eq)] -pub struct FloatConstantScheme; - -impl Scheme for FloatConstantScheme { - fn scheme_name(&self) -> &'static str { - "vortex.float.constant" - } - - fn matches(&self, canonical: &Canonical) -> bool { - is_float_primitive(canonical) - } - - fn detects_constant(&self) -> bool { - true - } - - fn expected_compression_ratio( - &self, - _compressor: &CascadingCompressor, - data: &mut ArrayAndStats, - ctx: CompressorContext, - ) -> VortexResult { - if ctx.is_sample() { - return Ok(0.0); - } - - let stats = data.float_stats(); - - if stats.null_count() as usize == stats.source().len() || stats.value_count() == 0 { - return Ok(0.0); - } - - if stats.distinct_count().is_some_and(|count| count == 1) { - return Ok(stats.value_count() as f64); - } - - Ok(0.0) - } - - fn compress( - &self, - _compressor: &CascadingCompressor, - data: &mut ArrayAndStats, - _ctx: CompressorContext, - ) -> VortexResult { - let source = data.float_stats().source().clone(); - compress_constant_primitive(&source) - } -} - -/// Constant encoding for string arrays with a single distinct value. -#[derive(Debug, Copy, Clone, PartialEq, Eq)] -pub struct StringConstantScheme; - -impl Scheme for StringConstantScheme { - fn scheme_name(&self) -> &'static str { - "vortex.string.constant" - } - - fn matches(&self, canonical: &Canonical) -> bool { - is_utf8_string(canonical) - } - - fn detects_constant(&self) -> bool { - true - } - - fn expected_compression_ratio( - &self, - compressor: &CascadingCompressor, - data: &mut ArrayAndStats, - ctx: CompressorContext, - ) -> VortexResult { - if ctx.is_sample() { - return Ok(0.0); - } - - let stats = data.string_stats(); - - if stats.estimated_distinct_count().is_none_or(|c| c > 1) - || !is_constant( - &stats.source().clone().into_array(), - &mut compressor.execution_ctx(), - )? - { - return Ok(0.0); - } - - // Force constant in these cases. 
- Ok(f64::MAX) - } - - fn compress( - &self, - _compressor: &CascadingCompressor, - data: &mut ArrayAndStats, - _ctx: CompressorContext, - ) -> VortexResult { - let stats = data.string_stats(); - - let scalar_idx = - (0..stats.source().len()).position(|idx| stats.source().is_valid(idx).unwrap_or(false)); - - match scalar_idx { - Some(idx) => { - let scalar = stats.source().scalar_at(idx)?; - let const_arr = ConstantArray::new(scalar, stats.source().len()).into_array(); - if !stats.source().all_valid()? { - Ok(MaskedArray::try_new(const_arr, stats.source().validity())?.into_array()) - } else { - Ok(const_arr) - } - } - None => Ok(ConstantArray::new( - Scalar::null(stats.source().dtype().clone()), - stats.source().len(), - ) - .into_array()), - } - } -} - -/// Shared helper for compressing a constant primitive array (int or float). -fn compress_constant_primitive(source: &PrimitiveArray) -> VortexResult { - let scalar_idx = (0..source.len()).position(|idx| source.is_valid(idx).unwrap_or(false)); - - match scalar_idx { - Some(idx) => { - let scalar = source.scalar_at(idx)?; - let const_arr = ConstantArray::new(scalar, source.len()).into_array(); - if !source.all_valid()? { - Ok(MaskedArray::try_new(const_arr, source.validity())?.into_array()) - } else { - Ok(const_arr) - } - } - None => { - Ok(ConstantArray::new(Scalar::null(source.dtype().clone()), source.len()).into_array()) - } - } -} diff --git a/vortex-compressor/src/builtins/constant/bool.rs b/vortex-compressor/src/builtins/constant/bool.rs new file mode 100644 index 00000000000..62e156379e9 --- /dev/null +++ b/vortex-compressor/src/builtins/constant/bool.rs @@ -0,0 +1,62 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Constant encoding for bool arrays. 
+ +use vortex_array::ArrayRef; +use vortex_array::Canonical; +use vortex_error::VortexResult; + +use crate::CascadingCompressor; +use crate::builtins::BoolConstantScheme; +use crate::builtins::constant::compress_constant_array_with_validity; +use crate::ctx::CompressorContext; +use crate::estimate::CompressionEstimate; +use crate::scheme::Scheme; +use crate::stats::ArrayAndStats; + +impl Scheme for BoolConstantScheme { + fn scheme_name(&self) -> &'static str { + "vortex.bool.constant" + } + + fn matches(&self, canonical: &Canonical) -> bool { + matches!(canonical, Canonical::Bool(_)) + } + + fn expected_compression_ratio( + &self, + data: &mut ArrayAndStats, + ctx: CompressorContext, + ) -> CompressionEstimate { + // Constant detection on a sample is a false positive, since the sample being constant does + // not mean the full array is constant. + if ctx.is_sample() { + return CompressionEstimate::Skip; + } + + let array_len = data.array().len(); + let stats = data.bool_stats(); + + // We want to use `Constant` if there are only nulls in the array. + if stats.value_count() == 0 { + debug_assert_eq!(stats.null_count() as usize, array_len); + return CompressionEstimate::AlwaysUse; + } + + if stats.is_constant() { + return CompressionEstimate::AlwaysUse; + } + + CompressionEstimate::Skip + } + + fn compress( + &self, + _compressor: &CascadingCompressor, + data: &mut ArrayAndStats, + _ctx: CompressorContext, + ) -> VortexResult { + compress_constant_array_with_validity(data.array()) + } +} diff --git a/vortex-compressor/src/builtins/constant/float.rs b/vortex-compressor/src/builtins/constant/float.rs new file mode 100644 index 00000000000..df8ab7464b6 --- /dev/null +++ b/vortex-compressor/src/builtins/constant/float.rs @@ -0,0 +1,81 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Constant encoding for float arrays. 
+ +use vortex_array::ArrayRef; +use vortex_array::Canonical; +use vortex_array::aggregate_fn::fns::is_constant::is_constant; +use vortex_error::VortexResult; + +use super::is_float_primitive; +use crate::CascadingCompressor; +use crate::builtins::FloatConstantScheme; +use crate::builtins::constant::compress_constant_array_with_validity; +use crate::ctx::CompressorContext; +use crate::estimate::CompressionEstimate; +use crate::scheme::Scheme; +use crate::stats::ArrayAndStats; + +impl Scheme for FloatConstantScheme { + fn scheme_name(&self) -> &'static str { + "vortex.float.constant" + } + + fn matches(&self, canonical: &Canonical) -> bool { + is_float_primitive(canonical) + } + + fn expected_compression_ratio( + &self, + data: &mut ArrayAndStats, + ctx: CompressorContext, + ) -> CompressionEstimate { + // Constant detection on a sample is a false positive, since the sample being constant does + // not mean the full array is constant. + if ctx.is_sample() { + return CompressionEstimate::Skip; + } + + let array_len = data.array().len(); + let stats = data.float_stats(); + + // Note that we only compute distinct counts if other schemes have requested it. + if let Some(distinct_count) = stats.distinct_count() { + if distinct_count > 1 { + return CompressionEstimate::Skip; + } else { + debug_assert_eq!(distinct_count, 1); + return CompressionEstimate::AlwaysUse; + } + } + + // We want to use `Constant` if there are only nulls in the array. + if stats.value_count() == 0 { + debug_assert_eq!(stats.null_count() as usize, array_len); + return CompressionEstimate::AlwaysUse; + } + + // TODO(connor): Can we be smart here with the max and min like with integers? + + // Otherwise our best bet is to actually check if the array is constant. + // This is an expensive check, but in practice the distinct count is known because we often + // include dictionary encoding in our set of schemes, so we rarely call this. 
+ CompressionEstimate::Estimate(Box::new(|compressor, data, _ctx| { + if is_constant(data.array(), &mut compressor.execution_ctx())? { + Ok(CompressionEstimate::AlwaysUse) + } else { + Ok(CompressionEstimate::Skip) + } + })) + } + + fn compress( + &self, + _compressor: &CascadingCompressor, + data: &mut ArrayAndStats, + _ctx: CompressorContext, + ) -> VortexResult { + compress_constant_array_with_validity(data.array()) + } +} diff --git a/vortex-compressor/src/builtins/constant/integer.rs b/vortex-compressor/src/builtins/constant/integer.rs new file mode 100644 index 00000000000..0264893e5c8 --- /dev/null +++ b/vortex-compressor/src/builtins/constant/integer.rs @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Constant encoding for integer arrays. + +use vortex_array::ArrayRef; +use vortex_array::Canonical; +use vortex_error::VortexResult; + +use super::is_integer_primitive; +use crate::CascadingCompressor; +use crate::builtins::IntConstantScheme; +use crate::builtins::constant::compress_constant_array_with_validity; +use crate::ctx::CompressorContext; +use crate::estimate::CompressionEstimate; +use crate::scheme::Scheme; +use crate::stats::ArrayAndStats; + +impl Scheme for IntConstantScheme { + fn scheme_name(&self) -> &'static str { + "vortex.int.constant" + } + + fn matches(&self, canonical: &Canonical) -> bool { + is_integer_primitive(canonical) + } + + fn expected_compression_ratio( + &self, + data: &mut ArrayAndStats, + ctx: CompressorContext, + ) -> CompressionEstimate { + // Constant detection on a sample is a false positive, since the sample being constant does + // not mean the full array is constant. + if ctx.is_sample() { + return CompressionEstimate::Skip; + } + + let array_len = data.array().len(); + let stats = data.integer_stats(); + + // Note that we only compute distinct counts if other schemes have requested it. 
+ if let Some(distinct_count) = stats.distinct_count() { + if distinct_count > 1 { + return CompressionEstimate::Skip; + } else { + debug_assert_eq!(distinct_count, 1); + return CompressionEstimate::AlwaysUse; + } + } + + // We want to use `Constant` if there are only nulls in the array. + if stats.value_count() == 0 { + debug_assert_eq!(stats.null_count() as usize, array_len); + return CompressionEstimate::AlwaysUse; + } + + // Otherwise, use the max and min to determine if there is a single value. + match stats.erased().max_minus_min().checked_ilog2() { + Some(_) => CompressionEstimate::Skip, + // If max-min == 0, then we know that there is only 1 value. + None => CompressionEstimate::AlwaysUse, + } + } + + fn compress( + &self, + _compressor: &CascadingCompressor, + data: &mut ArrayAndStats, + _ctx: CompressorContext, + ) -> VortexResult { + compress_constant_array_with_validity(data.array()) + } +} diff --git a/vortex-compressor/src/builtins/constant/mod.rs b/vortex-compressor/src/builtins/constant/mod.rs new file mode 100644 index 00000000000..1b177fc530b --- /dev/null +++ b/vortex-compressor/src/builtins/constant/mod.rs @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Constant encoding schemes for bool, float, integer, and string arrays. + +use vortex_array::ArrayRef; +use vortex_array::IntoArray; +use vortex_array::arrays::ConstantArray; +use vortex_array::arrays::MaskedArray; +use vortex_array::scalar::Scalar; +use vortex_error::VortexExpect; +use vortex_error::VortexResult; + +use super::is_float_primitive; +use super::is_integer_primitive; +use super::is_utf8_string; + +/// Constant encoding for bool arrays where all valid values are the same. +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub struct BoolConstantScheme; + +/// Constant encoding for integer arrays with a single distinct value. 
+#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub struct IntConstantScheme; + +/// Constant encoding for float arrays with a single distinct value. +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub struct FloatConstantScheme; + +/// Constant encoding for string arrays with a single distinct value. +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub struct StringConstantScheme; + +mod bool; +mod float; +mod integer; +mod string; + +/// Shared helper for compressing a constant array (bool, int, float, string) into a +/// [`ConstantArray`]. +/// +/// Assumes that the source array has constant valid scalars. +/// +/// If the array has any nulls, returns a [`MaskedArray`] with a [`ConstantArray`] child.` +fn compress_constant_array_with_validity(source: &ArrayRef) -> VortexResult { + if source.all_invalid()? { + return Ok( + ConstantArray::new(Scalar::null(source.dtype().clone()), source.len()).into_array(), + ); + } + + let scalar_idx = (0..source.len()) + .position(|idx| source.is_valid(idx).unwrap_or(false)) + .vortex_expect("We checked that there exists a scalar that is not invalid"); + + let scalar = source.scalar_at(scalar_idx)?; + let const_arr = ConstantArray::new(scalar, source.len()).into_array(); + + if !source.all_valid()? { + Ok(MaskedArray::try_new(const_arr, source.validity()?)?.into_array()) + } else { + Ok(const_arr) + } +} diff --git a/vortex-compressor/src/builtins/constant/string.rs b/vortex-compressor/src/builtins/constant/string.rs new file mode 100644 index 00000000000..96e4e7ba674 --- /dev/null +++ b/vortex-compressor/src/builtins/constant/string.rs @@ -0,0 +1,75 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Constant encoding for string arrays. 
+ +use vortex_array::ArrayRef; +use vortex_array::Canonical; +use vortex_array::aggregate_fn::fns::is_constant::is_constant; +use vortex_error::VortexResult; + +use super::is_utf8_string; +use crate::CascadingCompressor; +use crate::builtins::StringConstantScheme; +use crate::builtins::constant::compress_constant_array_with_validity; +use crate::ctx::CompressorContext; +use crate::estimate::CompressionEstimate; +use crate::scheme::Scheme; +use crate::stats::ArrayAndStats; + +impl Scheme for StringConstantScheme { + fn scheme_name(&self) -> &'static str { + "vortex.string.constant" + } + + fn matches(&self, canonical: &Canonical) -> bool { + is_utf8_string(canonical) + } + + fn expected_compression_ratio( + &self, + data: &mut ArrayAndStats, + ctx: CompressorContext, + ) -> CompressionEstimate { + // Constant detection on a sample is a false positive, since the sample being constant does + // not mean the full array is constant. + if ctx.is_sample() { + return CompressionEstimate::Skip; + } + + let array_len = data.array().len(); + let stats = data.string_stats(); + + // We want to use `Constant` if there are only nulls in the array. + if stats.value_count() == 0 { + debug_assert_eq!(stats.null_count() as usize, array_len); + return CompressionEstimate::AlwaysUse; + } + + // Since the estimated distinct count is always going to be less than or equal to the actual + // distinct count, if this is not equal to 1 the actual is definitely not equal to 1. + if stats.estimated_distinct_count().is_some_and(|c| c > 1) { + return CompressionEstimate::Skip; + } + + // Otherwise our best bet is to actually check if the array is constant. + // This is an expensive check, but the alternative of not compressing a constant array is + // far less preferable. + CompressionEstimate::Estimate(Box::new(|compressor, data, _ctx| { + if is_constant(data.array(), &mut compressor.execution_ctx())? 
{ + Ok(CompressionEstimate::AlwaysUse) + } else { + Ok(CompressionEstimate::Skip) + } + })) + } + + fn compress( + &self, + _compressor: &CascadingCompressor, + data: &mut ArrayAndStats, + _ctx: CompressorContext, + ) -> VortexResult { + compress_constant_array_with_validity(data.array()) + } +} diff --git a/vortex-compressor/src/builtins/dict/float.rs b/vortex-compressor/src/builtins/dict/float.rs index e331a851cec..72414a6a4bd 100644 --- a/vortex-compressor/src/builtins/dict/float.rs +++ b/vortex-compressor/src/builtins/dict/float.rs @@ -6,65 +6,180 @@ //! Vortex encoders must always produce unsigned integer codes; signed codes are only accepted for //! external compatibility. +use vortex_array::ArrayRef; +use vortex_array::Canonical; use vortex_array::IntoArray; +use vortex_array::ToCanonical; use vortex_array::arrays::DictArray; use vortex_array::arrays::PrimitiveArray; use vortex_array::dtype::half::f16; use vortex_array::validity::Validity; use vortex_buffer::Buffer; use vortex_error::VortexExpect; +use vortex_error::VortexResult; +use crate::CascadingCompressor; +use crate::builtins::FloatDictScheme; +use crate::builtins::IntDictScheme; +use crate::builtins::is_float_primitive; +use crate::ctx::CompressorContext; +use crate::estimate::CompressionEstimate; +use crate::scheme::ChildSelection; +use crate::scheme::DescendantExclusion; +use crate::scheme::Scheme; +use crate::scheme::SchemeExt; +use crate::stats::ArrayAndStats; use crate::stats::FloatErasedStats; use crate::stats::FloatStats; +use crate::stats::GenerateStatsOptions; + +impl Scheme for FloatDictScheme { + fn scheme_name(&self) -> &'static str { + "vortex.float.dict" + } + + fn matches(&self, canonical: &Canonical) -> bool { + is_float_primitive(canonical) + } + + fn stats_options(&self) -> GenerateStatsOptions { + GenerateStatsOptions { + count_distinct_values: true, + } + } + + /// Children: values=0, codes=1. 
+ fn num_children(&self) -> usize { + 2 + } + + /// Float dict codes (child 1) are compact unsigned integers that should not be + /// dict-encoded again. Float dict values (child 0) flow through ALP into integer-land, + /// where integer dict encoding is redundant since the values are already deduplicated at + /// the float level. + /// + /// Additional exclusions for codes (IntSequenceScheme, IntRunEndScheme, FoRScheme, + /// ZigZagScheme, SparseScheme, RLE) are expressed as pull rules on those schemes in + /// vortex-btrblocks. + fn descendant_exclusions(&self) -> Vec { + vec![ + DescendantExclusion { + excluded: IntDictScheme.id(), + children: ChildSelection::One(1), + }, + DescendantExclusion { + excluded: IntDictScheme.id(), + children: ChildSelection::One(0), + }, + ] + } + + fn expected_compression_ratio( + &self, + data: &mut ArrayAndStats, + _ctx: CompressorContext, + ) -> CompressionEstimate { + let stats = data.float_stats(); + + if stats.value_count() == 0 { + return CompressionEstimate::Skip; + } + + let distinct_values_count = stats.distinct_count().vortex_expect( + "this must be present since `DictScheme` declared that we need distinct values", + ); + + // If > 50% of the values are distinct, skip dictionary scheme. + if distinct_values_count > stats.value_count() / 2 { + return CompressionEstimate::Skip; + } + + // Let sampling determine the expected ratio. + CompressionEstimate::Sample + } + + fn compress( + &self, + compressor: &CascadingCompressor, + data: &mut ArrayAndStats, + ctx: CompressorContext, + ) -> VortexResult { + // TODO(connor): Fight the borrow checker (needs interior mutability)! + let stats = data.float_stats().clone(); + let dict = dictionary_encode(data.array_as_primitive(), &stats); + + let has_all_values_referenced = dict.has_all_values_referenced(); + + // Values = child 0. + let compressed_values = compressor.compress_child(dict.values(), &ctx, self.id(), 0)?; + + // Codes = child 1. 
+ let compressed_codes = compressor.compress_child( + &dict.codes().to_primitive().narrow()?.into_array(), + &ctx, + self.id(), + 1, + )?; + + // SAFETY: compressing codes or values does not alter the invariants. + unsafe { + Ok( + DictArray::new_unchecked(compressed_codes, compressed_values) + .set_all_values_referenced(has_all_values_referenced) + .into_array(), + ) + } + } +} /// Encodes a typed float array into a [`DictArray`] using the pre-computed distinct values. macro_rules! typed_encode { - ($stats:ident, $typed:ident, $validity:ident, $typ:ty) => {{ + ($source_array:ident, $stats:ident, $typed:ident, $typ:ty) => {{ let distinct = $typed.distinct().vortex_expect( "this must be present since `DictScheme` declared that we need distinct values", ); + let values_validity = match $source_array.validity().clone() { + Validity::NonNullable => Validity::NonNullable, + _ => Validity::AllValid, + }; + let codes_validity = $source_array.validity().clone(); + let values: Buffer<$typ> = distinct.distinct_values().iter().map(|x| x.0).collect(); let max_code = values.len(); let codes = if max_code <= u8::MAX as usize { let buf = >::encode( &values, - $stats.source().as_slice::<$typ>(), + $source_array.as_slice::<$typ>(), ); - PrimitiveArray::new(buf, $validity.clone()).into_array() + PrimitiveArray::new(buf, codes_validity).into_array() } else if max_code <= u16::MAX as usize { let buf = >::encode( &values, - $stats.source().as_slice::<$typ>(), + $source_array.as_slice::<$typ>(), ); - PrimitiveArray::new(buf, $validity.clone()).into_array() + PrimitiveArray::new(buf, codes_validity).into_array() } else { let buf = >::encode( &values, - $stats.source().as_slice::<$typ>(), + $source_array.as_slice::<$typ>(), ); - PrimitiveArray::new(buf, $validity.clone()).into_array() + PrimitiveArray::new(buf, codes_validity).into_array() }; - let values_validity = match $validity { - Validity::NonNullable => Validity::NonNullable, - _ => Validity::AllValid, - }; let values = 
PrimitiveArray::new(values, values_validity).into_array(); - // SAFETY: enforced by the DictEncoder. unsafe { DictArray::new_unchecked(codes, values).set_all_values_referenced(true) } }}; } /// Compresses a floating-point array into a dictionary array according to attached stats. -pub fn dictionary_encode(stats: &FloatStats) -> DictArray { - let validity = stats.source().validity(); +pub fn dictionary_encode(array: &PrimitiveArray, stats: &FloatStats) -> DictArray { match stats.erased() { - FloatErasedStats::F16(typed) => typed_encode!(stats, typed, validity, f16), - FloatErasedStats::F32(typed) => typed_encode!(stats, typed, validity, f32), - FloatErasedStats::F64(typed) => typed_encode!(stats, typed, validity, f64), + FloatErasedStats::F16(typed) => typed_encode!(array, stats, typed, f16), + FloatErasedStats::F32(typed) => typed_encode!(array, stats, typed, f32), + FloatErasedStats::F64(typed) => typed_encode!(array, stats, typed, f64), } } @@ -137,7 +252,7 @@ mod tests { count_distinct_values: true, }, ); - let dict_array = dictionary_encode(&stats); + let dict_array = dictionary_encode(&array, &stats); assert_eq!(dict_array.values().len(), 2); assert_eq!(dict_array.codes().len(), 5); diff --git a/vortex-compressor/src/builtins/dict/integer.rs b/vortex-compressor/src/builtins/dict/integer.rs index dfe7377c504..88c51404ef9 100644 --- a/vortex-compressor/src/builtins/dict/integer.rs +++ b/vortex-compressor/src/builtins/dict/integer.rs @@ -1,54 +1,162 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -//! Dictionary compressor that reuses the unique values in the [`IntegerStats`]. +//! Integer-specific dictionary encoding implementation. //! //! Vortex encoders must always produce unsigned integer codes; signed codes are only accepted //! for external compatibility. 
+use vortex_array::ArrayRef; +use vortex_array::Canonical; use vortex_array::IntoArray; +use vortex_array::ToCanonical; use vortex_array::arrays::DictArray; use vortex_array::arrays::PrimitiveArray; use vortex_array::validity::Validity; use vortex_buffer::Buffer; use vortex_error::VortexExpect; +use vortex_error::VortexResult; +use crate::CascadingCompressor; +use crate::builtins::IntDictScheme; +use crate::builtins::is_integer_primitive; +use crate::ctx::CompressorContext; +use crate::estimate::CompressionEstimate; +use crate::scheme::Scheme; +use crate::scheme::SchemeExt; +use crate::stats::ArrayAndStats; +use crate::stats::GenerateStatsOptions; use crate::stats::IntegerErasedStats; use crate::stats::IntegerStats; +impl Scheme for IntDictScheme { + fn scheme_name(&self) -> &'static str { + "vortex.int.dict" + } + + fn matches(&self, canonical: &Canonical) -> bool { + is_integer_primitive(canonical) + } + + fn stats_options(&self) -> GenerateStatsOptions { + GenerateStatsOptions { + count_distinct_values: true, + } + } + + /// Children: values=0, codes=1. + fn num_children(&self) -> usize { + 2 + } + + fn expected_compression_ratio( + &self, + data: &mut ArrayAndStats, + _ctx: CompressorContext, + ) -> CompressionEstimate { + let bit_width = data.array_as_primitive().ptype().bit_width(); + let stats = data.integer_stats(); + + if stats.value_count() == 0 { + return CompressionEstimate::Skip; + } + + let distinct_values_count = stats.distinct_count().vortex_expect( + "this must be present since `DictScheme` declared that we need distinct values", + ); + + // If > 50% of the values are distinct, skip dictionary scheme. + if distinct_values_count > stats.value_count() / 2 { + return CompressionEstimate::Skip; + } + + // Ignore nulls encoding for the estimate. We only focus on values. + + let values_size = bit_width * distinct_values_count as usize; + + // TODO(connor): Should we just hardcode this instead of let the compressor choose? 
+ // Assume codes are compressed RLE + BitPacking. + let codes_bw = u32::BITS - distinct_values_count.leading_zeros(); + + let n_runs = (stats.value_count() / stats.average_run_length()) as usize; + + // Assume that codes will either be BitPack or RLE-BitPack. + let codes_size_bp = codes_bw as usize * stats.value_count() as usize; + let codes_size_rle_bp = usize::checked_mul(codes_bw as usize + 32, n_runs); + + let codes_size = usize::min(codes_size_bp, codes_size_rle_bp.unwrap_or(usize::MAX)); + + let before = stats.value_count() as usize * bit_width; + + CompressionEstimate::Ratio(before as f64 / (values_size + codes_size) as f64) + } + + fn compress( + &self, + compressor: &CascadingCompressor, + data: &mut ArrayAndStats, + ctx: CompressorContext, + ) -> VortexResult { + // TODO(connor): Fight the borrow checker (needs interior mutability)! + let stats = data.integer_stats().clone(); + let dict = dictionary_encode(data.array_as_primitive(), &stats); + + // Values = child 0. + let compressed_values = compressor.compress_child(dict.values(), &ctx, self.id(), 0)?; + + // Codes = child 1. + let compressed_codes = compressor.compress_child( + &dict.codes().to_primitive().narrow()?.into_array(), + &ctx, + self.id(), + 1, + )?; + + // SAFETY: compressing codes does not change their values. + unsafe { + Ok( + DictArray::new_unchecked(compressed_codes, compressed_values) + .set_all_values_referenced(dict.has_all_values_referenced()) + .into_array(), + ) + } + } +} + /// Encodes a typed integer array into a [`DictArray`] using the pre-computed distinct values. macro_rules! 
typed_encode { - ($stats:ident, $typed:ident, $validity:ident, $typ:ty) => {{ + ($source_array:ident, $stats:ident, $typed:ident, $typ:ty) => {{ let distinct = $typed.distinct().vortex_expect( "this must be present since `DictScheme` declared that we need distinct values", ); + let values_validity = match $source_array.validity().clone() { + Validity::NonNullable => Validity::NonNullable, + _ => Validity::AllValid, + }; + let codes_validity = $source_array.validity().clone(); + let values: Buffer<$typ> = distinct.distinct_values().keys().map(|x| x.0).collect(); let max_code = values.len(); let codes = if max_code <= u8::MAX as usize { let buf = >::encode( &values, - $stats.source().as_slice::<$typ>(), + $source_array.as_slice::<$typ>(), ); - PrimitiveArray::new(buf, $validity.clone()).into_array() + PrimitiveArray::new(buf, codes_validity).into_array() } else if max_code <= u16::MAX as usize { let buf = >::encode( &values, - $stats.source().as_slice::<$typ>(), + $source_array.as_slice::<$typ>(), ); - PrimitiveArray::new(buf, $validity.clone()).into_array() + PrimitiveArray::new(buf, codes_validity).into_array() } else { let buf = >::encode( &values, - $stats.source().as_slice::<$typ>(), + $source_array.as_slice::<$typ>(), ); - PrimitiveArray::new(buf, $validity.clone()).into_array() - }; - - let values_validity = match $validity { - Validity::NonNullable => Validity::NonNullable, - _ => Validity::AllValid, + PrimitiveArray::new(buf, codes_validity).into_array() }; let values = PrimitiveArray::new(values, values_validity).into_array(); @@ -62,18 +170,16 @@ macro_rules! 
typed_encode { clippy::cognitive_complexity, reason = "complexity from match on all integer types" )] -pub fn dictionary_encode(stats: &IntegerStats) -> DictArray { - let src_validity = stats.source().validity(); - +pub fn dictionary_encode(array: &PrimitiveArray, stats: &IntegerStats) -> DictArray { match stats.erased() { - IntegerErasedStats::U8(typed) => typed_encode!(stats, typed, src_validity, u8), - IntegerErasedStats::U16(typed) => typed_encode!(stats, typed, src_validity, u16), - IntegerErasedStats::U32(typed) => typed_encode!(stats, typed, src_validity, u32), - IntegerErasedStats::U64(typed) => typed_encode!(stats, typed, src_validity, u64), - IntegerErasedStats::I8(typed) => typed_encode!(stats, typed, src_validity, i8), - IntegerErasedStats::I16(typed) => typed_encode!(stats, typed, src_validity, i16), - IntegerErasedStats::I32(typed) => typed_encode!(stats, typed, src_validity, i32), - IntegerErasedStats::I64(typed) => typed_encode!(stats, typed, src_validity, i64), + IntegerErasedStats::U8(typed) => typed_encode!(array, stats, typed, u8), + IntegerErasedStats::U16(typed) => typed_encode!(array, stats, typed, u16), + IntegerErasedStats::U32(typed) => typed_encode!(array, stats, typed, u32), + IntegerErasedStats::U64(typed) => typed_encode!(array, stats, typed, u64), + IntegerErasedStats::I8(typed) => typed_encode!(array, stats, typed, i8), + IntegerErasedStats::I16(typed) => typed_encode!(array, stats, typed, i16), + IntegerErasedStats::I32(typed) => typed_encode!(array, stats, typed, i32), + IntegerErasedStats::I64(typed) => typed_encode!(array, stats, typed, i64), } } @@ -151,7 +257,7 @@ mod tests { count_distinct_values: true, }, ); - let dict_array = dictionary_encode(&stats); + let dict_array = dictionary_encode(&array, &stats); assert_eq!(dict_array.values().len(), 2); assert_eq!(dict_array.codes().len(), 5); diff --git a/vortex-compressor/src/builtins/dict/mod.rs b/vortex-compressor/src/builtins/dict/mod.rs index b7ff63d6b38..c8e573b4fbc 100644 
--- a/vortex-compressor/src/builtins/dict/mod.rs +++ b/vortex-compressor/src/builtins/dict/mod.rs @@ -3,313 +3,21 @@ //! Dictionary encoding schemes for integer, float, and string arrays. -pub mod float; -pub mod integer; - -use vortex_array::ArrayRef; -use vortex_array::Canonical; -use vortex_array::IntoArray; -use vortex_array::ToCanonical; -use vortex_array::arrays::DictArray; -use vortex_array::builders::dict::dict_encode; -use vortex_error::VortexExpect; -use vortex_error::VortexResult; - -use super::is_float_primitive; -use super::is_integer_primitive; -use super::is_utf8_string; -use crate::CascadingCompressor; -use crate::ctx::CompressorContext; -use crate::scheme::ChildSelection; -use crate::scheme::DescendantExclusion; -use crate::scheme::Scheme; -use crate::scheme::SchemeExt; -use crate::scheme::estimate_compression_ratio_with_sampling; -use crate::stats::ArrayAndStats; -use crate::stats::GenerateStatsOptions; - -/// Dictionary encoding for low-cardinality integer values. -#[derive(Debug, Copy, Clone, PartialEq, Eq)] -pub struct IntDictScheme; - -impl Scheme for IntDictScheme { - fn scheme_name(&self) -> &'static str { - "vortex.int.dict" - } - - fn matches(&self, canonical: &Canonical) -> bool { - is_integer_primitive(canonical) - } - - fn stats_options(&self) -> GenerateStatsOptions { - GenerateStatsOptions { - count_distinct_values: true, - } - } - - /// Children: values=0, codes=1. - fn num_children(&self) -> usize { - 2 - } - - fn expected_compression_ratio( - &self, - _compressor: &CascadingCompressor, - data: &mut ArrayAndStats, - _ctx: CompressorContext, - ) -> VortexResult { - let stats = data.integer_stats(); - - if stats.value_count() == 0 { - return Ok(0.0); - } - - let distinct_values_count = stats.distinct_count().vortex_expect( - "this must be present since `DictScheme` declared that we need distinct values", - ); - - // If > 50% of the values are distinct, skip dict. 
- if distinct_values_count > stats.value_count() / 2 { - return Ok(0.0); - } - - // Ignore nulls encoding for the estimate. We only focus on values. - let values_size = stats.source().ptype().bit_width() * distinct_values_count as usize; - - // Assume codes are compressed RLE + BitPacking. - let codes_bw = usize::BITS - distinct_values_count.leading_zeros(); - - let n_runs = (stats.value_count() / stats.average_run_length()) as usize; - - // Assume that codes will either be BitPack or RLE-BitPack. - let codes_size_bp = (codes_bw * stats.value_count()) as usize; - let codes_size_rle_bp = usize::checked_mul((codes_bw + 32) as usize, n_runs); - - let codes_size = usize::min(codes_size_bp, codes_size_rle_bp.unwrap_or(usize::MAX)); - - let before = stats.value_count() as usize * stats.source().ptype().bit_width(); - - Ok(before as f64 / (values_size + codes_size) as f64) - } - - fn compress( - &self, - compressor: &CascadingCompressor, - data: &mut ArrayAndStats, - ctx: CompressorContext, - ) -> VortexResult { - let stats = data.integer_stats(); - - let dict = integer::dictionary_encode(stats); - - // Values = child 0. - let compressed_values = compressor.compress_child(dict.values(), &ctx, self.id(), 0)?; - - // Codes = child 1. - let compressed_codes = compressor.compress_child( - &dict.codes().to_primitive().narrow()?.into_array(), - &ctx, - self.id(), - 1, - )?; - - // SAFETY: compressing codes does not change their values. - unsafe { - Ok( - DictArray::new_unchecked(compressed_codes, compressed_values) - .set_all_values_referenced(dict.has_all_values_referenced()) - .into_array(), - ) - } - } -} - /// Dictionary encoding for low-cardinality float values. 
#[derive(Debug, Copy, Clone, PartialEq, Eq)] pub struct FloatDictScheme; -impl Scheme for FloatDictScheme { - fn scheme_name(&self) -> &'static str { - "vortex.float.dict" - } - - fn matches(&self, canonical: &Canonical) -> bool { - is_float_primitive(canonical) - } - - fn stats_options(&self) -> GenerateStatsOptions { - GenerateStatsOptions { - count_distinct_values: true, - } - } - - /// Children: values=0, codes=1. - fn num_children(&self) -> usize { - 2 - } - - /// Float dict codes (child 1) are compact unsigned integers that should not be - /// dict-encoded again. Float dict values (child 0) flow through ALP into integer-land, - /// where integer dict encoding is redundant since the values are already deduplicated at - /// the float level. - /// - /// Additional exclusions for codes (IntSequenceScheme, IntRunEndScheme, FoRScheme, - /// ZigZagScheme, SparseScheme, RLE) are expressed as pull rules on those schemes in - /// vortex-btrblocks. - fn descendant_exclusions(&self) -> Vec { - vec![ - DescendantExclusion { - excluded: IntDictScheme.id(), - children: ChildSelection::One(1), - }, - DescendantExclusion { - excluded: IntDictScheme.id(), - children: ChildSelection::One(0), - }, - ] - } - - fn expected_compression_ratio( - &self, - compressor: &CascadingCompressor, - data: &mut ArrayAndStats, - ctx: CompressorContext, - ) -> VortexResult { - let stats = data.float_stats(); - - if stats.value_count() == 0 { - return Ok(0.0); - } - - if stats - .distinct_count() - .is_some_and(|count| count <= stats.value_count() / 2) - { - return estimate_compression_ratio_with_sampling(self, compressor, data.array(), ctx); - } - - Ok(0.0) - } - - fn compress( - &self, - compressor: &CascadingCompressor, - data: &mut ArrayAndStats, - ctx: CompressorContext, - ) -> VortexResult { - let stats = data.float_stats(); - - let dict = float::dictionary_encode(stats); - let has_all_values_referenced = dict.has_all_values_referenced(); - - // Values = child 0. 
- let compressed_values = compressor.compress_child(dict.values(), &ctx, self.id(), 0)?; - - // Codes = child 1. - let compressed_codes = compressor.compress_child( - &dict.codes().to_primitive().narrow()?.into_array(), - &ctx, - self.id(), - 1, - )?; - - // SAFETY: compressing codes or values does not alter the invariants. - unsafe { - Ok( - DictArray::new_unchecked(compressed_codes, compressed_values) - .set_all_values_referenced(has_all_values_referenced) - .into_array(), - ) - } - } -} +/// Dictionary encoding for low-cardinality integer values. +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub struct IntDictScheme; /// Dictionary encoding for low-cardinality string values. #[derive(Debug, Copy, Clone, PartialEq, Eq)] pub struct StringDictScheme; -impl Scheme for StringDictScheme { - fn scheme_name(&self) -> &'static str { - "vortex.string.dict" - } - - fn matches(&self, canonical: &Canonical) -> bool { - is_utf8_string(canonical) - } - - fn stats_options(&self) -> GenerateStatsOptions { - GenerateStatsOptions { - count_distinct_values: true, - } - } - - /// Children: values=0, codes=1. - fn num_children(&self) -> usize { - 2 - } - - /// String dict codes (child 1) are compact unsigned integers that should not be dict-encoded - /// again. - /// - /// Additional exclusions for codes (IntSequenceScheme, FoRScheme, ZigZagScheme, SparseScheme, - /// RunEndScheme, RLE, etc.) are expressed as pull rules on those schemes in `vortex-btrblocks`. 
- fn descendant_exclusions(&self) -> Vec { - vec![DescendantExclusion { - excluded: IntDictScheme.id(), - children: ChildSelection::One(1), - }] - } - - fn expected_compression_ratio( - &self, - compressor: &CascadingCompressor, - data: &mut ArrayAndStats, - ctx: CompressorContext, - ) -> VortexResult { - let stats = data.string_stats(); - - if stats - .estimated_distinct_count() - .is_none_or(|c| c > stats.value_count() / 2) - { - return Ok(0.0); - } - - if stats.value_count() == 0 { - return Ok(0.0); - } - - estimate_compression_ratio_with_sampling(self, compressor, data.array(), ctx) - } - - fn compress( - &self, - compressor: &CascadingCompressor, - data: &mut ArrayAndStats, - ctx: CompressorContext, - ) -> VortexResult { - let stats = data.string_stats(); - - let dict = dict_encode(&stats.source().clone().into_array())?; - - // Values = child 0. - let compressed_values = compressor.compress_child(dict.values(), &ctx, self.id(), 0)?; - - // Codes = child 1. - let compressed_codes = compressor.compress_child( - &dict.codes().to_primitive().narrow()?.into_array(), - &ctx, - self.id(), - 1, - )?; +mod float; +mod integer; +mod string; - // SAFETY: compressing codes or values does not alter the invariants. - unsafe { - Ok( - DictArray::new_unchecked(compressed_codes, compressed_values) - .set_all_values_referenced(dict.has_all_values_referenced()) - .into_array(), - ) - } - } -} +pub use float::dictionary_encode as float_dictionary_encode; +pub use integer::dictionary_encode as integer_dictionary_encode; diff --git a/vortex-compressor/src/builtins/dict/string.rs b/vortex-compressor/src/builtins/dict/string.rs new file mode 100644 index 00000000000..8f896131a94 --- /dev/null +++ b/vortex-compressor/src/builtins/dict/string.rs @@ -0,0 +1,115 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! UTF8-specific dictionary encoding implementation. +//! +//! 
Vortex encoders must always produce unsigned integer codes; signed codes are only accepted +//! for external compatibility. + +use vortex_array::ArrayRef; +use vortex_array::Canonical; +use vortex_array::IntoArray; +use vortex_array::ToCanonical; +use vortex_array::arrays::DictArray; +use vortex_array::builders::dict::dict_encode; +use vortex_error::VortexExpect; +use vortex_error::VortexResult; + +use crate::CascadingCompressor; +use crate::builtins::IntDictScheme; +use crate::builtins::StringDictScheme; +use crate::builtins::is_utf8_string; +use crate::ctx::CompressorContext; +use crate::estimate::CompressionEstimate; +use crate::scheme::ChildSelection; +use crate::scheme::DescendantExclusion; +use crate::scheme::Scheme; +use crate::scheme::SchemeExt; +use crate::stats::ArrayAndStats; +use crate::stats::GenerateStatsOptions; + +impl Scheme for StringDictScheme { + fn scheme_name(&self) -> &'static str { + "vortex.string.dict" + } + + fn matches(&self, canonical: &Canonical) -> bool { + is_utf8_string(canonical) + } + + fn stats_options(&self) -> GenerateStatsOptions { + GenerateStatsOptions { + count_distinct_values: true, + } + } + + /// Children: values=0, codes=1. + fn num_children(&self) -> usize { + 2 + } + + /// String dict codes (child 1) are compact unsigned integers that should not be dict-encoded + /// again. + /// + /// Additional exclusions for codes (IntSequenceScheme, FoRScheme, ZigZagScheme, SparseScheme, + /// RunEndScheme, RLE, etc.) are expressed as pull rules on those schemes in `vortex-btrblocks`. 
+ fn descendant_exclusions(&self) -> Vec { + vec![DescendantExclusion { + excluded: IntDictScheme.id(), + children: ChildSelection::One(1), + }] + } + + fn expected_compression_ratio( + &self, + data: &mut ArrayAndStats, + _ctx: CompressorContext, + ) -> CompressionEstimate { + let stats = data.string_stats(); + + if stats.value_count() == 0 { + return CompressionEstimate::Skip; + } + + let estimated_distinct_values_count = stats.estimated_distinct_count().vortex_expect( + "this must be present since `DictScheme` declared that we need distinct values", + ); + + // If > 50% of the values are distinct, skip dictionary scheme. + if estimated_distinct_values_count > stats.value_count() / 2 { + return CompressionEstimate::Skip; + } + + // Let sampling determine the expected ratio. + CompressionEstimate::Sample + } + + fn compress( + &self, + compressor: &CascadingCompressor, + data: &mut ArrayAndStats, + ctx: CompressorContext, + ) -> VortexResult { + let dict = dict_encode(data.array())?; + + // Values = child 0. + let compressed_values = compressor.compress_child(dict.values(), &ctx, self.id(), 0)?; + + // Codes = child 1. + let compressed_codes = compressor.compress_child( + &dict.codes().to_primitive().narrow()?.into_array(), + &ctx, + self.id(), + 1, + )?; + + // SAFETY: compressing codes or values does not alter the invariants. + unsafe { + Ok( + DictArray::new_unchecked(compressed_codes, compressed_values) + .set_all_values_referenced(dict.has_all_values_referenced()) + .into_array(), + ) + } + } +} diff --git a/vortex-compressor/src/builtins/mod.rs b/vortex-compressor/src/builtins/mod.rs index 59609a6afa3..c5bd9f343f5 100644 --- a/vortex-compressor/src/builtins/mod.rs +++ b/vortex-compressor/src/builtins/mod.rs @@ -10,28 +10,10 @@ //! [`DictArray`]: vortex_array::arrays::DictArray //! 
[`MaskedArray`]: vortex_array::arrays::MaskedArray -pub use constant::BoolConstantScheme; -pub use constant::FloatConstantScheme; -pub use constant::IntConstantScheme; -pub use constant::StringConstantScheme; -pub use dict::FloatDictScheme; -pub use dict::IntDictScheme; -pub use dict::StringDictScheme; -pub use dict::float::dictionary_encode as float_dictionary_encode; -pub use dict::integer::dictionary_encode as integer_dictionary_encode; - -mod constant; -mod dict; - use vortex_array::Canonical; use vortex_array::dtype::DType; use vortex_array::dtype::Nullability; -/// Returns `true` if the canonical array is a bool type. -pub fn is_bool(canonical: &Canonical) -> bool { - matches!(canonical, Canonical::Bool(_)) -} - /// Returns `true` if the canonical array is a primitive with an integer ptype. pub fn is_integer_primitive(canonical: &Canonical) -> bool { matches!(canonical, Canonical::Primitive(p) if p.ptype().is_int()) @@ -49,3 +31,18 @@ pub fn is_utf8_string(canonical: &Canonical) -> bool { v.dtype().eq_ignore_nullability(&DType::Utf8(Nullability::NonNullable)) ) } + +mod dict; + +pub use dict::FloatDictScheme; +pub use dict::IntDictScheme; +pub use dict::StringDictScheme; +pub use dict::float_dictionary_encode; +pub use dict::integer_dictionary_encode; + +mod constant; + +pub use constant::BoolConstantScheme; +pub use constant::FloatConstantScheme; +pub use constant::IntConstantScheme; +pub use constant::StringConstantScheme; diff --git a/vortex-compressor/src/compressor.rs b/vortex-compressor/src/compressor.rs index 5aff682fbad..6d8916b2098 100644 --- a/vortex-compressor/src/compressor.rs +++ b/vortex-compressor/src/compressor.rs @@ -29,9 +29,13 @@ use vortex_array::scalar::Scalar; use vortex_array::vtable::ValidityHelper; use vortex_error::VortexResult; use vortex_error::vortex_bail; +use vortex_error::vortex_panic; use crate::builtins::IntDictScheme; use crate::ctx::CompressorContext; +use crate::estimate::CompressionEstimate; +use 
crate::estimate::estimate_compression_ratio_with_sampling; +use crate::estimate::is_better_ratio; use crate::scheme::ChildSelection; use crate::scheme::DescendantExclusion; use crate::scheme::Scheme; @@ -109,6 +113,25 @@ impl CascadingCompressor { self.ctx.lock() } + /// Compresses an array using cascading adaptive compression. + /// + /// First canonicalizes and compacts the array, then applies optimal compression schemes. + /// + /// # Errors + /// + /// Returns an error if canonicalization or compression fails. + pub fn compress(&self, array: &ArrayRef) -> VortexResult { + let canonical = array + .clone() + .execute::(&mut self.execution_ctx())? + .0; + + // Compact it, removing any wasted space before we attempt to compress it. + let compact = canonical.compact()?; + + self.compress_canonical(compact, CompressorContext::new()) + } + /// Compresses a child array produced by a cascading scheme. /// /// If the cascade budget is exhausted, the canonical array is returned as-is. Otherwise, @@ -141,25 +164,6 @@ impl CascadingCompressor { self.compress_canonical(compact, child_ctx) } - /// Compresses an array using cascading adaptive compression. - /// - /// First canonicalizes and compacts the array, then applies optimal compression schemes. - /// - /// # Errors - /// - /// Returns an error if canonicalization or compression fails. - pub fn compress(&self, array: &ArrayRef) -> VortexResult { - let canonical = array - .clone() - .execute::(&mut self.execution_ctx())? - .0; - - // Compact it, removing any wasted space before we attempt to compress it. - let compact = canonical.compact()?; - - self.compress_canonical(compact, CompressorContext::new()) - } - /// Compresses a canonical array by dispatching to type-specific logic. /// /// # Errors @@ -286,7 +290,6 @@ impl CascadingCompressor { if array.is_empty() { return Ok(array); } - if array.all_invalid()? 
{ return Ok( ConstantArray::new(Scalar::null(array.dtype().clone()), array.len()).into_array(), @@ -294,19 +297,26 @@ impl CascadingCompressor { } let before_nbytes = array.nbytes(); + let merged_opts = eligible_schemes .iter() .fold(GenerateStatsOptions::default(), |acc, s| { acc.merge(s.stats_options()) }); - - let ctx = ctx.with_stats_options(merged_opts); + let ctx = ctx.with_merged_stats_options(merged_opts); let mut data = ArrayAndStats::new(array, merged_opts); - if let Some(winner) = self.choose_scheme(&eligible_schemes, &mut data, ctx.clone())? { + if let Some(winner) = self.choose_best_scheme(&eligible_schemes, &mut data, ctx.clone())? { + // TODO(connor): Add a tracing warning here if compression with the chosen scheme + // failed, since there was likely more we could have done while choosing schemes. + + // Sampling and estimation chose a scheme, so let's compress the whole array with it. let compressed = winner.compress(self, &mut data, ctx)?; + + // Only choose the compressed array if it is smaller than the canonical one. if compressed.nbytes() < before_nbytes { + // TODO(connor): Add a tracing warning here too. return Ok(compressed); } } @@ -320,7 +330,7 @@ impl CascadingCompressor { /// (earlier in the list wins). /// /// [`expected_compression_ratio`]: Scheme::expected_compression_ratio - fn choose_scheme( + fn choose_best_scheme( &self, schemes: &[&'static dyn Scheme], data: &mut ArrayAndStats, @@ -328,25 +338,53 @@ impl CascadingCompressor { ) -> VortexResult> { let mut best: Option<(&'static dyn Scheme, f64)> = None; + // TODO(connor): Might want to use an `im` data structure inside of `ctx` if the clones here + // are expensive. for &scheme in schemes { - // Constant detection on a sample is a false positive: the sample being constant - // does not mean the full array is constant. 
- if ctx.is_sample() && scheme.detects_constant() { - continue; - } - - let ratio = scheme.expected_compression_ratio(self, data, ctx.clone())?; - - tracing::debug!(scheme = %scheme.id(), ratio, "evaluated compression ratio"); - - if is_better_ratio(ratio, &best) { - best = Some((scheme, ratio)); - - // Schemes that return f64::MAX (like Constant) cannot be beat, so stop early. - if ratio == f64::MAX { - break; + let estimate = scheme.expected_compression_ratio(data, ctx.clone()); + + match estimate { + CompressionEstimate::Skip => {} + CompressionEstimate::AlwaysUse => return Ok(Some(scheme)), + CompressionEstimate::Ratio(ratio) => { + if is_better_ratio(ratio, &best) { + best = Some((scheme, ratio)); + } + } + CompressionEstimate::Sample => { + let sample_ratio = estimate_compression_ratio_with_sampling( + scheme, + self, + data.array(), + ctx.clone(), + )?; + + if is_better_ratio(sample_ratio, &best) { + best = Some((scheme, sample_ratio)); + } + } + // TODO(connor): Is there a way to deduplicate some of this code? + CompressionEstimate::Estimate(estimate_callback) => { + let estimate = estimate_callback(self, data, ctx.clone())?; + + match estimate { + CompressionEstimate::Skip => {} + CompressionEstimate::AlwaysUse => return Ok(Some(scheme)), + CompressionEstimate::Ratio(ratio) => { + if is_better_ratio(ratio, &best) { + best = Some((scheme, ratio)); + } + } + e @ (CompressionEstimate::Sample | CompressionEstimate::Estimate(_)) => { + vortex_panic!( + "an estimation function returned an invalid variant {e:?}" + ) + } + } } } + + // tracing::debug!(scheme = %scheme.id(), estimate, "evaluated compression ratio"); } Ok(best.map(|(s, _)| s)) @@ -458,14 +496,14 @@ impl CascadingCompressor { } } -/// Returns `true` if `ratio` is a valid compression ratio (> 1.0, finite, not subnormal) that -/// beats the current best. 
-fn is_better_ratio(ratio: f64, best: &Option<(&'static dyn Scheme, f64)>) -> bool { - ratio.is_finite() && !ratio.is_subnormal() && ratio > 1.0 && best.is_none_or(|(_, r)| ratio > r) -} - #[cfg(test)] mod tests { + use vortex_array::arrays::BoolArray; + use vortex_array::arrays::Constant; + use vortex_array::arrays::PrimitiveArray; + use vortex_array::validity::Validity; + use vortex_buffer::buffer; + use super::*; use crate::builtins::FloatDictScheme; use crate::builtins::IntDictScheme; @@ -525,4 +563,50 @@ mod tests { // No history means no exclusions. assert!(!c.is_excluded(&IntDictScheme, &ctx)); } + + #[test] + fn all_null_array_compresses_to_constant() -> VortexResult<()> { + let array = PrimitiveArray::new( + buffer![0i32, 0, 0, 0, 0], + Validity::Array(BoolArray::from_iter([false, false, false, false, false]).into_array()), + ) + .into_array(); + + // The compressor should produce a `ConstantArray` for an all-null array regardless of + // which schemes are registered. + let compressor = CascadingCompressor::new(vec![&IntDictScheme]); + let compressed = compressor.compress(&array)?; + assert!(compressed.is::()); + Ok(()) + } + + /// Regression test for . + /// + /// `estimate_compression_ratio_with_sampling` must use the *scheme's* stats options + /// (which request distinct-value counting) rather than the context's stats options + /// (which may not). With the old code this panicked inside `dictionary_encode` because + /// distinct values were never computed for the sample. + #[test] + fn sampling_uses_scheme_stats_options() -> VortexResult<()> { + // Low-cardinality float array so FloatDictScheme considers it compressible. 
+ let array = PrimitiveArray::new( + buffer![1.0f32, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0], + Validity::NonNullable, + ) + .into_array(); + + let compressor = CascadingCompressor::new(vec![&FloatDictScheme]); + + // A context with default stats_options (count_distinct_values = false) and + // marked as a sample so the function skips the sampling step and compresses + // the array directly. + let ctx = CompressorContext::new().with_sampling(); + + // Before the fix this panicked with: + // "this must be present since `DictScheme` declared that we need distinct values" + let ratio = + estimate_compression_ratio_with_sampling(&FloatDictScheme, &compressor, &array, ctx)?; + assert!(ratio.is_finite()); + Ok(()) + } } diff --git a/vortex-compressor/src/ctx.rs b/vortex-compressor/src/ctx.rs index 465a7398350..a488bef17bf 100644 --- a/vortex-compressor/src/ctx.rs +++ b/vortex-compressor/src/ctx.rs @@ -20,10 +20,13 @@ pub const MAX_CASCADE: usize = 3; pub struct CompressorContext { /// Whether we're compressing a sample (for ratio estimation). is_sample: bool, + /// Remaining cascade depth allowed. allowed_cascading: usize, + /// Merged stats options from all eligible schemes at this compression site. - stats_options: GenerateStatsOptions, + merged_stats_options: GenerateStatsOptions, + /// The cascade chain: `(scheme_id, child_index)` pairs from root to current depth. /// Used for self-exclusion, push rules ([`descendant_exclusions`]), and pull rules /// ([`ancestor_exclusions`]). @@ -41,7 +44,7 @@ impl CompressorContext { Self { is_sample: false, allowed_cascading: MAX_CASCADE, - stats_options: GenerateStatsOptions::default(), + merged_stats_options: GenerateStatsOptions::default(), cascade_history: Vec::new(), } } @@ -60,40 +63,48 @@ impl CompressorContext { self.is_sample } + /// Returns the merged stats generation options for this compression site. 
+ pub fn merged_stats_options(&self) -> GenerateStatsOptions { + self.merged_stats_options + } + + /// Returns the cascade chain of `(scheme_id, child_index)` pairs. + pub fn cascade_history(&self) -> &[(SchemeId, usize)] { + &self.cascade_history + } + /// Whether cascading is exhausted (no further cascade levels allowed). + /// + /// This should only be used in the implementation of a [`Scheme`](crate::scheme::Scheme) if the + /// scheme knows that its child _must_ be compressed for it to make any sense being chosen. pub fn finished_cascading(&self) -> bool { self.allowed_cascading == 0 } - /// Returns the merged stats generation options for this compression site. - pub fn stats_options(&self) -> GenerateStatsOptions { - self.stats_options + /// Returns a context that disallows further cascading. + pub fn as_leaf(mut self) -> Self { + self.allowed_cascading = 0; + self } /// Returns a context with the given stats options. - pub fn with_stats_options(mut self, opts: GenerateStatsOptions) -> Self { - self.stats_options = opts; + pub(super) fn with_merged_stats_options(mut self, opts: GenerateStatsOptions) -> Self { + self.merged_stats_options = opts; self } /// Returns a context marked as sample compression. - pub fn as_sample(mut self) -> Self { + pub(super) fn with_sampling(mut self) -> Self { + self.is_sample = true; self } - /// Returns a context that disallows further cascading. - pub fn as_leaf(mut self) -> Self { - self.allowed_cascading = 0; - self - } - /// Descends one level in the cascade, recording the current scheme and which child is /// being compressed. /// /// The `child_index` identifies which child of the scheme is being compressed (e.g. for /// Dict: values=0, codes=1). 
- pub(crate) fn descend_with_scheme(mut self, id: SchemeId, child_index: usize) -> Self { + pub(super) fn descend_with_scheme(mut self, id: SchemeId, child_index: usize) -> Self { self.allowed_cascading = self .allowed_cascading .checked_sub(1) @@ -101,9 +112,4 @@ impl CompressorContext { self.cascade_history.push((id, child_index)); self } - - /// Returns the cascade chain of `(scheme_id, child_index)` pairs. - pub fn cascade_history(&self) -> &[(SchemeId, usize)] { - &self.cascade_history - } } diff --git a/vortex-compressor/src/estimate.rs b/vortex-compressor/src/estimate.rs new file mode 100644 index 00000000000..9d3993494ed --- /dev/null +++ b/vortex-compressor/src/estimate.rs @@ -0,0 +1,151 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Compression ratio estimation types and sampling-based estimation. + +use std::fmt; + +use vortex_array::ArrayRef; +use vortex_array::Canonical; +use vortex_array::IntoArray; +use vortex_error::VortexResult; + +use crate::CascadingCompressor; +use crate::ctx::CompressorContext; +use crate::sample::SAMPLE_SIZE; +use crate::sample::sample; +use crate::sample::sample_count_approx_one_percent; +use crate::scheme::Scheme; +use crate::scheme::SchemeExt; +use crate::stats::ArrayAndStats; + +/// Closure type for [`CompressionEstimate::Estimate`]. The compressor calls this with the same +/// arguments it would pass to sampling. +#[rustfmt::skip] +pub type EstimateFn = dyn FnOnce( + &CascadingCompressor, + &mut ArrayAndStats, + CompressorContext, + ) -> VortexResult + + Send + + Sync; + +// TODO(connor): We should make use of the fact that some checks are cheap and some checks are +// expensive (sample or estimate variants). +/// The result of a [`Scheme`]'s compression ratio estimation. +/// +/// This type is returned by [`Scheme::expected_compression_ratio`] to tell the compressor how +/// promising this scheme is for a given array without performing any expensive work. 
+/// +/// All expensive or fallible operations (sampling, trial encoding) are deferred to the compressor +/// via the [`Sample`](CompressionEstimate::Sample) and [`Estimate`](CompressionEstimate::Estimate) +/// variants. +/// +/// [`Sample`]: CompressionEstimate::Sample +/// [`Estimate`]: CompressionEstimate::Estimate +pub enum CompressionEstimate { + /// Do not use this scheme for this array. + Skip, + + /// Always use this scheme, as we know it is definitively the best choice. + /// + /// Some examples include constant detection, decimal byte parts, and temporal decomposition. + /// + /// The compressor will select this scheme immediately without evaluating further candidates. + /// Schemes that return `AlwaysUse` must be mutually exclusive per canonical type (enforced by + /// [`Scheme::matches`]), otherwise the winner depends silently on registration order. + /// + /// [`Scheme::matches`]: crate::scheme::Scheme::matches + AlwaysUse, + + /// The estimated compression ratio. This must be greater than `1.0` to be considered by the + /// compressor, otherwise it is worse than the canonical encoding. + Ratio(f64), + + /// The scheme cannot cheaply estimate its ratio, so the compressor should compress a small + /// sample to determine effectiveness. + Sample, + + /// A fallible estimation requiring a custom expensive computation. The compressor will call the + /// closure and handle the result. + /// + /// Use this only when the scheme needs to perform trial encoding or other costly checks to + /// determine its compression ratio. + /// + /// The estimation function must **not** return a [`Sample`](CompressionEstimate::Sample) or + /// [`Estimate`](CompressionEstimate::Estimate) variant to ensure the estimation process is + /// bounded. + Estimate(Box), +} + +/// Returns `true` if `ratio` is a valid compression ratio (> 1.0, finite, not subnormal) that +/// beats the current best. 
+pub(super) fn is_better_ratio(ratio: f64, best: &Option<(&'static dyn Scheme, f64)>) -> bool { + ratio.is_finite() && !ratio.is_subnormal() && ratio > 1.0 && best.is_none_or(|(_, r)| ratio > r) +} + +/// Estimates compression ratio by compressing a ~1% sample of the data. +/// +/// Creates a new [`ArrayAndStats`] for the sample so that stats are generated from the sample, not +/// the full array. +/// +/// # Errors +/// +/// Returns an error if sample compression fails. +pub(super) fn estimate_compression_ratio_with_sampling( + scheme: &S, + compressor: &CascadingCompressor, + array: &ArrayRef, + ctx: CompressorContext, +) -> VortexResult { + let sample_array = if ctx.is_sample() { + array.clone() + } else { + let source_len = array.len(); + let sample_count = sample_count_approx_one_percent(source_len); + + tracing::trace!( + "Sampling {} values out of {}", + SAMPLE_SIZE as u64 * sample_count as u64, + source_len + ); + + // `ArrayAndStats` expects a canonical array (so that it can easily compute lazy stats). + let canonical: Canonical = + sample(array, SAMPLE_SIZE, sample_count).execute(&mut compressor.execution_ctx())?; + canonical.into_array() + }; + + let mut sample_data = ArrayAndStats::new(sample_array, scheme.stats_options()); + let sample_ctx = ctx.with_sampling(); + + let after = scheme + .compress(compressor, &mut sample_data, sample_ctx)? 
+ .nbytes(); + let before = sample_data.array().nbytes(); + + if after == 0 { + tracing::warn!( + scheme = %scheme.id(), + "sample compressed to 0 bytes, which should only happen for constant arrays", + ); + } + + let ratio = before as f64 / after as f64; + + tracing::debug!("estimate_compression_ratio_with_sampling(compressor={scheme:#?}) = {ratio}",); + + Ok(ratio) +} + +impl fmt::Debug for CompressionEstimate { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + CompressionEstimate::Skip => write!(f, "Skip"), + CompressionEstimate::AlwaysUse => write!(f, "AlwaysUse"), + CompressionEstimate::Ratio(r) => f.debug_tuple("Ratio").field(r).finish(), + CompressionEstimate::Sample => write!(f, "Sample"), + CompressionEstimate::Estimate(_) => write!(f, "Estimate(..)"), + } + } +} diff --git a/vortex-compressor/src/lib.rs b/vortex-compressor/src/lib.rs index 683bea4f8aa..65fd3f09c56 100644 --- a/vortex-compressor/src/lib.rs +++ b/vortex-compressor/src/lib.rs @@ -18,6 +18,7 @@ pub mod builtins; pub mod ctx; +pub mod estimate; pub mod scheme; pub mod stats; diff --git a/vortex-compressor/src/scheme.rs b/vortex-compressor/src/scheme.rs index aae8e4606db..66b35051e95 100644 --- a/vortex-compressor/src/scheme.rs +++ b/vortex-compressor/src/scheme.rs @@ -14,9 +14,7 @@ use vortex_error::VortexResult; use crate::CascadingCompressor; use crate::ctx::CompressorContext; -use crate::sample::SAMPLE_SIZE; -use crate::sample::sample; -use crate::sample::sample_count_approx_one_percent; +use crate::estimate::CompressionEstimate; use crate::stats::ArrayAndStats; use crate::stats::GenerateStatsOptions; @@ -88,32 +86,33 @@ pub struct AncestorExclusion { pub children: ChildSelection, } +// TODO(connor): Remove all default implemented methods. /// A single compression encoding that the [`CascadingCompressor`] can select from. 
/// -/// The compressor evaluates every registered scheme whose [`matches`] returns `true` for a -/// given array, picks the one with the highest [`expected_compression_ratio`], and calls -/// [`compress`] on the winner. +/// The compressor evaluates every registered scheme whose [`matches`] returns `true` for a given +/// array, picks the one with the highest [`expected_compression_ratio`], and calls [`compress`] on +/// the winner. /// -/// One of the key features of this compressor is that schemes may "cascade": a scheme's -/// [`compress`] can call back into the compressor via [`CascadingCompressor::compress_child`] to -/// compress child or transformed arrays, building up multiple encoding layers (e.g. -/// frame-of-reference and then bit-packing). +/// One of the key features of the compressor in this crate is that schemes may "cascade". A +/// scheme's [`compress`] can call back into the compressor via +/// [`CascadingCompressor::compress_child`] to compress child or transformed arrays, building up +/// multiple encoding layers (e.g. frame-of-reference and then bit-packing). /// -/// # Identity +/// # Scheme IDs /// /// Every scheme has a globally unique name returned by [`scheme_name`]. The [`SchemeExt::id`] /// method (auto-implemented, cannot be overridden) wraps that name in an opaque [`SchemeId`] used -/// for equality, hashing, and exclusion rules. +/// for equality, hashing, and exclusion rules (see below). /// /// # Cascading and children /// -/// Schemes that produce child arrays for further compression declare [`num_children`] > 0. Each -/// child is identified by index. Cascading schemes should use +/// Schemes that produce child arrays for further compression must declare [`num_children`] > 0. +/// Each child should be identified by a stable index. Cascading schemes should use /// [`CascadingCompressor::compress_child`] to compress each child array, which handles cascade /// level / budget tracking and context management automatically. 
/// -/// No scheme may appear twice in a cascade chain (enforced by the compressor). This keeps the -/// search space a tree. +/// No scheme may appear twice in a cascade (descendant) chain (enforced by the compressor). This +/// keeps the search space a tree. /// /// # Exclusion rules /// @@ -125,13 +124,15 @@ pub struct AncestorExclusion { /// - [`ancestor_exclusions`] (pull): "exclude me if ancestor X's child Y is above me." Used when /// the declaring scheme knows about the ancestor. /// -/// # Implementing a scheme +/// We do this because different schemes will live in different crates, and we cannot know the +/// dependency direction ahead of time. /// -/// At a minimum, implementors must provide [`scheme_name`], [`matches`], and [`compress`]. +/// # Implementing a scheme /// -/// The default [`expected_compression_ratio`] estimates the ratio by compressing a small sample. -/// Implementors should only override this method when a cheaper heuristic is available (e.g. -/// returning `f64::MAX` for constant detection or `0.0` for early rejection based on stats). +/// [`expected_compression_ratio`] should return [`CompressionEstimate::Sample`] when a cheap +/// heuristic is not available, asking the compressor to estimate via sampling. Implementors should +/// return a more specific variant when possible (e.g. [`CompressionEstimate::AlwaysUse`] for +/// constant detection or [`CompressionEstimate::Skip`] for early rejection based on stats). /// /// Schemes that need statistics that may be expensive to compute should override [`stats_options`] /// to declare what they require. The compressor merges all eligible schemes' options before @@ -152,11 +153,6 @@ pub trait Scheme: Debug + Send + Sync { /// Whether this scheme can compress the given canonical array. fn matches(&self, canonical: &Canonical) -> bool; - /// True if this scheme detects constant arrays. 
- fn detects_constant(&self) -> bool { - false - } - /// Returns the stats generation options this scheme requires. The compressor merges all /// eligible schemes' options before generating stats so that a single stats pass satisfies /// every scheme. @@ -186,21 +182,30 @@ pub trait Scheme: Debug + Send + Sync { Vec::new() } - // TODO(connor): It would be nice if we returned a more useful type that said "choose me no - // matter what" instead of `f64::MAX`. - /// Estimate the compression ratio for this scheme on the given array. + /// Cheaply estimate the compression ratio for this scheme on the given array. /// - /// # Errors + /// This method should be fast and infallible. Any expensive or fallible work should be deferred + /// to the compressor by returning [`CompressionEstimate::Sample`] or + /// [`CompressionEstimate::Estimate`]. + /// + /// The compressor will ask all schemes what their expected compression ratio is given the array + /// and statistics. The scheme with the highest estimated ratio will then be applied to the + /// entire array. /// - /// Returns an error if compression of the sample fails. + /// Note that the compressor will also use this method when compressing samples, so some + /// statistics that might hold for the samples may not hold for the entire array (e.g., + /// `Constant`). Implementations should check `ctx.is_sample` to make sure that they are + /// returning the correct information. + /// + /// The compressor guarantees that empty and all-null arrays are handled before this method is + /// called. Implementations may assume the array has at least one valid element. However, a + /// constant scheme should still be registered with the compressor to detect single-value arrays + /// that are not all-null. 
fn expected_compression_ratio( &self, - compressor: &CascadingCompressor, - data: &mut ArrayAndStats, - ctx: CompressorContext, - ) -> VortexResult { - estimate_compression_ratio_with_sampling(self, compressor, data.array(), ctx) - } + _data: &mut ArrayAndStats, + _ctx: CompressorContext, + ) -> CompressionEstimate; /// Compress the array using this scheme. /// @@ -243,90 +248,3 @@ pub trait SchemeExt: Scheme { } impl SchemeExt for T {} - -/// Estimates compression ratio by compressing a ~1% sample of the data. -/// -/// Creates a new [`ArrayAndStats`] for the sample so that stats are generated from the sample, not -/// the full array. -/// -/// # Errors -/// -/// Returns an error if sample compression fails. -pub fn estimate_compression_ratio_with_sampling( - scheme: &S, - compressor: &CascadingCompressor, - array: &ArrayRef, - ctx: CompressorContext, -) -> VortexResult { - let sample_array = if ctx.is_sample() { - array.clone() - } else { - let source_len = array.len(); - let sample_count = sample_count_approx_one_percent(source_len); - - tracing::trace!( - "Sampling {} values out of {}", - SAMPLE_SIZE as u64 * sample_count as u64, - source_len - ); - - sample(array, SAMPLE_SIZE, sample_count) - }; - - let mut sample_data = ArrayAndStats::new(sample_array, scheme.stats_options()); - let sample_ctx = ctx.as_sample(); - - let after = scheme - .compress(compressor, &mut sample_data, sample_ctx)? 
- .nbytes(); - let before = sample_data.array().nbytes(); - let ratio = before as f64 / after as f64; - - tracing::debug!("estimate_compression_ratio_with_sampling(compressor={scheme:#?}) = {ratio}",); - - Ok(ratio) -} - -#[cfg(test)] -mod tests { - use vortex_array::IntoArray; - use vortex_array::arrays::PrimitiveArray; - use vortex_array::validity::Validity; - use vortex_buffer::buffer; - use vortex_error::VortexResult; - - use super::estimate_compression_ratio_with_sampling; - use crate::CascadingCompressor; - use crate::builtins::FloatDictScheme; - use crate::ctx::CompressorContext; - - /// Regression test for . - /// - /// `estimate_compression_ratio_with_sampling` must use the *scheme's* stats options - /// (which request distinct-value counting) rather than the context's stats options - /// (which may not). With the old code this panicked inside `dictionary_encode` because - /// distinct values were never computed for the sample. - #[test] - fn sampling_uses_scheme_stats_options() -> VortexResult<()> { - // Low-cardinality float array so FloatDictScheme considers it compressible. - let array = PrimitiveArray::new( - buffer![1.0f32, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0], - Validity::NonNullable, - ) - .into_array(); - - let compressor = CascadingCompressor::new(vec![&FloatDictScheme]); - - // A context with default stats_options (count_distinct_values = false) and - // marked as a sample so the function skips the sampling step and compresses - // the array directly. 
- let ctx = CompressorContext::default().as_sample(); - - // Before the fix this panicked with: - // "this must be present since `DictScheme` declared that we need distinct values" - let ratio = - estimate_compression_ratio_with_sampling(&FloatDictScheme, &compressor, &array, ctx)?; - assert!(ratio.is_finite()); - Ok(()) - } -} diff --git a/vortex-compressor/src/stats/bool.rs b/vortex-compressor/src/stats/bool.rs index 0f85d8f52b2..3df1cea9f98 100644 --- a/vortex-compressor/src/stats/bool.rs +++ b/vortex-compressor/src/stats/bool.rs @@ -10,14 +10,12 @@ use vortex_mask::AllOr; /// Array of booleans and relevant stats for compression. #[derive(Clone, Debug)] pub struct BoolStats { - /// The underlying source array. - src: BoolArray, /// Number of null values. null_count: u32, - /// Number of `true` values among valid (non-null) elements. - true_count: u32, /// Number of non-null values. value_count: u32, + /// Number of `true` values among valid (non-null) elements. + true_count: u32, } impl BoolStats { @@ -29,7 +27,6 @@ impl BoolStats { pub fn generate(input: &BoolArray) -> VortexResult { if input.is_empty() { return Ok(Self { - src: input.clone(), null_count: 0, value_count: 0, true_count: 0, @@ -38,7 +35,6 @@ impl BoolStats { if input.all_invalid()? { return Ok(Self { - src: input.clone(), null_count: u32::try_from(input.len())?, value_count: 0, true_count: 0, @@ -62,18 +58,12 @@ impl BoolStats { }; Ok(Self { - src: input.clone(), null_count: u32::try_from(null_count)?, value_count: u32::try_from(value_count)?, true_count: u32::try_from(true_count)?, }) } - /// Returns the underlying source array. - pub fn source(&self) -> &BoolArray { - &self.src - } - /// Returns the number of null values. 
pub fn null_count(&self) -> u32 { self.null_count diff --git a/vortex-compressor/src/stats/cache.rs b/vortex-compressor/src/stats/cache.rs index c83bf044b03..3be9cc2bb78 100644 --- a/vortex-compressor/src/stats/cache.rs +++ b/vortex-compressor/src/stats/cache.rs @@ -8,6 +8,10 @@ use std::any::TypeId; use vortex_array::ArrayRef; use vortex_array::ToCanonical; +use vortex_array::arrays::Primitive; +use vortex_array::arrays::PrimitiveArray; +use vortex_array::arrays::VarBinView; +use vortex_array::arrays::VarBinViewArray; use vortex_error::VortexExpect; use super::BoolStats; @@ -67,7 +71,7 @@ impl StatsCache { /// /// Extension schemes can use `get_or_insert_with` for custom stats types. pub struct ArrayAndStats { - /// The array. + /// The array. This is always in canonical form. array: ArrayRef, /// The stats cache. cache: StatsCache, @@ -79,7 +83,16 @@ impl ArrayAndStats { /// Creates a new bundle with the given stats generation options. /// /// Stats are generated lazily on first access via the typed accessor methods. + /// + /// # Panics + /// + /// Panics if the array is not canonical. pub fn new(array: ArrayRef, opts: GenerateStatsOptions) -> Self { + assert!( + array.is_canonical(), + "ArrayAndStats should only be created with canonical arrays" + ); + Self { array, cache: StatsCache::new(), @@ -92,11 +105,38 @@ impl ArrayAndStats { &self.array } + /// Returns the array as a [`PrimitiveArray`]. + /// + /// # Panics + /// + /// Panics if the array is not a primitive array. + pub fn array_as_primitive(&self) -> &PrimitiveArray { + self.array + .as_opt::() + .vortex_expect("the array is guaranteed to already be canonical by construction") + } + + /// Returns the array as a [`VarBinViewArray`]. + /// + /// # Panics + /// + /// Panics if the array is not a UTF-8 string array. 
+ pub fn array_as_utf8(&self) -> &VarBinViewArray { + self.array + .as_opt::() + .vortex_expect("the array is guaranteed to already be canonical by construction") + } + /// Consumes the bundle and returns the array. pub fn into_array(self) -> ArrayRef { self.array } + /// Returns the length of the array. + pub fn array_len(&self) -> usize { + self.array.len() + } + /// Returns bool stats, generating them lazily on first access. pub fn bool_stats(&mut self) -> &BoolStats { let array = self.array.clone(); @@ -106,6 +146,8 @@ impl ArrayAndStats { }) } + // TODO(connor): These should all have interior mutability instead!!! + /// Returns integer stats, generating them lazily on first access. pub fn integer_stats(&mut self) -> &IntegerStats { let array = self.array.clone(); diff --git a/vortex-compressor/src/stats/float.rs b/vortex-compressor/src/stats/float.rs index 67877d7796c..c89de9c9893 100644 --- a/vortex-compressor/src/stats/float.rs +++ b/vortex-compressor/src/stats/float.rs @@ -27,7 +27,7 @@ use super::GenerateStatsOptions; pub struct DistinctInfo { /// The set of distinct float values. distinct_values: HashSet, FxBuildHasher>, - /// The count of unique values. + /// The count of unique values. This _must_ be non-zero. distinct_count: u32, } @@ -92,8 +92,6 @@ impl_from_typed!(f64, ErasedStats::F64); /// Array of floating-point numbers and relevant stats for compression. #[derive(Debug, Clone)] pub struct FloatStats { - /// The underlying source array. - src: PrimitiveArray, /// Cache for `validity.false_count()`. null_count: u32, /// Cache for `validity.true_count()`. @@ -136,11 +134,6 @@ impl FloatStats { .vortex_expect("FloatStats::generate_opts should not fail") } - /// Returns the underlying source array. - pub fn source(&self) -> &PrimitiveArray { - &self.src - } - /// Returns the number of null values. pub fn null_count(&self) -> u32 { self.null_count @@ -174,15 +167,15 @@ where // Special case: empty array. 
if array.is_empty() { return Ok(FloatStats { - src: array.clone(), null_count: 0, value_count: 0, average_run_length: 0, erased: TypedStats { distinct: None }.into(), }); - } else if array.all_invalid()? { + } + + if array.all_invalid()? { return Ok(FloatStats { - src: array.clone(), null_count: u32::try_from(array.len())?, value_count: 0, average_run_length: 0, @@ -259,7 +252,6 @@ where Ok(FloatStats { null_count, value_count, - src: array.clone(), average_run_length: value_count / runs, erased: TypedStats { distinct }.into(), }) diff --git a/vortex-compressor/src/stats/integer.rs b/vortex-compressor/src/stats/integer.rs index 1f13118584b..f800085da18 100644 --- a/vortex-compressor/src/stats/integer.rs +++ b/vortex-compressor/src/stats/integer.rs @@ -28,7 +28,7 @@ use super::GenerateStatsOptions; pub struct DistinctInfo { /// The unique values and their occurrences. distinct_values: HashMap, u32, FxBuildHasher>, - /// The count of unique values. + /// The count of unique values. This _must_ be non-zero. distinct_count: u32, /// The most frequent value. most_frequent_value: T, @@ -240,8 +240,6 @@ impl_from_typed!(i64, ErasedStats::I64); /// Array of integers and relevant stats for compression. #[derive(Clone, Debug)] pub struct IntegerStats { - /// The underlying source array. - src: PrimitiveArray, /// Cache for `validity.false_count()`. null_count: u32, /// Cache for `validity.true_count()`. @@ -286,11 +284,6 @@ impl IntegerStats { .vortex_expect("IntegerStats::generate_opts should not fail") } - /// Returns the underlying source array. - pub fn source(&self) -> &PrimitiveArray { - &self.src - } - /// Returns the number of null values. pub fn null_count(&self) -> u32 { self.null_count @@ -325,7 +318,6 @@ where // Special case: empty array. if array.is_empty() { return Ok(IntegerStats { - src: array.clone(), null_count: 0, value_count: 0, average_run_length: 0, @@ -336,9 +328,10 @@ where } .into(), }); - } else if array.all_invalid()? 
{ + } + + if array.all_invalid()? { return Ok(IntegerStats { - src: array.clone(), null_count: u32::try_from(array.len())?, value_count: 0, average_run_length: 0, @@ -462,7 +455,6 @@ where let value_count = u32::try_from(value_count)?; Ok(IntegerStats { - src: array.clone(), null_count, value_count, average_run_length: value_count / runs, diff --git a/vortex-compressor/src/stats/string.rs b/vortex-compressor/src/stats/string.rs index f8db9d0c4f2..d35d8381611 100644 --- a/vortex-compressor/src/stats/string.rs +++ b/vortex-compressor/src/stats/string.rs @@ -14,9 +14,8 @@ use super::GenerateStatsOptions; /// Array of variable-length byte arrays, and relevant stats for compression. #[derive(Clone, Debug)] pub struct StringStats { - /// The underlying source array. - src: VarBinViewArray, /// The estimated number of distinct strings, or `None` if not computed. + /// This _must_ be non-zero. estimated_distinct_count: Option, /// The number of non-null values. value_count: u32, @@ -60,7 +59,6 @@ impl StringStats { .transpose()?; Ok(Self { - src: input.clone(), value_count: u32::try_from(value_count)?, null_count: u32::try_from(null_count)?, estimated_distinct_count, @@ -80,12 +78,9 @@ impl StringStats { .vortex_expect("StringStats::generate_opts should not fail") } - /// Returns the underlying source array. - pub fn source(&self) -> &VarBinViewArray { - &self.src - } - /// Returns the estimated number of distinct strings, or `None` if not computed. + /// + /// This estimation is always going to be less than or equal to the actual distinct count. pub fn estimated_distinct_count(&self) -> Option { self.estimated_distinct_count }