Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions vortex-btrblocks/public-api.lock
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ pub mod vortex_btrblocks

pub use vortex_btrblocks::ArrayAndStats

pub use vortex_btrblocks::BoolStats

pub use vortex_btrblocks::CascadingCompressor

pub use vortex_btrblocks::CompressorContext
Expand All @@ -28,6 +30,12 @@ pub use vortex_btrblocks::integer_dictionary_encode

pub mod vortex_btrblocks::schemes

pub mod vortex_btrblocks::schemes::bool

pub use vortex_btrblocks::schemes::bool::BoolConstantScheme

pub use vortex_btrblocks::schemes::bool::BoolStats

pub mod vortex_btrblocks::schemes::decimal

pub struct vortex_btrblocks::schemes::decimal::DecimalScheme
Expand Down
5 changes: 5 additions & 0 deletions vortex-btrblocks/src/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ use crate::CascadingCompressor;
use crate::Scheme;
use crate::SchemeExt;
use crate::SchemeId;
use crate::schemes::bool;
use crate::schemes::decimal;
use crate::schemes::float;
use crate::schemes::integer;
Expand All @@ -22,6 +23,10 @@ use crate::schemes::temporal;
/// This list is order-sensitive: the builder preserves this order when constructing
/// the final scheme list, so that tie-breaking is deterministic.
pub const ALL_SCHEMES: &[&dyn Scheme] = &[
////////////////////////////////////////////////////////////////////////////////////////////////
// Bool schemes.
////////////////////////////////////////////////////////////////////////////////////////////////
&bool::BoolConstantScheme,
////////////////////////////////////////////////////////////////////////////////////////////////
// Integer schemes.
////////////////////////////////////////////////////////////////////////////////////////////////
Expand Down
60 changes: 60 additions & 0 deletions vortex-btrblocks/src/canonical_compressor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,11 +62,14 @@ mod tests {
use rstest::rstest;
use vortex_array::DynArray;
use vortex_array::IntoArray;
use vortex_array::arrays::BoolArray;
use vortex_array::arrays::Constant;
use vortex_array::arrays::List;
use vortex_array::arrays::ListView;
use vortex_array::arrays::ListViewArray;
use vortex_array::assert_arrays_eq;
use vortex_array::validity::Validity;
use vortex_buffer::BitBuffer;
use vortex_buffer::buffer;
use vortex_error::VortexResult;

Expand Down Expand Up @@ -107,4 +110,61 @@ mod tests {
assert_arrays_eq!(result, input);
Ok(())
}

/// A non-nullable bool array holding only `true` must come back as a `Constant` array
/// that is still equal to the input.
#[test]
fn test_constant_all_true() -> VortexResult<()> {
    let bits = BitBuffer::from(vec![true; 100]);
    let input = BoolArray::new(bits, Validity::NonNullable);

    let compressor = BtrBlocksCompressor::default();
    let input_ref = input.clone().into_array();
    let compressed = compressor.compress(&input_ref)?;

    assert!(compressed.is::<Constant>());
    assert_arrays_eq!(compressed, input);
    Ok(())
}

/// A non-nullable bool array holding only `false` must come back as a `Constant` array
/// that is still equal to the input.
#[test]
fn test_constant_all_false() -> VortexResult<()> {
    let bits = BitBuffer::from(vec![false; 100]);
    let input = BoolArray::new(bits, Validity::NonNullable);

    let compressor = BtrBlocksCompressor::default();
    let input_ref = input.clone().into_array();
    let compressed = compressor.compress(&input_ref)?;

    assert!(compressed.is::<Constant>());
    assert_arrays_eq!(compressed, input);
    Ok(())
}

/// A bool array with an explicit validity buffer in which every slot is valid, and whose
/// values are all `true`, must still come back as a `Constant` array.
#[test]
fn test_nullable_all_valid_compressed() -> VortexResult<()> {
    let values = BitBuffer::from(vec![true; 100]);
    let all_valid = Validity::from(BitBuffer::from(vec![true; 100]));
    let input = BoolArray::new(values, all_valid);

    let compressor = BtrBlocksCompressor::default();
    let input_ref = input.clone().into_array();
    let compressed = compressor.compress(&input_ref)?;

    assert!(compressed.is::<Constant>());
    assert_arrays_eq!(compressed, input);
    Ok(())
}

/// When some slots are null the result must NOT be a `Constant` array, even though every
/// stored value is `true`. The result must still equal the input.
#[test]
fn test_nullable_with_nulls_not_compressed() -> VortexResult<()> {
    let values = BitBuffer::from(vec![true; 100]);
    // Indices divisible by 3 are marked invalid (null); every other index is valid.
    let validity_bits = BitBuffer::from_iter((0..100).map(|idx| idx % 3 != 0));
    let input = BoolArray::new(values, Validity::from(validity_bits));

    let compressor = BtrBlocksCompressor::default();
    let input_ref = input.clone().into_array();
    let compressed = compressor.compress(&input_ref)?;

    assert!(!compressed.is::<Constant>());
    assert_arrays_eq!(compressed, input);
    Ok(())
}

/// A non-nullable array that mixes `true` and `false` must NOT come back as a `Constant`
/// array, and the result must still equal the input.
#[test]
fn test_mixed_not_constant() -> VortexResult<()> {
    let alternating = vec![true, false, true, false, true];
    let input = BoolArray::new(BitBuffer::from(alternating), Validity::NonNullable);

    let compressor = BtrBlocksCompressor::default();
    let input_ref = input.clone().into_array();
    let compressed = compressor.compress(&input_ref)?;

    assert!(!compressed.is::<Constant>());
    assert_arrays_eq!(compressed, input);
    Ok(())
}
}
1 change: 1 addition & 0 deletions vortex-btrblocks/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ pub use vortex_compressor::scheme::SchemeExt;
pub use vortex_compressor::scheme::SchemeId;
pub use vortex_compressor::scheme::estimate_compression_ratio_with_sampling;
pub use vortex_compressor::stats::ArrayAndStats;
pub use vortex_compressor::stats::BoolStats;
pub use vortex_compressor::stats::FloatStats;
pub use vortex_compressor::stats::GenerateStatsOptions;
pub use vortex_compressor::stats::IntegerStats;
Expand Down
7 changes: 7 additions & 0 deletions vortex-btrblocks/src/schemes/bool.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

//! Bool compression schemes.

pub use vortex_compressor::builtins::BoolConstantScheme;
pub use vortex_compressor::stats::BoolStats;
1 change: 1 addition & 0 deletions vortex-btrblocks/src/schemes/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

//! Compression scheme implementations.

pub mod bool;
pub mod float;
pub mod integer;
pub mod string;
Expand Down
88 changes: 88 additions & 0 deletions vortex-compressor/public-api.lock
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,46 @@ pub mod vortex_compressor

pub mod vortex_compressor::builtins

pub struct vortex_compressor::builtins::BoolConstantScheme

impl core::clone::Clone for vortex_compressor::builtins::BoolConstantScheme

pub fn vortex_compressor::builtins::BoolConstantScheme::clone(&self) -> vortex_compressor::builtins::BoolConstantScheme

impl core::cmp::Eq for vortex_compressor::builtins::BoolConstantScheme

impl core::cmp::PartialEq for vortex_compressor::builtins::BoolConstantScheme

pub fn vortex_compressor::builtins::BoolConstantScheme::eq(&self, other: &vortex_compressor::builtins::BoolConstantScheme) -> bool

impl core::fmt::Debug for vortex_compressor::builtins::BoolConstantScheme

pub fn vortex_compressor::builtins::BoolConstantScheme::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result

impl core::marker::Copy for vortex_compressor::builtins::BoolConstantScheme

impl core::marker::StructuralPartialEq for vortex_compressor::builtins::BoolConstantScheme

impl vortex_compressor::scheme::Scheme for vortex_compressor::builtins::BoolConstantScheme

pub fn vortex_compressor::builtins::BoolConstantScheme::ancestor_exclusions(&self) -> alloc::vec::Vec<vortex_compressor::scheme::AncestorExclusion>

pub fn vortex_compressor::builtins::BoolConstantScheme::compress(&self, _compressor: &vortex_compressor::CascadingCompressor, data: &mut vortex_compressor::stats::ArrayAndStats, _ctx: vortex_compressor::ctx::CompressorContext) -> vortex_error::VortexResult<vortex_array::array::ArrayRef>

pub fn vortex_compressor::builtins::BoolConstantScheme::descendant_exclusions(&self) -> alloc::vec::Vec<vortex_compressor::scheme::DescendantExclusion>

pub fn vortex_compressor::builtins::BoolConstantScheme::detects_constant(&self) -> bool

pub fn vortex_compressor::builtins::BoolConstantScheme::expected_compression_ratio(&self, _compressor: &vortex_compressor::CascadingCompressor, data: &mut vortex_compressor::stats::ArrayAndStats, ctx: vortex_compressor::ctx::CompressorContext) -> vortex_error::VortexResult<f64>

pub fn vortex_compressor::builtins::BoolConstantScheme::matches(&self, canonical: &vortex_array::canonical::Canonical) -> bool

pub fn vortex_compressor::builtins::BoolConstantScheme::num_children(&self) -> usize

pub fn vortex_compressor::builtins::BoolConstantScheme::scheme_name(&self) -> &'static str

pub fn vortex_compressor::builtins::BoolConstantScheme::stats_options(&self) -> vortex_compressor::stats::GenerateStatsOptions

pub struct vortex_compressor::builtins::FloatConstantScheme

impl core::clone::Clone for vortex_compressor::builtins::FloatConstantScheme
Expand Down Expand Up @@ -246,6 +286,8 @@ pub fn vortex_compressor::builtins::float_dictionary_encode(stats: &vortex_compr

pub fn vortex_compressor::builtins::integer_dictionary_encode(stats: &vortex_compressor::stats::IntegerStats) -> vortex_array::arrays::dict::array::DictArray

pub fn vortex_compressor::builtins::is_bool(canonical: &vortex_array::canonical::Canonical) -> bool

pub fn vortex_compressor::builtins::is_float_primitive(canonical: &vortex_array::canonical::Canonical) -> bool

pub fn vortex_compressor::builtins::is_integer_primitive(canonical: &vortex_array::canonical::Canonical) -> bool
Expand Down Expand Up @@ -386,6 +428,26 @@ pub fn vortex_compressor::scheme::Scheme::scheme_name(&self) -> &'static str

pub fn vortex_compressor::scheme::Scheme::stats_options(&self) -> vortex_compressor::stats::GenerateStatsOptions

impl vortex_compressor::scheme::Scheme for vortex_compressor::builtins::BoolConstantScheme

pub fn vortex_compressor::builtins::BoolConstantScheme::ancestor_exclusions(&self) -> alloc::vec::Vec<vortex_compressor::scheme::AncestorExclusion>

pub fn vortex_compressor::builtins::BoolConstantScheme::compress(&self, _compressor: &vortex_compressor::CascadingCompressor, data: &mut vortex_compressor::stats::ArrayAndStats, _ctx: vortex_compressor::ctx::CompressorContext) -> vortex_error::VortexResult<vortex_array::array::ArrayRef>

pub fn vortex_compressor::builtins::BoolConstantScheme::descendant_exclusions(&self) -> alloc::vec::Vec<vortex_compressor::scheme::DescendantExclusion>

pub fn vortex_compressor::builtins::BoolConstantScheme::detects_constant(&self) -> bool

pub fn vortex_compressor::builtins::BoolConstantScheme::expected_compression_ratio(&self, _compressor: &vortex_compressor::CascadingCompressor, data: &mut vortex_compressor::stats::ArrayAndStats, ctx: vortex_compressor::ctx::CompressorContext) -> vortex_error::VortexResult<f64>

pub fn vortex_compressor::builtins::BoolConstantScheme::matches(&self, canonical: &vortex_array::canonical::Canonical) -> bool

pub fn vortex_compressor::builtins::BoolConstantScheme::num_children(&self) -> usize

pub fn vortex_compressor::builtins::BoolConstantScheme::scheme_name(&self) -> &'static str

pub fn vortex_compressor::builtins::BoolConstantScheme::stats_options(&self) -> vortex_compressor::stats::GenerateStatsOptions

impl vortex_compressor::scheme::Scheme for vortex_compressor::builtins::FloatConstantScheme

pub fn vortex_compressor::builtins::FloatConstantScheme::ancestor_exclusions(&self) -> alloc::vec::Vec<vortex_compressor::scheme::AncestorExclusion>
Expand Down Expand Up @@ -624,6 +686,8 @@ impl vortex_compressor::stats::ArrayAndStats

pub fn vortex_compressor::stats::ArrayAndStats::array(&self) -> &vortex_array::array::ArrayRef

pub fn vortex_compressor::stats::ArrayAndStats::bool_stats(&mut self) -> &vortex_compressor::stats::BoolStats

pub fn vortex_compressor::stats::ArrayAndStats::float_stats(&mut self) -> &vortex_compressor::stats::FloatStats

pub fn vortex_compressor::stats::ArrayAndStats::get_or_insert_with<T: 'static>(&mut self, f: impl core::ops::function::FnOnce() -> T) -> &T
Expand All @@ -636,6 +700,30 @@ pub fn vortex_compressor::stats::ArrayAndStats::new(array: vortex_array::array::

pub fn vortex_compressor::stats::ArrayAndStats::string_stats(&mut self) -> &vortex_compressor::stats::StringStats

pub struct vortex_compressor::stats::BoolStats

impl vortex_compressor::stats::BoolStats

pub fn vortex_compressor::stats::BoolStats::generate(input: &vortex_array::arrays::bool::array::BoolArray) -> vortex_error::VortexResult<Self>

pub fn vortex_compressor::stats::BoolStats::is_constant(&self) -> bool

pub fn vortex_compressor::stats::BoolStats::null_count(&self) -> u32

pub fn vortex_compressor::stats::BoolStats::source(&self) -> &vortex_array::arrays::bool::array::BoolArray

pub fn vortex_compressor::stats::BoolStats::true_count(&self) -> u32

pub fn vortex_compressor::stats::BoolStats::value_count(&self) -> u32

impl core::clone::Clone for vortex_compressor::stats::BoolStats

pub fn vortex_compressor::stats::BoolStats::clone(&self) -> vortex_compressor::stats::BoolStats

impl core::fmt::Debug for vortex_compressor::stats::BoolStats

pub fn vortex_compressor::stats::BoolStats::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result

pub struct vortex_compressor::stats::FloatDistinctInfo<T>

impl<T> vortex_compressor::stats::FloatDistinctInfo<T>
Expand Down
53 changes: 53 additions & 0 deletions vortex-compressor/src/builtins/constant.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ use vortex_array::scalar::Scalar;
use vortex_array::vtable::ValidityHelper;
use vortex_error::VortexResult;

use super::is_bool;
use super::is_float_primitive;
use super::is_integer_primitive;
use super::is_utf8_string;
Expand All @@ -22,6 +23,58 @@ use crate::ctx::CompressorContext;
use crate::scheme::Scheme;
use crate::stats::ArrayAndStats;

/// Constant encoding for bool arrays whose values are all the same.
///
/// The scheme's `expected_compression_ratio` returns `0.0` for any array that contains at
/// least one null, so only non-nullable arrays and nullable arrays with every slot valid
/// get a non-zero ratio from this scheme.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub struct BoolConstantScheme;

impl Scheme for BoolConstantScheme {
    /// Identifier string of this scheme.
    fn scheme_name(&self) -> &'static str {
        "vortex.bool.constant"
    }

    /// This scheme applies only to canonical bool arrays.
    fn matches(&self, canonical: &Canonical) -> bool {
        is_bool(canonical)
    }

    /// Always `true` for this scheme.
    fn detects_constant(&self) -> bool {
        true
    }

    /// Returns the number of values as the ratio when the array has no nulls and its bool
    /// stats report it as constant.
    ///
    /// Returns `0.0` when the context is a sample, when the array contains any null, or
    /// when the values are not constant.
    fn expected_compression_ratio(
        &self,
        _compressor: &CascadingCompressor,
        data: &mut ArrayAndStats,
        ctx: CompressorContext,
    ) -> VortexResult<f64> {
        // The sample check comes first, before the bool stats are requested.
        if ctx.is_sample() {
            return Ok(0.0);
        }

        let bool_stats = data.bool_stats();

        // Only non-nullable arrays, or nullable arrays with every slot valid, qualify.
        let has_nulls = bool_stats.source().dtype().is_nullable() && bool_stats.null_count() > 0;

        let ratio = if has_nulls || !bool_stats.is_constant() {
            0.0
        } else {
            f64::from(bool_stats.value_count())
        };
        Ok(ratio)
    }

    /// Builds a `ConstantArray` that repeats the source array's element at index 0 for the
    /// full length of the source.
    ///
    /// NOTE(review): this reads index 0 unconditionally, so it presumably relies on
    /// `expected_compression_ratio` having already rejected arrays with nulls; behaviour
    /// for an empty source depends on `scalar_at(0)` — confirm against the caller.
    fn compress(
        &self,
        _compressor: &CascadingCompressor,
        data: &mut ArrayAndStats,
        _ctx: CompressorContext,
    ) -> VortexResult<ArrayRef> {
        let source = data.bool_stats().source();
        let value = source.scalar_at(0)?;
        let len = source.len();
        Ok(ConstantArray::new(value, len).into_array())
    }
}

/// Constant encoding for integer arrays with a single distinct value.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub struct IntConstantScheme;
Expand Down
6 changes: 6 additions & 0 deletions vortex-compressor/src/builtins/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
//! [`DictArray`]: vortex_array::arrays::DictArray
//! [`MaskedArray`]: vortex_array::arrays::MaskedArray

pub use constant::BoolConstantScheme;
pub use constant::FloatConstantScheme;
pub use constant::IntConstantScheme;
pub use constant::StringConstantScheme;
Expand All @@ -26,6 +27,11 @@ use vortex_array::Canonical;
use vortex_array::dtype::DType;
use vortex_array::dtype::Nullability;

/// Returns `true` if the canonical array is the [`Canonical::Bool`] variant.
///
/// Any other variant yields `false`. `BoolConstantScheme::matches` delegates to this
/// helper.
pub fn is_bool(canonical: &Canonical) -> bool {
matches!(canonical, Canonical::Bool(_))
}

/// Returns `true` if the canonical array is a primitive with an integer ptype.
pub fn is_integer_primitive(canonical: &Canonical) -> bool {
matches!(canonical, Canonical::Primitive(p) if p.ptype().is_int())
Expand Down
4 changes: 3 additions & 1 deletion vortex-compressor/src/compressor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,9 @@ impl CascadingCompressor {
) -> VortexResult<ArrayRef> {
match array {
Canonical::Null(null_array) => Ok(null_array.into_array()),
Canonical::Bool(bool_array) => Ok(bool_array.into_array()),
Canonical::Bool(bool_array) => {
self.choose_and_compress(Canonical::Bool(bool_array), ctx)
}
Canonical::Primitive(primitive) => {
self.choose_and_compress(Canonical::Primitive(primitive), ctx)
}
Expand Down
Loading
Loading