Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 0 additions & 10 deletions vortex-btrblocks/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -64,15 +64,5 @@ name = "compress_listview"
harness = false
test = false

[[bench]]
name = "dict_encode"
harness = false
test = false

[[bench]]
name = "stats_calc"
harness = false
test = false

[package.metadata.cargo-machete]
ignored = ["getrandom_v03"]
38 changes: 19 additions & 19 deletions vortex-btrblocks/public-api.lock

Large diffs are not rendered by default.

2 changes: 0 additions & 2 deletions vortex-btrblocks/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,13 +68,11 @@ pub use builder::default_excluded;
pub use canonical_compressor::BtrBlocksCompressor;
pub use schemes::patches::compress_patches;
pub use vortex_compressor::CascadingCompressor;
pub use vortex_compressor::builtins::integer_dictionary_encode;
pub use vortex_compressor::ctx::CompressorContext;
pub use vortex_compressor::ctx::MAX_CASCADE;
pub use vortex_compressor::scheme::Scheme;
pub use vortex_compressor::scheme::SchemeExt;
pub use vortex_compressor::scheme::SchemeId;
pub use vortex_compressor::scheme::estimate_compression_ratio_with_sampling;
pub use vortex_compressor::stats::ArrayAndStats;
pub use vortex_compressor::stats::BoolStats;
pub use vortex_compressor::stats::FloatStats;
Expand Down
6 changes: 3 additions & 3 deletions vortex-btrblocks/src/schemes/decimal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ use vortex_array::ToCanonical;
use vortex_array::arrays::PrimitiveArray;
use vortex_array::arrays::decimal::narrowed_decimal;
use vortex_array::dtype::DecimalType;
use vortex_compressor::estimate::CompressionEstimate;
use vortex_decimal_byte_parts::DecimalBytePartsArray;
use vortex_error::VortexResult;

Expand Down Expand Up @@ -42,12 +43,11 @@ impl Scheme for DecimalScheme {

fn expected_compression_ratio(
&self,
_compressor: &CascadingCompressor,
_data: &mut ArrayAndStats,
_ctx: CompressorContext,
) -> VortexResult<f64> {
) -> CompressionEstimate {
// Decimal compression is almost always beneficial (narrowing + primitive compression).
Ok(f64::MAX)
CompressionEstimate::AlwaysUse
}

fn compress(
Expand Down
74 changes: 39 additions & 35 deletions vortex-btrblocks/src/schemes/float.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ use vortex_array::Canonical;
use vortex_array::IntoArray;
use vortex_array::ToCanonical;
use vortex_array::dtype::PType;
use vortex_compressor::estimate::CompressionEstimate;
use vortex_compressor::scheme::ChildSelection;
use vortex_compressor::scheme::DescendantExclusion;
use vortex_error::VortexResult;
Expand All @@ -25,7 +26,6 @@ use crate::CompressorContext;
use crate::Scheme;
use crate::SchemeExt;
use crate::compress_patches;
use crate::estimate_compression_ratio_with_sampling;

/// ALP (Adaptive Lossless floating-Point) encoding.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
Expand Down Expand Up @@ -70,22 +70,21 @@ impl Scheme for ALPScheme {

fn expected_compression_ratio(
&self,
compressor: &CascadingCompressor,
data: &mut ArrayAndStats,
ctx: CompressorContext,
) -> VortexResult<f64> {
) -> CompressionEstimate {
// ALP encodes floats as integers. Without integer compression afterward, the encoded ints
// are the same size.
if ctx.finished_cascading() {
return Ok(0.0);
return CompressionEstimate::Skip;
}

// We don't support ALP for f16.
if data.float_stats().source().ptype() == PType::F16 {
return Ok(0.0);
if data.array_as_primitive().ptype() == PType::F16 {
return CompressionEstimate::Skip;
}

estimate_compression_ratio_with_sampling(self, compressor, data.array(), ctx)
CompressionEstimate::Sample
}

fn compress(
Expand All @@ -94,9 +93,7 @@ impl Scheme for ALPScheme {
data: &mut ArrayAndStats,
ctx: CompressorContext,
) -> VortexResult<ArrayRef> {
let stats = data.float_stats();

let alp_encoded = alp_encode(&stats.source().to_primitive(), None)?;
let alp_encoded = alp_encode(data.array_as_primitive(), None)?;

// Compress the ALP ints.
let compressed_alp_ints =
Expand All @@ -121,15 +118,15 @@ impl Scheme for ALPRDScheme {

fn expected_compression_ratio(
&self,
compressor: &CascadingCompressor,
data: &mut ArrayAndStats,
ctx: CompressorContext,
) -> VortexResult<f64> {
if data.float_stats().source().ptype() == PType::F16 {
return Ok(0.0);
_ctx: CompressorContext,
) -> CompressionEstimate {
// We don't support ALPRD for f16.
if data.array_as_primitive().ptype() == PType::F16 {
return CompressionEstimate::Skip;
}

estimate_compression_ratio_with_sampling(self, compressor, data.array(), ctx)
CompressionEstimate::Sample
}

fn compress(
Expand All @@ -138,15 +135,15 @@ impl Scheme for ALPRDScheme {
data: &mut ArrayAndStats,
_ctx: CompressorContext,
) -> VortexResult<ArrayRef> {
let stats = data.float_stats();
let primitive_array = data.array_as_primitive();

let encoder = match stats.source().ptype() {
PType::F32 => RDEncoder::new(stats.source().as_slice::<f32>()),
PType::F64 => RDEncoder::new(stats.source().as_slice::<f64>()),
let encoder = match primitive_array.ptype() {
PType::F32 => RDEncoder::new(primitive_array.as_slice::<f32>()),
PType::F64 => RDEncoder::new(primitive_array.as_slice::<f64>()),
ptype => vortex_panic!("cannot ALPRD compress ptype {ptype}"),
};

let mut alp_rd = encoder.encode(stats.source());
let mut alp_rd = encoder.encode(primitive_array);

let patches = alp_rd
.left_parts_patches()
Expand Down Expand Up @@ -182,24 +179,25 @@ impl Scheme for NullDominatedSparseScheme {

fn expected_compression_ratio(
&self,
_compressor: &CascadingCompressor,
data: &mut ArrayAndStats,
_ctx: CompressorContext,
) -> VortexResult<f64> {
) -> CompressionEstimate {
let len = data.array_len() as f64;
let stats = data.float_stats();
let value_count = stats.value_count();

if stats.value_count() == 0 {
// All nulls should use ConstantScheme instead of this.
return Ok(0.0);
// All-null arrays should be compressed with the constant scheme instead, so skip them here.
if value_count == 0 {
return CompressionEstimate::Skip;
}

// If more than 90% of the values are null, this will compress well.
if stats.null_count() as f64 / stats.source().len() as f64 > 0.9 {
return Ok(stats.source().len() as f64 / stats.value_count() as f64);
if stats.null_count() as f64 / len > 0.9 {
return CompressionEstimate::Ratio(len / value_count as f64);
}

// Otherwise we don't go this route.
Ok(0.0)
CompressionEstimate::Skip
}

fn compress(
Expand All @@ -208,10 +206,8 @@ impl Scheme for NullDominatedSparseScheme {
data: &mut ArrayAndStats,
ctx: CompressorContext,
) -> VortexResult<ArrayRef> {
let stats = data.float_stats();

// We pass None as we only run this pathway for NULL-dominated float arrays.
let sparse_encoded = SparseArray::encode(&stats.source().clone().into_array(), None)?;
let sparse_encoded = SparseArray::encode(data.array(), None)?;

if let Some(sparse) = sparse_encoded.as_opt::<Sparse>() {
let indices = sparse.patches().indices().to_primitive().narrow()?;
Expand Down Expand Up @@ -241,15 +237,22 @@ impl Scheme for PcoScheme {
is_float_primitive(canonical)
}

fn expected_compression_ratio(
&self,
_data: &mut ArrayAndStats,
_ctx: CompressorContext,
) -> CompressionEstimate {
CompressionEstimate::Sample
}

fn compress(
&self,
_compressor: &CascadingCompressor,
data: &mut ArrayAndStats,
_ctx: CompressorContext,
) -> VortexResult<ArrayRef> {
let stats = data.float_stats();
Ok(vortex_pco::PcoArray::from_primitive(
stats.source(),
data.array_as_primitive(),
pco::DEFAULT_COMPRESSION_LEVEL,
8192,
)?
Expand Down Expand Up @@ -401,7 +404,8 @@ mod scheme_selection_tests {
let array = PrimitiveArray::new(Buffer::copy_from(&values), Validity::NonNullable);
let btr = BtrBlocksCompressor::default();
let compressed = btr.compress(&array.into_array())?;
assert!(compressed.is::<Dict>());
assert!(compressed.is::<ALP>());
assert!(compressed.children()[0].is::<Dict>());
Ok(())
}

Expand Down
Loading
Loading