Skip to content

Commit

Permalink
fix: ALP encoding handles infinity, negative zero and nan values (#2555)
Browse files Browse the repository at this point in the history
Using NativePType::is_eq and due to rust float -> int cast semantics we
can avoid checking whether value is one of the unencodable variants.
https://doc.rust-lang.org/stable/reference/expressions/operator-expr.html#numeric-cast
for reference
  • Loading branch information
robert3005 authored Mar 3, 2025
1 parent 459dec3 commit 0dd969d
Show file tree
Hide file tree
Showing 6 changed files with 30 additions and 15 deletions.
1 change: 0 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

18 changes: 18 additions & 0 deletions encodings/alp/src/alp/compress.rs
Original file line number Diff line number Diff line change
Expand Up @@ -268,4 +268,22 @@ mod tests {
Scalar::null_typed::<f64>()
);
}

#[test]
fn non_finite_numbers() {
let original = PrimitiveArray::new(
buffer![0.0f32, -0.0, f32::NAN, f32::NEG_INFINITY, f32::INFINITY],
Validity::NonNullable,
);
let encoded = alp_encode(&original).unwrap();
let decoded = encoded.to_primitive().unwrap();
for idx in 0..original.len() {
let decoded_val = decoded.as_slice::<f32>()[idx];
let original_val = original.as_slice::<f32>()[idx];
assert!(
decoded_val.is_eq(original_val),
"Expected {original_val} but got {decoded_val}"
);
}
}
}
2 changes: 1 addition & 1 deletion encodings/alp/src/alp/compute/compare.rs
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ mod tests {
assert_eq!(r_gt, vec![true; 10]);

let r_gte = test_alp_compare(&encoded, -0.0_f32, Operator::Gt).unwrap();
assert_eq!(r_gte, vec![false; 10]);
assert_eq!(r_gte, vec![true; 10]);

let r_lte = test_alp_compare(&encoded, 0.06051_f32, Operator::Lte).unwrap();
assert_eq!(r_lte, vec![true; 10]);
Expand Down
22 changes: 11 additions & 11 deletions encodings/alp/src/alp/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ mod serde;
pub use array::*;
pub use compress::*;
use vortex_buffer::{Buffer, BufferMut};
use vortex_dtype::NativePType;

const SAMPLE_SIZE: usize = 32;

Expand All @@ -35,7 +36,7 @@ mod private {
impl Sealed for f64 {}
}

pub trait ALPFloat: private::Sealed + Float + Display + 'static {
pub trait ALPFloat: private::Sealed + Float + Display + NativePType {
type ALPInt: PrimInt + Display + ToPrimitive + Copy;

const FRACTIONAL_BITS: u8;
Expand Down Expand Up @@ -148,9 +149,9 @@ pub trait ALPFloat: private::Sealed + Float + Display + 'static {

#[inline]
fn encode_single(value: Self, exponents: Exponents) -> Option<Self::ALPInt> {
let encoded = unsafe { Self::encode_single_unchecked(value, exponents) };
let encoded = Self::encode_single_unchecked(value, exponents);
let decoded = Self::decode_single(encoded, exponents);
if decoded == value {
if decoded.is_eq(value) {
return Some(encoded);
}
None
Expand Down Expand Up @@ -185,11 +186,10 @@ pub trait ALPFloat: private::Sealed + Float + Display + 'static {
Self::from_int(encoded) * Self::F10[exponents.f as usize] * Self::IF10[exponents.e as usize]
}

/// # Safety
///
/// The returned value may not decode back to the original value.
/// Encode single float value. The returned value might decode to a different value than passed in.
/// Consider using [`Self::encode_single`] if you want the checked version of this function.
#[inline(always)]
unsafe fn encode_single_unchecked(value: Self, exponents: Exponents) -> Self::ALPInt {
fn encode_single_unchecked(value: Self, exponents: Exponents) -> Self::ALPInt {
(value * Self::F10[exponents.e as usize] * Self::IF10[exponents.f as usize])
.fast_round()
.as_int()
Expand All @@ -212,10 +212,10 @@ fn encode_chunk_unchecked<T: ALPFloat>(

// encode the chunk, counting the number of patches
let mut chunk_patch_count = 0;
encoded_output.extend(chunk.iter().map(|v| {
let encoded = unsafe { T::encode_single_unchecked(*v, exp) };
encoded_output.extend(chunk.iter().map(|&v| {
let encoded = T::encode_single_unchecked(v, exp);
let decoded = T::decode_single(encoded, exp);
let neq = (decoded != *v) as usize;
let neq = !decoded.is_eq(v) as usize;
chunk_patch_count += neq;
encoded
}));
Expand All @@ -237,7 +237,7 @@ fn encode_chunk_unchecked<T: ALPFloat>(
// write() is only safe to call more than once because the values are primitive (i.e., Drop is a no-op)
patch_indices_mut[chunk_patch_index].write(i as u64);
patch_values_mut[chunk_patch_index].write(chunk[i - num_prev_encoded]);
chunk_patch_index += (decoded != chunk[i - num_prev_encoded]) as usize;
chunk_patch_index += !decoded.is_eq(chunk[i - num_prev_encoded]) as usize;
}
assert_eq!(chunk_patch_index, chunk_patch_count);
unsafe {
Expand Down
1 change: 0 additions & 1 deletion encodings/fastlanes/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ itertools = { workspace = true }
num-traits = { workspace = true }
rand = { workspace = true, optional = true }
rkyv = { workspace = true }
serde = { workspace = true }
vortex-array = { workspace = true }
vortex-buffer = { workspace = true }
vortex-dtype = { workspace = true }
Expand Down
1 change: 0 additions & 1 deletion wasm-test/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 0dd969d

Please sign in to comment.