diff --git a/Cargo.lock b/Cargo.lock index 94bc354d73..caa078f5be 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5495,7 +5495,6 @@ dependencies = [ "num-traits", "rand 0.9.0", "rkyv", - "serde", "vortex-alp", "vortex-array", "vortex-buffer", diff --git a/encodings/alp/src/alp/compress.rs b/encodings/alp/src/alp/compress.rs index d7cb617bf9..748f81aeb3 100644 --- a/encodings/alp/src/alp/compress.rs +++ b/encodings/alp/src/alp/compress.rs @@ -268,4 +268,22 @@ mod tests { Scalar::null_typed::() ); } + + #[test] + fn non_finite_numbers() { + let original = PrimitiveArray::new( + buffer![0.0f32, -0.0, f32::NAN, f32::NEG_INFINITY, f32::INFINITY], + Validity::NonNullable, + ); + let encoded = alp_encode(&original).unwrap(); + let decoded = encoded.to_primitive().unwrap(); + for idx in 0..original.len() { + let decoded_val = decoded.as_slice::()[idx]; + let original_val = original.as_slice::()[idx]; + assert!( + decoded_val.is_eq(original_val), + "Expected {original_val} but got {decoded_val}" + ); + } + } } diff --git a/encodings/alp/src/alp/compute/compare.rs b/encodings/alp/src/alp/compute/compare.rs index 66bd3a6ef8..e630d98d2f 100644 --- a/encodings/alp/src/alp/compute/compare.rs +++ b/encodings/alp/src/alp/compute/compare.rs @@ -245,7 +245,7 @@ mod tests { assert_eq!(r_gt, vec![true; 10]); let r_gte = test_alp_compare(&encoded, -0.0_f32, Operator::Gt).unwrap(); - assert_eq!(r_gte, vec![false; 10]); + assert_eq!(r_gte, vec![true; 10]); let r_lte = test_alp_compare(&encoded, 0.06051_f32, Operator::Lte).unwrap(); assert_eq!(r_lte, vec![true; 10]); diff --git a/encodings/alp/src/alp/mod.rs b/encodings/alp/src/alp/mod.rs index f0d524f120..81d15c42c0 100644 --- a/encodings/alp/src/alp/mod.rs +++ b/encodings/alp/src/alp/mod.rs @@ -13,6 +13,7 @@ mod serde; pub use array::*; pub use compress::*; use vortex_buffer::{Buffer, BufferMut}; +use vortex_dtype::NativePType; const SAMPLE_SIZE: usize = 32; @@ -35,7 +36,7 @@ mod private { impl Sealed for f64 {} } -pub trait ALPFloat: private::Sealed + Float + Display + 'static { +pub trait ALPFloat: private::Sealed + Float + Display + NativePType { type ALPInt: PrimInt + Display + ToPrimitive + Copy; const FRACTIONAL_BITS: u8; @@ -148,9 +149,9 @@ pub trait ALPFloat: private::Sealed + Float + Display + 'static { #[inline] fn encode_single(value: Self, exponents: Exponents) -> Option { - let encoded = unsafe { Self::encode_single_unchecked(value, exponents) }; + let encoded = Self::encode_single_unchecked(value, exponents); let decoded = Self::decode_single(encoded, exponents); - if decoded == value { + if decoded.is_eq(value) { return Some(encoded); } None @@ -185,11 +186,10 @@ pub trait ALPFloat: private::Sealed + Float + Display + 'static { Self::from_int(encoded) * Self::F10[exponents.f as usize] * Self::IF10[exponents.e as usize] } - /// # Safety - /// - /// The returned value may not decode back to the original value. + /// Encode single float value. The returned value might decode to a different value than passed in. + /// Consider using [`Self::encode_single`] if you want the checked version of this function. #[inline(always)] - unsafe fn encode_single_unchecked(value: Self, exponents: Exponents) -> Self::ALPInt { + fn encode_single_unchecked(value: Self, exponents: Exponents) -> Self::ALPInt { (value * Self::F10[exponents.e as usize] * Self::IF10[exponents.f as usize]) .fast_round() .as_int() @@ -212,10 +212,10 @@ fn encode_chunk_unchecked( // encode the chunk, counting the number of patches let mut chunk_patch_count = 0; - encoded_output.extend(chunk.iter().map(|v| { - let encoded = unsafe { T::encode_single_unchecked(*v, exp) }; + encoded_output.extend(chunk.iter().map(|&v| { + let encoded = T::encode_single_unchecked(v, exp); let decoded = T::decode_single(encoded, exp); - let neq = (decoded != *v) as usize; + let neq = !decoded.is_eq(v) as usize; chunk_patch_count += neq; encoded })); @@ -237,7 +237,7 @@ fn encode_chunk_unchecked( // write() is only safe to call more than once because the values are primitive (i.e., Drop is a no-op) patch_indices_mut[chunk_patch_index].write(i as u64); patch_values_mut[chunk_patch_index].write(chunk[i - num_prev_encoded]); - chunk_patch_index += (decoded != chunk[i - num_prev_encoded]) as usize; + chunk_patch_index += !decoded.is_eq(chunk[i - num_prev_encoded]) as usize; } assert_eq!(chunk_patch_index, chunk_patch_count); unsafe { diff --git a/encodings/fastlanes/Cargo.toml b/encodings/fastlanes/Cargo.toml index 58c8beb963..e5a234cde7 100644 --- a/encodings/fastlanes/Cargo.toml +++ b/encodings/fastlanes/Cargo.toml @@ -24,7 +24,6 @@ itertools = { workspace = true } num-traits = { workspace = true } rand = { workspace = true, optional = true } rkyv = { workspace = true } -serde = { workspace = true } vortex-array = { workspace = true } vortex-buffer = { workspace = true } vortex-dtype = { workspace = true } diff --git a/wasm-test/Cargo.lock b/wasm-test/Cargo.lock index fdd3be8907..95d5442a0c 100644 --- a/wasm-test/Cargo.lock +++ b/wasm-test/Cargo.lock @@ -2038,7 +2038,6 @@ dependencies = [ "itertools", "num-traits", "rkyv", - "serde", "vortex-array", "vortex-buffer", "vortex-dtype",