Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Homogeneous codec names. #1481

Merged
merged 1 commit into from
Aug 24, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 6 additions & 10 deletions fastfield_codecs/benches/bench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,9 @@ extern crate test;
mod tests {
use fastfield_codecs::bitpacked::{BitpackedFastFieldReader, BitpackedFastFieldSerializer};
use fastfield_codecs::blockwise_linear::{
BlockwiseLinearInterpolFastFieldSerializer, MultiLinearInterpolFastFieldReader,
};
use fastfield_codecs::linear::{
LinearInterpolFastFieldReader, LinearInterpolFastFieldSerializer,
BlockwiseLinearFastFieldReader, BlockwiseLinearFastFieldSerializer,
};
use fastfield_codecs::linear::{LinearFastFieldReader, LinearFastFieldSerializer};
use fastfield_codecs::*;

fn get_data() -> Vec<u64> {
Expand Down Expand Up @@ -59,12 +57,12 @@ mod tests {
#[bench]
fn bench_fastfield_linearinterpol_create(b: &mut Bencher) {
let data: Vec<_> = get_data();
bench_create::<LinearInterpolFastFieldSerializer>(b, &data);
bench_create::<LinearFastFieldSerializer>(b, &data);
}
#[bench]
fn bench_fastfield_multilinearinterpol_create(b: &mut Bencher) {
let data: Vec<_> = get_data();
bench_create::<BlockwiseLinearInterpolFastFieldSerializer>(b, &data);
bench_create::<BlockwiseLinearFastFieldSerializer>(b, &data);
}
#[bench]
fn bench_fastfield_bitpack_get(b: &mut Bencher) {
Expand All @@ -74,14 +72,12 @@ mod tests {
#[bench]
fn bench_fastfield_linearinterpol_get(b: &mut Bencher) {
let data: Vec<_> = get_data();
bench_get::<LinearInterpolFastFieldSerializer, LinearInterpolFastFieldReader>(b, &data);
bench_get::<LinearFastFieldSerializer, LinearFastFieldReader>(b, &data);
}
#[bench]
fn bench_fastfield_multilinearinterpol_get(b: &mut Bencher) {
let data: Vec<_> = get_data();
bench_get::<BlockwiseLinearInterpolFastFieldSerializer, MultiLinearInterpolFastFieldReader>(
b, &data,
);
bench_get::<BlockwiseLinearFastFieldSerializer, BlockwiseLinearFastFieldReader>(b, &data);
}
pub fn stats_from_vec(data: &[u64]) -> FastFieldStats {
let min_value = data.iter().cloned().min().unwrap_or(0);
Expand Down
32 changes: 16 additions & 16 deletions fastfield_codecs/src/blockwise_linear.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
//! MultiLinearInterpol compressor uses linear interpolation to guess a values and stores the
//! The BlockwiseLinear codec uses linear interpolation to guess a value and stores the
//! offset, but in blocks of 512.
//!
//! With a CHUNK_SIZE of 512 and 29 byte metadata per block, we get an overhead for metadata of 232 /
Expand Down Expand Up @@ -27,9 +27,9 @@ const CHUNK_SIZE: u64 = 512;
/// Depending on the field type, a different
/// fast field is required.
#[derive(Clone)]
pub struct MultiLinearInterpolFastFieldReader {
pub struct BlockwiseLinearFastFieldReader {
data: OwnedBytes,
pub footer: MultiLinearInterpolFooter,
pub footer: BlockwiseLinearFooter,
}

#[derive(Clone, Debug, Default)]
Expand Down Expand Up @@ -104,14 +104,14 @@ impl BinarySerializable for Function {
}

#[derive(Clone, Debug)]
pub struct MultiLinearInterpolFooter {
pub struct BlockwiseLinearFooter {
pub num_vals: u64,
pub min_value: u64,
pub max_value: u64,
interpolations: Vec<Function>,
}

impl BinarySerializable for MultiLinearInterpolFooter {
impl BinarySerializable for BlockwiseLinearFooter {
fn serialize<W: Write>(&self, write: &mut W) -> io::Result<()> {
let mut out = vec![];
self.num_vals.serialize(&mut out)?;
Expand All @@ -123,8 +123,8 @@ impl BinarySerializable for MultiLinearInterpolFooter {
Ok(())
}

fn deserialize<R: Read>(reader: &mut R) -> io::Result<MultiLinearInterpolFooter> {
let mut footer = MultiLinearInterpolFooter {
fn deserialize<R: Read>(reader: &mut R) -> io::Result<BlockwiseLinearFooter> {
let mut footer = BlockwiseLinearFooter {
num_vals: u64::deserialize(reader)?,
min_value: u64::deserialize(reader)?,
max_value: u64::deserialize(reader)?,
Expand All @@ -148,14 +148,14 @@ fn get_interpolation_function(doc: u64, interpolations: &[Function]) -> &Functio
&interpolations[get_interpolation_position(doc)]
}

impl FastFieldCodecReader for MultiLinearInterpolFastFieldReader {
impl FastFieldCodecReader for BlockwiseLinearFastFieldReader {
/// Opens a fast field given a file.
fn open_from_bytes(bytes: OwnedBytes) -> io::Result<Self> {
let footer_len: u32 = (&bytes[bytes.len() - 4..]).deserialize()?;
let footer_offset = bytes.len() - 4 - footer_len as usize;
let (data, mut footer) = bytes.split(footer_offset);
let footer = MultiLinearInterpolFooter::deserialize(&mut footer)?;
Ok(MultiLinearInterpolFastFieldReader { data, footer })
let footer = BlockwiseLinearFooter::deserialize(&mut footer)?;
Ok(BlockwiseLinearFastFieldReader { data, footer })
}

#[inline]
Expand All @@ -181,10 +181,10 @@ impl FastFieldCodecReader for MultiLinearInterpolFastFieldReader {
}

/// Same as LinearFastFieldSerializer, but working on chunks of CHUNK_SIZE elements.
pub struct BlockwiseLinearInterpolFastFieldSerializer {}
pub struct BlockwiseLinearFastFieldSerializer {}

impl FastFieldCodecSerializer for BlockwiseLinearInterpolFastFieldSerializer {
const CODEC_TYPE: FastFieldCodecType = FastFieldCodecType::BlockwiseLinearInterpol;
impl FastFieldCodecSerializer for BlockwiseLinearFastFieldSerializer {
const CODEC_TYPE: FastFieldCodecType = FastFieldCodecType::BlockwiseLinear;
/// Creates a new fast field serializer.
fn serialize(
write: &mut impl Write,
Expand Down Expand Up @@ -270,7 +270,7 @@ impl FastFieldCodecSerializer for BlockwiseLinearInterpolFastFieldSerializer {
}
bit_packer.close(write)?;

let footer = MultiLinearInterpolFooter {
let footer = BlockwiseLinearFooter {
num_vals: fastfield_accessor.num_vals(),
min_value: fastfield_accessor.min_value(),
max_value: fastfield_accessor.max_value(),
Expand Down Expand Up @@ -360,8 +360,8 @@ mod tests {

fn create_and_validate(data: &[u64], name: &str) -> (f32, f32) {
crate::tests::create_and_validate::<
BlockwiseLinearInterpolFastFieldSerializer,
MultiLinearInterpolFastFieldReader,
BlockwiseLinearFastFieldSerializer,
BlockwiseLinearFastFieldReader,
>(data, name)
}

Expand Down
34 changes: 16 additions & 18 deletions fastfield_codecs/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ pub trait FastFieldCodecReader: Sized {
#[repr(u8)]
pub enum FastFieldCodecType {
Bitpacked = 1,
LinearInterpol = 2,
BlockwiseLinearInterpol = 3,
Linear = 2,
BlockwiseLinear = 3,
Gcd = 4,
}

Expand All @@ -50,8 +50,8 @@ impl FastFieldCodecType {
pub fn from_code(code: u8) -> Option<Self> {
match code {
1 => Some(Self::Bitpacked),
2 => Some(Self::LinearInterpol),
3 => Some(Self::BlockwiseLinearInterpol),
2 => Some(Self::Linear),
3 => Some(Self::BlockwiseLinear),
4 => Some(Self::Gcd),
_ => None,
}
Expand Down Expand Up @@ -167,9 +167,9 @@ mod tests {

use crate::bitpacked::{BitpackedFastFieldReader, BitpackedFastFieldSerializer};
use crate::blockwise_linear::{
BlockwiseLinearInterpolFastFieldSerializer, MultiLinearInterpolFastFieldReader,
BlockwiseLinearFastFieldReader, BlockwiseLinearFastFieldSerializer,
};
use crate::linear::{LinearInterpolFastFieldReader, LinearInterpolFastFieldSerializer};
use crate::linear::{LinearFastFieldReader, LinearFastFieldSerializer};

pub fn create_and_validate<S: FastFieldCodecSerializer, R: FastFieldCodecReader>(
data: &[u64],
Expand Down Expand Up @@ -200,15 +200,15 @@ mod tests {
proptest! {
#[test]
fn test_proptest_small(data in proptest::collection::vec(any::<u64>(), 1..10)) {
create_and_validate::<LinearInterpolFastFieldSerializer, LinearInterpolFastFieldReader>(&data, "proptest linearinterpol");
create_and_validate::<BlockwiseLinearInterpolFastFieldSerializer, MultiLinearInterpolFastFieldReader>(&data, "proptest multilinearinterpol");
create_and_validate::<LinearFastFieldSerializer, LinearFastFieldReader>(&data, "proptest linearinterpol");
create_and_validate::<BlockwiseLinearFastFieldSerializer, BlockwiseLinearFastFieldReader>(&data, "proptest multilinearinterpol");
create_and_validate::<BitpackedFastFieldSerializer, BitpackedFastFieldReader>(&data, "proptest bitpacked");
}

#[test]
fn test_proptest_large(data in proptest::collection::vec(any::<u64>(), 1..6000)) {
create_and_validate::<LinearInterpolFastFieldSerializer, LinearInterpolFastFieldReader>(&data, "proptest linearinterpol");
create_and_validate::<BlockwiseLinearInterpolFastFieldSerializer, MultiLinearInterpolFastFieldReader>(&data, "proptest multilinearinterpol");
create_and_validate::<LinearFastFieldSerializer, LinearFastFieldReader>(&data, "proptest linearinterpol");
create_and_validate::<BlockwiseLinearFastFieldSerializer, BlockwiseLinearFastFieldReader>(&data, "proptest multilinearinterpol");
create_and_validate::<BitpackedFastFieldSerializer, BitpackedFastFieldReader>(&data, "proptest bitpacked");
}

Expand Down Expand Up @@ -248,12 +248,11 @@ mod tests {
}
#[test]
fn test_codec_interpolation() {
test_codec::<LinearInterpolFastFieldSerializer, LinearInterpolFastFieldReader>();
test_codec::<LinearFastFieldSerializer, LinearFastFieldReader>();
}
#[test]
fn test_codec_multi_interpolation() {
test_codec::<BlockwiseLinearInterpolFastFieldSerializer, MultiLinearInterpolFastFieldReader>(
);
test_codec::<BlockwiseLinearFastFieldSerializer, BlockwiseLinearFastFieldReader>();
}

use super::*;
Expand All @@ -262,11 +261,10 @@ mod tests {
fn estimation_good_interpolation_case() {
let data = (10..=20000_u64).collect::<Vec<_>>();

let linear_interpol_estimation = LinearInterpolFastFieldSerializer::estimate(&data);
let linear_interpol_estimation = LinearFastFieldSerializer::estimate(&data);
assert_le!(linear_interpol_estimation, 0.01);

let multi_linear_interpol_estimation =
BlockwiseLinearInterpolFastFieldSerializer::estimate(&data);
let multi_linear_interpol_estimation = BlockwiseLinearFastFieldSerializer::estimate(&data);
assert_le!(multi_linear_interpol_estimation, 0.2);
assert_le!(linear_interpol_estimation, multi_linear_interpol_estimation);

Expand All @@ -277,7 +275,7 @@ mod tests {
fn estimation_test_bad_interpolation_case() {
let data = vec![200, 10, 10, 10, 10, 1000, 20];

let linear_interpol_estimation = LinearInterpolFastFieldSerializer::estimate(&data);
let linear_interpol_estimation = LinearFastFieldSerializer::estimate(&data);
assert_le!(linear_interpol_estimation, 0.32);

let bitpacked_estimation = BitpackedFastFieldSerializer::estimate(&data);
Expand All @@ -290,7 +288,7 @@ mod tests {

// in this case the linear interpolation can't in fact be worse than bitpacking,
// but the estimator adds some threshold, which leads to estimated worse behavior
let linear_interpol_estimation = LinearInterpolFastFieldSerializer::estimate(&data);
let linear_interpol_estimation = LinearFastFieldSerializer::estimate(&data);
assert_le!(linear_interpol_estimation, 0.35);

let bitpacked_estimation = BitpackedFastFieldSerializer::estimate(&data);
Expand Down
39 changes: 19 additions & 20 deletions fastfield_codecs/src/linear.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,15 @@ use crate::{
/// Depending on the field type, a different
/// fast field is required.
#[derive(Clone)]
pub struct LinearInterpolFastFieldReader {
pub struct LinearFastFieldReader {
data: OwnedBytes,
bit_unpacker: BitUnpacker,
pub footer: LinearInterpolFooter,
pub footer: LinearFooter,
pub slope: f32,
}

#[derive(Clone, Debug)]
pub struct LinearInterpolFooter {
pub struct LinearFooter {
pub relative_max_value: u64,
pub offset: u64,
pub first_val: u64,
Expand All @@ -30,7 +30,7 @@ pub struct LinearInterpolFooter {
pub max_value: u64,
}

impl BinarySerializable for LinearInterpolFooter {
impl BinarySerializable for LinearFooter {
fn serialize<W: Write>(&self, write: &mut W) -> io::Result<()> {
self.relative_max_value.serialize(write)?;
self.offset.serialize(write)?;
Expand All @@ -42,8 +42,8 @@ impl BinarySerializable for LinearInterpolFooter {
Ok(())
}

fn deserialize<R: Read>(reader: &mut R) -> io::Result<LinearInterpolFooter> {
Ok(LinearInterpolFooter {
fn deserialize<R: Read>(reader: &mut R) -> io::Result<LinearFooter> {
Ok(LinearFooter {
relative_max_value: u64::deserialize(reader)?,
offset: u64::deserialize(reader)?,
first_val: u64::deserialize(reader)?,
Expand All @@ -55,20 +55,20 @@ impl BinarySerializable for LinearInterpolFooter {
}
}

impl FixedSize for LinearInterpolFooter {
impl FixedSize for LinearFooter {
const SIZE_IN_BYTES: usize = 56;
}

impl FastFieldCodecReader for LinearInterpolFastFieldReader {
impl FastFieldCodecReader for LinearFastFieldReader {
/// Opens a fast field given a file.
fn open_from_bytes(bytes: OwnedBytes) -> io::Result<Self> {
let footer_offset = bytes.len() - LinearInterpolFooter::SIZE_IN_BYTES;
let footer_offset = bytes.len() - LinearFooter::SIZE_IN_BYTES;
let (data, mut footer) = bytes.split(footer_offset);
let footer = LinearInterpolFooter::deserialize(&mut footer)?;
let footer = LinearFooter::deserialize(&mut footer)?;
let slope = get_slope(footer.first_val, footer.last_val, footer.num_vals);
let num_bits = compute_num_bits(footer.relative_max_value);
let bit_unpacker = BitUnpacker::new(num_bits);
Ok(LinearInterpolFastFieldReader {
Ok(LinearFastFieldReader {
data,
bit_unpacker,
footer,
Expand All @@ -93,7 +93,7 @@ impl FastFieldCodecReader for LinearInterpolFastFieldReader {

/// Fastfield serializer, which tries to guess values by linear interpolation
/// and stores the difference bitpacked.
pub struct LinearInterpolFastFieldSerializer {}
pub struct LinearFastFieldSerializer {}

#[inline]
pub(crate) fn get_slope(first_val: u64, last_val: u64, num_vals: u64) -> f32 {
Expand Down Expand Up @@ -134,8 +134,8 @@ pub fn get_calculated_value(first_val: u64, pos: u64, slope: f32) -> u64 {
}
}

impl FastFieldCodecSerializer for LinearInterpolFastFieldSerializer {
const CODEC_TYPE: FastFieldCodecType = FastFieldCodecType::LinearInterpol;
impl FastFieldCodecSerializer for LinearFastFieldSerializer {
const CODEC_TYPE: FastFieldCodecType = FastFieldCodecType::Linear;

/// Creates a new fast field serializer.
fn serialize(
Expand Down Expand Up @@ -175,7 +175,7 @@ impl FastFieldCodecSerializer for LinearInterpolFastFieldSerializer {
}
bit_packer.close(write)?;

let footer = LinearInterpolFooter {
let footer = LinearFooter {
relative_max_value,
offset,
first_val,
Expand Down Expand Up @@ -239,7 +239,7 @@ impl FastFieldCodecSerializer for LinearInterpolFastFieldSerializer {

let num_bits = compute_num_bits(relative_max_value as u64) as u64
* fastfield_accessor.num_vals()
+ LinearInterpolFooter::SIZE_IN_BYTES as u64;
+ LinearFooter::SIZE_IN_BYTES as u64;
let num_bits_uncompressed = 64 * fastfield_accessor.num_vals();
num_bits as f32 / num_bits_uncompressed as f32
}
Expand All @@ -260,10 +260,9 @@ mod tests {
use crate::tests::get_codec_test_data_sets;

fn create_and_validate(data: &[u64], name: &str) -> (f32, f32) {
crate::tests::create_and_validate::<
LinearInterpolFastFieldSerializer,
LinearInterpolFastFieldReader,
>(data, name)
crate::tests::create_and_validate::<LinearFastFieldSerializer, LinearFastFieldReader>(
data, name,
)
}

#[test]
Expand Down
8 changes: 4 additions & 4 deletions fastfield_codecs/src/main.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#[macro_use]
extern crate prettytable;
use fastfield_codecs::blockwise_linear::BlockwiseLinearInterpolFastFieldSerializer;
use fastfield_codecs::linear::LinearInterpolFastFieldSerializer;
use fastfield_codecs::blockwise_linear::BlockwiseLinearFastFieldSerializer;
use fastfield_codecs::linear::LinearFastFieldSerializer;
use fastfield_codecs::{FastFieldCodecSerializer, FastFieldCodecType, FastFieldStats};
use prettytable::{Cell, Row, Table};

Expand All @@ -13,9 +13,9 @@ fn main() {

for (data, data_set_name) in get_codec_test_data_sets() {
let mut results = vec![];
let res = serialize_with_codec::<LinearInterpolFastFieldSerializer>(&data);
let res = serialize_with_codec::<LinearFastFieldSerializer>(&data);
results.push(res);
let res = serialize_with_codec::<BlockwiseLinearInterpolFastFieldSerializer>(&data);
let res = serialize_with_codec::<BlockwiseLinearFastFieldSerializer>(&data);
results.push(res);
let res = serialize_with_codec::<fastfield_codecs::bitpacked::BitpackedFastFieldSerializer>(
&data,
Expand Down
8 changes: 3 additions & 5 deletions src/fastfield/gcd.rs
Original file line number Diff line number Diff line change
Expand Up @@ -237,11 +237,9 @@ mod tests {
assert!(size_prec_sec < size_prec_micro);

let size_prec_sec =
test_gcd_date_with_codec(FastFieldCodecType::LinearInterpol, DatePrecision::Seconds)?;
let size_prec_micro = test_gcd_date_with_codec(
FastFieldCodecType::LinearInterpol,
DatePrecision::Microseconds,
)?;
test_gcd_date_with_codec(FastFieldCodecType::Linear, DatePrecision::Seconds)?;
let size_prec_micro =
test_gcd_date_with_codec(FastFieldCodecType::Linear, DatePrecision::Microseconds)?;
assert!(size_prec_sec < size_prec_micro);

Ok(())
Expand Down
Loading