Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Minor refactoring. Introducing a codec type enum. #1477

Merged
merged 1 commit into from
Aug 24, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions fastfield_codecs/benches/bench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@ extern crate test;
#[cfg(test)]
mod tests {
use fastfield_codecs::bitpacked::{BitpackedFastFieldReader, BitpackedFastFieldSerializer};
use fastfield_codecs::linearinterpol::{
LinearInterpolFastFieldReader, LinearInterpolFastFieldSerializer,
use fastfield_codecs::blockwise_linear::{
BlockwiseLinearInterpolFastFieldSerializer, MultiLinearInterpolFastFieldReader,
};
use fastfield_codecs::multilinearinterpol::{
MultiLinearInterpolFastFieldReader, MultiLinearInterpolFastFieldSerializer,
use fastfield_codecs::linear::{
LinearInterpolFastFieldReader, LinearInterpolFastFieldSerializer,
};
use fastfield_codecs::*;

Expand Down Expand Up @@ -64,7 +64,7 @@ mod tests {
#[bench]
fn bench_fastfield_multilinearinterpol_create(b: &mut Bencher) {
let data: Vec<_> = get_data();
bench_create::<MultiLinearInterpolFastFieldSerializer>(b, &data);
bench_create::<BlockwiseLinearInterpolFastFieldSerializer>(b, &data);
}
#[bench]
fn bench_fastfield_bitpack_get(b: &mut Bencher) {
Expand All @@ -79,7 +79,7 @@ mod tests {
#[bench]
fn bench_fastfield_multilinearinterpol_get(b: &mut Bencher) {
let data: Vec<_> = get_data();
bench_get::<MultiLinearInterpolFastFieldSerializer, MultiLinearInterpolFastFieldReader>(
bench_get::<BlockwiseLinearInterpolFastFieldSerializer, MultiLinearInterpolFastFieldReader>(
b, &data,
);
}
Expand Down
9 changes: 6 additions & 3 deletions fastfield_codecs/src/bitpacked.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@ use common::BinarySerializable;
use ownedbytes::OwnedBytes;
use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker};

use crate::{FastFieldCodecReader, FastFieldCodecSerializer, FastFieldDataAccess};
use crate::{
FastFieldCodecReader, FastFieldCodecSerializer, FastFieldCodecType, FastFieldDataAccess,
};

/// Depending on the field type, a different
/// fast field is required.
Expand Down Expand Up @@ -99,8 +101,9 @@ impl<'a, W: Write> BitpackedFastFieldSerializerLegacy<'a, W> {
pub struct BitpackedFastFieldSerializer {}

impl FastFieldCodecSerializer for BitpackedFastFieldSerializer {
const NAME: &'static str = "Bitpacked";
const ID: u8 = 1;
/// The CODEC_TYPE is an enum value used for serialization.
const CODEC_TYPE: FastFieldCodecType = FastFieldCodecType::Bitpacked;

/// Serializes data with the BitpackedFastFieldSerializer.
///
/// The serializer in fact encodes the values by bitpacking
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,10 @@ use common::{BinarySerializable, CountingWriter, DeserializeFrom};
use ownedbytes::OwnedBytes;
use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker};

use crate::linearinterpol::{get_calculated_value, get_slope};
use crate::{FastFieldCodecReader, FastFieldCodecSerializer, FastFieldDataAccess};
use crate::linear::{get_calculated_value, get_slope};
use crate::{
FastFieldCodecReader, FastFieldCodecSerializer, FastFieldCodecType, FastFieldDataAccess,
};

const CHUNK_SIZE: u64 = 512;

Expand Down Expand Up @@ -179,11 +181,10 @@ impl FastFieldCodecReader for MultiLinearInterpolFastFieldReader {
}

/// Same as LinearInterpolFastFieldSerializer, but working on chunks of CHUNK_SIZE elements.
pub struct MultiLinearInterpolFastFieldSerializer {}
pub struct BlockwiseLinearInterpolFastFieldSerializer {}

impl FastFieldCodecSerializer for MultiLinearInterpolFastFieldSerializer {
const NAME: &'static str = "MultiLinearInterpol";
const ID: u8 = 3;
impl FastFieldCodecSerializer for BlockwiseLinearInterpolFastFieldSerializer {
const CODEC_TYPE: FastFieldCodecType = FastFieldCodecType::BlockwiseLinearInterpol;
/// Creates a new fast field serializer.
fn serialize(
write: &mut impl Write,
Expand Down Expand Up @@ -359,7 +360,7 @@ mod tests {

fn create_and_validate(data: &[u64], name: &str) -> (f32, f32) {
crate::tests::create_and_validate::<
MultiLinearInterpolFastFieldSerializer,
BlockwiseLinearInterpolFastFieldSerializer,
MultiLinearInterpolFastFieldReader,
>(data, name)
}
Expand Down
91 changes: 69 additions & 22 deletions fastfield_codecs/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,12 @@ extern crate more_asserts;
use std::io;
use std::io::Write;

use common::BinarySerializable;
use ownedbytes::OwnedBytes;

pub mod bitpacked;
pub mod linearinterpol;
pub mod multilinearinterpol;
pub mod blockwise_linear;
pub mod linear;

pub trait FastFieldCodecReader: Sized {
/// reads the metadata and returns the CodecReader
Expand All @@ -19,13 +20,50 @@ pub trait FastFieldCodecReader: Sized {
fn max_value(&self) -> u64;
}

/// Identifies which codec was used to encode a fast field.
///
/// Every variant carries an explicit `u8` discriminant (`#[repr(u8)]`). That
/// discriminant is the numeric code of the codec, so changing one changes the
/// value produced by an `as u8` cast of the variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
#[repr(u8)]
pub enum FastFieldCodecType {
    /// Bitpacked codec, code `1`.
    Bitpacked = 1,
    /// Linear interpolation codec, code `2`.
    LinearInterpol = 2,
    /// Blockwise linear interpolation codec, code `3`.
    BlockwiseLinearInterpol = 3,
    /// Code `4`.
    // NOTE(review): presumably a greatest-common-divisor based codec; no
    // implementation is visible from here -- confirm.
    Gcd = 4,
}

/// (De)serializes a [`FastFieldCodecType`] through its `u8` code.
impl BinarySerializable for FastFieldCodecType {
    /// Writes the codec's `u8` code (see `to_code`) to `wrt`, delegating to
    /// the `BinarySerializable` implementation of `u8`.
    fn serialize<W: Write>(&self, wrt: &mut W) -> io::Result<()> {
        self.to_code().serialize(wrt)
    }

    /// Reads a `u8` code from `reader` and maps it back to a codec type.
    ///
    /// # Errors
    ///
    /// Propagates any error raised while reading the code, and returns an
    /// `io::ErrorKind::InvalidData` error naming the offending code when it
    /// does not correspond to any known codec.
    fn deserialize<R: io::Read>(reader: &mut R) -> io::Result<Self> {
        let code = u8::deserialize(reader)?;
        Self::from_code(code).ok_or_else(|| {
            // `format!` is required: a plain string literal would emit the
            // text `{code}` verbatim instead of the offending value.
            io::Error::new(
                io::ErrorKind::InvalidData,
                format!("Unknown code `{code}`."),
            )
        })
    }
}

impl FastFieldCodecType {
    /// Returns the numeric code of this codec type, i.e. its `u8` discriminant.
    pub fn to_code(self) -> u8 {
        self as u8
    }

    /// Looks up the codec type whose code equals `code`.
    ///
    /// Returns `None` when `code` does not belong to any known codec.
    pub fn from_code(code: u8) -> Option<Self> {
        // Every variant of the enum; a code is valid only if one of them maps to it.
        let known_codecs = [
            Self::Bitpacked,
            Self::LinearInterpol,
            Self::BlockwiseLinearInterpol,
            Self::Gcd,
        ];
        known_codecs
            .iter()
            .copied()
            .find(|candidate| candidate.to_code() == code)
    }
}

/// The FastFieldCodecSerializer trait is required on all variants
/// of fast field compressions, to decide which one to choose.
pub trait FastFieldCodecSerializer {
/// A codec needs to provide a unique name and id, which is
/// used for debugging and de/serialization.
const NAME: &'static str;
const ID: u8;
const CODEC_TYPE: FastFieldCodecType;

/// Check if the Codec is able to compress the data
fn is_applicable(fastfield_accessor: &impl FastFieldDataAccess) -> bool;
Expand Down Expand Up @@ -128,10 +166,10 @@ mod tests {
use proptest::proptest;

use crate::bitpacked::{BitpackedFastFieldReader, BitpackedFastFieldSerializer};
use crate::linearinterpol::{LinearInterpolFastFieldReader, LinearInterpolFastFieldSerializer};
use crate::multilinearinterpol::{
MultiLinearInterpolFastFieldReader, MultiLinearInterpolFastFieldSerializer,
use crate::blockwise_linear::{
BlockwiseLinearInterpolFastFieldSerializer, MultiLinearInterpolFastFieldReader,
};
use crate::linear::{LinearInterpolFastFieldReader, LinearInterpolFastFieldSerializer};

pub fn create_and_validate<S: FastFieldCodecSerializer, R: FastFieldCodecReader>(
data: &[u64],
Expand All @@ -151,8 +189,8 @@ mod tests {
let val = reader.get_u64(doc as u64);
if val != *orig_val {
panic!(
"val {:?} does not match orig_val {:?}, in data set {}, data {:?}",
val, orig_val, name, data
"val {val:?} does not match orig_val {orig_val:?}, in data set {name}, data \
{data:?}",
);
}
}
Expand All @@ -163,14 +201,14 @@ mod tests {
#[test]
fn test_proptest_small(data in proptest::collection::vec(any::<u64>(), 1..10)) {
create_and_validate::<LinearInterpolFastFieldSerializer, LinearInterpolFastFieldReader>(&data, "proptest linearinterpol");
create_and_validate::<MultiLinearInterpolFastFieldSerializer, MultiLinearInterpolFastFieldReader>(&data, "proptest multilinearinterpol");
create_and_validate::<BlockwiseLinearInterpolFastFieldSerializer, MultiLinearInterpolFastFieldReader>(&data, "proptest multilinearinterpol");
create_and_validate::<BitpackedFastFieldSerializer, BitpackedFastFieldReader>(&data, "proptest bitpacked");
}

#[test]
fn test_proptest_large(data in proptest::collection::vec(any::<u64>(), 1..6000)) {
create_and_validate::<LinearInterpolFastFieldSerializer, LinearInterpolFastFieldReader>(&data, "proptest linearinterpol");
create_and_validate::<MultiLinearInterpolFastFieldSerializer, MultiLinearInterpolFastFieldReader>(&data, "proptest multilinearinterpol");
create_and_validate::<BlockwiseLinearInterpolFastFieldSerializer, MultiLinearInterpolFastFieldReader>(&data, "proptest multilinearinterpol");
create_and_validate::<BitpackedFastFieldSerializer, BitpackedFastFieldReader>(&data, "proptest bitpacked");
}

Expand All @@ -193,19 +231,15 @@ mod tests {
}

fn test_codec<S: FastFieldCodecSerializer, R: FastFieldCodecReader>() {
let codec_name = S::NAME;
for (data, data_set_name) in get_codec_test_data_sets() {
let (estimate, actual) =
crate::tests::create_and_validate::<S, R>(&data, data_set_name);
let codec_name = format!("{:?}", S::CODEC_TYPE);
for (data, dataset_name) in get_codec_test_data_sets() {
let (estimate, actual) = crate::tests::create_and_validate::<S, R>(&data, dataset_name);
let result = if estimate == f32::MAX {
"Disabled".to_string()
} else {
format!("Estimate {:?} Actual {:?} ", estimate, actual)
format!("Estimate `{estimate}` Actual `{actual}`")
};
println!(
"Codec {}, DataSet {}, {}",
codec_name, data_set_name, result
);
println!("Codec {codec_name}, DataSet {dataset_name}, {result}");
}
}
#[test]
Expand All @@ -218,7 +252,8 @@ mod tests {
}
#[test]
fn test_codec_multi_interpolation() {
test_codec::<MultiLinearInterpolFastFieldSerializer, MultiLinearInterpolFastFieldReader>();
test_codec::<BlockwiseLinearInterpolFastFieldSerializer, MultiLinearInterpolFastFieldReader>(
);
}

use super::*;
Expand All @@ -231,7 +266,7 @@ mod tests {
assert_le!(linear_interpol_estimation, 0.01);

let multi_linear_interpol_estimation =
MultiLinearInterpolFastFieldSerializer::estimate(&data);
BlockwiseLinearInterpolFastFieldSerializer::estimate(&data);
assert_le!(multi_linear_interpol_estimation, 0.2);
assert_le!(linear_interpol_estimation, multi_linear_interpol_estimation);

Expand Down Expand Up @@ -262,4 +297,16 @@ mod tests {
assert_le!(bitpacked_estimation, 0.32);
assert_le!(bitpacked_estimation, linear_interpol_estimation);
}

/// Checks that every `u8` accepted by `FastFieldCodecType::from_code` maps
/// back to the same value through `to_code`, and that exactly four codes are
/// accepted.
#[test]
fn test_fast_field_codec_type_to_code() {
    // Gather every (code, codec) pair that `from_code` recognises over the whole `u8` range.
    let recognised: Vec<(u8, FastFieldCodecType)> = (0..=u8::MAX)
        .filter_map(|code| FastFieldCodecType::from_code(code).map(|codec_type| (code, codec_type)))
        .collect();
    for (code, codec_type) in &recognised {
        assert_eq!(codec_type.to_code(), *code);
    }
    assert_eq!(recognised.len(), 4);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@ use common::{BinarySerializable, FixedSize};
use ownedbytes::OwnedBytes;
use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker};

use crate::{FastFieldCodecReader, FastFieldCodecSerializer, FastFieldDataAccess};
use crate::{
FastFieldCodecReader, FastFieldCodecSerializer, FastFieldCodecType, FastFieldDataAccess,
};

/// Depending on the field type, a different
/// fast field is required.
Expand Down Expand Up @@ -133,8 +135,8 @@ pub fn get_calculated_value(first_val: u64, pos: u64, slope: f32) -> u64 {
}

impl FastFieldCodecSerializer for LinearInterpolFastFieldSerializer {
const NAME: &'static str = "LinearInterpol";
const ID: u8 = 2;
const CODEC_TYPE: FastFieldCodecType = FastFieldCodecType::LinearInterpol;

/// Creates a new fast field serializer.
fn serialize(
write: &mut impl Write,
Expand Down
18 changes: 9 additions & 9 deletions fastfield_codecs/src/main.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
#[macro_use]
extern crate prettytable;
use fastfield_codecs::linearinterpol::LinearInterpolFastFieldSerializer;
use fastfield_codecs::multilinearinterpol::MultiLinearInterpolFastFieldSerializer;
use fastfield_codecs::{FastFieldCodecSerializer, FastFieldStats};
use fastfield_codecs::blockwise_linear::BlockwiseLinearInterpolFastFieldSerializer;
use fastfield_codecs::linear::LinearInterpolFastFieldSerializer;
use fastfield_codecs::{FastFieldCodecSerializer, FastFieldCodecType, FastFieldStats};
use prettytable::{Cell, Row, Table};

fn main() {
Expand All @@ -15,7 +15,7 @@ fn main() {
let mut results = vec![];
let res = serialize_with_codec::<LinearInterpolFastFieldSerializer>(&data);
results.push(res);
let res = serialize_with_codec::<MultiLinearInterpolFastFieldSerializer>(&data);
let res = serialize_with_codec::<BlockwiseLinearInterpolFastFieldSerializer>(&data);
results.push(res);
let res = serialize_with_codec::<fastfield_codecs::bitpacked::BitpackedFastFieldSerializer>(
&data,
Expand All @@ -33,7 +33,7 @@ fn main() {
.unwrap();

table.add_row(Row::new(vec![Cell::new(data_set_name).style_spec("Bbb")]));
for (is_applicable, est, comp, name) in results {
for (is_applicable, est, comp, codec_type) in results {
let (est_cell, ratio_cell) = if !is_applicable {
("Codec Disabled".to_string(), "".to_string())
} else {
Expand All @@ -46,7 +46,7 @@ fn main() {
};

table.add_row(Row::new(vec![
Cell::new(name).style_spec("bFg"),
Cell::new(&format!("{codec_type:?}")).style_spec("bFg"),
Cell::new(&ratio_cell).style_spec(style),
Cell::new(&est_cell).style_spec(""),
]));
Expand Down Expand Up @@ -93,17 +93,17 @@ pub fn get_codec_test_data_sets() -> Vec<(Vec<u64>, &'static str)> {

pub fn serialize_with_codec<S: FastFieldCodecSerializer>(
data: &[u64],
) -> (bool, f32, f32, &'static str) {
) -> (bool, f32, f32, FastFieldCodecType) {
let is_applicable = S::is_applicable(&data);
if !is_applicable {
return (false, 0.0, 0.0, S::NAME);
return (false, 0.0, 0.0, S::CODEC_TYPE);
}
let estimation = S::estimate(&data);
let mut out = vec![];
S::serialize(&mut out, &data).unwrap();

let actual_compression = out.len() as f32 / (data.len() * 8) as f32;
(true, estimation, actual_compression, S::NAME)
(true, estimation, actual_compression, S::CODEC_TYPE)
}

pub fn stats_from_vec(data: &[u64]) -> FastFieldStats {
Expand Down
Loading