Skip to content

Commit

Permalink
fix: StructLayoutWriter requires unique field names
Browse files Browse the repository at this point in the history
  • Loading branch information
robert3005 committed Feb 27, 2025
1 parent 5cd0a96 commit f7e6c42
Show file tree
Hide file tree
Showing 4 changed files with 85 additions and 16 deletions.
21 changes: 16 additions & 5 deletions fuzz/fuzz_targets/file_io.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ use arrow_ord::sort::SortOptions;
use bytes::Bytes;
use futures_util::TryStreamExt;
use libfuzzer_sys::{Corpus, fuzz_target};
use vortex_array::aliases::hash_set::HashSet;
use vortex_array::arrays::ChunkedArray;
use vortex_array::arrays::arbitrary::ArbitraryArray;
use vortex_array::arrow::IntoArrowArray;
Expand All @@ -18,11 +19,10 @@ use vortex_file::{VortexOpenOptions, VortexWriteOptions};
fuzz_target!(|array_data: ArbitraryArray| -> Corpus {
let array_data = array_data.0;

if !array_data.dtype().is_struct() {
return Corpus::Reject;
}

if has_nullable_struct(array_data.dtype()) {
if !array_data.dtype().is_struct()
|| has_nullable_struct(array_data.dtype())
|| has_duplicate_field_names(array_data.dtype())
{
return Corpus::Reject;
}

Expand Down Expand Up @@ -122,3 +122,14 @@ fn has_nullable_struct(dtype: &DType) -> bool {
.map(|sdt| sdt.fields().any(|dtype| has_nullable_struct(&dtype)))
.unwrap_or(false)
}

fn has_duplicate_field_names(dtype: &DType) -> bool {
let Some(struct_dtype) = dtype.as_struct() else {
return false;
};
HashSet::from_iter(struct_dtype.names().iter()).len() != struct_dtype.names().len()
|| dtype
.as_struct()
.map(|sdt| sdt.fields().any(|dtype| has_duplicate_field_names(&dtype)))
.unwrap_or(false)
}
2 changes: 1 addition & 1 deletion vortex-dtype/src/struct_.rs
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,7 @@ where
.into_iter()
.map(|(name, dtype)| (name.into(), dtype.into()))
.unzip();
StructDType::from_fields(names.into(), dtypes.into_iter().collect())
StructDType::from_fields(names.into(), dtypes)
}
}

Expand Down
4 changes: 3 additions & 1 deletion vortex-layout/src/layouts/struct_/eval_expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ mod tests {
use vortex_buffer::buffer;
use vortex_dtype::PType::I32;
use vortex_dtype::{DType, Nullability, StructDType};
use vortex_error::VortexUnwrap;
use vortex_expr::{get_item, gt, ident, pack};
use vortex_mask::Mask;

Expand All @@ -101,7 +102,7 @@ mod tests {
let ctx = ArrayContext::empty();
let mut segments = TestSegments::default();

let layout = StructLayoutWriter::new(
let layout = StructLayoutWriter::try_new(
DType::Struct(
Arc::new(StructDType::new(
vec!["a".into(), "b".into(), "c".into()].into(),
Expand All @@ -127,6 +128,7 @@ mod tests {
)),
],
)
.vortex_unwrap()
.push_all(
&mut segments,
[Ok(StructArray::from_fields(
Expand Down
74 changes: 65 additions & 9 deletions vortex-layout/src/layouts/struct_/writer.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
use itertools::Itertools;
use vortex_array::aliases::hash_set::HashSet;
use vortex_array::iter::ArrayIteratorArrayExt;
use vortex_array::{ArrayContext, ArrayRef};
use vortex_dtype::DType;
use vortex_error::{VortexExpect, VortexResult, vortex_bail, vortex_err, vortex_panic};
use vortex_error::{VortexExpect, VortexResult, vortex_bail, vortex_err};

use crate::LayoutVTableRef;
use crate::data::Layout;
Expand All @@ -19,33 +20,43 @@ pub struct StructLayoutWriter {
}

impl StructLayoutWriter {
pub fn new(dtype: DType, column_layout_writers: Vec<Box<dyn LayoutWriter>>) -> Self {
let struct_dtype = dtype.as_struct().vortex_expect("dtype is not a struct");
pub fn try_new(
dtype: DType,
column_layout_writers: Vec<Box<dyn LayoutWriter>>,
) -> VortexResult<Self> {
let struct_dtype = dtype
.as_struct()
.ok_or_else(|| vortex_err!("expected StructDType"))?;
if HashSet::from_iter(struct_dtype.names().iter()).len() != struct_dtype.names().len() {
vortex_bail!("StructLayout must have unique field names")
}
if struct_dtype.fields().len() != column_layout_writers.len() {
vortex_panic!(
vortex_bail!(
"number of fields in struct dtype does not match number of column layout writers"
);
}
Self {
Ok(Self {
column_strategies: column_layout_writers,
dtype,
row_count: 0,
}
})
}

pub fn try_new_with_factory<F: LayoutStrategy>(
ctx: &ArrayContext,
dtype: &DType,
factory: F,
) -> VortexResult<Self> {
let struct_dtype = dtype.as_struct().vortex_expect("dtype is not a struct");
Ok(Self::new(
let struct_dtype = dtype
.as_struct()
.ok_or_else(|| vortex_err!("expected StructDType"))?;
Self::try_new(
dtype.clone(),
struct_dtype
.fields()
.map(|dtype| factory.new_writer(ctx, &dtype))
.try_collect()?,
))
)
}
}

Expand Down Expand Up @@ -98,3 +109,48 @@ impl LayoutWriter for StructLayoutWriter {
))
}
}

#[cfg(test)]
mod tests {
use std::sync::Arc;

use vortex_array::ArrayContext;
use vortex_dtype::{DType, Nullability, PType};

use crate::LayoutWriterExt;
use crate::layouts::flat::writer::{FlatLayoutOptions, FlatLayoutWriter};
use crate::layouts::struct_::writer::StructLayoutWriter;

#[test]
#[should_panic]
fn fails_on_duplicate_field() {
StructLayoutWriter::try_new(
DType::Struct(
Arc::new(
[
("a", DType::Primitive(PType::I32, Nullability::NonNullable)),
("a", DType::Primitive(PType::I32, Nullability::NonNullable)),
]
.into_iter()
.collect(),
),
Nullability::NonNullable,
),
vec![
FlatLayoutWriter::new(
ArrayContext::empty(),
DType::Primitive(PType::I32, Nullability::NonNullable),
FlatLayoutOptions::default(),
)
.boxed(),
FlatLayoutWriter::new(
ArrayContext::empty(),
DType::Primitive(PType::I32, Nullability::NonNullable),
FlatLayoutOptions::default(),
)
.boxed(),
],
)
.unwrap();
}
}

0 comments on commit f7e6c42

Please sign in to comment.