From ef9a0d561f0397f8a2a3854e5586a007a5396b9e Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Mon, 29 Jan 2024 10:17:11 +0800 Subject: [PATCH 1/5] support cast utf8 to fixedsizelist --- arrow-cast/src/cast.rs | 45 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/arrow-cast/src/cast.rs b/arrow-cast/src/cast.rs index bd35096e0645..82c40f385150 100644 --- a/arrow-cast/src/cast.rs +++ b/arrow-cast/src/cast.rs @@ -802,6 +802,9 @@ pub fn cast_with_options( } (_, List(ref to)) => cast_values_to_list::(array, to, cast_options), (_, LargeList(ref to)) => cast_values_to_list::(array, to, cast_options), + (_, FixedSizeList(ref to, size)) => { + cast_values_to_fixed_size_list(array, to, size, cast_options) + } (Decimal128(_, s1), Decimal128(p2, s2)) => { cast_decimal_to_decimal_same_type::( array.as_primitive(), @@ -3040,6 +3043,18 @@ fn cast_values_to_list( Ok(Arc::new(list)) } +/// Helper function that takes a primitive array and casts to a fixed size list array. +fn cast_values_to_fixed_size_list( + array: &dyn Array, + to: &FieldRef, + size: &i32, + cast_options: &CastOptions, +) -> Result { + let values = cast_with_options(array, to.data_type(), cast_options)?; + let list = FixedSizeListArray::new(to.clone(), *size, values, None); + Ok(Arc::new(list)) +} + /// A specified helper to cast from `GenericBinaryArray` to `GenericStringArray` when they have same /// offset size so re-encoding offset is unnecessary. fn cast_binary_to_string( @@ -7609,6 +7624,36 @@ mod tests { assert_eq!(expected.values(), actual.values()); } + #[test] + fn test_cast_utf8_to_list() { + // DataType::List + let array = Arc::new(StringArray::from(vec!["5"])) as ArrayRef; + let field = Arc::new(Field::new("", DataType::Int32, false)); + let list_array = cast(&array, &DataType::List(field.clone())).unwrap(); + let actual = list_array.as_any().downcast_ref::().unwrap(); + let expect = ListArray::from_iter_primitive::([Some([Some(5)])]); + assert_eq!(&expect.value(0), &actual.value(0)); + + // DataType::LargeList + let list_array = cast(&array, &DataType::LargeList(field.clone())).unwrap(); + let actual = list_array + .as_any() + .downcast_ref::() + .unwrap(); + let expect = LargeListArray::from_iter_primitive::([Some([Some(5)])]); + assert_eq!(&expect.value(0), &actual.value(0)); + + // DataType::FixedSizeList + let list_array = cast(&array, &DataType::FixedSizeList(field.clone(), 1)).unwrap(); + let actual = list_array + .as_any() + .downcast_ref::() + .unwrap(); + let expect = + FixedSizeListArray::from_iter_primitive::([Some([Some(5)])], 1); + assert_eq!(&expect.value(0), &actual.value(0)); + } + #[test] fn test_cast_list_containers() { // large-list to list From 2dd229876d8269ba681d78ad4034b279b9f4c504 Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Mon, 29 Jan 2024 21:55:59 +0800 Subject: [PATCH 2/5] fix ci --- arrow-cast/src/cast.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arrow-cast/src/cast.rs b/arrow-cast/src/cast.rs index 82c40f385150..7687a21b4abb 100644 --- a/arrow-cast/src/cast.rs +++ b/arrow-cast/src/cast.rs @@ -141,6 +141,8 @@ pub fn can_cast_types(from_type: &DataType, to_type: &DataType) -> bool { } (_, List(list_to)) => can_cast_types(from_type, list_to.data_type()), (_, LargeList(list_to)) => can_cast_types(from_type, list_to.data_type()), + (_, FixedSizeList(list_to,size)) if size.eq(&1_i32) => { + can_cast_types(from_type, list_to.data_type())}, // cast one decimal type to another decimal type (Decimal128(_, _), Decimal128(_, _)) => true, (Decimal256(_, _), Decimal256(_, _)) => true, @@ -802,7 +804,7 @@ pub fn cast_with_options( } (_, List(ref to)) => cast_values_to_list::(array, to, cast_options), (_, LargeList(ref to)) => cast_values_to_list::(array, to, cast_options), - (_, FixedSizeList(ref to, size)) => { + (_, FixedSizeList(ref to, size)) if size.eq(&1_i32) => { cast_values_to_fixed_size_list(array, to, size, cast_options) } (Decimal128(_, s1), Decimal128(p2, s2)) => { From 405753f346415e4c172dcf0364204161c25b680b Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Tue, 30 Jan 2024 19:13:01 +0800 Subject: [PATCH 3/5] fix chore --- arrow-cast/src/cast.rs | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/arrow-cast/src/cast.rs b/arrow-cast/src/cast.rs index 7687a21b4abb..91db6e4c932c 100644 --- a/arrow-cast/src/cast.rs +++ b/arrow-cast/src/cast.rs @@ -141,7 +141,7 @@ pub fn can_cast_types(from_type: &DataType, to_type: &DataType) -> bool { } (_, List(list_to)) => can_cast_types(from_type, list_to.data_type()), (_, LargeList(list_to)) => can_cast_types(from_type, list_to.data_type()), - (_, FixedSizeList(list_to,size)) if size.eq(&1_i32) => { + (_, FixedSizeList(list_to,size)) if *size == 1 => { can_cast_types(from_type, list_to.data_type())}, // cast one decimal type to another decimal type (Decimal128(_, _), Decimal128(_, _)) => true, @@ -805,7 +805,7 @@ pub fn cast_with_options( (_, List(ref to)) => cast_values_to_list::(array, to, cast_options), (_, LargeList(ref to)) => cast_values_to_list::(array, to, cast_options), (_, FixedSizeList(ref to, size)) if size.eq(&1_i32) => { - cast_values_to_fixed_size_list(array, to, size, cast_options) + cast_values_to_fixed_size_list(array, to, *size, cast_options) } (Decimal128(_, s1), Decimal128(p2, s2)) => { cast_decimal_to_decimal_same_type::( @@ -3049,11 +3049,11 @@ fn cast_values_to_list( fn cast_values_to_fixed_size_list( array: &dyn Array, to: &FieldRef, - size: &i32, + size: i32, cast_options: &CastOptions, ) -> Result { let values = cast_with_options(array, to.data_type(), cast_options)?; - let list = FixedSizeListArray::new(to.clone(), *size, values, None); + let list = FixedSizeListArray::new(to.clone(), size, values, None); Ok(Arc::new(list)) } @@ -7632,25 +7632,19 @@ mod tests { let array = Arc::new(StringArray::from(vec!["5"])) as ArrayRef; let field = Arc::new(Field::new("", DataType::Int32, false)); let list_array = cast(&array, &DataType::List(field.clone())).unwrap(); - let actual = list_array.as_any().downcast_ref::().unwrap(); + let actual = list_array.as_list_opt::().unwrap(); let expect = ListArray::from_iter_primitive::([Some([Some(5)])]); assert_eq!(&expect.value(0), &actual.value(0)); // DataType::LargeList let list_array = cast(&array, &DataType::LargeList(field.clone())).unwrap(); - let actual = list_array - .as_any() - .downcast_ref::() - .unwrap(); + let actual = list_array.as_list_opt::().unwrap(); let expect = LargeListArray::from_iter_primitive::([Some([Some(5)])]); assert_eq!(&expect.value(0), &actual.value(0)); // DataType::FixedSizeList let list_array = cast(&array, &DataType::FixedSizeList(field.clone(), 1)).unwrap(); - let actual = list_array - .as_any() - .downcast_ref::() - .unwrap(); + let actual = list_array.as_fixed_size_list_opt().unwrap(); let expect = FixedSizeListArray::from_iter_primitive::([Some([Some(5)])], 1); assert_eq!(&expect.value(0), &actual.value(0)); From 5ff47cca2a89495df3365c37cb6129c3b5296c55 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Sun, 4 Feb 2024 08:19:28 -0500 Subject: [PATCH 4/5] Apply suggestions from code review Co-authored-by: Raphael Taylor-Davies <1781103+tustvold@users.noreply.github.com> --- arrow-cast/src/cast.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arrow-cast/src/cast.rs b/arrow-cast/src/cast.rs index 91db6e4c932c..c2ec060eca06 100644 --- a/arrow-cast/src/cast.rs +++ b/arrow-cast/src/cast.rs @@ -804,7 +804,7 @@ pub fn cast_with_options( } (_, List(ref to)) => cast_values_to_list::(array, to, cast_options), (_, LargeList(ref to)) => cast_values_to_list::(array, to, cast_options), - (_, FixedSizeList(ref to, size)) if size.eq(&1_i32) => { + (_, FixedSizeList(ref to, size)) if *size == 1 => { cast_values_to_fixed_size_list(array, to, *size, cast_options) } (Decimal128(_, s1), Decimal128(p2, s2)) => { @@ -7640,14 +7640,14 @@ mod tests { let list_array = cast(&array, &DataType::LargeList(field.clone())).unwrap(); let actual = list_array.as_list_opt::().unwrap(); let expect = LargeListArray::from_iter_primitive::([Some([Some(5)])]); - assert_eq!(&expect.value(0), &actual.value(0)); + assert_eq!(expect.value(0), actual.value(0)); // DataType::FixedSizeList let list_array = cast(&array, &DataType::FixedSizeList(field.clone(), 1)).unwrap(); let actual = list_array.as_fixed_size_list_opt().unwrap(); let expect = FixedSizeListArray::from_iter_primitive::([Some([Some(5)])], 1); - assert_eq!(&expect.value(0), &actual.value(0)); + assert_eq!(expect.value(0), actual.value(0)); } #[test] From 39511d49eff7f6f29410fd3bae1da1afe2664b3d Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Sun, 4 Feb 2024 08:22:10 -0500 Subject: [PATCH 5/5] fix compile --- arrow-cast/src/cast.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arrow-cast/src/cast.rs b/arrow-cast/src/cast.rs index c2ec060eca06..e321ae1b1b2f 100644 --- a/arrow-cast/src/cast.rs +++ b/arrow-cast/src/cast.rs @@ -7640,14 +7640,14 @@ mod tests { let list_array = cast(&array, &DataType::LargeList(field.clone())).unwrap(); let actual = list_array.as_list_opt::().unwrap(); let expect = LargeListArray::from_iter_primitive::([Some([Some(5)])]); - assert_eq!(expect.value(0), actual.value(0)); + assert_eq!(&expect.value(0), &actual.value(0)); // DataType::FixedSizeList let list_array = cast(&array, &DataType::FixedSizeList(field.clone(), 1)).unwrap(); let actual = list_array.as_fixed_size_list_opt().unwrap(); let expect = FixedSizeListArray::from_iter_primitive::([Some([Some(5)])], 1); - assert_eq!(expect.value(0), actual.value(0)); + assert_eq!(&expect.value(0), &actual.value(0)); } #[test]