From c2f76f917c481b484640cc8cf9266a36524be53a Mon Sep 17 00:00:00 2001 From: Trent Hauck Date: Wed, 14 Feb 2024 18:03:12 -0800 Subject: [PATCH 1/9] fix: use `array_and_element` for proper casting in array_position --- datafusion/expr/src/built_in_function.rs | 2 +- datafusion/sqllogictest/test_files/array.slt | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/datafusion/expr/src/built_in_function.rs b/datafusion/expr/src/built_in_function.rs index 9f0d5d776297..73ab18314393 100644 --- a/datafusion/expr/src/built_in_function.rs +++ b/datafusion/expr/src/built_in_function.rs @@ -965,7 +965,7 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::ArrayNdims => Signature::any(1, self.volatility()), BuiltinScalarFunction::ArrayDistinct => Signature::any(1, self.volatility()), BuiltinScalarFunction::ArrayPosition => { - Signature::variadic_any(self.volatility()) + Signature::array_and_element(self.volatility()) } BuiltinScalarFunction::ArrayPositions => { Signature::array_and_element(self.volatility()) diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index 286cc1a30ca6..de2c3f4456a8 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -2603,6 +2603,11 @@ select array_position(arrow_cast(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, ---- 2 2 +query I +SELECT array_position(arrow_cast([1, 2, 3, 4, 5], 'List(Int32)'), 5) +---- +5 + # list_position scalar function #5 (function alias `array_position`) query III select list_position(['h', 'e', 'l', 'l', 'o'], 'l'), list_position([1, 2, 3, 4, 5], 5), list_position([1, 1, 1], 1); From 2d7b78ba47c5f853ee8305578b52f724e13dc049 Mon Sep 17 00:00:00 2001 From: Trent Hauck Date: Wed, 14 Feb 2024 18:20:55 -0800 Subject: [PATCH 2/9] fix: fix typo --- datafusion/expr/src/signature.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/expr/src/signature.rs b/datafusion/expr/src/signature.rs index 48f4c996cb5d..5887d9abcde0 100644 --- a/datafusion/expr/src/signature.rs +++ b/datafusion/expr/src/signature.rs @@ -91,7 +91,7 @@ pub enum TypeSignature { /// DataFusion attempts to coerce all argument types to match the first argument's type /// /// # Examples - /// Given types in signature should be coericible to the same final type. + /// Given types in signature should be coercible to the same final type. /// A function such as `make_array` is `VariadicEqual`. /// /// `make_array(i32, i64) -> make_array(i64, i64)` From 64b4be3c674dd4de94a6e31a92e1d22e396731f7 Mon Sep 17 00:00:00 2001 From: Trent Hauck Date: Thu, 15 Feb 2024 15:53:50 -0800 Subject: [PATCH 3/9] feat: add ArrayAndElementAndOptionalIndex --- datafusion/expr/src/built_in_function.rs | 2 +- datafusion/expr/src/signature.rs | 13 +++++++++ .../expr/src/type_coercion/functions.rs | 28 +++++++++++++++++++ datafusion/sqllogictest/test_files/array.slt | 7 ++++- 4 files changed, 48 insertions(+), 2 deletions(-) diff --git a/datafusion/expr/src/built_in_function.rs b/datafusion/expr/src/built_in_function.rs index 73ab18314393..b309c21e6c62 100644 --- a/datafusion/expr/src/built_in_function.rs +++ b/datafusion/expr/src/built_in_function.rs @@ -965,7 +965,7 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::ArrayNdims => Signature::any(1, self.volatility()), BuiltinScalarFunction::ArrayDistinct => Signature::any(1, self.volatility()), BuiltinScalarFunction::ArrayPosition => { - Signature::array_and_element(self.volatility()) + Signature::array_and_element_and_optional_index(self.volatility()) } BuiltinScalarFunction::ArrayPositions => { Signature::array_and_element(self.volatility()) diff --git a/datafusion/expr/src/signature.rs b/datafusion/expr/src/signature.rs index 5887d9abcde0..bdd3585aa458 100644 --- a/datafusion/expr/src/signature.rs +++ b/datafusion/expr/src/signature.rs @@ -133,6 +133,7 @@ pub enum ArrayFunctionSignature { /// The first argument's list dimension should be one dimension less than the second argument's list dimension. ElementAndArray, ArrayAndIndex, + ArrayAndElementAndOptionalIndex, } impl std::fmt::Display for ArrayFunctionSignature { @@ -141,6 +142,9 @@ impl std::fmt::Display for ArrayFunctionSignature { ArrayFunctionSignature::ArrayAndElement => { write!(f, "array, element") } + ArrayFunctionSignature::ArrayAndElementAndOptionalIndex => { + write!(f, "array, element, [index]") + } ArrayFunctionSignature::ElementAndArray => { write!(f, "element, array") } @@ -292,6 +296,15 @@ impl Signature { volatility, } } + /// Specialized Signature for Array functions with an optional index + pub fn array_and_element_and_optional_index(volatility: Volatility) -> Self { + Signature { + type_signature: TypeSignature::ArraySignature( + ArrayFunctionSignature::ArrayAndElementAndOptionalIndex, + ), + volatility, + } + } /// Specialized Signature for ArrayPrepend and similar functions pub fn element_and_array(volatility: Volatility) -> Self { Signature { diff --git a/datafusion/expr/src/type_coercion/functions.rs b/datafusion/expr/src/type_coercion/functions.rs index 806fdaaa5246..92d82fa4d92b 100644 --- a/datafusion/expr/src/type_coercion/functions.rs +++ b/datafusion/expr/src/type_coercion/functions.rs @@ -130,6 +130,31 @@ fn get_valid_types( _ => Ok(vec![vec![]]), } } + fn array_append_and_optional_index( + current_types: &[DataType], + ) -> Result>> { + // make sure there's at least 2 arguments + if !(current_types.len() == 2 || current_types.len() == 3) { + return Ok(vec![vec![]]); + } + + let first_two_types = ¤t_types[0..2]; + let valid_types = array_append_or_prepend_valid_types(first_two_types, true)?; + + let valid_types_with_index = valid_types + .iter() + .map(|t| { + let mut t = t.clone(); + t.push(DataType::Int64); + t + }) + .collect::>(); + + let mut valid_types = valid_types; + valid_types.extend(valid_types_with_index); + + Ok(valid_types) + } fn array_and_index(current_types: &[DataType]) -> Result>> { if current_types.len() != 2 { return Ok(vec![vec![]]); @@ -184,6 +209,9 @@ fn get_valid_types( ArrayFunctionSignature::ArrayAndElement => { return array_append_or_prepend_valid_types(current_types, true) } + ArrayFunctionSignature::ArrayAndElementAndOptionalIndex => { + return array_append_and_optional_index(current_types) + } ArrayFunctionSignature::ArrayAndIndex => { return array_and_index(current_types) } diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index de2c3f4456a8..4d02fcbc2cb4 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -2604,7 +2604,12 @@ select array_position(arrow_cast(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 2 2 query I -SELECT array_position(arrow_cast([1, 2, 3, 4, 5], 'List(Int32)'), 5) +SELECT array_position(arrow_cast([5, 2, 3, 4, 5], 'List(Int32)'), 5) +---- +1 + +query I +SELECT array_position(arrow_cast([5, 2, 3, 4, 5], 'List(Int32)'), 5, 2) ---- 5 From 3cc94acbbaf103d3d127249f86414f6f0d8da72b Mon Sep 17 00:00:00 2001 From: Trent Hauck Date: Thu, 15 Feb 2024 15:58:59 -0800 Subject: [PATCH 4/9] refactor: cleanup --- datafusion/expr/src/type_coercion/functions.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/datafusion/expr/src/type_coercion/functions.rs b/datafusion/expr/src/type_coercion/functions.rs index 92d82fa4d92b..e9ebd9b5283f 100644 --- a/datafusion/expr/src/type_coercion/functions.rs +++ b/datafusion/expr/src/type_coercion/functions.rs @@ -133,13 +133,13 @@ fn get_valid_types( fn array_append_and_optional_index( current_types: &[DataType], ) -> Result>> { - // make sure there's at least 2 arguments + // make sure there's 2 or 3 arguments if !(current_types.len() == 2 || current_types.len() == 3) { return Ok(vec![vec![]]); } let first_two_types = ¤t_types[0..2]; - let valid_types = array_append_or_prepend_valid_types(first_two_types, true)?; + let mut valid_types = array_append_or_prepend_valid_types(first_two_types, true)?; let valid_types_with_index = valid_types .iter() @@ -150,7 +150,6 @@ fn get_valid_types( }) .collect::>(); - let mut valid_types = valid_types; valid_types.extend(valid_types_with_index); Ok(valid_types) From b19e3c8bc15b8e169049be7f2a4b4982eb885dc9 Mon Sep 17 00:00:00 2001 From: Trent Hauck Date: Thu, 15 Feb 2024 16:11:12 -0800 Subject: [PATCH 5/9] docs: add docs to enum variants --- datafusion/expr/src/signature.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/datafusion/expr/src/signature.rs b/datafusion/expr/src/signature.rs index bdd3585aa458..6f6dd9351fb0 100644 --- a/datafusion/expr/src/signature.rs +++ b/datafusion/expr/src/signature.rs @@ -132,7 +132,9 @@ pub enum ArrayFunctionSignature { /// The first argument should be non-list or list, and the second argument should be List/LargeList. /// The first argument's list dimension should be one dimension less than the second argument's list dimension. ElementAndArray, + /// Specialized Signature for Array functions of the form (List/LargeList, Index) ArrayAndIndex, + /// Specialized Signature for Array functions of the form (List/LargeList, Element, [Index]) ArrayAndElementAndOptionalIndex, } From 2a83ab49abb261983ccd625cc0efa8d61e380d40 Mon Sep 17 00:00:00 2001 From: Trent Hauck Date: Thu, 15 Feb 2024 16:28:30 -0800 Subject: [PATCH 6/9] doc: fix cargo doc formatting snafu --- datafusion/expr/src/signature.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/expr/src/signature.rs b/datafusion/expr/src/signature.rs index 6f6dd9351fb0..e8d9d8fb3966 100644 --- a/datafusion/expr/src/signature.rs +++ b/datafusion/expr/src/signature.rs @@ -134,7 +134,7 @@ pub enum ArrayFunctionSignature { ElementAndArray, /// Specialized Signature for Array functions of the form (List/LargeList, Index) ArrayAndIndex, - /// Specialized Signature for Array functions of the form (List/LargeList, Element, [Index]) + /// Specialized Signature for Array functions of the form (List/LargeList, Element, Optional Index) ArrayAndElementAndOptionalIndex, } From 466f2c902a44e37b00a80be685902287c0d000f1 Mon Sep 17 00:00:00 2001 From: Trent Hauck Date: Fri, 16 Feb 2024 08:17:46 -0800 Subject: [PATCH 7/9] test: add a couple of tests --- datafusion/sqllogictest/test_files/array.slt | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index 4d02fcbc2cb4..6266f30d887f 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -2613,6 +2613,15 @@ SELECT array_position(arrow_cast([5, 2, 3, 4, 5], 'List(Int32)'), 5, 2) ---- 5 +query I +SELECT array_position(arrow_cast([1, 1, 100, 1, 1], 'LargeList(Int32)'), 100) +---- +3 + +statement error +SELECT array_position(arrow_cast([5, 2, 3, 4, 5], 'List(Int32)'), 'foo') + + # list_position scalar function #5 (function alias `array_position`) query III select list_position(['h', 'e', 'l', 'l', 'o'], 'l'), list_position([1, 2, 3, 4, 5], 5), list_position([1, 1, 1], 1); From 5086f3150b3533f7bc7bb3e60d8561a24b5f6797 Mon Sep 17 00:00:00 2001 From: Trent Hauck Date: Fri, 16 Feb 2024 08:29:40 -0800 Subject: [PATCH 8/9] refactor: update names, early exit logic --- datafusion/expr/src/type_coercion/functions.rs | 9 +++++++-- datafusion/sqllogictest/test_files/array.slt | 4 ---- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/datafusion/expr/src/type_coercion/functions.rs b/datafusion/expr/src/type_coercion/functions.rs index e9ebd9b5283f..9cab04bc7605 100644 --- a/datafusion/expr/src/type_coercion/functions.rs +++ b/datafusion/expr/src/type_coercion/functions.rs @@ -130,7 +130,7 @@ fn get_valid_types( _ => Ok(vec![vec![]]), } } - fn array_append_and_optional_index( + fn array_element_and_optional_index( current_types: &[DataType], ) -> Result>> { // make sure there's 2 or 3 arguments @@ -141,6 +141,11 @@ fn get_valid_types( let first_two_types = ¤t_types[0..2]; let mut valid_types = array_append_or_prepend_valid_types(first_two_types, true)?; + // Early return if there are only 2 arguments + if current_types.len() == 2 { + return Ok(valid_types); + } + let valid_types_with_index = valid_types .iter() .map(|t| { @@ -209,7 +214,7 @@ fn get_valid_types( return array_append_or_prepend_valid_types(current_types, true) } ArrayFunctionSignature::ArrayAndElementAndOptionalIndex => { - return array_append_and_optional_index(current_types) + return array_element_and_optional_index(current_types) } ArrayFunctionSignature::ArrayAndIndex => { return array_and_index(current_types) diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index 6266f30d887f..bfb6ce1a9509 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -2618,10 +2618,6 @@ SELECT array_position(arrow_cast([1, 1, 100, 1, 1], 'LargeList(Int32)'), 100) ---- 3 -statement error -SELECT array_position(arrow_cast([5, 2, 3, 4, 5], 'List(Int32)'), 'foo') - - # list_position scalar function #5 (function alias `array_position`) query III select list_position(['h', 'e', 'l', 'l', 'o'], 'l'), list_position([1, 2, 3, 4, 5], 5), list_position([1, 1, 1], 1); From 4ec62896c0a87b1c20149832b68bb9757a605c77 Mon Sep 17 00:00:00 2001 From: Trent Hauck Date: Fri, 16 Feb 2024 17:26:56 -0800 Subject: [PATCH 9/9] test: add null test for array_position --- datafusion/sqllogictest/test_files/array.slt | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index bfb6ce1a9509..88bae310fbe5 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -2618,6 +2618,16 @@ SELECT array_position(arrow_cast([1, 1, 100, 1, 1], 'LargeList(Int32)'), 100) ---- 3 +query I +SELECT array_position([1, 2, 3], 'foo') +---- +NULL + +query I +SELECT array_position([1, 2, 3], 'foo', 2) +---- +NULL + # list_position scalar function #5 (function alias `array_position`) query III select list_position(['h', 'e', 'l', 'l', 'o'], 'l'), list_position([1, 2, 3, 4, 5], 5), list_position([1, 1, 1], 1);