diff --git a/datafusion/functions/src/string/btrim.rs b/datafusion/functions/src/string/btrim.rs index 86470dd7a646..371a11c82c54 100644 --- a/datafusion/functions/src/string/btrim.rs +++ b/datafusion/functions/src/string/btrim.rs @@ -57,7 +57,6 @@ impl BTrimFunc { // For example, given input `(Utf8View, Utf8)`, it first tries coercing to `(Utf8View, Utf8View)`. // If that fails, it proceeds to `(Utf8, Utf8)`. Exact(vec![Utf8View, Utf8View]), - // Exact(vec![Utf8, Utf8View]), Exact(vec![Utf8, Utf8]), Exact(vec![Utf8View]), Exact(vec![Utf8]), @@ -98,7 +97,7 @@ impl ScalarUDFImpl for BTrimFunc { )(args), other => exec_err!( "Unsupported data type {other:?} for function btrim,\ - expected for Utf8, LargeUtf8 or Utf8View." + expected Utf8, LargeUtf8 or Utf8View." ), } } diff --git a/datafusion/functions/src/string/ltrim.rs b/datafusion/functions/src/string/ltrim.rs index 6a9fafdd9299..b7b27afcee1f 100644 --- a/datafusion/functions/src/string/ltrim.rs +++ b/datafusion/functions/src/string/ltrim.rs @@ -32,7 +32,8 @@ use crate::utils::{make_scalar_function, utf8_to_str_type}; /// Returns the longest string with leading characters removed. If the characters are not specified, whitespace is removed. /// ltrim('zzzytest', 'xyz') = 'test' fn ltrim(args: &[ArrayRef]) -> Result { - general_trim::(args, TrimType::Left, false) + let use_string_view = args[0].data_type() == &DataType::Utf8View; + general_trim::(args, TrimType::Left, use_string_view) } #[derive(Debug)] @@ -51,7 +52,15 @@ impl LtrimFunc { use DataType::*; Self { signature: Signature::one_of( - vec![Exact(vec![Utf8]), Exact(vec![Utf8, Utf8])], + vec![ + // Planner attempts coercion to the target type starting with the most preferred candidate. + // For example, given input `(Utf8View, Utf8)`, it first tries coercing to `(Utf8View, Utf8View)`. + // If that fails, it proceeds to `(Utf8, Utf8)`. + Exact(vec![Utf8View, Utf8View]), + Exact(vec![Utf8, Utf8]), + Exact(vec![Utf8View]), + Exact(vec![Utf8]), + ], Volatility::Immutable, ), } @@ -77,7 +86,7 @@ impl ScalarUDFImpl for LtrimFunc { fn invoke(&self, args: &[ColumnarValue]) -> Result { match args[0].data_type() { - DataType::Utf8 => make_scalar_function( + DataType::Utf8 | DataType::Utf8View => make_scalar_function( ltrim::, vec![Hint::Pad, Hint::AcceptsSingular], )(args), @@ -85,7 +94,10 @@ impl ScalarUDFImpl for LtrimFunc { ltrim::, vec![Hint::Pad, Hint::AcceptsSingular], )(args), - other => exec_err!("Unsupported data type {other:?} for function ltrim"), + other => exec_err!( + "Unsupported data type {other:?} for function ltrim,\ + expected Utf8, LargeUtf8 or Utf8View." + ), } } } diff --git a/datafusion/functions/src/string/rtrim.rs b/datafusion/functions/src/string/rtrim.rs index 50b626e3df0e..ec53f3ed7430 100644 --- a/datafusion/functions/src/string/rtrim.rs +++ b/datafusion/functions/src/string/rtrim.rs @@ -32,7 +32,8 @@ use crate::utils::{make_scalar_function, utf8_to_str_type}; /// Returns the longest string with trailing characters removed. If the characters are not specified, whitespace is removed. /// rtrim('testxxzx', 'xyz') = 'test' fn rtrim(args: &[ArrayRef]) -> Result { - general_trim::(args, TrimType::Right, false) + let use_string_view = args[0].data_type() == &DataType::Utf8View; + general_trim::(args, TrimType::Right, use_string_view) } #[derive(Debug)] @@ -51,7 +52,15 @@ impl RtrimFunc { use DataType::*; Self { signature: Signature::one_of( - vec![Exact(vec![Utf8]), Exact(vec![Utf8, Utf8])], + vec![ + // Planner attempts coercion to the target type starting with the most preferred candidate. + // For example, given input `(Utf8View, Utf8)`, it first tries coercing to `(Utf8View, Utf8View)`. + // If that fails, it proceeds to `(Utf8, Utf8)`. + Exact(vec![Utf8View, Utf8View]), + Exact(vec![Utf8, Utf8]), + Exact(vec![Utf8View]), + Exact(vec![Utf8]), + ], Volatility::Immutable, ), } @@ -77,7 +86,7 @@ impl ScalarUDFImpl for RtrimFunc { fn invoke(&self, args: &[ColumnarValue]) -> Result { match args[0].data_type() { - DataType::Utf8 => make_scalar_function( + DataType::Utf8 | DataType::Utf8View => make_scalar_function( rtrim::, vec![Hint::Pad, Hint::AcceptsSingular], )(args), @@ -85,7 +94,10 @@ impl ScalarUDFImpl for RtrimFunc { rtrim::, vec![Hint::Pad, Hint::AcceptsSingular], )(args), - other => exec_err!("Unsupported data type {other:?} for function rtrim"), + other => exec_err!( + "Unsupported data type {other:?} for function rtrim,\ + expected Utf8, LargeUtf8 or Utf8View." + ), } } } diff --git a/datafusion/sqllogictest/test_files/string_view.slt b/datafusion/sqllogictest/test_files/string_view.slt index fcd71b7f7e94..648994c84658 100644 --- a/datafusion/sqllogictest/test_files/string_view.slt +++ b/datafusion/sqllogictest/test_files/string_view.slt @@ -607,6 +607,99 @@ Xiangpeng Xiangpeng Xiangpeng NULL Raphael Raphael Raphael NULL NULL NULL NULL NULL +## Ensure no casts for LTRIM +# Test LTRIM with Utf8View input +query TT +EXPLAIN SELECT + LTRIM(column1_utf8view) AS l +FROM test; +---- +logical_plan +01)Projection: ltrim(test.column1_utf8view) AS l +02)--TableScan: test projection=[column1_utf8view] + +# Test LTRIM with Utf8View input and Utf8View pattern +query TT +EXPLAIN SELECT + LTRIM(column1_utf8view, 'foo') AS l +FROM test; +---- +logical_plan +01)Projection: ltrim(test.column1_utf8view, Utf8View("foo")) AS l +02)--TableScan: test projection=[column1_utf8view] + +# Test LTRIM with Utf8View bytes longer than 12 +query TT +EXPLAIN SELECT + LTRIM(column1_utf8view, 'this is longer than 12') AS l +FROM test; +---- +logical_plan +01)Projection: ltrim(test.column1_utf8view, Utf8View("this is longer than 12")) AS l +02)--TableScan: test projection=[column1_utf8view] + +# Test LTRIM outputs +query TTTTT +SELECT + LTRIM(column1_utf8view, 'foo') AS l1, + LTRIM(column1_utf8view, column2_utf8view) AS l2, + LTRIM(column1_utf8view) AS l3, + LTRIM(column1_utf8view, NULL) AS l4, + LTRIM(column1_utf8view, 'Xiang') AS l5 +FROM test; +---- +Andrew Andrew Andrew NULL Andrew +Xiangpeng (empty) Xiangpeng NULL peng +Raphael aphael Raphael NULL Raphael +NULL NULL NULL NULL NULL + +## ensure no casts for RTRIM +# Test RTRIM with Utf8View input +query TT +EXPLAIN SELECT + RTRIM(column1_utf8view) AS l +FROM test; +---- +logical_plan +01)Projection: rtrim(test.column1_utf8view) AS l +02)--TableScan: test projection=[column1_utf8view] + +# Test RTRIM with Utf8View input and Utf8View pattern +query TT +EXPLAIN SELECT + RTRIM(column1_utf8view, 'foo') AS l +FROM test; +---- +logical_plan +01)Projection: rtrim(test.column1_utf8view, Utf8View("foo")) AS l +02)--TableScan: test projection=[column1_utf8view] + +# Test RTRIM with Utf8View bytes longer than 12 +query TT +EXPLAIN SELECT + RTRIM(column1_utf8view, 'this is longer than 12') AS l +FROM test; +---- +logical_plan +01)Projection: rtrim(test.column1_utf8view, Utf8View("this is longer than 12")) AS l +02)--TableScan: test projection=[column1_utf8view] + +# Test RTRIM outputs +query TTTTT +SELECT + RTRIM(column1_utf8view, 'foo') AS l1, + RTRIM(column1_utf8view, column2_utf8view) AS l2, + RTRIM(column1_utf8view) AS l3, + RTRIM(column1_utf8view, NULL) AS l4, + RTRIM(column1_utf8view, 'peng') As l5 +FROM test; +---- +Andrew Andrew Andrew NULL Andrew +Xiangpeng (empty) Xiangpeng NULL Xia +Raphael Raphael Raphael NULL Raphael +NULL NULL NULL NULL NULL + + ## Ensure no casts for CHARACTER_LENGTH query TT EXPLAIN SELECT @@ -685,16 +778,6 @@ logical_plan 01)Projection: lower(CAST(test.column1_utf8view AS Utf8)) AS c1 02)--TableScan: test projection=[column1_utf8view] -## Ensure no casts for LTRIM -## TODO https://github.com/apache/datafusion/issues/11856 -query TT -EXPLAIN SELECT - LTRIM(column1_utf8view) as c1 -FROM test; ----- -logical_plan -01)Projection: ltrim(CAST(test.column1_utf8view AS Utf8)) AS c1 -02)--TableScan: test projection=[column1_utf8view] ## Ensure no casts for LPAD ## TODO https://github.com/apache/datafusion/issues/11857 @@ -795,18 +878,6 @@ logical_plan 01)Projection: reverse(CAST(test.column1_utf8view AS Utf8)) AS c1 02)--TableScan: test projection=[column1_utf8view] -## Ensure no casts for RTRIM -## TODO file ticket -query TT -EXPLAIN SELECT - RTRIM(column1_utf8view) as c1, - RTRIM(column1_utf8view, 'foo') as c2 -FROM test; ----- -logical_plan -01)Projection: rtrim(__common_expr_1) AS c1, rtrim(__common_expr_1, Utf8("foo")) AS c2 -02)--Projection: CAST(test.column1_utf8view AS Utf8) AS __common_expr_1 -03)----TableScan: test projection=[column1_utf8view] ## Ensure no casts for RIGHT ## TODO file ticket @@ -833,19 +904,6 @@ logical_plan 03)----TableScan: test projection=[column1_utf8view, column2_utf8view] -## Ensure no casts for RTRIM -## TODO file ticket -query TT -EXPLAIN SELECT - RTRIM(column1_utf8view) as c, - RTRIM(column1_utf8view, column2_utf8view) as c1 -FROM test; ----- -logical_plan -01)Projection: rtrim(__common_expr_1) AS c, rtrim(__common_expr_1, CAST(test.column2_utf8view AS Utf8)) AS c1 -02)--Projection: CAST(test.column1_utf8view AS Utf8) AS __common_expr_1, test.column2_utf8view -03)----TableScan: test projection=[column1_utf8view, column2_utf8view] - ## Ensure no casts for SPLIT_PART ## TODO file ticket query TT