From 444c64d3137126c1b6da84b33825961d3d691821 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Sun, 3 Sep 2017 02:40:32 +0000 Subject: [PATCH] Improve test case for `In`. --- .../sql/catalyst/expressions/predicates.scala | 22 ++- .../catalyst/expressions/PredicateSuite.scala | 78 +++++--- .../sql-tests/inputs/predicate-functions.sql | 6 - .../results/predicate-functions.sql.out | 176 +++++++----------- 4 files changed, 144 insertions(+), 138 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala index 6b99f0c207018..efcd45fad779c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala @@ -512,7 +512,8 @@ object Equality { arguments = """ Arguments: * expr1, expr2 - the two expressions must be same type or can be casted to a common type, - and must be a type that can be used in equality comparison. + and must be a type that can be used in equality comparison. Map type is not supported. + For complex types such as array/struct, the data types of fields must be orderable. """, examples = """ Examples: @@ -547,7 +548,8 @@ case class EqualTo(left: Expression, right: Expression) arguments = """ Arguments: * expr1, expr2 - the two expressions must be same type or can be casted to a common type, - and must be a type that can be used in equality comparison. + and must be a type that can be used in equality comparison. Map type is not supported. + For complex types such as array/struct, the data types of fields must be orderable. 
""", examples = """ Examples: @@ -593,7 +595,9 @@ case class EqualNullSafe(left: Expression, right: Expression) extends BinaryComp arguments = """ Arguments: * expr1, expr2 - the two expressions must be same type or can be casted to a common type, - and must be a type that can be ordered/compared. + and must be a type that can be ordered. For example, map type is not orderable, so it + is not supported. For complex types such as array/struct, the data types of fields must + be orderable. """, examples = """ Examples: @@ -621,7 +625,9 @@ case class LessThan(left: Expression, right: Expression) arguments = """ Arguments: * expr1, expr2 - the two expressions must be same type or can be casted to a common type, - and must be a type that can be ordered/compared. + and must be a type that can be ordered. For example, map type is not orderable, so it + is not supported. For complex types such as array/struct, the data types of fields must + be orderable. """, examples = """ Examples: @@ -649,7 +655,9 @@ case class LessThanOrEqual(left: Expression, right: Expression) arguments = """ Arguments: * expr1, expr2 - the two expressions must be same type or can be casted to a common type, - and must be a type that can be ordered/compared. + and must be a type that can be ordered. For example, map type is not orderable, so it + is not supported. For complex types such as array/struct, the data types of fields must + be orderable. """, examples = """ Examples: @@ -677,7 +685,9 @@ case class GreaterThan(left: Expression, right: Expression) arguments = """ Arguments: * expr1, expr2 - the two expressions must be same type or can be casted to a common type, - and must be a type that can be ordered/compared. + and must be a type that can be ordered. For example, map type is not orderable, so it + is not supported. For complex types such as array/struct, the data types of fields must + be orderable. 
""", examples = """ Examples: diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala index 055c31c2b3018..999261d87630b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala @@ -123,7 +123,7 @@ class PredicateSuite extends SparkFunSuite with ExpressionEvalHelper { (null, false, null) :: (null, null, null) :: Nil) - test("IN") { + test("basic IN predicate test") { checkEvaluation(In(NonFoldableLiteral.create(null, IntegerType), Seq(Literal(1), Literal(2))), null) checkEvaluation(In(NonFoldableLiteral.create(null, IntegerType), @@ -151,29 +151,63 @@ class PredicateSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(In(Literal("^Ba*n"), Seq(Literal("aa"), Literal("^Ba*n"))), true) checkEvaluation(In(Literal("^Ba*n"), Seq(Literal("aa"), Literal("^n"))), false) - val primitiveTypes = Seq(IntegerType, FloatType, DoubleType, StringType, ByteType, ShortType, - LongType, BinaryType, BooleanType, DecimalType.USER_DEFAULT, TimestampType) - primitiveTypes.foreach { t => - val dataGen = RandomDataGenerator.forType(t, nullable = true).get - val inputData = Seq.fill(10) { - val value = dataGen.apply() - value match { - case d: Double if d.isNaN => 0.0d - case f: Float if f.isNaN => 0.0f - case _ => value + } + + test("IN with different types") { + def testWithRandomDataGeneration(dataType: DataType, nullable: Boolean): Unit = { + val dataGen = RandomDataGenerator.forType(dataType, nullable = nullable) + if (dataGen.isDefined) { + val inputData = Seq.fill(10) { + val value = dataGen.get.apply() + value match { + case d: Double if d.isNaN => 0.0d + case f: Float if f.isNaN => 0.0f + case _ => value + } } + val input = inputData.map(NonFoldableLiteral.create(_, dataType)) + val 
expected = if (inputData(0) == null) { + null + } else if (inputData.slice(1, 10).contains(inputData(0))) { + true + } else if (inputData.slice(1, 10).contains(null)) { + null + } else { + false + } + checkEvaluation(In(input(0), input.slice(1, 10)), expected) } - val input = inputData.map(NonFoldableLiteral.create(_, t)) - val expected = if (inputData(0) == null) { - null - } else if (inputData.slice(1, 10).contains(inputData(0))) { - true - } else if (inputData.slice(1, 10).contains(null)) { - null - } else { - false - } - checkEvaluation(In(input(0), input.slice(1, 10)), expected) + } + + val atomicTypes = DataTypeTestUtils.atomicTypes.filter { t => + RandomDataGenerator.forType(t).isDefined && !t.isInstanceOf[DecimalType] + } ++ Seq(DecimalType.USER_DEFAULT) + + val atomicArrayTypes = atomicTypes.map(ArrayType(_, containsNull = true)) + + // Basic types: + for ( + dataType <- atomicTypes; + nullable <- Seq(true, false)) { + testWithRandomDataGeneration(dataType, nullable) + } + + // Array types: + for ( + arrayType <- atomicArrayTypes; + nullable <- Seq(true, false) + if RandomDataGenerator.forType(arrayType.elementType, arrayType.containsNull).isDefined) { + testWithRandomDataGeneration(arrayType, nullable) + } + + // Struct types: + for ( + colOneType <- atomicTypes; + colTwoType <- atomicTypes; + nullable <- Seq(true, false)) { + val structType = StructType( + StructField("a", colOneType) :: StructField("b", colTwoType) :: Nil) + testWithRandomDataGeneration(structType, nullable) } } diff --git a/sql/core/src/test/resources/sql-tests/inputs/predicate-functions.sql b/sql/core/src/test/resources/sql-tests/inputs/predicate-functions.sql index efaa3f3eb77c4..3b3d4ad64b3ec 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/predicate-functions.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/predicate-functions.sql @@ -1,9 +1,3 @@ --- In -select 1 in(1, 2, 3); -select 1 in(2, 3, 4); -select named_struct('a', 1, 'b', 2) in(named_struct('a', 1, 'b', 1), 
named_struct('a', 1, 'b', 3)); -select named_struct('a', 1, 'b', 2) in(named_struct('a', 1, 'b', 2), named_struct('a', 1, 'b', 3)); - -- EqualTo select 1 = 1; select 1 = '1'; diff --git a/sql/core/src/test/resources/sql-tests/results/predicate-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/predicate-functions.sql.out index 424cacc8b5279..8e7e04c8e1c4f 100644 --- a/sql/core/src/test/resources/sql-tests/results/predicate-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/predicate-functions.sql.out @@ -1,250 +1,218 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 31 +-- Number of queries: 27 -- !query 0 -select 1 in(1, 2, 3) +select 1 = 1 -- !query 0 schema -struct<(1 IN (1, 2, 3)):boolean> +struct<(1 = 1):boolean> -- !query 0 output true -- !query 1 -select 1 in(2, 3, 4) +select 1 = '1' -- !query 1 schema -struct<(1 IN (2, 3, 4)):boolean> +struct<(1 = CAST(1 AS INT)):boolean> -- !query 1 output -false +true -- !query 2 -select named_struct('a', 1, 'b', 2) in(named_struct('a', 1, 'b', 1), named_struct('a', 1, 'b', 3)) +select 1.0 = '1' -- !query 2 schema -struct<(named_struct(a, 1, b, 2) IN (named_struct(a, 1, b, 1), named_struct(a, 1, b, 3))):boolean> +struct<(1.0 = CAST(1 AS DECIMAL(2,1))):boolean> -- !query 2 output -false +true -- !query 3 -select named_struct('a', 1, 'b', 2) in(named_struct('a', 1, 'b', 2), named_struct('a', 1, 'b', 3)) +select 1 > '1' -- !query 3 schema -struct<(named_struct(a, 1, b, 2) IN (named_struct(a, 1, b, 2), named_struct(a, 1, b, 3))):boolean> +struct<(1 > CAST(1 AS INT)):boolean> -- !query 3 output -true +false -- !query 4 -select 1 = 1 +select 2 > '1.0' -- !query 4 schema -struct<(1 = 1):boolean> +struct<(2 > CAST(1.0 AS INT)):boolean> -- !query 4 output true -- !query 5 -select 1 = '1' +select 2 > '2.0' -- !query 5 schema -struct<(1 = CAST(1 AS INT)):boolean> +struct<(2 > CAST(2.0 AS INT)):boolean> -- !query 5 output -true +false -- !query 6 -select 1.0 = '1' +select 2 > 
'2.2' -- !query 6 schema -struct<(1.0 = CAST(1 AS DECIMAL(2,1))):boolean> +struct<(2 > CAST(2.2 AS INT)):boolean> -- !query 6 output -true +false -- !query 7 -select 1 > '1' +select to_date('2009-07-30 04:17:52') > to_date('2009-07-30 04:17:52') -- !query 7 schema -struct<(1 > CAST(1 AS INT)):boolean> +struct<(to_date('2009-07-30 04:17:52') > to_date('2009-07-30 04:17:52')):boolean> -- !query 7 output false -- !query 8 -select 2 > '1.0' +select to_date('2009-07-30 04:17:52') > '2009-07-30 04:17:52' -- !query 8 schema -struct<(2 > CAST(1.0 AS INT)):boolean> +struct<(CAST(to_date('2009-07-30 04:17:52') AS STRING) > 2009-07-30 04:17:52):boolean> -- !query 8 output -true +false -- !query 9 -select 2 > '2.0' +select 1 >= '1' -- !query 9 schema -struct<(2 > CAST(2.0 AS INT)):boolean> +struct<(1 >= CAST(1 AS INT)):boolean> -- !query 9 output -false +true -- !query 10 -select 2 > '2.2' +select 2 >= '1.0' -- !query 10 schema -struct<(2 > CAST(2.2 AS INT)):boolean> +struct<(2 >= CAST(1.0 AS INT)):boolean> -- !query 10 output -false +true -- !query 11 -select to_date('2009-07-30 04:17:52') > to_date('2009-07-30 04:17:52') +select 2 >= '2.0' -- !query 11 schema -struct<(to_date('2009-07-30 04:17:52') > to_date('2009-07-30 04:17:52')):boolean> +struct<(2 >= CAST(2.0 AS INT)):boolean> -- !query 11 output -false +true -- !query 12 -select to_date('2009-07-30 04:17:52') > '2009-07-30 04:17:52' +select 2.0 >= '2.2' -- !query 12 schema -struct<(CAST(to_date('2009-07-30 04:17:52') AS STRING) > 2009-07-30 04:17:52):boolean> +struct<(2.0 >= CAST(2.2 AS DECIMAL(2,1))):boolean> -- !query 12 output false -- !query 13 -select 1 >= '1' +select to_date('2009-07-30 04:17:52') >= to_date('2009-07-30 04:17:52') -- !query 13 schema -struct<(1 >= CAST(1 AS INT)):boolean> +struct<(to_date('2009-07-30 04:17:52') >= to_date('2009-07-30 04:17:52')):boolean> -- !query 13 output true -- !query 14 -select 2 >= '1.0' +select to_date('2009-07-30 04:17:52') >= '2009-07-30 04:17:52' -- !query 14 schema 
-struct<(2 >= CAST(1.0 AS INT)):boolean> +struct<(CAST(to_date('2009-07-30 04:17:52') AS STRING) >= 2009-07-30 04:17:52):boolean> -- !query 14 output -true +false -- !query 15 -select 2 >= '2.0' +select 1 < '1' -- !query 15 schema -struct<(2 >= CAST(2.0 AS INT)):boolean> +struct<(1 < CAST(1 AS INT)):boolean> -- !query 15 output -true +false -- !query 16 -select 2.0 >= '2.2' +select 2 < '1.0' -- !query 16 schema -struct<(2.0 >= CAST(2.2 AS DECIMAL(2,1))):boolean> +struct<(2 < CAST(1.0 AS INT)):boolean> -- !query 16 output false -- !query 17 -select to_date('2009-07-30 04:17:52') >= to_date('2009-07-30 04:17:52') +select 2 < '2.0' -- !query 17 schema -struct<(to_date('2009-07-30 04:17:52') >= to_date('2009-07-30 04:17:52')):boolean> +struct<(2 < CAST(2.0 AS INT)):boolean> -- !query 17 output -true +false -- !query 18 -select to_date('2009-07-30 04:17:52') >= '2009-07-30 04:17:52' +select 2.0 < '2.2' -- !query 18 schema -struct<(CAST(to_date('2009-07-30 04:17:52') AS STRING) >= 2009-07-30 04:17:52):boolean> +struct<(2.0 < CAST(2.2 AS DECIMAL(2,1))):boolean> -- !query 18 output -false +true -- !query 19 -select 1 < '1' +select to_date('2009-07-30 04:17:52') < to_date('2009-07-30 04:17:52') -- !query 19 schema -struct<(1 < CAST(1 AS INT)):boolean> +struct<(to_date('2009-07-30 04:17:52') < to_date('2009-07-30 04:17:52')):boolean> -- !query 19 output false -- !query 20 -select 2 < '1.0' +select to_date('2009-07-30 04:17:52') < '2009-07-30 04:17:52' -- !query 20 schema -struct<(2 < CAST(1.0 AS INT)):boolean> +struct<(CAST(to_date('2009-07-30 04:17:52') AS STRING) < 2009-07-30 04:17:52):boolean> -- !query 20 output -false +true -- !query 21 -select 2 < '2.0' +select 1 <= '1' -- !query 21 schema -struct<(2 < CAST(2.0 AS INT)):boolean> +struct<(1 <= CAST(1 AS INT)):boolean> -- !query 21 output -false +true -- !query 22 -select 2.0 < '2.2' +select 2 <= '1.0' -- !query 22 schema -struct<(2.0 < CAST(2.2 AS DECIMAL(2,1))):boolean> +struct<(2 <= CAST(1.0 AS INT)):boolean> -- 
!query 22 output -true +false -- !query 23 -select to_date('2009-07-30 04:17:52') < to_date('2009-07-30 04:17:52') +select 2 <= '2.0' -- !query 23 schema -struct<(to_date('2009-07-30 04:17:52') < to_date('2009-07-30 04:17:52')):boolean> +struct<(2 <= CAST(2.0 AS INT)):boolean> -- !query 23 output -false +true -- !query 24 -select to_date('2009-07-30 04:17:52') < '2009-07-30 04:17:52' +select 2.0 <= '2.2' -- !query 24 schema -struct<(CAST(to_date('2009-07-30 04:17:52') AS STRING) < 2009-07-30 04:17:52):boolean> +struct<(2.0 <= CAST(2.2 AS DECIMAL(2,1))):boolean> -- !query 24 output true -- !query 25 -select 1 <= '1' +select to_date('2009-07-30 04:17:52') <= to_date('2009-07-30 04:17:52') -- !query 25 schema -struct<(1 <= CAST(1 AS INT)):boolean> +struct<(to_date('2009-07-30 04:17:52') <= to_date('2009-07-30 04:17:52')):boolean> -- !query 25 output true -- !query 26 -select 2 <= '1.0' --- !query 26 schema -struct<(2 <= CAST(1.0 AS INT)):boolean> --- !query 26 output -false - - --- !query 27 -select 2 <= '2.0' --- !query 27 schema -struct<(2 <= CAST(2.0 AS INT)):boolean> --- !query 27 output -true - - --- !query 28 -select 2.0 <= '2.2' --- !query 28 schema -struct<(2.0 <= CAST(2.2 AS DECIMAL(2,1))):boolean> --- !query 28 output -true - - --- !query 29 -select to_date('2009-07-30 04:17:52') <= to_date('2009-07-30 04:17:52') --- !query 29 schema -struct<(to_date('2009-07-30 04:17:52') <= to_date('2009-07-30 04:17:52')):boolean> --- !query 29 output -true - - --- !query 30 select to_date('2009-07-30 04:17:52') <= '2009-07-30 04:17:52' --- !query 30 schema +-- !query 26 schema struct<(CAST(to_date('2009-07-30 04:17:52') AS STRING) <= 2009-07-30 04:17:52):boolean> --- !query 30 output +-- !query 26 output true