Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SPARK-21654][SQL] Complement SQL predicates expression description #18869

Closed
wants to merge 8 commits into from
Closed
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -133,8 +133,25 @@ case class Not(child: Expression)
/**
* Evaluates to `true` if `list` contains `value`.
*/
// scalastyle:off line.size.limit
@ExpressionDescription(
usage = "expr1 _FUNC_(expr2, expr3, ...) - Returns true if `expr` equals to any valN.")
usage = "expr1 _FUNC_(expr2, expr3, ...) - Returns true if `expr` equals to any valN.",
arguments = """
Arguments:
* expr1, expr2, expr3, ... - the arguments must be same type.
""",
examples = """
Examples:
> SELECT 1 _FUNC_(1, 2, 3);
true
> SELECT 1 _FUNC_(2, 3, 4);
false
> SELECT named_struct('a', 1, 'b', 2) _FUNC_(named_struct('a', 1, 'b', 1), named_struct('a', 1, 'b', 3));
false
> SELECT named_struct('a', 1, 'b', 2) _FUNC_(named_struct('a', 1, 'b', 2), named_struct('a', 1, 'b', 3));
true
""")
// scalastyle:on line.size.limit
case class In(value: Expression, list: Seq[Expression]) extends Predicate {

require(list != null, "list should not be null")
Expand Down Expand Up @@ -491,7 +508,24 @@ object Equality {
// TODO: although map type is not orderable, technically map type should be able to be used
// in equality comparison
@ExpressionDescription(
usage = "expr1 _FUNC_ expr2 - Returns true if `expr1` equals `expr2`, or false otherwise.")
usage = "expr1 _FUNC_ expr2 - Returns true if `expr1` equals `expr2`, or false otherwise.",
arguments = """
Arguments:
* expr1, expr2 - the two expressions must be same type or can be casted to a common type,
and must be a type that can be used in equality comparison. Map type is not supported.
For complex types such array/struct, the data types of fields must be orderable.
""",
examples = """
Examples:
> SELECT 2 _FUNC_ 2;
true
> SELECT 1 _FUNC_ '1';
true
> SELECT true _FUNC_ NULL;
NULL
> SELECT NULL _FUNC_ NULL;
NULL
""")
case class EqualTo(left: Expression, right: Expression)
extends BinaryComparison with NullIntolerant {

Expand All @@ -510,6 +544,23 @@ case class EqualTo(left: Expression, right: Expression)
usage = """
expr1 _FUNC_ expr2 - Returns same result as the EQUAL(=) operator for non-null operands,
but returns true if both are null, false if one of the them is null.
""",
arguments = """
Arguments:
* expr1, expr2 - the two expressions must be same type or can be casted to a common type,
and must be a type that can be used in equality comparison. Map type is not supported.
For complex types such array/struct, the data types of fields must be orderable.
""",
examples = """
Examples:
> SELECT 2 _FUNC_ 2;
true
> SELECT 1 _FUNC_ '1';
true
> SELECT true _FUNC_ NULL;
false
> SELECT NULL _FUNC_ NULL;
true
""")
case class EqualNullSafe(left: Expression, right: Expression) extends BinaryComparison {

Expand Down Expand Up @@ -540,7 +591,27 @@ case class EqualNullSafe(left: Expression, right: Expression) extends BinaryComp
}

@ExpressionDescription(
usage = "expr1 _FUNC_ expr2 - Returns true if `expr1` is less than `expr2`.")
usage = "expr1 _FUNC_ expr2 - Returns true if `expr1` is less than `expr2`.",
arguments = """
Arguments:
* expr1, expr2 - the two expressions must be same type or can be casted to a common type,
and must be a type that can be ordered. For example, map type is not orderable, so it
is not supported. For complex types such array/struct, the data types of fields must
be orderable.
""",
examples = """
Examples:
> SELECT 1 _FUNC_ 2;
true
> SELECT 1.1 _FUNC_ '1';
false
> SELECT to_date('2009-07-30 04:17:52') _FUNC_ to_date('2009-07-30 04:17:52');
false
> SELECT to_date('2009-07-30 04:17:52') _FUNC_ to_date('2009-08-01 04:17:52');
true
> SELECT 1 _FUNC_ NULL;
NULL
""")
case class LessThan(left: Expression, right: Expression)
extends BinaryComparison with NullIntolerant {

Expand All @@ -550,7 +621,27 @@ case class LessThan(left: Expression, right: Expression)
}

@ExpressionDescription(
usage = "expr1 _FUNC_ expr2 - Returns true if `expr1` is less than or equal to `expr2`.")
usage = "expr1 _FUNC_ expr2 - Returns true if `expr1` is less than or equal to `expr2`.",
arguments = """
Arguments:
* expr1, expr2 - the two expressions must be same type or can be casted to a common type,
and must be a type that can be ordered. For example, map type is not orderable, so it
is not supported. For complex types such array/struct, the data types of fields must
be orderable.
""",
examples = """
Examples:
> SELECT 2 _FUNC_ 2;
true
> SELECT 1.0 _FUNC_ '1';
true
> SELECT to_date('2009-07-30 04:17:52') _FUNC_ to_date('2009-07-30 04:17:52');
true
> SELECT to_date('2009-07-30 04:17:52') _FUNC_ to_date('2009-08-01 04:17:52');
true
> SELECT 1 _FUNC_ NULL;
NULL
""")
case class LessThanOrEqual(left: Expression, right: Expression)
extends BinaryComparison with NullIntolerant {

Expand All @@ -560,7 +651,27 @@ case class LessThanOrEqual(left: Expression, right: Expression)
}

@ExpressionDescription(
usage = "expr1 _FUNC_ expr2 - Returns true if `expr1` is greater than `expr2`.")
usage = "expr1 _FUNC_ expr2 - Returns true if `expr1` is greater than `expr2`.",
arguments = """
Arguments:
* expr1, expr2 - the two expressions must be same type or can be casted to a common type,
and must be a type that can be ordered. For example, map type is not orderable, so it
is not supported. For complex types such array/struct, the data types of fields must
be orderable.
""",
examples = """
Examples:
> SELECT 2 _FUNC_ 1;
true
> SELECT 2 _FUNC_ '1.1';
true
> SELECT to_date('2009-07-30 04:17:52') _FUNC_ to_date('2009-07-30 04:17:52');
false
> SELECT to_date('2009-07-30 04:17:52') _FUNC_ to_date('2009-08-01 04:17:52');
false
> SELECT 1 _FUNC_ NULL;
NULL
""")
case class GreaterThan(left: Expression, right: Expression)
extends BinaryComparison with NullIntolerant {

Expand All @@ -570,7 +681,27 @@ case class GreaterThan(left: Expression, right: Expression)
}

@ExpressionDescription(
usage = "expr1 _FUNC_ expr2 - Returns true if `expr1` is greater than or equal to `expr2`.")
usage = "expr1 _FUNC_ expr2 - Returns true if `expr1` is greater than or equal to `expr2`.",
arguments = """
Arguments:
* expr1, expr2 - the two expressions must be same type or can be casted to a common type,
and must be a type that can be ordered. For example, map type is not orderable, so it
is not supported. For complex types such array/struct, the data types of fields must
be orderable.
""",
examples = """
Examples:
> SELECT 2 _FUNC_ 1;
true
> SELECT 2.0 _FUNC_ '2.1';
false
> SELECT to_date('2009-07-30 04:17:52') _FUNC_ to_date('2009-07-30 04:17:52');
true
> SELECT to_date('2009-07-30 04:17:52') _FUNC_ to_date('2009-08-01 04:17:52');
false
> SELECT 1 _FUNC_ NULL;
NULL
""")
case class GreaterThanOrEqual(left: Expression, right: Expression)
extends BinaryComparison with NullIntolerant {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ class PredicateSuite extends SparkFunSuite with ExpressionEvalHelper {
(null, false, null) ::
(null, null, null) :: Nil)

test("IN") {
test("basic IN predicate test") {
checkEvaluation(In(NonFoldableLiteral.create(null, IntegerType), Seq(Literal(1),
Literal(2))), null)
checkEvaluation(In(NonFoldableLiteral.create(null, IntegerType),
Expand Down Expand Up @@ -151,29 +151,63 @@ class PredicateSuite extends SparkFunSuite with ExpressionEvalHelper {
checkEvaluation(In(Literal("^Ba*n"), Seq(Literal("aa"), Literal("^Ba*n"))), true)
checkEvaluation(In(Literal("^Ba*n"), Seq(Literal("aa"), Literal("^n"))), false)

val primitiveTypes = Seq(IntegerType, FloatType, DoubleType, StringType, ByteType, ShortType,
LongType, BinaryType, BooleanType, DecimalType.USER_DEFAULT, TimestampType)
primitiveTypes.foreach { t =>
val dataGen = RandomDataGenerator.forType(t, nullable = true).get
val inputData = Seq.fill(10) {
val value = dataGen.apply()
value match {
case d: Double if d.isNaN => 0.0d
case f: Float if f.isNaN => 0.0f
case _ => value
}

test("IN with different types") {
def testWithRandomDataGeneration(dataType: DataType, nullable: Boolean): Unit = {
val dataGen = RandomDataGenerator.forType(dataType, nullable = nullable)
if (dataGen.isDefined) {
Copy link
Member

@gatorsmile gatorsmile Sep 3, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the data type is not supported, the test is silently skipped. We can fail explicitly instead, with something like:

      val maybeDataGen = RandomDataGenerator.forType(dataType, nullable = nullable)
      val dataGen = maybeDataGen.getOrElse(fail(s"Failed to create data generator for type $dataType"))

Copy link
Member Author

@viirya viirya Sep 3, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Although I already filter out the unsupported data types when constructing atomicTypes, this does no harm, so it is OK with me. I will update it.

val inputData = Seq.fill(10) {
val value = dataGen.get.apply()
value match {
case d: Double if d.isNaN => 0.0d
case f: Float if f.isNaN => 0.0f
case _ => value
}
}
val input = inputData.map(NonFoldableLiteral.create(_, dataType))
val expected = if (inputData(0) == null) {
null
} else if (inputData.slice(1, 10).contains(inputData(0))) {
true
} else if (inputData.slice(1, 10).contains(null)) {
null
} else {
false
}
checkEvaluation(In(input(0), input.slice(1, 10)), expected)
}
val input = inputData.map(NonFoldableLiteral.create(_, t))
val expected = if (inputData(0) == null) {
null
} else if (inputData.slice(1, 10).contains(inputData(0))) {
true
} else if (inputData.slice(1, 10).contains(null)) {
null
} else {
false
}
checkEvaluation(In(input(0), input.slice(1, 10)), expected)
}

val atomicTypes = DataTypeTestUtils.atomicTypes.filter { t =>
RandomDataGenerator.forType(t).isDefined && !t.isInstanceOf[DecimalType]
} ++ Seq(DecimalType.USER_DEFAULT)

val atomicArrayTypes = atomicTypes.map(ArrayType(_, containsNull = true))

// Basic types:
for (
dataType <- atomicTypes;
nullable <- Seq(true, false)) {
testWithRandomDataGeneration(dataType, nullable)
}

// Array types:
for (
arrayType <- atomicArrayTypes;
nullable <- Seq(true, false)
if RandomDataGenerator.forType(arrayType.elementType, arrayType.containsNull).isDefined) {
testWithRandomDataGeneration(arrayType, nullable)
}

// Struct types:
for (
colOneType <- atomicTypes;
colTwoType <- atomicTypes;
nullable <- Seq(true, false)) {
val structType = StructType(
StructField("a", colOneType) :: StructField("b", colTwoType) :: Nil)
testWithRandomDataGeneration(structType, nullable)
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
-- End-to-end checks of the comparison predicates. Most statements compare operands of
-- different types (numeric literal vs. string literal, date vs. string).

-- EqualTo: `=` with same-type operands, then numeric vs. string operands.
select 1 = 1;
select 1 = '1';
select 1.0 = '1';

-- GreaterThan: `>` with numeric vs. string operands, then date operands.
select 1 > '1';
Copy link
Member Author

@viirya viirya Sep 2, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The following test cases are intended to test end-to-end comparisons between different types. It doesn't make much sense to rewrite them with checkEvaluation: we would end up with something like checkEvaluation(GreaterThan(Cast(...), ...), true) with a manually added Cast, and we already have unit tests for Cast and GreaterThan individually.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For the test cases that trigger implicit type casting, we can keep them here.

select 2 > '1.0';
select 2 > '2.0';
-- NOTE(review): the left operand here is `2`, while the matching statement in the sections
-- below uses `2.0` -- confirm whether the difference is intentional.
select 2 > '2.2';
-- Date on the left; the right operand is first a date, then the same timestamp text as a string.
select to_date('2009-07-30 04:17:52') > to_date('2009-07-30 04:17:52');
select to_date('2009-07-30 04:17:52') > '2009-07-30 04:17:52';

-- GreaterThanOrEqual: `>=` with numeric vs. string operands, then date vs. date and
-- date vs. string operands.
select 1 >= '1';
select 2 >= '1.0';
select 2 >= '2.0';
select 2.0 >= '2.2';
select to_date('2009-07-30 04:17:52') >= to_date('2009-07-30 04:17:52');
select to_date('2009-07-30 04:17:52') >= '2009-07-30 04:17:52';

-- LessThan: `<` with the same operand pairs as the GreaterThanOrEqual section.
select 1 < '1';
select 2 < '1.0';
select 2 < '2.0';
select 2.0 < '2.2';
select to_date('2009-07-30 04:17:52') < to_date('2009-07-30 04:17:52');
select to_date('2009-07-30 04:17:52') < '2009-07-30 04:17:52';

-- LessThanOrEqual: `<=` with the same operand pairs as the GreaterThanOrEqual section.
select 1 <= '1';
select 2 <= '1.0';
select 2 <= '2.0';
select 2.0 <= '2.2';
select to_date('2009-07-30 04:17:52') <= to_date('2009-07-30 04:17:52');
select to_date('2009-07-30 04:17:52') <= '2009-07-30 04:17:52';
Loading