Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SPARK-21654][SQL] Complement SQL predicates expression description #18869

Closed
wants to merge 8 commits into from
Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -133,8 +133,25 @@ case class Not(child: Expression)
/**
* Evaluates to `true` if `list` contains `value`.
*/
// scalastyle:off line.size.limit
@ExpressionDescription(
usage = "expr1 _FUNC_(expr2, expr3, ...) - Returns true if `expr` equals to any valN.")
usage = "expr1 _FUNC_(expr2, expr3, ...) - Returns true if `expr` equals to any valN.",
arguments = """
Arguments:
* expr1, expr2, expr3, ... - the arguments must be same type.
""",
examples = """
Examples:
> SELECT 1 _FUNC_(1, 2, 3);
true
> SELECT 1 _FUNC_(2, 3, 4);
false
> SELECT named_struct('a', 1, 'b', 2) _FUNC_(named_struct('a', 1, 'b', 1), named_struct('a', 1, 'b', 3));
false
> SELECT named_struct('a', 1, 'b', 2) _FUNC_(named_struct('a', 1, 'b', 2), named_struct('a', 1, 'b', 3));
true
""")
// scalastyle:on line.size.limit
case class In(value: Expression, list: Seq[Expression]) extends Predicate {

require(list != null, "list should not be null")
Expand Down Expand Up @@ -484,7 +501,22 @@ object Equality {
}

@ExpressionDescription(
usage = "expr1 _FUNC_ expr2 - Returns true if `expr1` equals `expr2`, or false otherwise.")
usage = "expr1 _FUNC_ expr2 - Returns true if `expr1` equals `expr2`, or false otherwise.",
arguments = """
Arguments:
* expr1, expr2 - the two expressions must be same type or can be casted to a common type.
""",
examples = """
Examples:
> SELECT 2 _FUNC_ 2;
true
> SELECT 1 _FUNC_ '1';
true
> SELECT true _FUNC_ NULL;
NULL
> SELECT NULL _FUNC_ NULL;
NULL
""")
case class EqualTo(left: Expression, right: Expression)
extends BinaryComparison with NullIntolerant {

Expand Down Expand Up @@ -518,6 +550,21 @@ case class EqualTo(left: Expression, right: Expression)
usage = """
expr1 _FUNC_ expr2 - Returns same result as the EQUAL(=) operator for non-null operands,
but returns true if both are null, false if one of the them is null.
""",
arguments = """
Arguments:
* expr1, expr2 - the two expressions must be same type or can be casted to a common type.
""",
examples = """
Examples:
> SELECT 2 _FUNC_ 2;
true
> SELECT 1 _FUNC_ '1';
true
> SELECT true _FUNC_ NULL;
false
> SELECT NULL _FUNC_ NULL;
true
""")
case class EqualNullSafe(left: Expression, right: Expression) extends BinaryComparison {

Expand Down Expand Up @@ -564,8 +611,27 @@ case class EqualNullSafe(left: Expression, right: Expression) extends BinaryComp
}
}

// scalastyle:off line.size.limit
@ExpressionDescription(
usage = "expr1 _FUNC_ expr2 - Returns true if `expr1` is less than `expr2`.")
usage = "expr1 _FUNC_ expr2 - Returns true if `expr1` is less than `expr2`.",
arguments = """
Arguments:
* expr1, expr2 - the two expressions must be same type or can be casted to a common type, and must be a type that can be ordered/compared.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@viirya, BTW, I think we could also do this as below:

-      * expr1, expr2 - the two expressions must be same type or can be casted to a common type, and must be a type that can be ordered/compared.
+      * expr1, expr2 - the two expressions must be same type or can be casted to a common type,
+          and must be a type that can be ordered/compared.

Just double checked it renders fine in the doc as well.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks. Fixed.

""",
examples = """
Examples:
> SELECT 1 _FUNC_ 2;
true
> SELECT 1.1 _FUNC_ '1';
false
> SELECT to_date('2009-07-30 04:17:52') _FUNC_ to_date('2009-07-30 04:17:52');
false
> SELECT to_date('2009-07-30 04:17:52') _FUNC_ to_date('2009-08-01 04:17:52');
true
> SELECT 1 _FUNC_ NULL;
NULL
""")
// scalastyle:on line.size.limit
case class LessThan(left: Expression, right: Expression)
extends BinaryComparison with NullIntolerant {

Expand All @@ -576,8 +642,27 @@ case class LessThan(left: Expression, right: Expression)
protected override def nullSafeEval(input1: Any, input2: Any): Any = ordering.lt(input1, input2)
}

// scalastyle:off line.size.limit
@ExpressionDescription(
usage = "expr1 _FUNC_ expr2 - Returns true if `expr1` is less than or equal to `expr2`.")
usage = "expr1 _FUNC_ expr2 - Returns true if `expr1` is less than or equal to `expr2`.",
arguments = """
Arguments:
* expr1, expr2 - the two expressions must be same type or can be casted to a common type, and must be a type that can be ordered/compared.
""",
examples = """
Examples:
> SELECT 2 _FUNC_ 2;
true
> SELECT 1.0 _FUNC_ '1';
true
> SELECT to_date('2009-07-30 04:17:52') _FUNC_ to_date('2009-07-30 04:17:52');
true
> SELECT to_date('2009-07-30 04:17:52') _FUNC_ to_date('2009-08-01 04:17:52');
true
> SELECT 1 _FUNC_ NULL;
NULL
""")
// scalastyle:on line.size.limit
case class LessThanOrEqual(left: Expression, right: Expression)
extends BinaryComparison with NullIntolerant {

Expand All @@ -588,8 +673,27 @@ case class LessThanOrEqual(left: Expression, right: Expression)
protected override def nullSafeEval(input1: Any, input2: Any): Any = ordering.lteq(input1, input2)
}

// scalastyle:off line.size.limit
@ExpressionDescription(
usage = "expr1 _FUNC_ expr2 - Returns true if `expr1` is greater than `expr2`.")
usage = "expr1 _FUNC_ expr2 - Returns true if `expr1` is greater than `expr2`.",
arguments = """
Arguments:
* expr1, expr2 - the two expressions must be same type or can be casted to a common type, and must be a type that can be ordered/compared.
""",
examples = """
Examples:
> SELECT 2 _FUNC_ 1;
true
> SELECT 2 _FUNC_ '1.1';
true
> SELECT to_date('2009-07-30 04:17:52') _FUNC_ to_date('2009-07-30 04:17:52');
false
> SELECT to_date('2009-07-30 04:17:52') _FUNC_ to_date('2009-08-01 04:17:52');
false
> SELECT 1 _FUNC_ NULL;
NULL
""")
// scalastyle:on line.size.limit
case class GreaterThan(left: Expression, right: Expression)
extends BinaryComparison with NullIntolerant {

Expand All @@ -600,8 +704,27 @@ case class GreaterThan(left: Expression, right: Expression)
protected override def nullSafeEval(input1: Any, input2: Any): Any = ordering.gt(input1, input2)
}

// scalastyle:off line.size.limit
@ExpressionDescription(
usage = "expr1 _FUNC_ expr2 - Returns true if `expr1` is greater than or equal to `expr2`.")
usage = "expr1 _FUNC_ expr2 - Returns true if `expr1` is greater than or equal to `expr2`.",
arguments = """
Arguments:
* expr1, expr2 - the two expressions must be same type or can be casted to a common type, and must be a type that can be ordered/compared.
""",
examples = """
Examples:
> SELECT 2 _FUNC_ 1;
true
> SELECT 2.0 _FUNC_ '2.1';
false
> SELECT to_date('2009-07-30 04:17:52') _FUNC_ to_date('2009-07-30 04:17:52');
true
> SELECT to_date('2009-07-30 04:17:52') _FUNC_ to_date('2009-08-01 04:17:52');
false
> SELECT 1 _FUNC_ NULL;
NULL
""")
// scalastyle:on line.size.limit
case class GreaterThanOrEqual(left: Expression, right: Expression)
extends BinaryComparison with NullIntolerant {

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
-- In
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we move these to PredicateSuite and then we can use checkEvaluation?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually I feel the test coverage might be out of the scope of this PR a little bit. This PR is for the expression description. If you don't insist we add test here together, I'd like to submit other PR to improve the test coverage.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we do not have a test case, how can we know the description is correct?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let me explain it a little bit more. No need to add more and more test cases in this PR. We need to ensure anything claimed in the descriptions is well tested; if no test case exists, we need to add it into this PR. It should include both negative and positive test cases. For example, map is not supported; and then this needs to be documented and tested.

Copy link
Member Author

@viirya viirya Sep 3, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ideally, the test cases should be added along with the codes which defines the semantics or the code changes such as #18818 that changes the underlying semantics.

For example, I saw there are tests added to test map is not supported by #18818:

assertError(EqualTo('mapField, 'mapField), "EqualTo does not support ordering on type MapType")
assertError(EqualNullSafe('mapField, 'mapField),
"EqualNullSafe does not support ordering on type MapType")
assertError(LessThan('mapField, 'mapField),
"LessThan does not support ordering on type MapType")
assertError(LessThanOrEqual('mapField, 'mapField),
"LessThanOrEqual does not support ordering on type MapType")
assertError(GreaterThan('mapField, 'mapField),
"GreaterThan does not support ordering on type MapType")
assertError(GreaterThanOrEqual('mapField, 'mapField),
"GreaterThanOrEqual does not support ordering on type MapType")

More tests are always good and not harm, I agreed. If the necessary tests are not added before, we should add and improve them.

select 1 in(1, 2, 3);
select 1 in(2, 3, 4);
select named_struct('a', 1, 'b', 2) in(named_struct('a', 1, 'b', 1), named_struct('a', 1, 'b', 3));
select named_struct('a', 1, 'b', 2) in(named_struct('a', 1, 'b', 2), named_struct('a', 1, 'b', 3));
Copy link
Member Author

@viirya viirya Sep 2, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For In, I've locally complemented its test coverage in PredicateSuite and use checkEvaluation. I'd prefer to have it in a following PR if you don't insist to add it in this PR.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like the existing test for In is not well covered. We should improve it. I will commit the tests I added locally.


-- EqualTo
select 1 = 1;
select 1 = '1';
select 1.0 = '1';

-- GreaterThan
select 1 > '1';
Copy link
Member Author

@viirya viirya Sep 2, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The following test cases are intended to test the end-to-end comparison between different types. It doesn't make much sense to re-write them with checkEvaluation. We'd have something like checkEvaluation(GreaterThan(Cast(...), ...), true) with manually added Cast. We have unit tests against Cast and GreaterThan individually.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For the test cases that trigger implicit type casting, we can keep them here.

select 2 > '1.0';
select 2 > '2.0';
select 2 > '2.2';
select to_date('2009-07-30 04:17:52') > to_date('2009-07-30 04:17:52');
select to_date('2009-07-30 04:17:52') > '2009-07-30 04:17:52';

-- GreaterThanOrEqual
select 1 >= '1';
select 2 >= '1.0';
select 2 >= '2.0';
select 2.0 >= '2.2';
select to_date('2009-07-30 04:17:52') >= to_date('2009-07-30 04:17:52');
select to_date('2009-07-30 04:17:52') >= '2009-07-30 04:17:52';

-- LessThan
select 1 < '1';
select 2 < '1.0';
select 2 < '2.0';
select 2.0 < '2.2';
select to_date('2009-07-30 04:17:52') < to_date('2009-07-30 04:17:52');
select to_date('2009-07-30 04:17:52') < '2009-07-30 04:17:52';

-- LessThanOrEqual
select 1 <= '1';
select 2 <= '1.0';
select 2 <= '2.0';
select 2.0 <= '2.2';
select to_date('2009-07-30 04:17:52') <= to_date('2009-07-30 04:17:52');
select to_date('2009-07-30 04:17:52') <= '2009-07-30 04:17:52';
Loading