Skip to content

Commit

Permalink
feat(optimizer): support m <= rank AND rank <= n (#6384)
Browse files: browse the repository at this point in the history
* feat(optimizer): support m <= rank AND rank <= n

* handle more cases

* add e2e tests

* clippy

* test BETWEEN
  • Loading branch information
xxchan authored Nov 19, 2022
1 parent f663277 commit 709e1be
Show file tree
Hide file tree
Showing 14 changed files with 234 additions and 122 deletions.
13 changes: 13 additions & 0 deletions e2e_test/batch/top_n/group_top_n.slt
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,19 @@ where rank <= 3;
3 2
3 3

query II rowsort
select x, y from (
select *, ROW_NUMBER() OVER (PARTITION BY x ORDER BY y) as rank from t
)
where rank <= 3 AND rank > 1;
----
1 2
1 3
2 2
2 3
3 2
3 3

query II rowsort
select x, y from (
select *, RANK() OVER (ORDER BY y) as rank from t
Expand Down
20 changes: 20 additions & 0 deletions e2e_test/streaming/group_top_n.slt
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,13 @@ select x, y from (
)
where rank <= 3;

statement ok
create materialized view mv_with_lb as
select x, y from (
select *, ROW_NUMBER() OVER (PARTITION BY x ORDER BY y) as rank from t
)
where rank <= 3 AND rank > 1;

statement ok
create materialized view mv_rank_no_group as
select x, y from (
Expand Down Expand Up @@ -46,6 +53,16 @@ select * from mv;
3 2
3 3

query II rowsort
select * from mv_with_lb;
----
1 2
1 3
2 2
2 3
3 2
3 3

query II rowsort
select * from mv_rank_no_group;
----
Expand Down Expand Up @@ -78,6 +95,9 @@ DROP MATERIALIZED VIEW mv_rank_no_group;
statement ok
DROP MATERIALIZED VIEW mv_rank;

statement ok
DROP MATERIALIZED VIEW mv_with_lb;

statement ok
drop materialized view mv;

Expand Down
27 changes: 9 additions & 18 deletions src/frontend/planner_test/tests/testdata/agg.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -60,13 +60,11 @@
- sql: |
create table t(v1 int, v2 int);
select v1 from t group by v2;
planner_error: 'Invalid input syntax: column must appear in the GROUP BY clause
or be used in an aggregate function'
planner_error: 'Invalid input syntax: column must appear in the GROUP BY clause or be used in an aggregate function'
- sql: |
create table t(v1 int, v2 int);
select sum(v1), v1 from t group by v2, v2;
planner_error: 'Invalid input syntax: column must appear in the GROUP BY clause
or be used in an aggregate function'
planner_error: 'Invalid input syntax: column must appear in the GROUP BY clause or be used in an aggregate function'
- sql: |
create table t(v1 int, v2 int, v3 int);
select v3, min(v1) * avg(v1+v2) as agg from t group by v3;
Expand Down Expand Up @@ -117,8 +115,7 @@
└─LogicalAgg { group_key: [(t.v1 + t.v2)], aggs: [] }
└─LogicalProject { exprs: [(t.v1 + t.v2)] }
└─LogicalScan { table: t, columns: [t.v1, t.v2, t._row_id] }
- name: "test logical_agg with complex group expression \nshould complain about nested
agg call \n"
- name: "test logical_agg with complex group expression \nshould complain about nested agg call \n"
sql: |
create table t(v1 int, v2 int);
select avg(sum(v1 + v2)) from t group by v1 + v2;
Expand All @@ -137,8 +134,7 @@
- sql: |
create table t(v1 int, v2 int);
select v1 from t group by v1 + v2;
planner_error: 'Invalid input syntax: column must appear in the GROUP BY clause
or be used in an aggregate function'
planner_error: 'Invalid input syntax: column must appear in the GROUP BY clause or be used in an aggregate function'
- sql: |
create table t(v1 int, v2 int);
select count(v1 + v2) as cnt, sum(v1 + v2) as sum from t;
Expand Down Expand Up @@ -261,8 +257,7 @@
sql: |
create table t (v1 real, v2 int);
select 1 from t group by v1 having v2 > 5;
planner_error: 'Invalid input syntax: column must appear in the GROUP BY clause
or be used in an aggregate function'
planner_error: 'Invalid input syntax: column must appear in the GROUP BY clause or be used in an aggregate function'
- name: distinct without agg
sql: |
create table t (v1 int, v2 int);
Expand Down Expand Up @@ -555,8 +550,7 @@
sql: |
create table t(a int, b int);
select avg(a) FILTER (WHERE abs(a)) AS avga from t;
binder_error: 'Invalid input syntax: the type of filter clause should be boolean,
but found Int32'
binder_error: 'Invalid input syntax: the type of filter clause should be boolean, but found Int32'
- name: filter clause + subquery
sql: |
create table t(a int, b int);
Expand All @@ -575,8 +569,7 @@
sql: |
create table t(a int, b int);
select abs(a) FILTER (WHERE a > 0) AS avga from t;
binder_error: 'Invalid input syntax: DISTINCT, ORDER BY or FILTER is only allowed
in aggregation functions, but `abs` is not an aggregation function'
binder_error: 'Invalid input syntax: DISTINCT, ORDER BY or FILTER is only allowed in aggregation functions, but `abs` is not an aggregation function'
- name: prune column before filter
sql: |
create table t(v1 int, v2 int);
Expand Down Expand Up @@ -660,8 +653,7 @@
- sql: |
create table t(x int, y varchar);
select string_agg(y, ',' order by y), count(distinct x) from t;
planner_error: 'Invalid input syntax: Order by aggregates are disallowed to occur
with distinct aggregates'
planner_error: 'Invalid input syntax: Order by aggregates are disallowed to occur with distinct aggregates'
- sql: |
create table t(v1 int, v2 int);
with z(a, b) as (select count(distinct v1), count(v2) from t) select a from z;
Expand Down Expand Up @@ -854,8 +846,7 @@
sql: |
create table t (v1 int, v2 int);
select min(v1), unnest(array[2, v2]) from t;
planner_error: 'Invalid input syntax: column must appear in the GROUP BY clause
or be used in an aggregate function'
planner_error: 'Invalid input syntax: column must appear in the GROUP BY clause or be used in an aggregate function'
- name: post-agg project set - grouped
sql: |
create table t (v1 int, v2 int);
Expand Down
6 changes: 2 additions & 4 deletions src/frontend/planner_test/tests/testdata/array.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,7 @@
└─BatchValues { rows: [[]] }
- sql: |
select array_cat(array[233], array[array[array[66]]]);
binder_error: 'Bind error: unable to find least restrictive type between integer[]
and integer[][][]'
binder_error: 'Bind error: unable to find least restrictive type between integer[] and integer[][][]'
- sql: |
select array_cat(array[233], 123);
binder_error: 'Bind error: Cannot concatenate integer[] and integer'
Expand Down Expand Up @@ -116,8 +115,7 @@
- name: string from/to varchar[] in implicit context
sql: |
values (array['a', 'b']), ('{c,' || 'd}');
binder_error: 'Bind error: types List { datatype: Varchar } and Varchar cannot be
matched'
binder_error: 'Bind error: types List { datatype: Varchar } and Varchar cannot be matched'
- name: string to varchar[] in assign context
sql: |
create table t (v1 varchar[]);
Expand Down
28 changes: 12 additions & 16 deletions src/frontend/planner_test/tests/testdata/insert.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,32 +32,32 @@
sql: |
create table t (v1 int, v2 int);
insert into t (v1, v3) values (1, 2);
binder_error: "Bind error: Column v3 not found in table t"
binder_error: 'Bind error: Column v3 not found in table t'
- name: Invalid column name 2
sql: |
create table t (v1 int, v2 int);
insert into t (v3, v1) values (1, 2);
binder_error: "Bind error: Column v3 not found in table t"
binder_error: 'Bind error: Column v3 not found in table t'
- name: Duplicate column
sql: |
create table t (v1 int, v2 int);
insert into t (v1, v1) values (1, 2);
binder_error: "Bind error: Column specified more than once"
binder_error: 'Bind error: Column specified more than once'
- name: Too many target columns
sql: |
create table t (v1 int, v2 int);
insert into t (v1, v2, v2) values (5, 6);
binder_error: "Bind error: INSERT has more target columns than values"
binder_error: 'Bind error: INSERT has more target columns than values'
- name: Not enough target columns
sql: |
create table t (v1 int, v2 int);
insert into t (v1) values (5, 6);
binder_error: "Bind error: INSERT has less target columns than values"
binder_error: 'Bind error: INSERT has less target columns than values'
- name: insert values mismatch columns length
sql: |
create table t (v1 real, v2 int, v3 varchar);
insert into t values (1, 2), (3, 4);
binder_error: "Bind error: INSERT has more target columns than expressions"
binder_error: 'Bind error: INSERT has more target columns than expressions'
- name: insert literal null
sql: |
create table t(v1 int);
Expand All @@ -74,18 +74,16 @@
BatchExchange { order: [], dist: Single }
└─BatchInsert { table: t }
└─BatchValues { rows: [['2020-01-01 01:02:03':Varchar::Timestamp::Time], ['03:04:05':Varchar::Time]] }
- name:
a `VALUES` without insert context may be invalid on its own (compare with
above)
- name: a `VALUES` without insert context may be invalid on its own (compare with above)
sql: |
create table t (v1 time);
values (timestamp '2020-01-01 01:02:03'), (time '03:04:05');
binder_error: "Bind error: types Timestamp and Time cannot be matched"
binder_error: 'Bind error: types Timestamp and Time cannot be matched'
- name: a `VALUES` with `limit` loses insert context (compare with 2 cases above)
sql: |
create table t (v1 time);
insert into t values (timestamp '2020-01-01 01:02:03'), (time '03:04:05') limit 1;
binder_error: "Bind error: types Timestamp and Time cannot be matched"
binder_error: 'Bind error: types Timestamp and Time cannot be matched'
- name: null in first row without insert context
sql: |
values (null), (1);
Expand All @@ -99,7 +97,7 @@
- name: rows of different number of columns
sql: |
values (1), (2, 3);
binder_error: "Bind error: VALUES lists must all be the same length"
binder_error: 'Bind error: VALUES lists must all be the same length'
- name: insert into select without cast
sql: |
create table t (v1 time);
Expand All @@ -123,14 +121,12 @@
sql: |
create table t (v1 timestamp, v2 real);
insert into t select time '01:02:03', 4.5 from t;
binder_error:
'Bind error: cannot cast type "time without time zone" to "timestamp
without time zone" in Assign context'
binder_error: 'Bind error: cannot cast type "time without time zone" to "timestamp without time zone" in Assign context'
- name: insert into select mismatch columns length
sql: |
create table t (v1 int, v2 real);
insert into t select 2, 3, 4.5 from t;
binder_error: "Bind error: INSERT has more expressions than target columns"
binder_error: 'Bind error: INSERT has more expressions than target columns'
- name: insert with join
sql: |
create table t1 (a int, b int);
Expand Down
15 changes: 5 additions & 10 deletions src/frontend/planner_test/tests/testdata/join.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -170,8 +170,7 @@
LogicalJoin { type: Inner, on: (t1.v2 = t2.v2), output: all }
├─LogicalScan { table: t1, output_columns: [t1.v1, t1.v2], required_columns: [v1, v2], predicate: (t1.v1 > 100:Int32) }
└─LogicalScan { table: t2, output_columns: [t2.v1, t2.v2], required_columns: [v1, v2], predicate: (t2.v1 < 1000:Int32) }
- name: Left & right has same SomeShard distribution. There should still be exchanges
below hash join
- name: Left & right has same SomeShard distribution. There should still be exchanges below hash join
sql: |
create table t(x int);
create index i on t(x);
Expand All @@ -190,8 +189,7 @@
| └─StreamTableScan { table: i, columns: [i.x, i.t._row_id], pk: [i.t._row_id], dist: UpstreamHashShard(i.x) }
└─StreamExchange { dist: HashShard(i.x) }
└─StreamTableScan { table: i, columns: [i.x, i.t._row_id], pk: [i.t._row_id], dist: UpstreamHashShard(i.x) }
- name: Left & right has same SomeShard distribution. There should still be exchanges
below hash join
- name: Left & right has same SomeShard distribution. There should still be exchanges below hash join
sql: |
create table t(x int);
create index i on t(x);
Expand All @@ -210,8 +208,7 @@
| └─StreamTableScan { table: i, columns: [i.x, i.t._row_id], pk: [i.t._row_id], dist: UpstreamHashShard(i.x) }
└─StreamExchange { dist: HashShard(t.x) }
└─StreamTableScan { table: t, columns: [t.x, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
- name: Left & right has same HashShard distribution. There should be no exchange
below hash join
- name: Left & right has same HashShard distribution. There should be no exchange below hash join
sql: |
create table t(x int);
create index i on t(x);
Expand Down Expand Up @@ -480,15 +477,13 @@
create table t1(v1 int, v2 int);
create table t2(v1 int, v3 int);
select * from t1 join t2 using (v2);
binder_error: 'Item not found: column "v2" specified in USING clause does not exist
in right table'
binder_error: 'Item not found: column "v2" specified in USING clause does not exist in right table'
- name: Ensure error on non-existent USING col
sql: |
create table t1(v1 int, v2 int);
create table t2(v1 int, v3 int);
select * from t1 join t2 using (v3);
binder_error: 'Item not found: column "v3" specified in USING clause does not exist
in left table'
binder_error: 'Item not found: column "v3" specified in USING clause does not exist in left table'
- name: Ensure that we can correctly bind nested joins
sql: |
create table t1(v1 int, v2 int);
Expand Down
3 changes: 1 addition & 2 deletions src/frontend/planner_test/tests/testdata/nexmark.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,7 @@
- id: nexmark_q2
before:
- create_tables
sql: SELECT auction, price FROM bid WHERE auction = 1007 OR auction = 1020 OR auction
= 2001 OR auction = 2019 OR auction = 2087;
sql: SELECT auction, price FROM bid WHERE auction = 1007 OR auction = 1020 OR auction = 2001 OR auction = 2019 OR auction = 2087;
batch_plan: |
BatchExchange { order: [], dist: Single }
└─BatchFilter { predicate: (((((bid.auction = 1007:Int32) OR (bid.auction = 1020:Int32)) OR (bid.auction = 2001:Int32)) OR (bid.auction = 2019:Int32)) OR (bid.auction = 2087:Int32)) }
Expand Down
3 changes: 1 addition & 2 deletions src/frontend/planner_test/tests/testdata/order_by.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -160,8 +160,7 @@
sql: |
create table t (x int, y int);
select distinct x from t order by y;
planner_error: 'Invalid input syntax: for SELECT DISTINCT, ORDER BY expressions
must appear in select list'
planner_error: 'Invalid input syntax: for SELECT DISTINCT, ORDER BY expressions must appear in select list'
- name: No BatchSort needed, when input is already sorted
sql: |
create table t(v int);
Expand Down
Loading

0 comments on commit 709e1be

Please sign in to comment.