Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: remove filter plan node in pipeline #1126

Merged
merged 6 commits into from
Aug 9, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
138 changes: 138 additions & 0 deletions integration_tests/cases/common/dml/issue-341.result
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
DROP TABLE IF EXISTS `issue341_t1`;

affected_rows: 0

DROP TABLE IF EXISTS `issue341_t2`;

affected_rows: 0

CREATE TABLE `issue341_t1` (
`timestamp` timestamp NOT NULL,
`value` int,
`tag1` string tag,
timestamp KEY (timestamp)) ENGINE=Analytic
WITH(
enable_ttl='false',
update_mode='append'
);

affected_rows: 0

INSERT INTO `issue341_t1` (`timestamp`, `value`, `tag1`)
VALUES (1, 1, "t1"), (2, 2, "t2"), (3, 3, "t3");

affected_rows: 3

SELECT
`timestamp`,
`value`
FROM
`issue341_t1`;

timestamp,value,
Timestamp(1),Int32(1),
Timestamp(3),Int32(3),
Timestamp(2),Int32(2),


SELECT
`timestamp`,
`value`
FROM
`issue341_t1`
WHERE
`value` = 3;

timestamp,value,
Timestamp(3),Int32(3),


EXPLAIN SELECT
`timestamp`,
`value`
FROM
`issue341_t1`
WHERE
`value` = 3;

plan_type,plan,
String("logical_plan"),String("Projection: issue341_t1.timestamp, issue341_t1.value\n TableScan: issue341_t1 projection=[timestamp, value], full_filters=[issue341_t1.value = Int32(3)]"),
String("physical_plan"),String("ProjectionExec: expr=[timestamp@0 as timestamp, value@1 as value]\n ScanTable: table=issue341_t1, parallelism=8\n"),


EXPLAIN SELECT
`timestamp`,
`value`
FROM
`issue341_t1`
WHERE
tag1 = "t3";

plan_type,plan,
String("logical_plan"),String("Projection: issue341_t1.timestamp, issue341_t1.value\n TableScan: issue341_t1 projection=[timestamp, value, tag1], full_filters=[issue341_t1.tag1 = Utf8(\"t3\")]"),
String("physical_plan"),String("ProjectionExec: expr=[timestamp@0 as timestamp, value@1 as value]\n ScanTable: table=issue341_t1, parallelism=8\n"),


CREATE TABLE `issue341_t2` (
`timestamp` timestamp NOT NULL,
`value` double,
`tag1` string tag,
timestamp KEY (timestamp)) ENGINE=Analytic
WITH(
enable_ttl='false',
update_mode='overwrite'
);

affected_rows: 0

INSERT INTO `issue341_t2` (`timestamp`, `value`, `tag1`)
VALUES (1, 1, "t1"), (2, 2, "t2"), (3, 3, "t3");

affected_rows: 3

SELECT
`timestamp`,
`value`
FROM
`issue341_t2`
WHERE
`value` = 3;

timestamp,value,
Timestamp(3),Double(3.0),


EXPLAIN SELECT
`timestamp`,
`value`
FROM
`issue341_t2`
WHERE
`value` = 3;

plan_type,plan,
String("logical_plan"),String("Projection: issue341_t2.timestamp, issue341_t2.value\n Filter: issue341_t2.value = Float64(3)\n TableScan: issue341_t2 projection=[timestamp, value], partial_filters=[issue341_t2.value = Float64(3)]"),
String("physical_plan"),String("ProjectionExec: expr=[timestamp@0 as timestamp, value@1 as value]\n CoalesceBatchesExec: target_batch_size=8192\n FilterExec: value@1 = 3\n ScanTable: table=issue341_t2, parallelism=8\n"),


EXPLAIN SELECT
`timestamp`,
`value`
FROM
`issue341_t2`
WHERE
tag1 = "t3";

plan_type,plan,
String("logical_plan"),String("Projection: issue341_t2.timestamp, issue341_t2.value\n TableScan: issue341_t2 projection=[timestamp, value, tag1], full_filters=[issue341_t2.tag1 = Utf8(\"t3\")]"),
String("physical_plan"),String("ProjectionExec: expr=[timestamp@0 as timestamp, value@1 as value]\n ScanTable: table=issue341_t2, parallelism=8\n"),


DROP TABLE IF EXISTS `issue341_t1`;

affected_rows: 0

DROP TABLE IF EXISTS `issue341_t2`;

affected_rows: 0

92 changes: 92 additions & 0 deletions integration_tests/cases/common/dml/issue-341.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@

-- Test case named after issue 341 (per the file name): compares the plans
-- produced for filtered queries on an append-mode table and on an
-- overwrite-mode table, checking whether a FilterExec node is present.

-- Start from a clean state so the case can be re-run.
DROP TABLE IF EXISTS `issue341_t1`;
DROP TABLE IF EXISTS `issue341_t2`;

-- Table 1: append mode, integer `value` column, one tag column.
CREATE TABLE `issue341_t1` (
`timestamp` timestamp NOT NULL,
`value` int,
`tag1` string tag,
timestamp KEY (timestamp)) ENGINE=Analytic
WITH(
enable_ttl='false',
update_mode='append'
);

INSERT INTO `issue341_t1` (`timestamp`, `value`, `tag1`)
VALUES (1, 1, "t1"), (2, 2, "t2"), (3, 3, "t3");

-- Unfiltered read: all three inserted rows are expected back.
SELECT
`timestamp`,
`value`
FROM
`issue341_t1`;

-- Filter on the `value` column: only the row with value 3 should be returned.
SELECT
`timestamp`,
`value`
FROM
`issue341_t1`
WHERE
`value` = 3;

-- Append table, filter on the `value` column:
-- FilterExec node should not be in plan.
EXPLAIN SELECT
`timestamp`,
`value`
FROM
`issue341_t1`
WHERE
`value` = 3;

-- Append table, filter on the tag column:
-- FilterExec node should not be in plan.
EXPLAIN SELECT
`timestamp`,
`value`
FROM
`issue341_t1`
WHERE
tag1 = "t3";

-- Repeat operations above, but with overwrite table
-- (here `value` is a double instead of an int).

CREATE TABLE `issue341_t2` (
`timestamp` timestamp NOT NULL,
`value` double,
`tag1` string tag,
timestamp KEY (timestamp)) ENGINE=Analytic
WITH(
enable_ttl='false',
update_mode='overwrite'
);

INSERT INTO `issue341_t2` (`timestamp`, `value`, `tag1`)
VALUES (1, 1, "t1"), (2, 2, "t2"), (3, 3, "t3");

-- Filter on the `value` column: only the row with value 3 should be returned.
SELECT
`timestamp`,
`value`
FROM
`issue341_t2`
WHERE
`value` = 3;

-- Overwrite table, filter on the `value` column:
-- FilterExec node should be in plan.
EXPLAIN SELECT
`timestamp`,
`value`
FROM
`issue341_t2`
WHERE
`value` = 3;

-- When using tag as filter, FilterExec node should not be in plan.
EXPLAIN SELECT
`timestamp`,
`value`
FROM
`issue341_t2`
WHERE
tag1 = "t3";

-- Clean up both tables created by this case.
DROP TABLE IF EXISTS `issue341_t1`;
DROP TABLE IF EXISTS `issue341_t2`;
55 changes: 43 additions & 12 deletions table_engine/src/provider.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,9 @@ use std::{

use arrow::datatypes::SchemaRef;
use async_trait::async_trait;
use common_types::{projected_schema::ProjectedSchema, request_id::RequestId, schema::Schema};
use common_types::{
projected_schema::ProjectedSchema, request_id::RequestId, schema::Schema, UPDATE_MODE,
};
use datafusion::{
config::{ConfigEntry, ConfigExtension, ExtensionOptions},
datasource::TableProvider,
Expand Down Expand Up @@ -184,12 +186,12 @@ impl TableProviderAdapter {
}

fn check_and_build_predicate_from_filters(&self, filters: &[Expr]) -> PredicateRef {
let unique_keys = self.read_schema.unique_keys();

let push_down_filters = filters
let pushdown_states = self.pushdown_inner(&filters.iter().collect::<Vec<_>>());
let pushdown_filters = filters
.iter()
.filter_map(|filter| {
if Self::only_filter_unique_key_columns(filter, &unique_keys) {
.zip(pushdown_states.iter())
.filter_map(|(filter, state)| {
if matches!(state, &TableProviderFilterPushDown::Exact) {
Some(filter.clone())
} else {
None
Expand All @@ -198,8 +200,8 @@ impl TableProviderAdapter {
.collect::<Vec<_>>();

PredicateBuilder::default()
.add_pushdown_exprs(&push_down_filters)
.extract_time_range(&self.read_schema, &push_down_filters)
.add_pushdown_exprs(&pushdown_filters)
.extract_time_range(&self.read_schema, filters)
.build()
}

Expand All @@ -214,6 +216,29 @@ impl TableProviderAdapter {
}
true
}

/// Decides, for each filter in `filters`, how it may be pushed down to the
/// table scan.
///
/// Returns one [`TableProviderFilterPushDown`] per input filter, in the same
/// order as `filters`:
/// - when the table's engine type is `"system"`, every filter is `Inexact`;
/// - otherwise a filter is `Exact` when the table's `UPDATE_MODE` option
///   equals `"APPEND"`, or when `only_filter_unique_key_columns` accepts it
///   for the read schema's unique keys;
/// - any other filter is `Inexact`.
fn pushdown_inner(&self, filters: &[&Expr]) -> Vec<TableProviderFilterPushDown> {
    let key_columns = self.read_schema.unique_keys();
    // TODO: add pushdown check in table trait
    let table_options = &self.table.options();
    let append_mode = table_options
        .get(UPDATE_MODE)
        .map_or(false, |mode| mode == "APPEND");
    let system_table = self.table.engine_type() == "system";

    let mut states = Vec::with_capacity(filters.len());
    for filter in filters {
        // Short-circuits exactly like the guard chain: system tables never
        // reach the unique-key check, and append tables skip it as well.
        let exact = !system_table
            && (append_mode || Self::only_filter_unique_key_columns(filter, &key_columns));

        states.push(if exact {
            TableProviderFilterPushDown::Exact
        } else {
            TableProviderFilterPushDown::Inexact
        });
    }
    states
}
}

#[async_trait]
Expand All @@ -237,8 +262,11 @@ impl TableProvider for TableProviderAdapter {
self.scan_table(state, projection, filters, limit).await
}

fn supports_filter_pushdown(&self, _filter: &Expr) -> Result<TableProviderFilterPushDown> {
Ok(TableProviderFilterPushDown::Inexact)
/// Reports, for each filter in `filters`, the pushdown level computed by
/// `pushdown_inner`; the result has one entry per input filter.
fn supports_filters_pushdown(
    &self,
    filters: &[&Expr],
) -> Result<Vec<TableProviderFilterPushDown>> {
    let pushdown_states = self.pushdown_inner(filters);
    Ok(pushdown_states)
}

/// Get the type of this table for metadata/catalog purposes.
Expand All @@ -264,8 +292,11 @@ impl TableSource for TableProviderAdapter {

/// Tests whether the table provider can make use of a filter expression
/// to optimize data retrieval.
fn supports_filter_pushdown(&self, _filter: &Expr) -> Result<TableProviderFilterPushDown> {
Ok(TableProviderFilterPushDown::Inexact)
/// Returns the per-filter pushdown level for `filters`, delegating the
/// decision to `pushdown_inner`.
fn supports_filters_pushdown(
    &self,
    filters: &[&Expr],
) -> Result<Vec<TableProviderFilterPushDown>> {
    let pushdown_states = self.pushdown_inner(filters);
    Ok(pushdown_states)
}
}

Expand Down