feat: sql support dictionary column #1049

Merged (70 commits, Jul 5, 2023)
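This PR adds SQL-level support for dictionary-encoded columns: a column can be declared with a `dictionary` modifier, its values are stored dictionary-encoded, and `DESC` reports a new `is_dictionary` flag. A minimal sketch of the user-facing syntax, assembled from the integration tests changed below (the table name `dictionary_demo` is illustrative, not part of this PR):

-- Hypothetical example; syntax taken from the integration tests in this PR.
CREATE TABLE `dictionary_demo` (
    `timestamp` timestamp NOT NULL,
    `value` double,
    `dic` string dictionary,
    timestamp KEY (timestamp)) ENGINE=Analytic
WITH(
    enable_ttl='false'
);

INSERT INTO `dictionary_demo` (`timestamp`, `value`, `dic`)
VALUES (1, 10, "d1"), (2, 20, "d2"), (3, 30, "d1");

-- The dictionary column reads back as an ordinary string column.
SELECT * FROM `dictionary_demo` ORDER BY `value` ASC;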
Commits
d612b0e
draft
tanruixiang Jun 13, 2023
7a5e617
draft
tanruixiang Jun 14, 2023
c0bb71c
add builder testcase
tanruixiang Jun 14, 2023
9b60d2b
dictionary builder complete
tanruixiang Jun 15, 2023
9433827
fmt
tanruixiang Jun 15, 2023
a55acd1
complete write process
tanruixiang Jun 15, 2023
56ea056
draft
tanruixiang Jun 15, 2023
1848b8a
draft
tanruixiang Jun 19, 2023
b4c3535
draft
tanruixiang Jun 19, 2023
48436d9
fmt
tanruixiang Jun 19, 2023
526dec9
delete debug print
tanruixiang Jun 19, 2023
123a3c0
Merge branch 'main' into support_dictionary
tanruixiang Jun 19, 2023
326251b
adjust ut and delete debug display print
tanruixiang Jun 20, 2023
eb9b616
hard code:replace is_dictionary with is_tag just for test, need to re…
tanruixiang Jun 20, 2023
5907560
fix convert bug
tanruixiang Jun 20, 2023
929124c
fix bug
tanruixiang Jun 20, 2023
2141b40
resotre info level
tanruixiang Jun 20, 2023
c8f7c81
support get ScalarValue from dictionary type
tanruixiang Jun 20, 2023
cfc6d12
Revert "hard code:replace is_dictionary with is_tag just for test, ne…
tanruixiang Jun 21, 2023
d83fecf
support is_dictionary in sql
tanruixiang Jun 21, 2023
30424bd
add is_dictionary schema parse
tanruixiang Jun 21, 2023
54521bc
revert log level
tanruixiang Jun 21, 2023
1364b78
Revert "revert log level"
tanruixiang Jun 21, 2023
3666a26
Revert "add is_dictionary schema parse"
tanruixiang Jun 21, 2023
df5b49b
Revert "support is_dictionary in sql"
tanruixiang Jun 21, 2023
eb12365
Revert "Revert "hard code:replace is_dictionary with is_tag just for …
tanruixiang Jun 21, 2023
c32ebd1
Revert "support get ScalarValue from dictionary type"
tanruixiang Jun 21, 2023
663fba4
Revert "resotre info level"
tanruixiang Jun 21, 2023
249e259
Revert "fix bug"
tanruixiang Jun 21, 2023
0b4ed48
Revert "fix convert bug"
tanruixiang Jun 21, 2023
c9ca452
Revert "hard code:replace is_dictionary with is_tag just for test, ne…
tanruixiang Jun 21, 2023
3afe919
datum support dictionary
tanruixiang Jun 21, 2023
4c46824
support is_dictionary in sql
tanruixiang Jun 21, 2023
76f6110
add is_dictionary schema parse
tanruixiang Jun 21, 2023
bbd1b45
Merge branch 'main' into support_dictionary
tanruixiang Jun 21, 2023
009fabd
add some testcase
tanruixiang Jun 23, 2023
81acc25
fix: arrow meta data is lost when decode custom meta data (#1004)
ShiKaiWi Jun 19, 2023
67b9a8e
clean code
tanruixiang Jun 25, 2023
0772bc2
add more testcase
tanruixiang Jun 25, 2023
f39c273
format and clippy
tanruixiang Jun 25, 2023
0dd05af
modify intergration test
tanruixiang Jun 25, 2023
a757b36
adjust test
tanruixiang Jun 25, 2023
30a24b5
Merge branch 'main' into support_dictionary
tanruixiang Jun 25, 2023
01ab11a
update result
tanruixiang Jun 25, 2023
f7b35c4
update result
tanruixiang Jun 25, 2023
0abc918
update pb
tanruixiang Jun 25, 2023
8a4128e
update result
tanruixiang Jun 25, 2023
65b0c47
Merge branch 'main' into support_dictionary
tanruixiang Jun 26, 2023
cd1ae6f
use new client
tanruixiang Jun 26, 2023
f9028f2
fix ut
tanruixiang Jun 26, 2023
026f7cb
when read added column, construct null dictionary columnblock
tanruixiang Jun 26, 2023
d9e8824
fix ut
tanruixiang Jun 26, 2023
fb8631d
Merge branch 'main' into support_dictionary
tanruixiang Jun 26, 2023
174b164
fix ut
tanruixiang Jun 26, 2023
e702039
add testcase
tanruixiang Jun 26, 2023
3a61d65
use dict_id to new dict field
tanruixiang Jun 27, 2023
99f448d
modify test_case
tanruixiang Jun 27, 2023
1cb6e30
fix bug
tanruixiang Jun 27, 2023
0b1e996
use new_dict
tanruixiang Jun 27, 2023
0be7026
Merge branch 'main' into support_dictionary
tanruixiang Jun 27, 2023
8a5b4f4
update datafusion version
tanruixiang Jun 28, 2023
f561a72
update version
tanruixiang Jun 28, 2023
103e000
restore
tanruixiang Jun 28, 2023
b8e6ac4
update
tanruixiang Jun 28, 2023
cfc5f44
fmt
tanruixiang Jun 28, 2023
85d5702
Merge branch 'main' into client_support_dictionary
tanruixiang Jul 5, 2023
3e2974d
remove error merge
tanruixiang Jul 5, 2023
373a906
add original comment
tanruixiang Jul 5, 2023
1aabaf1
fix compile error
tanruixiang Jul 5, 2023
e317fd4
update ceresdb-rs version
tanruixiang Jul 5, 2023
8 changes: 4 additions & 4 deletions Cargo.lock


44 changes: 42 additions & 2 deletions components/arrow_ext/src/ipc.rs
@@ -184,12 +184,40 @@ mod tests {
use std::sync::Arc;

use arrow::{
array::{Int32Array, StringArray},
datatypes::{DataType, Field, Schema},
array::{Int32Array, StringArray, StringDictionaryBuilder},
datatypes::{DataType, Field, Int32Type, Schema},
};

use super::*;

fn create_dictionary_record_batch() -> RecordBatch {
let col1 = Field::new_dict(
"dic1",
DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
false,
1,
false,
);
let col2 = Field::new_dict(
"dic2",
DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
true,
0,
false,
);
let schema = Schema::new(vec![col1, col2]);
let mut builder = StringDictionaryBuilder::<Int32Type>::new();
builder.append_value("d1");
builder.append_value("d2");
let dic1 = builder.finish();
let mut builder = StringDictionaryBuilder::<Int32Type>::new();
builder.append_null();
builder.append_value("d3");
let dic2 = builder.finish();

RecordBatch::try_new(Arc::new(schema), vec![Arc::new(dic1), Arc::new(dic2)]).unwrap()
}

fn create_batch(rows: usize) -> RecordBatch {
let schema = Schema::new(vec![
Field::new("a", DataType::Int32, false),
@@ -226,6 +254,18 @@ mod tests {
}
}

#[test]
fn test_ipc_encode_decode_with_dicitonary_encode() {
let batch = create_dictionary_record_batch();
for compression in [CompressionMethod::None, CompressionMethod::Zstd] {
let compress_opts = CompressOptions {
compress_min_length: 0,
method: compression,
};
ensure_encoding_and_decoding(&batch, compress_opts, compression);
}
}

#[test]
fn test_encode_multiple_record_batches() {
let num_batches = 1000;
2 changes: 1 addition & 1 deletion integration_tests/Cargo.toml
@@ -8,7 +8,7 @@ workspace = true
[dependencies]
anyhow = "1.0.58"
async-trait = "0.1"
ceresdb-client = "1.0"
ceresdb-client = "1.0.2"
local-ip-address = "0.5"
reqwest = { workspace = true }
serde = { workspace = true }
16 changes: 8 additions & 8 deletions integration_tests/cases/common/dml/case_sensitive.result
@@ -75,10 +75,10 @@ Failed to execute query, err: Server(ServerError { code: 500, msg: "Failed to cr

DESC case_SENSITIVE_table1;

name,type,is_primary,is_nullable,is_tag,
String("tsid"),String("uint64"),Boolean(true),Boolean(false),Boolean(false),
String("ts"),String("timestamp"),Boolean(true),Boolean(false),Boolean(false),
String("VALUE1"),String("double"),Boolean(false),Boolean(true),Boolean(false),
name,type,is_primary,is_nullable,is_tag,is_dictionary,
String("tsid"),String("uint64"),Boolean(true),Boolean(false),Boolean(false),Boolean(false),
String("ts"),String("timestamp"),Boolean(true),Boolean(false),Boolean(false),Boolean(false),
String("VALUE1"),String("double"),Boolean(false),Boolean(true),Boolean(false),Boolean(false),


DESC CASE_SENSITIVE_TABLE1;
@@ -87,10 +87,10 @@ Failed to execute query, err: Server(ServerError { code: 500, msg: "Failed to cr

DESC `case_SENSITIVE_table1`;

name,type,is_primary,is_nullable,is_tag,
String("tsid"),String("uint64"),Boolean(true),Boolean(false),Boolean(false),
String("ts"),String("timestamp"),Boolean(true),Boolean(false),Boolean(false),
String("VALUE1"),String("double"),Boolean(false),Boolean(true),Boolean(false),
name,type,is_primary,is_nullable,is_tag,is_dictionary,
String("tsid"),String("uint64"),Boolean(true),Boolean(false),Boolean(false),Boolean(false),
String("ts"),String("timestamp"),Boolean(true),Boolean(false),Boolean(false),Boolean(false),
String("VALUE1"),String("double"),Boolean(false),Boolean(true),Boolean(false),Boolean(false),


DESC `CASE_SENSITIVE_TABLE1`;
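With this change, `DESC` emits an extra `is_dictionary` column, visible in the expected results above. For a column actually declared with the `dictionary` modifier the flag should read true; a hedged sketch for a hypothetical table (the table name and the exact Boolean values are assumptions, following the result format above):

CREATE TABLE `desc_demo` (
    `ts` timestamp NOT NULL,
    `dic` string dictionary,
    timestamp KEY (ts)) ENGINE=Analytic
WITH(
    enable_ttl='false'
);

DESC `desc_demo`;

name,type,is_primary,is_nullable,is_tag,is_dictionary,
String("tsid"),String("uint64"),Boolean(true),Boolean(false),Boolean(false),Boolean(false),
String("ts"),String("timestamp"),Boolean(true),Boolean(false),Boolean(false),Boolean(false),
String("dic"),String("string"),Boolean(false),Boolean(true),Boolean(false),Boolean(true),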
84 changes: 44 additions & 40 deletions integration_tests/cases/common/dml/insert_mode.result
@@ -5,6 +5,7 @@ affected_rows: 0
CREATE TABLE `03_dml_insert_mode_table1` (
`timestamp` timestamp NOT NULL,
`value` double,
`dic` string dictionary,
timestamp KEY (timestamp)) ENGINE=Analytic
WITH(
enable_ttl='false',
@@ -13,8 +14,8 @@ WITH(

affected_rows: 0

INSERT INTO `03_dml_insert_mode_table1` (`timestamp`, `value`)
VALUES (1, +10), (2, 0), (3, -30);
INSERT INTO `03_dml_insert_mode_table1` (`timestamp`, `value`, `dic`)
VALUES (1, +10, "d1"), (2, 0, "d2"), (3, -30, "d1");

affected_rows: 3

@@ -25,10 +26,10 @@ FROM
ORDER BY
`value` ASC;

tsid,timestamp,value,
UInt64(0),Timestamp(3),Double(-30.0),
UInt64(0),Timestamp(2),Double(0.0),
UInt64(0),Timestamp(1),Double(10.0),
tsid,timestamp,value,dic,
UInt64(0),Timestamp(3),Double(-30.0),String("d1"),
UInt64(0),Timestamp(2),Double(0.0),String("d2"),
UInt64(0),Timestamp(1),Double(10.0),String("d1"),


INSERT INTO `03_dml_insert_mode_table1` (`timestamp`, `value`)
@@ -43,10 +44,10 @@ FROM
ORDER BY
`value` ASC;

tsid,timestamp,value,
UInt64(0),Timestamp(1),Double(100.0),
UInt64(0),Timestamp(2),Double(200.0),
UInt64(0),Timestamp(3),Double(300.0),
tsid,timestamp,value,dic,
UInt64(0),Timestamp(1),Double(100.0),String(""),
UInt64(0),Timestamp(2),Double(200.0),String(""),
UInt64(0),Timestamp(3),Double(300.0),String(""),


DROP TABLE `03_dml_insert_mode_table1`;
@@ -60,6 +61,7 @@ affected_rows: 0
CREATE TABLE `03_dml_insert_mode_table2` (
`timestamp` timestamp NOT NULL,
`value` double,
`dic` string dictionary,
timestamp KEY (timestamp)) ENGINE=Analytic
WITH(
enable_ttl='false',
@@ -68,8 +70,8 @@ WITH(

affected_rows: 0

INSERT INTO `03_dml_insert_mode_table2` (`timestamp`, `value`)
VALUES (1, 10), (2, 20), (3, 30);
INSERT INTO `03_dml_insert_mode_table2` (`timestamp`, `value`, `dic`)
VALUES (1, 10, "d1"), (2, 20, ""), (3, 30, "d2");

affected_rows: 3

@@ -80,14 +82,14 @@ FROM
ORDER BY
`value` ASC;

tsid,timestamp,value,
UInt64(0),Timestamp(1),Double(10.0),
UInt64(0),Timestamp(2),Double(20.0),
UInt64(0),Timestamp(3),Double(30.0),
tsid,timestamp,value,dic,
UInt64(0),Timestamp(1),Double(10.0),String("d1"),
UInt64(0),Timestamp(2),Double(20.0),String(""),
UInt64(0),Timestamp(3),Double(30.0),String("d2"),


INSERT INTO `03_dml_insert_mode_table2` (`timestamp`, `value`)
VALUES (1, 100), (2, 200), (3, 300);
INSERT INTO `03_dml_insert_mode_table2` (`timestamp`, `value`, `dic`)
VALUES (1, 100, "d2"), (2, 200, "d1"), (3, 300, "");

affected_rows: 3

@@ -98,13 +100,13 @@ FROM
ORDER BY
`value` ASC;

tsid,timestamp,value,
UInt64(0),Timestamp(1),Double(10.0),
UInt64(0),Timestamp(2),Double(20.0),
UInt64(0),Timestamp(3),Double(30.0),
UInt64(0),Timestamp(1),Double(100.0),
UInt64(0),Timestamp(2),Double(200.0),
UInt64(0),Timestamp(3),Double(300.0),
tsid,timestamp,value,dic,
UInt64(0),Timestamp(1),Double(10.0),String("d1"),
UInt64(0),Timestamp(2),Double(20.0),String(""),
UInt64(0),Timestamp(3),Double(30.0),String("d2"),
UInt64(0),Timestamp(1),Double(100.0),String("d2"),
UInt64(0),Timestamp(2),Double(200.0),String("d1"),
UInt64(0),Timestamp(3),Double(300.0),String(""),


DROP TABLE `03_dml_insert_mode_table2`;
@@ -118,15 +120,16 @@ affected_rows: 0
CREATE TABLE `03_dml_insert_mode_table3` (
`timestamp` timestamp NOT NULL,
`value` double,
`dic` string dictionary,
timestamp KEY (timestamp)) ENGINE=Analytic
WITH(
enable_ttl='false'
);

affected_rows: 0

INSERT INTO `03_dml_insert_mode_table3` (`timestamp`, `value`)
VALUES (1, 10), (2, 20), (3, 30);
INSERT INTO `03_dml_insert_mode_table3` (`timestamp`, `value`, `dic`)
VALUES (1, 100, "d2"), (2, 200, "d1"), (3, 300, "d1");

affected_rows: 3

@@ -137,14 +140,14 @@ FROM
ORDER BY
`value` ASC;

tsid,timestamp,value,
UInt64(0),Timestamp(1),Double(10.0),
UInt64(0),Timestamp(2),Double(20.0),
UInt64(0),Timestamp(3),Double(30.0),
tsid,timestamp,value,dic,
UInt64(0),Timestamp(1),Double(100.0),String("d2"),
UInt64(0),Timestamp(2),Double(200.0),String("d1"),
UInt64(0),Timestamp(3),Double(300.0),String("d1"),


INSERT INTO `03_dml_insert_mode_table3` (`timestamp`, `value`)
VALUES (1, 100), (2, 200), (3, 300);
VALUES (1, 100, "d5"), (2, 200, "d6"), (3, 300, "d7");

affected_rows: 3

@@ -155,10 +158,10 @@ FROM
ORDER BY
`value` ASC;

tsid,timestamp,value,
UInt64(0),Timestamp(1),Double(100.0),
UInt64(0),Timestamp(2),Double(200.0),
UInt64(0),Timestamp(3),Double(300.0),
tsid,timestamp,value,dic,
UInt64(0),Timestamp(1),Double(100.0),String(""),
UInt64(0),Timestamp(2),Double(200.0),String(""),
UInt64(0),Timestamp(3),Double(300.0),String(""),


DROP TABLE `03_dml_insert_mode_table3`;
@@ -176,6 +179,7 @@ CREATE TABLE `03_dml_insert_mode_table4` (
`c3` uint32 default c1 + 1,
`c4` uint32 default c3 + 1,
`c5` uint32 default c3 + 10,
`c6` string default "default",
timestamp KEY (timestamp)) ENGINE=Analytic
WITH(
enable_ttl='false'
@@ -195,10 +199,10 @@ FROM
ORDER BY
`c1` ASC;

tsid,timestamp,c1,c2,c3,c4,c5,
UInt64(0),Timestamp(1),UInt32(10),String("123"),UInt32(11),UInt32(12),UInt32(3),
UInt64(0),Timestamp(2),UInt32(20),String("123"),UInt32(21),UInt32(22),UInt32(4),
UInt64(0),Timestamp(3),UInt32(30),String("123"),UInt32(31),UInt32(32),UInt32(5),
tsid,timestamp,c1,c2,c3,c4,c5,c6,
UInt64(0),Timestamp(1),UInt32(10),String("123"),UInt32(11),UInt32(12),UInt32(3),String("default"),
UInt64(0),Timestamp(2),UInt32(20),String("123"),UInt32(21),UInt32(22),UInt32(4),String("default"),
UInt64(0),Timestamp(3),UInt32(30),String("123"),UInt32(31),UInt32(32),UInt32(5),String("default"),


DROP TABLE IF EXISTS `03_dml_insert_mode_table4`;
25 changes: 16 additions & 9 deletions integration_tests/cases/common/dml/insert_mode.sql
@@ -4,15 +4,16 @@ DROP TABLE IF EXISTS `03_dml_insert_mode_table1`;
CREATE TABLE `03_dml_insert_mode_table1` (
`timestamp` timestamp NOT NULL,
`value` double,
`dic` string dictionary,
timestamp KEY (timestamp)) ENGINE=Analytic
WITH(
enable_ttl='false',
update_mode='OVERWRITE'
);


INSERT INTO `03_dml_insert_mode_table1` (`timestamp`, `value`)
VALUES (1, +10), (2, 0), (3, -30);
INSERT INTO `03_dml_insert_mode_table1` (`timestamp`, `value`, `dic`)
VALUES (1, +10, "d1"), (2, 0, "d2"), (3, -30, "d1");


SELECT
@@ -42,15 +43,16 @@ DROP TABLE IF EXISTS `03_dml_insert_mode_table2`;
CREATE TABLE `03_dml_insert_mode_table2` (
`timestamp` timestamp NOT NULL,
`value` double,
`dic` string dictionary,
timestamp KEY (timestamp)) ENGINE=Analytic
WITH(
enable_ttl='false',
update_mode='APPEND'
);


INSERT INTO `03_dml_insert_mode_table2` (`timestamp`, `value`)
VALUES (1, 10), (2, 20), (3, 30);
INSERT INTO `03_dml_insert_mode_table2` (`timestamp`, `value`, `dic`)
VALUES (1, 10, "d1"), (2, 20, ""), (3, 30, "d2");

SELECT
*
@@ -59,8 +61,8 @@ FROM
ORDER BY
`value` ASC;

INSERT INTO `03_dml_insert_mode_table2` (`timestamp`, `value`)
VALUES (1, 100), (2, 200), (3, 300);
INSERT INTO `03_dml_insert_mode_table2` (`timestamp`, `value`, `dic`)
VALUES (1, 100, "d2"), (2, 200, "d1"), (3, 300, "");

SELECT
*
@@ -77,14 +79,18 @@ DROP TABLE IF EXISTS `03_dml_insert_mode_table3`;
CREATE TABLE `03_dml_insert_mode_table3` (
`timestamp` timestamp NOT NULL,
`value` double,
`dic` string dictionary,
timestamp KEY (timestamp)) ENGINE=Analytic
WITH(
enable_ttl='false'
);


INSERT INTO `03_dml_insert_mode_table3` (`timestamp`, `value`)
VALUES (1, 10), (2, 20), (3, 30);
INSERT INTO `03_dml_insert_mode_table3` (`timestamp`, `value`, `dic`)
VALUES (1, 100, "d2"), (2, 200, "d1"), (3, 300, "d1");

-- TODO support insert Null
-- INSERT INTO `03_dml_insert_mode_table3` (`timestamp`, `value`, `dic`) VALUES (1, 100, "d2"), (2, 200, "d1"), (3, 300, Null);

SELECT
*
@@ -94,7 +100,7 @@ ORDER BY
`value` ASC;

INSERT INTO `03_dml_insert_mode_table3` (`timestamp`, `value`)
VALUES (1, 100), (2, 200), (3, 300);
VALUES (1, 100, "d5"), (2, 200, "d6"), (3, 300, "d7");


SELECT
@@ -116,6 +122,7 @@ CREATE TABLE `03_dml_insert_mode_table4` (
`c3` uint32 default c1 + 1,
`c4` uint32 default c3 + 1,
`c5` uint32 default c3 + 10,
`c6` string default "default",
timestamp KEY (timestamp)) ENGINE=Analytic
WITH(
enable_ttl='false'