fix: add missing dict_id #1068

Merged 8 commits on Jul 14, 2023

2 changes: 1 addition & 1 deletion components/arrow_ext/Cargo.toml
@@ -12,6 +12,6 @@ workspace = true

[dependencies]
arrow = { workspace = true }
serde = { workspace = true }
serde = { workspace = true, features = ["derive"] }
snafu = { workspace = true }
zstd = { workspace = true }
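
The added `derive` feature is what makes serde's `#[derive(Serialize, Deserialize)]` macros available inside `arrow_ext`. A minimal sketch of the kind of definition this enables (whether `CompressOptions` itself carries these derives is an assumption here, not something shown in this diff):

```rust
use serde::{Deserialize, Serialize};

// Hypothetical illustration: with serde's `derive` feature enabled in
// Cargo.toml, config-style structs can derive (de)serialization directly.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
pub struct CompressOptions {
    /// Compress the payload only when it is at least this many bytes.
    pub compress_min_length: usize,
}
```
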
122 changes: 108 additions & 14 deletions components/arrow_ext/src/ipc.rs
@@ -2,9 +2,10 @@

//! Utilities for `RecordBatch` serialization using Arrow IPC

use std::io::Cursor;
use std::{borrow::Cow, io::Cursor, sync::Arc};

use arrow::{
datatypes::{DataType, Field, Schema, SchemaRef},
ipc::{reader::StreamReader, writer::StreamWriter},
record_batch::RecordBatch,
};
@@ -48,6 +49,9 @@ const ZSTD_LEVEL: i32 = 3;
pub struct RecordBatchesEncoder {
stream_writer: Option<StreamWriter<Vec<u8>>>,
num_rows: usize,
/// Cached schema after conversion. Set when the original schema contains
/// more than one dictionary field and therefore needed converting.
cached_converted_schema: Option<SchemaRef>,
compress_opts: CompressOptions,
}

@@ -107,6 +111,7 @@ impl RecordBatchesEncoder {
Self {
stream_writer: None,
num_rows: 0,
cached_converted_schema: None,
compress_opts,
}
}
@@ -116,20 +121,82 @@ impl RecordBatchesEncoder {
self.num_rows
}

/// When the schema contains more than one dictionary field, return a new
/// owned schema with a unique `dict_id` assigned to each dictionary field;
/// otherwise return the original schema.
///
/// Workaround for https://github.com/apache/arrow-datafusion/issues/6784
fn convert_schema(schema: &SchemaRef) -> Cow<SchemaRef> {
let dict_field_num: usize = schema
.fields()
.iter()
.map(|f| {
if let DataType::Dictionary(_, _) = f.data_type() {
1
} else {
0
}
})
.sum();
if dict_field_num <= 1 {
return Cow::Borrowed(schema);
}

let new_fields = schema
.fields()
.iter()
.enumerate()
.map(|(i, f)| {
if let DataType::Dictionary(_, _) = f.data_type() {
let dict_id = i as i64;
Arc::new(Field::new_dict(
f.name(),
f.data_type().clone(),
f.is_nullable(),
dict_id,
f.dict_is_ordered().unwrap_or(false),
))
} else {
f.clone()
}
})
.collect::<Vec<_>>();

let schema_ref = Arc::new(Schema::new_with_metadata(
new_fields,
schema.metadata.clone(),
));

Cow::Owned(schema_ref)
}

/// Append one batch into the encoder for encoding.
pub fn write(&mut self, batch: &RecordBatch) -> Result<()> {
let stream_writer = if let Some(v) = &mut self.stream_writer {
v
} else {
// TODO: pre-allocate the buffer.
let buffer: Vec<u8> = Vec::new();
let stream_writer =
StreamWriter::try_new(buffer, &batch.schema()).context(ArrowError)?;
let mem_size = batch
.columns()
.iter()
.map(|col| col.get_buffer_memory_size())
.sum();
let buffer: Vec<u8> = Vec::with_capacity(mem_size);
let schema = batch.schema();
let schema = Self::convert_schema(&schema);
let stream_writer = StreamWriter::try_new(buffer, &schema).context(ArrowError)?;
if schema.is_owned() {
self.cached_converted_schema = Some(schema.into_owned());
}
self.stream_writer = Some(stream_writer);
self.stream_writer.as_mut().unwrap()
};

stream_writer.write(batch).context(ArrowError)?;
if let Some(schema) = &self.cached_converted_schema {
let batch = RecordBatch::try_new(schema.clone(), batch.columns().to_vec())
.context(ArrowError)?;
stream_writer.write(&batch).context(ArrowError)?;
} else {
stream_writer.write(batch).context(ArrowError)?;
}
self.num_rows += batch.num_rows();
Ok(())
}
@@ -218,16 +285,43 @@ mod tests {
RecordBatch::try_new(Arc::new(schema), vec![Arc::new(dic1), Arc::new(dic2)]).unwrap()
}

fn create_batch(rows: usize) -> RecordBatch {
fn create_batch(seed: usize, rows: usize) -> RecordBatch {
let schema = Schema::new(vec![
Field::new("a", DataType::Int32, false),
Field::new("b", DataType::Utf8, false),
Field::new(
"c",
DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
false,
),
Field::new(
"d",
DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
false,
),
]);

let a = Int32Array::from_iter_values(0..rows as i32);
let b = StringArray::from_iter_values((0..rows).map(|i| i.to_string()));

RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a), Arc::new(b)]).unwrap()
let b = StringArray::from_iter_values((0..rows).map(|i| (i + seed).to_string()));
let c = {
let mut b = StringDictionaryBuilder::<Int32Type>::new();
for i in 0..rows {
b.append_value(((i + seed) % 10).to_string());
}
b.finish()
};
let d = {
let mut b = StringDictionaryBuilder::<Int32Type>::new();
for i in 0..rows {
b.append_value(((i + seed) % 20).to_string());
}
b.finish()
};
RecordBatch::try_new(
Arc::new(schema),
vec![Arc::new(a), Arc::new(b), Arc::new(c), Arc::new(d)],
)
.unwrap()
}

fn ensure_encoding_and_decoding(
@@ -244,7 +338,7 @@

#[test]
fn test_ipc_encode_decode() {
let batch = create_batch(1024);
let batch = create_batch(0, 1024);
for compression in [CompressionMethod::None, CompressionMethod::Zstd] {
let compress_opts = CompressOptions {
compress_min_length: 0,
@@ -270,8 +364,8 @@
fn test_encode_multiple_record_batches() {
let num_batches = 1000;
let mut batches = Vec::with_capacity(num_batches);
for _ in 0..num_batches {
batches.push(create_batch(1024));
for i in 0..num_batches {
batches.push(create_batch(i, 1024));
}

let compress_opts = CompressOptions {
@@ -291,7 +385,7 @@

#[test]
fn test_compression_decision() {
let batch = create_batch(1024);
let batch = create_batch(0, 1024);

{
// Encode the record batch with a large `compress_min_length`, so the output
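
For reference, here is a minimal standalone sketch (not part of this diff) of the idea behind `convert_schema`: Arrow's IPC stream writer keys dictionaries by `dict_id`, so multiple dictionary fields that all carry the default id can clash, and the workaround assigns each dictionary field its column index as a unique `dict_id`:

```rust
use arrow::datatypes::{DataType, Field, Schema};

fn main() {
    let dict_type =
        DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8));

    // Give each dictionary field a distinct dict_id (here, its column index),
    // mirroring what `convert_schema` does when a schema has several of them.
    let schema = Schema::new(vec![
        Field::new_dict("c", dict_type.clone(), false, 0, false),
        Field::new_dict("d", dict_type, false, 1, false),
    ]);

    assert_ne!(
        schema.field(0).dict_id(),
        schema.field(1).dict_id(),
        "each dictionary field should carry its own dict_id"
    );
}
```
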
1 change: 1 addition & 0 deletions components/arrow_ext/src/lib.rs
@@ -1,4 +1,5 @@
// Copyright 2022 CeresDB Project Authors. Licensed under Apache-2.0.

#![feature(cow_is_borrowed)]
pub mod ipc;
pub mod operation;
9 changes: 1 addition & 8 deletions integration_tests/cases/env/cluster/ddl/alter_table.sql
@@ -12,16 +12,9 @@ DESCRIBE TABLE `05_alter_table_t0`;
INSERT INTO TABLE `05_alter_table_t0`(a, b, t, dic) values (2, '2', 2, "d2");
SELECT * FROM `05_alter_table_t0`;

-- waiting for datafusion's bug fix
-- ALTER TABLE `05_alter_table_t0` add COLUMN (add_dic string dictionary);
-- DESCRIBE TABLE `05_alter_table_t0`;
-- INSERT INTO TABLE `05_alter_table_t0`(a, b, t, dic, add_dic) values (2, '2', 2, "d2", "d3");
-- SELECT * FROM `05_alter_table_t0`;


-- doesn't support drop column
ALTER TABLE `05_alter_table_t0` DROP COLUMN b;
DESCRIBE TABLE `05_alter_table_t0`;
SELECT * FROM `05_alter_table_t0`;

DROP TABLE `05_alter_table_t0`;
DROP TABLE `05_alter_table_t0`;
37 changes: 34 additions & 3 deletions integration_tests/cases/env/local/ddl/alter_table.result
@@ -45,6 +45,35 @@ UInt64(0),Timestamp(1),Int32(1),String("d1"),String(""),
UInt64(0),Timestamp(2),Int32(2),String("d2"),String("2"),


ALTER TABLE `05_alter_table_t0` add COLUMN (add_dic string dictionary);

affected_rows: 0

DESCRIBE TABLE `05_alter_table_t0`;

name,type,is_primary,is_nullable,is_tag,is_dictionary,
String("tsid"),String("uint64"),Boolean(true),Boolean(false),Boolean(false),Boolean(false),
String("t"),String("timestamp"),Boolean(true),Boolean(false),Boolean(false),Boolean(false),
String("a"),String("int"),Boolean(false),Boolean(true),Boolean(false),Boolean(false),
String("dic"),String("string"),Boolean(false),Boolean(true),Boolean(false),Boolean(true),
String("b"),String("string"),Boolean(false),Boolean(true),Boolean(false),Boolean(false),
String("add_dic"),String("string"),Boolean(false),Boolean(true),Boolean(false),Boolean(true),


INSERT INTO TABLE `05_alter_table_t0` (a, b, t, dic, add_dic)
VALUES (2, '2', 2, "d11", "d22"),
(3, '3', 3, "d22", "d33");

affected_rows: 2

SELECT * FROM `05_alter_table_t0`;

tsid,t,a,dic,b,add_dic,
UInt64(0),Timestamp(1),Int32(1),String("d1"),String(""),String(""),
UInt64(0),Timestamp(2),Int32(2),String("d11"),String("2"),String("d22"),
UInt64(0),Timestamp(3),Int32(3),String("d22"),String("3"),String("d33"),


ALTER TABLE `05_alter_table_t0` DROP COLUMN b;

Failed to execute query, err: Server(ServerError { code: 500, msg: "Failed to create plan, query: ALTER TABLE `05_alter_table_t0` DROP COLUMN b;. Caused by: Failed to create plan, err:Unsupported SQL statement" })
@@ -57,13 +86,15 @@ String("t"),String("timestamp"),Boolean(true),Boolean(false),Boolean(false),Bool
String("a"),String("int"),Boolean(false),Boolean(true),Boolean(false),Boolean(false),
String("dic"),String("string"),Boolean(false),Boolean(true),Boolean(false),Boolean(true),
String("b"),String("string"),Boolean(false),Boolean(true),Boolean(false),Boolean(false),
String("add_dic"),String("string"),Boolean(false),Boolean(true),Boolean(false),Boolean(true),


SELECT * FROM `05_alter_table_t0`;

tsid,t,a,dic,b,
UInt64(0),Timestamp(1),Int32(1),String("d1"),String(""),
UInt64(0),Timestamp(2),Int32(2),String("d2"),String("2"),
tsid,t,a,dic,b,add_dic,
UInt64(0),Timestamp(1),Int32(1),String("d1"),String(""),String(""),
UInt64(0),Timestamp(2),Int32(2),String("d11"),String("2"),String("d22"),
UInt64(0),Timestamp(3),Int32(3),String("d22"),String("3"),String("d33"),


DROP TABLE `05_alter_table_t0`;
14 changes: 8 additions & 6 deletions integration_tests/cases/env/local/ddl/alter_table.sql
@@ -12,16 +12,18 @@ DESCRIBE TABLE `05_alter_table_t0`;
INSERT INTO TABLE `05_alter_table_t0`(a, b, t, dic) values (2, '2', 2, "d2");
SELECT * FROM `05_alter_table_t0`;

-- waiting for datafusion's bug fix
-- ALTER TABLE `05_alter_table_t0` add COLUMN (add_dic string dictionary);
-- DESCRIBE TABLE `05_alter_table_t0`;
-- INSERT INTO TABLE `05_alter_table_t0`(a, b, t, dic, add_dic) values (2, '2', 2, "d2", "d3");
-- SELECT * FROM `05_alter_table_t0`;
ALTER TABLE `05_alter_table_t0` add COLUMN (add_dic string dictionary);
DESCRIBE TABLE `05_alter_table_t0`;
INSERT INTO TABLE `05_alter_table_t0` (a, b, t, dic, add_dic)
VALUES (2, '2', 2, "d11", "d22"),
(3, '3', 3, "d22", "d33");


SELECT * FROM `05_alter_table_t0`;

-- doesn't support drop column
ALTER TABLE `05_alter_table_t0` DROP COLUMN b;
DESCRIBE TABLE `05_alter_table_t0`;
SELECT * FROM `05_alter_table_t0`;

DROP TABLE `05_alter_table_t0`;
DROP TABLE `05_alter_table_t0`;