@@ -6,7 +6,12 @@ use std::{fmt, ops::Index, sync::Arc};
6
6
7
7
use bytes:: Bytes ;
8
8
use ceresdbproto:: { schema as schema_pb, sst as sst_pb} ;
9
- use common_types:: { schema:: Schema , time:: TimeRange , SequenceNumber } ;
9
+ use common_types:: {
10
+ datum:: DatumKind ,
11
+ schema:: { RecordSchemaWithKey , Schema } ,
12
+ time:: TimeRange ,
13
+ SequenceNumber ,
14
+ } ;
10
15
use common_util:: define_result;
11
16
use snafu:: { Backtrace , OptionExt , ResultExt , Snafu } ;
12
17
use xorfilter:: { Xor8 , Xor8Builder } ;
@@ -120,14 +125,38 @@ pub struct RowGroupFilterBuilder {
120
125
}
121
126
122
127
impl RowGroupFilterBuilder {
123
- pub ( crate ) fn with_num_columns ( num_col : usize ) -> Self {
124
- Self {
125
- builders : vec ! [ None ; num_col] ,
126
- }
128
+ pub ( crate ) fn new ( record_schema : & RecordSchemaWithKey ) -> Self {
129
+ let builders = record_schema
130
+ . columns ( )
131
+ . iter ( )
132
+ . enumerate ( )
133
+ . map ( |( i, col) | {
134
+ if record_schema. is_primary_key_index ( i) {
135
+ return None ;
136
+ }
137
+
138
+ if matches ! (
139
+ col. data_type,
140
+ DatumKind :: Null
141
+ | DatumKind :: Double
142
+ | DatumKind :: Float
143
+ | DatumKind :: Varbinary
144
+ | DatumKind :: Boolean
145
+ ) {
146
+ return None ;
147
+ }
148
+
149
+ Some ( Xor8Builder :: default ( ) )
150
+ } )
151
+ . collect ( ) ;
152
+
153
+ Self { builders }
127
154
}
128
155
129
156
pub ( crate ) fn add_key ( & mut self , col_idx : usize , key : & [ u8 ] ) {
130
- self . builders [ col_idx] . get_or_insert_default ( ) . insert ( key)
157
+ if let Some ( b) = self . builders [ col_idx] . as_mut ( ) {
158
+ b. insert ( key)
159
+ }
131
160
}
132
161
133
162
pub ( crate ) fn build ( self ) -> Result < RowGroupFilter > {
@@ -403,6 +432,8 @@ impl TryFrom<sst_pb::ParquetMetaData> for ParquetMetaData {
403
432
404
433
#[ cfg( test) ]
405
434
mod tests {
435
+ use common_types:: tests:: build_schema;
436
+
406
437
use super :: * ;
407
438
408
439
#[ test]
@@ -447,16 +478,22 @@ mod tests {
447
478
448
479
#[ test]
449
480
fn test_row_group_filter_builder ( ) {
450
- let mut builders = RowGroupFilterBuilder :: with_num_columns ( 1 ) ;
481
+ // (key1(varbinary), key2(timestamp), field1(double), field2(string))
482
+ let schema = build_schema ( ) ;
483
+ let record_schema = schema. to_record_schema_with_key ( ) ;
484
+ let mut builders = RowGroupFilterBuilder :: new ( & record_schema) ;
451
485
for key in [ "host-123" , "host-456" , "host-789" ] {
452
- builders. add_key ( 0 , key. as_bytes ( ) ) ;
486
+ builders. add_key ( 3 , key. as_bytes ( ) ) ;
453
487
}
454
488
let row_group_filter = builders. build ( ) . unwrap ( ) ;
489
+ for i in 0 ..3 {
490
+ assert ! ( row_group_filter. column_filters[ i] . is_none( ) ) ;
491
+ }
455
492
456
493
let testcase = [ ( "host-123" , true ) , ( "host-321" , false ) ] ;
457
494
for ( key, expected) in testcase {
458
495
let actual = row_group_filter
459
- . contains_column_data ( 0 , key. as_bytes ( ) )
496
+ . contains_column_data ( 3 , key. as_bytes ( ) )
460
497
. unwrap ( ) ;
461
498
462
499
assert_eq ! ( expected, actual) ;
0 commit comments