16
16
#include < unordered_set>
17
17
18
18
#include " rocksdb/cache.h"
19
+ #include " rocksdb/options.h"
20
+ #include " rocksdb/slice_transform.h"
19
21
#include " rocksdb/table.h"
20
22
#include " rocksdb/db.h"
21
23
#include " rocksdb/utilities/stackable_db.h"
@@ -244,13 +246,76 @@ std::string FeatureSet::DebugString() const {
244
246
return out + " }" ;
245
247
}
246
248
249
+ class ValueGetter {
250
+ public:
251
+ ValueGetter () {}
252
+ virtual ~ValueGetter () {}
253
+
254
+ virtual bool Get (uint64_t id) = 0;
255
+ virtual const Slice value () const = 0;
256
+
257
+ virtual Status status () const = 0;
258
+ };
259
+
260
+ class ValueGetterFromDB : public ValueGetter {
261
+ public:
262
+ ValueGetterFromDB (DB* db, ColumnFamilyHandle* cf) : db_(db), cf_(cf) {}
263
+
264
+ virtual bool Get (uint64_t id) override {
265
+ std::string encoded_id;
266
+ PutFixed64BigEndian (&encoded_id, id);
267
+ status_ = db_->Get (ReadOptions (), cf_, encoded_id, &value_);
268
+ if (status_.IsNotFound ()) {
269
+ status_ = Status::Corruption (" Index inconsistency" );
270
+ return false ;
271
+ }
272
+
273
+ return true ;
274
+ }
275
+
276
+ virtual const Slice value () const override { return value_; }
277
+
278
+ virtual Status status () const override { return status_; }
279
+
280
+ private:
281
+ std::string value_;
282
+ DB* db_;
283
+ ColumnFamilyHandle* cf_;
284
+ Status status_;
285
+ };
286
+
287
+ class ValueGetterFromIterator : public ValueGetter {
288
+ public:
289
+ explicit ValueGetterFromIterator (Iterator* iterator) : iterator_(iterator) {}
290
+
291
+ virtual bool Get (uint64_t id) override {
292
+ std::string encoded_id;
293
+ PutFixed64BigEndian (&encoded_id, id);
294
+ iterator_->Seek (encoded_id);
295
+
296
+ if (!iterator_->Valid () || iterator_->key () != Slice (encoded_id)) {
297
+ status_ = Status::Corruption (" Index inconsistency" );
298
+ return false ;
299
+ }
300
+
301
+ return true ;
302
+ }
303
+
304
+ virtual const Slice value () const override { return iterator_->value (); }
305
+
306
+ virtual Status status () const override { return status_; }
307
+
308
+ private:
309
+ std::unique_ptr<Iterator> iterator_;
310
+ Status status_;
311
+ };
312
+
247
313
class SpatialIndexCursor : public Cursor {
248
314
public:
249
315
// tile_box is inclusive
250
- SpatialIndexCursor (Iterator* spatial_iterator, Iterator* data_iterator ,
316
+ SpatialIndexCursor (Iterator* spatial_iterator, ValueGetter* value_getter ,
251
317
const BoundingBox<uint64_t >& tile_bbox, uint32_t tile_bits)
252
- : data_iterator_(data_iterator),
253
- valid_ (true ) {
318
+ : value_getter_(value_getter), valid_(true ) {
254
319
// calculate quad keys we'll need to query
255
320
std::vector<uint64_t > quad_keys;
256
321
quad_keys.reserve ((tile_bbox.max_x - tile_bbox.min_x + 1 ) *
@@ -329,7 +394,7 @@ class SpatialIndexCursor : public Cursor {
329
394
if (!status_.ok ()) {
330
395
return status_;
331
396
}
332
- return data_iterator_ ->status ();
397
+ return value_getter_ ->status ();
333
398
}
334
399
335
400
private:
@@ -356,32 +421,23 @@ class SpatialIndexCursor : public Cursor {
356
421
return true ;
357
422
}
358
423
359
- // doesn't return anything, but sets valid_ and status_ on corruption
360
424
void ExtractData () {
361
425
assert (valid_);
362
- std::string encoded_id;
363
- PutFixed64BigEndian (&encoded_id, *primary_keys_iterator_);
426
+ valid_ = value_getter_->Get (*primary_keys_iterator_);
364
427
365
- data_iterator_->Seek (encoded_id);
366
-
367
- if (!data_iterator_->Valid () ||
368
- data_iterator_->key () != Slice (encoded_id)) {
369
- status_ = Status::Corruption (" Index inconsistency" );
370
- valid_ = false ;
371
- return ;
428
+ if (valid_) {
429
+ Slice data = value_getter_->value ();
430
+ current_feature_set_.Clear ();
431
+ if (!GetLengthPrefixedSlice (&data, ¤t_blob_) ||
432
+ !current_feature_set_.Deserialize (data)) {
433
+ status_ = Status::Corruption (" Primary key column family corruption" );
434
+ valid_ = false ;
435
+ }
372
436
}
373
437
374
- Slice data = data_iterator_->value ();
375
- current_feature_set_.Clear ();
376
- if (!GetLengthPrefixedSlice (&data, ¤t_blob_) ||
377
- !current_feature_set_.Deserialize (data)) {
378
- status_ = Status::Corruption (" Primary key column family corruption" );
379
- valid_ = false ;
380
- return ;
381
- }
382
438
}
383
439
384
- unique_ptr<Iterator> data_iterator_ ;
440
+ unique_ptr<ValueGetter> value_getter_ ;
385
441
bool valid_;
386
442
Status status_;
387
443
@@ -427,10 +483,11 @@ class SpatialDBImpl : public SpatialDB {
427
483
DB* db, ColumnFamilyHandle* data_column_family,
428
484
const std::vector<std::pair<SpatialIndexOptions, ColumnFamilyHandle*>>&
429
485
spatial_indexes,
430
- uint64_t next_id)
486
+ uint64_t next_id, bool read_only )
431
487
: SpatialDB(db),
432
488
data_column_family_ (data_column_family),
433
- next_id_(next_id) {
489
+ next_id_(next_id),
490
+ read_only_(read_only) {
434
491
for (const auto & index : spatial_indexes) {
435
492
name_to_index_.insert (
436
493
{index .first .name , IndexColumnFamily (index .first , index .second )});
@@ -521,17 +578,26 @@ class SpatialDBImpl : public SpatialDB {
521
578
return new ErrorCursor (Status::InvalidArgument (
522
579
" Spatial index " + spatial_index + " not found" ));
523
580
}
581
+ const auto & si = itr->second .index ;
582
+ Iterator* spatial_iterator;
583
+ ValueGetter* value_getter;
524
584
525
- std::vector<Iterator*> iterators;
526
- Status s = NewIterators (read_options,
527
- {data_column_family_, itr->second .column_family },
528
- &iterators);
529
- if (!s.ok ()) {
530
- return new ErrorCursor (s);
531
- }
585
+ if (read_only_) {
586
+ spatial_iterator = NewIterator (read_options, itr->second .column_family );
587
+ value_getter = new ValueGetterFromDB (this , data_column_family_);
588
+ } else {
589
+ std::vector<Iterator*> iterators;
590
+ Status s = NewIterators (read_options,
591
+ {data_column_family_, itr->second .column_family },
592
+ &iterators);
593
+ if (!s.ok ()) {
594
+ return new ErrorCursor (s);
595
+ }
532
596
533
- const auto & si = itr->second .index ;
534
- return new SpatialIndexCursor (iterators[1 ], iterators[0 ],
597
+ spatial_iterator = iterators[1 ];
598
+ value_getter = new ValueGetterFromIterator (iterators[0 ]);
599
+ }
600
+ return new SpatialIndexCursor (spatial_iterator, value_getter,
535
601
GetTileBoundingBox (si, bbox), si.tile_bits );
536
602
}
537
603
@@ -548,31 +614,61 @@ class SpatialDBImpl : public SpatialDB {
548
614
std::unordered_map<std::string, IndexColumnFamily> name_to_index_;
549
615
550
616
std::atomic<uint64_t > next_id_;
617
+ bool read_only_;
551
618
};
552
619
553
620
namespace {
554
- Options GetRocksDBOptionsFromOptions (const SpatialDBOptions& options) {
555
- Options rocksdb_options;
556
- rocksdb_options.IncreaseParallelism (options.num_threads );
557
- rocksdb_options.write_buffer_size = 256 * 1024 * 1024 ; // 256MB
558
- rocksdb_options.max_bytes_for_level_base = 1024 * 1024 * 1024 ; // 1 GB
621
+ DBOptions GetDBOptions (const SpatialDBOptions& options) {
622
+ DBOptions db_options;
623
+ db_options.IncreaseParallelism (options.num_threads );
624
+ if (options.bulk_load ) {
625
+ db_options.disableDataSync = true ;
626
+ }
627
+ return db_options;
628
+ }
629
+
630
+ ColumnFamilyOptions GetColumnFamilyOptions (const SpatialDBOptions& options,
631
+ std::shared_ptr<Cache> block_cache) {
632
+ ColumnFamilyOptions column_family_options;
633
+ column_family_options.write_buffer_size = 256 * 1024 * 1024 ; // 256MB
634
+ column_family_options.max_bytes_for_level_base = 1024 * 1024 * 1024 ; // 1 GB
559
635
// only compress levels >= 1
560
- rocksdb_options.compression_per_level .resize (rocksdb_options.num_levels );
561
- for (int i = 0 ; i < rocksdb_options.num_levels ; ++i) {
636
+ column_family_options.compression_per_level .resize (
637
+ column_family_options.num_levels );
638
+ for (int i = 0 ; i < column_family_options.num_levels ; ++i) {
562
639
if (i == 0 ) {
563
- rocksdb_options .compression_per_level [i] = kNoCompression ;
640
+ column_family_options .compression_per_level [i] = kNoCompression ;
564
641
} else {
565
- rocksdb_options .compression_per_level [i] = kLZ4Compression ;
642
+ column_family_options .compression_per_level [i] = kLZ4Compression ;
566
643
}
567
644
}
568
645
BlockBasedTableOptions table_options;
569
- table_options.block_cache = NewLRUCache (options.cache_size );
570
- rocksdb_options.table_factory .reset (NewBlockBasedTableFactory (table_options));
646
+ table_options.block_cache = block_cache;
647
+ column_family_options.table_factory .reset (
648
+ NewBlockBasedTableFactory (table_options));
571
649
if (options.bulk_load ) {
572
- rocksdb_options.PrepareForBulkLoad ();
573
- }
574
- return rocksdb_options;
650
+ column_family_options.level0_file_num_compaction_trigger = (1 << 30 );
651
+ column_family_options.level0_slowdown_writes_trigger = (1 << 30 );
652
+ column_family_options.level0_stop_writes_trigger = (1 << 30 );
653
+ column_family_options.disable_auto_compactions = true ;
654
+ column_family_options.source_compaction_factor = (1 << 30 );
655
+ column_family_options.num_levels = 2 ;
656
+ column_family_options.target_file_size_base = 256 * 1024 * 1024 ;
657
+ column_family_options.max_mem_compaction_level = 0 ;
658
+ }
659
+ return column_family_options;
660
+ }
661
+
662
+ ColumnFamilyOptions OptimizeOptionsForDataColumnFamily (
663
+ ColumnFamilyOptions options, std::shared_ptr<Cache> block_cache) {
664
+ options.prefix_extractor .reset (NewNoopTransform ());
665
+ BlockBasedTableOptions block_based_options;
666
+ block_based_options.index_type = BlockBasedTableOptions::kHashSearch ;
667
+ block_based_options.block_cache = block_cache;
668
+ options.table_factory .reset (NewBlockBasedTableFactory (block_based_options));
669
+ return options;
575
670
}
671
+
576
672
} // namespace
577
673
578
674
class MetadataStorage {
@@ -618,26 +714,30 @@ class MetadataStorage {
618
714
Status SpatialDB::Create (
619
715
const SpatialDBOptions& options, const std::string& name,
620
716
const std::vector<SpatialIndexOptions>& spatial_indexes) {
621
- Options rocksdb_options = GetRocksDBOptionsFromOptions (options);
622
- rocksdb_options.create_if_missing = true ;
623
- rocksdb_options.create_missing_column_families = true ;
624
- rocksdb_options.error_if_exists = true ;
717
+ DBOptions db_options = GetDBOptions (options);
718
+ db_options.create_if_missing = true ;
719
+ db_options.create_missing_column_families = true ;
720
+ db_options.error_if_exists = true ;
721
+
722
+ auto block_cache = NewLRUCache (options.cache_size );
723
+ ColumnFamilyOptions column_family_options =
724
+ GetColumnFamilyOptions (options, block_cache);
625
725
626
726
std::vector<ColumnFamilyDescriptor> column_families;
627
727
column_families.push_back (ColumnFamilyDescriptor (
628
- kDefaultColumnFamilyName , ColumnFamilyOptions (rocksdb_options)));
629
- column_families.push_back (ColumnFamilyDescriptor (
630
- kMetadataColumnFamilyName , ColumnFamilyOptions (rocksdb_options)));
728
+ kDefaultColumnFamilyName ,
729
+ OptimizeOptionsForDataColumnFamily (column_family_options, block_cache)));
730
+ column_families.push_back (
731
+ ColumnFamilyDescriptor (kMetadataColumnFamilyName , column_family_options));
631
732
632
733
for (const auto & index : spatial_indexes) {
633
734
column_families.emplace_back (GetSpatialIndexColumnFamilyName (index .name ),
634
- ColumnFamilyOptions (rocksdb_options) );
735
+ column_family_options );
635
736
}
636
737
637
738
std::vector<ColumnFamilyHandle*> handles;
638
739
DB* base_db;
639
- Status s = DB::Open (DBOptions (rocksdb_options), name, column_families,
640
- &handles, &base_db);
740
+ Status s = DB::Open (db_options, name, column_families, &handles, &base_db);
641
741
if (!s.ok ()) {
642
742
return s;
643
743
}
@@ -659,13 +759,15 @@ Status SpatialDB::Create(
659
759
660
760
Status SpatialDB::Open (const SpatialDBOptions& options, const std::string& name,
661
761
SpatialDB** db, bool read_only) {
662
- Options rocksdb_options = GetRocksDBOptionsFromOptions (options);
762
+ DBOptions db_options = GetDBOptions (options);
763
+ auto block_cache = NewLRUCache (options.cache_size );
764
+ ColumnFamilyOptions column_family_options =
765
+ GetColumnFamilyOptions (options, block_cache);
663
766
664
767
Status s;
665
768
std::vector<std::string> existing_column_families;
666
769
std::vector<std::string> spatial_indexes;
667
- s = DB::ListColumnFamilies (DBOptions (rocksdb_options), name,
668
- &existing_column_families);
770
+ s = DB::ListColumnFamilies (db_options, name, &existing_column_families);
669
771
if (!s.ok ()) {
670
772
return s;
671
773
}
@@ -678,22 +780,22 @@ Status SpatialDB::Open(const SpatialDBOptions& options, const std::string& name,
678
780
679
781
std::vector<ColumnFamilyDescriptor> column_families;
680
782
column_families.push_back (ColumnFamilyDescriptor (
681
- kDefaultColumnFamilyName , ColumnFamilyOptions (rocksdb_options)));
682
- column_families.push_back (ColumnFamilyDescriptor (
683
- kMetadataColumnFamilyName , ColumnFamilyOptions (rocksdb_options)));
783
+ kDefaultColumnFamilyName ,
784
+ OptimizeOptionsForDataColumnFamily (column_family_options, block_cache)));
785
+ column_families.push_back (
786
+ ColumnFamilyDescriptor (kMetadataColumnFamilyName , column_family_options));
684
787
685
788
for (const auto & index : spatial_indexes) {
686
789
column_families.emplace_back (GetSpatialIndexColumnFamilyName (index ),
687
- ColumnFamilyOptions (rocksdb_options) );
790
+ column_family_options );
688
791
}
689
792
std::vector<ColumnFamilyHandle*> handles;
690
793
DB* base_db;
691
794
if (read_only) {
692
- s = DB::OpenForReadOnly (DBOptions (rocksdb_options) , name, column_families,
693
- &handles, & base_db);
795
+ s = DB::OpenForReadOnly (db_options , name, column_families, &handles ,
796
+ &base_db);
694
797
} else {
695
- s = DB::Open (DBOptions (rocksdb_options), name, column_families, &handles,
696
- &base_db);
798
+ s = DB::Open (db_options, name, column_families, &handles, &base_db);
697
799
}
698
800
if (!s.ok ()) {
699
801
return s;
@@ -730,13 +832,13 @@ Status SpatialDB::Open(const SpatialDBOptions& options, const std::string& name,
730
832
for (auto h : handles) {
731
833
delete h;
732
834
}
733
- delete db ;
835
+ delete base_db ;
734
836
return s;
735
837
}
736
838
737
839
// I don't need metadata column family any more, so delete it
738
840
delete handles[1 ];
739
- *db = new SpatialDBImpl (base_db, handles[0 ], index_cf, next_id);
841
+ *db = new SpatialDBImpl (base_db, handles[0 ], index_cf, next_id, read_only );
740
842
return Status::OK ();
741
843
}
742
844
0 commit comments