20
20
#include < vector>
21
21
22
22
#include " db/builder.h"
23
- #include " db/dbformat.h"
24
23
#include " db/db_iter.h"
24
+ #include " db/dbformat.h"
25
25
#include " db/filename.h"
26
26
#include " db/log_reader.h"
27
27
#include " db/log_writer.h"
43
43
#include " rocksdb/statistics.h"
44
44
#include " rocksdb/status.h"
45
45
#include " rocksdb/table.h"
46
- #include " port/port.h"
47
46
#include " table/block.h"
48
47
#include " table/block_based_table_factory.h"
49
48
#include " table/merger.h"
59
58
60
59
namespace rocksdb {
61
60
62
- void dumpLeveldbBuildVersion (Logger * log);
61
+ void DumpLeveldbBuildVersion (Logger * log);
63
62
64
63
// Information kept for every waiting writer
65
64
struct DBImpl ::Writer {
@@ -266,9 +265,7 @@ DBImpl::DBImpl(const Options& options, const std::string& dbname)
266
265
storage_options_(options),
267
266
bg_work_gate_closed_(false ),
268
267
refitting_level_(false ) {
269
-
270
268
mem_->Ref ();
271
-
272
269
env_->GetAbsolutePath (dbname, &db_absolute_path_);
273
270
274
271
stall_leveln_slowdown_.resize (options.num_levels );
@@ -282,16 +279,15 @@ DBImpl::DBImpl(const Options& options, const std::string& dbname)
282
279
const int table_cache_size = options_.max_open_files - 10 ;
283
280
table_cache_.reset (new TableCache (dbname_, &options_,
284
281
storage_options_, table_cache_size));
285
-
286
282
versions_.reset (new VersionSet (dbname_, &options_, storage_options_,
287
283
table_cache_.get (), &internal_comparator_));
288
284
289
- dumpLeveldbBuildVersion (options_.info_log .get ());
285
+ DumpLeveldbBuildVersion (options_.info_log .get ());
290
286
options_.Dump (options_.info_log .get ());
291
287
292
288
char name[100 ];
293
- Status st = env_->GetHostName (name, 100L );
294
- if (st .ok ()) {
289
+ Status s = env_->GetHostName (name, 100L );
290
+ if (s .ok ()) {
295
291
host_name_ = name;
296
292
} else {
297
293
Log (options_.info_log , " Can't get hostname, use localhost as host name." );
@@ -502,7 +498,7 @@ void DBImpl::SuperVersion::Init(MemTable* new_mem, const MemTableList& new_imm,
502
498
}
503
499
504
500
// Returns the list of live files in 'sst_live' and the list
505
- // of all files in the filesystem in 'all_files '.
501
+ // of all files in the filesystem in 'candidate_files '.
506
502
// no_full_scan = true -- never do the full scan using GetChildren()
507
503
// force = false -- don't force the full scan, except every
508
504
// options_.delete_obsolete_files_period_micros
@@ -554,15 +550,18 @@ void DBImpl::FindObsoleteFiles(DeletionState& deletion_state,
554
550
versions_->AddLiveFiles (&deletion_state.sst_live );
555
551
556
552
if (doing_the_full_scan) {
557
- // set of all files in the directory
558
- env_->GetChildren (dbname_, &deletion_state.all_files ); // Ignore errors
553
+ // set of all files in the directory. We'll exclude files that are still
554
+ // alive during subsequent processing.
555
+ env_->GetChildren (
556
+ dbname_, &deletion_state.candidate_files
557
+ ); // Ignore errors
559
558
560
559
// Add log files in wal_dir
561
560
if (options_.wal_dir != dbname_) {
562
561
std::vector<std::string> log_files;
563
562
env_->GetChildren (options_.wal_dir , &log_files); // Ignore errors
564
- deletion_state.all_files .insert (
565
- deletion_state.all_files .end (),
563
+ deletion_state.candidate_files .insert (
564
+ deletion_state.candidate_files .end (),
566
565
log_files.begin (),
567
566
log_files.end ()
568
567
);
@@ -575,11 +574,10 @@ void DBImpl::FindObsoleteFiles(DeletionState& deletion_state,
575
574
// files in sst_delete_files and log_delete_files.
576
575
// It is not necessary to hold the mutex when invoking this method.
577
576
void DBImpl::PurgeObsoleteFiles (DeletionState& state) {
578
-
579
577
// check if there is anything to do
580
- if (! state.all_files . size () &&
581
- ! state.sst_delete_files .size () &&
582
- ! state.log_delete_files .size ()) {
578
+ if (state.candidate_files . empty () &&
579
+ state.sst_delete_files .empty () &&
580
+ state.log_delete_files .empty ()) {
583
581
return ;
584
582
}
585
583
@@ -589,100 +587,114 @@ void DBImpl::PurgeObsoleteFiles(DeletionState& state) {
589
587
if (state.manifest_file_number == 0 ) {
590
588
return ;
591
589
}
592
-
593
- uint64_t number;
594
- FileType type;
595
590
std::vector<std::string> old_log_files;
596
591
597
592
// Now, convert live list to an unordered set, WITHOUT mutex held;
598
593
// set is slow.
599
- std::unordered_set<uint64_t > live_set (state.sst_live .begin (),
600
- state.sst_live .end ());
594
+ std::unordered_set<uint64_t > sst_live (
595
+ state.sst_live .begin (), state.sst_live .end ()
596
+ );
601
597
602
- state.all_files .reserve (state.all_files .size () +
603
- state.sst_delete_files .size ());
598
+ auto & candidate_files = state.candidate_files ;
599
+ candidate_files.reserve (
600
+ candidate_files.size () +
601
+ state.sst_delete_files .size () +
602
+ state.log_delete_files .size ());
603
+ // We may ignore the dbname when generating the file names.
604
+ const char * kDumbDbName = " " ;
604
605
for (auto file : state.sst_delete_files ) {
605
- state.all_files .push_back (TableFileName (" " , file->number ).substr (1 ));
606
+ candidate_files.push_back (
607
+ TableFileName (kDumbDbName , file->number ).substr (1 )
608
+ );
606
609
delete file;
607
610
}
608
611
609
- state. all_files . reserve ( state.all_files . size () +
610
- state. log_delete_files . size ());
611
- for ( auto filenum : state. log_delete_files ) {
612
- if (filenum > 0 ) {
613
- state. all_files . push_back ( LogFileName ( " " , filenum). substr ( 1 ) );
612
+ for ( auto file_num : state.log_delete_files ) {
613
+ if (file_num > 0 ) {
614
+ candidate_files. push_back (
615
+ LogFileName ( kDumbDbName , file_num). substr ( 1 )
616
+ );
614
617
}
615
618
}
616
619
617
- // dedup state.all_files so we don't try to delete the same
620
+ // dedup state.candidate_files so we don't try to delete the same
618
621
// file twice
619
- sort (state.all_files .begin (), state.all_files .end ());
620
- auto unique_end = unique (state.all_files .begin (), state.all_files .end ());
621
-
622
- for (size_t i = 0 ; state.all_files .begin () + i < unique_end; i++) {
623
- if (ParseFileName (state.all_files [i], &number, &type)) {
624
- bool keep = true ;
625
- switch (type) {
626
- case kLogFile :
627
- keep = ((number >= state.log_number ) ||
628
- (number == state.prev_log_number ));
629
- break ;
630
- case kDescriptorFile :
631
- // Keep my manifest file, and any newer incarnations'
632
- // (in case there is a race that allows other incarnations)
633
- keep = (number >= state.manifest_file_number );
634
- break ;
635
- case kTableFile :
636
- keep = (live_set.find (number) != live_set.end ());
637
- break ;
638
- case kTempFile :
639
- // Any temp files that are currently being written to must
640
- // be recorded in pending_outputs_, which is inserted into "live"
641
- keep = (live_set.find (number) != live_set.end ());
642
- break ;
643
- case kInfoLogFile :
644
- keep = true ;
645
- if (number != 0 ) {
646
- old_log_files.push_back (state.all_files [i]);
647
- }
648
- break ;
649
- case kCurrentFile :
650
- case kDBLockFile :
651
- case kIdentityFile :
652
- case kMetaDatabase :
653
- keep = true ;
654
- break ;
655
- }
622
+ sort (candidate_files.begin (), candidate_files.end ());
623
+ candidate_files.erase (
624
+ unique (candidate_files.begin (), candidate_files.end ()),
625
+ candidate_files.end ()
626
+ );
656
627
657
- if (!keep) {
658
- if (type == kTableFile ) {
659
- // evict from cache
660
- table_cache_->Evict (number);
628
+ for (const auto & to_delete : candidate_files) {
629
+ uint64_t number;
630
+ FileType type;
631
+ // Ignore file if we cannot recognize it.
632
+ if (!ParseFileName (to_delete, &number, &type)) {
633
+ continue ;
634
+ }
635
+
636
+ bool keep = true ;
637
+ switch (type) {
638
+ case kLogFile :
639
+ keep = ((number >= state.log_number ) ||
640
+ (number == state.prev_log_number ));
641
+ break ;
642
+ case kDescriptorFile :
643
+ // Keep my manifest file, and any newer incarnations'
644
+ // (in case there is a race that allows other incarnations)
645
+ keep = (number >= state.manifest_file_number );
646
+ break ;
647
+ case kTableFile :
648
+ keep = (sst_live.find (number) != sst_live.end ());
649
+ break ;
650
+ case kTempFile :
651
+ // Any temp files that are currently being written to must
652
+ // be recorded in pending_outputs_, which is inserted into "live"
653
+ keep = (sst_live.find (number) != sst_live.end ());
654
+ break ;
655
+ case kInfoLogFile :
656
+ keep = true ;
657
+ if (number != 0 ) {
658
+ old_log_files.push_back (to_delete);
661
659
}
662
- std::string fname = ((type == kLogFile ) ? options_.wal_dir : dbname_) +
663
- " /" + state.all_files [i];
660
+ break ;
661
+ case kCurrentFile :
662
+ case kDBLockFile :
663
+ case kIdentityFile :
664
+ case kMetaDatabase :
665
+ keep = true ;
666
+ break ;
667
+ }
668
+
669
+ if (keep) {
670
+ continue ;
671
+ }
672
+
673
+ if (type == kTableFile ) {
674
+ // evict from cache
675
+ table_cache_->Evict (number);
676
+ }
677
+ std::string fname = ((type == kLogFile ) ? options_.wal_dir : dbname_) +
678
+ " /" + to_delete;
679
+ Log (options_.info_log ,
680
+ " Delete type=%d #%lu" ,
681
+ int (type),
682
+ (unsigned long )number);
683
+
684
+ if (type == kLogFile &&
685
+ (options_.WAL_ttl_seconds > 0 || options_.WAL_size_limit_MB > 0 )) {
686
+ Status s = env_->RenameFile (fname,
687
+ ArchivedLogFileName (options_.wal_dir , number));
688
+ if (!s.ok ()) {
664
689
Log (options_.info_log ,
665
- " Delete type=%d #%lu" ,
666
- int (type),
667
- (unsigned long )number);
668
-
669
- Status st;
670
- if (type == kLogFile && (options_.WAL_ttl_seconds > 0 ||
671
- options_.WAL_size_limit_MB > 0 )) {
672
- st = env_->RenameFile (fname,
673
- ArchivedLogFileName (options_.wal_dir , number));
674
- if (!st.ok ()) {
675
- Log (options_.info_log ,
676
- " RenameFile logfile #%lu FAILED -- %s\n " ,
677
- (unsigned long )number, st.ToString ().c_str ());
678
- }
679
- } else {
680
- st = env_->DeleteFile (fname);
681
- if (!st.ok ()) {
682
- Log (options_.info_log , " Delete type=%d #%lu FAILED -- %s\n " ,
683
- int (type), (unsigned long )number, st.ToString ().c_str ());
684
- }
685
- }
690
+ " RenameFile logfile #%lu FAILED -- %s\n " ,
691
+ (unsigned long )number, s.ToString ().c_str ());
692
+ }
693
+ } else {
694
+ Status s = env_->DeleteFile (fname);
695
+ if (!s.ok ()) {
696
+ Log (options_.info_log , " Delete type=%d #%lu FAILED -- %s\n " ,
697
+ int (type), (unsigned long )number, s.ToString ().c_str ());
686
698
}
687
699
}
688
700
}
@@ -839,7 +851,9 @@ void DBImpl::PurgeObsoleteWALFiles() {
839
851
840
852
// If external_table is set, then apply recovered transactions
841
853
// to that table. This is used for readonly mode.
842
- Status DBImpl::Recover (VersionEdit* edit, MemTable* external_table,
854
+ Status DBImpl::Recover (
855
+ VersionEdit* edit,
856
+ MemTable* external_table,
843
857
bool error_if_log_file_exist) {
844
858
mutex_.AssertHeld ();
845
859
@@ -906,10 +920,11 @@ Status DBImpl::Recover(VersionEdit* edit, MemTable* external_table,
906
920
if (!s.ok ()) {
907
921
return s;
908
922
}
909
- uint64_t number;
910
- FileType type;
923
+
911
924
std::vector<uint64_t > logs;
912
925
for (size_t i = 0 ; i < filenames.size (); i++) {
926
+ uint64_t number;
927
+ FileType type;
913
928
if (ParseFileName (filenames[i], &number, &type)
914
929
&& type == kLogFile
915
930
&& ((number >= min_log) || (number == prev_log))) {
@@ -925,12 +940,12 @@ Status DBImpl::Recover(VersionEdit* edit, MemTable* external_table,
925
940
926
941
// Recover in the order in which the logs were generated
927
942
std::sort (logs.begin (), logs.end ());
928
- for (size_t i = 0 ; i < logs. size (); i++ ) {
929
- s = RecoverLogFile (logs[i] , edit, &max_sequence, external_table);
943
+ for (const auto & log : logs) {
944
+ s = RecoverLogFile (log , edit, &max_sequence, external_table);
930
945
// The previous incarnation may not have written any MANIFEST
931
946
// records after allocating this log number. So we manually
932
947
// update the file number allocation counter in VersionSet.
933
- versions_->MarkFileNumberUsed (logs[i] );
948
+ versions_->MarkFileNumberUsed (log );
934
949
}
935
950
936
951
if (s.ok ()) {
@@ -1147,7 +1162,6 @@ Status DBImpl::WriteLevel0Table(std::vector<MemTable*> &mems, VersionEdit* edit,
1147
1162
}
1148
1163
base->Unref ();
1149
1164
1150
-
1151
1165
// re-acquire the most current version
1152
1166
base = versions_->current ();
1153
1167
@@ -3285,7 +3299,7 @@ Status DBImpl::MakeRoomForWrite(bool force,
3285
3299
3286
3300
} else {
3287
3301
unique_ptr<WritableFile> lfile;
3288
- MemTable* memtmp = nullptr ;
3302
+ MemTable* new_mem = nullptr ;
3289
3303
3290
3304
// Attempt to switch to a new memtable and trigger compaction of old.
3291
3305
// Do this without holding the dbmutex lock.
@@ -3306,7 +3320,7 @@ Status DBImpl::MakeRoomForWrite(bool force,
3306
3320
// Our final size should be less than write_buffer_size
3307
3321
// (compression, etc) but err on the side of caution.
3308
3322
lfile->SetPreallocationBlockSize (1.1 * options_.write_buffer_size );
3309
- memtmp = new MemTable (
3323
+ new_mem = new MemTable (
3310
3324
internal_comparator_, mem_rep_factory_, NumberLevels (), options_);
3311
3325
new_superversion = new SuperVersion (options_.max_write_buffer_number );
3312
3326
}
@@ -3315,7 +3329,7 @@ Status DBImpl::MakeRoomForWrite(bool force,
3315
3329
if (!s.ok ()) {
3316
3330
// Avoid chewing through file number space in a tight loop.
3317
3331
versions_->ReuseFileNumber (new_log_number);
3318
- assert (!memtmp );
3332
+ assert (!new_mem );
3319
3333
break ;
3320
3334
}
3321
3335
logfile_number_ = new_log_number;
@@ -3325,7 +3339,7 @@ Status DBImpl::MakeRoomForWrite(bool force,
3325
3339
if (force) {
3326
3340
imm_.FlushRequested ();
3327
3341
}
3328
- mem_ = memtmp ;
3342
+ mem_ = new_mem ;
3329
3343
mem_->Ref ();
3330
3344
Log (options_.info_log ,
3331
3345
" New memtable created with log file: #%lu\n " ,
@@ -3806,7 +3820,7 @@ Status DB::Open(const Options& options, const std::string& dbname, DB** dbptr) {
3806
3820
delete impl;
3807
3821
return s;
3808
3822
}
3809
- impl->mutex_ .Lock ();
3823
+ impl->mutex_ .Lock (); // DBImpl::Recover() requires lock being held
3810
3824
VersionEdit edit (impl->NumberLevels ());
3811
3825
s = impl->Recover (&edit); // Handles create_if_missing, error_if_exists
3812
3826
if (s.ok ()) {
@@ -3929,7 +3943,7 @@ Status DestroyDB(const std::string& dbname, const Options& options) {
3929
3943
3930
3944
//
3931
3945
// A global method that can dump out the build version
3932
- void dumpLeveldbBuildVersion (Logger * log) {
3946
+ void DumpLeveldbBuildVersion (Logger * log) {
3933
3947
Log (log , " Git sha %s" , rocksdb_build_git_sha);
3934
3948
Log (log , " Compile time %s %s" ,
3935
3949
rocksdb_build_compile_time, rocksdb_build_compile_date);
0 commit comments