Skip to content

Commit e3f396f

Browse files
committed
Some fixes to BackupableDB
Summary: (1) Report corruption if backup meta file has tailing data that was not read. This should fix: facebook/rocksdb#81 (also, @sdong reported similar issue) (2) Don't use OS buffer when copying file to backup directory. We don't need the file in cache since we won't be reading it twice (3) Don't delete newer backups when somebody tries to backup the diverged DB (restore from older backup, add new data, try to backup). Rather, just fail the new backup. Test Plan: backupable_db_test Reviewers: ljin, dhruba, sdong Reviewed By: ljin CC: leveldb, sdong Differential Revision: https://reviews.facebook.net/D16287
1 parent a1d56e7 commit e3f396f

File tree

4 files changed

+104
-78
lines changed

4 files changed

+104
-78
lines changed

HISTORY.md

+1
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
* By default, checksums are verified on every read from database
99
* Added is_manual_compaction to CompactionFilter::Context
1010
* Added "virtual void WaitForJoin() = 0" in class Env
11+
* Removed BackupEngine::DeleteBackupsNewerThan() function
1112

1213
### New Features
1314
* If we find one truncated record at the end of the MANIFEST or WAL files,

include/utilities/backupable_db.h

+34-36
Original file line numberDiff line numberDiff line change
@@ -58,14 +58,13 @@ struct BackupableDBOptions {
5858
explicit BackupableDBOptions(const std::string& _backup_dir,
5959
Env* _backup_env = nullptr,
6060
bool _share_table_files = true,
61-
Logger* _info_log = nullptr,
62-
bool _sync = true,
63-
bool _destroy_old_data = false) :
64-
backup_dir(_backup_dir),
65-
backup_env(_backup_env),
66-
info_log(_info_log),
67-
sync(_sync),
68-
destroy_old_data(_destroy_old_data) { }
61+
Logger* _info_log = nullptr, bool _sync = true,
62+
bool _destroy_old_data = false)
63+
: backup_dir(_backup_dir),
64+
backup_env(_backup_env),
65+
info_log(_info_log),
66+
sync(_sync),
67+
destroy_old_data(_destroy_old_data) {}
6968
};
7069

7170
typedef uint32_t BackupID;
@@ -99,8 +98,6 @@ class BackupEngine {
9998
const std::string& wal_dir) = 0;
10099
virtual Status RestoreDBFromLatestBackup(const std::string& db_dir,
101100
const std::string& wal_dir) = 0;
102-
103-
virtual void DeleteBackupsNewerThan(uint64_t sequence_number) = 0;
104101
};
105102

106103
// Stack your DB with BackupableDB to be able to backup the DB
@@ -138,32 +135,33 @@ class BackupableDB : public StackableDB {
138135

139136
// Use this class to access information about backups and restore from them
140137
class RestoreBackupableDB {
141-
public:
142-
RestoreBackupableDB(Env* db_env, const BackupableDBOptions& options);
143-
~RestoreBackupableDB();
144-
145-
// Returns info about backups in backup_info
146-
void GetBackupInfo(std::vector<BackupInfo>* backup_info);
147-
148-
// restore from backup with backup_id
149-
// IMPORTANT -- if options_.share_table_files == true and you restore DB
150-
// from some backup that is not the latest, and you start creating new
151-
// backups from the new DB, all the backups that were newer than the
152-
// backup you restored from will be deleted
153-
//
154-
// Example: Let's say you have backups 1, 2, 3, 4, 5 and you restore 3.
155-
// If you try creating a new backup now, old backups 4 and 5 will be deleted
156-
// and new backup with ID 4 will be created.
157-
Status RestoreDBFromBackup(BackupID backup_id, const std::string& db_dir,
158-
const std::string& wal_dir);
159-
160-
// restore from the latest backup
161-
Status RestoreDBFromLatestBackup(const std::string& db_dir,
162-
const std::string& wal_dir);
163-
// deletes old backups, keeping latest num_backups_to_keep alive
164-
Status PurgeOldBackups(uint32_t num_backups_to_keep);
165-
// deletes a specific backup
166-
Status DeleteBackup(BackupID backup_id);
138+
public:
139+
RestoreBackupableDB(Env* db_env, const BackupableDBOptions& options);
140+
~RestoreBackupableDB();
141+
142+
// Returns info about backups in backup_info
143+
void GetBackupInfo(std::vector<BackupInfo>* backup_info);
144+
145+
// restore from backup with backup_id
146+
// IMPORTANT -- if options_.share_table_files == true and you restore DB
147+
// from some backup that is not the latest, and you start creating new
148+
// backups from the new DB, they will probably fail
149+
//
150+
// Example: Let's say you have backups 1, 2, 3, 4, 5 and you restore 3.
151+
// If you add new data to the DB and try creating a new backup now, the
152+
// database will diverge from backups 4 and 5 and the new backup will fail.
153+
// If you want to create new backup, you will first have to delete backups 4
154+
// and 5.
155+
Status RestoreDBFromBackup(BackupID backup_id, const std::string& db_dir,
156+
const std::string& wal_dir);
157+
158+
// restore from the latest backup
159+
Status RestoreDBFromLatestBackup(const std::string& db_dir,
160+
const std::string& wal_dir);
161+
// deletes old backups, keeping latest num_backups_to_keep alive
162+
Status PurgeOldBackups(uint32_t num_backups_to_keep);
163+
// deletes a specific backup
164+
Status DeleteBackup(BackupID backup_id);
167165

168166
private:
169167
BackupEngine* backup_engine_;

utilities/backupable/backupable_db.cc

+47-30
Original file line numberDiff line numberDiff line change
@@ -46,8 +46,6 @@ class BackupEngineImpl : public BackupEngine {
4646
return RestoreDBFromBackup(latest_backup_id_, db_dir, wal_dir);
4747
}
4848

49-
void DeleteBackupsNewerThan(uint64_t sequence_number);
50-
5149
private:
5250
struct FileInfo {
5351
FileInfo(const std::string& fname, uint64_t sz, uint32_t checksum)
@@ -185,6 +183,12 @@ class BackupEngineImpl : public BackupEngine {
185183
Env* db_env_;
186184
Env* backup_env_;
187185

186+
// directories
187+
unique_ptr<Directory> backup_directory_;
188+
unique_ptr<Directory> shared_directory_;
189+
unique_ptr<Directory> meta_directory_;
190+
unique_ptr<Directory> private_directory_;
191+
188192
static const size_t copy_file_buffer_size_ = 5 * 1024 * 1024LL; // 5MB
189193
};
190194

@@ -203,11 +207,17 @@ BackupEngineImpl::BackupEngineImpl(Env* db_env,
203207

204208
// create all the dirs we need
205209
backup_env_->CreateDirIfMissing(GetAbsolutePath());
210+
backup_env_->NewDirectory(GetAbsolutePath(), &backup_directory_);
206211
if (options_.share_table_files) {
207212
backup_env_->CreateDirIfMissing(GetAbsolutePath(GetSharedFileRel()));
213+
backup_env_->NewDirectory(GetAbsolutePath(GetSharedFileRel()),
214+
&shared_directory_);
208215
}
209216
backup_env_->CreateDirIfMissing(GetAbsolutePath(GetPrivateDirRel()));
217+
backup_env_->NewDirectory(GetAbsolutePath(GetPrivateDirRel()),
218+
&private_directory_);
210219
backup_env_->CreateDirIfMissing(GetBackupMetaDir());
220+
backup_env_->NewDirectory(GetBackupMetaDir(), &meta_directory_);
211221

212222
std::vector<std::string> backup_meta_files;
213223
backup_env_->GetChildren(GetBackupMetaDir(), &backup_meta_files);
@@ -279,26 +289,6 @@ BackupEngineImpl::BackupEngineImpl(Env* db_env,
279289

280290
BackupEngineImpl::~BackupEngineImpl() { LogFlush(options_.info_log); }
281291

282-
void BackupEngineImpl::DeleteBackupsNewerThan(uint64_t sequence_number) {
283-
for (auto backup : backups_) {
284-
if (backup.second.GetSequenceNumber() > sequence_number) {
285-
Log(options_.info_log,
286-
"Deleting backup %u because sequence number (%" PRIu64
287-
") is newer than %" PRIu64 "",
288-
backup.first, backup.second.GetSequenceNumber(), sequence_number);
289-
backup.second.Delete();
290-
obsolete_backups_.push_back(backup.first);
291-
}
292-
}
293-
for (auto ob : obsolete_backups_) {
294-
backups_.erase(backups_.find(ob));
295-
}
296-
auto itr = backups_.end();
297-
latest_backup_id_ = (itr == backups_.begin()) ? 0 : (--itr)->first;
298-
PutLatestBackupFileContents(latest_backup_id_); // Ignore errors
299-
GarbageCollection(false);
300-
}
301-
302292
Status BackupEngineImpl::CreateNewBackup(DB* db, bool flush_before_backup) {
303293
Status s;
304294
std::vector<std::string> live_files;
@@ -348,9 +338,8 @@ Status BackupEngineImpl::CreateNewBackup(DB* db, bool flush_before_backup) {
348338
return Status::Corruption("Can't parse file name. This is very bad");
349339
}
350340
// we should only get sst, manifest and current files here
351-
assert(type == kTableFile ||
352-
type == kDescriptorFile ||
353-
type == kCurrentFile);
341+
assert(type == kTableFile || type == kDescriptorFile ||
342+
type == kCurrentFile);
354343

355344
// rules:
356345
// * if it's kTableFile, than it's shared
@@ -394,6 +383,28 @@ Status BackupEngineImpl::CreateNewBackup(DB* db, bool flush_before_backup) {
394383
// install the newly created backup meta! (atomic)
395384
s = PutLatestBackupFileContents(new_backup_id);
396385
}
386+
if (s.ok() && options_.sync) {
387+
unique_ptr<Directory> backup_private_directory;
388+
backup_env_->NewDirectory(
389+
GetAbsolutePath(GetPrivateFileRel(new_backup_id, false)),
390+
&backup_private_directory);
391+
if (backup_private_directory != nullptr) {
392+
backup_private_directory->Fsync();
393+
}
394+
if (private_directory_ != nullptr) {
395+
private_directory_->Fsync();
396+
}
397+
if (meta_directory_ != nullptr) {
398+
meta_directory_->Fsync();
399+
}
400+
if (shared_directory_ != nullptr) {
401+
shared_directory_->Fsync();
402+
}
403+
if (backup_directory_ != nullptr) {
404+
backup_directory_->Fsync();
405+
}
406+
}
407+
397408
if (!s.ok()) {
398409
// clean all the files we might have created
399410
Log(options_.info_log, "Backup failed -- %s", s.ToString().c_str());
@@ -591,6 +602,7 @@ Status BackupEngineImpl::CopyFile(const std::string& src,
591602
unique_ptr<SequentialFile> src_file;
592603
EnvOptions env_options;
593604
env_options.use_mmap_writes = false;
605+
env_options.use_os_buffer = false;
594606
if (size != nullptr) {
595607
*size = 0;
596608
}
@@ -706,6 +718,7 @@ Status BackupEngineImpl::CalculateChecksum(const std::string& src, Env* src_env,
706718

707719
EnvOptions env_options;
708720
env_options.use_mmap_writes = false;
721+
env_options.use_os_buffer = false;
709722

710723
std::unique_ptr<SequentialFile> src_file;
711724
Status s = src_env->NewSequentialFile(src, &src_file, env_options);
@@ -893,6 +906,9 @@ Status BackupEngineImpl::BackupMeta::LoadFromFile(
893906

894907
uint64_t size;
895908
s = env_->GetFileSize(backup_dir + "/" + filename, &size);
909+
if (!s.ok()) {
910+
return s;
911+
}
896912

897913
if (line.empty()) {
898914
return Status::Corruption("File checksum is missing");
@@ -913,6 +929,11 @@ Status BackupEngineImpl::BackupMeta::LoadFromFile(
913929
files.emplace_back(filename, size, checksum_value);
914930
}
915931

932+
if (s.ok() && data.size() > 0) {
933+
// file has to be read completely. if not, we count it as corruption
934+
s = Status::Corruption("Tailing data in backup meta file");
935+
}
936+
916937
if (s.ok()) {
917938
for (const auto& file_info : files) {
918939
s = AddFile(file_info);
@@ -968,11 +989,7 @@ Status BackupEngineImpl::BackupMeta::StoreToFile(bool sync) {
968989

969990
BackupableDB::BackupableDB(DB* db, const BackupableDBOptions& options)
970991
: StackableDB(db),
971-
backup_engine_(new BackupEngineImpl(db->GetEnv(), options)) {
972-
if (options.share_table_files) {
973-
backup_engine_->DeleteBackupsNewerThan(GetLatestSequenceNumber());
974-
}
975-
}
992+
backup_engine_(new BackupEngineImpl(db->GetEnv(), options)) {}
976993

977994
BackupableDB::~BackupableDB() {
978995
delete backup_engine_;

utilities/backupable/backupable_db_test.cc

+22-12
Original file line numberDiff line numberDiff line change
@@ -709,27 +709,37 @@ TEST(BackupableDBTest, OnlineIntegrationTest) {
709709
CloseRestoreDB();
710710
}
711711

712-
TEST(BackupableDBTest, DeleteNewerBackups) {
712+
TEST(BackupableDBTest, FailOverwritingBackups) {
713+
options_.write_buffer_size = 1024 * 1024 * 1024; // 1GB
713714
// create backups 1, 2, 3, 4, 5
714715
OpenBackupableDB(true);
715716
for (int i = 0; i < 5; ++i) {
716717
FillDB(db_.get(), 100 * i, 100 * (i + 1));
717-
ASSERT_OK(db_->CreateNewBackup(!!(i % 2)));
718+
ASSERT_OK(db_->CreateNewBackup(true));
719+
CloseBackupableDB();
720+
OpenBackupableDB(false);
718721
}
719722
CloseBackupableDB();
720723

721-
// backup 3 is fine
722-
AssertBackupConsistency(3, 0, 300, 500);
723-
// this should delete backups 4 and 5
724-
OpenBackupableDB();
725-
CloseBackupableDB();
726-
// backups 4 and 5 don't exist
724+
// restore 3
727725
OpenRestoreDB();
728-
Status s = restore_db_->RestoreDBFromBackup(4, dbname_, dbname_);
729-
ASSERT_TRUE(s.IsNotFound());
730-
s = restore_db_->RestoreDBFromBackup(5, dbname_, dbname_);
731-
ASSERT_TRUE(s.IsNotFound());
726+
ASSERT_OK(restore_db_->RestoreDBFromBackup(3, dbname_, dbname_));
732727
CloseRestoreDB();
728+
729+
OpenBackupableDB(false);
730+
FillDB(db_.get(), 0, 300);
731+
Status s = db_->CreateNewBackup(true);
732+
// the new backup fails because new table files
733+
// clash with old table files from backups 4 and 5
734+
// (since write_buffer_size is huge, we can be sure that
735+
// each backup will generate only one sst file and that
736+
// a file generated by a new backup is the same as
737+
// sst file generated by backup 4)
738+
ASSERT_TRUE(s.IsCorruption());
739+
ASSERT_OK(db_->DeleteBackup(4));
740+
ASSERT_OK(db_->DeleteBackup(5));
741+
// now, the backup can succeed
742+
ASSERT_OK(db_->CreateNewBackup(true));
733743
}
734744

735745
TEST(BackupableDBTest, NoShareTableFiles) {

0 commit comments

Comments
 (0)