From b3724581bc1dc108b3cc9fbcaac6aa5886c39bdb Mon Sep 17 00:00:00 2001 From: davidby-influx Date: Sun, 15 Nov 2020 21:02:00 -0800 Subject: [PATCH 1/2] fix(tsm1): "snapshot in progress" error during backup Loop with backoff in (*Engine).CreateSnapshot() to retry (*Engine).WriteSnapshot() up to 3 times if ErrSnapshotInProgress is returned. Then continue on no error or on ErrSnapshotInProgress if skipCacheOk is true. https://github.com/influxdata/plutonium/issues/3227 (cherry picked from commit dfa6aa8cea479fd2973740a8b4059496e73c7393) --- tsdb/engine/tsm1/engine.go | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tsdb/engine/tsm1/engine.go b/tsdb/engine/tsm1/engine.go index ddd1ad45c6b..51039a19c3e 100644 --- a/tsdb/engine/tsm1/engine.go +++ b/tsdb/engine/tsm1/engine.go @@ -1941,7 +1941,13 @@ func (e *Engine) WriteSnapshot() (err error) { // skipCacheOk controls whether it is permissible to fail writing out // in-memory cache data when a previous snapshot is in progress func (e *Engine) CreateSnapshot(skipCacheOk bool) (string, error) { - if err := e.WriteSnapshot(); (err == ErrSnapshotInProgress) && skipCacheOk { + err := e.WriteSnapshot() + for i := 0; (i < 3) && (err == ErrSnapshotInProgress) ; i += 1 { + backoff := time.Duration(math.Pow(32, float64(i))) * time.Millisecond + time.Sleep(backoff) + err = e.WriteSnapshot() + } + if (err == ErrSnapshotInProgress) && skipCacheOk { e.logger.Warn("Snapshotter busy: proceeding without cache contents.") } else if err != nil { return "", err From 0faac1a4782269eaa81803df1a90525c64863a97 Mon Sep 17 00:00:00 2001 From: davidby-influx Date: Mon, 16 Nov 2020 21:25:26 -0800 Subject: [PATCH 2/2] chore(tsm1): fix formatting Failed to format code before commit. 
--- tsdb/engine/tsm1/engine.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tsdb/engine/tsm1/engine.go b/tsdb/engine/tsm1/engine.go index 51039a19c3e..05cc34f662f 100644 --- a/tsdb/engine/tsm1/engine.go +++ b/tsdb/engine/tsm1/engine.go @@ -1942,7 +1942,7 @@ func (e *Engine) WriteSnapshot() (err error) { // in-memory cache data when a previous snapshot is in progress func (e *Engine) CreateSnapshot(skipCacheOk bool) (string, error) { err := e.WriteSnapshot() - for i := 0; (i < 3) && (err == ErrSnapshotInProgress) ; i += 1 { + for i := 0; (i < 3) && (err == ErrSnapshotInProgress); i += 1 { backoff := time.Duration(math.Pow(32, float64(i))) * time.Millisecond time.Sleep(backoff) err = e.WriteSnapshot()