Skip to content

Commit fcd80db

Browse files
committed
Add e2e test to verify etcd is able to automatically fix the issue
caused by #19557

Signed-off-by: Benjamin Wang <benjamin.ahrtr@gmail.com>
1 parent 3c65dfa commit fcd80db

File tree

2 files changed

+173
-17
lines changed

2 files changed

+173
-17
lines changed

tests/e2e/ctl_v3_member_test.go

+172-16
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,12 @@ import (
2424

2525
"github.com/stretchr/testify/require"
2626

27+
"go.etcd.io/bbolt"
2728
"go.etcd.io/etcd/api/v3/etcdserverpb"
29+
"go.etcd.io/etcd/client/pkg/v3/types"
30+
"go.etcd.io/etcd/server/v3/datadir"
31+
"go.etcd.io/etcd/server/v3/etcdserver/api/membership"
32+
"go.etcd.io/etcd/server/v3/mvcc/buckets"
2833
"go.etcd.io/etcd/tests/v3/framework/e2e"
2934
)
3035

@@ -230,31 +235,182 @@ func ctlV3MemberUpdate(cx ctlCtx, memberID, peerURL string) error {
230235
return e2e.SpawnWithExpectWithEnv(cmdArgs, cx.envMap, " updated in cluster ")
231236
}
232237

238+
// TestCtlV3PromotingLearner tests whether etcd can automatically fix the
239+
// issue caused by https://github.com/etcd-io/etcd/issues/19557.
233240
func TestCtlV3PromotingLearner(t *testing.T) {
234-
e2e.BeforeTest(t)
241+
testCases := []struct {
242+
name string
243+
snapshotCount int
244+
writeToV3StoreSuccess bool
245+
}{
246+
{
247+
name: "create snapshot after learner promotion which is not saved to v3store",
248+
snapshotCount: 10,
249+
},
250+
{
251+
name: "not create snapshot and learner promotion is not saved to v3store",
252+
snapshotCount: 0,
253+
},
254+
{
255+
name: "not create snapshot and learner promotion is saved to v3store",
256+
snapshotCount: 0,
257+
writeToV3StoreSuccess: true,
258+
},
259+
}
260+
261+
for _, tc := range testCases {
262+
t.Run(tc.name, func(t *testing.T) {
263+
t.Log("Create a single node etcd cluster")
264+
cfg := e2e.NewConfigNoTLS()
265+
cfg.BasePeerScheme = "unix"
266+
cfg.ClusterSize = 1
267+
cfg.InitialCorruptCheck = true
268+
if tc.snapshotCount != 0 {
269+
cfg.SnapshotCount = tc.snapshotCount
270+
}
271+
272+
epc, err := e2e.NewEtcdProcessCluster(t, cfg)
273+
require.NoError(t, err, "failed to start etcd cluster: %v", err)
274+
defer func() {
275+
derr := epc.Close()
276+
require.NoError(t, derr, "failed to close etcd cluster: %v", derr)
277+
}()
278+
279+
t.Log("Add and start a learner")
280+
learnerID, err := epc.StartNewProc(nil, true, t)
281+
require.NoError(t, err)
282+
283+
t.Log("Write a key to ensure the cluster is healthy so far")
284+
etcdctl := epc.Procs[0].Etcdctl(e2e.ClientNonTLS, false, false)
285+
err = etcdctl.Put("foo", "bar")
286+
require.NoError(t, err)
287+
288+
t.Logf("Promoting the learner %x", learnerID)
289+
resp, err := etcdctl.MemberPromote(learnerID)
290+
require.NoError(t, err)
291+
292+
var promotedMember *etcdserverpb.Member
293+
for _, m := range resp.Members {
294+
if m.ID == learnerID {
295+
promotedMember = m
296+
break
297+
}
298+
}
299+
require.NotNil(t, promotedMember)
300+
t.Logf("The promoted member: %+v", promotedMember)
301+
302+
t.Log("Ensure all members are voting members from user perspective")
303+
ensureAllMembersAreVotingMembers(t, etcdctl)
235304

236-
t.Log("Create a single node etcd cluster")
237-
cfg := e2e.NewConfigNoTLS()
238-
cfg.BasePeerScheme = "unix"
239-
cfg.ClusterSize = 1
305+
if tc.snapshotCount != 0 {
306+
t.Logf("Write %d keys to trigger a snapshot", tc.snapshotCount)
307+
for i := 0; i < tc.snapshotCount; i++ {
308+
err = etcdctl.Put(fmt.Sprintf("key_%d", i), fmt.Sprintf("value_%d", i))
309+
require.NoError(t, err)
310+
}
311+
}
240312

241-
epc, err := e2e.NewEtcdProcessCluster(t, cfg)
242-
require.NoError(t, err, "failed to start etcd cluster: %v", err)
313+
if tc.writeToV3StoreSuccess {
314+
t.Log("Skip manually changing the already promoted learner to a learner in v3store")
315+
} else {
316+
t.Logf("Stopping the already promoted member")
317+
require.NoError(t, epc.Procs[1].Stop())
318+
319+
t.Log("Manually changing the already promoted member to a learner again in v3store")
320+
promotedMember.IsLearner = true
321+
mustSaveMemberIntoBbolt(t, epc.Procs[1].Config().DataDirPath, promotedMember)
322+
323+
t.Log("Starting the member again")
324+
require.NoError(t, epc.Procs[1].Start())
325+
}
326+
327+
t.Log("Checking all members are ready to serve client requests")
328+
for i := 0; i < len(epc.Procs); i++ {
329+
e2e.AssertProcessLogs(t, epc.Procs[i], e2e.EtcdServerReadyLines[0])
330+
}
331+
332+
// Wait for the learner published attribute to be applied by all members in the cluster
333+
t.Log("Write a key to ensure the the learner published attribute has been applied by all members")
334+
err = etcdctl.Put("foo", "bar")
335+
require.NoError(t, err)
336+
337+
t.Log("Ensure all members in v3store are voting members again")
338+
for i := 0; i < len(epc.Procs); i++ {
339+
t.Logf("Stopping the member: %d", i)
340+
require.NoError(t, epc.Procs[i].Stop())
341+
342+
t.Logf("Checking all members in member's backend store: %d", i)
343+
ensureAllMembersFromV3StoreAreVotingMembers(t, epc.Procs[i].Config().DataDirPath)
344+
345+
t.Logf("Starting the member again: %d", i)
346+
require.NoError(t, epc.Procs[i].Start())
347+
}
348+
})
349+
}
350+
}
351+
352+
func mustSaveMemberIntoBbolt(t *testing.T, dataDir string, protoMember *etcdserverpb.Member) {
353+
dbPath := datadir.ToBackendFileName(dataDir)
354+
db, err := bbolt.Open(dbPath, 0666, nil)
355+
require.NoError(t, err)
243356
defer func() {
244-
derr := epc.Close()
245-
require.NoError(t, derr, "failed to close etcd cluster: %v", derr)
357+
require.NoError(t, db.Close())
246358
}()
247359

248-
t.Log("Add and start a learner")
249-
learnerID, err := epc.StartNewProc(nil, true, t)
360+
m := &membership.Member{
361+
ID: types.ID(protoMember.ID),
362+
RaftAttributes: membership.RaftAttributes{
363+
PeerURLs: protoMember.PeerURLs,
364+
IsLearner: protoMember.IsLearner,
365+
},
366+
Attributes: membership.Attributes{
367+
Name: protoMember.Name,
368+
ClientURLs: protoMember.ClientURLs,
369+
},
370+
}
371+
372+
err = db.Update(func(tx *bbolt.Tx) error {
373+
b := tx.Bucket(buckets.Members.Name())
374+
375+
mkey := []byte(m.ID.String())
376+
mvalue, err := json.Marshal(m)
377+
require.NoError(t, err)
378+
379+
return b.Put(mkey, mvalue)
380+
})
250381
require.NoError(t, err)
382+
}
251383

252-
t.Log("Write a key to ensure the cluster is healthy so far")
253-
etcdctl := epc.Procs[0].Etcdctl(e2e.ClientNonTLS, false, false)
254-
err = etcdctl.Put("foo", "bar")
384+
func ensureAllMembersAreVotingMembers(t *testing.T, etcdctl *e2e.Etcdctl) {
385+
memberListResp, err := etcdctl.MemberList()
255386
require.NoError(t, err)
387+
for _, m := range memberListResp.Members {
388+
require.False(t, m.IsLearner)
389+
}
390+
}
256391

257-
t.Logf("Promoting the learner %x", learnerID)
258-
_, err = etcdctl.MemberPromote(learnerID)
392+
func ensureAllMembersFromV3StoreAreVotingMembers(t *testing.T, dataDir string) {
393+
dbPath := datadir.ToBackendFileName(dataDir)
394+
db, err := bbolt.Open(dbPath, 0400, &bbolt.Options{ReadOnly: true})
259395
require.NoError(t, err)
396+
defer func() {
397+
require.NoError(t, db.Close())
398+
}()
399+
400+
var members []membership.Member
401+
_ = db.View(func(tx *bbolt.Tx) error {
402+
b := tx.Bucket(buckets.Members.Name())
403+
_ = b.ForEach(func(k, v []byte) error {
404+
m := membership.Member{}
405+
err := json.Unmarshal(v, &m)
406+
require.NoError(t, err)
407+
members = append(members, m)
408+
return nil
409+
})
410+
return nil
411+
})
412+
413+
for _, m := range members {
414+
require.Falsef(t, m.IsLearner, "member is still learner: %+v", m)
415+
}
260416
}

tests/go.mod

+1-1
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ require (
3030
github.com/spf13/cobra v1.1.3
3131
github.com/spf13/pflag v1.0.5
3232
github.com/stretchr/testify v1.9.0
33+
go.etcd.io/bbolt v1.3.11
3334
go.etcd.io/etcd/api/v3 v3.5.19
3435
go.etcd.io/etcd/client/pkg/v3 v3.5.19
3536
go.etcd.io/etcd/client/v2 v2.305.19
@@ -78,7 +79,6 @@ require (
7879
github.com/sirupsen/logrus v1.9.3 // indirect
7980
github.com/tmc/grpc-websocket-proxy v0.0.0-20201229170055-e5319fda7802 // indirect
8081
github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2 // indirect
81-
go.etcd.io/bbolt v1.3.11 // indirect
8282
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.20.0 // indirect
8383
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.20.0 // indirect
8484
go.opentelemetry.io/otel/metric v1.20.0 // indirect

0 commit comments

Comments
 (0)