Skip to content

Commit 0a51fc2

Browse files
committed
Add e2e test to verify etcd is able to automatically fix the issue
caused by etcd-io#19557.

Signed-off-by: Benjamin Wang <benjamin.ahrtr@gmail.com>
1 parent 3c65dfa commit 0a51fc2

File tree

2 files changed

+176
-17
lines changed

2 files changed

+176
-17
lines changed

tests/e2e/ctl_v3_member_test.go

+175-16
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,12 @@ import (
2424

2525
"github.com/stretchr/testify/require"
2626

27+
"go.etcd.io/bbolt"
2728
"go.etcd.io/etcd/api/v3/etcdserverpb"
29+
"go.etcd.io/etcd/client/pkg/v3/types"
30+
"go.etcd.io/etcd/server/v3/datadir"
31+
"go.etcd.io/etcd/server/v3/etcdserver/api/membership"
32+
"go.etcd.io/etcd/server/v3/mvcc/buckets"
2833
"go.etcd.io/etcd/tests/v3/framework/e2e"
2934
)
3035

@@ -230,31 +235,185 @@ func ctlV3MemberUpdate(cx ctlCtx, memberID, peerURL string) error {
230235
return e2e.SpawnWithExpectWithEnv(cmdArgs, cx.envMap, " updated in cluster ")
231236
}
232237

238+
// TestCtlV3PromotingLearner tests whether etcd can automatically fix the
239+
// issue caused by https://github.com/etcd-io/etcd/issues/19557.
233240
func TestCtlV3PromotingLearner(t *testing.T) {
234-
e2e.BeforeTest(t)
241+
testCases := []struct {
242+
name string
243+
snapshotCount int
244+
writeToV3StoreSuccess bool
245+
}{
246+
{
247+
name: "create snapshot after learner promotion which is not saved to v3store",
248+
snapshotCount: 10,
249+
},
250+
{
251+
name: "not create snapshot and learner promotion is not saved to v3store",
252+
snapshotCount: 0,
253+
},
254+
{
255+
name: "not create snapshot and learner promotion is saved to v3store",
256+
snapshotCount: 0,
257+
writeToV3StoreSuccess: true,
258+
},
259+
}
260+
261+
for _, tc := range testCases {
262+
t.Run(tc.name, func(t *testing.T) {
263+
t.Log("Create a single node etcd cluster")
264+
cfg := e2e.NewConfigNoTLS()
265+
cfg.BasePeerScheme = "unix"
266+
cfg.ClusterSize = 1
267+
cfg.InitialCorruptCheck = true
268+
if tc.snapshotCount != 0 {
269+
cfg.SnapshotCount = tc.snapshotCount
270+
}
271+
272+
epc, err := e2e.NewEtcdProcessCluster(t, cfg)
273+
require.NoError(t, err, "failed to start etcd cluster: %v", err)
274+
defer func() {
275+
derr := epc.Close()
276+
require.NoError(t, derr, "failed to close etcd cluster: %v", derr)
277+
}()
278+
279+
t.Log("Add and start a learner")
280+
learnerID, err := epc.StartNewProc(nil, true, t)
281+
require.NoError(t, err)
282+
283+
t.Log("Write a key to ensure the cluster is healthy so far")
284+
etcdctl := epc.Procs[0].Etcdctl(e2e.ClientNonTLS, false, false)
285+
err = etcdctl.Put("foo", "bar")
286+
require.NoError(t, err)
287+
288+
t.Logf("Promoting the learner %x", learnerID)
289+
resp, err := etcdctl.MemberPromote(learnerID)
290+
require.NoError(t, err)
291+
292+
var promotedMember *etcdserverpb.Member
293+
for _, m := range resp.Members {
294+
if m.ID == learnerID {
295+
promotedMember = m
296+
break
297+
}
298+
}
299+
require.NotNil(t, promotedMember)
300+
t.Logf("The promoted member: %+v", promotedMember)
301+
302+
t.Log("Ensure all members are voting members from user perspective")
303+
ensureAllMembersAreVotingMembers(t, etcdctl)
235304

236-
t.Log("Create a single node etcd cluster")
237-
cfg := e2e.NewConfigNoTLS()
238-
cfg.BasePeerScheme = "unix"
239-
cfg.ClusterSize = 1
305+
if tc.snapshotCount != 0 {
306+
t.Logf("Write %d keys to trigger a snapshot", tc.snapshotCount)
307+
for i := 0; i < tc.snapshotCount; i++ {
308+
err = etcdctl.Put(fmt.Sprintf("key_%d", i), fmt.Sprintf("value_%d", i))
309+
require.NoError(t, err)
310+
}
311+
}
240312

241-
epc, err := e2e.NewEtcdProcessCluster(t, cfg)
242-
require.NoError(t, err, "failed to start etcd cluster: %v", err)
313+
if tc.writeToV3StoreSuccess {
314+
t.Log("Skip manually changing the already promoted learner to a learner in v3store")
315+
} else {
316+
t.Logf("Stopping the already promoted member")
317+
require.NoError(t, epc.Procs[1].Stop())
318+
319+
t.Log("Manually changing the already promoted member to a learner again in v3store")
320+
promotedMember.IsLearner = true
321+
mustSaveMemberIntoBbolt(t, epc.Procs[1].Config().DataDirPath, promotedMember)
322+
323+
t.Log("Starting the member again")
324+
require.NoError(t, epc.Procs[1].Start())
325+
}
326+
327+
t.Log("Checking all members are ready to serve client requests")
328+
for i := 0; i < len(epc.Procs); i++ {
329+
e2e.AssertProcessLogs(t, epc.Procs[i], e2e.EtcdServerReadyLines[0])
330+
}
331+
332+
// Wait for the learner published attribute to be applied by all members in the cluster
333+
t.Log("Write a key to ensure the the learner published attribute has been applied by all members")
334+
for i := 0; i < len(epc.Procs); i++ {
335+
cli := epc.Procs[i].Etcdctl(e2e.ClientNonTLS, false, false)
336+
err = cli.Put("foo", "bar")
337+
require.NoError(t, err)
338+
}
339+
340+
t.Log("Ensure all members in v3store are voting members again")
341+
for i := 0; i < len(epc.Procs); i++ {
342+
t.Logf("Stopping the member: %d", i)
343+
require.NoError(t, epc.Procs[i].Stop())
344+
345+
t.Logf("Checking all members in member's backend store: %d", i)
346+
ensureAllMembersFromV3StoreAreVotingMembers(t, epc.Procs[i].Config().DataDirPath)
347+
348+
t.Logf("Starting the member again: %d", i)
349+
require.NoError(t, epc.Procs[i].Start())
350+
}
351+
})
352+
}
353+
}
354+
355+
func mustSaveMemberIntoBbolt(t *testing.T, dataDir string, protoMember *etcdserverpb.Member) {
356+
dbPath := datadir.ToBackendFileName(dataDir)
357+
db, err := bbolt.Open(dbPath, 0666, nil)
358+
require.NoError(t, err)
243359
defer func() {
244-
derr := epc.Close()
245-
require.NoError(t, derr, "failed to close etcd cluster: %v", derr)
360+
require.NoError(t, db.Close())
246361
}()
247362

248-
t.Log("Add and start a learner")
249-
learnerID, err := epc.StartNewProc(nil, true, t)
363+
m := &membership.Member{
364+
ID: types.ID(protoMember.ID),
365+
RaftAttributes: membership.RaftAttributes{
366+
PeerURLs: protoMember.PeerURLs,
367+
IsLearner: protoMember.IsLearner,
368+
},
369+
Attributes: membership.Attributes{
370+
Name: protoMember.Name,
371+
ClientURLs: protoMember.ClientURLs,
372+
},
373+
}
374+
375+
err = db.Update(func(tx *bbolt.Tx) error {
376+
b := tx.Bucket(buckets.Members.Name())
377+
378+
mkey := []byte(m.ID.String())
379+
mvalue, err := json.Marshal(m)
380+
require.NoError(t, err)
381+
382+
return b.Put(mkey, mvalue)
383+
})
250384
require.NoError(t, err)
385+
}
251386

252-
t.Log("Write a key to ensure the cluster is healthy so far")
253-
etcdctl := epc.Procs[0].Etcdctl(e2e.ClientNonTLS, false, false)
254-
err = etcdctl.Put("foo", "bar")
387+
func ensureAllMembersAreVotingMembers(t *testing.T, etcdctl *e2e.Etcdctl) {
388+
memberListResp, err := etcdctl.MemberList()
255389
require.NoError(t, err)
390+
for _, m := range memberListResp.Members {
391+
require.False(t, m.IsLearner)
392+
}
393+
}
256394

257-
t.Logf("Promoting the learner %x", learnerID)
258-
_, err = etcdctl.MemberPromote(learnerID)
395+
func ensureAllMembersFromV3StoreAreVotingMembers(t *testing.T, dataDir string) {
396+
dbPath := datadir.ToBackendFileName(dataDir)
397+
db, err := bbolt.Open(dbPath, 0400, &bbolt.Options{ReadOnly: true})
259398
require.NoError(t, err)
399+
defer func() {
400+
require.NoError(t, db.Close())
401+
}()
402+
403+
var members []membership.Member
404+
_ = db.View(func(tx *bbolt.Tx) error {
405+
b := tx.Bucket(buckets.Members.Name())
406+
_ = b.ForEach(func(k, v []byte) error {
407+
m := membership.Member{}
408+
err := json.Unmarshal(v, &m)
409+
require.NoError(t, err)
410+
members = append(members, m)
411+
return nil
412+
})
413+
return nil
414+
})
415+
416+
for _, m := range members {
417+
require.Falsef(t, m.IsLearner, "member is still learner: %+v", m)
418+
}
260419
}

tests/go.mod

+1-1
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ require (
3030
github.com/spf13/cobra v1.1.3
3131
github.com/spf13/pflag v1.0.5
3232
github.com/stretchr/testify v1.9.0
33+
go.etcd.io/bbolt v1.3.11
3334
go.etcd.io/etcd/api/v3 v3.5.19
3435
go.etcd.io/etcd/client/pkg/v3 v3.5.19
3536
go.etcd.io/etcd/client/v2 v2.305.19
@@ -78,7 +79,6 @@ require (
7879
github.com/sirupsen/logrus v1.9.3 // indirect
7980
github.com/tmc/grpc-websocket-proxy v0.0.0-20201229170055-e5319fda7802 // indirect
8081
github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2 // indirect
81-
go.etcd.io/bbolt v1.3.11 // indirect
8282
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.20.0 // indirect
8383
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.20.0 // indirect
8484
go.opentelemetry.io/otel/metric v1.20.0 // indirect

0 commit comments

Comments
 (0)