@@ -24,7 +24,12 @@ import (
24
24
25
25
"github.com/stretchr/testify/require"
26
26
27
+ "go.etcd.io/bbolt"
27
28
"go.etcd.io/etcd/api/v3/etcdserverpb"
29
+ "go.etcd.io/etcd/client/pkg/v3/types"
30
+ "go.etcd.io/etcd/server/v3/datadir"
31
+ "go.etcd.io/etcd/server/v3/etcdserver/api/membership"
32
+ "go.etcd.io/etcd/server/v3/mvcc/buckets"
28
33
"go.etcd.io/etcd/tests/v3/framework/e2e"
29
34
)
30
35
@@ -230,31 +235,182 @@ func ctlV3MemberUpdate(cx ctlCtx, memberID, peerURL string) error {
230
235
return e2e .SpawnWithExpectWithEnv (cmdArgs , cx .envMap , " updated in cluster " )
231
236
}
232
237
238
+ // TestCtlV3PromotingLearner tests whether etcd can automatically fix the
239
+ // issue caused by https://github.com/etcd-io/etcd/issues/19557.
233
240
func TestCtlV3PromotingLearner (t * testing.T ) {
234
- e2e .BeforeTest (t )
241
+ testCases := []struct {
242
+ name string
243
+ snapshotCount int
244
+ writeToV3StoreSuccess bool
245
+ }{
246
+ {
247
+ name : "create snapshot after learner promotion which is not saved to v3store" ,
248
+ snapshotCount : 10 ,
249
+ },
250
+ {
251
+ name : "not create snapshot and learner promotion is not saved to v3store" ,
252
+ snapshotCount : 0 ,
253
+ },
254
+ {
255
+ name : "not create snapshot and learner promotion is saved to v3store" ,
256
+ snapshotCount : 0 ,
257
+ writeToV3StoreSuccess : true ,
258
+ },
259
+ }
260
+
261
+ for _ , tc := range testCases {
262
+ t .Run (tc .name , func (t * testing.T ) {
263
+ t .Log ("Create a single node etcd cluster" )
264
+ cfg := e2e .NewConfigNoTLS ()
265
+ cfg .BasePeerScheme = "unix"
266
+ cfg .ClusterSize = 1
267
+ cfg .InitialCorruptCheck = true
268
+ if tc .snapshotCount != 0 {
269
+ cfg .SnapshotCount = tc .snapshotCount
270
+ }
271
+
272
+ epc , err := e2e .NewEtcdProcessCluster (t , cfg )
273
+ require .NoError (t , err , "failed to start etcd cluster: %v" , err )
274
+ defer func () {
275
+ derr := epc .Close ()
276
+ require .NoError (t , derr , "failed to close etcd cluster: %v" , derr )
277
+ }()
278
+
279
+ t .Log ("Add and start a learner" )
280
+ learnerID , err := epc .StartNewProc (nil , true , t )
281
+ require .NoError (t , err )
282
+
283
+ t .Log ("Write a key to ensure the cluster is healthy so far" )
284
+ etcdctl := epc .Procs [0 ].Etcdctl (e2e .ClientNonTLS , false , false )
285
+ err = etcdctl .Put ("foo" , "bar" )
286
+ require .NoError (t , err )
287
+
288
+ t .Logf ("Promoting the learner %x" , learnerID )
289
+ resp , err := etcdctl .MemberPromote (learnerID )
290
+ require .NoError (t , err )
291
+
292
+ var promotedMember * etcdserverpb.Member
293
+ for _ , m := range resp .Members {
294
+ if m .ID == learnerID {
295
+ promotedMember = m
296
+ break
297
+ }
298
+ }
299
+ require .NotNil (t , promotedMember )
300
+ t .Logf ("The promoted member: %+v" , promotedMember )
301
+
302
+ t .Log ("Ensure all members are voting members from user perspective" )
303
+ ensureAllMembersAreVotingMembers (t , etcdctl )
235
304
236
- t .Log ("Create a single node etcd cluster" )
237
- cfg := e2e .NewConfigNoTLS ()
238
- cfg .BasePeerScheme = "unix"
239
- cfg .ClusterSize = 1
305
+ if tc .snapshotCount != 0 {
306
+ t .Logf ("Write %d keys to trigger a snapshot" , tc .snapshotCount )
307
+ for i := 0 ; i < tc .snapshotCount ; i ++ {
308
+ err = etcdctl .Put (fmt .Sprintf ("key_%d" , i ), fmt .Sprintf ("value_%d" , i ))
309
+ require .NoError (t , err )
310
+ }
311
+ }
240
312
241
- epc , err := e2e .NewEtcdProcessCluster (t , cfg )
242
- require .NoError (t , err , "failed to start etcd cluster: %v" , err )
313
+ if tc .writeToV3StoreSuccess {
314
+ t .Log ("Skip manually changing the already promoted learner to a learner in v3store" )
315
+ } else {
316
+ t .Logf ("Stopping the already promoted member" )
317
+ require .NoError (t , epc .Procs [1 ].Stop ())
318
+
319
+ t .Log ("Manually changing the already promoted member to a learner again in v3store" )
320
+ promotedMember .IsLearner = true
321
+ mustSaveMemberIntoBbolt (t , epc .Procs [1 ].Config ().DataDirPath , promotedMember )
322
+
323
+ t .Log ("Starting the member again" )
324
+ require .NoError (t , epc .Procs [1 ].Start ())
325
+ }
326
+
327
+ t .Log ("Checking all members are ready to serve client requests" )
328
+ for i := 0 ; i < len (epc .Procs ); i ++ {
329
+ e2e .AssertProcessLogs (t , epc .Procs [i ], e2e .EtcdServerReadyLines [0 ])
330
+ }
331
+
332
+ // Wait for the learner published attribute to be applied by all members in the cluster
333
+ t .Log ("Write a key to ensure the the learner published attribute has been applied by all members" )
334
+ err = etcdctl .Put ("foo" , "bar" )
335
+ require .NoError (t , err )
336
+
337
+ t .Log ("Ensure all members in v3store are voting members again" )
338
+ for i := 0 ; i < len (epc .Procs ); i ++ {
339
+ t .Logf ("Stopping the member: %d" , i )
340
+ require .NoError (t , epc .Procs [i ].Stop ())
341
+
342
+ t .Logf ("Checking all members in member's backend store: %d" , i )
343
+ ensureAllMembersFromV3StoreAreVotingMembers (t , epc .Procs [i ].Config ().DataDirPath )
344
+
345
+ t .Logf ("Starting the member again: %d" , i )
346
+ require .NoError (t , epc .Procs [i ].Start ())
347
+ }
348
+ })
349
+ }
350
+ }
351
+
352
+ func mustSaveMemberIntoBbolt (t * testing.T , dataDir string , protoMember * etcdserverpb.Member ) {
353
+ dbPath := datadir .ToBackendFileName (dataDir )
354
+ db , err := bbolt .Open (dbPath , 0666 , nil )
355
+ require .NoError (t , err )
243
356
defer func () {
244
- derr := epc .Close ()
245
- require .NoError (t , derr , "failed to close etcd cluster: %v" , derr )
357
+ require .NoError (t , db .Close ())
246
358
}()
247
359
248
- t .Log ("Add and start a learner" )
249
- learnerID , err := epc .StartNewProc (nil , true , t )
360
+ m := & membership.Member {
361
+ ID : types .ID (protoMember .ID ),
362
+ RaftAttributes : membership.RaftAttributes {
363
+ PeerURLs : protoMember .PeerURLs ,
364
+ IsLearner : protoMember .IsLearner ,
365
+ },
366
+ Attributes : membership.Attributes {
367
+ Name : protoMember .Name ,
368
+ ClientURLs : protoMember .ClientURLs ,
369
+ },
370
+ }
371
+
372
+ err = db .Update (func (tx * bbolt.Tx ) error {
373
+ b := tx .Bucket (buckets .Members .Name ())
374
+
375
+ mkey := []byte (m .ID .String ())
376
+ mvalue , err := json .Marshal (m )
377
+ require .NoError (t , err )
378
+
379
+ return b .Put (mkey , mvalue )
380
+ })
250
381
require .NoError (t , err )
382
+ }
251
383
252
- t .Log ("Write a key to ensure the cluster is healthy so far" )
253
- etcdctl := epc .Procs [0 ].Etcdctl (e2e .ClientNonTLS , false , false )
254
- err = etcdctl .Put ("foo" , "bar" )
384
+ func ensureAllMembersAreVotingMembers (t * testing.T , etcdctl * e2e.Etcdctl ) {
385
+ memberListResp , err := etcdctl .MemberList ()
255
386
require .NoError (t , err )
387
+ for _ , m := range memberListResp .Members {
388
+ require .False (t , m .IsLearner )
389
+ }
390
+ }
256
391
257
- t .Logf ("Promoting the learner %x" , learnerID )
258
- _ , err = etcdctl .MemberPromote (learnerID )
392
+ func ensureAllMembersFromV3StoreAreVotingMembers (t * testing.T , dataDir string ) {
393
+ dbPath := datadir .ToBackendFileName (dataDir )
394
+ db , err := bbolt .Open (dbPath , 0400 , & bbolt.Options {ReadOnly : true })
259
395
require .NoError (t , err )
396
+ defer func () {
397
+ require .NoError (t , db .Close ())
398
+ }()
399
+
400
+ var members []membership.Member
401
+ _ = db .View (func (tx * bbolt.Tx ) error {
402
+ b := tx .Bucket (buckets .Members .Name ())
403
+ _ = b .ForEach (func (k , v []byte ) error {
404
+ m := membership.Member {}
405
+ err := json .Unmarshal (v , & m )
406
+ require .NoError (t , err )
407
+ members = append (members , m )
408
+ return nil
409
+ })
410
+ return nil
411
+ })
412
+
413
+ for _ , m := range members {
414
+ require .Falsef (t , m .IsLearner , "member is still learner: %+v" , m )
415
+ }
260
416
}
0 commit comments