@@ -24,7 +24,12 @@ import (
24
24
25
25
"github.com/stretchr/testify/require"
26
26
27
+ "go.etcd.io/bbolt"
27
28
"go.etcd.io/etcd/api/v3/etcdserverpb"
29
+ "go.etcd.io/etcd/client/pkg/v3/types"
30
+ "go.etcd.io/etcd/server/v3/datadir"
31
+ "go.etcd.io/etcd/server/v3/etcdserver/api/membership"
32
+ "go.etcd.io/etcd/server/v3/mvcc/buckets"
28
33
"go.etcd.io/etcd/tests/v3/framework/e2e"
29
34
)
30
35
@@ -230,31 +235,185 @@ func ctlV3MemberUpdate(cx ctlCtx, memberID, peerURL string) error {
230
235
return e2e .SpawnWithExpectWithEnv (cmdArgs , cx .envMap , " updated in cluster " )
231
236
}
232
237
238
+ // TestCtlV3PromotingLearner tests whether etcd can automatically fix the
239
+ // issue caused by https://github.com/etcd-io/etcd/issues/19557.
233
240
func TestCtlV3PromotingLearner (t * testing.T ) {
234
- e2e .BeforeTest (t )
241
+ testCases := []struct {
242
+ name string
243
+ snapshotCount int
244
+ writeToV3StoreSuccess bool
245
+ }{
246
+ {
247
+ name : "create snapshot after learner promotion which is not saved to v3store" ,
248
+ snapshotCount : 10 ,
249
+ },
250
+ {
251
+ name : "not create snapshot and learner promotion is not saved to v3store" ,
252
+ snapshotCount : 0 ,
253
+ },
254
+ {
255
+ name : "not create snapshot and learner promotion is saved to v3store" ,
256
+ snapshotCount : 0 ,
257
+ writeToV3StoreSuccess : true ,
258
+ },
259
+ }
260
+
261
+ for _ , tc := range testCases {
262
+ t .Run (tc .name , func (t * testing.T ) {
263
+ t .Log ("Create a single node etcd cluster" )
264
+ cfg := e2e .NewConfigNoTLS ()
265
+ cfg .BasePeerScheme = "unix"
266
+ cfg .ClusterSize = 1
267
+ cfg .InitialCorruptCheck = true
268
+ if tc .snapshotCount != 0 {
269
+ cfg .SnapshotCount = tc .snapshotCount
270
+ }
271
+
272
+ epc , err := e2e .NewEtcdProcessCluster (t , cfg )
273
+ require .NoError (t , err , "failed to start etcd cluster: %v" , err )
274
+ defer func () {
275
+ derr := epc .Close ()
276
+ require .NoError (t , derr , "failed to close etcd cluster: %v" , derr )
277
+ }()
278
+
279
+ t .Log ("Add and start a learner" )
280
+ learnerID , err := epc .StartNewProc (nil , true , t )
281
+ require .NoError (t , err )
282
+
283
+ t .Log ("Write a key to ensure the cluster is healthy so far" )
284
+ etcdctl := epc .Procs [0 ].Etcdctl (e2e .ClientNonTLS , false , false )
285
+ err = etcdctl .Put ("foo" , "bar" )
286
+ require .NoError (t , err )
287
+
288
+ t .Logf ("Promoting the learner %x" , learnerID )
289
+ resp , err := etcdctl .MemberPromote (learnerID )
290
+ require .NoError (t , err )
291
+
292
+ var promotedMember * etcdserverpb.Member
293
+ for _ , m := range resp .Members {
294
+ if m .ID == learnerID {
295
+ promotedMember = m
296
+ break
297
+ }
298
+ }
299
+ require .NotNil (t , promotedMember )
300
+ t .Logf ("The promoted member: %+v" , promotedMember )
301
+
302
+ t .Log ("Ensure all members are voting members from user perspective" )
303
+ ensureAllMembersAreVotingMembers (t , etcdctl )
235
304
236
- t .Log ("Create a single node etcd cluster" )
237
- cfg := e2e .NewConfigNoTLS ()
238
- cfg .BasePeerScheme = "unix"
239
- cfg .ClusterSize = 1
305
+ if tc .snapshotCount != 0 {
306
+ t .Logf ("Write %d keys to trigger a snapshot" , tc .snapshotCount )
307
+ for i := 0 ; i < tc .snapshotCount ; i ++ {
308
+ err = etcdctl .Put (fmt .Sprintf ("key_%d" , i ), fmt .Sprintf ("value_%d" , i ))
309
+ require .NoError (t , err )
310
+ }
311
+ }
240
312
241
- epc , err := e2e .NewEtcdProcessCluster (t , cfg )
242
- require .NoError (t , err , "failed to start etcd cluster: %v" , err )
313
+ if tc .writeToV3StoreSuccess {
314
+ t .Log ("Skip manually changing the already promoted learner to a learner in v3store" )
315
+ } else {
316
+ t .Logf ("Stopping the already promoted member" )
317
+ require .NoError (t , epc .Procs [1 ].Stop ())
318
+
319
+ t .Log ("Manually changing the already promoted member to a learner again in v3store" )
320
+ promotedMember .IsLearner = true
321
+ mustSaveMemberIntoBbolt (t , epc .Procs [1 ].Config ().DataDirPath , promotedMember )
322
+
323
+ t .Log ("Starting the member again" )
324
+ require .NoError (t , epc .Procs [1 ].Start ())
325
+ }
326
+
327
+ t .Log ("Checking all members are ready to serve client requests" )
328
+ for i := 0 ; i < len (epc .Procs ); i ++ {
329
+ e2e .AssertProcessLogs (t , epc .Procs [i ], e2e .EtcdServerReadyLines [0 ])
330
+ }
331
+
332
+ // Wait for the learner published attribute to be applied by all members in the cluster
333
+ t .Log ("Write a key to ensure the the learner published attribute has been applied by all members" )
334
+ for i := 0 ; i < len (epc .Procs ); i ++ {
335
+ cli := epc .Procs [i ].Etcdctl (e2e .ClientNonTLS , false , false )
336
+ err = cli .Put ("foo" , "bar" )
337
+ require .NoError (t , err )
338
+ }
339
+
340
+ t .Log ("Ensure all members in v3store are voting members again" )
341
+ for i := 0 ; i < len (epc .Procs ); i ++ {
342
+ t .Logf ("Stopping the member: %d" , i )
343
+ require .NoError (t , epc .Procs [i ].Stop ())
344
+
345
+ t .Logf ("Checking all members in member's backend store: %d" , i )
346
+ ensureAllMembersFromV3StoreAreVotingMembers (t , epc .Procs [i ].Config ().DataDirPath )
347
+
348
+ t .Logf ("Starting the member again: %d" , i )
349
+ require .NoError (t , epc .Procs [i ].Start ())
350
+ }
351
+ })
352
+ }
353
+ }
354
+
355
+ func mustSaveMemberIntoBbolt (t * testing.T , dataDir string , protoMember * etcdserverpb.Member ) {
356
+ dbPath := datadir .ToBackendFileName (dataDir )
357
+ db , err := bbolt .Open (dbPath , 0666 , nil )
358
+ require .NoError (t , err )
243
359
defer func () {
244
- derr := epc .Close ()
245
- require .NoError (t , derr , "failed to close etcd cluster: %v" , derr )
360
+ require .NoError (t , db .Close ())
246
361
}()
247
362
248
- t .Log ("Add and start a learner" )
249
- learnerID , err := epc .StartNewProc (nil , true , t )
363
+ m := & membership.Member {
364
+ ID : types .ID (protoMember .ID ),
365
+ RaftAttributes : membership.RaftAttributes {
366
+ PeerURLs : protoMember .PeerURLs ,
367
+ IsLearner : protoMember .IsLearner ,
368
+ },
369
+ Attributes : membership.Attributes {
370
+ Name : protoMember .Name ,
371
+ ClientURLs : protoMember .ClientURLs ,
372
+ },
373
+ }
374
+
375
+ err = db .Update (func (tx * bbolt.Tx ) error {
376
+ b := tx .Bucket (buckets .Members .Name ())
377
+
378
+ mkey := []byte (m .ID .String ())
379
+ mvalue , err := json .Marshal (m )
380
+ require .NoError (t , err )
381
+
382
+ return b .Put (mkey , mvalue )
383
+ })
250
384
require .NoError (t , err )
385
+ }
251
386
252
- t .Log ("Write a key to ensure the cluster is healthy so far" )
253
- etcdctl := epc .Procs [0 ].Etcdctl (e2e .ClientNonTLS , false , false )
254
- err = etcdctl .Put ("foo" , "bar" )
387
+ func ensureAllMembersAreVotingMembers (t * testing.T , etcdctl * e2e.Etcdctl ) {
388
+ memberListResp , err := etcdctl .MemberList ()
255
389
require .NoError (t , err )
390
+ for _ , m := range memberListResp .Members {
391
+ require .False (t , m .IsLearner )
392
+ }
393
+ }
256
394
257
- t .Logf ("Promoting the learner %x" , learnerID )
258
- _ , err = etcdctl .MemberPromote (learnerID )
395
+ func ensureAllMembersFromV3StoreAreVotingMembers (t * testing.T , dataDir string ) {
396
+ dbPath := datadir .ToBackendFileName (dataDir )
397
+ db , err := bbolt .Open (dbPath , 0400 , & bbolt.Options {ReadOnly : true })
259
398
require .NoError (t , err )
399
+ defer func () {
400
+ require .NoError (t , db .Close ())
401
+ }()
402
+
403
+ var members []membership.Member
404
+ _ = db .View (func (tx * bbolt.Tx ) error {
405
+ b := tx .Bucket (buckets .Members .Name ())
406
+ _ = b .ForEach (func (k , v []byte ) error {
407
+ m := membership.Member {}
408
+ err := json .Unmarshal (v , & m )
409
+ require .NoError (t , err )
410
+ members = append (members , m )
411
+ return nil
412
+ })
413
+ return nil
414
+ })
415
+
416
+ for _ , m := range members {
417
+ require .Falsef (t , m .IsLearner , "member is still learner: %+v" , m )
418
+ }
260
419
}
0 commit comments