Skip to content

Commit 3f23337

Browse files
committed
Auto sync members in v3store if Islearner is the only field that differs between v2store and v3store
Signed-off-by: Benjamin Wang <benjamin.ahrtr@gmail.com>
1 parent 0d8349e commit 3f23337

File tree

4 files changed

+196
-1
lines changed

4 files changed

+196
-1
lines changed

server/embed/etcd.go

+3
Original file line numberDiff line numberDiff line change
@@ -299,6 +299,9 @@ func StartEtcd(inCfg *Config) (e *Etcd, err error) {
299299
zap.Strings("listen-metrics-urls", e.cfg.getMetricsURLs()),
300300
)
301301
serving = true
302+
303+
e.Server.SyncLearnerPromotionIfNeeded()
304+
302305
return e, nil
303306
}
304307

server/etcdserver/api/membership/cluster.go

+42
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import (
2323
"errors"
2424
"fmt"
2525
"path"
26+
"reflect"
2627
"sort"
2728
"strings"
2829
"sync"
@@ -542,6 +543,47 @@ func (c *RaftCluster) PromoteMember(id types.ID, shouldApplyV3 ShouldApplyV3) {
542543
)
543544
}
544545

546+
func (c *RaftCluster) SyncLearnerPromotionIfNeeded() {
547+
c.Lock()
548+
defer c.Unlock()
549+
550+
v2Members, _ := membersFromStore(c.lg, c.v2store)
551+
v3Members, _ := membersFromBackend(c.lg, c.be)
552+
553+
for id, v2Member := range v2Members {
554+
v3Member, ok := v3Members[id]
555+
556+
if !ok {
557+
c.lg.Error("Detected member only in v2store but missing in v3store", zap.String("member", fmt.Sprintf("%+v", *v2Member)))
558+
continue
559+
}
560+
561+
if reflect.DeepEqual(*v2Member, *v3Member) {
562+
c.lg.Info("Member is consistent between v2store and v3store", zap.String("member", fmt.Sprintf("%+v", *v2Member)))
563+
continue
564+
}
565+
566+
c.lg.Warn("Detected member inconsistent between v2store and v3store",
567+
zap.String("v2member", fmt.Sprintf("%+v", *v2Member)),
568+
zap.String("v3member", fmt.Sprintf("%+v", *v3Member)),
569+
)
570+
571+
// Sync member iff both conditions below are true,
572+
// 1. IsLearner is the only field that differs between v2store and v3store.
573+
// 2. v2store.IsLearner == true && v3store.IsLearner == false.
574+
clonedV3Member := v3Member.Clone()
575+
clonedV3Member.IsLearner = false
576+
if reflect.DeepEqual(*v2Member, *clonedV3Member) {
577+
c.lg.Warn("Syncing member in v3store", zap.String("memberID", v3Member.ID.String()))
578+
unsafeSaveMemberToBackend(c.lg, c.be, clonedV3Member)
579+
c.be.ForceCommit()
580+
continue
581+
}
582+
583+
c.lg.Error("Cannot sync member in v3store due to IsLearner not being the only field that differs between v2store amd v3store", zap.String("memberID", v3Member.ID.String()))
584+
}
585+
}
586+
545587
func (c *RaftCluster) UpdateRaftAttributes(id types.ID, raftAttr RaftAttributes, shouldApplyV3 ShouldApplyV3) {
546588
c.Lock()
547589
defer c.Unlock()

server/etcdserver/api/membership/cluster_test.go

+129-1
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ import (
2424

2525
"github.com/coreos/go-semver/semver"
2626
"github.com/stretchr/testify/assert"
27-
betesting "go.etcd.io/etcd/server/v3/mvcc/backend/testing"
27+
"github.com/stretchr/testify/require"
2828
"go.uber.org/zap"
2929
"go.uber.org/zap/zaptest"
3030

@@ -33,6 +33,7 @@ import (
3333
"go.etcd.io/etcd/raft/v3/raftpb"
3434
"go.etcd.io/etcd/server/v3/etcdserver/api/v2store"
3535
"go.etcd.io/etcd/server/v3/mock/mockstore"
36+
betesting "go.etcd.io/etcd/server/v3/mvcc/backend/testing"
3637
)
3738

3839
func TestClusterMember(t *testing.T) {
@@ -1210,3 +1211,130 @@ func TestRemoveMemberSyncsBackendAndStoreV2(t *testing.T) {
12101211
})
12111212
}
12121213
}
1214+
1215+
func TestSyncLearnerPromotion(t *testing.T) {
1216+
tcs := []struct {
1217+
name string
1218+
1219+
storeV2Members []*Member
1220+
backendMembers []*Member
1221+
1222+
expectV3Members map[types.ID]*Member
1223+
}{
1224+
{
1225+
name: "v3store should keep unchanged if IsLearner isn't the only field that differs",
1226+
storeV2Members: []*Member{
1227+
{
1228+
ID: 100,
1229+
RaftAttributes: RaftAttributes{
1230+
PeerURLs: []string{"http://10.0.0.100:2380"},
1231+
IsLearner: false,
1232+
},
1233+
},
1234+
},
1235+
backendMembers: []*Member{
1236+
{
1237+
ID: 100,
1238+
RaftAttributes: RaftAttributes{
1239+
PeerURLs: []string{"http://10.0.0.9:2380"},
1240+
IsLearner: true,
1241+
},
1242+
},
1243+
},
1244+
expectV3Members: map[types.ID]*Member{
1245+
100: {
1246+
ID: 100,
1247+
RaftAttributes: RaftAttributes{
1248+
PeerURLs: []string{"http://10.0.0.9:2380"},
1249+
IsLearner: true,
1250+
},
1251+
},
1252+
},
1253+
},
1254+
{
1255+
name: "v3store should keep unchanged if IsLearner is the only field that differs but v3store.IsLearner is false",
1256+
storeV2Members: []*Member{
1257+
{
1258+
ID: 100,
1259+
RaftAttributes: RaftAttributes{
1260+
PeerURLs: []string{"http://10.0.0.9:2380"},
1261+
IsLearner: true,
1262+
},
1263+
},
1264+
},
1265+
backendMembers: []*Member{
1266+
{
1267+
ID: 100,
1268+
RaftAttributes: RaftAttributes{
1269+
PeerURLs: []string{"http://10.0.0.9:2380"},
1270+
IsLearner: false,
1271+
},
1272+
},
1273+
},
1274+
expectV3Members: map[types.ID]*Member{
1275+
100: {
1276+
ID: 100,
1277+
RaftAttributes: RaftAttributes{
1278+
PeerURLs: []string{"http://10.0.0.9:2380"},
1279+
IsLearner: false,
1280+
},
1281+
},
1282+
},
1283+
},
1284+
{
1285+
name: "v3store should be updated if IsLearner is the only field that differs and v3store.IsLearner is true",
1286+
storeV2Members: []*Member{
1287+
{
1288+
ID: 100,
1289+
RaftAttributes: RaftAttributes{
1290+
PeerURLs: []string{"http://10.0.0.9:2380"},
1291+
IsLearner: false,
1292+
},
1293+
},
1294+
},
1295+
backendMembers: []*Member{
1296+
{
1297+
ID: 100,
1298+
RaftAttributes: RaftAttributes{
1299+
PeerURLs: []string{"http://10.0.0.9:2380"},
1300+
IsLearner: true,
1301+
},
1302+
},
1303+
},
1304+
expectV3Members: map[types.ID]*Member{
1305+
100: {
1306+
ID: 100,
1307+
RaftAttributes: RaftAttributes{
1308+
PeerURLs: []string{"http://10.0.0.9:2380"},
1309+
IsLearner: false,
1310+
},
1311+
},
1312+
},
1313+
},
1314+
}
1315+
for _, tc := range tcs {
1316+
t.Run(tc.name, func(t *testing.T) {
1317+
lg := zaptest.NewLogger(t)
1318+
be, _ := betesting.NewDefaultTmpBackend(t)
1319+
defer be.Close()
1320+
mustCreateBackendBuckets(be)
1321+
for _, m := range tc.backendMembers {
1322+
unsafeSaveMemberToBackend(lg, be, m)
1323+
}
1324+
be.ForceCommit()
1325+
1326+
st := v2store.New()
1327+
for _, m := range tc.storeV2Members {
1328+
mustSaveMemberToStore(lg, st, m)
1329+
}
1330+
1331+
cluster := NewCluster(lg)
1332+
cluster.SetBackend(be)
1333+
cluster.SetStore(st)
1334+
1335+
cluster.SyncLearnerPromotionIfNeeded()
1336+
v3Members, _ := cluster.MembersFromBackend()
1337+
require.Equal(t, tc.expectV3Members, v3Members)
1338+
})
1339+
}
1340+
}

server/etcdserver/server.go

+22
Original file line numberDiff line numberDiff line change
@@ -2931,3 +2931,25 @@ func maybeDefragBackend(cfg config.ServerConfig, be backend.Backend) error {
29312931
func (s *EtcdServer) CorruptionChecker() CorruptionChecker {
29322932
return s.corruptionChecker
29332933
}
2934+
2935+
// SyncLearnerPromotionIfNeeded provides a workaround for the users who have
2936+
// already been affected by https://github.com/etcd-io/etcd/issues/19557.
2937+
// It automatically syncs the v3store (bbolt) from v2store iff all the
2938+
// conditions below are true for each member,
2939+
// 1. IsLearner is the only field that differs between v2store and v3store.
2940+
// 2. v2store.IsLearner == true && v3store.IsLearner == false.
2941+
// 3. etcd is ready to serve client requests, which means it has finished
2942+
// replaying the WAL records.
2943+
func (s *EtcdServer) SyncLearnerPromotionIfNeeded() {
2944+
lg := s.Logger()
2945+
select {
2946+
case <-s.StoppingNotify():
2947+
lg.Warn("stop sync learner promotion operations as the server is stopping")
2948+
return
2949+
case <-s.ReadyNotify():
2950+
}
2951+
2952+
lg.Info("Trying to sync the learner promotion operations for v3store if needed")
2953+
s.cluster.SyncLearnerPromotionIfNeeded()
2954+
lg.Info("Finished syncing the learner promotion operations for v3store")
2955+
}

0 commit comments

Comments
 (0)