
Commit fdfc485

support fix resource when get node resource (#231)
1 parent d9015db commit fdfc485

8 files changed (+497 -406 lines)
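
This commit threads a new fix flag through Cluster.NodeResource: when it is true, any diff found between a node's recorded resource usage and the usage recomputed from its containers is written back to the store. A minimal caller-side sketch of the new call shape (checkNode is a hypothetical helper; cluster.Cluster, log.Warnf, and the NodeResource fields are taken from the diffs below):

// checkNode is a hypothetical helper showing the new signature.
// Passing fix == true asks Calcium to reconcile recorded usage
// against what the node's containers actually hold.
func checkNode(ctx context.Context, c cluster.Cluster, nodename string) error {
	nr, err := c.NodeResource(ctx, nodename, true)
	if err != nil {
		return err
	}
	if !nr.Verification {
		// Details carries one human-readable line per detected diff.
		for _, detail := range nr.Details {
			log.Warnf("[checkNode] %s", detail)
		}
	}
	return nil
}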

cluster/calcium/node.go

+1 -1

@@ -35,9 +35,9 @@ func (c *Calcium) GetNode(ctx context.Context, nodename string) (*types.Node, error)
 func (c *Calcium) SetNode(ctx context.Context, opts *types.SetNodeOptions) (*types.Node, error) { // nolint
 	var n *types.Node
 	return n, c.withNodeLocked(ctx, opts.Nodename, func(node *types.Node) error {
+		litter.Dump(opts)
 		opts.Normalize(node)
 		n = node
-		litter.Dump(opts)
 		n.Available = (opts.Status == types.TriTrue) || (opts.Status == types.TriKeep && n.Available)
 		if opts.ContainersDown {
 			containers, err := c.store.ListNodeContainers(ctx, opts.Nodename, nil)

Moving litter.Dump(opts) ahead of opts.Normalize(node) means the dump now records the options exactly as the caller sent them, before normalization rewrites them against the node.

cluster/calcium/resource.go

+89 -60

@@ -29,7 +29,7 @@ func (c *Calcium) PodResource(ctx context.Context, podname string) (*types.PodResource, error)
 		Details: map[string]string{},
 	}
 	for _, node := range nodes {
-		nodeDetail, err := c.doGetNodeResource(ctx, node)
+		nodeDetail, err := c.doGetNodeResource(ctx, node.Name, false)
 		if err != nil {
 			return nil, err
 		}
@@ -43,18 +43,13 @@ func (c *Calcium) PodResource(ctx context.Context, podname string) (*types.PodResource, error)
 }
 
 // NodeResource check node's container and resource
-func (c *Calcium) NodeResource(ctx context.Context, nodename string) (*types.NodeResource, error) {
-	node, err := c.GetNode(ctx, nodename)
-	if err != nil {
-		return nil, err
-	}
-	nr, err := c.doGetNodeResource(ctx, node)
+func (c *Calcium) NodeResource(ctx context.Context, nodename string, fix bool) (*types.NodeResource, error) {
+	nr, err := c.doGetNodeResource(ctx, nodename, fix)
 	if err != nil {
 		return nil, err
 	}
 	for _, container := range nr.Containers {
-		_, err := container.Inspect(ctx) // probe whether the container still exists on the node
-		if err != nil {
+		if _, err := container.Inspect(ctx); err != nil { // probe whether the container still exists on the node
 			nr.Verification = false
 			nr.Details = append(nr.Details, fmt.Sprintf("container %s inspect failed %v \n", container.ID, err))
 			continue
@@ -63,65 +58,99 @@ func (c *Calcium) NodeResource(ctx context.Context, nodename string) (*types.NodeResource, error)
 	return nr, err
 }
 
-func (c *Calcium) doGetNodeResource(ctx context.Context, node *types.Node) (*types.NodeResource, error) {
-	containers, err := c.ListNodeContainers(ctx, node.Name, nil)
-	if err != nil {
-		return nil, err
-	}
-	nr := &types.NodeResource{
-		Name: node.Name, CPU: node.CPU, MemCap: node.MemCap, StorageCap: node.StorageCap,
-		Containers: containers, Verification: true, Details: []string{},
-	}
-	cpus := 0.0
-	memory := int64(0)
-	storage := int64(0)
-	cpumap := types.CPUMap{}
-	for _, container := range containers {
-		cpus = utils.Round(cpus + container.Quota)
-		memory += container.Memory
-		storage += container.Storage
-		cpumap.Add(container.CPU)
-	}
-	nr.CPUPercent = cpus / float64(len(node.InitCPU))
-	nr.MemoryPercent = float64(memory) / float64(node.InitMemCap)
-	nr.NUMAMemoryPercent = map[string]float64{}
-	nr.VolumePercent = float64(node.VolumeUsed) / float64(node.InitVolume.Total())
-	for nodeID, nmemory := range node.NUMAMemory {
-		if initMemory, ok := node.InitNUMAMemory[nodeID]; ok {
-			nr.NUMAMemoryPercent[nodeID] = float64(nmemory) / float64(initMemory)
-		}
-	}
-	if cpus != node.CPUUsed {
-		nr.Verification = false
-		nr.Details = append(nr.Details, fmt.Sprintf("cpus used record: %f but now: %f", node.CPUUsed, cpus))
-	}
-	node.CPU.Add(cpumap)
-	for i, v := range node.CPU {
-		if node.InitCPU[i] != v {
-			nr.Verification = false
-			nr.Details = append(nr.Details, fmt.Sprintf("cpu %s now %d", i, v))
-		}
-	}
-
-	if memory+node.MemCap != node.InitMemCap {
-		nr.Verification = false
-		nr.Details = append(nr.Details, fmt.Sprintf("memory now %d", node.InitMemCap-(memory+node.MemCap)))
-	}
-
-	nr.StoragePercent = 0
-	if node.InitStorageCap != 0 {
-		nr.StoragePercent = float64(storage) / float64(node.InitStorageCap)
-		if storage+node.StorageCap != node.InitStorageCap {
-			nr.Verification = false
-			nr.Details = append(nr.Details, fmt.Sprintf("storage now %d", node.InitStorageCap-(storage+node.StorageCap)))
-		}
-	}
-
-	if err := node.Engine.ResourceValidate(ctx, cpus, cpumap, memory, storage); err != nil {
-		nr.Details = append(nr.Details, err.Error())
-	}
-
-	return nr, nil
+func (c *Calcium) doGetNodeResource(ctx context.Context, nodename string, fix bool) (*types.NodeResource, error) {
+	var nr *types.NodeResource
+	return nr, c.withNodeLocked(ctx, nodename, func(node *types.Node) error {
+		containers, err := c.ListNodeContainers(ctx, node.Name, nil)
+		if err != nil {
+			return err
+		}
+		nr = &types.NodeResource{
+			Name: node.Name, CPU: node.CPU, MemCap: node.MemCap, StorageCap: node.StorageCap,
+			Containers: containers, Verification: true, Details: []string{},
+		}
+
+		cpus := 0.0
+		memory := int64(0)
+		storage := int64(0)
+		cpumap := types.CPUMap{}
+		for _, container := range containers {
+			cpus = utils.Round(cpus + container.Quota)
+			memory += container.Memory
+			storage += container.Storage
+			cpumap.Add(container.CPU)
+		}
+		nr.CPUPercent = cpus / float64(len(node.InitCPU))
+		nr.MemoryPercent = float64(memory) / float64(node.InitMemCap)
+		nr.NUMAMemoryPercent = map[string]float64{}
+		nr.VolumePercent = float64(node.VolumeUsed) / float64(node.InitVolume.Total())
+		for nodeID, nmemory := range node.NUMAMemory {
+			if initMemory, ok := node.InitNUMAMemory[nodeID]; ok {
+				nr.NUMAMemoryPercent[nodeID] = float64(nmemory) / float64(initMemory)
+			}
+		}
+		if cpus != node.CPUUsed {
+			nr.Verification = false
+			nr.Details = append(nr.Details, fmt.Sprintf("cpus used: %f diff: %f", node.CPUUsed, cpus))
+		}
+		node.CPU.Add(cpumap)
+		for i, v := range node.CPU {
+			if node.InitCPU[i] != v {
+				nr.Verification = false
+				nr.Details = append(nr.Details, fmt.Sprintf("cpu %s diff %d", i, node.InitCPU[i]-v))
+			}
+		}
+
+		if memory+node.MemCap != node.InitMemCap {
+			nr.Verification = false
+			nr.Details = append(nr.Details, fmt.Sprintf("memory used: %d, diff %d", node.MemCap, node.InitMemCap-(memory+node.MemCap)))
+		}
+
+		nr.StoragePercent = 0
+		if node.InitStorageCap != 0 {
+			nr.StoragePercent = float64(storage) / float64(node.InitStorageCap)
+			if storage+node.StorageCap != node.InitStorageCap {
+				nr.Verification = false
+				nr.Details = append(nr.Details, fmt.Sprintf("storage used: %d, diff %d", node.StorageCap, node.InitStorageCap-(storage+node.StorageCap)))
+			}
+		}
+
+		if err := node.Engine.ResourceValidate(ctx, cpus, cpumap, memory, storage); err != nil {
+			nr.Details = append(nr.Details, err.Error())
+		}
+
+		if fix {
+			if err := c.doFixDiffResource(ctx, node, cpus, memory, storage); err != nil {
+				log.Warnf("[doGetNodeResource] fix node resource failed %v", err)
+			}
+		}
+
+		return nil
+	})
+}
+
+func (c *Calcium) doFixDiffResource(ctx context.Context, node *types.Node, cpus float64, memory, storage int64) error {
+	var n *types.Node
+	var err error
+	return utils.Txn(ctx,
+		func(ctx context.Context) error {
+			if n, err = c.GetNode(ctx, node.Name); err != nil {
+				return err
+			}
+			n.CPUUsed = cpus
+			for i, v := range node.CPU {
+				n.CPU[i] += node.InitCPU[i] - v
+			}
+			n.MemCap += node.InitMemCap - (memory + node.MemCap)
+			n.StorageCap += node.InitStorageCap - (storage + node.StorageCap)
+			return nil
+		},
+		func(ctx context.Context) error {
+			return c.store.UpdateNode(ctx, n)
+		},
+		nil,
+		c.config.GlobalTimeout,
+	)
 }
 
 func (c *Calcium) doAllocResource(ctx context.Context, opts *types.DeployOptions) ([]types.NodeInfo, error) {
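
The fix path relies on the invariant that, for each resource, initial capacity equals free capacity plus what containers consume; doFixDiffResource adds back whatever is missing. Worked through for memory: if InitMemCap is 100, containers sum to memory = 30, and the recorded free MemCap is 60, the diff is 100 - (30 + 60) = 10, so MemCap += 10 restores the record to 70. Note the whole check-and-fix runs while withNodeLocked holds the node lock, so a concurrent SetNode or allocation cannot race the correction. The write-back itself goes through utils.Txn with a prepare step (re-read the node, apply corrections), a commit step (store.UpdateNode), a nil rollback, and the global timeout. A simplified, hypothetical stand-in for that helper, showing the control flow only (the real utils.Txn in this repo also accepts the rollback func passed as nil above):

// txn sketches the two-phase shape used by doFixDiffResource:
// run prepare, then commit, both bounded by a single timeout.
func txn(ctx context.Context, prepare, commit func(context.Context) error, timeout time.Duration) error {
	ctx, cancel := context.WithTimeout(ctx, timeout)
	defer cancel()
	if err := prepare(ctx); err != nil {
		return err // nothing persisted yet, so bailing out is safe
	}
	return commit(ctx) // persist the corrected node record
}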

cluster/calcium/resource_test.go

+15 -4

@@ -14,6 +14,7 @@ import (
 
 	"github.com/projecteru2/core/cluster"
 	enginemocks "github.com/projecteru2/core/engine/mocks"
+	lockmocks "github.com/projecteru2/core/lock/mocks"
 	schedulermocks "github.com/projecteru2/core/scheduler/mocks"
 	storemocks "github.com/projecteru2/core/store/mocks"
 	"github.com/projecteru2/core/types"
@@ -26,6 +27,10 @@ func TestPodResource(t *testing.T) {
 	nodename := "testnode"
 	store := &storemocks.Store{}
 	c.store = store
+	lock := &lockmocks.DistributedLock{}
+	store.On("CreateLock", mock.Anything, mock.Anything).Return(lock, nil)
+	lock.On("Lock", mock.Anything).Return(nil)
+	lock.On("Unlock", mock.Anything).Return(nil)
 	// failed by GetNodesByPod
 	store.On("GetNodesByPod", mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(nil, types.ErrNoETCD).Once()
 	_, err := c.PodResource(ctx, podname)
@@ -39,6 +44,7 @@ func TestPodResource(t *testing.T) {
 		InitStorageCap: 10,
 	}
 	store.On("GetNodesByPod", mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return([]*types.Node{node}, nil)
+	store.On("GetNode", mock.Anything, mock.Anything).Return(node, nil)
 	// failed by ListNodeContainers
 	store.On("ListNodeContainers", mock.Anything, mock.Anything, mock.Anything).Return(nil, types.ErrNoETCD).Once()
 	_, err = c.PodResource(ctx, podname)
@@ -78,6 +84,10 @@ func TestNodeResource(t *testing.T) {
 	nodename := "testnode"
 	store := &storemocks.Store{}
 	c.store = store
+	lock := &lockmocks.DistributedLock{}
+	store.On("CreateLock", mock.Anything, mock.Anything).Return(lock, nil)
+	lock.On("Lock", mock.Anything).Return(nil)
+	lock.On("Unlock", mock.Anything).Return(nil)
 	node := &types.Node{
 		Name: nodename,
 		CPU:  types.CPUMap{"0": 0, "1": 10},
@@ -94,12 +104,12 @@ func TestNodeResource(t *testing.T) {
 	node.Engine = engine
 	// failed by GetNode
 	store.On("GetNode", ctx, nodename).Return(nil, types.ErrNoETCD).Once()
-	_, err := c.NodeResource(ctx, nodename)
+	_, err := c.NodeResource(ctx, nodename, false)
 	assert.Error(t, err)
-	store.On("GetNode", ctx, nodename).Return(node, nil)
+	store.On("GetNode", mock.Anything, nodename).Return(node, nil)
 	// failed by list node containers
 	store.On("ListNodeContainers", mock.Anything, mock.Anything, mock.Anything).Return(nil, types.ErrNoETCD).Once()
-	_, err = c.NodeResource(ctx, nodename)
+	_, err = c.NodeResource(ctx, nodename, false)
 	assert.Error(t, err)
 	containers := []*types.Container{
 		{
@@ -114,8 +124,9 @@ func TestNodeResource(t *testing.T) {
 		},
 	}
 	store.On("ListNodeContainers", mock.Anything, mock.Anything, mock.Anything).Return(containers, nil)
+	store.On("UpdateNode", mock.Anything, mock.Anything).Return(nil)
 	// success but container inspect failed
-	nr, err := c.NodeResource(ctx, nodename)
+	nr, err := c.NodeResource(ctx, nodename, true)
 	assert.NoError(t, err)
 	assert.Equal(t, nr.Name, nodename)
 	assert.NotEmpty(t, nr.Details)
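
The new lock stubs are a direct consequence of the resource.go change: doGetNodeResource now runs under withNodeLocked, so both TestPodResource and TestNodeResource must mock CreateLock/Lock/Unlock before the code under test can make progress. The UpdateNode stub backs the fix == true path, which persists the corrected node through doFixDiffResource, and the GetNode expectation loosens from the literal ctx to mock.Anything, presumably because the locked call path no longer passes the test's top-level context straight through.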

cluster/cluster.go

+1 -1

@@ -60,7 +60,7 @@ type Cluster interface {
 	GetNode(ctx context.Context, nodename string) (*types.Node, error)
 	SetNode(ctx context.Context, opts *types.SetNodeOptions) (*types.Node, error)
 	// node resource
-	NodeResource(ctx context.Context, nodename string) (*types.NodeResource, error)
+	NodeResource(ctx context.Context, nodename string, fix bool) (*types.NodeResource, error)
 	// meta containers
 	GetContainer(ctx context.Context, ID string) (*types.Container, error)
 	GetContainers(ctx context.Context, IDs []string) ([]*types.Container, error)

cluster/mocks/Cluster.go

+11 -15

(Generated mock of the Cluster interface, regenerated for the new NodeResource signature; GitHub does not render generated files by default.)
