Skip to content

Commit 64d3c5d

Browse files
zcCMGS
zc
authored and committed
improve resource managements on replace and realloc (#213)
* replace rollback doCreateAndStart properly
* realloc calculates node resource properly
* pass tests
1 parent 7b7d9b3 commit 64d3c5d

File tree

7 files changed

+42
-21
lines changed

7 files changed

+42
-21
lines changed

cluster/calcium/build.go

+6-6
Original file line numberDiff line numberDiff line change
@@ -83,12 +83,6 @@ func (c *Calcium) buildFromContent(ctx context.Context, node *types.Node, refs [
8383
}
8484

8585
func (c *Calcium) buildFromExist(ctx context.Context, ref, existID string) (chan *types.BuildImageMessage, error) {
86-
buildErrMsg := func(err error) *types.BuildImageMessage {
87-
msg := &types.BuildImageMessage{Error: err.Error()}
88-
msg.ErrorDetail.Message = err.Error()
89-
return msg
90-
}
91-
9286
return withImageBuiltChannel(func(ch chan *types.BuildImageMessage) {
9387
node, err := c.getContainerNode(ctx, existID)
9488
if err != nil {
@@ -184,3 +178,9 @@ func cleanupNodeImages(node *types.Node, IDs []string, ttl time.Duration) {
184178
log.Infof("[BuildImage] Clean cached image and release space %d", spaceReclaimed)
185179
}
186180
}
181+
182+
func buildErrMsg(err error) *types.BuildImageMessage {
183+
msg := &types.BuildImageMessage{Error: err.Error()}
184+
msg.ErrorDetail.Message = err.Error()
185+
return msg
186+
}

cluster/calcium/create.go

-1
Original file line numberDiff line numberDiff line change
@@ -289,7 +289,6 @@ func (c *Calcium) doCreateAndStartContainer(
289289
},
290290
c.config.GlobalTimeout,
291291
)
292-
293292
return createContainerMessage
294293
}
295294

cluster/calcium/realloc.go

+6-7
Original file line numberDiff line numberDiff line change
@@ -229,17 +229,17 @@ func (c *Calcium) updateContainersResources(ctx context.Context, ch chan *types.
229229
}
230230

231231
func (c *Calcium) updateResource(ctx context.Context, node *types.Node, container *types.Container, newResource *enginetypes.VirtualizationResource) error {
232-
if err := node.Engine.VirtualizationUpdateResource(ctx, container.ID, newResource); err == nil {
233-
oldVolumes := container.Volumes
232+
updateResourceErr := node.Engine.VirtualizationUpdateResource(ctx, container.ID, newResource)
233+
if updateResourceErr == nil {
234+
oldVolumeSize := container.Volumes.TotalSize()
234235
container.CPU = newResource.CPU
235236
container.Quota = newResource.Quota
236237
container.Memory = newResource.Memory
237238
container.Volumes, _ = types.MakeVolumeBindings(newResource.Volumes)
238239
container.VolumePlan = types.MustToVolumePlan(newResource.VolumePlan)
239-
container.Storage += container.Volumes.TotalSize() - oldVolumes.TotalSize()
240+
container.Storage += container.Volumes.TotalSize() - oldVolumeSize
240241
} else {
241-
log.Errorf("[updateResource] When Realloc container, VirtualizationUpdateResource %s failed %v", container.ID, err)
242-
return err
242+
log.Errorf("[updateResource] When Realloc container, VirtualizationUpdateResource %s failed %v", container.ID, updateResourceErr)
243243
}
244244
// 成功失败都需要修改 node 的占用
245245
// 成功的话,node 占用为新资源
@@ -257,10 +257,9 @@ func (c *Calcium) updateResource(ctx context.Context, node *types.Node, containe
257257
// since we don't rollback VirutalUpdateResource, client can't interrupt
258258
if err := c.store.UpdateContainer(context.Background(), container); err != nil {
259259
log.Errorf("[updateResource] Realloc finish but update container %s failed %v", container.ID, err)
260-
261260
return err
262261
}
263-
return nil
262+
return updateResourceErr
264263
}
265264

266265
func (c *Calcium) reallocVolume(node *types.Node, containers []*types.Container, vbs types.VolumeBindings) (plans map[*types.Container]types.VolumePlan, err error) {

cluster/calcium/realloc_test.go

+3-1
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,10 @@ package calcium
22

33
import (
44
"context"
5-
complexscheduler "github.com/projecteru2/core/scheduler/complex"
65
"testing"
76

7+
complexscheduler "github.com/projecteru2/core/scheduler/complex"
8+
89
"github.com/stretchr/testify/assert"
910

1011
"github.com/docker/go-units"
@@ -135,6 +136,7 @@ func TestRealloc(t *testing.T) {
135136
simpleMockScheduler.On("SelectCPUNodes", mock.Anything, mock.Anything, mock.Anything).Return(nil, nodeCPUPlans, 2, nil).Times(5)
136137
// failed by apply resource
137138
engine.On("VirtualizationUpdateResource", mock.Anything, mock.Anything, mock.Anything).Return(types.ErrBadContainerID).Twice()
139+
store.On("UpdateContainer", mock.Anything, mock.Anything).Return(nil).Twice()
138140
// update node failed
139141
store.On("UpdateNode", mock.Anything, mock.Anything).Return(types.ErrNoETCD).Once()
140142
// reset node

cluster/calcium/replace.go

+25-5
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import (
88
"io/ioutil"
99
"sync"
1010

11+
"github.com/projecteru2/core/store"
1112
"github.com/projecteru2/core/types"
1213
"github.com/projecteru2/core/utils"
1314
log "github.com/sirupsen/logrus"
@@ -141,24 +142,43 @@ func (c *Calcium) doReplaceContainer(
141142
ctx,
142143
// if
143144
func(ctx context.Context) error {
144-
createMessage = c.doCreateAndStartContainer(ctx, index, node, &opts.DeployOptions, container.CPU, container.VolumePlan)
145-
return createMessage.Error
145+
return utils.Txn(
146+
ctx,
147+
func(ctx context.Context) error {
148+
createMessage = c.doCreateAndStartContainer(ctx, index, node, &opts.DeployOptions, container.CPU, container.VolumePlan)
149+
return createMessage.Error
150+
},
151+
nil,
152+
func(ctx context.Context) error {
153+
log.Errorf("[doReplaceContainer] Error when create and start a container, %v", createMessage.Error)
154+
if createMessage.ContainerID != "" {
155+
log.Warnf("[doReplaceContainer] Create container failed %v, and container %s not removed", createMessage.Error, createMessage.ContainerID)
156+
return nil
157+
}
158+
if err = c.withNodeLocked(ctx, node.Name, func(node *types.Node) error {
159+
return c.store.UpdateNodeResource(ctx, node, createMessage.CPU, createMessage.Quota, createMessage.Memory, createMessage.Storage, createMessage.VolumePlan.IntoVolumeMap(), store.ActionIncr)
160+
}); err != nil {
161+
log.Errorf("[doReplaceContainer] Reset node resource %s failed %v", node.Name, err)
162+
}
163+
return nil
164+
},
165+
c.config.GlobalTimeout,
166+
)
146167
},
147168
// then
148169
func(ctx context.Context) (err error) {
149170
if err = c.doRemoveContainer(ctx, container, true); err != nil {
150-
log.Errorf("[replaceAndRemove] the new started but the old failed to stop")
171+
log.Errorf("[doReplaceContainer] the new started but the old failed to stop")
151172
return
152173
}
153174
removeMessage.Success = true
154175
return
155176
},
156-
// else
157177
nil,
158178
c.config.GlobalTimeout,
159179
)
160180
},
161-
// else
181+
// rollback
162182
func(ctx context.Context) (err error) {
163183
messages, err := c.doStartContainer(ctx, container, opts.IgnoreHook)
164184
if err != nil {

cluster/calcium/replace_test.go

+1
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,7 @@ func TestReplaceContainer(t *testing.T) {
134134
// failed by VirtualizationCreate
135135
engine.On("VirtualizationCreate", mock.Anything, mock.Anything).Return(nil, types.ErrCannotGetEngine).Once()
136136
engine.On("VirtualizationStart", mock.Anything, mock.Anything).Return(types.ErrCannotGetEngine).Once()
137+
store.On("UpdateNodeResource", mock.Anything, mock.Anything, mock.Anything, mock.Anything, mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(nil).Once()
137138
ch, err = c.ReplaceContainer(ctx, opts)
138139
assert.NoError(t, err)
139140
for r := range ch {

cluster/calcium/resource.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -211,7 +211,7 @@ func (c *Calcium) doAllocResource(ctx context.Context, opts *types.DeployOptions
211211
nodesInfo[i].VolumePlans = nodeVolumePlans[nodeInfo.Name][:nodeInfo.Deploy]
212212
}
213213
if err = c.store.UpdateNodeResource(ctx, nodes[nodeInfo.Name], cpuCost, quotaCost, memoryCost, storageCost, volumeCost, store.ActionDecr); err != nil {
214-
return err
214+
return err // due to ctx lifecircle, this will be interrupted by client
215215
}
216216
track = i
217217
}

0 commit comments

Comments
 (0)