@@ -17,61 +17,66 @@ import (
17
17
// returns a channel that contains removing responses
18
18
func (c * Calcium ) RemoveWorkload (ctx context.Context , ids []string , force bool , step int ) (chan * types.RemoveWorkloadMessage , error ) {
19
19
logger := log .WithField ("Calcium" , "RemoveWorkload" ).WithField ("ids" , ids ).WithField ("force" , force ).WithField ("step" , step )
20
- ch := make (chan * types.RemoveWorkloadMessage )
21
- if step < 1 {
22
- step = 1
20
+
21
+ nodeWorkloadGroup , err := c .groupWorkloadsByNode (ctx , ids )
22
+ if err != nil {
23
+ logger .Errorf ("failed to group workloads by node: %+v" , err )
24
+ return nil , errors .WithStack (err )
23
25
}
24
26
27
+ ch := make (chan * types.RemoveWorkloadMessage )
25
28
go func () {
26
29
defer close (ch )
27
30
wg := sync.WaitGroup {}
28
31
defer wg .Wait ()
29
- for i , id := range ids {
32
+ for nodename , workloadIDs := range nodeWorkloadGroup {
30
33
wg .Add (1 )
31
- go func (id string ) {
34
+ go func (nodename string , workloadIDs [] string ) {
32
35
defer wg .Done ()
33
- ret := & types.RemoveWorkloadMessage {WorkloadID : id , Success : false , Hook : []* bytes.Buffer {}}
34
- if err := c .withWorkloadLocked (ctx , id , func (ctx context.Context , workload * types.Workload ) error {
35
- return c .withNodeLocked (ctx , workload .Nodename , func (ctx context.Context , node * types.Node ) (err error ) {
36
- if err = utils .Txn (
37
- ctx ,
38
- // if
39
- func (ctx context.Context ) error {
40
- return errors .WithStack (c .store .UpdateNodeResource (ctx , node , & workload .ResourceMeta , store .ActionIncr ))
41
- },
42
- // then
43
- func (ctx context.Context ) error {
44
- err := errors .WithStack (c .doRemoveWorkload (ctx , workload , force ))
45
- if err != nil {
46
- log .Infof ("[RemoveWorkload] Workload %s removed" , workload .ID )
47
- }
48
- return err
49
- },
50
- // rollback
51
- func (ctx context.Context , _ bool ) error {
52
- return errors .WithStack (c .store .UpdateNodeResource (ctx , node , & workload .ResourceMeta , store .ActionDecr ))
53
- },
54
- c .config .GlobalTimeout ,
55
- ); err != nil {
56
- return
57
- }
36
+ if err := c .withNodeLocked (ctx , nodename , func (ctx context.Context , node * types.Node ) error {
37
+ for _ , workloadID := range workloadIDs {
38
+ if err := c .withWorkloadLocked (ctx , workloadID , func (ctx context.Context , workload * types.Workload ) error {
39
+ ret := & types.RemoveWorkloadMessage {WorkloadID : workloadID , Success : true , Hook : []* bytes.Buffer {}}
40
+ if err := utils .Txn (
41
+ ctx ,
42
+ // if
43
+ func (ctx context.Context ) error {
44
+ return errors .WithStack (c .store .UpdateNodeResource (ctx , node , & workload .ResourceMeta , store .ActionIncr ))
45
+ },
46
+ // then
47
+ func (ctx context.Context ) error {
48
+ err := errors .WithStack (c .doRemoveWorkload (ctx , workload , force ))
49
+ if err != nil {
50
+ log .Infof ("[RemoveWorkload] Workload %s removed" , workload .ID )
51
+ }
52
+ return err
53
+ },
54
+ // rollback
55
+ func (ctx context.Context , failedByCond bool ) error {
56
+ if failedByCond {
57
+ return nil
58
+ }
59
+ return errors .WithStack (c .store .UpdateNodeResource (ctx , node , & workload .ResourceMeta , store .ActionDecr ))
60
+ },
61
+ c .config .GlobalTimeout ,
62
+ ); err != nil {
63
+ logger .WithField ("id" , workloadID ).Errorf ("[RemoveWorkload] Remove workload failed: %+v" , err )
64
+ ret .Hook = append (ret .Hook , bytes .NewBufferString (err .Error ()))
65
+ ret .Success = false
66
+ }
58
67
59
- // TODO@zc: 优化一下, 先按照 node 聚合 ids
60
- c .doRemapResourceAndLog (ctx , logger , node )
61
- return
62
- })
68
+ ch <- ret
69
+ return nil
70
+ }); err != nil {
71
+ logger .WithField ("id" , workloadID ).Errorf ("failed to lock workload: %+v" , err )
72
+ }
73
+ }
74
+ c .doRemapResourceAndLog (ctx , logger , node )
75
+ return nil
63
76
}); err != nil {
64
- logger .Errorf ("[RemoveWorkload] Remove workload %s failed, err: %+v" , id , err )
65
- ret .Hook = append (ret .Hook , bytes .NewBufferString (err .Error ()))
66
- } else {
67
- ret .Success = true
77
+ logger .WithField ("nodename" , nodename ).Errorf ("failed to lock node: %+v" , err )
68
78
}
69
- ch <- ret
70
- }(id )
71
- if (i + 1 )% step == 0 {
72
- log .Info ("[RemoveWorkload] Wait for previous tasks done" )
73
- wg .Wait ()
74
- }
79
+ }(nodename , workloadIDs )
75
80
}
76
81
}()
77
82
return ch , nil
@@ -110,3 +115,15 @@ func (c *Calcium) doRemoveWorkloadSync(ctx context.Context, ids []string) error
110
115
}
111
116
return nil
112
117
}
118
+
119
+ func (c * Calcium ) groupWorkloadsByNode (ctx context.Context , ids []string ) (map [string ][]string , error ) {
120
+ workloads , err := c .store .GetWorkloads (ctx , ids )
121
+ if err != nil {
122
+ return nil , errors .WithStack (err )
123
+ }
124
+ nodeWorkloadGroup := map [string ][]string {}
125
+ for _ , workload := range workloads {
126
+ nodeWorkloadGroup [workload .Nodename ] = append (nodeWorkloadGroup [workload .Nodename ], workload .ID )
127
+ }
128
+ return nodeWorkloadGroup , nil
129
+ }
0 commit comments