@@ -8,7 +8,6 @@
 
     "github.com/pkg/errors"
     "github.com/sanity-io/litter"
-    "golang.org/x/sync/semaphore"
 
     "github.com/projecteru2/core/cluster"
     enginetypes "github.com/projecteru2/core/engine/types"
@@ -104,7 +103,10 @@ func (c *Calcium) doCreateWorkloads(ctx context.Context, opts *types.DeployOptions
         },
 
         // rollback: give back resources
-        func(ctx context.Context, _ bool) (err error) {
+        func(ctx context.Context, failedOnCond bool) (err error) {
+            if failedOnCond {
+                return
+            }
             for nodename, rollbackIndices := range rollbackMap {
                 if e := c.withNodeLocked(ctx, nodename, func(ctx context.Context, node *types.Node) error {
                     for _, plan := range plans {
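The rollback above now gives resources back only when the failure happened after the condition phase; if the condition itself failed, nothing was committed yet, so the callback returns immediately. The sketch below is a minimal illustration of the cond/then/rollback transaction shape such a callback plugs into; the helper name runTxn and its exact signature are assumptions for illustration, not the project's actual utility.

package txnsketch

import "context"

// runTxn is a hypothetical illustration of the transaction pattern: run cond,
// then run then, and on failure invoke rollback with a flag saying whether the
// failure happened in the cond phase, i.e. before anything was committed.
func runTxn(
    ctx context.Context,
    cond func(context.Context) error,
    then func(context.Context) error,
    rollback func(context.Context, bool) error,
) error {
    if err := cond(ctx); err != nil {
        // cond failed: nothing committed yet, rollback gets failedOnCond=true
        // and, as in the change above, can skip giving resources back.
        _ = rollback(ctx, true)
        return err
    }
    if err := then(ctx); err != nil {
        // then failed: resources were committed, rollback must undo them.
        _ = rollback(ctx, false)
        return err
    }
    return nil
}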
@@ -159,61 +161,46 @@ func (c *Calcium) doDeployWorkloadsOnNode(ctx context.Context, ch chan *types.CreateWorkloadMessage
         return utils.Range(deploy), errors.WithStack(err)
     }
 
-    sem, appendLock := semaphore.NewWeighted(c.config.MaxConcurrency), sync.Mutex{}
+    pool, appendLock := utils.NewGoroutinePool(int(c.config.MaxConcurrency)), sync.Mutex{}
     for idx := 0; idx < deploy; idx++ {
         createMsg := &types.CreateWorkloadMessage{
             Podname:  opts.Podname,
             Nodename: nodename,
             Publish:  map[string][]string{},
         }
 
-        if e := sem.Acquire(ctx, 1); e != nil {
-            logger.Errorf("[Calcium.doDeployWorkloadsOnNode] Failed to acquire semaphore: %+v", e)
-            err = e
-            ch <- &types.CreateWorkloadMessage{Error: e}
-            appendLock.Lock()
-            indices = append(indices, idx)
-            appendLock.Unlock()
-            continue
-        }
-        go func(idx int) (e error) {
-            defer func() {
-                if e != nil {
-                    err = e
-                    createMsg.Error = logger.Err(e)
-                    appendLock.Lock()
-                    indices = append(indices, idx)
-                    appendLock.Unlock()
+        pool.Go(func(idx int) func() {
+            return func() {
+                var e error
+                defer func() {
+                    if e != nil {
+                        err = e
+                        createMsg.Error = logger.Err(e)
+                        appendLock.Lock()
+                        indices = append(indices, idx)
+                        appendLock.Unlock()
+                    }
+                    ch <- createMsg
+                }()
+
+                r := &types.ResourceMeta{}
+                o := resourcetypes.DispenseOptions{
+                    Node:  node,
+                    Index: idx,
                 }
-                ch <- createMsg
-                sem.Release(1)
-            }()
-
-            r := &types.ResourceMeta{}
-            o := resourcetypes.DispenseOptions{
-                Node:  node,
-                Index: idx,
-            }
-            for _, plan := range plans {
-                if r, e = plan.Dispense(o, r); e != nil {
-                    return errors.WithStack(e)
+                for _, plan := range plans {
+                    if r, e = plan.Dispense(o, r); e != nil {
+                        return
+                    }
                 }
-            }
-
-            createMsg.ResourceMeta = *r
-            createOpts := c.doMakeWorkloadOptions(seq+idx, createMsg, opts, node)
-            return errors.WithStack(c.doDeployOneWorkload(ctx, node, opts, createMsg, createOpts, deploy-1-idx))
-        }(idx) // nolint:errcheck
-    }
 
-    // sem.Acquire(ctx, MaxConcurrency) is equivalent to WaitGroup.Wait()
-    // context.Background() is used so the semantics cannot be broken: we must wait for every goroutine to finish and must not be interrupted by the caller's ctx
-    // otherwise this function could return and close the channel before some goroutines have finished and run their defers, and the defer's send to the closed channel would panic
-    if e := sem.Acquire(context.Background(), c.config.MaxConcurrency); e != nil {
-        logger.Errorf("[Calcium.doDeployWorkloadsOnNode] Failed to wait all workers done: %+v", e)
-        err = e
-        indices = utils.Range(deploy)
+                createMsg.ResourceMeta = *r
+                createOpts := c.doMakeWorkloadOptions(seq+idx, createMsg, opts, node)
+                e = errors.WithStack(c.doDeployOneWorkload(ctx, node, opts, createMsg, createOpts, deploy-1-idx))
+            }
+        }(idx))
     }
+    pool.Wait()
 
     // keep remap out of the transaction: rolling it back would cost too much
     // if remap fails, the consequence is that the share pool is not updated, which we consider acceptable
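The semaphore throttle and the context.Background() wait barrier are replaced here by utils.NewGoroutinePool plus pool.Wait(). Below is a minimal sketch of the Go/Wait surface this relies on, assuming a bounded pool built from a buffered channel and a WaitGroup; the actual utils.GoroutinePool implementation may differ.

package poolsketch

import "sync"

// GoroutinePool is an illustrative stand-in for utils.GoroutinePool: Go blocks
// until a worker slot is free, Wait blocks until every submitted task is done.
type GoroutinePool struct {
    wg  sync.WaitGroup
    sem chan struct{} // buffered channel bounds concurrency to cap(sem)
}

func NewGoroutinePool(max int) *GoroutinePool {
    return &GoroutinePool{sem: make(chan struct{}, max)}
}

func (p *GoroutinePool) Go(f func()) {
    p.wg.Add(1)
    p.sem <- struct{}{} // acquire a slot
    go func() {
        defer func() {
            <-p.sem // release the slot
            p.wg.Done()
        }()
        f()
    }()
}

// Wait deliberately takes no context: like the old
// sem.Acquire(context.Background(), MaxConcurrency), it must not be cut short
// by the caller's ctx, so ch is only closed after every worker has sent its message.
func (p *GoroutinePool) Wait() {
    p.wg.Wait()
}

Note also the func(idx int) func() { ... }(idx) wrapper around the task: it copies the loop variable into the closure so each queued task sees its own idx, and because the deferred block always sends createMsg on ch, waiting for all workers before the channel is closed still matters just as it did with the semaphore.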
@@ -367,6 +354,9 @@ func (c *Calcium) doDeployOneWorkload(
 
         // remove workload
         func(ctx context.Context, _ bool) error {
+            if workload.ID == "" {
+                return nil
+            }
             return errors.WithStack(c.doRemoveWorkload(ctx, workload, true))
         },
         c.config.GlobalTimeout,