@@ -55,14 +55,36 @@ func (c *Calcium) doCreateWorkloads(ctx context.Context, opts *types.DeployOptio
55
55
defer func () {
56
56
cctx , cancel := context .WithTimeout (utils .InheritTracingInfo (ctx , context .TODO ()), c .config .GlobalTimeout )
57
57
for nodename := range deployMap {
58
- if e := c .store .DeleteProcessing (cctx , opts .GetProcessing (nodename )); e != nil {
59
- logger .Errorf (ctx , "[Calcium.doCreateWorkloads] delete processing failed for %s: %+v" , nodename , e )
58
+ processing := opts .GetProcessing (nodename )
59
+ if err := c .store .DeleteProcessing (cctx , processing ); err != nil {
60
+ logger .Errorf (ctx , "[Calcium.doCreateWorkloads] delete processing failed for %s: %+v" , nodename , err )
60
61
}
61
62
}
62
63
close (ch )
63
64
cancel ()
64
65
}()
65
66
67
+ var resourceCommit wal.Commit
68
+ defer func () {
69
+ if resourceCommit != nil {
70
+ if err := resourceCommit (); err != nil {
71
+ logger .Errorf (ctx , "commit wal failed: %s, %+v" , eventWorkloadResourceAllocated , err )
72
+ }
73
+ }
74
+ }()
75
+
76
+ var processingCommits map [string ]wal.Commit
77
+ defer func () {
78
+ for nodename := range processingCommits {
79
+ if processingCommits [nodename ] == nil {
80
+ continue
81
+ }
82
+ if err := processingCommits [nodename ](); err != nil {
83
+ logger .Errorf (ctx , "commit wal failed: %s, %s, %+v" , eventProcessingCreated , nodename , err )
84
+ }
85
+ }
86
+ }()
87
+
66
88
_ = utils .Txn (
67
89
ctx ,
68
90
@@ -81,15 +103,23 @@ func (c *Calcium) doCreateWorkloads(ctx context.Context, opts *types.DeployOptio
81
103
82
104
// commit changes
83
105
nodes := []* types.Node {}
106
+ processingCommits = make (map [string ]wal.Commit )
84
107
for nodename , deploy := range deployMap {
85
108
for _ , plan := range plans {
86
109
plan .ApplyChangesOnNode (nodeMap [nodename ], utils .Range (deploy )... )
87
110
}
88
111
nodes = append (nodes , nodeMap [nodename ])
89
- if err = c .store .CreateProcessing (ctx , opts .GetProcessing (nodename ), deploy ); err != nil {
112
+ processing := opts .GetProcessing (nodename )
113
+ if processingCommits [nodename ], err = c .wal .Log (eventProcessingCreated , processing ); err != nil {
114
+ return errors .WithStack (err )
115
+ }
116
+ if err = c .store .CreateProcessing (ctx , processing , deploy ); err != nil {
90
117
return errors .WithStack (err )
91
118
}
92
119
}
120
+ if resourceCommit , err = c .wal .Log (eventWorkloadResourceAllocated , nodes ); err != nil {
121
+ return errors .WithStack (err )
122
+ }
93
123
return errors .WithStack (c .store .UpdateNodes (ctx , nodes ... ))
94
124
})
95
125
},
@@ -246,6 +276,7 @@ func (c *Calcium) doDeployOneWorkload(
246
276
config * enginetypes.VirtualizationCreateOptions ,
247
277
decrProcessing bool ,
248
278
) (err error ) {
279
+ logger := log .WithField ("Calcium" , "doDeployWorkload" ).WithField ("nodename" , node .Name ).WithField ("opts" , opts ).WithField ("msg" , msg )
249
280
workload := & types.Workload {
250
281
ResourceMeta : types.ResourceMeta {
251
282
CPU : msg .CPU ,
@@ -276,7 +307,7 @@ func (c *Calcium) doDeployOneWorkload(
276
307
defer func () {
277
308
if commit != nil {
278
309
if err := commit (); err != nil {
279
- log .Errorf (ctx , "[doDeployOneWorkload] Commit WAL %s failed: %v" , eventCreateWorkload , err )
310
+ logger .Errorf (ctx , "Commit WAL %s failed: %+ v" , eventWorkloadCreated , err )
280
311
}
281
312
}
282
313
}()
@@ -292,10 +323,11 @@ func (c *Calcium) doDeployOneWorkload(
292
323
// We couldn't WAL the workload ID above VirtualizationCreate temporarily,
293
324
// so there's a time gap window, once the core process crashes between
294
325
// VirtualizationCreate and logCreateWorkload then the workload is leaked.
295
- if commit , err = c .wal .logCreateWorkload (workload .ID , node .Name ); err != nil {
296
- return err
297
- }
298
- return nil
326
+ commit , err = c .wal .Log (eventWorkloadCreated , & types.Workload {
327
+ ID : workload .ID ,
328
+ Nodename : workload .Nodename ,
329
+ })
330
+ return errors .WithStack (err )
299
331
},
300
332
301
333
func (ctx context.Context ) (err error ) {
@@ -375,13 +407,13 @@ func (c *Calcium) doDeployOneWorkload(
375
407
376
408
// remove workload
377
409
func (ctx context.Context , _ bool ) error {
378
- log .Errorf (ctx , "[doDeployOneWorkload] failed to deploy workload %s, rollback" , workload .ID )
410
+ logger .Errorf (ctx , "[doDeployOneWorkload] failed to deploy workload %s, rollback" , workload .ID )
379
411
if workload .ID == "" {
380
412
return nil
381
413
}
382
414
383
415
if err := c .store .RemoveWorkload (ctx , workload ); err != nil {
384
- log .Errorf (ctx , "[doDeployOneWorkload] failed to remove workload %s" )
416
+ logger .Errorf (ctx , "[doDeployOneWorkload] failed to remove workload %s" , workload . ID )
385
417
}
386
418
387
419
return workload .Remove (ctx , true )
0 commit comments