@@ -3,6 +3,8 @@ package calcium
3
3
import (
4
4
"bufio"
5
5
"fmt"
6
+ "sync"
7
+ "time"
6
8
7
9
log "github.com/Sirupsen/logrus"
8
10
enginetypes "github.com/docker/docker/api/types"
@@ -11,6 +13,11 @@ import (
11
13
"golang.org/x/net/context"
12
14
)
13
15
16
+ // TODO 这里还是有个问题啊
17
+ // 如果在这个等待的过程中重启了core, 那岂不是又有容器没有回收的?
18
+ // 有啥方法避免这个问题么?
19
+ const defaultWaitTimeout = 1200
20
+
14
21
func (c * calcium ) RunAndWait (specs types.Specs , opts * types.DeployOptions ) (chan * types.RunAndWaitMessage , error ) {
15
22
ch := make (chan * types.RunAndWaitMessage )
16
23
@@ -19,35 +26,51 @@ func (c *calcium) RunAndWait(specs types.Specs, opts *types.DeployOptions) (chan
19
26
entry .LogConfig = "json-file"
20
27
specs .Entrypoints [opts .Entrypoint ] = entry
21
28
29
+ // 默认给出1200秒的超时时间吧
30
+ // 没别的地方好传了, 不如放这里好了, 不需要用的就默认0或者不传
31
+ waitTimeout := entry .RunAndWaitTimeout
32
+ if waitTimeout == 0 {
33
+ waitTimeout = defaultWaitTimeout
34
+ }
35
+
36
+ // 创建容器, 有问题就gg
22
37
createChan , err := c .CreateContainer (specs , opts )
23
38
if err != nil {
24
- log .Errorf ("[RunAndWait] Create container error, %s " , err . Error () )
39
+ log .Errorf ("[RunAndWait] Create container error, %v " , err )
25
40
return ch , err
26
41
}
27
42
43
+ // 来个goroutine处理剩下的事情
44
+ // 基本上就是, attach拿日志写到channel, 以及等待容器结束后清理资源
28
45
go func () {
29
- defer log .Info ("[RunAndWait] Finish run and wait for containers" )
30
- defer close (ch )
31
46
logsOpts := enginetypes.ContainerLogsOptions {Follow : true , ShowStdout : true , ShowStderr : true }
47
+ wg := sync.WaitGroup {}
32
48
33
- ids := map [string ]* types.Node {}
34
49
for message := range createChan {
50
+ // 可能不成功, 可能没有容器id, 但是其实第一项足够判断了
51
+ // 不成功就无视掉
35
52
if ! message .Success || message .ContainerID == "" {
36
53
log .Errorf ("[RunAndWait] Create container error, %s" , message .Error )
37
54
continue
38
55
}
39
56
57
+ // 找不到对应node也不管
58
+ // 理论上不会这样
40
59
node , err := c .store .GetNode (message .Podname , message .Nodename )
41
60
if err != nil {
42
- log .Errorf ("[RunAndWait] Can't find node, %s " , err . Error () )
61
+ log .Errorf ("[RunAndWait] Can't find node, %v " , err )
43
62
continue
44
63
}
45
64
46
- ids [message .ContainerID ] = node
65
+ // 加个task
66
+ wg .Add (1 )
67
+
68
+ // goroutine attach日志然后处理了写回给channel
69
+ // 日志跟task无关, 不管wg
47
70
go func (node * types.Node , containerID string ) {
48
71
resp , err := node .Engine .ContainerLogs (context .Background (), containerID , logsOpts )
49
72
if err != nil {
50
- log .Errorf ("[RunAndWait] Failed to get logs, %s " , err . Error () )
73
+ log .Errorf ("[RunAndWait] Failed to get logs, %v " , err )
51
74
return
52
75
}
53
76
@@ -60,25 +83,37 @@ func (c *calcium) RunAndWait(specs types.Specs, opts *types.DeployOptions) (chan
60
83
}
61
84
62
85
if err := scanner .Err (); err != nil {
63
- log .Errorf ("[RunAndWait] Parse log failed, %s " , err . Error () )
86
+ log .Errorf ("[RunAndWait] Parse log failed, %v " , err )
64
87
return
65
88
}
66
89
}(node , message .ContainerID )
67
- }
68
90
69
- rmids := []string {}
70
- for id , node := range ids {
71
- rmids = append (rmids , id )
72
- code , err := node .Engine .ContainerWait (context .Background (), id )
73
- exitData := []byte (fmt .Sprintf ("[exitcode] %d" , code ))
74
- if err != nil {
75
- log .Errorf ("%s run failed, %s" , id [:12 ], err .Error ())
76
- exitData = []byte (fmt .Sprintf ("[exitcode]unknown %s" , err .Error ()))
77
- }
78
- ch <- & types.RunAndWaitMessage {ContainerID : id , Data : exitData }
79
- log .Infof ("[RunAndWait] Container %s finished, remove" , id [:12 ])
91
+ // goroutine 等待容器结束后删除掉
92
+ go func (node * types.Node , containerID string ) {
93
+ // 无论如何一定要删掉这个容器并且回收资源
94
+ // 并且容器删除了才算是task完成了一个
95
+ defer wg .Done ()
96
+ defer log .Infof ("[RunAndWait] Container %s finished and removed" , containerID [:12 ])
97
+ defer c .RemoveContainer ([]string {containerID })
98
+
99
+ // 给超时时间, 不能等个没完没了
100
+ // 超时的时候返回的code会是1, 不过会被err给覆盖掉
101
+ ctx , _ := context .WithTimeout (context .Background (), time .Duration (waitTimeout )* time .Second )
102
+ code , err := node .Engine .ContainerWait (ctx , containerID )
103
+ exitData := []byte (fmt .Sprintf ("[exitcode] %d" , code ))
104
+ if err != nil {
105
+ log .Errorf ("%s run failed, %v" , containerID [:12 ], err )
106
+ exitData = []byte (fmt .Sprintf ("[exitcode]unknown %v" , err ))
107
+ }
108
+
109
+ ch <- & types.RunAndWaitMessage {ContainerID : containerID , Data : exitData }
110
+ }(node , message .ContainerID )
80
111
}
81
- go c .RemoveContainer (rmids )
112
+
113
+ // 等待全部任务完成才可以关闭channel
114
+ wg .Wait ()
115
+ log .Info ("[RunAndWait] Finish run and wait for containers" )
116
+ close (ch )
82
117
}()
83
118
84
119
return ch , nil
0 commit comments