forked from volcano-sh/volcano
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathjob.go
305 lines (247 loc) · 11.8 KB
/
job.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
/*
Copyright 2018 The Volcano Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package v1alpha1
import (
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)
// +genclient
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
// Job defines the volcano job
type Job struct {
metav1.TypeMeta `json:",inline"`
metav1.ObjectMeta `json:"metadata,omitempty" protobuf:"bytes,1,opt,name=metadata"`
// Specification of the desired behavior of a cron job, including the minAvailable
// +optional
Spec JobSpec `json:"spec,omitempty" protobuf:"bytes,2,opt,name=spec"`
// Current status of Job
// +optional
Status JobStatus `json:"status,omitempty" protobuf:"bytes,3,opt,name=status"`
}
// JobSpec describes how the job execution will look like and when it will actually run
type JobSpec struct {
// SchedulerName is the default value of `tasks.template.spec.schedulerName`.
// +optional
SchedulerName string `json:"schedulerName,omitempty" protobuf:"bytes,1,opt,name=schedulerName"`
// The minimal available pods to run for this Job
// +optional
MinAvailable int32 `json:"minAvailable,omitempty" protobuf:"bytes,2,opt,name=minAvailable"`
// The volumes mount on Job
Volumes []VolumeSpec `json:"volumes,omitempty" protobuf:"bytes,3,opt,name=volumes"`
// Tasks specifies the task specification of Job
// +optional
Tasks []TaskSpec `json:"tasks,omitempty" protobuf:"bytes,4,opt,name=tasks"`
// Specifies the default lifecycle of tasks
// +optional
Policies []LifecyclePolicy `json:"policies,omitempty" protobuf:"bytes,5,opt,name=policies"`
// Specifies the plugin of job
// Key is plugin name, value is the arguments of the plugin
// +optional
Plugins map[string][]string `json:"plugins,omitempty" protobuf:"bytes,6,opt,name=plugins"`
//Specifies the queue that will be used in the scheduler, "default" queue is used this leaves empty.
Queue string `json:"queue,omitempty" protobuf:"bytes,7,opt,name=queue"`
// Specifies the maximum number of retries before marking this Job failed.
// Defaults to 3.
// +optional
MaxRetry int32 `json:"maxRetry,omitempty" protobuf:"bytes,8,opt,name=maxRetry"`
// ttlSecondsAfterFinished limits the lifetime of a Job that has finished
// execution (either Completed or Failed). If this field is set,
// ttlSecondsAfterFinished after the Job finishes, it is eligible to be
// automatically deleted. If this field is unset,
// the Job won't be automatically deleted. If this field is set to zero,
// the Job becomes eligible to be deleted immediately after it finishes.
// +optional
TTLSecondsAfterFinished *int32 `json:"ttlSecondsAfterFinished,omitempty" protobuf:"varint,9,opt,name=ttlSecondsAfterFinished"`
// If specified, indicates the job's priority.
// +optional
PriorityClassName string `json:"priorityClassName,omitempty" protobuf:"bytes,10,opt,name=priorityClassName"`
}
// VolumeSpec defines the specification of Volume, e.g. PVC
type VolumeSpec struct {
// Path within the container at which the volume should be mounted. Must
// not contain ':'.
MountPath string `json:"mountPath" protobuf:"bytes,1,opt,name=mountPath"`
// defined the PVC name
VolumeClaimName string `json:"volumeClaimName,omitempty" protobuf:"bytes,2,opt,name=volumeClaimName"`
// VolumeClaim defines the PVC used by the VolumeMount.
VolumeClaim *v1.PersistentVolumeClaimSpec `json:"volumeClaim,omitempty" protobuf:"bytes,3,opt,name=volumeClaim"`
}
// JobEvent job event
type JobEvent string
const (
// CommandIssued command issued event is generated if a command is raised by user
CommandIssued JobEvent = "CommandIssued"
// PluginError plugin error event is generated if error happens
PluginError JobEvent = "PluginError"
// PVCError pvc error event is generated if error happens during IO creation
PVCError JobEvent = "PVCError"
// PodGroupError pod grp error event is generated if error happens during pod grp creation
PodGroupError JobEvent = "PodGroupError"
//ExecuteAction action issued event for each action
ExecuteAction JobEvent = "ExecuteAction"
)
// Event represent the phase of Job, e.g. pod-failed.
type Event string
const (
// AnyEvent means all event
AnyEvent Event = "*"
// PodFailedEvent is triggered if Pod was failed
PodFailedEvent Event = "PodFailed"
// PodEvictedEvent is triggered if Pod was deleted
PodEvictedEvent Event = "PodEvicted"
// JobUnknownEvent These below are several events can lead to job 'Unknown'
// 1. Task Unschedulable, this is triggered when part of
// pods can't be scheduled while some are already running in gang-scheduling case.
JobUnknownEvent Event = "Unknown"
// OutOfSyncEvent is triggered if Pod/Job were updated
OutOfSyncEvent Event = "OutOfSync"
// CommandIssuedEvent is triggered if a command is raised by user
CommandIssuedEvent Event = "CommandIssued"
// TaskCompletedEvent is triggered if the 'Replicas' amount of pods in one task are succeed
TaskCompletedEvent Event = "TaskCompleted"
)
// Action is the action that Job controller will take according to the event.
type Action string
const (
// AbortJobAction if this action is set, the whole job will be aborted:
// all Pod of Job will be evicted, and no Pod will be recreated
AbortJobAction Action = "AbortJob"
// RestartJobAction if this action is set, the whole job will be restarted
RestartJobAction Action = "RestartJob"
// RestartTaskAction if this action is set, only the task will be restarted; default action.
// This action can not work together with job level events, e.g. JobUnschedulable
RestartTaskAction Action = "RestartTask"
// TerminateJobAction if this action is set, the whole job wil be terminated
// and can not be resumed: all Pod of Job will be evicted, and no Pod will be recreated.
TerminateJobAction Action = "TerminateJob"
// CompleteJobAction if this action is set, the unfinished pods will be killed, job completed.
CompleteJobAction Action = "CompleteJob"
// ResumeJobAction is the action to resume an aborted job.
ResumeJobAction Action = "ResumeJob"
// SyncJobAction is the action to sync Job/Pod status.
SyncJobAction Action = "SyncJob"
// EnqueueAction is the action to sync Job inqueue status.
EnqueueAction Action = "EnqueueJob"
)
// LifecyclePolicy specifies the lifecycle and error handling of task and job.
type LifecyclePolicy struct {
// The action that will be taken to the PodGroup according to Event.
// One of "Restart", "None".
// Default to None.
// +optional
Action Action `json:"action,omitempty" protobuf:"bytes,1,opt,name=action"`
// The Event recorded by scheduler; the controller takes actions
// according to this Event.
// +optional
Event Event `json:"event,omitempty" protobuf:"bytes,2,opt,name=event"`
// The exit code of the pod container, controller will take action
// according to this code.
// Note: only one of `Event` or `ExitCode` can be specified.
// +optional
ExitCode *int32
// Timeout is the grace period for controller to take actions.
// Default to nil (take action immediately).
// +optional
Timeout *metav1.Duration `json:"timeout,omitempty" protobuf:"bytes,3,opt,name=timeout"`
}
// TaskSpec specifies the task specification of Job
type TaskSpec struct {
// Name specifies the name of tasks
Name string `json:"name,omitempty" protobuf:"bytes,1,opt,name=name"`
// Replicas specifies the replicas of this TaskSpec in Job
Replicas int32 `json:"replicas,omitempty" protobuf:"bytes,2,opt,name=replicas"`
// Specifies the pod that will be created for this TaskSpec
// when executing a Job
Template v1.PodTemplateSpec `json:"template,omitempty" protobuf:"bytes,3,opt,name=template"`
// Specifies the lifecycle of task
// +optional
Policies []LifecyclePolicy `json:"policies,omitempty" protobuf:"bytes,4,opt,name=policies"`
}
// JobPhase defines the phase of the job
type JobPhase string
const (
// Pending is the phase that job is pending in the queue, waiting for scheduling decision
Pending JobPhase = "Pending"
// Aborting is the phase that job is aborted, waiting for releasing pods
Aborting JobPhase = "Aborting"
// Aborted is the phase that job is aborted by user or error handling
Aborted JobPhase = "Aborted"
// Running is the phase that minimal available tasks of Job are running
Running JobPhase = "Running"
// Restarting is the phase that the Job is restarted, waiting for pod releasing and recreating
Restarting JobPhase = "Restarting"
// Completing is the phase that required tasks of job are completed, job starts to clean up
Completing JobPhase = "Completing"
// Completed is the phase that all tasks of Job are completed
Completed JobPhase = "Completed"
// Terminating is the phase that the Job is terminated, waiting for releasing pods
Terminating JobPhase = "Terminating"
// Terminated is the phase that the job is finished unexpected, e.g. events
Terminated JobPhase = "Terminated"
// Failed is the phase that the job is restarted failed reached the maximum number of retries.
Failed JobPhase = "Failed"
// Inqueue is the phase that cluster have idle resource to schedule the job
Inqueue JobPhase = "Inqueue"
)
// JobState contains details for the current state of the job.
type JobState struct {
// The phase of Job.
// +optional
Phase JobPhase `json:"phase,omitempty" protobuf:"bytes,1,opt,name=phase"`
// Unique, one-word, CamelCase reason for the phase's last transition.
// +optional
Reason string `json:"reason,omitempty" protobuf:"bytes,2,opt,name=reason"`
// Human-readable message indicating details about last transition.
// +optional
Message string `json:"message,omitempty" protobuf:"bytes,3,opt,name=message"`
// Last time the condition transit from one phase to another.
// +optional
LastTransitionTime metav1.Time `json:"lastTransitionTime,omitempty" protobuf:"bytes,4,opt,name=lastTransitionTime"`
}
// JobStatus represents the current status of a Job
type JobStatus struct {
// Current state of Job.
State JobState `json:"state,omitempty" protobuf:"bytes,1,opt,name=state"`
// The minimal available pods to run for this Job
// +optional
MinAvailable int32 `json:"minAvailable,omitempty" protobuf:"bytes,2,opt,name=minAvailable"`
// The number of pending pods.
// +optional
Pending int32 `json:"pending,omitempty" protobuf:"bytes,3,opt,name=pending"`
// The number of running pods.
// +optional
Running int32 `json:"running,omitempty" protobuf:"bytes,4,opt,name=running"`
// The number of pods which reached phase Succeeded.
// +optional
Succeeded int32 `json:"Succeeded,omitempty" protobuf:"bytes,5,opt,name=succeeded"`
// The number of pods which reached phase Failed.
// +optional
Failed int32 `json:"failed,omitempty" protobuf:"bytes,6,opt,name=failed"`
// The number of pods which reached phase Terminating.
// +optional
Terminating int32 `json:"terminating,omitempty" protobuf:"bytes,7,opt,name=terminating"`
//Current version of job
Version int32 `json:"version,omitempty" protobuf:"bytes,8,opt,name=version"`
// The number of Job retries.
// +optional
RetryCount int32 `json:"retryCount,omitempty" protobuf:"bytes,9,opt,name=retryCount"`
// The resources that controlled by this job, e.g. Service, ConfigMap
ControlledResources map[string]string `json:"controlledResources,omitempty" protobuf:"bytes,8,opt,name=controlledResources"`
}
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
// JobList defines the list of jobs
type JobList struct {
metav1.TypeMeta `json:",inline"`
metav1.ListMeta `json:"metadata,omitempty" protobuf:"bytes,1,opt,name=metadata"`
Items []Job `json:"items" protobuf:"bytes,2,rep,name=items"`
}