
Commit 54ee389

Merge pull request #4700 from umohnani8/jobs-final
MCO-1231: MCO-783: MCO-1232: Use Jobs for OCL builds
2 parents: b212490 + b4b7451

19 files changed: +701 -605 lines

manifests/machineosbuilder/clusterrole.yaml (+6)

@@ -64,3 +64,9 @@ rules:
   - leases
   verbs:
   - "*"
+- apiGroups:
+  - batch
+  resources:
+  - jobs
+  verbs:
+  - "*"

pkg/controller/build/buildrequest/assets/buildah-build.sh (+1)

@@ -43,6 +43,7 @@ function retry {
       echo "Retry $count/$MAX_RETRIES exited $exit, retrying..."
     else
       echo "Retry $count/$MAX_RETRIES exited $exit, no more retries left."
+      echo $exit > /tmp/done/errorfile
       return $exit
     fi
   done

pkg/controller/build/buildrequest/assets/wait.sh (+16 -4)

@@ -8,10 +8,22 @@
 # that the build operation is complete.
 set -euo

-# Wait until the done file appears.
-while [ ! -f "/tmp/done/digestfile" ]
-do
-  sleep 1
+# Wait for either the digestfile or an errorfile
+while true; do
+  if [ -f "/tmp/done/digestfile" ]; then
+    # If digestfile is found, break the loop and proceed
+    echo "Digest file found. Proceeding with ConfigMap creation."
+    break
+  elif [ -f "/tmp/done/errorfile" ]; then
+    # If errorfile is found, exit the script with an error
+    echo "Error file found. Exiting."
+    # Delete the error file so that when a build pod is scheduled again
+    # on the same node, it doesn't error out immediately.
+    # /tmp should be automatically cleared up on reboot, but let's manually clean up as well.
+    rm /tmp/done/errorfile
+    exit 1
+  fi
+  sleep 1
 done

 # Inject the contents of the digestfile into a ConfigMap.
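
The two scripts communicate through files under /tmp/done: buildah-build.sh writes digestfile on success or errorfile once its retries are exhausted, and wait.sh converts whichever appears into an exit status. For that handshake to work, both containers must mount the same directory. A minimal sketch of such a shared volume follows; the volume name is illustrative, and a hostPath source is assumed here only because the cleanup comment above implies the path can survive into a rescheduled pod on the same node:

package main

import (
    "fmt"

    corev1 "k8s.io/api/core/v1"
)

func main() {
    hostPathType := corev1.HostPathDirectoryOrCreate

    // Node-local scratch directory shared by the build and wait containers
    // (assumed shape; the actual PR defines its own volumes).
    done := corev1.Volume{
        Name: "done",
        VolumeSource: corev1.VolumeSource{
            HostPath: &corev1.HostPathVolumeSource{Path: "/tmp/done", Type: &hostPathType},
        },
    }

    // Both containers mount it at the path the scripts poll.
    mount := corev1.VolumeMount{Name: done.Name, MountPath: "/tmp/done"}

    fmt.Printf("volume %q mounted at %s\n", done.Name, mount.MountPath)
}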

pkg/controller/build/buildrequest/buildrequest.go (+29 -4)

@@ -14,6 +14,7 @@ import (
     "github.com/openshift/machine-config-operator/pkg/controller/build/constants"
     "github.com/openshift/machine-config-operator/pkg/controller/build/utils"
     ctrlcommon "github.com/openshift/machine-config-operator/pkg/controller/common"
+    batchv1 "k8s.io/api/batch/v1"
     corev1 "k8s.io/api/core/v1"
     metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
     "k8s.io/apimachinery/pkg/labels"
@@ -33,7 +34,7 @@ var buildahBuildScript string
 var podmanBuildScript string

 const (
-    // Filename for the machineconfig JSON tarball expected by the build pod
+    // Filename for the machineconfig JSON tarball expected by the build job
     machineConfigJSONFilename string = "machineconfig.json.gz"
 )

@@ -75,9 +76,9 @@ func (br buildRequestImpl) Opts() BuildRequestOpts {
     return br.opts
 }

-// Creates the Build Pod object.
+// Creates the Build Job object.
 func (br buildRequestImpl) Builder() Builder {
-    return newBuilder(br.toBuildahPod())
+    return newBuilder(br.podToJob(br.toBuildahPod()))
 }

 // Takes the configured secrets and creates an ephemeral clone of them, canonicalizing them, if needed.
@@ -242,6 +243,30 @@ func (br buildRequestImpl) renderContainerfile() (string, error) {
     return out.String(), nil
 }

+// podToJob creates a Job with the spec of the given Pod.
+func (br buildRequestImpl) podToJob(pod *corev1.Pod) *batchv1.Job {
+    // Set the backoffLimit to 3 so the build is attempted up to 4 times
+    // (1 initial run + 3 retries) before the Job reports a failure.
+    var backoffLimit int32 = 3
+    // Set completions to 1 so that as soon as one pod has completed
+    // successfully, the Job is considered a success.
+    var completions int32 = 1
+    return &batchv1.Job{
+        ObjectMeta: pod.ObjectMeta,
+        TypeMeta: metav1.TypeMeta{
+            APIVersion: "batch/v1",
+            Kind:       "Job",
+        },
+        Spec: batchv1.JobSpec{
+            BackoffLimit: &backoffLimit,
+            Completions:  &completions,
+            Template: corev1.PodTemplateSpec{
+                ObjectMeta: metav1.ObjectMeta{},
+                Spec:       pod.Spec,
+            },
+        },
+    }
+}
+
 // We're able to run the Buildah image in an unprivileged pod provided that the
 // machine-os-builder service account has the anyuid security constraint
 // context enabled to allow us to use UID 1000, which maps to the UID within
@@ -569,7 +594,7 @@ func (br buildRequestImpl) getMCConfigMapName() string {

 // Computes the build name based upon the MachineConfigPool name.
 func (br buildRequestImpl) getBuildName() string {
-    return utils.GetBuildPodName(br.opts.MachineOSBuild)
+    return utils.GetBuildJobName(br.opts.MachineOSBuild)
 }

 func (br buildRequestImpl) getDigestConfigMapName() string {
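
With podToJob in place, the Builder wraps a batch/v1 Job instead of a bare Pod, and the Job controller takes over retries. A minimal sketch of submitting such a Job with client-go — the package name, helper name, and namespace are illustrative, and the real controller goes through its own Builder/ImageBuilder plumbing:

package build

import (
    "context"

    batchv1 "k8s.io/api/batch/v1"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "k8s.io/client-go/kubernetes"
)

// submitBuildJob hands a converted Job to the API server. With
// BackoffLimit=3, the Job controller re-runs a failed build pod up to
// three more times before marking the whole Job failed; Completions=1
// means a single successful pod completes the Job.
func submitBuildJob(ctx context.Context, client kubernetes.Interface, job *batchv1.Job) (*batchv1.Job, error) {
    return client.BatchV1().Jobs("openshift-machine-config-operator").Create(ctx, job, metav1.CreateOptions{})
}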

pkg/controller/build/buildrequest/buildrequest_test.go (+20 -19)

@@ -11,6 +11,7 @@ import (
     "github.com/openshift/machine-config-operator/pkg/controller/build/utils"
     ctrlcommon "github.com/openshift/machine-config-operator/pkg/controller/common"
     "github.com/stretchr/testify/assert"
+    batchv1 "k8s.io/api/batch/v1"
     corev1 "k8s.io/api/core/v1"
     metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
     "k8s.io/apimachinery/pkg/labels"
@@ -141,22 +142,22 @@ func TestBuildRequest(t *testing.T) {
                 assert.NotContains(t, containerfile, content)
             }

-            buildPod := br.Builder().GetObject().(*corev1.Pod)
+            buildJob := br.Builder().GetObject().(*batchv1.Job)

-            _, err = NewBuilder(buildPod)
+            _, err = NewBuilder(buildJob)
             assert.NoError(t, err)

             assert.Equal(t, "containerfile-worker-afc35db0f874c9bfdc586e6ba39f1504", configmaps[0].Name)
             assert.Equal(t, "mc-worker-afc35db0f874c9bfdc586e6ba39f1504", configmaps[1].Name)
-            assert.Equal(t, "build-worker-afc35db0f874c9bfdc586e6ba39f1504", buildPod.Name)
+            assert.Equal(t, "build-worker-afc35db0f874c9bfdc586e6ba39f1504", buildJob.Name)

             secrets, err := br.Secrets()
             assert.NoError(t, err)

             objects := []metav1.Object{
                 configmaps[0],
                 configmaps[1],
-                buildPod,
+                buildJob,
                 secrets[0],
                 secrets[1],
             }
@@ -174,7 +175,7 @@ func TestBuildRequest(t *testing.T) {
             assert.Equal(t, secrets[0].Name, "base-worker-afc35db0f874c9bfdc586e6ba39f1504")
             assert.Equal(t, secrets[1].Name, "final-worker-afc35db0f874c9bfdc586e6ba39f1504")

-            assertBuildPodIsCorrect(t, buildPod, opts)
+            assertBuildJobIsCorrect(t, buildJob, opts)
         })
     }
 }
@@ -190,37 +191,37 @@ func assertSecretInCorrectFormat(t *testing.T, secret *corev1.Secret) {
     assert.JSONEq(t, string(secret.Data[corev1.DockerConfigJsonKey]), `{"auths":{"registry.hostname.com": {"username": "user", "password": "s3kr1t", "auth": "s00pers3kr1t", "email": "user@hostname.com"}}}`)
 }

-func assertBuildPodIsCorrect(t *testing.T, buildPod *corev1.Pod, opts BuildRequestOpts) {
+func assertBuildJobIsCorrect(t *testing.T, buildJob *batchv1.Job, opts BuildRequestOpts) {
     etcRpmGpgKeysOpts := optsForEtcRpmGpgKeys()
-    assertBuildPodMatchesExpectations(t, opts.HasEtcPkiRpmGpgKeys, buildPod,
+    assertBuildJobMatchesExpectations(t, opts.HasEtcPkiRpmGpgKeys, buildJob,
         etcRpmGpgKeysOpts.envVar(),
         etcRpmGpgKeysOpts.volumeForSecret(constants.EtcPkiRpmGpgSecretName),
         etcRpmGpgKeysOpts.volumeMount(),
     )

     etcYumReposDOpts := optsForEtcYumReposD()
-    assertBuildPodMatchesExpectations(t, opts.HasEtcYumReposDConfigs, buildPod,
+    assertBuildJobMatchesExpectations(t, opts.HasEtcYumReposDConfigs, buildJob,
         etcYumReposDOpts.envVar(),
         etcYumReposDOpts.volumeForConfigMap(),
         etcYumReposDOpts.volumeMount(),
     )

     etcPkiEntitlementKeysOpts := optsForEtcPkiEntitlements()
-    assertBuildPodMatchesExpectations(t, opts.HasEtcPkiEntitlementKeys, buildPod,
+    assertBuildJobMatchesExpectations(t, opts.HasEtcPkiEntitlementKeys, buildJob,
         etcPkiEntitlementKeysOpts.envVar(),
         etcPkiEntitlementKeysOpts.volumeForSecret(constants.EtcPkiEntitlementSecretName+"-"+opts.MachineOSConfig.Spec.MachineConfigPool.Name),
         etcPkiEntitlementKeysOpts.volumeMount(),
     )

-    assert.Equal(t, buildPod.Spec.Containers[0].Image, mcoImagePullspec)
+    assert.Equal(t, buildJob.Spec.Template.Spec.Containers[0].Image, mcoImagePullspec)
     expectedPullspecs := []string{
         "base-os-image-from-machineosconfig",
         fixtures.OSImageURLConfig().BaseOSContainerImage,
     }

-    assert.Contains(t, expectedPullspecs, buildPod.Spec.Containers[1].Image)
+    assert.Contains(t, expectedPullspecs, buildJob.Spec.Template.Spec.Containers[1].Image)

-    assertPodHasVolume(t, buildPod, corev1.Volume{
+    assertPodHasVolume(t, buildJob.Spec.Template.Spec, corev1.Volume{
         Name: "final-image-push-creds",
         VolumeSource: corev1.VolumeSource{
             Secret: &corev1.SecretVolumeSource{
@@ -235,7 +236,7 @@ func assertBuildJobIsCorrect(t *testing.T, buildJob *batchv1.Job, opts BuildRequestOpts) {
         },
     })

-    assertPodHasVolume(t, buildPod, corev1.Volume{
+    assertPodHasVolume(t, buildJob.Spec.Template.Spec, corev1.Volume{
         Name: "base-image-pull-creds",
         VolumeSource: corev1.VolumeSource{
             Secret: &corev1.SecretVolumeSource{
@@ -251,20 +252,20 @@ func assertBuildJobIsCorrect(t *testing.T, buildJob *batchv1.Job, opts BuildRequestOpts) {
     })
 }

-func assertPodHasVolume(t *testing.T, pod *corev1.Pod, volume corev1.Volume) {
-    assert.Contains(t, pod.Spec.Volumes, volume)
+func assertPodHasVolume(t *testing.T, pod corev1.PodSpec, volume corev1.Volume) {
+    assert.Contains(t, pod.Volumes, volume)
 }

-func assertBuildPodMatchesExpectations(t *testing.T, shouldBePresent bool, buildPod *corev1.Pod, envvar corev1.EnvVar, volume corev1.Volume, volumeMount corev1.VolumeMount) {
-    for _, container := range buildPod.Spec.Containers {
+func assertBuildJobMatchesExpectations(t *testing.T, shouldBePresent bool, buildJob *batchv1.Job, envvar corev1.EnvVar, volume corev1.Volume, volumeMount corev1.VolumeMount) {
+    for _, container := range buildJob.Spec.Template.Spec.Containers {
         if shouldBePresent {
             assert.Contains(t, container.Env, envvar)
             assert.Contains(t, container.VolumeMounts, volumeMount)
-            assertPodHasVolume(t, buildPod, volume)
+            assertPodHasVolume(t, buildJob.Spec.Template.Spec, volume)
         } else {
             assert.NotContains(t, container.Env, envvar)
             assert.NotContains(t, container.VolumeMounts, volumeMount)
-            assert.NotContains(t, buildPod.Spec.Volumes, volume)
+            assert.NotContains(t, buildJob.Spec.Template.Spec.Volumes, volume)
         }

         assert.Contains(t, container.Env, corev1.EnvVar{

pkg/controller/build/clients.go (+8 -8)

@@ -5,9 +5,9 @@ import (

     metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
     coreinformers "k8s.io/client-go/informers"
-    coreinformersv1 "k8s.io/client-go/informers/core/v1"
+    batchinformersv1 "k8s.io/client-go/informers/batch/v1"
     clientset "k8s.io/client-go/kubernetes"
-    corelistersv1 "k8s.io/client-go/listers/core/v1"
+    batchlisterv1 "k8s.io/client-go/listers/batch/v1"
     "k8s.io/client-go/tools/cache"

     mcfgclientset "github.com/openshift/client-go/machineconfiguration/clientset/versioned"
@@ -25,7 +25,7 @@ import (
 type informers struct {
     controllerConfigInformer  mcfginformersv1.ControllerConfigInformer
     machineConfigPoolInformer mcfginformersv1.MachineConfigPoolInformer
-    podInformer               coreinformersv1.PodInformer
+    jobInformer               batchinformersv1.JobInformer
     machineOSBuildInformer    mcfginformersv1alpha1.MachineOSBuildInformer
     machineOSConfigInformer   mcfginformersv1alpha1.MachineOSConfigInformer
     toStart                   []interface{ Start(<-chan struct{}) }
@@ -45,7 +45,7 @@ func (i *informers) listers() *listers {
         machineOSBuildLister:    i.machineOSBuildInformer.Lister(),
         machineOSConfigLister:   i.machineOSConfigInformer.Lister(),
         machineConfigPoolLister: i.machineConfigPoolInformer.Lister(),
-        podLister:               i.podInformer.Lister(),
+        jobLister:               i.jobInformer.Lister(),
         controllerConfigLister:  i.controllerConfigInformer.Lister(),
     }
 }
@@ -55,7 +55,7 @@ type listers struct {
     machineOSBuildLister    mcfglistersv1alpha1.MachineOSBuildLister
     machineOSConfigLister   mcfglistersv1alpha1.MachineOSConfigLister
     machineConfigPoolLister mcfglistersv1.MachineConfigPoolLister
-    podLister               corelistersv1.PodLister
+    jobLister               batchlisterv1.JobLister
     controllerConfigLister  mcfglistersv1.ControllerConfigLister
 }

@@ -87,22 +87,22 @@ func newInformers(mcfgclient mcfgclientset.Interface, kubeclient clientset.Inter
     machineConfigPoolInformer := mcoInformerFactory.Machineconfiguration().V1().MachineConfigPools()
     machineOSBuildInformer := mcoInformerFactory.Machineconfiguration().V1alpha1().MachineOSBuilds()
     machineOSConfigInformer := mcoInformerFactory.Machineconfiguration().V1alpha1().MachineOSConfigs()
-    podInformer := coreInformerFactory.Core().V1().Pods()
+    jobInformer := coreInformerFactory.Batch().V1().Jobs()

     return &informers{
         controllerConfigInformer:  controllerConfigInformer,
         machineConfigPoolInformer: machineConfigPoolInformer,
         machineOSBuildInformer:    machineOSBuildInformer,
         machineOSConfigInformer:   machineOSConfigInformer,
-        podInformer:               podInformer,
+        jobInformer:               jobInformer,
         toStart: []interface{ Start(<-chan struct{}) }{
             mcoInformerFactory,
             coreInformerFactory,
         },
         hasSynced: []cache.InformerSynced{
             controllerConfigInformer.Informer().HasSynced,
             machineConfigPoolInformer.Informer().HasSynced,
-            podInformer.Informer().HasSynced,
+            jobInformer.Informer().HasSynced,
             machineOSBuildInformer.Informer().HasSynced,
             machineOSConfigInformer.Informer().HasSynced,
         },
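
The informer swap above reuses the existing shared informer factory (the variable keeps its coreInformerFactory name) and simply asks it for a batch/v1 Job informer, since one factory can serve informers for any API group. A minimal standalone sketch of the same wiring — package name, namespace, resync period, and event handler are all illustrative:

package build

import (
    "fmt"
    "time"

    batchv1 "k8s.io/api/batch/v1"
    "k8s.io/client-go/informers"
    "k8s.io/client-go/kubernetes"
    "k8s.io/client-go/tools/cache"
)

func startJobInformer(client kubernetes.Interface, stopCh <-chan struct{}) cache.SharedIndexInformer {
    factory := informers.NewSharedInformerFactoryWithOptions(
        client, 10*time.Minute, informers.WithNamespace("openshift-machine-config-operator"))

    jobInformer := factory.Batch().V1().Jobs().Informer()
    jobInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{
        UpdateFunc: func(_, newObj interface{}) {
            job := newObj.(*batchv1.Job)
            // Jobs surface progress as counters rather than a pod-style phase.
            fmt.Printf("job %s: active=%d succeeded=%d failed=%d\n",
                job.Name, job.Status.Active, job.Status.Succeeded, job.Status.Failed)
        },
    })

    factory.Start(stopCh)
    factory.WaitForCacheSync(stopCh)
    return jobInformer
}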

pkg/controller/build/fixtures/helpers.go (+30 -14)

@@ -9,45 +9,61 @@ import (
     mcfgv1alpha1 "github.com/openshift/api/machineconfiguration/v1alpha1"
     ctrlcommon "github.com/openshift/machine-config-operator/pkg/controller/common"
     "github.com/stretchr/testify/require"
+    batchv1 "k8s.io/api/batch/v1"
     corev1 "k8s.io/api/core/v1"
     k8serrors "k8s.io/apimachinery/pkg/api/errors"
     metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+    apitypes "k8s.io/apimachinery/pkg/types"
     clientset "k8s.io/client-go/kubernetes"
 )

-// Sets the provided pod phase on a given pod under test. If successful, it will also insert the digestfile ConfigMap.
-func SetPodPhase(ctx context.Context, t *testing.T, kubeclient clientset.Interface, mosb *mcfgv1alpha1.MachineOSBuild, phase corev1.PodPhase) {
-    require.NoError(t, setPodPhase(ctx, kubeclient, mosb, phase))
+// JobStatus is used to set the active, succeeded, and failed fields for the k8s Job
+// Status so that we can parse those to imitate Job phases for testing purposes.
+type JobStatus struct {
+    Active                        int32
+    Succeeded                     int32
+    Failed                        int32
+    UncountedTerminatedPodsFailed string
+}
+
+// Sets the provided job status on a given job under test. If successful, it will also insert the digestfile ConfigMap.
+func SetJobStatus(ctx context.Context, t *testing.T, kubeclient clientset.Interface, mosb *mcfgv1alpha1.MachineOSBuild, jobStatus JobStatus) {
+    require.NoError(t, setJobStatusFields(ctx, kubeclient, mosb, jobStatus))

-    if phase == corev1.PodSucceeded {
+    if jobStatus.Succeeded == 1 {
         require.NoError(t, createDigestfileConfigMap(ctx, kubeclient, mosb))
     } else {
         require.NoError(t, deleteDigestfileConfigMap(ctx, kubeclient, mosb))
     }
 }

-func SetPodDeletionTimestamp(ctx context.Context, t *testing.T, kubeclient clientset.Interface, mosb *mcfgv1alpha1.MachineOSBuild, timestamp *metav1.Time) {
-    podName := fmt.Sprintf("build-%s", mosb.Name)
+func SetJobDeletionTimestamp(ctx context.Context, t *testing.T, kubeclient clientset.Interface, mosb *mcfgv1alpha1.MachineOSBuild, timestamp *metav1.Time) {
+    jobName := fmt.Sprintf("build-%s", mosb.Name)

-    p, err := kubeclient.CoreV1().Pods(ctrlcommon.MCONamespace).Get(ctx, podName, metav1.GetOptions{})
+    j, err := kubeclient.BatchV1().Jobs(ctrlcommon.MCONamespace).Get(ctx, jobName, metav1.GetOptions{})
     require.NoError(t, err)

-    p.SetDeletionTimestamp(timestamp)
+    j.SetDeletionTimestamp(timestamp)

-    _, err = kubeclient.CoreV1().Pods(ctrlcommon.MCONamespace).UpdateStatus(ctx, p, metav1.UpdateOptions{})
+    _, err = kubeclient.BatchV1().Jobs(ctrlcommon.MCONamespace).UpdateStatus(ctx, j, metav1.UpdateOptions{})
     require.NoError(t, err)
 }

-func setPodPhase(ctx context.Context, kubeclient clientset.Interface, mosb *mcfgv1alpha1.MachineOSBuild, phase corev1.PodPhase) error {
-    podName := fmt.Sprintf("build-%s", mosb.Name)
+func setJobStatusFields(ctx context.Context, kubeclient clientset.Interface, mosb *mcfgv1alpha1.MachineOSBuild, jobStatus JobStatus) error {
+    jobName := fmt.Sprintf("build-%s", mosb.Name)

-    p, err := kubeclient.CoreV1().Pods(ctrlcommon.MCONamespace).Get(ctx, podName, metav1.GetOptions{})
+    j, err := kubeclient.BatchV1().Jobs(ctrlcommon.MCONamespace).Get(ctx, jobName, metav1.GetOptions{})
     if err != nil {
         return err
     }

-    p.Status.Phase = phase
-    _, err = kubeclient.CoreV1().Pods(ctrlcommon.MCONamespace).UpdateStatus(ctx, p, metav1.UpdateOptions{})
+    j.Status.Active = jobStatus.Active
+    j.Status.Succeeded = jobStatus.Succeeded
+    j.Status.Failed = jobStatus.Failed
+    if jobStatus.UncountedTerminatedPodsFailed != "" {
+        j.Status.UncountedTerminatedPods = &batchv1.UncountedTerminatedPods{Failed: []apitypes.UID{apitypes.UID(jobStatus.UncountedTerminatedPodsFailed)}}
+    }
+    _, err = kubeclient.BatchV1().Jobs(ctrlcommon.MCONamespace).UpdateStatus(ctx, j, metav1.UpdateOptions{})
    return err
 }
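
Because a Job exposes counters (plus UncountedTerminatedPods) instead of a single pod-style phase, the test fixtures — and any status-reporting code — have to derive a state from those fields. A minimal sketch of such a mapping, assuming one build pod runs at a time; the function and state names are illustrative, not the controller's actual API:

package fixtures

import (
    batchv1 "k8s.io/api/batch/v1"
)

// buildState collapses Job status counters into a coarse, pod-phase-like
// string for test assertions.
func buildState(status batchv1.JobStatus) string {
    switch {
    case status.Succeeded > 0:
        return "Succeeded"
    case status.Failed > 0:
        return "Failed"
    case status.UncountedTerminatedPods != nil && len(status.UncountedTerminatedPods.Failed) > 0:
        // A pod terminated with an error, but the Job controller has not
        // yet folded it into the Failed counter.
        return "Failing"
    case status.Active > 0:
        return "Running"
    default:
        return "Pending"
    }
}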

pkg/controller/build/imagebuilder/base.go (+2 -2)

@@ -379,12 +379,12 @@ func (b *baseImageBuilder) getMachineOSConfigName() (string, error) {
     return b.builder.MachineOSBuild()
 }

-// Gets the name of the builder execution unit (could be a Pod or Job) by
+// Gets the name of the builder execution unit by
 // either looking for the MachineOSBuild name and computing it or by getting it
 // directly from the Builder object.
 func (b *baseImageBuilder) getBuilderName() string {
     if b.mosb != nil {
-        return utils.GetBuildPodName(b.mosb)
+        return utils.GetBuildJobName(b.mosb)
     }

     return b.builder.GetObject().GetName()
