Skip to content

Commit 361d3c3

Browse files
authored
Injector stability improvements (zarf-dev#510)
- Switch to default image pull policy to simplify connected-cloud deployments
- Add more code comments to clarify podspec in the code
- Add node affinity for more reliable image mounting with multi-node clusters
- Extend the configmap push interval 100ms -> 250ms to reduce mount failures/control plane pressure (cost is slightly slower configmap apply)
- Remove unused function k8s.GetImages()
1 parent f3233f0 commit 361d3c3

File tree

2 files changed

+57
-31
lines changed

2 files changed

+57
-31
lines changed

src/internal/k8s/images.go

+20-13
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,10 @@ import (
1010
)
1111

1212
type ImageMap map[string]bool
13+
type ImageNodeMap map[string][]string
1314

14-
// GetAllImages returns a list of images found in pods in the cluster.
15-
func GetAllImages() ([]string, error) {
16-
var images []string
17-
var err error
15+
// GetAllImages returns a list of images and their nodes found in pods in the cluster.
16+
func GetAllImages() (ImageNodeMap, error) {
1817
timeout := time.After(5 * time.Minute)
1918

2019
for {
@@ -24,13 +23,12 @@ func GetAllImages() ([]string, error) {
2423

2524
// on timeout abort
2625
case <-timeout:
27-
message.Debug("get image list timed-out")
28-
return images, nil
26+
return nil, fmt.Errorf("get image list timed-out")
2927

3028
// after delay, try running
3129
default:
3230
// If no images or an error, log and loop
33-
if images, err = GetImages(corev1.NamespaceAll); len(images) < 1 || err != nil {
31+
if images, err := GetImagesWithNodes(corev1.NamespaceAll); len(images) < 1 || err != nil {
3432
message.Debug(err)
3533
} else {
3634
// Otherwise, return the image list
@@ -40,20 +38,29 @@ func GetAllImages() ([]string, error) {
4038
}
4139
}
4240

43-
// GetImages returns all images in pods in a given namespace.
44-
func GetImages(namespace string) ([]string, error) {
45-
images := make(ImageMap)
41+
// GetImagesWithNodes returns all images and their nodes in a given namespace.
42+
func GetImagesWithNodes(namespace string) (ImageNodeMap, error) {
43+
result := make(ImageNodeMap)
4644

4745
pods, err := GetPods(namespace)
4846
if err != nil {
49-
return []string{}, fmt.Errorf("unable to get the list of pods in the cluster")
47+
return nil, fmt.Errorf("unable to get the list of pods in the cluster")
5048
}
5149

5250
for _, pod := range pods.Items {
53-
images = BuildImageMap(images, pod.Spec)
51+
node := pod.Spec.NodeName
52+
for _, container := range pod.Spec.InitContainers {
53+
result[container.Image] = append(result[container.Image], node)
54+
}
55+
for _, container := range pod.Spec.Containers {
56+
result[container.Image] = append(result[container.Image], node)
57+
}
58+
for _, container := range pod.Spec.EphemeralContainers {
59+
result[container.Image] = append(result[container.Image], node)
60+
}
5461
}
5562

56-
return SortImages(images, nil), nil
63+
return result, nil
5764
}
5865

5966
// BuildImageMap looks for init container, ephemeral and regular container images.

src/internal/packager/injector.go

+37-18
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ func runInjectionMadness(tempPath tempPaths) {
2929
defer spinner.Stop()
3030

3131
var err error
32-
var images []string
32+
var images k8s.ImageNodeMap
3333
var envVars []corev1.EnvVar
3434
var payloadConfigmaps []string
3535
var sha256sum string
@@ -72,19 +72,19 @@ func runInjectionMadness(tempPath tempPaths) {
7272
zarfImageRegex := regexp.MustCompile(`(?m)^127\.0\.0\.1:`)
7373

7474
// Try to create an injector pod using an existing image in the cluster
75-
for _, image := range images {
75+
for image, node := range images {
7676
// Don't try to run against the seed image if this is a secondary zarf init run
7777
if zarfImageRegex.MatchString(image) {
7878
continue
7979
}
8080

81-
spinner.Updatef("Attempting to bootstrap with the %s", image)
81+
spinner.Updatef("Attempting to bootstrap with the %s/%s", node, image)
8282

8383
// Make sure the pod is not there first
8484
_ = k8s.DeletePod(k8s.ZarfNamespace, "injector")
8585

86-
// Update the podspec image path
87-
pod := buildInjectionPod(image, envVars, payloadConfigmaps, sha256sum)
86+
// Update the podspec image path and use the first node found
87+
pod := buildInjectionPod(node[0], image, envVars, payloadConfigmaps, sha256sum)
8888

8989
// Create the pod in the cluster
9090
pod, err = k8s.CreatePod(pod)
@@ -177,8 +177,8 @@ func createPayloadConfigmaps(tempPath tempPaths, spinner *message.Spinner) ([]st
177177
// Add the configmap to the configmaps slice for later usage in the pod
178178
configMaps = append(configMaps, fileName)
179179

180-
// Give the control plane a slight buffer
181-
time.Sleep(100 * time.Millisecond)
180+
// Give the control plane a 250ms buffer between each configmap
181+
time.Sleep(250 * time.Millisecond)
182182
}
183183

184184
return configMaps, sha256sum, nil
@@ -301,21 +301,30 @@ func buildEnvVars(tempPath tempPaths) ([]corev1.EnvVar, error) {
301301
}
302302

303303
// buildInjectionPod returns a pod for injection with the appropriate containers to perform the injection
304-
func buildInjectionPod(image string, envVars []corev1.EnvVar, payloadConfigmaps []string, payloadShasum string) *corev1.Pod {
304+
func buildInjectionPod(node, image string, envVars []corev1.EnvVar, payloadConfigmaps []string, payloadShasum string) *corev1.Pod {
305305
pod := k8s.GeneratePod("injector", k8s.ZarfNamespace)
306306
executeMode := int32(0777)
307307
seedImage := config.GetSeedImage()
308308

309309
pod.Labels["app"] = "zarf-injector"
310310

311+
// Bind the pod to the node the image was found on
312+
pod.Spec.NodeSelector = map[string]string{"kubernetes.io/hostname": node}
313+
314+
// Do not try to restart the pod as it will be deleted/re-created instead
311315
pod.Spec.RestartPolicy = corev1.RestartPolicyNever
316+
317+
// Init container used to combine and decompress the split tarball into the stage2 directory for use in the main container
312318
pod.Spec.InitContainers = []corev1.Container{
313319
{
314-
Name: "init-injector",
315-
Image: image,
316-
ImagePullPolicy: corev1.PullNever,
317-
WorkingDir: "/zarf-stage1",
318-
Command: []string{"/zarf-stage1/zarf-injector", payloadShasum},
320+
Name: "init-injector",
321+
// An existing image already present on the cluster
322+
Image: image,
323+
// PullIfNotPresent because some distros provide a way (even in airgap) to pull images from local or direct-connected registries
324+
ImagePullPolicy: corev1.PullIfNotPresent,
325+
// This directory is filled via the configmap injections
326+
WorkingDir: "/zarf-stage1",
327+
Command: []string{"/zarf-stage1/zarf-injector", payloadShasum},
319328

320329
VolumeMounts: []corev1.VolumeMount{
321330
{
@@ -329,6 +338,7 @@ func buildInjectionPod(image string, envVars []corev1.EnvVar, payloadConfigmaps
329338
},
330339
},
331340

341+
// Keep resources as light as possible as we aren't actually running the container's other binaries
332342
Resources: corev1.ResourceRequirements{
333343
Requests: corev1.ResourceList{
334344
corev1.ResourceCPU: resource.MustParse(".5"),
@@ -344,26 +354,32 @@ func buildInjectionPod(image string, envVars []corev1.EnvVar, payloadConfigmaps
344354
},
345355
}
346356

357+
// Container definition for the injector pod
347358
pod.Spec.Containers = []corev1.Container{
348359
{
349-
Name: "injector",
350-
Image: image,
351-
ImagePullPolicy: corev1.PullNever,
352-
WorkingDir: "/zarf-stage2",
360+
Name: "injector",
361+
// An existing image already present on the cluster
362+
Image: image,
363+
// PullIfNotPresent because some distros provide a way (even in airgap) to pull images from local or direct-connected registries
364+
ImagePullPolicy: corev1.PullIfNotPresent,
365+
// This directory's contents come from the init container output
366+
WorkingDir: "/zarf-stage2",
353367
Command: []string{
354368
"/zarf-stage2/zarf-registry",
355369
"/zarf-stage2/seed-image.tar",
356370
seedImage,
357371
utils.SwapHost(seedImage, "127.0.0.1:5001"),
358372
},
359373

374+
// Shared mount between the init and regular containers
360375
VolumeMounts: []corev1.VolumeMount{
361376
{
362377
Name: "stage2",
363378
MountPath: "/zarf-stage2",
364379
},
365380
},
366381

382+
// Keep resources as light as possible as we aren't actually running the container's other binaries
367383
Resources: corev1.ResourceRequirements{
368384
Requests: corev1.ResourceList{
369385
corev1.ResourceCPU: resource.MustParse(".5"),
@@ -380,7 +396,7 @@ func buildInjectionPod(image string, envVars []corev1.EnvVar, payloadConfigmaps
380396
}
381397

382398
pod.Spec.Volumes = []corev1.Volume{
383-
// Bin volume hosts the injector binary and init script
399+
// Stage1 contains the rust binary and collection of configmaps from the tarball (go binary + seed image)
384400
{
385401
Name: "stage1",
386402
VolumeSource: corev1.VolumeSource{
@@ -392,6 +408,7 @@ func buildInjectionPod(image string, envVars []corev1.EnvVar, payloadConfigmaps
392408
},
393409
},
394410
},
411+
// Stage2 is an empty directory shared between the containers
395412
{
396413
Name: "stage2",
397414
VolumeSource: corev1.VolumeSource{
@@ -402,6 +419,7 @@ func buildInjectionPod(image string, envVars []corev1.EnvVar, payloadConfigmaps
402419

403420
// Iterate over all the payload configmaps and add their mounts
404421
for _, filename := range payloadConfigmaps {
422+
// Create the configmap volume from the given filename
405423
pod.Spec.Volumes = append(pod.Spec.Volumes, corev1.Volume{
406424
Name: filename,
407425
VolumeSource: corev1.VolumeSource{
@@ -413,6 +431,7 @@ func buildInjectionPod(image string, envVars []corev1.EnvVar, payloadConfigmaps
413431
},
414432
})
415433

434+
// Create the volume mount to place the new volume in the stage1 directory
416435
pod.Spec.InitContainers[0].VolumeMounts = append(pod.Spec.InitContainers[0].VolumeMounts, corev1.VolumeMount{
417436
Name: filename,
418437
MountPath: fmt.Sprintf("/zarf-stage1/%s", filename),

0 commit comments

Comments
 (0)