diff --git a/controllers/certs.go b/controllers/certs.go index f05a1b2..e438234 100644 --- a/controllers/certs.go +++ b/controllers/certs.go @@ -22,6 +22,7 @@ import ( "sigs.k8s.io/cluster-api/util/certs" "sigs.k8s.io/cluster-api/util/secret" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" "sigs.k8s.io/etcdadm/certs/pkiutil" "sigs.k8s.io/etcdadm/constants" ) @@ -87,7 +88,16 @@ func (r *EtcdadmClusterReconciler) generateCAandClientCertSecrets(ctx context.Co Generated: true, } s := apiServerClientCertKeyPair.AsSecret(client.ObjectKey{Name: cluster.Name, Namespace: cluster.Namespace}, *metav1.NewControllerRef(etcdCluster, etcdv1.GroupVersion.WithKind("EtcdadmCluster"))) - if err := r.Client.Create(ctx, s); err != nil && !apierrors.IsAlreadyExists(err) { + secretToPatch := s.DeepCopy() + + // CreateOrPatch performs a create operation when the object is not found. + // But if an object is found, the function expects to reconcile the fields we want patched in a callback func. + // Hence we keep a copy of the newly generated secret and update the secret Data field in a callback func. + // Ex; https://github.com/kubernetes-sigs/controller-runtime/blob/v0.14.5/pkg/controller/controllerutil/example_test.go + if _, err := controllerutil.CreateOrPatch(ctx, r.Client, s, func() error { + s.Data = secretToPatch.Data + return nil + }); err != nil { return fmt.Errorf("failure while saving etcd client key and certificate: %v", err) } diff --git a/controllers/controller.go b/controllers/controller.go index 12f7913..9581f6d 100644 --- a/controllers/controller.go +++ b/controllers/controller.go @@ -312,6 +312,8 @@ func (r *EtcdadmClusterReconciler) reconcile(ctx context.Context, etcdCluster *e } } conditions.MarkFalse(ep.EC, etcdv1.EtcdMachinesSpecUpToDateCondition, etcdv1.EtcdRollingUpdateInProgressReason, clusterv1.ConditionSeverityWarning, "Rolling %d replicas with outdated spec (%d replicas up to date)", len(needRollout), len(ep.Machines)-len(needRollout)) + conditions.MarkFalse(ep.EC, etcdv1.EtcdCertificatesAvailableCondition, etcdv1.EtcdRollingUpdateInProgressReason, clusterv1.ConditionSeverityWarning, "Rolling %d replicas with outdated spec (%d replicas up to date)", len(needRollout), len(ep.Machines)-len(needRollout)) + return r.upgradeEtcdCluster(ctx, cluster, etcdCluster, ep, needRollout) default: // make sure last upgrade operation is marked as completed. @@ -325,6 +327,20 @@ func (r *EtcdadmClusterReconciler) reconcile(ctx context.Context, etcdCluster *e delete(etcdCluster.Annotations, etcdv1.UpgradeInProgressAnnotation) } } + + // If the ETCD nodes have performed a rollout, the etcd client certs on the CP nodes need to be renewed. + // We mark the condition EtcdCertificatesAvailable False in the rollout case and check for its value. + // The default case is hit right after ScaleUp of ETCD nodes is completed and before the first CP comes up. + // If EtcdCertificatesAvailable is False, this means we need to update the certs. + // EtcdCertificatesAvailable is set to True once the certs are updated. + if conditions.Has(ep.EC, etcdv1.EtcdCertificatesAvailableCondition) && + conditions.IsFalse(ep.EC, etcdv1.EtcdCertificatesAvailableCondition) { + log.Info("Updating Etcd certs") + if err := r.generateCAandClientCertSecrets(ctx, cluster, etcdCluster); err != nil { + r.Log.Error(err, "error generating etcd CA certs") + return ctrl.Result{}, err + } + } } switch { diff --git a/controllers/controller_test.go b/controllers/controller_test.go index 06d7258..92f9a21 100644 --- a/controllers/controller_test.go +++ b/controllers/controller_test.go @@ -402,6 +402,62 @@ func TestReconcileDeleteOutdatedMachines(t *testing.T) { g.Expect(machineList.Items).To(BeEmpty()) } +func TestReconcileNeedsRollOutEtcdCluster(t *testing.T) { + g := NewWithT(t) + + cluster := newClusterWithExternalEtcd() + etcdadmCluster := newEtcdadmCluster(cluster) + + // CAPI machine controller has set status.Initialized to true, after the first etcd Machine is created, and after creating the Secret containing etcd init address + etcdadmCluster.Status.Initialized = true + etcdadmCluster.Spec.Replicas = pointer.Int32(int32(1)) + + // etcdadm controller has also registered that the status.Initialized field is true, so it has set InitializedCondition to true + conditions.MarkTrue(etcdadmCluster, etcdv1.InitializedCondition) + machine := newEtcdMachine(etcdadmCluster, cluster) + machine.Spec.Bootstrap = clusterv1.Bootstrap{ + ConfigRef: &corev1.ObjectReference{ + Name: testClusterName, + Namespace: testNamespace, + }, + } + + // EtcdadmConfig with a different spec to trigger rollout. + etcdadmConfig := &etcdbootstrapv1.EtcdadmConfig{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: testNamespace, + Name: testClusterName, + }, + Spec: etcdbootstrapv1.EtcdadmConfigSpec{ + EtcdadmInstallCommands: []string{"etcdadmInstallCommands is not empty"}, + CloudInitConfig: &etcdbootstrapv1.CloudInitConfig{ + Version: "v3.4.9", + }, + }, + } + + objects := []client.Object{ + cluster, + etcdadmCluster, + infraTemplate.DeepCopy(), + machine, + etcdadmConfig, + } + fakeClient := fake.NewClientBuilder().WithScheme(setupScheme()).WithObjects(objects...).Build() + + r := &EtcdadmClusterReconciler{ + Client: fakeClient, + uncachedClient: fakeClient, + Log: log.Log, + } + _, err := r.Reconcile(ctx, ctrl.Request{NamespacedName: util.ObjectKey(etcdadmCluster)}) + g.Expect(err).NotTo(HaveOccurred()) + + machineList := &clusterv1.MachineList{} + g.Expect(fakeClient.List(context.Background(), machineList, client.InNamespace("test"))).To(Succeed()) + g.Expect(len(machineList.Items)).To(Equal(2)) +} + // newClusterWithExternalEtcd return a CAPI cluster object with managed external etcd ref func newClusterWithExternalEtcd() *clusterv1.Cluster { return &clusterv1.Cluster{