diff --git a/design/clean_artifacts_in_csi_flow.md b/design/clean_artifacts_in_csi_flow.md index 5fd4c2a8c4..2d42bb2a93 100644 --- a/design/clean_artifacts_in_csi_flow.md +++ b/design/clean_artifacts_in_csi_flow.md @@ -95,7 +95,7 @@ func DeleteReadyVolumeSnapshot( ### Restore -#### Restore the VolumeSnapshotContent from the backup instead of creating a new one dynamically +#### Restore the VolumeSnapshotContent The current behavior of VSC restoration is that the VSC from the backup is restore, and the restored VS also triggers creating a new VSC dynamically. Two VSCs created for the same VS in one restore seems not right. @@ -106,6 +106,16 @@ If the `SkipRestore` is set true in the restore action's result, the secret retu As a result, restore the VSC from the backup, and setup the VSC and the VS's relation is a better choice. +Another consideration is the VSC name should not be the same as the backed-up VSC's, because the older version Velero's restore and backup keep the VSC after completion. + +There's high possibility that the restore will fail due to the VSC already exists in the cluster. + +Multiple restores of the same backup will also meet the same problem. + +The proposed solution is using the restore's UID and the VS's name to generate sha256 hash value as the new VSC name. Both the VS and VSC RestoreItemAction can access those UIDs, and it will avoid the conflicts issues. + +The restored VS name also shares the same generated name. + The VS-referenced VSC name and the VSC's snapshot handle name are in their status. Velero restore process purges the restore resources' metadata and status before running the RestoreItemActions. @@ -114,6 +124,78 @@ As a result, we cannot read these information in the VS and VSC RestoreItemActio Fortunately, RestoreItemAction input parameters includes the `ItemFromBackup`. The status is intact in `ItemFromBackup`. +``` go +func (p *volumeSnapshotRestoreItemAction) Execute( + input *velero.RestoreItemActionExecuteInput, +) (*velero.RestoreItemActionExecuteOutput, error) { + p.log.Info("Starting VolumeSnapshotRestoreItemAction") + + if boolptr.IsSetToFalse(input.Restore.Spec.RestorePVs) { + p.log.Infof("Restore %s/%s did not request for PVs to be restored.", + input.Restore.Namespace, input.Restore.Name) + return &velero.RestoreItemActionExecuteOutput{SkipRestore: true}, nil + } + + var vs snapshotv1api.VolumeSnapshot + if err := runtime.DefaultUnstructuredConverter.FromUnstructured( + input.Item.UnstructuredContent(), &vs); err != nil { + return &velero.RestoreItemActionExecuteOutput{}, + errors.Wrapf(err, "failed to convert input.Item from unstructured") + } + + var vsFromBackup snapshotv1api.VolumeSnapshot + if err := runtime.DefaultUnstructuredConverter.FromUnstructured( + input.ItemFromBackup.UnstructuredContent(), &vsFromBackup); err != nil { + return &velero.RestoreItemActionExecuteOutput{}, + errors.Wrapf(err, "failed to convert input.Item from unstructured") + } + + // If cross-namespace restore is configured, change the namespace + // for VolumeSnapshot object to be restored + newNamespace, ok := input.Restore.Spec.NamespaceMapping[vs.GetNamespace()] + if !ok { + // Use original namespace + newNamespace = vs.Namespace + } + + if csiutil.IsVolumeSnapshotExists(newNamespace, vs.Name, p.crClient) { + p.log.Debugf("VolumeSnapshot %s already exists in the cluster. Return without change.", vs.Namespace+"/"+vs.Name) + return &velero.RestoreItemActionExecuteOutput{UpdatedItem: input.Item}, nil + } + + newVSCName := generateSha256FromRestoreAndVsUID(string(input.Restore.UID), string(vsFromBackup.UID)) + // Reset Spec to convert the VolumeSnapshot from using + // the dynamic VolumeSnapshotContent to the static one. + resetVolumeSnapshotSpecForRestore(&vs, &newVSCName) + + // Reset VolumeSnapshot annotation. By now, only change + // DeletionPolicy to Retain. + resetVolumeSnapshotAnnotation(&vs) + + vsMap, err := runtime.DefaultUnstructuredConverter.ToUnstructured(&vs) + if err != nil { + p.log.Errorf("Fail to convert VS %s to unstructured", vs.Namespace+"/"+vs.Name) + return nil, errors.WithStack(err) + } + + p.log.Infof(`Returning from VolumeSnapshotRestoreItemAction with + no additionalItems`) + + return &velero.RestoreItemActionExecuteOutput{ + UpdatedItem: &unstructured.Unstructured{Object: vsMap}, + AdditionalItems: []velero.ResourceIdentifier{}, + }, nil +} + +// generateSha256FromRestoreAndVsUID Use the restore UID and the VS UID to generate the new VSC name. +// By this way, VS and VSC RIA action can get the same VSC name. +func generateSha256FromRestoreAndVsUID(restoreUID string, vsUID string) string { + sha256Bytes := sha256.Sum256([]byte(restoreUID + "/" + vsUID)) + return "vsc-" + hex.EncodeToString(sha256Bytes[:]) +} +``` + +#### Restore the VolumeSnapshot ``` go // Execute restores a VolumeSnapshotContent object without modification // returning the snapshot lister secret, if any, as additional items to restore. @@ -128,9 +210,9 @@ func (p *volumeSnapshotContentRestoreItemAction) Execute( p.log.Info("Starting VolumeSnapshotContentRestoreItemAction") - var snapCont snapshotv1api.VolumeSnapshotContent + var vsc snapshotv1api.VolumeSnapshotContent if err := runtime.DefaultUnstructuredConverter.FromUnstructured( - input.Item.UnstructuredContent(), &snapCont); err != nil { + input.Item.UnstructuredContent(), &vsc); err != nil { return &velero.RestoreItemActionExecuteOutput{}, errors.Wrapf(err, "failed to convert input.Item from unstructured") } @@ -144,39 +226,42 @@ func (p *volumeSnapshotContentRestoreItemAction) Execute( // If cross-namespace restore is configured, change the namespace // for VolumeSnapshot object to be restored - newNamespace, ok := input.Restore.Spec.NamespaceMapping[snapCont.Spec.VolumeSnapshotRef.Namespace] + newNamespace, ok := input.Restore.Spec.NamespaceMapping[vsc.Spec.VolumeSnapshotRef.Namespace] if ok { // Update the referenced VS namespace to the mapping one. - snapCont.Spec.VolumeSnapshotRef.Namespace = newNamespace + vsc.Spec.VolumeSnapshotRef.Namespace = newNamespace } + // Reset VSC name to align with VS. + vsc.Name = generateSha256FromRestoreAndVsUID(string(input.Restore.UID), string(vscFromBackup.Spec.VolumeSnapshotRef.UID)) + // Reset the ResourceVersion and UID of referenced VolumeSnapshot. - snapCont.Spec.VolumeSnapshotRef.ResourceVersion = "" - snapCont.Spec.VolumeSnapshotRef.UID = "" + vsc.Spec.VolumeSnapshotRef.ResourceVersion = "" + vsc.Spec.VolumeSnapshotRef.UID = "" // Set the DeletionPolicy to Retain to avoid VS deletion will not trigger snapshot deletion - snapCont.Spec.DeletionPolicy = snapshotv1api.VolumeSnapshotContentRetain + vsc.Spec.DeletionPolicy = snapshotv1api.VolumeSnapshotContentRetain if vscFromBackup.Status != nil && vscFromBackup.Status.SnapshotHandle != nil { - snapCont.Spec.Source.VolumeHandle = nil - snapCont.Spec.Source.SnapshotHandle = vscFromBackup.Status.SnapshotHandle + vsc.Spec.Source.VolumeHandle = nil + vsc.Spec.Source.SnapshotHandle = vscFromBackup.Status.SnapshotHandle } else { - p.log.Errorf("fail to get snapshot handle from VSC %s status", snapCont.Name) - return nil, errors.Errorf("fail to get snapshot handle from VSC %s status", snapCont.Name) + p.log.Errorf("fail to get snapshot handle from VSC %s status", vsc.Name) + return nil, errors.Errorf("fail to get snapshot handle from VSC %s status", vsc.Name) } additionalItems := []velero.ResourceIdentifier{} - if csi.IsVolumeSnapshotContentHasDeleteSecret(&snapCont) { + if csi.IsVolumeSnapshotContentHasDeleteSecret(&vsc) { additionalItems = append(additionalItems, velero.ResourceIdentifier{ GroupResource: schema.GroupResource{Group: "", Resource: "secrets"}, - Name: snapCont.Annotations[velerov1api.PrefixedSecretNameAnnotation], - Namespace: snapCont.Annotations[velerov1api.PrefixedSecretNamespaceAnnotation], + Name: vsc.Annotations[velerov1api.PrefixedSecretNameAnnotation], + Namespace: vsc.Annotations[velerov1api.PrefixedSecretNamespaceAnnotation], }, ) } - vscMap, err := runtime.DefaultUnstructuredConverter.ToUnstructured(&snapCont) + vscMap, err := runtime.DefaultUnstructuredConverter.ToUnstructured(&vsc) if err != nil { return nil, errors.WithStack(err) } @@ -190,61 +275,6 @@ func (p *volumeSnapshotContentRestoreItemAction) Execute( } ``` -Because VSC is not restored dynamically, if we run restores two times for the same backup in the same cluster, the second restore will fail due to the VSC is there in the cluster, and it is already bound to an existing VS. - -To avoid this issue, it's better to delete the restored VS and VSC after restore completes. - -The VolumeSnapshot and VolumeSnapshotContent are delete in the `restore_finalizer_controller`. - -``` go -func (ctx *finalizerContext) deleteVolumeSnapshotAndVolumeSnapshotContent() (errs results.Result) { - for _, operation := range ctx.restoreItemOperationList.items { - if operation.Spec.RestoreItemAction == constant.PluginCsiVolumeSnapshotRestoreRIA && - operation.Status.Phase == itemoperation.OperationPhaseCompleted { - if operation.Spec.OperationID == "" || !strings.Contains(operation.Spec.OperationID, "/") { - ctx.logger.Errorf("invalid OperationID: %s", operation.Spec.OperationID) - errs.Add("", errors.Errorf("invalid OperationID: %s", operation.Spec.OperationID)) - continue - } - - operationIDParts := strings.Split(operation.Spec.OperationID, "/") - - vs := new(snapshotv1api.VolumeSnapshot) - vsc := new(snapshotv1api.VolumeSnapshotContent) - - if err := ctx.crClient.Get( - context.TODO(), - client.ObjectKey{Namespace: operationIDParts[0], Name: operationIDParts[1]}, - vs, - ); err != nil { - ctx.logger.Errorf("Fail to get the VolumeSnapshot %s: %s", operation.Spec.OperationID, err.Error()) - errs.Add(operationIDParts[0], errors.Errorf("Fail to get the VolumeSnapshot %s: %s", operation.Spec.OperationID, err.Error())) - continue - } - - if err := ctx.crClient.Delete(context.TODO(), vs); err != nil { - ctx.logger.Errorf("Fail to delete VolumeSnapshot %s: %s", operation.Spec.OperationID, err.Error()) - errs.Add(vs.Namespace, err) - } - - if vs.Status != nil && vs.Status.BoundVolumeSnapshotContentName != nil { - vsc.Name = *vs.Status.BoundVolumeSnapshotContentName - } else { - ctx.logger.Errorf("VolumeSnapshotContent %s is not ready.", vsc.Name) - errs.Add("", errors.Errorf("VolumeSnapshotContent %s is not ready.", vsc.Name)) - continue - } - - if err := ctx.crClient.Delete(context.TODO(), vsc); err != nil { - ctx.logger.Errorf("Fail to delete VolumeSnapshotContent %s: %s", vsc.Name, err.Error()) - errs.Add("", errors.Errorf("Fail to delete the VolumeSnapshotContent %s", err)) - } - } - } - - return errs -} -``` ### Backup Sync csi-volumesnapshotclasses.json, csi-volumesnapshotcontents.json, and csi-volumesnapshots.json are CSI-related metadata files in the BSL for each backup. @@ -266,8 +296,17 @@ For the VSC DeleteItemAction, we need to generate a VSC. Because we only care ab Create a static VSC, then point it to a pseudo VS, and reference to the snapshot handle should be enough. +To avoid the created VSC conflict with older version Velero B/R generated ones, the VSC name is set to `vsc-uuid`. + The following is an example of the implementation. ``` go + uuid, err := uuid.NewRandom() + if err != nil { + p.log.WithError(err).Errorf("Fail to generate the UUID to create VSC %s", snapCont.Name) + return errors.Wrapf(err, "Fail to generate the UUID to create VSC %s", snapCont.Name) + } + snapCont.Name = "vsc-" + uuid.String() + snapCont.Spec.DeletionPolicy = snapshotv1api.VolumeSnapshotContentDelete snapCont.Spec.Source = snapshotv1api.VolumeSnapshotContentSource{ diff --git a/internal/delete/actions/csi/volumesnapshotcontent_action.go b/internal/delete/actions/csi/volumesnapshotcontent_action.go index 641513eef4..8ac7d4d2ab 100644 --- a/internal/delete/actions/csi/volumesnapshotcontent_action.go +++ b/internal/delete/actions/csi/volumesnapshotcontent_action.go @@ -20,6 +20,7 @@ import ( "context" "time" + "github.com/google/uuid" snapshotv1api "github.com/kubernetes-csi/external-snapshotter/client/v7/apis/volumesnapshot/v1" "github.com/pkg/errors" "github.com/sirupsen/logrus" @@ -80,6 +81,13 @@ func (p *volumeSnapshotContentDeleteItemAction) Execute( p.log.Infof("Deleting VolumeSnapshotContent %s", snapCont.Name) + uuid, err := uuid.NewRandom() + if err != nil { + p.log.WithError(err).Errorf("Fail to generate the UUID to create VSC %s", snapCont.Name) + return errors.Wrapf(err, "Fail to generate the UUID to create VSC %s", snapCont.Name) + } + snapCont.Name = "vsc-" + uuid.String() + snapCont.Spec.DeletionPolicy = snapshotv1api.VolumeSnapshotContentDelete snapCont.Spec.Source = snapshotv1api.VolumeSnapshotContentSource{ diff --git a/internal/delete/actions/csi/volumesnapshotcontent_action_test.go b/internal/delete/actions/csi/volumesnapshotcontent_action_test.go index 805c1b82e2..223b84a832 100644 --- a/internal/delete/actions/csi/volumesnapshotcontent_action_test.go +++ b/internal/delete/actions/csi/volumesnapshotcontent_action_test.go @@ -25,6 +25,7 @@ import ( "github.com/pkg/errors" "github.com/sirupsen/logrus" "github.com/stretchr/testify/require" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/runtime" crclient "sigs.k8s.io/controller-runtime/pkg/client" @@ -156,3 +157,54 @@ func TestNewVolumeSnapshotContentDeleteItemAction(t *testing.T) { _, err1 := plugin1(logger) require.NoError(t, err1) } + +func TestCheckVSCReadiness(t *testing.T) { + tests := []struct { + name string + vsc *snapshotv1api.VolumeSnapshotContent + createVSC bool + expectErr bool + ready bool + }{ + { + name: "VSC not exist", + vsc: &snapshotv1api.VolumeSnapshotContent{ + ObjectMeta: metav1.ObjectMeta{ + Name: "vsc-1", + Namespace: "velero", + }, + }, + createVSC: false, + expectErr: true, + ready: false, + }, + { + name: "VSC not ready", + vsc: &snapshotv1api.VolumeSnapshotContent{ + ObjectMeta: metav1.ObjectMeta{ + Name: "vsc-1", + Namespace: "velero", + }, + }, + createVSC: true, + expectErr: false, + ready: false, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + ctx := context.TODO() + crClient := velerotest.NewFakeControllerRuntimeClient(t) + if test.createVSC { + require.NoError(t, crClient.Create(ctx, test.vsc)) + } + + ready, err := checkVSCReadiness(ctx, test.vsc, crClient) + require.Equal(t, test.ready, ready) + if test.expectErr { + require.Error(t, err) + } + }) + } +} diff --git a/internal/volume/volumes_information.go b/internal/volume/volumes_information.go index a89806ee01..d0c70a2b8c 100644 --- a/internal/volume/volumes_information.go +++ b/internal/volume/volumes_information.go @@ -838,6 +838,7 @@ func (t *RestoreVolumeInfoTracker) Result() []*RestoreVolumeInfo { if csiSnapshot.Spec.Source.VolumeSnapshotContentName != nil { vscName = *csiSnapshot.Spec.Source.VolumeSnapshotContentName } + volumeInfo := &RestoreVolumeInfo{ PVCNamespace: pvcNS, PVCName: pvcName, diff --git a/pkg/backup/actions/csi/pvc_action_test.go b/pkg/backup/actions/csi/pvc_action_test.go index 59da6145f4..5af6e2f331 100644 --- a/pkg/backup/actions/csi/pvc_action_test.go +++ b/pkg/backup/actions/csi/pvc_action_test.go @@ -124,8 +124,8 @@ func TestExecute(t *testing.T) { operationID: ".", expectedErr: nil, expectedPVC: builder.ForPersistentVolumeClaim("velero", "testPVC"). - ObjectMeta(builder.WithAnnotations(velerov1api.MustIncludeAdditionalItemAnnotation, "true", velerov1api.DataUploadNameAnnotation, "velero/", velerov1api.VolumeSnapshotLabel, ""), - builder.WithLabels(velerov1api.BackupNameLabel, "test", velerov1api.VolumeSnapshotLabel, "")). + ObjectMeta(builder.WithAnnotations(velerov1api.MustIncludeAdditionalItemAnnotation, "true", velerov1api.DataUploadNameAnnotation, "velero/"), + builder.WithLabels(velerov1api.BackupNameLabel, "test")). VolumeName("testPV").StorageClass("testSC").Phase(corev1.ClaimBound).Result(), }, { diff --git a/pkg/backup/actions/csi/volumesnapshot_action.go b/pkg/backup/actions/csi/volumesnapshot_action.go index 244785c0d8..7a3bc79d72 100644 --- a/pkg/backup/actions/csi/volumesnapshot_action.go +++ b/pkg/backup/actions/csi/volumesnapshot_action.go @@ -131,13 +131,13 @@ func (p *volumeSnapshotBackupItemAction) Execute( backup.Status.Phase == velerov1api.BackupPhaseFinalizingPartiallyFailed { p.log. WithField("Backup", fmt.Sprintf("%s/%s", backup.Namespace, backup.Name)). - WithField("BackupPhase", backup.Status.Phase).Debugf("Clean VolumeSnapshots.") + WithField("BackupPhase", backup.Status.Phase).Debugf("Cleaning VolumeSnapshots.") if vsc == nil { vsc = &snapshotv1api.VolumeSnapshotContent{} } - csi.DeleteVolumeSnapshot(*vs, *vsc, p.crClient, p.log) + csi.DeleteReadyVolumeSnapshot(*vs, *vsc, p.crClient, p.log) return item, nil, "", nil, nil } @@ -164,6 +164,7 @@ func (p *volumeSnapshotBackupItemAction) Execute( annotations[velerov1api.VolumeSnapshotHandleAnnotation] = *vsc.Status.SnapshotHandle annotations[velerov1api.DriverNameAnnotation] = vsc.Spec.Driver } + if vsc.Status.RestoreSize != nil { annotations[velerov1api.VolumeSnapshotRestoreSize] = resource.NewQuantity( *vsc.Status.RestoreSize, resource.BinarySI).String() diff --git a/pkg/builder/volume_snapshot_content_builder.go b/pkg/builder/volume_snapshot_content_builder.go index 3f8567e7e6..00c7dbdb8e 100644 --- a/pkg/builder/volume_snapshot_content_builder.go +++ b/pkg/builder/volume_snapshot_content_builder.go @@ -20,6 +20,7 @@ import ( snapshotv1api "github.com/kubernetes-csi/external-snapshotter/client/v7/apis/volumesnapshot/v1" v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" ) // VolumeSnapshotContentBuilder builds VolumeSnapshotContent object. @@ -60,12 +61,13 @@ func (v *VolumeSnapshotContentBuilder) DeletionPolicy(policy snapshotv1api.Delet } // VolumeSnapshotRef sets the built VolumeSnapshotContent's spec.VolumeSnapshotRef value. -func (v *VolumeSnapshotContentBuilder) VolumeSnapshotRef(namespace, name string) *VolumeSnapshotContentBuilder { +func (v *VolumeSnapshotContentBuilder) VolumeSnapshotRef(namespace, name, uid string) *VolumeSnapshotContentBuilder { v.object.Spec.VolumeSnapshotRef = v1.ObjectReference{ APIVersion: "snapshot.storage.k8s.io/v1", Kind: "VolumeSnapshot", Namespace: namespace, Name: name, + UID: types.UID(uid), } return v } diff --git a/pkg/cmd/server/plugin/plugin.go b/pkg/cmd/server/plugin/plugin.go index 1c0f0bc7d7..af159c7d41 100644 --- a/pkg/cmd/server/plugin/plugin.go +++ b/pkg/cmd/server/plugin/plugin.go @@ -17,6 +17,8 @@ limitations under the License. package plugin import ( + "time" + "github.com/sirupsen/logrus" "github.com/spf13/cobra" apiextensions "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset" @@ -163,12 +165,12 @@ func NewCommand(f client.Factory) *cobra.Command { newPvcRestoreItemAction(f), ). RegisterRestoreItemActionV2( - "velero.io/csi-volumesnapshot-restorer", - newVolumeSnapshotRestoreItemAction(f), + constant.PluginCsiVolumeSnapshotRestoreRIA, + newVolumeSnapshotRestoreItemAction(f, pluginServer.GetConfig().ResourceTimeout), ). RegisterRestoreItemActionV2( "velero.io/csi-volumesnapshotcontent-restorer", - newVolumeSnapshotContentRestoreItemAction, + newVolumeSnapshotContentRestoreItemAction(f), ). RegisterRestoreItemActionV2( "velero.io/csi-volumesnapshotclass-restorer", @@ -438,12 +440,12 @@ func newPvcRestoreItemAction(f client.Factory) plugincommon.HandlerInitializer { return csiria.NewPvcRestoreItemAction(f) } -func newVolumeSnapshotRestoreItemAction(f client.Factory) plugincommon.HandlerInitializer { - return csiria.NewVolumeSnapshotRestoreItemAction(f) +func newVolumeSnapshotRestoreItemAction(f client.Factory, resourceTimeout time.Duration) plugincommon.HandlerInitializer { + return csiria.NewVolumeSnapshotRestoreItemAction(f, resourceTimeout) } -func newVolumeSnapshotContentRestoreItemAction(logger logrus.FieldLogger) (any, error) { - return csiria.NewVolumeSnapshotContentRestoreItemAction(logger) +func newVolumeSnapshotContentRestoreItemAction(f client.Factory) plugincommon.HandlerInitializer { + return csiria.NewVolumeSnapshotContentRestoreItemAction(f) } func newVolumeSnapshotClassRestoreItemAction(logger logrus.FieldLogger) (any, error) { diff --git a/pkg/constant/constant.go b/pkg/constant/constant.go index dc2c0e558d..cfadcc5dc5 100644 --- a/pkg/constant/constant.go +++ b/pkg/constant/constant.go @@ -20,5 +20,6 @@ const ( ControllerServerStatusRequest = "server-status-request" ControllerRestoreFinalizer = "restore-finalizer" - PluginCSIPVCRestoreRIA = "velero.io/csi-pvc-restorer" + PluginCSIPVCRestoreRIA = "velero.io/csi-pvc-restorer" + PluginCsiVolumeSnapshotRestoreRIA = "velero.io/csi-volumesnapshot-restorer" ) diff --git a/pkg/controller/restore_finalizer_controller.go b/pkg/controller/restore_finalizer_controller.go index 6ca80d5756..d06849207e 100644 --- a/pkg/controller/restore_finalizer_controller.go +++ b/pkg/controller/restore_finalizer_controller.go @@ -22,31 +22,27 @@ import ( "sync" "time" - "k8s.io/apimachinery/pkg/runtime/schema" - - "github.com/vmware-tanzu/velero/pkg/constant" - "github.com/vmware-tanzu/velero/pkg/itemoperation" - "github.com/vmware-tanzu/velero/pkg/plugin/velero" - - storagev1api "k8s.io/api/storage/v1" - "github.com/pkg/errors" "github.com/sirupsen/logrus" + v1 "k8s.io/api/core/v1" + storagev1api "k8s.io/api/storage/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/utils/clock" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" - v1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/utils/clock" - "github.com/vmware-tanzu/velero/internal/hook" "github.com/vmware-tanzu/velero/internal/volume" velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1" + "github.com/vmware-tanzu/velero/pkg/constant" + "github.com/vmware-tanzu/velero/pkg/itemoperation" "github.com/vmware-tanzu/velero/pkg/metrics" "github.com/vmware-tanzu/velero/pkg/persistence" "github.com/vmware-tanzu/velero/pkg/plugin/clientmgmt" + "github.com/vmware-tanzu/velero/pkg/plugin/velero" kubeutil "github.com/vmware-tanzu/velero/pkg/util/kube" "github.com/vmware-tanzu/velero/pkg/util/results" ) diff --git a/pkg/controller/restore_finalizer_controller_test.go b/pkg/controller/restore_finalizer_controller_test.go index 518b824241..23c2b044db 100644 --- a/pkg/controller/restore_finalizer_controller_test.go +++ b/pkg/controller/restore_finalizer_controller_test.go @@ -23,17 +23,13 @@ import ( "testing" "time" - "k8s.io/apimachinery/pkg/runtime/schema" - - "github.com/vmware-tanzu/velero/pkg/itemoperation" - "github.com/vmware-tanzu/velero/pkg/plugin/velero" - "github.com/sirupsen/logrus" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/mock" "github.com/stretchr/testify/require" corev1api "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/types" testclocks "k8s.io/utils/clock/testing" ctrl "sigs.k8s.io/controller-runtime" @@ -43,10 +39,12 @@ import ( "github.com/vmware-tanzu/velero/internal/volume" velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1" "github.com/vmware-tanzu/velero/pkg/builder" + "github.com/vmware-tanzu/velero/pkg/itemoperation" "github.com/vmware-tanzu/velero/pkg/metrics" persistencemocks "github.com/vmware-tanzu/velero/pkg/persistence/mocks" "github.com/vmware-tanzu/velero/pkg/plugin/clientmgmt" pluginmocks "github.com/vmware-tanzu/velero/pkg/plugin/mocks" + "github.com/vmware-tanzu/velero/pkg/plugin/velero" velerotest "github.com/vmware-tanzu/velero/pkg/test" pkgUtilKubeMocks "github.com/vmware-tanzu/velero/pkg/util/kube/mocks" "github.com/vmware-tanzu/velero/pkg/util/results" diff --git a/pkg/restore/actions/csi/pvc_action.go b/pkg/restore/actions/csi/pvc_action.go index ef0143629c..9efd3649c6 100644 --- a/pkg/restore/actions/csi/pvc_action.go +++ b/pkg/restore/actions/csi/pvc_action.go @@ -220,8 +220,10 @@ func (p *pvcRestoreItemAction) Execute( logger.Infof("DataDownload %s/%s is created successfully.", dataDownload.Namespace, dataDownload.Name) } else { - volumeSnapshotName, ok := pvcFromBackup.Annotations[velerov1api.VolumeSnapshotLabel] - if !ok { + targetVSName := "" + if vsName, nameOK := pvcFromBackup.Annotations[velerov1api.VolumeSnapshotLabel]; nameOK { + targetVSName = util.GenerateSha256FromRestoreUIDAndVsName(string(input.Restore.UID), vsName) + } else { logger.Info("Skipping PVCRestoreItemAction for PVC,", "PVC does not have a CSI VolumeSnapshot.") // Make no change in the input PVC. @@ -229,8 +231,9 @@ func (p *pvcRestoreItemAction) Execute( UpdatedItem: input.Item, }, nil } + if err := restoreFromVolumeSnapshot( - &pvc, newNamespace, p.crClient, volumeSnapshotName, logger, + &pvc, newNamespace, p.crClient, targetVSName, logger, ); err != nil { logger.Errorf("Failed to restore PVC from VolumeSnapshot.") return nil, errors.WithStack(err) @@ -502,19 +505,17 @@ func restoreFromVolumeSnapshot( }, vs, ); err != nil { - return errors.Wrapf(err, - fmt.Sprintf("Failed to get Volumesnapshot %s/%s to restore PVC %s/%s", - newNamespace, volumeSnapshotName, newNamespace, pvc.Name), - ) + return errors.Wrapf(err, "Failed to get Volumesnapshot %s/%s to restore PVC %s/%s", + newNamespace, volumeSnapshotName, newNamespace, pvc.Name) } if _, exists := vs.Annotations[velerov1api.VolumeSnapshotRestoreSize]; exists { restoreSize, err := resource.ParseQuantity( vs.Annotations[velerov1api.VolumeSnapshotRestoreSize]) if err != nil { - return errors.Wrapf(err, fmt.Sprintf( + return errors.Wrapf(err, "Failed to parse %s from annotation on Volumesnapshot %s/%s into restore size", - vs.Annotations[velerov1api.VolumeSnapshotRestoreSize], vs.Namespace, vs.Name)) + vs.Annotations[velerov1api.VolumeSnapshotRestoreSize], vs.Namespace, vs.Name) } // It is possible that the volume provider allocated a larger // capacity volume than what was requested in the backed up PVC. diff --git a/pkg/restore/actions/csi/pvc_action_test.go b/pkg/restore/actions/csi/pvc_action_test.go index 53e1908f2f..36fa212f78 100644 --- a/pkg/restore/actions/csi/pvc_action_test.go +++ b/pkg/restore/actions/csi/pvc_action_test.go @@ -45,6 +45,7 @@ import ( "github.com/vmware-tanzu/velero/pkg/label" "github.com/vmware-tanzu/velero/pkg/plugin/velero" velerotest "github.com/vmware-tanzu/velero/pkg/test" + "github.com/vmware-tanzu/velero/pkg/util" "github.com/vmware-tanzu/velero/pkg/util/boolptr" ) @@ -546,6 +547,7 @@ func TestCancel(t *testing.T) { } func TestExecute(t *testing.T) { + vsName := util.GenerateSha256FromRestoreUIDAndVsName("restoreUID", "vsName") tests := []struct { name string backup *velerov1api.Backup @@ -573,20 +575,36 @@ func TestExecute(t *testing.T) { { name: "VolumeSnapshot cannot be found", backup: builder.ForBackup("velero", "testBackup").Result(), - restore: builder.ForRestore("velero", "testRestore").Backup("testBackup").Result(), - pvc: builder.ForPersistentVolumeClaim("velero", "testPVC").ObjectMeta(builder.WithAnnotations(velerov1api.VolumeSnapshotLabel, "testVS")).Result(), - expectedErr: "Failed to get Volumesnapshot velero/testVS to restore PVC velero/testPVC: volumesnapshots.snapshot.storage.k8s.io \"testVS\" not found", + restore: builder.ForRestore("velero", "testRestore").ObjectMeta(builder.WithUID("restoreUID")).Backup("testBackup").Result(), + pvc: builder.ForPersistentVolumeClaim("velero", "testPVC").ObjectMeta(builder.WithAnnotations(velerov1api.VolumeSnapshotLabel, "vsName")).Result(), + expectedErr: fmt.Sprintf("Failed to get Volumesnapshot velero/%s to restore PVC velero/testPVC: volumesnapshots.snapshot.storage.k8s.io \"%s\" not found", vsName, vsName), + }, + { + name: "Restore from VolumeSnapshot from the old backup, which PVC references the VS by name, not UID.", + backup: builder.ForBackup("velero", "testBackup").Result(), + restore: builder.ForRestore("velero", "testRestore").ObjectMeta(builder.WithUID("restoreUID")).Backup("testBackup").Result(), + pvc: builder.ForPersistentVolumeClaim("velero", "testPVC").ObjectMeta(builder.WithAnnotations(velerov1api.VolumeSnapshotLabel, "vsName")). + RequestResource(map[corev1api.ResourceName]resource.Quantity{corev1api.ResourceStorage: resource.MustParse("10Gi")}). + DataSource(&corev1api.TypedLocalObjectReference{APIGroup: &snapshotv1api.SchemeGroupVersion.Group, Kind: "VolumeSnapshot", Name: "testVS"}). + DataSourceRef(&corev1api.TypedObjectReference{APIGroup: &snapshotv1api.SchemeGroupVersion.Group, Kind: "VolumeSnapshot", Name: "testVS"}). + Result(), + vs: builder.ForVolumeSnapshot("velero", "vsName").ObjectMeta( + builder.WithAnnotations(velerov1api.VolumeSnapshotRestoreSize, "10Gi"), + ).Result(), + expectedPVC: builder.ForPersistentVolumeClaim("velero", "testPVC").Result(), }, { name: "Restore from VolumeSnapshot", backup: builder.ForBackup("velero", "testBackup").Result(), - restore: builder.ForRestore("velero", "testRestore").Backup("testBackup").Result(), - pvc: builder.ForPersistentVolumeClaim("velero", "testPVC").ObjectMeta(builder.WithAnnotations(velerov1api.VolumeSnapshotLabel, "testVS")). + restore: builder.ForRestore("velero", "testRestore").ObjectMeta(builder.WithUID("restoreUID")).Backup("testBackup").Result(), + pvc: builder.ForPersistentVolumeClaim("velero", "testPVC").ObjectMeta(builder.WithAnnotations(velerov1api.VolumeSnapshotLabel, "vsName")). RequestResource(map[corev1api.ResourceName]resource.Quantity{corev1api.ResourceStorage: resource.MustParse("10Gi")}). DataSource(&corev1api.TypedLocalObjectReference{APIGroup: &snapshotv1api.SchemeGroupVersion.Group, Kind: "VolumeSnapshot", Name: "testVS"}). DataSourceRef(&corev1api.TypedObjectReference{APIGroup: &snapshotv1api.SchemeGroupVersion.Group, Kind: "VolumeSnapshot", Name: "testVS"}). Result(), - vs: builder.ForVolumeSnapshot("velero", "testVS").ObjectMeta(builder.WithAnnotations(velerov1api.VolumeSnapshotRestoreSize, "10Gi")).Result(), + vs: builder.ForVolumeSnapshot("velero", vsName).ObjectMeta( + builder.WithAnnotations(velerov1api.VolumeSnapshotRestoreSize, "10Gi"), + ).Result(), expectedPVC: builder.ForPersistentVolumeClaim("velero", "testPVC").Result(), }, { diff --git a/pkg/restore/actions/csi/volumesnapshot_action.go b/pkg/restore/actions/csi/volumesnapshot_action.go index 9bac9a3361..1c1b7d69ee 100644 --- a/pkg/restore/actions/csi/volumesnapshot_action.go +++ b/pkg/restore/actions/csi/volumesnapshot_action.go @@ -17,31 +17,29 @@ limitations under the License. package csi import ( - "context" + "time" snapshotv1api "github.com/kubernetes-csi/external-snapshotter/client/v7/apis/volumesnapshot/v1" "github.com/pkg/errors" "github.com/sirupsen/logrus" - core_v1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/runtime" crclient "sigs.k8s.io/controller-runtime/pkg/client" velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1" "github.com/vmware-tanzu/velero/pkg/client" - "github.com/vmware-tanzu/velero/pkg/label" plugincommon "github.com/vmware-tanzu/velero/pkg/plugin/framework/common" "github.com/vmware-tanzu/velero/pkg/plugin/velero" + "github.com/vmware-tanzu/velero/pkg/util" "github.com/vmware-tanzu/velero/pkg/util/boolptr" - "github.com/vmware-tanzu/velero/pkg/util/csi" ) // volumeSnapshotRestoreItemAction is a Velero restore item // action plugin for VolumeSnapshots type volumeSnapshotRestoreItemAction struct { - log logrus.FieldLogger - crClient crclient.Client + log logrus.FieldLogger + crClient crclient.Client + resourceTimeout time.Duration } // AppliesTo returns information indicating that @@ -54,8 +52,7 @@ func (p *volumeSnapshotRestoreItemAction) AppliesTo() ( }, nil } -func resetVolumeSnapshotSpecForRestore( - vs *snapshotv1api.VolumeSnapshot, vscName *string) { +func resetVolumeSnapshotSpecForRestore(vs *snapshotv1api.VolumeSnapshot, vscName *string) { // Spec of the backed-up object used the PVC as the source // of the volumeSnapshot. Restore operation will however, // restore the VolumeSnapshot from the VolumeSnapshotContent @@ -68,10 +65,6 @@ func resetVolumeSnapshotAnnotation(vs *snapshotv1api.VolumeSnapshot) { string(snapshotv1api.VolumeSnapshotContentRetain) } -// Execute uses the data such as CSI driver name, storage -// snapshot handle, snapshot deletion secret (if any) from -// the annotations to recreate a VolumeSnapshotContent object -// and statically bind the VolumeSnapshot object being restored. func (p *volumeSnapshotRestoreItemAction) Execute( input *velero.RestoreItemActionExecuteInput, ) (*velero.RestoreItemActionExecuteOutput, error) { @@ -90,84 +83,29 @@ func (p *volumeSnapshotRestoreItemAction) Execute( errors.Wrapf(err, "failed to convert input.Item from unstructured") } - // If cross-namespace restore is configured, change the namespace - // for VolumeSnapshot object to be restored - newNamespace, ok := input.Restore.Spec.NamespaceMapping[vs.GetNamespace()] - if !ok { - // Use original namespace - newNamespace = vs.Namespace + var vsFromBackup snapshotv1api.VolumeSnapshot + if err := runtime.DefaultUnstructuredConverter.FromUnstructured( + input.ItemFromBackup.UnstructuredContent(), &vsFromBackup); err != nil { + return &velero.RestoreItemActionExecuteOutput{}, + errors.Wrapf(err, "failed to convert input.Item from unstructured") } - if !csi.IsVolumeSnapshotExists(newNamespace, vs.Name, p.crClient) { - snapHandle, exists := vs.Annotations[velerov1api.VolumeSnapshotHandleAnnotation] - if !exists { - return nil, errors.Errorf( - "VolumeSnapshot %s/%s does not have a %s annotation", - vs.Namespace, - vs.Name, - velerov1api.VolumeSnapshotHandleAnnotation, - ) - } + generatedName := util.GenerateSha256FromRestoreUIDAndVsName(string(input.Restore.UID), vsFromBackup.Name) - csiDriverName, exists := vs.Annotations[velerov1api.DriverNameAnnotation] - if !exists { - return nil, errors.Errorf( - "VolumeSnapshot %s/%s does not have a %s annotation", - vs.Namespace, vs.Name, velerov1api.DriverNameAnnotation) - } + // Reset Spec to convert the VolumeSnapshot from using + // the dynamic VolumeSnapshotContent to the static one. + resetVolumeSnapshotSpecForRestore(&vs, &generatedName) + // Also reset the VS name to avoid potential conflict caused by multiple restores of the same backup. + // Both VS and VSC share the same generated name. + vs.Name = generatedName - p.log.Debugf("Set VolumeSnapshotContent %s/%s DeletionPolicy to Retain to make sure VS deletion in namespace will not delete Snapshot on cloud provider.", - newNamespace, vs.Name) - - vsc := snapshotv1api.VolumeSnapshotContent{ - ObjectMeta: metav1.ObjectMeta{ - GenerateName: vs.Name + "-", - Labels: map[string]string{ - velerov1api.RestoreNameLabel: label.GetValidName(input.Restore.Name), - }, - }, - Spec: snapshotv1api.VolumeSnapshotContentSpec{ - DeletionPolicy: snapshotv1api.VolumeSnapshotContentRetain, - Driver: csiDriverName, - VolumeSnapshotRef: core_v1.ObjectReference{ - APIVersion: "snapshot.storage.k8s.io/v1", - Kind: "VolumeSnapshot", - Namespace: newNamespace, - Name: vs.Name, - }, - Source: snapshotv1api.VolumeSnapshotContentSource{ - SnapshotHandle: &snapHandle, - }, - }, - } - - // we create the VolumeSnapshotContent here instead of relying on the - // restore flow because we want to statically bind this VolumeSnapshot - // with a VolumeSnapshotContent that will be used as its source for pre-populating - // the volume that will be created as a result of the restore. To perform - // this static binding, a bi-directional link between the VolumeSnapshotContent - // and VolumeSnapshot objects have to be setup. Further, it is disallowed - // to convert a dynamically created VolumeSnapshotContent for static binding. - // See: https://github.com/kubernetes-csi/external-snapshotter/issues/274 - if err := p.crClient.Create(context.TODO(), &vsc); err != nil { - return nil, errors.Wrapf(err, - "failed to create VolumeSnapshotContents %s", - vsc.GenerateName) - } - p.log.Infof("Created VolumesnapshotContents %s with static binding to volumesnapshot %s/%s", - vsc, newNamespace, vs.Name) - - // Reset Spec to convert the VolumeSnapshot from using - // the dynamic VolumeSnapshotContent to the static one. - resetVolumeSnapshotSpecForRestore(&vs, &vsc.Name) - - // Reset VolumeSnapshot annotation. By now, only change - // DeletionPolicy to Retain. - resetVolumeSnapshotAnnotation(&vs) - } + // Reset VolumeSnapshot annotation. By now, only change + // DeletionPolicy to Retain. + resetVolumeSnapshotAnnotation(&vs) vsMap, err := runtime.DefaultUnstructuredConverter.ToUnstructured(&vs) if err != nil { + p.log.Errorf("Fail to convert VS %s to unstructured", vs.Namespace+"/"+vs.Name) return nil, errors.WithStack(err) } @@ -195,6 +133,7 @@ func (p *volumeSnapshotRestoreItemAction) Cancel( operationID string, restore *velerov1api.Restore, ) error { + // CSI Specification doesn't support canceling a snapshot creation. return nil } @@ -206,13 +145,15 @@ func (p *volumeSnapshotRestoreItemAction) AreAdditionalItemsReady( } func NewVolumeSnapshotRestoreItemAction( - f client.Factory) plugincommon.HandlerInitializer { + f client.Factory, + resourceTimeout time.Duration, +) plugincommon.HandlerInitializer { return func(logger logrus.FieldLogger) (any, error) { crClient, err := f.KubebuilderClient() if err != nil { return nil, err } - return &volumeSnapshotRestoreItemAction{logger, crClient}, nil + return &volumeSnapshotRestoreItemAction{logger, crClient, resourceTimeout}, nil } } diff --git a/pkg/restore/actions/csi/volumesnapshot_action_test.go b/pkg/restore/actions/csi/volumesnapshot_action_test.go index 1cc3f0ba94..de0526056e 100644 --- a/pkg/restore/actions/csi/volumesnapshot_action_test.go +++ b/pkg/restore/actions/csi/volumesnapshot_action_test.go @@ -20,9 +20,8 @@ import ( "context" "fmt" "testing" + "time" - "github.com/google/go-cmp/cmp" - "github.com/google/go-cmp/cmp/cmpopts" snapshotv1api "github.com/kubernetes-csi/external-snapshotter/client/v7/apis/volumesnapshot/v1" "github.com/sirupsen/logrus" "github.com/stretchr/testify/assert" @@ -36,6 +35,7 @@ import ( factorymocks "github.com/vmware-tanzu/velero/pkg/client/mocks" "github.com/vmware-tanzu/velero/pkg/plugin/velero" velerotest "github.com/vmware-tanzu/velero/pkg/test" + "github.com/vmware-tanzu/velero/pkg/util" ) var ( @@ -89,7 +89,7 @@ func TestResetVolumeSnapshotSpecForRestore(t *testing.T) { before := tc.vs.DeepCopy() resetVolumeSnapshotSpecForRestore(&tc.vs, &tc.vscName) - assert.Equalf(t, tc.vs.Name, before.Name, "unexpected change to Object.Name, Want: %s; Got %s", tc.name, before.Name, tc.vs.Name) + assert.Equalf(t, tc.vs.Name, before.Name, "unexpected change to Object.Name, Want: %s; Got %s", before.Name, tc.vs.Name) assert.Equal(t, tc.vs.Namespace, before.Namespace, "unexpected change to Object.Namespace, Want: %s; Got %s", tc.name, before.Namespace, tc.vs.Namespace) assert.NotNil(t, tc.vs.Spec.Source) assert.Nil(t, tc.vs.Spec.Source.PersistentVolumeClaimName) @@ -103,15 +103,15 @@ func TestResetVolumeSnapshotSpecForRestore(t *testing.T) { } func TestVSExecute(t *testing.T) { - snapshotHandle := "vsc" + newVscName := util.GenerateSha256FromRestoreUIDAndVsName("restoreUID", "vsName") tests := []struct { - name string - item runtime.Unstructured - vs *snapshotv1api.VolumeSnapshot - restore *velerov1api.Restore - expectErr bool - createVS bool - expectedVSC *snapshotv1api.VolumeSnapshotContent + name string + item runtime.Unstructured + vs *snapshotv1api.VolumeSnapshot + restore *velerov1api.Restore + expectErr bool + createVS bool + expectedVS *snapshotv1api.VolumeSnapshot }{ { name: "Restore's RestorePVs is false", @@ -119,45 +119,24 @@ func TestVSExecute(t *testing.T) { expectErr: false, }, { - name: "Namespace remapping and VS already exists in cluster. Nothing change", - vs: builder.ForVolumeSnapshot("ns", "name").Result(), - restore: builder.ForRestore("velero", "restore").NamespaceMappings("ns", "newNS").Result(), - createVS: true, - expectErr: false, - }, - { - name: "VS doesn't have VolumeSnapshotHandleAnnotation annotation", - vs: builder.ForVolumeSnapshot("ns", "name").Result(), - restore: builder.ForRestore("velero", "restore").NamespaceMappings("ns", "newNS").Result(), - expectErr: true, - }, - { - name: "VS doesn't have DriverNameAnnotation annotation", - vs: builder.ForVolumeSnapshot("ns", "name").ObjectMeta( - builder.WithAnnotations(velerov1api.VolumeSnapshotHandleAnnotation, ""), - ).Result(), + name: "VS doesn't have VSC in status", + vs: builder.ForVolumeSnapshot("ns", "name").ObjectMeta(builder.WithAnnotations("1", "1")).Status().Result(), restore: builder.ForRestore("velero", "restore").NamespaceMappings("ns", "newNS").Result(), expectErr: true, }, { name: "Normal case, VSC should be created", - vs: builder.ForVolumeSnapshot("ns", "test").ObjectMeta(builder.WithAnnotationsMap( - map[string]string{ - velerov1api.VolumeSnapshotHandleAnnotation: "vsc", - velerov1api.DriverNameAnnotation: "pd.csi.storage.gke.io", - }, - )).Result(), - restore: builder.ForRestore("velero", "restore").Result(), - expectErr: false, - expectedVSC: builder.ForVolumeSnapshotContent("vsc").ObjectMeta( - builder.WithLabels(velerov1api.RestoreNameLabel, "restore"), - ).VolumeSnapshotRef("ns", "test"). - DeletionPolicy(snapshotv1api.VolumeSnapshotContentRetain). - Driver("pd.csi.storage.gke.io"). - Source(snapshotv1api.VolumeSnapshotContentSource{ - SnapshotHandle: &snapshotHandle, - }). - Result(), + vs: builder.ForVolumeSnapshot("ns", "vsName").ObjectMeta( + builder.WithAnnotationsMap( + map[string]string{ + velerov1api.VolumeSnapshotHandleAnnotation: "vsc", + velerov1api.DriverNameAnnotation: "pd.csi.storage.gke.io", + }, + ), + ).SourceVolumeSnapshotContentName(newVscName).Status().BoundVolumeSnapshotContentName("vscName").Result(), + restore: builder.ForRestore("velero", "restore").ObjectMeta(builder.WithUID("restoreUID")).Result(), + expectErr: false, + expectedVS: builder.ForVolumeSnapshot("ns", "test").SourceVolumeSnapshotContentName(newVscName).Result(), }, } @@ -181,10 +160,11 @@ func TestVSExecute(t *testing.T) { } } - _, err := p.Execute( + result, err := p.Execute( &velero.RestoreItemActionExecuteInput{ - Item: test.item, - Restore: test.restore, + Item: test.item, + ItemFromBackup: test.item, + Restore: test.restore, }, ) @@ -192,18 +172,11 @@ func TestVSExecute(t *testing.T) { require.NoError(t, err) } - if test.expectedVSC != nil { - vscList := new(snapshotv1api.VolumeSnapshotContentList) - require.NoError(t, p.crClient.List(context.TODO(), vscList)) - require.True(t, cmp.Equal( - *test.expectedVSC, - vscList.Items[0], - cmpopts.IgnoreFields( - snapshotv1api.VolumeSnapshotContent{}, - "Kind", "APIVersion", "GenerateName", "Name", - "ResourceVersion", - ), - )) + if test.expectedVS != nil { + var vs snapshotv1api.VolumeSnapshot + require.NoError(t, runtime.DefaultUnstructuredConverter.FromUnstructured( + result.UpdatedItem.UnstructuredContent(), &vs)) + require.Equal(t, test.expectedVS.Spec, vs.Spec) } }) } @@ -232,13 +205,13 @@ func TestNewVolumeSnapshotRestoreItemAction(t *testing.T) { f := &factorymocks.Factory{} f.On("KubebuilderClient").Return(nil, fmt.Errorf("")) - plugin := NewVolumeSnapshotRestoreItemAction(f) + plugin := NewVolumeSnapshotRestoreItemAction(f, time.Second) _, err := plugin(logger) require.Error(t, err) f1 := &factorymocks.Factory{} f1.On("KubebuilderClient").Return(crClient, nil) - plugin1 := NewVolumeSnapshotRestoreItemAction(f1) + plugin1 := NewVolumeSnapshotRestoreItemAction(f1, time.Second) _, err1 := plugin1(logger) require.NoError(t, err1) } diff --git a/pkg/restore/actions/csi/volumesnapshotcontent_action.go b/pkg/restore/actions/csi/volumesnapshotcontent_action.go index bea63b53b9..f4b42b30ad 100644 --- a/pkg/restore/actions/csi/volumesnapshotcontent_action.go +++ b/pkg/restore/actions/csi/volumesnapshotcontent_action.go @@ -20,11 +20,16 @@ import ( snapshotv1api "github.com/kubernetes-csi/external-snapshotter/client/v7/apis/volumesnapshot/v1" "github.com/pkg/errors" "github.com/sirupsen/logrus" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/runtime/schema" + crclient "sigs.k8s.io/controller-runtime/pkg/client" velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1" + "github.com/vmware-tanzu/velero/pkg/client" + plugincommon "github.com/vmware-tanzu/velero/pkg/plugin/framework/common" "github.com/vmware-tanzu/velero/pkg/plugin/velero" + "github.com/vmware-tanzu/velero/pkg/util" "github.com/vmware-tanzu/velero/pkg/util/boolptr" "github.com/vmware-tanzu/velero/pkg/util/csi" ) @@ -32,7 +37,8 @@ import ( // volumeSnapshotContentRestoreItemAction is a restore item action // plugin for Velero type volumeSnapshotContentRestoreItemAction struct { - log logrus.FieldLogger + log logrus.FieldLogger + client crclient.Client } // AppliesTo returns information indicating VolumeSnapshotContentRestoreItemAction @@ -51,34 +57,77 @@ func (p *volumeSnapshotContentRestoreItemAction) AppliesTo() ( func (p *volumeSnapshotContentRestoreItemAction) Execute( input *velero.RestoreItemActionExecuteInput, ) (*velero.RestoreItemActionExecuteOutput, error) { - p.log.Info("Starting VolumeSnapshotContentRestoreItemAction") - var snapCont snapshotv1api.VolumeSnapshotContent if boolptr.IsSetToFalse(input.Restore.Spec.RestorePVs) { p.log.Infof("Restore did not request for PVs to be restored %s/%s", input.Restore.Namespace, input.Restore.Name) return &velero.RestoreItemActionExecuteOutput{SkipRestore: true}, nil } + + p.log.Info("Starting VolumeSnapshotContentRestoreItemAction") + + var vsc snapshotv1api.VolumeSnapshotContent if err := runtime.DefaultUnstructuredConverter.FromUnstructured( - input.Item.UnstructuredContent(), &snapCont); err != nil { + input.Item.UnstructuredContent(), &vsc); err != nil { return &velero.RestoreItemActionExecuteOutput{}, errors.Wrapf(err, "failed to convert input.Item from unstructured") } + var vscFromBackup snapshotv1api.VolumeSnapshotContent + if err := runtime.DefaultUnstructuredConverter.FromUnstructured( + input.ItemFromBackup.UnstructuredContent(), &vscFromBackup); err != nil { + return &velero.RestoreItemActionExecuteOutput{}, + errors.Errorf(err.Error(), "failed to convert input.ItemFromBackup from unstructured") + } + + // If cross-namespace restore is configured, change the namespace + // for VolumeSnapshot object to be restored + newNamespace, ok := input.Restore.Spec.NamespaceMapping[vsc.Spec.VolumeSnapshotRef.Namespace] + if ok { + // Update the referenced VS namespace to the mapping one. + vsc.Spec.VolumeSnapshotRef.Namespace = newNamespace + } + + // Reset VSC name to align with VS. + vsc.Name = util.GenerateSha256FromRestoreUIDAndVsName( + string(input.Restore.UID), vscFromBackup.Spec.VolumeSnapshotRef.Name) + // Also reset the referenced VS name. + vsc.Spec.VolumeSnapshotRef.Name = vsc.Name + + // Reset the ResourceVersion and UID of referenced VolumeSnapshot. + vsc.Spec.VolumeSnapshotRef.ResourceVersion = "" + vsc.Spec.VolumeSnapshotRef.UID = "" + + // Set the DeletionPolicy to Retain to avoid VS deletion will not trigger snapshot deletion + vsc.Spec.DeletionPolicy = snapshotv1api.VolumeSnapshotContentRetain + + if vscFromBackup.Status != nil && vscFromBackup.Status.SnapshotHandle != nil { + vsc.Spec.Source.VolumeHandle = nil + vsc.Spec.Source.SnapshotHandle = vscFromBackup.Status.SnapshotHandle + } else { + p.log.Errorf("fail to get snapshot handle from VSC %s status", vsc.Name) + return nil, errors.Errorf("fail to get snapshot handle from VSC %s status", vsc.Name) + } + additionalItems := []velero.ResourceIdentifier{} - if csi.IsVolumeSnapshotContentHasDeleteSecret(&snapCont) { + if csi.IsVolumeSnapshotContentHasDeleteSecret(&vsc) { additionalItems = append(additionalItems, velero.ResourceIdentifier{ GroupResource: schema.GroupResource{Group: "", Resource: "secrets"}, - Name: snapCont.Annotations[velerov1api.PrefixedSecretNameAnnotation], - Namespace: snapCont.Annotations[velerov1api.PrefixedSecretNamespaceAnnotation], + Name: vsc.Annotations[velerov1api.PrefixedSecretNameAnnotation], + Namespace: vsc.Annotations[velerov1api.PrefixedSecretNamespaceAnnotation], }, ) } + vscMap, err := runtime.DefaultUnstructuredConverter.ToUnstructured(&vsc) + if err != nil { + return nil, errors.WithStack(err) + } + p.log.Infof("Returning from VolumeSnapshotContentRestoreItemAction with %d additionalItems", len(additionalItems)) return &velero.RestoreItemActionExecuteOutput{ - UpdatedItem: input.Item, + UpdatedItem: &unstructured.Unstructured{Object: vscMap}, AdditionalItems: additionalItems, }, nil } @@ -108,6 +157,15 @@ func (p *volumeSnapshotContentRestoreItemAction) AreAdditionalItemsReady( return true, nil } -func NewVolumeSnapshotContentRestoreItemAction(logger logrus.FieldLogger) (any, error) { - return &volumeSnapshotContentRestoreItemAction{logger}, nil +func NewVolumeSnapshotContentRestoreItemAction( + f client.Factory, +) plugincommon.HandlerInitializer { + return func(logger logrus.FieldLogger) (any, error) { + crClient, err := f.KubebuilderClient() + if err != nil { + return nil, err + } + + return &volumeSnapshotContentRestoreItemAction{logger, crClient}, nil + } } diff --git a/pkg/restore/actions/csi/volumesnapshotcontent_action_test.go b/pkg/restore/actions/csi/volumesnapshotcontent_action_test.go index 71ff673545..579096a59c 100644 --- a/pkg/restore/actions/csi/volumesnapshotcontent_action_test.go +++ b/pkg/restore/actions/csi/volumesnapshotcontent_action_test.go @@ -17,6 +17,8 @@ limitations under the License. package csi import ( + "context" + "fmt" "testing" snapshotv1api "github.com/kubernetes-csi/external-snapshotter/client/v7/apis/volumesnapshot/v1" @@ -27,18 +29,25 @@ import ( velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1" "github.com/vmware-tanzu/velero/pkg/builder" + factorymocks "github.com/vmware-tanzu/velero/pkg/client/mocks" "github.com/vmware-tanzu/velero/pkg/kuberesource" "github.com/vmware-tanzu/velero/pkg/plugin/velero" + velerotest "github.com/vmware-tanzu/velero/pkg/test" + "github.com/vmware-tanzu/velero/pkg/util" ) func TestVSCExecute(t *testing.T) { + snapshotHandleName := "testHandle" + newVscName := util.GenerateSha256FromRestoreUIDAndVsName("restoreUID", "vsName") tests := []struct { name string item runtime.Unstructured vsc *snapshotv1api.VolumeSnapshotContent restore *velerov1api.Restore expectErr bool + createVSC bool expectedItems []velero.ResourceIdentifier + expectedVSC *snapshotv1api.VolumeSnapshotContent }{ { name: "Restore's RestorePVs is false", @@ -46,14 +55,16 @@ func TestVSCExecute(t *testing.T) { expectErr: false, }, { - name: "Normal case, additional items should return", + name: "Normal case, additional items should return ", vsc: builder.ForVolumeSnapshotContent("test").ObjectMeta(builder.WithAnnotationsMap( map[string]string{ velerov1api.PrefixedSecretNameAnnotation: "name", velerov1api.PrefixedSecretNamespaceAnnotation: "namespace", }, - )).Result(), - restore: builder.ForRestore("velero", "restore").Result(), + )).VolumeSnapshotRef("velero", "vsName", "vsUID"). + Status(&snapshotv1api.VolumeSnapshotContentStatus{SnapshotHandle: &snapshotHandleName}).Result(), + restore: builder.ForRestore("velero", "restore").ObjectMeta(builder.WithUID("restoreUID")). + NamespaceMappings("velero", "restore").Result(), expectErr: false, expectedItems: []velero.ResourceIdentifier{ { @@ -62,26 +73,63 @@ func TestVSCExecute(t *testing.T) { Name: "name", }, }, + expectedVSC: builder.ForVolumeSnapshotContent(newVscName).ObjectMeta(builder.WithAnnotationsMap( + map[string]string{ + velerov1api.PrefixedSecretNameAnnotation: "name", + velerov1api.PrefixedSecretNamespaceAnnotation: "namespace", + }, + )).VolumeSnapshotRef("restore", newVscName, ""). + Source(snapshotv1api.VolumeSnapshotContentSource{SnapshotHandle: &snapshotHandleName}). + DeletionPolicy(snapshotv1api.VolumeSnapshotContentRetain). + Status(&snapshotv1api.VolumeSnapshotContentStatus{SnapshotHandle: &snapshotHandleName}).Result(), + }, + { + name: "VSC exists in cluster, same as the normal case", + vsc: builder.ForVolumeSnapshotContent("test").ObjectMeta(builder.WithAnnotationsMap( + map[string]string{ + velerov1api.PrefixedSecretNameAnnotation: "name", + velerov1api.PrefixedSecretNamespaceAnnotation: "namespace", + }, + )).VolumeSnapshotRef("velero", "vsName", "vsUID"). + Status(&snapshotv1api.VolumeSnapshotContentStatus{SnapshotHandle: &snapshotHandleName}).Result(), + restore: builder.ForRestore("velero", "restore").ObjectMeta(builder.WithUID("restoreUID")). + NamespaceMappings("velero", "restore").Result(), + createVSC: true, + expectErr: false, + expectedVSC: builder.ForVolumeSnapshotContent(newVscName).ObjectMeta(builder.WithAnnotationsMap( + map[string]string{ + velerov1api.PrefixedSecretNameAnnotation: "name", + velerov1api.PrefixedSecretNamespaceAnnotation: "namespace", + }, + )).VolumeSnapshotRef("restore", newVscName, ""). + Source(snapshotv1api.VolumeSnapshotContentSource{SnapshotHandle: &snapshotHandleName}). + DeletionPolicy(snapshotv1api.VolumeSnapshotContentRetain). + Status(&snapshotv1api.VolumeSnapshotContentStatus{SnapshotHandle: &snapshotHandleName}).Result(), }, } for _, test := range tests { t.Run(test.name, func(t *testing.T) { - p, err := NewVolumeSnapshotContentRestoreItemAction(logrus.StandardLogger()) - require.NoError(t, err) - - action := p.(*volumeSnapshotContentRestoreItemAction) + action := volumeSnapshotContentRestoreItemAction{ + log: logrus.StandardLogger(), + client: velerotest.NewFakeControllerRuntimeClient(t), + } if test.vsc != nil { vsMap, err := runtime.DefaultUnstructuredConverter.ToUnstructured(test.vsc) require.NoError(t, err) test.item = &unstructured.Unstructured{Object: vsMap} + + if test.createVSC { + require.NoError(t, action.client.Create(context.TODO(), test.vsc)) + } } output, err := action.Execute( &velero.RestoreItemActionExecuteInput{ - Item: test.item, - Restore: test.restore, + Item: test.item, + ItemFromBackup: test.item, + Restore: test.restore, }, ) @@ -89,6 +137,18 @@ func TestVSCExecute(t *testing.T) { require.NoError(t, err) } + if test.expectedVSC != nil { + vsc := new(snapshotv1api.VolumeSnapshotContent) + require.NoError(t, + runtime.DefaultUnstructuredConverter.FromUnstructured( + output.UpdatedItem.UnstructuredContent(), + vsc, + ), + ) + + require.Equal(t, test.expectedVSC, vsc) + } + if len(test.expectedItems) > 0 { require.Equal(t, test.expectedItems, output.AdditionalItems) } @@ -112,3 +172,20 @@ func TestVSCAppliesTo(t *testing.T) { selector, ) } + +func TestNewVolumeSnapshotContentRestoreItemAction(t *testing.T) { + logger := logrus.StandardLogger() + crClient := velerotest.NewFakeControllerRuntimeClient(t) + + f := &factorymocks.Factory{} + f.On("KubebuilderClient").Return(nil, fmt.Errorf("")) + plugin := NewVolumeSnapshotContentRestoreItemAction(f) + _, err := plugin(logger) + require.Error(t, err) + + f1 := &factorymocks.Factory{} + f1.On("KubebuilderClient").Return(crClient, nil) + plugin1 := NewVolumeSnapshotContentRestoreItemAction(f1) + _, err1 := plugin1(logger) + require.NoError(t, err1) +} diff --git a/pkg/util/csi/volume_snapshot.go b/pkg/util/csi/volume_snapshot.go index 649b877fcd..186683c63f 100644 --- a/pkg/util/csi/volume_snapshot.go +++ b/pkg/util/csi/volume_snapshot.go @@ -475,6 +475,17 @@ func IsVolumeSnapshotExists( return err == nil } +func IsVolumeSnapshotContentExists(name string, client crclient.Client) bool { + vsc := new(snapshotv1api.VolumeSnapshotContent) + err := client.Get( + context.TODO(), + crclient.ObjectKey{Name: name}, + vsc, + ) + + return err == nil +} + func SetVolumeSnapshotContentDeletionPolicy( vscName string, crClient crclient.Client, @@ -532,8 +543,7 @@ func CleanupVolumeSnapshot( } } -// DeleteVolumeSnapshot handles the VolumeSnapshot and VolumeSnapshotContent instance deletion. -func DeleteVolumeSnapshot( +func DeleteReadyVolumeSnapshot( vs snapshotv1api.VolumeSnapshot, vsc snapshotv1api.VolumeSnapshotContent, client crclient.Client, diff --git a/pkg/util/csi/volume_snapshot_test.go b/pkg/util/csi/volume_snapshot_test.go index c39a4f7fce..014cadaba5 100644 --- a/pkg/util/csi/volume_snapshot_test.go +++ b/pkg/util/csi/volume_snapshot_test.go @@ -1497,7 +1497,7 @@ func TestDeleteVolumeSnapshots(t *testing.T) { ) logger := logging.DefaultLogger(logrus.DebugLevel, logging.FormatText) - DeleteVolumeSnapshot(tc.vs, tc.vsc, client, logger) + DeleteReadyVolumeSnapshot(tc.vs, tc.vsc, client, logger) vsList := new(snapshotv1api.VolumeSnapshotList) err := client.List( diff --git a/pkg/util/util.go b/pkg/util/util.go index 2aad8ccb1c..6c5fdbb2ad 100644 --- a/pkg/util/util.go +++ b/pkg/util/util.go @@ -16,6 +16,11 @@ limitations under the License. package util +import ( + "crypto/sha256" + "encoding/hex" +) + func Contains(slice []string, key string) bool { for _, i := range slice { if i == key { @@ -24,3 +29,10 @@ func Contains(slice []string, key string) bool { } return false } + +// GenerateSha256FromRestoreUIDAndVsName Use the restore UID and the VS Name to generate +// the new VSC and VS name. By this way, VS and VSC RIA action can get the same VSC name. +func GenerateSha256FromRestoreUIDAndVsName(restoreUID string, vsName string) string { + sha256Bytes := sha256.Sum256([]byte(restoreUID + "/" + vsName)) + return hex.EncodeToString(sha256Bytes[:]) +} diff --git a/site/content/docs/main/csi.md b/site/content/docs/main/csi.md index 42d0b653cd..89f697f2db 100644 --- a/site/content/docs/main/csi.md +++ b/site/content/docs/main/csi.md @@ -109,8 +109,8 @@ Velero will include the generated VolumeSnapshot and VolumeSnapshotContent objec upload all VolumeSnapshots and VolumeSnapshotContents objects in a JSON file to the object storage system. **Note that only Kubernetes objects are uploaded to the object storage, not the data in snapshots.** -When Velero synchronizes backups into a new cluster, VolumeSnapshotContent objects and the VolumeSnapshotClass that is chosen to take -snapshot will be synced into the cluster as well, so that Velero can manage backup expiration appropriately. +From v1.16, when Velero synchronizes backups into a new cluster, the VolumeSnapshotClass that is chosen to take +snapshot will be synced into the cluster, so that Velero can manage backup expiration appropriately. The `DeletionPolicy` on the VolumeSnapshotContent will be the same as the `DeletionPolicy` on the VolumeSnapshotClass that was used to create the VolumeSnapshot. Setting a `DeletionPolicy` of `Retain` on the VolumeSnapshotClass will preserve the volume snapshot in the storage system for the lifetime of the Velero backup and will prevent the deletion of the volume snapshot, in the storage system, in the event of a disaster where the namespace with the VolumeSnapshot object may be lost.