Skip to content

Commit 603d321

Browse files
authored
Merge pull request #848 from fluxcd/remediation-stall
Early stall condition detection after remediation
2 parents abcdfef + fa5e284 commit 603d321

File tree

2 files changed

+63
-1
lines changed

2 files changed

+63
-1
lines changed

internal/reconcile/atomic_release.go

+9-1
Original file line numberDiff line numberDiff line change
@@ -262,10 +262,18 @@ func (r *AtomicRelease) Reconcile(ctx context.Context, req *Request) error {
262262
"instructed to stop after running %s action reconciler %s", next.Type(), next.Name()),
263263
)
264264

265-
if remediation := req.Object.GetActiveRemediation(); remediation == nil || !remediation.RetriesExhausted(req.Object) {
265+
remediation := req.Object.GetActiveRemediation()
266+
if remediation == nil || !remediation.RetriesExhausted(req.Object) {
266267
conditions.MarkReconciling(req.Object, meta.ProgressingWithRetryReason, conditions.GetMessage(req.Object, meta.ReadyCondition))
267268
return ErrMustRequeue
268269
}
270+
// Check if retries have exhausted after remediation for early
271+
// stall condition detection.
272+
if remediation != nil && remediation.RetriesExhausted(req.Object) {
273+
conditions.MarkStalled(req.Object, "RetriesExceeded", "Failed to %s after %d attempt(s)",
274+
req.Object.Status.LastAttemptedReleaseAction, req.Object.GetActiveRemediation().GetFailureCount(req.Object))
275+
return ErrExceededMaxRetries
276+
}
269277

270278
conditions.Delete(req.Object, meta.ReconcilingCondition)
271279
return nil

internal/reconcile/atomic_release_test.go

+54
Original file line numberDiff line numberDiff line change
@@ -629,6 +629,7 @@ func TestAtomicRelease_Reconcile_Scenarios(t *testing.T) {
629629
release.ObservedToSnapshot(release.ObserveRelease(releases[0])),
630630
}
631631
},
632+
wantErr: ErrExceededMaxRetries,
632633
},
633634
{
634635
name: "install test failure with remediation",
@@ -654,6 +655,7 @@ func TestAtomicRelease_Reconcile_Scenarios(t *testing.T) {
654655
snap,
655656
}
656657
},
658+
wantErr: ErrExceededMaxRetries,
657659
},
658660
{
659661
name: "install test failure with test ignore",
@@ -759,6 +761,7 @@ func TestAtomicRelease_Reconcile_Scenarios(t *testing.T) {
759761
release.ObservedToSnapshot(release.ObserveRelease(releases[0])),
760762
}
761763
},
764+
wantErr: ErrExceededMaxRetries,
762765
},
763766
{
764767
name: "upgrade failure with uninstall remediation",
@@ -799,6 +802,7 @@ func TestAtomicRelease_Reconcile_Scenarios(t *testing.T) {
799802
release.ObservedToSnapshot(release.ObserveRelease(releases[0])),
800803
}
801804
},
805+
wantErr: ErrExceededMaxRetries,
802806
},
803807
{
804808
name: "upgrade test failure with remediation",
@@ -841,6 +845,7 @@ func TestAtomicRelease_Reconcile_Scenarios(t *testing.T) {
841845
release.ObservedToSnapshot(release.ObserveRelease(releases[0])),
842846
}
843847
},
848+
wantErr: ErrExceededMaxRetries,
844849
},
845850
{
846851
name: "upgrade test failure with test ignore",
@@ -928,6 +933,55 @@ func TestAtomicRelease_Reconcile_Scenarios(t *testing.T) {
928933
},
929934
wantErr: ErrExceededMaxRetries,
930935
},
936+
{
937+
name: "upgrade remediation results in exhausted retries",
938+
releases: func(namespace string) []*helmrelease.Release {
939+
return []*helmrelease.Release{
940+
testutil.BuildRelease(&helmrelease.MockReleaseOptions{
941+
Name: mockReleaseName,
942+
Namespace: namespace,
943+
Version: 1,
944+
Chart: testutil.BuildChart(),
945+
Status: helmrelease.StatusSuperseded,
946+
}),
947+
testutil.BuildRelease(&helmrelease.MockReleaseOptions{
948+
Name: mockReleaseName,
949+
Namespace: namespace,
950+
Version: 2,
951+
Chart: testutil.BuildChart(),
952+
Status: helmrelease.StatusSuperseded,
953+
}),
954+
testutil.BuildRelease(&helmrelease.MockReleaseOptions{
955+
Name: mockReleaseName,
956+
Namespace: namespace,
957+
Version: 3,
958+
Chart: testutil.BuildChart(),
959+
Status: helmrelease.StatusFailed,
960+
}),
961+
}
962+
},
963+
spec: func(spec *v2.HelmReleaseSpec) {
964+
spec.Upgrade = &v2.Upgrade{
965+
Remediation: &v2.UpgradeRemediation{
966+
Retries: 1,
967+
},
968+
}
969+
},
970+
status: func(namespace string, releases []*helmrelease.Release) v2.HelmReleaseStatus {
971+
return v2.HelmReleaseStatus{
972+
History: v2.Snapshots{
973+
release.ObservedToSnapshot(release.ObserveRelease(releases[2])),
974+
release.ObservedToSnapshot(release.ObserveRelease(releases[1])),
975+
release.ObservedToSnapshot(release.ObserveRelease(releases[0])),
976+
},
977+
LastAttemptedReleaseAction: v2.ReleaseActionUpgrade,
978+
Failures: 2,
979+
UpgradeFailures: 2,
980+
}
981+
},
982+
chart: testutil.BuildChart(),
983+
wantErr: ErrExceededMaxRetries,
984+
},
931985
}
932986
for _, tt := range tests {
933987
t.Run(tt.name, func(t *testing.T) {

0 commit comments

Comments
 (0)