Skip to content

Commit

Permalink
Fix tekton_pipelines_controller_taskrun_count recount bug
Browse files Browse the repository at this point in the history
Added before and after condition check to avoid taskrun metrics recount bug.
  • Loading branch information
khrm committed Jan 17, 2022
1 parent 097eeb4 commit 123f2ce
Show file tree
Hide file tree
Showing 3 changed files with 90 additions and 6 deletions.
2 changes: 1 addition & 1 deletion pkg/reconciler/taskrun/taskrun.go
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ func (c *Reconciler) ReconcileKind(ctx context.Context, tr *v1beta1.TaskRun) pkg
}

go func(metrics *taskrunmetrics.Recorder) {
if err := metrics.DurationAndCount(tr); err != nil {
if err := metrics.DurationAndCount(tr, before); err != nil {
logger.Warnf("Failed to log the metrics : %v", err)
}
if err := metrics.CloudEvents(tr); err != nil {
Expand Down
13 changes: 10 additions & 3 deletions pkg/taskrunmetrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import (
"go.opencensus.io/tag"
"go.uber.org/zap"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/equality"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"knative.dev/pkg/apis"
Expand Down Expand Up @@ -278,14 +279,20 @@ func nilInsertTag(task, taskrun string) []tag.Mutator {
// DurationAndCount logs the duration of TaskRun execution and
// count for number of TaskRuns succeed or failed
// returns an error if its failed to log the metrics
func (r *Recorder) DurationAndCount(tr *v1beta1.TaskRun) error {
r.mutex.Lock()
defer r.mutex.Unlock()
func (r *Recorder) DurationAndCount(tr *v1beta1.TaskRun, beforeCondition *apis.Condition) error {

if !r.initialized {
return fmt.Errorf("ignoring the metrics recording for %s , failed to initialize the metrics recorder", tr.Name)
}

afterCondition := tr.Status.GetCondition(apis.ConditionSucceeded)
if equality.Semantic.DeepEqual(beforeCondition, afterCondition) {
return nil
}

r.mutex.Lock()
defer r.mutex.Unlock()

duration := time.Since(tr.Status.StartTime.Time)
if tr.Status.CompletionTime != nil {
duration = tr.Status.CompletionTime.Sub(tr.Status.StartTime.Time)
Expand Down
81 changes: 79 additions & 2 deletions pkg/taskrunmetrics/metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ func getConfigContext() context.Context {
func TestUninitializedMetrics(t *testing.T) {
metrics := Recorder{}

if err := metrics.DurationAndCount(&v1beta1.TaskRun{}); err == nil {
if err := metrics.DurationAndCount(&v1beta1.TaskRun{}, nil); err == nil {
t.Error("DurationCount recording expected to return error but got nil")
}
if err := metrics.RunningTaskRuns(nil); err == nil {
Expand Down Expand Up @@ -132,6 +132,7 @@ func TestRecordTaskRunDurationCount(t *testing.T) {
expectedCountTags map[string]string
expectedDuration float64
expectedCount int64
beforeCondition *apis.Condition
}{{
name: "for succeeded task",
taskRun: &v1beta1.TaskRun{
Expand Down Expand Up @@ -164,6 +165,79 @@ func TestRecordTaskRunDurationCount(t *testing.T) {
},
expectedDuration: 60,
expectedCount: 1,
beforeCondition: nil,
}, {
name: "for succeeded task before condition",
taskRun: &v1beta1.TaskRun{
ObjectMeta: metav1.ObjectMeta{Name: "taskrun-1", Namespace: "ns"},
Spec: v1beta1.TaskRunSpec{
TaskRef: &v1beta1.TaskRef{Name: "task-1"},
},
Status: v1beta1.TaskRunStatus{
Status: duckv1beta1.Status{
Conditions: duckv1beta1.Conditions{{
Type: apis.ConditionSucceeded,
Status: corev1.ConditionTrue,
}},
},
TaskRunStatusFields: v1beta1.TaskRunStatusFields{
StartTime: &startTime,
CompletionTime: &completionTime,
},
},
},
metricName: "taskrun_duration_seconds",
expectedTags: map[string]string{
"task": "task-1",
"taskrun": "taskrun-1",
"namespace": "ns",
"status": "success",
},
expectedCountTags: map[string]string{
"status": "success",
},
expectedDuration: 60,
expectedCount: 1,
beforeCondition: &apis.Condition{
Type: apis.ConditionReady,
Status: corev1.ConditionUnknown,
},
}, {
name: "for succeeded task recount",
taskRun: &v1beta1.TaskRun{
ObjectMeta: metav1.ObjectMeta{Name: "taskrun-1", Namespace: "ns"},
Spec: v1beta1.TaskRunSpec{
TaskRef: &v1beta1.TaskRef{Name: "task-1"},
},
Status: v1beta1.TaskRunStatus{
Status: duckv1beta1.Status{
Conditions: duckv1beta1.Conditions{{
Type: apis.ConditionSucceeded,
Status: corev1.ConditionTrue,
}},
},
TaskRunStatusFields: v1beta1.TaskRunStatusFields{
StartTime: &startTime,
CompletionTime: &completionTime,
},
},
},
metricName: "taskrun_duration_seconds",
expectedTags: map[string]string{
"task": "task-1",
"taskrun": "taskrun-1",
"namespace": "ns",
"status": "success",
},
expectedCountTags: map[string]string{
"status": "success",
},
expectedDuration: 60,
expectedCount: 1,
beforeCondition: &apis.Condition{
Type: apis.ConditionSucceeded,
Status: corev1.ConditionTrue,
},
}, {
name: "for failed task",
taskRun: &v1beta1.TaskRun{
Expand Down Expand Up @@ -196,6 +270,7 @@ func TestRecordTaskRunDurationCount(t *testing.T) {
},
expectedDuration: 60,
expectedCount: 1,
beforeCondition: nil,
}, {
name: "for succeeded taskrun in pipelinerun",
taskRun: &v1beta1.TaskRun{
Expand Down Expand Up @@ -236,6 +311,7 @@ func TestRecordTaskRunDurationCount(t *testing.T) {
},
expectedDuration: 60,
expectedCount: 1,
beforeCondition: nil,
}, {
name: "for failed taskrun in pipelinerun",
taskRun: &v1beta1.TaskRun{
Expand Down Expand Up @@ -276,6 +352,7 @@ func TestRecordTaskRunDurationCount(t *testing.T) {
},
expectedDuration: 60,
expectedCount: 1,
beforeCondition: nil,
}} {
t.Run(c.name, func(t *testing.T) {
unregisterMetrics()
Expand All @@ -286,7 +363,7 @@ func TestRecordTaskRunDurationCount(t *testing.T) {
t.Fatalf("NewRecorder: %v", err)
}

if err := metrics.DurationAndCount(c.taskRun); err != nil {
if err := metrics.DurationAndCount(c.taskRun, nil); err != nil {
t.Errorf("DurationAndCount: %v", err)
}
metricstest.CheckLastValueData(t, c.metricName, c.expectedTags, c.expectedDuration)
Expand Down

0 comments on commit 123f2ce

Please sign in to comment.