Skip to content

Commit

Permalink
Fix tekton_pipelines_controller_taskrun_count recount bug
Browse files Browse the repository at this point in the history
Added before and after condition check to avoid taskrun metrics recount bug.
  • Loading branch information
khrm committed Jan 28, 2022
1 parent 097eeb4 commit 9a3aaee
Show file tree
Hide file tree
Showing 3 changed files with 112 additions and 21 deletions.
2 changes: 1 addition & 1 deletion pkg/reconciler/taskrun/taskrun.go
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ func (c *Reconciler) ReconcileKind(ctx context.Context, tr *v1beta1.TaskRun) pkg
}

go func(metrics *taskrunmetrics.Recorder) {
if err := metrics.DurationAndCount(tr); err != nil {
if err := metrics.DurationAndCount(tr, before); err != nil {
logger.Warnf("Failed to log the metrics : %v", err)
}
if err := metrics.CloudEvents(tr); err != nil {
Expand Down
13 changes: 10 additions & 3 deletions pkg/taskrunmetrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import (
"go.opencensus.io/tag"
"go.uber.org/zap"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/equality"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"knative.dev/pkg/apis"
Expand Down Expand Up @@ -278,14 +279,20 @@ func nilInsertTag(task, taskrun string) []tag.Mutator {
// DurationAndCount logs the duration of TaskRun execution and
// count for number of TaskRuns succeed or failed
// returns an error if its failed to log the metrics
func (r *Recorder) DurationAndCount(tr *v1beta1.TaskRun) error {
r.mutex.Lock()
defer r.mutex.Unlock()
func (r *Recorder) DurationAndCount(tr *v1beta1.TaskRun, beforeCondition *apis.Condition) error {

if !r.initialized {
return fmt.Errorf("ignoring the metrics recording for %s , failed to initialize the metrics recorder", tr.Name)
}

afterCondition := tr.Status.GetCondition(apis.ConditionSucceeded)
if equality.Semantic.DeepEqual(beforeCondition, afterCondition) {
return nil
}

r.mutex.Lock()
defer r.mutex.Unlock()

duration := time.Since(tr.Status.StartTime.Time)
if tr.Status.CompletionTime != nil {
duration = tr.Status.CompletionTime.Sub(tr.Status.StartTime.Time)
Expand Down
118 changes: 101 additions & 17 deletions pkg/taskrunmetrics/metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,12 @@ func getConfigContext() context.Context {
func TestUninitializedMetrics(t *testing.T) {
metrics := Recorder{}

if err := metrics.DurationAndCount(&v1beta1.TaskRun{}); err == nil {
beforeCondition := &apis.Condition{
Type: apis.ConditionReady,
Status: corev1.ConditionUnknown,
}

if err := metrics.DurationAndCount(&v1beta1.TaskRun{}, beforeCondition); err == nil {
t.Error("DurationCount recording expected to return error but got nil")
}
if err := metrics.RunningTaskRuns(nil); err == nil {
Expand Down Expand Up @@ -125,15 +130,16 @@ func TestMetricsOnStore(t *testing.T) {

func TestRecordTaskRunDurationCount(t *testing.T) {
for _, c := range []struct {
name string
taskRun *v1beta1.TaskRun
metricName string // "taskrun_duration_seconds" or "pipelinerun_taskrun_duration_seconds"
expectedTags map[string]string
expectedCountTags map[string]string
expectedDuration float64
expectedCount int64
name string
taskRun *v1beta1.TaskRun
metricName string // "taskrun_duration_seconds" or "pipelinerun_taskrun_duration_seconds"
expectedDurationTags map[string]string
expectedCountTags map[string]string
expectedDuration float64
expectedCount int64
beforeCondition *apis.Condition
}{{
name: "for succeeded task",
name: "for succeeded taskrun",
taskRun: &v1beta1.TaskRun{
ObjectMeta: metav1.ObjectMeta{Name: "taskrun-1", Namespace: "ns"},
Spec: v1beta1.TaskRunSpec{
Expand All @@ -153,7 +159,7 @@ func TestRecordTaskRunDurationCount(t *testing.T) {
},
},
metricName: "taskrun_duration_seconds",
expectedTags: map[string]string{
expectedDurationTags: map[string]string{
"task": "task-1",
"taskrun": "taskrun-1",
"namespace": "ns",
Expand All @@ -164,8 +170,74 @@ func TestRecordTaskRunDurationCount(t *testing.T) {
},
expectedDuration: 60,
expectedCount: 1,
beforeCondition: nil,
}, {
name: "for failed task",
name: "for succeeded taskrun with before condition",
taskRun: &v1beta1.TaskRun{
ObjectMeta: metav1.ObjectMeta{Name: "taskrun-1", Namespace: "ns"},
Spec: v1beta1.TaskRunSpec{
TaskRef: &v1beta1.TaskRef{Name: "task-1"},
},
Status: v1beta1.TaskRunStatus{
Status: duckv1beta1.Status{
Conditions: duckv1beta1.Conditions{{
Type: apis.ConditionSucceeded,
Status: corev1.ConditionTrue,
}},
},
TaskRunStatusFields: v1beta1.TaskRunStatusFields{
StartTime: &startTime,
CompletionTime: &completionTime,
},
},
},
metricName: "taskrun_duration_seconds",
expectedDurationTags: map[string]string{
"task": "task-1",
"taskrun": "taskrun-1",
"namespace": "ns",
"status": "success",
},
expectedCountTags: map[string]string{
"status": "success",
},
expectedDuration: 60,
expectedCount: 1,
beforeCondition: &apis.Condition{
Type: apis.ConditionReady,
Status: corev1.ConditionUnknown,
},
}, {
name: "for succeeded taskrun recount",
taskRun: &v1beta1.TaskRun{
ObjectMeta: metav1.ObjectMeta{Name: "taskrun-1", Namespace: "ns"},
Spec: v1beta1.TaskRunSpec{
TaskRef: &v1beta1.TaskRef{Name: "task-1"},
},
Status: v1beta1.TaskRunStatus{
Status: duckv1beta1.Status{
Conditions: duckv1beta1.Conditions{{
Type: apis.ConditionSucceeded,
Status: corev1.ConditionTrue,
}},
},
TaskRunStatusFields: v1beta1.TaskRunStatusFields{
StartTime: &startTime,
CompletionTime: &completionTime,
},
},
},
metricName: "taskrun_duration_seconds",
expectedDurationTags: nil,
expectedCountTags: nil,
expectedDuration: 0,
expectedCount: 0,
beforeCondition: &apis.Condition{
Type: apis.ConditionSucceeded,
Status: corev1.ConditionTrue,
},
}, {
name: "for failed taskrun",
taskRun: &v1beta1.TaskRun{
ObjectMeta: metav1.ObjectMeta{Name: "taskrun-1", Namespace: "ns"},
Spec: v1beta1.TaskRunSpec{
Expand All @@ -185,7 +257,7 @@ func TestRecordTaskRunDurationCount(t *testing.T) {
},
},
metricName: "taskrun_duration_seconds",
expectedTags: map[string]string{
expectedDurationTags: map[string]string{
"task": "task-1",
"taskrun": "taskrun-1",
"namespace": "ns",
Expand All @@ -196,6 +268,7 @@ func TestRecordTaskRunDurationCount(t *testing.T) {
},
expectedDuration: 60,
expectedCount: 1,
beforeCondition: nil,
}, {
name: "for succeeded taskrun in pipelinerun",
taskRun: &v1beta1.TaskRun{
Expand Down Expand Up @@ -223,7 +296,7 @@ func TestRecordTaskRunDurationCount(t *testing.T) {
},
},
metricName: "pipelinerun_taskrun_duration_seconds",
expectedTags: map[string]string{
expectedDurationTags: map[string]string{
"pipeline": "pipeline-1",
"pipelinerun": "pipelinerun-1",
"task": "task-1",
Expand All @@ -236,6 +309,7 @@ func TestRecordTaskRunDurationCount(t *testing.T) {
},
expectedDuration: 60,
expectedCount: 1,
beforeCondition: nil,
}, {
name: "for failed taskrun in pipelinerun",
taskRun: &v1beta1.TaskRun{
Expand Down Expand Up @@ -263,7 +337,7 @@ func TestRecordTaskRunDurationCount(t *testing.T) {
},
},
metricName: "pipelinerun_taskrun_duration_seconds",
expectedTags: map[string]string{
expectedDurationTags: map[string]string{
"pipeline": "pipeline-1",
"pipelinerun": "pipelinerun-1",
"task": "task-1",
Expand All @@ -276,6 +350,7 @@ func TestRecordTaskRunDurationCount(t *testing.T) {
},
expectedDuration: 60,
expectedCount: 1,
beforeCondition: nil,
}} {
t.Run(c.name, func(t *testing.T) {
unregisterMetrics()
Expand All @@ -286,11 +361,20 @@ func TestRecordTaskRunDurationCount(t *testing.T) {
t.Fatalf("NewRecorder: %v", err)
}

if err := metrics.DurationAndCount(c.taskRun); err != nil {
if err := metrics.DurationAndCount(c.taskRun, c.beforeCondition); err != nil {
t.Errorf("DurationAndCount: %v", err)
}
metricstest.CheckLastValueData(t, c.metricName, c.expectedTags, c.expectedDuration)
metricstest.CheckCountData(t, "taskrun_count", c.expectedCountTags, c.expectedCount)
if c.expectedCountTags != nil {
metricstest.CheckCountData(t, "taskrun_count", c.expectedCountTags, c.expectedCount)
} else {
metricstest.CheckStatsNotReported(t, "taskrun_count")
}
if c.expectedDurationTags != nil {
metricstest.CheckLastValueData(t, c.metricName, c.expectedDurationTags, c.expectedDuration)
} else {
metricstest.CheckStatsNotReported(t, c.metricName)

}
})
}
}
Expand Down

0 comments on commit 9a3aaee

Please sign in to comment.