entrypoint: in case of step command failure, write postfile

The entrypoint package wraps the step commands and execute them. This allows use to use pods containers with some order. In a step, the entrypoint binary will wait for the file of the previous step to be present to execute the actual command. Before this change, if a command failed (`exit 1` or something), entrypoint would not write a file, and thus the whole pod would be stuck running (all the next step would wait forever). This fixes that by always writing the post-file — and making the *waiter* a bit smarter : - it will now look for a `{postfile}.err` to detect if the previous step failed or not. - if the previous steps failed, it will fail too without executing the step commands. Signed-off-by: Vincent Demeester <vdemeest@redhat.com>
tektoncd · Mar 27, 2019 · d86f71b · d86f71b
1 parent 2f7dbd9
commit d86f71b
Show file tree

Hide file tree

Showing 4 changed files with 127 additions and 31 deletions.
diff --git a/cmd/entrypoint/main.go b/cmd/entrypoint/main.go
@@ -18,10 +18,10 @@ package main
 
 import (
 	"flag"
+	"fmt"
 	"log"
 	"os"
 	"os/exec"
-	"syscall"
 	"time"
 
 	"github.com/tektoncd/pipeline/pkg/entrypoint"
@@ -55,15 +55,20 @@ type RealWaiter struct{ waitFile string }
 
 var _ entrypoint.Waiter = (*RealWaiter)(nil)
 
-func (*RealWaiter) Wait(file string) {
+func (*RealWaiter) Wait(file string) error {
 	if file == "" {
-		return
+		return nil
 	}
 	for ; ; time.Sleep(time.Second) {
+		// Watch for the post file
 		if _, err := os.Stat(file); err == nil {
-			return
+			return nil
 		} else if !os.IsNotExist(err) {
-			log.Fatalf("Waiting for %q: %v", file, err)
+			return fmt.Errorf("Waiting for %q: %v", file, err)
+		}
+		// Watch for the post error file
+		if _, err := os.Stat(file + ".err"); err == nil {
+			return fmt.Errorf("error file present, bail")
 		}
 	}
 }
@@ -73,9 +78,9 @@ type RealRunner struct{}
 
 var _ entrypoint.Runner = (*RealRunner)(nil)
 
-func (*RealRunner) Run(args ...string) {
+func (*RealRunner) Run(args ...string) error {
 	if len(args) == 0 {
-		return
+		return nil
 	}
 	name, args := args[0], args[1:]
 
@@ -84,20 +89,9 @@ func (*RealRunner) Run(args ...string) {
 	cmd.Stderr = os.Stderr
 
 	if err := cmd.Run(); err != nil {
-		if exiterr, ok := err.(*exec.ExitError); ok {
-			// Copied from https://stackoverflow.com/questions/10385551/get-exit-code-go
-			// This works on both Unix and Windows. Although
-			// package syscall is generally platform dependent,
-			// WaitStatus is defined for both Unix and Windows and
-			// in both cases has an ExitStatus() method with the
-			// same signature.
-			if status, ok := exiterr.Sys().(syscall.WaitStatus); ok {
-				os.Exit(status.ExitStatus())
-			}
-			log.Fatalf("Error executing command (ExitError): %v", err)
-		}
-		log.Fatalf("Error executing command: %v", err)
+		return err
 	}
+	return nil
 }
 
 // RealPostWriter actually writes files.
@@ -106,9 +100,6 @@ type RealPostWriter struct{}
 var _ entrypoint.PostWriter = (*RealPostWriter)(nil)
 
 func (*RealPostWriter) Write(file string) {
-	if file == "" {
-		return
-	}
 	if _, err := os.Create(file); err != nil {
 		log.Fatalf("Creating %q: %v", file, err)
 	}

diff --git a/pkg/entrypoint/entrypoint_test.go b/pkg/entrypoint/entrypoint_test.go
@@ -102,11 +102,17 @@ func TestEntrypointer(t *testing.T) {
 
 type fakeWaiter struct{ waited *string }
 
-func (f *fakeWaiter) Wait(file string) { f.waited = &file }
+func (f *fakeWaiter) Wait(file string) error {
+	f.waited = &file
+	return nil
+}
 
 type fakeRunner struct{ args *[]string }
 
-func (f *fakeRunner) Run(args ...string) { f.args = &args }
+func (f *fakeRunner) Run(args ...string) error {
+	f.args = &args
+	return nil
+}
 
 type fakePostWriter struct{ wrote *string }
 

diff --git a/pkg/entrypoint/entrypointer.go b/pkg/entrypoint/entrypointer.go
@@ -16,6 +16,14 @@ limitations under the License.
 
 package entrypoint
 
+import (
+	"fmt"
+	"log"
+	"os"
+	"os/exec"
+	"syscall"
+)
+
 // Entrypointer holds fields for running commands with redirected
 // entrypoints.
 type Entrypointer struct {
@@ -41,12 +49,12 @@ type Entrypointer struct {
 // Waiter encapsulates waiting for files to exist.
 type Waiter interface {
 	// Wait blocks until the specified file exists.
-	Wait(file string)
+	Wait(file string) error
 }
 
 // Runner encapsulates running commands.
 type Runner interface {
-	Run(args ...string)
+	Run(args ...string) error
 }
 
 // PostWriter encapsulates writing a file when complete.
@@ -59,15 +67,45 @@ type PostWriter interface {
 // post file.
 func (e Entrypointer) Go() {
 	if e.WaitFile != "" {
-		e.Waiter.Wait(e.WaitFile)
+		if err := e.Waiter.Wait(e.WaitFile); err != nil {
+			// An error happened while waiting, so we bail
+			// *but* we write postfile to make next steps bail too
+			e.WritePostFile(e.PostFile, err)
+			return
+		}
 	}
 
 	if e.Entrypoint != "" {
 		e.Args = append([]string{e.Entrypoint}, e.Args...)
 	}
-	e.Runner.Run(e.Args...)
 
-	if e.PostFile != "" {
-		e.PostWriter.Write(e.PostFile)
+	err := e.Runner.Run(e.Args...)
+
+	// Write the post file *no matter what*
+	e.WritePostFile(e.PostFile, err)
+
+	if err != nil {
+		if exiterr, ok := err.(*exec.ExitError); ok {
+			// Copied from https://stackoverflow.com/questions/10385551/get-exit-code-go
+			// This works on both Unix and Windows. Although
+			// package syscall is generally platform dependent,
+			// WaitStatus is defined for both Unix and Windows and
+			// in both cases has an ExitStatus() method with the
+			// same signature.
+			if status, ok := exiterr.Sys().(syscall.WaitStatus); ok {
+				os.Exit(status.ExitStatus())
+			}
+			log.Fatalf("Error executing command (ExitError): %v", err)
+		}
+		log.Fatalf("Error executing command: %v", err)
+	}
+}
+
+func (e Entrypointer) WritePostFile(postFile string, err error) {
+	if err != nil && postFile != "" {
+		postFile = fmt.Sprintf("%s.err", postFile)
+	}
+	if postFile != "" {
+		e.PostWriter.Write(postFile)
 	}
 }
diff --git a/test/taskrun_test.go b/test/taskrun_test.go
@@ -0,0 +1,61 @@
+// +build e2e
+
+/*
+Copyright 2018 The Knative Authors
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package test
+
+import (
+	"testing"
+
+	knativetest "github.com/knative/pkg/test"
+	tb "github.com/tektoncd/pipeline/test/builder"
+)
+
+func TestTaskRunFailure(t *testing.T) {
+	c, namespace := setup(t)
+	t.Parallel()
+
+	knativetest.CleanupOnInterrupt(func() { tearDown(t, c, namespace) }, t.Logf)
+	defer tearDown(t, c, namespace)
+
+	t.Logf("Creating Task and TaskRun in namespace %s", namespace)
+	task := tb.Task("failing-task", namespace, tb.TaskSpec(
+		tb.Step("hello", "busybox",
+			tb.Command("/bin/sh"), tb.Args("-c", "echo hello"),
+		),
+		tb.Step("exit", "busybox",
+			tb.Command("/bin/sh"), tb.Args("-c", "exit 1"),
+		),
+		tb.Step("world", "busybox",
+			tb.Command("/bin/sh"), tb.Args("-c", "sleep 20s"),
+		),
+	))
+	if _, err := c.TaskClient.Create(task); err != nil {
+		t.Fatalf("Failed to create Task: %s", err)
+	}
+	taskRun := tb.TaskRun("failing-taskrun", namespace, tb.TaskRunSpec(
+		tb.TaskRunTaskRef("failing-task"),
+	))
+	if _, err := c.TaskRunClient.Create(taskRun); err != nil {
+		t.Fatalf("Failed to create TaskRun: %s", err)
+	}
+
+	t.Logf("Waiting for TaskRun in namespace %s to fail", namespace)
+	if err := WaitForTaskRunState(c, "failing-taskrun", TaskRunFailed("failing-taskrun"), "TaskRunFailed"); err != nil {
+		t.Errorf("Error waiting for TaskRun to finish: %s", err)
+	}
+}