Skip to content

Commit

Permalink
entrypoint: in case of step command failure, write postfile
Browse files Browse the repository at this point in the history
The entrypoint package wraps the step commands and execute them. This
allows use to use pods containers with some order. In a step, the
entrypoint binary will wait for the file of the previous step to be
present to execute the actual command.

Before this change, if a command failed (`exit 1` or something),
entrypoint would not write a file, and thus the whole pod would be
stuck running (all the next step would wait forever).

This fixes that by always writing the post-file — and making
the *waiter* a bit smarter :

- it will now look for a `{postfile}.err` to detect if the previous
  step failed or not.
- if the previous steps failed, it will fail too without executing the
  step commands.

Signed-off-by: Vincent Demeester <vdemeest@redhat.com>
  • Loading branch information
vdemeester committed Mar 27, 2019
1 parent 2f7dbd9 commit d86f71b
Show file tree
Hide file tree
Showing 4 changed files with 127 additions and 31 deletions.
37 changes: 14 additions & 23 deletions cmd/entrypoint/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,10 @@ package main

import (
"flag"
"fmt"
"log"
"os"
"os/exec"
"syscall"
"time"

"github.com/tektoncd/pipeline/pkg/entrypoint"
Expand Down Expand Up @@ -55,15 +55,20 @@ type RealWaiter struct{ waitFile string }

var _ entrypoint.Waiter = (*RealWaiter)(nil)

func (*RealWaiter) Wait(file string) {
func (*RealWaiter) Wait(file string) error {
if file == "" {
return
return nil
}
for ; ; time.Sleep(time.Second) {
// Watch for the post file
if _, err := os.Stat(file); err == nil {
return
return nil
} else if !os.IsNotExist(err) {
log.Fatalf("Waiting for %q: %v", file, err)
return fmt.Errorf("Waiting for %q: %v", file, err)
}
// Watch for the post error file
if _, err := os.Stat(file + ".err"); err == nil {
return fmt.Errorf("error file present, bail")
}
}
}
Expand All @@ -73,9 +78,9 @@ type RealRunner struct{}

var _ entrypoint.Runner = (*RealRunner)(nil)

func (*RealRunner) Run(args ...string) {
func (*RealRunner) Run(args ...string) error {
if len(args) == 0 {
return
return nil
}
name, args := args[0], args[1:]

Expand All @@ -84,20 +89,9 @@ func (*RealRunner) Run(args ...string) {
cmd.Stderr = os.Stderr

if err := cmd.Run(); err != nil {
if exiterr, ok := err.(*exec.ExitError); ok {
// Copied from https://stackoverflow.com/questions/10385551/get-exit-code-go
// This works on both Unix and Windows. Although
// package syscall is generally platform dependent,
// WaitStatus is defined for both Unix and Windows and
// in both cases has an ExitStatus() method with the
// same signature.
if status, ok := exiterr.Sys().(syscall.WaitStatus); ok {
os.Exit(status.ExitStatus())
}
log.Fatalf("Error executing command (ExitError): %v", err)
}
log.Fatalf("Error executing command: %v", err)
return err
}
return nil
}

// RealPostWriter actually writes files.
Expand All @@ -106,9 +100,6 @@ type RealPostWriter struct{}
var _ entrypoint.PostWriter = (*RealPostWriter)(nil)

func (*RealPostWriter) Write(file string) {
if file == "" {
return
}
if _, err := os.Create(file); err != nil {
log.Fatalf("Creating %q: %v", file, err)
}
Expand Down
10 changes: 8 additions & 2 deletions pkg/entrypoint/entrypoint_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,11 +102,17 @@ func TestEntrypointer(t *testing.T) {

type fakeWaiter struct{ waited *string }

func (f *fakeWaiter) Wait(file string) { f.waited = &file }
func (f *fakeWaiter) Wait(file string) error {
f.waited = &file
return nil
}

type fakeRunner struct{ args *[]string }

func (f *fakeRunner) Run(args ...string) { f.args = &args }
func (f *fakeRunner) Run(args ...string) error {
f.args = &args
return nil
}

type fakePostWriter struct{ wrote *string }

Expand Down
50 changes: 44 additions & 6 deletions pkg/entrypoint/entrypointer.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,14 @@ limitations under the License.

package entrypoint

import (
"fmt"
"log"
"os"
"os/exec"
"syscall"
)

// Entrypointer holds fields for running commands with redirected
// entrypoints.
type Entrypointer struct {
Expand All @@ -41,12 +49,12 @@ type Entrypointer struct {
// Waiter encapsulates waiting for files to exist.
type Waiter interface {
// Wait blocks until the specified file exists.
Wait(file string)
Wait(file string) error
}

// Runner encapsulates running commands.
type Runner interface {
Run(args ...string)
Run(args ...string) error
}

// PostWriter encapsulates writing a file when complete.
Expand All @@ -59,15 +67,45 @@ type PostWriter interface {
// post file.
func (e Entrypointer) Go() {
if e.WaitFile != "" {
e.Waiter.Wait(e.WaitFile)
if err := e.Waiter.Wait(e.WaitFile); err != nil {
// An error happened while waiting, so we bail
// *but* we write postfile to make next steps bail too
e.WritePostFile(e.PostFile, err)
return
}
}

if e.Entrypoint != "" {
e.Args = append([]string{e.Entrypoint}, e.Args...)
}
e.Runner.Run(e.Args...)

if e.PostFile != "" {
e.PostWriter.Write(e.PostFile)
err := e.Runner.Run(e.Args...)

// Write the post file *no matter what*
e.WritePostFile(e.PostFile, err)

if err != nil {
if exiterr, ok := err.(*exec.ExitError); ok {
// Copied from https://stackoverflow.com/questions/10385551/get-exit-code-go
// This works on both Unix and Windows. Although
// package syscall is generally platform dependent,
// WaitStatus is defined for both Unix and Windows and
// in both cases has an ExitStatus() method with the
// same signature.
if status, ok := exiterr.Sys().(syscall.WaitStatus); ok {
os.Exit(status.ExitStatus())
}
log.Fatalf("Error executing command (ExitError): %v", err)
}
log.Fatalf("Error executing command: %v", err)
}
}

func (e Entrypointer) WritePostFile(postFile string, err error) {
if err != nil && postFile != "" {
postFile = fmt.Sprintf("%s.err", postFile)
}
if postFile != "" {
e.PostWriter.Write(postFile)
}
}
61 changes: 61 additions & 0 deletions test/taskrun_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
// +build e2e

/*
Copyright 2018 The Knative Authors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package test

import (
"testing"

knativetest "github.com/knative/pkg/test"
tb "github.com/tektoncd/pipeline/test/builder"
)

func TestTaskRunFailure(t *testing.T) {
c, namespace := setup(t)
t.Parallel()

knativetest.CleanupOnInterrupt(func() { tearDown(t, c, namespace) }, t.Logf)
defer tearDown(t, c, namespace)

t.Logf("Creating Task and TaskRun in namespace %s", namespace)
task := tb.Task("failing-task", namespace, tb.TaskSpec(
tb.Step("hello", "busybox",
tb.Command("/bin/sh"), tb.Args("-c", "echo hello"),
),
tb.Step("exit", "busybox",
tb.Command("/bin/sh"), tb.Args("-c", "exit 1"),
),
tb.Step("world", "busybox",
tb.Command("/bin/sh"), tb.Args("-c", "sleep 20s"),
),
))
if _, err := c.TaskClient.Create(task); err != nil {
t.Fatalf("Failed to create Task: %s", err)
}
taskRun := tb.TaskRun("failing-taskrun", namespace, tb.TaskRunSpec(
tb.TaskRunTaskRef("failing-task"),
))
if _, err := c.TaskRunClient.Create(taskRun); err != nil {
t.Fatalf("Failed to create TaskRun: %s", err)
}

t.Logf("Waiting for TaskRun in namespace %s to fail", namespace)
if err := WaitForTaskRunState(c, "failing-taskrun", TaskRunFailed("failing-taskrun"), "TaskRunFailed"); err != nil {
t.Errorf("Error waiting for TaskRun to finish: %s", err)
}
}

0 comments on commit d86f71b

Please sign in to comment.