Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement timeouts in checks and check hooks #893

Merged
merged 28 commits into from
Jan 18, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
cea2221
Add timeout to execution, check config, cli
Jan 9, 2018
fb03580
Bump timeout
Jan 9, 2018
24816c9
Merge branch 'feature/check-timeout' of https://github.com/sensu/sens…
Jan 9, 2018
52e778a
Regenerate protobuf with timeout at 19
Jan 9, 2018
8ee319e
Regenerate protobuf with timeout at 19
Jan 9, 2018
ebcf6f5
Merge branch 'feature/check-timeout' of https://github.com/sensu/sens…
Jan 11, 2018
ab923a5
Kill process and all of its children at timeout, remove default timeout
Jan 11, 2018
a382d58
Only start time func if timeout is set
Jan 11, 2018
0768315
Add a test case for multiple spawned processes
Jan 12, 2018
41be152
Import utility_os for process killing on different OS's
Jan 16, 2018
fa0d529
Add Timeout support to Changelog
Jan 16, 2018
16dbd0e
Regenerate protobuf with timeout at 19
Jan 9, 2018
d94ad7d
Kill process and all of its children at timeout, remove default timeout
Jan 11, 2018
98233f2
Only start time func if timeout is set
Jan 11, 2018
418fa8b
Add a test case for multiple spawned processes
Jan 12, 2018
6d1fdac
Import utility_os for process killing on different OS's
Jan 16, 2018
444b3fc
Add Timeout support to Changelog
Jan 16, 2018
2e8d993
Merge branch 'feature/check-timeout' of https://github.com/sensu/sens…
Jan 16, 2018
bdd7940
Remove dep, add proc functions build build tags
Jan 16, 2018
439a2d1
Re-run dep ensure
Jan 16, 2018
80897f9
Catch error from KillProcess
Jan 16, 2018
c63b0a2
Return immediately if we fail to kill the process, update comment
Jan 16, 2018
6c9b672
Resolve data race
Jan 17, 2018
742831b
Removing an accidental and irrelevant test!
Jan 17, 2018
74cc9ef
Create command function to set SysProcAttr in windows env
Jan 17, 2018
2d98946
Log error, set SysProcAttr for windows env
Jan 17, 2018
03324c5
Create 2 fake commands
Jan 17, 2018
4c43f49
Use && instead of ;
Jan 17, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,11 @@ and this project adheres to [Semantic
Versioning](http://semver.org/spec/v2.0.0.html).

## Unreleased
### Added
- Add `Timeout` field to CheckConfig.
- CLI functionality for check `Timeout` field.
- Add timeout support for check execution.
- Add timeout support for check hook execution.

## [2.0.0-alpha.13] - 2018-01-16
### Added
Expand Down
4 changes: 2 additions & 2 deletions Gopkg.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions agent/check_handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ func (a *Agent) executeCheck(request *types.CheckRequest) {
ex := &command.Execution{
Env: assets.Env(),
Command: checkConfig.Command,
Timeout: int(checkConfig.Timeout),
}

// If stdin is true, add JSON event data to command execution.
Expand Down
1 change: 1 addition & 0 deletions agent/hook.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ func (a *Agent) executeHook(hookConfig *types.HookConfig) *types.Hook {
// Instantiate the execution command
ex := &command.Execution{
Command: hookConfig.Command,
Timeout: int(hookConfig.Timeout),
}

// If stdin is true, add JSON event data to command execution.
Expand Down
1 change: 1 addition & 0 deletions backend/apid/actions/checks.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ var checkConfigUpdateFields = []string{
"CheckHooks",
"Subdue",
"Cron",
"Timeout",
}

// CheckController exposes actions in which a viewer can perform.
Expand Down
1 change: 1 addition & 0 deletions cli/commands/check/create.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ func CreateCommand(cli *cli.SensuCli) *cobra.Command {
cmd.Flags().String("proxy-entity-id", "", "the check proxy entity, used to create a proxy entity for an external resource")
cmd.Flags().BoolP("publish", "p", true, "publish check requests")
cmd.Flags().BoolP("stdin", "", false, "accept event data via STDIN")
cmd.Flags().StringP("timeout", "t", "", "timeout, in seconds, at which the check has to run")

// Mark flags are required for bash-completions
_ = cmd.MarkFlagRequired("command")
Expand Down
12 changes: 12 additions & 0 deletions cli/commands/check/interactive.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ type checkOpts struct {
Publish string `survey:"publish"`
ProxyEntityID string `survey:"proxy-entity-id"`
Stdin string `survey:"stdin"`
Timeout string `survey:"timeout"`
}

func newCheckOpts() *checkOpts {
Expand All @@ -50,6 +51,7 @@ func (opts *checkOpts) withCheck(check *types.CheckConfig) {
opts.RuntimeAssets = strings.Join(check.RuntimeAssets, ",")
opts.ProxyEntityID = check.ProxyEntityID
opts.Stdin = stdinDefault
opts.Timeout = strconv.Itoa(int(check.Timeout))
}

func (opts *checkOpts) withFlags(flags *pflag.FlagSet) {
Expand All @@ -63,6 +65,7 @@ func (opts *checkOpts) withFlags(flags *pflag.FlagSet) {
opts.Publish = strconv.FormatBool(publishBool)
opts.ProxyEntityID, _ = flags.GetString("proxy-entity-id")
opts.Stdin, _ = flags.GetString("stdin")
opts.Timeout, _ = flags.GetString("timeout")

if org, _ := flags.GetString("organization"); org != "" {
opts.Org = org
Expand Down Expand Up @@ -136,6 +139,13 @@ func (opts *checkOpts) administerQuestionnaire(editing bool) error {
return nil
},
},
{
Name: "timeout",
Prompt: &survey.Input{
Message: "Timeout:",
Default: opts.Timeout,
},
},
{
Name: "subscriptions",
Prompt: &survey.Input{
Expand Down Expand Up @@ -196,6 +206,7 @@ func (opts *checkOpts) administerQuestionnaire(editing bool) error {
func (opts *checkOpts) Copy(check *types.CheckConfig) {
interval, _ := strconv.ParseUint(opts.Interval, 10, 32)
stdin, _ := strconv.ParseBool(opts.Stdin)
timeout, _ := strconv.ParseUint(opts.Timeout, 10, 32)

check.Name = opts.Name
check.Environment = opts.Env
Expand All @@ -209,4 +220,5 @@ func (opts *checkOpts) Copy(check *types.CheckConfig) {
check.Publish = opts.Publish == "true"
check.ProxyEntityID = opts.ProxyEntityID
check.Stdin = stdin
check.Timeout = uint32(timeout)
}
8 changes: 8 additions & 0 deletions cli/commands/check/list.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,14 @@ func printToTable(results interface{}, writer io.Writer) {
return check.Cron
},
},
{
Title: "Timeout",
CellTransformer: func(data interface{}) string {
check, _ := data.(types.CheckConfig)
timeout := strconv.FormatUint(uint64(check.Timeout), 10)
return timeout
},
},
{
Title: "Subscriptions",
CellTransformer: func(data interface{}) string {
Expand Down
4 changes: 4 additions & 0 deletions cli/commands/check/show.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,10 @@ func printCheckToList(r *types.CheckConfig, writer io.Writer) {
Label: "Cron",
Value: r.Cron,
},
{
Label: "Timeout",
Value: strconv.FormatInt(int64(r.Timeout), 10),
},
{
Label: "Subscriptions",
Value: strings.Join(r.Subscriptions, ", "),
Expand Down
45 changes: 23 additions & 22 deletions command/command.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,14 @@ import (
"bytes"
"context"
"os/exec"
"runtime"
"strings"
"syscall"
"time"

"github.com/Sirupsen/logrus"
)

const (
// DefaultTimeout specifies the default command execution
// timeout in seconds.
DefaultTimeout int = 60

// TimeoutOutput specifies the command execution output in the
// event of an execution timeout.
TimeoutOutput string = "Execution timed out\n"
Expand Down Expand Up @@ -65,30 +62,24 @@ type Execution struct {
// timeout, optionally writing to STDIN, capturing its combined output
// (STDOUT/ERR) and exit status.
func ExecuteCommand(ctx context.Context, execution *Execution) (*Execution, error) {
// If Timeout is not specified, use the default.
if execution.Timeout == 0 {
execution.Timeout = DefaultTimeout
}

logger := logrus.WithFields(logrus.Fields{"component": "command"})
// Using a platform specific shell to "cheat", as the shell
// will handle certain failures for us, where golang exec is
// known to have troubles, e.g. command not found. We still
// use a fallback exit status in the unlikely event that the
// exit status cannot be determined.
var cmd *exec.Cmd

// Use the context deadline for command execution timeout.
// This will be effectively ignored if the context already has
// an earlier deadline, which is super rad.
ctx, timeout := context.WithTimeout(ctx, time.Duration(execution.Timeout)*time.Second)
// Use context.WithCancel for command execution timeout.
// context.WithTimeout will not kill child/grandchild processes
// (see issues tagged in https://github.com/sensu/sensu-go/issues/781).
// Rather, we will use a timer, CancelFunc and proc functions
// to perform full cleanup.
ctx, timeout := context.WithCancel(ctx)
defer timeout()

// Taken from Sensu-Spawn (Sensu 1.x.x).
if runtime.GOOS == "windows" {
cmd = exec.CommandContext(ctx, "cmd", "/c", execution.Command)
} else {
cmd = exec.CommandContext(ctx, "sh", "-c", execution.Command)
}
cmd = Command(ctx, execution.Command)

// Set the ENV for the command if it is set
if len(execution.Env) > 0 {
Expand All @@ -112,6 +103,17 @@ func ExecuteCommand(ctx context.Context, execution *Execution) (*Execution, erro
execution.Duration = time.Since(started).Seconds()
}()

// Kill process and all of its children when the timeout has expired.
if execution.Timeout != 0 {
SetProcessGroup(cmd)
time.AfterFunc(time.Duration(execution.Timeout)*time.Second, func() {
timeout()
if err := KillProcess(cmd); err != nil {
logger.WithError(err).Error("error when attempting to kill process")
}
})
}

if err := cmd.Start(); err != nil {
// Something unexpected happended when attepting to
// fork/exec, return immediately.
Expand All @@ -122,9 +124,8 @@ func ExecuteCommand(ctx context.Context, execution *Execution) (*Execution, erro

execution.Output = output.String()

// The command execution timed out if the context deadline was
// exceeded.
if ctx.Err() == context.DeadlineExceeded {
// The command execution timed out if the context was cancelled prematurely
if ctx.Err() == context.Canceled {
execution.Output = TimeoutOutput
execution.Status = TimeoutExitStatus
} else if err != nil {
Expand Down
10 changes: 10 additions & 0 deletions command/command_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,4 +91,14 @@ func TestExecuteCommand(t *testing.T) {
assert.Equal(t, "Execution timed out\n", testutil.CleanOutput(sleepExec.Output))
assert.Equal(t, 2, sleepExec.Status)
assert.NotEqual(t, 0, sleepExec.Duration)

// test that multiple commands can time out
sleepMultiple := FakeCommand("sleep 10 && echo foo")
sleepMultiple.Timeout = 1

sleepMultipleExec, sleepMultipleErr := ExecuteCommand(context.Background(), sleepMultiple)
assert.Equal(t, nil, sleepMultipleErr)
assert.Equal(t, "Execution timed out\n", testutil.CleanOutput(sleepMultipleExec.Output))
assert.Equal(t, 2, sleepMultipleExec.Status)
assert.NotEqual(t, 0, sleepMultipleExec.Duration)
}
24 changes: 24 additions & 0 deletions command/proc_unix.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// +build !windows

package command

import (
"context"
"os/exec"
"syscall"
)

// Command returns a command to execute a script through a shell.
func Command(ctx context.Context, command string) *exec.Cmd {
return exec.CommandContext(ctx, "sh", "-c", command)
}

// SetProcessGroup sets the process group of the command process
func SetProcessGroup(cmd *exec.Cmd) {
cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}
}

// KillProcess kills the command process and any child processes
func KillProcess(cmd *exec.Cmd) error {
return syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL)
}
24 changes: 24 additions & 0 deletions command/proc_windows.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// +build windows

package command

import (
"context"
"os/exec"
"syscall"
)

// Command returns a command to execute a script through a shell.
func Command(ctx context.Context, command string) *exec.Cmd {
return exec.CommandContext(ctx, "cmd", "/c", command)
}

// SetProcessGroup sets the process group of the command process
func SetProcessGroup(cmd *exec.Cmd) {
cmd.SysProcAttr = &syscall.SysProcAttr{CreationFlags: syscall.CREATE_NEW_PROCESS_GROUP}
}

// KillProcess kills the command process and any child processes
func KillProcess(cmd *exec.Cmd) error {
return cmd.Process.Kill()
}
2 changes: 2 additions & 0 deletions types/check.go
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,7 @@ func FixtureCheckRequest(id string) *CheckRequest {
// FixtureCheckConfig returns a fixture for a CheckConfig object.
func FixtureCheckConfig(id string) *CheckConfig {
interval := uint32(60)
timeout := uint32(0)

return &CheckConfig{
Name: id,
Expand All @@ -155,6 +156,7 @@ func FixtureCheckConfig(id string) *CheckConfig {
Publish: true,
Cron: "",
Ttl: 0,
Timeout: timeout,
}
}

Expand Down
Loading