Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dynamically configurable standby instance #824

Merged
merged 9 commits into from
Jan 14, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions api/cirrus_ci_service.proto
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,7 @@ message PollRequest {
repeated int64 old_running_tasks = 2 [deprecated = true];
map<string, double> resources_in_use = 3;
repeated string running_tasks = 4;
repeated StandbyInstanceInformation available_standby_instances_information = 5;
}

message QueryRunningTasksRequest {
Expand Down Expand Up @@ -196,6 +197,24 @@ message PollResponse {
bool shutdown = 4;

repeated string tasks_to_stop = 5;

repeated StandbyInstanceParameters updated_standby_instances = 6;
}

// StandbyInstanceInformation pairs the parameters of a standby instance with
// its age. It is the element type of the repeated
// available_standby_instances_information field of PollRequest.
message StandbyInstanceInformation {
// Parameters (isolation, resources, warm-up) of the standby instance.
StandbyInstanceParameters parameters = 1;
// Age of the standby instance, in seconds.
uint64 age_seconds = 2; // since warming up
}

// StandbyInstanceParameters groups the isolation, resources and warm-up
// settings of a standby instance. It is the element type of the repeated
// updated_standby_instances field of PollResponse and is embedded in
// StandbyInstanceInformation.
message StandbyInstanceParameters {
// Warmup describes a warm-up script and how long it may run.
message Warmup {
// Script text; the Tart warm-up code writes it to an SSH shell on the VM,
// followed by "exit".
string script = 1;
// Timeout in seconds; the Tart warm-up code applies no timeout when this is 0.
uint64 timeout_seconds = 2;
}

// Isolation configuration of the standby instance.
Isolation isolation = 1;
// Named resource amounts, keyed by resource name.
// NOTE(review): presumably the resources the standby instance occupies — confirm.
map<string, double> resources = 2;
// Warm-up settings; the Tart warm-up code runs no script when this is unset.
Warmup warmup = 3;
}

message WorkerInfo {
Expand Down
2 changes: 1 addition & 1 deletion internal/commands/worker/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@ func buildWorker(output io.Writer) (*worker.Worker, error) {

// Configure standby
if standby := config.Standby; standby != nil {
opts = append(opts, worker.WithStandby(standby))
opts = append(opts, worker.WithStandby(standby.StandbyInstanceParameters))
}

// Configure resource modifiers
Expand Down
41 changes: 41 additions & 0 deletions internal/commands/worker/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"github.com/stretchr/testify/require"
"path/filepath"
"testing"
"time"
)

// TestUnknownFields ensures that we will error out on configuration files
Expand Down Expand Up @@ -71,3 +72,43 @@ func TestRestrictForceSoftnet(t *testing.T) {

require.True(t, config.Security.AllowedIsolations.Tart.ForceSoftnet)
}

// TestStandby parses testdata/standby.yml and checks that the Tart pre-pull
// section and the standby section (resources, Tart isolation and warm-up)
// end up in the expected configuration fields.
func TestStandby(t *testing.T) {
	cfg, err := parseConfig(filepath.Join("testdata", "standby.yml"))
	require.NoError(t, err)

	// Pre-pull section: must be present, with the check interval and both images.
	prePull := cfg.TartPrePull
	require.NotNil(t, prePull)
	require.Equal(t, 3*time.Hour, prePull.CheckInterval)
	require.Equal(t, []string{
		"ghcr.io/cirruslabs/macos-runner:sonoma",
		"ghcr.io/cirruslabs/macos-runner:sequoia",
	}, prePull.Images)

	// Standby section: must be present.
	standby := cfg.Standby
	require.NotNil(t, standby)

	// Resources are keyed by name and hold floating-point amounts.
	require.Equal(t, 1.0, standby.Resources["tart-vms"])

	// Isolation must be configured and must be of the Tart kind.
	require.NotNil(t, standby.Isolation)
	tartIsolation := standby.Isolation.GetTart()
	require.NotNil(t, tartIsolation)

	require.Equal(t, "ghcr.io/cirruslabs/macos-runner:sonoma", tartIsolation.Image)
	require.Equal(t, "admin", tartIsolation.User)
	require.Equal(t, "admin", tartIsolation.Password)
	require.Equal(t, "1920x1080", tartIsolation.Display)
	require.True(t, tartIsolation.Softnet)
	require.Equal(t, uint32(4), tartIsolation.Cpu)
	require.Equal(t, uint32(16384), tartIsolation.Memory)

	// Warm-up: the fixture's "10m" timeout is expected as 600 seconds.
	warmup := standby.Warmup
	require.NotNil(t, warmup)
	require.Equal(t, "xcrun simctl list || true", warmup.Script)
	require.Equal(t, uint64(600), warmup.TimeoutSeconds)
}
25 changes: 25 additions & 0 deletions internal/commands/worker/testdata/standby.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Worker configuration fixture parsed by TestStandby.
# NOTE(review): keys are shown here without nesting indentation; TestStandby
# reads resources, isolation and warmup from under "standby" — confirm layout.
token: e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855

name: "MacMini-Rack-1-Slot-2"

# Tart pre-pull settings: TestStandby expects a 3-hour check interval and
# exactly these two images, in this order.
tart-pre-pull:
check-interval: 3h
images:
- ghcr.io/cirruslabs/macos-runner:sonoma
- ghcr.io/cirruslabs/macos-runner:sequoia

# Standby instance settings.
standby:
resources:
tart-vms: 1
# Tart isolation for the standby instance.
isolation:
tart:
image: ghcr.io/cirruslabs/macos-runner:sonoma
user: admin
password: admin
display: 1920x1080
softnet: true
cpu: 4
memory: 16384
# Warm-up script and its timeout.
warmup:
script: xcrun simctl list || true
# TestStandby expects this duration to be reported as 600 seconds.
timeout: 10m
5 changes: 2 additions & 3 deletions internal/executor/instance/abstract/abstract.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@ import (
"context"
"errors"
"github.com/cirruslabs/cirrus-cli/internal/executor/instance/runconfig"
"github.com/cirruslabs/cirrus-cli/pkg/api"
"github.com/cirruslabs/echelon"
"go.opentelemetry.io/otel/attribute"
"time"
)

type Instance interface {
Expand All @@ -28,8 +28,7 @@ type WarmableInstance interface {
ident string,
env map[string]string,
lazyPull bool,
warmupScript string,
warmupTimeout time.Duration,
warmup *api.StandbyInstanceParameters_Warmup,
logger *echelon.Logger,
) error
}
Original file line number Diff line number Diff line change
Expand Up @@ -105,8 +105,7 @@ func (tart *Tart) Warmup(
ident string,
additionalEnvironment map[string]string,
lazyPull bool,
warmupScript string,
warmupTimeout time.Duration,
warmup *api.StandbyInstanceParameters_Warmup,
logger *echelon.Logger,
) error {
err := tart.bootVM(ctx, ident, additionalEnvironment, "", lazyPull, logger)
Expand All @@ -125,7 +124,7 @@ func (tart *Tart) Warmup(
}
defer func() { _ = sshClient.Close() }()

if warmupScript == "" {
if warmup == nil {
return nil
}

Expand All @@ -137,8 +136,9 @@ func (tart *Tart) Warmup(
// Work around x/crypto/ssh not being context.Context-friendly (e.g. https://github.com/golang/go/issues/20288)
var monitorCtx context.Context
var monitorCancel context.CancelFunc
if warmupTimeout != 0 {
monitorCtx, monitorCancel = context.WithTimeoutCause(ctx, warmupTimeout, abstract.ErrWarmupTimeout)
if warmup.TimeoutSeconds != 0 {
duration := time.Duration(warmup.TimeoutSeconds) * time.Second
monitorCtx, monitorCancel = context.WithTimeoutCause(ctx, duration, abstract.ErrWarmupTimeout)
} else {
monitorCtx, monitorCancel = context.WithCancel(ctx)
}
Expand Down Expand Up @@ -181,7 +181,7 @@ func (tart *Tart) Warmup(
return fmt.Errorf("%w: failed to invoke SSH shell on the VM: %v", abstract.ErrWarmupScriptFailed, err)
}

_, err = stdinBuf.Write([]byte(warmupScript + "\nexit\n"))
_, err = stdinBuf.Write([]byte(warmup.Script + "\nexit\n"))
if err != nil {
return fmt.Errorf("%w: failed to write the warm-up script to the shell: %v",
abstract.ErrWarmupScriptFailed, err)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"github.com/cirruslabs/cirrus-cli/internal/executor/platform"
"github.com/cirruslabs/cirrus-cli/internal/logger"
"github.com/cirruslabs/cirrus-cli/internal/worker/resourcemodifier"
"github.com/cirruslabs/cirrus-cli/pkg/api"
"github.com/cirruslabs/echelon"
"github.com/getsentry/sentry-go"
"github.com/google/uuid"
Expand Down Expand Up @@ -96,8 +97,7 @@ func (vetu *Vetu) Warmup(
ident string,
env map[string]string,
lazyPull bool,
_ string,
_ time.Duration,
_ *api.StandbyInstanceParameters_Warmup,
logger *echelon.Logger,
) error {
return vetu.bootVM(ctx, ident, env, lazyPull, logger)
Expand Down
5 changes: 3 additions & 2 deletions internal/worker/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"github.com/cirruslabs/cirrus-cli/internal/worker/resourcemodifier"
"github.com/cirruslabs/cirrus-cli/internal/worker/security"
"github.com/cirruslabs/cirrus-cli/internal/worker/upstream"
"github.com/cirruslabs/cirrus-cli/pkg/api"
"github.com/sirupsen/logrus"
)

Expand Down Expand Up @@ -39,9 +40,9 @@ func WithSecurity(security *security.Security) Option {
}
}

func WithStandby(standby *StandbyConfig) Option {
func WithStandby(standby *api.StandbyInstanceParameters) Option {
return func(e *Worker) {
e.standbyConfig = standby
e.standbyParameters = standby
}
}

Expand Down
46 changes: 7 additions & 39 deletions internal/worker/standby.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,24 +5,15 @@ import (
"fmt"
"github.com/cirruslabs/cirrus-cli/pkg/api"
"github.com/cirruslabs/cirrus-cli/pkg/parser/boolevator"
"github.com/cirruslabs/cirrus-cli/pkg/parser/instance/isolation"
"github.com/cirruslabs/cirrus-cli/pkg/parser/instance"
"github.com/cirruslabs/cirrus-cli/pkg/parser/issue"
"github.com/cirruslabs/cirrus-cli/pkg/parser/node"
"github.com/cirruslabs/cirrus-cli/pkg/parser/parserkit"
"gopkg.in/yaml.v3"
"strconv"
"time"
)

type StandbyConfig struct {
Isolation *api.Isolation `yaml:"isolation"`
Resources map[string]float64 `yaml:"resources"`
Warmup Warmup `yaml:"warmup"`
}

type Warmup struct {
Script string `yaml:"script"`
Timeout time.Duration `yaml:"timeout"`
*api.StandbyInstanceParameters
}

var ErrIsolationMissing = errors.New("isolation configuration is required for standby")
Expand All @@ -39,22 +30,21 @@ func (standby *StandbyConfig) UnmarshalYAML(value *yaml.Node) error {
return err
}

isolationNode := documentNode.FindChild("isolation")
if isolationNode == nil {
if isolationNode := documentNode.FindChild("isolation"); isolationNode == nil {
return ErrIsolationMissing
}
// Parse isolation
parserKit := &parserkit.ParserKit{
Boolevator: boolevator.New(),
IssueRegistry: issue.NewRegistry(),
}
isolationParser := isolation.NewIsolation(nil, parserKit)
if err := isolationParser.Parse(isolationNode, parserKit); err != nil {
parametersParser := instance.NewStandbyParameters(nil, parserKit)
if err := parametersParser.Parse(documentNode, parserKit); err != nil {
return err
}

// Only allow Tart and Vetu to be configured as standby
switch isolationType := isolationParser.Proto().Type.(type) {
switch isolationType := parametersParser.Proto().Isolation.Type.(type) {
case *api.Isolation_Tart_:
// OK
case *api.Isolation_Vetu_:
Expand All @@ -63,29 +53,7 @@ func (standby *StandbyConfig) UnmarshalYAML(value *yaml.Node) error {
return fmt.Errorf("%w, got %T", ErrUnsupportedIsolation, isolationType)
}

standby.Isolation = isolationParser.Proto()

// Parse resources
standby.Resources = make(map[string]float64)
if resourcesNode := documentNode.FindChild("resources"); resourcesNode != nil {
for _, resourceNode := range resourcesNode.Children {
resourceValueRaw, err := resourceNode.FlattenedValue()
if err != nil {
return err
}
resourceValue, err := strconv.ParseFloat(resourceValueRaw, 64)
if err != nil {
return err
}
standby.Resources[resourceNode.Name] = resourceValue
}
}

if warmupNode := documentNode.FindChild("warmup"); warmupNode != nil {
if err := warmupNode.YAMLNode.Decode(&standby.Warmup); err != nil {
return err
}
}
standby.StandbyInstanceParameters = parametersParser.Proto()

return nil
}
2 changes: 1 addition & 1 deletion internal/worker/task.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ func (worker *Worker) getInstance(
worker.standbyInstanceStartedAt = time.Time{}

// Return the standby instance if matches the isolation required by the task
if proto.Equal(worker.standbyConfig.Isolation, isolation) {
if proto.Equal(worker.standbyParameters.Isolation, isolation) {
worker.logger.Debugf("standby instance matches the task's isolation configuration, " +
"yielding it to the task")
worker.standbyHitCounter.Add(ctx, 1, metric.WithAttributes(standbyInstance.Attributes()...))
Expand Down
Loading