diff --git a/bin/helper/helper.go b/bin/helper/helper.go index 667c79293..c2774a131 100644 --- a/bin/helper/helper.go +++ b/bin/helper/helper.go @@ -54,7 +54,7 @@ func main() { clients := cli.ClientSets{} - _, span := otel.Tracer(telemetry.TracerName).Start(ctx, "ExecuteExperimentHelper") + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "ExecuteExperimentHelper") defer span.End() // parse the helper name @@ -71,17 +71,17 @@ func main() { // invoke the corresponding helper based on the the (-name) flag switch *helperName { case "container-kill": - containerKill.Helper(clients) + containerKill.Helper(ctx, clients) case "disk-fill": - diskFill.Helper(clients) + diskFill.Helper(ctx, clients) case "dns-chaos": - dnsChaos.Helper(clients) + dnsChaos.Helper(ctx, clients) case "stress-chaos": - stressChaos.Helper(clients) + stressChaos.Helper(ctx, clients) case "network-chaos": - networkChaos.Helper(clients) + networkChaos.Helper(ctx, clients) case "http-chaos": - httpChaos.Helper(clients) + httpChaos.Helper(ctx, clients) default: log.Errorf("Unsupported -name %v, please provide the correct value of -name args", *helperName) diff --git a/chaoslib/litmus/container-kill/helper/container-kill.go b/chaoslib/litmus/container-kill/helper/container-kill.go index 6ea335467..81e6b1a67 100644 --- a/chaoslib/litmus/container-kill/helper/container-kill.go +++ b/chaoslib/litmus/container-kill/helper/container-kill.go @@ -4,6 +4,8 @@ import ( "bytes" "context" "fmt" + "github.com/litmuschaos/litmus-go/pkg/telemetry" + "go.opentelemetry.io/otel" "os/exec" "strconv" "time" @@ -27,7 +29,9 @@ import ( var err error // Helper injects the container-kill chaos -func Helper(clients clients.ClientSets) { +func Helper(ctx context.Context, clients clients.ClientSets) { + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "SimulateContainerKillFault") + defer span.End() experimentsDetails := experimentTypes.ExperimentDetails{} eventsDetails := types.EventDetails{} diff --git a/chaoslib/litmus/disk-fill/helper/disk-fill.go b/chaoslib/litmus/disk-fill/helper/disk-fill.go index 0ebaf03be..c851ba26f 100644 --- a/chaoslib/litmus/disk-fill/helper/disk-fill.go +++ b/chaoslib/litmus/disk-fill/helper/disk-fill.go @@ -4,7 +4,9 @@ import ( "context" "fmt" "github.com/litmuschaos/litmus-go/pkg/cerrors" + "github.com/litmuschaos/litmus-go/pkg/telemetry" "github.com/palantir/stacktrace" + "go.opentelemetry.io/otel" "os" "os/exec" "os/signal" @@ -29,7 +31,9 @@ import ( var inject, abort chan os.Signal // Helper injects the disk-fill chaos -func Helper(clients clients.ClientSets) { +func Helper(ctx context.Context, clients clients.ClientSets) { + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "SimulateDiskFillFault") + defer span.End() experimentsDetails := experimentTypes.ExperimentDetails{} eventsDetails := types.EventDetails{} diff --git a/chaoslib/litmus/http-chaos/helper/http-helper.go b/chaoslib/litmus/http-chaos/helper/http-helper.go index e5a0bd29c..b544df448 100644 --- a/chaoslib/litmus/http-chaos/helper/http-helper.go +++ b/chaoslib/litmus/http-chaos/helper/http-helper.go @@ -1,9 +1,12 @@ package helper import ( + "context" "fmt" "github.com/litmuschaos/litmus-go/pkg/cerrors" + "github.com/litmuschaos/litmus-go/pkg/telemetry" "github.com/palantir/stacktrace" + "go.opentelemetry.io/otel" "os" "os/signal" "strconv" @@ -27,7 +30,9 @@ var ( ) // Helper injects the http chaos -func Helper(clients clients.ClientSets) { +func Helper(ctx context.Context, clients clients.ClientSets) { + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "SimulatePodHTTPFault") + defer span.End() experimentsDetails := experimentTypes.ExperimentDetails{} eventsDetails := types.EventDetails{} diff --git a/chaoslib/litmus/k6-loadgen/lib/k6-loadgen.go b/chaoslib/litmus/k6-loadgen/lib/k6-loadgen.go index e2453292b..d32c2900e 100644 --- a/chaoslib/litmus/k6-loadgen/lib/k6-loadgen.go +++ b/chaoslib/litmus/k6-loadgen/lib/k6-loadgen.go @@ -3,6 +3,7 @@ package lib import ( "context" "fmt" + "os" "github.com/litmuschaos/litmus-go/pkg/cerrors" "github.com/litmuschaos/litmus-go/pkg/clients" @@ -102,6 +103,33 @@ func createHelperPod(ctx context.Context, experimentsDetails *experimentTypes.Ex const volumeName = "script-volume" const mountPath = "/mnt" + + var envs []corev1.EnvVar + args := []string{ + mountPath + "/" + experimentsDetails.ScriptSecretKey, + "-q", + "--tag", + "trace_id=" + span.SpanContext().TraceID().String(), + } + + if otelExporterEndpoint := os.Getenv(telemetry.OTELExporterOTLPEndpoint); otelExporterEndpoint != "" { + envs = []corev1.EnvVar{ + { + Name: "K6_OTEL_METRIC_PREFIX", + Value: experimentsDetails.OTELMetricPrefix, + }, + { + Name: "K6_OTEL_GRPC_EXPORTER_INSECURE", + Value: "true", + }, + { + Name: "K6_OTEL_GRPC_EXPORTER_ENDPOINT", + Value: otelExporterEndpoint, + }, + } + args = append(args, "--out", "experimental-opentelemetry") + } + helperPod := &corev1.Pod{ ObjectMeta: v1.ObjectMeta{ GenerateName: experimentsDetails.ExperimentName + "-helper-", @@ -121,10 +149,8 @@ func createHelperPod(ctx context.Context, experimentsDetails *experimentTypes.Ex "k6", "run", }, - Args: []string{ - mountPath + "/" + experimentsDetails.ScriptSecretKey, - "-q", - }, + Args: args, + Env: envs, Resources: chaosDetails.Resources, VolumeMounts: []corev1.VolumeMount{ { diff --git a/chaoslib/litmus/network-chaos/helper/netem.go b/chaoslib/litmus/network-chaos/helper/netem.go index 415039c44..b5d200c24 100644 --- a/chaoslib/litmus/network-chaos/helper/netem.go +++ b/chaoslib/litmus/network-chaos/helper/netem.go @@ -1,10 +1,13 @@ package helper import ( + "context" "fmt" "github.com/litmuschaos/litmus-go/pkg/cerrors" "github.com/litmuschaos/litmus-go/pkg/events" + "github.com/litmuschaos/litmus-go/pkg/telemetry" "github.com/palantir/stacktrace" + "go.opentelemetry.io/otel" "os" "os/exec" "os/signal" @@ -34,7 +37,9 @@ var ( ) // Helper injects the network chaos -func Helper(clients clients.ClientSets) { +func Helper(ctx context.Context, clients clients.ClientSets) { + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "SimulatePodNetworkFault") + defer span.End() experimentsDetails := experimentTypes.ExperimentDetails{} eventsDetails := types.EventDetails{} diff --git a/chaoslib/litmus/pod-dns-chaos/helper/dnschaos.go b/chaoslib/litmus/pod-dns-chaos/helper/dnschaos.go index 794da032e..04b895f9b 100644 --- a/chaoslib/litmus/pod-dns-chaos/helper/dnschaos.go +++ b/chaoslib/litmus/pod-dns-chaos/helper/dnschaos.go @@ -2,9 +2,12 @@ package helper import ( "bytes" + "context" "fmt" "github.com/litmuschaos/litmus-go/pkg/cerrors" + "github.com/litmuschaos/litmus-go/pkg/telemetry" "github.com/palantir/stacktrace" + "go.opentelemetry.io/otel" "os" "os/exec" "os/signal" @@ -34,7 +37,9 @@ const ( ) // Helper injects the dns chaos -func Helper(clients clients.ClientSets) { +func Helper(ctx context.Context, clients clients.ClientSets) { + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "SimulatePodDNSFault") + defer span.End() experimentsDetails := experimentTypes.ExperimentDetails{} eventsDetails := types.EventDetails{} diff --git a/chaoslib/litmus/stress-chaos/helper/stress-helper.go b/chaoslib/litmus/stress-chaos/helper/stress-helper.go index e879b2d02..dd310f07a 100644 --- a/chaoslib/litmus/stress-chaos/helper/stress-helper.go +++ b/chaoslib/litmus/stress-chaos/helper/stress-helper.go @@ -3,9 +3,12 @@ package helper import ( "bufio" "bytes" + "context" "fmt" "github.com/litmuschaos/litmus-go/pkg/cerrors" + "github.com/litmuschaos/litmus-go/pkg/telemetry" "github.com/palantir/stacktrace" + "go.opentelemetry.io/otel" "io" "os" "os/exec" @@ -51,7 +54,9 @@ const ( ) // Helper injects the stress chaos -func Helper(clients clients.ClientSets) { +func Helper(ctx context.Context, clients clients.ClientSets) { + ctx, span := otel.Tracer(telemetry.TracerName).Start(ctx, "SimulatePodStressFault") + defer span.End() experimentsDetails := experimentTypes.ExperimentDetails{} eventsDetails := types.EventDetails{} @@ -105,7 +110,9 @@ func prepareStressChaos(experimentsDetails *experimentTypes.ExperimentDetails, c return stacktrace.Propagate(err, "could not parse targets") } - var targets []targetDetails + var ( + targets []targetDetails + ) for _, t := range targetList.Target { td := targetDetails{ @@ -126,7 +133,7 @@ func prepareStressChaos(experimentsDetails *experimentTypes.ExperimentDetails, c return stacktrace.Propagate(err, "could not get container pid") } - td.CGroupManager, err = getCGroupManager(td) + td.CGroupManager, err, td.GroupPath = getCGroupManager(td) if err != nil { return stacktrace.Propagate(err, "could not get cgroup manager") } @@ -494,38 +501,55 @@ func abortWatcher(targets []targetDetails, resultName, chaosNS string) { } // getCGroupManager will return the cgroup for the given pid of the process -func getCGroupManager(t targetDetails) (interface{}, error) { +func getCGroupManager(t targetDetails) (interface{}, error, string) { if cgroups.Mode() == cgroups.Unified { - groupPath, err := cgroupsv2.PidGroupPath(t.Pid) + groupPath := "" + output, err := exec.Command("bash", "-c", fmt.Sprintf("nsenter -t 1 -C -m -- cat /proc/%v/cgroup", t.Pid)).CombinedOutput() if err != nil { - return nil, cerrors.Error{ErrorCode: cerrors.ErrorTypeHelper, Source: t.Source, Target: fmt.Sprintf("{podName: %s, namespace: %s, container: %s}", t.Name, t.Namespace, t.TargetContainer), Reason: fmt.Sprintf("fail to get pid group path: %s", err.Error())} + return nil, errors.Errorf("Error in getting groupPath,%s", string(output)), "" + } + parts := strings.SplitN(string(output), ":", 3) + if len(parts) < 3 { + return "", fmt.Errorf("invalid cgroup entry: %s", string(output)), "" + } + if parts[0] == "0" && parts[1] == "" { + groupPath = parts[2] } + log.Infof("group path: %s", groupPath) + cgroup2, err := cgroupsv2.LoadManager("/sys/fs/cgroup", groupPath) if err != nil { - return nil, cerrors.Error{ErrorCode: cerrors.ErrorTypeHelper, Source: t.Source, Target: fmt.Sprintf("{podName: %s, namespace: %s, container: %s}", t.Name, t.Namespace, t.TargetContainer), Reason: fmt.Sprintf("fail to load the cgroup: %s", err.Error())} + return nil, errors.Errorf("Error loading cgroup v2 manager, %v", err), "" } - return cgroup2, nil + return cgroup2, nil, groupPath } path := pidPath(t) cgroup, err := findValidCgroup(path, t) if err != nil { - return nil, stacktrace.Propagate(err, "could not find valid cgroup") + return nil, stacktrace.Propagate(err, "could not find valid cgroup"), "" } cgroup1, err := cgroups.Load(cgroups.V1, cgroups.StaticPath(cgroup)) if err != nil { - return nil, cerrors.Error{ErrorCode: cerrors.ErrorTypeHelper, Source: t.Source, Target: fmt.Sprintf("{podName: %s, namespace: %s, container: %s}", t.Name, t.Namespace, t.TargetContainer), Reason: fmt.Sprintf("fail to load the cgroup: %s", err.Error())} + return nil, cerrors.Error{ErrorCode: cerrors.ErrorTypeHelper, Source: t.Source, Target: fmt.Sprintf("{podName: %s, namespace: %s, container: %s}", t.Name, t.Namespace, t.TargetContainer), Reason: fmt.Sprintf("fail to load the cgroup: %s", err.Error())}, "" } - return cgroup1, nil + return cgroup1, nil, "" } // addProcessToCgroup will add the process to cgroup // By default it will add to v1 cgroup -func addProcessToCgroup(pid int, control interface{}) error { +func addProcessToCgroup(pid int, control interface{}, groupPath string) error { if cgroups.Mode() == cgroups.Unified { - var cgroup1 = control.(*cgroupsv2.Manager) - return cgroup1.AddProc(uint64(pid)) + args := []string{"-t", "1", "-C", "--", "sudo", "sh", "-c", fmt.Sprintf("echo %d >> /sys/fs/cgroup%s/cgroup.procs", pid, strings.ReplaceAll(groupPath, "\n", ""))} + output, err := exec.Command("nsenter", args...).CombinedOutput() + if err != nil { + return cerrors.Error{ + ErrorCode: cerrors.ErrorTypeChaosInject, + Reason: fmt.Sprintf("failed to add process to cgroup %s: %v", string(output), err), + } + } + return nil } var cgroup1 = control.(cgroups.Cgroup) return cgroup1.Add(cgroups.Process{Pid: pid}) @@ -538,6 +562,7 @@ func injectChaos(t targetDetails, stressors, stressType string) (*exec.Cmd, erro if stressType == "pod-io-stress" { stressCommand = fmt.Sprintf("pause nsutil -t %v -p -m -- %v", strconv.Itoa(t.Pid), stressors) } + log.Infof("[Info]: starting process: %v", stressCommand) // launch the stress-ng process on the target container in paused mode @@ -551,7 +576,7 @@ func injectChaos(t targetDetails, stressors, stressType string) (*exec.Cmd, erro } // add the stress process to the cgroup of target container - if err = addProcessToCgroup(cmd.Process.Pid, t.CGroupManager); err != nil { + if err = addProcessToCgroup(cmd.Process.Pid, t.CGroupManager, t.GroupPath); err != nil { if killErr := cmd.Process.Kill(); killErr != nil { return nil, cerrors.Error{ErrorCode: cerrors.ErrorTypeChaosInject, Source: t.Source, Target: fmt.Sprintf("{podName: %s, namespace: %s, container: %s}", t.Name, t.Namespace, t.TargetContainer), Reason: fmt.Sprintf("fail to add the stress process to cgroup %s and kill stress process: %s", err.Error(), killErr.Error())} } @@ -579,4 +604,5 @@ type targetDetails struct { CGroupManager interface{} Cmd *exec.Cmd Source string + GroupPath string } diff --git a/experiments/load/k6-loadgen/experiment/k6-loadgen.go b/experiments/load/k6-loadgen/experiment/k6-loadgen.go index 462521f1f..6e1af353f 100644 --- a/experiments/load/k6-loadgen/experiment/k6-loadgen.go +++ b/experiments/load/k6-loadgen/experiment/k6-loadgen.go @@ -13,7 +13,6 @@ import ( "github.com/litmuschaos/litmus-go/pkg/log" "github.com/litmuschaos/litmus-go/pkg/probe" "github.com/litmuschaos/litmus-go/pkg/result" - "github.com/litmuschaos/litmus-go/pkg/status" "github.com/litmuschaos/litmus-go/pkg/types" "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/sirupsen/logrus" @@ -69,18 +68,6 @@ func Experiment(ctx context.Context, clients clients.ClientSets) { // Calling AbortWatcher go routine, it will continuously watch for the abort signal and generate the required events and result go common.AbortWatcher(experimentsDetails.ExperimentName, clients, &resultDetails, &chaosDetails, &eventsDetails) - //PRE-CHAOS APPLICATION STATUS CHECK - if chaosDetails.DefaultHealthCheck { - log.Info("[Status]: Verify that the AUT (Application Under Test) is running (pre-chaos)") - if err := status.AUTStatusCheck(clients, &chaosDetails); err != nil { - log.Errorf("Application status check failed, err: %v", err) - types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) - events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") - result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) - return - } - } - if experimentsDetails.EngineName != "" { // marking AUT as running, as we already checked the status of application under test msg := "AUT: Running" @@ -112,18 +99,6 @@ func Experiment(ctx context.Context, clients clients.ClientSets) { resultDetails.Verdict = v1alpha1.ResultVerdictPassed chaosDetails.Phase = types.PostChaosPhase - //POST-CHAOS APPLICATION STATUS CHECK - if chaosDetails.DefaultHealthCheck { - log.Info("[Status]: Verify that the AUT (Application Under Test) is running (post-chaos)") - if err := status.AUTStatusCheck(clients, &chaosDetails); err != nil { - log.Errorf("Application status check failed, err: %v", err) - types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) - events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") - result.RecordAfterFailure(&chaosDetails, &resultDetails, err, clients, &eventsDetails) - return - } - } - if experimentsDetails.EngineName != "" { // marking AUT as running, as we already checked the status of application under test msg := "AUT: Running" diff --git a/pkg/load/k6-loadgen/environment/environment.go b/pkg/load/k6-loadgen/environment/environment.go index 92a4a0caa..fb1a6ead6 100644 --- a/pkg/load/k6-loadgen/environment/environment.go +++ b/pkg/load/k6-loadgen/environment/environment.go @@ -20,5 +20,5 @@ func GetENV(experimentDetails *experimentTypes.ExperimentDetails) { experimentDetails.LIBImage = types.Getenv("LIB_IMAGE", "ghcr.io/grafana/k6-operator:latest-runner") experimentDetails.ScriptSecretName = types.Getenv("SCRIPT_SECRET_NAME", "k6-script") experimentDetails.ScriptSecretKey = types.Getenv("SCRIPT_SECRET_KEY", "script.js") - + experimentDetails.OTELMetricPrefix = types.Getenv("OTEL_METRIC_PREFIX", "k6_") } diff --git a/pkg/load/k6-loadgen/types/types.go b/pkg/load/k6-loadgen/types/types.go index 86597ca6a..b2a3b929b 100644 --- a/pkg/load/k6-loadgen/types/types.go +++ b/pkg/load/k6-loadgen/types/types.go @@ -13,4 +13,5 @@ type ExperimentDetails struct { LIBImage string ScriptSecretName string ScriptSecretKey string + OTELMetricPrefix string }