From cd2a594817660af0496c661edf2a3a875f2fbe82 Mon Sep 17 00:00:00 2001 From: Hyunsoo Kim Date: Tue, 18 Feb 2025 22:02:14 -0500 Subject: [PATCH 1/8] drop extra prometheus metadata metrics for container insights --- .../pipeline/containerinsights/translator.go | 6 ++++- .../processor/filterprocessor/translator.go | 22 ++++++++++++++++++- 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/translator/translate/otel/pipeline/containerinsights/translator.go b/translator/translate/otel/pipeline/containerinsights/translator.go index 5304416081..6778d61f51 100644 --- a/translator/translate/otel/pipeline/containerinsights/translator.go +++ b/translator/translate/otel/pipeline/containerinsights/translator.go @@ -6,6 +6,7 @@ package containerinsights import ( "fmt" + "github.com/aws/amazon-cloudwatch-agent/translator/translate/otel/processor/filterprocessor" "go.opentelemetry.io/collector/component" "go.opentelemetry.io/collector/confmap" "go.opentelemetry.io/collector/pipeline" @@ -58,7 +59,10 @@ func (t *translator) Translate(conf *confmap.Conf) (*common.ComponentTranslators } // create processor map with default batch processor based on pipeline name - processors := common.NewTranslatorMap(batchprocessor.NewTranslatorWithNameAndSection(t.pipelineName, common.LogsKey)) + processors := common.NewTranslatorMap( + batchprocessor.NewTranslatorWithNameAndSection(t.pipelineName, common.LogsKey), + filterprocessor.NewTranslator(common.WithName(t.pipelineName)), + ) // create exporter map with default emf exporter based on pipeline name exporters := common.NewTranslatorMap(awsemf.NewTranslatorWithName(t.pipelineName)) // create extensions map based on pipeline name diff --git a/translator/translate/otel/processor/filterprocessor/translator.go b/translator/translate/otel/processor/filterprocessor/translator.go index a656277a66..b2cd3c59f2 100644 --- a/translator/translate/otel/processor/filterprocessor/translator.go +++ b/translator/translate/otel/processor/filterprocessor/translator.go @@ -52,7 +52,7 @@ func (t *translator) ID() component.ID { // Translate creates a processor config based on the fields in the // Metrics section of the JSON config. func (t *translator) Translate(conf *confmap.Conf) (component.Config, error) { - if conf == nil || (!conf.IsSet(common.JmxConfigKey) && t.Name() != common.PipelineNameContainerInsightsJmx) { + if conf == nil || (t.Name() != common.PipelineNameContainerInsights && !conf.IsSet(common.JmxConfigKey) && t.Name() != common.PipelineNameContainerInsightsJmx) { return nil, &common.MissingKeyError{ID: t.ID(), JsonKey: common.JmxConfigKey} } @@ -60,6 +60,26 @@ func (t *translator) Translate(conf *confmap.Conf) (component.Config, error) { if t.Name() == common.PipelineNameContainerInsightsJmx { return common.GetYamlFileToYamlConfig(cfg, containerInsightsJmxConfig) } + if t.Name() == common.PipelineNameContainerInsights { + c := confmap.NewFromStringMap(map[string]interface{}{ + "metrics": map[string]any{ + "exclude": map[string]any{ + "match_type": "strict", + "metric_names": []string{ + "up", + "scrape_duration_seconds", + "scrape_samples_scraped", + "scrape_series_added", + "scrape_samples_post_metric_relabeling", + }, + }, + }, + }) + if err := c.Unmarshal(&cfg); err != nil { + return nil, fmt.Errorf("unable to unmarshal filter processor (%s): %w", t.ID(), err) + } + return cfg, nil + } jmxMap := common.GetIndexedMap(conf, common.JmxConfigKey, t.Index()) From 21789fd8d4d1881c1ae16b580b4635efce682027 Mon Sep 17 00:00:00 2001 From: Hyunsoo Kim Date: Wed, 19 Feb 2025 11:34:21 -0500 Subject: [PATCH 2/8] add a simple test --- .../otel/processor/filterprocessor/translator.go | 5 +++-- .../processor/filterprocessor/translator_test.go | 14 ++++++++++++++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/translator/translate/otel/processor/filterprocessor/translator.go b/translator/translate/otel/processor/filterprocessor/translator.go index b2cd3c59f2..4d63b8ad4a 100644 --- a/translator/translate/otel/processor/filterprocessor/translator.go +++ b/translator/translate/otel/processor/filterprocessor/translator.go @@ -52,7 +52,8 @@ func (t *translator) ID() component.ID { // Translate creates a processor config based on the fields in the // Metrics section of the JSON config. func (t *translator) Translate(conf *confmap.Conf) (component.Config, error) { - if conf == nil || (t.Name() != common.PipelineNameContainerInsights && !conf.IsSet(common.JmxConfigKey) && t.Name() != common.PipelineNameContainerInsightsJmx) { + // also checking for container insights pipeline to add default filtering for prometheus metadata + if conf == nil || (t.Name() != common.PipelineNameContainerInsights && t.Name() != common.PipelineNameContainerInsightsJmx && !conf.IsSet(common.JmxConfigKey)) { return nil, &common.MissingKeyError{ID: t.ID(), JsonKey: common.JmxConfigKey} } @@ -64,7 +65,7 @@ func (t *translator) Translate(conf *confmap.Conf) (component.Config, error) { c := confmap.NewFromStringMap(map[string]interface{}{ "metrics": map[string]any{ "exclude": map[string]any{ - "match_type": "strict", + "match_type": matchTypeStrict, "metric_names": []string{ "up", "scrape_duration_seconds", diff --git a/translator/translate/otel/processor/filterprocessor/translator_test.go b/translator/translate/otel/processor/filterprocessor/translator_test.go index d87fff55ff..bf558c18a9 100644 --- a/translator/translate/otel/processor/filterprocessor/translator_test.go +++ b/translator/translate/otel/processor/filterprocessor/translator_test.go @@ -144,3 +144,17 @@ func TestContainerInsightsJmx(t *testing.T) { assert.True(t, ok) assert.Equal(t, len(expectedCfg.Metrics.Include.MetricNames), len(actualCfg.Metrics.Include.MetricNames)) } + +func TestContainerInsights(t *testing.T) { + transl := NewTranslator(common.WithName(common.PipelineNameContainerInsights)).(*translator) + expectedCfg := transl.factory.CreateDefaultConfig().(*filterprocessor.Config) + c := testutil.GetConf(t, "filter_containerinsights_config.yaml") + require.NoError(t, c.Unmarshal(&expectedCfg)) + + conf := confmap.NewFromStringMap(testutil.GetJson(t, filepath.Join("testdata", "config.json"))) + translatedCfg, err := transl.Translate(conf) + assert.NoError(t, err) + actualCfg, ok := translatedCfg.(*filterprocessor.Config) + assert.True(t, ok) + assert.Equal(t, len(expectedCfg.Metrics.Exclude.MetricNames), len(actualCfg.Metrics.Exclude.MetricNames)) +} From aef00bc49ac2c47d59df0257902b4797c6134ebb Mon Sep 17 00:00:00 2001 From: Hyunsoo Kim Date: Wed, 19 Feb 2025 11:38:30 -0500 Subject: [PATCH 3/8] update comment --- .../translate/otel/pipeline/containerinsights/translator.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/translator/translate/otel/pipeline/containerinsights/translator.go b/translator/translate/otel/pipeline/containerinsights/translator.go index 6778d61f51..b19d3210b6 100644 --- a/translator/translate/otel/pipeline/containerinsights/translator.go +++ b/translator/translate/otel/pipeline/containerinsights/translator.go @@ -58,7 +58,9 @@ func (t *translator) Translate(conf *confmap.Conf) (*common.ComponentTranslators return nil, &common.MissingKeyError{ID: t.ID(), JsonKey: fmt.Sprint(ecsKey, " or ", eksKey)} } - // create processor map with default batch processor based on pipeline name + // create processor map with + // - default batch processor + // - filter processor to drop prometheus metadata processors := common.NewTranslatorMap( batchprocessor.NewTranslatorWithNameAndSection(t.pipelineName, common.LogsKey), filterprocessor.NewTranslator(common.WithName(t.pipelineName)), From addbd5d8e8394b194c362ba5679554ac92256f65 Mon Sep 17 00:00:00 2001 From: Hyunsoo Kim Date: Wed, 19 Feb 2025 11:43:15 -0500 Subject: [PATCH 4/8] lint --- .../translate/otel/pipeline/containerinsights/translator.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/translator/translate/otel/pipeline/containerinsights/translator.go b/translator/translate/otel/pipeline/containerinsights/translator.go index b19d3210b6..b2bba0a5d0 100644 --- a/translator/translate/otel/pipeline/containerinsights/translator.go +++ b/translator/translate/otel/pipeline/containerinsights/translator.go @@ -6,7 +6,6 @@ package containerinsights import ( "fmt" - "github.com/aws/amazon-cloudwatch-agent/translator/translate/otel/processor/filterprocessor" "go.opentelemetry.io/collector/component" "go.opentelemetry.io/collector/confmap" "go.opentelemetry.io/collector/pipeline" @@ -15,6 +14,7 @@ import ( "github.com/aws/amazon-cloudwatch-agent/translator/translate/otel/exporter/awsemf" "github.com/aws/amazon-cloudwatch-agent/translator/translate/otel/extension/agenthealth" "github.com/aws/amazon-cloudwatch-agent/translator/translate/otel/processor/batchprocessor" + "github.com/aws/amazon-cloudwatch-agent/translator/translate/otel/processor/filterprocessor" "github.com/aws/amazon-cloudwatch-agent/translator/translate/otel/processor/gpu" "github.com/aws/amazon-cloudwatch-agent/translator/translate/otel/processor/kueue" "github.com/aws/amazon-cloudwatch-agent/translator/translate/otel/processor/metricstransformprocessor" From b36b5e544a96cce52b3a10165f7e7dc4134ed67b Mon Sep 17 00:00:00 2001 From: Hyunsoo Kim Date: Wed, 19 Feb 2025 11:45:52 -0500 Subject: [PATCH 5/8] test config --- .../filterprocessor/filter_containerinsights_config.yaml | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 translator/translate/otel/processor/filterprocessor/filter_containerinsights_config.yaml diff --git a/translator/translate/otel/processor/filterprocessor/filter_containerinsights_config.yaml b/translator/translate/otel/processor/filterprocessor/filter_containerinsights_config.yaml new file mode 100644 index 0000000000..167528866c --- /dev/null +++ b/translator/translate/otel/processor/filterprocessor/filter_containerinsights_config.yaml @@ -0,0 +1,9 @@ +metrics: + exclude: + match_type: strict + metric_names: + - up + - scrape_duration_seconds + - scrape_samples_scraped + - scrape_series_added + - scrape_samples_post_metric_relabeling \ No newline at end of file From d3e953ab2a3dd2f5dfb06c1e2da34ba505dc1de2 Mon Sep 17 00:00:00 2001 From: Hyunsoo Kim Date: Wed, 19 Feb 2025 11:59:27 -0500 Subject: [PATCH 6/8] use config yaml --- .../processor/filterprocessor/translator.go | 22 ++++--------------- 1 file changed, 4 insertions(+), 18 deletions(-) diff --git a/translator/translate/otel/processor/filterprocessor/translator.go b/translator/translate/otel/processor/filterprocessor/translator.go index 4d63b8ad4a..8506e5d702 100644 --- a/translator/translate/otel/processor/filterprocessor/translator.go +++ b/translator/translate/otel/processor/filterprocessor/translator.go @@ -23,6 +23,9 @@ const ( //go:embed filter_jmx_config.yaml var containerInsightsJmxConfig string +//go:embed filter_containerinsights_config.yaml +var containerInsightsConfig string + type translator struct { common.NameProvider common.IndexProvider @@ -62,24 +65,7 @@ func (t *translator) Translate(conf *confmap.Conf) (component.Config, error) { return common.GetYamlFileToYamlConfig(cfg, containerInsightsJmxConfig) } if t.Name() == common.PipelineNameContainerInsights { - c := confmap.NewFromStringMap(map[string]interface{}{ - "metrics": map[string]any{ - "exclude": map[string]any{ - "match_type": matchTypeStrict, - "metric_names": []string{ - "up", - "scrape_duration_seconds", - "scrape_samples_scraped", - "scrape_series_added", - "scrape_samples_post_metric_relabeling", - }, - }, - }, - }) - if err := c.Unmarshal(&cfg); err != nil { - return nil, fmt.Errorf("unable to unmarshal filter processor (%s): %w", t.ID(), err) - } - return cfg, nil + return common.GetYamlFileToYamlConfig(cfg, containerInsightsConfig) } jmxMap := common.GetIndexedMap(conf, common.JmxConfigKey, t.Index()) From 561cb1afa7625bfff494403d0d70cd66ab14968f Mon Sep 17 00:00:00 2001 From: Hyunsoo Kim Date: Wed, 19 Feb 2025 13:57:06 -0500 Subject: [PATCH 7/8] use config yaml --- .../sampleConfig/advanced_config_darwin.yaml | 6 +- .../sampleConfig/advanced_config_linux.yaml | 6 +- .../sampleConfig/advanced_config_windows.yaml | 2 +- .../appsignals_and_eks_config.yaml | 15 ++ .../appsignals_and_k8s_config.yaml | 15 ++ .../appsignals_fallback_and_eks_config.yaml | 15 ++ .../appsignals_over_fallback_config.yaml | 15 ++ .../base_container_insights_config.yaml | 15 ++ .../sampleConfig/basic_config_linux.yaml | 2 +- .../sampleConfig/basic_config_windows.yaml | 2 +- .../sampleConfig/complete_darwin_config.yaml | 14 +- .../sampleConfig/complete_linux_config.yaml | 10 +- .../sampleConfig/container_insights_jmx.yaml | 15 ++ .../sampleConfig/delta_net_config_linux.yaml | 2 +- .../sampleConfig/drop_origin_linux.yaml | 4 +- .../emf_and_kubernetes_config.yaml | 15 ++ .../emf_and_kubernetes_with_gpu_config.yaml | 237 ++++++++++-------- .../emf_and_kubernetes_with_kueue_config.yaml | 25 ++ .../sampleConfig/invalid_input_linux.yaml | 2 +- .../sampleConfig/jmx_config_linux.yaml | 6 +- .../kubernetes_on_prem_config.yaml | 15 ++ .../kueue_container_insights_config.yaml | 25 ++ .../sampleConfig/log_ecs_metric_only.yaml | 15 ++ .../logs_and_kubernetes_config.yaml | 15 ++ .../sampleConfig/otlp_metrics_eks_config.yaml | 2 +- .../sampleConfig/standard_config_linux.yaml | 4 +- ...ndard_config_linux_with_common_config.yaml | 4 +- .../sampleConfig/standard_config_windows.yaml | 2 +- ...ard_config_windows_with_common_config.yaml | 2 +- translator/translate/otel/common/common.go | 1 + .../otel/exporter/awsemf/translator.go | 6 +- .../pipeline/containerinsights/translator.go | 6 +- .../containerinsights/translator_test.go | 7 +- .../pipeline/containerinsights/translators.go | 2 +- .../processor/filterprocessor/translator.go | 2 +- 35 files changed, 372 insertions(+), 159 deletions(-) diff --git a/translator/tocwconfig/sampleConfig/advanced_config_darwin.yaml b/translator/tocwconfig/sampleConfig/advanced_config_darwin.yaml index f5d9a29ac8..973c2e3aac 100644 --- a/translator/tocwconfig/sampleConfig/advanced_config_darwin.yaml +++ b/translator/tocwconfig/sampleConfig/advanced_config_darwin.yaml @@ -46,9 +46,9 @@ processors: ec2_instance_tag_keys: - AutoScalingGroupName ec2_metadata_tags: + - ImageId - InstanceId - InstanceType - - ImageId imds_retries: 1 middleware: agenthealth/statuscode refresh_interval_seconds: 0s @@ -90,11 +90,11 @@ service: - ec2tagger - awsentity/resource receivers: - - telegraf_disk - - telegraf_mem - telegraf_netstat - telegraf_swap - telegraf_cpu + - telegraf_disk + - telegraf_mem metrics/hostDeltaMetrics: exporters: - awscloudwatch diff --git a/translator/tocwconfig/sampleConfig/advanced_config_linux.yaml b/translator/tocwconfig/sampleConfig/advanced_config_linux.yaml index 6be870f0e4..37fd13c868 100644 --- a/translator/tocwconfig/sampleConfig/advanced_config_linux.yaml +++ b/translator/tocwconfig/sampleConfig/advanced_config_linux.yaml @@ -98,13 +98,13 @@ service: - ec2tagger - awsentity/resource receivers: + - telegraf_cpu + - telegraf_disk + - telegraf_mem - telegraf_netstat - telegraf_swap - telegraf_ethtool - telegraf_nvidia_smi - - telegraf_cpu - - telegraf_disk - - telegraf_mem metrics/hostDeltaMetrics: exporters: - awscloudwatch diff --git a/translator/tocwconfig/sampleConfig/advanced_config_windows.yaml b/translator/tocwconfig/sampleConfig/advanced_config_windows.yaml index d1fab88e83..004a6240f6 100644 --- a/translator/tocwconfig/sampleConfig/advanced_config_windows.yaml +++ b/translator/tocwconfig/sampleConfig/advanced_config_windows.yaml @@ -36,9 +36,9 @@ processors: ec2_instance_tag_keys: - AutoScalingGroupName ec2_metadata_tags: + - ImageId - InstanceId - InstanceType - - ImageId imds_retries: 1 middleware: agenthealth/statuscode refresh_interval_seconds: 0s diff --git a/translator/tocwconfig/sampleConfig/appsignals_and_eks_config.yaml b/translator/tocwconfig/sampleConfig/appsignals_and_eks_config.yaml index 89343a37f8..fe7efba5c2 100644 --- a/translator/tocwconfig/sampleConfig/appsignals_and_eks_config.yaml +++ b/translator/tocwconfig/sampleConfig/appsignals_and_eks_config.yaml @@ -332,6 +332,20 @@ processors: send_batch_max_size: 0 send_batch_size: 8192 timeout: 5s + filter/containerinsights: + error_mode: propagate + logs: {} + metrics: + exclude: + match_type: strict + metric_names: + - up + - scrape_duration_seconds + - scrape_samples_scraped + - scrape_series_added + - scrape_samples_post_metric_relabeling + spans: {} + traces: {} metricstransform/application_signals: transforms: - action: update @@ -1553,6 +1567,7 @@ service: - awsemf/containerinsights processors: - batch/containerinsights + - filter/containerinsights receivers: - awscontainerinsightreceiver traces/application_signals: diff --git a/translator/tocwconfig/sampleConfig/appsignals_and_k8s_config.yaml b/translator/tocwconfig/sampleConfig/appsignals_and_k8s_config.yaml index a59e397e5e..28105f35ae 100644 --- a/translator/tocwconfig/sampleConfig/appsignals_and_k8s_config.yaml +++ b/translator/tocwconfig/sampleConfig/appsignals_and_k8s_config.yaml @@ -333,6 +333,20 @@ processors: send_batch_max_size: 0 send_batch_size: 8192 timeout: 5s + filter/containerinsights: + error_mode: propagate + logs: {} + metrics: + exclude: + match_type: strict + metric_names: + - up + - scrape_duration_seconds + - scrape_samples_scraped + - scrape_series_added + - scrape_samples_post_metric_relabeling + spans: {} + traces: {} metricstransform/application_signals: transforms: - action: update @@ -1534,6 +1548,7 @@ service: - awsemf/containerinsights processors: - batch/containerinsights + - filter/containerinsights receivers: - awscontainerinsightreceiver traces/application_signals: diff --git a/translator/tocwconfig/sampleConfig/appsignals_fallback_and_eks_config.yaml b/translator/tocwconfig/sampleConfig/appsignals_fallback_and_eks_config.yaml index 89343a37f8..fe7efba5c2 100644 --- a/translator/tocwconfig/sampleConfig/appsignals_fallback_and_eks_config.yaml +++ b/translator/tocwconfig/sampleConfig/appsignals_fallback_and_eks_config.yaml @@ -332,6 +332,20 @@ processors: send_batch_max_size: 0 send_batch_size: 8192 timeout: 5s + filter/containerinsights: + error_mode: propagate + logs: {} + metrics: + exclude: + match_type: strict + metric_names: + - up + - scrape_duration_seconds + - scrape_samples_scraped + - scrape_series_added + - scrape_samples_post_metric_relabeling + spans: {} + traces: {} metricstransform/application_signals: transforms: - action: update @@ -1553,6 +1567,7 @@ service: - awsemf/containerinsights processors: - batch/containerinsights + - filter/containerinsights receivers: - awscontainerinsightreceiver traces/application_signals: diff --git a/translator/tocwconfig/sampleConfig/appsignals_over_fallback_config.yaml b/translator/tocwconfig/sampleConfig/appsignals_over_fallback_config.yaml index 89343a37f8..fe7efba5c2 100644 --- a/translator/tocwconfig/sampleConfig/appsignals_over_fallback_config.yaml +++ b/translator/tocwconfig/sampleConfig/appsignals_over_fallback_config.yaml @@ -332,6 +332,20 @@ processors: send_batch_max_size: 0 send_batch_size: 8192 timeout: 5s + filter/containerinsights: + error_mode: propagate + logs: {} + metrics: + exclude: + match_type: strict + metric_names: + - up + - scrape_duration_seconds + - scrape_samples_scraped + - scrape_series_added + - scrape_samples_post_metric_relabeling + spans: {} + traces: {} metricstransform/application_signals: transforms: - action: update @@ -1553,6 +1567,7 @@ service: - awsemf/containerinsights processors: - batch/containerinsights + - filter/containerinsights receivers: - awscontainerinsightreceiver traces/application_signals: diff --git a/translator/tocwconfig/sampleConfig/base_container_insights_config.yaml b/translator/tocwconfig/sampleConfig/base_container_insights_config.yaml index d368a351e0..9c8e12b228 100644 --- a/translator/tocwconfig/sampleConfig/base_container_insights_config.yaml +++ b/translator/tocwconfig/sampleConfig/base_container_insights_config.yaml @@ -169,6 +169,20 @@ processors: send_batch_max_size: 0 send_batch_size: 8192 timeout: 5s + filter/containerinsights: + error_mode: propagate + logs: {} + metrics: + exclude: + match_type: strict + metric_names: + - up + - scrape_duration_seconds + - scrape_samples_scraped + - scrape_series_added + - scrape_samples_post_metric_relabeling + spans: {} + traces: {} receivers: awscontainerinsightreceiver: accelerated_compute_metrics: false @@ -244,6 +258,7 @@ service: - awsemf/containerinsights processors: - batch/containerinsights + - filter/containerinsights receivers: - awscontainerinsightreceiver telemetry: diff --git a/translator/tocwconfig/sampleConfig/basic_config_linux.yaml b/translator/tocwconfig/sampleConfig/basic_config_linux.yaml index 79c2cbb4c9..34097d8b2f 100644 --- a/translator/tocwconfig/sampleConfig/basic_config_linux.yaml +++ b/translator/tocwconfig/sampleConfig/basic_config_linux.yaml @@ -64,8 +64,8 @@ service: - ec2tagger - awsentity/resource receivers: - - telegraf_disk - telegraf_mem + - telegraf_disk telemetry: logs: development: false diff --git a/translator/tocwconfig/sampleConfig/basic_config_windows.yaml b/translator/tocwconfig/sampleConfig/basic_config_windows.yaml index b8b7564990..becf25fd31 100644 --- a/translator/tocwconfig/sampleConfig/basic_config_windows.yaml +++ b/translator/tocwconfig/sampleConfig/basic_config_windows.yaml @@ -36,9 +36,9 @@ processors: ec2_instance_tag_keys: - AutoScalingGroupName ec2_metadata_tags: - - ImageId - InstanceId - InstanceType + - ImageId imds_retries: 1 middleware: agenthealth/statuscode refresh_interval_seconds: 0s diff --git a/translator/tocwconfig/sampleConfig/complete_darwin_config.yaml b/translator/tocwconfig/sampleConfig/complete_darwin_config.yaml index 20946361b3..900becad73 100644 --- a/translator/tocwconfig/sampleConfig/complete_darwin_config.yaml +++ b/translator/tocwconfig/sampleConfig/complete_darwin_config.yaml @@ -134,9 +134,9 @@ processors: ec2_instance_tag_keys: - AutoScalingGroupName ec2_metadata_tags: + - ImageId - InstanceId - InstanceType - - ImageId imds_retries: 1 middleware: agenthealth/statuscode refresh_interval_seconds: 0s @@ -289,23 +289,23 @@ service: - transform - awsentity/resource receivers: - - telegraf_disk - - telegraf_swap - telegraf_mem - - telegraf_cpu - - telegraf_processes - telegraf_netstat - telegraf_procstat/1917393364 + - telegraf_swap + - telegraf_disk + - telegraf_cpu + - telegraf_processes metrics/hostCustomMetrics: exporters: - awscloudwatch processors: - - awsentity/service/telegraf - ec2tagger - transform + - awsentity/service/telegraf receivers: - - telegraf_socket_listener - telegraf_statsd + - telegraf_socket_listener metrics/hostDeltaMetrics: exporters: - awscloudwatch diff --git a/translator/tocwconfig/sampleConfig/complete_linux_config.yaml b/translator/tocwconfig/sampleConfig/complete_linux_config.yaml index 73af034d13..30f59e002f 100644 --- a/translator/tocwconfig/sampleConfig/complete_linux_config.yaml +++ b/translator/tocwconfig/sampleConfig/complete_linux_config.yaml @@ -212,9 +212,9 @@ processors: metric_statements: - context: metric statements: - - set(name, "kafka.fetch-rate") where name == "kafka.consumer.fetch-rate" - set(unit, "unit") where name == "jvm.memory.heap.used" - set(name, "JVM_MEM_HEAP_USED") where name == "jvm.memory.heap.used" + - set(name, "kafka.fetch-rate") where name == "kafka.consumer.fetch-rate" trace_statements: [] transform/jmx/1: error_mode: propagate @@ -396,13 +396,13 @@ service: - transform - awsentity/resource receivers: + - telegraf_netstat - telegraf_cpu - - telegraf_disk - telegraf_swap - - telegraf_netstat - - telegraf_processes - telegraf_procstat/1917393364 - telegraf_mem + - telegraf_processes + - telegraf_disk metrics/hostCustomMetrics/cloudwatch: exporters: - awscloudwatch @@ -411,8 +411,8 @@ service: - transform - awsentity/service/telegraf receivers: - - telegraf_socket_listener - telegraf_statsd + - telegraf_socket_listener metrics/hostDeltaMetrics/cloudwatch: exporters: - awscloudwatch diff --git a/translator/tocwconfig/sampleConfig/container_insights_jmx.yaml b/translator/tocwconfig/sampleConfig/container_insights_jmx.yaml index 39de3d6c0d..4d90f27f01 100644 --- a/translator/tocwconfig/sampleConfig/container_insights_jmx.yaml +++ b/translator/tocwconfig/sampleConfig/container_insights_jmx.yaml @@ -205,6 +205,20 @@ processors: match_type: "" initial_value: 2 max_staleness: 0s + filter/containerinsights: + error_mode: propagate + logs: {} + metrics: + exclude: + match_type: strict + metric_names: + - up + - scrape_duration_seconds + - scrape_samples_scraped + - scrape_series_added + - scrape_samples_post_metric_relabeling + spans: {} + traces: {} filter/containerinsightsjmx: error_mode: propagate logs: {} @@ -537,6 +551,7 @@ service: - awsemf/containerinsights processors: - batch/containerinsights + - filter/containerinsights receivers: - awscontainerinsightreceiver metrics/containerinsightsjmx: diff --git a/translator/tocwconfig/sampleConfig/delta_net_config_linux.yaml b/translator/tocwconfig/sampleConfig/delta_net_config_linux.yaml index a67cd3c009..9d6a783b30 100644 --- a/translator/tocwconfig/sampleConfig/delta_net_config_linux.yaml +++ b/translator/tocwconfig/sampleConfig/delta_net_config_linux.yaml @@ -43,9 +43,9 @@ processors: ec2_instance_tag_keys: - AutoScalingGroupName ec2_metadata_tags: - - ImageId - InstanceId - InstanceType + - ImageId imds_retries: 1 middleware: agenthealth/statuscode refresh_interval_seconds: 0s diff --git a/translator/tocwconfig/sampleConfig/drop_origin_linux.yaml b/translator/tocwconfig/sampleConfig/drop_origin_linux.yaml index 7035225c15..d9fdee561e 100644 --- a/translator/tocwconfig/sampleConfig/drop_origin_linux.yaml +++ b/translator/tocwconfig/sampleConfig/drop_origin_linux.yaml @@ -41,9 +41,9 @@ processors: ec2_instance_tag_keys: - AutoScalingGroupName ec2_metadata_tags: - - ImageId - InstanceId - InstanceType + - ImageId imds_retries: 1 middleware: agenthealth/statuscode refresh_interval_seconds: 0s @@ -85,9 +85,9 @@ service: - transform - awsentity/resource receivers: - - telegraf_nvidia_smi - telegraf_cpu - telegraf_disk + - telegraf_nvidia_smi telemetry: logs: development: false diff --git a/translator/tocwconfig/sampleConfig/emf_and_kubernetes_config.yaml b/translator/tocwconfig/sampleConfig/emf_and_kubernetes_config.yaml index ea1d4d3d80..9735dd2efd 100644 --- a/translator/tocwconfig/sampleConfig/emf_and_kubernetes_config.yaml +++ b/translator/tocwconfig/sampleConfig/emf_and_kubernetes_config.yaml @@ -421,6 +421,20 @@ processors: send_batch_max_size: 0 send_batch_size: 8192 timeout: 5s + filter/containerinsights: + error_mode: propagate + logs: {} + metrics: + exclude: + match_type: strict + metric_names: + - up + - scrape_duration_seconds + - scrape_samples_scraped + - scrape_series_added + - scrape_samples_post_metric_relabeling + spans: {} + traces: {} metricstransform/containerinsights: transforms: - action: insert @@ -508,6 +522,7 @@ service: - awsemf/containerinsights processors: - batch/containerinsights + - filter/containerinsights - metricstransform/containerinsights receivers: - awscontainerinsightreceiver diff --git a/translator/tocwconfig/sampleConfig/emf_and_kubernetes_with_gpu_config.yaml b/translator/tocwconfig/sampleConfig/emf_and_kubernetes_with_gpu_config.yaml index d5b6ed4aa8..bac9c069b5 100644 --- a/translator/tocwconfig/sampleConfig/emf_and_kubernetes_with_gpu_config.yaml +++ b/translator/tocwconfig/sampleConfig/emf_and_kubernetes_with_gpu_config.yaml @@ -679,6 +679,20 @@ processors: send_batch_max_size: 0 send_batch_size: 8192 timeout: 5s + filter/containerinsights: + error_mode: propagate + logs: {} + metrics: + exclude: + match_type: strict + metric_names: + - up + - scrape_duration_seconds + - scrape_samples_scraped + - scrape_series_added + - scrape_samples_post_metric_relabeling + spans: {} + traces: {} gpuattributes/containerinsights: {} metricstransform/containerinsights: transforms: @@ -692,9 +706,9 @@ processors: submatch_case: "" - action: insert aggregation_type: "" - include: DCGM_FI_DEV_GPU_UTIL + include: DCGM_FI_DEV_GPU_TEMP match_type: "" - new_name: container_gpu_utilization + new_name: container_gpu_temperature operations: - action: add_label aggregation_type: "" @@ -706,9 +720,9 @@ processors: submatch_case: "" - action: insert aggregation_type: "" - include: DCGM_FI_DEV_GPU_UTIL + include: DCGM_FI_DEV_GPU_TEMP match_type: "" - new_name: pod_gpu_utilization + new_name: pod_gpu_temperature operations: - action: add_label aggregation_type: "" @@ -720,9 +734,9 @@ processors: submatch_case: "" - action: insert aggregation_type: "" - include: DCGM_FI_DEV_GPU_UTIL + include: DCGM_FI_DEV_GPU_TEMP match_type: "" - new_name: node_gpu_utilization + new_name: node_gpu_temperature operations: - action: add_label aggregation_type: "" @@ -734,9 +748,9 @@ processors: submatch_case: "" - action: insert aggregation_type: "" - include: DCGM_FI_DEV_FB_USED_PERCENT + include: DCGM_FI_DEV_POWER_USAGE match_type: "" - new_name: container_gpu_memory_utilization + new_name: container_gpu_power_draw operations: - action: add_label aggregation_type: "" @@ -745,19 +759,26 @@ processors: label_value: "" new_label: Type new_value: ContainerGPU - - action: experimental_scale_value + submatch_case: "" + - action: insert + aggregation_type: "" + include: DCGM_FI_DEV_POWER_USAGE + match_type: "" + new_name: pod_gpu_power_draw + operations: + - action: add_label aggregation_type: "" - experimental_scale: 100 + experimental_scale: 0 label: "" label_value: "" - new_label: "" - new_value: "" + new_label: Type + new_value: PodGPU submatch_case: "" - action: insert aggregation_type: "" - include: DCGM_FI_DEV_FB_USED_PERCENT + include: DCGM_FI_DEV_POWER_USAGE match_type: "" - new_name: pod_gpu_memory_utilization + new_name: node_gpu_power_draw operations: - action: add_label aggregation_type: "" @@ -765,20 +786,27 @@ processors: label: "" label_value: "" new_label: Type - new_value: PodGPU - - action: experimental_scale_value + new_value: NodeGPU + submatch_case: "" + - action: insert + aggregation_type: "" + include: DCGM_FI_DEV_GPU_UTIL + match_type: "" + new_name: container_gpu_utilization + operations: + - action: add_label aggregation_type: "" - experimental_scale: 100 + experimental_scale: 0 label: "" label_value: "" - new_label: "" - new_value: "" + new_label: Type + new_value: ContainerGPU submatch_case: "" - action: insert aggregation_type: "" - include: DCGM_FI_DEV_FB_USED_PERCENT + include: DCGM_FI_DEV_GPU_UTIL match_type: "" - new_name: node_gpu_memory_utilization + new_name: pod_gpu_utilization operations: - action: add_label aggregation_type: "" @@ -786,20 +814,27 @@ processors: label: "" label_value: "" new_label: Type - new_value: NodeGPU - - action: experimental_scale_value + new_value: PodGPU + submatch_case: "" + - action: insert + aggregation_type: "" + include: DCGM_FI_DEV_GPU_UTIL + match_type: "" + new_name: node_gpu_utilization + operations: + - action: add_label aggregation_type: "" - experimental_scale: 100 + experimental_scale: 0 label: "" label_value: "" - new_label: "" - new_value: "" + new_label: Type + new_value: NodeGPU submatch_case: "" - action: insert aggregation_type: "" - include: DCGM_FI_DEV_FB_USED + include: DCGM_FI_DEV_FB_USED_PERCENT match_type: "" - new_name: container_gpu_memory_used + new_name: container_gpu_memory_utilization operations: - action: add_label aggregation_type: "" @@ -810,7 +845,7 @@ processors: new_value: ContainerGPU - action: experimental_scale_value aggregation_type: "" - experimental_scale: 1.048576e+06 + experimental_scale: 100 label: "" label_value: "" new_label: "" @@ -818,9 +853,9 @@ processors: submatch_case: "" - action: insert aggregation_type: "" - include: DCGM_FI_DEV_FB_USED + include: DCGM_FI_DEV_FB_USED_PERCENT match_type: "" - new_name: pod_gpu_memory_used + new_name: pod_gpu_memory_utilization operations: - action: add_label aggregation_type: "" @@ -831,7 +866,7 @@ processors: new_value: PodGPU - action: experimental_scale_value aggregation_type: "" - experimental_scale: 1.048576e+06 + experimental_scale: 100 label: "" label_value: "" new_label: "" @@ -839,9 +874,9 @@ processors: submatch_case: "" - action: insert aggregation_type: "" - include: DCGM_FI_DEV_FB_USED + include: DCGM_FI_DEV_FB_USED_PERCENT match_type: "" - new_name: node_gpu_memory_used + new_name: node_gpu_memory_utilization operations: - action: add_label aggregation_type: "" @@ -852,7 +887,7 @@ processors: new_value: NodeGPU - action: experimental_scale_value aggregation_type: "" - experimental_scale: 1.048576e+06 + experimental_scale: 100 label: "" label_value: "" new_label: "" @@ -860,9 +895,9 @@ processors: submatch_case: "" - action: insert aggregation_type: "" - include: DCGM_FI_DEV_FB_TOTAL + include: DCGM_FI_DEV_FB_USED match_type: "" - new_name: container_gpu_memory_total + new_name: container_gpu_memory_used operations: - action: add_label aggregation_type: "" @@ -881,9 +916,9 @@ processors: submatch_case: "" - action: insert aggregation_type: "" - include: DCGM_FI_DEV_FB_TOTAL + include: DCGM_FI_DEV_FB_USED match_type: "" - new_name: pod_gpu_memory_total + new_name: pod_gpu_memory_used operations: - action: add_label aggregation_type: "" @@ -902,9 +937,9 @@ processors: submatch_case: "" - action: insert aggregation_type: "" - include: DCGM_FI_DEV_FB_TOTAL + include: DCGM_FI_DEV_FB_USED match_type: "" - new_name: node_gpu_memory_total + new_name: node_gpu_memory_used operations: - action: add_label aggregation_type: "" @@ -923,9 +958,9 @@ processors: submatch_case: "" - action: insert aggregation_type: "" - include: DCGM_FI_DEV_GPU_TEMP + include: DCGM_FI_DEV_FB_TOTAL match_type: "" - new_name: container_gpu_temperature + new_name: container_gpu_memory_total operations: - action: add_label aggregation_type: "" @@ -934,12 +969,19 @@ processors: label_value: "" new_label: Type new_value: ContainerGPU + - action: experimental_scale_value + aggregation_type: "" + experimental_scale: 1.048576e+06 + label: "" + label_value: "" + new_label: "" + new_value: "" submatch_case: "" - action: insert aggregation_type: "" - include: DCGM_FI_DEV_GPU_TEMP + include: DCGM_FI_DEV_FB_TOTAL match_type: "" - new_name: pod_gpu_temperature + new_name: pod_gpu_memory_total operations: - action: add_label aggregation_type: "" @@ -948,26 +990,19 @@ processors: label_value: "" new_label: Type new_value: PodGPU - submatch_case: "" - - action: insert - aggregation_type: "" - include: DCGM_FI_DEV_GPU_TEMP - match_type: "" - new_name: node_gpu_temperature - operations: - - action: add_label + - action: experimental_scale_value aggregation_type: "" - experimental_scale: 0 + experimental_scale: 1.048576e+06 label: "" label_value: "" - new_label: Type - new_value: NodeGPU + new_label: "" + new_value: "" submatch_case: "" - action: insert aggregation_type: "" - include: DCGM_FI_DEV_POWER_USAGE + include: DCGM_FI_DEV_FB_TOTAL match_type: "" - new_name: container_gpu_power_draw + new_name: node_gpu_memory_total operations: - action: add_label aggregation_type: "" @@ -975,41 +1010,34 @@ processors: label: "" label_value: "" new_label: Type - new_value: ContainerGPU - submatch_case: "" - - action: insert - aggregation_type: "" - include: DCGM_FI_DEV_POWER_USAGE - match_type: "" - new_name: pod_gpu_power_draw - operations: - - action: add_label + new_value: NodeGPU + - action: experimental_scale_value aggregation_type: "" - experimental_scale: 0 + experimental_scale: 1.048576e+06 label: "" label_value: "" - new_label: Type - new_value: PodGPU + new_label: "" + new_value: "" submatch_case: "" - - action: insert + - action: update aggregation_type: "" - include: DCGM_FI_DEV_POWER_USAGE + include: neuroncore_utilization_ratio match_type: "" - new_name: node_gpu_power_draw + new_name: neuroncore_utilization operations: - - action: add_label + - action: experimental_scale_value aggregation_type: "" - experimental_scale: 0 + experimental_scale: 100 label: "" label_value: "" - new_label: Type - new_value: NodeGPU + new_label: "" + new_value: "" submatch_case: "" - action: update aggregation_type: "" - include: neuroncore_memory_usage_constants + include: instance_info match_type: "" - new_name: neuroncore_memory_usage_constants + new_name: instance_info operations: [] submatch_case: "" - action: update @@ -1021,58 +1049,44 @@ processors: submatch_case: "" - action: update aggregation_type: "" - include: hardware_ecc_events_total + include: execution_errors_total match_type: "" - new_name: neurondevice_hw_ecc_events + new_name: neuron_execution_errors operations: [] submatch_case: "" - action: update aggregation_type: "" - include: execution_latency_seconds + include: neuron_runtime_memory_used_bytes match_type: "" - new_name: neuron_execution_latency + new_name: neurondevice_runtime_memory_used_bytes operations: [] submatch_case: "" - action: update aggregation_type: "" - include: neuroncore_memory_usage_model_shared_scratchpad + include: neuroncore_memory_usage_constants match_type: "" - new_name: neuroncore_memory_usage_model_shared_scratchpad + new_name: neuroncore_memory_usage_constants operations: [] submatch_case: "" - action: update aggregation_type: "" - include: neuroncore_memory_usage_runtime_memory + include: neuroncore_memory_usage_model_code match_type: "" - new_name: neuroncore_memory_usage_runtime_memory + new_name: neuroncore_memory_usage_model_code operations: [] submatch_case: "" - action: update aggregation_type: "" - include: neuroncore_memory_usage_tensors + include: neuroncore_memory_usage_model_shared_scratchpad match_type: "" - new_name: neuroncore_memory_usage_tensors + new_name: neuroncore_memory_usage_model_shared_scratchpad operations: [] submatch_case: "" - action: update aggregation_type: "" - include: neuroncore_utilization_ratio - match_type: "" - new_name: neuroncore_utilization - operations: - - action: experimental_scale_value - aggregation_type: "" - experimental_scale: 100 - label: "" - label_value: "" - new_label: "" - new_value: "" - submatch_case: "" - - action: update - aggregation_type: "" - include: execution_errors_total + include: hardware_ecc_events_total match_type: "" - new_name: neuron_execution_errors + new_name: neurondevice_hw_ecc_events operations: [] submatch_case: "" - action: update @@ -1084,23 +1098,23 @@ processors: submatch_case: "" - action: update aggregation_type: "" - include: neuron_runtime_memory_used_bytes + include: neuroncore_memory_usage_runtime_memory match_type: "" - new_name: neurondevice_runtime_memory_used_bytes + new_name: neuroncore_memory_usage_runtime_memory operations: [] submatch_case: "" - action: update aggregation_type: "" - include: neuroncore_memory_usage_model_code + include: neuroncore_memory_usage_tensors match_type: "" - new_name: neuroncore_memory_usage_model_code + new_name: neuroncore_memory_usage_tensors operations: [] submatch_case: "" - action: update aggregation_type: "" - include: instance_info + include: execution_latency_seconds match_type: "" - new_name: instance_info + new_name: neuron_execution_latency operations: [] submatch_case: "" receivers: @@ -1180,6 +1194,7 @@ service: - awsemf/containerinsights processors: - batch/containerinsights + - filter/containerinsights - metricstransform/containerinsights - gpuattributes/containerinsights receivers: diff --git a/translator/tocwconfig/sampleConfig/emf_and_kubernetes_with_kueue_config.yaml b/translator/tocwconfig/sampleConfig/emf_and_kubernetes_with_kueue_config.yaml index e68d2a04a8..ce09e76af1 100644 --- a/translator/tocwconfig/sampleConfig/emf_and_kubernetes_with_kueue_config.yaml +++ b/translator/tocwconfig/sampleConfig/emf_and_kubernetes_with_kueue_config.yaml @@ -506,6 +506,29 @@ processors: send_batch_max_size: 0 send_batch_size: 8192 timeout: 5s + filter/containerinsights: + error_mode: propagate + logs: {} + metrics: + exclude: + match_type: strict + metric_names: + - up + - scrape_duration_seconds + - scrape_samples_scraped + - scrape_series_added + - scrape_samples_post_metric_relabeling + spans: {} + traces: {} + filter/kueueContainerInsights: + error_mode: propagate + logs: {} + metrics: + include: + match_type: strict + metric_names: [] + spans: {} + traces: {} kueueattributes/kueueContainerInsights: {} metricstransform/containerinsights: transforms: @@ -597,6 +620,7 @@ service: - awsemf/containerinsights processors: - batch/containerinsights + - filter/containerinsights - metricstransform/containerinsights receivers: - awscontainerinsightreceiver @@ -605,6 +629,7 @@ service: - awsemf/kueueContainerInsights processors: - batch/kueueContainerInsights + - filter/kueueContainerInsights - kueueattributes/kueueContainerInsights receivers: - awscontainerinsightskueuereceiver diff --git a/translator/tocwconfig/sampleConfig/invalid_input_linux.yaml b/translator/tocwconfig/sampleConfig/invalid_input_linux.yaml index 1816ef2182..34097d8b2f 100644 --- a/translator/tocwconfig/sampleConfig/invalid_input_linux.yaml +++ b/translator/tocwconfig/sampleConfig/invalid_input_linux.yaml @@ -36,9 +36,9 @@ processors: ec2_instance_tag_keys: - AutoScalingGroupName ec2_metadata_tags: + - ImageId - InstanceId - InstanceType - - ImageId imds_retries: 1 middleware: agenthealth/statuscode refresh_interval_seconds: 0s diff --git a/translator/tocwconfig/sampleConfig/jmx_config_linux.yaml b/translator/tocwconfig/sampleConfig/jmx_config_linux.yaml index aaf88abff9..604117d5da 100644 --- a/translator/tocwconfig/sampleConfig/jmx_config_linux.yaml +++ b/translator/tocwconfig/sampleConfig/jmx_config_linux.yaml @@ -153,9 +153,9 @@ processors: metric_statements: - context: metric statements: - - set(name, "kafka.fetch-rate") where name == "kafka.consumer.fetch-rate" - set(unit, "unit") where name == "jvm.memory.heap.used" - set(name, "JVM_MEM_HEAP_USED") where name == "jvm.memory.heap.used" + - set(name, "kafka.fetch-rate") where name == "kafka.consumer.fetch-rate" trace_statements: [] receivers: jmx: @@ -188,8 +188,8 @@ service: - transform - batch/host/amp receivers: - - telegraf_disk - telegraf_cpu + - telegraf_disk metrics/host/cloudwatch: exporters: - awscloudwatch @@ -197,8 +197,8 @@ service: - transform - awsentity/resource receivers: - - telegraf_disk - telegraf_cpu + - telegraf_disk metrics/jmx/amp: exporters: - prometheusremotewrite/amp diff --git a/translator/tocwconfig/sampleConfig/kubernetes_on_prem_config.yaml b/translator/tocwconfig/sampleConfig/kubernetes_on_prem_config.yaml index 3f30d378fb..d7ca0d4ab7 100644 --- a/translator/tocwconfig/sampleConfig/kubernetes_on_prem_config.yaml +++ b/translator/tocwconfig/sampleConfig/kubernetes_on_prem_config.yaml @@ -383,6 +383,20 @@ processors: send_batch_max_size: 0 send_batch_size: 8192 timeout: 5s + filter/containerinsights: + error_mode: propagate + logs: {} + metrics: + exclude: + match_type: strict + metric_names: + - up + - scrape_duration_seconds + - scrape_samples_scraped + - scrape_series_added + - scrape_samples_post_metric_relabeling + spans: {} + traces: {} metricstransform/containerinsights: transforms: - action: insert @@ -436,6 +450,7 @@ service: - awsemf/containerinsights processors: - batch/containerinsights + - filter/containerinsights - metricstransform/containerinsights receivers: - awscontainerinsightreceiver diff --git a/translator/tocwconfig/sampleConfig/kueue_container_insights_config.yaml b/translator/tocwconfig/sampleConfig/kueue_container_insights_config.yaml index 92f3426d8f..f93b2a5f2a 100644 --- a/translator/tocwconfig/sampleConfig/kueue_container_insights_config.yaml +++ b/translator/tocwconfig/sampleConfig/kueue_container_insights_config.yaml @@ -252,6 +252,29 @@ processors: send_batch_max_size: 0 send_batch_size: 8192 timeout: 5s + filter/containerinsights: + error_mode: propagate + logs: {} + metrics: + exclude: + match_type: strict + metric_names: + - up + - scrape_duration_seconds + - scrape_samples_scraped + - scrape_series_added + - scrape_samples_post_metric_relabeling + spans: {} + traces: {} + filter/kueueContainerInsights: + error_mode: propagate + logs: {} + metrics: + include: + match_type: strict + metric_names: [] + spans: {} + traces: {} kueueattributes/kueueContainerInsights: {} receivers: awscontainerinsightreceiver: @@ -331,6 +354,7 @@ service: - awsemf/containerinsights processors: - batch/containerinsights + - filter/containerinsights receivers: - awscontainerinsightreceiver metrics/kueueContainerInsights: @@ -338,6 +362,7 @@ service: - awsemf/kueueContainerInsights processors: - batch/kueueContainerInsights + - filter/kueueContainerInsights - kueueattributes/kueueContainerInsights receivers: - awscontainerinsightskueuereceiver diff --git a/translator/tocwconfig/sampleConfig/log_ecs_metric_only.yaml b/translator/tocwconfig/sampleConfig/log_ecs_metric_only.yaml index a35321d7ce..2e03863be9 100644 --- a/translator/tocwconfig/sampleConfig/log_ecs_metric_only.yaml +++ b/translator/tocwconfig/sampleConfig/log_ecs_metric_only.yaml @@ -115,6 +115,20 @@ processors: send_batch_max_size: 0 send_batch_size: 8192 timeout: 5s + filter/containerinsights: + error_mode: propagate + logs: {} + metrics: + exclude: + match_type: strict + metric_names: + - up + - scrape_duration_seconds + - scrape_samples_scraped + - scrape_series_added + - scrape_samples_post_metric_relabeling + spans: {} + traces: {} receivers: awscontainerinsightreceiver: accelerated_compute_metrics: true @@ -189,6 +203,7 @@ service: - awsemf/containerinsights processors: - batch/containerinsights + - filter/containerinsights receivers: - awscontainerinsightreceiver telemetry: diff --git a/translator/tocwconfig/sampleConfig/logs_and_kubernetes_config.yaml b/translator/tocwconfig/sampleConfig/logs_and_kubernetes_config.yaml index 93f8462222..23c0654c26 100644 --- a/translator/tocwconfig/sampleConfig/logs_and_kubernetes_config.yaml +++ b/translator/tocwconfig/sampleConfig/logs_and_kubernetes_config.yaml @@ -415,6 +415,20 @@ processors: send_batch_max_size: 0 send_batch_size: 8192 timeout: 5s + filter/containerinsights: + error_mode: propagate + logs: {} + metrics: + exclude: + match_type: strict + metric_names: + - up + - scrape_duration_seconds + - scrape_samples_scraped + - scrape_series_added + - scrape_samples_post_metric_relabeling + spans: {} + traces: {} metricstransform/containerinsights: transforms: - action: insert @@ -500,6 +514,7 @@ service: - awsemf/containerinsights processors: - batch/containerinsights + - filter/containerinsights - metricstransform/containerinsights receivers: - awscontainerinsightreceiver diff --git a/translator/tocwconfig/sampleConfig/otlp_metrics_eks_config.yaml b/translator/tocwconfig/sampleConfig/otlp_metrics_eks_config.yaml index ce10d3d620..c6aca99c59 100644 --- a/translator/tocwconfig/sampleConfig/otlp_metrics_eks_config.yaml +++ b/translator/tocwconfig/sampleConfig/otlp_metrics_eks_config.yaml @@ -49,9 +49,9 @@ processors: ec2_instance_tag_keys: - AutoScalingGroupName ec2_metadata_tags: + - InstanceType - ImageId - InstanceId - - InstanceType imds_retries: 1 middleware: agenthealth/statuscode refresh_interval_seconds: 0s diff --git a/translator/tocwconfig/sampleConfig/standard_config_linux.yaml b/translator/tocwconfig/sampleConfig/standard_config_linux.yaml index 4f5cdd26b5..c60c336889 100644 --- a/translator/tocwconfig/sampleConfig/standard_config_linux.yaml +++ b/translator/tocwconfig/sampleConfig/standard_config_linux.yaml @@ -46,9 +46,9 @@ processors: ec2_instance_tag_keys: - AutoScalingGroupName ec2_metadata_tags: + - InstanceType - ImageId - InstanceId - - InstanceType middleware: agenthealth/statuscode refresh_interval_seconds: 0s receivers: @@ -85,10 +85,10 @@ service: - ec2tagger - awsentity/resource receivers: - - telegraf_swap - telegraf_cpu - telegraf_disk - telegraf_mem + - telegraf_swap metrics/hostDeltaMetrics: exporters: - awscloudwatch diff --git a/translator/tocwconfig/sampleConfig/standard_config_linux_with_common_config.yaml b/translator/tocwconfig/sampleConfig/standard_config_linux_with_common_config.yaml index 570f767c78..b0d4f0bbad 100644 --- a/translator/tocwconfig/sampleConfig/standard_config_linux_with_common_config.yaml +++ b/translator/tocwconfig/sampleConfig/standard_config_linux_with_common_config.yaml @@ -92,10 +92,10 @@ service: - ec2tagger - awsentity/resource receivers: - - telegraf_cpu - - telegraf_disk - telegraf_mem - telegraf_swap + - telegraf_cpu + - telegraf_disk metrics/hostDeltaMetrics: exporters: - awscloudwatch diff --git a/translator/tocwconfig/sampleConfig/standard_config_windows.yaml b/translator/tocwconfig/sampleConfig/standard_config_windows.yaml index 1d5b436001..cbccc9051d 100644 --- a/translator/tocwconfig/sampleConfig/standard_config_windows.yaml +++ b/translator/tocwconfig/sampleConfig/standard_config_windows.yaml @@ -80,11 +80,11 @@ service: - ec2tagger - awsentity/resource receivers: - - telegraf_win_perf_counters/3446270237 - telegraf_win_perf_counters/3762679655 - telegraf_win_perf_counters/4283769065 - telegraf_win_perf_counters/1492679118 - telegraf_win_perf_counters/3610923661 + - telegraf_win_perf_counters/3446270237 telemetry: logs: development: false diff --git a/translator/tocwconfig/sampleConfig/standard_config_windows_with_common_config.yaml b/translator/tocwconfig/sampleConfig/standard_config_windows_with_common_config.yaml index 0a1f83e489..680a915eb4 100644 --- a/translator/tocwconfig/sampleConfig/standard_config_windows_with_common_config.yaml +++ b/translator/tocwconfig/sampleConfig/standard_config_windows_with_common_config.yaml @@ -87,11 +87,11 @@ service: - ec2tagger - awsentity/resource receivers: + - telegraf_win_perf_counters/3762679655 - telegraf_win_perf_counters/4283769065 - telegraf_win_perf_counters/1492679118 - telegraf_win_perf_counters/3610923661 - telegraf_win_perf_counters/3446270237 - - telegraf_win_perf_counters/3762679655 telemetry: logs: development: false diff --git a/translator/translate/otel/common/common.go b/translator/translate/otel/common/common.go index bd41b6df1b..dae0813473 100644 --- a/translator/translate/otel/common/common.go +++ b/translator/translate/otel/common/common.go @@ -116,6 +116,7 @@ const ( PipelineNameContainerInsightsJmx = "containerinsightsjmx" PipelineNameEmfLogs = "emf_logs" PipelineNamePrometheus = "prometheus" + PipelineNameKueue = "kueueContainerInsights" AppSignals = "application_signals" AppSignalsFallback = "app_signals" AppSignalsRules = "rules" diff --git a/translator/translate/otel/exporter/awsemf/translator.go b/translator/translate/otel/exporter/awsemf/translator.go index ee599f9cef..76089b928c 100644 --- a/translator/translate/otel/exporter/awsemf/translator.go +++ b/translator/translate/otel/exporter/awsemf/translator.go @@ -24,10 +24,6 @@ import ( "github.com/aws/amazon-cloudwatch-agent/translator/translate/otel/receiver/awscontainerinsight" ) -const ( - kueuePipelineName = "kueueContainerInsights" -) - //go:embed awsemf_default_generic.yaml var defaultGenericConfig string @@ -173,7 +169,7 @@ func isKubernetes(conf *confmap.Conf) bool { // `kueue_container_insights` is a child of `kubernetes` in config spec. func isKubernetesKueue(conf *confmap.Conf, pipelineName string) bool { - return isKubernetes(conf) && pipelineName == kueuePipelineName && common.GetOrDefaultBool(conf, kubernetesKueueBasePathKey, false) + return isKubernetes(conf) && pipelineName == common.PipelineNameKueue && common.GetOrDefaultBool(conf, kubernetesKueueBasePathKey, false) } func isPrometheus(conf *confmap.Conf) bool { diff --git a/translator/translate/otel/pipeline/containerinsights/translator.go b/translator/translate/otel/pipeline/containerinsights/translator.go index b2bba0a5d0..5e37431ff1 100644 --- a/translator/translate/otel/pipeline/containerinsights/translator.go +++ b/translator/translate/otel/pipeline/containerinsights/translator.go @@ -23,8 +23,7 @@ import ( ) const ( - ciPipelineName = common.PipelineNameContainerInsights - kueuePipelineName = "kueueContainerInsights" + ciPipelineName = common.PipelineNameContainerInsights ) var ( @@ -89,10 +88,11 @@ func (t *translator) Translate(conf *confmap.Conf) (*common.ComponentTranslators processors.Set(gpu.NewTranslatorWithName(t.pipelineName)) } } - case kueuePipelineName: + case common.PipelineNameKueue: // add prometheus receiver for kueue receivers = common.NewTranslatorMap((awscontainerinsightskueue.NewTranslator())) processors.Set(kueue.NewTranslatorWithName(t.pipelineName)) + default: return nil, fmt.Errorf("unknown container insights pipeline name: %s", t.pipelineName) } diff --git a/translator/translate/otel/pipeline/containerinsights/translator_test.go b/translator/translate/otel/pipeline/containerinsights/translator_test.go index 6eca5a51c7..07ea56df97 100644 --- a/translator/translate/otel/pipeline/containerinsights/translator_test.go +++ b/translator/translate/otel/pipeline/containerinsights/translator_test.go @@ -46,7 +46,7 @@ func TestTranslator(t *testing.T) { want: &want{ pipelineType: "metrics/containerinsights", receivers: []string{"awscontainerinsightreceiver"}, - processors: []string{"batch/containerinsights"}, + processors: []string{"batch/containerinsights", "filter/containerinsights"}, exporters: []string{"awsemf/containerinsights"}, extensions: []string{"agenthealth/logs", "agenthealth/statuscode"}, }, @@ -62,7 +62,7 @@ func TestTranslator(t *testing.T) { want: &want{ pipelineType: "metrics/containerinsights", receivers: []string{"awscontainerinsightreceiver"}, - processors: []string{"batch/containerinsights"}, + processors: []string{"batch/containerinsights", "filter/containerinsights"}, exporters: []string{"awsemf/containerinsights"}, extensions: []string{"agenthealth/logs", "agenthealth/statuscode"}, }, @@ -94,7 +94,7 @@ func TestKueueTranslator(t *testing.T) { exporters []string extensions []string } - cit := NewTranslatorWithName(kueuePipelineName) + cit := NewTranslatorWithName(common.PipelineNameKueue) require.EqualValues(t, "metrics/kueueContainerInsights", cit.ID().String()) testCases := map[string]struct { input map[string]interface{} @@ -117,6 +117,7 @@ func TestKueueTranslator(t *testing.T) { receivers: []string{"awscontainerinsightskueuereceiver"}, processors: []string{ "batch/kueueContainerInsights", + "filter/kueueContainerInsights", "kueueattributes/kueueContainerInsights", }, exporters: []string{"awsemf/kueueContainerInsights"}, diff --git a/translator/translate/otel/pipeline/containerinsights/translators.go b/translator/translate/otel/pipeline/containerinsights/translators.go index 579f1b3e85..e01b449dfd 100644 --- a/translator/translate/otel/pipeline/containerinsights/translators.go +++ b/translator/translate/otel/pipeline/containerinsights/translators.go @@ -23,7 +23,7 @@ func NewTranslators(conf *confmap.Conf) pipelinetranslator.TranslatorMap { // create kueue container insights translator KueueContainerInsightsEnabled := common.KueueContainerInsightsEnabled(conf) if KueueContainerInsightsEnabled { - kueueTranslator := NewTranslatorWithName(kueuePipelineName) + kueueTranslator := NewTranslatorWithName(common.PipelineNameKueue) translators.Set(kueueTranslator) } // return the translator map diff --git a/translator/translate/otel/processor/filterprocessor/translator.go b/translator/translate/otel/processor/filterprocessor/translator.go index 8506e5d702..8c2ac6110c 100644 --- a/translator/translate/otel/processor/filterprocessor/translator.go +++ b/translator/translate/otel/processor/filterprocessor/translator.go @@ -56,7 +56,7 @@ func (t *translator) ID() component.ID { // Metrics section of the JSON config. func (t *translator) Translate(conf *confmap.Conf) (component.Config, error) { // also checking for container insights pipeline to add default filtering for prometheus metadata - if conf == nil || (t.Name() != common.PipelineNameContainerInsights && t.Name() != common.PipelineNameContainerInsightsJmx && !conf.IsSet(common.JmxConfigKey)) { + if conf == nil || (t.Name() != common.PipelineNameContainerInsights && t.Name() != common.PipelineNameKueue && t.Name() != common.PipelineNameContainerInsightsJmx && !conf.IsSet(common.JmxConfigKey)) { return nil, &common.MissingKeyError{ID: t.ID(), JsonKey: common.JmxConfigKey} } From bb289e2ba703df85ae8cf3e46de63b0fc6cd494d Mon Sep 17 00:00:00 2001 From: Hyunsoo Kim Date: Wed, 19 Feb 2025 14:06:20 -0500 Subject: [PATCH 8/8] tests --- .../sampleConfig/advanced_config_darwin.yaml | 4 +- .../sampleConfig/advanced_config_linux.yaml | 8 +- .../sampleConfig/advanced_config_windows.yaml | 6 +- .../sampleConfig/amp_config_linux.yaml | 2 +- .../sampleConfig/basic_config_windows.yaml | 2 +- .../sampleConfig/compass_linux_config.yaml | 2 +- .../sampleConfig/complete_darwin_config.yaml | 8 +- .../sampleConfig/complete_linux_config.yaml | 8 +- .../sampleConfig/delta_net_config_linux.yaml | 2 +- .../sampleConfig/drop_origin_linux.yaml | 4 +- .../emf_and_kubernetes_with_gpu_config.yaml | 214 +++++++++--------- .../emf_and_kubernetes_with_kueue_config.yaml | 9 +- .../sampleConfig/invalid_input_linux.yaml | 2 +- .../sampleConfig/jmx_config_linux.yaml | 4 +- .../kueue_container_insights_config.yaml | 9 +- .../sampleConfig/otlp_metrics_eks_config.yaml | 2 +- .../sampleConfig/standard_config_linux.yaml | 4 +- .../sampleConfig/standard_config_windows.yaml | 6 +- ...ard_config_windows_with_common_config.yaml | 4 +- .../processor/filterprocessor/translator.go | 2 +- 20 files changed, 156 insertions(+), 146 deletions(-) diff --git a/translator/tocwconfig/sampleConfig/advanced_config_darwin.yaml b/translator/tocwconfig/sampleConfig/advanced_config_darwin.yaml index 973c2e3aac..477458e8a8 100644 --- a/translator/tocwconfig/sampleConfig/advanced_config_darwin.yaml +++ b/translator/tocwconfig/sampleConfig/advanced_config_darwin.yaml @@ -90,11 +90,11 @@ service: - ec2tagger - awsentity/resource receivers: - - telegraf_netstat - - telegraf_swap - telegraf_cpu - telegraf_disk - telegraf_mem + - telegraf_netstat + - telegraf_swap metrics/hostDeltaMetrics: exporters: - awscloudwatch diff --git a/translator/tocwconfig/sampleConfig/advanced_config_linux.yaml b/translator/tocwconfig/sampleConfig/advanced_config_linux.yaml index 37fd13c868..c79fab2ac1 100644 --- a/translator/tocwconfig/sampleConfig/advanced_config_linux.yaml +++ b/translator/tocwconfig/sampleConfig/advanced_config_linux.yaml @@ -46,9 +46,9 @@ processors: ec2_instance_tag_keys: - AutoScalingGroupName ec2_metadata_tags: + - InstanceType - ImageId - InstanceId - - InstanceType imds_retries: 1 middleware: agenthealth/statuscode refresh_interval_seconds: 0s @@ -98,13 +98,13 @@ service: - ec2tagger - awsentity/resource receivers: + - telegraf_swap + - telegraf_ethtool + - telegraf_nvidia_smi - telegraf_cpu - telegraf_disk - telegraf_mem - telegraf_netstat - - telegraf_swap - - telegraf_ethtool - - telegraf_nvidia_smi metrics/hostDeltaMetrics: exporters: - awscloudwatch diff --git a/translator/tocwconfig/sampleConfig/advanced_config_windows.yaml b/translator/tocwconfig/sampleConfig/advanced_config_windows.yaml index 004a6240f6..1e4ad8c8ab 100644 --- a/translator/tocwconfig/sampleConfig/advanced_config_windows.yaml +++ b/translator/tocwconfig/sampleConfig/advanced_config_windows.yaml @@ -91,13 +91,13 @@ service: - ec2tagger - awsentity/resource receivers: - - telegraf_win_perf_counters/2073218482 - - telegraf_win_perf_counters/2039663244 - - telegraf_win_perf_counters/4283769065 - telegraf_win_perf_counters/1492679118 - telegraf_win_perf_counters/3610923661 - telegraf_win_perf_counters/3446270237 - telegraf_win_perf_counters/3762679655 + - telegraf_win_perf_counters/2073218482 + - telegraf_win_perf_counters/2039663244 + - telegraf_win_perf_counters/4283769065 telemetry: logs: development: false diff --git a/translator/tocwconfig/sampleConfig/amp_config_linux.yaml b/translator/tocwconfig/sampleConfig/amp_config_linux.yaml index 25e19ee70f..ce8e5fde4a 100644 --- a/translator/tocwconfig/sampleConfig/amp_config_linux.yaml +++ b/translator/tocwconfig/sampleConfig/amp_config_linux.yaml @@ -103,9 +103,9 @@ processors: ec2_instance_tag_keys: - AutoScalingGroupName ec2_metadata_tags: - - ImageId - InstanceId - InstanceType + - ImageId imds_retries: 1 middleware: agenthealth/statuscode refresh_interval_seconds: 0s diff --git a/translator/tocwconfig/sampleConfig/basic_config_windows.yaml b/translator/tocwconfig/sampleConfig/basic_config_windows.yaml index becf25fd31..b8b7564990 100644 --- a/translator/tocwconfig/sampleConfig/basic_config_windows.yaml +++ b/translator/tocwconfig/sampleConfig/basic_config_windows.yaml @@ -36,9 +36,9 @@ processors: ec2_instance_tag_keys: - AutoScalingGroupName ec2_metadata_tags: + - ImageId - InstanceId - InstanceType - - ImageId imds_retries: 1 middleware: agenthealth/statuscode refresh_interval_seconds: 0s diff --git a/translator/tocwconfig/sampleConfig/compass_linux_config.yaml b/translator/tocwconfig/sampleConfig/compass_linux_config.yaml index cce9012908..d766fbb16b 100644 --- a/translator/tocwconfig/sampleConfig/compass_linux_config.yaml +++ b/translator/tocwconfig/sampleConfig/compass_linux_config.yaml @@ -75,8 +75,8 @@ service: - ec2tagger - awsentity/service/telegraf receivers: - - telegraf_statsd - telegraf_socket_listener + - telegraf_statsd telemetry: logs: development: false diff --git a/translator/tocwconfig/sampleConfig/complete_darwin_config.yaml b/translator/tocwconfig/sampleConfig/complete_darwin_config.yaml index 900becad73..505d6bb301 100644 --- a/translator/tocwconfig/sampleConfig/complete_darwin_config.yaml +++ b/translator/tocwconfig/sampleConfig/complete_darwin_config.yaml @@ -289,13 +289,13 @@ service: - transform - awsentity/resource receivers: - - telegraf_mem - - telegraf_netstat - - telegraf_procstat/1917393364 - telegraf_swap + - telegraf_netstat - telegraf_disk - - telegraf_cpu - telegraf_processes + - telegraf_mem + - telegraf_procstat/1917393364 + - telegraf_cpu metrics/hostCustomMetrics: exporters: - awscloudwatch diff --git a/translator/tocwconfig/sampleConfig/complete_linux_config.yaml b/translator/tocwconfig/sampleConfig/complete_linux_config.yaml index 30f59e002f..d5282d96f6 100644 --- a/translator/tocwconfig/sampleConfig/complete_linux_config.yaml +++ b/translator/tocwconfig/sampleConfig/complete_linux_config.yaml @@ -396,13 +396,13 @@ service: - transform - awsentity/resource receivers: - - telegraf_netstat + - telegraf_processes - telegraf_cpu + - telegraf_netstat + - telegraf_disk + - telegraf_mem - telegraf_swap - telegraf_procstat/1917393364 - - telegraf_mem - - telegraf_processes - - telegraf_disk metrics/hostCustomMetrics/cloudwatch: exporters: - awscloudwatch diff --git a/translator/tocwconfig/sampleConfig/delta_net_config_linux.yaml b/translator/tocwconfig/sampleConfig/delta_net_config_linux.yaml index 9d6a783b30..a67cd3c009 100644 --- a/translator/tocwconfig/sampleConfig/delta_net_config_linux.yaml +++ b/translator/tocwconfig/sampleConfig/delta_net_config_linux.yaml @@ -43,9 +43,9 @@ processors: ec2_instance_tag_keys: - AutoScalingGroupName ec2_metadata_tags: + - ImageId - InstanceId - InstanceType - - ImageId imds_retries: 1 middleware: agenthealth/statuscode refresh_interval_seconds: 0s diff --git a/translator/tocwconfig/sampleConfig/drop_origin_linux.yaml b/translator/tocwconfig/sampleConfig/drop_origin_linux.yaml index d9fdee561e..ab322e5b5b 100644 --- a/translator/tocwconfig/sampleConfig/drop_origin_linux.yaml +++ b/translator/tocwconfig/sampleConfig/drop_origin_linux.yaml @@ -41,9 +41,9 @@ processors: ec2_instance_tag_keys: - AutoScalingGroupName ec2_metadata_tags: - - InstanceId - InstanceType - ImageId + - InstanceId imds_retries: 1 middleware: agenthealth/statuscode refresh_interval_seconds: 0s @@ -85,9 +85,9 @@ service: - transform - awsentity/resource receivers: + - telegraf_nvidia_smi - telegraf_cpu - telegraf_disk - - telegraf_nvidia_smi telemetry: logs: development: false diff --git a/translator/tocwconfig/sampleConfig/emf_and_kubernetes_with_gpu_config.yaml b/translator/tocwconfig/sampleConfig/emf_and_kubernetes_with_gpu_config.yaml index bac9c069b5..ac6b3da95a 100644 --- a/translator/tocwconfig/sampleConfig/emf_and_kubernetes_with_gpu_config.yaml +++ b/translator/tocwconfig/sampleConfig/emf_and_kubernetes_with_gpu_config.yaml @@ -704,90 +704,6 @@ processors: match_type: regexp new_name: apiserver_request_total_5xx submatch_case: "" - - action: insert - aggregation_type: "" - include: DCGM_FI_DEV_GPU_TEMP - match_type: "" - new_name: container_gpu_temperature - operations: - - action: add_label - aggregation_type: "" - experimental_scale: 0 - label: "" - label_value: "" - new_label: Type - new_value: ContainerGPU - submatch_case: "" - - action: insert - aggregation_type: "" - include: DCGM_FI_DEV_GPU_TEMP - match_type: "" - new_name: pod_gpu_temperature - operations: - - action: add_label - aggregation_type: "" - experimental_scale: 0 - label: "" - label_value: "" - new_label: Type - new_value: PodGPU - submatch_case: "" - - action: insert - aggregation_type: "" - include: DCGM_FI_DEV_GPU_TEMP - match_type: "" - new_name: node_gpu_temperature - operations: - - action: add_label - aggregation_type: "" - experimental_scale: 0 - label: "" - label_value: "" - new_label: Type - new_value: NodeGPU - submatch_case: "" - - action: insert - aggregation_type: "" - include: DCGM_FI_DEV_POWER_USAGE - match_type: "" - new_name: container_gpu_power_draw - operations: - - action: add_label - aggregation_type: "" - experimental_scale: 0 - label: "" - label_value: "" - new_label: Type - new_value: ContainerGPU - submatch_case: "" - - action: insert - aggregation_type: "" - include: DCGM_FI_DEV_POWER_USAGE - match_type: "" - new_name: pod_gpu_power_draw - operations: - - action: add_label - aggregation_type: "" - experimental_scale: 0 - label: "" - label_value: "" - new_label: Type - new_value: PodGPU - submatch_case: "" - - action: insert - aggregation_type: "" - include: DCGM_FI_DEV_POWER_USAGE - match_type: "" - new_name: node_gpu_power_draw - operations: - - action: add_label - aggregation_type: "" - experimental_scale: 0 - label: "" - label_value: "" - new_label: Type - new_value: NodeGPU - submatch_case: "" - action: insert aggregation_type: "" include: DCGM_FI_DEV_GPU_UTIL @@ -1019,33 +935,89 @@ processors: new_label: "" new_value: "" submatch_case: "" - - action: update + - action: insert aggregation_type: "" - include: neuroncore_utilization_ratio + include: DCGM_FI_DEV_GPU_TEMP match_type: "" - new_name: neuroncore_utilization + new_name: container_gpu_temperature operations: - - action: experimental_scale_value + - action: add_label aggregation_type: "" - experimental_scale: 100 + experimental_scale: 0 label: "" label_value: "" - new_label: "" - new_value: "" + new_label: Type + new_value: ContainerGPU submatch_case: "" - - action: update + - action: insert aggregation_type: "" - include: instance_info + include: DCGM_FI_DEV_GPU_TEMP match_type: "" - new_name: instance_info - operations: [] + new_name: pod_gpu_temperature + operations: + - action: add_label + aggregation_type: "" + experimental_scale: 0 + label: "" + label_value: "" + new_label: Type + new_value: PodGPU submatch_case: "" - - action: update + - action: insert aggregation_type: "" - include: neuron_hardware + include: DCGM_FI_DEV_GPU_TEMP match_type: "" - new_name: neuron_hardware - operations: [] + new_name: node_gpu_temperature + operations: + - action: add_label + aggregation_type: "" + experimental_scale: 0 + label: "" + label_value: "" + new_label: Type + new_value: NodeGPU + submatch_case: "" + - action: insert + aggregation_type: "" + include: DCGM_FI_DEV_POWER_USAGE + match_type: "" + new_name: container_gpu_power_draw + operations: + - action: add_label + aggregation_type: "" + experimental_scale: 0 + label: "" + label_value: "" + new_label: Type + new_value: ContainerGPU + submatch_case: "" + - action: insert + aggregation_type: "" + include: DCGM_FI_DEV_POWER_USAGE + match_type: "" + new_name: pod_gpu_power_draw + operations: + - action: add_label + aggregation_type: "" + experimental_scale: 0 + label: "" + label_value: "" + new_label: Type + new_value: PodGPU + submatch_case: "" + - action: insert + aggregation_type: "" + include: DCGM_FI_DEV_POWER_USAGE + match_type: "" + new_name: node_gpu_power_draw + operations: + - action: add_label + aggregation_type: "" + experimental_scale: 0 + label: "" + label_value: "" + new_label: Type + new_value: NodeGPU submatch_case: "" - action: update aggregation_type: "" @@ -1056,9 +1028,9 @@ processors: submatch_case: "" - action: update aggregation_type: "" - include: neuron_runtime_memory_used_bytes + include: execution_status_total match_type: "" - new_name: neurondevice_runtime_memory_used_bytes + new_name: neuron_execution_status operations: [] submatch_case: "" - action: update @@ -1077,9 +1049,23 @@ processors: submatch_case: "" - action: update aggregation_type: "" - include: neuroncore_memory_usage_model_shared_scratchpad + include: neuroncore_memory_usage_runtime_memory match_type: "" - new_name: neuroncore_memory_usage_model_shared_scratchpad + new_name: neuroncore_memory_usage_runtime_memory + operations: [] + submatch_case: "" + - action: update + aggregation_type: "" + include: instance_info + match_type: "" + new_name: instance_info + operations: [] + submatch_case: "" + - action: update + aggregation_type: "" + include: neuron_hardware + match_type: "" + new_name: neuron_hardware operations: [] submatch_case: "" - action: update @@ -1091,16 +1077,16 @@ processors: submatch_case: "" - action: update aggregation_type: "" - include: execution_status_total + include: neuron_runtime_memory_used_bytes match_type: "" - new_name: neuron_execution_status + new_name: neurondevice_runtime_memory_used_bytes operations: [] submatch_case: "" - action: update aggregation_type: "" - include: neuroncore_memory_usage_runtime_memory + include: neuroncore_memory_usage_model_shared_scratchpad match_type: "" - new_name: neuroncore_memory_usage_runtime_memory + new_name: neuroncore_memory_usage_model_shared_scratchpad operations: [] submatch_case: "" - action: update @@ -1110,6 +1096,20 @@ processors: new_name: neuroncore_memory_usage_tensors operations: [] submatch_case: "" + - action: update + aggregation_type: "" + include: neuroncore_utilization_ratio + match_type: "" + new_name: neuroncore_utilization + operations: + - action: experimental_scale_value + aggregation_type: "" + experimental_scale: 100 + label: "" + label_value: "" + new_label: "" + new_value: "" + submatch_case: "" - action: update aggregation_type: "" include: execution_latency_seconds diff --git a/translator/tocwconfig/sampleConfig/emf_and_kubernetes_with_kueue_config.yaml b/translator/tocwconfig/sampleConfig/emf_and_kubernetes_with_kueue_config.yaml index ce09e76af1..19dfb38a63 100644 --- a/translator/tocwconfig/sampleConfig/emf_and_kubernetes_with_kueue_config.yaml +++ b/translator/tocwconfig/sampleConfig/emf_and_kubernetes_with_kueue_config.yaml @@ -524,9 +524,14 @@ processors: error_mode: propagate logs: {} metrics: - include: + exclude: match_type: strict - metric_names: [] + metric_names: + - up + - scrape_duration_seconds + - scrape_samples_scraped + - scrape_series_added + - scrape_samples_post_metric_relabeling spans: {} traces: {} kueueattributes/kueueContainerInsights: {} diff --git a/translator/tocwconfig/sampleConfig/invalid_input_linux.yaml b/translator/tocwconfig/sampleConfig/invalid_input_linux.yaml index 34097d8b2f..79c2cbb4c9 100644 --- a/translator/tocwconfig/sampleConfig/invalid_input_linux.yaml +++ b/translator/tocwconfig/sampleConfig/invalid_input_linux.yaml @@ -64,8 +64,8 @@ service: - ec2tagger - awsentity/resource receivers: - - telegraf_mem - telegraf_disk + - telegraf_mem telemetry: logs: development: false diff --git a/translator/tocwconfig/sampleConfig/jmx_config_linux.yaml b/translator/tocwconfig/sampleConfig/jmx_config_linux.yaml index 604117d5da..d9cfe0785e 100644 --- a/translator/tocwconfig/sampleConfig/jmx_config_linux.yaml +++ b/translator/tocwconfig/sampleConfig/jmx_config_linux.yaml @@ -140,11 +140,11 @@ processors: metric_statements: - context: metric statements: - - set(unit, "unit") where name == "disk_free" - - set(name, "DISK_FREE") where name == "disk_free" - set(unit, "unit") where name == "cpu_usage_idle" - set(name, "CPU_USAGE_IDLE") where name == "cpu_usage_idle" - set(unit, "unit") where name == "cpu_usage_nice" + - set(unit, "unit") where name == "disk_free" + - set(name, "DISK_FREE") where name == "disk_free" trace_statements: [] transform/jmx: error_mode: propagate diff --git a/translator/tocwconfig/sampleConfig/kueue_container_insights_config.yaml b/translator/tocwconfig/sampleConfig/kueue_container_insights_config.yaml index f93b2a5f2a..38144eea48 100644 --- a/translator/tocwconfig/sampleConfig/kueue_container_insights_config.yaml +++ b/translator/tocwconfig/sampleConfig/kueue_container_insights_config.yaml @@ -270,9 +270,14 @@ processors: error_mode: propagate logs: {} metrics: - include: + exclude: match_type: strict - metric_names: [] + metric_names: + - up + - scrape_duration_seconds + - scrape_samples_scraped + - scrape_series_added + - scrape_samples_post_metric_relabeling spans: {} traces: {} kueueattributes/kueueContainerInsights: {} diff --git a/translator/tocwconfig/sampleConfig/otlp_metrics_eks_config.yaml b/translator/tocwconfig/sampleConfig/otlp_metrics_eks_config.yaml index c6aca99c59..ce10d3d620 100644 --- a/translator/tocwconfig/sampleConfig/otlp_metrics_eks_config.yaml +++ b/translator/tocwconfig/sampleConfig/otlp_metrics_eks_config.yaml @@ -49,9 +49,9 @@ processors: ec2_instance_tag_keys: - AutoScalingGroupName ec2_metadata_tags: - - InstanceType - ImageId - InstanceId + - InstanceType imds_retries: 1 middleware: agenthealth/statuscode refresh_interval_seconds: 0s diff --git a/translator/tocwconfig/sampleConfig/standard_config_linux.yaml b/translator/tocwconfig/sampleConfig/standard_config_linux.yaml index c60c336889..4f5cdd26b5 100644 --- a/translator/tocwconfig/sampleConfig/standard_config_linux.yaml +++ b/translator/tocwconfig/sampleConfig/standard_config_linux.yaml @@ -46,9 +46,9 @@ processors: ec2_instance_tag_keys: - AutoScalingGroupName ec2_metadata_tags: - - InstanceType - ImageId - InstanceId + - InstanceType middleware: agenthealth/statuscode refresh_interval_seconds: 0s receivers: @@ -85,10 +85,10 @@ service: - ec2tagger - awsentity/resource receivers: + - telegraf_swap - telegraf_cpu - telegraf_disk - telegraf_mem - - telegraf_swap metrics/hostDeltaMetrics: exporters: - awscloudwatch diff --git a/translator/tocwconfig/sampleConfig/standard_config_windows.yaml b/translator/tocwconfig/sampleConfig/standard_config_windows.yaml index cbccc9051d..5895060fb9 100644 --- a/translator/tocwconfig/sampleConfig/standard_config_windows.yaml +++ b/translator/tocwconfig/sampleConfig/standard_config_windows.yaml @@ -36,9 +36,9 @@ processors: ec2_instance_tag_keys: - AutoScalingGroupName ec2_metadata_tags: - - InstanceType - ImageId - InstanceId + - InstanceType middleware: agenthealth/statuscode refresh_interval_seconds: 0s receivers: @@ -80,11 +80,11 @@ service: - ec2tagger - awsentity/resource receivers: - - telegraf_win_perf_counters/3762679655 - - telegraf_win_perf_counters/4283769065 - telegraf_win_perf_counters/1492679118 - telegraf_win_perf_counters/3610923661 - telegraf_win_perf_counters/3446270237 + - telegraf_win_perf_counters/3762679655 + - telegraf_win_perf_counters/4283769065 telemetry: logs: development: false diff --git a/translator/tocwconfig/sampleConfig/standard_config_windows_with_common_config.yaml b/translator/tocwconfig/sampleConfig/standard_config_windows_with_common_config.yaml index 680a915eb4..4c3b86fd73 100644 --- a/translator/tocwconfig/sampleConfig/standard_config_windows_with_common_config.yaml +++ b/translator/tocwconfig/sampleConfig/standard_config_windows_with_common_config.yaml @@ -87,11 +87,11 @@ service: - ec2tagger - awsentity/resource receivers: + - telegraf_win_perf_counters/3610923661 + - telegraf_win_perf_counters/3446270237 - telegraf_win_perf_counters/3762679655 - telegraf_win_perf_counters/4283769065 - telegraf_win_perf_counters/1492679118 - - telegraf_win_perf_counters/3610923661 - - telegraf_win_perf_counters/3446270237 telemetry: logs: development: false diff --git a/translator/translate/otel/processor/filterprocessor/translator.go b/translator/translate/otel/processor/filterprocessor/translator.go index 8c2ac6110c..1cdc80987f 100644 --- a/translator/translate/otel/processor/filterprocessor/translator.go +++ b/translator/translate/otel/processor/filterprocessor/translator.go @@ -64,7 +64,7 @@ func (t *translator) Translate(conf *confmap.Conf) (component.Config, error) { if t.Name() == common.PipelineNameContainerInsightsJmx { return common.GetYamlFileToYamlConfig(cfg, containerInsightsJmxConfig) } - if t.Name() == common.PipelineNameContainerInsights { + if t.Name() == common.PipelineNameContainerInsights || t.Name() == common.PipelineNameKueue { return common.GetYamlFileToYamlConfig(cfg, containerInsightsConfig) }