Skip to content

Commit

Permalink
Resource Monitoring metrics on Windows - remove multiplication by 100 (
Browse files Browse the repository at this point in the history
…#5473)

* Remove multiplication by 100 to put utilization in range [0,1] instead of [0,100]
---------
Co-authored-by: Evgenii Fedorov <evgenii.fedorov@microsoft.com>
  • Loading branch information
evgenyfedorov2 authored Jan 31, 2025
1 parent 2534f08 commit cc2317e
Show file tree
Hide file tree
Showing 5 changed files with 83 additions and 32 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@

using System.Collections.Generic;
using System.ComponentModel.DataAnnotations;
using System.Diagnostics.CodeAnalysis;
using Microsoft.Shared.DiagnosticIds;

namespace Microsoft.Extensions.Diagnostics.ResourceMonitoring;

Expand All @@ -18,4 +20,17 @@ public partial class ResourceMonitoringOptions
#pragma warning disable CA2227 // Collection properties should be read only
public ISet<string> SourceIpAddresses { get; set; } = new HashSet<string>();
#pragma warning restore CA2227 // Collection properties should be read only

/// <summary>
/// Gets or sets a value indicating whether CPU and Memory utilization metric values should be in range <c>[0, 1]</c> instead of <c>[0, 100]</c>.
/// </summary>
/// <value>
/// The default value is <see langword="false"/>.
/// </value>
/// <remarks>
/// Use this property if you prefer to have the metric values in range <c>[0, 1]</c> instead of <c>[0, 100]</c>.
/// In the long term, the default value of this property will be changed to <see langword="true"/>.
/// </remarks>
[Experimental(diagnosticId: DiagnosticIds.Experiments.ResourceMonitoring, UrlFormat = DiagnosticIds.UrlFormat)]
public bool UseZeroToOneRangeForMetrics { get; set; }
}
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ namespace Microsoft.Extensions.Diagnostics.ResourceMonitoring.Windows;

internal sealed class WindowsContainerSnapshotProvider : ISnapshotProvider
{
private const double One = 1.0d;
private const double Hundred = 100.0d;

private readonly Lazy<MEMORYSTATUSEX> _memoryStatus;
Expand All @@ -32,6 +33,7 @@ internal sealed class WindowsContainerSnapshotProvider : ISnapshotProvider
private readonly double _cpuLimit;
private readonly TimeSpan _cpuRefreshInterval;
private readonly TimeSpan _memoryRefreshInterval;
private readonly double _metricValueMultiplier;

private long _oldCpuUsageTicks;
private long _oldCpuTimeTicks;
Expand Down Expand Up @@ -72,6 +74,8 @@ internal WindowsContainerSnapshotProvider(
_logger = logger ?? NullLogger<WindowsContainerSnapshotProvider>.Instance;
Log.RunningInsideJobObject(_logger);

_metricValueMultiplier = options.UseZeroToOneRangeForMetrics ? One : Hundred;

_memoryStatus = new Lazy<MEMORYSTATUSEX>(
memoryInfo.GetMemoryStatus,
LazyThreadSafetyMode.ExecutionAndPublication);
Expand Down Expand Up @@ -195,7 +199,8 @@ private double MemoryPercentage(Func<ulong> getMemoryUsage)
{
if (now >= _refreshAfterMemory)
{
_memoryPercentage = Math.Min(Hundred, memoryUsage / _memoryLimit * Hundred); // Don't change calculation order, otherwise we lose some precision
// Don't change calculation order, otherwise we lose some precision:
_memoryPercentage = Math.Min(_metricValueMultiplier, memoryUsage / _memoryLimit * _metricValueMultiplier);
_refreshAfterMemory = now.Add(_memoryRefreshInterval);
}

Expand Down Expand Up @@ -229,7 +234,8 @@ private double CpuPercentage()
var timeTickDelta = (now.Ticks - _oldCpuTimeTicks) * _cpuLimit;
if (usageTickDelta > 0 && timeTickDelta > 0)
{
_cpuPercentage = Math.Min(Hundred, usageTickDelta / timeTickDelta * Hundred); // Don't change calculation order, otherwise precision is lost.
// Don't change calculation order, otherwise precision is lost:
_cpuPercentage = Math.Min(_metricValueMultiplier, usageTickDelta / timeTickDelta * _metricValueMultiplier);

Log.CpuContainerUsageData(
_logger, basicAccountingInfo.TotalKernelTime, basicAccountingInfo.TotalUserTime, _oldCpuUsageTicks, timeTickDelta, _cpuLimit, _cpuPercentage);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ namespace Microsoft.Extensions.Diagnostics.ResourceMonitoring.Windows;

internal sealed class WindowsSnapshotProvider : ISnapshotProvider
{
private const double One = 1.0d;
private const double Hundred = 100.0d;

public SystemResources Resources { get; }
Expand All @@ -28,6 +29,7 @@ internal sealed class WindowsSnapshotProvider : ISnapshotProvider
private readonly double _totalMemory;
private readonly TimeSpan _cpuRefreshInterval;
private readonly TimeSpan _memoryRefreshInterval;
private readonly double _metricValueMultiplier;

private long _oldCpuUsageTicks;
private long _oldCpuTimeTicks;
Expand Down Expand Up @@ -56,6 +58,8 @@ internal WindowsSnapshotProvider(

Log.RunningOutsideJobObject(_logger);

_metricValueMultiplier = options.UseZeroToOneRangeForMetrics ? One : Hundred;

_cpuUnits = getCpuUnitsFunc();
var totalMemory = getTotalMemoryInBytesFunc();

Expand Down Expand Up @@ -135,7 +139,8 @@ private double MemoryPercentage()
{
if (now >= _refreshAfterMemory)
{
_memoryPercentage = Math.Min(Hundred, currentMemoryUsage / _totalMemory * Hundred); // Don't change calculation order, otherwise we lose some precision
// Don't change calculation order, otherwise we lose some precision:
_memoryPercentage = Math.Min(_metricValueMultiplier, currentMemoryUsage / _totalMemory * _metricValueMultiplier);
_refreshAfterMemory = now.Add(_memoryRefreshInterval);
}

Expand Down Expand Up @@ -167,7 +172,8 @@ private double CpuPercentage()
var timeTickDelta = (now.Ticks - _oldCpuTimeTicks) * _cpuUnits;
if (usageTickDelta > 0 && timeTickDelta > 0)
{
_cpuPercentage = Math.Min(Hundred, usageTickDelta / (double)timeTickDelta * Hundred); // Don't change calculation order, otherwise we lose some precision
// Don't change calculation order, otherwise we lose some precision:
_cpuPercentage = Math.Min(_metricValueMultiplier, usageTickDelta / (double)timeTickDelta * _metricValueMultiplier);

Log.CpuUsageData(_logger, currentCpuTicks, _oldCpuUsageTicks, timeTickDelta, _cpuUnits, _cpuPercentage);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -191,9 +191,11 @@ public void GetSnapshot_With_JobMemoryLimit_Set_To_Zero_ProducesCorrectSnapshot(
}

[Theory]
[InlineData(ResourceUtilizationInstruments.ProcessCpuUtilization)]
[InlineData(ResourceUtilizationInstruments.ContainerCpuLimitUtilization)]
public void SnapshotProvider_EmitsCpuMetrics(string instrumentName)
[InlineData(ResourceUtilizationInstruments.ProcessCpuUtilization, true)]
[InlineData(ResourceUtilizationInstruments.ProcessCpuUtilization, false)]
[InlineData(ResourceUtilizationInstruments.ContainerCpuLimitUtilization, true)]
[InlineData(ResourceUtilizationInstruments.ContainerCpuLimitUtilization, false)]
public void SnapshotProvider_EmitsCpuMetrics(string instrumentName, bool useZeroToOneRange)
{
// Simulating 10% CPU usage (2 CPUs, 2000 ticks initially, 4000 ticks after 1 ms):
JOBOBJECT_BASIC_ACCOUNTING_INFORMATION updatedAccountingInfo = default;
Expand All @@ -216,8 +218,12 @@ public void SnapshotProvider_EmitsCpuMetrics(string instrumentName)
.Returns(meter);
using var metricCollector = new MetricCollector<double>(meter, instrumentName, fakeClock);

var options = new ResourceMonitoringOptions { CpuConsumptionRefreshInterval = TimeSpan.FromMilliseconds(2) };

var options = new ResourceMonitoringOptions
{
CpuConsumptionRefreshInterval = TimeSpan.FromMilliseconds(2),
UseZeroToOneRangeForMetrics = useZeroToOneRange
};
var multiplier = useZeroToOneRange ? 1 : 100;
var snapshotProvider = new WindowsContainerSnapshotProvider(
_memoryInfoMock.Object,
_systemInfoMock.Object,
Expand All @@ -237,27 +243,29 @@ public void SnapshotProvider_EmitsCpuMetrics(string instrumentName)
fakeClock.Advance(TimeSpan.FromMilliseconds(1));
metricCollector.RecordObservableInstruments();

Assert.Equal(10, metricCollector.LastMeasurement.Value); // Consumed 10% of the CPU.
Assert.Equal(0.1 * multiplier, metricCollector.LastMeasurement.Value); // Consumed 10% of the CPU.

// Step #2 - simulate 1 millisecond passing and collect metrics again:
fakeClock.Advance(TimeSpan.FromMilliseconds(1));
metricCollector.RecordObservableInstruments();

// CPU usage should be the same as before, as we didn't recalculate it:
Assert.Equal(10, metricCollector.LastMeasurement.Value); // Still consuming 10% as gauge wasn't updated.
Assert.Equal(0.1 * multiplier, metricCollector.LastMeasurement.Value); // Still consuming 10% as gauge wasn't updated.

// Step #3 - simulate 1 millisecond passing and collect metrics again:
fakeClock.Advance(TimeSpan.FromMilliseconds(1));
metricCollector.RecordObservableInstruments();

// CPU usage should be the same as before, as we're not simulating any CPU usage:
Assert.Equal(10, metricCollector.LastMeasurement.Value); // Consumed 10% of the CPU.
Assert.Equal(0.1 * multiplier, metricCollector.LastMeasurement.Value); // Consumed 10% of the CPU.
}

[Theory]
[InlineData(ResourceUtilizationInstruments.ProcessMemoryUtilization)]
[InlineData(ResourceUtilizationInstruments.ContainerMemoryLimitUtilization)]
public void SnapshotProvider_EmitsMemoryMetrics(string instrumentName)
[InlineData(ResourceUtilizationInstruments.ProcessMemoryUtilization, true)]
[InlineData(ResourceUtilizationInstruments.ProcessMemoryUtilization, false)]
[InlineData(ResourceUtilizationInstruments.ContainerMemoryLimitUtilization, true)]
[InlineData(ResourceUtilizationInstruments.ContainerMemoryLimitUtilization, false)]
public void SnapshotProvider_EmitsMemoryMetrics(string instrumentName, bool useZeroToOneRange)
{
_appMemoryUsage = 200UL;
ulong updatedAppMemoryUsage = 600UL;
Expand All @@ -279,8 +287,12 @@ public void SnapshotProvider_EmitsMemoryMetrics(string instrumentName)
.Returns(meter);
using var metricCollector = new MetricCollector<double>(meter, instrumentName, fakeClock);

var options = new ResourceMonitoringOptions { MemoryConsumptionRefreshInterval = TimeSpan.FromMilliseconds(2) };

var options = new ResourceMonitoringOptions
{
MemoryConsumptionRefreshInterval = TimeSpan.FromMilliseconds(2),
UseZeroToOneRangeForMetrics = useZeroToOneRange
};
var multiplier = useZeroToOneRange ? 1 : 100;
var snapshotProvider = new WindowsContainerSnapshotProvider(
_memoryInfoMock.Object,
_systemInfoMock.Object,
Expand All @@ -294,17 +306,17 @@ public void SnapshotProvider_EmitsMemoryMetrics(string instrumentName)
// Step #0 - state in the beginning:
metricCollector.RecordObservableInstruments();
Assert.NotNull(metricCollector.LastMeasurement?.Value);
Assert.Equal(10, metricCollector.LastMeasurement.Value); // Consuming 10% of the memory initially.
Assert.Equal(0.1 * multiplier, metricCollector.LastMeasurement.Value); // Consuming 10% of the memory initially.

// Step #1 - simulate 1 millisecond passing and collect metrics again:
fakeClock.Advance(options.MemoryConsumptionRefreshInterval - TimeSpan.FromMilliseconds(1));
metricCollector.RecordObservableInstruments();
Assert.Equal(10, metricCollector.LastMeasurement.Value); // Still consuming 10% as gauge wasn't updated.
Assert.Equal(0.1 * multiplier, metricCollector.LastMeasurement.Value); // Still consuming 10% as gauge wasn't updated.

// Step #2 - simulate 2 milliseconds passing and collect metrics again:
fakeClock.Advance(TimeSpan.FromMilliseconds(1));
metricCollector.RecordObservableInstruments();
Assert.Equal(30, metricCollector.LastMeasurement.Value); // Consuming 30% of the memory afterwards.
Assert.Equal(0.3 * multiplier, metricCollector.LastMeasurement.Value); // Consuming 30% of the memory afterwards.
}

[Fact]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,12 +71,18 @@ public Task SnapshotProvider_EmitsLogRecord()
return Verifier.Verify(logRecords[0]).UseDirectory(VerifiedDataDirectory);
}

[ConditionalFact]
public void SnapshotProvider_EmitsCpuMetrics()
[ConditionalTheory]
[CombinatorialData]
public void SnapshotProvider_EmitsCpuMetrics(bool useZeroToOneRange)
{
var fakeClock = new FakeTimeProvider();
var cpuTicks = 500L;
var options = new ResourceMonitoringOptions { CpuConsumptionRefreshInterval = TimeSpan.FromMilliseconds(2) };
var options = new ResourceMonitoringOptions
{
CpuConsumptionRefreshInterval = TimeSpan.FromMilliseconds(2),
UseZeroToOneRangeForMetrics = useZeroToOneRange
};
var multiplier = useZeroToOneRange ? 1 : 100;
using var meter = new Meter(nameof(SnapshotProvider_EmitsCpuMetrics));
var meterFactoryMock = new Mock<IMeterFactory>();
meterFactoryMock.Setup(x => x.Create(It.IsAny<MeterOptions>())).Returns(meter);
Expand All @@ -96,22 +102,28 @@ public void SnapshotProvider_EmitsCpuMetrics()
// Step #1 - simulate 1 millisecond passing and collect metrics again:
fakeClock.Advance(TimeSpan.FromMilliseconds(1));
metricCollector.RecordObservableInstruments();
Assert.Equal(5, metricCollector.LastMeasurement?.Value); // Consuming 5% of the CPU (2 CPUs, 1000 ticks, 1ms).
Assert.Equal(0.05 * multiplier, metricCollector.LastMeasurement?.Value); // Consuming 5% of the CPU (2 CPUs, 1000 ticks, 1ms).

// Step #2 - simulate another 1 millisecond passing and collect metrics again:
fakeClock.Advance(TimeSpan.FromMilliseconds(1));
metricCollector.RecordObservableInstruments();

// CPU usage should be the same as before, as we're not simulating any CPU usage:
Assert.Equal(5, metricCollector.LastMeasurement?.Value); // Still consuming 5% of the CPU
Assert.Equal(0.05 * multiplier, metricCollector.LastMeasurement?.Value); // Still consuming 5% of the CPU
}

[ConditionalFact]
public void SnapshotProvider_EmitsMemoryMetrics()
[ConditionalTheory]
[CombinatorialData]
public void SnapshotProvider_EmitsMemoryMetrics(bool useZeroToOneRange)
{
var fakeClock = new FakeTimeProvider();
long memoryUsed = 300L;
var options = new ResourceMonitoringOptions { MemoryConsumptionRefreshInterval = TimeSpan.FromMilliseconds(2) };
var options = new ResourceMonitoringOptions
{
MemoryConsumptionRefreshInterval = TimeSpan.FromMilliseconds(2),
UseZeroToOneRangeForMetrics = useZeroToOneRange
};
var multiplier = useZeroToOneRange ? 1 : 100;
using var meter = new Meter(nameof(SnapshotProvider_EmitsMemoryMetrics));
var meterFactoryMock = new Mock<IMeterFactory>();
meterFactoryMock.Setup(x => x.Create(It.IsAny<MeterOptions>()))
Expand All @@ -124,21 +136,21 @@ public void SnapshotProvider_EmitsMemoryMetrics()
// Step #0 - state in the beginning:
metricCollector.RecordObservableInstruments();
Assert.NotNull(metricCollector.LastMeasurement);
Assert.Equal(10, metricCollector.LastMeasurement.Value); // Consuming 10% of the memory initially
Assert.Equal(0.1 * multiplier, metricCollector.LastMeasurement.Value); // Consuming 10% of the memory initially

memoryUsed = 900L; // Simulate 30% memory usage.

// Step #1 - simulate 1 millisecond passing and collect metrics again:
fakeClock.Advance(TimeSpan.FromMilliseconds(1));
metricCollector.RecordObservableInstruments();

Assert.Equal(10, metricCollector.LastMeasurement.Value); // Still consuming 10% as gauge wasn't updated.
Assert.Equal(0.1 * multiplier, metricCollector.LastMeasurement.Value); // Still consuming 10% as gauge wasn't updated.

// Step #2 - simulate 1 millisecond passing and collect metrics again:
fakeClock.Advance(TimeSpan.FromMilliseconds(1));
metricCollector.RecordObservableInstruments();

Assert.Equal(30, metricCollector.LastMeasurement.Value); // Consuming 30% of the memory afterwards
Assert.Equal(0.3 * multiplier, metricCollector.LastMeasurement.Value); // Consuming 30% of the memory afterwards

memoryUsed = 3_100L; // Simulate more than 100% memory usage

Expand All @@ -147,7 +159,7 @@ public void SnapshotProvider_EmitsMemoryMetrics()
metricCollector.RecordObservableInstruments();

// Memory usage should be capped at 100%, even though we're simulating more than 100% memory usage:
Assert.Equal(100, metricCollector.LastMeasurement.Value); // Consuming 100% of the memory
Assert.Equal(1 * multiplier, Math.Round(metricCollector.LastMeasurement.Value)); // Consuming 100% of the memory
}

[ConditionalFact]
Expand Down

0 comments on commit cc2317e

Please sign in to comment.