From bb393887c561ae9a17e24bed9053ff46146412a3 Mon Sep 17 00:00:00 2001 From: Alexander Dejanovski Date: Wed, 9 Oct 2019 19:18:20 +0200 Subject: [PATCH] Fix metric naming to allow long-term tracking of repairs. Using the repair run id in the metric name doesn't allow matching consecutive repairs with the same settings together. This fix uses the repair unit id instead, which stays the same across all runs of a specific repair definition (cluster, keyspace, tables and other settings). --- .../cassandrareaper/service/RepairRunner.java | 48 ++++++++++++------- 1 file changed, 32 insertions(+), 16 deletions(-) diff --git a/src/server/src/main/java/io/cassandrareaper/service/RepairRunner.java b/src/server/src/main/java/io/cassandrareaper/service/RepairRunner.java index d270727fb..1fa708ccf 100644 --- a/src/server/src/main/java/io/cassandrareaper/service/RepairRunner.java +++ b/src/server/src/main/java/io/cassandrareaper/service/RepairRunner.java @@ -109,33 +109,49 @@ final class RepairRunner implements Runnable { // below four metric names are duplicated, so monitoring systems can follow per cluster or per cluster and keyspace String metricNameForRepairProgressPerKeyspace - = metricName("repairProgress", repairUnitClusterName, repairUnitKeyspaceName, repairRunId); + = metricName( + "repairProgress", + repairUnitClusterName, + repairUnitKeyspaceName, + repairRun.get().getRepairUnitId()); - String metricNameForRepairProgress = metricName("repairProgress", repairUnitClusterName, repairRunId); - - context.metricRegistry.register(metricNameForRepairProgressPerKeyspace, (Gauge) () -> repairProgress); - context.metricRegistry.register(metricNameForRepairProgress, (Gauge) () -> repairProgress); + String metricNameForRepairProgress + = metricName("repairProgress", repairUnitClusterName, repairRun.get().getRepairUnitId()); metricNameForMillisSinceLastRepairPerKeyspace - = metricName("millisSinceLastRepair", repairUnitClusterName, repairUnitKeyspaceName, repairRunId); + 
= metricName( + "millisSinceLastRepair", + repairUnitClusterName, + repairUnitKeyspaceName, + repairRun.get().getRepairUnitId()); - metricNameForMillisSinceLastRepair = metricName("millisSinceLastRepair", repairUnitClusterName, repairRunId); + metricNameForMillisSinceLastRepair + = metricName( + "millisSinceLastRepair", repairUnitClusterName, repairRun.get().getRepairUnitId()); String metricNameForDoneSegmentsPerKeyspace - = metricName("segmentsDone", repairUnitClusterName, repairUnitKeyspaceName, repairRunId); - - String metricNameForDoneSegments = metricName("segmentsDone", repairUnitClusterName, repairRunId); + = metricName("segmentsDone", repairUnitClusterName, repairUnitKeyspaceName, repairRun.get().getRepairUnitId()); - context.metricRegistry.register(metricNameForDoneSegmentsPerKeyspace, (Gauge) () -> segmentsDone); - context.metricRegistry.register(metricNameForDoneSegments, (Gauge) () -> (int)segmentsDone); + String metricNameForDoneSegments + = metricName("segmentsDone", repairUnitClusterName, repairRun.get().getRepairUnitId()); String metricNameForTotalSegmentsPerKeyspace - = metricName("segmentsTotal", repairUnitClusterName, repairUnitKeyspaceName, repairRunId); + = metricName("segmentsTotal", repairUnitClusterName, repairUnitKeyspaceName, repairRun.get().getRepairUnitId()); - String metricNameForTotalSegments = metricName("segmentsTotal", repairUnitClusterName, repairRunId); + String metricNameForTotalSegments + = metricName("segmentsTotal", repairUnitClusterName, repairRun.get().getRepairUnitId()); - context.metricRegistry.register(metricNameForTotalSegmentsPerKeyspace, (Gauge) () -> (int)segmentsTotal); - context.metricRegistry.register(metricNameForTotalSegments, (Gauge) () -> segmentsTotal); + // The metrics could already exist as a repair unit will be shared among repair runs + if (!context.metricRegistry.getGauges().containsKey(metricNameForDoneSegmentsPerKeyspace)) { + context.metricRegistry.register(metricNameForDoneSegmentsPerKeyspace, (Gauge) 
() -> segmentsDone); + context.metricRegistry.register(metricNameForDoneSegments, (Gauge) () -> (int)segmentsDone); + context.metricRegistry.register( + metricNameForTotalSegmentsPerKeyspace, (Gauge) () -> (int) segmentsTotal); + context.metricRegistry.register(metricNameForTotalSegments, (Gauge) () -> segmentsTotal); + context.metricRegistry.register(metricNameForRepairProgressPerKeyspace, (Gauge) () -> repairProgress); + context.metricRegistry.register(metricNameForRepairProgress, (Gauge) () -> repairProgress); + + } } UUID getRepairRunId() {