From 65eedbdcdc4c610ec00dcd9338b8b78d5439dbe9 Mon Sep 17 00:00:00 2001 From: Valery Meleshkin Date: Tue, 20 Sep 2022 14:32:25 +0200 Subject: [PATCH] - Adding a batch of metrics covering long running statements originating from maintenance jobs. - Adding lock-mode metrics for the maintenance jobs - Adding these metrics to the dashboard. --- CHANGELOG.md | 1 + docs/mixin/dashboards/promscale.json | 600 +++++++++++++++++++++++- pkg/pgmodel/metrics/database/metrics.go | 174 ++++++- pkg/util/metrics.go | 6 +- 4 files changed, 773 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1a20112b4d..2561cef184 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,7 @@ We use the following categories for changes: - Sizes of maintenance worker backlogs exposed as database metrics on the Promscale dashboard [#1634] - Added a vacuum engine that detects and vacuums/freezes compressed chunks [#1648] - Add pool of database connections for maintenance jobs e.g. telemetry [#1657] +- Metrics for long-running statements and locks originating from maintenance jobs. 
[#1661] ### Changed - Log throughput in the same line for samples, spans and metric metadata [#1643] diff --git a/docs/mixin/dashboards/promscale.json b/docs/mixin/dashboards/promscale.json index 59b76d4ba9..128f5321d1 100644 --- a/docs/mixin/dashboards/promscale.json +++ b/docs/mixin/dashboards/promscale.json @@ -2194,6 +2194,604 @@ "title": "Network latency", "type": "timeseries" }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 39 + }, + "id": 51, + "interval": "2m", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(promscale_sql_database_worker_maintenance_job_long_running_total)", + "interval": "", + "legendFormat": "total", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(promscale_sql_database_worker_maintenance_job_long_running_compression)", + "hide": 
false, + "interval": "", + "legendFormat": "compression", + "range": true, + "refId": "G" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(promscale_sql_database_worker_maintenance_job_long_running_retention_tracing)", + "hide": false, + "interval": "", + "legendFormat": "retention-tracing", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(promscale_sql_database_worker_maintenance_job_long_running_retention_metric)", + "hide": false, + "interval": "", + "legendFormat": "retention-metric", + "range": true, + "refId": "C" + } + ], + "title": "Long running maintenance queries by job type", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 39 + }, + "id": 53, + "interval": "2m", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": 
"${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(promscale_sql_database_worker_maintenance_job_long_running_longest_seconds)", + "interval": "", + "legendFormat": "{{label_name}}", + "range": true, + "refId": "A" + } + ], + "title": "Longest running maintenance query", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 48 + }, + "id": 50, + "interval": "2m", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(promscale_sql_database_worker_maintenance_job_long_running_total)", + "interval": "", + "legendFormat": "total", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(promscale_sql_database_worker_maintenance_job_long_running_buffer_pin)", + "hide": false, + "interval": "", + "legendFormat": "buffer_pin", 
+ "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(promscale_sql_database_worker_maintenance_job_long_running_io)", + "hide": false, + "interval": "", + "legendFormat": "io", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(promscale_sql_database_worker_maintenance_job_long_running_ipc)", + "hide": false, + "interval": "", + "legendFormat": "ipc", + "range": true, + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(promscale_sql_database_worker_maintenance_job_long_running_lock)", + "hide": false, + "interval": "", + "legendFormat": "lock", + "range": true, + "refId": "E" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(promscale_sql_database_worker_maintenance_job_long_running_lwlock)", + "hide": false, + "interval": "", + "legendFormat": "lwlock", + "range": true, + "refId": "F" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(promscale_sql_database_worker_maintenance_job_long_running_timeout)", + "hide": false, + "interval": "", + "legendFormat": "timeout", + "range": true, + "refId": "G" + } + ], + "title": "Long running maintenance queries by wait event", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, 
+ "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 48 + }, + "id": 52, + "interval": "2m", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(promscale_sql_database_worker_maintenance_job_locks_total)", + "interval": "", + "legendFormat": "total", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(promscale_sql_database_worker_maintenance_job_locks_share_update_exclusive)", + "hide": false, + "interval": "", + "legendFormat": "share_update_exclusive", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(promscale_sql_database_worker_maintenance_job_locks_share_row_exclusive)", + "hide": false, + "interval": "", + "legendFormat": "share_row_exclusive", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(promscale_sql_database_worker_maintenance_job_locks_share)", + "hide": false, + "interval": "", + "legendFormat": "share", + "range": true, + "refId": "D" + }, + { 
+ "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(promscale_sql_database_worker_maintenance_job_locks_row_share)", + "hide": false, + "interval": "", + "legendFormat": "row_share", + "range": true, + "refId": "E" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(promscale_sql_database_worker_maintenance_job_locks_row_exclusive)", + "hide": false, + "interval": "", + "legendFormat": "row_exclusive", + "range": true, + "refId": "F" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(promscale_sql_database_worker_maintenance_job_locks_exclusive)", + "hide": false, + "interval": "", + "legendFormat": "exclusive", + "range": true, + "refId": "G" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(promscale_sql_database_worker_maintenance_job_locks_access_share)", + "hide": false, + "interval": "", + "legendFormat": "access_share", + "range": true, + "refId": "H" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(promscale_sql_database_worker_maintenance_job_locks_access_exclusive)", + "hide": false, + "interval": "", + "legendFormat": "access_exclusive", + "range": true, + "refId": "I" + } + ], + "title": "Locks held by maintenance jobs by lock mode", + "type": "timeseries" + }, { "collapsed": true, "datasource": { @@ -2204,7 +2802,7 @@ "h": 1, "w": 24, "x": 0, - "y": 39 + "y": 57 }, "id": 30, "panels": [ diff --git a/pkg/pgmodel/metrics/database/metrics.go b/pkg/pgmodel/metrics/database/metrics.go index 91da38ce59..bf63cb5b79 100644 --- a/pkg/pgmodel/metrics/database/metrics.go +++ 
b/pkg/pgmodel/metrics/database/metrics.go @@ -189,6 +189,169 @@ var metrics = []metricQueryWrap{ INNER JOIN _timescaledb_catalog.dimension_slice ds ON ds.id = cc.dimension_slice_id WHERE ds.range_start <= _timescaledb_internal.time_to_internal(now() - interval '1 hour') AND ds.range_end <= _timescaledb_internal.time_to_internal(now() - interval '1 hour')`, + }, { + metrics: gauges( + prometheus.GaugeOpts{ + Namespace: util.PromNamespace, + Subsystem: "sql_database", + Name: "worker_maintenance_job_locks_access_exclusive", + Help: "Number of AccessExclusiveLock locks held by Promscale maintenance workers.", + }, + prometheus.GaugeOpts{ + Namespace: util.PromNamespace, + Subsystem: "sql_database", + Name: "worker_maintenance_job_locks_access_share", + Help: "Number of AccessShareLock locks held by Promscale maintenance workers.", + }, + prometheus.GaugeOpts{ + Namespace: util.PromNamespace, + Subsystem: "sql_database", + Name: "worker_maintenance_job_locks_exclusive", + Help: "Number of ExclusiveLock locks held by Promscale maintenance workers.", + }, + prometheus.GaugeOpts{ + Namespace: util.PromNamespace, + Subsystem: "sql_database", + Name: "worker_maintenance_job_locks_row_exclusive", + Help: "Number of RowExclusiveLock locks held by Promscale maintenance workers.", + }, + prometheus.GaugeOpts{ + Namespace: util.PromNamespace, + Subsystem: "sql_database", + Name: "worker_maintenance_job_locks_row_share", + Help: "Number of RowShareLock locks held by Promscale maintenance workers.", + }, + prometheus.GaugeOpts{ + Namespace: util.PromNamespace, + Subsystem: "sql_database", + Name: "worker_maintenance_job_locks_share", + Help: "Number of ShareLock locks held by Promscale maintenance workers.", + }, + prometheus.GaugeOpts{ + Namespace: util.PromNamespace, + Subsystem: "sql_database", + Name: "worker_maintenance_job_locks_share_update_exclusive", + Help: "Number of ShareUpdateExclusiveLock locks held by Promscale maintenance workers.", + }, + prometheus.GaugeOpts{ + 
Namespace: util.PromNamespace, + Subsystem: "sql_database", + Name: "worker_maintenance_job_locks_share_row_exclusive", + Help: "Number of ShareRowExclusiveLock locks held by Promscale maintenance workers.", + }, + prometheus.GaugeOpts{ + Namespace: util.PromNamespace, + Subsystem: "sql_database", + Name: "worker_maintenance_job_locks_total", + Help: "Number of locks held by Promscale maintenance workers.", + }, + ), + query: `SELECT + COUNT(*) FILTER (WHERE l.mode = 'AccessExclusiveLock') :: BIGINT AS AccessExclusiveLock, + COUNT(*) FILTER (WHERE l.mode = 'AccessShareLock') :: BIGINT AS AccessShareLock, + COUNT(*) FILTER (WHERE l.mode = 'ExclusiveLock') :: BIGINT AS ExclusiveLock, + COUNT(*) FILTER (WHERE l.mode = 'RowExclusiveLock') :: BIGINT AS RowExclusiveLock, + COUNT(*) FILTER (WHERE l.mode = 'RowShareLock') :: BIGINT AS RowShareLock, + COUNT(*) FILTER (WHERE l.mode = 'ShareLock') :: BIGINT AS ShareLock, + COUNT(*) FILTER (WHERE l.mode = 'ShareUpdateExclusiveLock') :: BIGINT AS ShareUpdateExclusiveLock, + COUNT(*) FILTER (WHERE l.mode = 'ShareRowExclusiveLock') :: BIGINT AS ShareRowExclusiveLock, + -- + COUNT(*) :: BIGINT AS total + FROM pg_stat_activity sa + JOIN pg_locks l ON l.pid = sa.pid + WHERE sa.application_name LIKE 'promscale maintenance%'`, + }, { + metrics: gauges( + prometheus.GaugeOpts{ + Namespace: util.PromNamespace, + Subsystem: "sql_database", + Name: "worker_maintenance_job_long_running_buffer_pin", + Help: "Number of Promscale maintenance workers executing long running queries, waiting on BufferPin events.", + }, + prometheus.GaugeOpts{ + Namespace: util.PromNamespace, + Subsystem: "sql_database", + Name: "worker_maintenance_job_long_running_io", + Help: "Number of Promscale maintenance workers executing long running queries, waiting on IO events.", + }, + prometheus.GaugeOpts{ + Namespace: util.PromNamespace, + Subsystem: "sql_database", + Name: "worker_maintenance_job_long_running_ipc", + Help: "Number of Promscale maintenance workers 
executing long running queries, waiting on IPC events.", + }, + prometheus.GaugeOpts{ + Namespace: util.PromNamespace, + Subsystem: "sql_database", + Name: "worker_maintenance_job_long_running_lock", + Help: "Number of Promscale maintenance workers executing long running queries, waiting on Lock events.", + }, + prometheus.GaugeOpts{ + Namespace: util.PromNamespace, + Subsystem: "sql_database", + Name: "worker_maintenance_job_long_running_lwlock", + Help: "Number of Promscale maintenance workers executing long running queries, waiting on LWLock events.", + }, + prometheus.GaugeOpts{ + Namespace: util.PromNamespace, + Subsystem: "sql_database", + Name: "worker_maintenance_job_long_running_timeout", + Help: "Number of Promscale maintenance workers executing long running queries, waiting on Timeout events.", + }, + prometheus.GaugeOpts{ + Namespace: util.PromNamespace, + Subsystem: "sql_database", + Name: "worker_maintenance_job_long_running_compression", + Help: "Number of Promscale maintenance workers executing long running queries, originating from compression jobs.", + }, + prometheus.GaugeOpts{ + Namespace: util.PromNamespace, + Subsystem: "sql_database", + Name: "worker_maintenance_job_long_running_retention_metric", + Help: "Number of Promscale maintenance workers executing long running queries, originating from metric retention jobs.", + }, + prometheus.GaugeOpts{ + Namespace: util.PromNamespace, + Subsystem: "sql_database", + Name: "worker_maintenance_job_long_running_retention_tracing", + Help: "Number of Promscale maintenance workers executing long running queries, originating from tracing retention jobs.", + }, + prometheus.GaugeOpts{ + Namespace: util.PromNamespace, + Subsystem: "sql_database", + Name: "worker_maintenance_job_long_running_total", + Help: "Number of Promscale maintenance workers executing long running queries.", + }, + ), + query: `SELECT + -- by wait event type; not exhaustive + COUNT(*) FILTER (WHERE sa.wait_event_type = 'BufferPin') :: 
BIGINT AS buffer_pin_cnt, + COUNT(*) FILTER (WHERE sa.wait_event_type = 'IO') :: BIGINT AS io_cnt, + COUNT(*) FILTER (WHERE sa.wait_event_type = 'IPC') :: BIGINT AS ipc_cnt, + COUNT(*) FILTER (WHERE sa.wait_event_type = 'Lock') :: BIGINT AS lock_cnt, + COUNT(*) FILTER (WHERE sa.wait_event_type = 'LWLock') :: BIGINT AS lwlock_cnt, + COUNT(*) FILTER (WHERE sa.wait_event_type = 'Timeout') :: BIGINT AS timeout_cnt, + -- by workload type; exhaustive (the sum should equal the total) + COUNT(*) FILTER (WHERE sa.application_name LIKE '%compression%') :: BIGINT AS compression_cnt, + COUNT(*) FILTER (WHERE sa.application_name LIKE '%retention%' AND sa.application_name LIKE '%metric%') :: BIGINT AS metric_retention_cnt, + COUNT(*) FILTER (WHERE sa.application_name LIKE '%retention%' AND sa.application_name LIKE '%tracing%') :: BIGINT AS tracing_retention_cnt, + -- + COUNT(*) :: BIGINT AS total + FROM pg_stat_activity sa + WHERE sa.application_name LIKE 'promscale maintenance%' + AND (now() - coalesce(sa.query_start, sa.xact_start)) > INTERVAL '10 seconds' + AND sa.state <> 'idle'`, + }, { + metrics: gauges( + prometheus.GaugeOpts{ + Namespace: util.PromNamespace, + Subsystem: "sql_database", + Name: "worker_maintenance_job_long_running_longest_seconds", + Help: "The duration in seconds of a longest running query, originating from a Promscale maintenance worker.", + }, + ), + query: `SELECT coalesce(extract(EPOCH FROM MAX(now() - coalesce(sa.query_start, sa.xact_start))) :: BIGINT, 0) + FROM pg_stat_activity sa WHERE sa.application_name LIKE 'promscale maintenance%'`, }, { metrics: gauges( prometheus.GaugeOpts{ @@ -264,8 +427,10 @@ var metrics = []metricQueryWrap{ }, } -// GetMetric returns the first metric whose Name matches the supplied name. +// GetMetric returns the metric whose Description best matches the supplied name. 
func GetMetric(name string) (prometheus.Metric, error) { + var candidate prometheus.Metric = nil + candidateDescLen := 0 for _, ms := range metrics { for _, m := range ms.metrics { metric := getMetric(m) @@ -273,12 +438,13 @@ func GetMetric(name string) (prometheus.Metric, error) { if err != nil { return nil, fmt.Errorf("extract metric string") } - if strings.Contains(str, name) { - return metric, nil + if strings.Contains(str, name) && (len(str) < candidateDescLen || candidate == nil) { + candidate = metric + candidateDescLen = len(str) } } } - return nil, nil + return candidate, nil } func getMetric(c prometheus.Collector) prometheus.Metric { diff --git a/pkg/util/metrics.go b/pkg/util/metrics.go index d3ff6bd6ae..1a811a4e1a 100644 --- a/pkg/util/metrics.go +++ b/pkg/util/metrics.go @@ -7,9 +7,9 @@ import ( io_prometheus_client "github.com/prometheus/client_model/go" ) -//returns a exponential histogram for a saturating metric. Grows exponentially -//until max-10, and has another bucket for max. -//This is done so we can tell from the histogram if the resource was saturated or not. +// returns an exponential histogram for a saturating metric. Grows exponentially +// until max-10, and has another bucket for max. +// This is done so we can tell from the histogram if the resource was saturated or not. func HistogramBucketsSaturating(start float64, factor float64, max float64) []float64 { if max-10 < 1 { panic("HistogramBucketsSaturating needs a positive max")