Skip to content
This repository has been archived by the owner on Apr 2, 2024. It is now read-only.

Commit

Permalink
Dashboard improvements and adding alerts.
Browse files Browse the repository at this point in the history
  • Loading branch information
sumerman committed Sep 19, 2022
1 parent 11e3e1f commit 96f1626
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 7 deletions.
47 changes: 47 additions & 0 deletions docs/mixin/alerts/alerts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,53 @@ groups:
summary: Promscale maintenance jobs taking too long to complete.
description: "Promscale Database is taking {{ $value }} seconds to respond to Promscale's requests."
runbook_url: https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscaleMaintenanceJobRunningTooLong.md
- alert: PromscaleMaintenanceJobNotKeepingup
  # Warns when the maintenance-job backlog is both sustained and still growing.
  # The same pair of conditions is checked for each of the four
  # promscale_sql_database_chunks_{metrics,traces}_{uncompressed,expired}_count
  # series, and the four results are OR-ed together:
  #   * min_over_time(...[1h]) > 10 - the value never dropped to 10 or below
  #     during the last hour;
  #   * delta(...[10m]) > 0 - the value rose over the last 10 minutes.
  # The delta check is strict, so a backlog that is merely flat does not fire.
  # There is no `for:` clause; the 1h window is what requires persistence.
  expr: |
    (
      (
        min_over_time(promscale_sql_database_chunks_metrics_uncompressed_count[1h]) > 10
      )
      and
      (
        delta(promscale_sql_database_chunks_metrics_uncompressed_count[10m]) > 0
      )
    )
    or
    (
      (
        min_over_time(promscale_sql_database_chunks_metrics_expired_count[1h]) > 10
      )
      and
      (
        delta(promscale_sql_database_chunks_metrics_expired_count[10m]) > 0
      )
    )
    or
    (
      (
        min_over_time(promscale_sql_database_chunks_traces_uncompressed_count[1h]) > 10
      )
      and
      (
        delta(promscale_sql_database_chunks_traces_uncompressed_count[10m]) > 0
      )
    )
    or
    (
      (
        min_over_time(promscale_sql_database_chunks_traces_expired_count[1h]) > 10
      )
      and
      (
        delta(promscale_sql_database_chunks_traces_expired_count[10m]) > 0
      )
    )
  labels:
    severity: warning
  annotations:
    summary: Promscale maintenance jobs are not keeping up.
    description: "The amount of work for the Promscale maintenance jobs has not been decreasing for a long time."
    # NOTE(review): this links to the "RunningTooLong" runbook; confirm whether
    # a dedicated runbook for this alert exists before changing the URL.
    runbook_url: https://github.com/timescale/promscale/blob/master/docs/runbooks/PromscaleMaintenanceJobRunningTooLong.md
- alert: PromscaleMaintenanceJobFailures
expr: promscale_sql_database_worker_maintenance_job_failed == 1
labels:
Expand Down
15 changes: 8 additions & 7 deletions docs/mixin/dashboards/promscale.json
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,7 @@
"refId": "A"
}
],
"title": "Samples Ingest Rate",
"title": "Ingest Rates",
"type": "timeseries"
},
{
Expand Down Expand Up @@ -1619,13 +1619,13 @@
"exemplar": true,
"expr": "max by (job, instance) (promscale_sql_database_chunks_metrics_delayed_compression_count)",
"interval": "",
"legendFormat": "metrics-expired",
"legendFormat": "metrics-compression-delayed",
"range": true,
"refId": "E",
"hide": false
}
],
"title": "Maintenance job backlogs",
"title": "The number of chunks to be processed by maintenance jobs",
"type": "timeseries"
},
{
Expand All @@ -1648,7 +1648,7 @@
}
]
},
"unit": "dateTimeAsIso"
"unit": "s"
},
"overrides": []
},
Expand Down Expand Up @@ -1680,14 +1680,15 @@
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"exemplar": true,
"expr": "max(promscale_sql_database_worker_maintenance_job_start_timestamp_seconds) * 1000",
"exemplar": false,
"expr": "time() - max(promscale_sql_database_worker_maintenance_job_start_timestamp_seconds)",
"interval": "",
"legendFormat": "",
"refId": "A"
}
],
"title": "Last DB maintenance job start",
"title": "Time since the last job start",
"description": "Time since the last DB maintenance job started",
"type": "stat"
},
{
Expand Down

0 comments on commit 96f1626

Please sign in to comment.