This repository has been archived by the owner on Apr 2, 2024. It is now read-only.

Commit a685504

Fix double counting while populating promscale_sql_database_chunks_count metric

Problem statement:
The `PromscaleCompressionLow` alert fires even when the compression ratio is good.

Root cause:
The SQL query that populates `promscale_sql_database_chunks_count` sums three kinds of catalog entries:
1) Uncompressed chunks
2) Proxy chunks that point to compressed chunks
3) Compressed chunks

However, the total chunk count should be just (1) + (3): every proxy in (2) points to an entry in (3), so summing all three counts each compressed chunk twice.

Solution:
Fix the SQL query to count only entries whose `compressed_chunk_id` is null, which holds for both (1) and (3).
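
For concreteness, here are the old and fixed forms of the count side by side (both taken from the metrics.go diff below):

```postgresql
-- Old query: counts (1) + (2) + (3), so every compressed chunk is counted
-- twice: once via its proxy row (2) and once as the compressed chunk itself (3).
select count(*)::bigint from _timescaledb_catalog.chunk where dropped=false;

-- Fixed query: compressed_chunk_id is null for (1) and (3) but not for the
-- proxy rows (2), so each logical chunk is counted exactly once.
select count(*)::bigint from _timescaledb_catalog.chunk where dropped=false and compressed_chunk_id is null;
```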

Signed-off-by: Arunprasad Rajkumar <ar.arunprasad@gmail.com>
arajkumar committed Jul 18, 2022
1 parent 3723dc7 commit a685504
Showing 4 changed files with 71 additions and 6 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -25,6 +25,7 @@ We use the following categories for changes:
### Fixed
- Refine check for existence of `prom_schema_migrations` table [#1452]
- Do not run rules-manager in `-db.read-only` mode [#1451]
+- Fix underlying metric (`promscale_sql_database_chunks_count`) which leads to false-positive firing of the PromscaleCompressionLow alert [#1494]

## [0.12.1] - 2022-06-29

7 changes: 4 additions & 3 deletions docs/runbooks/PromscaleCompressionLow.md
@@ -12,9 +12,10 @@ High disk usage by Promscale database
1. Open Grafana and navigate to Promscale dashboard
2. Go to Database section and see `Compressed chunks ratio`. If you see a ratio of < 10% then compression is not adequate in your system
3. Open psql
-4. Check number of uncompressed chunks: `select count(*)::bigint from _timescaledb_catalog.chunk where dropped=false and compressed_chunk_id=null;`
-5. Check number of maintenancec jobs: `select count(*) from timescaledb_information.jobs where proc_name = 'execute_maintenance_job'`
-6. Run the following debugging query:
+4. Check total number of chunks: `select count(*)::bigint from _timescaledb_catalog.chunk where dropped=false and compressed_chunk_id is null;`
+5. Check total number of compressed chunks: `select count(*)::bigint from _timescaledb_catalog.chunk where dropped=false and compressed_chunk_id is not null;`
+6. Check number of maintenance jobs: `select count(*) from timescaledb_information.jobs where proc_name = 'execute_maintenance_job'`
+7. Run the following debugging query:

```postgresql
SELECT
…
```
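Steps 4 and 5 can also be folded into one pass over the catalog; a minimal sketch (not part of this commit) that computes the ratio directly:

```postgresql
-- Compressed chunks as a fraction of total chunks (steps 4 and 5 combined);
-- a low value here corresponds to the low compression ratio the alert flags.
SELECT
  count(*) FILTER (WHERE compressed_chunk_id IS NOT NULL)::float
    / NULLIF(count(*) FILTER (WHERE compressed_chunk_id IS NULL), 0)
    AS compressed_chunks_ratio
FROM _timescaledb_catalog.chunk
WHERE dropped = false;
```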
6 changes: 4 additions & 2 deletions pkg/pgmodel/metrics/database/metrics.go
@@ -67,7 +67,8 @@ var metrics = []metricQueryWrap{
Help: "Total number of chunks in TimescaleDB currently.",
},
),
-query: `select count(*)::bigint from _timescaledb_catalog.chunk where dropped=false`,
+// compressed_chunk_id is null both for yet-to-be-compressed and for already-compressed chunks.
+query: `select count(*)::bigint from _timescaledb_catalog.chunk where dropped=false and compressed_chunk_id is null`,
},
{
metric: prometheus.NewGauge(
@@ -78,7 +79,8 @@
Help: "Total number of chunks created since creation of database.",
},
),
-query: `select count(*)::bigint from _timescaledb_catalog.chunk`,
+// compressed_chunk_id is null both for yet-to-be-compressed and for already-compressed chunks.
+query: `select count(*)::bigint from _timescaledb_catalog.chunk where compressed_chunk_id is null`,
},
{
metric: prometheus.NewGauge(
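After this change the two gauges share the `compressed_chunk_id is null` predicate and differ only in whether dropped chunks are excluded; a sketch (not from the commit) that cross-checks both against the catalog in one query:

```postgresql
-- chunks_count excludes dropped chunks; chunks_created counts every chunk
-- ever created. Both skip proxy rows via compressed_chunk_id IS NULL.
SELECT
  count(*) FILTER (WHERE dropped = false) AS chunks_count,
  count(*)                                AS chunks_created
FROM _timescaledb_catalog.chunk
WHERE compressed_chunk_id IS NULL;
```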
63 changes: 62 additions & 1 deletion pkg/tests/end_to_end_tests/database_metrics_test.go
@@ -34,6 +34,10 @@ func TestDatabaseMetrics(t *testing.T) {
require.Equal(t, float64(0), numMaintenanceJobs)
chunksCreated := getMetricValue(t, "chunks_created")
require.Equal(t, float64(0), chunksCreated)
+chunksCount := getMetricValue(t, "chunks_count")
+require.Equal(t, float64(0), chunksCount)
+chunksCompressedCount := getMetricValue(t, "chunks_compressed_count")
+require.Equal(t, float64(0), chunksCompressedCount)

// Update the metrics.
require.NoError(t, dbMetrics.Update())
@@ -45,6 +49,10 @@
require.Equal(t, float64(2), numMaintenanceJobs)
chunksCreated = getMetricValue(t, "chunks_created")
require.Equal(t, float64(0), chunksCreated)
+chunksCount = getMetricValue(t, "chunks_count")
+require.Equal(t, float64(0), chunksCount)
+chunksCompressedCount = getMetricValue(t, "chunks_compressed_count")
+require.Equal(t, float64(0), chunksCompressedCount)

// Ingest some data and then check the metrics to ensure proper updating.
ingestor, err := ingstr.NewPgxIngestorForTests(pgxconn.NewPgxConn(db), nil)
@@ -57,7 +65,60 @@
require.NoError(t, dbMetrics.Update())

chunksCreated = getMetricValue(t, "chunks_created")
-require.Equal(t, chunksCreated, float64(3))
+require.Equal(t, float64(3), chunksCreated)
+chunksCount = getMetricValue(t, "chunks_count")
+require.Equal(t, float64(3), chunksCount)
+chunksCompressedCount = getMetricValue(t, "chunks_compressed_count")
+require.Equal(t, float64(0), chunksCompressedCount)
})
}

+func TestDatabaseMetricsAfterCompression(t *testing.T) {
+	if !*useTimescaleDB {
+		t.Skip("test meaningless without TimescaleDB")
+	}
+	ts := generateSmallTimeseries()
+	withDB(t, *testDatabase, func(db *pgxpool.Pool, t testing.TB) {
+		ingestor, err := ingstr.NewPgxIngestorForTests(pgxconn.NewPgxConn(db), nil)
+		require.NoError(t, err)
+		defer ingestor.Close()
+		_, _, err = ingestor.Ingest(context.Background(), newWriteRequestWithTs(copyMetrics(ts)))
+		require.NoError(t, err)
+		err = ingestor.CompleteMetricCreation(context.Background())
+		if err != nil {
+			t.Fatal(err)
+		}
+
+		ctx, cancel := context.WithCancel(context.Background())
+		defer cancel()
+
+		dbMetrics := database.NewEngine(ctx, pgxconn.NewPgxConn(db))
+
+		// Update the metrics.
+		require.NoError(t, dbMetrics.Update())
+		// Get metrics before compressing the firstMetric metric chunk.
+		compressionStatus := getMetricValue(t, "compression_status")
+		require.Equal(t, float64(1), compressionStatus)
+		numMaintenanceJobs := getMetricValue(t, "worker_maintenance_job")
+		require.Equal(t, float64(2), numMaintenanceJobs)
+		chunksCreated := getMetricValue(t, "chunks_created")
+		require.Equal(t, float64(2), chunksCreated)
+		chunksCount := getMetricValue(t, "chunks_count")
+		require.Equal(t, float64(2), chunksCount)
+		chunksCompressedCount := getMetricValue(t, "chunks_compressed_count")
+		require.Equal(t, float64(0), chunksCompressedCount)
+
+		_, err = db.Exec(context.Background(), `SELECT public.compress_chunk(i) from public.show_chunks('prom_data."firstMetric"') i;`)
+		require.NoError(t, err)
+
+		// Update the metrics after compression.
+		require.NoError(t, dbMetrics.Update())
+		chunksCreated = getMetricValue(t, "chunks_created")
+		require.Equal(t, float64(2), chunksCreated)
+		chunksCount = getMetricValue(t, "chunks_count")
+		require.Equal(t, float64(2), chunksCount)
+		chunksCompressedCount = getMetricValue(t, "chunks_compressed_count")
+		require.Equal(t, float64(1), chunksCompressedCount)
+	})
+}

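The new test's expectations follow from how `compress_chunk` rewrites the catalog; a sketch of the same sequence in psql, assuming the two chunks the test ingests:

```postgresql
-- Before: two chunk rows, both with compressed_chunk_id IS NULL
--   => chunks_count = 2, chunks_compressed_count = 0.
SELECT public.compress_chunk(i) FROM public.show_chunks('prom_data."firstMetric"') i;
-- After: firstMetric's chunk row now points at a newly created compressed-chunk
-- row (compressed_chunk_id IS NOT NULL => chunks_compressed_count = 1), while
-- that new row itself has compressed_chunk_id IS NULL, keeping chunks_count = 2.
```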
