Skip to content

Commit 6feb71f

Browse files
authored
refactor: add grpc write failed counter metric (#963)
## Rationale add grpc write failed counter metric ## Detailed Changes - add counter metric in grpc proxy - add counter metric in grpc remote engine service ## Test Plan Existing tests
1 parent d5593b7 commit 6feb71f

File tree

7 files changed

+96
-14
lines changed

7 files changed

+96
-14
lines changed

Cargo.lock

+1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

proxy/Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ meta_client = { workspace = true }
3434
paste = { workspace = true }
3535
prom-remote-api = { workspace = true, features = ["warp"] }
3636
prometheus = { workspace = true }
37+
prometheus-static-metric = { workspace = true }
3738
prost = { workspace = true }
3839
query_engine = { workspace = true }
3940
query_frontend = { workspace = true }

proxy/src/grpc/metrics.rs

+28
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
// Copyright 2023 CeresDB Project Authors. Licensed under Apache-2.0.
2+
3+
// Grpc proxy metrics
4+
5+
use lazy_static::lazy_static;
6+
use prometheus::{register_int_counter_vec, IntCounterVec};
7+
use prometheus_static_metric::{auto_flush_from, make_auto_flush_static_metric};
8+
9+
make_auto_flush_static_metric! {
10+
pub label_enum GrpcTypeKind {
11+
write_failed,
12+
}
13+
14+
pub struct GrpcHandlerCounterVec: LocalIntCounter {
15+
"type" => GrpcTypeKind,
16+
}
17+
}
18+
19+
lazy_static! {
20+
pub static ref GRPC_HANDLER_COUNTER_VEC_GLOBAL: IntCounterVec =
21+
register_int_counter_vec!("grpc_handler_counter", "Grpc handler counter", &["type"])
22+
.unwrap();
23+
}
24+
25+
lazy_static! {
26+
pub static ref GRPC_HANDLER_COUNTER_VEC: GrpcHandlerCounterVec =
27+
auto_flush_from!(GRPC_HANDLER_COUNTER_VEC_GLOBAL, GrpcHandlerCounterVec);
28+
}

proxy/src/grpc/mod.rs

+1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
// Copyright 2023 CeresDB Project Authors. Licensed under Apache-2.0.
22

3+
mod metrics;
34
mod prom_query;
45
mod route;
56
mod sql_query;

proxy/src/grpc/write.rs

+24-6
Original file line numberDiff line numberDiff line change
@@ -3,24 +3,42 @@
33
use ceresdbproto::storage::{WriteRequest, WriteResponse};
44
use query_engine::executor::Executor as QueryExecutor;
55

6-
use crate::{error, error::build_ok_header, Context, Proxy};
6+
use crate::{
7+
error, error::build_ok_header, grpc::metrics::GRPC_HANDLER_COUNTER_VEC, Context, Proxy,
8+
};
79

810
impl<Q: QueryExecutor + 'static> Proxy<Q> {
911
pub async fn handle_write(&self, ctx: Context, req: WriteRequest) -> WriteResponse {
1012
self.hotspot_recorder.inc_write_reqs(&req).await;
13+
14+
let mut num_rows = 0;
15+
for table_request in &req.table_requests {
16+
for entry in &table_request.entries {
17+
num_rows += entry.field_groups.len();
18+
}
19+
}
20+
1121
match self.handle_write_internal(ctx, req).await {
1222
Err(e) => {
1323
error!("Failed to handle write, err:{e}");
24+
GRPC_HANDLER_COUNTER_VEC
25+
.write_failed
26+
.inc_by(num_rows as u64);
1427
WriteResponse {
1528
header: Some(error::build_err_header(e)),
1629
..Default::default()
1730
}
1831
}
19-
Ok(v) => WriteResponse {
20-
header: Some(build_ok_header()),
21-
success: v.success,
22-
failed: v.failed,
23-
},
32+
Ok(v) => {
33+
GRPC_HANDLER_COUNTER_VEC
34+
.write_failed
35+
.inc_by(v.failed as u64);
36+
WriteResponse {
37+
header: Some(build_ok_header()),
38+
success: v.success,
39+
failed: v.failed,
40+
}
41+
}
2442
}
2543
}
2644
}

server/src/grpc/metrics.rs

+23-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,10 @@
33
// Grpc server metrics
44

55
use lazy_static::lazy_static;
6-
use prometheus::{exponential_buckets, register_histogram_vec, HistogramVec};
6+
use prometheus::{
7+
exponential_buckets, register_histogram_vec, register_int_counter_vec, HistogramVec,
8+
IntCounterVec,
9+
};
710
use prometheus_static_metric::{auto_flush_from, make_auto_flush_static_metric};
811

912
// Register auto flush static metrics.
@@ -32,6 +35,14 @@ make_auto_flush_static_metric! {
3235
"type" => RemoteEngineTypeKind,
3336
}
3437

38+
pub label_enum RemoteEngineGrpcTypeKind {
39+
write_failed,
40+
}
41+
42+
pub struct RemoteEngineGrpcHandlerCounterVec: LocalIntCounter {
43+
"type" => RemoteEngineGrpcTypeKind,
44+
}
45+
3546
pub label_enum MetaEventTypeKind {
3647
open_shard,
3748
close_shard,
@@ -64,6 +75,13 @@ lazy_static! {
6475
exponential_buckets(0.0005, 2.0, 20).unwrap()
6576
)
6677
.unwrap();
78+
pub static ref REMOTE_ENGINE_GRPC_HANDLER_COUNTER_VEC_GLOBAL: IntCounterVec =
79+
register_int_counter_vec!(
80+
"remote_engine_grpc_handler_counter",
81+
"Remote engine grpc handler counter",
82+
&["type"]
83+
)
84+
.unwrap();
6785
pub static ref META_EVENT_GRPC_HANDLER_DURATION_HISTOGRAM_VEC_GLOBAL: HistogramVec =
6886
register_histogram_vec!(
6987
"meta_event_grpc_handler_duration",
@@ -84,6 +102,10 @@ lazy_static! {
84102
REMOTE_ENGINE_GRPC_HANDLER_DURATION_HISTOGRAM_VEC_GLOBAL,
85103
RemoteEngineGrpcHandlerDurationHistogramVec
86104
);
105+
pub static ref REMOTE_ENGINE_GRPC_HANDLER_COUNTER_VEC: RemoteEngineGrpcHandlerCounterVec = auto_flush_from!(
106+
REMOTE_ENGINE_GRPC_HANDLER_COUNTER_VEC_GLOBAL,
107+
RemoteEngineGrpcHandlerCounterVec
108+
);
87109
pub static ref META_EVENT_GRPC_HANDLER_DURATION_HISTOGRAM_VEC: MetaEventGrpcHandlerDurationHistogramVec = auto_flush_from!(
88110
META_EVENT_GRPC_HANDLER_DURATION_HISTOGRAM_VEC_GLOBAL,
89111
MetaEventGrpcHandlerDurationHistogramVec

server/src/grpc/remote_engine_service/mod.rs

+18-7
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,9 @@ use tokio_stream::wrappers::ReceiverStream;
3131
use tonic::{Request, Response, Status};
3232

3333
use crate::grpc::{
34-
metrics::REMOTE_ENGINE_GRPC_HANDLER_DURATION_HISTOGRAM_VEC,
34+
metrics::{
35+
REMOTE_ENGINE_GRPC_HANDLER_COUNTER_VEC, REMOTE_ENGINE_GRPC_HANDLER_DURATION_HISTOGRAM_VEC,
36+
},
3537
remote_engine_service::error::{ErrNoCause, ErrWithCause, Result, StatusCode},
3638
};
3739

@@ -353,20 +355,29 @@ async fn handle_write(ctx: HandlerContext, request: WriteRequest) -> Result<Writ
353355
msg: "fail to convert write request",
354356
})?;
355357

358+
let num_rows = write_request.write_request.row_group.num_rows();
356359
let table = find_table_by_identifier(&ctx, &write_request.table)?;
357360

358-
let affected_rows = table
361+
let res = table
359362
.write(write_request.write_request)
360363
.await
361364
.box_err()
362365
.context(ErrWithCause {
363366
code: StatusCode::Internal,
364367
msg: format!("fail to write table, table:{:?}", write_request.table),
365-
})?;
366-
Ok(WriteResponse {
367-
header: None,
368-
affected_rows: affected_rows as u64,
369-
})
368+
});
369+
match res {
370+
Ok(affected_rows) => Ok(WriteResponse {
371+
header: None,
372+
affected_rows: affected_rows as u64,
373+
}),
374+
Err(e) => {
375+
REMOTE_ENGINE_GRPC_HANDLER_COUNTER_VEC
376+
.write_failed
377+
.inc_by(num_rows as u64);
378+
Err(e)
379+
}
380+
}
370381
}
371382

372383
async fn handle_get_table_info(

0 commit comments

Comments
 (0)