Skip to content

Commit 9a9c0f7

Browse files
authored
chore: add logs and metric to recovery (#1007)
Rationale: part of #799. Detailed changes: see title. Test plan: none.
1 parent e3b4009 commit 9a9c0f7

File tree

2 files changed

+27
-1
lines changed

2 files changed

+27
-1
lines changed

analytic_engine/src/instance/wal_replayer.rs

+26
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,9 @@ use std::{
1111
use async_trait::async_trait;
1212
use common_types::{schema::IndexInWriterSchema, table::ShardId};
1313
use common_util::error::BoxError;
14+
use lazy_static::lazy_static;
1415
use log::{debug, error, info, trace};
16+
use prometheus::{exponential_buckets, register_histogram, Histogram};
1517
use snafu::ResultExt;
1618
use table_engine::table::TableId;
1719
use tokio::sync::MutexGuard;
@@ -34,6 +36,22 @@ use crate::{
3436
table::data::TableDataRef,
3537
};
3638

39+
// Metrics of wal replayer
40+
lazy_static! {
41+
static ref PULL_LOGS_DURATION_HISTOGRAM: Histogram = register_histogram!(
42+
"wal_replay_pull_logs_duration",
43+
"Histogram for pull logs duration in wal replay in seconds",
44+
exponential_buckets(0.01, 2.0, 13).unwrap()
45+
)
46+
.unwrap();
47+
static ref APPLY_LOGS_DURATION_HISTOGRAM: Histogram = register_histogram!(
48+
"wal_replay_apply_logs_duration",
49+
"Histogram for apply logs duration in wal replay in seconds",
50+
exponential_buckets(0.01, 2.0, 13).unwrap()
51+
)
52+
.unwrap();
53+
}
54+
3755
/// Wal replayer supporting both table based and region based
3856
// TODO: limit the memory usage in `RegionBased` mode.
3957
pub struct WalReplayer<'a> {
@@ -186,18 +204,21 @@ impl TableBasedReplay {
186204
let mut log_entry_buf = VecDeque::with_capacity(context.wal_replay_batch_size);
187205
loop {
188206
// fetch entries to log_entry_buf
207+
let timer = PULL_LOGS_DURATION_HISTOGRAM.start_timer();
189208
let decoder = WalDecoder::default();
190209
log_entry_buf = log_iter
191210
.next_log_entries(decoder, log_entry_buf)
192211
.await
193212
.box_err()
194213
.context(ReplayWalWithCause { msg: None })?;
214+
drop(timer);
195215

196216
if log_entry_buf.is_empty() {
197217
break;
198218
}
199219

200220
// Replay all log entries of current table
221+
let timer = APPLY_LOGS_DURATION_HISTOGRAM.start_timer();
201222
replay_table_log_entries(
202223
&context.flusher,
203224
context.max_retry_flush_limit,
@@ -206,6 +227,7 @@ impl TableBasedReplay {
206227
log_entry_buf.iter(),
207228
)
208229
.await?;
230+
drop(timer);
209231
}
210232

211233
Ok(())
@@ -276,19 +298,23 @@ impl RegionBasedReplay {
276298

277299
// Split and replay logs.
278300
loop {
301+
let timer = PULL_LOGS_DURATION_HISTOGRAM.start_timer();
279302
let decoder = WalDecoder::default();
280303
log_entry_buf = log_iter
281304
.next_log_entries(decoder, log_entry_buf)
282305
.await
283306
.box_err()
284307
.context(ReplayWalWithCause { msg: None })?;
308+
drop(timer);
285309

286310
if log_entry_buf.is_empty() {
287311
break;
288312
}
289313

314+
let timer = APPLY_LOGS_DURATION_HISTOGRAM.start_timer();
290315
Self::replay_single_batch(context, &log_entry_buf, &mut serial_exec_ctxs, faileds)
291316
.await?;
317+
drop(timer);
292318
}
293319

294320
Ok(())

wal/src/message_queue_impl/region.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -579,7 +579,7 @@ impl<M: MessageQueue> Region<M> {
579579
let (snapshot, synchronizer) = {
580580
let inner = self.inner.write().await;
581581

582-
debug!(
582+
info!(
583583
"Mark deleted entries to sequence num:{}, region id:{}, table id:{}",
584584
sequence_num,
585585
inner.region_context.region_id(),

0 commit comments

Comments
 (0)