Skip to content

Commit a15ef0c

Browse files
michalkucharczykmelekesdmitry-markinbkchr
authored andcommitted
service: storage monitor added (paritytech#13082)
* service: storage monitor added Storage monitor added. It uses `notify` create to get notifications about any changes to monitored path (which is database path). Notifications are consumed in essential task which terminates when available storage space drops below given threshold. Closes: paritytech#12399 * Cargo.lock updated * misspell * fs events throttling added * minor updates * filter out non mutating events * misspell * ".git/.scripts/commands/fmt/fmt.sh" * Update client/service/src/storage_monitor.rs Co-authored-by: Anton <anton.kalyaev@gmail.com> * storage-monitor crate added * cleanup: configuration + service builder * storage_monitor in custom service (wip) * copy-paste bad desc fixed * notify removed * storage_monitor added to node * fix for clippy * publish = false * Update bin/node/cli/src/command.rs Co-authored-by: Dmitry Markin <dmitry@markin.tech> * Apply suggestions from code review Co-authored-by: Bastian Köcher <git@kchr.de> * crate name: storage-monitor -> sc-storage-monitor * error handling improved * Apply suggestions from code review Co-authored-by: Bastian Köcher <git@kchr.de> * publish=false removed Co-authored-by: command-bot <> Co-authored-by: Anton <anton.kalyaev@gmail.com> Co-authored-by: Dmitry Markin <dmitry@markin.tech> Co-authored-by: Bastian Köcher <git@kchr.de>
1 parent 1eba5b7 commit a15ef0c

File tree

11 files changed

+227
-10
lines changed

11 files changed

+227
-10
lines changed

Cargo.lock

+31
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ members = [
6464
"client/service",
6565
"client/service/test",
6666
"client/state-db",
67+
"client/storage-monitor",
6768
"client/sysinfo",
6869
"client/sync-state-rpc",
6970
"client/telemetry",

bin/node/cli/Cargo.toml

+2
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ sc-executor = { version = "0.10.0-dev", path = "../../../client/executor" }
8181
sc-authority-discovery = { version = "0.10.0-dev", path = "../../../client/authority-discovery" }
8282
sc-sync-state-rpc = { version = "0.10.0-dev", path = "../../../client/sync-state-rpc" }
8383
sc-sysinfo = { version = "6.0.0-dev", path = "../../../client/sysinfo" }
84+
sc-storage-monitor = { version = "0.1.0", path = "../../../client/storage-monitor" }
8485

8586
# frame dependencies
8687
frame-system = { version = "4.0.0-dev", path = "../../../frame/system" }
@@ -138,6 +139,7 @@ substrate-frame-cli = { version = "4.0.0-dev", optional = true, path = "../../..
138139
try-runtime-cli = { version = "0.10.0-dev", optional = true, path = "../../../utils/frame/try-runtime/cli" }
139140
sc-cli = { version = "0.10.0-dev", path = "../../../client/cli", optional = true }
140141
pallet-balances = { version = "4.0.0-dev", path = "../../../frame/balances" }
142+
sc-storage-monitor = { version = "0.1.0", path = "../../../client/storage-monitor" }
141143

142144
[features]
143145
default = ["cli"]

bin/node/cli/src/cli.rs

+4
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,10 @@ pub struct Cli {
3636
/// telemetry, if telemetry is enabled.
3737
#[arg(long)]
3838
pub no_hardware_benchmarks: bool,
39+
40+
#[allow(missing_docs)]
41+
#[clap(flatten)]
42+
pub storage_monitor: sc_storage_monitor::StorageMonitorParams,
3943
}
4044

4145
/// Possible subcommands of the main binary.

bin/node/cli/src/command.rs

+1-2
Original file line numberDiff line numberDiff line change
@@ -87,8 +87,7 @@ pub fn run() -> Result<()> {
8787
None => {
8888
let runner = cli.create_runner(&cli.run)?;
8989
runner.run_node_until_exit(|config| async move {
90-
service::new_full(config, cli.no_hardware_benchmarks)
91-
.map_err(sc_cli::Error::Service)
90+
service::new_full(config, cli).map_err(sc_cli::Error::Service)
9291
})
9392
},
9493
Some(Subcommand::Inspect(cmd)) => {

bin/node/cli/src/service.rs

+13-6
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020

2121
//! Service implementation. Specialized wrapper over substrate service.
2222
23+
use crate::Cli;
2324
use codec::Encode;
2425
use frame_benchmarking_cli::SUBSTRATE_REFERENCE_HARDWARE;
2526
use frame_system_rpc_runtime_api::AccountNonceApi;
@@ -556,12 +557,18 @@ pub fn new_full_base(
556557
}
557558

558559
/// Builds a new service for a full client.
559-
pub fn new_full(
560-
config: Configuration,
561-
disable_hardware_benchmarks: bool,
562-
) -> Result<TaskManager, ServiceError> {
563-
new_full_base(config, disable_hardware_benchmarks, |_, _| ())
564-
.map(|NewFullBase { task_manager, .. }| task_manager)
560+
pub fn new_full(config: Configuration, cli: Cli) -> Result<TaskManager, ServiceError> {
561+
let database_source = config.database.clone();
562+
let task_manager = new_full_base(config, cli.no_hardware_benchmarks, |_, _| ())
563+
.map(|NewFullBase { task_manager, .. }| task_manager)?;
564+
565+
sc_storage_monitor::StorageMonitorService::try_spawn(
566+
cli.storage_monitor,
567+
database_source,
568+
&task_manager.spawn_essential_handle(),
569+
)?;
570+
571+
Ok(task_manager)
565572
}
566573

567574
#[cfg(test)]

client/cli/src/params/database_params.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
use crate::arg_enums::Database;
2020
use clap::Args;
2121

22-
/// Parameters for block import.
22+
/// Parameters for database
2323
#[derive(Debug, Clone, PartialEq, Args)]
2424
pub struct DatabaseParams {
2525
/// Select database backend to use.
@@ -32,7 +32,7 @@ pub struct DatabaseParams {
3232
}
3333

3434
impl DatabaseParams {
35-
/// Limit the memory the database cache can use.
35+
/// Database backend
3636
pub fn database(&self) -> Option<Database> {
3737
self.database
3838
}

client/service/Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ sc-offchain = { version = "4.0.0-dev", path = "../offchain" }
7373
prometheus-endpoint = { package = "substrate-prometheus-endpoint", path = "../../utils/prometheus", version = "0.10.0-dev" }
7474
sc-tracing = { version = "4.0.0-dev", path = "../tracing" }
7575
sc-sysinfo = { version = "6.0.0-dev", path = "../sysinfo" }
76+
sc-storage-monitor = { version = "0.1.0", path = "../storage-monitor" }
7677
tracing = "0.1.29"
7778
tracing-futures = { version = "0.2.4" }
7879
async-trait = "0.1.57"

client/service/src/error.rs

+3
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,9 @@ pub enum Error {
4848
#[error(transparent)]
4949
Telemetry(#[from] sc_telemetry::Error),
5050

51+
#[error(transparent)]
52+
Storage(#[from] sc_storage_monitor::Error),
53+
5154
#[error("Best chain selection strategy (SelectChain) is not provided.")]
5255
SelectChainRequired,
5356

client/storage-monitor/Cargo.toml

+20
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
[package]
2+
name = "sc-storage-monitor"
3+
version = "0.1.0"
4+
authors = ["Parity Technologies <admin@parity.io>"]
5+
edition = "2021"
6+
license = "GPL-3.0-or-later WITH Classpath-exception-2.0"
7+
repository = "https://github.com/paritytech/substrate"
8+
description = "Storage monitor service for substrate"
9+
homepage = "https://substrate.io"
10+
11+
[dependencies]
12+
clap = { version = "4.0.9", features = ["derive", "string"] }
13+
futures = "0.3.21"
14+
log = "0.4.17"
15+
nix = { version = "0.26.1", features = ["fs"] }
16+
sc-client-db = { version = "0.10.0-dev", default-features = false, path = "../db" }
17+
sc-utils = { version = "4.0.0-dev", path = "../utils" }
18+
sp-core = { version = "7.0.0", path = "../../primitives/core" }
19+
tokio = "1.22.0"
20+
thiserror = "1.0.30"

client/storage-monitor/src/lib.rs

+149
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
// This file is part of Substrate.
2+
3+
// Copyright (C) 2022 Parity Technologies (UK) Ltd.
4+
// SPDX-License-Identifier: GPL-3.0-or-later WITH Classpath-exception-2.0
5+
6+
// This program is free software: you can redistribute it and/or modify
7+
// it under the terms of the GNU General Public License as published by
8+
// the Free Software Foundation, either version 3 of the License, or
9+
// (at your option) any later version.
10+
11+
// This program is distributed in the hope that it will be useful,
12+
// but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14+
// GNU General Public License for more details.
15+
16+
// You should have received a copy of the GNU General Public License
17+
// along with this program. If not, see <https://www.gnu.org/licenses/>.
18+
19+
use clap::Args;
20+
use nix::{errno::Errno, sys::statvfs::statvfs};
21+
use sc_client_db::DatabaseSource;
22+
use sp_core::traits::SpawnEssentialNamed;
23+
use std::{
24+
path::{Path, PathBuf},
25+
time::Duration,
26+
};
27+
28+
const LOG_TARGET: &str = "storage-monitor";
29+
30+
/// Error type used in this crate.
31+
#[derive(Debug, thiserror::Error)]
32+
pub enum Error {
33+
#[error("IO Error")]
34+
IOError(#[from] Errno),
35+
#[error("Out of storage space: available {0}MB, required {1}MB")]
36+
StorageOutOfSpace(u64, u64),
37+
}
38+
39+
/// Parameters used to create the storage monitor.
40+
#[derive(Default, Debug, Clone, Args)]
41+
pub struct StorageMonitorParams {
42+
/// Required available space on database storage. If available space for DB storage drops below
43+
/// the given threshold, node will be gracefully terminated. If `0` is given monitoring will be
44+
/// disabled.
45+
#[arg(long = "db-storage-threshold", value_name = "MB", default_value_t = 1000)]
46+
pub threshold: u64,
47+
48+
/// How often available space is polled.
49+
#[arg(long = "db-storage-polling-period", value_name = "SECONDS", default_value_t = 5, value_parser = clap::value_parser!(u32).range(1..))]
50+
pub polling_period: u32,
51+
}
52+
53+
/// Storage monitor service: checks the available space for the filesystem for fiven path.
54+
pub struct StorageMonitorService {
55+
/// watched path
56+
path: PathBuf,
57+
/// number of megabytes that shall be free on the filesystem for watched path
58+
threshold: u64,
59+
/// storage space polling period (seconds)
60+
polling_period: u32,
61+
}
62+
63+
impl StorageMonitorService {
64+
/// Creates new StorageMonitorService for given client config
65+
pub fn try_spawn(
66+
parameters: StorageMonitorParams,
67+
database: DatabaseSource,
68+
spawner: &impl SpawnEssentialNamed,
69+
) -> Result<(), Error> {
70+
Ok(match (parameters.threshold, database.path()) {
71+
(0, _) => {
72+
log::info!(
73+
target: LOG_TARGET,
74+
"StorageMonitorService: threshold `0` given, storage monitoring disabled",
75+
);
76+
},
77+
(_, None) => {
78+
log::warn!(
79+
target: LOG_TARGET,
80+
"StorageMonitorService: no database path to observe",
81+
);
82+
},
83+
(threshold, Some(path)) => {
84+
log::debug!(
85+
target: LOG_TARGET,
86+
"Initializing StorageMonitorService for db path: {:?}",
87+
path,
88+
);
89+
90+
Self::check_free_space(&path, threshold)?;
91+
92+
let storage_monitor_service = StorageMonitorService {
93+
path: path.to_path_buf(),
94+
threshold,
95+
polling_period: parameters.polling_period,
96+
};
97+
98+
spawner.spawn_essential(
99+
"storage-monitor",
100+
None,
101+
Box::pin(storage_monitor_service.run()),
102+
);
103+
},
104+
})
105+
}
106+
107+
/// Main monitoring loop, intended to be spawned as essential task. Quits if free space drop
108+
/// below threshold.
109+
async fn run(self) {
110+
loop {
111+
tokio::time::sleep(Duration::from_secs(self.polling_period.into())).await;
112+
if Self::check_free_space(&self.path, self.threshold).is_err() {
113+
break
114+
};
115+
}
116+
}
117+
118+
/// Returns free space in MB, or error if statvfs failed.
119+
fn free_space(path: &Path) -> Result<u64, Error> {
120+
statvfs(path)
121+
.map(|stats| stats.blocks_available() * stats.block_size() / 1_000_000)
122+
.map_err(Error::from)
123+
}
124+
125+
/// Checks if the amount of free space for given `path` is above given `threshold`.
126+
/// If it dropped below, error is returned.
127+
/// System errors are silently ignored.
128+
fn check_free_space(path: &Path, threshold: u64) -> Result<(), Error> {
129+
match StorageMonitorService::free_space(path) {
130+
Ok(available_space) => {
131+
log::trace!(
132+
target: LOG_TARGET,
133+
"free: {available_space} , threshold: {threshold}.",
134+
);
135+
136+
if available_space < threshold {
137+
log::error!(target: LOG_TARGET, "Available space {available_space}MB for path `{}` dropped below threshold: {threshold}MB , terminating...", path.display());
138+
Err(Error::StorageOutOfSpace(available_space, threshold))
139+
} else {
140+
Ok(())
141+
}
142+
},
143+
Err(e) => {
144+
log::error!(target: LOG_TARGET, "Could not read available space: {:?}.", e);
145+
Err(e)
146+
},
147+
}
148+
}
149+
}

0 commit comments

Comments
 (0)