Skip to content

Commit 41e988f

Browse files
mayastor-borsjonathan-teh
mayastor-bors
andcommitted
Merge #771
771: feat(nexus): implement set ANA state for an NVMf-published Nexus r=jonathan-teh a=jonathan-teh Implement set_ana_state in nexus_bdev::Nexus and expose that over gRPC and the client CLI. Fixes CAS-756, #758 Co-authored-by: Jonathan Teh <30538043+jonathan-teh@users.noreply.github.com>
2 parents 8772a94 + 1a924d3 commit 41e988f

File tree

12 files changed

+442
-22
lines changed

12 files changed

+442
-22
lines changed

doc/run.md

+38-6
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ It will also need at least 512 2 MB Hugepages configured.
1818

1919
> Learn more about hugepages: [parts 1][hugepages-lwn-one], [2][hugepages-lwn-two],
2020
> [3][hugepages-lwn-three], [4][hugepages-lwn-four], [5][hugepages-lwn-five].
21-
21+
2222
In NixOS:
2323

2424
```nix
@@ -78,7 +78,7 @@ In order to use the full feature set of Mayastor, some or all of the following c
7878
+ `nvme_loop`: NVMe Loop Device support
7979

8080
To load these on NixOS:
81-
81+
8282
```nix
8383
# /etc/nixos/configuration.nix
8484
boot.kernelModules = [
@@ -91,7 +91,40 @@ In order to use the full feature set of Mayastor, some or all of the following c
9191
```bash
9292
modprobe nbd nvmet nvmet_rdma nvme_fabrics nvme_tcp nvme_rdma nvme_loop
9393
```
94-
* An NVMe device. (Typically via PCI-E through an standard slot or [M.2][m-dot-2] port)
94+
* For Asymmetric Namespace Access (ANA) support (early preview), the following kernel build configuration must be enabled:
95+
+ `CONFIG_NVME_MULTIPATH`: enables support for multipath access to NVMe subsystems
96+
97+
This is usually already enabled in distribution kernels, at least for RHEL/CentOS 8.2, Ubuntu 20.04 LTS, and SUSE Linux Enterprise 15.2.
98+
99+
On some distributions such as RHEL 8, the feature must be enabled manually:
100+
101+
```sh
102+
# /etc/modprobe.d/nvme-multipath
103+
options nvme_core multipath=1
104+
```
105+
106+
followed by reloading the `nvme-core` module or rebooting.
107+
108+
To build this on NixOS:
109+
110+
```nix
111+
# /etc/nixos/configuration.nix
112+
boot.kernelPackages = pkgs.linuxPackages;
113+
boot.kernelPatches = [ {
114+
name = "nvme-multipath";
115+
patch = null;
116+
extraConfig = ''
117+
NVME_MULTIPATH y
118+
'';
119+
} ];
120+
```
121+
122+
followed by:
123+
124+
```sh
125+
sudo nixos-rebuild boot
126+
```
127+
* An NVMe device. (Typically via PCI-E through a standard slot or [M.2][m-dot-2] port)
95128
* A version of [`nix`][nix-install] configured as in the [build guide.][doc-build]
96129

97130
## Running binaries directly
@@ -158,7 +191,7 @@ Why these parameters?
158191
- `--privileged` to allow controlling memory policies.
159192

160193
> **TODO:** We can use [control groups][control-groups] for this!
161-
- `-v /dev:/dev:rw` is needed to get access to any raw device you might want to consume as local
194+
- `-v /dev:/dev:rw` is needed to get access to any raw device you might want to consume as local
162195
storage and huge pages
163196
- `-v /dev/shm:/dev/shm:rw` is needed for a circular buffer that can trace any IO operations
164197
as they happen
@@ -184,7 +217,7 @@ nixpkgs.overlays = [
184217
];
185218
186219
systemd.services.mayastor = {
187-
wantedBy = [ "multi-user.target" ];
220+
wantedBy = [ "multi-user.target" ];
188221
after = [ "network.target" ];
189222
description = "A cloud native declarative data plane.";
190223
serviceConfig = {
@@ -273,4 +306,3 @@ production Mayastor deployment and operation instructions.
273306
[lxd]: https://linuxcontainers.org/
274307
[libvirtd]: https://libvirt.org/index.html
275308
[terraform-readme]: ./terraform/readme.adoc
276-
[aarch64-branch]:

mayastor/src/bdev/nexus/nexus_bdev.rs

+55
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ use std::{
1313

1414
use futures::{channel::oneshot, future::join_all};
1515
use nix::errno::Errno;
16+
use rpc::mayastor::NvmeAnaState;
1617
use serde::Serialize;
1718
use snafu::{ResultExt, Snafu};
1819
use tonic::{Code, Status};
@@ -106,6 +107,8 @@ pub enum Error {
106107
AlreadyShared { name: String },
107108
#[snafu(display("The nexus {} has not been shared", name))]
108109
NotShared { name: String },
110+
#[snafu(display("The nexus {} has not been shared over NVMf", name))]
111+
NotSharedNvmf { name: String },
109112
#[snafu(display("Failed to share nexus over NBD {}", name))]
110113
ShareNbdNexus { source: NbdError, name: String },
111114
#[snafu(display("Failed to share iscsi nexus {}", name))]
@@ -233,6 +236,8 @@ pub enum Error {
233236
},
234237
#[snafu(display("Invalid ShareProtocol value {}", sp_value))]
235238
InvalidShareProtocol { sp_value: i32 },
239+
#[snafu(display("Invalid NvmeAnaState value {}", ana_value))]
240+
InvalidNvmeAnaState { ana_value: i32 },
236241
#[snafu(display("Failed to create nexus {}", name))]
237242
NexusCreate { name: String },
238243
#[snafu(display("Failed to destroy nexus {}", name))]
@@ -252,6 +257,16 @@ pub enum Error {
252257
FailedGetHandle,
253258
#[snafu(display("Failed to create snapshot on nexus {}", name))]
254259
FailedCreateSnapshot { name: String, source: CoreError },
260+
#[snafu(display("NVMf subsystem error: {}", e))]
261+
SubsysNvmfError { e: String },
262+
}
263+
264+
impl From<subsys::NvmfError> for Error {
265+
fn from(error: subsys::NvmfError) -> Self {
266+
Error::SubsysNvmfError {
267+
e: error.to_string(),
268+
}
269+
}
255270
}
256271

257272
impl From<Error> for tonic::Status {
@@ -272,6 +287,9 @@ impl From<Error> for tonic::Status {
272287
Error::NotShared {
273288
..
274289
} => Status::invalid_argument(e.to_string()),
290+
Error::NotSharedNvmf {
291+
..
292+
} => Status::invalid_argument(e.to_string()),
275293
Error::CreateChild {
276294
..
277295
} => Status::invalid_argument(e.to_string()),
@@ -630,6 +648,43 @@ impl Nexus {
630648
Ok(())
631649
}
632650

651+
/// get ANA state of the NVMe subsystem
652+
pub async fn get_ana_state(&self) -> Result<NvmeAnaState, Error> {
653+
if let Some(Protocol::Nvmf) = self.shared() {
654+
if let Some(subsystem) = NvmfSubsystem::nqn_lookup(&self.name) {
655+
let ana_state = subsystem.get_ana_state().await? as i32;
656+
return NvmeAnaState::from_i32(ana_state).ok_or({
657+
Error::InvalidNvmeAnaState {
658+
ana_value: ana_state,
659+
}
660+
});
661+
}
662+
}
663+
664+
Err(Error::NotSharedNvmf {
665+
name: self.name.clone(),
666+
})
667+
}
668+
669+
/// set ANA state of the NVMe subsystem
670+
pub async fn set_ana_state(
671+
&self,
672+
ana_state: NvmeAnaState,
673+
) -> Result<(), Error> {
674+
if let Some(Protocol::Nvmf) = self.shared() {
675+
if let Some(subsystem) = NvmfSubsystem::nqn_lookup(&self.name) {
676+
subsystem.pause().await?;
677+
let res = subsystem.set_ana_state(ana_state as u32).await;
678+
subsystem.resume().await?;
679+
return Ok(res?);
680+
}
681+
}
682+
683+
Err(Error::NotSharedNvmf {
684+
name: self.name.clone(),
685+
})
686+
}
687+
633688
/// register the bdev with SPDK and set the callbacks for io channel
634689
/// creation. Once this function is called, the device is visible and can
635690
/// be used for IO.

mayastor/src/bin/mayastor-client/nexus_cli.rs

+91
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,24 @@ pub fn subcommands<'a, 'b>() -> App<'a, 'b> {
5454
.help("uuid for the nexus"),
5555
);
5656

57+
let ana_state = SubCommand::with_name("ana_state")
58+
.about("get or set the NVMe ANA state of the nexus")
59+
.arg(
60+
Arg::with_name("uuid")
61+
.required(true)
62+
.index(1)
63+
.help("uuid for the nexus"),
64+
)
65+
.arg(
66+
Arg::with_name("state")
67+
.required(false)
68+
.index(2)
69+
.possible_value("optimized")
70+
.possible_value("non_optimized")
71+
.possible_value("inaccessible")
72+
.help("NVMe ANA state of the nexus"),
73+
);
74+
5775
let add = SubCommand::with_name("add")
5876
.about("add a child")
5977
.arg(
@@ -122,6 +140,7 @@ pub fn subcommands<'a, 'b>() -> App<'a, 'b> {
122140
.subcommand(add)
123141
.subcommand(remove)
124142
.subcommand(unpublish)
143+
.subcommand(ana_state)
125144
.subcommand(list)
126145
.subcommand(children)
127146
.subcommand(nexus_child_cli::subcommands())
@@ -138,6 +157,7 @@ pub async fn handler(
138157
("children", Some(args)) => nexus_children(ctx, &args).await,
139158
("publish", Some(args)) => nexus_publish(ctx, &args).await,
140159
("unpublish", Some(args)) => nexus_unpublish(ctx, &args).await,
160+
("ana_state", Some(args)) => nexus_nvme_ana_state(ctx, &args).await,
141161
("add", Some(args)) => nexus_add(ctx, &args).await,
142162
("remove", Some(args)) => nexus_remove(ctx, &args).await,
143163
("child", Some(args)) => nexus_child_cli::handler(ctx, args).await,
@@ -321,6 +341,66 @@ async fn nexus_unpublish(
321341
Ok(())
322342
}
323343

344+
async fn nexus_nvme_ana_state(
345+
ctx: Context,
346+
matches: &ArgMatches<'_>,
347+
) -> Result<(), Status> {
348+
let uuid = matches.value_of("uuid").unwrap().to_string();
349+
let ana_state = matches.value_of("state").unwrap_or("").to_string();
350+
if ana_state.is_empty() {
351+
nexus_get_nvme_ana_state(ctx, uuid).await
352+
} else {
353+
nexus_set_nvme_ana_state(ctx, uuid, ana_state).await
354+
}
355+
}
356+
357+
async fn nexus_get_nvme_ana_state(
358+
mut ctx: Context,
359+
uuid: String,
360+
) -> Result<(), Status> {
361+
ctx.v2(&format!("Getting NVMe ANA state for nexus {}", uuid));
362+
let resp = ctx
363+
.client
364+
.get_nvme_ana_state(rpc::GetNvmeAnaStateRequest {
365+
uuid: uuid.clone(),
366+
})
367+
.await?;
368+
ctx.v1(ana_state_idx_to_str(resp.get_ref().ana_state));
369+
Ok(())
370+
}
371+
372+
async fn nexus_set_nvme_ana_state(
373+
mut ctx: Context,
374+
uuid: String,
375+
ana_state_str: String,
376+
) -> Result<(), Status> {
377+
let ana_state: rpc::NvmeAnaState = match ana_state_str.parse() {
378+
Ok(a) => a,
379+
_ => {
380+
return Err(Status::new(
381+
Code::Internal,
382+
"Invalid value of NVMe ANA state".to_owned(),
383+
));
384+
}
385+
};
386+
387+
ctx.v2(&format!(
388+
"Setting NVMe ANA state for nexus {} to {:?}",
389+
uuid, ana_state
390+
));
391+
ctx.client
392+
.set_nvme_ana_state(rpc::SetNvmeAnaStateRequest {
393+
uuid: uuid.clone(),
394+
ana_state: ana_state.into(),
395+
})
396+
.await?;
397+
ctx.v1(&format!(
398+
"Set NVMe ANA state for nexus {} to {:?}",
399+
uuid, ana_state
400+
));
401+
Ok(())
402+
}
403+
324404
async fn nexus_add(
325405
mut ctx: Context,
326406
matches: &ArgMatches<'_>,
@@ -363,6 +443,17 @@ async fn nexus_remove(
363443
Ok(())
364444
}
365445

446+
fn ana_state_idx_to_str(idx: i32) -> &'static str {
447+
match rpc::NvmeAnaState::from_i32(idx).unwrap() {
448+
rpc::NvmeAnaState::NvmeAnaInvalidState => "invalid",
449+
rpc::NvmeAnaState::NvmeAnaOptimizedState => "optimized",
450+
rpc::NvmeAnaState::NvmeAnaNonOptimizedState => "non_optimized",
451+
rpc::NvmeAnaState::NvmeAnaInaccessibleState => "inaccessible",
452+
rpc::NvmeAnaState::NvmeAnaPersistentLossState => "persistent_loss",
453+
rpc::NvmeAnaState::NvmeAnaChangeState => "change",
454+
}
455+
}
456+
366457
fn nexus_state_to_str(idx: i32) -> &'static str {
367458
match rpc::NexusState::from_i32(idx).unwrap() {
368459
rpc::NexusState::NexusUnknown => "unknown",

mayastor/src/bin/mayastor.rs

+21
Original file line numberDiff line numberDiff line change
@@ -42,12 +42,33 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
4242
let free_pages: u32 = sysfs::parse_value(&hugepage_path, "free_hugepages")?;
4343
let nr_pages: u32 = sysfs::parse_value(&hugepage_path, "nr_hugepages")?;
4444
let uring_supported = uring::kernel_support();
45+
let nvme_core_path = Path::new("/sys/module/nvme_core/parameters");
46+
let nvme_mp: String =
47+
match sysfs::parse_value::<String>(&nvme_core_path, "multipath") {
48+
Ok(s) => match s.as_str() {
49+
"Y" => "yes".to_string(),
50+
"N" => "disabled".to_string(),
51+
u => format!("unknown value {}", u),
52+
},
53+
Err(e) => {
54+
if e.kind() == std::io::ErrorKind::NotFound {
55+
if nvme_core_path.exists() {
56+
"not built".to_string()
57+
} else {
58+
"nvme not loaded".to_string()
59+
}
60+
} else {
61+
format!("unknown error: {}", e)
62+
}
63+
}
64+
};
4565

4666
info!("Starting Mayastor ..");
4767
info!(
4868
"kernel io_uring support: {}",
4969
if uring_supported { "yes" } else { "no" }
5070
);
71+
info!("kernel nvme initiator multipath support: {}", nvme_mp);
5172
info!("free_pages: {} nr_pages: {}", free_pages, nr_pages);
5273

5374
let grpc_endpoint = grpc::endpoint(args.grpc_endpoint.clone());

mayastor/src/grpc/mayastor_grpc.rs

+46
Original file line numberDiff line numberDiff line change
@@ -289,6 +289,52 @@ impl mayastor_server::Mayastor for MayastorSvc {
289289
.await
290290
}
291291

292+
#[instrument(level = "debug", err)]
293+
async fn get_nvme_ana_state(
294+
&self,
295+
request: Request<GetNvmeAnaStateRequest>,
296+
) -> GrpcResult<GetNvmeAnaStateReply> {
297+
let args = request.into_inner();
298+
let uuid = args.uuid.clone();
299+
debug!("Getting NVMe ANA state for nexus {} ...", uuid);
300+
301+
let ana_state = locally! { async move {
302+
nexus_lookup(&args.uuid)?.get_ana_state().await
303+
}};
304+
305+
info!("Got nexus {} NVMe ANA state {:?}", uuid, ana_state);
306+
Ok(Response::new(GetNvmeAnaStateReply {
307+
ana_state: ana_state as i32,
308+
}))
309+
}
310+
311+
#[instrument(level = "debug", err)]
312+
async fn set_nvme_ana_state(
313+
&self,
314+
request: Request<SetNvmeAnaStateRequest>,
315+
) -> GrpcResult<Null> {
316+
let args = request.into_inner();
317+
let uuid = args.uuid.clone();
318+
debug!("Setting NVMe ANA state for nexus {} ...", uuid);
319+
320+
let ana_state = match NvmeAnaState::from_i32(args.ana_state) {
321+
Some(ana_state) => ana_state,
322+
None => {
323+
return Err(nexus_bdev::Error::InvalidNvmeAnaState {
324+
ana_value: args.ana_state as i32,
325+
}
326+
.into());
327+
}
328+
};
329+
330+
locally! { async move {
331+
nexus_lookup(&args.uuid)?.set_ana_state(ana_state).await
332+
}};
333+
334+
info!("Set nexus {} NVMe ANA state {:?}", uuid, ana_state);
335+
Ok(Response::new(Null {}))
336+
}
337+
292338
#[instrument(level = "debug", err)]
293339
async fn child_operation(
294340
&self,

0 commit comments

Comments
 (0)