Skip to content

Commit 0b51617

Browse files
author
mayastor-bors
committed
Try #1701:
2 parents c661e1b + 07ca713 commit 0b51617

File tree

9 files changed

+300
-76
lines changed

9 files changed

+300
-76
lines changed

io-engine/src/bin/io-engine.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ fn start_tokio_runtime(args: &MayastorCliArgs) {
108108

109109
if args.rdma {
110110
env::set_var("ENABLE_RDMA", "true");
111-
warn!("RDMA is enabled for Mayastor NVMEoF target");
111+
warn!("RDMA is requested to be enabled for Mayastor NVMEoF target");
112112
}
113113

114114
unsafe {

io-engine/src/core/bdev.rs

+3-2
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ use snafu::ResultExt;
1111
use spdk_rs::libspdk::{spdk_bdev, spdk_get_ticks_hz};
1212

1313
use crate::{
14-
bdev::bdev_event_callback,
14+
bdev::{bdev_event_callback, nexus::NEXUS_MODULE_NAME},
1515
bdev_api::bdev_uri_eq,
1616
core::{
1717
share::{NvmfShareProps, Protocol, Share, UpdateProps},
@@ -210,6 +210,7 @@ where
210210
) -> Result<Self::Output, Self::Error> {
211211
let me = unsafe { self.get_unchecked_mut() };
212212
let props = NvmfShareProps::from(props);
213+
let is_nexus_bdev = me.driver() == NEXUS_MODULE_NAME;
213214

214215
let ptpl = props.ptpl().as_ref().map(|ptpl| ptpl.path());
215216

@@ -232,7 +233,7 @@ where
232233
.await
233234
.context(ShareNvmf {})?;
234235

235-
subsystem.start().await.context(ShareNvmf {})
236+
subsystem.start(is_nexus_bdev).await.context(ShareNvmf {})
236237
}
237238

238239
fn create_ptpl(&self) -> Result<Option<PtplProps>, Self::Error> {

io-engine/src/core/env.rs

+5
Original file line numberDiff line numberDiff line change
@@ -834,6 +834,11 @@ impl MayastorEnvironment {
834834
.map(|s| s.clone())
835835
}
836836

837+
/// Check if RDMA needs to be enabled for Mayastor nvmf target.
838+
pub fn rdma(&self) -> bool {
839+
self.rdma
840+
}
841+
837842
/// Detects IP address for NVMF target by the interface specified in CLI
838843
/// arguments.
839844
fn detect_nvmf_tgt_iface_ip(iface: &str) -> Result<String, String> {

io-engine/src/subsys/config/opts.rs

+36-8
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ use std::{
3535
};
3636

3737
use crate::core::MayastorEnvironment;
38+
use strum_macros::{AsRefStr, EnumString, EnumVariantNames};
3839

3940
pub trait GetOpts {
4041
fn get(&self) -> Self;
@@ -84,6 +85,30 @@ impl GetOpts for NexusOpts {
8485
/// Must be equal to the size of `spdk_nvmf_target_opts.crdt`.
8586
pub const TARGET_CRDT_LEN: usize = 3;
8687

88+
#[derive(Clone, Default, EnumString, EnumVariantNames, AsRefStr)]
89+
#[strum(serialize_all = "lowercase")]
90+
pub enum NvmfTgtTransport {
91+
Rdma,
92+
#[default]
93+
Tcp,
94+
}
95+
96+
impl NvmfTransportOpts {
97+
/// Tweak a few options to values better suited for the RDMA transport.
98+
fn for_rdma(mut self) -> Self {
99+
self.in_capsule_data_size = try_from_env(
100+
"NVMF_RDMA_IN_CAPSULE_DATA_SIZE",
101+
self.in_capsule_data_size,
102+
);
103+
self.io_unit_size = try_from_env("NVMF_RDMA_IO_UNIT_SIZE", 8192); // SPDK_NVMF_RDMA_MIN_IO_BUFFER_SIZE
104+
self.data_wr_pool_size =
105+
try_from_env("NVMF_RDMA_DATA_WR_POOL_SIZE", 4095); // SPDK_NVMF_RDMA_DEFAULT_DATA_WR_POOL_SIZE
106+
self.num_shared_buf =
107+
try_from_env("NVMF_RDMA_NUM_SHARED_BUF", self.num_shared_buf);
108+
self
109+
}
110+
}
111+
87112
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
88113
#[serde(default, deny_unknown_fields)]
89114
pub struct NvmfTgtConfig {
@@ -94,11 +119,13 @@ pub struct NvmfTgtConfig {
94119
/// NVMF target Command Retry Delay in x100 ms.
95120
pub crdt: [u16; TARGET_CRDT_LEN],
96121
/// TCP transport options
97-
pub opts: NvmfTcpTransportOpts,
122+
pub opts_tcp: NvmfTransportOpts,
98123
/// NVMF target interface (ip, mac, name or subnet).
99124
pub interface: Option<String>,
100125
/// Whether to enable RDMA for the NVMF target.
101126
pub rdma: Option<bool>,
127+
/// RDMA transport options.
128+
pub opts_rdma: NvmfTransportOpts,
102129
}
103130

104131
impl From<NvmfTgtConfig> for Box<spdk_nvmf_target_opts> {
@@ -126,9 +153,10 @@ impl Default for NvmfTgtConfig {
126153
name: "mayastor_target".to_string(),
127154
max_namespaces: 2048,
128155
crdt: args.nvmf_tgt_crdt,
129-
opts: NvmfTcpTransportOpts::default(),
156+
opts_tcp: NvmfTransportOpts::default(),
130157
interface: None,
131158
rdma: None,
159+
opts_rdma: NvmfTransportOpts::default().for_rdma(),
132160
}
133161
}
134162
}
@@ -139,10 +167,10 @@ impl GetOpts for NvmfTgtConfig {
139167
}
140168
}
141169

142-
/// Settings for the TCP transport
170+
/// Nvmf settings for the transports
143171
#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
144172
#[serde(default, deny_unknown_fields)]
145-
pub struct NvmfTcpTransportOpts {
173+
pub struct NvmfTransportOpts {
146174
/// max queue depth
147175
max_queue_depth: u16,
148176
/// max qpairs per controller
@@ -264,7 +292,7 @@ where
264292
}
265293
}
266294

267-
impl Default for NvmfTcpTransportOpts {
295+
impl Default for NvmfTransportOpts {
268296
fn default() -> Self {
269297
Self {
270298
max_queue_depth: try_from_env("NVMF_TCP_MAX_QUEUE_DEPTH", 32),
@@ -283,16 +311,16 @@ impl Default for NvmfTcpTransportOpts {
283311
acceptor_poll_rate: try_from_env("NVMF_ACCEPTOR_POLL_RATE", 10_000),
284312
zcopy: try_from_env("NVMF_ZCOPY", 1) == 1,
285313
ack_timeout: try_from_env("NVMF_ACK_TIMEOUT", 0),
286-
data_wr_pool_size: try_from_env("NVMF_DATA_WR_POOL_SIZE", 0),
314+
data_wr_pool_size: 0,
287315
}
288316
}
289317
}
290318

291319
/// We cannot add derives for YAML to these structs directly, so we need to
292320
/// copy them. The upside though, is that if the FFI structures change, we will
293321
/// know about it during compile time.
294-
impl From<NvmfTcpTransportOpts> for spdk_nvmf_transport_opts {
295-
fn from(o: NvmfTcpTransportOpts) -> Self {
322+
impl From<NvmfTransportOpts> for spdk_nvmf_transport_opts {
323+
fn from(o: NvmfTransportOpts) -> Self {
296324
struct_size_init!(
297325
Self {
298326
max_queue_depth: o.max_queue_depth,

io-engine/src/subsys/nvmf/subsystem.rs

+74-26
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ use spdk_rs::{
4848
spdk_nvmf_subsystem_state_change_done,
4949
spdk_nvmf_subsystem_stop,
5050
spdk_nvmf_tgt,
51+
spdk_nvmf_tgt_get_transport,
5152
SPDK_NVME_SCT_GENERIC,
5253
SPDK_NVME_SC_CAPACITY_EXCEEDED,
5354
SPDK_NVME_SC_RESERVATION_CONFLICT,
@@ -68,8 +69,13 @@ use crate::{
6869
ffihelper::{cb_arg, done_cb, AsStr, FfiResult, IntoCString},
6970
lvs::Lvol,
7071
subsys::{
72+
config::opts::NvmfTgtTransport,
7173
make_subsystem_serial,
72-
nvmf::{transport::TransportId, Error, NVMF_TGT},
74+
nvmf::{
75+
transport::{TransportId, RDMA_TRANSPORT},
76+
Error,
77+
NVMF_TGT,
78+
},
7379
Config,
7480
},
7581
};
@@ -798,7 +804,10 @@ impl NvmfSubsystem {
798804
}
799805

800806
// we currently allow all listeners to the subsystem
801-
async fn add_listener(&self) -> Result<(), Error> {
807+
async fn add_listener(
808+
&self,
809+
transport: NvmfTgtTransport,
810+
) -> Result<(), Error> {
802811
extern "C" fn listen_cb(arg: *mut c_void, status: i32) {
803812
let s = unsafe { Box::from_raw(arg as *mut oneshot::Sender<i32>) };
804813
s.send(status).unwrap();
@@ -807,8 +816,8 @@ impl NvmfSubsystem {
807816
let cfg = Config::get();
808817

809818
// don't yet enable both ports, IOW just add one transportID now
810-
811-
let trid_replica = TransportId::new(cfg.nexus_opts.nvmf_replica_port);
819+
let trid_replica =
820+
TransportId::new(cfg.nexus_opts.nvmf_replica_port, transport);
812821

813822
let (s, r) = oneshot::channel::<i32>();
814823
unsafe {
@@ -907,8 +916,23 @@ impl NvmfSubsystem {
907916
/// start the subsystem previously created -- note that we destroy it on
908917
/// failure to ensure the state is not in limbo and to avoid leaking
909918
/// resources
910-
pub async fn start(self) -> Result<String, Error> {
911-
self.add_listener().await?;
919+
pub async fn start(self, need_rdma: bool) -> Result<String, Error> {
920+
self.add_listener(NvmfTgtTransport::Tcp).await?;
921+
// Only attempt rdma listener addition for this subsystem after making
922+
// sure the Mayastor nvmf tgt has rdma transport created.
923+
if need_rdma && self.nvmf_tgt_has_rdma_xprt() {
924+
let _ =
925+
self.add_listener(NvmfTgtTransport::Rdma)
926+
.await
927+
.map_err(|e| {
928+
warn!(
929+
"NvmfSubsystem RDMA listener add failed {}. \
930+
Subsystem will be accessible over TCP only.\
931+
{:?}",
932+
e, self
933+
);
934+
});
935+
}
912936

913937
if let Err(e) = self
914938
.change_state("start", |ss, cb, arg| unsafe {
@@ -958,9 +982,15 @@ impl NvmfSubsystem {
958982
}
959983

960984
/// get ANA state
985+
/// XXX: The SPDK NVME multipath is transport protocol independent. Should
986+
/// we keep returning the ana state here by default using TCP transport
987+
/// as today?
961988
pub async fn get_ana_state(&self) -> Result<u32, Error> {
962989
let cfg = Config::get();
963-
let trid_replica = TransportId::new(cfg.nexus_opts.nvmf_replica_port);
990+
let trid_replica = TransportId::new(
991+
cfg.nexus_opts.nvmf_replica_port,
992+
NvmfTgtTransport::Tcp,
993+
);
964994
let listener = unsafe {
965995
nvmf_subsystem_find_listener(self.0.as_ptr(), trid_replica.as_ptr())
966996
};
@@ -981,29 +1011,35 @@ impl NvmfSubsystem {
9811011
let s = unsafe { Box::from_raw(arg as *mut oneshot::Sender<i32>) };
9821012
s.send(status).unwrap();
9831013
}
984-
let cfg = Config::get();
985-
let trid_replica = TransportId::new(cfg.nexus_opts.nvmf_replica_port);
9861014

987-
let (s, r) = oneshot::channel::<i32>();
1015+
// setting ANA state can only be done when subsystem is shared, meaning
1016+
// it'll have listeners configured. So let's fetch transport ids
1017+
// based on active listeners instead of reading static Config.
1018+
let trids = self.listeners_to_vec().unwrap_or_default();
9881019

989-
unsafe {
990-
spdk_nvmf_subsystem_set_ana_state(
991-
self.0.as_ptr(),
992-
trid_replica.as_ptr(),
993-
ana_state,
994-
0,
995-
Some(set_ana_state_cb),
996-
cb_arg(s),
997-
);
1020+
for trid in trids {
1021+
debug!("set_ana_state {ana_state}, {trid:?}");
1022+
let (s, r) = oneshot::channel::<i32>();
1023+
unsafe {
1024+
spdk_nvmf_subsystem_set_ana_state(
1025+
self.0.as_ptr(),
1026+
trid.as_ptr(),
1027+
ana_state,
1028+
0,
1029+
Some(set_ana_state_cb),
1030+
cb_arg(s),
1031+
);
1032+
}
1033+
r.await
1034+
.expect("Cancellation is not supported")
1035+
.to_result(|e| Error::Subsystem {
1036+
source: Errno::from_i32(-e),
1037+
nqn: self.get_nqn(),
1038+
msg: "failed to set_ana_state of the subsystem".to_string(),
1039+
})?;
9981040
}
9991041

1000-
r.await
1001-
.expect("Cancellation is not supported")
1002-
.to_result(|e| Error::Subsystem {
1003-
source: Errno::from_i32(-e),
1004-
nqn: self.get_nqn(),
1005-
msg: "failed to set_ana_state of the subsystem".to_string(),
1006-
})
1042+
Ok(())
10071043
}
10081044

10091045
/// destroy all subsystems associated with our target, subsystems must be in
@@ -1073,6 +1109,18 @@ impl NvmfSubsystem {
10731109
Bdev::checked_from_ptr(unsafe { spdk_nvmf_ns_get_bdev(ns) })
10741110
}
10751111

1112+
fn nvmf_tgt_has_rdma_xprt(&self) -> bool {
1113+
NVMF_TGT.with(|t| {
1114+
let transport = unsafe {
1115+
spdk_nvmf_tgt_get_transport(
1116+
t.borrow().tgt.as_ptr(),
1117+
RDMA_TRANSPORT.as_ptr(),
1118+
)
1119+
};
1120+
!transport.is_null()
1121+
})
1122+
}
1123+
10761124
fn listeners_to_vec(&self) -> Option<Vec<TransportId>> {
10771125
unsafe {
10781126
let mut listener =

0 commit comments

Comments
 (0)