Skip to content

Commit b1cffbe

Browse files
committed
feat: add RDMA listener to Mayastor Nvmf target
This adds the capability for the Mayastor Nvmf target to listen for RDMA connections if the rdma feature is enabled during installation. Any Nvmf subsystem facing the host, i.e. the nexus nvmf subsystem, will now be able to support both tcp and rdma. Signed-off-by: Diwakar Sharma <diwakar.sharma@datacore.com>
1 parent c661e1b commit b1cffbe

File tree

9 files changed

+322
-98
lines changed

9 files changed

+322
-98
lines changed

io-engine/src/bin/io-engine.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ fn start_tokio_runtime(args: &MayastorCliArgs) {
108108

109109
if args.rdma {
110110
env::set_var("ENABLE_RDMA", "true");
111-
warn!("RDMA is enabled for Mayastor NVMEoF target");
111+
warn!("RDMA is requested to be enabled for Mayastor NVMEoF target");
112112
}
113113

114114
unsafe {

io-engine/src/core/bdev.rs

+3-2
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ use snafu::ResultExt;
1111
use spdk_rs::libspdk::{spdk_bdev, spdk_get_ticks_hz};
1212

1313
use crate::{
14-
bdev::bdev_event_callback,
14+
bdev::{bdev_event_callback, nexus::NEXUS_MODULE_NAME},
1515
bdev_api::bdev_uri_eq,
1616
core::{
1717
share::{NvmfShareProps, Protocol, Share, UpdateProps},
@@ -210,6 +210,7 @@ where
210210
) -> Result<Self::Output, Self::Error> {
211211
let me = unsafe { self.get_unchecked_mut() };
212212
let props = NvmfShareProps::from(props);
213+
let is_nexus_bdev = me.driver() == NEXUS_MODULE_NAME;
213214

214215
let ptpl = props.ptpl().as_ref().map(|ptpl| ptpl.path());
215216

@@ -232,7 +233,7 @@ where
232233
.await
233234
.context(ShareNvmf {})?;
234235

235-
subsystem.start().await.context(ShareNvmf {})
236+
subsystem.start(is_nexus_bdev).await.context(ShareNvmf {})
236237
}
237238

238239
fn create_ptpl(&self) -> Result<Option<PtplProps>, Self::Error> {

io-engine/src/core/env.rs

+5
Original file line numberDiff line numberDiff line change
@@ -834,6 +834,11 @@ impl MayastorEnvironment {
834834
.map(|s| s.clone())
835835
}
836836

837+
/// Check if RDMA needs to be enabled for Mayastor nvmf target.
838+
pub fn rdma(&self) -> bool {
839+
self.rdma
840+
}
841+
837842
/// Detects IP address for NVMF target by the interface specified in CLI
838843
/// arguments.
839844
fn detect_nvmf_tgt_iface_ip(iface: &str) -> Result<String, String> {

io-engine/src/subsys/config/opts.rs

+58-30
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ use std::{
3535
};
3636

3737
use crate::core::MayastorEnvironment;
38+
use strum_macros::{AsRefStr, EnumString, EnumVariantNames};
3839

3940
pub trait GetOpts {
4041
fn get(&self) -> Self;
@@ -84,6 +85,54 @@ impl GetOpts for NexusOpts {
8485
/// Must be equal to the size of `spdk_nvmf_target_opts.crdt`.
8586
pub const TARGET_CRDT_LEN: usize = 3;
8687

88+
#[derive(Clone, Default, EnumString, EnumVariantNames, AsRefStr)]
89+
#[strum(serialize_all = "lowercase")]
90+
pub enum NvmfTgtTransport {
91+
Rdma,
92+
#[default]
93+
Tcp,
94+
}
95+
96+
impl NvmfTransportOpts {
97+
/// Tweak a few opts more suited for rdma.
98+
fn for_rdma(mut self) -> Self {
99+
self.in_capsule_data_size = try_from_env(
100+
"NVMF_RDMA_IN_CAPSULE_DATA_SIZE",
101+
self.in_capsule_data_size,
102+
);
103+
self.io_unit_size = try_from_env("NVMF_RDMA_IO_UNIT_SIZE", 8192); // SPDK_NVMF_RDMA_MIN_IO_BUFFER_SIZE
104+
self.data_wr_pool_size =
105+
try_from_env("NVMF_RDMA_DATA_WR_POOL_SIZE", 4095); // SPDK_NVMF_RDMA_DEFAULT_DATA_WR_POOL_SIZE
106+
self.num_shared_buf =
107+
try_from_env("NVMF_RDMA_NUM_SHARED_BUF", self.num_shared_buf);
108+
self
109+
}
110+
}
111+
112+
impl Default for NvmfTransportOpts {
113+
fn default() -> Self {
114+
Self {
115+
max_queue_depth: try_from_env("NVMF_TCP_MAX_QUEUE_DEPTH", 32),
116+
in_capsule_data_size: 4096,
117+
max_io_size: 131_072,
118+
io_unit_size: 131_072,
119+
max_qpairs_per_ctrl: try_from_env(
120+
"NVMF_TCP_MAX_QPAIRS_PER_CTRL",
121+
32,
122+
),
123+
num_shared_buf: try_from_env("NVMF_TCP_NUM_SHARED_BUF", 2047),
124+
buf_cache_size: try_from_env("NVMF_TCP_BUF_CACHE_SIZE", 64),
125+
dif_insert_or_strip: false,
126+
max_aq_depth: 32,
127+
abort_timeout_sec: 1,
128+
acceptor_poll_rate: try_from_env("NVMF_ACCEPTOR_POLL_RATE", 10_000),
129+
zcopy: try_from_env("NVMF_ZCOPY", 1) == 1,
130+
ack_timeout: try_from_env("NVMF_ACK_TIMEOUT", 0),
131+
data_wr_pool_size: 0,
132+
}
133+
}
134+
}
135+
87136
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
88137
#[serde(default, deny_unknown_fields)]
89138
pub struct NvmfTgtConfig {
@@ -94,11 +143,13 @@ pub struct NvmfTgtConfig {
94143
/// NVMF target Command Retry Delay in x100 ms.
95144
pub crdt: [u16; TARGET_CRDT_LEN],
96145
/// TCP transport options
97-
pub opts: NvmfTcpTransportOpts,
146+
pub opts_tcp: NvmfTransportOpts,
98147
/// NVMF target interface (ip, mac, name or subnet).
99148
pub interface: Option<String>,
100149
/// Enable RDMA for NVMF target or not
101150
pub rdma: Option<bool>,
151+
/// RDMA transport options.
152+
pub opts_rdma: NvmfTransportOpts,
102153
}
103154

104155
impl From<NvmfTgtConfig> for Box<spdk_nvmf_target_opts> {
@@ -126,9 +177,10 @@ impl Default for NvmfTgtConfig {
126177
name: "mayastor_target".to_string(),
127178
max_namespaces: 2048,
128179
crdt: args.nvmf_tgt_crdt,
129-
opts: NvmfTcpTransportOpts::default(),
180+
opts_tcp: NvmfTransportOpts::default(),
130181
interface: None,
131182
rdma: None,
183+
opts_rdma: NvmfTransportOpts::default().for_rdma(),
132184
}
133185
}
134186
}
@@ -139,10 +191,10 @@ impl GetOpts for NvmfTgtConfig {
139191
}
140192
}
141193

142-
/// Settings for the TCP transport
194+
/// Nvmf settings for the transports
143195
#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
144196
#[serde(default, deny_unknown_fields)]
145-
pub struct NvmfTcpTransportOpts {
197+
pub struct NvmfTransportOpts {
146198
/// max queue depth
147199
max_queue_depth: u16,
148200
/// max qpairs per controller
@@ -264,35 +316,11 @@ where
264316
}
265317
}
266318

267-
impl Default for NvmfTcpTransportOpts {
268-
fn default() -> Self {
269-
Self {
270-
max_queue_depth: try_from_env("NVMF_TCP_MAX_QUEUE_DEPTH", 32),
271-
in_capsule_data_size: 4096,
272-
max_io_size: 131_072,
273-
io_unit_size: 131_072,
274-
max_qpairs_per_ctrl: try_from_env(
275-
"NVMF_TCP_MAX_QPAIRS_PER_CTRL",
276-
32,
277-
),
278-
num_shared_buf: try_from_env("NVMF_TCP_NUM_SHARED_BUF", 2047),
279-
buf_cache_size: try_from_env("NVMF_TCP_BUF_CACHE_SIZE", 64),
280-
dif_insert_or_strip: false,
281-
max_aq_depth: 32,
282-
abort_timeout_sec: 1,
283-
acceptor_poll_rate: try_from_env("NVMF_ACCEPTOR_POLL_RATE", 10_000),
284-
zcopy: try_from_env("NVMF_ZCOPY", 1) == 1,
285-
ack_timeout: try_from_env("NVMF_ACK_TIMEOUT", 0),
286-
data_wr_pool_size: try_from_env("NVMF_DATA_WR_POOL_SIZE", 0),
287-
}
288-
}
289-
}
290-
291319
/// we cannot add derives for YAML to these structs directly, so we need to
292320
/// copy them. The upside though, is that if the FFI structures change, we will
293321
/// know about it during compile time.
294-
impl From<NvmfTcpTransportOpts> for spdk_nvmf_transport_opts {
295-
fn from(o: NvmfTcpTransportOpts) -> Self {
322+
impl From<NvmfTransportOpts> for spdk_nvmf_transport_opts {
323+
fn from(o: NvmfTransportOpts) -> Self {
296324
struct_size_init!(
297325
Self {
298326
max_queue_depth: o.max_queue_depth,

io-engine/src/subsys/nvmf/subsystem.rs

+74-26
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ use spdk_rs::{
4848
spdk_nvmf_subsystem_state_change_done,
4949
spdk_nvmf_subsystem_stop,
5050
spdk_nvmf_tgt,
51+
spdk_nvmf_tgt_get_transport,
5152
SPDK_NVME_SCT_GENERIC,
5253
SPDK_NVME_SC_CAPACITY_EXCEEDED,
5354
SPDK_NVME_SC_RESERVATION_CONFLICT,
@@ -68,8 +69,13 @@ use crate::{
6869
ffihelper::{cb_arg, done_cb, AsStr, FfiResult, IntoCString},
6970
lvs::Lvol,
7071
subsys::{
72+
config::opts::NvmfTgtTransport,
7173
make_subsystem_serial,
72-
nvmf::{transport::TransportId, Error, NVMF_TGT},
74+
nvmf::{
75+
transport::{TransportId, RDMA_TRANSPORT},
76+
Error,
77+
NVMF_TGT,
78+
},
7379
Config,
7480
},
7581
};
@@ -798,7 +804,10 @@ impl NvmfSubsystem {
798804
}
799805

800806
// we currently allow all listeners to the subsystem
801-
async fn add_listener(&self) -> Result<(), Error> {
807+
async fn add_listener(
808+
&self,
809+
transport: NvmfTgtTransport,
810+
) -> Result<(), Error> {
802811
extern "C" fn listen_cb(arg: *mut c_void, status: i32) {
803812
let s = unsafe { Box::from_raw(arg as *mut oneshot::Sender<i32>) };
804813
s.send(status).unwrap();
@@ -807,8 +816,8 @@ impl NvmfSubsystem {
807816
let cfg = Config::get();
808817

809818
// dont yet enable both ports, IOW just add one transportID now
810-
811-
let trid_replica = TransportId::new(cfg.nexus_opts.nvmf_replica_port);
819+
let trid_replica =
820+
TransportId::new(cfg.nexus_opts.nvmf_replica_port, transport);
812821

813822
let (s, r) = oneshot::channel::<i32>();
814823
unsafe {
@@ -907,8 +916,23 @@ impl NvmfSubsystem {
907916
/// start the subsystem previously created -- note that we destroy it on
908917
/// failure to ensure the state is not in limbo and to avoid leaking
909918
/// resources
910-
pub async fn start(self) -> Result<String, Error> {
911-
self.add_listener().await?;
919+
pub async fn start(self, need_rdma: bool) -> Result<String, Error> {
920+
self.add_listener(NvmfTgtTransport::Tcp).await?;
921+
// Only attempt rdma listener addition for this subsystem after making
922+
// sure the Mayastor nvmf tgt has rdma transport created.
923+
if need_rdma && self.nvmf_tgt_has_rdma_xprt() {
924+
let _ =
925+
self.add_listener(NvmfTgtTransport::Rdma)
926+
.await
927+
.map_err(|e| {
928+
warn!(
929+
"NvmfSubsystem RDMA listener add failed {}. \
930+
Subsystem will be accessible over TCP only.\
931+
{:?}",
932+
e, self
933+
);
934+
});
935+
}
912936

913937
if let Err(e) = self
914938
.change_state("start", |ss, cb, arg| unsafe {
@@ -958,9 +982,15 @@ impl NvmfSubsystem {
958982
}
959983

960984
/// get ANA state
985+
/// XXX: The SPDK NVME multipath is transport protocol independent. Should
986+
/// we keep returning the ana state here by default using TCP transport
987+
/// as today?
961988
pub async fn get_ana_state(&self) -> Result<u32, Error> {
962989
let cfg = Config::get();
963-
let trid_replica = TransportId::new(cfg.nexus_opts.nvmf_replica_port);
990+
let trid_replica = TransportId::new(
991+
cfg.nexus_opts.nvmf_replica_port,
992+
NvmfTgtTransport::Tcp,
993+
);
964994
let listener = unsafe {
965995
nvmf_subsystem_find_listener(self.0.as_ptr(), trid_replica.as_ptr())
966996
};
@@ -981,29 +1011,35 @@ impl NvmfSubsystem {
9811011
let s = unsafe { Box::from_raw(arg as *mut oneshot::Sender<i32>) };
9821012
s.send(status).unwrap();
9831013
}
984-
let cfg = Config::get();
985-
let trid_replica = TransportId::new(cfg.nexus_opts.nvmf_replica_port);
9861014

987-
let (s, r) = oneshot::channel::<i32>();
1015+
// setting ANA state can only be done when subsystem is shared, meaning
1016+
// it'll have listeners configured. So let's fetch transport ids
1017+
// based on active listeners instead of reading static Config.
1018+
let trids = self.listeners_to_vec().unwrap_or_default();
9881019

989-
unsafe {
990-
spdk_nvmf_subsystem_set_ana_state(
991-
self.0.as_ptr(),
992-
trid_replica.as_ptr(),
993-
ana_state,
994-
0,
995-
Some(set_ana_state_cb),
996-
cb_arg(s),
997-
);
1020+
for trid in trids {
1021+
debug!("set_ana_state {ana_state}, {trid:?}");
1022+
let (s, r) = oneshot::channel::<i32>();
1023+
unsafe {
1024+
spdk_nvmf_subsystem_set_ana_state(
1025+
self.0.as_ptr(),
1026+
trid.as_ptr(),
1027+
ana_state,
1028+
0,
1029+
Some(set_ana_state_cb),
1030+
cb_arg(s),
1031+
);
1032+
}
1033+
r.await
1034+
.expect("Cancellation is not supported")
1035+
.to_result(|e| Error::Subsystem {
1036+
source: Errno::from_i32(-e),
1037+
nqn: self.get_nqn(),
1038+
msg: "failed to set_ana_state of the subsystem".to_string(),
1039+
})?;
9981040
}
9991041

1000-
r.await
1001-
.expect("Cancellation is not supported")
1002-
.to_result(|e| Error::Subsystem {
1003-
source: Errno::from_i32(-e),
1004-
nqn: self.get_nqn(),
1005-
msg: "failed to set_ana_state of the subsystem".to_string(),
1006-
})
1042+
Ok(())
10071043
}
10081044

10091045
/// destroy all subsystems associated with our target, subsystems must be in
@@ -1073,6 +1109,18 @@ impl NvmfSubsystem {
10731109
Bdev::checked_from_ptr(unsafe { spdk_nvmf_ns_get_bdev(ns) })
10741110
}
10751111

1112+
fn nvmf_tgt_has_rdma_xprt(&self) -> bool {
1113+
NVMF_TGT.with(|t| {
1114+
let transport = unsafe {
1115+
spdk_nvmf_tgt_get_transport(
1116+
t.borrow().tgt.as_ptr(),
1117+
RDMA_TRANSPORT.as_ptr(),
1118+
)
1119+
};
1120+
!transport.is_null()
1121+
})
1122+
}
1123+
10761124
fn listeners_to_vec(&self) -> Option<Vec<TransportId>> {
10771125
unsafe {
10781126
let mut listener =

0 commit comments

Comments
 (0)