Skip to content

Commit dd74e56

Browse files
committed
feat: add RDMA listener to Mayastor Nvmf target
This adds the capability to listen for RDMA connections to the Mayastor Nvmf target if the RDMA feature is enabled during installation. Any Nvmf subsystem facing the host, i.e. the nexus Nvmf subsystem, will now be able to support both TCP and RDMA. Signed-off-by: Diwakar Sharma <diwakar.sharma@datacore.com>
1 parent 09f9189 commit dd74e56

File tree

9 files changed

+299
-35
lines changed

9 files changed

+299
-35
lines changed

io-engine/src/bin/io-engine.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ fn start_tokio_runtime(args: &MayastorCliArgs) {
108108

109109
if args.rdma {
110110
env::set_var("ENABLE_RDMA", "true");
111-
warn!("RDMA is enabled for Mayastor NVMEoF target");
111+
warn!("RDMA is requested to be enabled for Mayastor NVMEoF target");
112112
}
113113

114114
print_feature!("Async QPair connection", "spdk-async-qpair-connect");

io-engine/src/core/bdev.rs

+2-1
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,7 @@ where
210210
) -> Result<Self::Output, Self::Error> {
211211
let me = unsafe { self.get_unchecked_mut() };
212212
let props = NvmfShareProps::from(props);
213+
let is_lvol = me.driver() == "lvol";
213214

214215
let ptpl = props.ptpl().as_ref().map(|ptpl| ptpl.path());
215216

@@ -232,7 +233,7 @@ where
232233
.await
233234
.context(ShareNvmf {})?;
234235

235-
subsystem.start().await.context(ShareNvmf {})
236+
subsystem.start(!is_lvol).await.context(ShareNvmf {})
236237
}
237238

238239
fn create_ptpl(&self) -> Result<Option<PtplProps>, Self::Error> {

io-engine/src/core/env.rs

+5
Original file line numberDiff line numberDiff line change
@@ -827,6 +827,11 @@ impl MayastorEnvironment {
827827
.map(|s| s.clone())
828828
}
829829

830+
/// Check if RDMA needs to be enabled for Mayastor target.
831+
pub fn rdma(&self) -> bool {
832+
self.rdma
833+
}
834+
830835
/// Detects IP address for NVMF target by the interface specified in CLI
831836
/// arguments.
832837
fn detect_nvmf_tgt_iface_ip(iface: &str) -> Result<String, String> {

io-engine/src/subsys/config/opts.rs

+103-3
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ use spdk_rs::{
2828

2929
use std::{
3030
convert::TryFrom,
31-
fmt::{Debug, Display},
31+
fmt::{Debug, Display, Formatter},
3232
mem::zeroed,
3333
ptr::null_mut,
3434
str::FromStr,
@@ -84,6 +84,26 @@ impl GetOpts for NexusOpts {
8484
/// Must be equal to the size of `spdk_nvmf_target_opts.crdt`.
8585
pub const TARGET_CRDT_LEN: usize = 3;
8686

87+
#[derive(Clone, Default, Debug, PartialEq, Serialize, Deserialize)]
88+
pub enum NvmfTgtTransport {
89+
Rdma,
90+
#[default]
91+
Tcp,
92+
}
93+
94+
impl Display for NvmfTgtTransport {
95+
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
96+
write!(
97+
f,
98+
"{}",
99+
match self {
100+
NvmfTgtTransport::Rdma => "rdma",
101+
NvmfTgtTransport::Tcp => "tcp",
102+
}
103+
)
104+
}
105+
}
106+
87107
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
88108
#[serde(default, deny_unknown_fields)]
89109
pub struct NvmfTgtConfig {
@@ -94,11 +114,13 @@ pub struct NvmfTgtConfig {
94114
/// NVMF target Command Retry Delay in x100 ms.
95115
pub crdt: [u16; TARGET_CRDT_LEN],
96116
/// TCP transport options
97-
pub opts: NvmfTcpTransportOpts,
117+
pub opts_tcp: NvmfTcpTransportOpts,
98118
/// NVMF target interface (ip, mac, name or subnet).
99119
pub interface: Option<String>,
100120
/// Enable RDMA for NVMF target or not
101121
pub rdma: Option<bool>,
122+
/// RDMA transport options
123+
pub opts_rdma: NvmfRdmaTransportOpts,
102124
}
103125

104126
impl From<NvmfTgtConfig> for Box<spdk_nvmf_target_opts> {
@@ -121,9 +143,10 @@ impl Default for NvmfTgtConfig {
121143
name: "mayastor_target".to_string(),
122144
max_namespaces: 2048,
123145
crdt: args.nvmf_tgt_crdt,
124-
opts: NvmfTcpTransportOpts::default(),
146+
opts_tcp: NvmfTcpTransportOpts::default(),
125147
interface: None,
126148
rdma: None,
149+
opts_rdma: NvmfRdmaTransportOpts::default(),
127150
}
128151
}
129152
}
@@ -164,6 +187,36 @@ pub struct NvmfTcpTransportOpts {
164187
zcopy: bool,
165188
}
166189

190+
/// Settings for the RDMA transport
191+
#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
192+
#[serde(default, deny_unknown_fields)]
193+
pub struct NvmfRdmaTransportOpts {
194+
/// max queue depth
195+
max_queue_depth: u16,
196+
/// max qpairs per controller
197+
max_qpairs_per_ctrl: u16,
198+
/// in-capsule data size
199+
in_capsule_data_size: u32,
200+
/// max IO size
201+
max_io_size: u32,
202+
/// IO unit size
203+
io_unit_size: u32,
204+
/// max admin queue depth per admin queue
205+
max_aq_depth: u32,
206+
/// num of shared buffers
207+
num_shared_buf: u32,
208+
/// buffer cache size
209+
buf_cache_size: u32,
210+
/// enable DIF insert or strip
211+
dif_insert_or_strip: bool,
212+
/// abort execution timeout
213+
abort_timeout_sec: u32,
214+
/// acceptor poll rate, microseconds
215+
acceptor_poll_rate: u32,
216+
/// Use zero-copy operations if the underlying bdev supports them
217+
zcopy: bool,
218+
}
219+
167220
/// try to read an env variable or returns the default when not found
168221
fn try_from_env<T>(name: &str, default: T) -> T
169222
where
@@ -277,6 +330,29 @@ impl Default for NvmfTcpTransportOpts {
277330
}
278331
}
279332

333+
// TODO: tune these defaults based on experiments or recommendations, if required.
334+
impl Default for NvmfRdmaTransportOpts {
335+
fn default() -> Self {
336+
Self {
337+
max_queue_depth: try_from_env("NVMF_RDMA_MAX_QUEUE_DEPTH", 128),
338+
in_capsule_data_size: 8192,
339+
max_io_size: 131_072,
340+
io_unit_size: 8192,
341+
max_qpairs_per_ctrl: try_from_env(
342+
"NVMF_RDMA_MAX_QPAIRS_PER_CTRL",
343+
32,
344+
),
345+
num_shared_buf: try_from_env("NVMF_RDMA_NUM_SHARED_BUF", 2047),
346+
buf_cache_size: try_from_env("NVMF_RDMA_BUF_CACHE_SIZE", 64),
347+
dif_insert_or_strip: false,
348+
max_aq_depth: 32,
349+
abort_timeout_sec: 1,
350+
acceptor_poll_rate: try_from_env("NVMF_ACCEPTOR_POLL_RATE", 10_000),
351+
zcopy: try_from_env("NVMF_ZCOPY", 1) == 1,
352+
}
353+
}
354+
}
355+
280356
/// we cannot add derives for YAML to these structs directly, so we need to
281357
/// copy them. The upside though, is that if the FFI structures change, we will
282358
/// know about it during compile time.
@@ -304,6 +380,30 @@ impl From<NvmfTcpTransportOpts> for spdk_nvmf_transport_opts {
304380
}
305381
}
306382

383+
impl From<NvmfRdmaTransportOpts> for spdk_nvmf_transport_opts {
384+
fn from(o: NvmfRdmaTransportOpts) -> Self {
385+
Self {
386+
max_queue_depth: o.max_queue_depth,
387+
max_qpairs_per_ctrlr: o.max_qpairs_per_ctrl,
388+
in_capsule_data_size: o.in_capsule_data_size,
389+
max_io_size: o.max_io_size,
390+
io_unit_size: o.io_unit_size,
391+
max_aq_depth: o.max_aq_depth,
392+
num_shared_buffers: o.num_shared_buf,
393+
buf_cache_size: o.buf_cache_size,
394+
dif_insert_or_strip: o.dif_insert_or_strip,
395+
reserved29: Default::default(),
396+
abort_timeout_sec: o.abort_timeout_sec,
397+
association_timeout: 120000,
398+
transport_specific: std::ptr::null(),
399+
opts_size: std::mem::size_of::<spdk_nvmf_transport_opts>() as u64,
400+
acceptor_poll_rate: o.acceptor_poll_rate,
401+
zcopy: o.zcopy,
402+
reserved61: Default::default(),
403+
}
404+
}
405+
}
406+
307407
/// generic settings for the NVMe bdev (all our replicas)
308408
#[derive(Debug, PartialEq, Serialize, Deserialize)]
309409
#[serde(default, deny_unknown_fields)]

io-engine/src/subsys/nvmf/subsystem.rs

+26-6
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ use crate::{
6868
ffihelper::{cb_arg, done_cb, AsStr, FfiResult, IntoCString},
6969
lvs::Lvol,
7070
subsys::{
71+
config::opts::NvmfTgtTransport,
7172
make_subsystem_serial,
7273
nvmf::{transport::TransportId, Error, NVMF_TGT},
7374
Config,
@@ -799,7 +800,7 @@ impl NvmfSubsystem {
799800
}
800801

801802
// we currently allow all listeners to the subsystem
802-
async fn add_listener(&self) -> Result<(), Error> {
803+
async fn add_listener(&self, xprt: NvmfTgtTransport) -> Result<(), Error> {
803804
extern "C" fn listen_cb(arg: *mut c_void, status: i32) {
804805
let s = unsafe { Box::from_raw(arg as *mut oneshot::Sender<i32>) };
805806
s.send(status).unwrap();
@@ -808,8 +809,11 @@ impl NvmfSubsystem {
808809
let cfg = Config::get();
809810

810811
// don't yet enable both ports, IOW just add one transportID now
811-
812-
let trid_replica = TransportId::new(cfg.nexus_opts.nvmf_replica_port);
812+
let trid_replica = if xprt == NvmfTgtTransport::Tcp {
813+
TransportId::new(cfg.nexus_opts.nvmf_replica_port)
814+
} else {
815+
TransportId::new_with_rdma(cfg.nexus_opts.nvmf_replica_port)
816+
};
813817

814818
let (s, r) = oneshot::channel::<i32>();
815819
unsafe {
@@ -908,8 +912,21 @@ impl NvmfSubsystem {
908912
/// start the subsystem previously created -- note that we destroy it on
909913
/// failure to ensure the state is not in limbo and to avoid leaking
910914
/// resources
911-
pub async fn start(self) -> Result<String, Error> {
912-
self.add_listener().await?;
915+
pub async fn start(self, need_rdma: bool) -> Result<String, Error> {
916+
self.add_listener(NvmfTgtTransport::Tcp).await?;
917+
if need_rdma {
918+
let _ =
919+
self.add_listener(NvmfTgtTransport::Rdma)
920+
.await
921+
.map_err(|e| {
922+
warn!(
923+
"NvmfSubsystem RDMA listener add failed {}. \
924+
Subsystem will be accessible over TCP only.\
925+
{:?}",
926+
e, self
927+
);
928+
});
929+
}
913930

914931
if let Err(e) = self
915932
.change_state("start", |ss, cb, arg| unsafe {
@@ -1119,7 +1136,10 @@ impl NvmfSubsystem {
11191136
pub fn uri_endpoints(&self) -> Option<Vec<String>> {
11201137
if let Some(v) = self.listeners_to_vec() {
11211138
let nqn = self.get_nqn();
1122-
Some(v.iter().map(|t| format!("{t}/{nqn}")).collect::<Vec<_>>())
1139+
let retvec = Some(
1140+
v.iter().map(|t| format!("{t}/{nqn}")).collect::<Vec<_>>(),
1141+
);
1142+
retvec
11231143
} else {
11241144
None
11251145
}

0 commit comments

Comments
 (0)