Skip to content

Commit e04e6d5

Browse files
author
mayastor-bors
committed
Try #1701:
2 parents d09082c + 1b7f911 commit e04e6d5

File tree

9 files changed

+349
-120
lines changed

9 files changed

+349
-120
lines changed

io-engine/src/bin/io-engine.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ fn start_tokio_runtime(args: &MayastorCliArgs) {
108108

109109
if args.rdma {
110110
env::set_var("ENABLE_RDMA", "true");
111-
warn!("RDMA is enabled for Mayastor NVMEoF target");
111+
warn!("RDMA is requested to be enabled for Mayastor NVMEoF target");
112112
}
113113

114114
unsafe {

io-engine/src/core/bdev.rs

+2-1
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,7 @@ where
210210
) -> Result<Self::Output, Self::Error> {
211211
let me = unsafe { self.get_unchecked_mut() };
212212
let props = NvmfShareProps::from(props);
213+
let is_lvol = me.driver() == "lvol";
213214

214215
let ptpl = props.ptpl().as_ref().map(|ptpl| ptpl.path());
215216

@@ -232,7 +233,7 @@ where
232233
.await
233234
.context(ShareNvmf {})?;
234235

235-
subsystem.start().await.context(ShareNvmf {})
236+
subsystem.start(!is_lvol).await.context(ShareNvmf {})
236237
}
237238

238239
fn create_ptpl(&self) -> Result<Option<PtplProps>, Self::Error> {

io-engine/src/core/env.rs

+5
Original file line numberDiff line numberDiff line change
@@ -834,6 +834,11 @@ impl MayastorEnvironment {
834834
.map(|s| s.clone())
835835
}
836836

837+
/// Check if RDMA needs to be enabled for Mayastor target.
838+
pub fn rdma(&self) -> bool {
839+
self.rdma
840+
}
841+
837842
/// Detects IP address for NVMF target by the interface specified in CLI
838843
/// arguments.
839844
fn detect_nvmf_tgt_iface_ip(iface: &str) -> Result<String, String> {

io-engine/src/subsys/config/opts.rs

+152-86
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ use spdk_rs::{
2828

2929
use std::{
3030
convert::TryFrom,
31-
fmt::{Debug, Display},
31+
fmt::{Debug, Display, Formatter},
3232
mem::zeroed,
3333
ptr::null_mut,
3434
str::FromStr,
@@ -84,6 +84,129 @@ impl GetOpts for NexusOpts {
8484
/// Must be equal to the size of `spdk_nvmf_target_opts.crdt`.
8585
pub const TARGET_CRDT_LEN: usize = 3;
8686

87+
#[derive(Clone, Default, Debug, PartialEq, Serialize, Deserialize)]
88+
pub enum NvmfTgtTransport {
89+
Rdma,
90+
#[default]
91+
Tcp,
92+
}
93+
94+
impl Display for NvmfTgtTransport {
95+
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
96+
write!(
97+
f,
98+
"{}",
99+
match self {
100+
NvmfTgtTransport::Rdma => "rdma",
101+
NvmfTgtTransport::Tcp => "tcp",
102+
}
103+
)
104+
}
105+
}
106+
107+
/// Nvmf settings for the transports
108+
#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
109+
#[serde(default, deny_unknown_fields)]
110+
pub struct NvmfTransportOpts {
111+
/// max queue depth
112+
max_queue_depth: u16,
113+
/// max qpairs per controller
114+
max_qpairs_per_ctrl: u16,
115+
/// encapsulated data size
116+
in_capsule_data_size: u32,
117+
/// max IO size
118+
max_io_size: u32,
119+
/// IO unit size
120+
io_unit_size: u32,
121+
/// max admin queue depth per admin queue
122+
max_aq_depth: u32,
123+
/// num of shared buffers
124+
num_shared_buf: u32,
125+
/// cache size
126+
buf_cache_size: u32,
127+
/// dif
128+
dif_insert_or_strip: bool,
129+
/// abort execution timeout
130+
abort_timeout_sec: u32,
131+
/// acceptor poll rate, microseconds
132+
acceptor_poll_rate: u32,
133+
/// Use zero-copy operations if the underlying bdev supports them
134+
zcopy: bool,
135+
/// ACK timeout in milliseconds
136+
ack_timeout: u32,
137+
/// Size of RDMA data WR pool
138+
data_wr_pool_size: u32,
139+
}
140+
141+
impl NvmfTransportOpts {
142+
pub fn new(xprt: NvmfTgtTransport) -> Self {
143+
xprt.xprt_nvmf_opts()
144+
}
145+
}
146+
147+
impl Default for NvmfTransportOpts {
148+
fn default() -> Self {
149+
NvmfTransportOpts::new(NvmfTgtTransport::Tcp)
150+
}
151+
}
152+
153+
/// This can be extended to configure transport specific options
154+
/// like tcp_transport_opts and rdma_transport_opts if required later.
155+
pub trait TransportOpts {
156+
fn xprt_nvmf_opts(&self) -> NvmfTransportOpts;
157+
}
158+
159+
impl TransportOpts for NvmfTgtTransport {
160+
fn xprt_nvmf_opts(&self) -> NvmfTransportOpts {
161+
match self {
162+
NvmfTgtTransport::Rdma => NvmfTransportOpts {
163+
max_queue_depth: try_from_env("NVMF_RDMA_MAX_QUEUE_DEPTH", 128),
164+
in_capsule_data_size: 8192,
165+
max_io_size: 131_072,
166+
io_unit_size: 8192,
167+
max_qpairs_per_ctrl: try_from_env(
168+
"NVMF_RDMA_MAX_QPAIRS_PER_CTRL",
169+
32,
170+
),
171+
num_shared_buf: try_from_env("NVMF_RDMA_NUM_SHARED_BUF", 2047),
172+
buf_cache_size: try_from_env("NVMF_RDMA_BUF_CACHE_SIZE", 64),
173+
dif_insert_or_strip: false,
174+
max_aq_depth: 32,
175+
abort_timeout_sec: 1,
176+
acceptor_poll_rate: try_from_env(
177+
"NVMF_ACCEPTOR_POLL_RATE",
178+
10_000,
179+
),
180+
zcopy: try_from_env("NVMF_ZCOPY", 1) == 1,
181+
ack_timeout: try_from_env("NVMF_ACK_TIMEOUT", 0),
182+
data_wr_pool_size: try_from_env("NVMF_DATA_WR_POOL_SIZE", 4095),
183+
},
184+
NvmfTgtTransport::Tcp => NvmfTransportOpts {
185+
max_queue_depth: try_from_env("NVMF_TCP_MAX_QUEUE_DEPTH", 32),
186+
in_capsule_data_size: 4096,
187+
max_io_size: 131_072,
188+
io_unit_size: 131_072,
189+
max_qpairs_per_ctrl: try_from_env(
190+
"NVMF_TCP_MAX_QPAIRS_PER_CTRL",
191+
32,
192+
),
193+
num_shared_buf: try_from_env("NVMF_TCP_NUM_SHARED_BUF", 2047),
194+
buf_cache_size: try_from_env("NVMF_TCP_BUF_CACHE_SIZE", 64),
195+
dif_insert_or_strip: false,
196+
max_aq_depth: 32,
197+
abort_timeout_sec: 1,
198+
acceptor_poll_rate: try_from_env(
199+
"NVMF_ACCEPTOR_POLL_RATE",
200+
10_000,
201+
),
202+
zcopy: try_from_env("NVMF_ZCOPY", 1) == 1,
203+
ack_timeout: try_from_env("NVMF_ACK_TIMEOUT", 10_000),
204+
data_wr_pool_size: try_from_env("NVMF_DATA_WR_POOL_SIZE", 0),
205+
},
206+
}
207+
}
208+
}
209+
87210
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
88211
#[serde(default, deny_unknown_fields)]
89212
pub struct NvmfTgtConfig {
@@ -94,11 +217,13 @@ pub struct NvmfTgtConfig {
94217
/// NVMF target Command Retry Delay in x100 ms.
95218
pub crdt: [u16; TARGET_CRDT_LEN],
96219
/// TCP transport options
97-
pub opts: NvmfTcpTransportOpts,
220+
pub opts_tcp: NvmfTransportOpts,
98221
/// NVMF target interface (ip, mac, name or subnet).
99222
pub interface: Option<String>,
100223
/// Enable RDMA for NVMF target or not
101224
pub rdma: Option<bool>,
225+
/// RDMA transport options
226+
pub opts_rdma: NvmfTransportOpts,
102227
}
103228

104229
impl From<NvmfTgtConfig> for Box<spdk_nvmf_target_opts> {
@@ -126,9 +251,10 @@ impl Default for NvmfTgtConfig {
126251
name: "mayastor_target".to_string(),
127252
max_namespaces: 2048,
128253
crdt: args.nvmf_tgt_crdt,
129-
opts: NvmfTcpTransportOpts::default(),
254+
opts_tcp: NvmfTransportOpts::new(NvmfTgtTransport::Tcp),
130255
interface: None,
131256
rdma: None,
257+
opts_rdma: NvmfTransportOpts::new(NvmfTgtTransport::Rdma),
132258
}
133259
}
134260
}
@@ -139,40 +265,6 @@ impl GetOpts for NvmfTgtConfig {
139265
}
140266
}
141267

142-
/// Settings for the TCP transport
143-
#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
144-
#[serde(default, deny_unknown_fields)]
145-
pub struct NvmfTcpTransportOpts {
146-
/// max queue depth
147-
max_queue_depth: u16,
148-
/// max qpairs per controller
149-
max_qpairs_per_ctrl: u16,
150-
/// encapsulated data size
151-
in_capsule_data_size: u32,
152-
/// max IO size
153-
max_io_size: u32,
154-
/// IO unit size
155-
io_unit_size: u32,
156-
/// max admin queue depth per admin queue
157-
max_aq_depth: u32,
158-
/// num of shared buffers
159-
num_shared_buf: u32,
160-
/// cache size
161-
buf_cache_size: u32,
162-
/// dif
163-
dif_insert_or_strip: bool,
164-
/// abort execution timeout
165-
abort_timeout_sec: u32,
166-
/// acceptor poll rate, microseconds
167-
acceptor_poll_rate: u32,
168-
/// Use zero-copy operations if the underlying bdev supports them
169-
zcopy: bool,
170-
/// ACK timeout in milliseconds
171-
ack_timeout: u32,
172-
/// Size of RDMA data WR pool
173-
data_wr_pool_size: u32,
174-
}
175-
176268
/// try to read an env variable or returns the default when not found
177269
pub(crate) fn try_from_env<T>(name: &str, default: T) -> T
178270
where
@@ -264,58 +356,32 @@ where
264356
}
265357
}
266358

267-
impl Default for NvmfTcpTransportOpts {
268-
fn default() -> Self {
269-
Self {
270-
max_queue_depth: try_from_env("NVMF_TCP_MAX_QUEUE_DEPTH", 32),
271-
in_capsule_data_size: 4096,
272-
max_io_size: 131_072,
273-
io_unit_size: 131_072,
274-
max_qpairs_per_ctrl: try_from_env(
275-
"NVMF_TCP_MAX_QPAIRS_PER_CTRL",
276-
32,
277-
),
278-
num_shared_buf: try_from_env("NVMF_TCP_NUM_SHARED_BUF", 2047),
279-
buf_cache_size: try_from_env("NVMF_TCP_BUF_CACHE_SIZE", 64),
280-
dif_insert_or_strip: false,
281-
max_aq_depth: 32,
282-
abort_timeout_sec: 1,
283-
acceptor_poll_rate: try_from_env("NVMF_ACCEPTOR_POLL_RATE", 10_000),
284-
zcopy: try_from_env("NVMF_ZCOPY", 1) == 1,
285-
ack_timeout: try_from_env("NVMF_ACK_TIMEOUT", 0),
286-
data_wr_pool_size: try_from_env("NVMF_DATA_WR_POOL_SIZE", 0),
287-
}
288-
}
289-
}
290-
291359
/// we cannot add derives for YAML to these structs directly, so we need to
292360
/// copy them. The upside though, is that if the FFI structures change, we will
293361
/// know about it during compile time.
294-
impl From<NvmfTcpTransportOpts> for spdk_nvmf_transport_opts {
295-
fn from(o: NvmfTcpTransportOpts) -> Self {
296-
struct_size_init!(
297-
Self {
298-
max_queue_depth: o.max_queue_depth,
299-
max_qpairs_per_ctrlr: o.max_qpairs_per_ctrl,
300-
in_capsule_data_size: o.in_capsule_data_size,
301-
max_io_size: o.max_io_size,
302-
io_unit_size: o.io_unit_size,
303-
max_aq_depth: o.max_aq_depth,
304-
num_shared_buffers: o.num_shared_buf,
305-
buf_cache_size: o.buf_cache_size,
306-
dif_insert_or_strip: o.dif_insert_or_strip,
307-
reserved29: Default::default(),
308-
abort_timeout_sec: o.abort_timeout_sec,
309-
association_timeout: 120000,
310-
transport_specific: std::ptr::null(),
311-
acceptor_poll_rate: o.acceptor_poll_rate,
312-
zcopy: o.zcopy,
313-
reserved61: Default::default(),
314-
ack_timeout: o.ack_timeout,
315-
data_wr_pool_size: o.data_wr_pool_size,
316-
},
317-
opts_size
318-
)
362+
impl From<NvmfTransportOpts> for spdk_nvmf_transport_opts {
363+
fn from(o: NvmfTransportOpts) -> Self {
364+
Self {
365+
max_queue_depth: o.max_queue_depth,
366+
max_qpairs_per_ctrlr: o.max_qpairs_per_ctrl,
367+
in_capsule_data_size: o.in_capsule_data_size,
368+
max_io_size: o.max_io_size,
369+
io_unit_size: o.io_unit_size,
370+
max_aq_depth: o.max_aq_depth,
371+
num_shared_buffers: o.num_shared_buf,
372+
buf_cache_size: o.buf_cache_size,
373+
dif_insert_or_strip: o.dif_insert_or_strip,
374+
reserved29: Default::default(),
375+
abort_timeout_sec: o.abort_timeout_sec,
376+
association_timeout: 120000,
377+
transport_specific: std::ptr::null(),
378+
opts_size: std::mem::size_of::<spdk_nvmf_transport_opts>() as u64,
379+
acceptor_poll_rate: o.acceptor_poll_rate,
380+
zcopy: o.zcopy,
381+
ack_timeout: o.ack_timeout,
382+
data_wr_pool_size: o.data_wr_pool_size,
383+
reserved61: Default::default(),
384+
}
319385
}
320386
}
321387

io-engine/src/subsys/nvmf/subsystem.rs

+26-6
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ use crate::{
6868
ffihelper::{cb_arg, done_cb, AsStr, FfiResult, IntoCString},
6969
lvs::Lvol,
7070
subsys::{
71+
config::opts::NvmfTgtTransport,
7172
make_subsystem_serial,
7273
nvmf::{transport::TransportId, Error, NVMF_TGT},
7374
Config,
@@ -798,7 +799,7 @@ impl NvmfSubsystem {
798799
}
799800

800801
// we currently allow all listeners to the subsystem
801-
async fn add_listener(&self) -> Result<(), Error> {
802+
async fn add_listener(&self, xprt: NvmfTgtTransport) -> Result<(), Error> {
802803
extern "C" fn listen_cb(arg: *mut c_void, status: i32) {
803804
let s = unsafe { Box::from_raw(arg as *mut oneshot::Sender<i32>) };
804805
s.send(status).unwrap();
@@ -807,8 +808,11 @@ impl NvmfSubsystem {
807808
let cfg = Config::get();
808809

809810
// dont yet enable both ports, IOW just add one transportID now
810-
811-
let trid_replica = TransportId::new(cfg.nexus_opts.nvmf_replica_port);
811+
let trid_replica = if xprt == NvmfTgtTransport::Tcp {
812+
TransportId::new(cfg.nexus_opts.nvmf_replica_port)
813+
} else {
814+
TransportId::new_with_rdma(cfg.nexus_opts.nvmf_replica_port)
815+
};
812816

813817
let (s, r) = oneshot::channel::<i32>();
814818
unsafe {
@@ -907,8 +911,21 @@ impl NvmfSubsystem {
907911
/// start the subsystem previously created -- note that we destroy it on
908912
/// failure to ensure the state is not in limbo and to avoid leaking
909913
/// resources
910-
pub async fn start(self) -> Result<String, Error> {
911-
self.add_listener().await?;
914+
pub async fn start(self, need_rdma: bool) -> Result<String, Error> {
915+
self.add_listener(NvmfTgtTransport::Tcp).await?;
916+
if need_rdma {
917+
let _ =
918+
self.add_listener(NvmfTgtTransport::Rdma)
919+
.await
920+
.map_err(|e| {
921+
warn!(
922+
"NvmfSubsystem RDMA listener add failed {}. \
923+
Subsystem will be accessible over TCP only.\
924+
{:?}",
925+
e, self
926+
);
927+
});
928+
}
912929

913930
if let Err(e) = self
914931
.change_state("start", |ss, cb, arg| unsafe {
@@ -1118,7 +1135,10 @@ impl NvmfSubsystem {
11181135
pub fn uri_endpoints(&self) -> Option<Vec<String>> {
11191136
if let Some(v) = self.listeners_to_vec() {
11201137
let nqn = self.get_nqn();
1121-
Some(v.iter().map(|t| format!("{t}/{nqn}")).collect::<Vec<_>>())
1138+
let retvec = Some(
1139+
v.iter().map(|t| format!("{t}/{nqn}")).collect::<Vec<_>>(),
1140+
);
1141+
retvec
11221142
} else {
11231143
None
11241144
}

0 commit comments

Comments
 (0)