Skip to content

Commit 414fc2e

Browse files
refactor(rebuild): use new rebuild rangers
Make use of the rebuild rangers to configure rebuild types. This allows us to stop setting the rebuild map after the rebuild job has been created for the nexus, and to remove the map from the shared rebuild descriptor. The nexus still uses the partial but sequential rebuild to reduce the scope of changes. Once the fully partial rebuild is validated, we can switch the nexus to it. Signed-off-by: Tiago Castro <tiagolobocastro@gmail.com>
1 parent bebb5d5 commit 414fc2e

9 files changed

+311
-219
lines changed

io-engine/src/bdev/nexus/nexus_bdev_rebuild.rs

+8-6
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ use crate::{
1919
rebuild::{
2020
HistoryRecord,
2121
NexusRebuildJob,
22+
NexusRebuildJobStarter,
2223
RebuildError,
2324
RebuildJobOptions,
2425
RebuildState,
@@ -119,7 +120,8 @@ impl<'n> Nexus<'n> {
119120
}?;
120121

121122
// Create a rebuild job for the child.
122-
self.create_rebuild_job(&src_child_uri, &dst_child_uri)
123+
let starter = self
124+
.create_rebuild_job(&src_child_uri, &dst_child_uri)
123125
.await?;
124126

125127
self.event(
@@ -146,8 +148,8 @@ impl<'n> Nexus<'n> {
146148
.lookup_child(&dst_child_uri)
147149
.and_then(|c| c.stop_io_log());
148150

149-
self.rebuild_job_mut(&dst_child_uri)?
150-
.start(map)
151+
starter
152+
.start(self.rebuild_job_mut(&dst_child_uri)?, map)
151153
.await
152154
.context(nexus_err::RebuildOperation {
153155
job: child_uri.to_owned(),
@@ -160,7 +162,7 @@ impl<'n> Nexus<'n> {
160162
&self,
161163
src_child_uri: &str,
162164
dst_child_uri: &str,
163-
) -> Result<(), Error> {
165+
) -> Result<NexusRebuildJobStarter, Error> {
164166
let verify_mode = match std::env::var("NEXUS_REBUILD_VERIFY")
165167
.unwrap_or_default()
166168
.as_str()
@@ -186,7 +188,7 @@ impl<'n> Nexus<'n> {
186188
verify_mode,
187189
};
188190

189-
NexusRebuildJob::new(
191+
NexusRebuildJob::new_starter(
190192
&self.name,
191193
src_child_uri,
192194
dst_child_uri,
@@ -202,7 +204,7 @@ impl<'n> Nexus<'n> {
202204
},
203205
)
204206
.await
205-
.and_then(NexusRebuildJob::store)
207+
.and_then(NexusRebuildJobStarter::store)
206208
.context(nexus_err::CreateRebuild {
207209
child: dst_child_uri.to_owned(),
208210
name: self.name.clone(),

io-engine/src/rebuild/bdev_rebuild.rs

+33-35
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,4 @@
1-
use std::{
2-
ops::{Deref, Range},
3-
rc::Rc,
4-
};
1+
use std::ops::{Deref, Range};
52

63
use super::{
74
rebuild_descriptor::RebuildDescriptor,
@@ -13,7 +10,10 @@ use super::{
1310
SEGMENT_TASKS,
1411
};
1512

16-
use crate::gen_rebuild_instances;
13+
use crate::{
14+
gen_rebuild_instances,
15+
rebuild::rebuilders::{FullRebuild, RangeRebuilder},
16+
};
1717

1818
/// A Bdev rebuild job is responsible for managing a rebuild (copy) which reads
1919
/// from source_hdl and writes into destination_hdl from specified start to end.
@@ -59,47 +59,50 @@ gen_rebuild_instances!(BdevRebuildJob);
5959
/// A rebuild job which is responsible for rebuilding from
6060
/// source to target of the `RebuildDescriptor`.
6161
pub(super) struct BdevRebuildJobBackend {
62-
/// The next block to be rebuilt.
63-
next: u64,
6462
/// A pool of tasks which perform the actual data rebuild.
6563
task_pool: RebuildTasks,
6664
/// A generic rebuild descriptor.
67-
descriptor: Rc<RebuildDescriptor>,
65+
copier: FullRebuild<RebuildDescriptor>,
6866
/// Notification callback with src and dst uri's.
6967
notify_fn: fn(&str, &str) -> (),
7068
}
7169

7270
#[async_trait::async_trait(?Send)]
7371
impl RebuildBackend for BdevRebuildJobBackend {
7472
fn on_state_change(&mut self) {
75-
(self.notify_fn)(&self.descriptor.src_uri, &self.descriptor.dst_uri);
73+
let desc = self.common_desc();
74+
(self.notify_fn)(&desc.src_uri, &desc.dst_uri);
7675
}
7776

7877
fn common_desc(&self) -> &RebuildDescriptor {
79-
&self.descriptor
78+
self.copier.desc()
79+
}
80+
81+
fn blocks_remaining(&self) -> u64 {
82+
self.copier.blocks_remaining()
83+
}
84+
85+
fn is_partial(&self) -> bool {
86+
self.copier.is_partial()
8087
}
8188

8289
fn task_pool(&self) -> &RebuildTasks {
8390
&self.task_pool
8491
}
8592

8693
fn schedule_task_by_id(&mut self, id: usize) -> bool {
87-
if self.next >= self.descriptor.range.end {
88-
false
89-
} else {
90-
let next = std::cmp::min(
91-
self.next + self.descriptor.segment_size_blks,
92-
self.descriptor.range.end,
93-
);
94-
self.task_pool.schedule_segment_rebuild(
95-
id,
96-
self.next,
97-
self.descriptor.clone(),
98-
);
99-
self.task_pool.active += 1;
100-
self.next = next;
101-
true
102-
}
94+
self.copier
95+
.next()
96+
.map(|blk| {
97+
self.task_pool.schedule_segment_rebuild(
98+
id,
99+
blk,
100+
self.copier.copier(),
101+
);
102+
self.task_pool.active += 1;
103+
true
104+
})
105+
.unwrap_or_default()
103106
}
104107

105108
async fn await_one_task(&mut self) -> Option<TaskResult> {
@@ -110,7 +113,7 @@ impl RebuildBackend for BdevRebuildJobBackend {
110113
impl std::fmt::Debug for BdevRebuildJobBackend {
111114
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
112115
f.debug_struct("BdevRebuildJob")
113-
.field("next", &self.next)
116+
.field("next", &self.copier.peek_next())
114117
.finish()
115118
}
116119
}
@@ -130,15 +133,10 @@ impl BdevRebuildJobBackend {
130133
notify_fn: fn(&str, &str) -> (),
131134
descriptor: RebuildDescriptor,
132135
) -> Result<Self, RebuildError> {
133-
let be = Self {
134-
next: descriptor.range.start,
136+
Ok(Self {
135137
task_pool,
136-
descriptor: Rc::new(descriptor),
138+
copier: FullRebuild::new(descriptor),
137139
notify_fn,
138-
};
139-
140-
info!("{be}: backend created");
141-
142-
Ok(be)
140+
})
143141
}
144142
}

io-engine/src/rebuild/mod.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ mod rebuild_task;
1212
mod rebuilders;
1313

1414
pub use bdev_rebuild::BdevRebuildJob;
15-
pub use nexus_rebuild::NexusRebuildJob;
15+
pub use nexus_rebuild::{NexusRebuildJob, NexusRebuildJobStarter};
1616
use rebuild_descriptor::RebuildDescriptor;
1717
pub(crate) use rebuild_error::RebuildError;
1818
use rebuild_job::RebuildOperation;

0 commit comments

Comments
 (0)