Skip to content

Commit 0d71aa9

Browse files
Bar Davidbardavid
Bar David
authored and committed
dedupe: allow to generate dedupe buffers from working set
This commit introduces a new dedupe generation mode, "working_set". Working set mode simulates a more realistic approach to deduped data, in which deduped buffers are generated from a pre-existing working set sized as a percentage of the device or file. In other words, a deduped buffer is not usually expected to be close in time to its source buffer, and source buffers usually make up only a small subset of the entire file or device. Signed-off-by: Bar David <bardavvid@gmail.com>
1 parent 77c72e0 commit 0d71aa9

16 files changed

+255
-27
lines changed

DEDUPE-TODO

+19
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
- Mixed buffers of dedupe-able and compressible data.
2+
A major use case in performance benchmarking of storage subsystems.
3+
4+
- Shifted dedup-able data.
5+
Allow for dedup buffer generation to shift contents by random number
6+
of sectors (fill the gaps with uncompressible data). Some storage
7+
subsystems modernized the deduplication detection algorithms to look
8+
for shifted data as well. For example, some databases push a timestamp
9+
on the prefix of written blocks, which makes the underlying data
10+
dedup-able in different alignment. FIO should be able to simulate such
11+
workload.
12+
13+
- Generation of similar data (but not exact).
14+
A rising trend in enterprise storage systems.
15+
Generation of "similar" data means random uncompressible buffers
16+
that differ by a few (a configurable number of) bits from each other.
17+
The storage subsystem usually identifies the similar buffers using
18+
locality-sensitive hashing or other methods.
19+

HOWTO

+30
Original file line numberDiff line numberDiff line change
@@ -1705,6 +1705,36 @@ Buffers and memory
17051705
this option will also enable :option:`refill_buffers` to prevent every buffer
17061706
being identical.
17071707

1708+
.. option:: dedupe_mode=str
1709+
1710+
If ``dedupe_percentage=<int>`` is given, then this option controls how fio
1711+
generates the dedupe buffers.
1712+
1713+
**repeat**
1714+
Generate dedupe buffers by repeating previous writes
1715+
**working_set**
1716+
Generate dedupe buffers from working set
1717+
1718+
``repeat`` is the default option for fio. Dedupe buffers are generated
1719+
by repeating the previous unique write.
1720+
1721+
``working_set`` is a more realistic workload.
1722+
With ``working_set``, ``dedupe_working_set_percentage=<int>`` should be provided.
1723+
Given that, fio will use the initial unique write buffers as its working set.
1724+
Upon deciding to dedupe, fio will randomly choose a buffer from the working set.
1725+
Note that by using ``working_set`` the dedupe percentage will converge
1726+
to the desired value over time while ``repeat`` maintains the desired percentage
1727+
throughout the job.
1728+
1729+
.. option:: dedupe_working_set_percentage=int
1730+
1731+
If ``dedupe_mode=<str>`` is set to ``working_set``, then this controls
1732+
the percentage of size of the file or device used as the buffers
1733+
fio will choose to generate the dedupe buffers from.
1734+
1735+
Note that size needs to be explicitly provided and only 1 file per
1736+
job is supported.
1737+
17081738
.. option:: invalidate=bool
17091739

17101740
Invalidate the buffer/page cache parts of the files to be used prior to

Makefile

+1-1
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ SOURCE := $(sort $(patsubst $(SRCDIR)/%,%,$(wildcard $(SRCDIR)/crc/*.c)) \
6161
gettime-thread.c helpers.c json.c idletime.c td_error.c \
6262
profiles/tiobench.c profiles/act.c io_u_queue.c filelock.c \
6363
workqueue.c rate-submit.c optgroup.c helper_thread.c \
64-
steadystate.c zone-dist.c zbd.c
64+
steadystate.c zone-dist.c zbd.c dedupe.c
6565

6666
ifdef CONFIG_LIBHDFS
6767
HDFSFLAGS= -I $(JAVA_HOME)/include -I $(JAVA_HOME)/include/linux -I $(FIO_LIBHDFS_INCLUDE)

cconv.c

+4
Original file line numberDiff line numberDiff line change
@@ -298,6 +298,8 @@ void convert_thread_options_to_cpu(struct thread_options *o,
298298
o->compress_percentage = le32_to_cpu(top->compress_percentage);
299299
o->compress_chunk = le32_to_cpu(top->compress_chunk);
300300
o->dedupe_percentage = le32_to_cpu(top->dedupe_percentage);
301+
o->dedupe_mode = le32_to_cpu(top->dedupe_mode);
302+
o->dedupe_working_set_percentage = le32_to_cpu(top->dedupe_working_set_percentage);
301303
o->block_error_hist = le32_to_cpu(top->block_error_hist);
302304
o->replay_align = le32_to_cpu(top->replay_align);
303305
o->replay_scale = le32_to_cpu(top->replay_scale);
@@ -499,6 +501,8 @@ void convert_thread_options_to_net(struct thread_options_pack *top,
499501
top->compress_percentage = cpu_to_le32(o->compress_percentage);
500502
top->compress_chunk = cpu_to_le32(o->compress_chunk);
501503
top->dedupe_percentage = cpu_to_le32(o->dedupe_percentage);
504+
top->dedupe_mode = cpu_to_le32(o->dedupe_mode);
505+
top->dedupe_working_set_percentage = cpu_to_le32(o->dedupe_working_set_percentage);
502506
top->block_error_hist = cpu_to_le32(o->block_error_hist);
503507
top->replay_align = cpu_to_le32(o->replay_align);
504508
top->replay_scale = cpu_to_le32(o->replay_scale);

dedupe.c

+28
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
#include "fio.h"
2+
3+
int init_dedupe_working_set_seeds(struct thread_data *td)
4+
{
5+
unsigned long long i;
6+
struct frand_state dedupe_working_set_state = {0};
7+
8+
if (!td->o.dedupe_percentage || !(td->o.dedupe_mode == DEDUPE_MODE_WORKING_SET))
9+
return 0;
10+
11+
/*
12+
* The dedupe working set keeps seeds of unique data (generated by buf_state).
13+
* Dedupe-ed pages will be generated using those seeds.
14+
*/
15+
td->num_unique_pages = (td->o.size * (unsigned long long)td->o.dedupe_working_set_percentage / 100) / td->o.min_bs[DDIR_WRITE];
16+
td->dedupe_working_set_states = malloc(sizeof(struct frand_state) * td->num_unique_pages);
17+
if (!td->dedupe_working_set_states) {
18+
log_err("fio: could not allocate dedupe working set\n");
19+
return 1;
20+
}
21+
frand_copy(&dedupe_working_set_state, &td->buf_state);
22+
for (i = 0; i < td->num_unique_pages; i++) {
23+
frand_copy(&td->dedupe_working_set_states[i], &dedupe_working_set_state);
24+
__get_next_seed(&dedupe_working_set_state);
25+
}
26+
27+
return 0;
28+
}

dedupe.h

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
#ifndef DEDUPE_H
2+
#define DEDUPE_H
3+
4+
int init_dedupe_working_set_seeds(struct thread_data *td);
5+
6+
#endif

fio.1

+42
Original file line numberDiff line numberDiff line change
@@ -1509,6 +1509,48 @@ all \-\- this option only controls the distribution of unique buffers. Setting
15091509
this option will also enable \fBrefill_buffers\fR to prevent every buffer
15101510
being identical.
15111511
.TP
1512+
.BI dedupe_mode \fR=\fPstr
1513+
If \fBdedupe_percentage\fR is given, then this option controls how fio
1514+
generates the dedupe buffers.
1515+
.RS
1516+
.RS
1517+
.TP
1518+
.B repeat
1519+
.P
1520+
.RS
1521+
Generate dedupe buffers by repeating previous writes
1522+
.RE
1523+
.TP
1524+
.B working_set
1525+
.P
1526+
.RS
1527+
Generate dedupe buffers from working set
1528+
.RE
1529+
.RE
1530+
.P
1531+
\fBrepeat\fR is the default option for fio. Dedupe buffers are generated
1532+
by repeating the previous unique write.
1533+
1534+
\fBworking_set\fR is a more realistic workload.
1535+
With \fBworking_set\fR, \fBdedupe_working_set_percentage\fR should be provided.
1536+
Given that, fio will use the initial unique write buffers as its working set.
1537+
Upon deciding to dedupe, fio will randomly choose a buffer from the working set.
1538+
Note that by using \fBworking_set\fR the dedupe percentage will converge
1539+
to the desired value over time while \fBrepeat\fR maintains the desired percentage
1540+
throughout the job.
1541+
.RE
1542+
.RE
1543+
.TP
1544+
.BI dedupe_working_set_percentage \fR=\fPint
1545+
If \fBdedupe_mode\fR is set to \fBworking_set\fR, then this controls
1546+
the percentage of size of the file or device used as the buffers
1547+
fio will choose to generate the dedupe buffers from.
1548+
.P
1549+
.RS
1550+
Note that \fBsize\fR needs to be explicitly provided and only 1 file
1551+
per job is supported.
1552+
.RE
1553+
.TP
15121554
.BI invalidate \fR=\fPbool
15131555
Invalidate the buffer/page cache parts of the files to be used prior to
15141556
starting I/O if the platform and file type support it. Defaults to true.

fio.h

+6
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
#include "workqueue.h"
4848
#include "steadystate.h"
4949
#include "lib/nowarn_snprintf.h"
50+
#include "dedupe.h"
5051

5152
#ifdef CONFIG_SOLARISAIO
5253
#include <sys/asynch.h>
@@ -140,6 +141,7 @@ enum {
140141
FIO_RAND_POISSON2_OFF,
141142
FIO_RAND_POISSON3_OFF,
142143
FIO_RAND_PRIO_CMDS,
144+
FIO_RAND_DEDUPE_WORKING_SET_IX,
143145
FIO_RAND_NR_OFFS,
144146
};
145147

@@ -263,6 +265,10 @@ struct thread_data {
263265
struct frand_state dedupe_state;
264266
struct frand_state zone_state;
265267
struct frand_state prio_state;
268+
struct frand_state dedupe_working_set_index_state;
269+
struct frand_state *dedupe_working_set_states;
270+
271+
unsigned long long num_unique_pages;
266272

267273
struct zone_split_index **zone_state_index;
268274
unsigned int num_open_zones;

init.c

+26
Original file line numberDiff line numberDiff line change
@@ -958,6 +958,28 @@ static int fixup_options(struct thread_data *td)
958958

959959
o->latency_target *= 1000ULL;
960960

961+
/*
962+
* Dedupe working set verifications
963+
*/
964+
if (o->dedupe_percentage && o->dedupe_mode == DEDUPE_MODE_WORKING_SET) {
965+
if (!fio_option_is_set(o, size)) {
966+
log_err("fio: pregenerated dedupe working set "
967+
"requires size to be set\n");
968+
ret |= 1;
969+
} else if (o->nr_files != 1) {
970+
log_err("fio: dedupe working set mode supported with "
971+
"single file per job, but %d files "
972+
"provided\n", o->nr_files);
973+
ret |= 1;
974+
} else if (o->dedupe_working_set_percentage + o->dedupe_percentage > 100) {
975+
log_err("fio: impossible to reach expected dedupe percentage %u "
976+
"since %u percentage of size is reserved to dedupe working set "
977+
"(those are unique pages)\n",
978+
o->dedupe_percentage, o->dedupe_working_set_percentage);
979+
ret |= 1;
980+
}
981+
}
982+
961983
return ret;
962984
}
963985

@@ -1031,6 +1053,7 @@ static void td_fill_rand_seeds_internal(struct thread_data *td, bool use64)
10311053
init_rand_seed(&td->dedupe_state, td->rand_seeds[FIO_DEDUPE_OFF], false);
10321054
init_rand_seed(&td->zone_state, td->rand_seeds[FIO_RAND_ZONE_OFF], false);
10331055
init_rand_seed(&td->prio_state, td->rand_seeds[FIO_RAND_PRIO_CMDS], false);
1056+
init_rand_seed(&td->dedupe_working_set_index_state, td->rand_seeds[FIO_RAND_DEDUPE_WORKING_SET_IX], use64);
10341057

10351058
if (!td_random(td))
10361059
return;
@@ -1491,6 +1514,9 @@ static int add_job(struct thread_data *td, const char *jobname, int job_add_num,
14911514
if (fixup_options(td))
14921515
goto err;
14931516

1517+
if (init_dedupe_working_set_seeds(td))
1518+
goto err;
1519+
14941520
/*
14951521
* Belongs to fixup_options, but o->name is not necessarily set as yet
14961522
*/

io_u.c

+20-10
Original file line numberDiff line numberDiff line change
@@ -2172,6 +2172,7 @@ void io_u_queued(struct thread_data *td, struct io_u *io_u)
21722172
static struct frand_state *get_buf_state(struct thread_data *td)
21732173
{
21742174
unsigned int v;
2175+
unsigned long long i;
21752176

21762177
if (!td->o.dedupe_percentage)
21772178
return &td->buf_state;
@@ -2182,16 +2183,25 @@ static struct frand_state *get_buf_state(struct thread_data *td)
21822183

21832184
v = rand_between(&td->dedupe_state, 1, 100);
21842185

2185-
if (v <= td->o.dedupe_percentage) {
2186-
/*
2187-
* The caller advances the returned frand_state.
2188-
* A copy of prev should be returned instead since
2189-
* a subsequent intention to generate a deduped buffer
2190-
* might result in generating a unique one
2191-
*/
2192-
frand_copy(&td->buf_state_ret, &td->buf_state_prev);
2193-
return &td->buf_state_ret;
2194-
}
2186+
if (v <= td->o.dedupe_percentage)
2187+
switch (td->o.dedupe_mode) {
2188+
case DEDUPE_MODE_REPEAT:
2189+
/*
2190+
* The caller advances the returned frand_state.
2191+
* A copy of prev should be returned instead since
2192+
* a subsequent intention to generate a deduped buffer
2193+
* might result in generating a unique one
2194+
*/
2195+
frand_copy(&td->buf_state_ret, &td->buf_state_prev);
2196+
return &td->buf_state_ret;
2197+
case DEDUPE_MODE_WORKING_SET:
2198+
i = rand_between(&td->dedupe_working_set_index_state, 0, td->num_unique_pages - 1);
2199+
frand_copy(&td->buf_state_ret, &td->dedupe_working_set_states[i]);
2200+
return &td->buf_state_ret;
2201+
default:
2202+
log_err("unexpected dedupe mode %u\n", td->o.dedupe_mode);
2203+
assert(0);
2204+
}
21952205

21962206
return &td->buf_state;
21972207
}

lib/rand.c

+2-8
Original file line numberDiff line numberDiff line change
@@ -125,10 +125,7 @@ void __fill_random_buf(void *buf, unsigned int len, uint64_t seed)
125125
uint64_t fill_random_buf(struct frand_state *fs, void *buf,
126126
unsigned int len)
127127
{
128-
uint64_t r = __rand(fs);
129-
130-
if (sizeof(int) != sizeof(long *))
131-
r *= (unsigned long) __rand(fs);
128+
uint64_t r = __get_next_seed(fs);
132129

133130
__fill_random_buf(buf, len, r);
134131
return r;
@@ -188,10 +185,7 @@ uint64_t fill_random_buf_percentage(struct frand_state *fs, void *buf,
188185
unsigned int segment, unsigned int len,
189186
char *pattern, unsigned int pbytes)
190187
{
191-
uint64_t r = __rand(fs);
192-
193-
if (sizeof(int) != sizeof(long *))
194-
r *= (unsigned long) __rand(fs);
188+
uint64_t r = __get_next_seed(fs);
195189

196190
__fill_random_buf_percentage(r, buf, percentage, segment, len,
197191
pattern, pbytes);

lib/rand.h

+10
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,16 @@ static inline uint64_t rand_between(struct frand_state *state, uint64_t start,
150150
return start + rand32_upto(state, end - start);
151151
}
152152

153+
/*
 * Draw the next 64-bit seed from @fs.
 *
 * One value is always taken from the generator. When int and pointer sizes
 * differ, a second value is drawn and multiplied in, so the generator is
 * advanced twice in that case.
 */
static inline uint64_t __get_next_seed(struct frand_state *fs)
{
	uint64_t seed = __rand(fs);

	/* Same-width int and pointer: the single draw is the seed. */
	if (sizeof(int) == sizeof(long *))
		return seed;

	/* Otherwise fold a second draw into the seed. */
	return seed * (unsigned long) __rand(fs);
}
162+
153163
extern void init_rand(struct frand_state *, bool);
154164
extern void init_rand_seed(struct frand_state *, uint64_t seed, bool);
155165
extern void __fill_random_buf(void *buf, unsigned int len, uint64_t seed);

options.c

+34
Original file line numberDiff line numberDiff line change
@@ -4497,6 +4497,40 @@ struct fio_option fio_options[FIO_MAX_OPTS] = {
44974497
.category = FIO_OPT_C_IO,
44984498
.group = FIO_OPT_G_IO_BUF,
44994499
},
4500+
{
4501+
.name = "dedupe_mode",
4502+
.lname = "Dedupe mode",
4503+
.help = "Mode for the deduplication buffer generation",
4504+
.type = FIO_OPT_STR,
4505+
.off1 = offsetof(struct thread_options, dedupe_mode),
4506+
.parent = "dedupe_percentage",
4507+
.def = "repeat",
4508+
.category = FIO_OPT_C_IO,
4509+
.group = FIO_OPT_G_IO_BUF,
4510+
.posval = {
4511+
{ .ival = "repeat",
4512+
.oval = DEDUPE_MODE_REPEAT,
4513+
.help = "repeat previous page",
4514+
},
4515+
{ .ival = "working_set",
4516+
.oval = DEDUPE_MODE_WORKING_SET,
4517+
.help = "choose a page randomly from limited working set defined in dedupe_working_set_percentage",
4518+
},
4519+
},
4520+
},
4521+
{
4522+
.name = "dedupe_working_set_percentage",
4523+
.lname = "Dedupe working set percentage",
4524+
.help = "Dedupe working set size in percentages from file or device size used to generate dedupe patterns from",
4525+
.type = FIO_OPT_INT,
4526+
.off1 = offsetof(struct thread_options, dedupe_working_set_percentage),
4527+
.parent = "dedupe_percentage",
4528+
.def = "5",
4529+
.maxval = 100,
4530+
.minval = 0,
4531+
.category = FIO_OPT_C_IO,
4532+
.group = FIO_OPT_G_IO_BUF,
4533+
},
45004534
{
45014535
.name = "clat_percentiles",
45024536
.lname = "Completion latency percentiles",

server.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ struct fio_net_cmd_reply {
4848
};
4949

5050
enum {
51-
FIO_SERVER_VER = 91,
51+
FIO_SERVER_VER = 92,
5252

5353
FIO_SERVER_MAX_FRAGMENT_PDU = 1024,
5454
FIO_SERVER_MAX_CMD_MB = 2048,

0 commit comments

Comments
 (0)