Skip to content

Commit

Permalink
Provide consistent alignment to swap_ranges benchmark (#5043)
Browse files Browse the repository at this point in the history
Co-authored-by: Casey Carter <cartec69@gmail.com>
Co-authored-by: Stephan T. Lavavej <stl@nuwen.net>
  • Loading branch information
3 people authored Oct 30, 2024
1 parent cb1e359 commit 51d34c4
Show file tree
Hide file tree
Showing 2 changed files with 121 additions and 18 deletions.
76 changes: 76 additions & 0 deletions benchmarks/inc/skewed_allocator.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

#pragma once

#include <cstddef>
#include <cstdlib>
#include <new>

// Minimal allocator for benchmarks: every block it hands out begins exactly `Skew` bytes past an
// address that is a multiple of `Alignment`, so the placement of the data is reproducible.
//
// Template parameters:
//   T         - element type.
//   Alignment - alignment, in bytes, of the underlying raw allocation; must be a power of two
//               and a multiple of alignof(T).
//   Skew      - byte offset added to the aligned address; must be a multiple of alignof(T).
//
// The allocator is stateless, so all instances with the same Alignment/Skew compare equal.
template <class T, size_t Alignment, size_t Skew>
struct skewed_allocator {
    using value_type = T;
    static_assert(Alignment != 0 && (Alignment & (Alignment - 1)) == 0, "Alignment must be a power of two");
    static_assert(Alignment % alignof(T) == 0, "Chosen Alignment will produce unaligned T objects");
    static_assert(Skew % alignof(T) == 0, "Chosen Skew will produce unaligned T objects");

    // Explicit rebind is required: the default allocator_traits rebind cannot handle
    // non-type template parameters.
    template <class U>
    struct rebind {
        using other = skewed_allocator<U, Alignment, Skew>;
    };

    skewed_allocator() = default;

    // Converting constructor from an allocator of another element type; there is no state to copy.
    template <class U>
    skewed_allocator(const skewed_allocator<U, Alignment, Skew>&) noexcept {}

    template <class U>
    bool operator==(const skewed_allocator<U, Alignment, Skew>&) const noexcept {
        return true;
    }

    // Provided explicitly so the allocator is also usable before C++20 comparison rewriting.
    template <class U>
    bool operator!=(const skewed_allocator<U, Alignment, Skew>&) const noexcept {
        return false;
    }

    // Allocates uninitialized storage for `n` objects of type T.
    // Returns a pointer located `Skew` bytes past an `Alignment`-aligned address.
    // Throws std::bad_array_new_length if the byte count would overflow size_t,
    // and std::bad_alloc if the underlying allocation fails.
    T* allocate(const size_t n) {
        // Reject requests whose byte size (including the skew prefix) cannot be represented,
        // instead of silently wrapping around and returning a too-small buffer.
        constexpr size_t max_count = (static_cast<size_t>(-1) - Skew) / sizeof(T);
        if (n > max_count) {
            throw std::bad_array_new_length{};
        }

        // Aligned operator new reports failure by throwing std::bad_alloc, so no null check is needed.
        const auto base = static_cast<unsigned char*>(
            ::operator new(n * sizeof(T) + Skew, static_cast<std::align_val_t>(Alignment)));
        return reinterpret_cast<T*>(base + Skew);
    }

    // Releases storage previously obtained from allocate(); null pointers are ignored.
    void deallocate(T* const p, size_t) noexcept {
        if (p) {
            // Undo the skew to recover the address that the aligned allocation returned.
            ::operator delete(reinterpret_cast<unsigned char*>(p) - Skew, static_cast<std::align_val_t>(Alignment));
        }
    }
};

// These constants exist so that benchmarks behave consistently from run to run.
// An alignment of 64 would already be reasonable for practical perf work,
// since it is both the cache line size and the maximum vector instruction size (on x64).
// Aligning to the page size is more consistent still,
// because the same number of page boundaries is then crossed on every run.
inline constexpr size_t page_size{4096};

// A realistic skew relative to allocation granularity: the offset a variable gets when it sits
// next to a pointer in a structure or on the stack. It also corresponds to the default packing.
inline constexpr size_t realistic_skew{8};

// skewed_allocator configured with page_size alignment and no skew.
template <typename T>
using highly_aligned_allocator = skewed_allocator<T, page_size, 0>;

// skewed_allocator configured with page_size alignment plus realistic_skew bytes of skew.
template <typename T>
using not_highly_aligned_allocator = skewed_allocator<T, page_size, realistic_skew>;

#pragma warning(push)
#pragma warning(disable : 4324) // structure was padded due to alignment specifier

// Wrapper whose only member, `value`, sits at the start of a page_size-aligned object.
template <typename T>
struct alignas(page_size) highly_aligned {
    T value;
};

// Wrapper that is itself page_size-aligned but stores `value` after realistic_skew bytes of padding.
// NOTE(review): `value` lands exactly realistic_skew bytes in only when alignof(T) <= realistic_skew;
// a more strictly aligned T would be pushed further out by the compiler.
template <typename T>
struct alignas(page_size) not_highly_aligned {
    char pad[realistic_skew]; // filler that displaces `value` from the aligned start
    T value;
};

#pragma warning(pop)
63 changes: 45 additions & 18 deletions benchmarks/src/swap_ranges.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,17 @@
#include <cstdint>
#include <vector>

#include "skewed_allocator.hpp"

using namespace std;

template <size_t N, class T>
template <size_t N, class T, template <class> class Padder>
void std_swap(benchmark::State& state) {
T a[N];
Padder<T[N]> padded_a;
auto& a = padded_a.value;
memset(a, 'a', sizeof(a));
T b[N];
Padder<T[N]> padded_b;
auto& b = padded_b.value;
memset(b, 'b', sizeof(b));

for (auto _ : state) {
Expand All @@ -23,10 +27,10 @@ void std_swap(benchmark::State& state) {
}
}

template <class T>
template <class T, template <class> class Alloc>
void std_swap_ranges(benchmark::State& state) {
vector<T> a(static_cast<size_t>(state.range(0)), T{'a'});
vector<T> b(static_cast<size_t>(state.range(0)), T{'b'});
vector<T, Alloc<T>> a(static_cast<size_t>(state.range(0)), T{'a'});
vector<T, Alloc<T>> b(static_cast<size_t>(state.range(0)), T{'b'});

for (auto _ : state) {
swap_ranges(a.begin(), a.end(), b.begin());
Expand All @@ -35,18 +39,41 @@ void std_swap_ranges(benchmark::State& state) {
}
}

BENCHMARK(std_swap<1, uint8_t>);
BENCHMARK(std_swap<5, uint8_t>);
BENCHMARK(std_swap<15, uint8_t>);
BENCHMARK(std_swap<26, uint8_t>);
BENCHMARK(std_swap<38, uint8_t>);
BENCHMARK(std_swap<60, uint8_t>);
BENCHMARK(std_swap<125, uint8_t>);
BENCHMARK(std_swap<800, uint8_t>);
BENCHMARK(std_swap<3000, uint8_t>);
BENCHMARK(std_swap<9000, uint8_t>);

BENCHMARK(std_swap_ranges<uint8_t>)
BENCHMARK(std_swap<1, uint8_t, highly_aligned>);
BENCHMARK(std_swap<5, uint8_t, highly_aligned>);
BENCHMARK(std_swap<15, uint8_t, highly_aligned>);
BENCHMARK(std_swap<26, uint8_t, highly_aligned>);
BENCHMARK(std_swap<38, uint8_t, highly_aligned>);
BENCHMARK(std_swap<60, uint8_t, highly_aligned>);
BENCHMARK(std_swap<125, uint8_t, highly_aligned>);
BENCHMARK(std_swap<800, uint8_t, highly_aligned>);
BENCHMARK(std_swap<3000, uint8_t, highly_aligned>);
BENCHMARK(std_swap<9000, uint8_t, highly_aligned>);

BENCHMARK(std_swap<1, uint8_t, not_highly_aligned>);
BENCHMARK(std_swap<5, uint8_t, not_highly_aligned>);
BENCHMARK(std_swap<15, uint8_t, not_highly_aligned>);
BENCHMARK(std_swap<26, uint8_t, not_highly_aligned>);
BENCHMARK(std_swap<38, uint8_t, not_highly_aligned>);
BENCHMARK(std_swap<60, uint8_t, not_highly_aligned>);
BENCHMARK(std_swap<125, uint8_t, not_highly_aligned>);
BENCHMARK(std_swap<800, uint8_t, not_highly_aligned>);
BENCHMARK(std_swap<3000, uint8_t, not_highly_aligned>);
BENCHMARK(std_swap<9000, uint8_t, not_highly_aligned>);

BENCHMARK(std_swap_ranges<uint8_t, highly_aligned_allocator>)
->Arg(1)
->Arg(5)
->Arg(15)
->Arg(26)
->Arg(38)
->Arg(60)
->Arg(125)
->Arg(800)
->Arg(3000)
->Arg(9000);

BENCHMARK(std_swap_ranges<uint8_t, not_highly_aligned_allocator>)
->Arg(1)
->Arg(5)
->Arg(15)
Expand Down

0 comments on commit 51d34c4

Please sign in to comment.