Skip to content

Commit d3b6388

Browse files
jan-wassenberg, copybara-github
authored and committed
print gain stats for pool autotune
PiperOrigin-RevId: 739163094
1 parent 0fccdf8 commit d3b6388

File tree

1 file changed

+22
-6
lines changed

1 file changed

+22
-6
lines changed

hwy/contrib/thread_pool/thread_pool.h

+22 -6
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
#include "hwy/contrib/thread_pool/futex.h"
4343
#include "hwy/contrib/thread_pool/spin.h"
4444
#include "hwy/contrib/thread_pool/topology.h"
45+
#include "hwy/stats.h"
4546
#include "hwy/timer.h"
4647

4748
// Define to HWY_NOINLINE to see profiles of `WorkerRun*` and waits.
@@ -73,7 +74,11 @@ enum class PoolWaitMode : uint8_t { kBlock = 1, kSpin };
7374

7475
namespace pool {
7576

76-
static constexpr int kVerbosity = 0;
77+
#ifndef HWY_POOL_VERBOSITY
78+
#define HWY_POOL_VERBOSITY 0
79+
#endif
80+
81+
static constexpr int kVerbosity = HWY_POOL_VERBOSITY;
7782

7883
// Some CPUs already have more than this many threads, but rather than one
7984
// large pool, we assume applications create multiple pools, ideally per
@@ -1128,12 +1133,23 @@ class alignas(HWY_ALIGNMENT) ThreadPool {
11281133
ClearBusy(); // before `SendConfig`
11291134
if (auto_tuner.Best()) { // just finished
11301135
HWY_IF_CONSTEXPR(pool::kVerbosity >= 1) {
1131-
const size_t idx_best =
1132-
auto_tuner.Best() - auto_tuner.Candidates().data();
1136+
const size_t idx_best = static_cast<size_t>(
1137+
auto_tuner.Best() - auto_tuner.Candidates().data());
1138+
HWY_DASSERT(idx_best < auto_tuner.Costs().size());
11331139
auto& AT = auto_tuner.Costs()[idx_best];
1134-
fprintf(stderr, " %s %f (%f %f %f)\n",
1135-
auto_tuner.Best()->ToString().c_str(), AT.EstimateCost(),
1136-
AT.Stddev(), AT.Lower(), AT.Upper());
1140+
const double best_cost = AT.EstimateCost();
1141+
HWY_DASSERT(best_cost > 0.0); // will divide by this below
1142+
1143+
Stats s_ratio;
1144+
for (size_t i = 0; i < auto_tuner.Costs().size(); ++i) {
1145+
if (i == idx_best) continue;
1146+
const double cost = auto_tuner.Costs()[i].EstimateCost();
1147+
s_ratio.Notify(static_cast<float>(cost / best_cost));
1148+
}
1149+
1150+
fprintf(stderr, " %s %5.0f +/- %4.0f. Gain %.2fx [%.2fx, %.2fx]\n",
1151+
auto_tuner.Best()->ToString().c_str(), best_cost, AT.Stddev(),
1152+
s_ratio.GeometricMean(), s_ratio.Min(), s_ratio.Max());
11371153
}
11381154
SendConfig(*auto_tuner.Best());
11391155
} else {

0 commit comments

Comments
 (0)