|
42 | 42 | #include "hwy/contrib/thread_pool/futex.h"
|
43 | 43 | #include "hwy/contrib/thread_pool/spin.h"
|
44 | 44 | #include "hwy/contrib/thread_pool/topology.h"
|
| 45 | +#include "hwy/stats.h" |
45 | 46 | #include "hwy/timer.h"
|
46 | 47 |
|
47 | 48 | // Define to HWY_NOINLINE to see profiles of `WorkerRun*` and waits.
|
@@ -73,7 +74,11 @@ enum class PoolWaitMode : uint8_t { kBlock = 1, kSpin };
|
73 | 74 |
|
74 | 75 | namespace pool {
|
75 | 76 |
|
76 |
| -static constexpr int kVerbosity = 0; |
// Compile-time verbosity level for the thread pool. `HWY_POOL_VERBOSITY` may
// be defined before this point (e.g. via a compiler flag) to override the
// level; if it is not defined, it defaults to 0. Code later in this file
// tests `pool::kVerbosity >= 1` before printing auto-tuner results to stderr.
| 77 | +#ifndef HWY_POOL_VERBOSITY |
| 78 | +#define HWY_POOL_VERBOSITY 0 |
| 79 | +#endif |
| 80 | + |
| 81 | +static constexpr int kVerbosity = HWY_POOL_VERBOSITY; |
77 | 82 |
|
78 | 83 | // Some CPUs already have more than this many threads, but rather than one
|
79 | 84 | // large pool, we assume applications create multiple pools, ideally per
|
@@ -1128,12 +1133,23 @@ class alignas(HWY_ALIGNMENT) ThreadPool {
|
1128 | 1133 | ClearBusy(); // before `SendConfig`
|
1129 | 1134 | if (auto_tuner.Best()) { // just finished
|
1130 | 1135 | HWY_IF_CONSTEXPR(pool::kVerbosity >= 1) {
|
1131 |
| - const size_t idx_best = |
1132 |
| - auto_tuner.Best() - auto_tuner.Candidates().data(); |
| 1136 | + const size_t idx_best = static_cast<size_t>( |
| 1137 | + auto_tuner.Best() - auto_tuner.Candidates().data()); |
| 1138 | + HWY_DASSERT(idx_best < auto_tuner.Costs().size()); |
1133 | 1139 | auto& AT = auto_tuner.Costs()[idx_best];
|
1134 |
| - fprintf(stderr, " %s %f (%f %f %f)\n", |
1135 |
| - auto_tuner.Best()->ToString().c_str(), AT.EstimateCost(), |
1136 |
| - AT.Stddev(), AT.Lower(), AT.Upper()); |
| 1140 | + const double best_cost = AT.EstimateCost(); |
| 1141 | + HWY_DASSERT(best_cost > 0.0); // will divide by this below |
| 1142 | + |
| 1143 | + Stats s_ratio; |
| 1144 | + for (size_t i = 0; i < auto_tuner.Costs().size(); ++i) { |
| 1145 | + if (i == idx_best) continue; |
| 1146 | + const double cost = auto_tuner.Costs()[i].EstimateCost(); |
| 1147 | + s_ratio.Notify(static_cast<float>(cost / best_cost)); |
| 1148 | + } |
| 1149 | + |
| 1150 | + fprintf(stderr, " %s %5.0f +/- %4.0f. Gain %.2fx [%.2fx, %.2fx]\n", |
| 1151 | + auto_tuner.Best()->ToString().c_str(), best_cost, AT.Stddev(), |
| 1152 | + s_ratio.GeometricMean(), s_ratio.Min(), s_ratio.Max()); |
1137 | 1153 | }
|
1138 | 1154 | SendConfig(*auto_tuner.Best());
|
1139 | 1155 | } else {
|
|
0 commit comments