champsim_branch_hashed_perceptron.cc.inc
/*
This code implements a hashed perceptron branch predictor using geometric
history lengths and dynamic threshold setting. It was written by Daniel
A. Jiménez in March 2019.

The original perceptron branch predictor is from Jiménez and Lin, "Dynamic
Branch Prediction with Perceptrons," HPCA 2001.

The idea of using multiple independently indexed tables of perceptron weights
is from Jiménez, "Fast Path-Based Neural Branch Prediction," MICRO 2003, and
later expanded in "Piecewise Linear Branch Prediction" from ISCA 2005.

The idea of using hashes of branch history to reduce the number of independent
tables is documented in three contemporaneous papers:

1. Seznec, "Revisiting the Perceptron Predictor," IRISA technical report, 2004.

2. Tarjan and Skadron, "Revisiting the Perceptron Predictor Again," UVA
technical report, 2004, expanded and published in ACM TACO 2005 as "Merging
path and gshare indexing in perceptron branch prediction"; introduces the term
"hashed perceptron."

3. Loh and Jiménez, "Reducing the Power and Complexity of Path-Based Neural
Branch Prediction," WCED 2005.

The ideas of using "geometric history lengths," i.e. hashing into tables with
histories of exponentially increasing length, as well as dynamically adjusting
the theta parameter, are from Seznec, "The O-GEHL Branch Predictor," from CBP
2004, expanded later as "Analysis of the O-GEometric History Length Branch
Predictor" in ISCA 2005.

This code uses these ideas, but prefers simplicity over absolute accuracy (I
wrote it in about an hour and later spent more time on this comment block than
I did on the code). These papers and subsequent papers by Jiménez and other
authors significantly improve the accuracy of perceptron-based predictors but
involve tricks and analysis beyond the needs of a tool like ChampSim that
targets cache optimizations. If you want accuracy at any cost, see the winners
of the latest branch prediction contest, CBP 2016 as of this writing, but
prepare to have your face melted off by the complexity of the code you find
there. If you are a student being asked to code a good branch predictor for
your computer architecture class, don't copy this code; there are much better
sources for you to plagiarize.
*/
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "ooo_cpu.h" // needed for O3_CPU, the cpu index, and NUM_CPUS
// this many tables
#define NTABLES 16
// maximum history length
#define MAXHIST 232
// minimum history length (for table 1; table 0 is biases)
#define MINHIST 3
// speed for dynamic threshold setting; theta is adjusted whenever the
// training counter below reaches +/-SPEED
#define SPEED 18
// 12-bit indices for the tables
#define LOG_TABLE_SIZE 12
#define TABLE_SIZE (1 << LOG_TABLE_SIZE)
// this many 12-bit words will be kept in the global history
#define NGHIST_WORDS (MAXHIST / LOG_TABLE_SIZE + 1)
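// with MAXHIST = 232 and LOG_TABLE_SIZE = 12, this works out to
// 232/12 + 1 = 20 words, i.e. 240 bits, enough to hold the full history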
namespace
{
// geometric global history lengths
inline constexpr int history_lengths[NTABLES] = {0, 3, 4, 6, 8, 10, 14, 19, 26, 36, 49, 67, 91, 125, 170, MAXHIST};
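// each length is roughly 1.36x the previous one, the "geometric" progression
// from the O-GEHL paper; table 0 has length 0, so it is indexed by PC alone
// and acts as a bias table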
// tables of weights; stored as int for convenience, but saturated to the
// 8-bit range [-128, 127] by the update rule below
int tables[NUM_CPUS][NTABLES][TABLE_SIZE];
// words that store the global history
unsigned int ghist_words[NUM_CPUS][NGHIST_WORDS];
// remember the indices into the tables from prediction to update
uint64_t indices[NUM_CPUS][NTABLES];
// training threshold theta, given an initial value in
// initialize_branch_predictor() and adjusted dynamically thereafter
int theta[NUM_CPUS],
    // counter for the dynamic threshold setting algorithm
    tc[NUM_CPUS],
    // perceptron sum, remembered from prediction to update
    yout[NUM_CPUS];
} // namespace
void O3_CPU::initialize_branch_predictor()
{
  // zero out the weights tables
  memset(::tables, 0, sizeof(::tables));

  // zero out the global history
  memset(::ghist_words, 0, sizeof(::ghist_words));

  // make a reasonable theta
  for (unsigned i = 0; i < NUM_CPUS; i++)
    ::theta[i] = 10;
}
uint8_t O3_CPU::predict_branch(uint64_t pc)
{
  // initialize perceptron sum
  ::yout[cpu] = 0;

  // for each table...
  for (int i = 0; i < NTABLES; i++) {
    // n is the history length for this table
    int n = ::history_lengths[i];

    // hash global history bits 0..n-1 into x by XORing the words from the
    // ghist_words array
    uint64_t x = 0;

    // most of the words are 12 bits long
    int most_words = n / LOG_TABLE_SIZE;

    // the last word is fewer than 12 bits
    int last_word = n % LOG_TABLE_SIZE;

    // XOR up to the next-to-the-last word
    int j;
    for (j = 0; j < most_words; j++)
      x ^= ::ghist_words[cpu][j];

    // XOR in the last word
    x ^= ::ghist_words[cpu][j] & ((1 << last_word) - 1);

    // XOR in the PC to spread accesses around (like gshare)
    x ^= pc;

    // stay within the table size
    x &= TABLE_SIZE - 1;
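    // worked example for the longest table (i = 15): n = 232, so the loop
    // XORs most_words = 232/12 = 19 full history words, then the low
    // last_word = 232%12 = 4 bits of word 19, then the PC; the low 12 bits
    // of the result index the table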
    // remember this index for update
    ::indices[cpu][i] = x;

    // add the selected weight to the perceptron sum
    ::yout[cpu] += ::tables[cpu][i][x];
  }
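  // predict taken iff the perceptron sum is positive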
  return ::yout[cpu] >= 1;
}
void O3_CPU::last_branch_result(uint64_t pc, uint64_t branch_target, uint8_t taken, uint8_t branch_type)
{
  // was this prediction correct?
  bool correct = taken == (::yout[cpu] >= 1);

  // insert this branch outcome into the global history
  bool b = taken;
  for (int i = 0; i < NGHIST_WORDS; i++) {
    // shift b into the lsb of the current word
    ::ghist_words[cpu][i] <<= 1;
    ::ghist_words[cpu][i] |= b;

    // get b as the previous msb of the current word
    b = !!(::ghist_words[cpu][i] & TABLE_SIZE);
    ::ghist_words[cpu][i] &= TABLE_SIZE - 1;
  }
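  // the words form a 240-bit shift register chain; the bit carried out of
  // the last word is history older than MAXHIST and simply falls off the end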
  // get the magnitude of yout
  int a = (::yout[cpu] < 0) ? -::yout[cpu] : ::yout[cpu];

  // perceptron learning rule: train if misprediction or weak correct prediction
  if (!correct || a < ::theta[cpu]) {
    // update weights
    for (int i = 0; i < NTABLES; i++) {
      // which weight did we use to compute yout?
      int* c = &::tables[cpu][i][::indices[cpu][i]];

      // increment if taken, decrement if not, saturating at 127/-128
      if (taken) {
        if (*c < 127)
          (*c)++;
      } else {
        if (*c > -128)
          (*c)--;
      }
    }

    // dynamic threshold setting from Seznec's O-GEHL paper
    if (!correct) {
      // increase theta after enough mispredictions
      ::tc[cpu]++;
      if (::tc[cpu] >= SPEED) {
        ::theta[cpu]++;
        ::tc[cpu] = 0;
      }
    } else if (a < ::theta[cpu]) {
      // decrease theta after enough weak but correct predictions
      ::tc[cpu]--;
      if (::tc[cpu] <= -SPEED) {
        ::theta[cpu]--;
        ::tc[cpu] = 0;
      }
    }
  }
}
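
// rough storage estimate per core, from the constants above: the weight
// tables hold NTABLES * TABLE_SIZE = 16 * 4096 = 65536 weights, or 64 KB at
// the 8-bit range enforced by the update rule; the global history adds
// NGHIST_WORDS * 12 = 240 bits, plus the saved per-table indices and the
// theta/tc/yout bookkeeping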