@@ -48,10 +48,12 @@ def accumulate_perf_counter(phase: str, t: PerfCounters, counters: Dict[str, int
48
48
def run_on_dataset (
49
49
ds : Dataset ,
50
50
M : int ,
51
- num_threads :
52
- int ,
51
+ num_threads :int ,
52
+ num_add_iterations : int ,
53
+ num_search_iterations : int ,
53
54
efSearch : int = 16 ,
54
- efConstruction : int = 40
55
+ efConstruction : int = 40 ,
56
+ search_bounded_queue : bool = True
55
57
) -> Dict [str , int ]:
56
58
xq = ds .get_queries ()
57
59
xb = ds .get_database ()
@@ -63,22 +65,27 @@ def run_on_dataset(
63
65
# pyre-ignore[16]: Module `faiss` has no attribute `omp_set_num_threads`.
64
66
faiss .omp_set_num_threads (num_threads )
65
67
index = faiss .IndexHNSWFlat (d , M )
66
- index .hnsw .efConstruction = 40 # default
68
+ index .hnsw .efConstruction = efConstruction
67
69
with timed_execution () as t :
68
- index .add (xb )
70
+ for _ in range (num_add_iterations ):
71
+ index .add (xb )
69
72
counters = {}
70
73
accumulate_perf_counter ("add" , t , counters )
71
74
counters ["nb" ] = nb
75
+ counters ["num_add_iterations" ] = num_add_iterations
72
76
73
77
index .hnsw .efSearch = efSearch
78
+ index .hnsw .search_bounded_queue = search_bounded_queue
74
79
with timed_execution () as t :
75
- D , I = index .search (xq , k )
80
+ for _ in range (num_search_iterations ):
81
+ D , I = index .search (xq , k )
76
82
accumulate_perf_counter ("search" , t , counters )
77
83
counters ["nq" ] = nq
78
84
counters ["efSearch" ] = efSearch
79
85
counters ["efConstruction" ] = efConstruction
80
86
counters ["M" ] = M
81
87
counters ["d" ] = d
88
+ counters ["num_search_iterations" ] = num_search_iterations
82
89
83
90
return counters
84
91
@@ -89,61 +96,25 @@ def run(
89
96
nq : int ,
90
97
M : int ,
91
98
num_threads : int ,
99
+ num_add_iterations : int = 1 ,
100
+ num_search_iterations : int = 1 ,
92
101
efSearch : int = 16 ,
93
102
efConstruction : int = 40 ,
103
+ search_bounded_queue : bool = True ,
94
104
) -> Dict [str , int ]:
95
105
ds = SyntheticDataset (d = d , nb = nb , nt = 0 , nq = nq , metric = "L2" , seed = 1338 )
96
106
return run_on_dataset (
97
107
ds ,
98
108
M = M ,
109
+ num_add_iterations = num_add_iterations ,
110
+ num_search_iterations = num_search_iterations ,
99
111
num_threads = num_threads ,
100
112
efSearch = efSearch ,
101
113
efConstruction = efConstruction ,
114
+ search_bounded_queue = search_bounded_queue ,
102
115
)
103
116
104
117
105
- def _merge_counters (
106
- element : Dict [str , int ], accu : Optional [Dict [str , int ]] = None
107
- ) -> Dict [str , int ]:
108
- if accu is None :
109
- return dict (element )
110
- else :
111
- assert accu .keys () <= element .keys (), (
112
- "Accu keys must be a subset of element keys: "
113
- f"{ accu .keys ()} not a subset of { element .keys ()} "
114
- )
115
- for key in accu .keys ():
116
- if is_perf_counter (key ):
117
- accu [key ] += element [key ]
118
- return accu
119
-
120
-
121
- def run_with_iterations (
122
- iterations : int ,
123
- d : int ,
124
- nb : int ,
125
- nq : int ,
126
- M : int ,
127
- num_threads : int ,
128
- efSearch : int = 16 ,
129
- efConstruction : int = 40 ,
130
- ) -> Dict [str , int ]:
131
- result = None
132
- for _ in range (iterations ):
133
- counters = run (
134
- d = d ,
135
- nb = nb ,
136
- nq = nq ,
137
- M = M ,
138
- num_threads = num_threads ,
139
- efSearch = efSearch ,
140
- efConstruction = efConstruction ,
141
- )
142
- result = _merge_counters (counters , result )
143
- assert result is not None
144
- return result
145
-
146
-
147
118
def _accumulate_counters (
148
119
element : Dict [str , int ], accu : Optional [Dict [str , List [int ]]] = None
149
120
) -> Dict [str , List [int ]]:
@@ -165,10 +136,13 @@ def main():
165
136
parser .add_argument ("-M" , "--M" , type = int , required = True )
166
137
parser .add_argument ("-t" , "--num-threads" , type = int , required = True )
167
138
parser .add_argument ("-w" , "--warm-up-iterations" , type = int , default = 0 )
168
- parser .add_argument ("-i" , "--num-iterations" , type = int , default = 20 )
139
+ parser .add_argument ("-i" , "--num-search-iterations" , type = int , default = 20 )
140
+ parser .add_argument ("-a" , "--num-add-iterations" , type = int , default = 1 )  # distinct short flag: argparse raises ArgumentError on a duplicate "-i"; default 1 matches run()'s signature
169
141
parser .add_argument ("-r" , "--num-repetitions" , type = int , default = 20 )
170
142
parser .add_argument ("-s" , "--ef-search" , type = int , default = 16 )
171
143
parser .add_argument ("-c" , "--ef-construction" , type = int , default = 40 )
144
+ parser .add_argument ("-b" , "--search-bounded-queue" , action = argparse .BooleanOptionalAction , default = True )  # store_true would make the effective default False, silently overriding run()'s default of True
145
+
172
146
parser .add_argument ("-n" , "--nb" , type = int , default = 5000 )
173
147
parser .add_argument ("-q" , "--nq" , type = int , default = 500 )
174
148
parser .add_argument ("-d" , "--d" , type = int , default = 128 )
@@ -177,15 +151,17 @@ def main():
177
151
if args .warm_up_iterations > 0 :
178
152
print (f"Warming up for { args .warm_up_iterations } iterations..." )
179
153
# warm-up
180
- run_with_iterations (
181
- iterations = args .warm_up_iterations ,
154
+ run (
155
+ num_search_iterations = args .warm_up_iterations ,
156
+ num_add_iterations = args .warm_up_iterations ,  # NOTE(review): unlike the old run_with_iterations (fresh index per iteration), this re-adds xb into ONE index, growing it to nb x iterations vectors - confirm intended for warm-up
182
157
d = args .d ,
183
158
nb = args .nb ,
184
159
nq = args .nq ,
185
160
M = args .M ,
186
161
num_threads = args .num_threads ,
187
162
efSearch = args .ef_search ,
188
163
efConstruction = args .ef_construction ,
164
+ search_bounded_queue = args .search_bounded_queue ,
189
165
)
190
166
print (
191
167
f"Running benchmark with dataset(nb={ args .nb } , nq={ args .nq } , "
@@ -194,15 +170,17 @@ def main():
194
170
)
195
171
result = None
196
172
for _ in range (args .num_repetitions ):
197
- counters = run_with_iterations (
198
- iterations = args .num_iterations ,
173
+ counters = run (
174
+ num_search_iterations = args .num_search_iterations ,
175
+ num_add_iterations = args .num_add_iterations ,
199
176
d = args .d ,
200
177
nb = args .nb ,
201
178
nq = args .nq ,
202
179
M = args .M ,
203
180
num_threads = args .num_threads ,
204
181
efSearch = args .ef_search ,
205
182
efConstruction = args .ef_construction ,
183
+ search_bounded_queue = args .search_bounded_queue ,
206
184
)
207
185
result = _accumulate_counters (counters , result )
208
186
assert result is not None
0 commit comments