Skip to content

Commit 900649e

Browse files
mengdilin and facebook-github-bot
authored and committed
add benchmarking for hnsw flat based on efSearch
Summary: Borrowed from Alexander's HNSW benchmark, in which he tuned the efSearch parameter and search_bounded_queue (see https://fburl.com/code/keovfhxk). Differential Revision: D62652021
1 parent dda1dda commit 900649e

File tree

1 file changed

+31
-53
lines changed

1 file changed

+31
-53
lines changed

faiss/perf_tests/bench_hnsw.py

+31-53
Original file line numberDiff line numberDiff line change
@@ -48,10 +48,12 @@ def accumulate_perf_counter(phase: str, t: PerfCounters, counters: Dict[str, int
4848
def run_on_dataset(
4949
ds: Dataset,
5050
M: int,
51-
num_threads:
52-
int,
51+
num_threads:int,
52+
num_add_iterations: int,
53+
num_search_iterations: int,
5354
efSearch: int = 16,
54-
efConstruction: int = 40
55+
efConstruction: int = 40,
56+
search_bounded_queue: bool = True
5557
) -> Dict[str, int]:
5658
xq = ds.get_queries()
5759
xb = ds.get_database()
@@ -63,22 +65,27 @@ def run_on_dataset(
6365
# pyre-ignore[16]: Module `faiss` has no attribute `omp_set_num_threads`.
6466
faiss.omp_set_num_threads(num_threads)
6567
index = faiss.IndexHNSWFlat(d, M)
66-
index.hnsw.efConstruction = 40 # default
68+
index.hnsw.efConstruction = efConstruction # default
6769
with timed_execution() as t:
68-
index.add(xb)
70+
for _ in range(num_add_iterations):
71+
index.add(xb)
6972
counters = {}
7073
accumulate_perf_counter("add", t, counters)
7174
counters["nb"] = nb
75+
counters["num_add_iterations"] = num_add_iterations
7276

7377
index.hnsw.efSearch = efSearch
78+
index.hnsw.search_bounded_queue = search_bounded_queue
7479
with timed_execution() as t:
75-
D, I = index.search(xq, k)
80+
for _ in range(num_search_iterations):
81+
D, I = index.search(xq, k)
7682
accumulate_perf_counter("search", t, counters)
7783
counters["nq"] = nq
7884
counters["efSearch"] = efSearch
7985
counters["efConstruction"] = efConstruction
8086
counters["M"] = M
8187
counters["d"] = d
88+
counters["num_search_iterations"] = num_search_iterations
8289

8390
return counters
8491

@@ -89,61 +96,25 @@ def run(
8996
nq: int,
9097
M: int,
9198
num_threads: int,
99+
num_add_iterations: int = 1,
100+
num_search_iterations: int = 1,
92101
efSearch: int = 16,
93102
efConstruction: int = 40,
103+
search_bounded_queue: bool = True,
94104
) -> Dict[str, int]:
95105
ds = SyntheticDataset(d=d, nb=nb, nt=0, nq=nq, metric="L2", seed=1338)
96106
return run_on_dataset(
97107
ds,
98108
M=M,
109+
num_add_iterations=num_add_iterations,
110+
num_search_iterations=num_search_iterations,
99111
num_threads=num_threads,
100112
efSearch=efSearch,
101113
efConstruction=efConstruction,
114+
search_bounded_queue=search_bounded_queue,
102115
)
103116

104117

105-
def _merge_counters(
106-
element: Dict[str, int], accu: Optional[Dict[str, int]] = None
107-
) -> Dict[str, int]:
108-
if accu is None:
109-
return dict(element)
110-
else:
111-
assert accu.keys() <= element.keys(), (
112-
"Accu keys must be a subset of element keys: "
113-
f"{accu.keys()} not a subset of {element.keys()}"
114-
)
115-
for key in accu.keys():
116-
if is_perf_counter(key):
117-
accu[key] += element[key]
118-
return accu
119-
120-
121-
def run_with_iterations(
122-
iterations: int,
123-
d: int,
124-
nb: int,
125-
nq: int,
126-
M: int,
127-
num_threads: int,
128-
efSearch: int = 16,
129-
efConstruction: int = 40,
130-
) -> Dict[str, int]:
131-
result = None
132-
for _ in range(iterations):
133-
counters = run(
134-
d=d,
135-
nb=nb,
136-
nq=nq,
137-
M=M,
138-
num_threads=num_threads,
139-
efSearch=efSearch,
140-
efConstruction=efConstruction,
141-
)
142-
result = _merge_counters(counters, result)
143-
assert result is not None
144-
return result
145-
146-
147118
def _accumulate_counters(
148119
element: Dict[str, int], accu: Optional[Dict[str, List[int]]] = None
149120
) -> Dict[str, List[int]]:
@@ -165,10 +136,13 @@ def main():
165136
parser.add_argument("-M", "--M", type=int, required=True)
166137
parser.add_argument("-t", "--num-threads", type=int, required=True)
167138
parser.add_argument("-w", "--warm-up-iterations", type=int, default=0)
168-
parser.add_argument("-i", "--num-iterations", type=int, default=20)
139+
parser.add_argument("-i", "--num-search-iterations", type=int, default=20)
140+
parser.add_argument("-i", "--num-add-iterations", type=int, default=20)
169141
parser.add_argument("-r", "--num-repetitions", type=int, default=20)
170142
parser.add_argument("-s", "--ef-search", type=int, default=16)
171143
parser.add_argument("-c", "--ef-construction", type=int, default=40)
144+
parser.add_argument("-b", "--search-bounded-queue", action='store_true')
145+
172146
parser.add_argument("-n", "--nb", type=int, default=5000)
173147
parser.add_argument("-q", "--nq", type=int, default=500)
174148
parser.add_argument("-d", "--d", type=int, default=128)
@@ -177,15 +151,17 @@ def main():
177151
if args.warm_up_iterations > 0:
178152
print(f"Warming up for {args.warm_up_iterations} iterations...")
179153
# warm-up
180-
run_with_iterations(
181-
iterations=args.warm_up_iterations,
154+
run(
155+
num_search_iterations=args.warm_up_iterations,
156+
num_add_iterations=args.warm_up_iterations,
182157
d=args.d,
183158
nb=args.nb,
184159
nq=args.nq,
185160
M=args.M,
186161
num_threads=args.num_threads,
187162
efSearch=args.ef_search,
188163
efConstruction=args.ef_construction,
164+
search_bounded_queue=args.search_bounded_queue,
189165
)
190166
print(
191167
f"Running benchmark with dataset(nb={args.nb}, nq={args.nq}, "
@@ -194,15 +170,17 @@ def main():
194170
)
195171
result = None
196172
for _ in range(args.num_repetitions):
197-
counters = run_with_iterations(
198-
iterations=args.num_iterations,
173+
counters = run(
174+
num_search_iterations=args.num_search_iterations,
175+
num_add_iterations=args.num_add_iterations,
199176
d=args.d,
200177
nb=args.nb,
201178
nq=args.nq,
202179
M=args.M,
203180
num_threads=args.num_threads,
204181
efSearch=args.ef_search,
205182
efConstruction=args.ef_construction,
183+
search_bounded_queue=args.search_bounded_queue,
206184
)
207185
result = _accumulate_counters(counters, result)
208186
assert result is not None

0 commit comments

Comments
 (0)