Skip to content

Commit 1f6c5b1

Browse files
authored
Merge pull request #396 from sam-herman/add-jmh
Add jmh benchmarks
2 parents d59232f + 6b289f0 commit 1f6c5b1

File tree

8 files changed

+366
-1
lines changed

8 files changed

+366
-1
lines changed

.gitignore

+3
Original file line numberDiff line numberDiff line change
@@ -33,3 +33,6 @@ hdf5/
3333

3434
### aider
3535
.aider*
36+
37+
# JMH generated files
38+
dependency-reduced-pom.xml

benchmarks-jmh/README.md

+35
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
# JMH Benchmarks
2+
Microbenchmarks for jVector. While `Bench.java` focuses on recall, the JMH benchmarks
3+
are mostly targeting scalability and latency aspects.
4+
5+
## Building and running the benchmark
6+
7+
1. Build the project, then run the benchmark jar:
8+
```shell
9+
mvn clean install -DskipTests=true
10+
java --enable-native-access=ALL-UNNAMED \
11+
--add-modules=jdk.incubator.vector \
12+
-XX:+HeapDumpOnOutOfMemoryError \
13+
-Xmx14G -Djvector.experimental.enable_native_vectorization=true \
14+
-jar benchmarks-jmh/target/benchmarks-jmh-4.0.0-beta.2-SNAPSHOT.jar
15+
```
16+
17+
You can pass additional, optional JMH arguments on the command line. For example, to run the benchmarks with 4 forks, 5 warmup iterations, 5 measurement iterations, 2 threads, and 10 seconds of warmup time per iteration, use the following command:
18+
```shell
19+
java --enable-native-access=ALL-UNNAMED \
20+
--add-modules=jdk.incubator.vector \
21+
-XX:+HeapDumpOnOutOfMemoryError \
22+
-Xmx14G -Djvector.experimental.enable_native_vectorization=true \
23+
-jar benchmarks-jmh/target/benchmarks-jmh-4.0.0-beta.2-SNAPSHOT.jar \
24+
-f 4 -wi 5 -i 5 -t 2 -w 10s
25+
```
26+
27+
Common JMH command line options you can use in the configuration or command line:
28+
- `-f <num>` - Number of forks
29+
- `-wi <num>` - Number of warmup iterations
30+
- `-i <num>` - Number of measurement iterations
31+
- `-w <time>` - Warmup time per iteration
32+
- `-r <time>` - Measurement time per iteration
33+
- `-t <num>` - Number of threads
34+
- `-p <param>=<value>` - Benchmark parameters
35+
- `-prof <profiler>` - Add profiler

benchmarks-jmh/pom.xml

+99
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Build descriptor for the benchmarks-jmh module. It compiles the JMH benchmarks
  (running the JMH annotation processor during compilation) and shades them with
  their dependencies into a single jar whose entry point is org.openjdk.jmh.Main.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <parent>
        <groupId>io.github.jbellis</groupId>
        <artifactId>jvector-parent</artifactId>
        <version>${revision}</version>
    </parent>

    <artifactId>benchmarks-jmh</artifactId>

    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <maven.compiler.release>22</maven.compiler.release>
        <jmh.version>1.37</jmh.version>
    </properties>

    <dependencies>
        <!-- Sibling jvector modules; all share this project's groupId and version -->
        <dependency>
            <groupId>${project.groupId}</groupId>
            <artifactId>jvector-base</artifactId>
            <version>${project.version}</version>
        </dependency>
        <dependency>
            <groupId>${project.groupId}</groupId>
            <artifactId>jvector-twenty</artifactId>
            <version>${project.version}</version>
        </dependency>
        <dependency>
            <groupId>${project.groupId}</groupId>
            <artifactId>jvector-native</artifactId>
            <version>${project.version}</version>
        </dependency>
        <dependency>
            <groupId>${project.groupId}</groupId>
            <artifactId>jvector-examples</artifactId>
            <version>${project.version}</version>
        </dependency>

        <!-- JMH runtime -->
        <dependency>
            <groupId>org.openjdk.jmh</groupId>
            <artifactId>jmh-core</artifactId>
            <version>${jmh.version}</version>
        </dependency>
        <!-- Annotation processor: needed at compile time only, so keep it out of the shaded jar -->
        <dependency>
            <groupId>org.openjdk.jmh</groupId>
            <artifactId>jmh-generator-annprocess</artifactId>
            <version>${jmh.version}</version>
            <scope>provided</scope>
        </dependency>

        <!-- SLF4J binding backed by Log4j 2 -->
        <dependency>
            <groupId>org.apache.logging.log4j</groupId>
            <artifactId>log4j-slf4j2-impl</artifactId>
            <version>2.24.3</version>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <!-- Ensures that the JMH annotation processor runs during compilation -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.13.0</version>
                <configuration>
                    <annotationProcessorPaths>
                        <path>
                            <groupId>org.openjdk.jmh</groupId>
                            <artifactId>jmh-generator-annprocess</artifactId>
                            <version>${jmh.version}</version>
                        </path>
                    </annotationProcessorPaths>
                </configuration>
            </plugin>

            <!-- Shade this so we can run as a standalone jar -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>3.3.0</version>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <transformers>
                                <!-- Makes the jar runnable with "java -jar" -->
                                <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                                    <mainClass>org.openjdk.jmh.Main</mainClass>
                                </transformer>
                                <!-- Merges META-INF/services entries instead of keeping only the first jar's copy -->
                                <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
                            </transformers>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
/*
2+
* Copyright DataStax, Inc.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package io.github.jbellis.jvector.bench;
17+
18+
import io.github.jbellis.jvector.example.SiftSmall;
19+
import io.github.jbellis.jvector.graph.*;
20+
import io.github.jbellis.jvector.graph.similarity.BuildScoreProvider;
21+
import io.github.jbellis.jvector.util.Bits;
22+
import io.github.jbellis.jvector.vector.VectorSimilarityFunction;
23+
import io.github.jbellis.jvector.vector.VectorizationProvider;
24+
import io.github.jbellis.jvector.vector.types.VectorFloat;
25+
import io.github.jbellis.jvector.vector.types.VectorTypeSupport;
26+
import org.openjdk.jmh.annotations.*;
27+
import org.openjdk.jmh.infra.Blackhole;
28+
import org.slf4j.Logger;
29+
import org.slf4j.LoggerFactory;
30+
31+
import java.io.IOException;
32+
import java.util.ArrayList;
33+
import java.util.concurrent.TimeUnit;
34+
35+
@BenchmarkMode(Mode.AverageTime)
36+
@OutputTimeUnit(TimeUnit.MILLISECONDS)
37+
@State(Scope.Thread)
38+
@Fork(1)
39+
@Warmup(iterations = 2)
40+
@Measurement(iterations = 5)
41+
@Threads(1)
42+
public class RandomVectorsBenchmark {
43+
private static final Logger log = LoggerFactory.getLogger(RandomVectorsBenchmark.class);
44+
private static final VectorTypeSupport VECTOR_TYPE_SUPPORT = VectorizationProvider.getInstance().getVectorTypeSupport();
45+
private RandomAccessVectorValues ravv;
46+
private ArrayList<VectorFloat<?>> baseVectors;
47+
private ArrayList<VectorFloat<?>> queryVectors;
48+
private GraphIndexBuilder graphIndexBuilder;
49+
private GraphIndex graphIndex;
50+
int originalDimension;
51+
@Param({"1000", "10000", "100000", "1000000"})
52+
int numBaseVectors;
53+
@Param({"10"})
54+
int numQueryVectors;
55+
56+
@Setup
57+
public void setup() throws IOException {
58+
originalDimension = 128; // Example dimension, can be adjusted
59+
60+
baseVectors = new ArrayList<>(numBaseVectors);
61+
queryVectors = new ArrayList<>(numQueryVectors);
62+
63+
for (int i = 0; i < numBaseVectors; i++) {
64+
VectorFloat<?> vector = createRandomVector(originalDimension);
65+
baseVectors.add(vector);
66+
}
67+
68+
for (int i = 0; i < numQueryVectors; i++) {
69+
VectorFloat<?> vector = createRandomVector(originalDimension);
70+
queryVectors.add(vector);
71+
}
72+
73+
// wrap the raw vectors in a RandomAccessVectorValues
74+
ravv = new ListRandomAccessVectorValues(baseVectors, originalDimension);
75+
76+
// score provider using the raw, in-memory vectors
77+
BuildScoreProvider bsp = BuildScoreProvider.randomAccessScoreProvider(ravv, VectorSimilarityFunction.EUCLIDEAN);
78+
79+
graphIndexBuilder = new GraphIndexBuilder(bsp,
80+
ravv.dimension(),
81+
16, // graph degree
82+
100, // construction search depth
83+
1.2f, // allow degree overflow during construction by this factor
84+
1.2f); // relax neighbor diversity requirement by this factor
85+
graphIndex = graphIndexBuilder.build(ravv);
86+
}
87+
88+
private VectorFloat<?> createRandomVector(int dimension) {
89+
VectorFloat<?> vector = VECTOR_TYPE_SUPPORT.createFloatVector(dimension);
90+
for (int i = 0; i < dimension; i++) {
91+
vector.set(i, (float) Math.random());
92+
}
93+
return vector;
94+
}
95+
96+
@TearDown
97+
public void tearDown() throws IOException {
98+
baseVectors.clear();
99+
queryVectors.clear();
100+
graphIndexBuilder.close();
101+
}
102+
103+
@Benchmark
104+
public void testOnHeapRandomVectors(Blackhole blackhole) {
105+
var queryVector = SiftSmall.randomVector(originalDimension);
106+
// Your benchmark code here
107+
var searchResult = GraphSearcher.search(queryVector,
108+
10, // number of results
109+
ravv, // vectors we're searching, used for scoring
110+
VectorSimilarityFunction.EUCLIDEAN, // how to score
111+
graphIndex,
112+
Bits.ALL); // valid ordinals to consider
113+
blackhole.consume(searchResult);
114+
}
115+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
/*
2+
* Copyright DataStax, Inc.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package io.github.jbellis.jvector.bench;
17+
18+
import io.github.jbellis.jvector.example.SiftSmall;
19+
import io.github.jbellis.jvector.example.util.SiftLoader;
20+
import io.github.jbellis.jvector.graph.*;
21+
import io.github.jbellis.jvector.graph.similarity.BuildScoreProvider;
22+
import io.github.jbellis.jvector.util.Bits;
23+
import io.github.jbellis.jvector.vector.VectorSimilarityFunction;
24+
import io.github.jbellis.jvector.vector.types.VectorFloat;
25+
import org.openjdk.jmh.annotations.*;
26+
import org.openjdk.jmh.infra.Blackhole;
27+
import org.slf4j.Logger;
28+
import org.slf4j.LoggerFactory;
29+
30+
import java.io.IOException;
31+
import java.util.ArrayList;
32+
import java.util.Set;
33+
import java.util.concurrent.TimeUnit;
34+
35+
@BenchmarkMode(Mode.AverageTime)
36+
@OutputTimeUnit(TimeUnit.MILLISECONDS)
37+
@State(Scope.Thread)
38+
@Fork(1)
39+
@Warmup(iterations = 2)
40+
@Measurement(iterations = 5)
41+
@Threads(1)
42+
public class StaticSetVectorsBenchmark {
43+
private static final Logger log = LoggerFactory.getLogger(StaticSetVectorsBenchmark.class);
44+
private RandomAccessVectorValues ravv;
45+
private ArrayList<VectorFloat<?>> baseVectors;
46+
private ArrayList<VectorFloat<?>> queryVectors;
47+
private ArrayList<Set<Integer>> groundTruth;
48+
private GraphIndexBuilder graphIndexBuilder;
49+
private GraphIndex graphIndex;
50+
int originalDimension;
51+
52+
@Setup
53+
public void setup() throws IOException {
54+
var siftPath = "siftsmall";
55+
baseVectors = SiftLoader.readFvecs(String.format("%s/siftsmall_base.fvecs", siftPath));
56+
queryVectors = SiftLoader.readFvecs(String.format("%s/siftsmall_query.fvecs", siftPath));
57+
groundTruth = SiftLoader.readIvecs(String.format("%s/siftsmall_groundtruth.ivecs", siftPath));
58+
log.info("base vectors size: {}, query vectors size: {}, loaded, dimensions {}",
59+
baseVectors.size(), queryVectors.size(), baseVectors.get(0).length());
60+
originalDimension = baseVectors.get(0).length();
61+
// wrap the raw vectors in a RandomAccessVectorValues
62+
ravv = new ListRandomAccessVectorValues(baseVectors, originalDimension);
63+
64+
// score provider using the raw, in-memory vectors
65+
BuildScoreProvider bsp = BuildScoreProvider.randomAccessScoreProvider(ravv, VectorSimilarityFunction.EUCLIDEAN);
66+
67+
graphIndexBuilder = new GraphIndexBuilder(bsp,
68+
ravv.dimension(),
69+
16, // graph degree
70+
100, // construction search depth
71+
1.2f, // allow degree overflow during construction by this factor
72+
1.2f); // relax neighbor diversity requirement by this factor
73+
graphIndex = graphIndexBuilder.build(ravv);
74+
}
75+
76+
@TearDown
77+
public void tearDown() throws IOException {
78+
baseVectors.clear();
79+
queryVectors.clear();
80+
groundTruth.clear();
81+
graphIndexBuilder.close();
82+
}
83+
84+
@Benchmark
85+
public void testOnHeapWithRandomQueryVectors(Blackhole blackhole) throws IOException {
86+
var queryVector = SiftSmall.randomVector(originalDimension);
87+
// Your benchmark code here
88+
var searchResult = GraphSearcher.search(queryVector,
89+
10, // number of results
90+
ravv, // vectors we're searching, used for scoring
91+
VectorSimilarityFunction.EUCLIDEAN, // how to score
92+
graphIndex,
93+
Bits.ALL); // valid ordinals to consider
94+
blackhole.consume(searchResult);
95+
}
96+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
<!-- Log4j 2 configuration for the benchmarks: a single console appender attached to the root logger. -->
<Configuration status="INFO">
    <Appenders>
        <!-- Prints events to standard output as: time [thread] level logger - message -->
        <Console name="Console" target="SYSTEM_OUT">
            <PatternLayout pattern="%d{HH:mm:ss.SSS} [%t] %-5level %logger{36} - %msg%n"/>
        </Console>
    </Appenders>
    <Loggers>
        <!-- The root logger is set to INFO and references the console appender -->
        <Root level="INFO">
            <AppenderRef ref="Console"/>
        </Root>
    </Loggers>
</Configuration>

pom.xml

+1
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@
5050
<module>jvector-tests</module>
5151
<module>jvector-multirelease</module>
5252
<module>jvector-examples</module>
53+
<module>benchmarks-jmh</module>
5354
</modules>
5455
<build>
5556
<resources>

rat-excludes.txt

+2-1
Original file line numberDiff line numberDiff line change
@@ -8,4 +8,5 @@ pom.xml
88
src/main/assembly/test-jar-with-dependencies.xml
99
src/assembly/mrjar.xml
1010
src/assembly/sourcesjar.xml
11-
src/main/java/io/github/jbellis/jvector/vector/cnative/*
11+
src/main/java/io/github/jbellis/jvector/vector/cnative/*
12+
src/main/resources/log4j2.xml

0 commit comments

Comments
 (0)