feat: New script to output table of benchmarks for README pasting. (#…

…2780) Benchmarks are cool man. They let you see how fast the thing goes. This adds a tiny benchmark lib to barretenberg. Use it e.g. ```c++ Timer proof_timer; auto proof = acir_composer.create_proof(constraint_system, witness, recursive); write_benchmark("proof_construction_time", proof_timer.milliseconds(), "acir_test", current_dir); ``` The `write_benchmark` function takes a name, value, and then an arbitrary number of label/value pairs. The output data is very raw. The library will auto set the type to number/string/bool. The output data is JSONL in the form e.g. ```jsonl {"timestamp": "2023-10-10T22:17:10Z", "name": "pk_construction_time", "type": "number", "value": 234, "threads": 128, "acir_test": "1_mul"} {"timestamp": "2023-10-10T22:17:10Z", "name": "proof_construction_time", "type": "number", "value": 173, "threads": 128, "acir_test": "1_mul"} {"timestamp": "2023-10-10T22:17:10Z", "name": "vk_construction_time", "type": "number", "value": 74, "threads": 128, "acir_test": "1_mul"} ``` The labels are for ingesters to group/slice the data as they wish. To enable benchmarking, set the env var e.g. `BENCHMARK_FD=3`, then redirect the given file descriptor number to wherever you want the benchmarks, e.g. ``` BENCHMARK_FD=3 ./run_acir_tests.sh 1_mul 3>&1 > /dev/null ``` Sends them to FD1 (stdout) and sends stdout to /dev/null. Thus this just prints out the benchmarks like above. Or: ``` BENCHMARK_FD=3 ./run_acir_tests.sh 1_mul 3>>benchmarks.jsonl ``` Appends them to file benchmarks.jsonl. Longer term vision for benchmarks is that our TS code (or any other code), outputs benchmarks in the same style. The build-system will expect benchmarks, if enabled, to have been written to a specific file in the container (or mounted volume), and will then upload the benchmarks to S3 for later, async processing (using e.g. glue -> athena -> quicksight/grafana). 
Both the container itself could manipulate the JSONL to inject properties specific to the test run, and the build-system could inject properties prior to upload such as `job-name` or `commit-hash` (although structured file-naming might solve this better).
AztecProtocol · Oct 28, 2023 · 6c20b45 · 6c20b45
1 parent 933f1b2
commit 6c20b45
Show file tree

Hide file tree

Showing 14 changed files with 362 additions and 26 deletions.
diff --git a/barretenberg/README.md b/barretenberg/README.md
@@ -7,6 +7,39 @@ As the spec solidifies, this should be less of an issue. Aztec and Barretenberg
 
 **This code is highly experimental, use at your own risk!**
 
+### Benchmarks!
+
+Table represents time in ms to build circuit and proof for each test on n threads.
+Ignores proving key construction.
+
+#### x86_64
+
+```
++--------------------------+------------+---------------+-----------+-----------+-----------+-----------+-----------+
+| Test                     | Gate Count | Subgroup Size |         1 |         4 |        16 |        32 |        64 |
++--------------------------+------------+---------------+-----------+-----------+-----------+-----------+-----------+
+| sha256                   | 38799      | 65536         |      5947 |      1653 |       729 |       476 |       388 |
+| ecdsa_secp256k1          | 41049      | 65536         |      6005 |      2060 |       963 |       693 |       583 |
+| ecdsa_secp256r1          | 67331      | 131072        |     12186 |      3807 |      1612 |      1351 |      1137 |
+| schnorr                  | 33740      | 65536         |      5817 |      1696 |       688 |       532 |       432 |
+| double_verify_proof      | 505513     | 524288        |     47841 |     15824 |      7970 |      6784 |      6082 |
++--------------------------+------------+---------------+-----------+-----------+-----------+-----------+-----------+
+```
+
+#### WASM
+
+```
++--------------------------+------------+---------------+-----------+-----------+-----------+-----------+-----------+
+| Test                     | Gate Count | Subgroup Size |         1 |         4 |        16 |        32 |        64 |
++--------------------------+------------+---------------+-----------+-----------+-----------+-----------+-----------+
+| sha256                   | 38799      | 65536         |     18764 |      5116 |      1854 |      1524 |      1635 |
+| ecdsa_secp256k1          | 41049      | 65536         |     19129 |      5595 |      2255 |      2097 |      2166 |
+| ecdsa_secp256r1          | 67331      | 131072        |     38815 |     11257 |      4744 |      3633 |      3702 |
+| schnorr                  | 33740      | 65536         |     18649 |      5244 |      2019 |      1498 |      1702 |
+| double_verify_proof      | 505513     | 524288        |    149652 |     45702 |     20811 |     16979 |     15679 |
++--------------------------+------------+---------------+-----------+-----------+-----------+-----------+-----------+
+```
+
 ### Dependencies
 
 - cmake >= 3.24

diff --git a/barretenberg/acir_tests/bench_acir_tests.sh b/barretenberg/acir_tests/bench_acir_tests.sh
@@ -0,0 +1,67 @@
+#!/bin/bash
+# Runs the given ACIR tests (or a default set) once per thread count, collects the
+# JSONL benchmarks each run writes to fd 3, and prints them as a table that can be
+# pasted into the README.
+#
+# Usage: ./bench_acir_tests.sh [test_name...]
+# Needs jq and awk on the PATH and ./run_acir_tests.sh in the current directory.
+
+TEST_NAMES=("$@")
+THREADS=(1 4 16 32 64)
+BENCHMARKS=$(mktemp)
+# Remove the temporary benchmark file however the script exits.
+trap 'rm -f "$BENCHMARKS"' EXIT
+
+if [ "${#TEST_NAMES[@]}" -eq 0 ]; then
+    TEST_NAMES=(sha256 ecdsa_secp256k1 ecdsa_secp256r1 schnorr double_verify_proof)
+fi
+
+for TEST in "${TEST_NAMES[@]}"; do
+    for HC in "${THREADS[@]}"; do
+        HARDWARE_CONCURRENCY=$HC BENCHMARK_FD=3 ./run_acir_tests.sh "$TEST" 3>>"$BENCHMARKS"
+    done
+done
+
+# Build results into string with \n delimited rows and space delimited values.
+TABLE_DATA=""
+for TEST in "${TEST_NAMES[@]}"; do
+    GATE_COUNT=$(jq -r --arg test "$TEST" 'select(.name == "gate_count" and .acir_test == $test) | .value' "$BENCHMARKS" | uniq)
+    SUBGROUP_SIZE=$(jq -r --arg test "$TEST" 'select(.name == "subgroup_size" and .acir_test == $test) | .value' "$BENCHMARKS" | uniq)
+    # Name in col 1, gate count in col 2, subgroup size in col 3.
+    # A missing value becomes "n/a": awk splits on whitespace, so an empty field would
+    # otherwise shift every later column one place to the left.
+    TABLE_DATA+="$TEST ${GATE_COUNT:-n/a} ${SUBGROUP_SIZE:-n/a}"
+    # Each thread timing in subsequent cols.
+    for HC in "${THREADS[@]}"; do
+        RESULT=$(jq -r --arg test "$TEST" --argjson hc "$HC" 'select(.name == "proof_construction_time" and .acir_test == $test and .threads == $hc) | .value' "$BENCHMARKS")
+        TABLE_DATA+=" ${RESULT:-n/a}"
+    done
+    TABLE_DATA+=$'\n'
+done
+
+# Trim the trailing newline.
+TABLE_DATA="${TABLE_DATA%$'\n'}"
+
+echo
+echo "Table represents time in ms to build circuit and proof for each test on n threads."
+echo "Ignores proving key construction."
+echo
+# Use awk to print the table. printf is used instead of `echo -e` so that the data is
+# passed through verbatim, without backslash interpretation.
+printf '%s\n' "$TABLE_DATA" | awk -v threads="${THREADS[*]}" 'BEGIN {
+    # split() returns the number of elements it produced.
+    len_threads = split(threads, t, " ");
+    print "+--------------------------+------------+---------------+" genseparator(len_threads);
+    print "| Test                     | Gate Count | Subgroup Size |" genthreadheaders(t, len_threads);
+    print "+--------------------------+------------+---------------+" genseparator(len_threads);
+}
+{
+    printf("| %-24s | %-10s | %-13s |", $1, $2, $3);
+    for (i = 4; i <= len_threads+3; i++) {
+        printf " %9s |", $(i);
+    }
+    print "";
+}
+END {
+    print "+--------------------------+------------+---------------+" genseparator(len_threads);
+}
+# The extra parameters after the gap are awk local variables, so the helpers do not
+# touch the global i used by the main rule.
+function genseparator(len,   res, i) {
+    for (i = 1; i <= len; i++) res = res "-----------+";
+    return res;
+}
+function genthreadheaders(t, len,   res, i) {
+    for (i = 1; i <= len; i++) res = res sprintf(" %9s |", t[i]);
+    return res;
+}
+'
diff --git a/barretenberg/acir_tests/flows/prove_and_verify.sh b/barretenberg/acir_tests/flows/prove_and_verify.sh
@@ -4,5 +4,5 @@ set -eu
 if [ -n "$VERBOSE" ]; then
   $BIN prove_and_verify -v -c $CRS_PATH -b ./target/acir.gz
 else
-  $BIN prove_and_verify -c $CRS_PATH -b ./target/acir.gz > /dev/null 2>&1
+  $BIN prove_and_verify -c $CRS_PATH -b ./target/acir.gz
 fi
diff --git a/barretenberg/acir_tests/run_acir_tests.sh b/barretenberg/acir_tests/run_acir_tests.sh
@@ -9,7 +9,7 @@ FLOW=${FLOW:-prove_and_verify}
 CRS_PATH=~/.bb-crs
 BRANCH=master
 VERBOSE=${VERBOSE:-}
-NAMED_TEST=${1:-}
+TEST_NAMES=("$@")
 
 FLOW_SCRIPT=$(realpath ./flows/${FLOW}.sh)
 
@@ -47,12 +47,15 @@ function test() {
   cd $1
 
   set +e
+  start=$(date +%s%3N)
   $FLOW_SCRIPT
   result=$?
+  end=$(date +%s%3N)
+  duration=$((end - start))
   set -eu
 
   if [ $result -eq 0 ]; then
-    echo -e "\033[32mPASSED\033[0m"
+    echo -e "\033[32mPASSED\033[0m ($duration ms)"
   else
     echo -e "\033[31mFAILED\033[0m"
     exit 1
@@ -61,9 +64,11 @@ function test() {
   cd ..
 }
 
-if [ -n "$NAMED_TEST" ]; then
-  echo -n "Testing $NAMED_TEST... "
-  test $NAMED_TEST
+if [ "${#TEST_NAMES[@]}" -ne 0 ]; then
+  for NAMED_TEST in "${TEST_NAMES[@]}"; do
+    echo -n "Testing $NAMED_TEST... "
+    test $NAMED_TEST
+  done
 else
   for TEST_NAME in $(find -maxdepth 1 -type d -not -path '.' | sed 's|^\./||'); do
     echo -n "Testing $TEST_NAME... "

diff --git a/barretenberg/cpp/dockerfiles/Dockerfile.wasm-linux-clang b/barretenberg/cpp/dockerfiles/Dockerfile.wasm-linux-clang
@@ -4,11 +4,8 @@ WORKDIR /usr/src/barretenberg/cpp
 COPY ./scripts/install-wasi-sdk.sh ./scripts/install-wasi-sdk.sh
 RUN ./scripts/install-wasi-sdk.sh
 COPY . .
-# Building both wasm's in parallel reduces build from 120s to 80s.
-RUN (cmake --preset wasm && cmake --build --preset wasm) & \
-    (cmake --preset wasm-threads && cmake --build --preset wasm-threads) & \
-    wait
-
+RUN cmake --preset wasm && cmake --build --preset wasm
+RUN cmake --preset wasm-threads && cmake --build --preset wasm-threads
 
 FROM scratch
 WORKDIR /usr/src/barretenberg/cpp

diff --git a/barretenberg/cpp/src/barretenberg/bb/main.cpp b/barretenberg/cpp/src/barretenberg/bb/main.cpp
@@ -1,9 +1,13 @@
+#include "barretenberg/dsl/acir_format/acir_format.hpp"
+#include "barretenberg/dsl/types.hpp"
 #include "config.hpp"
 #include "get_bytecode.hpp"
 #include "get_crs.hpp"
 #include "get_witness.hpp"
 #include "log.hpp"
+#include <barretenberg/common/benchmark.hpp>
 #include <barretenberg/common/container.hpp>
+#include <barretenberg/common/timer.hpp>
 #include <barretenberg/dsl/acir_format/acir_to_constraint_buf.hpp>
 #include <barretenberg/dsl/acir_proofs/acir_composer.hpp>
 #include <barretenberg/srs/global_crs.hpp>
@@ -16,6 +20,9 @@ using namespace barretenberg;
 std::string CRS_PATH = "./crs";
 bool verbose = false;
 
+const std::filesystem::path current_path = std::filesystem::current_path();
+const auto current_dir = current_path.filename().string();
+
 acir_proofs::AcirComposer init(acir_format::acir_format& constraint_system)
 {
     acir_proofs::AcirComposer acir_composer(0, verbose);
@@ -69,7 +76,20 @@ bool proveAndVerify(const std::string& bytecodePath, const std::string& witnessP
     auto witness = get_witness(witnessPath);
     auto acir_composer = init(constraint_system);
 
+    Timer pk_timer;
+    acir_composer.init_proving_key(constraint_system);
+    write_benchmark("pk_construction_time", pk_timer.milliseconds(), "acir_test", current_dir);
+    write_benchmark("gate_count", acir_composer.get_total_circuit_size(), "acir_test", current_dir);
+    write_benchmark("subgroup_size", acir_composer.get_circuit_subgroup_size(), "acir_test", current_dir);
+
+    Timer proof_timer;
     auto proof = acir_composer.create_proof(constraint_system, witness, recursive);
+    write_benchmark("proof_construction_time", proof_timer.milliseconds(), "acir_test", current_dir);
+
+    Timer vk_timer;
+    acir_composer.init_verification_key();
+    write_benchmark("vk_construction_time", vk_timer.milliseconds(), "acir_test", current_dir);
+
     auto verified = acir_composer.verify_proof(proof, recursive);
 
     vinfo("verified: ", verified);

diff --git a/barretenberg/cpp/src/barretenberg/common/benchmark.hpp b/barretenberg/cpp/src/barretenberg/common/benchmark.hpp
@@ -0,0 +1,105 @@
+#include "barretenberg/common/throw_or_abort.hpp"
+#include "barretenberg/env/hardware_concurrency.hpp"
+#include <cstdlib>
+#include <ctime>
+#include <fcntl.h>
+#include <iostream>
+#include <sstream>
+#include <string>
+#include <type_traits>
+#include <unistd.h>
+
+namespace {
+/**
+ * File descriptor that benchmarks are written to, resolved once when this variable is initialized.
+ *
+ * If user provides the env var BENCHMARK_FD write benchmarks to this fd, otherwise default to -1 (disable).
+ * Any negative value is normalized to -1, the only value write_benchmark treats as "disabled".
+ * e.g:
+ *   BENCHMARK_FD=3 bb 3> benchmarks.jsonl
+ *
+ * A value that is not a number, does not fit in an int, or names a descriptor that is not open is reported
+ * through throw_or_abort.
+ *
+ * Being in an anonymous namespace, every translation unit that includes this header gets its own copy.
+ */
+auto bfd = []() {
+    try {
+        const char* bfd_str = std::getenv("BENCHMARK_FD");
+        if (bfd_str == nullptr) {
+            return -1;
+        }
+        // std::stoi throws on non-numeric or out-of-range input, where a cast from std::stoul would silently wrap.
+        const int fd = std::stoi(bfd_str);
+        if (fd < 0) {
+            return -1;
+        }
+        // Only the return value says whether fcntl failed. errno is not cleared on success, so consulting it here
+        // could reject a valid descriptor because of an unrelated earlier failure.
+        if (fcntl(fd, F_GETFD) == -1) {
+            throw_or_abort("fd is not open. Did you redirect in your shell?");
+        }
+        return fd;
+    } catch (std::exception const& e) {
+        std::string inner_msg = e.what();
+        throw_or_abort("Invalid BENCHMARK_FD: " + inner_msg);
+    }
+}();
+} // namespace
+
+/**
+ * Maps a benchmark value type to the text written in the "type" field of a benchmark record.
+ * Only the specializations below exist; using any other type fails to compile.
+ *
+ * Each `type` member is `static constexpr`, hence implicitly inline (C++17), so this header can be included
+ * from several translation units without producing duplicate definitions.
+ */
+template <typename T, typename Enable = void> struct TypeTraits;
+
+// Every arithmetic type (integers and floating point) is reported as "number".
+template <typename T> struct TypeTraits<T, typename std::enable_if<std::is_arithmetic<T>::value>::type> {
+    static constexpr const char* type = "number";
+};
+
+template <> struct TypeTraits<std::string> {
+    static constexpr const char* type = "string";
+};
+
+// double is already covered by the arithmetic specialization; the explicit one is kept as it was.
+template <> struct TypeTraits<double> {
+    static constexpr const char* type = "number";
+};
+
+// bool is an arithmetic type, so this explicit specialization is what makes it report "bool" and not "number".
+template <> struct TypeTraits<bool> {
+    static constexpr const char* type = "bool";
+};
+
+/**
+ * Return the current UTC time formatted as "YYYY-MM-DDTHH:MM:SSZ", e.g. "2023-10-10T22:17:10Z".
+ * Returns an empty string if the time cannot be converted.
+ *
+ * Declared inline because it is defined in a header. gmtime_r writes into a caller-owned std::tm, whereas
+ * std::gmtime returns a pointer to storage shared with other callers.
+ */
+inline std::string getCurrentTimestamp()
+{
+    const std::time_t now = std::time(nullptr);
+    std::tm now_tm{};
+    if (gmtime_r(&now, &now_tm) == nullptr) {
+        return std::string();
+    }
+    // The formatted timestamp is 20 characters, plus one for the terminating NUL.
+    char buf[21] = { 0 };
+    std::strftime(buf, sizeof(buf), "%Y-%m-%dT%H:%M:%SZ", &now_tm);
+    return std::string(buf);
+}
+
+/** Render `value` as text by streaming it through its operator<<. */
+template <typename T> std::string toString(const T& value)
+{
+    std::ostringstream rendered;
+    rendered << value;
+    return rendered.str();
+}
+
+/**
+ * Base case of the recursion below: no label/value pairs are left, so nothing is appended.
+ * Declared inline because it is a non-template function defined in a header.
+ */
+inline void appendToStream(std::ostringstream&)
+{
+    // base case: do nothing
+}
+
+/**
+ * Append `, "key": "value"` to `oss` for the first label/value pair, then recurse on the remaining pairs.
+ * Every value is rendered through toString and written as a quoted JSON string, whatever its type.
+ * No JSON escaping is applied to keys or values.
+ *
+ * The remaining pairs are taken by const reference so they are not copied at each recursion step.
+ */
+template <typename K, typename V, typename... Args>
+void appendToStream(std::ostringstream& oss, const K& key, const V& value, const Args&... args)
+{
+    oss << ", \"" << key << "\": \"" << toString(value) << "\"";
+    appendToStream(oss, args...); // recursively process the remaining arguments
+}
+
+/**
+ * Append one benchmark record, as a single JSON line, to the file descriptor held in `bfd`.
+ * Does nothing when benchmarking is disabled (bfd == -1).
+ *
+ * @param name  Written as the record's "name" field.
+ * @param value Written as the record's "value" field; its type T also selects the "type" field through
+ *              TypeTraits<T>. Strings are quoted and bools are written as true/false so the line is valid JSON.
+ * @param args  Alternating label/value pairs appended after the "threads" field.
+ *
+ * No JSON escaping is applied to `name` or to string values.
+ */
+template <typename T, typename... Args> void write_benchmark(const std::string& name, const T& value, Args... args)
+{
+    if (bfd == -1) {
+        return;
+    }
+    std::ostringstream oss;
+    oss << "{\"timestamp\": \"" << getCurrentTimestamp() << "\", "
+        << "\"name\": \"" << name << "\", "
+        << "\"type\": \"" << TypeTraits<T>::type << "\", "
+        << "\"value\": ";
+    if constexpr (std::is_same_v<T, std::string>) {
+        // A bare, unquoted string is not valid JSON.
+        oss << "\"" << value << "\"";
+    } else if constexpr (std::is_same_v<T, bool>) {
+        // JSON booleans are true/false; operator<< would print 1/0.
+        oss << (value ? "true" : "false");
+    } else {
+        oss << value;
+    }
+    oss << ", \"threads\": " << env_hardware_concurrency();
+
+    appendToStream(oss, args...); // unpack and append the key-value pairs
+
+    // A plain newline is enough: flushing an in-memory stream with std::endl achieves nothing.
+    oss << "}\n";
+    const std::string line = oss.str();
+    // write() may accept fewer bytes than requested, so continue until the whole line is out.
+    // A failed write is ignored, as it was before.
+    size_t written = 0;
+    while (written < line.size()) {
+        const ssize_t ret = write((int)bfd, line.data() + written, line.size() - written);
+        if (ret <= 0) {
+            break;
+        }
+        written += static_cast<size_t>(ret);
+    }
+}
diff --git a/barretenberg/cpp/src/barretenberg/common/timer.hpp b/barretenberg/cpp/src/barretenberg/common/timer.hpp
@@ -55,6 +55,15 @@ class Timer {
         return nanos;
     }
 
+    /**
+     * @brief Return the number of whole milliseconds elapsed since the start of the timer.
+     *
+     * Computed from nanoseconds() by integer division, so any fractional millisecond is dropped.
+     */
+    [[nodiscard]] int64_t milliseconds() const
+    {
+        int64_t nanos = nanoseconds();
+        return nanos / 1000000;
+    }
+
     /**
      * @brief Return the number of seconds elapsed since the start of the timer.
      */

diff --git a/barretenberg/cpp/src/barretenberg/env/hardware_concurrency.cpp b/barretenberg/cpp/src/barretenberg/env/hardware_concurrency.cpp
@@ -1,10 +1,24 @@
 #include "hardware_concurrency.hpp"
+#include <barretenberg/common/throw_or_abort.hpp>
+#include <cstdlib>
+#include <stdexcept>
+#include <string>
 #include <thread>
 
 extern "C" {
 
 uint32_t env_hardware_concurrency()
 {
-    return std::thread::hardware_concurrency();
+#ifndef __wasm__ // NOTE(review): try/catch is compiled out for wasm - presumably exceptions are unavailable there; confirm
+    try {
+#endif
+        static auto val = std::getenv("HARDWARE_CONCURRENCY"); // optional override of the detected thread count
+        static const uint32_t cores = val ? (uint32_t)std::stoul(val) : std::thread::hardware_concurrency(); // function-local static: computed once, then reused by later calls
+        return cores;
+#ifndef __wasm__
+    } catch (std::exception const&) { // e.g. std::stoul rejecting a non-numeric value
+        throw std::runtime_error("HARDWARE_CONCURRENCY invalid.");
+    }
+#endif
 }
 }
diff --git a/barretenberg/cpp/src/barretenberg/plonk/composer/ultra_composer.hpp b/barretenberg/cpp/src/barretenberg/plonk/composer/ultra_composer.hpp
@@ -26,8 +26,6 @@ class UltraComposer {
     std::shared_ptr<plonk::proving_key> circuit_proving_key;
     std::shared_ptr<plonk::verification_key> circuit_verification_key;
 
-    // The crs_factory holds the path to the srs and exposes methods to extract the srs elements
-
     bool computed_witness = false;
 
     // This variable controls the amount with which the lookup table and witness values need to be shifted