Skip to content

Commit

Permalink
Prebuild the benchmark binary with -O3 on Android (#532)
Browse files (browse the repository at this point in the history)
  • Loading branch information
lgeiger authored Oct 14, 2020
1 parent 5b59c4e commit ec2de3f
Show file tree
Hide file tree
Showing 3 changed files with 5 additions and 9 deletions.
4 changes: 2 additions & 2 deletions .bazelrc
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,11 @@ build:windows --host_cxxopt=/std:c++14
# These can be activated using --config=rpi3 and --config=aarch64
build:rpi3 --crosstool_top=@local_config_arm_compiler//:toolchain
build:rpi3 --cpu=armeabi
build:rpi3 -c opt --copt=-march=armv7-a --copt=-mfpu=neon-vfpv4 --copt=-std=gnu++11 --copt=-DS_IREAD=S_IRUSR --copt=-DS_IWRITE=S_IWUSR --copt=-O3 --copt=-fno-tree-pre --copt=-U__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1 --copt=-U__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2 --copt=-U__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 --define=raspberry_pi_with_neon=true --define=framework_shared_object=false --copt=-funsafe-math-optimizations --copt=-ftree-vectorize --copt=-fomit-frame-pointer --verbose_failures
build:rpi3 --copt=-march=armv7-a --copt=-mfpu=neon-vfpv4 --copt=-std=gnu++11 --copt=-DS_IREAD=S_IRUSR --copt=-DS_IWRITE=S_IWUSR --copt=-fno-tree-pre --copt=-U__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1 --copt=-U__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2 --copt=-U__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 --define=raspberry_pi_with_neon=true --define=framework_shared_object=false --copt=-funsafe-math-optimizations --copt=-ftree-vectorize --copt=-fomit-frame-pointer --verbose_failures

build:aarch64 --crosstool_top=@local_config_arm_compiler//:toolchain
build:aarch64 --cpu=aarch64
build:aarch64 -c opt --copt=-march=armv8-a --copt=-std=gnu++11 --copt=-DS_IREAD=S_IRUSR --copt=-DS_IWRITE=S_IWUSR --copt=-O3 --copt=-fno-tree-pre --copt=-U__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1 --copt=-U__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2 --copt=-U__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 --define=framework_shared_object=false --copt=-funsafe-math-optimizations --copt=-ftree-vectorize --copt=-fomit-frame-pointer --verbose_failures
build:aarch64 --copt=-march=armv8-a --copt=-std=gnu++11 --copt=-DS_IREAD=S_IRUSR --copt=-DS_IWRITE=S_IWUSR --copt=-fno-tree-pre --copt=-U__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1 --copt=-U__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2 --copt=-U__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 --define=framework_shared_object=false --copt=-funsafe-math-optimizations --copt=-ftree-vectorize --copt=-fomit-frame-pointer --verbose_failures

# Options to build TensorFlow 1.x or 2.x.
build:v1 --define=tf_api_version=1
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,15 +37,15 @@ jobs:
- run: mkdir benchmark-binaries
- name: Build Benchmark utility for AArch64
run: |
bazelisk build //larq_compute_engine/tflite/benchmark:lce_benchmark_model -c opt --config=aarch64
bazelisk build //larq_compute_engine/tflite/benchmark:lce_benchmark_model --config=aarch64 -c opt --copt=-O3
cp bazel-bin/larq_compute_engine/tflite/benchmark/lce_benchmark_model benchmark-binaries/lce_benchmark_model_aarch64
- name: Build Benchmark utility for AArch32
run: |
bazelisk build //larq_compute_engine/tflite/benchmark:lce_benchmark_model -c opt --config=rpi3
bazelisk build //larq_compute_engine/tflite/benchmark:lce_benchmark_model --config=rpi3 -c opt --copt=-O3
cp bazel-bin/larq_compute_engine/tflite/benchmark/lce_benchmark_model benchmark-binaries/lce_benchmark_model_aarch32
- name: Build Benchmark utility for Android
run: |
bazelisk build //larq_compute_engine/tflite/benchmark:lce_benchmark_model -c opt --config=android_arm64
bazelisk build //larq_compute_engine/tflite/benchmark:lce_benchmark_model --config=android_arm64 -c opt --copt=-O3
cp bazel-bin/larq_compute_engine/tflite/benchmark/lce_benchmark_model benchmark-binaries/lce_benchmark_model_android_arm64
- uses: actions/upload-artifact@v2.2.0
with:
Expand Down
4 changes: 0 additions & 4 deletions larq_compute_engine/core/bgemm/ruy_pack.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,6 @@ struct LceRuyPackImpl<ThePath, FixedKernelLayout<Order::kColMajor, 4, 4>,
PMat<TBitpacked>* packed_matrix, int start_col, int end_col) {
profiler::ScopeLabel label("Pack (ColMajor, 4x4)");

// Ruy supports collecting column sums during the packing process, which we
// have no need for.
RUY_DCHECK_EQ(packed_matrix->sums, nullptr);

// Likewise, Ruy supports arbitrary zero points, but we only use true-zero.
RUY_DCHECK_EQ(src_matrix.zero_point, 0);

Expand Down

0 comments on commit ec2de3f

Please sign in to comment.