From e7e4684ed80e0f8ef9841c5578330e6185debfdd Mon Sep 17 00:00:00 2001 From: Harvey Tuch Date: Thu, 13 Apr 2017 17:48:04 -0400 Subject: [PATCH] bazel: rewrite MD5 .note.gnu.build-id with truncated git SHA1. This is a workaround for https://github.com/bazelbuild/bazel/issues/2805. Small reorganization of version_generated.cc rules, since we rely on them here and this can also assist with the Google import of Envoy and its versioning scheme. Also added docs on build types and how to do release builds. --- BUILD | 18 ------ bazel/README.md | 31 ++++++++- bazel/envoy_build_system.bzl | 33 ++++++++++ source/common/common/BUILD | 2 +- source/server/BUILD | 4 +- source/server/http/BUILD | 2 +- source/version_generated/BUILD | 19 ++++++ tools/BUILD | 7 +++ tools/git_sha_rewriter.py | 111 +++++++++++++++++++++++++++++++++ 9 files changed, 204 insertions(+), 23 deletions(-) create mode 100644 source/version_generated/BUILD create mode 100644 tools/BUILD create mode 100755 tools/git_sha_rewriter.py diff --git a/BUILD b/BUILD index 38c7b9dcd85f..e69de29bb2d1 100644 --- a/BUILD +++ b/BUILD @@ -1,18 +0,0 @@ -package(default_visibility = ["//visibility:public"]) - -load("//bazel:envoy_build_system.bzl", "envoy_cc_library") - -genrule( - name = "envoy_version", - srcs = glob([".git/**"]), - outs = ["version_generated.cc"], - cmd = "touch $@ && $(location tools/gen_git_sha.sh) $$(dirname $(location tools/gen_git_sha.sh)) $@", - local = 1, - tools = ["tools/gen_git_sha.sh"], -) - -envoy_cc_library( - name = "version_generated", - srcs = ["version_generated.cc"], - deps = ["//source/common/common:version_includes"], -) diff --git a/bazel/README.md b/bazel/README.md index 7bf49f0dd27f..800cab9afcd5 100644 --- a/bazel/README.md +++ b/bazel/README.md @@ -71,13 +71,42 @@ tools/bazel-test-gdb //test/common/http:async_client_impl_test # Additional Envoy build and test options +In general, there are 3 [compilation +modes](https://bazel.build/versions/master/docs/bazel-user-manual.html#flag--compilation_mode) +that Bazel supports: + +* `fastbuild`: `-O0 -DDEBUG`, aimed at developer speed (default). +* `opt`: `-O2`, for production builds and performance benchmarking. +* `dbg`: `-O0 -ggdb3 -DDEBUG`, debug symbols. + +You can use the `-c ` flag to control this, e.g. + +``` +bazel build -c opt //source/exe:envoy-static +``` + To build and run tests with the compiler's address sanitizer (ASAN) enabled: ``` bazel test -c dbg --config=asan //test/... ``` -The ASAN failure stack traces include numbers as a results of running ASAN with a `dbg` build above. +The ASAN failure stack traces include line numbers as a results of running ASAN with a `dbg` build above. + +# Release builds + +Release builds should be built in `opt` mode, processed with `strip` and have a +`.note.gnu.build-id` section with the Git SHA1 at which the build took place. +They should also ignore any local `.bazelrc` for reproducibility. This can be +achieved with: + +``` +bazel --bazelrc=/dev/null build -c opt //source/exe:envoy-static.stripped.stamped +``` + +One caveat to note is that the Git SHA1 is truncated to 16 bytes today as a +result of the workaround in place for +https://github.com/bazelbuild/bazel/issues/2805. # Adding or maintaining Envoy build rules diff --git a/bazel/envoy_build_system.bzl b/bazel/envoy_build_system.bzl index 3078656b763d..b2d06b656d18 100644 --- a/bazel/envoy_build_system.bzl +++ b/bazel/envoy_build_system.bzl @@ -55,6 +55,24 @@ def envoy_cc_library(name, alwayslink = 1, ) +def _git_stamped_genrule(name): + # To workaround https://github.com/bazelbuild/bazel/issues/2805, we + # do binary rewriting to replace the linker produced MD5 hash with the + # version_generated.cc git SHA1 hash (truncated). + native.genrule( + name = name + "_stamped", + srcs = [ + name, + "//source/version_generated:version_generated.cc", + ], + outs = [name + ".stamped"], + cmd = "cp $(location " + name + ") $@ && " + + "chmod u+w $@ && " + + "$(location //tools:git_sha_rewriter.py) " + + "$(location //source/version_generated:version_generated.cc) $@", + tools = ["//tools:git_sha_rewriter.py"], + ) + # Envoy C++ binary targets should be specified with this function. def envoy_cc_binary(name, srcs = [], @@ -62,6 +80,9 @@ def envoy_cc_binary(name, visibility = None, repository = "", deps = []): + # Implicit .stamped targets to obtain builds with the (truncated) git SHA1. + _git_stamped_genrule(name) + _git_stamped_genrule(name + ".stripped") native.cc_binary( name = name, srcs = srcs, @@ -70,9 +91,21 @@ def envoy_cc_binary(name, linkopts = [ "-pthread", "-lrt", + # Force MD5 hash in build. This is part of the workaround for + # https://github.com/bazelbuild/bazel/issues/2805. Bazel actually + # does this by itself prior to + # https://github.com/bazelbuild/bazel/commit/724706ba4836c3366fc85b40ed50ccf92f4c3882. + # Ironically, forcing it here so that in future releases we will + # have the same behavior. When everyone is using an updated version + # of Bazel, we can use linkopts to set the git SHA1 directly in the + # --build-id and avoid doing the following. + '-Wl,--build-id=md5', + '-Wl,--hash-style=gnu', ], linkstatic = 1, visibility = visibility, + # See above comment on MD5 hash. + stamp = 0, deps = deps + [ repository + "//source/precompiled:precompiled_includes", ], diff --git a/source/common/common/BUILD b/source/common/common/BUILD index 882502462f79..88f4abff5ed3 100644 --- a/source/common/common/BUILD +++ b/source/common/common/BUILD @@ -99,6 +99,6 @@ envoy_cc_library( srcs = ["version.cc"], deps = [ ":version_includes", - "//:version_generated", + "//source/version_generated:version_generated", ], ) diff --git a/source/server/BUILD b/source/server/BUILD index 48eb848200ed..1e1861565586 100644 --- a/source/server/BUILD +++ b/source/server/BUILD @@ -88,7 +88,7 @@ envoy_cc_library( hdrs = ["options_impl.h"], external_deps = ["tclap"], deps = [ - "//:version_generated", + "//source/version_generated:version_generated", "//include/envoy/server:options_interface", "//source/common/common:macros", "//source/common/common:version_lib", @@ -104,7 +104,7 @@ envoy_cc_library( ":connection_handler_lib", ":test_hooks_lib", ":worker_lib", - "//:version_generated", + "//source/version_generated:version_generated", "//include/envoy/common:optional", "//include/envoy/event:dispatcher_interface", "//include/envoy/event:signal_interface", diff --git a/source/server/http/BUILD b/source/server/http/BUILD index 1b82b8fe6e5d..2e0bc662a6e8 100644 --- a/source/server/http/BUILD +++ b/source/server/http/BUILD @@ -7,7 +7,7 @@ envoy_cc_library( srcs = ["admin.cc"], hdrs = ["admin.h"], deps = [ - "//:version_generated", + "//source/version_generated:version_generated", "//include/envoy/filesystem:filesystem_interface", "//include/envoy/http:filter_interface", "//include/envoy/network:listen_socket_interface", diff --git a/source/version_generated/BUILD b/source/version_generated/BUILD new file mode 100644 index 000000000000..6b560518edfa --- /dev/null +++ b/source/version_generated/BUILD @@ -0,0 +1,19 @@ +package(default_visibility = ["//visibility:public"]) + +load("//bazel:envoy_build_system.bzl", "envoy_cc_library") + +genrule( + name = "envoy_version", + srcs = glob([".git/**"]), + outs = ["version_generated.cc"], + cmd = "touch $@ && $(location //tools:gen_git_sha.sh) " + + "$$(dirname $(location //tools:gen_git_sha.sh)) $@", + local = 1, + tools = ["//tools:gen_git_sha.sh"], +) + +envoy_cc_library( + name = "version_generated", + srcs = ["version_generated.cc"], + deps = ["//source/common/common:version_includes"], +) diff --git a/tools/BUILD b/tools/BUILD new file mode 100644 index 000000000000..e06a4574c70d --- /dev/null +++ b/tools/BUILD @@ -0,0 +1,7 @@ +package(default_visibility = ["//visibility:public"]) + +exports_files([ + "gen_git_sha.sh", + "git_sha_rewriter.py", +]) + diff --git a/tools/git_sha_rewriter.py b/tools/git_sha_rewriter.py new file mode 100755 index 000000000000..0d62f80b36b0 --- /dev/null +++ b/tools/git_sha_rewriter.py @@ -0,0 +1,111 @@ +#!/usr/bin/env python + +# This tool takes an ELF binary that has been built with -Wl,--build-id=md5' +# '-Wl,--hash-style=gnu (as done by Bazel prior to +# https://github.com/bazelbuild/bazel/commit/724706ba4836c3366fc85b40ed50ccf92f4c3882, +# versions prior to 0.5), and replaces the MD5 compiler hash with a truncated +# git SHA1 hash found in Envoy's version_generated.cc. +# +# This is useful to folks who want the build commit in the .note.gnu.build-id +# section rather than the compiler hash of inputs. Please note that the hash is +# a 16 byte truncated git SHA1, rather than a complete 20 byte git SHA1. +# This is a workaround to https://github.com/bazelbuild/bazel/issues/2805. + +import binascii +import re +import subprocess as sp +import sys + +# This is what the part of .note.gnu.build-id prior to the MD5 hash looks like. +EXPECTED_BUILD_ID_NOTE_PREFIX = [ + # The "name" of the note is 4 bytes long. + 0x04, + 0x00, + 0x00, + 0x00, + # The "description" of the note is 16 bytes. + 0x10, + 0x00, + 0x00, + 0x00, + # The "type" of the note. + 0x03, + 0x00, + 0x00, + 0x00, + # 'G', 'N', 'U', '\0' (name) + 0x47, + 0x4e, + 0x55, + 0x00, +] +# We're expecting an MD5 hash, 16 bytes. +MD5_HASH_LEN = 16 +EXPECTED_BUILD_ID_NOTE_LENGTH = len(EXPECTED_BUILD_ID_NOTE_PREFIX) + MD5_HASH_LEN + + +class RewriterException(Exception): + pass + + +# Extract MD5 hash hex string from version_generated.cc. +def ExtractGitSha(path): + with open(path, 'r') as f: + contents = f.read() + sr = re.search('GIT_SHA\("(\w+)"', contents, flags=re.MULTILINE) + if not sr: + raise RewriterException('Bad version_generated.cc: %s' % contents) + return sr.group(1) + + +# Scrape the offset of .note.gnu.build-id via readelf from the binary. Also +# verify the note section is what we expect. +def ExtractBuildIdNoteOffset(path): + try: + readelf_output = sp.check_output('readelf -SW %s' % path, shell=True) + # Sanity check the ordering of fields from readelf. + if not re.search('Name\s+Type\s+Address\s+Off\s+Size\s', readelf_output): + raise RewriterException('Invalid readelf output: %s' % readelf_output) + sr = re.search('.note.gnu.build-id\s+NOTE\s+\w+\s+(\w+)\s(\w+)\s', + readelf_output) + if not sr: + raise RewriterException( + 'Unable to parse .note.gnu.build-id note: %s' % readelf_output) + raw_note_offset, raw_note_size = sr.groups() + if long(raw_note_size, 16) != EXPECTED_BUILD_ID_NOTE_LENGTH: + raise RewriterException( + 'Incorrect .note.gnu.build-id note size: %s' % readelf_output) + note_offset = long(raw_note_offset, 16) + with open(path, 'rb') as f: + f.seek(note_offset) + note_prefix = [ord(b) for b in f.read(len(EXPECTED_BUILD_ID_NOTE_PREFIX))] + if note_prefix != EXPECTED_BUILD_ID_NOTE_PREFIX: + raise RewriterException( + 'Unexpected .note.gnu.build-id prefix in %s: %s' % (path, + note_prefix)) + return note_offset + except sp.CalledProcessError as e: + raise RewriterException('%s %s' % (e, readelf_output.output)) + + +# Inplace binary rewriting of the 16 byte .note.gnu.build-id description with +# the truncated hash. +def RewriteBinary(path, offset, git5_sha1): + truncated_hash = git5_sha1[:2 * MD5_HASH_LEN] + print 'Writing %s truncated to %s at offset 0x%x in %s' % (git5_sha1, + truncated_hash, + offset, path) + with open(path, 'r+b') as f: + f.seek(offset + len(EXPECTED_BUILD_ID_NOTE_PREFIX)) + f.write(binascii.unhexlify(truncated_hash)) + + +if __name__ == '__main__': + if len(sys.argv) != 3: + print('Usage: %s ' % + sys.argv[0]) + sys.exit(1) + version_generated = ExtractGitSha(sys.argv[1]) + envoy_bin_path = sys.argv[2] + build_id_note_offset = ExtractBuildIdNoteOffset(envoy_bin_path) + RewriteBinary(envoy_bin_path, build_id_note_offset, version_generated)