From 9c5584833b9add8766b3b742f0879f068ca8f857 Mon Sep 17 00:00:00 2001 From: Raven Black Date: Tue, 7 Nov 2023 17:03:35 +0000 Subject: [PATCH 01/41] Reflection-based deterministic message hashing Signed-off-by: Raven Black --- source/common/protobuf/BUILD | 12 ++ source/common/protobuf/deterministic_hash.cc | 195 +++++++++++++++++++ source/common/protobuf/deterministic_hash.h | 11 ++ source/common/protobuf/utility.cc | 16 +- test/common/protobuf/utility_test.cc | 15 ++ tools/code_format/check_format.py | 4 +- 6 files changed, 239 insertions(+), 14 deletions(-) create mode 100644 source/common/protobuf/deterministic_hash.cc create mode 100644 source/common/protobuf/deterministic_hash.h diff --git a/source/common/protobuf/BUILD b/source/common/protobuf/BUILD index 7fa661b274c7..9f18dd0f4ae1 100644 --- a/source/common/protobuf/BUILD +++ b/source/common/protobuf/BUILD @@ -199,6 +199,17 @@ envoy_cc_library( ]), ) +envoy_cc_library( + name = "deterministic_hash_lib", + srcs = ["deterministic_hash.cc"], + hdrs = ["deterministic_hash.h"], + deps = [ + ":protobuf", + "//source/common/common:assert_lib", + "//source/common/common:hash_lib", + ], +) + envoy_cc_library( name = "utility_lib", srcs = ["utility.cc"], @@ -206,6 +217,7 @@ envoy_cc_library( "protobuf", ], deps = [ + ":deterministic_hash_lib", ":message_validator_lib", ":protobuf", ":utility_lib_header", diff --git a/source/common/protobuf/deterministic_hash.cc b/source/common/protobuf/deterministic_hash.cc new file mode 100644 index 000000000000..adcc995078f4 --- /dev/null +++ b/source/common/protobuf/deterministic_hash.cc @@ -0,0 +1,195 @@ +#if defined(ENVOY_ENABLE_FULL_PROTOS) +#include "source/common/protobuf/deterministic_hash.h" + +#include "source/common/common/assert.h" +#include "source/common/common/hash.h" + +namespace Envoy { +namespace DeterministicProtoHash { +namespace { +#define REFLECTION_FOR_EACH(get_type, hash_type) \ + if (field->is_repeated()) { \ + for (int i = 0; i < reflection->FieldSize(message, field); i++) { \ + hash_type(reflection->GetRepeated##get_type(message, field, i)); \ + } \ + } else { \ + hash_type(reflection->Get##get_type(message, field)); \ + } + +#define MAP_SORT_BY(get_type) \ + do { \ + std::sort( \ + map.begin(), map.end(), \ + [&entry_reflection, &key_field](const Protobuf::Message& a, const Protobuf::Message& b) { \ + return entry_reflection->Get##get_type(a, key_field) < \ + entry_reflection->Get##get_type(b, key_field); \ + }); \ + } while (0) + +#define HASH_FIXED(v) \ + do { \ + auto q = v; \ + seed = \ + HashUtil::xxHash64(absl::string_view{reinterpret_cast(&q), sizeof(q)}, seed); \ + } while (0) + +#define HASH_STRING(v) \ + do { \ + seed = HashUtil::xxHash64(v, seed); \ + } while (0) + +#define HASH_MESSAGE(v) \ + do { \ + seed = reflectionHashMessage(reflection->GetMessage(message, field), seed); \ + } while (0) + +uint64_t reflectionHashMessage(const Protobuf::Message& message, uint64_t seed = 0); +uint64_t reflectionHashField(const Protobuf::Message& message, + const Protobuf::FieldDescriptor* field, uint64_t seed); + +// To make a map serialize deterministically we need to force the order. +// Here we're going to sort the keys into numerical order for number keys, +// or lexigraphical order for strings, and then hash them in that order. +uint64_t reflectionHashMapField(const Protobuf::Message& message, + const Protobuf::FieldDescriptor* field, uint64_t seed) { + using Protobuf::FieldDescriptor; + const auto reflection = message.GetReflection(); + auto entries = reflection->GetRepeatedFieldRef(message, field); + std::vector> map(entries.begin(), entries.end()); + auto entry_reflection = map.begin()->get().GetReflection(); + auto entry_descriptor = map.begin()->get().GetDescriptor(); + const FieldDescriptor* key_field = entry_descriptor->map_key(); + const FieldDescriptor* value_field = entry_descriptor->map_value(); + HASH_FIXED(key_field->number()); + switch (key_field->cpp_type()) { + case FieldDescriptor::CPPTYPE_INT32: + MAP_SORT_BY(Int32); + break; + case FieldDescriptor::CPPTYPE_UINT32: + MAP_SORT_BY(UInt32); + break; + case FieldDescriptor::CPPTYPE_INT64: + MAP_SORT_BY(Int64); + break; + case FieldDescriptor::CPPTYPE_UINT64: + MAP_SORT_BY(UInt64); + break; + case FieldDescriptor::CPPTYPE_DOUBLE: + MAP_SORT_BY(Double); + break; + case FieldDescriptor::CPPTYPE_FLOAT: + MAP_SORT_BY(Float); + break; + case FieldDescriptor::CPPTYPE_STRING: + // TODO: use StringReference + MAP_SORT_BY(String); + break; + case FieldDescriptor::CPPTYPE_BOOL: + case FieldDescriptor::CPPTYPE_ENUM: + case FieldDescriptor::CPPTYPE_MESSAGE: + IS_ENVOY_BUG("invalid map key type"); + } + for (const auto& entry : map) { + seed = reflectionHashField(entry, key_field, seed); + seed = reflectionHashField(entry, value_field, seed); + } + return seed; +} + +uint64_t reflectionHashField(const Protobuf::Message& message, + const Protobuf::FieldDescriptor* field, uint64_t seed) { + using Protobuf::FieldDescriptor; + std::cerr << "field = " << field->DebugString() << std::endl; + const auto reflection = message.GetReflection(); + switch (field->cpp_type()) { + case FieldDescriptor::CPPTYPE_INT32: + REFLECTION_FOR_EACH(Int32, HASH_FIXED); + break; + case FieldDescriptor::CPPTYPE_UINT32: + REFLECTION_FOR_EACH(UInt32, HASH_FIXED); + break; + case FieldDescriptor::CPPTYPE_INT64: + REFLECTION_FOR_EACH(Int64, HASH_FIXED); + break; + case FieldDescriptor::CPPTYPE_UINT64: + REFLECTION_FOR_EACH(UInt64, HASH_FIXED); + break; + case FieldDescriptor::CPPTYPE_DOUBLE: + REFLECTION_FOR_EACH(Double, HASH_FIXED); + break; + case FieldDescriptor::CPPTYPE_FLOAT: + REFLECTION_FOR_EACH(Float, HASH_FIXED); + break; + case FieldDescriptor::CPPTYPE_BOOL: + REFLECTION_FOR_EACH(Bool, HASH_FIXED); + break; + case FieldDescriptor::CPPTYPE_ENUM: + REFLECTION_FOR_EACH(EnumValue, HASH_FIXED); + break; + case FieldDescriptor::CPPTYPE_STRING: + // TODO: use StringReference + REFLECTION_FOR_EACH(String, HASH_STRING); + break; + case FieldDescriptor::CPPTYPE_MESSAGE: + if (field->is_map()) { + return reflectionHashMapField(message, field, seed); + } + REFLECTION_FOR_EACH(Message, HASH_MESSAGE); + break; + } + return seed; +} + +absl::string_view typeUrlToDescriptorFullName(absl::string_view url) { + const size_t pos = url.rfind('/'); + if (pos != absl::string_view::npos) { + return url.substr(pos + 1); + } + return url; +} + +std::unique_ptr unpackAnyForReflection(const ProtobufWkt::Any& any) { + const Protobuf::Descriptor* descriptor = + Protobuf::DescriptorPool::generated_pool()->FindMessageTypeByName( + typeUrlToDescriptorFullName(any.type_url())); + if (descriptor == nullptr) { + return nullptr; + } + const Protobuf::Message* prototype = + Protobuf::MessageFactory::generated_factory()->GetPrototype(descriptor); + auto msg = std::unique_ptr(prototype->New()); + any.UnpackTo(msg.get()); + return msg; +} + +// This is intentionally ignoring unknown fields. +uint64_t reflectionHashMessage(const Protobuf::Message& message, uint64_t seed) { + using Protobuf::FieldDescriptor; + std::string scratch; + const auto reflection = message.GetReflection(); + const auto descriptor = message.GetDescriptor(); + if (descriptor->well_known_type() == Protobuf::Descriptor::WELLKNOWNTYPE_ANY) { + const ProtobufWkt::Any* any = Protobuf::DynamicCastToGenerated(&message); + auto submsg = unpackAnyForReflection(*any); + if (submsg == nullptr) { + // If we wanted to handle unknown types in Any, this is where we'd have to do it. + return seed; + } + HASH_STRING(any->type_url()); + return reflectionHashMessage(*submsg, seed); + } + std::vector fields; + reflection->ListFields(message, &fields); + // If we wanted to handle unknown fields, we'd need to also GetUnknownFields here. + for (const auto field : fields) { + seed = reflectionHashField(message, field, seed); + } + return seed; +} +} // namespace + +uint64_t hash(const Protobuf::Message& message) { return reflectionHashMessage(message, 0); } + +} // namespace DeterministicProtoHash +} // namespace Envoy +#endif diff --git a/source/common/protobuf/deterministic_hash.h b/source/common/protobuf/deterministic_hash.h new file mode 100644 index 000000000000..894e78af766b --- /dev/null +++ b/source/common/protobuf/deterministic_hash.h @@ -0,0 +1,11 @@ +#pragma once + +#include "source/common/protobuf/protobuf.h" + +#if defined(ENVOY_ENABLE_FULL_PROTOS) +namespace Envoy { +namespace DeterministicProtoHash { +uint64_t hash(const Protobuf::Message& message); +} +} // namespace Envoy +#endif diff --git a/source/common/protobuf/utility.cc b/source/common/protobuf/utility.cc index 7765a0d0dc81..8e3c25cc290b 100644 --- a/source/common/protobuf/utility.cc +++ b/source/common/protobuf/utility.cc @@ -10,6 +10,7 @@ #include "source/common/common/assert.h" #include "source/common/common/documentation_url.h" #include "source/common/common/fmt.h" +#include "source/common/protobuf/deterministic_hash.h" #include "source/common/protobuf/message_validator_impl.h" #include "source/common/protobuf/protobuf.h" #include "source/common/protobuf/visitor.h" @@ -137,22 +138,11 @@ void ProtoExceptionUtil::throwProtoValidationException(const std::string& valida } size_t MessageUtil::hash(const Protobuf::Message& message) { - std::string text_format; - #if defined(ENVOY_ENABLE_FULL_PROTOS) - { - Protobuf::TextFormat::Printer printer; - printer.SetExpandAny(true); - printer.SetUseFieldNumber(true); - printer.SetSingleLineMode(true); - printer.SetHideUnknownFields(true); - printer.PrintToString(message, &text_format); - } + return DeterministicProtoHash::hash(message); #else - absl::StrAppend(&text_format, message.SerializeAsString()); + return HashUtil::xxHash64(message.SerializeAsString()); #endif - - return HashUtil::xxHash64(text_format); } #if !defined(ENVOY_ENABLE_FULL_PROTOS) diff --git a/test/common/protobuf/utility_test.cc b/test/common/protobuf/utility_test.cc index 7b51f2b0e2c7..b3feba633fd7 100644 --- a/test/common/protobuf/utility_test.cc +++ b/test/common/protobuf/utility_test.cc @@ -175,6 +175,12 @@ TEST_F(ProtobufUtilityTest, MessageUtilHash) { ProtobufWkt::Struct s; (*s.mutable_fields())["ab"].set_string_value("fgh"); (*s.mutable_fields())["cde"].set_string_value("ij"); + ProtobufWkt::Struct s2; + (*s2.mutable_fields())["ab"].set_string_value("ij"); + (*s2.mutable_fields())["cde"].set_string_value("fgh"); + ProtobufWkt::Struct s3; + (*s3.mutable_fields())["ac"].set_string_value("fgh"); + (*s3.mutable_fields())["cdb"].set_string_value("ij"); ProtobufWkt::Any a1; a1.PackFrom(s); @@ -184,10 +190,19 @@ TEST_F(ProtobufUtilityTest, MessageUtilHash) { a2.set_value(Base64::decode("CgsKA2NkZRIEGgJpagoLCgJhYhIFGgNmZ2g=")); ProtobufWkt::Any a3 = a1; a3.set_value(Base64::decode("CgsKAmFiEgUaA2ZnaAoLCgNjZGUSBBoCaWo=")); + ProtobufWkt::Any a4, a5; + a4.PackFrom(s2); + a5.PackFrom(s3); EXPECT_EQ(MessageUtil::hash(a1), MessageUtil::hash(a2)); EXPECT_EQ(MessageUtil::hash(a2), MessageUtil::hash(a3)); EXPECT_NE(0, MessageUtil::hash(a1)); + // Same keys and values but with the values in a different order should not have + // the same hash. + EXPECT_NE(MessageUtil::hash(a1), MessageUtil::hash(a4)); + // Different keys with the values in the same order should not have the same hash. + EXPECT_NE(MessageUtil::hash(a1), MessageUtil::hash(a5)); + // Struct without 'any' around it should not hash the same as struct inside 'any'. EXPECT_NE(MessageUtil::hash(s), MessageUtil::hash(a1)); } diff --git a/tools/code_format/check_format.py b/tools/code_format/check_format.py index ac723f7c0cbe..916e4e8f6db6 100755 --- a/tools/code_format/check_format.py +++ b/tools/code_format/check_format.py @@ -384,7 +384,9 @@ def allow_listed_for_grpc_init(self, file_path): def allow_listed_for_unpack_to(self, file_path): return file_path.startswith("./test") or file_path in [ - "./source/common/protobuf/utility.cc", "./source/common/protobuf/utility.h" + "./source/common/protobuf/deterministic_hash.cc", + "./source/common/protobuf/utility.cc", + "./source/common/protobuf/utility.h", ] def allow_listed_for_raw_try(self, file_path): From e9ca0671450b2c3cdd51a0193bc90aafe918a560 Mon Sep 17 00:00:00 2001 From: Raven Black Date: Tue, 7 Nov 2023 18:28:12 +0000 Subject: [PATCH 02/41] Add comment Signed-off-by: Raven Black --- source/common/protobuf/deterministic_hash.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/source/common/protobuf/deterministic_hash.h b/source/common/protobuf/deterministic_hash.h index 894e78af766b..9bcf57ecd036 100644 --- a/source/common/protobuf/deterministic_hash.h +++ b/source/common/protobuf/deterministic_hash.h @@ -5,7 +5,17 @@ #if defined(ENVOY_ENABLE_FULL_PROTOS) namespace Envoy { namespace DeterministicProtoHash { + +// Note: this ignores unknown fields and unrecognized types in Any fields. +// An alternative approach might treat such fields as "raw data" and include +// them in the hash, which would risk breaking the deterministic behavior, +// versus this way risks ignoring significant data. +// +// Ignoring unknown fields was chosen as the implementation because the +// TextFormat-based hashing this replaces was explicitly ignoring unknown +// fields. uint64_t hash(const Protobuf::Message& message); -} + +} // namespace DeterministicProtoHash } // namespace Envoy #endif From 44f7ebe184ff340703c9704181662ae70d3b7316 Mon Sep 17 00:00:00 2001 From: Raven Black Date: Tue, 7 Nov 2023 18:48:59 +0000 Subject: [PATCH 03/41] Use GetStringReference Signed-off-by: Raven Black --- source/common/protobuf/deterministic_hash.cc | 36 +++++++++++++------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/source/common/protobuf/deterministic_hash.cc b/source/common/protobuf/deterministic_hash.cc index adcc995078f4..1e5cc206a2ec 100644 --- a/source/common/protobuf/deterministic_hash.cc +++ b/source/common/protobuf/deterministic_hash.cc @@ -74,17 +74,21 @@ uint64_t reflectionHashMapField(const Protobuf::Message& message, case FieldDescriptor::CPPTYPE_UINT64: MAP_SORT_BY(UInt64); break; - case FieldDescriptor::CPPTYPE_DOUBLE: - MAP_SORT_BY(Double); - break; - case FieldDescriptor::CPPTYPE_FLOAT: - MAP_SORT_BY(Float); + case FieldDescriptor::CPPTYPE_BOOL: + MAP_SORT_BY(Bool); break; - case FieldDescriptor::CPPTYPE_STRING: - // TODO: use StringReference - MAP_SORT_BY(String); + case FieldDescriptor::CPPTYPE_STRING: { + std::sort( + map.begin(), map.end(), + [&entry_reflection, &key_field](const Protobuf::Message& a, const Protobuf::Message& b) { + std::string scratch_a, scratch_b; + return entry_reflection->GetStringReference(a, key_field, &scratch_a) < + entry_reflection->GetStringReference(b, key_field, &scratch_b); + }); break; - case FieldDescriptor::CPPTYPE_BOOL: + } + case FieldDescriptor::CPPTYPE_DOUBLE: + case FieldDescriptor::CPPTYPE_FLOAT: case FieldDescriptor::CPPTYPE_ENUM: case FieldDescriptor::CPPTYPE_MESSAGE: IS_ENVOY_BUG("invalid map key type"); @@ -126,10 +130,16 @@ uint64_t reflectionHashField(const Protobuf::Message& message, case FieldDescriptor::CPPTYPE_ENUM: REFLECTION_FOR_EACH(EnumValue, HASH_FIXED); break; - case FieldDescriptor::CPPTYPE_STRING: - // TODO: use StringReference - REFLECTION_FOR_EACH(String, HASH_STRING); - break; + case FieldDescriptor::CPPTYPE_STRING: { + std::string scratch; + if (field->is_repeated()) { + for (int i = 0; i < reflection->FieldSize(message, field); i++) { + HASH_STRING(reflection->GetRepeatedStringReference(message, field, i, &scratch)); + } + } else { + HASH_STRING(reflection->GetStringReference(message, field, &scratch)); + } + } break; case FieldDescriptor::CPPTYPE_MESSAGE: if (field->is_map()) { return reflectionHashMapField(message, field, seed); From 465d8331060ffcf51678e18dd2ea2bc38eb57888 Mon Sep 17 00:00:00 2001 From: Raven Black Date: Tue, 7 Nov 2023 19:06:43 +0000 Subject: [PATCH 04/41] Spelling Signed-off-by: Raven Black --- source/common/protobuf/deterministic_hash.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/common/protobuf/deterministic_hash.cc b/source/common/protobuf/deterministic_hash.cc index 1e5cc206a2ec..c3eefaa0a2b0 100644 --- a/source/common/protobuf/deterministic_hash.cc +++ b/source/common/protobuf/deterministic_hash.cc @@ -49,7 +49,7 @@ uint64_t reflectionHashField(const Protobuf::Message& message, // To make a map serialize deterministically we need to force the order. // Here we're going to sort the keys into numerical order for number keys, -// or lexigraphical order for strings, and then hash them in that order. +// or lexicographical order for strings, and then hash them in that order. uint64_t reflectionHashMapField(const Protobuf::Message& message, const Protobuf::FieldDescriptor* field, uint64_t seed) { using Protobuf::FieldDescriptor; From 0bfb6f1c7353e2194ee3fa7bbd257e39d00fee0e Mon Sep 17 00:00:00 2001 From: Raven Black Date: Tue, 7 Nov 2023 23:16:40 +0000 Subject: [PATCH 05/41] Debug Signed-off-by: Raven Black --- source/common/protobuf/deterministic_hash.cc | 22 ++++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/source/common/protobuf/deterministic_hash.cc b/source/common/protobuf/deterministic_hash.cc index c3eefaa0a2b0..8bfc421743da 100644 --- a/source/common/protobuf/deterministic_hash.cc +++ b/source/common/protobuf/deterministic_hash.cc @@ -38,11 +38,6 @@ namespace { seed = HashUtil::xxHash64(v, seed); \ } while (0) -#define HASH_MESSAGE(v) \ - do { \ - seed = reflectionHashMessage(reflection->GetMessage(message, field), seed); \ - } while (0) - uint64_t reflectionHashMessage(const Protobuf::Message& message, uint64_t seed = 0); uint64_t reflectionHashField(const Protobuf::Message& message, const Protobuf::FieldDescriptor* field, uint64_t seed); @@ -103,7 +98,6 @@ uint64_t reflectionHashMapField(const Protobuf::Message& message, uint64_t reflectionHashField(const Protobuf::Message& message, const Protobuf::FieldDescriptor* field, uint64_t seed) { using Protobuf::FieldDescriptor; - std::cerr << "field = " << field->DebugString() << std::endl; const auto reflection = message.GetReflection(); switch (field->cpp_type()) { case FieldDescriptor::CPPTYPE_INT32: @@ -131,20 +125,26 @@ uint64_t reflectionHashField(const Protobuf::Message& message, REFLECTION_FOR_EACH(EnumValue, HASH_FIXED); break; case FieldDescriptor::CPPTYPE_STRING: { - std::string scratch; if (field->is_repeated()) { - for (int i = 0; i < reflection->FieldSize(message, field); i++) { - HASH_STRING(reflection->GetRepeatedStringReference(message, field, i, &scratch)); + for (const std::string& str : reflection->GetRepeatedFieldRef(message, field)) { + HASH_STRING(str); } } else { + std::string scratch; HASH_STRING(reflection->GetStringReference(message, field, &scratch)); } } break; case FieldDescriptor::CPPTYPE_MESSAGE: if (field->is_map()) { - return reflectionHashMapField(message, field, seed); + seed = reflectionHashMapField(message, field, seed); + } else if (field->is_repeated()) { + for (const auto& submsg : + reflection->GetRepeatedFieldRef(message, field)) { + seed = reflectionHashMessage(submsg, seed); + } + } else { + seed = reflectionHashMessage(reflection->GetMessage(message, field), seed); } - REFLECTION_FOR_EACH(Message, HASH_MESSAGE); break; } return seed; From ef99f1e7ab1335a73c51050fe8f89ac1dbe066c8 Mon Sep 17 00:00:00 2001 From: Raven Black Date: Thu, 9 Nov 2023 16:41:34 +0000 Subject: [PATCH 06/41] Empty message hash must be nonzero Signed-off-by: Raven Black --- source/common/protobuf/deterministic_hash.cc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/source/common/protobuf/deterministic_hash.cc b/source/common/protobuf/deterministic_hash.cc index 8bfc421743da..13f96a4c108e 100644 --- a/source/common/protobuf/deterministic_hash.cc +++ b/source/common/protobuf/deterministic_hash.cc @@ -198,7 +198,10 @@ uint64_t reflectionHashMessage(const Protobuf::Message& message, uint64_t seed) } } // namespace -uint64_t hash(const Protobuf::Message& message) { return reflectionHashMessage(message, 0); } +uint64_t hash(const Protobuf::Message& message) { + // Must use a nonzero initial seed, because the empty message must hash to nonzero. + return reflectionHashMessage(message, 1); +} } // namespace DeterministicProtoHash } // namespace Envoy From 11aba214fe22599e031f13ea3e6a965150664fea Mon Sep 17 00:00:00 2001 From: Raven Black Date: Thu, 9 Nov 2023 17:53:40 +0000 Subject: [PATCH 07/41] Make http_cache use deterministic hash Signed-off-by: Raven Black --- source/extensions/filters/http/cache/BUILD | 2 +- source/extensions/filters/http/cache/http_cache.cc | 4 ++-- source/extensions/filters/http/cache/http_cache.h | 2 -- test/extensions/filters/http/cache/http_cache_test.cc | 2 +- 4 files changed, 4 insertions(+), 6 deletions(-) diff --git a/source/extensions/filters/http/cache/BUILD b/source/extensions/filters/http/cache/BUILD index fbe778851c59..7eaf3a4114f4 100644 --- a/source/extensions/filters/http/cache/BUILD +++ b/source/extensions/filters/http/cache/BUILD @@ -102,7 +102,7 @@ envoy_cc_library( "//source/common/common:assert_lib", "//source/common/http:header_utility_lib", "//source/common/http:headers_lib", - "//source/common/protobuf:utility_lib", + "//source/common/protobuf:deterministic_hash_lib", "@envoy_api//envoy/extensions/filters/http/cache/v3:pkg_cc_proto", ], ) diff --git a/source/extensions/filters/http/cache/http_cache.cc b/source/extensions/filters/http/cache/http_cache.cc index 3f5659e88495..93f68d93507c 100644 --- a/source/extensions/filters/http/cache/http_cache.cc +++ b/source/extensions/filters/http/cache/http_cache.cc @@ -10,7 +10,7 @@ #include "source/common/http/header_utility.h" #include "source/common/http/headers.h" #include "source/common/http/utility.h" -#include "source/common/protobuf/utility.h" +#include "source/common/protobuf/deterministic_hash.h" #include "source/extensions/filters/http/cache/cache_custom_headers.h" #include "source/extensions/filters/http/cache/cache_headers_utils.h" @@ -54,7 +54,7 @@ LookupRequest::LookupRequest(const Http::RequestHeaderMap& request_headers, Syst // Unless this API is still alpha, calls to stableHashKey() must always return // the same result, or a way must be provided to deal with a complete cache // flush. -size_t stableHashKey(const Key& key) { return MessageUtil::hash(key); } +size_t stableHashKey(const Key& key) { return DeterministicProtoHash::hash(key); } void LookupRequest::initializeRequestCacheControl(const Http::RequestHeaderMap& request_headers) { const absl::string_view cache_control = diff --git a/source/extensions/filters/http/cache/http_cache.h b/source/extensions/filters/http/cache/http_cache.h index f5b670c25199..177d12863e33 100644 --- a/source/extensions/filters/http/cache/http_cache.h +++ b/source/extensions/filters/http/cache/http_cache.h @@ -69,8 +69,6 @@ using LookupResultPtr = std::unique_ptr; // // When providing a cached response, Caches must ensure that the keys (and not // just their hashes) match. -// -// TODO(toddmgreer): Ensure that stability guarantees above are accurate. size_t stableHashKey(const Key& key); // LookupRequest holds everything about a request that's needed to look for a diff --git a/test/extensions/filters/http/cache/http_cache_test.cc b/test/extensions/filters/http/cache/http_cache_test.cc index fa13cf30b4a1..bdfa140294a3 100644 --- a/test/extensions/filters/http/cache/http_cache_test.cc +++ b/test/extensions/filters/http/cache/http_cache_test.cc @@ -296,7 +296,7 @@ TEST_F(LookupRequestTest, PragmaNoFallback) { TEST(HttpCacheTest, StableHashKey) { Key key; key.set_host("example.com"); - ASSERT_EQ(stableHashKey(key), 9582653837550152292u); + ASSERT_EQ(stableHashKey(key), 14708617532262531931u); } TEST_P(LookupRequestTest, ResultWithBodyAndTrailersMatchesExpectation) { From a5b4a8939f07bc132a6c93cae09ae9d32d9348b5 Mon Sep 17 00:00:00 2001 From: Raven Black Date: Thu, 9 Nov 2023 18:24:50 +0000 Subject: [PATCH 08/41] Messages of different types with identical contents should hash differently Signed-off-by: Raven Black --- source/common/protobuf/deterministic_hash.cc | 8 +++----- test/extensions/filters/http/cache/http_cache_test.cc | 2 +- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/source/common/protobuf/deterministic_hash.cc b/source/common/protobuf/deterministic_hash.cc index 13f96a4c108e..63ce377d9bbb 100644 --- a/source/common/protobuf/deterministic_hash.cc +++ b/source/common/protobuf/deterministic_hash.cc @@ -99,6 +99,7 @@ uint64_t reflectionHashField(const Protobuf::Message& message, const Protobuf::FieldDescriptor* field, uint64_t seed) { using Protobuf::FieldDescriptor; const auto reflection = message.GetReflection(); + HASH_FIXED(field->number()); switch (field->cpp_type()) { case FieldDescriptor::CPPTYPE_INT32: REFLECTION_FOR_EACH(Int32, HASH_FIXED); @@ -178,6 +179,7 @@ uint64_t reflectionHashMessage(const Protobuf::Message& message, uint64_t seed) std::string scratch; const auto reflection = message.GetReflection(); const auto descriptor = message.GetDescriptor(); + HASH_STRING(descriptor->full_name()); if (descriptor->well_known_type() == Protobuf::Descriptor::WELLKNOWNTYPE_ANY) { const ProtobufWkt::Any* any = Protobuf::DynamicCastToGenerated(&message); auto submsg = unpackAnyForReflection(*any); @@ -185,7 +187,6 @@ uint64_t reflectionHashMessage(const Protobuf::Message& message, uint64_t seed) // If we wanted to handle unknown types in Any, this is where we'd have to do it. return seed; } - HASH_STRING(any->type_url()); return reflectionHashMessage(*submsg, seed); } std::vector fields; @@ -198,10 +199,7 @@ uint64_t reflectionHashMessage(const Protobuf::Message& message, uint64_t seed) } } // namespace -uint64_t hash(const Protobuf::Message& message) { - // Must use a nonzero initial seed, because the empty message must hash to nonzero. - return reflectionHashMessage(message, 1); -} +uint64_t hash(const Protobuf::Message& message) { return reflectionHashMessage(message, 0); } } // namespace DeterministicProtoHash } // namespace Envoy diff --git a/test/extensions/filters/http/cache/http_cache_test.cc b/test/extensions/filters/http/cache/http_cache_test.cc index bdfa140294a3..27e540dfea3e 100644 --- a/test/extensions/filters/http/cache/http_cache_test.cc +++ b/test/extensions/filters/http/cache/http_cache_test.cc @@ -296,7 +296,7 @@ TEST_F(LookupRequestTest, PragmaNoFallback) { TEST(HttpCacheTest, StableHashKey) { Key key; key.set_host("example.com"); - ASSERT_EQ(stableHashKey(key), 14708617532262531931u); + ASSERT_EQ(stableHashKey(key), 15427631249665725347u); } TEST_P(LookupRequestTest, ResultWithBodyAndTrailersMatchesExpectation) { From 6c7d0902981588ef5720f2e0cbfbfc5b238868c5 Mon Sep 17 00:00:00 2001 From: Raven Black Date: Fri, 10 Nov 2023 15:54:20 +0000 Subject: [PATCH 09/41] Template map sorting Signed-off-by: Raven Black --- source/common/protobuf/deterministic_hash.cc | 57 ++++++++++++-------- 1 file changed, 35 insertions(+), 22 deletions(-) diff --git a/source/common/protobuf/deterministic_hash.cc b/source/common/protobuf/deterministic_hash.cc index 63ce377d9bbb..80296998ef8b 100644 --- a/source/common/protobuf/deterministic_hash.cc +++ b/source/common/protobuf/deterministic_hash.cc @@ -16,16 +16,6 @@ namespace { hash_type(reflection->Get##get_type(message, field)); \ } -#define MAP_SORT_BY(get_type) \ - do { \ - std::sort( \ - map.begin(), map.end(), \ - [&entry_reflection, &key_field](const Protobuf::Message& a, const Protobuf::Message& b) { \ - return entry_reflection->Get##get_type(a, key_field) < \ - entry_reflection->Get##get_type(b, key_field); \ - }); \ - } while (0) - #define HASH_FIXED(v) \ do { \ auto q = v; \ @@ -42,6 +32,14 @@ uint64_t reflectionHashMessage(const Protobuf::Message& message, uint64_t seed = uint64_t reflectionHashField(const Protobuf::Message& message, const Protobuf::FieldDescriptor* field, uint64_t seed); +template +std::function +makeCompareFn(std::function getter) { + return [getter](const Protobuf::Message& a, const Protobuf::Message& b) { + return getter(a) < getter(b); + }; +} + // To make a map serialize deterministically we need to force the order. // Here we're going to sort the keys into numerical order for number keys, // or lexicographical order for strings, and then hash them in that order. @@ -56,30 +54,44 @@ uint64_t reflectionHashMapField(const Protobuf::Message& message, const FieldDescriptor* key_field = entry_descriptor->map_key(); const FieldDescriptor* value_field = entry_descriptor->map_value(); HASH_FIXED(key_field->number()); + std::function compareFn; switch (key_field->cpp_type()) { case FieldDescriptor::CPPTYPE_INT32: - MAP_SORT_BY(Int32); + compareFn = + makeCompareFn([&entry_reflection, &key_field](const Protobuf::Message& msg) { + return entry_reflection->GetInt32(msg, key_field); + }); break; case FieldDescriptor::CPPTYPE_UINT32: - MAP_SORT_BY(UInt32); + compareFn = + makeCompareFn([&entry_reflection, &key_field](const Protobuf::Message& msg) { + return entry_reflection->GetUInt32(msg, key_field); + }); break; case FieldDescriptor::CPPTYPE_INT64: - MAP_SORT_BY(Int64); + compareFn = + makeCompareFn([&entry_reflection, &key_field](const Protobuf::Message& msg) { + return entry_reflection->GetInt64(msg, key_field); + }); break; case FieldDescriptor::CPPTYPE_UINT64: - MAP_SORT_BY(UInt64); + compareFn = + makeCompareFn([&entry_reflection, &key_field](const Protobuf::Message& msg) { + return entry_reflection->GetInt64(msg, key_field); + }); break; case FieldDescriptor::CPPTYPE_BOOL: - MAP_SORT_BY(Bool); + compareFn = makeCompareFn([&entry_reflection, &key_field](const Protobuf::Message& msg) { + return entry_reflection->GetBool(msg, key_field); + }); break; case FieldDescriptor::CPPTYPE_STRING: { - std::sort( - map.begin(), map.end(), - [&entry_reflection, &key_field](const Protobuf::Message& a, const Protobuf::Message& b) { - std::string scratch_a, scratch_b; - return entry_reflection->GetStringReference(a, key_field, &scratch_a) < - entry_reflection->GetStringReference(b, key_field, &scratch_b); - }); + compareFn = [&entry_reflection, &key_field](const Protobuf::Message& a, + const Protobuf::Message& b) { + std::string scratch_a, scratch_b; + return entry_reflection->GetStringReference(a, key_field, &scratch_a) < + entry_reflection->GetStringReference(b, key_field, &scratch_b); + }; break; } case FieldDescriptor::CPPTYPE_DOUBLE: @@ -88,6 +100,7 @@ uint64_t reflectionHashMapField(const Protobuf::Message& message, case FieldDescriptor::CPPTYPE_MESSAGE: IS_ENVOY_BUG("invalid map key type"); } + std::sort(map.begin(), map.end(), compareFn); for (const auto& entry : map) { seed = reflectionHashField(entry, key_field, seed); seed = reflectionHashField(entry, value_field, seed); From 29532f6e12853491a5f8ae8362595d3b8b43ca1c Mon Sep 17 00:00:00 2001 From: Raven Black Date: Fri, 10 Nov 2023 16:16:12 +0000 Subject: [PATCH 10/41] Fewer macros Signed-off-by: Raven Black --- source/common/protobuf/deterministic_hash.cc | 51 +++++++++----------- 1 file changed, 23 insertions(+), 28 deletions(-) diff --git a/source/common/protobuf/deterministic_hash.cc b/source/common/protobuf/deterministic_hash.cc index 80296998ef8b..b8a56ad673be 100644 --- a/source/common/protobuf/deterministic_hash.cc +++ b/source/common/protobuf/deterministic_hash.cc @@ -7,26 +7,18 @@ namespace Envoy { namespace DeterministicProtoHash { namespace { -#define REFLECTION_FOR_EACH(get_type, hash_type) \ + +#define REFLECTION_FOR_EACH(get_type, value_type) \ if (field->is_repeated()) { \ - for (int i = 0; i < reflection->FieldSize(message, field); i++) { \ - hash_type(reflection->GetRepeated##get_type(message, field, i)); \ + for (const auto q : reflection->GetRepeatedFieldRef(message, field)) { \ + seed = HashUtil::xxHash64(absl::string_view{reinterpret_cast(&q), sizeof(q)}, \ + seed); \ } \ } else { \ - hash_type(reflection->Get##get_type(message, field)); \ - } - -#define HASH_FIXED(v) \ - do { \ - auto q = v; \ + const auto q = reflection->Get##get_type(message, field); \ seed = \ HashUtil::xxHash64(absl::string_view{reinterpret_cast(&q), sizeof(q)}, seed); \ - } while (0) - -#define HASH_STRING(v) \ - do { \ - seed = HashUtil::xxHash64(v, seed); \ - } while (0) + } uint64_t reflectionHashMessage(const Protobuf::Message& message, uint64_t seed = 0); uint64_t reflectionHashField(const Protobuf::Message& message, @@ -53,7 +45,6 @@ uint64_t reflectionHashMapField(const Protobuf::Message& message, auto entry_descriptor = map.begin()->get().GetDescriptor(); const FieldDescriptor* key_field = entry_descriptor->map_key(); const FieldDescriptor* value_field = entry_descriptor->map_value(); - HASH_FIXED(key_field->number()); std::function compareFn; switch (key_field->cpp_type()) { case FieldDescriptor::CPPTYPE_INT32: @@ -112,40 +103,44 @@ uint64_t reflectionHashField(const Protobuf::Message& message, const Protobuf::FieldDescriptor* field, uint64_t seed) { using Protobuf::FieldDescriptor; const auto reflection = message.GetReflection(); - HASH_FIXED(field->number()); + { + const auto q = field->number(); + seed = + HashUtil::xxHash64(absl::string_view{reinterpret_cast(&q), sizeof(q)}, seed); + } switch (field->cpp_type()) { case FieldDescriptor::CPPTYPE_INT32: - REFLECTION_FOR_EACH(Int32, HASH_FIXED); + REFLECTION_FOR_EACH(Int32, int32_t); break; case FieldDescriptor::CPPTYPE_UINT32: - REFLECTION_FOR_EACH(UInt32, HASH_FIXED); + REFLECTION_FOR_EACH(UInt32, uint32_t); break; case FieldDescriptor::CPPTYPE_INT64: - REFLECTION_FOR_EACH(Int64, HASH_FIXED); + REFLECTION_FOR_EACH(Int64, int64_t); break; case FieldDescriptor::CPPTYPE_UINT64: - REFLECTION_FOR_EACH(UInt64, HASH_FIXED); + REFLECTION_FOR_EACH(UInt64, uint64_t); break; case FieldDescriptor::CPPTYPE_DOUBLE: - REFLECTION_FOR_EACH(Double, HASH_FIXED); + REFLECTION_FOR_EACH(Double, double); break; case FieldDescriptor::CPPTYPE_FLOAT: - REFLECTION_FOR_EACH(Float, HASH_FIXED); + REFLECTION_FOR_EACH(Float, float); break; case FieldDescriptor::CPPTYPE_BOOL: - REFLECTION_FOR_EACH(Bool, HASH_FIXED); + REFLECTION_FOR_EACH(Bool, bool); break; case FieldDescriptor::CPPTYPE_ENUM: - REFLECTION_FOR_EACH(EnumValue, HASH_FIXED); + REFLECTION_FOR_EACH(EnumValue, uint32_t); break; case FieldDescriptor::CPPTYPE_STRING: { if (field->is_repeated()) { for (const std::string& str : reflection->GetRepeatedFieldRef(message, field)) { - HASH_STRING(str); + seed = HashUtil::xxHash64(str, seed); } } else { std::string scratch; - HASH_STRING(reflection->GetStringReference(message, field, &scratch)); + seed = HashUtil::xxHash64(reflection->GetStringReference(message, field, &scratch), seed); } } break; case FieldDescriptor::CPPTYPE_MESSAGE: @@ -192,7 +187,7 @@ uint64_t reflectionHashMessage(const Protobuf::Message& message, uint64_t seed) std::string scratch; const auto reflection = message.GetReflection(); const auto descriptor = message.GetDescriptor(); - HASH_STRING(descriptor->full_name()); + seed = HashUtil::xxHash64(descriptor->full_name(), seed); if (descriptor->well_known_type() == Protobuf::Descriptor::WELLKNOWNTYPE_ANY) { const ProtobufWkt::Any* any = Protobuf::DynamicCastToGenerated(&message); auto submsg = unpackAnyForReflection(*any); From aa9e5fd93e808f60dfb800d7772ad12a29662b3a Mon Sep 17 00:00:00 2001 From: Raven Black Date: Fri, 10 Nov 2023 16:20:51 +0000 Subject: [PATCH 11/41] Type-typo Signed-off-by: Raven Black --- source/common/protobuf/deterministic_hash.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/common/protobuf/deterministic_hash.cc b/source/common/protobuf/deterministic_hash.cc index b8a56ad673be..c9e06808d68c 100644 --- a/source/common/protobuf/deterministic_hash.cc +++ b/source/common/protobuf/deterministic_hash.cc @@ -68,7 +68,7 @@ uint64_t reflectionHashMapField(const Protobuf::Message& message, case FieldDescriptor::CPPTYPE_UINT64: compareFn = makeCompareFn([&entry_reflection, &key_field](const Protobuf::Message& msg) { - return entry_reflection->GetInt64(msg, key_field); + return entry_reflection->GetUInt64(msg, key_field); }); break; case FieldDescriptor::CPPTYPE_BOOL: From 9e38e75d7c625d987a8588f17d3c62ec04a3c498 Mon Sep 17 00:00:00 2001 From: Raven Black Date: Fri, 10 Nov 2023 21:42:39 +0000 Subject: [PATCH 12/41] Add some tests, fix a bug they found Signed-off-by: Raven Black --- source/common/protobuf/deterministic_hash.cc | 6 +- test/common/protobuf/BUILD | 14 +++ .../protobuf/deterministic_hash_test.cc | 114 ++++++++++++++++++ .../protobuf/deterministic_hash_test.proto | 53 ++++++++ .../filters/http/cache/http_cache_test.cc | 2 +- 5 files changed, 187 insertions(+), 2 deletions(-) create mode 100644 test/common/protobuf/deterministic_hash_test.cc create mode 100644 test/common/protobuf/deterministic_hash_test.proto diff --git a/source/common/protobuf/deterministic_hash.cc b/source/common/protobuf/deterministic_hash.cc index c9e06808d68c..d3308829c9e9 100644 --- a/source/common/protobuf/deterministic_hash.cc +++ b/source/common/protobuf/deterministic_hash.cc @@ -203,7 +203,11 @@ uint64_t reflectionHashMessage(const Protobuf::Message& message, uint64_t seed) for (const auto field : fields) { seed = reflectionHashField(message, field, seed); } - return seed; + // Hash one extra character to signify end of message, so that + // msg{} field2=2 + // hashes differently from + // msg{field2=2} + return HashUtil::xxHash64("\x17", seed); } } // namespace diff --git a/test/common/protobuf/BUILD b/test/common/protobuf/BUILD index e45bd54b7be8..71e3fd32da4f 100644 --- a/test/common/protobuf/BUILD +++ b/test/common/protobuf/BUILD @@ -21,6 +21,20 @@ envoy_cc_test( ], ) +envoy_proto_library( + name = "deterministic_hash_test_proto", + srcs = ["deterministic_hash_test.proto"], +) + +envoy_cc_test( + name = "deterministic_hash_test", + srcs = ["deterministic_hash_test.cc"], + deps = [ + ":deterministic_hash_test_proto_cc_proto", + "//source/common/protobuf:deterministic_hash_lib", + ], +) + envoy_proto_library( name = "utility_test_protos", srcs = [ diff --git a/test/common/protobuf/deterministic_hash_test.cc b/test/common/protobuf/deterministic_hash_test.cc new file mode 100644 index 000000000000..9d3f015909d8 --- /dev/null +++ b/test/common/protobuf/deterministic_hash_test.cc @@ -0,0 +1,114 @@ +#include "source/common/protobuf/deterministic_hash.h" + +#include "test/common/protobuf/deterministic_hash_test.pb.h" + +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +namespace Envoy { +namespace DeterministicProtoHash { + +TEST(HashTest, EmptyMessageHashDiffersByMessageType) { + deterministichashtest::Maps empty1; + deterministichashtest::SingleFields empty2; + EXPECT_NE(hash(empty1), hash(empty2)); +} + +TEST(HashTest, EmptyMessageHashMatches) { + deterministichashtest::Maps empty1, empty2; + EXPECT_EQ(hash(empty1), hash(empty2)); +} + +TEST(HashTest, BoolKeyedMapIsInsertionOrderAgnostic) { + deterministichashtest::Maps map1, map2; + (*map1.mutable_bool_string())[false] = "false"; + (*map1.mutable_bool_string())[true] = "true"; + (*map2.mutable_bool_string())[true] = "true"; + (*map2.mutable_bool_string())[false] = "false"; + EXPECT_EQ(hash(map1), hash(map2)); +} + +TEST(HashTest, StringKeyedMapIsInsertionOrderAgnostic) { + deterministichashtest::Maps map1, map2; + (*map1.mutable_string_bool())["false"] = false; + (*map1.mutable_string_bool())["true"] = true; + (*map2.mutable_string_bool())["true"] = true; + (*map2.mutable_string_bool())["false"] = false; + EXPECT_EQ(hash(map1), hash(map2)); +} + +TEST(HashTest, Int32KeyedMapIsInsertionOrderAgnostic) { + deterministichashtest::Maps map1, map2; + (*map1.mutable_int32_uint32())[-5] = 5; + (*map1.mutable_int32_uint32())[-8] = 8; + (*map2.mutable_int32_uint32())[-8] = 8; + (*map2.mutable_int32_uint32())[-5] = 5; + EXPECT_EQ(hash(map1), hash(map2)); +} + +TEST(HashTest, UInt32KeyedMapIsInsertionOrderAgnostic) { + deterministichashtest::Maps map1, map2; + (*map1.mutable_uint32_int32())[5] = -5; + (*map1.mutable_uint32_int32())[8] = -8; + (*map2.mutable_uint32_int32())[8] = -8; + (*map2.mutable_uint32_int32())[5] = -5; + EXPECT_EQ(hash(map1), hash(map2)); +} + +TEST(HashTest, Int64KeyedMapIsInsertionOrderAgnostic) { + deterministichashtest::Maps map1, map2; + (*map1.mutable_int64_uint64())[-5] = 5; + (*map1.mutable_int64_uint64())[-8] = 8; + (*map2.mutable_int64_uint64())[-8] = 8; + (*map2.mutable_int64_uint64())[-5] = 5; + EXPECT_EQ(hash(map1), hash(map2)); +} + +TEST(HashTest, UInt64KeyedMapIsInsertionOrderAgnostic) { + deterministichashtest::Maps map1, map2; + (*map1.mutable_uint64_int64())[5] = -5; + (*map1.mutable_uint64_int64())[8] = -8; + (*map2.mutable_uint64_int64())[8] = -8; + (*map2.mutable_uint64_int64())[5] = -5; + EXPECT_EQ(hash(map1), hash(map2)); +} + +TEST(HashTest, MapWithSameKeysAndValuesPairedDifferentlyDoesNotMatch) { + deterministichashtest::Maps map1, map2; + (*map1.mutable_string_bool())["false"] = false; + (*map1.mutable_string_bool())["true"] = true; + (*map2.mutable_string_bool())["true"] = false; + (*map2.mutable_string_bool())["false"] = true; + EXPECT_NE(hash(map1), hash(map2)); +} + +TEST(HashTest, RecursiveMessageMatchesWhenSame) { + deterministichashtest::Recursion r1, r2; + r1.set_index(0); + r1.mutable_child()->set_index(1); + r2.set_index(0); + r2.mutable_child()->set_index(1); + EXPECT_EQ(hash(r1), hash(r2)); +} + +TEST(HashTest, RecursiveMessageDoesNotMatchWhenSameValuesInDifferentOrder) { + deterministichashtest::Recursion r1, r2; + r1.set_index(0); + r1.mutable_child()->set_index(1); + r2.set_index(1); + r2.mutable_child()->set_index(0); + EXPECT_NE(hash(r1), hash(r2)); +} + +TEST(HashTest, RecursiveMessageDoesNotMatchWithDifferentDepth) { + deterministichashtest::Recursion r1, r2; + r1.set_index(0); + r1.mutable_child()->set_index(1); + r2.set_index(0); + r2.mutable_child()->set_index(1); + r2.mutable_child()->mutable_child(); + EXPECT_NE(hash(r1), hash(r2)); +} + +} // namespace DeterministicProtoHash +} // namespace Envoy diff --git a/test/common/protobuf/deterministic_hash_test.proto b/test/common/protobuf/deterministic_hash_test.proto new file mode 100644 index 000000000000..9516f898280c --- /dev/null +++ b/test/common/protobuf/deterministic_hash_test.proto @@ -0,0 +1,53 @@ +syntax = "proto3"; + +package deterministichashtest; + +enum FooEnum { + ZERO = 0; + FOO = 1; + BAR = 2; +} + +message Maps { + map bool_string = 1; + map string_bool = 2; + map int32_uint32 = 3; + map uint32_int32 = 4; + map int64_uint64 = 5; + map uint64_int64 = 6; +}; + +message Recursion { + Recursion child = 1; + uint32 index = 2; +}; + +message RepeatedFields { + repeated bool bools = 1; + repeated string strings = 2; + repeated int32 int32s = 3; + repeated uint32 uint32s = 4; + repeated int64 int64s = 5; + repeated uint64 uint64s = 6; + repeated bytes byteses = 7; + repeated double doubles = 8; + repeated float floats = 9; + repeated FooEnum enums = 10; +}; + +message SingleFields { + bool bool = 1; + string strings = 2; + int32 int32 = 3; + uint32 uint32 = 4; + int64 int64 = 5; + uint64 uint64 = 6; + bytes bytes = 7; + double double = 8; + float float = 9; + FooEnum enum = 10; +}; + +//message WithAny { +// Any any = 1; +//}; diff --git a/test/extensions/filters/http/cache/http_cache_test.cc b/test/extensions/filters/http/cache/http_cache_test.cc index 27e540dfea3e..5f42f83c134e 100644 --- a/test/extensions/filters/http/cache/http_cache_test.cc +++ b/test/extensions/filters/http/cache/http_cache_test.cc @@ -296,7 +296,7 @@ TEST_F(LookupRequestTest, PragmaNoFallback) { TEST(HttpCacheTest, StableHashKey) { Key key; key.set_host("example.com"); - ASSERT_EQ(stableHashKey(key), 15427631249665725347u); + ASSERT_EQ(stableHashKey(key), 6153940628716543519u); } TEST_P(LookupRequestTest, ResultWithBodyAndTrailersMatchesExpectation) { From edcf36302c3a5432acccbee5d0a1f71ed00934fd Mon Sep 17 00:00:00 2001 From: Raven Black Date: Fri, 10 Nov 2023 22:24:40 +0000 Subject: [PATCH 13/41] Last of the tests and another fix Signed-off-by: Raven Black --- source/common/protobuf/deterministic_hash.cc | 11 +- .../protobuf/deterministic_hash_test.cc | 328 ++++++++++++++++++ .../protobuf/deterministic_hash_test.proto | 10 +- 3 files changed, 343 insertions(+), 6 deletions(-) diff --git a/source/common/protobuf/deterministic_hash.cc b/source/common/protobuf/deterministic_hash.cc index d3308829c9e9..24ebeda2abe9 100644 --- a/source/common/protobuf/deterministic_hash.cc +++ b/source/common/protobuf/deterministic_hash.cc @@ -131,7 +131,16 @@ uint64_t reflectionHashField(const Protobuf::Message& message, REFLECTION_FOR_EACH(Bool, bool); break; case FieldDescriptor::CPPTYPE_ENUM: - REFLECTION_FOR_EACH(EnumValue, uint32_t); + if (field->is_repeated()) { + int c = reflection->FieldSize(message, field); + for (int i = 0; i < c; i++) { + int v = reflection->GetRepeatedEnumValue(message, field, i); + seed = HashUtil::xxHash64(absl::string_view{reinterpret_cast(&v), sizeof(v)}, seed); + } + } else { + int v = reflection->GetEnumValue(message, field); + seed = HashUtil::xxHash64(absl::string_view{reinterpret_cast(&v), sizeof(v)}, seed); + } break; case FieldDescriptor::CPPTYPE_STRING: { if (field->is_repeated()) { diff --git a/test/common/protobuf/deterministic_hash_test.cc b/test/common/protobuf/deterministic_hash_test.cc index 9d3f015909d8..b3df97071c5f 100644 --- a/test/common/protobuf/deterministic_hash_test.cc +++ b/test/common/protobuf/deterministic_hash_test.cc @@ -110,5 +110,333 @@ TEST(HashTest, RecursiveMessageDoesNotMatchWithDifferentDepth) { EXPECT_NE(hash(r1), hash(r2)); } +TEST(HashTest, MatchingRepeatedBoolsMatch) { + deterministichashtest::RepeatedFields r1, r2; + r1.add_bools(false); + r1.add_bools(true); + r2.add_bools(false); + r2.add_bools(true); + EXPECT_EQ(hash(r1), hash(r2)); +} + +TEST(HashTest, RepeatedBoolsDifferentOrderMismatch) { + deterministichashtest::RepeatedFields r1, r2; + r1.add_bools(false); + r1.add_bools(true); + r2.add_bools(true); + r2.add_bools(false); + EXPECT_NE(hash(r1), hash(r2)); +} + +TEST(HashTest, RepeatedBoolsDifferentLengthMismatch) { + deterministichashtest::RepeatedFields r1, r2; + r1.add_bools(false); + r2.add_bools(false); + r2.add_bools(false); + EXPECT_NE(hash(r1), hash(r2)); +} + +TEST(HashTest, RepeatedStringsMatch) { + deterministichashtest::RepeatedFields r1, r2; + r1.add_strings("foo"); + r1.add_strings("bar"); + r2.add_strings("foo"); + r2.add_strings("bar"); + EXPECT_EQ(hash(r1), hash(r2)); +} + +TEST(HashTest, RepeatedStringsDifferentOrderMismatch) { + deterministichashtest::RepeatedFields r1, r2; + r1.add_strings("foo"); + r1.add_strings("bar"); + r2.add_strings("bar"); + r2.add_strings("foo"); + EXPECT_NE(hash(r1), hash(r2)); +} + +TEST(HashTest, RepeatedBytesMatch) { + deterministichashtest::RepeatedFields r1, r2; + r1.add_byteses("\x01\x02\x03\x04"); + r1.add_byteses("\x01\x02\x03\x04\x05"); + r2.add_byteses("\x01\x02\x03\x04"); + r2.add_byteses("\x01\x02\x03\x04\x05"); + EXPECT_EQ(hash(r1), hash(r2)); +} + +TEST(HashTest, RepeatedBytesDifferentOrderMismatch) { + deterministichashtest::RepeatedFields r1, r2; + r1.add_byteses("\x01\x02\x03\x04"); + r1.add_byteses("\x01\x02\x03\x04\x05"); + r2.add_byteses("\x01\x02\x03\x04\x05"); + r2.add_byteses("\x01\x02\x03\x04"); + EXPECT_NE(hash(r1), hash(r2)); +} + +TEST(HashTest, RepeatedInt32Match) { + deterministichashtest::RepeatedFields r1, r2; + r1.add_int32s(5); + r1.add_int32s(8); + r2.add_int32s(5); + r2.add_int32s(8); + EXPECT_EQ(hash(r1), hash(r2)); +} + +TEST(HashTest, RepeatedInt32DifferentOrderMismatch) { + deterministichashtest::RepeatedFields r1, r2; + r1.add_int32s(5); + r1.add_int32s(8); + r2.add_int32s(8); + r2.add_int32s(5); + EXPECT_NE(hash(r1), hash(r2)); +} + +TEST(HashTest, RepeatedUInt32Match) { + deterministichashtest::RepeatedFields r1, r2; + r1.add_uint32s(5); + r1.add_uint32s(8); + r2.add_uint32s(5); + r2.add_uint32s(8); + EXPECT_EQ(hash(r1), hash(r2)); +} + +TEST(HashTest, RepeatedUInt32DifferentOrderMismatch) { + deterministichashtest::RepeatedFields r1, r2; + r1.add_uint32s(5); + r1.add_uint32s(8); + r2.add_uint32s(8); + r2.add_uint32s(5); + EXPECT_NE(hash(r1), hash(r2)); +} + +TEST(HashTest, RepeatedInt64Match) { + deterministichashtest::RepeatedFields r1, r2; + r1.add_int64s(5); + r1.add_int64s(8); + r2.add_int64s(5); + r2.add_int64s(8); + EXPECT_EQ(hash(r1), hash(r2)); +} + +TEST(HashTest, RepeatedInt64DifferentOrderMismatch) { + deterministichashtest::RepeatedFields r1, r2; + r1.add_int64s(5); + r1.add_int64s(8); + r2.add_int64s(8); + r2.add_int64s(5); + EXPECT_NE(hash(r1), hash(r2)); +} + +TEST(HashTest, RepeatedUInt64Match) { + deterministichashtest::RepeatedFields r1, r2; + r1.add_uint64s(5); + r1.add_uint64s(8); + r2.add_uint64s(5); + r2.add_uint64s(8); + EXPECT_EQ(hash(r1), hash(r2)); +} + +TEST(HashTest, RepeatedUInt64DifferentOrderMismatch) { + deterministichashtest::RepeatedFields r1, r2; + r1.add_uint64s(5); + r1.add_uint64s(8); + r2.add_uint64s(8); + r2.add_uint64s(5); + EXPECT_NE(hash(r1), hash(r2)); +} + +TEST(HashTest, RepeatedEnumMatch) { + deterministichashtest::RepeatedFields r1, r2; + r1.add_enums(deterministichashtest::FOO); + r1.add_enums(deterministichashtest::BAR); + r2.add_enums(deterministichashtest::FOO); + r2.add_enums(deterministichashtest::BAR); + EXPECT_EQ(hash(r1), hash(r2)); +} + +TEST(HashTest, RepeatedEnumDifferentOrderMismatch) { + deterministichashtest::RepeatedFields r1, r2; + r1.add_enums(deterministichashtest::FOO); + r1.add_enums(deterministichashtest::BAR); + r2.add_enums(deterministichashtest::BAR); + r2.add_enums(deterministichashtest::FOO); + EXPECT_NE(hash(r1), hash(r2)); +} + +TEST(HashTest, RepeatedDoubleMatch) { + deterministichashtest::RepeatedFields r1, r2; + r1.add_doubles(1.84); + r1.add_doubles(-4.88); + r2.add_doubles(1.84); + r2.add_doubles(-4.88); + EXPECT_EQ(hash(r1), hash(r2)); +} + +TEST(HashTest, RepeatedDoubleDifferentOrderMismatch) { + deterministichashtest::RepeatedFields r1, r2; + r1.add_doubles(1.84); + r1.add_doubles(-4.88); + r2.add_doubles(-4.88); + r2.add_doubles(1.84); + EXPECT_NE(hash(r1), hash(r2)); +} + +TEST(HashTest, RepeatedFloatMatch) { + deterministichashtest::RepeatedFields r1, r2; + r1.add_floats(1.84f); + r1.add_floats(-4.88f); + r2.add_floats(1.84f); + r2.add_floats(-4.88f); + EXPECT_EQ(hash(r1), hash(r2)); +} + +TEST(HashTest, RepeatedFloatDifferentOrderMismatch) { + deterministichashtest::RepeatedFields r1, r2; + r1.add_floats(1.84f); + r1.add_floats(-4.88f); + r2.add_floats(-4.88f); + r2.add_floats(1.84f); + EXPECT_NE(hash(r1), hash(r2)); +} + +TEST(HashTest, SingleInt32Match) { + deterministichashtest::SingleFields s1, s2; + s1.set_int32(5); + s2.set_int32(5); + EXPECT_EQ(hash(s1), hash(s2)); +} + +TEST(HashTest, SingleInt32Mismatch) { + deterministichashtest::SingleFields s1, s2; + s1.set_int32(5); + s2.set_int32(8); + EXPECT_NE(hash(s1), hash(s2)); +} + +TEST(HashTest, SingleUInt32Match) { + deterministichashtest::SingleFields s1, s2; + s1.set_uint32(5); + s2.set_uint32(5); + EXPECT_EQ(hash(s1), hash(s2)); +} + +TEST(HashTest, SingleUInt32Mismatch) { + deterministichashtest::SingleFields s1, s2; + s1.set_uint32(5); + s2.set_uint32(8); + EXPECT_NE(hash(s1), hash(s2)); +} + +TEST(HashTest, SingleInt64Match) { + deterministichashtest::SingleFields s1, s2; + s1.set_int64(5); + s2.set_int64(5); + EXPECT_EQ(hash(s1), hash(s2)); +} + +TEST(HashTest, SingleInt64Mismatch) { + deterministichashtest::SingleFields s1, s2; + s1.set_int64(5); + s2.set_int64(8); + EXPECT_NE(hash(s1), hash(s2)); +} + +TEST(HashTest, SingleUInt64Match) { + deterministichashtest::SingleFields s1, s2; + s1.set_uint64(5); + s2.set_uint64(5); + EXPECT_EQ(hash(s1), hash(s2)); +} + +TEST(HashTest, SingleUInt64Mismatch) { + deterministichashtest::SingleFields s1, s2; + s1.set_uint64(5); + s2.set_uint64(8); + EXPECT_NE(hash(s1), hash(s2)); +} + +TEST(HashTest, SingleBoolMatch) { + deterministichashtest::SingleFields s1, s2; + s1.set_b(true); + s2.set_b(true); + EXPECT_EQ(hash(s1), hash(s2)); +} + +TEST(HashTest, SingleBoolMismatch) { + deterministichashtest::SingleFields s1, s2; + s1.set_b(true); + s2.set_b(false); + EXPECT_NE(hash(s1), hash(s2)); +} + +TEST(HashTest, SingleStringMatch) { + deterministichashtest::SingleFields s1, s2; + s1.set_string("true"); + s2.set_string("true"); + EXPECT_EQ(hash(s1), hash(s2)); +} + +TEST(HashTest, SingleStringMismatch) { + deterministichashtest::SingleFields s1, s2; + s1.set_string("true"); + s2.set_string("false"); + EXPECT_NE(hash(s1), hash(s2)); +} + +TEST(HashTest, SingleBytesMatch) { + deterministichashtest::SingleFields s1, s2; + s1.set_bytes("\x01\x02"); + s2.set_bytes("\x01\x02"); + EXPECT_EQ(hash(s1), hash(s2)); +} + +TEST(HashTest, SingleBytesMismatch) { + deterministichashtest::SingleFields s1, s2; + s1.set_bytes("\x01\x02"); + s2.set_bytes("\x01\x02\x03"); + EXPECT_NE(hash(s1), hash(s2)); +} + +TEST(HashTest, SingleDoubleMatch) { + deterministichashtest::SingleFields s1, s2; + s1.set_db(3.9); + s2.set_db(3.9); + EXPECT_EQ(hash(s1), hash(s2)); +} + +TEST(HashTest, SingleDoubleMismatch) { + deterministichashtest::SingleFields s1, s2; + s1.set_db(3.9); + s2.set_db(3.91); + EXPECT_NE(hash(s1), hash(s2)); +} + +TEST(HashTest, SingleFloatMatch) { + deterministichashtest::SingleFields s1, s2; + s1.set_f(3.9f); + s2.set_f(3.9f); + EXPECT_EQ(hash(s1), hash(s2)); +} + +TEST(HashTest, SingleFloatMismatch) { + deterministichashtest::SingleFields s1, s2; + s1.set_f(3.9f); + s2.set_f(3.91f); + EXPECT_NE(hash(s1), hash(s2)); +} + +TEST(HashTest, SingleEnumMatch) { + deterministichashtest::SingleFields s1, s2; + s1.set_e(deterministichashtest::FOO); + s2.set_e(deterministichashtest::FOO); + EXPECT_EQ(hash(s1), hash(s2)); +} + +TEST(HashTest, SingleEnumMismatch) { + deterministichashtest::SingleFields s1, s2; + s1.set_e(deterministichashtest::FOO); + s2.set_e(deterministichashtest::BAR); + EXPECT_NE(hash(s1), hash(s2)); +} + } // namespace DeterministicProtoHash } // namespace Envoy diff --git a/test/common/protobuf/deterministic_hash_test.proto b/test/common/protobuf/deterministic_hash_test.proto index 9516f898280c..b75cbb339df6 100644 --- a/test/common/protobuf/deterministic_hash_test.proto +++ b/test/common/protobuf/deterministic_hash_test.proto @@ -36,16 +36,16 @@ message RepeatedFields { }; message SingleFields { - bool bool = 1; - string strings = 2; + bool b = 1; + string string = 2; int32 int32 = 3; uint32 uint32 = 4; int64 int64 = 5; uint64 uint64 = 6; bytes bytes = 7; - double double = 8; - float float = 9; - FooEnum enum = 10; + double db = 8; + float f = 9; + FooEnum e = 10; }; //message WithAny { From 9b6711949d7bb1e5c2d1e3761c8b5d1b13825575 Mon Sep 17 00:00:00 2001 From: Raven Black Date: Mon, 13 Nov 2023 17:36:24 +0000 Subject: [PATCH 14/41] Untemplate Signed-off-by: Raven Black --- source/common/protobuf/deterministic_hash.cc | 126 ++++++++++--------- 1 file changed, 64 insertions(+), 62 deletions(-) diff --git a/source/common/protobuf/deterministic_hash.cc b/source/common/protobuf/deterministic_hash.cc index 24ebeda2abe9..b5a685d0fa61 100644 --- a/source/common/protobuf/deterministic_hash.cc +++ b/source/common/protobuf/deterministic_hash.cc @@ -24,13 +24,65 @@ uint64_t reflectionHashMessage(const Protobuf::Message& message, uint64_t seed = uint64_t reflectionHashField(const Protobuf::Message& message, const Protobuf::FieldDescriptor* field, uint64_t seed); -template -std::function -makeCompareFn(std::function getter) { - return [getter](const Protobuf::Message& a, const Protobuf::Message& b) { - return getter(a) < getter(b); - }; -} +struct MapFieldSorter { + MapFieldSorter(const Protobuf::RepeatedFieldRef entries) + : entries_(entries.begin(), entries.end()), + reflection_(*entries_.begin()->get().GetReflection()), + descriptor_(*entries_.begin()->get().GetDescriptor()), key_field_(*descriptor_.map_key()), + value_field_(*descriptor_.map_value()) {} + const std::vector>& sorted() { + using Protobuf::FieldDescriptor; + std::function compareFn; + switch (key_field_.cpp_type()) { + case FieldDescriptor::CPPTYPE_INT32: + compareFn = [this](const Protobuf::Message& a, const Protobuf::Message& b) { + return reflection_.GetInt32(a, &key_field_) < reflection_.GetInt32(b, &key_field_); + }; + break; + case FieldDescriptor::CPPTYPE_UINT32: + compareFn = [this](const Protobuf::Message& a, const Protobuf::Message& b) { + return reflection_.GetUInt32(a, &key_field_) < reflection_.GetUInt32(b, &key_field_); + }; + break; + case FieldDescriptor::CPPTYPE_INT64: + compareFn = [this](const Protobuf::Message& a, const Protobuf::Message& b) { + return reflection_.GetInt64(a, &key_field_) < reflection_.GetInt64(b, &key_field_); + }; + break; + case FieldDescriptor::CPPTYPE_UINT64: + compareFn = [this](const Protobuf::Message& a, const Protobuf::Message& b) { + return reflection_.GetUInt64(a, &key_field_) < reflection_.GetUInt64(b, &key_field_); + }; + break; + case FieldDescriptor::CPPTYPE_BOOL: + compareFn = [this](const Protobuf::Message& a, const Protobuf::Message& b) { + return reflection_.GetBool(a, &key_field_) < reflection_.GetBool(b, &key_field_); + }; + break; + case FieldDescriptor::CPPTYPE_STRING: { + compareFn = [this](const Protobuf::Message& a, const Protobuf::Message& b) { + std::string scratch_a, scratch_b; + return reflection_.GetStringReference(a, &key_field_, &scratch_a) < + reflection_.GetStringReference(b, &key_field_, &scratch_b); + }; + break; + } + case FieldDescriptor::CPPTYPE_DOUBLE: + case FieldDescriptor::CPPTYPE_FLOAT: + case FieldDescriptor::CPPTYPE_ENUM: + case FieldDescriptor::CPPTYPE_MESSAGE: + IS_ENVOY_BUG("invalid map key type"); + } + std::sort(entries_.begin(), entries_.end(), compareFn); + return entries_; + } + + std::vector> entries_; + const Protobuf::Reflection& reflection_; + const Protobuf::Descriptor& descriptor_; + const Protobuf::FieldDescriptor& key_field_; + const Protobuf::FieldDescriptor& value_field_; +}; // To make a map serialize deterministically we need to force the order. // Here we're going to sort the keys into numerical order for number keys, @@ -40,61 +92,11 @@ uint64_t reflectionHashMapField(const Protobuf::Message& message, using Protobuf::FieldDescriptor; const auto reflection = message.GetReflection(); auto entries = reflection->GetRepeatedFieldRef(message, field); - std::vector> map(entries.begin(), entries.end()); - auto entry_reflection = map.begin()->get().GetReflection(); - auto entry_descriptor = map.begin()->get().GetDescriptor(); - const FieldDescriptor* key_field = entry_descriptor->map_key(); - const FieldDescriptor* value_field = entry_descriptor->map_value(); - std::function compareFn; - switch (key_field->cpp_type()) { - case FieldDescriptor::CPPTYPE_INT32: - compareFn = - makeCompareFn([&entry_reflection, &key_field](const Protobuf::Message& msg) { - return entry_reflection->GetInt32(msg, key_field); - }); - break; - case FieldDescriptor::CPPTYPE_UINT32: - compareFn = - makeCompareFn([&entry_reflection, &key_field](const Protobuf::Message& msg) { - return entry_reflection->GetUInt32(msg, key_field); - }); - break; - case FieldDescriptor::CPPTYPE_INT64: - compareFn = - makeCompareFn([&entry_reflection, &key_field](const Protobuf::Message& msg) { - return entry_reflection->GetInt64(msg, key_field); - }); - break; - case FieldDescriptor::CPPTYPE_UINT64: - compareFn = - makeCompareFn([&entry_reflection, &key_field](const Protobuf::Message& msg) { - return entry_reflection->GetUInt64(msg, key_field); - }); - break; - case FieldDescriptor::CPPTYPE_BOOL: - compareFn = makeCompareFn([&entry_reflection, &key_field](const Protobuf::Message& msg) { - return entry_reflection->GetBool(msg, key_field); - }); - break; - case FieldDescriptor::CPPTYPE_STRING: { - compareFn = [&entry_reflection, &key_field](const Protobuf::Message& a, - const Protobuf::Message& b) { - std::string scratch_a, scratch_b; - return entry_reflection->GetStringReference(a, key_field, &scratch_a) < - entry_reflection->GetStringReference(b, key_field, &scratch_b); - }; - break; - } - case FieldDescriptor::CPPTYPE_DOUBLE: - case FieldDescriptor::CPPTYPE_FLOAT: - case FieldDescriptor::CPPTYPE_ENUM: - case FieldDescriptor::CPPTYPE_MESSAGE: - IS_ENVOY_BUG("invalid map key type"); - } - std::sort(map.begin(), map.end(), compareFn); - for (const auto& entry : map) { - seed = reflectionHashField(entry, key_field, seed); - seed = reflectionHashField(entry, value_field, seed); + MapFieldSorter sorter(entries); + auto& sorted_entries = sorter.sorted(); + for (const auto& entry : sorted_entries) { + seed = reflectionHashField(entry, &sorter.key_field_, seed); + seed = reflectionHashField(entry, &sorter.value_field_, seed); } return seed; } From c172e2d7a507bb013322e5a5e138bbc9851652e5 Mon Sep 17 00:00:00 2001 From: Raven Black Date: Mon, 13 Nov 2023 21:53:50 +0000 Subject: [PATCH 15/41] No lambdas version Signed-off-by: Raven Black --- source/common/protobuf/deterministic_hash.cc | 105 +++++++++++-------- 1 file changed, 62 insertions(+), 43 deletions(-) diff --git a/source/common/protobuf/deterministic_hash.cc b/source/common/protobuf/deterministic_hash.cc index b5a685d0fa61..1780c42a83ca 100644 --- a/source/common/protobuf/deterministic_hash.cc +++ b/source/common/protobuf/deterministic_hash.cc @@ -26,58 +26,77 @@ uint64_t reflectionHashField(const Protobuf::Message& message, struct MapFieldSorter { MapFieldSorter(const Protobuf::RepeatedFieldRef entries) - : entries_(entries.begin(), entries.end()), + : entries_(entries.begin(), entries.end()), compare_(*entries_.begin()), reflection_(*entries_.begin()->get().GetReflection()), descriptor_(*entries_.begin()->get().GetDescriptor()), key_field_(*descriptor_.map_key()), value_field_(*descriptor_.map_value()) {} const std::vector>& sorted() { using Protobuf::FieldDescriptor; - std::function compareFn; - switch (key_field_.cpp_type()) { - case FieldDescriptor::CPPTYPE_INT32: - compareFn = [this](const Protobuf::Message& a, const Protobuf::Message& b) { - return reflection_.GetInt32(a, &key_field_) < reflection_.GetInt32(b, &key_field_); - }; - break; - case FieldDescriptor::CPPTYPE_UINT32: - compareFn = [this](const Protobuf::Message& a, const Protobuf::Message& b) { - return reflection_.GetUInt32(a, &key_field_) < reflection_.GetUInt32(b, &key_field_); - }; - break; - case FieldDescriptor::CPPTYPE_INT64: - compareFn = [this](const Protobuf::Message& a, const Protobuf::Message& b) { - return reflection_.GetInt64(a, &key_field_) < reflection_.GetInt64(b, &key_field_); - }; - break; - case FieldDescriptor::CPPTYPE_UINT64: - compareFn = [this](const Protobuf::Message& a, const Protobuf::Message& b) { - return reflection_.GetUInt64(a, &key_field_) < reflection_.GetUInt64(b, &key_field_); - }; - break; - case FieldDescriptor::CPPTYPE_BOOL: - compareFn = [this](const Protobuf::Message& a, const Protobuf::Message& b) { - return reflection_.GetBool(a, &key_field_) < reflection_.GetBool(b, &key_field_); - }; - break; - case FieldDescriptor::CPPTYPE_STRING: { - compareFn = [this](const Protobuf::Message& a, const Protobuf::Message& b) { - std::string scratch_a, scratch_b; - return reflection_.GetStringReference(a, &key_field_, &scratch_a) < - reflection_.GetStringReference(b, &key_field_, &scratch_b); - }; - break; - } - case FieldDescriptor::CPPTYPE_DOUBLE: - case FieldDescriptor::CPPTYPE_FLOAT: - case FieldDescriptor::CPPTYPE_ENUM: - case FieldDescriptor::CPPTYPE_MESSAGE: - IS_ENVOY_BUG("invalid map key type"); - } - std::sort(entries_.begin(), entries_.end(), compareFn); + std::sort(entries_.begin(), entries_.end(), compare_); return entries_; } std::vector> entries_; + struct Compare { + Compare(const Protobuf::Message& first_msg) + : reflection_(*first_msg.GetReflection()), descriptor_(*first_msg.GetDescriptor()), + key_field_(*descriptor_.map_key()), compare_fn_(selectCompareFn()) {} + bool operator()(const Protobuf::Message& a, const Protobuf::Message& b) { + return (this->*compare_fn_)(a, b); + } + + private: + bool compareByInt32(const Protobuf::Message& a, const Protobuf::Message& b) { + return reflection_.GetInt32(a, &key_field_) < reflection_.GetInt32(b, &key_field_); + } + bool compareByUInt32(const Protobuf::Message& a, const Protobuf::Message& b) { + return reflection_.GetUInt32(a, &key_field_) < reflection_.GetUInt32(b, &key_field_); + } + bool compareByInt64(const Protobuf::Message& a, const Protobuf::Message& b) { + return reflection_.GetInt64(a, &key_field_) < reflection_.GetInt64(b, &key_field_); + } + bool compareByUInt64(const Protobuf::Message& a, const Protobuf::Message& b) { + return reflection_.GetUInt64(a, &key_field_) < reflection_.GetUInt64(b, &key_field_); + } + bool compareByBool(const Protobuf::Message& a, const Protobuf::Message& b) { + return reflection_.GetBool(a, &key_field_) < reflection_.GetBool(b, &key_field_); + } + bool compareByString(const Protobuf::Message& a, const Protobuf::Message& b) { + std::string scratch_a, scratch_b; + return reflection_.GetStringReference(a, &key_field_, &scratch_a) < + reflection_.GetStringReference(b, &key_field_, &scratch_b); + } + using CompareMemberFn = bool (Compare::*)(const Protobuf::Message& a, + const Protobuf::Message& b); + CompareMemberFn selectCompareFn() { + using Protobuf::FieldDescriptor; + switch (key_field_.cpp_type()) { + case FieldDescriptor::CPPTYPE_INT32: + return &Compare::compareByInt32; + case FieldDescriptor::CPPTYPE_UINT32: + return &Compare::compareByUInt32; + case FieldDescriptor::CPPTYPE_INT64: + return &Compare::compareByInt64; + case FieldDescriptor::CPPTYPE_UINT64: + return &Compare::compareByUInt64; + case FieldDescriptor::CPPTYPE_BOOL: + return &Compare::compareByBool; + case FieldDescriptor::CPPTYPE_STRING: { + return &Compare::compareByString; + } + case FieldDescriptor::CPPTYPE_DOUBLE: + case FieldDescriptor::CPPTYPE_FLOAT: + case FieldDescriptor::CPPTYPE_ENUM: + case FieldDescriptor::CPPTYPE_MESSAGE: + IS_ENVOY_BUG("invalid map key type"); + return nullptr; + } + } + const Protobuf::Reflection& reflection_; + const Protobuf::Descriptor& descriptor_; + const Protobuf::FieldDescriptor& key_field_; + CompareMemberFn compare_fn_; + } compare_; const Protobuf::Reflection& reflection_; const Protobuf::Descriptor& descriptor_; const Protobuf::FieldDescriptor& key_field_; From c686b3afbc876764071433d093342d1310e3da38 Mon Sep 17 00:00:00 2001 From: Raven Black Date: Mon, 13 Nov 2023 22:06:25 +0000 Subject: [PATCH 16/41] Remove nested struct Signed-off-by: Raven Black --- source/common/protobuf/deterministic_hash.cc | 144 +++++++++---------- 1 file changed, 68 insertions(+), 76 deletions(-) diff --git a/source/common/protobuf/deterministic_hash.cc b/source/common/protobuf/deterministic_hash.cc index 1780c42a83ca..8513c33a4b52 100644 --- a/source/common/protobuf/deterministic_hash.cc +++ b/source/common/protobuf/deterministic_hash.cc @@ -24,98 +24,90 @@ uint64_t reflectionHashMessage(const Protobuf::Message& message, uint64_t seed = uint64_t reflectionHashField(const Protobuf::Message& message, const Protobuf::FieldDescriptor* field, uint64_t seed); -struct MapFieldSorter { - MapFieldSorter(const Protobuf::RepeatedFieldRef entries) - : entries_(entries.begin(), entries.end()), compare_(*entries_.begin()), - reflection_(*entries_.begin()->get().GetReflection()), - descriptor_(*entries_.begin()->get().GetDescriptor()), key_field_(*descriptor_.map_key()), - value_field_(*descriptor_.map_value()) {} - const std::vector>& sorted() { - using Protobuf::FieldDescriptor; - std::sort(entries_.begin(), entries_.end(), compare_); - return entries_; +struct MapFieldComparer { + MapFieldComparer(const Protobuf::Message& first_msg) + : reflection_(*first_msg.GetReflection()), descriptor_(*first_msg.GetDescriptor()), + key_field_(*descriptor_.map_key()), compare_fn_(selectCompareFn()) {} + bool operator()(const Protobuf::Message& a, const Protobuf::Message& b) { + return (this->*compare_fn_)(a, b); } - std::vector> entries_; - struct Compare { - Compare(const Protobuf::Message& first_msg) - : reflection_(*first_msg.GetReflection()), descriptor_(*first_msg.GetDescriptor()), - key_field_(*descriptor_.map_key()), compare_fn_(selectCompareFn()) {} - bool operator()(const Protobuf::Message& a, const Protobuf::Message& b) { - return (this->*compare_fn_)(a, b); - } - - private: - bool compareByInt32(const Protobuf::Message& a, const Protobuf::Message& b) { - return reflection_.GetInt32(a, &key_field_) < reflection_.GetInt32(b, &key_field_); - } - bool compareByUInt32(const Protobuf::Message& a, const Protobuf::Message& b) { - return reflection_.GetUInt32(a, &key_field_) < reflection_.GetUInt32(b, &key_field_); - } - bool compareByInt64(const Protobuf::Message& a, const Protobuf::Message& b) { - return reflection_.GetInt64(a, &key_field_) < reflection_.GetInt64(b, &key_field_); - } - bool compareByUInt64(const Protobuf::Message& a, const Protobuf::Message& b) { - return reflection_.GetUInt64(a, &key_field_) < reflection_.GetUInt64(b, &key_field_); - } - bool compareByBool(const Protobuf::Message& a, const Protobuf::Message& b) { - return reflection_.GetBool(a, &key_field_) < reflection_.GetBool(b, &key_field_); - } - bool compareByString(const Protobuf::Message& a, const Protobuf::Message& b) { - std::string scratch_a, scratch_b; - return reflection_.GetStringReference(a, &key_field_, &scratch_a) < - reflection_.GetStringReference(b, &key_field_, &scratch_b); +private: + bool compareByInt32(const Protobuf::Message& a, const Protobuf::Message& b) { + return reflection_.GetInt32(a, &key_field_) < reflection_.GetInt32(b, &key_field_); + } + bool compareByUInt32(const Protobuf::Message& a, const Protobuf::Message& b) { + return reflection_.GetUInt32(a, &key_field_) < reflection_.GetUInt32(b, &key_field_); + } + bool compareByInt64(const Protobuf::Message& a, const Protobuf::Message& b) { + return reflection_.GetInt64(a, &key_field_) < reflection_.GetInt64(b, &key_field_); + } + bool compareByUInt64(const Protobuf::Message& a, const Protobuf::Message& b) { + return reflection_.GetUInt64(a, &key_field_) < reflection_.GetUInt64(b, &key_field_); + } + bool compareByBool(const Protobuf::Message& a, const Protobuf::Message& b) { + return reflection_.GetBool(a, &key_field_) < reflection_.GetBool(b, &key_field_); + } + bool compareByString(const Protobuf::Message& a, const Protobuf::Message& b) { + std::string scratch_a, scratch_b; + return reflection_.GetStringReference(a, &key_field_, &scratch_a) < + reflection_.GetStringReference(b, &key_field_, &scratch_b); + } + using CompareMemberFn = bool (MapFieldComparer::*)(const Protobuf::Message& a, + const Protobuf::Message& b); + CompareMemberFn selectCompareFn() { + using Protobuf::FieldDescriptor; + switch (key_field_.cpp_type()) { + case FieldDescriptor::CPPTYPE_INT32: + return &MapFieldComparer::compareByInt32; + case FieldDescriptor::CPPTYPE_UINT32: + return &MapFieldComparer::compareByUInt32; + case FieldDescriptor::CPPTYPE_INT64: + return &MapFieldComparer::compareByInt64; + case FieldDescriptor::CPPTYPE_UINT64: + return &MapFieldComparer::compareByUInt64; + case FieldDescriptor::CPPTYPE_BOOL: + return &MapFieldComparer::compareByBool; + case FieldDescriptor::CPPTYPE_STRING: { + return &MapFieldComparer::compareByString; } - using CompareMemberFn = bool (Compare::*)(const Protobuf::Message& a, - const Protobuf::Message& b); - CompareMemberFn selectCompareFn() { - using Protobuf::FieldDescriptor; - switch (key_field_.cpp_type()) { - case FieldDescriptor::CPPTYPE_INT32: - return &Compare::compareByInt32; - case FieldDescriptor::CPPTYPE_UINT32: - return &Compare::compareByUInt32; - case FieldDescriptor::CPPTYPE_INT64: - return &Compare::compareByInt64; - case FieldDescriptor::CPPTYPE_UINT64: - return &Compare::compareByUInt64; - case FieldDescriptor::CPPTYPE_BOOL: - return &Compare::compareByBool; - case FieldDescriptor::CPPTYPE_STRING: { - return &Compare::compareByString; - } - case FieldDescriptor::CPPTYPE_DOUBLE: - case FieldDescriptor::CPPTYPE_FLOAT: - case FieldDescriptor::CPPTYPE_ENUM: - case FieldDescriptor::CPPTYPE_MESSAGE: - IS_ENVOY_BUG("invalid map key type"); - return nullptr; - } + case FieldDescriptor::CPPTYPE_DOUBLE: + case FieldDescriptor::CPPTYPE_FLOAT: + case FieldDescriptor::CPPTYPE_ENUM: + case FieldDescriptor::CPPTYPE_MESSAGE: + IS_ENVOY_BUG("invalid map key type"); + return nullptr; } - const Protobuf::Reflection& reflection_; - const Protobuf::Descriptor& descriptor_; - const Protobuf::FieldDescriptor& key_field_; - CompareMemberFn compare_fn_; - } compare_; + } const Protobuf::Reflection& reflection_; const Protobuf::Descriptor& descriptor_; const Protobuf::FieldDescriptor& key_field_; - const Protobuf::FieldDescriptor& value_field_; + CompareMemberFn compare_fn_; }; +std::vector> +sortedMapField(const Protobuf::RepeatedFieldRef map_entries) { + std::vector> entries(map_entries.begin(), + map_entries.end()); + auto compare = MapFieldComparer(*entries.begin()); + std::sort(entries.begin(), entries.end(), compare); + return entries; +} + // To make a map serialize deterministically we need to force the order. // Here we're going to sort the keys into numerical order for number keys, // or lexicographical order for strings, and then hash them in that order. uint64_t reflectionHashMapField(const Protobuf::Message& message, const Protobuf::FieldDescriptor* field, uint64_t seed) { - using Protobuf::FieldDescriptor; const auto reflection = message.GetReflection(); - auto entries = reflection->GetRepeatedFieldRef(message, field); - MapFieldSorter sorter(entries); - auto& sorted_entries = sorter.sorted(); + auto sorted_entries = + sortedMapField(reflection->GetRepeatedFieldRef(message, field)); + const auto map_descriptor = sorted_entries.begin()->get().GetDescriptor(); + const auto key_field = map_descriptor->map_key(); + const auto value_field = map_descriptor->map_value(); for (const auto& entry : sorted_entries) { - seed = reflectionHashField(entry, &sorter.key_field_, seed); - seed = reflectionHashField(entry, &sorter.value_field_, seed); + seed = reflectionHashField(entry, key_field, seed); + seed = reflectionHashField(entry, value_field, seed); } return seed; } From 5b4e623548d9d6aef4391b3712a3503267cf2a0d Mon Sep 17 00:00:00 2001 From: Raven Black Date: Mon, 13 Nov 2023 22:41:57 +0000 Subject: [PATCH 17/41] Satisfy Windows inferior "no return value" checks Signed-off-by: Raven Black --- source/common/protobuf/deterministic_hash.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/source/common/protobuf/deterministic_hash.cc b/source/common/protobuf/deterministic_hash.cc index 8513c33a4b52..987603713b3e 100644 --- a/source/common/protobuf/deterministic_hash.cc +++ b/source/common/protobuf/deterministic_hash.cc @@ -75,9 +75,10 @@ struct MapFieldComparer { case FieldDescriptor::CPPTYPE_FLOAT: case FieldDescriptor::CPPTYPE_ENUM: case FieldDescriptor::CPPTYPE_MESSAGE: - IS_ENVOY_BUG("invalid map key type"); - return nullptr; + break; } + IS_ENVOY_BUG("invalid map key type"); + return nullptr; } const Protobuf::Reflection& reflection_; const Protobuf::Descriptor& descriptor_; From cd2d3c124e54b628c9f9705b94e962af07c5e09e Mon Sep 17 00:00:00 2001 From: Raven Black Date: Tue, 14 Nov 2023 16:52:24 +0000 Subject: [PATCH 18/41] Panic > bug for invalid enum Signed-off-by: Raven Black --- source/common/protobuf/deterministic_hash.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/source/common/protobuf/deterministic_hash.cc b/source/common/protobuf/deterministic_hash.cc index 987603713b3e..6d7d4993ed3b 100644 --- a/source/common/protobuf/deterministic_hash.cc +++ b/source/common/protobuf/deterministic_hash.cc @@ -77,8 +77,7 @@ struct MapFieldComparer { case FieldDescriptor::CPPTYPE_MESSAGE: break; } - IS_ENVOY_BUG("invalid map key type"); - return nullptr; + PANIC_DUE_TO_CORRUPT_ENUM; } const Protobuf::Reflection& reflection_; const Protobuf::Descriptor& descriptor_; From 38c7f6d2475be0164f66a3a21e7c27582849dd78 Mon Sep 17 00:00:00 2001 From: Raven Black Date: Tue, 14 Nov 2023 18:56:27 +0000 Subject: [PATCH 19/41] Reduce coverage threshold The reduction is necessary because the new uncovered lines are unreachable enum paths that cannot be tested, and the existing covered function had its LoC reduced which also contributed to a coverage percentage downswing. Signed-off-by: Raven Black --- test/per_file_coverage.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/per_file_coverage.sh b/test/per_file_coverage.sh index 88d6844d1332..69cff90b097a 100755 --- a/test/per_file_coverage.sh +++ b/test/per_file_coverage.sh @@ -15,7 +15,7 @@ declare -a KNOWN_LOW_COVERAGE=( "source/common/matcher:94.6" "source/common/network:94.4" # Flaky, `activateFileEvents`, `startSecureTransport` and `ioctl`, listener_socket do not always report LCOV "source/common/network/dns_resolver:91.4" # A few lines of MacOS code not tested in linux scripts. Tested in MacOS scripts -"source/common/protobuf:96.5" +"source/common/protobuf:96.3" "source/common/quic:93.6" "source/common/secret:95.1" "source/common/signal:87.2" # Death tests don't report LCOV From b6e84b0052e08a5f8ea958b53714e813cf464edb Mon Sep 17 00:00:00 2001 From: Raven Black Date: Tue, 14 Nov 2023 20:38:13 +0000 Subject: [PATCH 20/41] Remove one uncovered line :) Signed-off-by: Raven Black --- source/common/protobuf/deterministic_hash.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/source/common/protobuf/deterministic_hash.cc b/source/common/protobuf/deterministic_hash.cc index 6d7d4993ed3b..7efd77d1d2f2 100644 --- a/source/common/protobuf/deterministic_hash.cc +++ b/source/common/protobuf/deterministic_hash.cc @@ -68,9 +68,8 @@ struct MapFieldComparer { return &MapFieldComparer::compareByUInt64; case FieldDescriptor::CPPTYPE_BOOL: return &MapFieldComparer::compareByBool; - case FieldDescriptor::CPPTYPE_STRING: { + case FieldDescriptor::CPPTYPE_STRING: return &MapFieldComparer::compareByString; - } case FieldDescriptor::CPPTYPE_DOUBLE: case FieldDescriptor::CPPTYPE_FLOAT: case FieldDescriptor::CPPTYPE_ENUM: From 3320ed0d33fe7063df9fc2766e4cf2bf31b0fa56 Mon Sep 17 00:00:00 2001 From: Raven Black Date: Thu, 16 Nov 2023 20:26:03 +0000 Subject: [PATCH 21/41] Coverage back up by 0.1% for the one line Signed-off-by: Raven Black --- test/per_file_coverage.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/per_file_coverage.sh b/test/per_file_coverage.sh index 69cff90b097a..13962bf722e0 100755 --- a/test/per_file_coverage.sh +++ b/test/per_file_coverage.sh @@ -15,7 +15,7 @@ declare -a KNOWN_LOW_COVERAGE=( "source/common/matcher:94.6" "source/common/network:94.4" # Flaky, `activateFileEvents`, `startSecureTransport` and `ioctl`, listener_socket do not always report LCOV "source/common/network/dns_resolver:91.4" # A few lines of MacOS code not tested in linux scripts. Tested in MacOS scripts -"source/common/protobuf:96.3" +"source/common/protobuf:96.4" "source/common/quic:93.6" "source/common/secret:95.1" "source/common/signal:87.2" # Death tests don't report LCOV From 5afe859192356e6ed7e2a760a2ac229946900e4e Mon Sep 17 00:00:00 2001 From: Raven Black Date: Fri, 17 Nov 2023 21:45:35 +0000 Subject: [PATCH 22/41] Remove most auto, Comparer->Comparator Signed-off-by: Raven Black --- source/common/protobuf/deterministic_hash.cc | 52 ++++++++++---------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/source/common/protobuf/deterministic_hash.cc b/source/common/protobuf/deterministic_hash.cc index 7efd77d1d2f2..c41519413000 100644 --- a/source/common/protobuf/deterministic_hash.cc +++ b/source/common/protobuf/deterministic_hash.cc @@ -10,12 +10,12 @@ namespace { #define REFLECTION_FOR_EACH(get_type, value_type) \ if (field->is_repeated()) { \ - for (const auto q : reflection->GetRepeatedFieldRef(message, field)) { \ + for (const value_type& q : reflection->GetRepeatedFieldRef(message, field)) { \ seed = HashUtil::xxHash64(absl::string_view{reinterpret_cast(&q), sizeof(q)}, \ seed); \ } \ } else { \ - const auto q = reflection->Get##get_type(message, field); \ + const value_type q = reflection->Get##get_type(message, field); \ seed = \ HashUtil::xxHash64(absl::string_view{reinterpret_cast(&q), sizeof(q)}, seed); \ } @@ -24,8 +24,8 @@ uint64_t reflectionHashMessage(const Protobuf::Message& message, uint64_t seed = uint64_t reflectionHashField(const Protobuf::Message& message, const Protobuf::FieldDescriptor* field, uint64_t seed); -struct MapFieldComparer { - MapFieldComparer(const Protobuf::Message& first_msg) +struct MapFieldComparator { + MapFieldComparator(const Protobuf::Message& first_msg) : reflection_(*first_msg.GetReflection()), descriptor_(*first_msg.GetDescriptor()), key_field_(*descriptor_.map_key()), compare_fn_(selectCompareFn()) {} bool operator()(const Protobuf::Message& a, const Protobuf::Message& b) { @@ -53,23 +53,23 @@ struct MapFieldComparer { return reflection_.GetStringReference(a, &key_field_, &scratch_a) < reflection_.GetStringReference(b, &key_field_, &scratch_b); } - using CompareMemberFn = bool (MapFieldComparer::*)(const Protobuf::Message& a, - const Protobuf::Message& b); + using CompareMemberFn = bool (MapFieldComparator::*)(const Protobuf::Message& a, + const Protobuf::Message& b); CompareMemberFn selectCompareFn() { using Protobuf::FieldDescriptor; switch (key_field_.cpp_type()) { case FieldDescriptor::CPPTYPE_INT32: - return &MapFieldComparer::compareByInt32; + return &MapFieldComparator::compareByInt32; case FieldDescriptor::CPPTYPE_UINT32: - return &MapFieldComparer::compareByUInt32; + return &MapFieldComparator::compareByUInt32; case FieldDescriptor::CPPTYPE_INT64: - return &MapFieldComparer::compareByInt64; + return &MapFieldComparator::compareByInt64; case FieldDescriptor::CPPTYPE_UINT64: - return &MapFieldComparer::compareByUInt64; + return &MapFieldComparator::compareByUInt64; case FieldDescriptor::CPPTYPE_BOOL: - return &MapFieldComparer::compareByBool; + return &MapFieldComparator::compareByBool; case FieldDescriptor::CPPTYPE_STRING: - return &MapFieldComparer::compareByString; + return &MapFieldComparator::compareByString; case FieldDescriptor::CPPTYPE_DOUBLE: case FieldDescriptor::CPPTYPE_FLOAT: case FieldDescriptor::CPPTYPE_ENUM: @@ -88,7 +88,7 @@ std::vector> sortedMapField(const Protobuf::RepeatedFieldRef map_entries) { std::vector> entries(map_entries.begin(), map_entries.end()); - auto compare = MapFieldComparer(*entries.begin()); + MapFieldComparator compare(*entries.begin()); std::sort(entries.begin(), entries.end(), compare); return entries; } @@ -98,13 +98,13 @@ sortedMapField(const Protobuf::RepeatedFieldRef map_entries) // or lexicographical order for strings, and then hash them in that order. uint64_t reflectionHashMapField(const Protobuf::Message& message, const Protobuf::FieldDescriptor* field, uint64_t seed) { - const auto reflection = message.GetReflection(); + const Protobuf::Reflection* reflection = message.GetReflection(); auto sorted_entries = sortedMapField(reflection->GetRepeatedFieldRef(message, field)); - const auto map_descriptor = sorted_entries.begin()->get().GetDescriptor(); - const auto key_field = map_descriptor->map_key(); - const auto value_field = map_descriptor->map_value(); - for (const auto& entry : sorted_entries) { + const Protobuf::Descriptor* map_descriptor = sorted_entries.begin()->get().GetDescriptor(); + const Protobuf::FieldDescriptor* key_field = map_descriptor->map_key(); + const Protobuf::FieldDescriptor* value_field = map_descriptor->map_value(); + for (const Protobuf::Message& entry : sorted_entries) { seed = reflectionHashField(entry, key_field, seed); seed = reflectionHashField(entry, value_field, seed); } @@ -114,9 +114,9 @@ uint64_t reflectionHashMapField(const Protobuf::Message& message, uint64_t reflectionHashField(const Protobuf::Message& message, const Protobuf::FieldDescriptor* field, uint64_t seed) { using Protobuf::FieldDescriptor; - const auto reflection = message.GetReflection(); + const Protobuf::Reflection* reflection = message.GetReflection(); { - const auto q = field->number(); + const int q = field->number(); seed = HashUtil::xxHash64(absl::string_view{reinterpret_cast(&q), sizeof(q)}, seed); } @@ -168,7 +168,7 @@ uint64_t reflectionHashField(const Protobuf::Message& message, if (field->is_map()) { seed = reflectionHashMapField(message, field, seed); } else if (field->is_repeated()) { - for (const auto& submsg : + for (const Protobuf::Message& submsg : reflection->GetRepeatedFieldRef(message, field)) { seed = reflectionHashMessage(submsg, seed); } @@ -197,7 +197,7 @@ std::unique_ptr unpackAnyForReflection(const ProtobufWkt::Any } const Protobuf::Message* prototype = Protobuf::MessageFactory::generated_factory()->GetPrototype(descriptor); - auto msg = std::unique_ptr(prototype->New()); + std::unique_ptr msg(prototype->New()); any.UnpackTo(msg.get()); return msg; } @@ -206,12 +206,12 @@ std::unique_ptr unpackAnyForReflection(const ProtobufWkt::Any uint64_t reflectionHashMessage(const Protobuf::Message& message, uint64_t seed) { using Protobuf::FieldDescriptor; std::string scratch; - const auto reflection = message.GetReflection(); - const auto descriptor = message.GetDescriptor(); + const Protobuf::Reflection* reflection = message.GetReflection(); + const Protobuf::Descriptor* descriptor = message.GetDescriptor(); seed = HashUtil::xxHash64(descriptor->full_name(), seed); if (descriptor->well_known_type() == Protobuf::Descriptor::WELLKNOWNTYPE_ANY) { const ProtobufWkt::Any* any = Protobuf::DynamicCastToGenerated(&message); - auto submsg = unpackAnyForReflection(*any); + std::unique_ptr submsg = unpackAnyForReflection(*any); if (submsg == nullptr) { // If we wanted to handle unknown types in Any, this is where we'd have to do it. return seed; @@ -221,7 +221,7 @@ uint64_t reflectionHashMessage(const Protobuf::Message& message, uint64_t seed) std::vector fields; reflection->ListFields(message, &fields); // If we wanted to handle unknown fields, we'd need to also GetUnknownFields here. - for (const auto field : fields) { + for (const FieldDescriptor* field : fields) { seed = reflectionHashField(message, field, seed); } // Hash one extra character to signify end of message, so that From da477568da80a7fadd59b2d4cc832e23798b2c79 Mon Sep 17 00:00:00 2001 From: Raven Black Date: Fri, 17 Nov 2023 21:46:47 +0000 Subject: [PATCH 23/41] const Signed-off-by: Raven Black --- source/common/protobuf/deterministic_hash.cc | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/source/common/protobuf/deterministic_hash.cc b/source/common/protobuf/deterministic_hash.cc index c41519413000..c6d7e5909f60 100644 --- a/source/common/protobuf/deterministic_hash.cc +++ b/source/common/protobuf/deterministic_hash.cc @@ -28,33 +28,33 @@ struct MapFieldComparator { MapFieldComparator(const Protobuf::Message& first_msg) : reflection_(*first_msg.GetReflection()), descriptor_(*first_msg.GetDescriptor()), key_field_(*descriptor_.map_key()), compare_fn_(selectCompareFn()) {} - bool operator()(const Protobuf::Message& a, const Protobuf::Message& b) { + bool operator()(const Protobuf::Message& a, const Protobuf::Message& b) const { return (this->*compare_fn_)(a, b); } private: - bool compareByInt32(const Protobuf::Message& a, const Protobuf::Message& b) { + bool compareByInt32(const Protobuf::Message& a, const Protobuf::Message& b) const { return reflection_.GetInt32(a, &key_field_) < reflection_.GetInt32(b, &key_field_); } - bool compareByUInt32(const Protobuf::Message& a, const Protobuf::Message& b) { + bool compareByUInt32(const Protobuf::Message& a, const Protobuf::Message& b) const { return reflection_.GetUInt32(a, &key_field_) < reflection_.GetUInt32(b, &key_field_); } - bool compareByInt64(const Protobuf::Message& a, const Protobuf::Message& b) { + bool compareByInt64(const Protobuf::Message& a, const Protobuf::Message& b) const { return reflection_.GetInt64(a, &key_field_) < reflection_.GetInt64(b, &key_field_); } - bool compareByUInt64(const Protobuf::Message& a, const Protobuf::Message& b) { + bool compareByUInt64(const Protobuf::Message& a, const Protobuf::Message& b) const { return reflection_.GetUInt64(a, &key_field_) < reflection_.GetUInt64(b, &key_field_); } - bool compareByBool(const Protobuf::Message& a, const Protobuf::Message& b) { + bool compareByBool(const Protobuf::Message& a, const Protobuf::Message& b) const { return reflection_.GetBool(a, &key_field_) < reflection_.GetBool(b, &key_field_); } - bool compareByString(const Protobuf::Message& a, const Protobuf::Message& b) { + bool compareByString(const Protobuf::Message& a, const Protobuf::Message& b) const { std::string scratch_a, scratch_b; return reflection_.GetStringReference(a, &key_field_, &scratch_a) < reflection_.GetStringReference(b, &key_field_, &scratch_b); } using CompareMemberFn = bool (MapFieldComparator::*)(const Protobuf::Message& a, - const Protobuf::Message& b); + const Protobuf::Message& b) const; CompareMemberFn selectCompareFn() { using Protobuf::FieldDescriptor; switch (key_field_.cpp_type()) { From e128ee4940a607ab08b753f0e4616c5383b728b1 Mon Sep 17 00:00:00 2001 From: Raven Black Date: Fri, 17 Nov 2023 22:21:26 +0000 Subject: [PATCH 24/41] Explanatory comment Signed-off-by: Raven Black --- source/common/protobuf/deterministic_hash.cc | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/source/common/protobuf/deterministic_hash.cc b/source/common/protobuf/deterministic_hash.cc index c6d7e5909f60..2211495206c7 100644 --- a/source/common/protobuf/deterministic_hash.cc +++ b/source/common/protobuf/deterministic_hash.cc @@ -24,6 +24,14 @@ uint64_t reflectionHashMessage(const Protobuf::Message& message, uint64_t seed = uint64_t reflectionHashField(const Protobuf::Message& message, const Protobuf::FieldDescriptor* field, uint64_t seed); +// MapFieldComparator uses selectCompareFn at init-time, rather than putting the +// switch into the comparator, because that way the switch is only performed once, +// rather than O(n*log(n)) times during std::sort. +// +// This uses a member-function pointer rather than a virtual function with a +// polymorphic object selected during a create function because std::sort +// does not accept a polymorphic object as a comparator (due to that argument +// being taken by value). struct MapFieldComparator { MapFieldComparator(const Protobuf::Message& first_msg) : reflection_(*first_msg.GetReflection()), descriptor_(*first_msg.GetDescriptor()), From 974f4f0e66bcfecbca5c0551fb6435e681b54b60 Mon Sep 17 00:00:00 2001 From: Raven Black Date: Fri, 17 Nov 2023 22:29:48 +0000 Subject: [PATCH 25/41] Verbose comment about sorting map fields, plus an ASSERT Signed-off-by: Raven Black --- source/common/protobuf/deterministic_hash.cc | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/source/common/protobuf/deterministic_hash.cc b/source/common/protobuf/deterministic_hash.cc index 2211495206c7..ce0f94e4de14 100644 --- a/source/common/protobuf/deterministic_hash.cc +++ b/source/common/protobuf/deterministic_hash.cc @@ -107,6 +107,22 @@ sortedMapField(const Protobuf::RepeatedFieldRef map_entries) uint64_t reflectionHashMapField(const Protobuf::Message& message, const Protobuf::FieldDescriptor* field, uint64_t seed) { const Protobuf::Reflection* reflection = message.GetReflection(); + // For repeated fields in general the order is significant. For map fields, + // the implementation may have a fixed order or a nondeterministic order, + // but conceptually a map field is supposed to be order agnostic. + // + // We should definitely *not* sort repeated fields in general; maps, however, + // are represented on the wire and in reflection format as repeated fields. + // Since the order is canonically not part of the data in the case of maps, + // this means for consistent hashing it must be ensured that the fields are + // handled in a deterministic order. + // + // It may be that if we didn't sort them the order would be deterministic, but + // that would be a non-guaranteed implementation detail, and a future version + // of protobuf could potentially change the internal map representation. Sorting + // also means theoretically we could produce a cross-language compatible hash; + // golang, for example, explicitly randomly orders map fields in its implementation. + ASSERT(field->is_map()); auto sorted_entries = sortedMapField(reflection->GetRepeatedFieldRef(message, field)); const Protobuf::Descriptor* map_descriptor = sorted_entries.begin()->get().GetDescriptor(); From 5b0b8188e36f6870e3caebe681d52ffcac69067c Mon Sep 17 00:00:00 2001 From: Raven Black Date: Tue, 28 Nov 2023 21:12:52 +0000 Subject: [PATCH 26/41] Moar templates Signed-off-by: Raven Black --- source/common/protobuf/deterministic_hash.cc | 168 ++++++++++++------- 1 file changed, 107 insertions(+), 61 deletions(-) diff --git a/source/common/protobuf/deterministic_hash.cc b/source/common/protobuf/deterministic_hash.cc index ce0f94e4de14..ea6c89049e85 100644 --- a/source/common/protobuf/deterministic_hash.cc +++ b/source/common/protobuf/deterministic_hash.cc @@ -8,21 +8,77 @@ namespace Envoy { namespace DeterministicProtoHash { namespace { -#define REFLECTION_FOR_EACH(get_type, value_type) \ - if (field->is_repeated()) { \ - for (const value_type& q : reflection->GetRepeatedFieldRef(message, field)) { \ - seed = HashUtil::xxHash64(absl::string_view{reinterpret_cast(&q), sizeof(q)}, \ - seed); \ - } \ - } else { \ - const value_type q = reflection->Get##get_type(message, field); \ - seed = \ - HashUtil::xxHash64(absl::string_view{reinterpret_cast(&q), sizeof(q)}, seed); \ +// Get a scalar field from protobuf reflection field definition. The return +// type must be specified by the caller. Every implementation is a specialization +// because the reflection interface did separate named functions instead of a +// template. +template +T reflectionGet(const Protobuf::Reflection& reflection, const Protobuf::Message& message, + const Protobuf::FieldDescriptor& field); + +template <> +uint32_t reflectionGet(const Protobuf::Reflection& reflection, const Protobuf::Message& message, + const Protobuf::FieldDescriptor& field) { + return reflection.GetUInt32(message, &field); +} + +template <> +int32_t reflectionGet(const Protobuf::Reflection& reflection, const Protobuf::Message& message, + const Protobuf::FieldDescriptor& field) { + return reflection.GetInt32(message, &field); +} + +template <> +uint64_t reflectionGet(const Protobuf::Reflection& reflection, const Protobuf::Message& message, + const Protobuf::FieldDescriptor& field) { + return reflection.GetUInt64(message, &field); +} + +template <> +int64_t reflectionGet(const Protobuf::Reflection& reflection, const Protobuf::Message& message, + const Protobuf::FieldDescriptor& field) { + return reflection.GetInt64(message, &field); +} + +template <> +float reflectionGet(const Protobuf::Reflection& reflection, const Protobuf::Message& message, + const Protobuf::FieldDescriptor& field) { + return reflection.GetFloat(message, &field); +} + +template <> +double reflectionGet(const Protobuf::Reflection& reflection, const Protobuf::Message& message, + const Protobuf::FieldDescriptor& field) { + return reflection.GetDouble(message, &field); +} + +template <> +bool reflectionGet(const Protobuf::Reflection& reflection, const Protobuf::Message& message, + const Protobuf::FieldDescriptor& field) { + return reflection.GetBool(message, &field); +} + +// Takes a field of scalar type, which may be a repeated field, and hashes +// it (or each of it). +template +uint64_t hashScalarField(const Protobuf::Reflection& reflection, const Protobuf::Message& message, + const Protobuf::FieldDescriptor& field, uint64_t seed) { + if (field.is_repeated()) { + for (const T& q : reflection.GetRepeatedFieldRef(message, &field)) { + seed = + HashUtil::xxHash64(absl::string_view{reinterpret_cast(&q), sizeof(q)}, seed); + } + } else { + const T q = reflectionGet(reflection, message, field); + seed = + HashUtil::xxHash64(absl::string_view{reinterpret_cast(&q), sizeof(q)}, seed); } + return seed; +} uint64_t reflectionHashMessage(const Protobuf::Message& message, uint64_t seed = 0); uint64_t reflectionHashField(const Protobuf::Message& message, - const Protobuf::FieldDescriptor* field, uint64_t seed); + const Protobuf::FieldDescriptor& field, uint64_t seed); // MapFieldComparator uses selectCompareFn at init-time, rather than putting the // switch into the comparator, because that way the switch is only performed once, @@ -41,21 +97,11 @@ struct MapFieldComparator { } private: - bool compareByInt32(const Protobuf::Message& a, const Protobuf::Message& b) const { - return reflection_.GetInt32(a, &key_field_) < reflection_.GetInt32(b, &key_field_); - } - bool compareByUInt32(const Protobuf::Message& a, const Protobuf::Message& b) const { - return reflection_.GetUInt32(a, &key_field_) < reflection_.GetUInt32(b, &key_field_); - } - bool compareByInt64(const Protobuf::Message& a, const Protobuf::Message& b) const { - return reflection_.GetInt64(a, &key_field_) < reflection_.GetInt64(b, &key_field_); - } - bool compareByUInt64(const Protobuf::Message& a, const Protobuf::Message& b) const { - return reflection_.GetUInt64(a, &key_field_) < reflection_.GetUInt64(b, &key_field_); - } - bool compareByBool(const Protobuf::Message& a, const Protobuf::Message& b) const { - return reflection_.GetBool(a, &key_field_) < reflection_.GetBool(b, &key_field_); - } + template + bool compareByScalar(const Protobuf::Message& a, const Protobuf::Message& b) const { + return reflectionGet(reflection_, a, key_field_) < + reflectionGet(reflection_, b, key_field_); + }; bool compareByString(const Protobuf::Message& a, const Protobuf::Message& b) const { std::string scratch_a, scratch_b; return reflection_.GetStringReference(a, &key_field_, &scratch_a) < @@ -67,15 +113,15 @@ struct MapFieldComparator { using Protobuf::FieldDescriptor; switch (key_field_.cpp_type()) { case FieldDescriptor::CPPTYPE_INT32: - return &MapFieldComparator::compareByInt32; + return &MapFieldComparator::compareByScalar; case FieldDescriptor::CPPTYPE_UINT32: - return &MapFieldComparator::compareByUInt32; + return &MapFieldComparator::compareByScalar; case FieldDescriptor::CPPTYPE_INT64: - return &MapFieldComparator::compareByInt64; + return &MapFieldComparator::compareByScalar; case FieldDescriptor::CPPTYPE_UINT64: - return &MapFieldComparator::compareByUInt64; + return &MapFieldComparator::compareByScalar; case FieldDescriptor::CPPTYPE_BOOL: - return &MapFieldComparator::compareByBool; + return &MapFieldComparator::compareByScalar; case FieldDescriptor::CPPTYPE_STRING: return &MapFieldComparator::compareByString; case FieldDescriptor::CPPTYPE_DOUBLE: @@ -105,8 +151,8 @@ sortedMapField(const Protobuf::RepeatedFieldRef map_entries) // Here we're going to sort the keys into numerical order for number keys, // or lexicographical order for strings, and then hash them in that order. uint64_t reflectionHashMapField(const Protobuf::Message& message, - const Protobuf::FieldDescriptor* field, uint64_t seed) { - const Protobuf::Reflection* reflection = message.GetReflection(); + const Protobuf::FieldDescriptor& field, uint64_t seed) { + const Protobuf::Reflection& reflection = *message.GetReflection(); // For repeated fields in general the order is significant. For map fields, // the implementation may have a fixed order or a nondeterministic order, // but conceptually a map field is supposed to be order agnostic. @@ -122,12 +168,12 @@ uint64_t reflectionHashMapField(const Protobuf::Message& message, // of protobuf could potentially change the internal map representation. Sorting // also means theoretically we could produce a cross-language compatible hash; // golang, for example, explicitly randomly orders map fields in its implementation. - ASSERT(field->is_map()); + ASSERT(field.is_map()); auto sorted_entries = - sortedMapField(reflection->GetRepeatedFieldRef(message, field)); - const Protobuf::Descriptor* map_descriptor = sorted_entries.begin()->get().GetDescriptor(); - const Protobuf::FieldDescriptor* key_field = map_descriptor->map_key(); - const Protobuf::FieldDescriptor* value_field = map_descriptor->map_value(); + sortedMapField(reflection.GetRepeatedFieldRef(message, &field)); + const Protobuf::Descriptor& map_descriptor = *sorted_entries.begin()->get().GetDescriptor(); + const Protobuf::FieldDescriptor& key_field = *map_descriptor.map_key(); + const Protobuf::FieldDescriptor& value_field = *map_descriptor.map_value(); for (const Protobuf::Message& entry : sorted_entries) { seed = reflectionHashField(entry, key_field, seed); seed = reflectionHashField(entry, value_field, seed); @@ -136,68 +182,68 @@ uint64_t reflectionHashMapField(const Protobuf::Message& message, } uint64_t reflectionHashField(const Protobuf::Message& message, - const Protobuf::FieldDescriptor* field, uint64_t seed) { + const Protobuf::FieldDescriptor& field, uint64_t seed) { using Protobuf::FieldDescriptor; - const Protobuf::Reflection* reflection = message.GetReflection(); + const Protobuf::Reflection& reflection = *message.GetReflection(); { - const int q = field->number(); + const int q = field.number(); seed = HashUtil::xxHash64(absl::string_view{reinterpret_cast(&q), sizeof(q)}, seed); } - switch (field->cpp_type()) { + switch (field.cpp_type()) { case FieldDescriptor::CPPTYPE_INT32: - REFLECTION_FOR_EACH(Int32, int32_t); + seed = hashScalarField(reflection, message, field, seed); break; case FieldDescriptor::CPPTYPE_UINT32: - REFLECTION_FOR_EACH(UInt32, uint32_t); + seed = hashScalarField(reflection, message, field, seed); break; case FieldDescriptor::CPPTYPE_INT64: - REFLECTION_FOR_EACH(Int64, int64_t); + seed = hashScalarField(reflection, message, field, seed); break; case FieldDescriptor::CPPTYPE_UINT64: - REFLECTION_FOR_EACH(UInt64, uint64_t); + seed = hashScalarField(reflection, message, field, seed); break; case FieldDescriptor::CPPTYPE_DOUBLE: - REFLECTION_FOR_EACH(Double, double); + seed = hashScalarField(reflection, message, field, seed); break; case FieldDescriptor::CPPTYPE_FLOAT: - REFLECTION_FOR_EACH(Float, float); + seed = hashScalarField(reflection, message, field, seed); break; case FieldDescriptor::CPPTYPE_BOOL: - REFLECTION_FOR_EACH(Bool, bool); + seed = hashScalarField(reflection, message, field, seed); break; case FieldDescriptor::CPPTYPE_ENUM: - if (field->is_repeated()) { - int c = reflection->FieldSize(message, field); + if (field.is_repeated()) { + int c = reflection.FieldSize(message, &field); for (int i = 0; i < c; i++) { - int v = reflection->GetRepeatedEnumValue(message, field, i); + int v = reflection.GetRepeatedEnumValue(message, &field, i); seed = HashUtil::xxHash64(absl::string_view{reinterpret_cast(&v), sizeof(v)}, seed); } } else { - int v = reflection->GetEnumValue(message, field); + int v = reflection.GetEnumValue(message, &field); seed = HashUtil::xxHash64(absl::string_view{reinterpret_cast(&v), sizeof(v)}, seed); } break; case FieldDescriptor::CPPTYPE_STRING: { - if (field->is_repeated()) { - for (const std::string& str : reflection->GetRepeatedFieldRef(message, field)) { + if (field.is_repeated()) { + for (const std::string& str : reflection.GetRepeatedFieldRef(message, &field)) { seed = HashUtil::xxHash64(str, seed); } } else { std::string scratch; - seed = HashUtil::xxHash64(reflection->GetStringReference(message, field, &scratch), seed); + seed = HashUtil::xxHash64(reflection.GetStringReference(message, &field, &scratch), seed); } } break; case FieldDescriptor::CPPTYPE_MESSAGE: - if (field->is_map()) { + if (field.is_map()) { seed = reflectionHashMapField(message, field, seed); - } else if (field->is_repeated()) { + } else if (field.is_repeated()) { for (const Protobuf::Message& submsg : - reflection->GetRepeatedFieldRef(message, field)) { + reflection.GetRepeatedFieldRef(message, &field)) { seed = reflectionHashMessage(submsg, seed); } } else { - seed = reflectionHashMessage(reflection->GetMessage(message, field), seed); + seed = reflectionHashMessage(reflection.GetMessage(message, &field), seed); } break; } @@ -246,7 +292,7 @@ uint64_t reflectionHashMessage(const Protobuf::Message& message, uint64_t seed) reflection->ListFields(message, &fields); // If we wanted to handle unknown fields, we'd need to also GetUnknownFields here. for (const FieldDescriptor* field : fields) { - seed = reflectionHashField(message, field, seed); + seed = reflectionHashField(message, *field, seed); } // Hash one extra character to signify end of message, so that // msg{} field2=2 From f5393b69690f87ef26ec875d2ada7f31122078cc Mon Sep 17 00:00:00 2001 From: Raven Black Date: Wed, 29 Nov 2023 15:53:41 +0000 Subject: [PATCH 27/41] Runtime guard Signed-off-by: Raven Black --- source/common/protobuf/utility.cc | 13 +++++++++- source/common/runtime/runtime_features.cc | 1 + .../filters/http/cache/http_cache.cc | 8 +++++- test/common/protobuf/utility_test.cc | 26 ++++++++++++------- test/extensions/filters/http/cache/BUILD | 1 + .../filters/http/cache/http_cache_test.cc | 10 +++++++ 6 files changed, 47 insertions(+), 12 deletions(-) diff --git a/source/common/protobuf/utility.cc b/source/common/protobuf/utility.cc index 8e3c25cc290b..206f6c513f37 100644 --- a/source/common/protobuf/utility.cc +++ b/source/common/protobuf/utility.cc @@ -139,7 +139,18 @@ void ProtoExceptionUtil::throwProtoValidationException(const std::string& valida size_t MessageUtil::hash(const Protobuf::Message& message) { #if defined(ENVOY_ENABLE_FULL_PROTOS) - return DeterministicProtoHash::hash(message); + if (Runtime::runtimeFeatureEnabled("envoy.restart_features.use_fast_protobuf_hash")) { + return DeterministicProtoHash::hash(message); + } else { + std::string text_format; + Protobuf::TextFormat::Printer printer; + printer.SetExpandAny(true); + printer.SetUseFieldNumber(true); + printer.SetSingleLineMode(true); + printer.SetHideUnknownFields(true); + printer.PrintToString(message, &text_format); + return HashUtil::xxHash64(text_format); + } #else return HashUtil::xxHash64(message.SerializeAsString()); #endif diff --git a/source/common/runtime/runtime_features.cc b/source/common/runtime/runtime_features.cc index ea7a92e65f54..acd202e7c2f2 100644 --- a/source/common/runtime/runtime_features.cc +++ b/source/common/runtime/runtime_features.cc @@ -91,6 +91,7 @@ RUNTIME_GUARD(envoy_reloadable_features_validate_grpc_header_before_log_grpc_sta RUNTIME_GUARD(envoy_reloadable_features_validate_upstream_headers); RUNTIME_GUARD(envoy_restart_features_send_goaway_for_premature_rst_streams); RUNTIME_GUARD(envoy_restart_features_udp_read_normalize_addresses); +RUNTIME_GUARD(envoy_restart_features_use_fast_protobuf_hash); // Begin false flags. These should come with a TODO to flip true. // Sentinel and test flag. diff --git a/source/extensions/filters/http/cache/http_cache.cc b/source/extensions/filters/http/cache/http_cache.cc index 93f68d93507c..73a2365a5f99 100644 --- a/source/extensions/filters/http/cache/http_cache.cc +++ b/source/extensions/filters/http/cache/http_cache.cc @@ -54,7 +54,13 @@ LookupRequest::LookupRequest(const Http::RequestHeaderMap& request_headers, Syst // Unless this API is still alpha, calls to stableHashKey() must always return // the same result, or a way must be provided to deal with a complete cache // flush. -size_t stableHashKey(const Key& key) { return DeterministicProtoHash::hash(key); } +size_t stableHashKey(const Key& key) { + if (Runtime::runtimeFeatureEnabled("envoy.restart_features.use_fast_protobuf_hash")) { + return DeterministicProtoHash::hash(key); + } else { + return MessageUtil::hash(key); + } +} void LookupRequest::initializeRequestCacheControl(const Http::RequestHeaderMap& request_headers) { const absl::string_view cache_control = diff --git a/test/common/protobuf/utility_test.cc b/test/common/protobuf/utility_test.cc index b3feba633fd7..c63117551871 100644 --- a/test/common/protobuf/utility_test.cc +++ b/test/common/protobuf/utility_test.cc @@ -194,16 +194,22 @@ TEST_F(ProtobufUtilityTest, MessageUtilHash) { a4.PackFrom(s2); a5.PackFrom(s3); - EXPECT_EQ(MessageUtil::hash(a1), MessageUtil::hash(a2)); - EXPECT_EQ(MessageUtil::hash(a2), MessageUtil::hash(a3)); - EXPECT_NE(0, MessageUtil::hash(a1)); - // Same keys and values but with the values in a different order should not have - // the same hash. - EXPECT_NE(MessageUtil::hash(a1), MessageUtil::hash(a4)); - // Different keys with the values in the same order should not have the same hash. - EXPECT_NE(MessageUtil::hash(a1), MessageUtil::hash(a5)); - // Struct without 'any' around it should not hash the same as struct inside 'any'. - EXPECT_NE(MessageUtil::hash(s), MessageUtil::hash(a1)); + TestScopedRuntime runtime_; + for (std::string runtime_value : {"true", "false"}) { + // TODO(ravenblack): when the runtime flag is removed, keep the expects + // but remove the loop around them and the extra output. + runtime_.mergeValues({{"envoy.restart_features.use_fast_protobuf_hash", runtime_value}}); + EXPECT_EQ(MessageUtil::hash(a1), MessageUtil::hash(a2)) << runtime_value; + EXPECT_EQ(MessageUtil::hash(a2), MessageUtil::hash(a3)) << runtime_value; + EXPECT_NE(0, MessageUtil::hash(a1)) << runtime_value; + // Same keys and values but with the values in a different order should not have + // the same hash. + EXPECT_NE(MessageUtil::hash(a1), MessageUtil::hash(a4)) << runtime_value; + // Different keys with the values in the same order should not have the same hash. + EXPECT_NE(MessageUtil::hash(a1), MessageUtil::hash(a5)) << runtime_value; + // Struct without 'any' around it should not hash the same as struct inside 'any'. + EXPECT_NE(MessageUtil::hash(s), MessageUtil::hash(a1)) << runtime_value; + } } TEST_F(ProtobufUtilityTest, RepeatedPtrUtilDebugString) { diff --git a/test/extensions/filters/http/cache/BUILD b/test/extensions/filters/http/cache/BUILD index 65930a8771f3..81ab2735dae1 100644 --- a/test/extensions/filters/http/cache/BUILD +++ b/test/extensions/filters/http/cache/BUILD @@ -69,6 +69,7 @@ envoy_extension_cc_test( "//source/extensions/http/cache/simple_http_cache:config", "//test/mocks/http:http_mocks", "//test/test_common:simulated_time_system_lib", + "//test/test_common:test_runtime_lib", "//test/test_common:utility_lib", ], ) diff --git a/test/extensions/filters/http/cache/http_cache_test.cc b/test/extensions/filters/http/cache/http_cache_test.cc index 5f42f83c134e..659cb4fc96ca 100644 --- a/test/extensions/filters/http/cache/http_cache_test.cc +++ b/test/extensions/filters/http/cache/http_cache_test.cc @@ -7,6 +7,7 @@ #include "test/extensions/filters/http/cache/common.h" #include "test/mocks/http/mocks.h" #include "test/test_common/simulated_time_system.h" +#include "test/test_common/test_runtime.h" #include "test/test_common/utility.h" #include "gtest/gtest.h" @@ -299,6 +300,15 @@ TEST(HttpCacheTest, StableHashKey) { ASSERT_EQ(stableHashKey(key), 6153940628716543519u); } +TEST(HttpCacheTest, StableHashKeyWithSlowHash) { + // TODO(ravenblack): This test should be removed when the runtime guard is removed. + TestScopedRuntime runtime_; + runtime_.mergeValues({{"envoy.restart_features.use_fast_protobuf_hash", "false"}}); + Key key; + key.set_host("example.com"); + ASSERT_EQ(stableHashKey(key), 9582653837550152292u); +} + TEST_P(LookupRequestTest, ResultWithBodyAndTrailersMatchesExpectation) { request_headers_.setReferenceKey(Http::CustomHeaders::get().CacheControl, GetParam().request_cache_control); From 8830f12ee29b2f3d8eb8880242dc0bd692d5bf64 Mon Sep 17 00:00:00 2001 From: Raven Black Date: Wed, 29 Nov 2023 17:54:52 +0000 Subject: [PATCH 28/41] Remove unused WithAny test proto Signed-off-by: Raven Black --- test/common/protobuf/deterministic_hash_test.proto | 4 ---- 1 file changed, 4 deletions(-) diff --git a/test/common/protobuf/deterministic_hash_test.proto b/test/common/protobuf/deterministic_hash_test.proto index b75cbb339df6..43cf64049045 100644 --- a/test/common/protobuf/deterministic_hash_test.proto +++ b/test/common/protobuf/deterministic_hash_test.proto @@ -47,7 +47,3 @@ message SingleFields { float f = 9; FooEnum e = 10; }; - -//message WithAny { -// Any any = 1; -//}; From f308278fcdcae3c1b65e3e4057d7cc2deca7f977 Mon Sep 17 00:00:00 2001 From: Raven Black Date: Wed, 29 Nov 2023 18:59:50 +0000 Subject: [PATCH 29/41] Add comment about unknown Any types and MessageDifferencer Signed-off-by: Raven Black --- source/common/protobuf/deterministic_hash.cc | 2 ++ source/common/protobuf/deterministic_hash.h | 5 +++++ 2 files changed, 7 insertions(+) diff --git a/source/common/protobuf/deterministic_hash.cc b/source/common/protobuf/deterministic_hash.cc index ea6c89049e85..54a5aae67825 100644 --- a/source/common/protobuf/deterministic_hash.cc +++ b/source/common/protobuf/deterministic_hash.cc @@ -262,6 +262,8 @@ std::unique_ptr unpackAnyForReflection(const ProtobufWkt::Any const Protobuf::Descriptor* descriptor = Protobuf::DescriptorPool::generated_pool()->FindMessageTypeByName( typeUrlToDescriptorFullName(any.type_url())); + // If the type name refers to an unknown type, we treat it the same as other + // unknown fields - not including its contents in the hash. if (descriptor == nullptr) { return nullptr; } diff --git a/source/common/protobuf/deterministic_hash.h b/source/common/protobuf/deterministic_hash.h index 9bcf57ecd036..126ea4f96826 100644 --- a/source/common/protobuf/deterministic_hash.h +++ b/source/common/protobuf/deterministic_hash.h @@ -14,6 +14,11 @@ namespace DeterministicProtoHash { // Ignoring unknown fields was chosen as the implementation because the // TextFormat-based hashing this replaces was explicitly ignoring unknown // fields. +// +// If this is used as part of making a hash table, it may result in +// collisions if unknown fields are present and are not ignored by the +// corresponding comparer. A MessageDifferencer can be configured either +// way. uint64_t hash(const Protobuf::Message& message); } // namespace DeterministicProtoHash From 2104c6e1c4d9cbf9a210b41d9ee3f1834a120419 Mon Sep 17 00:00:00 2001 From: Raven Black Date: Wed, 29 Nov 2023 19:02:41 +0000 Subject: [PATCH 30/41] Add comment about type URLs Signed-off-by: Raven Black --- source/common/protobuf/deterministic_hash.cc | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/source/common/protobuf/deterministic_hash.cc b/source/common/protobuf/deterministic_hash.cc index 54a5aae67825..0e5d5bf3d2e3 100644 --- a/source/common/protobuf/deterministic_hash.cc +++ b/source/common/protobuf/deterministic_hash.cc @@ -250,6 +250,11 @@ uint64_t reflectionHashField(const Protobuf::Message& message, return seed; } +// Converts from type urls OR descriptor full names to descriptor full names. +// Type urls are as used in envoy yaml config, e.g. +// "type.googleapis.com/envoy.extensions.filters.udp.udp_proxy.v3.UdpProxyConfig" +// becomes +// "envoy.extensions.filters.udp.udp_proxy.v3.UdpProxyConfig" absl::string_view typeUrlToDescriptorFullName(absl::string_view url) { const size_t pos = url.rfind('/'); if (pos != absl::string_view::npos) { From 234cbc80ae2bffb3754e3d8f4ce6d36fd481155f Mon Sep 17 00:00:00 2001 From: Raven Black Date: Wed, 29 Nov 2023 19:24:51 +0000 Subject: [PATCH 31/41] Return coverage to 96.5 Signed-off-by: Raven Black --- test/per_file_coverage.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/per_file_coverage.sh b/test/per_file_coverage.sh index 13962bf722e0..88d6844d1332 100755 --- a/test/per_file_coverage.sh +++ b/test/per_file_coverage.sh @@ -15,7 +15,7 @@ declare -a KNOWN_LOW_COVERAGE=( "source/common/matcher:94.6" "source/common/network:94.4" # Flaky, `activateFileEvents`, `startSecureTransport` and `ioctl`, listener_socket do not always report LCOV "source/common/network/dns_resolver:91.4" # A few lines of MacOS code not tested in linux scripts. Tested in MacOS scripts -"source/common/protobuf:96.4" +"source/common/protobuf:96.5" "source/common/quic:93.6" "source/common/secret:95.1" "source/common/signal:87.2" # Death tests don't report LCOV From 7b737bcba93356a84d257a85b9ffebf7546cbd04 Mon Sep 17 00:00:00 2001 From: Raven Black Date: Wed, 29 Nov 2023 19:56:12 +0000 Subject: [PATCH 32/41] Spelling Signed-off-by: Raven Black --- source/common/protobuf/deterministic_hash.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/common/protobuf/deterministic_hash.h b/source/common/protobuf/deterministic_hash.h index 126ea4f96826..64ea0ff76bc1 100644 --- a/source/common/protobuf/deterministic_hash.h +++ b/source/common/protobuf/deterministic_hash.h @@ -17,8 +17,8 @@ namespace DeterministicProtoHash { // // If this is used as part of making a hash table, it may result in // collisions if unknown fields are present and are not ignored by the -// corresponding comparer. A MessageDifferencer can be configured either -// way. +// corresponding comparator. A `MessageDifferencer` can be configured to +// ignore unknown fields, or not to. uint64_t hash(const Protobuf::Message& message); } // namespace DeterministicProtoHash From c8298567288325fcef5812e5aa30d4099068a8f9 Mon Sep 17 00:00:00 2001 From: Raven Black Date: Thu, 30 Nov 2023 19:37:01 +0000 Subject: [PATCH 33/41] Add benchmark Signed-off-by: Raven Black --- test/common/protobuf/BUILD | 18 ++++++++ test/common/protobuf/utility_speed_test.cc | 53 ++++++++++++++++++++++ 2 files changed, 71 insertions(+) create mode 100644 test/common/protobuf/utility_speed_test.cc diff --git a/test/common/protobuf/BUILD b/test/common/protobuf/BUILD index 71e3fd32da4f..a33f3bbdabaa 100644 --- a/test/common/protobuf/BUILD +++ b/test/common/protobuf/BUILD @@ -1,5 +1,7 @@ load( "//bazel:envoy_build_system.bzl", + "envoy_benchmark_test", + "envoy_cc_benchmark_binary", "envoy_cc_fuzz_test", "envoy_cc_test", "envoy_package", @@ -96,3 +98,19 @@ envoy_cc_fuzz_test( tags = ["no_fuzz"], deps = ["//source/common/protobuf:utility_lib"], ) + +envoy_cc_benchmark_binary( + name = "utility_speed_test", + srcs = ["utility_speed_test.cc"], + external_deps = ["benchmark"], + deps = [ + ":deterministic_hash_test_proto_cc_proto", + "//source/common/protobuf:utility_lib", + "//test/test_common:test_runtime_lib", + ], +) + +envoy_benchmark_test( + name = "utility_speed_test_benchmark_test", + benchmark_binary = "utility_speed_test", +) diff --git a/test/common/protobuf/utility_speed_test.cc b/test/common/protobuf/utility_speed_test.cc new file mode 100644 index 000000000000..61e13f56de2b --- /dev/null +++ b/test/common/protobuf/utility_speed_test.cc @@ -0,0 +1,53 @@ +// Note: this should be run with --compilation_mode=opt, and would benefit from a +// quiescent system with disabled cstate power management. + +#include "source/common/protobuf/utility.h" + +#include "test/common/protobuf/deterministic_hash_test.pb.h" +#include "test/test_common/test_runtime.h" + +#include "benchmark/benchmark.h" + +namespace Envoy { + +// NOLINT(namespace-envoy) + +static deterministichashtest::Maps testProto() { + deterministichashtest::Maps msg; + (*msg.mutable_bool_string())[false] = "abcdefghijklmnopqrstuvwxyz"; + (*msg.mutable_bool_string())[true] = "abcdefghijklmnopqrstuvwxyz"; + for (int i = 0; i < 100; i++) { + (*msg.mutable_string_bool())[absl::StrCat(i)] = true; + (*msg.mutable_int32_uint32())[i] = i; + (*msg.mutable_uint32_int32())[i] = i; + (*msg.mutable_uint64_int64())[i + 1000000000000L] = i + 1000000000000L; + (*msg.mutable_int64_uint64())[i + 1000000000000L] = i + 1000000000000L; + } + return msg; +} + +static void bmHashByTextFormat(benchmark::State& state) { + TestScopedRuntime runtime_; + runtime_.mergeValues({{"envoy.restart_features.use_fast_protobuf_hash", "false"}}); + auto msg = testProto(); + uint64_t hash = 0; + for (auto _ : state) { + UNREFERENCED_PARAMETER(_); + hash += MessageUtil::hash(msg); + } + benchmark::DoNotOptimize(hash); +} +BENCHMARK(bmHashByTextFormat); + +static void bmHashByDeterministicHash(benchmark::State& state) { + auto msg = testProto(); + uint64_t hash = 0; + for (auto _ : state) { + UNREFERENCED_PARAMETER(_); + hash += MessageUtil::hash(msg); + } + benchmark::DoNotOptimize(hash); +} +BENCHMARK(bmHashByDeterministicHash); + +} // namespace Envoy From ecdd501df20a3890e636d1495876c46b7804f638 Mon Sep 17 00:00:00 2001 From: Raven Black Date: Thu, 30 Nov 2023 20:07:48 +0000 Subject: [PATCH 34/41] More benchmarks Signed-off-by: Raven Black --- test/common/protobuf/utility_speed_test.cc | 64 ++++++++++++++++------ 1 file changed, 47 insertions(+), 17 deletions(-) diff --git a/test/common/protobuf/utility_speed_test.cc b/test/common/protobuf/utility_speed_test.cc index 61e13f56de2b..9ba88cff00b4 100644 --- a/test/common/protobuf/utility_speed_test.cc +++ b/test/common/protobuf/utility_speed_test.cc @@ -12,42 +12,72 @@ namespace Envoy { // NOLINT(namespace-envoy) -static deterministichashtest::Maps testProto() { - deterministichashtest::Maps msg; - (*msg.mutable_bool_string())[false] = "abcdefghijklmnopqrstuvwxyz"; - (*msg.mutable_bool_string())[true] = "abcdefghijklmnopqrstuvwxyz"; +static std::unique_ptr testProtoWithMaps() { + auto msg = std::make_unique(); + (*msg->mutable_bool_string())[false] = "abcdefghijklmnopqrstuvwxyz"; + (*msg->mutable_bool_string())[true] = "abcdefghijklmnopqrstuvwxyz"; for (int i = 0; i < 100; i++) { - (*msg.mutable_string_bool())[absl::StrCat(i)] = true; - (*msg.mutable_int32_uint32())[i] = i; - (*msg.mutable_uint32_int32())[i] = i; - (*msg.mutable_uint64_int64())[i + 1000000000000L] = i + 1000000000000L; - (*msg.mutable_int64_uint64())[i + 1000000000000L] = i + 1000000000000L; + (*msg->mutable_string_bool())[absl::StrCat(i)] = true; + (*msg->mutable_int32_uint32())[i] = i; + (*msg->mutable_uint32_int32())[i] = i; + (*msg->mutable_uint64_int64())[i + 1000000000000L] = i + 1000000000000L; + (*msg->mutable_int64_uint64())[i + 1000000000000L] = i + 1000000000000L; } return msg; } -static void bmHashByTextFormat(benchmark::State& state) { +static std::unique_ptr testProtoWithRecursion() { + auto msg = std::make_unique(); + deterministichashtest::Recursion* p = msg.get(); + for (int i = 0; i < 100; i++) { + p->set_index(i); + p = p->mutable_child(); + } + return msg; +} + +static std::unique_ptr testProtoWithRepeatedFields() { + auto msg = std::make_unique(); + for (int i = 0; i < 100; i++) { + msg->add_bools(true); + msg->add_strings("abcdefghijklmnopqrstuvwxyz"); + msg->add_int32s(-12345); + msg->add_uint32s(12345); + msg->add_int64s(123456789012345L); + msg->add_uint64s(-123456789012345UL); + msg->add_byteses("abcdefghijklmnopqrstuvwxyz"); + msg->add_doubles(123456789.12345); + msg->add_floats(12345.12345); + msg->add_enums(deterministichashtest::FOO); + } + return msg; +} + +static void bmHashByTextFormat(benchmark::State& state, std::unique_ptr msg) { TestScopedRuntime runtime_; runtime_.mergeValues({{"envoy.restart_features.use_fast_protobuf_hash", "false"}}); - auto msg = testProto(); uint64_t hash = 0; for (auto _ : state) { UNREFERENCED_PARAMETER(_); - hash += MessageUtil::hash(msg); + hash += MessageUtil::hash(*msg); } benchmark::DoNotOptimize(hash); } -BENCHMARK(bmHashByTextFormat); -static void bmHashByDeterministicHash(benchmark::State& state) { - auto msg = testProto(); +static void bmHashByDeterministicHash(benchmark::State& state, + std::unique_ptr msg) { uint64_t hash = 0; for (auto _ : state) { UNREFERENCED_PARAMETER(_); - hash += MessageUtil::hash(msg); + hash += MessageUtil::hash(*msg); } benchmark::DoNotOptimize(hash); } -BENCHMARK(bmHashByDeterministicHash); +BENCHMARK_CAPTURE(bmHashByDeterministicHash, map, testProtoWithMaps()); +BENCHMARK_CAPTURE(bmHashByTextFormat, map, testProtoWithMaps()); +BENCHMARK_CAPTURE(bmHashByDeterministicHash, recursion, testProtoWithRecursion()); +BENCHMARK_CAPTURE(bmHashByTextFormat, recursion, testProtoWithRecursion()); +BENCHMARK_CAPTURE(bmHashByDeterministicHash, repeatedFields, testProtoWithRepeatedFields()); +BENCHMARK_CAPTURE(bmHashByTextFormat, repeatedFields, testProtoWithRepeatedFields()); } // namespace Envoy From 2757c86a2dac1402b37948048b3f19a60055336c Mon Sep 17 00:00:00 2001 From: Raven Black Date: Mon, 4 Dec 2023 17:15:20 +0000 Subject: [PATCH 35/41] Unorder map hashing without sorting Signed-off-by: Raven Black --- source/common/protobuf/deterministic_hash.cc | 107 +++--------------- .../protobuf/deterministic_hash_test.cc | 10 ++ 2 files changed, 25 insertions(+), 92 deletions(-) diff --git a/source/common/protobuf/deterministic_hash.cc b/source/common/protobuf/deterministic_hash.cc index 0e5d5bf3d2e3..25ed60f7a78c 100644 --- a/source/common/protobuf/deterministic_hash.cc +++ b/source/common/protobuf/deterministic_hash.cc @@ -80,105 +80,28 @@ uint64_t reflectionHashMessage(const Protobuf::Message& message, uint64_t seed = uint64_t reflectionHashField(const Protobuf::Message& message, const Protobuf::FieldDescriptor& field, uint64_t seed); -// MapFieldComparator uses selectCompareFn at init-time, rather than putting the -// switch into the comparator, because that way the switch is only performed once, -// rather than O(n*log(n)) times during std::sort. -// -// This uses a member-function pointer rather than a virtual function with a -// polymorphic object selected during a create function because std::sort -// does not accept a polymorphic object as a comparator (due to that argument -// being taken by value). -struct MapFieldComparator { - MapFieldComparator(const Protobuf::Message& first_msg) - : reflection_(*first_msg.GetReflection()), descriptor_(*first_msg.GetDescriptor()), - key_field_(*descriptor_.map_key()), compare_fn_(selectCompareFn()) {} - bool operator()(const Protobuf::Message& a, const Protobuf::Message& b) const { - return (this->*compare_fn_)(a, b); - } - -private: - template - bool compareByScalar(const Protobuf::Message& a, const Protobuf::Message& b) const { - return reflectionGet(reflection_, a, key_field_) < - reflectionGet(reflection_, b, key_field_); - }; - bool compareByString(const Protobuf::Message& a, const Protobuf::Message& b) const { - std::string scratch_a, scratch_b; - return reflection_.GetStringReference(a, &key_field_, &scratch_a) < - reflection_.GetStringReference(b, &key_field_, &scratch_b); - } - using CompareMemberFn = bool (MapFieldComparator::*)(const Protobuf::Message& a, - const Protobuf::Message& b) const; - CompareMemberFn selectCompareFn() { - using Protobuf::FieldDescriptor; - switch (key_field_.cpp_type()) { - case FieldDescriptor::CPPTYPE_INT32: - return &MapFieldComparator::compareByScalar; - case FieldDescriptor::CPPTYPE_UINT32: - return &MapFieldComparator::compareByScalar; - case FieldDescriptor::CPPTYPE_INT64: - return &MapFieldComparator::compareByScalar; - case FieldDescriptor::CPPTYPE_UINT64: - return &MapFieldComparator::compareByScalar; - case FieldDescriptor::CPPTYPE_BOOL: - return &MapFieldComparator::compareByScalar; - case FieldDescriptor::CPPTYPE_STRING: - return &MapFieldComparator::compareByString; - case FieldDescriptor::CPPTYPE_DOUBLE: - case FieldDescriptor::CPPTYPE_FLOAT: - case FieldDescriptor::CPPTYPE_ENUM: - case FieldDescriptor::CPPTYPE_MESSAGE: - break; - } - PANIC_DUE_TO_CORRUPT_ENUM; - } - const Protobuf::Reflection& reflection_; - const Protobuf::Descriptor& descriptor_; - const Protobuf::FieldDescriptor& key_field_; - CompareMemberFn compare_fn_; -}; - -std::vector> -sortedMapField(const Protobuf::RepeatedFieldRef map_entries) { - std::vector> entries(map_entries.begin(), - map_entries.end()); - MapFieldComparator compare(*entries.begin()); - std::sort(entries.begin(), entries.end(), compare); - return entries; -} - -// To make a map serialize deterministically we need to force the order. -// Here we're going to sort the keys into numerical order for number keys, -// or lexicographical order for strings, and then hash them in that order. +// To make a map serialize deterministically we need to ignore the order of +// the map fields. To do that, we simply combine the hashes of each entry +// using an unordered operator (addition), and then apply that combined hash to +// the seed. uint64_t reflectionHashMapField(const Protobuf::Message& message, const Protobuf::FieldDescriptor& field, uint64_t seed) { const Protobuf::Reflection& reflection = *message.GetReflection(); - // For repeated fields in general the order is significant. For map fields, - // the implementation may have a fixed order or a nondeterministic order, - // but conceptually a map field is supposed to be order agnostic. - // - // We should definitely *not* sort repeated fields in general; maps, however, - // are represented on the wire and in reflection format as repeated fields. - // Since the order is canonically not part of the data in the case of maps, - // this means for consistent hashing it must be ensured that the fields are - // handled in a deterministic order. - // - // It may be that if we didn't sort them the order would be deterministic, but - // that would be a non-guaranteed implementation detail, and a future version - // of protobuf could potentially change the internal map representation. Sorting - // also means theoretically we could produce a cross-language compatible hash; - // golang, for example, explicitly randomly orders map fields in its implementation. ASSERT(field.is_map()); - auto sorted_entries = - sortedMapField(reflection.GetRepeatedFieldRef(message, &field)); - const Protobuf::Descriptor& map_descriptor = *sorted_entries.begin()->get().GetDescriptor(); + const auto& entries = reflection.GetRepeatedFieldRef(message, &field); + ASSERT(!entries.empty()); + const Protobuf::Descriptor& map_descriptor = *entries.begin()->GetDescriptor(); const Protobuf::FieldDescriptor& key_field = *map_descriptor.map_key(); const Protobuf::FieldDescriptor& value_field = *map_descriptor.map_value(); - for (const Protobuf::Message& entry : sorted_entries) { - seed = reflectionHashField(entry, key_field, seed); - seed = reflectionHashField(entry, value_field, seed); + uint64_t combined_hash = 0; + for (const Protobuf::Message& entry : entries) { + uint64_t entry_hash = reflectionHashField(entry, key_field, 0); + entry_hash = reflectionHashField(entry, value_field, entry_hash); + combined_hash += entry_hash; } - return seed; + return HashUtil::xxHash64( + absl::string_view{reinterpret_cast(&combined_hash), sizeof(combined_hash)}, + seed); } uint64_t reflectionHashField(const Protobuf::Message& message, diff --git a/test/common/protobuf/deterministic_hash_test.cc b/test/common/protobuf/deterministic_hash_test.cc index b3df97071c5f..065b95350550 100644 --- a/test/common/protobuf/deterministic_hash_test.cc +++ b/test/common/protobuf/deterministic_hash_test.cc @@ -19,6 +19,16 @@ TEST(HashTest, EmptyMessageHashMatches) { EXPECT_EQ(hash(empty1), hash(empty2)); } +TEST(HashTest, MapWithRemovedValueBehavesTheSameAsEmptyMap) { + // This test is from an abundance of caution, to make sure that + // reflection can't be driven to traverse a map field with no values + // in it (there would be an ASSERT in that path.) + deterministichashtest::Maps empty1, empty2; + (*empty1.mutable_bool_string())[false] = "false"; + (*empty1.mutable_bool_string()).erase(false); + EXPECT_EQ(hash(empty1), hash(empty2)); +} + TEST(HashTest, BoolKeyedMapIsInsertionOrderAgnostic) { deterministichashtest::Maps map1, map2; (*map1.mutable_bool_string())[false] = "false"; From 3baddeb7a6b3b5f6c0aa359ca5c168d251ff89ee Mon Sep 17 00:00:00 2001 From: Raven Black Date: Mon, 4 Dec 2023 21:40:09 +0000 Subject: [PATCH 36/41] Add relnote Signed-off-by: Raven Black --- changelogs/current.yaml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/changelogs/current.yaml b/changelogs/current.yaml index 235ff8a658cc..b20433fcfdcb 100644 --- a/changelogs/current.yaml +++ b/changelogs/current.yaml @@ -16,6 +16,13 @@ behavior_changes: minor_behavior_changes: # *Changes that may cause incompatibilities for some users, but should not for most* +- area: config parsing, http cache filter + change: | + replaces protobuf hashing by human-readable string with a dedicated deterministic hashing algorithm. + The performance of the hash operation is improved by 2-10x depending on the structure of the message, + which is expected to reduce config update time or startup time by 10-25%. The new algorithm is also + used for http_cache_filter hashing, which will effectively cause a one-time cache flush on update + for users with a persistent cache. To revert this behavior set ``envoy.restart_features.use_fast_protobuf_hash`` to false. - area: aws change: | uses http async client to fetch the credentials from EC2 instance metadata and ECS task metadata providers instead of libcurl From fffa18cb7ec0856549182f13e7bd0e0c2e7f6163 Mon Sep 17 00:00:00 2001 From: Raven Black Date: Tue, 5 Dec 2023 20:35:40 +0000 Subject: [PATCH 37/41] Use xxHash64Value Signed-off-by: Raven Black --- source/common/protobuf/deterministic_hash.cc | 27 ++++++-------------- 1 file changed, 8 insertions(+), 19 deletions(-) diff --git a/source/common/protobuf/deterministic_hash.cc b/source/common/protobuf/deterministic_hash.cc index 25ed60f7a78c..68f630d2aefc 100644 --- a/source/common/protobuf/deterministic_hash.cc +++ b/source/common/protobuf/deterministic_hash.cc @@ -60,18 +60,15 @@ bool reflectionGet(const Protobuf::Reflection& reflection, const Protobuf::Messa // Takes a field of scalar type, which may be a repeated field, and hashes // it (or each of it). -template +template , bool> = true> uint64_t hashScalarField(const Protobuf::Reflection& reflection, const Protobuf::Message& message, const Protobuf::FieldDescriptor& field, uint64_t seed) { if (field.is_repeated()) { - for (const T& q : reflection.GetRepeatedFieldRef(message, &field)) { - seed = - HashUtil::xxHash64(absl::string_view{reinterpret_cast(&q), sizeof(q)}, seed); + for (const T& scalar : reflection.GetRepeatedFieldRef(message, &field)) { + seed = HashUtil::xxHash64Value(scalar, seed); } } else { - const T q = reflectionGet(reflection, message, field); - seed = - HashUtil::xxHash64(absl::string_view{reinterpret_cast(&q), sizeof(q)}, seed); + seed = HashUtil::xxHash64Value(reflectionGet(reflection, message, field), seed); } return seed; } @@ -99,20 +96,14 @@ uint64_t reflectionHashMapField(const Protobuf::Message& message, entry_hash = reflectionHashField(entry, value_field, entry_hash); combined_hash += entry_hash; } - return HashUtil::xxHash64( - absl::string_view{reinterpret_cast(&combined_hash), sizeof(combined_hash)}, - seed); + return HashUtil::xxHash64Value(combined_hash, seed); } uint64_t reflectionHashField(const Protobuf::Message& message, const Protobuf::FieldDescriptor& field, uint64_t seed) { using Protobuf::FieldDescriptor; const Protobuf::Reflection& reflection = *message.GetReflection(); - { - const int q = field.number(); - seed = - HashUtil::xxHash64(absl::string_view{reinterpret_cast(&q), sizeof(q)}, seed); - } + seed = HashUtil::xxHash64Value(field.number(), seed); switch (field.cpp_type()) { case FieldDescriptor::CPPTYPE_INT32: seed = hashScalarField(reflection, message, field, seed); @@ -139,12 +130,10 @@ uint64_t reflectionHashField(const Protobuf::Message& message, if (field.is_repeated()) { int c = reflection.FieldSize(message, &field); for (int i = 0; i < c; i++) { - int v = reflection.GetRepeatedEnumValue(message, &field, i); - seed = HashUtil::xxHash64(absl::string_view{reinterpret_cast(&v), sizeof(v)}, seed); + seed = HashUtil::xxHash64Value(reflection.GetRepeatedEnumValue(message, &field, i), seed); } } else { - int v = reflection.GetEnumValue(message, &field); - seed = HashUtil::xxHash64(absl::string_view{reinterpret_cast(&v), sizeof(v)}, seed); + seed = HashUtil::xxHash64Value(reflection.GetEnumValue(message, &field), seed); } break; case FieldDescriptor::CPPTYPE_STRING: { From 4255b6284744171a12eaf48eb42cdab36cc9bb7c Mon Sep 17 00:00:00 2001 From: Raven Black Date: Fri, 8 Dec 2023 22:24:45 +0000 Subject: [PATCH 38/41] Runtime guard defaulting to false Signed-off-by: Raven Black --- changelogs/current.yaml | 2 +- source/common/runtime/runtime_features.cc | 3 ++- test/common/protobuf/utility_speed_test.cc | 6 ++++-- test/extensions/filters/http/cache/http_cache_test.cc | 6 ++++-- 4 files changed, 11 insertions(+), 6 deletions(-) diff --git a/changelogs/current.yaml b/changelogs/current.yaml index 0ba1c5ac0857..5a0a4efa91cb 100644 --- a/changelogs/current.yaml +++ b/changelogs/current.yaml @@ -51,7 +51,7 @@ minor_behavior_changes: The performance of the hash operation is improved by 2-10x depending on the structure of the message, which is expected to reduce config update time or startup time by 10-25%. The new algorithm is also used for http_cache_filter hashing, which will effectively cause a one-time cache flush on update - for users with a persistent cache. To revert this behavior set ``envoy.restart_features.use_fast_protobuf_hash`` to false. + for users with a persistent cache. To enable this behavior set ``envoy.restart_features.use_fast_protobuf_hash`` to true. - area: filter state change: | Added config name of filter sending a local reply in filter state with key diff --git a/source/common/runtime/runtime_features.cc b/source/common/runtime/runtime_features.cc index 43cc609d0d13..47a12b260ff6 100644 --- a/source/common/runtime/runtime_features.cc +++ b/source/common/runtime/runtime_features.cc @@ -92,7 +92,6 @@ RUNTIME_GUARD(envoy_reloadable_features_validate_grpc_header_before_log_grpc_sta RUNTIME_GUARD(envoy_reloadable_features_validate_upstream_headers); RUNTIME_GUARD(envoy_restart_features_send_goaway_for_premature_rst_streams); RUNTIME_GUARD(envoy_restart_features_udp_read_normalize_addresses); -RUNTIME_GUARD(envoy_restart_features_use_fast_protobuf_hash); // Begin false flags. These should come with a TODO to flip true. // Sentinel and test flag. @@ -128,6 +127,8 @@ FALSE_RUNTIME_GUARD(envoy_restart_features_use_eds_cache_for_ads); FALSE_RUNTIME_GUARD(envoy_reloadable_features_enable_universal_header_validator); // TODO(pksohn): enable after fixing https://github.com/envoyproxy/envoy/issues/29930 FALSE_RUNTIME_GUARD(envoy_reloadable_features_quic_defer_logging_to_ack_listener); +// TODO(ravenblack): flip this to true after some test time. +FALSE_RUNTIME_GUARD(envoy_restart_features_use_fast_protobuf_hash); // Block of non-boolean flags. Use of int flags is deprecated. Do not add more. ABSL_FLAG(uint64_t, re2_max_program_size_error_level, 100, ""); // NOLINT diff --git a/test/common/protobuf/utility_speed_test.cc b/test/common/protobuf/utility_speed_test.cc index 9ba88cff00b4..491d2f71ed5b 100644 --- a/test/common/protobuf/utility_speed_test.cc +++ b/test/common/protobuf/utility_speed_test.cc @@ -54,8 +54,8 @@ static std::unique_ptr testProtoWithRepeatedFields() { } static void bmHashByTextFormat(benchmark::State& state, std::unique_ptr msg) { - TestScopedRuntime runtime_; - runtime_.mergeValues({{"envoy.restart_features.use_fast_protobuf_hash", "false"}}); + TestScopedRuntime runtime; + runtime.mergeValues({{"envoy.restart_features.use_fast_protobuf_hash", "false"}}); uint64_t hash = 0; for (auto _ : state) { UNREFERENCED_PARAMETER(_); @@ -66,6 +66,8 @@ static void bmHashByTextFormat(benchmark::State& state, std::unique_ptr msg) { + TestScopedRuntime runtime; + runtime.mergeValues({{"envoy.restart_features.use_fast_protobuf_hash", "true"}}); uint64_t hash = 0; for (auto _ : state) { UNREFERENCED_PARAMETER(_); diff --git a/test/extensions/filters/http/cache/http_cache_test.cc b/test/extensions/filters/http/cache/http_cache_test.cc index 659cb4fc96ca..f8cd99266b12 100644 --- a/test/extensions/filters/http/cache/http_cache_test.cc +++ b/test/extensions/filters/http/cache/http_cache_test.cc @@ -295,6 +295,8 @@ TEST_F(LookupRequestTest, PragmaNoFallback) { } TEST(HttpCacheTest, StableHashKey) { + TestScopedRuntime runtime; + runtime.mergeValues({{"envoy.restart_features.use_fast_protobuf_hash", "true"}}); Key key; key.set_host("example.com"); ASSERT_EQ(stableHashKey(key), 6153940628716543519u); @@ -302,8 +304,8 @@ TEST(HttpCacheTest, StableHashKey) { TEST(HttpCacheTest, StableHashKeyWithSlowHash) { // TODO(ravenblack): This test should be removed when the runtime guard is removed. - TestScopedRuntime runtime_; - runtime_.mergeValues({{"envoy.restart_features.use_fast_protobuf_hash", "false"}}); + TestScopedRuntime runtime; + runtime.mergeValues({{"envoy.restart_features.use_fast_protobuf_hash", "false"}}); Key key; key.set_host("example.com"); ASSERT_EQ(stableHashKey(key), 9582653837550152292u); From 23c6aa9605ac4f8fff179ecd4ca9cb4b47d61f3e Mon Sep 17 00:00:00 2001 From: Raven Black Date: Mon, 11 Dec 2023 15:31:43 +0000 Subject: [PATCH 39/41] Adi's comments Signed-off-by: Raven Black --- changelogs/current.yaml | 2 +- source/common/protobuf/deterministic_hash.cc | 13 ++++++++----- source/common/runtime/runtime_features.cc | 2 +- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/changelogs/current.yaml b/changelogs/current.yaml index 5a0a4efa91cb..1f6b7dd4fc59 100644 --- a/changelogs/current.yaml +++ b/changelogs/current.yaml @@ -47,7 +47,7 @@ minor_behavior_changes: to allow for setting if rate limit grpc response should be RESOURCE_EXHAUSTED instead of the default UNAVAILABLE. - area: config parsing, http cache filter change: | - replaces protobuf hashing by human-readable string with a dedicated deterministic hashing algorithm. + Replaces protobuf hashing by human-readable string with a dedicated deterministic hashing algorithm. The performance of the hash operation is improved by 2-10x depending on the structure of the message, which is expected to reduce config update time or startup time by 10-25%. The new algorithm is also used for http_cache_filter hashing, which will effectively cause a one-time cache flush on update diff --git a/source/common/protobuf/deterministic_hash.cc b/source/common/protobuf/deterministic_hash.cc index 68f630d2aefc..7e2e146232d1 100644 --- a/source/common/protobuf/deterministic_hash.cc +++ b/source/common/protobuf/deterministic_hash.cc @@ -58,8 +58,8 @@ bool reflectionGet(const Protobuf::Reflection& reflection, const Protobuf::Messa return reflection.GetBool(message, &field); } -// Takes a field of scalar type, which may be a repeated field, and hashes -// it (or each of it). +// Takes a field of scalar type, and hashes it. In case the field is a repeated field, +// the function hashes each of its elements. template , bool> = true> uint64_t hashScalarField(const Protobuf::Reflection& reflection, const Protobuf::Message& message, const Protobuf::FieldDescriptor& field, uint64_t seed) { @@ -128,7 +128,7 @@ uint64_t reflectionHashField(const Protobuf::Message& message, break; case FieldDescriptor::CPPTYPE_ENUM: if (field.is_repeated()) { - int c = reflection.FieldSize(message, &field); + const int c = reflection.FieldSize(message, &field); for (int i = 0; i < c; i++) { seed = HashUtil::xxHash64Value(reflection.GetRepeatedEnumValue(message, &field, i), seed); } @@ -136,16 +136,17 @@ uint64_t reflectionHashField(const Protobuf::Message& message, seed = HashUtil::xxHash64Value(reflection.GetEnumValue(message, &field), seed); } break; - case FieldDescriptor::CPPTYPE_STRING: { + case FieldDescriptor::CPPTYPE_STRING: if (field.is_repeated()) { for (const std::string& str : reflection.GetRepeatedFieldRef(message, &field)) { seed = HashUtil::xxHash64(str, seed); } } else { + // Scratch may be used by GetStringReference if the field is not already a std::string. std::string scratch; seed = HashUtil::xxHash64(reflection.GetStringReference(message, &field, &scratch), seed); } - } break; + break; case FieldDescriptor::CPPTYPE_MESSAGE: if (field.is_map()) { seed = reflectionHashMapField(message, field, seed); @@ -200,6 +201,7 @@ uint64_t reflectionHashMessage(const Protobuf::Message& message, uint64_t seed) seed = HashUtil::xxHash64(descriptor->full_name(), seed); if (descriptor->well_known_type() == Protobuf::Descriptor::WELLKNOWNTYPE_ANY) { const ProtobufWkt::Any* any = Protobuf::DynamicCastToGenerated(&message); + ASSERT(any != nullptr, "casting to any should always work for WELLKNOWNTYPE_ANY"); std::unique_ptr submsg = unpackAnyForReflection(*any); if (submsg == nullptr) { // If we wanted to handle unknown types in Any, this is where we'd have to do it. @@ -208,6 +210,7 @@ uint64_t reflectionHashMessage(const Protobuf::Message& message, uint64_t seed) return reflectionHashMessage(*submsg, seed); } std::vector fields; + // ListFields returned the fields ordered by field number. reflection->ListFields(message, &fields); // If we wanted to handle unknown fields, we'd need to also GetUnknownFields here. for (const FieldDescriptor* field : fields) { diff --git a/source/common/runtime/runtime_features.cc b/source/common/runtime/runtime_features.cc index 47a12b260ff6..3aac2a0217e9 100644 --- a/source/common/runtime/runtime_features.cc +++ b/source/common/runtime/runtime_features.cc @@ -127,7 +127,7 @@ FALSE_RUNTIME_GUARD(envoy_restart_features_use_eds_cache_for_ads); FALSE_RUNTIME_GUARD(envoy_reloadable_features_enable_universal_header_validator); // TODO(pksohn): enable after fixing https://github.com/envoyproxy/envoy/issues/29930 FALSE_RUNTIME_GUARD(envoy_reloadable_features_quic_defer_logging_to_ack_listener); -// TODO(ravenblack): flip this to true after some test time. +// TODO(#31276): flip this to true after some test time. FALSE_RUNTIME_GUARD(envoy_restart_features_use_fast_protobuf_hash); // Block of non-boolean flags. Use of int flags is deprecated. Do not add more. From 7feee62a0779020fb751de0a835d9c9346a8a7a6 Mon Sep 17 00:00:00 2001 From: Raven Black Date: Mon, 11 Dec 2023 17:40:42 +0000 Subject: [PATCH 40/41] Add ASSERT for GetPrototype Signed-off-by: Raven Black --- source/common/protobuf/deterministic_hash.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/source/common/protobuf/deterministic_hash.cc b/source/common/protobuf/deterministic_hash.cc index 7e2e146232d1..9f8d16370ea2 100644 --- a/source/common/protobuf/deterministic_hash.cc +++ b/source/common/protobuf/deterministic_hash.cc @@ -187,6 +187,7 @@ std::unique_ptr unpackAnyForReflection(const ProtobufWkt::Any } const Protobuf::Message* prototype = Protobuf::MessageFactory::generated_factory()->GetPrototype(descriptor); + ASSERT(prototype != nullptr, "should be impossible since the descriptor is known"); std::unique_ptr msg(prototype->New()); any.UnpackTo(msg.get()); return msg; From f6a44ef4703488b3992443e1990f6f872e958b24 Mon Sep 17 00:00:00 2001 From: Raven Black Date: Mon, 11 Dec 2023 20:17:15 +0000 Subject: [PATCH 41/41] Add tests for Any and repeated message Signed-off-by: Raven Black --- source/common/protobuf/deterministic_hash.cc | 3 +- .../protobuf/deterministic_hash_test.cc | 51 +++++++++++++++++++ .../protobuf/deterministic_hash_test.proto | 7 +++ 3 files changed, 60 insertions(+), 1 deletion(-) diff --git a/source/common/protobuf/deterministic_hash.cc b/source/common/protobuf/deterministic_hash.cc index 9f8d16370ea2..52e18c553297 100644 --- a/source/common/protobuf/deterministic_hash.cc +++ b/source/common/protobuf/deterministic_hash.cc @@ -206,7 +206,8 @@ uint64_t reflectionHashMessage(const Protobuf::Message& message, uint64_t seed) std::unique_ptr submsg = unpackAnyForReflection(*any); if (submsg == nullptr) { // If we wanted to handle unknown types in Any, this is where we'd have to do it. - return seed; + // Since we don't know the type to introspect it, we hash just its type name. + return HashUtil::xxHash64(any->type_url(), seed); } return reflectionHashMessage(*submsg, seed); } diff --git a/test/common/protobuf/deterministic_hash_test.cc b/test/common/protobuf/deterministic_hash_test.cc index 065b95350550..f5ebf94a1806 100644 --- a/test/common/protobuf/deterministic_hash_test.cc +++ b/test/common/protobuf/deterministic_hash_test.cc @@ -308,6 +308,24 @@ TEST(HashTest, RepeatedFloatDifferentOrderMismatch) { EXPECT_NE(hash(r1), hash(r2)); } +TEST(HashTest, RepeatedMessageMatch) { + deterministichashtest::RepeatedFields r1, r2; + r1.add_messages()->set_index(1); + r1.add_messages()->set_index(2); + r2.add_messages()->set_index(1); + r2.add_messages()->set_index(2); + EXPECT_EQ(hash(r1), hash(r2)); +} + +TEST(HashTest, RepeatedMessageOrderMisMatch) { + deterministichashtest::RepeatedFields r1, r2; + r1.add_messages()->set_index(1); + r1.add_messages()->set_index(2); + r2.add_messages()->set_index(2); + r2.add_messages()->set_index(1); + EXPECT_NE(hash(r1), hash(r2)); +} + TEST(HashTest, SingleInt32Match) { deterministichashtest::SingleFields s1, s2; s1.set_int32(5); @@ -448,5 +466,38 @@ TEST(HashTest, SingleEnumMismatch) { EXPECT_NE(hash(s1), hash(s2)); } +TEST(HashTest, AnyWithUnknownTypeMatch) { + deterministichashtest::AnyContainer a1, a2; + a1.mutable_any()->set_type_url("invalid_type"); + a2.mutable_any()->set_type_url("invalid_type"); + EXPECT_EQ(hash(a1), hash(a2)); +} + +TEST(HashTest, AnyWithUnknownTypeMismatch) { + deterministichashtest::AnyContainer a1, a2; + a1.mutable_any()->set_type_url("invalid_type"); + a2.mutable_any()->set_type_url("different_invalid_type"); + EXPECT_NE(hash(a1), hash(a2)); +} + +TEST(HashTest, AnyWithKnownTypeMatch) { + deterministichashtest::AnyContainer a1, a2; + deterministichashtest::Recursion value; + value.set_index(1); + a1.mutable_any()->PackFrom(value); + a2.mutable_any()->PackFrom(value); + EXPECT_EQ(hash(a1), hash(a2)); +} + +TEST(HashTest, AnyWithKnownTypeMismatch) { + deterministichashtest::AnyContainer a1, a2; + deterministichashtest::Recursion value; + value.set_index(1); + a1.mutable_any()->PackFrom(value); + value.set_index(2); + a2.mutable_any()->PackFrom(value); + EXPECT_NE(hash(a1), hash(a2)); +} + } // namespace DeterministicProtoHash } // namespace Envoy diff --git a/test/common/protobuf/deterministic_hash_test.proto b/test/common/protobuf/deterministic_hash_test.proto index 43cf64049045..5f56d35d41b0 100644 --- a/test/common/protobuf/deterministic_hash_test.proto +++ b/test/common/protobuf/deterministic_hash_test.proto @@ -2,6 +2,8 @@ syntax = "proto3"; package deterministichashtest; +import "google/protobuf/any.proto"; + enum FooEnum { ZERO = 0; FOO = 1; @@ -33,6 +35,7 @@ message RepeatedFields { repeated double doubles = 8; repeated float floats = 9; repeated FooEnum enums = 10; + repeated Recursion messages = 11; }; message SingleFields { @@ -47,3 +50,7 @@ message SingleFields { float f = 9; FooEnum e = 10; }; + +message AnyContainer { + google.protobuf.Any any = 1; +};