diff --git a/include/envoy/upstream/host_description.h b/include/envoy/upstream/host_description.h index aff175735b9a..9f5eb67f7ba2 100644 --- a/include/envoy/upstream/host_description.h +++ b/include/envoy/upstream/host_description.h @@ -52,10 +52,20 @@ class HostDescription { */ virtual bool canary() const PURE; + /** + * Update the canary status of the host. + */ + virtual void canary(bool is_canary) PURE; + /** * @return the metadata associated with this host */ - virtual const envoy::api::v2::core::Metadata& metadata() const PURE; + virtual const std::shared_ptr metadata() const PURE; + + /** + * Set the current metadata. + */ + virtual void metadata(const envoy::api::v2::core::Metadata& new_metadata) PURE; /** * @return the cluster the host is a member of. diff --git a/source/common/router/header_formatter.cc b/source/common/router/header_formatter.cc index b691159879e3..b76fd90f5bbe 100644 --- a/source/common/router/header_formatter.cc +++ b/source/common/router/header_formatter.cc @@ -67,7 +67,7 @@ parseUpstreamMetadataField(absl::string_view params_str) { } const ProtobufWkt::Value* value = - &Config::Metadata::metadataValue(host->metadata(), params[0], params[1]); + &Config::Metadata::metadataValue(*host->metadata(), params[0], params[1]); if (value->kind_case() == ProtobufWkt::Value::KIND_NOT_SET) { // No kind indicates default ProtobufWkt::Value which means namespace or key not // found. diff --git a/source/common/upstream/logical_dns_cluster.h b/source/common/upstream/logical_dns_cluster.h index 138285fe24dc..fa0a94955472 100644 --- a/source/common/upstream/logical_dns_cluster.h +++ b/source/common/upstream/logical_dns_cluster.h @@ -58,13 +58,18 @@ class LogicalDnsCluster : public ClusterImplBase { struct RealHostDescription : public HostDescription { RealHostDescription(Network::Address::InstanceConstSharedPtr address, HostConstSharedPtr logical_host) - : address_(address), logical_host_(logical_host) {} + : address_(address), logical_host_(logical_host), + metadata_(std::make_shared( + envoy::api::v2::core::Metadata::default_instance())) {} // Upstream:HostDescription bool canary() const override { return false; } - const envoy::api::v2::core::Metadata& metadata() const override { - return envoy::api::v2::core::Metadata::default_instance(); + void canary(bool) override {} + const std::shared_ptr metadata() const override { + return metadata_; } + void metadata(const envoy::api::v2::core::Metadata&) override {} + const ClusterInfo& cluster() const override { return logical_host_->cluster(); } HealthCheckHostMonitor& healthChecker() const override { return logical_host_->healthChecker(); @@ -84,6 +89,7 @@ class LogicalDnsCluster : public ClusterImplBase { } Network::Address::InstanceConstSharedPtr address_; HostConstSharedPtr logical_host_; + const std::shared_ptr metadata_; }; struct PerThreadCurrentHostData : public ThreadLocal::ThreadLocalObject { diff --git a/source/common/upstream/subset_lb.cc b/source/common/upstream/subset_lb.cc index 698bd472c746..441c04eacc98 100644 --- a/source/common/upstream/subset_lb.cc +++ b/source/common/upstream/subset_lb.cc @@ -248,7 +248,7 @@ void SubsetLoadBalancer::update(uint32_t priority, const HostVector& hosts_added } bool SubsetLoadBalancer::hostMatches(const SubsetMetadata& kvs, const Host& host) { - const envoy::api::v2::core::Metadata& host_metadata = host.metadata(); + const envoy::api::v2::core::Metadata& host_metadata = *host.metadata(); for (const auto& kv : kvs) { const ProtobufWkt::Value& host_value = Config::Metadata::metadataValue( @@ -269,7 +269,7 @@ SubsetLoadBalancer::extractSubsetMetadata(const std::set& subset_ke const Host& host) { SubsetMetadata kvs; - const envoy::api::v2::core::Metadata& metadata = host.metadata(); + const envoy::api::v2::core::Metadata& metadata = *host.metadata(); const auto& filter_it = metadata.filter_metadata().find(Config::MetadataFilters::get().ENVOY_LB); if (filter_it == metadata.filter_metadata().end()) { return kvs; diff --git a/source/common/upstream/upstream_impl.cc b/source/common/upstream/upstream_impl.cc index 3fa58dc6c7a8..671c8807164e 100644 --- a/source/common/upstream/upstream_impl.cc +++ b/source/common/upstream/upstream_impl.cc @@ -684,16 +684,22 @@ bool BaseDynamicClusterImpl::updateDynamicHostList(const HostVector& new_hosts, HostVector& hosts_added, HostVector& hosts_removed) { uint64_t max_host_weight = 1; + + // Did hosts change? + // // Has the EDS health status changed the health of any endpoint? If so, we // rebuild the hosts vectors. We only do this if the health status of an // endpoint has materially changed (e.g. if previously failing active health // checks, we just note it's now failing EDS health status but don't rebuild). + // + // Likewise, if metadata for an endpoint changed we rebuild the hosts vectors. + // // TODO(htuch): We can be smarter about this potentially, and not force a full // host set update on health status change. The way this would work is to // implement a HealthChecker subclass that provides thread local health - // updates to the Cluster objeect. This will probably make sense to do in + // updates to the Cluster object. This will probably make sense to do in // conjunction with https://github.com/envoyproxy/envoy/issues/2874. - bool health_changed = false; + bool hosts_changed = false; // Go through and see if the list we have is different from what we just got. If it is, we make a // new host list and raise a change notification. This uses an N^2 search given that this does not @@ -725,15 +731,32 @@ bool BaseDynamicClusterImpl::updateDynamicHostList(const HostVector& new_hosts, (*i)->healthFlagSet(Host::HealthFlag::FAILED_EDS_HEALTH); // If the host was previously healthy and we're now unhealthy, we need to // rebuild. - health_changed |= previously_healthy; + hosts_changed |= previously_healthy; } else { (*i)->healthFlagClear(Host::HealthFlag::FAILED_EDS_HEALTH); // If the host was previously unhealthy and now healthy, we need to // rebuild. - health_changed |= !previously_healthy && (*i)->healthy(); + hosts_changed |= !previously_healthy && (*i)->healthy(); } } + // Did metadata change? + const bool metadata_changed = + !Protobuf::util::MessageDifferencer::Equivalent(*host->metadata(), *(*i)->metadata()); + if (metadata_changed) { + // First, update the entire metadata for the endpoint. + (*i)->metadata(*host->metadata()); + + // Also, given that the canary attribute of an endpoint is derived from its metadata + // (e.g.: from envoy.lb/canary), we do a blind update here since it's cheaper than testing + // to see if it actually changed. We must update this besides just updating the metadata, + // because it'll be used by the router filter to compute upstream stats. + (*i)->canary(host->canary()); + + // If metadata changed, we need to rebuild. See github issue #3810. + hosts_changed = true; + } + (*i)->weight(host->weight()); final_hosts.push_back(*i); i = current_hosts.erase(i); @@ -791,11 +814,11 @@ bool BaseDynamicClusterImpl::updateDynamicHostList(const HostVector& new_hosts, // During the search we moved all of the hosts from hosts_ into final_hosts so just // move them back. current_hosts = std::move(final_hosts); - // We return false here in the absence of EDS health status, because we + // We return false here in the absence of EDS health status or metadata changes, because we // have no changes to host vector status (modulo weights). When we have EDS - // health status, we return true, causing updateHosts() to fire in the + // health status or metadata changed, we return true, causing updateHosts() to fire in the // caller. - return health_changed; + return hosts_changed; } } diff --git a/source/common/upstream/upstream_impl.h b/source/common/upstream/upstream_impl.h index a33b7ea810a1..1c29e5269524 100644 --- a/source/common/upstream/upstream_impl.h +++ b/source/common/upstream/upstream_impl.h @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -69,13 +70,32 @@ class HostDescriptionImpl : virtual public HostDescription { canary_(Config::Metadata::metadataValue(metadata, Config::MetadataFilters::get().ENVOY_LB, Config::MetadataEnvoyLbKeys::get().CANARY) .bool_value()), - metadata_(metadata), locality_(locality), stats_{ALL_HOST_STATS(POOL_COUNTER(stats_store_), - POOL_GAUGE(stats_store_))} { - } + metadata_(std::make_shared(metadata)), + locality_(locality), stats_{ALL_HOST_STATS(POOL_COUNTER(stats_store_), + POOL_GAUGE(stats_store_))} {} // Upstream::HostDescription bool canary() const override { return canary_; } - const envoy::api::v2::core::Metadata& metadata() const override { return metadata_; } + void canary(bool is_canary) override { canary_ = is_canary; } + + // Metadata getter/setter. + // + // It's possible that the lock that guards the metadata will become highly contended (e.g.: + // endpoints churning during a deploy of a large cluster). A possible improvement + // would be to use TLS and post metadata updates from the main thread. This model would + // possibly benefit other related and expensive computations too (e.g.: updating subsets). + // + // TODO(rgs1): we should move to absl locks, once there's support for R/W locks. We should + // also add lock annotations, once they work correctly with R/W locks. + const std::shared_ptr metadata() const override { + std::shared_lock lock(metadata_mutex_); + return metadata_; + } + virtual void metadata(const envoy::api::v2::core::Metadata& new_metadata) override { + std::unique_lock lock(metadata_mutex_); + metadata_ = std::make_shared(new_metadata); + } + const ClusterInfo& cluster() const override { return *cluster_; } HealthCheckHostMonitor& healthChecker() const override { if (health_checker_) { @@ -108,8 +128,9 @@ class HostDescriptionImpl : virtual public HostDescription { const std::string hostname_; Network::Address::InstanceConstSharedPtr address_; Network::Address::InstanceConstSharedPtr health_check_address_; - const bool canary_; - const envoy::api::v2::core::Metadata metadata_; + std::atomic canary_; + mutable std::shared_timed_mutex metadata_mutex_; + std::shared_ptr metadata_; const envoy::api::v2::core::Locality locality_; Stats::IsolatedStoreImpl stats_store_; HostStats stats_; diff --git a/test/common/router/header_formatter_test.cc b/test/common/router/header_formatter_test.cc index 1e80c0bb37e0..000549573560 100644 --- a/test/common/router/header_formatter_test.cc +++ b/test/common/router/header_formatter_test.cc @@ -83,8 +83,9 @@ TEST_F(RequestInfoHeaderFormatterTest, TestFormatWithUpstreamMetadataVariable) { std::shared_ptr> host( new NiceMock()); - envoy::api::v2::core::Metadata metadata = TestUtility::parseYaml( - R"EOF( + auto metadata = std::make_shared( + TestUtility::parseYaml( + R"EOF( filter_metadata: namespace: key: value @@ -99,11 +100,11 @@ TEST_F(RequestInfoHeaderFormatterTest, TestFormatWithUpstreamMetadataVariable) { list_key: [ list_element ] struct_key: deep_key: deep_value - )EOF"); + )EOF")); // Prove we're testing the expected types. const auto& nested_struct = - Envoy::Config::Metadata::metadataValue(metadata, "namespace", "nested").struct_value(); + Envoy::Config::Metadata::metadataValue(*metadata, "namespace", "nested").struct_value(); EXPECT_EQ(nested_struct.fields().at("str_key").kind_case(), ProtobufWkt::Value::kStringValue); EXPECT_EQ(nested_struct.fields().at("bool_key1").kind_case(), ProtobufWkt::Value::kBoolValue); EXPECT_EQ(nested_struct.fields().at("bool_key2").kind_case(), ProtobufWkt::Value::kBoolValue); @@ -114,7 +115,7 @@ TEST_F(RequestInfoHeaderFormatterTest, TestFormatWithUpstreamMetadataVariable) { EXPECT_EQ(nested_struct.fields().at("struct_key").kind_case(), ProtobufWkt::Value::kStructValue); ON_CALL(request_info, upstreamHost()).WillByDefault(Return(host)); - ON_CALL(*host, metadata()).WillByDefault(ReturnRef(metadata)); + ON_CALL(*host, metadata()).WillByDefault(Return(metadata)); // Top-level value. testFormatting(request_info, "UPSTREAM_METADATA([\"namespace\", \"key\"])", "value"); @@ -331,13 +332,14 @@ TEST(HeaderParserTest, TestParseInternal) { ON_CALL(request_info, upstreamHost()).WillByDefault(Return(host)); // Metadata with percent signs in the key. - envoy::api::v2::core::Metadata metadata = TestUtility::parseYaml( - R"EOF( + auto metadata = std::make_shared( + TestUtility::parseYaml( + R"EOF( filter_metadata: ns: key: value - )EOF"); - ON_CALL(*host, metadata()).WillByDefault(ReturnRef(metadata)); + )EOF")); + ON_CALL(*host, metadata()).WillByDefault(Return(metadata)); // "2018-04-03T23:06:09.123Z". const SystemTime start_time(std::chrono::milliseconds(1522796769123)); @@ -411,9 +413,9 @@ TEST(HeaderParserTest, EvaluateEmptyHeaders) { std::shared_ptr> host( new NiceMock()); NiceMock request_info; - envoy::api::v2::core::Metadata metadata; + auto metadata = std::make_shared(); ON_CALL(request_info, upstreamHost()).WillByDefault(Return(host)); - ON_CALL(*host, metadata()).WillByDefault(ReturnRef(metadata)); + ON_CALL(*host, metadata()).WillByDefault(Return(metadata)); req_header_parser->evaluateHeaders(headerMap, request_info); EXPECT_FALSE(headerMap.has("x-key")); } @@ -485,13 +487,14 @@ match: { prefix: "/new_endpoint" } ON_CALL(request_info, upstreamHost()).WillByDefault(Return(host)); // Metadata with percent signs in the key. - envoy::api::v2::core::Metadata metadata = TestUtility::parseYaml( - R"EOF( + auto metadata = std::make_shared( + TestUtility::parseYaml( + R"EOF( filter_metadata: namespace: "%key%": value - )EOF"); - ON_CALL(*host, metadata()).WillByDefault(ReturnRef(metadata)); + )EOF")); + ON_CALL(*host, metadata()).WillByDefault(Return(metadata)); req_header_parser->evaluateHeaders(headerMap, request_info); diff --git a/test/common/upstream/eds_test.cc b/test/common/upstream/eds_test.cc index ee7e6139444a..1d8555da7622 100644 --- a/test/common/upstream/eds_test.cc +++ b/test/common/upstream/eds_test.cc @@ -168,6 +168,9 @@ TEST_F(EdsTest, EndpointMetadata) { Config::MetadataFilters::get().ENVOY_LB, Config::MetadataEnvoyLbKeys::get().CANARY) .set_bool_value(true); + Config::Metadata::mutableMetadataValue(*canary->mutable_metadata(), + Config::MetadataFilters::get().ENVOY_LB, "version") + .set_string_value("v1"); bool initialized = false; cluster_->initialize([&initialized] { initialized = true; }); @@ -177,30 +180,46 @@ TEST_F(EdsTest, EndpointMetadata) { auto& hosts = cluster_->prioritySet().hostSetsPerPriority()[0]->hosts(); EXPECT_EQ(hosts.size(), 2); - EXPECT_EQ(hosts[0]->metadata().filter_metadata_size(), 2); - EXPECT_EQ(Config::Metadata::metadataValue(hosts[0]->metadata(), + EXPECT_EQ(hosts[0]->metadata()->filter_metadata_size(), 2); + EXPECT_EQ(Config::Metadata::metadataValue(*hosts[0]->metadata(), Config::MetadataFilters::get().ENVOY_LB, "string_key") .string_value(), std::string("string_value")); - EXPECT_EQ(Config::Metadata::metadataValue(hosts[0]->metadata(), "custom_namespace", "num_key") + EXPECT_EQ(Config::Metadata::metadataValue(*hosts[0]->metadata(), "custom_namespace", "num_key") .number_value(), 1.1); - EXPECT_FALSE(Config::Metadata::metadataValue(hosts[0]->metadata(), + EXPECT_FALSE(Config::Metadata::metadataValue(*hosts[0]->metadata(), Config::MetadataFilters::get().ENVOY_LB, Config::MetadataEnvoyLbKeys::get().CANARY) .bool_value()); EXPECT_FALSE(hosts[0]->canary()); - EXPECT_EQ(hosts[1]->metadata().filter_metadata_size(), 1); - EXPECT_TRUE(Config::Metadata::metadataValue(hosts[1]->metadata(), + EXPECT_EQ(hosts[1]->metadata()->filter_metadata_size(), 1); + EXPECT_TRUE(Config::Metadata::metadataValue(*hosts[1]->metadata(), Config::MetadataFilters::get().ENVOY_LB, Config::MetadataEnvoyLbKeys::get().CANARY) .bool_value()); EXPECT_TRUE(hosts[1]->canary()); + EXPECT_EQ(Config::Metadata::metadataValue(*hosts[1]->metadata(), + Config::MetadataFilters::get().ENVOY_LB, "version") + .string_value(), + "v1"); // We don't rebuild with the exact same config. VERBOSE_EXPECT_NO_THROW(cluster_->onConfigUpdate(resources, "")); EXPECT_EQ(1UL, stats_.counter("cluster.name.update_no_rebuild").value()); + + // New resources with Metadata updated. + Config::Metadata::mutableMetadataValue(*canary->mutable_metadata(), + Config::MetadataFilters::get().ENVOY_LB, "version") + .set_string_value("v2"); + VERBOSE_EXPECT_NO_THROW(cluster_->onConfigUpdate(resources, "")); + auto& nhosts = cluster_->prioritySet().hostSetsPerPriority()[0]->hosts(); + EXPECT_EQ(nhosts.size(), 2); + EXPECT_EQ(Config::Metadata::metadataValue(*nhosts[1]->metadata(), + Config::MetadataFilters::get().ENVOY_LB, "version") + .string_value(), + "v2"); } // Validate that onConfigUpdate() updates endpoint health status. diff --git a/test/common/upstream/logical_dns_cluster_test.cc b/test/common/upstream/logical_dns_cluster_test.cc index 6f09675f4853..df74b6f1aba4 100644 --- a/test/common/upstream/logical_dns_cluster_test.cc +++ b/test/common/upstream/logical_dns_cluster_test.cc @@ -210,8 +210,8 @@ TEST_F(LogicalDnsClusterTest, Basic) { EXPECT_EQ("", data.host_description_->locality().zone()); EXPECT_EQ("", data.host_description_->locality().sub_zone()); EXPECT_EQ("foo.bar.com", data.host_description_->hostname()); - EXPECT_EQ(&envoy::api::v2::core::Metadata::default_instance(), - &data.host_description_->metadata()); + EXPECT_TRUE(TestUtility::protoEqual(envoy::api::v2::core::Metadata::default_instance(), + *data.host_description_->metadata())); data.host_description_->outlierDetector().putHttpResponseCode(200); data.host_description_->healthChecker().setUnhealthy(); diff --git a/test/mocks/upstream/host.h b/test/mocks/upstream/host.h index 32faed83aea2..3de35b55fb43 100644 --- a/test/mocks/upstream/host.h +++ b/test/mocks/upstream/host.h @@ -78,7 +78,9 @@ class MockHostDescription : public HostDescription { MOCK_CONST_METHOD0(address, Network::Address::InstanceConstSharedPtr()); MOCK_CONST_METHOD0(healthCheckAddress, Network::Address::InstanceConstSharedPtr()); MOCK_CONST_METHOD0(canary, bool()); - MOCK_CONST_METHOD0(metadata, const envoy::api::v2::core::Metadata&()); + MOCK_METHOD1(canary, void(bool new_canary)); + MOCK_CONST_METHOD0(metadata, const std::shared_ptr()); + MOCK_METHOD1(metadata, void(const envoy::api::v2::core::Metadata&)); MOCK_CONST_METHOD0(cluster, const ClusterInfo&()); MOCK_CONST_METHOD0(outlierDetector, Outlier::DetectorHostMonitor&()); MOCK_CONST_METHOD0(healthChecker, HealthCheckHostMonitor&()); @@ -128,7 +130,9 @@ class MockHost : public Host { MOCK_CONST_METHOD0(address, Network::Address::InstanceConstSharedPtr()); MOCK_CONST_METHOD0(healthCheckAddress, Network::Address::InstanceConstSharedPtr()); MOCK_CONST_METHOD0(canary, bool()); - MOCK_CONST_METHOD0(metadata, const envoy::api::v2::core::Metadata&()); + MOCK_METHOD1(canary, void(bool new_canary)); + MOCK_CONST_METHOD0(metadata, const std::shared_ptr()); + MOCK_METHOD1(metadata, void(const envoy::api::v2::core::Metadata&)); MOCK_CONST_METHOD0(cluster, const ClusterInfo&()); MOCK_CONST_METHOD0(counters, std::vector()); MOCK_CONST_METHOD2(