Commit 377a655: Peekahead.

Signed-off-by: Alyssa Wilk <alyssar@chromium.org>
alyssawilk committed Nov 23, 2020
1 parent 8d62990
Showing 12 changed files with 227 additions and 47 deletions.
2 changes: 0 additions & 2 deletions api/envoy/config/cluster/v3/cluster.proto
@@ -589,7 +589,6 @@ message Cluster {
google.protobuf.Duration max_interval = 2 [(validate.rules).duration = {gt {nanos: 1000000}}];
}

// [#not-implemented-hide:]
message PrefetchPolicy {
// Indicates how many streams (rounded up) can be anticipated per-upstream for each
// incoming stream. This is useful for high-QPS or latency-sensitive services. Prefetching
@@ -998,7 +997,6 @@ message Cluster {
// Configuration to track optional cluster stats.
TrackClusterStats track_cluster_stats = 49;

// [#not-implemented-hide:]
// Prefetch configuration for this cluster.
PrefetchPolicy prefetch_policy = 50;

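The field's doc comment specifies round-up semantics for the ratio. A minimal standalone sketch of what a given ratio permits (illustrative only; anticipatedStreams is a hypothetical helper, not part of this commit):

```cpp
#include <cmath>
#include <cstdint>
#include <iostream>

// Illustrative only: models the "rounded up" wording in the PrefetchPolicy
// comment. With a prefetch ratio of 1.5, three incoming streams would allow
// ceil(3 * 1.5) = 5 streams to be anticipated for the upstream.
uint32_t anticipatedStreams(uint32_t incoming_streams, double prefetch_ratio) {
  return static_cast<uint32_t>(std::ceil(incoming_streams * prefetch_ratio));
}

int main() {
  std::cout << anticipatedStreams(3, 1.5) << "\n"; // prints 5
  return 0;
}
```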
3 changes: 1 addition & 2 deletions api/envoy/config/cluster/v4alpha/cluster.proto

Some generated files are not rendered by default.

3 changes: 1 addition & 2 deletions generated_api_shadow/envoy/config/cluster/v3/cluster.proto

Some generated files are not rendered by default.


64 changes: 38 additions & 26 deletions source/common/upstream/cluster_manager_impl.cc
@@ -855,25 +855,31 @@ ThreadLocalCluster* ClusterManagerImpl::get(absl::string_view cluster) {

void ClusterManagerImpl::maybePrefetch(
ThreadLocalClusterManagerImpl::ClusterEntryPtr& cluster_entry,
const ClusterConnectivityState& state,
std::function<ConnectionPool::Instance*()> pick_prefetch_pool) {
// TODO(alyssawilk) As currently implemented, this will always just prefetch
// one connection ahead of actually needed connections.
//
// Instead we want to track the following metrics across the entire connection
// pool and use the same algorithm we do for per-upstream prefetch:
// ((pending_streams_ + num_active_streams_) * global_prefetch_ratio >
// (connecting_stream_capacity_ + num_active_streams_))
// and allow multiple prefetches per pick.
// Also cap prefetches such that
// num_unused_prefetch < num hosts
// since if we have more prefetches than hosts, we should consider kicking into
// per-upstream prefetch.
//
// Once we do this, this should loop a capped number of times while shouldPrefetch is true.
if (cluster_entry->cluster_info_->peekaheadRatio() > 1.0) {
auto peekahead_ratio = cluster_entry->cluster_info_->peekaheadRatio();
if (peekahead_ratio <= 1.0) {
return;
}

// 3 here is arbitrary. Just as in ConnPoolImplBase::tryCreateNewConnections
// we want to limit the work which can be done on any given prefetch attempt.
for (int i = 0; i < 3; ++i) {
if ((state.pending_streams_ + 1 + state.active_streams_) * peekahead_ratio <=
(state.connecting_stream_capacity_ + state.active_streams_)) {
return;
}
ConnectionPool::Instance* prefetch_pool = pick_prefetch_pool();
if (prefetch_pool) {
prefetch_pool->maybePrefetch(cluster_entry->cluster_info_->peekaheadRatio());
if (!prefetch_pool->maybePrefetch(cluster_entry->cluster_info_->peekaheadRatio())) {
// Given that the next prefetch pick may be entirely different, we could
// opt to try again even if the first prefetch fails. Err on the side of
// caution and wait for the next attempt.
return;
}
} else {
// If unable to find a prefetch pool, exit early.
return;
}
}
}
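The rewritten loop above re-evaluates a capacity comparison on each of its (at most three) iterations. A minimal standalone sketch of that predicate, assuming only the connectivity-state fields the diff consults (shouldPrefetch here is a hypothetical helper, not Envoy source):

```cpp
#include <cstdint>
#include <iostream>

// Mirrors the fields consulted by maybePrefetch (names copied from the diff).
struct ConnectivityState {
  uint32_t pending_streams_{0};
  uint32_t active_streams_{0};
  uint32_t connecting_stream_capacity_{0};
};

// Prefetch is warranted while the anticipated demand (current streams plus
// the one about to be issued, scaled by the peekahead ratio) exceeds the
// capacity that is already connected or connecting.
bool shouldPrefetch(const ConnectivityState& state, float peekahead_ratio) {
  return (state.pending_streams_ + 1 + state.active_streams_) * peekahead_ratio >
         (state.connecting_stream_capacity_ + state.active_streams_);
}

int main() {
  // pending = 2, active = 10, connecting capacity = 3:
  // (2 + 1 + 10) * 1.5 = 19.5 > 3 + 10 = 13, so a prefetch is attempted.
  ConnectivityState state{2, 10, 3};
  std::cout << shouldPrefetch(state, 1.5) << "\n"; // prints 1
  std::cout << shouldPrefetch(state, 1.0) << "\n"; // prints 0: 13 > 13 fails
  return 0;
}
```

Capping the loop at three iterations keeps any single stream pick from doing unbounded warming work, mirroring the rationale the comment cites from ConnPoolImplBase::tryCreateNewConnections.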
@@ -898,9 +904,10 @@ ClusterManagerImpl::httpConnPoolForCluster(const std::string& cluster, ResourceP
// performed here in anticipation of the new stream.
// TODO(alyssawilk) refactor to have one function call and return a pair, so this invariant is
// code-enforced.
maybePrefetch(entry->second, [&entry, &priority, &protocol, &context]() {
return entry->second->connPool(priority, protocol, context, true);
});
maybePrefetch(entry->second, cluster_manager.cluster_manager_state_,
[&entry, &priority, &protocol, &context]() {
return entry->second->connPool(priority, protocol, context, true);
});

return ret;
}
@@ -924,9 +931,10 @@ ClusterManagerImpl::tcpConnPoolForCluster(const std::string& cluster, ResourcePr
// TODO(alyssawilk) refactor to have one function call and return a pair, so this invariant is
// code-enforced.
// Now see if another host should be prefetched.
maybePrefetch(entry->second, [&entry, &priority, &context]() {
return entry->second->tcpConnPool(priority, context, true);
});
maybePrefetch(entry->second, cluster_manager.cluster_manager_state_,
[&entry, &priority, &context]() {
return entry->second->tcpConnPool(priority, context, true);
});

return ret;
}
@@ -1405,8 +1413,10 @@ ClusterManagerImpl::ThreadLocalClusterManagerImpl::ClusterEntry::connPool(
LoadBalancerContext* context, bool peek) {
HostConstSharedPtr host = (peek ? lb_->peekAnotherHost(context) : lb_->chooseHost(context));
if (!host) {
ENVOY_LOG(debug, "no healthy host for HTTP connection pool");
cluster_info_->stats().upstream_cx_none_healthy_.inc();
if (!peek) {
ENVOY_LOG(debug, "no healthy host for HTTP connection pool");
cluster_info_->stats().upstream_cx_none_healthy_.inc();
}
return nullptr;
}

@@ -1466,8 +1476,10 @@ ClusterManagerImpl::ThreadLocalClusterManagerImpl::ClusterEntry::tcpConnPool(
ResourcePriority priority, LoadBalancerContext* context, bool peek) {
HostConstSharedPtr host = (peek ? lb_->peekAnotherHost(context) : lb_->chooseHost(context));
if (!host) {
ENVOY_LOG(debug, "no healthy host for TCP connection pool");
cluster_info_->stats().upstream_cx_none_healthy_.inc();
if (!peek) {
ENVOY_LOG(debug, "no healthy host for TCP connection pool");
cluster_info_->stats().upstream_cx_none_healthy_.inc();
}
return nullptr;
}

1 change: 1 addition & 0 deletions source/common/upstream/cluster_manager_impl.h
@@ -551,6 +551,7 @@ class ClusterManagerImpl : public ClusterManager, Logger::Loggable<Logger::Id::u
void updateClusterCounts();
void clusterWarmingToActive(const std::string& cluster_name);
static void maybePrefetch(ThreadLocalClusterManagerImpl::ClusterEntryPtr& cluster_entry,
const ClusterConnectivityState& cluster_manager_state,
std::function<ConnectionPool::Instance*()> prefetch_pool);

ClusterManagerFactory& factory_;
27 changes: 20 additions & 7 deletions source/common/upstream/load_balancer_impl.cc
@@ -108,16 +108,16 @@ LoadBalancerBase::LoadBalancerBase(
priority_set_(priority_set) {
for (auto& host_set : priority_set_.hostSetsPerPriority()) {
recalculatePerPriorityState(host_set->priority(), priority_set_, per_priority_load_,
per_priority_health_, per_priority_degraded_);
per_priority_health_, per_priority_degraded_, total_healthy_hosts_);
}
// Recalculate panic mode for all levels.
recalculatePerPriorityPanic();

priority_set_.addPriorityUpdateCb(
[this](uint32_t priority, const HostVector&, const HostVector&) -> void {
recalculatePerPriorityState(priority, priority_set_, per_priority_load_,
per_priority_health_, per_priority_degraded_);
});
priority_set_.addPriorityUpdateCb([this](uint32_t priority, const HostVector&,
const HostVector&) -> void {
recalculatePerPriorityState(priority, priority_set_, per_priority_load_, per_priority_health_,
per_priority_degraded_, total_healthy_hosts_);
});

priority_set_.addPriorityUpdateCb(
[this](uint32_t priority, const HostVector&, const HostVector&) -> void {
@@ -146,11 +146,13 @@ void LoadBalancerBase::recalculatePerPriorityState(uint32_t priority,
const PrioritySet& priority_set,
HealthyAndDegradedLoad& per_priority_load,
HealthyAvailability& per_priority_health,
DegradedAvailability& per_priority_degraded) {
DegradedAvailability& per_priority_degraded,
uint32_t& total_healthy_hosts) {
per_priority_load.healthy_priority_load_.get().resize(priority_set.hostSetsPerPriority().size());
per_priority_load.degraded_priority_load_.get().resize(priority_set.hostSetsPerPriority().size());
per_priority_health.get().resize(priority_set.hostSetsPerPriority().size());
per_priority_degraded.get().resize(priority_set.hostSetsPerPriority().size());
total_healthy_hosts = 0;

// Determine the health of the newly modified priority level.
// Health ranges from 0-100, and is the ratio of healthy/degraded hosts to total hosts, modified
@@ -232,6 +234,10 @@ void LoadBalancerBase::recalculatePerPriorityState(uint32_t priority,
per_priority_load.healthy_priority_load_.get().end(), 0) +
std::accumulate(per_priority_load.degraded_priority_load_.get().begin(),
per_priority_load.degraded_priority_load_.get().end(), 0));

for (auto& host_set : priority_set.hostSetsPerPriority()) {
total_healthy_hosts += host_set->healthyHosts().size();
}
}
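The added accumulation publishes a flat healthy-host count alongside the per-priority state; the peek-ahead guards added to the load balancers below compare their stash of speculative picks against it. A trivial sketch of the computation, assuming the per-priority healthy counts are already known (totalHealthyHosts is a hypothetical helper):

```cpp
#include <cstdint>
#include <vector>

// Illustrative only: sums healthy hosts across priority levels, as the
// added loop in recalculatePerPriorityState does via healthyHosts().size().
uint32_t totalHealthyHosts(const std::vector<uint32_t>& healthy_per_priority) {
  uint32_t total = 0;
  for (uint32_t healthy : healthy_per_priority) {
    total += healthy;
  }
  return total; // e.g. {2, 1} across two priorities yields 3
}
```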

// Method iterates through priority levels and turns on/off panic mode.
@@ -774,6 +780,10 @@ void EdfLoadBalancerBase::refresh(uint32_t priority) {
}

HostConstSharedPtr EdfLoadBalancerBase::peekAnotherHost(LoadBalancerContext* context) {
if (stashed_random_.size() + 1 > total_healthy_hosts_) {
return nullptr;
}

const absl::optional<HostsSource> hosts_source = hostSourceToUse(context, random(true));
if (!hosts_source) {
return nullptr;
@@ -859,6 +869,9 @@ HostConstSharedPtr LeastRequestLoadBalancer::unweightedHostPick(const HostVector
}

HostConstSharedPtr RandomLoadBalancer::peekAnotherHost(LoadBalancerContext* context) {
if (stashed_random_.size() + 1 > total_healthy_hosts_) {
return nullptr;
}
return peekOrChoose(context, true);
}

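Both peekAnotherHost overrides above now refuse to peek once every healthy host could already be covered by an outstanding speculative pick; the stashed random values are replayed by the eventual real pick, so the stash size counts peeks not yet consumed. A standalone sketch of the bound (canPeekAnotherHost is a hypothetical helper, not Envoy source):

```cpp
#include <cstddef>
#include <cstdint>

// Illustrative only: a further peek is allowed only while the peeks already
// outstanding, plus this one, do not exceed the healthy hosts available.
bool canPeekAnotherHost(size_t stashed_peeks, uint32_t total_healthy_hosts) {
  return stashed_peeks + 1 <= total_healthy_hosts;
}
```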
5 changes: 4 additions & 1 deletion source/common/upstream/load_balancer_impl.h
@@ -121,7 +121,8 @@ class LoadBalancerBase : public LoadBalancer {
void static recalculatePerPriorityState(uint32_t priority, const PrioritySet& priority_set,
HealthyAndDegradedLoad& priority_load,
HealthyAvailability& per_priority_health,
DegradedAvailability& per_priority_degraded);
DegradedAvailability& per_priority_degraded,
uint32_t& total_healthy_hosts);
void recalculatePerPriorityPanic();

protected:
Expand Down Expand Up @@ -154,6 +155,8 @@ class LoadBalancerBase : public LoadBalancer {
DegradedAvailability per_priority_degraded_;
// Levels which are in panic
std::vector<bool> per_priority_panic_;
// The total count of healthy hosts across all priority levels.
uint32_t total_healthy_hosts_;
};

class LoadBalancerContextBase : public LoadBalancerContext {
@@ -29,7 +29,8 @@ class PreviousPrioritiesRetryPriority : public Upstream::RetryPriority {
void recalculatePerPriorityState(uint32_t priority, const Upstream::PrioritySet& priority_set) {
// Recalculate health and priority the same way the load balancer does it.
Upstream::LoadBalancerBase::recalculatePerPriorityState(
priority, priority_set, per_priority_load_, per_priority_health_, per_priority_degraded_);
priority, priority_set, per_priority_load_, per_priority_health_, per_priority_degraded_,
total_healthy_hosts_);
}

uint32_t adjustedAvailability(std::vector<uint32_t>& per_priority_health,
@@ -47,6 +48,7 @@ class PreviousPrioritiesRetryPriority : public Upstream::RetryPriority {
Upstream::HealthyAndDegradedLoad per_priority_load_;
Upstream::HealthyAvailability per_priority_health_;
Upstream::DegradedAvailability per_priority_degraded_;
uint32_t total_healthy_hosts_;
};

} // namespace Priority