envoyproxy · mattklein123 · Oct 17, 2019 · Oct 10, 2019 · Oct 10, 2019 · Oct 11, 2019
diff --git a/api/envoy/config/filter/http/adaptive_concurrency/v2alpha/adaptive_concurrency.proto b/api/envoy/config/filter/http/adaptive_concurrency/v2alpha/adaptive_concurrency.proto
@@ -15,6 +15,10 @@ import "google/protobuf/wrappers.proto";
 
 import "validate/validate.proto";
 
+// [#protodoc-title: Adaptive Concurrency]
+// Adaptive Concurrency Control :ref:`configuration overview
+// <config_http_filters_adaptive_concurrency>`.
+
 // Configuration parameters for the gradient controller.
 message GradientControllerConfig {
   // Parameters controlling the periodic recalculation of the concurrency limit from sampled request

diff --git a/api/envoy/config/filter/http/adaptive_concurrency/v3alpha/adaptive_concurrency.proto b/api/envoy/config/filter/http/adaptive_concurrency/v3alpha/adaptive_concurrency.proto
@@ -15,6 +15,10 @@ import "google/protobuf/wrappers.proto";
 
 import "validate/validate.proto";
 
+// [#protodoc-title: Adaptive Concurrency]
+// Adaptive Concurrency Control :ref:`configuration overview
+// <config_http_filters_adaptive_concurrency>`.
+
 // Configuration parameters for the gradient controller.
 message GradientControllerConfig {
   // Parameters controlling the periodic recalculation of the concurrency limit from sampled request

diff --git a/docs/root/configuration/http/http_filters/adaptive_concurrency_filter.rst b/docs/root/configuration/http/http_filters/adaptive_concurrency_filter.rst
@@ -0,0 +1,197 @@
+.. _config_http_filters_adaptive_concurrency:
+
+Adaptive Concurrency
+====================
+
+.. attention::
+
+  The adaptive concurrency filter is experimental and is currently under active development.
+
+This filter should be configured with the name `envoy.filters.http.adaptive_concurrency`.
+
+See the :ref:`v2 API reference <envoy_api_msg_config.filter.http.adaptive_concurrency.v2alpha.AdaptiveConcurrency>` for details on each configuration parameter.
+
+Overview
+--------
+The adaptive concurrency filter dynamically adjusts the allowed number of requests that can be
+outstanding (concurrency) to all hosts in a given cluster at any time. Concurrency values are
+calculated using latency sampling of completed requests and comparing the measured samples in a time
+window against the expected latency for hosts in the cluster.
+
+Concurrency Controllers
+-----------------------
+Concurrency controllers implement the algorithm responsible for making forwarding decisions for each
+request and recording latency samples to use in the calculation of the concurrency limit.
+
+Gradient Controller
+~~~~~~~~~~~~~~~~~~~
+The gradient controller makes forwarding decisions based on a periodically measured ideal round-trip
+time (minRTT) for an upstream.
+
+:ref:`v2 API reference <envoy_api_msg_config.filter.http.adaptive_concurrency.v2alpha.GradientControllerConfig>`
+
+Calculating the minRTT
+^^^^^^^^^^^^^^^^^^^^^^
+
+The minRTT is periodically measured by only allowing a single outstanding request at a time to an
+upstream cluster and measuring the latency under these ideal conditions. The length of this minRTT
+calculation window is variable depending on the number of requests the filter is configured to
+aggregate to represent the expected latency of an upstream.
+
+A configurable *jitter* value is used to randomly delay the start of the minRTT calculation window
+by some amount of time. This is not necessary and can be disabled; however, it is recommended to
+prevent all hosts in a cluster from being in a minRTT calculation window (and having a concurrency
+limit of 1) at the same time. The jitter helps negate the effect of the minRTT calculation on the
+downstream success rate if retries are enabled.
+
+It is possible that there is a noticeable increase in request 503s during the minRTT measurement
+window because of the potentially significant drop in the concurrency limit. This is expected and it
+is recommended to enable retries for resets/503s.
+
+.. note::
+
+    It is recommended to use :ref:`the previous_hosts retry predicate
+    <arch_overview_http_retry_plugins>`. Due to the minRTT recalculation jitter, it's unlikely that
+    all hosts in the cluster will be in a minRTT calculation window, so retrying on a different host
+    in the cluster will have a higher likelihood of success in this scenario.
+
+Once calculated, the minRTT is then used in the calculation of a value referred to as the
+*gradient*.
+
+The Gradient
+^^^^^^^^^^^^
+The gradient is calculated using summarized sampled request latencies (sampleRTT):
+
+.. math::
+
+    gradient = \frac{minRTT}{sampleRTT}
+
+This gradient value has a useful property, such that it decreases as the sampled latencies increase.
+The gradient value is then used to update the concurrency limit via:
+
+.. math::
+
+    limit_{new} = gradient * limit_{old} + headroom
+
+Concurrency Limit Headroom
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+The headroom value is necessary as a driving factor to increase the concurrency limit when the
+sampleRTT is in the same ballpark as the minRTT. This value must be present in the limit
+calculation, since it forces the concurrency limit to increase until there is a deviation from the
+minRTT latency. In the absence of a headroom value, the concurrency limit could potentially stagnate
+at an unnecessarily small value if the sampleRTT and minRTT are close to each other.
+
+Because the headroom value is essential to the proper functioning of the gradient controller, the
+headroom value is not configurable and is pinned to the square root of the concurrency limit.
+
+Limitations
+-----------
+The adaptive concurrency filter's control loop relies on latency measurements
+and adjustments to the concurrency limit based on those measurements. Because of
+this, the filter must operate in conditions where it has full control over
+request concurrency. This means that:
+
+    1. The filter works as intended in the filter chain for a local cluster.
+
+    2. The filter must be able to limit the concurrency for a cluster. This means
+       there must not be requests destined for a cluster that are not decoded by
+       the adaptive concurrency filter.
+
+Example Configuration
+---------------------
+An example filter configuration can be found below. Not all fields are required and many of the
+fields can be overridden via runtime settings.
+
+.. code-block:: yaml
+
+  name: envoy.filters.http.adaptive_concurrency
+  config:
+    gradient_controller_config:
+      sample_aggregate_percentile:
+        value: 90
+      concurrency_limit_params:
+        concurrency_update_interval: 0.1s
+      min_rtt_calc_params:
+        jitter:
+          value: 10
+        interval: 60s
+        request_count: 50
+    enabled:
+      default_value: true
+      runtime_key: "adaptive_concurrency.enabled"
+
+The above configuration can be understood as follows:
+
+* Gather latency samples for a time window of 100ms. When entering a new window, summarize the
+  requests (sampleRTT) and update the concurrency limit using this sampleRTT.
+* When calculating the sampleRTT, use the p90 of all sampled latencies for that window.
+* Recalculate the minRTT every 60s and add a jitter (random delay) of 0s-6s to the start of the
+  minRTT recalculation. The delay is dictated by the jitter value.
+* Collect 50 request samples to calculate the minRTT and use the p90 to summarize them.
+* The filter is enabled by default.
+
+.. note::
+
+    It is recommended that the adaptive concurrency filter come after the healthcheck filter in the
+    filter chain to prevent latency sampling of health checks. If health check traffic is sampled,
+    it could potentially affect the accuracy of the minRTT measurements.
+
+Runtime
+-------
+
+The adaptive concurrency filter supports the following runtime settings:
+
+adaptive_concurrency.enabled
+    Overrides whether the adaptive concurrency filter will use the concurrency controller for
+    forwarding decisions. If set to `false`, the filter will be a no-op. Defaults to what is
+    specified for `enabled` in the filter configuration.
+
+adaptive_concurrency.gradient_controller.min_rtt_calc_interval_ms
+    Overrides the interval in which the ideal round-trip time (minRTT) will be recalculated.
+
+adaptive_concurrency.gradient_controller.min_rtt_aggregate_request_count
+    Overrides the number of requests sampled for calculation of the minRTT.
+
+adaptive_concurrency.gradient_controller.jitter
+    Overrides the random delay introduced to the minRTT calculation start time. A value of `10`
+    indicates a random delay of 10% of the configured interval. The runtime value specified is
+    clamped to the range [0,100].
+
+adaptive_concurrency.gradient_controller.sample_rtt_calc_interval_ms
+    Overrides the interval in which the concurrency limit is recalculated based on sampled latencies.
+
+adaptive_concurrency.gradient_controller.max_concurrency_limit
+    Overrides the maximum allowed concurrency limit.
+
+adaptive_concurrency.gradient_controller.max_gradient
+    Overrides the maximum allowed gradient value.
+
+adaptive_concurrency.gradient_controller.sample_aggregate_percentile
+    Overrides the percentile value used to represent the collection of latency samples in
+    calculations. A value of `95` indicates the 95th percentile. The runtime value specified is
+    clamped to the range [0,100].
+
+Statistics
+----------
+The adaptive concurrency filter outputs statistics in the
+*http.<stat_prefix>.adaptive_concurrency.* namespace. The :ref:`stat prefix
+<envoy_api_field_config.filter.network.http_connection_manager.v2.HttpConnectionManager.stat_prefix>`
+comes from the owning HTTP connection manager. Statistics are specific to the concurrency
+controllers.
+
+Gradient Controller Statistics
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The gradient controller uses the namespace
+*http.<stat_prefix>.adaptive_concurrency.gradient_controller*.
+
+.. csv-table::
+  :header: Name, Type, Description
+  :widths: auto
+
+  rq_blocked, Counter, Total requests that were blocked by the filter.
+  min_rtt_calculation_active, Gauge, Set to 1 if the controller is in the process of a minRTT calculation. 0 otherwise.
+  concurrency_limit, Gauge, The current concurrency limit.
+  gradient, Gauge, The current gradient value.
+  burst_queue_size, Gauge, The current headroom value in the concurrency limit calculation.
+  min_rtt_msecs, Gauge, The current measured minRTT value.
+  sample_rtt_msecs, Gauge, The current measured sampleRTT aggregate.
diff --git a/docs/root/configuration/http/http_filters/http_filters.rst b/docs/root/configuration/http/http_filters/http_filters.rst
@@ -6,6 +6,7 @@ HTTP filters
 .. toctree::
   :maxdepth: 2
 
+  adaptive_concurrency_filter
   buffer_filter
   cors_filter
   csrf_filter

diff --git a/source/extensions/filters/http/adaptive_concurrency/BUILD b/source/extensions/filters/http/adaptive_concurrency/BUILD
@@ -2,7 +2,7 @@ licenses(["notice"])  # Apache 2
 
 # HTTP L7 filter that dynamically adjusts the number of allowed concurrent
 # requests based on sampled latencies.
-# Public docs: TODO (tonya11en)
+# Public docs: docs/root/configuration/http/http_filters/adaptive_concurrency_filter.rst
 
 load(
     "//bazel:envoy_build_system.bzl",

diff --git a/...xtensions/filters/http/adaptive_concurrency/concurrency_controller/gradient_controller.cc b/...xtensions/filters/http/adaptive_concurrency/concurrency_controller/gradient_controller.cc
@@ -68,6 +68,7 @@ GradientController::GradientController(GradientControllerConfig config,
     sample_reset_timer_->enableTimer(config_.sampleRTTCalcInterval());
   });
 
+  enterMinRTTSamplingWindow();
   sample_reset_timer_->enableTimer(config_.sampleRTTCalcInterval());
   stats_.concurrency_limit_.set(concurrency_limit_.load());
 }
@@ -81,6 +82,8 @@ GradientControllerStats GradientController::generateStats(Stats::Scope& scope,
 void GradientController::enterMinRTTSamplingWindow() {
   absl::MutexLock ml(&sample_mutation_mtx_);
 
+  stats_.min_rtt_calculation_active_.set(1);
+
   // Set the minRTT flag to indicate we're gathering samples to update the value. This will
   // prevent the sample window from resetting until enough requests are gathered to complete the
   // recalculation.
@@ -102,6 +105,7 @@ void GradientController::updateMinRTT() {
         std::chrono::duration_cast<std::chrono::milliseconds>(min_rtt_).count());
     updateConcurrencyLimit(deferred_limit_value_.load());
     deferred_limit_value_.store(0);
+    stats_.min_rtt_calculation_active_.set(0);
   }
 
   min_rtt_calc_timer_->enableTimer(

diff --git a/...extensions/filters/http/adaptive_concurrency/concurrency_controller/gradient_controller.h b/...extensions/filters/http/adaptive_concurrency/concurrency_controller/gradient_controller.h
@@ -27,6 +27,7 @@ namespace ConcurrencyController {
  */
 #define ALL_GRADIENT_CONTROLLER_STATS(COUNTER, GAUGE)                                              \
   COUNTER(rq_blocked)                                                                              \
+  GAUGE(min_rtt_calculation_active, Accumulate)                                                    \
   GAUGE(concurrency_limit, NeverImport)                                                            \
   GAUGE(gradient, NeverImport)                                                                     \
   GAUGE(burst_queue_size, NeverImport)                                                             \
@@ -70,36 +71,39 @@ class GradientControllerConfig : public Logger::Loggable<Logger::Id::filter> {
   }
 
   double maxGradient() const {
-    return runtime_.snapshot().getDouble(RuntimeKeys::get().MaxGradientKey, max_gradient_);
+    return std::max(
+        1.0, runtime_.snapshot().getDouble(RuntimeKeys::get().MaxGradientKey, max_gradient_));
   }
 
   // The percentage is normalized to the range [0.0, 1.0].
   double sampleAggregatePercentile() const {
-    return runtime_.snapshot().getDouble(RuntimeKeys::get().SampleAggregatePercentileKey,
-                                         sample_aggregate_percentile_) /
-           100.0;
+    const double val = runtime_.snapshot().getDouble(
+        RuntimeKeys::get().SampleAggregatePercentileKey, sample_aggregate_percentile_);
+    return std::max(0.0, std::min(val, 100.0)) / 100.0;
   }
 
-  // The percentage is normalized to the range [0.0, 1.0].
+  // The percentage is normalized and clamped to the range [0.0, 1.0].
   double jitterPercent() const {
-    return runtime_.snapshot().getDouble(RuntimeKeys::get().JitterPercentKey, jitter_pct_) / 100.0;
+    const double val =
+        runtime_.snapshot().getDouble(RuntimeKeys::get().JitterPercentKey, jitter_pct_);
+    return std::max(0.0, std::min(val, 100.0)) / 100.0;
   }
 
 private:
   class RuntimeKeyValues {
   public:
     const std::string MinRTTCalcIntervalKey =
-        "http.adaptive_concurrency.gradient_controller.min_rtt_calc_interval_ms";
+        "adaptive_concurrency.gradient_controller.min_rtt_calc_interval_ms";
     const std::string SampleRTTCalcIntervalKey =
-        "http.adaptive_concurrency.gradient_controller.sample_rtt_calc_interval_ms";
+        "adaptive_concurrency.gradient_controller.sample_rtt_calc_interval_ms";
     const std::string MaxConcurrencyLimitKey =
-        "http.adaptive_concurrency.gradient_controller.max_concurrency_limit";
+        "adaptive_concurrency.gradient_controller.max_concurrency_limit";
     const std::string MinRTTAggregateRequestCountKey =
-        "http.adaptive_concurrency.gradient_controller.min_rtt_aggregate_request_count";
-    const std::string MaxGradientKey = "http.adaptive_concurrency.gradient_controller.max_gradient";
+        "adaptive_concurrency.gradient_controller.min_rtt_aggregate_request_count";
+    const std::string MaxGradientKey = "adaptive_concurrency.gradient_controller.max_gradient";
     const std::string SampleAggregatePercentileKey =
-        "http.adaptive_concurrency.gradient_controller.sample_aggregate_percentile";
-    const std::string JitterPercentKey = "http.adaptive_concurrency.gradient_controller.jitter";
+        "adaptive_concurrency.gradient_controller.sample_aggregate_percentile";
+    const std::string JitterPercentKey = "adaptive_concurrency.gradient_controller.jitter";
   };
 
   using RuntimeKeys = ConstSingleton<RuntimeKeyValues>;

diff --git a/...ions/filters/http/adaptive_concurrency/concurrency_controller/gradient_controller_test.cc b/...ions/filters/http/adaptive_concurrency/concurrency_controller/gradient_controller_test.cc
@@ -188,6 +188,10 @@ TEST_F(GradientControllerTest, MinRTTLogicTest) {
   const auto min_rtt = std::chrono::milliseconds(13);
 
   // The controller should be measuring minRTT upon creation, so the concurrency window is 1.
+  EXPECT_EQ(
+      1,
+      stats_.gauge("test_prefix.min_rtt_calculation_active", Stats::Gauge::ImportMode::Accumulate)
+          .value());
   EXPECT_EQ(controller->concurrencyLimit(), 1);
   tryForward(controller, true);
   tryForward(controller, false);
@@ -203,6 +207,10 @@ TEST_F(GradientControllerTest, MinRTTLogicTest) {
   }
 
   // Verify the minRTT value measured is accurate.
+  EXPECT_EQ(
+      0,
+      stats_.gauge("test_prefix.min_rtt_calculation_active", Stats::Gauge::ImportMode::Accumulate)
+          .value());
   EXPECT_EQ(
       13, stats_.gauge("test_prefix.min_rtt_msecs", Stats::Gauge::ImportMode::NeverImport).value());
 }