From 5e0dcc65382cf5c3300f2386aa3c68319954ee4c Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Wed, 30 Jan 2019 18:21:53 -0500 Subject: [PATCH] stats: Add fake symbol table as an intermediate state to move to SymbolTable API without taking locks. (#5414) Adds an abstract interface for SymbolTable and alternate implementation FakeSymbolTableImpl, which doesn't take locks. Once all stat tokens are symbolized at construction time, this FakeSymbolTable implementation can be deleted, and real-symbol tables can be used, thereby reducing memory and improving stat construction time per #3585 and #4980 . Note that it is not necessary to pre-allocate all elaborated stat names because multiple StatNames can be joined together without taking locks, even in SymbolTableImpl. This implementation simply stores the characters directly in the uint8_t[] that backs each StatName, so there is no sharing or memory savings, but also no state associated with the SymbolTable, and thus no locks needed. Risk Level: low Testing: //test/common/stats/... Signed-off-by: Joshua Marantz Signed-off-by: Fred Douglas --- include/envoy/stats/symbol_table.h | 159 ++++++++- source/common/stats/BUILD | 6 + source/common/stats/fake_symbol_table_impl.h | 98 ++++++ source/common/stats/symbol_table_impl.cc | 126 ++++--- source/common/stats/symbol_table_impl.h | 229 +++++-------- test/common/stats/BUILD | 1 + test/common/stats/symbol_table_impl_test.cc | 341 +++++++++++++------ test/common/stats/symbol_table_speed_test.cc | 2 +- 8 files changed, 674 insertions(+), 288 deletions(-) create mode 100644 source/common/stats/fake_symbol_table_impl.h diff --git a/include/envoy/stats/symbol_table.h b/include/envoy/stats/symbol_table.h index 3f8152a6c859..3bc6ee406983 100644 --- a/include/envoy/stats/symbol_table.h +++ b/include/envoy/stats/symbol_table.h @@ -1,13 +1,166 @@ #pragma once +#include +#include + +#include "envoy/common/pure.h" + +#include "absl/strings/string_view.h" + namespace Envoy { namespace Stats { -// Interface for referencing a stat name. +/** + * Runtime representation of an encoded stat name. This is predeclared only in + * the interface without abstract methods, because (a) the underlying class + * representation is common to both implementations of SymbolTable, and (b) + * we do not want or need the overhead of a vptr per StatName. The common + * declaration for StatName is in source/common/stats/symbol_table_impl.h + */ class StatName; -// Interface for managing symbol tables. -class SymbolTable; +/** + * Intermediate representation for a stat-name. This helps store multiple names + * in a single packed allocation. First we encode each desired name, then sum + * their sizes for the single packed allocation. This is used to store + * MetricImpl's tags and tagExtractedName. Like StatName, we don't want to pay + * a vptr overhead per object, and the representation is shared between the + * SymbolTable implementations, so this is just a pre-declare. + */ +class SymbolEncoding; + +/** + * SymbolTable manages a namespace optimized for stat names, exploiting their + * typical composition from "."-separated tokens, with a significant overlap + * between the tokens. The interface is designed to balance optimal storage + * at scale with hiding details from users. We seek to provide the most abstract + * interface possible that avoids adding per-stat overhead or taking locks in + * the hot path. + */ +class SymbolTable { +public: + /** + * Efficient byte-encoded storage of an array of tokens. The most common + * tokens are typically < 127, and are represented directly. tokens >= 128 + * spill into the next byte, allowing for tokens of arbitrary numeric value to + * be stored. As long as the most common tokens are low-valued, the + * representation is space-efficient. This scheme is similar to UTF-8. The + * token ordering is dependent on the order in which stat-names are encoded + * into the SymbolTable, which will not be optimal, but in practice appears + * to be pretty good. + * + * This is exposed in the interface for the benefit of join(), which which is + * used in the hot-path to append two stat-names into a temp without taking + * locks. This is used then in thread-local cache lookup, so that once warm, + * no locks are taken when looking up stats. + */ + using Storage = uint8_t[]; + using StoragePtr = std::unique_ptr; + + virtual ~SymbolTable() = default; + + /** + * Encodes a stat name using the symbol table, returning a SymbolEncoding. The + * SymbolEncoding is not intended for long-term storage, but is used to help + * allocate a StatName with the correct amount of storage. + * + * When a name is encoded, it bumps reference counts held in the table for + * each symbol. The caller is responsible for creating a StatName using this + * SymbolEncoding and ultimately disposing of it by calling + * SymbolTable::free(). Users are protected from leaking symbols into the pool + * by ASSERTions in the SymbolTable destructor. + * + * @param name The name to encode. + * @return SymbolEncoding the encoded symbols. + */ + virtual SymbolEncoding encode(absl::string_view name) PURE; + + /** + * @return uint64_t the number of symbols in the symbol table. + */ + virtual uint64_t numSymbols() const PURE; + + /** + * Decodes a vector of symbols back into its period-delimited stat name. If + * decoding fails on any part of the symbol_vec, we release_assert and crash, + * since this should never happen, and we don't want to continue running + * with a corrupt stats set. + * + * @param stat_name the stat name. + * @return std::string stringifiied stat_name. + */ + virtual std::string toString(const StatName& stat_name) const PURE; + + /** + * Deterines whether one StatName lexically precedes another. Note that + * the lexical order may not exactly match the lexical order of the + * elaborated strings. For example, stat-name of "-.-" would lexically + * sort after "---" but when encoded as a StatName would come lexically + * earlier. In practice this is unlikely to matter as those are not + * reasonable names for Envoy stats. + * + * Note that this operation has to be performed with the context of the + * SymbolTable so that the individual Symbol objects can be converted + * into strings for lexical comparison. + * + * @param a the first stat name + * @param b the second stat name + * @return bool true if a lexically precedes b. + */ + virtual bool lessThan(const StatName& a, const StatName& b) const PURE; + + /** + * Joins two or more StatNames. For example if we have StatNames for {"a.b", + * "c.d", "e.f"} then the joined stat-name matches "a.b.c.d.e.f". The + * advantage of using this representation is that it avoids having to + * decode/encode into the elaborated form, and does not require locking the + * SymbolTable. + * + * The caveat is that this representation does not bump reference counts on + * the referenced Symbols in the SymbolTable, so it's only valid as long for + * the lifetime of the joined StatNames. + * + * This is intended for use doing cached name lookups of scoped stats, where + * the scope prefix and the names to combine it with are already in StatName + * form. Using this class, they can be combined without acessingm the + * SymbolTable or, in particular, taking its lock. + * + * @param stat_names the names to join. + * @return Storage allocated for the joined name. + */ + virtual StoragePtr join(const std::vector& stat_names) const PURE; + +#ifndef ENVOY_CONFIG_COVERAGE + virtual void debugPrint() const PURE; +#endif + +private: + friend class StatNameStorage; + friend class StatNameList; + + /** + * Since SymbolTable does manual reference counting, a client of SymbolTable + * must manually call free(symbol_vec) when it is freeing the backing store + * for a StatName. This way, the symbol table will grow and shrink + * dynamically, instead of being write-only. + * + * @param stat_name the stat name. + */ + virtual void free(const StatName& stat_name) PURE; + + /** + * StatName backing-store can be managed by callers in a variety of ways + * to minimize overhead. But any persistent reference to a StatName needs + * to hold onto its own reference-counts for all symbols. This method + * helps callers ensure the symbol-storage is maintained for the lifetime + * of a reference. + * + * @param stat_name the stat name. + */ + virtual void incRefCount(const StatName& stat_name) PURE; +}; + +using SharedSymbolTable = std::shared_ptr; } // namespace Stats } // namespace Envoy diff --git a/source/common/stats/BUILD b/source/common/stats/BUILD index 51b3b84c4317..85052c236194 100644 --- a/source/common/stats/BUILD +++ b/source/common/stats/BUILD @@ -135,6 +135,12 @@ envoy_cc_library( ], ) +envoy_cc_library( + name = "fake_symbol_table_lib", + hdrs = ["fake_symbol_table_impl.h"], + deps = [":symbol_table_lib"], +) + envoy_cc_library( name = "stats_options_lib", hdrs = ["stats_options_impl.h"], diff --git a/source/common/stats/fake_symbol_table_impl.h b/source/common/stats/fake_symbol_table_impl.h new file mode 100644 index 000000000000..6c7e2f37bdf8 --- /dev/null +++ b/source/common/stats/fake_symbol_table_impl.h @@ -0,0 +1,98 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include "envoy/common/exception.h" +#include "envoy/stats/symbol_table.h" + +#include "common/common/assert.h" +#include "common/common/hash.h" +#include "common/common/lock_guard.h" +#include "common/common/non_copyable.h" +#include "common/common/thread.h" +#include "common/common/utility.h" +#include "common/stats/symbol_table_impl.h" + +#include "absl/strings/str_join.h" +#include "absl/strings/str_split.h" + +namespace Envoy { +namespace Stats { + +/** + * Implements the SymbolTable interface without taking locks or saving memory. + * This implementation is intended as a transient state for the Envoy codebase + * to allow incremental conversion of Envoy stats call-sites to use the + * SymbolTable interface, pre-allocating symbols during construction time for + * all stats tokens. + * + * Once all stat tokens are symbolized at construction time, this + * FakeSymbolTable implementation can be deleted, and real-symbol tables can be + * used, thereby reducing memory and improving stat construction time. + * + * Note that it is not necessary to pre-allocate all elaborated stat names + * because multiple StatNames can be joined together without taking locks, + * even in SymbolTableImpl. + * + * This implementation simply stores the characters directly in the uint8_t[] + * that backs each StatName, so there is no sharing or memory savings, but also + * no state associated with the SymbolTable, and thus no locks needed. + * + * TODO(jmarantz): delete this class once SymbolTable is fully deployed in the + * Envoy codebase. + */ +class FakeSymbolTableImpl : public SymbolTable { +public: + SymbolEncoding encode(absl::string_view name) override { return encodeHelper(name); } + + std::string toString(const StatName& stat_name) const override { + return std::string(toStringView(stat_name)); + } + uint64_t numSymbols() const override { return 0; } + bool lessThan(const StatName& a, const StatName& b) const override { + return toStringView(a) < toStringView(b); + } + void free(const StatName&) override {} + void incRefCount(const StatName&) override {} + SymbolTable::StoragePtr join(const std::vector& names) const override { + std::vector strings; + for (StatName name : names) { + absl::string_view str = toStringView(name); + if (!str.empty()) { + strings.push_back(str); + } + } + return stringToStorage(absl::StrJoin(strings, ".")); + } + +#ifndef ENVOY_CONFIG_COVERAGE + void debugPrint() const override {} +#endif + +private: + SymbolEncoding encodeHelper(absl::string_view name) const { + SymbolEncoding encoding; + encoding.addStringForFakeSymbolTable(name); + return encoding; + } + + absl::string_view toStringView(const StatName& stat_name) const { + return {reinterpret_cast(stat_name.data()), stat_name.dataSize()}; + } + + SymbolTable::StoragePtr stringToStorage(absl::string_view name) const { + SymbolEncoding encoding = encodeHelper(name); + auto bytes = std::make_unique(encoding.bytesRequired()); + encoding.moveToStorage(bytes.get()); + return bytes; + } +}; + +} // namespace Stats +} // namespace Envoy diff --git a/source/common/stats/symbol_table_impl.cc b/source/common/stats/symbol_table_impl.cc index 6ec094ab877e..82a217ba0a49 100644 --- a/source/common/stats/symbol_table_impl.cc +++ b/source/common/stats/symbol_table_impl.cc @@ -1,5 +1,6 @@ #include "common/stats/symbol_table_impl.h" +#include #include #include #include @@ -15,14 +16,10 @@ namespace Stats { static const uint32_t SpilloverMask = 0x80; static const uint32_t Low7Bits = 0x7f; -StatName::StatName(const StatName& src, SymbolStorage memory) : size_and_data_(memory) { +StatName::StatName(const StatName& src, SymbolTable::Storage memory) : size_and_data_(memory) { memcpy(memory, src.size_and_data_, src.size()); } -std::string StatName::toString(const SymbolTable& table) const { - return table.decode(data(), dataSize()); -} - #ifndef ENVOY_CONFIG_COVERAGE void StatName::debugPrint() { if (size_and_data_ == nullptr) { @@ -63,7 +60,14 @@ void SymbolEncoding::addSymbol(Symbol symbol) { } while (symbol != 0); } -SymbolVec SymbolEncoding::decodeSymbols(const SymbolStorage array, uint64_t size) { +void SymbolEncoding::addStringForFakeSymbolTable(absl::string_view str) { + if (!str.empty()) { + vec_.resize(str.size()); + memcpy(&vec_[0], str.data(), str.size()); + } +} + +SymbolVec SymbolEncoding::decodeSymbols(const SymbolTable::Storage array, uint64_t size) { SymbolVec symbol_vec; Symbol symbol = 0; for (uint32_t shift = 0; size > 0; --size, ++array) { @@ -94,7 +98,7 @@ static inline uint8_t* saveLengthToBytesReturningNext(uint64_t length, uint8_t* return bytes; } -uint64_t SymbolEncoding::moveToStorage(SymbolStorage symbol_array) { +uint64_t SymbolEncoding::moveToStorage(SymbolTable::Storage symbol_array) { uint64_t sz = size(); symbol_array = saveLengthToBytesReturningNext(sz, symbol_array); if (sz != 0) { @@ -104,11 +108,11 @@ uint64_t SymbolEncoding::moveToStorage(SymbolStorage symbol_array) { return sz + StatNameSizeEncodingBytes; } -SymbolTable::SymbolTable() +SymbolTableImpl::SymbolTableImpl() // Have to be explicitly initialized, if we want to use the GUARDED_BY macro. : next_symbol_(0), monotonic_counter_(0) {} -SymbolTable::~SymbolTable() { +SymbolTableImpl::~SymbolTableImpl() { // To avoid leaks into the symbol table, we expect all StatNames to be freed. // Note: this could potentially be short-circuited if we decide a fast exit // is needed in production. But it would be good to ensure clean up during @@ -119,7 +123,7 @@ SymbolTable::~SymbolTable() { // TODO(ambuc): There is a possible performance optimization here for avoiding // the encoding of IPs / numbers if they appear in stat names. We don't want to // waste time symbolizing an integer as an integer, if we can help it. -SymbolEncoding SymbolTable::encode(const absl::string_view name) { +SymbolEncoding SymbolTableImpl::encode(const absl::string_view name) { SymbolEncoding encoding; if (name.empty()) { @@ -148,11 +152,17 @@ SymbolEncoding SymbolTable::encode(const absl::string_view name) { return encoding; } -std::string SymbolTable::decode(const SymbolStorage symbol_array, uint64_t size) const { - return decodeSymbolVec(SymbolEncoding::decodeSymbols(symbol_array, size)); +uint64_t SymbolTableImpl::numSymbols() const { + Thread::LockGuard lock(lock_); + ASSERT(encode_map_.size() == decode_map_.size()); + return encode_map_.size(); +} + +std::string SymbolTableImpl::toString(const StatName& stat_name) const { + return decodeSymbolVec(SymbolEncoding::decodeSymbols(stat_name.data(), stat_name.dataSize())); } -std::string SymbolTable::decodeSymbolVec(const SymbolVec& symbols) const { +std::string SymbolTableImpl::decodeSymbolVec(const SymbolVec& symbols) const { std::vector name_tokens; name_tokens.reserve(symbols.size()); { @@ -165,8 +175,8 @@ std::string SymbolTable::decodeSymbolVec(const SymbolVec& symbols) const { return absl::StrJoin(name_tokens, "."); } -void SymbolTable::incRefCount(const StatName& stat_name) { - // Before taking the lock, decode the array of symbols from the SymbolStorage. +void SymbolTableImpl::incRefCount(const StatName& stat_name) { + // Before taking the lock, decode the array of symbols from the SymbolTable::Storage. SymbolVec symbols = SymbolEncoding::decodeSymbols(stat_name.data(), stat_name.dataSize()); Thread::LockGuard lock(lock_); @@ -181,8 +191,8 @@ void SymbolTable::incRefCount(const StatName& stat_name) { } } -void SymbolTable::free(const StatName& stat_name) { - // Before taking the lock, decode the array of symbols from the SymbolStorage. +void SymbolTableImpl::free(const StatName& stat_name) { + // Before taking the lock, decode the array of symbols from the SymbolTable::Storage. SymbolVec symbols = SymbolEncoding::decodeSymbols(stat_name.data(), stat_name.dataSize()); Thread::LockGuard lock(lock_); @@ -206,8 +216,7 @@ void SymbolTable::free(const StatName& stat_name) { } } } - -Symbol SymbolTable::toSymbol(absl::string_view sv) { +Symbol SymbolTableImpl::toSymbol(absl::string_view sv) { Symbol result; auto encode_find = encode_map_.find(sv); // If the string segment doesn't already exist, @@ -233,13 +242,14 @@ Symbol SymbolTable::toSymbol(absl::string_view sv) { return result; } -absl::string_view SymbolTable::fromSymbol(const Symbol symbol) const SHARED_LOCKS_REQUIRED(lock_) { +absl::string_view SymbolTableImpl::fromSymbol(const Symbol symbol) const + EXCLUSIVE_LOCKS_REQUIRED(lock_) { auto search = decode_map_.find(symbol); RELEASE_ASSERT(search != decode_map_.end(), "no such symbol"); - return absl::string_view(*search->second); + return {*search->second}; } -void SymbolTable::newSymbol() EXCLUSIVE_LOCKS_REQUIRED(lock_) { +void SymbolTableImpl::newSymbol() EXCLUSIVE_LOCKS_REQUIRED(lock_) { if (pool_.empty()) { next_symbol_ = ++monotonic_counter_; } else { @@ -250,16 +260,19 @@ void SymbolTable::newSymbol() EXCLUSIVE_LOCKS_REQUIRED(lock_) { ASSERT(monotonic_counter_ != 0); } -bool SymbolTable::lessThan(const StatName& a, const StatName& b) const { +bool SymbolTableImpl::lessThan(const StatName& a, const StatName& b) const { // Constructing two temp vectors during lessThan is not strictly necessary. // If this becomes a performance bottleneck (e.g. during sorting), we could // provide an iterator-like interface for incrementally decoding the symbols // without allocating memory. SymbolVec av = SymbolEncoding::decodeSymbols(a.data(), a.dataSize()); SymbolVec bv = SymbolEncoding::decodeSymbols(b.data(), b.dataSize()); + + // Calling fromSymbol requires holding the lock, as it needs read-access to + // the maps that are written when adding new symbols. + Thread::LockGuard lock(lock_); for (uint64_t i = 0, n = std::min(av.size(), bv.size()); i < n; ++i) { if (av[i] != bv[i]) { - Thread::LockGuard lock(lock_); bool ret = fromSymbol(av[i]) < fromSymbol(bv[i]); return ret; } @@ -268,7 +281,7 @@ bool SymbolTable::lessThan(const StatName& a, const StatName& b) const { } #ifndef ENVOY_CONFIG_COVERAGE -void SymbolTable::debugPrint() const { +void SymbolTableImpl::debugPrint() const { Thread::LockGuard lock(lock_); std::vector symbols; for (const auto& p : decode_map_) { @@ -309,30 +322,67 @@ void StatNameStorage::free(SymbolTable& table) { bytes_.reset(); } -StatNameJoiner::StatNameJoiner(StatName a, StatName b) { - const uint64_t a_size = a.dataSize(); - const uint64_t b_size = b.dataSize(); - uint8_t* const p = alloc(a_size + b_size); - memcpy(p, a.data(), a_size); - memcpy(p + a_size, b.data(), b_size); -} - -StatNameJoiner::StatNameJoiner(const std::vector& stat_names) { +SymbolTable::StoragePtr SymbolTableImpl::join(const std::vector& stat_names) const { uint64_t num_bytes = 0; for (StatName stat_name : stat_names) { num_bytes += stat_name.dataSize(); } - uint8_t* p = alloc(num_bytes); + auto bytes = std::make_unique(num_bytes + StatNameSizeEncodingBytes); + uint8_t* p = saveLengthToBytesReturningNext(num_bytes, bytes.get()); for (StatName stat_name : stat_names) { num_bytes = stat_name.dataSize(); memcpy(p, stat_name.data(), num_bytes); p += num_bytes; } + return bytes; +} + +StatNameList::~StatNameList() { ASSERT(!populated()); } + +void StatNameList::populate(const std::vector& names, + SymbolTable& symbol_table) { + RELEASE_ASSERT(names.size() < 256, "Maximum number elements in a StatNameList exceeded"); + + // First encode all the names. + size_t total_size_bytes = 1; /* one byte for holding the number of names */ + std::vector encodings; + encodings.resize(names.size()); + int index = 0; + for (auto& name : names) { + SymbolEncoding encoding = symbol_table.encode(name); + total_size_bytes += encoding.bytesRequired(); + encodings[index++].swap(encoding); + } + + // Now allocate the exact number of bytes required and move the encodings + // into storage. + storage_ = std::make_unique(total_size_bytes); + uint8_t* p = &storage_[0]; + *p++ = encodings.size(); + for (auto& encoding : encodings) { + p += encoding.moveToStorage(p); + } + ASSERT(p == &storage_[0] + total_size_bytes); +} + +void StatNameList::iterate(const std::function& f) const { + uint8_t* p = &storage_[0]; + uint32_t num_elements = *p++; + for (uint32_t i = 0; i < num_elements; ++i) { + StatName stat_name(p); + p += stat_name.size(); + if (!f(stat_name)) { + break; + } + } } -uint8_t* StatNameJoiner::alloc(uint64_t num_bytes) { - bytes_ = std::make_unique(num_bytes + StatNameSizeEncodingBytes); - return saveLengthToBytesReturningNext(num_bytes, bytes_.get()); +void StatNameList::clear(SymbolTable& symbol_table) { + iterate([&symbol_table](StatName stat_name) -> bool { + symbol_table.free(stat_name); + return true; + }); + storage_.reset(); } } // namespace Stats diff --git a/source/common/stats/symbol_table_impl.h b/source/common/stats/symbol_table_impl.h index 6004ab22f2c2..f7c33ebd1d93 100644 --- a/source/common/stats/symbol_table_impl.h +++ b/source/common/stats/symbol_table_impl.h @@ -28,15 +28,6 @@ namespace Stats { /** A Symbol represents a string-token with a small index. */ using Symbol = uint32_t; -/** - * Efficient byte-encoded storage of an array of tokens. The most common tokens - * are typically < 127, and are represented directly. tokens >= 128 spill into - * the next byte, allowing for tokens of arbitrary numeric value to be stored. - * As long as the most common tokens are low-valued, the representation is - * space-efficient. This scheme is similar to UTF-8. - */ -using SymbolStorage = uint8_t[]; - /** * We encode the byte-size of a StatName as its first two bytes. */ @@ -68,10 +59,18 @@ class SymbolEncoding { */ void addSymbol(Symbol symbol); + /** + * Encodes an entire string into the vec, on behalf of FakeSymbolTableImpl. + * TODO(jmarantz): delete this method when FakeSymbolTableImpl is deleted. + * + * @param str The string to encode. + */ + void addStringForFakeSymbolTable(absl::string_view str); + /** * Decodes a uint8_t array into a SymbolVec. */ - static SymbolVec decodeSymbols(const SymbolStorage array, uint64_t size); + static SymbolVec decodeSymbols(const SymbolTable::Storage array, uint64_t size); /** * Returns the number of bytes required to represent StatName as a uint8_t @@ -91,7 +90,7 @@ class SymbolEncoding { * @param array destination memory to receive the encoded bytes. * @return uint64_t the number of bytes transferred. */ - uint64_t moveToStorage(SymbolStorage array); + uint64_t moveToStorage(SymbolTable::Storage array); void swap(SymbolEncoding& src) { vec_.swap(src.vec_); } @@ -100,16 +99,17 @@ class SymbolEncoding { }; /** - * SymbolTable manages a namespace optimized for stats, which are typically + * SymbolTableImpl manages a namespace optimized for stats, which are typically * composed of arrays of "."-separated tokens, with a significant overlap * between the tokens. Each token is mapped to a Symbol (uint32_t) and * reference-counted so that no-longer-used symbols can be reclaimed. * - * We use a uint8_t array to encode arrays of symbols in order to conserve - * space, as in practice the majority of token instances in stat names draw from - * a fairly small set of common names, typically less than 100. The format is - * somewhat similar to UTF-8, with a variable-length array of uint8_t. See the - * implementation for details. + * We use a uint8_t array to encode a "."-deliminated stat-name into arrays of + * integer symbol symbol IDs in order to conserve space, as in practice the + * majority of token instances in stat names draw from a fairly small set of + * common names, typically less than 100. The format is somewhat similar to + * UTF-8, with a variable-length array of uint8_t. See the implementation for + * details. * * StatNameStorage can be used to manage memory for the byte-encoding. Not all * StatNames are backed by StatNameStorage -- the storage may be inlined into @@ -128,95 +128,24 @@ class SymbolEncoding { * same string is re-encoded, it may or may not encode to the same underlying * symbol. */ -class SymbolTable { +class SymbolTableImpl : public SymbolTable { public: - SymbolTable(); - ~SymbolTable(); - - /** - * Encodes a stat name using the symbol table, returning a SymbolEncoding. The - * SymbolEncoding is not intended for long-term storage, but is used to help - * allocate and StatName with the correct amount of storage. - * - * When a name is encoded, it bumps reference counts held in the table for - * each symbol. The caller is responsible for creating a StatName using this - * SymbolEncoding and ultimately disposing of it by calling - * StatName::free(). Otherwise the symbols will leak for the lifetime of the - * table, though they won't show up as a C++ leaks as the memory is still - * reachable from the SymbolTable. - * - * @param name The name to encode. - * @return SymbolEncoding the encoded symbols. - */ - SymbolEncoding encode(absl::string_view name); - - /** - * @return uint64_t the number of symbols in the symbol table. - */ - uint64_t numSymbols() const { - Thread::LockGuard lock(lock_); - ASSERT(encode_map_.size() == decode_map_.size()); - uint64_t sz = encode_map_.size(); - return sz; - } - - /** - * Determines whether one StatName lexically precedes another. Note that - * the lexical order may not exactly match the lexical order of the - * elaborated strings. For example, stat-name of "-.-" would lexically - * sort after "---" but when encoded as a StatName would come lexically - * earlier. In practice this is unlikely to matter as those are not - * reasonable names for Envoy stats. - * - * Note that this operation has to be performed with the context of the - * SymbolTable so that the individual Symbol objects can be converted - * into strings for lexical comparison. - * - * @param a the first stat name - * @param b the second stat name - * @return bool true if a lexically precedes b. - */ - bool lessThan(const StatName& a, const StatName& b) const; - - /** - * Since SymbolTable does manual reference counting, a client of SymbolTable - * must manually call free(stat_name) when it is freeing the backing store - * for a StatName. This way, the symbol table will grow and shrink - * dynamically, instead of being write-only. - * - * @param stat_name the stat name whose symbols should be freed. - */ - void free(const StatName& stat_name); - - /** - * StatName backing-store can be managed by callers in a variety of ways - * to minimize overhead. But any persistent reference to a StatName needs - * to hold onto its own reference-counts for all symbols. This method - * helps callers ensure the symbol-storage is maintained for the lifetime - * of a reference. - * - * @param stat_name the stat name whose symbols should be freed. - */ - void incRefCount(const StatName& stat_name); + SymbolTableImpl(); + ~SymbolTableImpl() override; + + // SymbolTable + std::string toString(const StatName& stat_name) const override; + SymbolEncoding encode(absl::string_view name) override; + uint64_t numSymbols() const override; + bool lessThan(const StatName& a, const StatName& b) const override; + void free(const StatName& stat_name) override; + void incRefCount(const StatName& stat_name) override; + SymbolTable::StoragePtr join(const std::vector& stat_names) const override; #ifndef ENVOY_CONFIG_COVERAGE - // It is convenient when debugging to be able to print the state of the table, - // but this code is not hit during tests ordinarily, and is not needed in - // production code. - void debugPrint() const; + void debugPrint() const override; #endif - /** - * Decodes a vector of symbols back into its period-delimited stat name. If - * decoding fails on any part of the symbol_vec, we release_assert, since this - * should never happen, and we don't want to continue running with a corrupt - * stats set. - * - * @param symbol_vec the vector of symbols to decode. - * @return std::string the retrieved stat name. - */ - std::string decode(const SymbolStorage symbol_vec, uint64_t size) const; - private: friend class StatName; friend class StatNameTest; @@ -256,12 +185,15 @@ class SymbolTable { * @param symbol the individual symbol to be decoded. * @return absl::string_view the decoded string. */ - absl::string_view fromSymbol(Symbol symbol) const; + absl::string_view fromSymbol(Symbol symbol) const EXCLUSIVE_LOCKS_REQUIRED(lock_); // Stages a new symbol for use. To be called after a successful insertion. void newSymbol(); - Symbol monotonicCounter() { return monotonic_counter_; } + Symbol monotonicCounter() { + Thread::LockGuard lock(lock_); + return monotonic_counter_; + } // Stores the symbol to be used at next insertion. This should exist ahead of insertion time so // that if insertion succeeds, the value written is the correct one. @@ -332,7 +264,7 @@ class StatNameStorage { inline StatName statName() const; private: - std::unique_ptr bytes_; + SymbolTable::StoragePtr bytes_; }; /** @@ -349,7 +281,7 @@ class StatName { public: // Constructs a StatName object directly referencing the storage of another // StatName. - explicit StatName(const SymbolStorage size_and_data) : size_and_data_(size_and_data) {} + explicit StatName(const SymbolTable::Storage size_and_data) : size_and_data_(size_and_data) {} // Constructs an empty StatName object. StatName() : size_and_data_(nullptr) {} @@ -357,9 +289,7 @@ class StatName { // Constructs a StatName object with new storage, which must be of size // src.size(). This is used in the a flow where we first construct a StatName // for lookup in a cache, and then on a miss need to store the data directly. - StatName(const StatName& src, SymbolStorage memory); - - std::string toString(const SymbolTable& table) const; + StatName(const StatName& src, SymbolTable::Storage memory); /** * Note that this hash function will return a different hash than that of @@ -392,7 +322,7 @@ class StatName { */ uint64_t size() const { return dataSize() + StatNameSizeEncodingBytes; } - void copyToStorage(SymbolStorage storage) { memcpy(storage, size_and_data_, size()); } + void copyToStorage(SymbolTable::Storage storage) { memcpy(storage, size_and_data_, size()); } #ifndef ENVOY_CONFIG_COVERAGE void debugPrint(); @@ -409,37 +339,6 @@ class StatName { StatName StatNameStorage::statName() const { return StatName(bytes_.get()); } -/** - * Joins two or more StatNames. For example if we have StatNames for {"a.b", - * "c.d", "e.f"} then the joined stat-name matches "a.b.c.d.e.f". The advantage - * of using this representation is that it avoids having to decode/encode - * into the elaborated form, and does not require locking the SymbolTable. - * - * The caveat is that this representation does not bump reference counts on - * for the referenced Symbols in the SymbolTable, so it's only valid as long - * for the lifetime of the joined StatNames. - * - * This is intended for use doing cached name lookups of scoped stats, where - * the scope prefix and the names to combine it with are already in StatName - * form. Using this class, they can be combined without accessing the - * SymbolTable or, in particular, taking its lock. - */ -class StatNameJoiner { -public: - StatNameJoiner(StatName a, StatName b); - StatNameJoiner(const std::vector& stat_names); - - /** - * @return StatName a reference to the joined StatName. - */ - StatName statName() const { return StatName(bytes_.get()); } - -private: - uint8_t* alloc(uint64_t num_bytes); - - std::unique_ptr bytes_; -}; - /** * Contains the backing store for a StatName and enough context so it can * self-delete through RAII. This works by augmenting StatNameStorage with a @@ -465,6 +364,56 @@ class StatNameTempStorage : public StatNameStorage { SymbolTable& symbol_table_; }; +// Represents an ordered container of StatNames. The encoding for each StatName +// is byte-packed together, so this carries less overhead than allocating the +// storage separately. The tradeoff is there is no random access; you can only +// iterate through the StatNames. +// +// The maximum size of the list is 255 elements, so the length can fit in a +// byte. It would not be difficult to increase this, but there does not appear +// to be a current need. +class StatNameList { +public: + ~StatNameList(); + + /** + * Populates the StatNameList from a list of encodings. This is not done at + * construction time to enable StatNameList to be instantiated directly in + * a class that doesn't have a live SymbolTable when it is constructed. + * + * @param encodings The list names to encode. + * @param symbol_table The symbol table in which to encode the names. + */ + void populate(const std::vector& encodings, SymbolTable& symbol_table); + + /** + * @return true if populate() has been called on this list. + */ + bool populated() const { return storage_ != nullptr; } + + /** + * Iterates over each StatName in the list, calling f(StatName). f() should + * return true to keep iterating, or false to end the iteration. + * + * @param f The function to call on each stat. + */ + void iterate(const std::function& f) const; + + /** + * Frees each StatName in the list. Failure to call this before destruction + * results in an ASSERT at destruction of the list and the SymbolTable. + * + * This is not done as part of destruction as the SymbolTable may already + * be destroyed. + * + * @param symbol_table the symbol table. + */ + void clear(SymbolTable& symbol_table); + +private: + std::unique_ptr storage_; +}; + // Helper class for constructing hash-tables with StatName keys. struct StatNameHash { size_t operator()(const StatName& a) const { return a.hash(); } diff --git a/test/common/stats/BUILD b/test/common/stats/BUILD index 2abece37c900..b3d2020e4389 100644 --- a/test/common/stats/BUILD +++ b/test/common/stats/BUILD @@ -74,6 +74,7 @@ envoy_cc_test( ":stat_test_utility_lib", "//source/common/common:mutex_tracer_lib", "//source/common/memory:stats_lib", + "//source/common/stats:fake_symbol_table_lib", "//source/common/stats:symbol_table_lib", "//test/mocks/stats:stats_mocks", "//test/test_common:logging_lib", diff --git a/test/common/stats/symbol_table_impl_test.cc b/test/common/stats/symbol_table_impl_test.cc index ab4b96761ba7..6b91a15236f4 100644 --- a/test/common/stats/symbol_table_impl_test.cc +++ b/test/common/stats/symbol_table_impl_test.cc @@ -2,6 +2,7 @@ #include "common/common/mutex_tracer_impl.h" #include "common/memory/stats.h" +#include "common/stats/fake_symbol_table_impl.h" #include "common/stats/symbol_table_impl.h" #include "test/common/stats/stat_test_utility.h" @@ -14,82 +15,116 @@ namespace Envoy { namespace Stats { -class StatNameTest : public testing::Test { +// See comments in fake_symbol_table_impl.h: we need to test two implementations +// of SymbolTable, which we'll do with a test parameterized on this enum. +// +// Note that some of the tests cover behavior that is specific to the real +// SymbolTableImpl, and thus early-exit when the param is Fake. +// +// TODO(jmarantz): un-parameterize this test once SymbolTable is fully deployed +// and FakeSymbolTableImpl can be deleted. +enum class SymbolTableType { + Real, + Fake, +}; + +class StatNameTest : public testing::TestWithParam { protected: - ~StatNameTest() { clearStorage(); } + StatNameTest() { + switch (GetParam()) { + case SymbolTableType::Real: { + auto table = std::make_unique(); + real_symbol_table_ = table.get(); + table_ = std::move(table); + break; + } + case SymbolTableType::Fake: + table_ = std::make_unique(); + break; + } + } + ~StatNameTest() override { clearStorage(); } void clearStorage() { for (auto& stat_name_storage : stat_name_storage_) { - stat_name_storage.free(table_); + stat_name_storage.free(*table_); } stat_name_storage_.clear(); - EXPECT_EQ(0, table_.numSymbols()); + EXPECT_EQ(0, table_->numSymbols()); } SymbolVec getSymbols(StatName stat_name) { return SymbolEncoding::decodeSymbols(stat_name.data(), stat_name.dataSize()); } std::string decodeSymbolVec(const SymbolVec& symbol_vec) { - return table_.decodeSymbolVec(symbol_vec); + return real_symbol_table_->decodeSymbolVec(symbol_vec); } - Symbol monotonicCounter() { return table_.monotonicCounter(); } + Symbol monotonicCounter() { return real_symbol_table_->monotonicCounter(); } std::string encodeDecode(absl::string_view stat_name) { - return makeStat(stat_name).toString(table_); + return table_->toString(makeStat(stat_name)); } - StatNameStorage makeStatStorage(absl::string_view name) { return StatNameStorage(name, table_); } + StatNameStorage makeStatStorage(absl::string_view name) { return StatNameStorage(name, *table_); } StatName makeStat(absl::string_view name) { stat_name_storage_.emplace_back(makeStatStorage(name)); return stat_name_storage_.back().statName(); } - SymbolTable table_; + SymbolTableImpl* real_symbol_table_{nullptr}; + std::unique_ptr table_; std::vector stat_name_storage_; }; -TEST_F(StatNameTest, AllocFree) { encodeDecode("hello.world"); } +INSTANTIATE_TEST_CASE_P(StatNameTest, StatNameTest, + testing::ValuesIn({SymbolTableType::Real, SymbolTableType::Fake})); + +TEST_P(StatNameTest, AllocFree) { encodeDecode("hello.world"); } -TEST_F(StatNameTest, TestArbitrarySymbolRoundtrip) { +TEST_P(StatNameTest, TestArbitrarySymbolRoundtrip) { const std::vector stat_names = {"", " ", " ", ",", "\t", "$", "%", "`", "."}; - for (auto stat_name : stat_names) { + for (auto& stat_name : stat_names) { EXPECT_EQ(stat_name, encodeDecode(stat_name)); } } -TEST_F(StatNameTest, Test100kSymbolsRoundtrip) { +TEST_P(StatNameTest, Test100KSymbolsRoundtrip) { for (int i = 0; i < 100 * 1000; ++i) { const std::string stat_name = absl::StrCat("symbol_", i); EXPECT_EQ(stat_name, encodeDecode(stat_name)); } } -TEST_F(StatNameTest, TestUnusualDelimitersRoundtrip) { +TEST_P(StatNameTest, TestUnusualDelimitersRoundtrip) { const std::vector stat_names = {".", "..", "...", "foo", "foo.", ".foo", ".foo.", ".foo..", "..foo.", "..foo.."}; - for (auto stat_name : stat_names) { + for (auto& stat_name : stat_names) { EXPECT_EQ(stat_name, encodeDecode(stat_name)); } } -TEST_F(StatNameTest, TestSuccessfulDoubleLookup) { +TEST_P(StatNameTest, TestSuccessfulDoubleLookup) { StatName stat_name_1(makeStat("foo.bar.baz")); StatName stat_name_2(makeStat("foo.bar.baz")); EXPECT_EQ(stat_name_1, stat_name_2); } -TEST_F(StatNameTest, TestSuccessfulDecode) { +TEST_P(StatNameTest, TestSuccessfulDecode) { std::string stat_name = "foo.bar.baz"; StatName stat_name_1(makeStat(stat_name)); StatName stat_name_2(makeStat(stat_name)); - EXPECT_EQ(stat_name_1.toString(table_), stat_name_2.toString(table_)); - EXPECT_EQ(stat_name_1.toString(table_), stat_name); + EXPECT_EQ(table_->toString(stat_name_1), table_->toString(stat_name_2)); + EXPECT_EQ(table_->toString(stat_name_1), stat_name); } class StatNameDeathTest : public StatNameTest {}; -TEST_F(StatNameDeathTest, TestBadDecodes) { +TEST_P(StatNameDeathTest, TestBadDecodes) { + if (GetParam() == SymbolTableType::Fake) { + return; + } + { // If a symbol doesn't exist, decoding it should trigger an ASSERT() and crash. SymbolVec bad_symbol_vec = {1}; // symbol 0 is the empty symbol. @@ -107,14 +142,17 @@ TEST_F(StatNameDeathTest, TestBadDecodes) { } } -TEST_F(StatNameTest, TestDifferentStats) { +TEST_P(StatNameTest, TestDifferentStats) { StatName stat_name_1(makeStat("foo.bar")); StatName stat_name_2(makeStat("bar.foo")); - EXPECT_NE(stat_name_1.toString(table_), stat_name_2.toString(table_)); + EXPECT_NE(table_->toString(stat_name_1), table_->toString(stat_name_2)); EXPECT_NE(stat_name_1, stat_name_2); } -TEST_F(StatNameTest, TestSymbolConsistency) { +TEST_P(StatNameTest, TestSymbolConsistency) { + if (GetParam() == SymbolTableType::Fake) { + return; + } StatName stat_name_1(makeStat("foo.bar")); StatName stat_name_2(makeStat("bar.foo")); // We expect the encoding of "foo" in one context to be the same as another. @@ -124,13 +162,13 @@ TEST_F(StatNameTest, TestSymbolConsistency) { EXPECT_EQ(vec_2[0], vec_1[1]); } -TEST_F(StatNameTest, TestSameValueOnPartialFree) { +TEST_P(StatNameTest, TestSameValueOnPartialFree) { // This should hold true for components as well. Since "foo" persists even when "foo.bar" is // freed, we expect both instances of "foo" to have the same symbol. makeStat("foo"); StatNameStorage stat_foobar_1(makeStatStorage("foo.bar")); SymbolVec stat_foobar_1_symbols = getSymbols(stat_foobar_1.statName()); - stat_foobar_1.free(table_); + stat_foobar_1.free(*table_); StatName stat_foobar_2(makeStat("foo.bar")); SymbolVec stat_foobar_2_symbols = getSymbols(stat_foobar_2); @@ -139,7 +177,11 @@ TEST_F(StatNameTest, TestSameValueOnPartialFree) { // And we have no expectation for the "bar" components, because of the free pool. } -TEST_F(StatNameTest, FreePoolTest) { +TEST_P(StatNameTest, FreePoolTest) { + if (GetParam() == SymbolTableType::Fake) { + return; + } + // To ensure that the free pool is being used, we should be able to cycle through a large number // of stats while validating that: // a) the size of the table has not increased, and @@ -153,11 +195,11 @@ TEST_F(StatNameTest, FreePoolTest) { makeStat("4a"); makeStat("5a"); EXPECT_EQ(monotonicCounter(), 5); - EXPECT_EQ(table_.numSymbols(), 5); + EXPECT_EQ(table_->numSymbols(), 5); clearStorage(); } EXPECT_EQ(monotonicCounter(), 5); - EXPECT_EQ(table_.numSymbols(), 0); + EXPECT_EQ(table_->numSymbols(), 0); // These are different strings being encoded, but they should recycle through the same symbols as // the stats above. @@ -167,55 +209,55 @@ TEST_F(StatNameTest, FreePoolTest) { makeStat("4b"); makeStat("5b"); EXPECT_EQ(monotonicCounter(), 5); - EXPECT_EQ(table_.numSymbols(), 5); + EXPECT_EQ(table_->numSymbols(), 5); makeStat("6"); EXPECT_EQ(monotonicCounter(), 6); - EXPECT_EQ(table_.numSymbols(), 6); + EXPECT_EQ(table_->numSymbols(), 6); } -TEST_F(StatNameTest, TestShrinkingExpectation) { +TEST_P(StatNameTest, TestShrinkingExpectation) { // We expect that as we free stat names, the memory used to store those underlying symbols will // be freed. // ::size() is a public function, but should only be used for testing. - size_t table_size_0 = table_.numSymbols(); + size_t table_size_0 = table_->numSymbols(); StatNameStorage stat_a(makeStatStorage("a")); - size_t table_size_1 = table_.numSymbols(); + size_t table_size_1 = table_->numSymbols(); StatNameStorage stat_aa(makeStatStorage("a.a")); - EXPECT_EQ(table_size_1, table_.numSymbols()); + EXPECT_EQ(table_size_1, table_->numSymbols()); StatNameStorage stat_ab(makeStatStorage("a.b")); - size_t table_size_2 = table_.numSymbols(); + size_t table_size_2 = table_->numSymbols(); StatNameStorage stat_ac(makeStatStorage("a.c")); - size_t table_size_3 = table_.numSymbols(); + size_t table_size_3 = table_->numSymbols(); StatNameStorage stat_acd(makeStatStorage("a.c.d")); - size_t table_size_4 = table_.numSymbols(); + size_t table_size_4 = table_->numSymbols(); StatNameStorage stat_ace(makeStatStorage("a.c.e")); - size_t table_size_5 = table_.numSymbols(); + size_t table_size_5 = table_->numSymbols(); EXPECT_GE(table_size_5, table_size_4); - stat_ace.free(table_); - EXPECT_EQ(table_size_4, table_.numSymbols()); + stat_ace.free(*table_); + EXPECT_EQ(table_size_4, table_->numSymbols()); - stat_acd.free(table_); - EXPECT_EQ(table_size_3, table_.numSymbols()); + stat_acd.free(*table_); + EXPECT_EQ(table_size_3, table_->numSymbols()); - stat_ac.free(table_); - EXPECT_EQ(table_size_2, table_.numSymbols()); + stat_ac.free(*table_); + EXPECT_EQ(table_size_2, table_->numSymbols()); - stat_ab.free(table_); - EXPECT_EQ(table_size_1, table_.numSymbols()); + stat_ab.free(*table_); + EXPECT_EQ(table_size_1, table_->numSymbols()); - stat_aa.free(table_); - EXPECT_EQ(table_size_1, table_.numSymbols()); + stat_aa.free(*table_); + EXPECT_EQ(table_size_1, table_->numSymbols()); - stat_a.free(table_); - EXPECT_EQ(table_size_0, table_.numSymbols()); + stat_a.free(*table_); + EXPECT_EQ(table_size_0, table_->numSymbols()); } // In the tests above we use the StatNameStorage abstraction which is not the @@ -223,29 +265,35 @@ TEST_F(StatNameTest, TestShrinkingExpectation) { // you may want to store bytes in a larger structure. For example, you might // want to allocate two different StatName objects in contiguous memory. The // safety-net here in terms of leaks is that SymbolTable will assert-fail if -// you don't free all the StatNames you've allocated bytes for. -TEST_F(StatNameTest, StoringWithoutStatNameStorage) { - SymbolEncoding hello_encoding = table_.encode("hello.world"); - SymbolEncoding goodbye_encoding = table_.encode("goodbye.world"); - size_t size = hello_encoding.bytesRequired() + goodbye_encoding.bytesRequired(); - size_t goodbye_offset = hello_encoding.bytesRequired(); - std::unique_ptr storage(new uint8_t[size]); - hello_encoding.moveToStorage(storage.get()); - goodbye_encoding.moveToStorage(storage.get() + goodbye_offset); - - StatName hello(storage.get()); - StatName goodbye(storage.get() + goodbye_offset); - - EXPECT_EQ("hello.world", hello.toString(table_)); - EXPECT_EQ("goodbye.world", goodbye.toString(table_)); - - // If we don't explicitly call free() on the the StatName objects the - // SymbolTable will assert on destruction. - table_.free(hello); - table_.free(goodbye); +// you don't free all the StatNames you've allocated bytes for. StatNameList +// provides this capability. +TEST_P(StatNameTest, List) { + std::vector names{"hello.world", "goodbye.world"}; + StatNameList name_list; + EXPECT_FALSE(name_list.populated()); + name_list.populate(names, *table_); + EXPECT_TRUE(name_list.populated()); + + // First, decode only the first name. + name_list.iterate([this](StatName stat_name) -> bool { + EXPECT_EQ("hello.world", table_->toString(stat_name)); + return false; + }); + + // Decode all the names. + std::vector decoded_strings; + name_list.iterate([this, &decoded_strings](StatName stat_name) -> bool { + decoded_strings.push_back(table_->toString(stat_name)); + return true; + }); + ASSERT_EQ(2, decoded_strings.size()); + EXPECT_EQ("hello.world", decoded_strings[0]); + EXPECT_EQ("goodbye.world", decoded_strings[1]); + name_list.clear(*table_); + EXPECT_FALSE(name_list.populated()); } -TEST_F(StatNameTest, HashTable) { +TEST_P(StatNameTest, HashTable) { StatName ac = makeStat("a.c"); StatName ab = makeStat("a.b"); StatName de = makeStat("d.e"); @@ -263,62 +311,141 @@ TEST_F(StatNameTest, HashTable) { EXPECT_EQ(3, name_int_map[de]); } -TEST_F(StatNameTest, Sort) { +TEST_P(StatNameTest, Sort) { std::vector names{makeStat("a.c"), makeStat("a.b"), makeStat("d.e"), makeStat("d.a.a"), makeStat("d.a"), makeStat("a.c")}; const std::vector sorted_names{makeStat("a.b"), makeStat("a.c"), makeStat("a.c"), makeStat("d.a"), makeStat("d.a.a"), makeStat("d.e")}; EXPECT_NE(names, sorted_names); - std::sort(names.begin(), names.end(), StatNameLessThan(table_)); + std::sort(names.begin(), names.end(), StatNameLessThan(*table_)); EXPECT_EQ(names, sorted_names); } -TEST_F(StatNameTest, Concat2) { - StatNameJoiner joiner(makeStat("a.b"), makeStat("c.d")); - EXPECT_EQ("a.b.c.d", joiner.statName().toString(table_)); +TEST_P(StatNameTest, Concat2) { + SymbolTable::StoragePtr joined = table_->join({makeStat("a.b"), makeStat("c.d")}); + EXPECT_EQ("a.b.c.d", table_->toString(StatName(joined.get()))); +} + +TEST_P(StatNameTest, ConcatFirstEmpty) { + SymbolTable::StoragePtr joined = table_->join({makeStat(""), makeStat("c.d")}); + EXPECT_EQ("c.d", table_->toString(StatName(joined.get()))); } -TEST_F(StatNameTest, ConcatFirstEmpty) { - StatNameJoiner joiner(makeStat(""), makeStat("c.d")); - EXPECT_EQ("c.d", joiner.statName().toString(table_)); +TEST_P(StatNameTest, ConcatSecondEmpty) { + SymbolTable::StoragePtr joined = table_->join({makeStat("a.b"), makeStat("")}); + EXPECT_EQ("a.b", table_->toString(StatName(joined.get()))); } -TEST_F(StatNameTest, ConcatSecondEmpty) { - StatNameJoiner joiner(makeStat("a.b"), makeStat("")); - EXPECT_EQ("a.b", joiner.statName().toString(table_)); +TEST_P(StatNameTest, ConcatAllEmpty) { + SymbolTable::StoragePtr joined = table_->join({makeStat(""), makeStat("")}); + EXPECT_EQ("", table_->toString(StatName(joined.get()))); } -TEST_F(StatNameTest, ConcatAllEmpty) { - StatNameJoiner joiner(makeStat(""), makeStat("")); - EXPECT_EQ("", joiner.statName().toString(table_)); +TEST_P(StatNameTest, Join3) { + SymbolTable::StoragePtr joined = + table_->join({makeStat("a.b"), makeStat("c.d"), makeStat("e.f")}); + EXPECT_EQ("a.b.c.d.e.f", table_->toString(StatName(joined.get()))); } -TEST_F(StatNameTest, Join3) { - StatNameJoiner joiner({makeStat("a.b"), makeStat("c.d"), makeStat("e.f")}); - EXPECT_EQ("a.b.c.d.e.f", joiner.statName().toString(table_)); +TEST_P(StatNameTest, Join3FirstEmpty) { + SymbolTable::StoragePtr joined = table_->join({makeStat(""), makeStat("c.d"), makeStat("e.f")}); + EXPECT_EQ("c.d.e.f", table_->toString(StatName(joined.get()))); } -TEST_F(StatNameTest, Join3FirstEmpty) { - StatNameJoiner joiner({makeStat(""), makeStat("c.d"), makeStat("e.f")}); - EXPECT_EQ("c.d.e.f", joiner.statName().toString(table_)); +TEST_P(StatNameTest, Join3SecondEmpty) { + SymbolTable::StoragePtr joined = table_->join({makeStat("a.b"), makeStat(""), makeStat("e.f")}); + EXPECT_EQ("a.b.e.f", table_->toString(StatName(joined.get()))); } -TEST_F(StatNameTest, Join3SecondEmpty) { - StatNameJoiner joiner({makeStat("a.b"), makeStat(""), makeStat("e.f")}); - EXPECT_EQ("a.b.e.f", joiner.statName().toString(table_)); +TEST_P(StatNameTest, Join3ThirdEmpty) { + SymbolTable::StoragePtr joined = table_->join({makeStat("a.b"), makeStat("c.d"), makeStat("")}); + EXPECT_EQ("a.b.c.d", table_->toString(StatName(joined.get()))); } -TEST_F(StatNameTest, Join3ThirdEmpty) { - StatNameJoiner joiner({makeStat("a.b"), makeStat("c.d"), makeStat("")}); - EXPECT_EQ("a.b.c.d", joiner.statName().toString(table_)); +TEST_P(StatNameTest, JoinAllEmpty) { + SymbolTable::StoragePtr joined = table_->join({makeStat(""), makeStat(""), makeStat("")}); + EXPECT_EQ("", table_->toString(StatName(joined.get()))); } -TEST_F(StatNameTest, JoinAllEmpty) { - StatNameJoiner joiner({makeStat(""), makeStat(""), makeStat("")}); - EXPECT_EQ("", joiner.statName().toString(table_)); +TEST_P(StatNameTest, RacingSymbolCreation) { + Thread::ThreadFactory& thread_factory = Thread::threadFactoryForTest(); + MutexTracerImpl& mutex_tracer = MutexTracerImpl::getOrCreateTracer(); + + // Make 100 threads, each of which will race to encode an overlapping set of + // symbols, triggering corner-cases in SymbolTable::toSymbol. + constexpr int num_threads = 100; + std::vector threads; + threads.reserve(num_threads); + ConditionalInitializer creation, access, wait; + absl::BlockingCounter creates(num_threads), accesses(num_threads); + for (int i = 0; i < num_threads; ++i) { + threads.push_back( + thread_factory.createThread([this, i, &creation, &access, &wait, &creates, &accesses]() { + // Rotate between 20 different symbols to try to get some + // contention. Based on a logging print statement in + // SymbolTable::toSymbol(), this appears to trigger creation-races, + // even when compiled with optimization. + std::string stat_name_string = absl::StrCat("symbol", i % 20); + + // Block each thread on waking up a common condition variable, + // so we make it likely to race on creation. + creation.wait(); + StatNameTempStorage initial(stat_name_string, *table_); + creates.DecrementCount(); + + access.wait(); + StatNameTempStorage second(stat_name_string, *table_); + accesses.DecrementCount(); + + wait.wait(); + })); + } + creation.setReady(); + creates.Wait(); + + int64_t create_contentions = mutex_tracer.numContentions(); + ENVOY_LOG_MISC(info, "Number of contentions: {}", create_contentions); + + // But when we access the already-existing symbols, we guarantee that no + // further mutex contentions occur. + int64_t start_contentions = mutex_tracer.numContentions(); + access.setReady(); + accesses.Wait(); + + // With fake symbol tables, there are no contentions as there are is no state + // in the symbol table to lock. This is why fake symbol tables are a safe way + // to transition the codebase to use the SymbolTable API without impacting + // hot-path performance. + if (GetParam() == SymbolTableType::Fake) { + EXPECT_EQ(create_contentions, mutex_tracer.numContentions()); + EXPECT_EQ(start_contentions, create_contentions); + } + + // In a perfect world, we could use reader-locks in the SymbolTable + // implementation, and there should be zero additional contentions + // after latching 'create_contentions' above. And we can definitely + // have this world, but this slows down BM_CreateRace in + // symbol_table_speed_test.cc, even on a 72-core machine. + // + // Thus it is better to avoid symbol-table contention by refactoring + // all stat-creation code to symbolize all stat string elements at + // construction, as composition does not require a lock. + // + // See this commit + // https://github.com/envoyproxy/envoy/pull/5321/commits/ef712d0f5a11ff49831c1935e8a2ef8a0a935bc9 + // for a working reader-lock implementation, which would pass this EXPECT: + // EXPECT_EQ(create_contentions, mutex_tracer.numContentions()); + // + // Note also that we cannot guarantee there *will* be contentions + // as a machine or OS is free to run all threads serially. + + wait.setReady(); + for (auto& thread : threads) { + thread->join(); + } } -TEST_F(StatNameTest, MutexContentionOnExistingSymbols) { +TEST_P(StatNameTest, MutexContentionOnExistingSymbols) { Thread::ThreadFactory& thread_factory = Thread::threadFactoryForTest(); MutexTracerImpl& mutex_tracer = MutexTracerImpl::getOrCreateTracer(); @@ -341,11 +468,11 @@ TEST_F(StatNameTest, MutexContentionOnExistingSymbols) { // Block each thread on waking up a common condition variable, // so we make it likely to race on creation. creation.wait(); - StatNameTempStorage initial(stat_name_string, table_); + StatNameTempStorage initial(stat_name_string, *table_); creates.DecrementCount(); access.wait(); - StatNameTempStorage second(stat_name_string, table_); + StatNameTempStorage second(stat_name_string, *table_); accesses.DecrementCount(); wait.wait(); @@ -386,8 +513,10 @@ TEST_F(StatNameTest, MutexContentionOnExistingSymbols) { } } -// Tests the memory savings realized from using symbol tables with 1k clusters. This -// test shows the memory drops from almost 8M to less than 2M. +// Tests the memory savings realized from using symbol tables with 1k +// clusters. This test shows the memory drops from almost 8M to less than +// 2M. Note that only SymbolTableImpl is tested for memory consumption, +// and not FakeSymbolTableImpl. TEST(SymbolTableTest, Memory) { if (!TestUtil::hasDeterministicMallocStats()) { return; @@ -411,7 +540,7 @@ TEST(SymbolTableTest, Memory) { string_mem_used = test_memory_usage(record_stat); } { - SymbolTable table; + SymbolTableImpl table; std::vector names; auto record_stat = [&names, &table](absl::string_view stat) { names.emplace_back(StatNameStorage(stat, table)); diff --git a/test/common/stats/symbol_table_speed_test.cc b/test/common/stats/symbol_table_speed_test.cc index beefeadc3e94..663de03fe358 100644 --- a/test/common/stats/symbol_table_speed_test.cc +++ b/test/common/stats/symbol_table_speed_test.cc @@ -22,7 +22,7 @@ static void BM_CreateRace(benchmark::State& state) { threads.reserve(num_threads); Envoy::ConditionalInitializer access, wait; absl::BlockingCounter accesses(num_threads); - Envoy::Stats::SymbolTable table; + Envoy::Stats::SymbolTableImpl table; const absl::string_view stat_name_string = "here.is.a.stat.name"; Envoy::Stats::StatNameStorage initial(stat_name_string, table);