Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Prometheus support on v1/sys/metrics endpoint #5308

Merged
merged 12 commits into from
Feb 14, 2019
53 changes: 38 additions & 15 deletions command/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
metrics "github.com/armon/go-metrics"
"github.com/armon/go-metrics/circonus"
"github.com/armon/go-metrics/datadog"
"github.com/armon/go-metrics/prometheus"
"github.com/hashicorp/errwrap"
log "github.com/hashicorp/go-hclog"
multierror "github.com/hashicorp/go-multierror"
Expand Down Expand Up @@ -466,7 +467,8 @@ func (c *ServerCommand) Run(args []string) int {
"in a Docker container, provide the IPC_LOCK cap to the container."))
}

if err := c.setupTelemetry(config); err != nil {
inMemMetrics, err := c.setupTelemetry(config)
if err != nil {
c.UI.Error(fmt.Sprintf("Error initializing telemetry: %s", err))
return 1
}
Expand Down Expand Up @@ -561,6 +563,7 @@ func (c *ServerCommand) Run(args []string) int {
AllLoggers: allLoggers,
BuiltinRegistry: builtinplugins.Registry,
DisableKeyEncodingChecks: config.DisablePrintableCheck,
InMemSink: inMemMetrics,
}
if c.flagDev {
coreConfig.DevToken = c.flagDevRootTokenID
Expand Down Expand Up @@ -1666,8 +1669,8 @@ func (c *ServerCommand) detectRedirect(detect physical.RedirectDetect,
return url.String(), nil
}

// setupTelemetry is used to setup the telemetry sub-systems
func (c *ServerCommand) setupTelemetry(config *server.Config) error {
// setupTelemetry is used to setup the telemetry sub-systems and returns the in-memory sink to be used in http configuration
func (c *ServerCommand) setupTelemetry(config *server.Config) (*metrics.InmemSink, error) {
/* Setup telemetry
Aggregate on 10 second intervals for 1 minute. Expose the
metrics over stderr when there is a SIGUSR1 received.
Expand All @@ -1676,21 +1679,38 @@ func (c *ServerCommand) setupTelemetry(config *server.Config) error {
metrics.DefaultInmemSignal(inm)

var telConfig *server.Telemetry
if config.Telemetry == nil {
telConfig = &server.Telemetry{}
} else {
if config.Telemetry != nil {
telConfig = config.Telemetry
} else {
telConfig = &server.Telemetry{}
}

metricsConf := metrics.DefaultConfig("vault")
metricsConf.EnableHostname = !telConfig.DisableHostname

// Configure the statsite sink
var fanout metrics.FanoutSink

// Configure the Prometheus sink
if telConfig.PrometheusRetentionTime == 0 {
return nil, fmt.Errorf("telemetry.prometheus_retention_time must be > 0")
}

prometheusOpts := prometheus.PrometheusOpts{
Expiration: telConfig.PrometheusRetentionTime,
}

sink, err := prometheus.NewPrometheusSinkFrom(prometheusOpts)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wouldn't you only want to do this if the retention time is not zero? That way it'd gate it based on configuration existing, like the other types.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, I may have misunderstood your previous comment then :(
You mentioned it might be better if users have all formats available with no gates, so right now, specifying a 0 value for Prometheus retention is return an error, with the default being 24h retention

Would you prefer that the behavior is still disabled if an explicit 0 is used ?

if err != nil {
return nil, err
}

fanout = append(fanout, sink)

if telConfig.StatsiteAddr != "" {
sink, err := metrics.NewStatsiteSink(telConfig.StatsiteAddr)
if err != nil {
return err
return nil, err
}
fanout = append(fanout, sink)
}
Expand All @@ -1699,7 +1719,7 @@ func (c *ServerCommand) setupTelemetry(config *server.Config) error {
if telConfig.StatsdAddr != "" {
sink, err := metrics.NewStatsdSink(telConfig.StatsdAddr)
if err != nil {
return err
return nil, err
}
fanout = append(fanout, sink)
}
Expand Down Expand Up @@ -1735,7 +1755,7 @@ func (c *ServerCommand) setupTelemetry(config *server.Config) error {

sink, err := circonus.NewCirconusSink(cfg)
if err != nil {
return err
return nil, err
}
sink.Start()
fanout = append(fanout, sink)
Expand All @@ -1750,21 +1770,24 @@ func (c *ServerCommand) setupTelemetry(config *server.Config) error {

sink, err := datadog.NewDogStatsdSink(telConfig.DogStatsDAddr, metricsConf.HostName)
if err != nil {
return errwrap.Wrapf("failed to start DogStatsD sink: {{err}}", err)
return nil, errwrap.Wrapf("failed to start DogStatsD sink: {{err}}", err)
}
sink.SetTags(tags)
fanout = append(fanout, sink)
}

// Initialize the global sink
if len(fanout) > 0 {
fanout = append(fanout, inm)
metrics.NewGlobal(metricsConf, fanout)
if len(fanout) > 1 {
// Hostname enabled will create poor quality metrics name for prometheus
if !telConfig.DisableHostname {
c.UI.Warn("telemetry.disable_hostname has been set to false. Recommended setting is true for Prometheus to avoid poorly named metrics.")
}
} else {
metricsConf.EnableHostname = false
metrics.NewGlobal(metricsConf, inm)
}
return nil
fanout = append(fanout, inm)
metrics.NewGlobal(metricsConf, fanout)
return inm, nil
}

func (c *ServerCommand) Reload(lock *sync.RWMutex, reloadFuncs *map[string][]reload.ReloadFunc, configPath []string) error {
Expand Down
25 changes: 24 additions & 1 deletion command/server/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@ import (
"github.com/hashicorp/vault/helper/parseutil"
)

const (
prometheusDefaultRetentionTime = 24 * time.Hour
)

// Config is the configuration for the vault server.
type Config struct {
Listeners []*Listener `hcl:"-"`
Expand Down Expand Up @@ -98,7 +102,10 @@ func DevConfig(ha, transactional bool) *Config {

EnableUI: true,

Telemetry: &Telemetry{},
Telemetry: &Telemetry{
PrometheusRetentionTime: prometheusDefaultRetentionTime,
DisableHostname: true,
},
}

switch {
Expand Down Expand Up @@ -233,6 +240,12 @@ type Telemetry struct {
// DogStatsdTags are the global tags that should be sent with each packet to dogstatsd
// It is a list of strings, where each string looks like "my_tag_name:my_tag_value"
DogStatsDTags []string `hcl:"dogstatsd_tags"`

// Prometheus:
// PrometheusRetentionTime is the retention time for prometheus metrics if greater than 0.
// Default: 24h
PrometheusRetentionTime time.Duration `hcl:-`
PrometheusRetentionTimeRaw interface{} `hcl:"prometheus_retention_time"`
}

func (s *Telemetry) GoString() string {
Expand Down Expand Up @@ -816,5 +829,15 @@ func parseTelemetry(result *Config, list *ast.ObjectList) error {
if err := hcl.DecodeObject(&result.Telemetry, item.Val); err != nil {
return multierror.Prefix(err, "telemetry:")
}

if result.Telemetry.PrometheusRetentionTimeRaw != nil {
var err error
if result.Telemetry.PrometheusRetentionTime, err = parseutil.ParseDurationSecond(result.Telemetry.PrometheusRetentionTimeRaw); err != nil {
return err
}
} else {
result.Telemetry.PrometheusRetentionTime = prometheusDefaultRetentionTime
}

return nil
}
7 changes: 7 additions & 0 deletions vault/core.go
Original file line number Diff line number Diff line change
Expand Up @@ -402,6 +402,9 @@ type Core struct {
// Stores loggers so we can reset the level
allLoggers []log.Logger
allLoggersLock sync.RWMutex

// Telemetry objects
inMemSink *metrics.InmemSink
}

// CoreConfig is used to parameterize a core
Expand Down Expand Up @@ -470,6 +473,9 @@ type CoreConfig struct {
DisableKeyEncodingChecks bool

AllLoggers []log.Logger

// Telemetry objects
InMemSink *metrics.InmemSink
}

func (c *CoreConfig) Clone() *CoreConfig {
Expand Down Expand Up @@ -576,6 +582,7 @@ func NewCore(conf *CoreConfig) (*Core, error) {
activeContextCancelFunc: new(atomic.Value),
allLoggers: conf.AllLoggers,
builtinRegistry: conf.BuiltinRegistry,
inMemSink: conf.InMemSink,
}

atomic.StoreUint32(c.sealed, 1)
Expand Down
57 changes: 57 additions & 0 deletions vault/logical_system.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package vault

import (
"bytes"
"context"
"crypto/sha256"
"crypto/sha512"
Expand All @@ -18,6 +19,8 @@ import (
"sync"
"time"

"github.com/prometheus/common/expfmt"

"github.com/hashicorp/errwrap"
log "github.com/hashicorp/go-hclog"
memdb "github.com/hashicorp/go-memdb"
Expand All @@ -33,6 +36,7 @@ import (
"github.com/hashicorp/vault/logical"
"github.com/hashicorp/vault/logical/framework"
"github.com/mitchellh/mapstructure"
"github.com/prometheus/client_golang/prometheus"
)

var (
Expand Down Expand Up @@ -145,6 +149,7 @@ func NewSystemBackend(core *Core, logger log.Logger) *SystemBackend {
b.Backend.Paths = append(b.Backend.Paths, b.capabilitiesPaths()...)
b.Backend.Paths = append(b.Backend.Paths, b.internalPaths()...)
b.Backend.Paths = append(b.Backend.Paths, b.remountPath())
b.Backend.Paths = append(b.Backend.Paths, b.metricsPath())

if core.rawEnabled {
b.Backend.Paths = append(b.Backend.Paths, &framework.Path{
Expand Down Expand Up @@ -2473,6 +2478,54 @@ func (b *SystemBackend) responseWrappingUnwrap(ctx context.Context, te *logical.
return response, nil
}

func (b *SystemBackend) handleMetrics(ctx context.Context, req *logical.Request, data *framework.FieldData) (*logical.Response, error) {

format := data.Get("format").(string)

acceptHeaders := req.Headers["Accept"]
if format == "prometheus" || (len(acceptHeaders) > 0 && strings.HasPrefix(acceptHeaders[0], "application/openmetrics-text")) {

metricsFamilies, err := prometheus.DefaultGatherer.Gather()
if err != nil && len(metricsFamilies) == 0 {
return nil, fmt.Errorf("no prometheus metrics could be decoded: %s", err)
}

// Initialize a byte buffer.
buf := &bytes.Buffer{}
defer buf.Reset()

e := expfmt.NewEncoder(buf, expfmt.FmtText)
for _, mf := range metricsFamilies {
err := e.Encode(mf)
if err != nil {
return nil, fmt.Errorf("error during the encoding of metrics: %s", err)
}
}
return &logical.Response{
Data: map[string]interface{}{
logical.HTTPContentType: expfmt.FmtText,
logical.HTTPRawBody: buf.Bytes(),
logical.HTTPStatusCode: 200,
},
}, nil
}
summary, err := b.Core.inMemSink.DisplayMetrics(nil, nil)
if err != nil {
return nil, fmt.Errorf("error while fetching the in-memory metrics: %s", err)
}
content, err := json.Marshal(summary)
if err != nil {
return nil, fmt.Errorf("error while marshalling the in-memory metrics: %s", err)
}
return &logical.Response{
Data: map[string]interface{}{
logical.HTTPContentType: "application/json",
logical.HTTPRawBody: content,
logical.HTTPStatusCode: 200,
},
}, nil
}

func (b *SystemBackend) handleWrappingLookup(ctx context.Context, req *logical.Request, data *framework.FieldData) (*logical.Response, error) {
// This ordering of lookups has been validated already in the wrapping
// validation func, we're just doing this for a safety check
Expand Down Expand Up @@ -3853,4 +3906,8 @@ This path responds to the following HTTP methods.
"Information about a token's resultant ACL. Internal API; its location, inputs, and outputs may change.",
"",
},
"metrics": {
"Export the metrics aggregated for telemetry purpose.",
"",
},
}
18 changes: 18 additions & 0 deletions vault/logical_system_paths.go
Original file line number Diff line number Diff line change
Expand Up @@ -1100,6 +1100,24 @@ func (b *SystemBackend) remountPath() *framework.Path {
}
}

func (b *SystemBackend) metricsPath() *framework.Path {
return &framework.Path{
Pattern: "metrics",
Fields: map[string]*framework.FieldSchema{
"format": &framework.FieldSchema{
Type: framework.TypeString,
Description: "Format to export metrics into. Currently accept only \"prometheus\"",
},
},
Callbacks: map[logical.Operation]framework.OperationFunc{
logical.ReadOperation: b.handleMetrics,
},
HelpSynopsis: strings.TrimSpace(sysHelp["metrics"][0]),
HelpDescription: strings.TrimSpace(sysHelp["metrics"][1]),
}

}

func (b *SystemBackend) authPaths() []*framework.Path {
return []*framework.Path{
{
Expand Down
4 changes: 4 additions & 0 deletions vault/mount.go
Original file line number Diff line number Diff line change
Expand Up @@ -843,6 +843,7 @@ func (c *Core) loadMounts(ctx context.Context) error {
for _, coreMount := range c.mounts.Entries {
if coreMount.Type == requiredMount.Type {
foundRequired = true
coreMount.Config = requiredMount.Config
break
}
}
Expand Down Expand Up @@ -1267,6 +1268,9 @@ func (c *Core) requiredMountTable() *MountTable {
UUID: sysUUID,
Accessor: sysAccessor,
BackendAwareUUID: sysBackendUUID,
Config: MountConfig{
PassthroughRequestHeaders: []string{"Accept"},
},
}

identityUUID, err := uuid.GenerateUUID()
Expand Down
Loading