Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat/common metrics #66

Merged
merged 6 commits into from
Nov 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
repos:
- repo: git@github.com:Yelp/detect-secrets
rev: v1.1.0
rev: v1.5.0
hooks:
- id: detect-secrets
args: ['--baseline', '.secrets.baseline']
exclude: poetry.lock
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.0.1
rev: v5.0.0
hooks:
- id: end-of-file-fixer
- id: no-commit-to-branch
args: [--branch, develop, --branch, master, --pattern, release/.*]
- repo: https://github.com/psf/black
rev: 21.5b2
rev: 24.10.0
hooks:
- id: black
2 changes: 1 addition & 1 deletion .secrets.baseline
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@
}
]
},
"version": "1.1.0",
"version": "1.5.0",
"filters_used": [
{
"path": "detect_secrets.filters.allowlist.is_line_allowlisted"
Expand Down
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@
This package includes several utility Python tools for the Gen3 stack. It is meant to be imported as a means to access
supplementary tools and resources that are reusable and not exclusive to any specific repo we use.

## metrics

- Prometheus


## profiling

Expand Down
166 changes: 166 additions & 0 deletions cdispyutils/metrics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
"""
Small wrapper around the Prometheus client for metrics gathering in a multi-
process Python environment. This is intended to be extended and instantiated by
services, stored at some application context level, and then used to add metrics
(which are likely later exposed at the /metrics endpoint for Prometheus to scrape).
"""

import os
import pathlib

from cdislogging import get_logger
from prometheus_client import (
CONTENT_TYPE_LATEST,
CollectorRegistry,
Counter,
Gauge,
generate_latest,
multiprocess,
)

logger = get_logger(__name__)


class BaseMetrics(object):
    """
    Class to handle Prometheus metrics

    Attributes:
        enabled (bool): If this is false, the class functions will be no-ops (no operations), effectively
                        doing nothing. This is the behavior when metrics are disabled. Why? So application code
                        doesn't have to check, it always tries to log a metric.
        prometheus_metrics (dict): Dictionary mapping a metric name to the Prometheus metric object
                                   (Counter or Gauge) created for it
        _registry (CollectorRegistry): Prometheus registry the multiprocess collector is attached to.
                                       None when metrics are disabled.
    """

    def __init__(self, enabled=True, prometheus_dir="/var/tmp/prometheus_metrics"):
        """
        Create a metrics class.

        Args:
            enabled (bool): If this is false, the class functions will be no-ops (no operations), effectively
                            doing nothing. This is the behavior when metrics are disabled. Why? So application code
                            doesn't have to check, it always tries to log a metric.
            prometheus_dir (str): Directory to use when setting PROMETHEUS_MULTIPROC_DIR env var (which prometheus requires
                                  for multiprocess metrics collection). Note that the prometheus client is very
                                  finicky about when the ENV var is set.
        """
        self.enabled = enabled
        self.prometheus_metrics = {}
        # Always define the attribute so the instance has the same shape whether or
        # not metrics are enabled.
        self._registry = None
        if not enabled:
            return

        # The directory must exist before the collector is created.
        pathlib.Path(prometheus_dir).mkdir(parents=True, exist_ok=True)
        os.environ["PROMETHEUS_MULTIPROC_DIR"] = prometheus_dir

        logger.info(
            f"PROMETHEUS_MULTIPROC_DIR is {os.environ['PROMETHEUS_MULTIPROC_DIR']}"
        )

        self._registry = CollectorRegistry()
        multiprocess.MultiProcessCollector(self._registry)

    def get_latest_metrics(self):
        """
        Generate the latest Prometheus metrics

        Returns:
            tuple: (payload, content type). When metrics are enabled the payload is whatever
                   `generate_latest` produces for this instance's registry; when disabled it
                   is an empty string. The content type is always CONTENT_TYPE_LATEST.
        """
        # When metrics gathering is not enabled, the metrics endpoint should not error, but it should
        # not return any data.
        if not self.enabled:
            return "", CONTENT_TYPE_LATEST

        return generate_latest(self._registry), CONTENT_TYPE_LATEST

    def increment_counter(self, name, labels, description=""):
        """
        Increment a Prometheus counter metric by one.
        Note that this function should not be called directly - implement a function like
        `add_login_event` instead. A metric's labels should always be consistent.

        Args:
            name (str): Name of the metric
            labels (dict): Dictionary of labels for the metric (label name -> label value)
            description (str): describing the counter in case it doesn't already exist

        Raises:
            ValueError: if a metric with this name exists but is not a Counter
        """
        if not self.enabled:
            return

        # create the counter if it doesn't already exist
        if name not in self.prometheus_metrics:
            logger.info(
                f"Creating counter '{name}' with description '{description}' and labels: {labels}"
            )
            self.prometheus_metrics[name] = Counter(name, description, [*labels.keys()])
        elif type(self.prometheus_metrics[name]) is not Counter:
            raise ValueError(
                f"Trying to create counter '{name}' but a {type(self.prometheus_metrics[name])} with this name already exists"
            )

        logger.debug(f"Incrementing counter '{name}' with labels: {labels}")
        # Bind label values by name, not by position: the label names were fixed by
        # the first call, and a later dict with the same keys in a different order
        # must not attach values to the wrong labels.
        self.prometheus_metrics[name].labels(**labels).inc()

    def dec_gauge(self, name, labels, value, description=""):
        """
        Decrement a Prometheus gauge metric.
        Note that this function should not be called directly - implement a function like
        `add_signed_url_event` instead. A metric's labels should always be consistent.

        Args:
            name (str): Name of the metric
            labels (dict): Dictionary of labels for the metric (label name -> label value)
            value (int): Amount to decrement the metric by
            description (str): describing the gauge in case it doesn't already exist

        Raises:
            ValueError: if a metric with this name exists but is not a Gauge
        """
        if not self.enabled:
            return

        self._create_gauge_if_not_exist(name, labels, value, description)
        logger.debug(f"Decrementing gauge '{name}' by '{value}' with labels: {labels}")
        # Label values are bound by name so dict ordering cannot mislabel the sample.
        self.prometheus_metrics[name].labels(**labels).dec(value)

    def inc_gauge(self, name, labels, value, description=""):
        """
        Increment a Prometheus gauge metric.
        Note that this function should not be called directly - implement a function like
        `add_signed_url_event` instead. A metric's labels should always be consistent.

        Args:
            name (str): Name of the metric
            labels (dict): Dictionary of labels for the metric (label name -> label value)
            value (int): Amount to increment the metric by
            description (str): describing the gauge in case it doesn't already exist

        Raises:
            ValueError: if a metric with this name exists but is not a Gauge
        """
        if not self.enabled:
            return

        self._create_gauge_if_not_exist(name, labels, value, description)
        logger.debug(f"Incrementing gauge '{name}' by '{value}' with labels: {labels}")
        # Label values are bound by name so dict ordering cannot mislabel the sample.
        self.prometheus_metrics[name].labels(**labels).inc(value)

    def set_gauge(self, name, labels, value, description=""):
        """
        Set a Prometheus gauge metric.
        Note that this function should not be called directly - implement a function like
        `add_signed_url_event` instead. A metric's labels should always be consistent.

        Args:
            name (str): Name of the metric
            labels (dict): Dictionary of labels for the metric (label name -> label value)
            value (int): Value to set the metric to
            description (str): describing the gauge in case it doesn't already exist

        Raises:
            ValueError: if a metric with this name exists but is not a Gauge
        """
        if not self.enabled:
            return

        self._create_gauge_if_not_exist(name, labels, value, description)
        logger.debug(f"Setting gauge '{name}' with '{value}' with labels: {labels}")
        # Label values are bound by name so dict ordering cannot mislabel the sample.
        self.prometheus_metrics[name].labels(**labels).set(value)

    def _create_gauge_if_not_exist(self, name, labels, value, description):
        """
        Make sure `self.prometheus_metrics[name]` holds a Gauge, creating it on first use.

        Args:
            name (str): Name of the metric
            labels (dict): Dictionary of labels; its keys become the gauge's label names
            value (int): Unused here; kept so existing callers keep working
            description (str): describing the gauge in case it doesn't already exist

        Raises:
            ValueError: if a metric with this name exists but is not a Gauge
        """
        # create the gauge if it doesn't already exist
        if name not in self.prometheus_metrics:
            logger.info(
                f"Creating gauge '{name}' with description '{description}' and labels: {labels}"
            )
            self.prometheus_metrics[name] = Gauge(name, description, [*labels.keys()])
        elif type(self.prometheus_metrics[name]) is not Gauge:
            raise ValueError(
                f"Trying to create gauge '{name}' but a {type(self.prometheus_metrics[name])} with this name already exists"
            )
Loading