From 7d7bc636241148b67d7f56195a2c5b24ee3828df Mon Sep 17 00:00:00 2001 From: Lucien Fregosi Date: Tue, 16 Aug 2022 22:35:12 +0200 Subject: [PATCH] add documentation for metrics setup (#15690) --- docs/operator-guides/collecting-metrics.md | 197 +++++++++++++++++++++ 1 file changed, 197 insertions(+) diff --git a/docs/operator-guides/collecting-metrics.md b/docs/operator-guides/collecting-metrics.md index f9c6ad6a34282..565469cabfe27 100644 --- a/docs/operator-guides/collecting-metrics.md +++ b/docs/operator-guides/collecting-metrics.md @@ -3,6 +3,18 @@ Airbyte supports two ways to collect metrics - using datadog or open telemetry. Fill in `METRIC_CLIENT` field in `.env` file to get started! +**Prerequisite:** +In order to get metrics from airbyte we need to deploy a container / pod called metrics-reporter like below +``` +airbyte-metrics: + image: airbyte/metrics-reporter:${VERSION} + container_name: airbyte-metrics + environment: + - METRIC_CLIENT=${METRIC_CLIENT} + - OTEL_COLLECTOR_ENDPOINT=${OTEL_COLLECTOR_ENDPOINT} +``` + + # Open Telemetry 1. In `.env` change `METRIC_CLIENT` to `otel`. @@ -53,6 +65,191 @@ OTEL_COLLECTOR_ENDPOINT=
If you started open telemetry collector in the link above, the address should be `http://otel-collector:4317`. Note the format - unlike the base `.env`, there is no quote in `.env` file under kubernetes. + +## Tutorial + +Deploy the airbyte metric pod : +``` +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ .Release.Name }}-metrics + namespace: {{ .Release.Namespace }} + labels: {{ set . "component" "metrics" | include "labels" | nindent 4 }} +spec: + selector: + matchLabels: {{ set . "component" "metrics" | include "labels" | nindent 6 }} + template: + metadata: + labels: {{ set . "component" "metrics" | include "labels" | nindent 8 }} + spec: + containers: + - name: airbyte-metrics + image: "airbyte/metrics-reporter:latest" + imagePullPolicy: IfNotPresent + env: + - name: AIRBYTE_VERSION + value: latest + - name: DATABASE_PASSWORD + valueFrom: + secretKeyRef: + name: {{ include "airbyte.database.secret.name" . }} + key: DATABASE_PASSWORD + - name: DATABASE_URL + value: {{ include "airbyte.database.url" . | quote }} + - name: DATABASE_USER + valueFrom: + secretKeyRef: + name: {{ .Release.Name }}-secrets + key: DATABASE_USER + - name: CONFIGS_DATABASE_MINIMUM_FLYWAY_MIGRATION_VERSION + value: 0.35.15.001 + - name: METRIC_CLIENT + value: otel + - name: OTEL_COLLECTOR_ENDPOINT + value: http://otel-collector:4317 +``` + + +Deploy an Open telemetry pod like below : + +``` +apiVersion: apps/v1 +kind: Deployment +metadata: + name: otel-collector + namespace: {{ .Release.Namespace }} + labels: {{ set . "component" "otel-collector" | include "labels" | nindent 4 }} +spec: + selector: + matchLabels: {{ set . "component" "otel-collector" | include "labels" | nindent 6 }} + replicas: 1 + template: + metadata: + labels: {{ set . "component" "otel-collector" | include "labels" | nindent 8 }} + spec: + containers: + - command: + - "/otelcol" + - "--config=/conf/otel-collector-config.yaml" + image: "otel/opentelemetry-collector:latest" + name: otel-collector + ports: + - containerPort: 4317 # Default endpoint for OpenTelemetry receiver. + - containerPort: 8889 # Port for Prometheus instance + volumeMounts: + - name: config + mountPath: /conf + volumes: + - configMap: + name: otel-collector-conf + items: + - key: otel-collector-config + path: otel-collector-config.yaml + name: config +``` + +WIth this Config Map : + +``` +apiVersion: v1 +kind: ConfigMap +metadata: + name: otel-collector-conf + namespace: {{ .Release.Namespace }} + labels: {{ set . "component" "otel-collector" | include "labels" | nindent 4 }} +data: + otel-collector-config: | + receivers: + otlp: + protocols: + grpc: + endpoint: "0.0.0.0:4317" + processors: + batch: + memory_limiter: + limit_mib: 1500 + spike_limit_mib: 512 + check_interval: 5s + exporters: + prometheus: + endpoint: "0.0.0.0:8889" + namespace: "default" + service: + pipelines: + metrics: + receivers: [otlp] + processors: [memory_limiter, batch] + exporters: [prometheus] +``` + +Then we need a service to be able to access both open telemetry GRPC and Prometheus +``` +apiVersion: v1 +kind: Service +metadata: + name: otel-collector + namespace: {{ .Release.Namespace }} + labels: {{ set . "component" "otel-collector" | include "labels" | nindent 4 }} +spec: + ports: + - name: otlp-grpc # Default endpoint for OpenTelemetry gRPC receiver. + port: 4317 + protocol: TCP + targetPort: 4317 + - name: prometheus + port: 8889 + selector: {{ set . "component" "otel-collector" | include "labels" | nindent 6 }} +``` + +And finally We can add a service monitor to receive metrics in prometheus and optionally add some prometheus rules to generate alerts. +You can replace with your prometheus name. +``` +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ .Release.Name }} + namespace: {{ .Release.Namespace }} + labels: + {{ set . "component" "metrics" | include "labels" | nindent 4 }} + prometheus: +spec: + endpoints: + - interval: 30s + port: prometheus + path: /metrics + relabelings: + - action: labeldrop + regex: (service|endpoint|namespace|container) + selector: + matchLabels: {{ set . "component" "otel-collector" | include "labels" | nindent 6 }} +``` + +One rule example : +``` +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ .Release.Name }}-rules + namespace: {{ .Release.Namespace }} + labels: + {{ set . "component" "prometheus-rules" | include "labels" | nindent 4 }} + prometheus: +spec: + groups: + - name: airbyte + rules: + - alert: AirbyteJobFail + for: 0m + expr: min(airbyte_job_failed_by_release_stage) > 0 + labels: + priority: P2 + annotations: + summary: {{ `"An Airbyte Job has failed"` }} +``` + + # Datadog TBD