Revert "docker: temporarily remove monitoring stuff"

This reverts commit 8bef9e1c08.
This commit is contained in:
Vincent Bernat
2024-01-14 11:05:25 +01:00
parent 705e56cac4
commit 05a02d24d6
10 changed files with 896 additions and 13 deletions

6
.env
View File

@@ -9,6 +9,12 @@ COMPOSE_FILE=${COMPOSE_FILE}:docker/docker-compose-ipinfo.yml
# GEOIPUPDATE_ACCOUNT_ID=...
# GEOIPUPDATE_LICENSE_KEY=...
## Monitoring (Prometheus)
# Each assignment below appends one more Compose file to the colon-separated
# COMPOSE_FILE list built up by the previous lines.
COMPOSE_FILE=${COMPOSE_FILE}:docker/docker-compose-monitoring.yml
## Grafana
# NOTE(review): the grafana service declares "depends_on: prometheus", so this
# file presumably needs the monitoring file above to be enabled too -- confirm.
COMPOSE_FILE=${COMPOSE_FILE}:docker/docker-compose-grafana.yml
## Demo
COMPOSE_FILE=${COMPOSE_FILE}:docker/docker-compose-demo.yml

View File

@@ -0,0 +1,26 @@
---
# Grafana service, published under the /grafana sub-path through Traefik.
version: "3"

volumes:
  # Named volume holding Grafana's data directory (/var/lib/grafana).
  akvorado-grafana:

services:
  grafana:
    image: grafana/grafana-oss:10.2.0
    environment:
      # Plugin ID followed by the version to install.
      - GF_INSTALL_PLUGINS=marcusolsson-json-datasource 1.3.8
      # Grafana is served from the /grafana sub-path (see the router rule).
      - GF_SERVER_ROOT_URL=/grafana
      - GF_SERVER_SERVE_FROM_SUB_PATH=true
    depends_on:
      - prometheus
      - akvorado-console
    restart: unless-stopped
    volumes:
      - akvorado-grafana:/var/lib/grafana
      # Host directory ./grafana provides /etc/grafana (grafana.ini,
      # provisioning files and dashboards).
      - ./grafana:/etc/grafana
    labels:
      - traefik.enable=true
      - traefik.http.routers.grafana.entrypoints=public
      - traefik.http.routers.grafana.rule=PathPrefix(`/grafana`)
      # NOTE(review): console-auth-headers is not defined in this file; it is
      # presumably declared alongside the console service -- confirm.
      - traefik.http.routers.grafana.middlewares=console-auth-headers,grafana-avatar
      # Redirect /grafana/avatar/... to the console avatar endpoint on the
      # same origin. "$$" is the Compose escape for a literal "$", so Traefik
      # receives "${1}" (the first capture group).
      - traefik.http.middlewares.grafana-avatar.redirectRegex.regex=^(https?://.+)/grafana/avatar/.+
      - traefik.http.middlewares.grafana-avatar.redirectRegex.replacement=$${1}/api/v0/console/user/avatar
      - traefik.http.services.grafana.loadbalancer.server.port=3000

View File

@@ -0,0 +1,62 @@
---
# Monitoring stack: Prometheus, node exporter, Kafka JMX exporter, Kafka UI.
version: "3"

volumes:
  # Named volume mounted at /prometheus, the TSDB path passed below.
  akvorado-prometheus:

services:
  prometheus:
    image: prom/prometheus:v2.47.2
    restart: unless-stopped
    user: root  # for access to /var/run/docker.sock
    volumes:
      - akvorado-prometheus:/prometheus
      # Docker socket, mounted read-only.
      - /var/run/docker.sock:/var/run/docker.sock:ro
      - ./prometheus.yml:/etc/prometheus/prometheus.yml
    command:
      # Those are the defaults
      - --config.file=/etc/prometheus/prometheus.yml
      - --storage.tsdb.path=/prometheus
      - --web.console.libraries=/usr/share/prometheus/console_libraries
      - --web.console.templates=/usr/share/prometheus/consoles
      # Those are not the defaults
      # Prometheus is reached under /prometheus, matching the router rule.
      - --web.external-url=/prometheus
    labels:
      - traefik.enable=true
      - traefik.http.routers.prometheus.entrypoints=public
      - traefik.http.routers.prometheus.rule=PathPrefix(`/prometheus`)
      - traefik.http.services.prometheus.loadbalancer.server.port=9090

  node-exporter:
    image: prom/node-exporter:v1.7.0
    restart: unless-stopped
    volumes:
      # Host /proc and /sys, read-only, at the paths given to the exporter.
      - /proc:/host/proc:ro
      - /sys:/host/sys:ro
    command:
      - --path.procfs=/host/proc
      - --path.sysfs=/host/sys
      - --no-collector.filesystem

  kafka-jmx-exporter:
    # NOTE(review): the "0" tag is not a fixed release, unlike the other
    # images in this file -- consider pinning an exact version.
    image: bitnami/jmx-exporter:0
    restart: unless-stopped
    depends_on:
      - kafka
    volumes:
      - ./kafka-jmx-exporter-config.yaml:/etc/jmx-kafka/config.yaml:ro
    command:
      # Listening port, then the path of the configuration mounted above.
      - "5556"
      - /etc/jmx-kafka/config.yaml

  kafka-ui:
    image: provectuslabs/kafka-ui:v0.5.0
    restart: unless-stopped
    depends_on:
      - kafka
    environment:
      - KAFKA_CLUSTERS_0_NAME=local
      - KAFKA_CLUSTERS_0_BOOTSTRAPSERVERS=kafka:9092
      # Same path as the Traefik PathPrefix rule below.
      - SERVER_SERVLET_CONTEXT_PATH=/kafka-ui
    labels:
      - traefik.enable=true
      # Routed on the "private" entrypoint, unlike Prometheus ("public").
      - traefik.http.routers.kafka-ui.entrypoints=private
      - traefik.http.routers.kafka-ui.rule=PathPrefix(`/kafka-ui`)

View File

@@ -40,6 +40,7 @@ services:
- KAFKA_CFG_LISTENER_SECURITY_PROTOCOL_MAP=PLAINTEXT:PLAINTEXT
- KAFKA_CFG_ADVERTISED_LISTENERS=PLAINTEXT://kafka:9092
- KAFKA_CFG_INTER_BROKER_LISTENER_NAME=PLAINTEXT
- JMX_PORT=5555
restart: unless-stopped
depends_on:
- zookeeper
@@ -50,19 +51,6 @@ services:
timeout: 10s
retries: 3
test: ["CMD", "kafka-topics.sh", "--list", "--bootstrap-server", "kafka:9092"]
kafka-ui:
image: provectuslabs/kafka-ui:v0.5.0
restart: unless-stopped
depends_on:
- kafka
environment:
- KAFKA_CLUSTERS_0_NAME=local
- KAFKA_CLUSTERS_0_BOOTSTRAPSERVERS=kafka:9092
- SERVER_SERVLET_CONTEXT_PATH=/kafka-ui
labels:
- traefik.enable=true
- traefik.http.routers.kafka-ui.entrypoints=private
- traefik.http.routers.kafka-ui.rule=PathPrefix(`/kafka-ui`)
redis:
image: bitnami/redis:7.0

View File

@@ -0,0 +1,616 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "grafana",
"uid": "-- Grafana --"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"links": [],
"liveNow": false,
"panels": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"description": "Number of received UDP packets per second per exporter. Each exporter should appear here.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"axisSoftMin": 0,
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "pps"
},
"overrides": []
},
"gridPos": {
"h": 10,
"w": 10,
"x": 0,
"y": 0
},
"id": 1,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"disableTextWrap": false,
"editorMode": "builder",
"exemplar": false,
"expr": "rate(akvorado_inlet_flow_input_udp_packets_total[$__rate_interval])",
"fullMetaSearch": false,
"includeNullMetadata": false,
"instant": false,
"legendFormat": "{{exporter}}",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "UDP: packets received",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"description": "Errors from inlet component when processing received flows. Some errors are OK during start, but then they should disappear. Errors include decoding errors, SNMP errors, and processing errors.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"axisSoftMin": 0,
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"noValue": "No errors",
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "pps"
},
"overrides": [
{
"matcher": {
"id": "byValue",
"options": {
"op": "gte",
"reducer": "allIsZero",
"value": 0
}
},
"properties": [
{
"id": "custom.hideFrom",
"value": {
"legend": true,
"tooltip": true,
"viz": true
}
}
]
}
]
},
"gridPos": {
"h": 10,
"w": 10,
"x": 10,
"y": 0
},
"id": 5,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"disableTextWrap": false,
"editorMode": "code",
"exemplar": false,
"expr": "rate(akvorado_inlet_core_flows_errors_total[$__rate_interval])",
"fullMetaSearch": false,
"includeNullMetadata": false,
"instant": false,
"legendFormat": "{{exporter}}: {{error}}",
"range": true,
"refId": "A",
"useBackend": false
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"disableTextWrap": false,
"editorMode": "code",
"exemplar": false,
"expr": "rate(akvorado_inlet_flow_decoder_netflow_errors_total[$__rate_interval])",
"fullMetaSearch": false,
"hide": false,
"includeNullMetadata": false,
"instant": false,
"legendFormat": "{{exporter}}: {{error}}",
"range": true,
"refId": "B",
"useBackend": false
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"disableTextWrap": false,
"editorMode": "code",
"exemplar": false,
"expr": "rate(akvorado_inlet_flow_decoder_sflow_errors_total[$__rate_interval])",
"fullMetaSearch": false,
"hide": false,
"includeNullMetadata": false,
"instant": false,
"legendFormat": "{{exporter}}: {{error}}",
"range": true,
"refId": "C",
"useBackend": false
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"disableTextWrap": false,
"editorMode": "code",
"exemplar": false,
"expr": "rate(akvorado_inlet_metadata_provider_snmp_poller_error_requests_total[$__rate_interval])",
"fullMetaSearch": false,
"hide": false,
"includeNullMetadata": false,
"instant": false,
"legendFormat": "{{exporter}}: {{error}}",
"range": true,
"refId": "D",
"useBackend": false
}
],
"title": "Inlet: flow errors",
"transformations": [],
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"description": "Number of dropped packets because workers were too busy to handle them. This should always be 0. Otherwise, increase the number of workers or the listening queue.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "#EAB839",
"value": 1
},
{
"color": "red",
"value": 5
}
]
},
"unit": "pps"
},
"overrides": []
},
"gridPos": {
"h": 5,
"w": 5,
"x": 0,
"y": 10
},
"id": 2,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"textMode": "auto"
},
"pluginVersion": "10.2.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"disableTextWrap": false,
"editorMode": "code",
"expr": "sum(rate(akvorado_inlet_flow_input_udp_in_dropped_packets_total[$__rate_interval]) or vector(0))",
"fullMetaSearch": false,
"includeNullMetadata": true,
"instant": false,
"interval": "",
"legendFormat": "__auto",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "UDP: packets dropped (in)",
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"description": "Number of dropped packets because internal queue was full. This should be 0. Either increase the internal queue or add more core workers.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "#EAB839",
"value": 1
},
{
"color": "red",
"value": 5
}
]
},
"unit": "pps"
},
"overrides": []
},
"gridPos": {
"h": 5,
"w": 5,
"x": 5,
"y": 10
},
"id": 3,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"textMode": "auto"
},
"pluginVersion": "10.2.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"disableTextWrap": false,
"editorMode": "code",
"expr": "sum(rate(akvorado_inlet_flow_input_udp_out_dropped_packets_total[$__rate_interval]) or vector(0))",
"fullMetaSearch": false,
"includeNullMetadata": true,
"instant": false,
"interval": "",
"legendFormat": "__auto",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "UDP: packets dropped (out)",
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"description": "The number of decoded packets, by flow type.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
},
"unit": "none"
},
"overrides": []
},
"gridPos": {
"h": 5,
"w": 5,
"x": 10,
"y": 10
},
"id": 6,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"textMode": "auto"
},
"pluginVersion": "10.2.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"disableTextWrap": false,
"editorMode": "builder",
"expr": "rate(akvorado_inlet_flow_decoder_flows_total[$__rate_interval])",
"fullMetaSearch": false,
"includeNullMetadata": true,
"instant": false,
"legendFormat": "{{name}}",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "Inlet: decoded packets",
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"description": "The number of flows forwarded to Kafka.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
},
"unit": "none"
},
"overrides": []
},
"gridPos": {
"h": 5,
"w": 5,
"x": 15,
"y": 10
},
"id": 7,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"textMode": "auto"
},
"pluginVersion": "10.2.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"disableTextWrap": false,
"editorMode": "builder",
"expr": "sum(rate(akvorado_inlet_core_forwarded_flows_total[$__rate_interval]))",
"fullMetaSearch": false,
"includeNullMetadata": true,
"instant": false,
"legendFormat": "Forwarded",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "Inlet: forwarded flows",
"type": "stat"
}
],
"refresh": "1m",
"schemaVersion": 38,
"tags": [
"akvorado"
],
"templating": {
"list": []
},
"time": {
"from": "now-3h",
"to": "now"
},
"timepicker": {},
"timezone": "",
"title": "Inlet",
"uid": "c6241364-fd78-46a6-b13e-d520d2776a20",
"version": 2,
"weekStart": ""
}

View File

@@ -0,0 +1,36 @@
# Authentication is "synced" with Akvorado: it uses the same HTTP headers.
[security]
disable_initial_admin_creation = false
viewers_can_edit = true
# No self-service sign-up or organization creation. Users are automatically
# assigned to an organization with the role set below.
[users]
allow_sign_up = false
allow_org_create = false
auto_assign_org = true
auto_assign_org_role = Viewer # Viewer, Admin, Editor, or None
# NOTE(review): sign-out is hidden presumably because the identity comes from
# the proxy headers rather than a Grafana login -- confirm.
[auth]
disable_signout_menu = true
# The username is read from the Remote-User header and unknown users are
# created automatically. Remote-Name and Remote-Email are mapped to the
# Name and Email fields.
[auth.proxy]
enabled = true
auto_sign_up = true
header_name = Remote-User
header_property = username
headers = Name:Remote-Name Email:Remote-Email
# Default dashboard
[dashboards]
default_home_dashboard_path = /etc/grafana/dashboards/akvorado/inlet.json
# More privacy
[news]
news_feed_enabled = false
# Usage reporting and update checks are all turned off.
[analytics]
enabled = false
reporting_enabled = false
check_for_updates = false
check_for_plugin_updates = false
check_for_plugin_updates = false

View File

@@ -0,0 +1,9 @@
---
# Grafana dashboard provisioning: load dashboards from files on disk.
apiVersion: 1
providers:
  - name: default
    type: file
    # Interval, in seconds, at which the dashboard files are checked again.
    updateIntervalSeconds: 10
    options:
      path: /etc/grafana/dashboards
      # Sub-directories of "path" become Grafana folders.
      foldersFromFileStructure: true

View File

@@ -0,0 +1,8 @@
---
# Grafana data source provisioning: the Prometheus instance of this stack.
apiVersion: 1
datasources:
  - name: Prometheus
    type: prometheus
    access: proxy
    # The /prometheus suffix matches Prometheus' --web.external-url setting.
    url: http://prometheus:9090/prometheus
    # Fixed UID: the provisioned dashboards reference this value in their
    # "datasource" entries.
    uid: PBFA97CFB590B2093

View File

@@ -0,0 +1,61 @@
---
# Prometheus JMX exporter configuration for the Kafka broker.
# Port 5555 matches the JMX_PORT set on the kafka service.
jmxUrl: service:jmx:rmi:///jndi/rmi://kafka:5555/jmxrmi
lowercaseOutputName: true
lowercaseOutputLabelNames: true
ssl: false
# This is mostly stolen from Bitnami Helm chart for Kafka.
# https://github.com/bitnami/charts/blob/main/bitnami/kafka/templates/metrics/jmx-configmap.yaml
whitelistObjectNames:
  - kafka.controller:*
  - kafka.server:*
  - java.lang:*
  - kafka.network:*
  - kafka.log:*
# In each rule, $N in "name" and "labels" refers to the N-th capture group of
# "pattern".
rules:
  - pattern: kafka.controller<type=(ControllerChannelManager), name=(QueueSize), broker-id=(\d+)><>(Value)
    name: kafka_controller_$1_$2_$4
    labels:
      broker_id: "$3"
  - pattern: kafka.controller<type=(ControllerChannelManager), name=(TotalQueueSize)><>(Value)
    name: kafka_controller_$1_$2_$3
  - pattern: kafka.controller<type=(KafkaController), name=(.+)><>(Value)
    name: kafka_controller_$1_$2_$3
  - pattern: kafka.controller<type=(ControllerStats), name=(.+)><>(Count)
    name: kafka_controller_$1_$2_$3
  - pattern: kafka.network<type=(Processor), name=(IdlePercent), networkProcessor=(.+)><>(Value)
    name: kafka_network_$1_$2_$4
    labels:
      network_processor: $3
  - pattern: kafka.network<type=(RequestMetrics), name=(.+), request=(.+)><>(Count|Value)
    name: kafka_network_$1_$2_$4
    labels:
      request: $3
  - pattern: kafka.network<type=(SocketServer), name=(.+)><>(Count|Value)
    name: kafka_network_$1_$2_$3
  - pattern: kafka.network<type=(RequestChannel), name=(.+)><>(Count|Value)
    name: kafka_network_$1_$2_$3
  - pattern: kafka.server<type=(.+), name=(.+), topic=(.+)><>(Count|OneMinuteRate)
    name: kafka_server_$1_$2_$4
    labels:
      topic: $3
  - pattern: kafka.server<type=(ReplicaFetcherManager), name=(.+), clientId=(.+)><>(Value)
    name: kafka_server_$1_$2_$4
    labels:
      client_id: "$3"
  - pattern: kafka.server<type=(DelayedOperationPurgatory), name=(.+), delayedOperation=(.+)><>(Value)
    name: kafka_server_$1_$2_$3_$4
  - pattern: kafka.server<type=(.+), name=(.+)><>(Count|Value|OneMinuteRate)
    name: kafka_server_$1_total_$2_$3
  - pattern: kafka.server<type=(.+)><>(queue-size)
    name: kafka_server_$1_$2
  - pattern: java.lang<type=(.+), name=(.+)><(.+)>(\w+)
    name: java_lang_$1_$4_$3_$2
  - pattern: java.lang<type=(.+), name=(.+)><>(\w+)
    name: java_lang_$1_$3_$2
  # This rule has a pattern only: no explicit name or labels.
  - pattern: java.lang<type=(.*)>
  - pattern: kafka.log<type=(.+), name=(.+), topic=(.+), partition=(.+)><>Value
    name: kafka_log_$1_$2
    labels:
      topic: $3
      partition: $4

71
docker/prometheus.yml Normal file
View File

@@ -0,0 +1,71 @@
---
# Prometheus configuration. Apart from Prometheus itself, targets are
# discovered through the Docker socket and selected by Compose labels.
global:
  scrape_interval: 30s
  evaluation_interval: 30s

scrape_configs:
  # Prometheus scraping itself; the /prometheus prefix on the metrics path
  # matches its --web.external-url setting.
  - job_name: prometheus
    metrics_path: /prometheus/metrics
    static_configs:
      - targets:
          - 127.0.0.1:9090

  - job_name: grafana
    docker_sd_configs:
      - host: unix:///var/run/docker.sock
        refresh_interval: 30s
        filters:
          - name: label
            values:
              - com.docker.compose.project=akvorado
              - com.docker.compose.service=grafana

  - job_name: zookeeper
    docker_sd_configs:
      - host: unix:///var/run/docker.sock
        refresh_interval: 30s
        filters:
          - name: label
            values:
              - com.docker.compose.project=akvorado
              - com.docker.compose.service=zookeeper
    relabel_configs:
      # Keep only the target for container port 7000.
      - source_labels: [__meta_docker_port_private]
        regex: "7000"
        action: keep

  # The job is named "kafka" but scrapes the kafka-jmx-exporter service.
  - job_name: kafka
    docker_sd_configs:
      - host: unix:///var/run/docker.sock
        refresh_interval: 30s
        filters:
          - name: label
            values:
              - com.docker.compose.project=akvorado
              - com.docker.compose.service=kafka-jmx-exporter

  - job_name: node-exporter
    docker_sd_configs:
      - host: unix:///var/run/docker.sock
        refresh_interval: 30s
        filters:
          - name: label
            values:
              - com.docker.compose.project=akvorado
              - com.docker.compose.service=node-exporter

  # Akvorado components: discovery covers the whole Compose project and the
  # relabeling rules below narrow it down.
  - job_name: akvorado
    metrics_path: /api/v0/metrics
    docker_sd_configs:
      - host: unix:///var/run/docker.sock
        refresh_interval: 30s
        filters:
          - name: label
            values:
              - com.docker.compose.project=akvorado
    relabel_configs:
      # Keep only the inlet, orchestrator and console services...
      - source_labels: [__meta_docker_container_label_com_docker_compose_service]
        regex: akvorado-(inlet|orchestrator|console)
        action: keep
      # ...and only their container port 8080.
      - source_labels: [__meta_docker_port_private]
        regex: "8080"
        action: keep
      # Set a "service" label to the service name minus the "akvorado-" prefix.
      - source_labels: [__meta_docker_container_label_com_docker_compose_service]
        target_label: service
        regex: akvorado-(.*)
        replacement: $1