diff --git a/alertmanager/config.yml b/alertmanager/config.yml new file mode 100644 index 0000000..85e5ac9 --- /dev/null +++ b/alertmanager/config.yml @@ -0,0 +1,10 @@ +route: + receiver: 'slack' + +receivers: + - name: 'slack' +# slack_configs: +# - send_resolved: true +# username: '' +# channel: '#' +# api_url: '' diff --git a/docker-compose.yml b/docker-compose.yml index 4054566..605dbe3 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -2,41 +2,158 @@ networks: traefik_front_network: external: true - back_network_: + back_network_pg: driver: bridge attachable: true #### SERVICES services: -### hello_world - hello_world: - container_name: gitea-app - hostname: gitea-app - image: hello-world - environment: +### prometheus + prometheus: + container_name: prometheus-app + hostname: prometheus-app + image: prom/prometheus:latest restart: always - networks: -# - back_network_gitea - - traefik_front_network volumes: + - ./prometheus:/etc/prometheus/ + - ./prometheus_data:/prometheus + command: + - '--config.file=/etc/prometheus/prometheus.yml' + - '--storage.tsdb.path=/prometheus' + - '--web.console.libraries=/usr/share/prometheus/console_libraries' + - '--web.console.templates=/usr/share/prometheus/consoles' +# ports: +# - 9090:9090 + networks: + - traefik_front_network + - back_network_pg + links: + - cadvisor:cadvisor + - alertmanager:alertmanager + depends_on: + - cadvisor labels: - "traefik.enable=true" - "traefik.docker.network=traefik_front_network" # HTTP - - "traefik.http.routers.hello-world-http.rule=Host(`hello-world.tips-of-mine.com`)" - - "traefik.http.routers.hello-world-http.entrypoints=http" - - "traefik.http.routers.hello-world-http.priority=49" + - "traefik.http.routers.prometheus-http.rule=Host(`prometheus.tips-of-mine.com`)" + - "traefik.http.routers.prometheus-http.entrypoints=http" + - "traefik.http.routers.prometheus-http.priority=39" # HTTPS - - "traefik.http.routers.hello-world-https.rule=Host(`hello-world.tips-of-mine.com`)" - - "traefik.http.routers.hello-world-https.entrypoints=https" - - "traefik.http.routers.hello-world-https.tls=true" - - "traefik.http.routers.hello-world-https.priority=50" - - "traefik.http.routers.gitea.service=gitea-https-service" + - "traefik.http.routers.prometheus-https.rule=Host(`prometheus.tips-of-mine.com`)" + - "traefik.http.routers.prometheus-https.entrypoints=https" + - "traefik.http.routers.prometheus-https.tls=true" + - "traefik.http.routers.prometheus-https.priority=40" + - "traefik.http.routers.prometheus.service=prometheus-https-service" # Middleware # Service -# - "traefik.http.services.gitea-https-service.loadbalancer.server.port=3000" -# - "traefik.http.services.gitea-https-service.loadbalancer.server.scheme=https" -# - "traefik.http.services.gitea-https-service.loadbalancer.healthcheck.hostname=gitea.traefik.me" -# - "traefik.http.services.gitea-https-service.loadbalancer.healthcheck.method=foobar" -# - "traefik.http.services.gitea-https-service.loadbalancer.healthcheck.timeout=10" -# - "traefik.http.services.gitea-https-service.loadbalancer.healthcheck.interval=30" + - "traefik.http.services.prometheus-https-service.loadbalancer.server.port=9090" +# - "traefik.http.services.prometheus-https-service.loadbalancer.server.scheme=https" + - "traefik.http.services.prometheus-https-service.loadbalancer.healthcheck.hostname=prometheus.tips-of-mine.com" + - "traefik.http.services.prometheus-https-service.loadbalancer.healthcheck.method=foobar" + - "traefik.http.services.prometheus-https-service.loadbalancer.healthcheck.timeout=10" + - "traefik.http.services.prometheus-https-service.loadbalancer.healthcheck.interval=30" + +### node-exporter + node-exporter: + container_name: prometheus-node-exporter + hostname: prometheus-node-exporter + image: prom/node-exporter:latest + volumes: + - /proc:/host/proc:ro + - /sys:/host/sys:ro + - /:/rootfs:ro + command: + - '--path.procfs=/host/proc' + - '--path.sysfs=/host/sys' + - --collector.filesystem.ignored-mount-points + - '^/(sys|proc|dev|host|etc|rootfs/var/lib/docker/containers|rootfs/var/lib/docker/overlay2|rootfs/run/docker/netns|rootfs/var/lib/docker/aufs)($$|/)' + ports: + - 9100:9100 + networks: + - back_network_pg + restart: always + deploy: + mode: global + +### alertmanager + alertmanager: + container_name: prometheus-alertmanager + hostname: prometheus-alertmanager + image: prom/alertmanager:latest + restart: always + ports: + - 9093:9093 + networks: + - back_network_pg + volumes: + - ./alertmanager/:/etc/alertmanager/ + command: + - '--config.file=/etc/alertmanager/config.yml' + - '--storage.path=/alertmanager' + +### cadvisor + cadvisor: + container_name: prometheus-cadvisor + hostname: prometheus-cadvisor + image: gcr.io/cadvisor/cadvisor:latest + volumes: + - /:/rootfs:ro + - /var/run:/var/run:rw + - /sys:/sys:ro + - /var/lib/docker/:/var/lib/docker:ro + ports: + - 8080:8080 + networks: + - back_network_pg + restart: always + deploy: + mode: global + +### grafana + grafana: + container_name: grafana-app + hostname: grafana-app + image: grafana/grafana:latest + user: '472' + restart: always + environment: + GF_INSTALL_PLUGINS: 'grafana-clock-panel,grafana-simple-json-datasource' + volumes: + - ./grafana_data:/var/lib/grafana + - ./grafana/provisioning/:/etc/grafana/provisioning/ + env_file: + - ./grafana/config.monitoring +# ports: +# - 3000:3000 + networks: + - traefik_front_network + - back_network_pg + depends_on: + - prometheus + labels: + - "traefik.enable=true" + - "traefik.docker.network=traefik_front_network" +# HTTP + - "traefik.http.routers.grafana-http.rule=Host(`grafana.tips-of-mine.com`)" + - "traefik.http.routers.grafana-http.entrypoints=http" + - "traefik.http.routers.grafana-http.priority=41" +# HTTPS + - "traefik.http.routers.grafana-https.rule=Host(`grafana.tips-of-mine.com`)" + - "traefik.http.routers.grafana-https.entrypoints=https" + - "traefik.http.routers.grafana-https.tls=true" + - "traefik.http.routers.grafana-https.priority=42" + - "traefik.http.routers.grafana.service=grafana-https-service" +# Middleware +# Service + - "traefik.http.services.grafana-https-service.loadbalancer.server.port=3000" +# - "traefik.http.services.grafana-https-service.loadbalancer.server.scheme=https" + - "traefik.http.services.grafana-https-service.loadbalancer.healthcheck.hostname=grafana.tips-of-mine.com" + - "traefik.http.services.grafana-https-service.loadbalancer.healthcheck.method=foobar" + - "traefik.http.services.grafana-https-service.loadbalancer.healthcheck.timeout=10" + - "traefik.http.services.grafana-https-service.loadbalancer.healthcheck.interval=30" + +#### VOLUMES +#volumes: +# prometheus_data: {} +# grafana_data: {} diff --git a/grafana/config.monitoring b/grafana/config.monitoring new file mode 100644 index 0000000..a4a54c8 --- /dev/null +++ b/grafana/config.monitoring @@ -0,0 +1,3 @@ +GF_SECURITY_ADMIN_USER=admin +GF_SECURITY_ADMIN_PASSWORD=foobar +GF_USERS_ALLOW_SIGN_UP=false diff --git a/grafana/provisioning/dashboards/authentik.json b/grafana/provisioning/dashboards/authentik.json new file mode 100644 index 0000000..d0383d0 --- /dev/null +++ b/grafana/provisioning/dashboards/authentik.json @@ -0,0 +1,1388 @@ +{ + "__inputs": [ + { + "name": "DS_PROMETHEUS", + "label": "Prometheus", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__requires": [ + { + "type": "panel", + "id": "bargauge", + "name": "Bar gauge", + "version": "" + }, + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "8.1.4" + }, + { + "type": "panel", + "id": "piechart", + "name": "Pie chart", + "version": "" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "stat", + "name": "Stat", + "version": "" + }, + { + "type": "panel", + "id": "text", + "name": "Text", + "version": "" + }, + { + "type": "panel", + "id": "timeseries", + "name": "Time series", + "version": "" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "description": "Grafana Dashboard for Prometheus metrics exposed by authentik.", + "editable": true, + "gnetId": 14837, + "graphTooltip": 1, + "id": null, + "iteration": 1631795206449, + "links": [], + "panels": [ + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 23, + "panels": [], + "title": "authentik Core", + "type": "row" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "hue", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 12, + "w": 17, + "x": 0, + "y": 1 + }, + "id": 8, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.2", + "targets": [ + { + "exemplar": true, + "expr": "avg by (flow_slug) (rate(authentik_flows_plan_time_sum{namespace=~\"$namespace\"}[5m]) / rate(authentik_flows_plan_time_count{namespace=~\"$namespace\"}[5m]))", + "interval": "", + "legendFormat": "{{ flow_slug }}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "FlowPlanner time by flow", + "transparent": true, + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "mappings": [] + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Successful" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 6, + "w": 3, + "x": 17, + "y": 1 + }, + "id": 10, + "options": { + "displayLabels": [], + "legend": { + "displayMode": "list", + "placement": "bottom", + "values": [] + }, + "pieType": "donut", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "7.5.6", + "targets": [ + { + "exemplar": true, + "expr": "sum(authentik_system_tasks{namespace=~\"$namespace\",status=\"TaskResultStatus.ERROR\"})", + "format": "time_series", + "instant": true, + "interval": "", + "legendFormat": "Failed", + "refId": "A" + }, + { + "exemplar": true, + "expr": "sum(authentik_system_tasks{namespace=~\"$namespace\",status=\"TaskResultStatus.SUCCESSFUL\"})", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "Successful", + "refId": "B" + } + ], + "title": "Task status", + "transparent": true, + "type": "piechart" + }, + { + "datasource": null, + "gridPos": { + "h": 5, + "w": 4, + "x": 20, + "y": 1 + }, + "id": 13, + "options": { + "content": "\n\n", + "mode": "html" + }, + "pluginVersion": "8.1.4", + "timeFrom": null, + "timeShift": null, + "transparent": true, + "type": "text" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "0": { + "text": "None" + } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 20, + "y": 6 + }, + "id": 4, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.1.4", + "targets": [ + { + "exemplar": true, + "expr": "max(authentik_admin_workers{namespace=~\"$namespace\"})", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Connected Workers", + "transparent": true, + "type": "stat" + }, + { + "datasource": "${DS_PROMETHEUS}", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "mappings": [] + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 3, + "x": 17, + "y": 7 + }, + "id": 6, + "options": { + "displayLabels": [], + "legend": { + "displayMode": "list", + "placement": "bottom", + "values": [] + }, + "pieType": "donut", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "7.5.6", + "targets": [ + { + "exemplar": true, + "expr": "sum(authentik_policies_cached{namespace=~\"$namespace\"})", + "instant": true, + "interval": "", + "legendFormat": "Cached policies", + "refId": "A" + }, + { + "exemplar": true, + "expr": "sum(authentik_models{namespace=~\"$namespace\",app=\"authentik_policies\", model_name=\"policy\"}) - authentik_policies_cached", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "Total policies", + "refId": "B" + } + ], + "title": "Cached policies", + "transparent": true, + "type": "piechart" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "0": { + "text": "None" + } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 20, + "y": 9 + }, + "id": 16, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.1.4", + "targets": [ + { + "exemplar": true, + "expr": "sum(authentik_outposts_connected{namespace=~\"$namespace\"})", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Connected Outposts", + "transparent": true, + "type": "stat" + }, + { + "datasource": "${DS_PROMETHEUS}", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 800000 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 16, + "w": 4, + "x": 0, + "y": 13 + }, + "id": 15, + "options": { + "displayMode": "lcd", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showUnfilled": true, + "text": {} + }, + "pluginVersion": "8.1.4", + "targets": [ + { + "exemplar": true, + "expr": "avg by (task_name) (authentik_system_tasks{namespace=~\"$namespace\"})", + "instant": true, + "interval": "", + "legendFormat": "{{ task_name }}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "System task duration", + "transparent": true, + "type": "bargauge" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 20, + "x": 4, + "y": 13 + }, + "id": 2, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.2", + "targets": [ + { + "exemplar": true, + "expr": "topk(5, avg by(binding_target_type) (rate(authentik_policies_execution_time_sum{namespace=~\"$namespace\"}[5m]) / rate(authentik_policies_execution_time_count{namespace=~\"$namespace\"}[5m])))", + "interval": "", + "legendFormat": "{{ binding_target_type }}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "PolicyEngine Execution time by binding type (Top 5)", + "transparent": true, + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "hue", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 20, + "x": 4, + "y": 21 + }, + "id": 11, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.2", + "targets": [ + { + "exemplar": true, + "expr": "topk(5, avg by(object_type) (rate(authentik_policies_execution_time_sum{namespace=~\"$namespace\"}[5m]) / rate(authentik_policies_execution_time_count{namespace=~\"$namespace\"}[5m])))", + "interval": "", + "legendFormat": "{{ object_type }}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "PolicyEngine Execution time by binding target (Top 5)", + "transparent": true, + "type": "timeseries" + }, + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 29 + }, + "id": 18, + "panels": [], + "repeat": "outpost_proxy", + "title": "authentik Proxy Outpost $outpost_proxy", + "type": "row" + }, + { + "datasource": null, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 30 + }, + "id": 20, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "sum by (host) (rate(authentik_outpost_proxy_requests_sum{namespace=~\"$namespace\", outpost_name=\"$outpost_proxy\"}[5m]))", + "interval": "", + "legendFormat": "{{ host }}", + "refId": "A" + } + ], + "title": "Outpost requests (per 5 minutes)", + "transparent": true, + "type": "timeseries" + }, + { + "datasource": null, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 30 + }, + "id": 21, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "sum by (host) (rate(authentik_outpost_proxy_requests_sum{outpost_name=\"$outpost_proxy\", namespace=~\"$namespace\",user=\"\"}[5m]))", + "interval": "", + "legendFormat": "{{ host }}", + "refId": "A" + } + ], + "title": "Outpost requests (per 5 minutes) (unauthenticated, but allow-listed)", + "transparent": true, + "type": "timeseries" + }, + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 38 + }, + "id": 25, + "panels": [], + "repeat": "outpost_ldap", + "title": "authentik LDAP Outpost $outpost_ldap", + "type": "row" + }, + { + "datasource": null, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 39 + }, + "id": 27, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "avg by (dn) (rate(authentik_outpost_ldap_requests_bucket{namespace=~\"$namespace\", outpost_name=\"$outpost_ldap\"}[5m]))", + "interval": "", + "legendFormat": "{{ dn }}", + "refId": "A" + } + ], + "title": "LDAP Requests (per 5 minutes)", + "transparent": true, + "type": "timeseries" + }, + { + "datasource": null, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 39 + }, + "id": 28, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "avg by (reason) (rate(authentik_outpost_ldap_requests_rejected{namespace=~\"$namespace\", outpost_name=\"$outpost_ldap\"}[5m]))", + "interval": "", + "legendFormat": "{{ reason }}", + "refId": "A" + } + ], + "title": "LDAP Rejected Requests (per 5 minutes)", + "transparent": true, + "type": "timeseries" + }, + { + "datasource": null, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 16, + "x": 0, + "y": 47 + }, + "id": 26, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "avg by (flow) (rate(authentik_outpost_flow_timing_get_bucket{namespace=~\"$namespace\"}[5m]))", + "hide": false, + "interval": "", + "legendFormat": "{{ flow }} GET", + "refId": "A" + }, + { + "exemplar": true, + "expr": "avg by (flow) (rate(authentik_outpost_flow_timing_post_bucket{namespace=~\"$namespace\"}[5m]))", + "hide": false, + "interval": "", + "legendFormat": "{{ flow }} POST", + "refId": "B" + } + ], + "title": "FlowExecutor Timings", + "transparent": true, + "type": "timeseries" + }, + { + "datasource": null, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "mappings": [] + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 4, + "x": 16, + "y": 47 + }, + "id": 29, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom" + }, + "pieType": "pie", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "group by (type) (authentik_outpost_ldap_requests_sum)", + "hide": false, + "interval": "", + "legendFormat": "{{ type }}", + "refId": "A" + } + ], + "title": "LDAP Requests by type", + "transparent": true, + "type": "piechart" + }, + { + "datasource": null, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "mappings": [] + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 4, + "x": 20, + "y": 47 + }, + "id": 30, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom" + }, + "pieType": "pie", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "group by (reason) (authentik_outpost_ldap_requests_rejected)", + "hide": false, + "interval": "", + "legendFormat": "{{ reason }}", + "refId": "A" + } + ], + "title": "LDAP Rejected Requests by reason", + "transparent": true, + "type": "piechart" + } + ], + "refresh": false, + "schemaVersion": 30, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "allValue": null, + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "definition": "authentik_outpost_connection", + "description": null, + "error": null, + "hide": 0, + "includeAll": true, + "label": "Namespace", + "multi": true, + "name": "namespace", + "options": [], + "query": { + "query": "authentik_outpost_connection", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "/.*namespace=\"([^\"]*).*/", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "allValue": null, + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "definition": "authentik_outpost_info{outpost_type=\"proxy\"}", + "description": null, + "error": null, + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "outpost_proxy", + "options": [], + "query": { + "query": "authentik_outpost_info{outpost_type=\"proxy\"}", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "/.*outpost_name=\"([^\"]*).*/", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "allValue": null, + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "definition": "authentik_outpost_info{outpost_type=\"ldap\"}", + "description": null, + "error": null, + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "outpost_ldap", + "options": [], + "query": { + "query": "authentik_outpost_info{outpost_type=\"ldap\"}", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "/.*outpost_name=\"([^\"]*).*/", + "skipUrlSync": false, + "sort": 0, + "type": "query" + } + ] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "authentik", + "uid": "authentik", + "version": 32 +} diff --git a/grafana/provisioning/dashboards/dashboard.yml b/grafana/provisioning/dashboards/dashboard.yml new file mode 100644 index 0000000..14716ee --- /dev/null +++ b/grafana/provisioning/dashboards/dashboard.yml @@ -0,0 +1,11 @@ +apiVersion: 1 + +providers: +- name: 'Prometheus' + orgId: 1 + folder: '' + type: file + disableDeletion: false + editable: true + options: + path: /etc/grafana/provisioning/dashboards diff --git a/grafana/provisioning/dashboards/traefik.json b/grafana/provisioning/dashboards/traefik.json new file mode 100644 index 0000000..5c3d140 --- /dev/null +++ b/grafana/provisioning/dashboards/traefik.json @@ -0,0 +1,1605 @@ +{ + "__inputs": [ + { + "name": "DS_PROMETHEUS", + "label": "Prometheus", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__elements": {}, + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "9.3.1" + }, + { + "type": "panel", + "id": "piechart", + "name": "Pie chart", + "version": "" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "stat", + "name": "Stat", + "version": "" + }, + { + "type": "panel", + "id": "timeseries", + "name": "Time series", + "version": "" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "description": "Official dashboard for Standalone Traefik", + "editable": false, + "fiscalYearStartMonth": 0, + "gnetId": 17346, + "graphTooltip": 0, + "id": null, + "links": [], + "liveNow": false, + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 9, + "panels": [], + "title": "General", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 5, + "x": 0, + "y": 1 + }, + "id": 13, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "9.3.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "count(traefik_config_reloads_total)", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Traefik Instances", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 7, + "x": 5, + "y": 1 + }, + "id": 7, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum(rate(traefik_entrypoint_requests_total{entrypoint=~\"$entrypoint\"}[1m])) by (entrypoint)", + "legendFormat": "{{entrypoint}}", + "range": true, + "refId": "A" + } + ], + "title": "Requests per Entrypoint", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "https://medium.com/@tristan_96324/prometheus-apdex-alerting-d17a065e39d0", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 1 + }, + "id": 6, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "(sum(rate(traefik_entrypoint_request_duration_seconds_bucket{le=\"0.3\",code=\"200\",entrypoint=~\"$entrypoint\"}[5m])) by (method) + \n sum(rate(traefik_entrypoint_request_duration_seconds_bucket{le=\"1.2\",code=\"200\",entrypoint=~\"$entrypoint\"}[5m])) by (method)) / 2 / \n sum(rate(traefik_entrypoint_request_duration_seconds_count{code=\"200\",entrypoint=~\"$entrypoint\"}[5m])) by (method)\n", + "legendFormat": "{{method}}", + "range": true, + "refId": "A" + } + ], + "title": "Apdex score", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "Mean Distribution", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "mappings": [], + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 5, + "x": 0, + "y": 3 + }, + "id": 14, + "options": { + "legend": { + "displayMode": "list", + "placement": "right", + "showLegend": true, + "values": [ + "percent" + ] + }, + "pieType": "pie", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "tooltip": { + "mode": "multi", + "sort": "asc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum(rate(traefik_service_requests_total{service=~\"$service.*\",protocol=\"http\"}[1m])) by (method, code)", + "legendFormat": "{{method}}[{{code}}]", + "range": true, + "refId": "A" + } + ], + "title": "Http Code ", + "type": "piechart" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 9 + }, + "id": 23, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "topk(15,\n label_replace(\n traefik_service_request_duration_seconds_sum{service=~\"$service.*\",protocol=\"http\"} / \n traefik_service_request_duration_seconds_count{service=~\"$service.*\",protocol=\"http\"},\n \"service\", \"$1\", \"service\", \"([^-]+-[^-]+).*\")\n)\n\n", + "legendFormat": "{{method}}[{{code}}] on {{service}}", + "range": true, + "refId": "A" + } + ], + "title": "Top slow services", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 9 + }, + "id": 5, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "topk(15,\n label_replace(\n sum by (service,code) \n (rate(traefik_service_requests_total{service=~\"$service.*\",protocol=\"http\"}[5m])) > 0,\n \"service\", \"$1\", \"service\", \"([^-]+-[^-]+).*\")\n)", + "legendFormat": "[{{code}}] on {{service}}", + "range": true, + "refId": "A" + } + ], + "title": "Most requested services", + "type": "timeseries" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 17 + }, + "id": 11, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 18 + }, + "id": 3, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "label_replace(\n 1 - (sum by (service)\n (rate(traefik_service_request_duration_seconds_bucket{le=\"1.2\",service=~\"$service.*\"}[5m])) / sum by (service) \n (rate(traefik_service_request_duration_seconds_count{service=~\"$service.*\"}[5m]))\n ) > 0,\n \"service\", \"$1\", \"service\", \"([^-]+-[^-]+).*\"\n)", + "legendFormat": "{{service}}", + "range": true, + "refId": "A" + } + ], + "title": "Services failing SLO of 1200ms", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 18 + }, + "id": 4, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "label_replace(\n 1 - (sum by (service)\n (rate(traefik_service_request_duration_seconds_bucket{le=\"0.3\",service=~\"$service.*\"}[5m])) / sum by (service) \n (rate(traefik_service_request_duration_seconds_count{service=~\"$service.*\"}[5m]))\n ) > 0,\n \"service\", \"$1\", \"service\", \"([^-]+-[^-]+).*\"\n)", + "legendFormat": "{{service}}", + "range": true, + "refId": "A" + } + ], + "title": "Services failing SLO of 300ms", + "type": "timeseries" + } + ], + "title": "SLO", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 18 + }, + "id": 16, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 12, + "w": 8, + "x": 0, + "y": 19 + }, + "id": 17, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "topk(15,\n label_replace(\n sum by (service,method,code) \n (rate(traefik_service_requests_total{service=~\"$service.*\",code=~\"2..\",protocol=\"http\"}[5m])) > 0,\n \"service\", \"$1\", \"service\", \"([^-]+-[^-]+).*\")\n)", + "legendFormat": "{{method}}[{{code}}] on {{service}}", + "range": true, + "refId": "A" + } + ], + "title": "2xx over 5 min", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisGridShow": true, + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 12, + "w": 8, + "x": 8, + "y": 19 + }, + "id": 18, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "topk(15,\n label_replace(\n sum by (service,method,code) \n (rate(traefik_service_requests_total{service=~\"$service.*\",code=~\"5..\",protocol=\"http\"}[5m])) > 0,\n \"service\", \"$1\", \"service\", \"([^-]+-[^-]+).*\")\n)", + "legendFormat": "{{method}}[{{code}}] on {{service}}", + "range": true, + "refId": "A" + } + ], + "title": "5xx over 5 min", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisGridShow": true, + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 12, + "w": 8, + "x": 16, + "y": 19 + }, + "id": 19, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "topk(15,\n label_replace(\n sum by (service,method,code) \n (rate(traefik_service_requests_total{service=~\"$service.*\",code!~\"2..|5..\",protocol=\"http\"}[5m])) > 0,\n \"service\", \"$1\", \"service\", \"([^-]+-[^-]+).*\")\n)", + "legendFormat": "{{method}}[{{code}}] on {{service}}", + "range": true, + "refId": "A" + } + ], + "title": "Other codes over 5 min", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisGridShow": true, + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 31 + }, + "id": 20, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "topk(15,\n label_replace(\n sum by (service,method) \n (rate(traefik_service_requests_bytes_total{service=~\"$service.*\",protocol=\"http\"}[1m])) > 0,\n \"service\", \"$1\", \"service\", \"([^-]+-[^-]+).*\")\n)", + "legendFormat": "{{method}} on {{service}}", + "range": true, + "refId": "A" + } + ], + "title": "Requests Size", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisGridShow": true, + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 31 + }, + "id": 24, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "topk(15,\n label_replace(\n sum by (service,method) \n (rate(traefik_service_responses_bytes_total{service=~\"$service.*\",protocol=\"http\"}[1m])) > 0,\n \"service\", \"$1\", \"service\", \"([^-]+-[^-]+).*\")\n)", + "legendFormat": "{{method}} on {{service}}", + "range": true, + "refId": "A" + } + ], + "title": "Responses Size", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 39 + }, + "id": 2, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "label_replace(\n sum(traefik_service_open_connections{service=~\"$service.*\"}) by (service),\n \"service\", \"$1\", \"service\", \"([^-]+-[^-]+).*\")", + "legendFormat": "{{service}}", + "range": true, + "refId": "A" + } + ], + "title": "Connections per Service", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 39 + }, + "id": 21, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum(traefik_entrypoint_open_connections{entrypoint=~\"$entrypoint\"}) by (entrypoint)\n", + "legendFormat": "{{entrypoint}}", + "range": true, + "refId": "A" + } + ], + "title": "Connections per Entrypoint", + "type": "timeseries" + } + ], + "title": "HTTP Details", + "type": "row" + } + ], + "refresh": false, + "schemaVersion": 37, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "includeAll": false, + "multi": false, + "name": "DS_PROMETHEUS", + "label": "datasource", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + }, + { + "current": {}, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "definition": "label_values(traefik_entrypoint_open_connections, entrypoint)", + "hide": 0, + "includeAll": true, + "multi": false, + "name": "entrypoint", + "options": [], + "query": { + "query": "label_values(traefik_entrypoint_open_connections, entrypoint)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": {}, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "definition": "label_values(traefik_service_open_connections, service)", + "hide": 0, + "includeAll": true, + "multi": false, + "name": "service", + "options": [], + "query": { + "query": "label_values(traefik_service_open_connections, service)", + "refId": "StandardVariableQuery" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + } + ] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Traefik Official Standalone Dashboard", + "uid": "n5bu_kv45", + "version": 6, + "weekStart": "" +} diff --git a/grafana/provisioning/datasources/datasource.yml b/grafana/provisioning/datasources/datasource.yml new file mode 100644 index 0000000..c02bb38 --- /dev/null +++ b/grafana/provisioning/datasources/datasource.yml @@ -0,0 +1,50 @@ +# config file version +apiVersion: 1 + +# list of datasources that should be deleted from the database +deleteDatasources: + - name: Prometheus + orgId: 1 + +# list of datasources to insert/update depending +# whats available in the database +datasources: + # name of the datasource. Required +- name: Prometheus + # datasource type. Required + type: prometheus + # access mode. direct or proxy. Required + access: proxy + # org id. will default to orgId 1 if not specified + orgId: 1 + # url + url: http://prometheus:9090 + # database password, if used + password: + # database user, if used + user: + # database name, if used + database: + # enable/disable basic auth + basicAuth: false + # basic auth username, if used + basicAuthUser: + # basic auth password, if used + basicAuthPassword: + # enable/disable with credentials headers + withCredentials: + # mark as default datasource. Max one per org + isDefault: true + # fields that will be converted to json and stored in json_data + jsonData: + graphiteVersion: "1.1" + tlsAuth: false + tlsAuthWithCACert: false + # json object of data that will be encrypted. + secureJsonData: + tlsCACert: "..." + tlsClientCert: "..." + tlsClientKey: "..." + version: 1 + # allow users to edit datasources from the UI. + editable: true diff --git a/prometheus/alert.rules b/prometheus/alert.rules new file mode 100644 index 0000000..543bf91 --- /dev/null +++ b/prometheus/alert.rules @@ -0,0 +1,22 @@ +groups: +- name: example + rules: + + # Alert for any instance that is unreachable for >2 minutes. + - alert: service_down + expr: up == 0 + for: 2m + labels: + severity: page + annotations: + summary: "Instance {{ $labels.instance }} down" + description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 2 minutes." + + - alert: high_load + expr: node_load1 > 0.5 + for: 2m + labels: + severity: page + annotations: + summary: "Instance {{ $labels.instance }} under high load" + description: "{{ $labels.instance }} of job {{ $labels.job }} is under high load." diff --git a/prometheus/prometheus.yml b/prometheus/prometheus.yml new file mode 100644 index 0000000..8cd30ba --- /dev/null +++ b/prometheus/prometheus.yml @@ -0,0 +1,94 @@ +# my global config +global: + scrape_interval: 15s # By default, scrape targets every 15 seconds. + evaluation_interval: 15s # By default, scrape targets every 15 seconds. + # scrape_timeout is set to the global default (10s). + + # Attach these labels to any time series or alerts when communicating with + # external systems (federation, remote storage, Alertmanager). + external_labels: + monitor: 'my-project' + +# Load and evaluate rules in this file every 'evaluation_interval' seconds. +rule_files: + - 'alert.rules' + # - "first.rules" + # - "second.rules" + +# alert +alerting: + alertmanagers: + - scheme: http + static_configs: + - targets: + - "alertmanager:9093" + +# A scrape configuration containing exactly one endpoint to scrape: +# Here it's Prometheus itself. +scrape_configs: + # The job name is added as a label `job=` to any timeseries scraped from this config. + - job_name: app + scrape_interval: 5s + static_configs: + - targets: ['host.docker.internal:8000'] + + - job_name: 'prometheus' + + # Override the global default and scrape targets from this job every 5 seconds. + scrape_interval: 5s + + static_configs: + - targets: ['localhost:9090'] + + - job_name: 'cadvisor' + + # Override the global default and scrape targets from this job every 5 seconds. + scrape_interval: 5s + + dns_sd_configs: + - names: + - 'tasks.cadvisor' + type: 'A' + port: 8080 + +# static_configs: +# - targets: ['cadvisor:8080'] + + - job_name: 'node-exporter' + + # Override the global default and scrape targets from this job every 5 seconds. + scrape_interval: 5s + + dns_sd_configs: + - names: + - 'tasks.node-exporter' + type: 'A' + port: 9100 + +# - job_name: 'pushgateway' +# scrape_interval: 10s +# dns_sd_configs: +# - names: +# - 'tasks.pushgateway' +# type: 'A' +# port: 9091 + +# static_configs: +# - targets: ['node-exporter:9100'] + + - job_name: 'traefik-app' + scrape_interval: 5s + static_configs: + - targets: ['10.0.4.29:8181'] + + - job_name: 'keycloak-app' + scrape_interval: 5s + static_configs: + - targets: ['10.0.4.29:8282'] + + - job_name: 'airflow' + # Override the global default and scrape targets from this job every 5 seconds. + scrape_interval: 5s + static_configs: + - targets: + - '10.12.1.14:9102'