parent be8109e7c6
commit b46894e48b

alertmanager/config.yml (new file, 10 lines)
@@ -0,0 +1,10 @@
+route:
+  receiver: 'slack'
+
+receivers:
+  - name: 'slack'
+#    slack_configs:
+#      - send_resolved: true
+#        username: '<username>'
+#        channel: '#<channel-name>'
+#        api_url: '<incoming-webhook-url>'
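
Note: a minimal sketch of the Slack receiver with the commented lines filled in. The username and channel below are placeholders chosen for illustration, and the webhook URL stays a placeholder; none of these values come from the commit itself.

    receivers:
      - name: 'slack'
        slack_configs:
          - send_resolved: true
            username: 'alertmanager'   # hypothetical bot name
            channel: '#alerts'         # hypothetical channel
            api_url: '<incoming-webhook-url>'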
@@ -2,41 +2,158 @@
 networks:
   traefik_front_network:
     external: true
-  back_network_:
+  back_network_pg:
     driver: bridge
     attachable: true

 #### SERVICES
 services:
-### hello_world
-  hello_world:
-    container_name: gitea-app
-    hostname: gitea-app
-    image: hello-world
-    environment:
+### prometheus
+  prometheus:
+    container_name: prometheus-app
+    hostname: prometheus-app
+    image: prom/prometheus:latest
     restart: always
-    networks:
-#      - back_network_gitea
-      - traefik_front_network
     volumes:
+      - ./prometheus:/etc/prometheus/
+      - ./prometheus_data:/prometheus
+    command:
+      - '--config.file=/etc/prometheus/prometheus.yml'
+      - '--storage.tsdb.path=/prometheus'
+      - '--web.console.libraries=/usr/share/prometheus/console_libraries'
+      - '--web.console.templates=/usr/share/prometheus/consoles'
+#    ports:
+#      - 9090:9090
+    networks:
+      - traefik_front_network
+      - back_network_pg
+    links:
+      - cadvisor:cadvisor
+      - alertmanager:alertmanager
+    depends_on:
+      - cadvisor
     labels:
       - "traefik.enable=true"
       - "traefik.docker.network=traefik_front_network"
       # HTTP
-      - "traefik.http.routers.hello-world-http.rule=Host(`hello-world.tips-of-mine.com`)"
-      - "traefik.http.routers.hello-world-http.entrypoints=http"
-      - "traefik.http.routers.hello-world-http.priority=49"
+      - "traefik.http.routers.prometheus-http.rule=Host(`prometheus.tips-of-mine.com`)"
+      - "traefik.http.routers.prometheus-http.entrypoints=http"
+      - "traefik.http.routers.prometheus-http.priority=39"
       # HTTPS
-      - "traefik.http.routers.hello-world-https.rule=Host(`hello-world.tips-of-mine.com`)"
-      - "traefik.http.routers.hello-world-https.entrypoints=https"
-      - "traefik.http.routers.hello-world-https.tls=true"
-      - "traefik.http.routers.hello-world-https.priority=50"
-      - "traefik.http.routers.gitea.service=gitea-https-service"
+      - "traefik.http.routers.prometheus-https.rule=Host(`prometheus.tips-of-mine.com`)"
+      - "traefik.http.routers.prometheus-https.entrypoints=https"
+      - "traefik.http.routers.prometheus-https.tls=true"
+      - "traefik.http.routers.prometheus-https.priority=40"
+      - "traefik.http.routers.prometheus.service=prometheus-https-service"
       # Middleware
       # Service
-#      - "traefik.http.services.gitea-https-service.loadbalancer.server.port=3000"
-#      - "traefik.http.services.gitea-https-service.loadbalancer.server.scheme=https"
-#      - "traefik.http.services.gitea-https-service.loadbalancer.healthcheck.hostname=gitea.traefik.me"
-#      - "traefik.http.services.gitea-https-service.loadbalancer.healthcheck.method=foobar"
-#      - "traefik.http.services.gitea-https-service.loadbalancer.healthcheck.timeout=10"
-#      - "traefik.http.services.gitea-https-service.loadbalancer.healthcheck.interval=30"
+      - "traefik.http.services.prometheus-https-service.loadbalancer.server.port=9090"
+#      - "traefik.http.services.prometheus-https-service.loadbalancer.server.scheme=https"
+      - "traefik.http.services.prometheus-https-service.loadbalancer.healthcheck.hostname=prometheus.tips-of-mine.com"
+      - "traefik.http.services.prometheus-https-service.loadbalancer.healthcheck.method=foobar"
+      - "traefik.http.services.prometheus-https-service.loadbalancer.healthcheck.timeout=10"
+      - "traefik.http.services.prometheus-https-service.loadbalancer.healthcheck.interval=30"

+### node-exporter
+  node-exporter:
+    container_name: prometheus-node-exporter
+    hostname: prometheus-node-exporter
+    image: prom/node-exporter:latest
+    volumes:
+      - /proc:/host/proc:ro
+      - /sys:/host/sys:ro
+      - /:/rootfs:ro
+    command:
+      - '--path.procfs=/host/proc'
+      - '--path.sysfs=/host/sys'
+      - --collector.filesystem.ignored-mount-points
+      - '^/(sys|proc|dev|host|etc|rootfs/var/lib/docker/containers|rootfs/var/lib/docker/overlay2|rootfs/run/docker/netns|rootfs/var/lib/docker/aufs)($$|/)'
+    ports:
+      - 9100:9100
+    networks:
+      - back_network_pg
+    restart: always
+    deploy:
+      mode: global
+
+### alertmanager
+  alertmanager:
+    container_name: prometheus-alertmanager
+    hostname: prometheus-alertmanager
+    image: prom/alertmanager:latest
+    restart: always
+    ports:
+      - 9093:9093
+    networks:
+      - back_network_pg
+    volumes:
+      - ./alertmanager/:/etc/alertmanager/
+    command:
+      - '--config.file=/etc/alertmanager/config.yml'
+      - '--storage.path=/alertmanager'
+
+### cadvisor
+  cadvisor:
+    container_name: prometheus-cadvisor
+    hostname: prometheus-cadvisor
+    image: gcr.io/cadvisor/cadvisor:latest
+    volumes:
+      - /:/rootfs:ro
+      - /var/run:/var/run:rw
+      - /sys:/sys:ro
+      - /var/lib/docker/:/var/lib/docker:ro
+    ports:
+      - 8080:8080
+    networks:
+      - back_network_pg
+    restart: always
+    deploy:
+      mode: global
+
+### grafana
+  grafana:
+    container_name: grafana-app
+    hostname: grafana-app
+    image: grafana/grafana:latest
+    user: '472'
+    restart: always
+    environment:
+      GF_INSTALL_PLUGINS: 'grafana-clock-panel,grafana-simple-json-datasource'
+    volumes:
+      - ./grafana_data:/var/lib/grafana
+      - ./grafana/provisioning/:/etc/grafana/provisioning/
+    env_file:
+      - ./grafana/config.monitoring
+#    ports:
+#      - 3000:3000
+    networks:
+      - traefik_front_network
+      - back_network_pg
+    depends_on:
+      - prometheus
+    labels:
+      - "traefik.enable=true"
+      - "traefik.docker.network=traefik_front_network"
+      # HTTP
+      - "traefik.http.routers.grafana-http.rule=Host(`grafana.tips-of-mine.com`)"
+      - "traefik.http.routers.grafana-http.entrypoints=http"
+      - "traefik.http.routers.grafana-http.priority=41"
+      # HTTPS
+      - "traefik.http.routers.grafana-https.rule=Host(`grafana.tips-of-mine.com`)"
+      - "traefik.http.routers.grafana-https.entrypoints=https"
+      - "traefik.http.routers.grafana-https.tls=true"
+      - "traefik.http.routers.grafana-https.priority=42"
+      - "traefik.http.routers.grafana.service=grafana-https-service"
+      # Middleware
+      # Service
+      - "traefik.http.services.grafana-https-service.loadbalancer.server.port=3000"
+#      - "traefik.http.services.grafana-https-service.loadbalancer.server.scheme=https"
+      - "traefik.http.services.grafana-https-service.loadbalancer.healthcheck.hostname=grafana.tips-of-mine.com"
+      - "traefik.http.services.grafana-https-service.loadbalancer.healthcheck.method=foobar"
+      - "traefik.http.services.grafana-https-service.loadbalancer.healthcheck.timeout=10"
+      - "traefik.http.services.grafana-https-service.loadbalancer.healthcheck.interval=30"
+
+#### VOLUMES
+#volumes:
+#  prometheus_data: {}
+#  grafana_data: {}
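
Note: as committed, the stack persists data through the bind mounts ./prometheus_data and ./grafana_data. A sketch of what the commented "#### VOLUMES" block would look like if named volumes were used instead; the service-level mount changes are shown as comments and are assumptions, not part of this commit.

    volumes:
      prometheus_data: {}
      grafana_data: {}

    # with the services then mounting the named volumes, e.g.:
    #   prometheus:
    #     volumes:
    #       - prometheus_data:/prometheus
    #   grafana:
    #     volumes:
    #       - grafana_data:/var/lib/grafana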
grafana/config.monitoring (new file, 3 lines)
@@ -0,0 +1,3 @@
+GF_SECURITY_ADMIN_USER=admin
+GF_SECURITY_ADMIN_PASSWORD=foobar
+GF_USERS_ALLOW_SIGN_UP=false
grafana/provisioning/dashboards/authentik.json (new file, 1388 lines)
Diff suppressed because it is too large.
grafana/provisioning/dashboards/dashboard.yml (new file, 11 lines)
@@ -0,0 +1,11 @@
+apiVersion: 1
+
+providers:
+  - name: 'Prometheus'
+    orgId: 1
+    folder: ''
+    type: file
+    disableDeletion: false
+    editable: true
+    options:
+      path: /etc/grafana/provisioning/dashboards
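
Note: the two dashboard JSON files added in this commit (authentik.json and traefik.json) sit next to this provider file, and the Compose service mounts ./grafana/provisioning/ at /etc/grafana/provisioning/, so Grafana loads them from the path declared above. If dashboards were later split into a subfolder, a second provider entry might look like the sketch below; the 'Traefik' folder and subdirectory are illustrative assumptions, not part of this commit.

    providers:
      - name: 'Traefik'
        orgId: 1
        folder: 'Traefik'
        type: file
        disableDeletion: false
        editable: true
        options:
          path: /etc/grafana/provisioning/dashboards/traefik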
grafana/provisioning/dashboards/traefik.json (new file, 1605 lines)
Diff suppressed because it is too large.
grafana/provisioning/datasources/datasource.yml (new file, 50 lines)
@@ -0,0 +1,50 @@
+# config file version
+apiVersion: 1
+
+# list of datasources that should be deleted from the database
+deleteDatasources:
+  - name: Prometheus
+    orgId: 1
+
+# list of datasources to insert/update depending on
+# what's available in the database
+datasources:
+  # <string, required> name of the datasource. Required
+  - name: Prometheus
+    # <string, required> datasource type. Required
+    type: prometheus
+    # <string, required> access mode. direct or proxy. Required
+    access: proxy
+    # <int> org id. will default to orgId 1 if not specified
+    orgId: 1
+    # <string> url
+    url: http://prometheus:9090
+    # <string> database password, if used
+    password:
+    # <string> database user, if used
+    user:
+    # <string> database name, if used
+    database:
+    # <bool> enable/disable basic auth
+    basicAuth: false
+    # <string> basic auth username, if used
+    basicAuthUser:
+    # <string> basic auth password, if used
+    basicAuthPassword:
+    # <bool> enable/disable with credentials headers
+    withCredentials:
+    # <bool> mark as default datasource. Max one per org
+    isDefault: true
+    # <map> fields that will be converted to json and stored in json_data
+    jsonData:
+      graphiteVersion: "1.1"
+      tlsAuth: false
+      tlsAuthWithCACert: false
+    # <string> json object of data that will be encrypted.
+    secureJsonData:
+      tlsCACert: "..."
+      tlsClientCert: "..."
+      tlsClientKey: "..."
+    version: 1
+    # <bool> allow users to edit datasources from the UI.
+    editable: true
prometheus/alert.rules (new file, 22 lines)
@@ -0,0 +1,22 @@
+groups:
+- name: example
+  rules:
+
+  # Alert for any instance that is unreachable for >2 minutes.
+  - alert: service_down
+    expr: up == 0
+    for: 2m
+    labels:
+      severity: page
+    annotations:
+      summary: "Instance {{ $labels.instance }} down"
+      description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 2 minutes."
+
+  - alert: high_load
+    expr: node_load1 > 0.5
+    for: 2m
+    labels:
+      severity: page
+    annotations:
+      summary: "Instance {{ $labels.instance }} under high load"
+      description: "{{ $labels.instance }} of job {{ $labels.job }} is under high load."
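
Note: a sketch of one more rule in the same style, based on node-exporter memory metrics; the rule name and threshold are illustrative assumptions, not part of this commit.

    - alert: high_memory_usage
      expr: node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes < 0.1
      for: 2m
      labels:
        severity: page
      annotations:
        summary: "Instance {{ $labels.instance }} low on memory"
        description: "{{ $labels.instance }} of job {{ $labels.job }} has less than 10% of memory available."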
prometheus/prometheus.yml (new file, 94 lines)
@@ -0,0 +1,94 @@
+# my global config
+global:
+  scrape_interval: 15s     # By default, scrape targets every 15 seconds.
+  evaluation_interval: 15s # By default, evaluate rules every 15 seconds.
+  # scrape_timeout is set to the global default (10s).
+
+  # Attach these labels to any time series or alerts when communicating with
+  # external systems (federation, remote storage, Alertmanager).
+  external_labels:
+    monitor: 'my-project'
+
+# Load and evaluate rules in this file every 'evaluation_interval' seconds.
+rule_files:
+  - 'alert.rules'
+  # - "first.rules"
+  # - "second.rules"
+
+# alert
+alerting:
+  alertmanagers:
+    - scheme: http
+      static_configs:
+        - targets:
+            - "alertmanager:9093"
+
+# A scrape configuration containing exactly one endpoint to scrape:
+# Here it's Prometheus itself.
+scrape_configs:
+  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
+  - job_name: app
+    scrape_interval: 5s
+    static_configs:
+      - targets: ['host.docker.internal:8000']
+
+  - job_name: 'prometheus'
+
+    # Override the global default and scrape targets from this job every 5 seconds.
+    scrape_interval: 5s
+
+    static_configs:
+      - targets: ['localhost:9090']
+
+  - job_name: 'cadvisor'
+
+    # Override the global default and scrape targets from this job every 5 seconds.
+    scrape_interval: 5s
+
+    dns_sd_configs:
+      - names:
+          - 'tasks.cadvisor'
+        type: 'A'
+        port: 8080
+
+    # static_configs:
+    #   - targets: ['cadvisor:8080']
+
+  - job_name: 'node-exporter'
+
+    # Override the global default and scrape targets from this job every 5 seconds.
+    scrape_interval: 5s
+
+    dns_sd_configs:
+      - names:
+          - 'tasks.node-exporter'
+        type: 'A'
+        port: 9100
+
+  # - job_name: 'pushgateway'
+  #   scrape_interval: 10s
+  #   dns_sd_configs:
+  #     - names:
+  #         - 'tasks.pushgateway'
+  #       type: 'A'
+  #       port: 9091
+
+  #   static_configs:
+  #     - targets: ['node-exporter:9100']
+
+  - job_name: 'traefik-app'
+    scrape_interval: 5s
+    static_configs:
+      - targets: ['10.0.4.29:8181']
+
+  - job_name: 'keycloak-app'
+    scrape_interval: 5s
+    static_configs:
+      - targets: ['10.0.4.29:8282']
+
+  - job_name: 'airflow'
+    # Override the global default and scrape targets from this job every 5 seconds.
+    scrape_interval: 5s
+    static_configs:
+      - targets:
+          - '10.12.1.14:9102'
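
Note: a sketch of one additional scrape job that would fit this stack, pointing Prometheus at the Alertmanager container defined in the Compose file (reachable as 'alertmanager' on back_network_pg, port 9093); it is illustrative and not part of this commit.

    - job_name: 'alertmanager'
      scrape_interval: 5s
      static_configs:
        - targets: ['alertmanager:9093']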