feat: integrate Uptime Kuma and Alertmanager into Docker setup

- Add Uptime Kuma service for status monitoring with health checks.
- Introduce Alertmanager service for alert management and notifications.
- Update docker-compose.yml to include new services and their configurations.
- Enhance Makefile with commands for managing Uptime Kuma and Alertmanager logs.
- Modify Ansible playbook to install necessary packages and configure SSL for new services.
- Update Nginx configuration to route traffic to Uptime Kuma and Alertmanager.
- Adjust Prometheus configuration to include alert rules and external URLs.
This commit is contained in:
2025-09-16 21:50:56 +03:00
parent 5e10204137
commit 9ec3f02767
20 changed files with 2173 additions and 38 deletions

View File

@@ -12,10 +12,12 @@ services:
- '--web.console.templates=/etc/prometheus/consoles'
- '--storage.tsdb.retention.time=${PROMETHEUS_RETENTION_DAYS:-30}d'
- '--web.enable-lifecycle'
- '--web.external-url=https://${SERVER_IP}/prometheus/'
ports:
- "9090:9090"
volumes:
- ./infra/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro
- ./infra/prometheus/alert_rules.yml:/etc/prometheus/alert_rules.yml:ro
- prometheus_data:/prometheus
networks:
- bots_network
@@ -35,9 +37,9 @@ services:
- GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_ADMIN_PASSWORD:-admin}
- GF_USERS_ALLOW_SIGN_UP=false
- GF_INSTALL_PLUGINS=grafana-clock-panel,grafana-simple-json-datasource
- GF_SERVER_ROOT_URL=https://${SERVER_IP:-localhost}/grafana/
- GF_SERVER_ROOT_URL=https://${SERVER_IP}/grafana/
- GF_SERVER_SERVE_FROM_SUB_PATH=true
- GF_SERVER_DOMAIN=${SERVER_IP:-localhost}
- GF_SERVER_DOMAIN=${SERVER_IP}
ports:
- "3000:3000"
volumes:
@@ -53,6 +55,51 @@ services:
timeout: 10s
retries: 3
# Uptime Kuma Status Page
uptime-kuma:
image: louislam/uptime-kuma:latest
container_name: bots_uptime_kuma
restart: unless-stopped
volumes:
- uptime_kuma_data:/app/data
ports:
- "3001:3001"
environment:
- UPTIME_KUMA_PORT=3001
networks:
- bots_network
healthcheck:
test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:3001"]
interval: 30s
timeout: 10s
retries: 3
start_period: 40s
# Alertmanager
alertmanager:
image: prom/alertmanager:latest
container_name: bots_alertmanager
restart: unless-stopped
command:
- '--config.file=/etc/alertmanager/alertmanager.yml'
- '--storage.path=/alertmanager'
- '--web.external-url=https://${SERVER_IP}/alertmanager/'
- '--web.route-prefix=/'
ports:
- "9093:9093"
volumes:
- alertmanager_data:/alertmanager
- ./infra/alertmanager/alertmanager.yml:/etc/alertmanager/alertmanager.yml:ro
networks:
- bots_network
depends_on:
- prometheus
healthcheck:
test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:9093/-/healthy"]
interval: 30s
timeout: 10s
retries: 3
# Nginx Reverse Proxy
nginx:
image: nginx:alpine
@@ -61,16 +108,20 @@ services:
ports:
- "80:80"
- "443:443"
environment:
- SERVER_IP=${SERVER_IP}
volumes:
- ./infra/nginx/nginx.conf:/etc/nginx/nginx.conf:ro
- ./infra/nginx/nginx.conf:/etc/nginx/templates/nginx.conf.template:ro
- ./infra/nginx/conf.d:/etc/nginx/conf.d:ro
- ./infra/nginx/ssl:/etc/nginx/ssl:ro
- ./infra/nginx/.htpasswd:/etc/nginx/.htpasswd:ro
- /etc/letsencrypt:/etc/letsencrypt:ro
networks:
- bots_network
depends_on:
- grafana
- prometheus
- uptime-kuma
healthcheck:
test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost/nginx-health"]
interval: 30s
@@ -194,6 +245,10 @@ volumes:
driver: local
grafana_data:
driver: local
uptime_kuma_data:
driver: local
alertmanager_data:
driver: local
networks:
bots_network: