diff --git a/Makefile b/Makefile
index aa72363..7f169ce 100644
--- a/Makefile
+++ b/Makefile
@@ -9,6 +9,8 @@ help: ## Показать справку
@echo "📊 Мониторинг:"
@echo " Prometheus: http://localhost:9090"
@echo " Grafana: http://localhost:3000 (admin/admin)"
+ @echo " Uptime Kuma: http://localhost:3001"
+ @echo " Alertmanager: http://localhost:9093"
@echo " Server Monitor: http://localhost:9091/health"
@echo " Bot Health: http://localhost:8080/health"
@echo " AnonBot Health: http://localhost:8081/health"
@@ -37,6 +39,12 @@ logs-bot: ## Показать логи Telegram бота
logs-anonBot: ## Показать логи AnonBot
docker-compose logs -f anon-bot
+logs-uptime-kuma: ## Показать логи Uptime Kuma
+ docker-compose logs -f uptime-kuma
+
+logs-alertmanager: ## Показать логи Alertmanager
+ docker-compose logs -f alertmanager
+
restart: ## Перезапустить все сервисы
docker-compose down
docker-compose build --no-cache
@@ -54,6 +62,12 @@ restart-bot: ## Перезапустить только Telegram бота
restart-anonBot: ## Перезапустить только AnonBot
docker-compose restart anon-bot
+restart-uptime-kuma: ## Перезапустить только Uptime Kuma
+ docker-compose restart uptime-kuma
+
+restart-alertmanager: ## Перезапустить только Alertmanager
+ docker-compose restart alertmanager
+
status: ## Показать статус контейнеров
docker-compose ps
@@ -63,6 +77,8 @@ health: ## Проверить здоровье сервисов
@curl -f http://localhost:8081/health || echo "❌ AnonBot health check failed"
@curl -f http://localhost:9090/-/healthy || echo "❌ Prometheus health check failed"
@curl -f http://localhost:3000/api/health || echo "❌ Grafana health check failed"
+ @curl -f http://localhost:3001 || echo "❌ Uptime Kuma health check failed"
+ @curl -f http://localhost:9093/-/healthy || echo "❌ Alertmanager health check failed"
@curl -f http://localhost:9091/health || echo "❌ Server monitor health check failed"
deploy: ## Полный деплой на продакшен
@@ -120,6 +136,8 @@ start: build up ## Собрать и запустить все сервисы
@echo "🏗️ Production Infrastructure запущена!"
@echo "📊 Prometheus: http://localhost:9090"
@echo "📈 Grafana: http://localhost:3000 (admin/admin)"
+ @echo "📊 Uptime Kuma: http://localhost:3001"
+ @echo "🚨 Alertmanager: http://localhost:9093"
@echo "🤖 Bot Health: http://localhost:8080/health"
@echo "🔒 AnonBot Health: http://localhost:8081/health"
@echo "📡 Server Monitor: http://localhost:9091/health"
@@ -191,6 +209,7 @@ test-clean: ## Очистить все файлы тестирования и о
@find . -name "*.pyc" -delete 2>/dev/null || true
@find . -name "__pycache__" -type d -exec rm -rf {} + 2>/dev/null || true
@echo "✅ Файлы тестирования очищены"
+
check-ports: ## Проверить занятые порты
@echo "🔍 Checking occupied ports..."
@@ -242,3 +261,37 @@ reload-prometheus: ## Перезагрузить конфигурацию Promet
reload-grafana: ## Перезагрузить конфигурацию Grafana
@echo "🔄 Reloading Grafana configuration..."
@docker-compose restart grafana
+
+ssl-setup: ## Настроить SSL сертификаты (самоподписанный)
+ @echo "🔒 Setting up self-signed SSL certificates..."
+ @if [ -z "$(SERVER_IP)" ]; then echo "❌ Please set SERVER_IP variable in .env file"; exit 1; fi
+	@sudo mkdir -p /etc/letsencrypt/live/$(SERVER_IP)
+	@sudo openssl req -x509 -nodes -days 365 -newkey rsa:2048 \
+		-keyout /etc/letsencrypt/live/$(SERVER_IP)/privkey.pem \
+		-out /etc/letsencrypt/live/$(SERVER_IP)/fullchain.pem \
+		-subj "/CN=$(SERVER_IP)"
+ @echo "✅ Self-signed certificate created for $(SERVER_IP)"
+
+ssl-renew: ## Обновить SSL сертификаты
+ @echo "🔄 Renewing SSL certificates..."
+ @sudo /usr/local/bin/ssl-renewal.sh
+
+ssl-status: ## Проверить статус SSL сертификатов
+ @echo "🔍 Checking SSL certificate status..."
+ @sudo certbot certificates
+
+uptime-kuma: ## Открыть Uptime Kuma в браузере
+ @echo "📊 Opening Uptime Kuma..."
+ @open http://localhost:3001 || xdg-open http://localhost:3001 || echo "Please open manually: http://localhost:3001"
+
+alertmanager: ## Открыть Alertmanager в браузере
+ @echo "🚨 Opening Alertmanager..."
+ @open http://localhost:9093 || xdg-open http://localhost:9093 || echo "Please open manually: http://localhost:9093"
+
+monitoring-all: ## Открыть все мониторинг сервисы
+ @echo "📊 Opening all monitoring services..."
+ @echo " - Grafana: http://localhost:3000"
+ @echo " - Prometheus: http://localhost:9090"
+ @echo " - Uptime Kuma: http://localhost:3001"
+ @echo " - Alertmanager: http://localhost:9093"
+ @open http://localhost:3000 || xdg-open http://localhost:3000 || echo "Please open manually"
diff --git a/docker-compose.yml b/docker-compose.yml
index d620e95..9a700bb 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -12,10 +12,12 @@ services:
- '--web.console.templates=/etc/prometheus/consoles'
- '--storage.tsdb.retention.time=${PROMETHEUS_RETENTION_DAYS:-30}d'
- '--web.enable-lifecycle'
+ - '--web.external-url=https://${SERVER_IP}/prometheus/'
ports:
- "9090:9090"
volumes:
- ./infra/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro
+ - ./infra/prometheus/alert_rules.yml:/etc/prometheus/alert_rules.yml:ro
- prometheus_data:/prometheus
networks:
- bots_network
@@ -35,9 +37,9 @@ services:
- GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_ADMIN_PASSWORD:-admin}
- GF_USERS_ALLOW_SIGN_UP=false
- GF_INSTALL_PLUGINS=grafana-clock-panel,grafana-simple-json-datasource
- - GF_SERVER_ROOT_URL=https://${SERVER_IP:-localhost}/grafana/
+ - GF_SERVER_ROOT_URL=https://${SERVER_IP}/grafana/
- GF_SERVER_SERVE_FROM_SUB_PATH=true
- - GF_SERVER_DOMAIN=${SERVER_IP:-localhost}
+ - GF_SERVER_DOMAIN=${SERVER_IP}
ports:
- "3000:3000"
volumes:
@@ -53,6 +55,51 @@ services:
timeout: 10s
retries: 3
+ # Uptime Kuma Status Page
+ uptime-kuma:
+ image: louislam/uptime-kuma:latest
+ container_name: bots_uptime_kuma
+ restart: unless-stopped
+ volumes:
+ - uptime_kuma_data:/app/data
+ ports:
+ - "3001:3001"
+ environment:
+ - UPTIME_KUMA_PORT=3001
+ networks:
+ - bots_network
+ healthcheck:
+ test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:3001"]
+ interval: 30s
+ timeout: 10s
+ retries: 3
+ start_period: 40s
+
+ # Alertmanager
+ alertmanager:
+ image: prom/alertmanager:latest
+ container_name: bots_alertmanager
+ restart: unless-stopped
+ command:
+ - '--config.file=/etc/alertmanager/alertmanager.yml'
+ - '--storage.path=/alertmanager'
+ - '--web.external-url=https://${SERVER_IP}/alertmanager/'
+ - '--web.route-prefix=/'
+ ports:
+ - "9093:9093"
+ volumes:
+ - alertmanager_data:/alertmanager
+ - ./infra/alertmanager/alertmanager.yml:/etc/alertmanager/alertmanager.yml:ro
+ networks:
+ - bots_network
+ depends_on:
+ - prometheus
+ healthcheck:
+ test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:9093/-/healthy"]
+ interval: 30s
+ timeout: 10s
+ retries: 3
+
# Nginx Reverse Proxy
nginx:
image: nginx:alpine
@@ -61,16 +108,20 @@ services:
ports:
- "80:80"
- "443:443"
+ environment:
+ - SERVER_IP=${SERVER_IP}
volumes:
- - ./infra/nginx/nginx.conf:/etc/nginx/nginx.conf:ro
+ - ./infra/nginx/nginx.conf:/etc/nginx/templates/nginx.conf.template:ro
- ./infra/nginx/conf.d:/etc/nginx/conf.d:ro
- ./infra/nginx/ssl:/etc/nginx/ssl:ro
- ./infra/nginx/.htpasswd:/etc/nginx/.htpasswd:ro
+ - /etc/letsencrypt:/etc/letsencrypt:ro
networks:
- bots_network
depends_on:
- grafana
- prometheus
+ - uptime-kuma
healthcheck:
test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost/nginx-health"]
interval: 30s
@@ -194,6 +245,10 @@ volumes:
driver: local
grafana_data:
driver: local
+ uptime_kuma_data:
+ driver: local
+ alertmanager_data:
+ driver: local
networks:
bots_network:
diff --git a/infra/alertmanager/alertmanager-simple.yml b/infra/alertmanager/alertmanager-simple.yml
new file mode 100644
index 0000000..d12a1f3
--- /dev/null
+++ b/infra/alertmanager/alertmanager-simple.yml
@@ -0,0 +1,17 @@
+# Simplified Alertmanager Configuration
+global:
+ smtp_smarthost: 'localhost:587'
+ smtp_from: 'alerts@localhost'
+
+route:
+ group_by: ['alertname']
+ group_wait: 10s
+ group_interval: 10s
+ repeat_interval: 1h
+ receiver: 'web.hook'
+
+receivers:
+ - name: 'web.hook'
+ webhook_configs:
+ - url: 'http://localhost:5001/'
+ send_resolved: true
diff --git a/infra/alertmanager/alertmanager.yml b/infra/alertmanager/alertmanager.yml
new file mode 100644
index 0000000..933c0ba
--- /dev/null
+++ b/infra/alertmanager/alertmanager.yml
@@ -0,0 +1,185 @@
+# Alertmanager Configuration
+# This file configures how alerts are handled and routed
+
+global:
+ # SMTP configuration for email notifications
+ smtp_smarthost: 'localhost:587'
+ smtp_from: 'alerts@{{DOMAIN}}'
+ smtp_auth_username: 'alerts@{{DOMAIN}}'
+ smtp_auth_password: '{{SMTP_PASSWORD}}'
+ smtp_require_tls: true
+
+ # Resolve timeout
+ resolve_timeout: 5m
+
+# Templates for alert formatting
+templates:
+ - '/etc/alertmanager/templates/*.tmpl'
+
+# Route configuration - defines how alerts are routed
+route:
+ group_by: ['alertname', 'cluster', 'service']
+ group_wait: 10s
+ group_interval: 10s
+ repeat_interval: 1h
+ receiver: 'web.hook'
+ routes:
+ # Critical alerts - immediate notification
+ - match:
+ severity: critical
+ receiver: 'critical-alerts'
+ group_wait: 5s
+ repeat_interval: 5m
+
+ # Warning alerts - grouped notification
+ - match:
+ severity: warning
+ receiver: 'warning-alerts'
+ group_wait: 30s
+ repeat_interval: 30m
+
+ # Bot-specific alerts
+ - match:
+ service: telegram-bot
+ receiver: 'bot-alerts'
+ group_wait: 10s
+ repeat_interval: 15m
+
+ - match:
+ service: anon-bot
+ receiver: 'bot-alerts'
+ group_wait: 10s
+ repeat_interval: 15m
+
+ # Infrastructure alerts
+ - match:
+ service: prometheus
+ receiver: 'infrastructure-alerts'
+ group_wait: 30s
+ repeat_interval: 1h
+
+ - match:
+ service: grafana
+ receiver: 'infrastructure-alerts'
+ group_wait: 30s
+ repeat_interval: 1h
+
+ - match:
+ service: nginx
+ receiver: 'infrastructure-alerts'
+ group_wait: 30s
+ repeat_interval: 1h
+
+# Inhibition rules - suppress certain alerts when others are firing
+inhibit_rules:
+ # Suppress warning alerts when critical alerts are firing
+ - source_match:
+ severity: 'critical'
+ target_match:
+ severity: 'warning'
+ equal: ['alertname', 'cluster', 'service']
+
+ # Suppress individual instance alerts when the entire service is down
+ - source_match:
+ alertname: 'ServiceDown'
+ target_match:
+ alertname: 'InstanceDown'
+ equal: ['service']
+
+# Receiver configurations
+receivers:
+ # Default webhook receiver (for testing)
+ - name: 'web.hook'
+ webhook_configs:
+ - url: 'http://localhost:5001/'
+ send_resolved: true
+
+ # Critical alerts - immediate notification via multiple channels
+ - name: 'critical-alerts'
+ email_configs:
+ - to: 'admin@{{DOMAIN}}'
+ subject: '🚨 CRITICAL ALERT: {{ .GroupLabels.alertname }}'
+ body: |
+ {{ range .Alerts }}
+ Alert: {{ .Annotations.summary }}
+ Description: {{ .Annotations.description }}
+ Severity: {{ .Labels.severity }}
+ Service: {{ .Labels.service }}
+ Instance: {{ .Labels.instance }}
+ Time: {{ .StartsAt }}
+ {{ end }}
+        html: |
+          <h2>🚨 Critical Alert</h2>
+          <table>
+            <tr><td><b>Alert:</b></td><td>{{ .GroupLabels.alertname }}</td></tr>
+            <tr><td><b>Service:</b></td><td>{{ .GroupLabels.service }}</td></tr>
+            <tr><td><b>Time:</b></td><td>{{ .GroupLabels.time }}</td></tr>
+          </table>
+          <h3>Alerts:</h3>
+          <ul>
+          {{ range .Alerts }}
+            <li><b>{{ .Annotations.summary }}</b><br/>
+                {{ .Annotations.description }}<br/>
+                Instance: {{ .Labels.instance }} | Time: {{ .StartsAt }}
+            </li>
+          {{ end }}
+          </ul>
+
+ webhook_configs:
+ - url: 'http://localhost:5001/critical'
+ send_resolved: true
+
+ # Warning alerts - less urgent notification
+ - name: 'warning-alerts'
+ email_configs:
+ - to: 'admin@{{DOMAIN}}'
+ subject: '⚠️ WARNING: {{ .GroupLabels.alertname }}'
+ body: |
+ {{ range .Alerts }}
+ Alert: {{ .Annotations.summary }}
+ Description: {{ .Annotations.description }}
+ Severity: {{ .Labels.severity }}
+ Service: {{ .Labels.service }}
+ Instance: {{ .Labels.instance }}
+ Time: {{ .StartsAt }}
+ {{ end }}
+ webhook_configs:
+ - url: 'http://localhost:5001/warning'
+ send_resolved: true
+
+ # Bot-specific alerts
+ - name: 'bot-alerts'
+ email_configs:
+ - to: 'bot-admin@{{DOMAIN}}'
+ subject: '🤖 Bot Alert: {{ .GroupLabels.alertname }}'
+ body: |
+ Bot Alert: {{ .GroupLabels.alertname }}
+ Service: {{ .GroupLabels.service }}
+
+ {{ range .Alerts }}
+ - {{ .Annotations.summary }}
+ {{ .Annotations.description }}
+ Instance: {{ .Labels.instance }}
+ Time: {{ .StartsAt }}
+ {{ end }}
+ webhook_configs:
+ - url: 'http://localhost:5001/bot'
+ send_resolved: true
+
+ # Infrastructure alerts
+ - name: 'infrastructure-alerts'
+ email_configs:
+ - to: 'infra@{{DOMAIN}}'
+ subject: '🏗️ Infrastructure Alert: {{ .GroupLabels.alertname }}'
+ body: |
+ Infrastructure Alert: {{ .GroupLabels.alertname }}
+ Service: {{ .GroupLabels.service }}
+
+ {{ range .Alerts }}
+ - {{ .Annotations.summary }}
+ {{ .Annotations.description }}
+ Instance: {{ .Labels.instance }}
+ Time: {{ .StartsAt }}
+ {{ end }}
+ webhook_configs:
+ - url: 'http://localhost:5001/infrastructure'
+ send_resolved: true
diff --git a/infra/ansible/playbook.yml b/infra/ansible/playbook.yml
index 40005bf..df7ec7e 100644
--- a/infra/ansible/playbook.yml
+++ b/infra/ansible/playbook.yml
@@ -57,6 +57,15 @@
- nginx
- openssl
- apache2-utils
+ - certbot
+ - python3-certbot-nginx
+ state: present
+
+ - name: Установить Python библиотеки для Ansible
+ pip:
+ name:
+ - passlib
+ - bcrypt
state: present
- name: Установить часовой пояс Europe/Moscow
@@ -278,14 +287,40 @@
- "{{ project_root }}/infra/nginx"
- "{{ project_root }}/infra/nginx/ssl"
- "{{ project_root }}/infra/nginx/conf.d"
+ - "{{ project_root }}/infra/uptime-kuma"
+ - "{{ project_root }}/infra/alertmanager"
+ - "{{ project_root }}/infra/grafana/dashboards"
+ - "{{ project_root }}/scripts"
- - name: Сгенерировать самоподписанный SSL сертификат
+ - name: Сгенерировать самоподписанный SSL сертификат (fallback)
command: >
openssl req -x509 -newkey rsa:4096 -keyout {{ project_root }}/infra/nginx/ssl/key.pem
-out {{ project_root }}/infra/nginx/ssl/cert.pem -days 365 -nodes
-subj "/CN={{ ansible_host }}/O=Monitoring/C=RU"
args:
creates: "{{ project_root }}/infra/nginx/ssl/cert.pem"
+ when: not use_letsencrypt | default(false)
+
+ - name: Создать директории для Let's Encrypt
+ file:
+ path: "{{ item }}"
+ state: directory
+ owner: root
+ group: root
+ mode: '0755'
+ loop:
+ - /etc/letsencrypt
+ - /etc/letsencrypt/live
+ - /etc/letsencrypt/archive
+ - /etc/letsencrypt/renewal
+ when: use_letsencrypt | default(false)
+
+ - name: Настроить cron для автоматического обновления SSL сертификатов
+ cron:
+ name: "SSL Certificate Renewal"
+ job: "0 2 * * 1 /usr/local/bin/ssl-renewal.sh"
+ user: root
+ when: use_letsencrypt | default(false)
- name: Установить права на SSL сертификаты
file:
@@ -314,6 +349,7 @@
group: root
mode: '0644'
backup: yes
+ remote_src: yes
- name: Скопировать конфигурации nginx для сервисов
copy:
@@ -323,6 +359,7 @@
group: root
mode: '0644'
backup: yes
+ remote_src: yes
- name: Скопировать SSL сертификаты
copy:
@@ -332,6 +369,7 @@
group: root
mode: '0600'
backup: yes
+ remote_src: yes
- name: Скопировать htpasswd файл
copy:
@@ -341,6 +379,47 @@
group: root
mode: '0644'
backup: yes
+ remote_src: yes
+
+ - name: Скопировать конфигурацию Alertmanager
+ copy:
+ src: "{{ project_root }}/infra/alertmanager/alertmanager.yml"
+ dest: "{{ project_root }}/infra/alertmanager/alertmanager.yml"
+ owner: "{{ deploy_user }}"
+ group: "{{ deploy_user }}"
+ mode: '0644'
+ backup: yes
+ remote_src: yes
+
+ - name: Скопировать правила алертов Prometheus
+ copy:
+ src: "{{ project_root }}/infra/prometheus/alert_rules.yml"
+ dest: "{{ project_root }}/infra/prometheus/alert_rules.yml"
+ owner: "{{ deploy_user }}"
+ group: "{{ deploy_user }}"
+ mode: '0644'
+ backup: yes
+ remote_src: yes
+
+ - name: Скопировать дашборды Grafana
+ copy:
+ src: "{{ project_root }}/infra/grafana/dashboards/"
+ dest: "{{ project_root }}/infra/grafana/dashboards/"
+ owner: "{{ deploy_user }}"
+ group: "{{ deploy_user }}"
+ mode: '0644'
+ backup: yes
+ remote_src: yes
+
+ - name: Скопировать скрипт настройки SSL
+ copy:
+ src: "{{ project_root }}/scripts/setup-ssl.sh"
+ dest: /usr/local/bin/setup-ssl.sh
+ owner: root
+ group: root
+ mode: '0755'
+ backup: yes
+ remote_src: yes
- name: Проверить конфигурацию nginx
command: nginx -t
@@ -811,6 +890,20 @@
timeout: 30
state: started
+ - name: Проверить, что порт 3001 (Uptime Kuma) открыт
+ wait_for:
+ port: 3001
+ host: "{{ ansible_host }}"
+ timeout: 30
+ state: started
+
+ - name: Проверить, что порт 9093 (Alertmanager) открыт
+ wait_for:
+ port: 9093
+ host: "{{ ansible_host }}"
+ timeout: 30
+ state: started
+
- name: Проверить доступность Nginx
uri:
url: "http://{{ ansible_host }}/nginx-health"
@@ -849,6 +942,26 @@
retries: 5
delay: 10
+ - name: Проверить доступность Uptime Kuma через Nginx
+ uri:
+ url: "https://{{ ansible_host }}/status"
+ method: GET
+ status_code: 200
+ validate_certs: no
+ register: uptime_kuma_nginx_health
+ retries: 5
+ delay: 10
+
+ - name: Проверить доступность Alertmanager через Nginx
+ uri:
+ url: "https://{{ ansible_host }}/alertmanager/"
+ method: GET
+ status_code: 200
+ validate_certs: no
+ register: alertmanager_nginx_health
+ retries: 5
+ delay: 10
+
- name: Закрыть старый SSH порт 22 в UFW (финальный шаг)
ufw:
@@ -858,7 +971,7 @@
- name: Проверка запуска ботов завершена — всё работает 🟢
debug:
- msg: "Все сервисы запущены и слушают нужные порты. SSH настроен на порт 15722, Fail2ban активен, параметры безопасности ядра применены. Порт 22 закрыт для безопасности."
+ msg: "Все сервисы запущены и слушают нужные порты. SSH настроен на порт 15722, Fail2ban активен, параметры безопасности ядра применены. Порт 22 закрыт для безопасности. Добавлены: Uptime Kuma (статусная страница), Alertmanager (мониторинг), Let's Encrypt SSL, Grafana дашборды."
# handlers для перезагрузки сервисов
handlers:
diff --git a/infra/grafana/dashboards/bot-monitoring.json b/infra/grafana/dashboards/bot-monitoring.json
new file mode 100644
index 0000000..8b106d7
--- /dev/null
+++ b/infra/grafana/dashboards/bot-monitoring.json
@@ -0,0 +1,529 @@
+{
+ "annotations": {
+ "list": [
+ {
+ "builtIn": 1,
+ "datasource": "-- Grafana --",
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "name": "Annotations & Alerts",
+ "type": "dashboard"
+ }
+ ]
+ },
+ "editable": true,
+ "gnetId": null,
+ "graphTooltip": 0,
+ "id": null,
+ "links": [],
+ "panels": [
+ {
+ "datasource": "Prometheus",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "line",
+ "fillOpacity": 10,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "vis": false
+ },
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ },
+ "unit": "reqps"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 0,
+ "y": 0
+ },
+ "id": 1,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom"
+ },
+ "tooltip": {
+ "mode": "single"
+ }
+ },
+ "targets": [
+ {
+ "expr": "rate(http_requests_total{job=~\"telegram-bot|anon-bot\"}[5m])",
+ "interval": "",
+ "legendFormat": "{{job}} - {{method}} {{status}}",
+ "refId": "A"
+ }
+ ],
+ "title": "Bot Request Rate",
+ "type": "timeseries"
+ },
+ {
+ "datasource": "Prometheus",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "line",
+ "fillOpacity": 10,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "vis": false
+ },
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ },
+ "unit": "s"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 12,
+ "y": 0
+ },
+ "id": 2,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom"
+ },
+ "tooltip": {
+ "mode": "single"
+ }
+ },
+ "targets": [
+ {
+ "expr": "histogram_quantile(0.95, rate(http_request_duration_seconds_bucket{job=~\"telegram-bot|anon-bot\"}[5m]))",
+ "interval": "",
+ "legendFormat": "{{job}} - 95th percentile",
+ "refId": "A"
+ },
+ {
+ "expr": "histogram_quantile(0.50, rate(http_request_duration_seconds_bucket{job=~\"telegram-bot|anon-bot\"}[5m]))",
+ "interval": "",
+ "legendFormat": "{{job}} - 50th percentile",
+ "refId": "B"
+ }
+ ],
+ "title": "Bot Response Time",
+ "type": "timeseries"
+ },
+ {
+ "datasource": "Prometheus",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "line",
+ "fillOpacity": 10,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "vis": false
+ },
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ },
+ "unit": "percent"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 0,
+ "y": 8
+ },
+ "id": 3,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom"
+ },
+ "tooltip": {
+ "mode": "single"
+ }
+ },
+ "targets": [
+ {
+ "expr": "rate(http_requests_total{job=~\"telegram-bot|anon-bot\",status=~\"5..\"}[5m]) / rate(http_requests_total{job=~\"telegram-bot|anon-bot\"}[5m]) * 100",
+ "interval": "",
+ "legendFormat": "{{job}} - Error Rate",
+ "refId": "A"
+ }
+ ],
+ "title": "Bot Error Rate",
+ "type": "timeseries"
+ },
+ {
+ "datasource": "Prometheus",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "line",
+ "fillOpacity": 10,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "vis": false
+ },
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ },
+ "unit": "bytes"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 12,
+ "y": 8
+ },
+ "id": 4,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom"
+ },
+ "tooltip": {
+ "mode": "single"
+ }
+ },
+ "targets": [
+ {
+ "expr": "process_resident_memory_bytes{job=~\"telegram-bot|anon-bot\"}",
+ "interval": "",
+ "legendFormat": "{{job}} - Memory Usage",
+ "refId": "A"
+ }
+ ],
+ "title": "Bot Memory Usage",
+ "type": "timeseries"
+ },
+ {
+ "datasource": "Prometheus",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "line",
+ "fillOpacity": 10,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "vis": false
+ },
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ },
+ "unit": "short"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 0,
+ "y": 16
+ },
+ "id": 5,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom"
+ },
+ "tooltip": {
+ "mode": "single"
+ }
+ },
+ "targets": [
+ {
+ "expr": "up{job=~\"telegram-bot|anon-bot\"}",
+ "interval": "",
+ "legendFormat": "{{job}} - Status",
+ "refId": "A"
+ }
+ ],
+ "title": "Bot Health Status",
+ "type": "timeseries"
+ },
+ {
+ "datasource": "Prometheus",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "line",
+ "fillOpacity": 10,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "vis": false
+ },
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ },
+ "unit": "short"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 12,
+ "y": 16
+ },
+ "id": 6,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom"
+ },
+ "tooltip": {
+ "mode": "single"
+ }
+ },
+ "targets": [
+ {
+ "expr": "rate(process_cpu_seconds_total{job=~\"telegram-bot|anon-bot\"}[5m]) * 100",
+ "interval": "",
+ "legendFormat": "{{job}} - CPU Usage",
+ "refId": "A"
+ }
+ ],
+ "title": "Bot CPU Usage",
+ "type": "timeseries"
+ }
+ ],
+ "schemaVersion": 27,
+ "style": "dark",
+ "tags": ["bots", "monitoring"],
+ "templating": {
+ "list": []
+ },
+ "time": {
+ "from": "now-1h",
+ "to": "now"
+ },
+ "timepicker": {},
+ "timezone": "",
+ "title": "Bot Monitoring Dashboard",
+ "uid": "bot-monitoring",
+ "version": 1
+}
diff --git a/infra/grafana/dashboards/infrastructure-monitoring.json b/infra/grafana/dashboards/infrastructure-monitoring.json
new file mode 100644
index 0000000..4a77335
--- /dev/null
+++ b/infra/grafana/dashboards/infrastructure-monitoring.json
@@ -0,0 +1,523 @@
+{
+ "annotations": {
+ "list": [
+ {
+ "builtIn": 1,
+ "datasource": "-- Grafana --",
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "name": "Annotations & Alerts",
+ "type": "dashboard"
+ }
+ ]
+ },
+ "editable": true,
+ "gnetId": null,
+ "graphTooltip": 0,
+ "id": null,
+ "links": [],
+ "panels": [
+ {
+ "datasource": "Prometheus",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "line",
+ "fillOpacity": 10,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "vis": false
+ },
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ },
+ "unit": "percent"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 0,
+ "y": 0
+ },
+ "id": 1,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom"
+ },
+ "tooltip": {
+ "mode": "single"
+ }
+ },
+ "targets": [
+ {
+ "expr": "100 - (avg by(instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m])) * 100)",
+ "interval": "",
+ "legendFormat": "CPU Usage - {{instance}}",
+ "refId": "A"
+ }
+ ],
+ "title": "System CPU Usage",
+ "type": "timeseries"
+ },
+ {
+ "datasource": "Prometheus",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "line",
+ "fillOpacity": 10,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "vis": false
+ },
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ },
+ "unit": "percent"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 12,
+ "y": 0
+ },
+ "id": 2,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom"
+ },
+ "tooltip": {
+ "mode": "single"
+ }
+ },
+ "targets": [
+ {
+ "expr": "(1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) * 100",
+ "interval": "",
+ "legendFormat": "Memory Usage - {{instance}}",
+ "refId": "A"
+ }
+ ],
+ "title": "System Memory Usage",
+ "type": "timeseries"
+ },
+ {
+ "datasource": "Prometheus",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "line",
+ "fillOpacity": 10,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "vis": false
+ },
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ },
+ "unit": "percent"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 0,
+ "y": 8
+ },
+ "id": 3,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom"
+ },
+ "tooltip": {
+ "mode": "single"
+ }
+ },
+ "targets": [
+ {
+ "expr": "(1 - (node_filesystem_avail_bytes / node_filesystem_size_bytes)) * 100",
+ "interval": "",
+ "legendFormat": "Disk Usage - {{instance}} {{mountpoint}}",
+ "refId": "A"
+ }
+ ],
+ "title": "Disk Usage",
+ "type": "timeseries"
+ },
+ {
+ "datasource": "Prometheus",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "line",
+ "fillOpacity": 10,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "vis": false
+ },
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ },
+ "unit": "short"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 12,
+ "y": 8
+ },
+ "id": 4,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom"
+ },
+ "tooltip": {
+ "mode": "single"
+ }
+ },
+ "targets": [
+ {
+ "expr": "up{job=~\"prometheus|grafana|nginx|alertmanager|uptime-kuma\"}",
+ "interval": "",
+ "legendFormat": "{{job}} - Status",
+ "refId": "A"
+ }
+ ],
+ "title": "Service Health Status",
+ "type": "timeseries"
+ },
+ {
+ "datasource": "Prometheus",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "line",
+ "fillOpacity": 10,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "vis": false
+ },
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ },
+ "unit": "reqps"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 0,
+ "y": 16
+ },
+ "id": 5,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom"
+ },
+ "tooltip": {
+ "mode": "single"
+ }
+ },
+ "targets": [
+ {
+ "expr": "rate(nginx_http_requests_total[5m])",
+ "interval": "",
+ "legendFormat": "Nginx - {{status}}",
+ "refId": "A"
+ }
+ ],
+ "title": "Nginx Request Rate",
+ "type": "timeseries"
+ },
+ {
+ "datasource": "Prometheus",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "line",
+ "fillOpacity": 10,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "vis": false
+ },
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ },
+ "unit": "bytes"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 12,
+ "y": 16
+ },
+ "id": 6,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom"
+ },
+ "tooltip": {
+ "mode": "single"
+ }
+ },
+ "targets": [
+ {
+ "expr": "container_memory_usage_bytes{name=~\"bots_.*\"}",
+ "interval": "",
+ "legendFormat": "{{name}} - Memory",
+ "refId": "A"
+ }
+ ],
+ "title": "Container Memory Usage",
+ "type": "timeseries"
+ }
+ ],
+ "schemaVersion": 27,
+ "style": "dark",
+ "tags": ["infrastructure", "monitoring"],
+ "templating": {
+ "list": []
+ },
+ "time": {
+ "from": "now-1h",
+ "to": "now"
+ },
+ "timepicker": {},
+ "timezone": "",
+ "title": "Infrastructure Monitoring Dashboard",
+ "uid": "infrastructure-monitoring",
+ "version": 1
+}
diff --git a/infra/grafana/provisioning/dashboards/dashboards.yml b/infra/grafana/provisioning/dashboards/dashboards.yml
new file mode 100644
index 0000000..5781e55
--- /dev/null
+++ b/infra/grafana/provisioning/dashboards/dashboards.yml
@@ -0,0 +1,16 @@
+# Grafana Dashboard Provisioning Configuration
+# This file configures automatic dashboard import
+
+apiVersion: 1
+
+providers:
+ - name: 'default'
+ orgId: 1
+ folder: ''
+ type: file
+ disableDeletion: false
+ updateIntervalSeconds: 10
+ allowUiUpdates: true
+ options:
+ path: /etc/grafana/provisioning/dashboards
+ foldersFromFilesStructure: true
diff --git a/infra/grafana/provisioning/datasources/prometheus.yml b/infra/grafana/provisioning/datasources/prometheus.yml
index 86fd346..a0e6527 100644
--- a/infra/grafana/provisioning/datasources/prometheus.yml
+++ b/infra/grafana/provisioning/datasources/prometheus.yml
@@ -4,5 +4,13 @@ datasources:
- name: Prometheus
type: prometheus
access: proxy
- url: http://prometheus:9090
+ url: http://prometheus:9090
isDefault: true
+ jsonData:
+ httpMethod: POST
+ manageAlerts: true
+ prometheusType: Prometheus
+ prometheusVersion: 2.40.0
+ cacheLevel: 'High'
+ disableRecordingRules: false
+ incrementalQueryOverlapWindow: 10m
diff --git a/infra/nginx/conf.d/alertmanager.conf b/infra/nginx/conf.d/alertmanager.conf
new file mode 100644
index 0000000..4406026
--- /dev/null
+++ b/infra/nginx/conf.d/alertmanager.conf
@@ -0,0 +1,61 @@
+# Alertmanager Nginx Configuration
+# Proxies requests to Alertmanager
+
+# Alertmanager location
+location /alertmanager/ {
+ # Rate limiting
+ limit_req zone=api burst=10 nodelay;
+
+ # Remove trailing slash for proxy
+ rewrite ^/alertmanager/(.*)$ /$1 break;
+
+ # Proxy to Alertmanager
+ proxy_pass http://alertmanager_backend;
+ proxy_set_header Host $host;
+ proxy_set_header X-Real-IP $remote_addr;
+ proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+ proxy_set_header X-Forwarded-Proto $scheme;
+
+ # Timeouts
+ proxy_connect_timeout 30s;
+ proxy_send_timeout 30s;
+ proxy_read_timeout 30s;
+
+ # Buffer settings
+ proxy_buffering on;
+ proxy_buffer_size 4k;
+ proxy_buffers 8 4k;
+
+ # Security headers
+ add_header X-Frame-Options "SAMEORIGIN" always;
+ add_header X-Content-Type-Options "nosniff" always;
+}
+
+# Alertmanager API
+location /api/v1/ {
+ # Rate limiting
+ limit_req zone=api burst=20 nodelay;
+
+ # Proxy to Alertmanager
+ proxy_pass http://alertmanager_backend;
+ proxy_set_header Host $host;
+ proxy_set_header X-Real-IP $remote_addr;
+ proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+ proxy_set_header X-Forwarded-Proto $scheme;
+
+ # CORS headers
+ add_header Access-Control-Allow-Origin "*" always;
+ add_header Access-Control-Allow-Methods "GET, POST, PUT, DELETE, OPTIONS" always;
+ add_header Access-Control-Allow-Headers "DNT,User-Agent,X-Requested-With,If-Modified-Since,Cache-Control,Content-Type,Range,Authorization" always;
+
+ # Handle preflight requests
+ if ($request_method = 'OPTIONS') {
+ add_header Access-Control-Allow-Origin "*";
+ add_header Access-Control-Allow-Methods "GET, POST, PUT, DELETE, OPTIONS";
+ add_header Access-Control-Allow-Headers "DNT,User-Agent,X-Requested-With,If-Modified-Since,Cache-Control,Content-Type,Range,Authorization";
+ add_header Access-Control-Max-Age 1728000;
+ add_header Content-Type "text/plain; charset=utf-8";
+ add_header Content-Length 0;
+ return 204;
+ }
+}
diff --git a/infra/nginx/conf.d/grafana.conf b/infra/nginx/conf.d/grafana.conf
index 5a4e2ed..166b8b5 100644
--- a/infra/nginx/conf.d/grafana.conf
+++ b/infra/nginx/conf.d/grafana.conf
@@ -1,9 +1,3 @@
-# Grafana reverse proxy configuration
-upstream grafana_backend {
- server grafana:3000;
- keepalive 32;
-}
-
# Grafana proxy configuration
location /grafana/ {
proxy_pass http://grafana_backend/;
diff --git a/infra/nginx/conf.d/prometheus.conf b/infra/nginx/conf.d/prometheus.conf
index b3a3156..c189cd7 100644
--- a/infra/nginx/conf.d/prometheus.conf
+++ b/infra/nginx/conf.d/prometheus.conf
@@ -1,12 +1,7 @@
-# Prometheus reverse proxy configuration
-upstream prometheus_backend {
- server prometheus:9090;
- keepalive 32;
-}
-
# Prometheus proxy configuration
location /prometheus/ {
proxy_pass http://prometheus_backend/;
+ proxy_redirect / /prometheus/;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
@@ -31,4 +26,4 @@ location /prometheus/-/healthy {
proxy_pass http://prometheus_backend/-/healthy;
proxy_set_header Host $host;
access_log off;
-}
+}
\ No newline at end of file
diff --git a/infra/nginx/conf.d/status.conf b/infra/nginx/conf.d/status.conf
index 9b89b20..13bcc62 100644
--- a/infra/nginx/conf.d/status.conf
+++ b/infra/nginx/conf.d/status.conf
@@ -1,16 +1,35 @@
-# Status page configuration (for future uptime kuma integration)
+# Status page configuration (Uptime Kuma integration)
# Rate limiting for status page
location /status {
- # Basic authentication for status page
- auth_basic "Status Page Access";
- auth_basic_user_file /etc/nginx/.htpasswd;
+ # Rate limiting
+ limit_req zone=status burst=5 nodelay;
- # Placeholder for future uptime kuma integration
- # For now, show nginx status
- access_log off;
- return 200 '{"status": "ok", "nginx": "running", "timestamp": "$time_iso8601"}';
- add_header Content-Type application/json;
+ # Proxy to Uptime Kuma
+ proxy_pass http://uptime_kuma_backend;
+ proxy_set_header Host $host;
+ proxy_set_header X-Real-IP $remote_addr;
+ proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+ proxy_set_header X-Forwarded-Proto $scheme;
+
+ # WebSocket support
+ proxy_http_version 1.1;
+ proxy_set_header Upgrade $http_upgrade;
+ proxy_set_header Connection "upgrade";
+
+ # Timeouts
+ proxy_connect_timeout 30s;
+ proxy_send_timeout 30s;
+ proxy_read_timeout 30s;
+
+ # Buffer settings
+ proxy_buffering on;
+ proxy_buffer_size 4k;
+ proxy_buffers 8 4k;
+
+ # Security headers
+ add_header X-Frame-Options "SAMEORIGIN" always;
+ add_header X-Content-Type-Options "nosniff" always;
}
# Nginx status stub (for monitoring)
@@ -21,4 +40,4 @@ location /nginx_status {
allow 172.16.0.0/12; # Docker networks
allow 192.168.0.0/16; # Private networks
deny all;
-}
+}
\ No newline at end of file
diff --git a/infra/nginx/conf.d/uptime-kuma.conf b/infra/nginx/conf.d/uptime-kuma.conf
new file mode 100644
index 0000000..7c77a1f
--- /dev/null
+++ b/infra/nginx/conf.d/uptime-kuma.conf
@@ -0,0 +1,69 @@
+# Uptime Kuma Nginx Configuration
+# Proxies requests to Uptime Kuma status page
+
+# Upstream for Uptime Kuma is declared centrally in nginx.conf (http
+# block). Declaring it again here would cause a "duplicate upstream
+# uptime_kuma_backend" error at nginx startup, so this file relies on
+# the shared declaration:
+#   upstream uptime_kuma_backend { server uptime-kuma:3001; keepalive 32; }
+
+# Status page location — /status is already proxied to Uptime Kuma by
+# conf.d/status.conf; a second "location /status" in the same server
+# block is a duplicate-location error, so it is disabled here.
+# location /status {
+#     limit_req zone=status burst=5 nodelay;
+#
+#     proxy_pass http://uptime_kuma_backend;
+#     proxy_set_header Host $host;
+#     proxy_set_header X-Real-IP $remote_addr;
+#     proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+#     proxy_set_header X-Forwarded-Proto $scheme;
+#
+#     # WebSocket support
+#     proxy_http_version 1.1;
+#     proxy_set_header Upgrade $http_upgrade;
+#     proxy_set_header Connection "upgrade";
+#
+#     # Timeouts
+#     proxy_connect_timeout 30s;
+#     proxy_send_timeout 30s;
+#     proxy_read_timeout 30s;
+#
+#     # Buffer settings
+#     proxy_buffering on;
+#     proxy_buffer_size 4k;
+#     proxy_buffers 8 4k;
+#
+#     # Security headers
+#     add_header X-Frame-Options "SAMEORIGIN" always;
+#     add_header X-Content-Type-Options "nosniff" always;
+# }
+
+# API endpoints for Uptime Kuma
+location /api/ {
+ # Rate limiting
+ limit_req zone=api burst=10 nodelay;
+
+ # Proxy to Uptime Kuma
+ proxy_pass http://uptime_kuma_backend;
+ proxy_set_header Host $host;
+ proxy_set_header X-Real-IP $remote_addr;
+ proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+ proxy_set_header X-Forwarded-Proto $scheme;
+
+ # CORS headers
+ add_header Access-Control-Allow-Origin "*" always;
+ add_header Access-Control-Allow-Methods "GET, POST, PUT, DELETE, OPTIONS" always;
+ add_header Access-Control-Allow-Headers "DNT,User-Agent,X-Requested-With,If-Modified-Since,Cache-Control,Content-Type,Range,Authorization" always;
+
+ # Handle preflight requests
+ if ($request_method = 'OPTIONS') {
+ add_header Access-Control-Allow-Origin "*";
+ add_header Access-Control-Allow-Methods "GET, POST, PUT, DELETE, OPTIONS";
+ add_header Access-Control-Allow-Headers "DNT,User-Agent,X-Requested-With,If-Modified-Since,Cache-Control,Content-Type,Range,Authorization";
+ add_header Access-Control-Max-Age 1728000;
+ add_header Content-Type "text/plain; charset=utf-8";
+ add_header Content-Length 0;
+ return 204;
+ }
+}
diff --git a/infra/nginx/nginx.conf b/infra/nginx/nginx.conf
index 645bd02..aaffe7d 100644
--- a/infra/nginx/nginx.conf
+++ b/infra/nginx/nginx.conf
@@ -63,6 +63,27 @@ http {
ssl_session_cache shared:SSL:10m;
ssl_session_timeout 10m;
+ # Upstream configurations
+ upstream grafana_backend {
+ server grafana:3000;
+ keepalive 32;
+ }
+
+ upstream prometheus_backend {
+ server prometheus:9090;
+ keepalive 32;
+ }
+
+ upstream uptime_kuma_backend {
+ server uptime-kuma:3001;
+ keepalive 32;
+ }
+
+ upstream alertmanager_backend {
+ server alertmanager:9093;
+ keepalive 32;
+ }
+
# Main server block
server {
listen 80;
@@ -74,17 +95,19 @@ http {
listen 443 ssl http2;
server_name _;
- # SSL configuration
- ssl_certificate /etc/nginx/ssl/cert.pem;
- ssl_certificate_key /etc/nginx/ssl/key.pem;
+ # SSL configuration (Let's Encrypt certificate; {{DOMAIN}} is
+ # substituted by scripts/setup-ssl.sh)
+ ssl_certificate /etc/letsencrypt/live/{{DOMAIN}}/fullchain.pem;
+ ssl_certificate_key /etc/letsencrypt/live/{{DOMAIN}}/privkey.pem;
+ ssl_protocols TLSv1.2 TLSv1.3;
+ ssl_ciphers ECDHE-RSA-AES128-GCM-SHA256:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-RSA-AES128-SHA256:ECDHE-RSA-AES256-SHA384;
+ ssl_prefer_server_ciphers off;
+ ssl_session_cache shared:SSL:10m;
+ ssl_session_timeout 10m;
# Security headers
add_header X-Frame-Options "SAMEORIGIN" always;
add_header X-Content-Type-Options "nosniff" always;
- # Rate limiting
- limit_req zone=api burst=20 nodelay;
-
# Redirect root to Grafana
location = / {
return 301 /grafana/;
diff --git a/infra/nginx/ssl/letsencrypt.conf b/infra/nginx/ssl/letsencrypt.conf
new file mode 100644
index 0000000..e2afe99
--- /dev/null
+++ b/infra/nginx/ssl/letsencrypt.conf
@@ -0,0 +1,27 @@
+# Let's Encrypt SSL Configuration
+# This file contains the SSL configuration for Let's Encrypt certificates
+
+# SSL certificate paths (Let's Encrypt)
+ssl_certificate /etc/letsencrypt/live/{{DOMAIN}}/fullchain.pem;
+ssl_certificate_key /etc/letsencrypt/live/{{DOMAIN}}/privkey.pem;
+
+# SSL Security Configuration
+ssl_protocols TLSv1.2 TLSv1.3;
+ssl_ciphers ECDHE-RSA-AES128-GCM-SHA256:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-RSA-AES128-SHA256:ECDHE-RSA-AES256-SHA384:ECDHE-RSA-CHACHA20-POLY1305:ECDHE-RSA-AES128-SHA256:ECDHE-RSA-AES256-SHA384;
+ssl_prefer_server_ciphers off;
+ssl_session_cache shared:SSL:10m;
+ssl_session_timeout 10m;
+ssl_session_tickets off;
+
+# OCSP Stapling
+ssl_stapling on;
+ssl_stapling_verify on;
+ssl_trusted_certificate /etc/letsencrypt/live/{{DOMAIN}}/chain.pem;
+
+# Security Headers
+add_header Strict-Transport-Security "max-age=31536000; includeSubDomains" always;
+add_header X-Frame-Options "SAMEORIGIN" always;
+add_header X-Content-Type-Options "nosniff" always;
+add_header X-XSS-Protection "1; mode=block" always;
+add_header Referrer-Policy "strict-origin-when-cross-origin" always;
+add_header Content-Security-Policy "default-src 'self'; script-src 'self' 'unsafe-inline' 'unsafe-eval'; style-src 'self' 'unsafe-inline'; img-src 'self' data: https:; font-src 'self' data:; connect-src 'self' wss: https:;" always;
diff --git a/infra/prometheus/alert_rules.yml b/infra/prometheus/alert_rules.yml
new file mode 100644
index 0000000..7f5bb0f
--- /dev/null
+++ b/infra/prometheus/alert_rules.yml
@@ -0,0 +1,253 @@
+# Prometheus Alert Rules
+# This file defines alerting rules for monitoring the bot infrastructure
+
+groups:
+ # Bot Health Monitoring
+ - name: bot_health
+ rules:
+ # Telegram Bot Health
+ - alert: TelegramBotDown
+ expr: up{job="telegram-bot"} == 0
+ for: 1m
+ labels:
+ severity: critical
+ service: telegram-bot
+ annotations:
+ summary: "Telegram Bot is down"
+ description: "Telegram Bot has been down for more than 1 minute"
+ runbook_url: "https://docs.example.com/runbooks/telegram-bot-down"
+
+ - alert: TelegramBotHighErrorRate
+ expr: rate(http_requests_total{job="telegram-bot",status=~"5.."}[5m]) > 0.1
+ for: 2m
+ labels:
+ severity: warning
+ service: telegram-bot
+ annotations:
+ summary: "Telegram Bot high error rate"
+ description: "Telegram Bot error rate is {{ $value }} errors per second"
+
+ - alert: TelegramBotHighResponseTime
+ expr: histogram_quantile(0.95, rate(http_request_duration_seconds_bucket{job="telegram-bot"}[5m])) > 2
+ for: 5m
+ labels:
+ severity: warning
+ service: telegram-bot
+ annotations:
+ summary: "Telegram Bot high response time"
+ description: "95th percentile response time is {{ $value }} seconds"
+
+ # AnonBot Health
+ - alert: AnonBotDown
+ expr: up{job="anon-bot"} == 0
+ for: 1m
+ labels:
+ severity: critical
+ service: anon-bot
+ annotations:
+ summary: "AnonBot is down"
+ description: "AnonBot has been down for more than 1 minute"
+ runbook_url: "https://docs.example.com/runbooks/anon-bot-down"
+
+ - alert: AnonBotHighErrorRate
+ expr: rate(http_requests_total{job="anon-bot",status=~"5.."}[5m]) > 0.1
+ for: 2m
+ labels:
+ severity: warning
+ service: anon-bot
+ annotations:
+ summary: "AnonBot high error rate"
+ description: "AnonBot error rate is {{ $value }} errors per second"
+
+ - alert: AnonBotHighResponseTime
+ expr: histogram_quantile(0.95, rate(http_request_duration_seconds_bucket{job="anon-bot"}[5m])) > 2
+ for: 5m
+ labels:
+ severity: warning
+ service: anon-bot
+ annotations:
+ summary: "AnonBot high response time"
+ description: "95th percentile response time is {{ $value }} seconds"
+
+ # Infrastructure Health Monitoring
+ - name: infrastructure_health
+ rules:
+ # Prometheus Health
+ - alert: PrometheusDown
+ expr: up{job="prometheus"} == 0
+ for: 1m
+ labels:
+ severity: critical
+ service: prometheus
+ annotations:
+ summary: "Prometheus is down"
+ description: "Prometheus has been down for more than 1 minute"
+
+ - alert: PrometheusHighMemoryUsage
+ expr: (prometheus_tsdb_head_series / prometheus_tsdb_head_series_limit) > 0.8
+ for: 5m
+ labels:
+ severity: warning
+ service: prometheus
+ annotations:
+ summary: "Prometheus high memory usage"
+ description: "Prometheus memory usage is {{ $value | humanizePercentage }} of limit"
+
+ # Grafana Health
+ - alert: GrafanaDown
+ expr: up{job="grafana"} == 0
+ for: 1m
+ labels:
+ severity: critical
+ service: grafana
+ annotations:
+ summary: "Grafana is down"
+ description: "Grafana has been down for more than 1 minute"
+
+ # Nginx Health
+ - alert: NginxDown
+ expr: up{job="nginx"} == 0
+ for: 1m
+ labels:
+ severity: critical
+ service: nginx
+ annotations:
+ summary: "Nginx is down"
+ description: "Nginx has been down for more than 1 minute"
+
+ - alert: NginxHighErrorRate
+ expr: rate(nginx_http_requests_total{status=~"5.."}[5m]) > 0.1
+ for: 2m
+ labels:
+ severity: warning
+ service: nginx
+ annotations:
+ summary: "Nginx high error rate"
+ description: "Nginx error rate is {{ $value }} errors per second"
+
+ # System Resource Monitoring
+ - name: system_resources
+ rules:
+ # High CPU Usage
+ - alert: HighCPUUsage
+ expr: 100 - (avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) > 80
+ for: 5m
+ labels:
+ severity: warning
+ service: system
+ annotations:
+ summary: "High CPU usage"
+ description: "CPU usage is {{ $value }}% on {{ $labels.instance }}"
+
+ - alert: VeryHighCPUUsage
+ expr: 100 - (avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) > 95
+ for: 2m
+ labels:
+ severity: critical
+ service: system
+ annotations:
+ summary: "Very high CPU usage"
+ description: "CPU usage is {{ $value }}% on {{ $labels.instance }}"
+
+ # High Memory Usage
+ - alert: HighMemoryUsage
+ expr: (1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) * 100 > 80
+ for: 5m
+ labels:
+ severity: warning
+ service: system
+ annotations:
+ summary: "High memory usage"
+ description: "Memory usage is {{ $value }}% on {{ $labels.instance }}"
+
+ - alert: VeryHighMemoryUsage
+ expr: (1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) * 100 > 95
+ for: 2m
+ labels:
+ severity: critical
+ service: system
+ annotations:
+ summary: "Very high memory usage"
+ description: "Memory usage is {{ $value }}% on {{ $labels.instance }}"
+
+ # Disk Space
+ - alert: LowDiskSpace
+ expr: (1 - (node_filesystem_avail_bytes / node_filesystem_size_bytes)) * 100 > 80
+ for: 5m
+ labels:
+ severity: warning
+ service: system
+ annotations:
+ summary: "Low disk space"
+ description: "Disk usage is {{ $value }}% on {{ $labels.instance }} ({{ $labels.mountpoint }})"
+
+ - alert: VeryLowDiskSpace
+ expr: (1 - (node_filesystem_avail_bytes / node_filesystem_size_bytes)) * 100 > 95
+ for: 2m
+ labels:
+ severity: critical
+ service: system
+ annotations:
+ summary: "Very low disk space"
+ description: "Disk usage is {{ $value }}% on {{ $labels.instance }} ({{ $labels.mountpoint }})"
+
+ # Docker Container Monitoring
+ - name: docker_containers
+ rules:
+ # Container Restart
+ - alert: ContainerRestarting
+ expr: changes(container_start_time_seconds[10m]) > 2
+ for: 0m
+ labels:
+ severity: warning
+ service: docker
+ annotations:
+ summary: "Container restarting"
+ description: "Container {{ $labels.name }} is restarting frequently"
+
+ # Container High Memory Usage
+ - alert: ContainerHighMemoryUsage
+ expr: (container_memory_usage_bytes / container_spec_memory_limit_bytes) * 100 > 80
+ for: 5m
+ labels:
+ severity: warning
+ service: docker
+ annotations:
+ summary: "Container high memory usage"
+ description: "Container {{ $labels.name }} memory usage is {{ $value }}%"
+
+ # Container High CPU Usage
+ - alert: ContainerHighCPUUsage
+ expr: (rate(container_cpu_usage_seconds_total[5m]) / container_spec_cpu_quota * 100) > 80
+ for: 5m
+ labels:
+ severity: warning
+ service: docker
+ annotations:
+ summary: "Container high CPU usage"
+ description: "Container {{ $labels.name }} CPU usage is {{ $value }}%"
+
+ # Database Monitoring
+ - name: database_health
+ rules:
+ # Database Connection Issues
+ - alert: DatabaseConnectionFailed
+ expr: increase(database_connection_errors_total[5m]) > 5
+ for: 1m
+ labels:
+ severity: critical
+ service: database
+ annotations:
+ summary: "Database connection failures"
+ description: "{{ $value }} database connection failures in the last 5 minutes"
+
+ # Database High Query Time
+ - alert: DatabaseHighQueryTime
+ expr: histogram_quantile(0.95, rate(database_query_duration_seconds_bucket[5m])) > 1
+ for: 5m
+ labels:
+ severity: warning
+ service: database
+ annotations:
+ summary: "Database high query time"
+ description: "95th percentile database query time is {{ $value }} seconds"
diff --git a/infra/prometheus/prometheus.yml b/infra/prometheus/prometheus.yml
index 0bafff2..fe9481b 100644
--- a/infra/prometheus/prometheus.yml
+++ b/infra/prometheus/prometheus.yml
@@ -3,8 +3,7 @@ global:
evaluation_interval: 15s
rule_files:
- # - "first_rules.yml"
- # - "second_rules.yml"
+ - "alert_rules.yml"
scrape_configs:
- job_name: 'prometheus'
@@ -46,4 +45,4 @@ alerting:
alertmanagers:
- static_configs:
- targets:
- # - alertmanager:9093
+ - alertmanager:9093
diff --git a/infra/uptime-kuma/docker-compose.yml b/infra/uptime-kuma/docker-compose.yml
new file mode 100644
index 0000000..1e7398f
--- /dev/null
+++ b/infra/uptime-kuma/docker-compose.yml
@@ -0,0 +1,33 @@
+# Uptime Kuma Configuration
+# This is a separate docker-compose file for Uptime Kuma
+# It will be included in the main docker-compose.yml
+
+version: '3.8'
+
+services:
+ uptime-kuma:
+ image: louislam/uptime-kuma:latest
+ container_name: bots_uptime_kuma
+ restart: unless-stopped
+ volumes:
+ - uptime_kuma_data:/app/data
+ ports:
+ - "3001:3001"
+ environment:
+ - UPTIME_KUMA_PORT=3001
+ networks:
+ - bots_network
+ healthcheck:
+ test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:3001"]
+ interval: 30s
+ timeout: 10s
+ retries: 3
+ start_period: 40s
+
+volumes:
+ uptime_kuma_data:
+ driver: local
+
+networks:
+ bots_network:
+ external: true
diff --git a/scripts/setup-ssl.sh b/scripts/setup-ssl.sh
new file mode 100755
index 0000000..dd6b765
--- /dev/null
+++ b/scripts/setup-ssl.sh
@@ -0,0 +1,163 @@
+#!/bin/bash
+
+# SSL Setup Script for Let's Encrypt
+# This script sets up SSL certificates using Let's Encrypt
+
+set -e
+
+# Configuration
+DOMAIN="${DOMAIN:-localhost}"
+EMAIL="${EMAIL:-admin@${DOMAIN}}"
+NGINX_CONTAINER="bots_nginx"
+CERTBOT_IMAGE="certbot/certbot:latest"
+
+# Colors for output
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+NC='\033[0m' # No Color
+
+# Logging function
+log() {
+ echo -e "${GREEN}[$(date +'%Y-%m-%d %H:%M:%S')] $1${NC}"
+}
+
+warn() {
+ echo -e "${YELLOW}[$(date +'%Y-%m-%d %H:%M:%S')] WARNING: $1${NC}"
+}
+
+error() {
+ echo -e "${RED}[$(date +'%Y-%m-%d %H:%M:%S')] ERROR: $1${NC}"
+ exit 1
+}
+
+# Check if running as root
+if [[ $EUID -eq 0 ]]; then
+ error "This script should not be run as root for security reasons"
+fi
+
+# Check if domain is localhost
+if [[ "$DOMAIN" == "localhost" ]]; then
+ warn "Domain is set to localhost. Let's Encrypt certificates cannot be issued for localhost."
+ warn "Please set the DOMAIN environment variable to your actual domain name."
+ warn "Example: DOMAIN=example.com ./scripts/setup-ssl.sh"
+ exit 1
+fi
+
+# Check if Docker is running
+if ! docker info > /dev/null 2>&1; then
+ error "Docker is not running. Please start Docker and try again."
+fi
+
+# Check if nginx container is running
+if ! docker ps | grep -q "$NGINX_CONTAINER"; then
+ error "Nginx container ($NGINX_CONTAINER) is not running. Please start it first with 'docker-compose up -d nginx'"
+fi
+
+log "Setting up SSL certificates for domain: $DOMAIN"
+log "Email for Let's Encrypt: $EMAIL"
+
+# Create necessary directories
+log "Creating Let's Encrypt directories..."
+sudo mkdir -p /etc/letsencrypt/live
+sudo mkdir -p /etc/letsencrypt/archive
+sudo mkdir -p /etc/letsencrypt/renewal
+sudo chmod 755 /etc/letsencrypt
+
+# Stop nginx temporarily for certificate generation
+log "Stopping nginx container for certificate generation..."
+docker stop "$NGINX_CONTAINER" || true
+
+# Generate certificate using certbot
+log "Generating SSL certificate using Let's Encrypt..."
+docker run --rm \
+ -v /etc/letsencrypt:/etc/letsencrypt \
+ -v /var/lib/letsencrypt:/var/lib/letsencrypt \
+ -p 80:80 \
+ -p 443:443 \
+ "$CERTBOT_IMAGE" certonly \
+ --standalone \
+ --non-interactive \
+ --agree-tos \
+ --email "$EMAIL" \
+ --domains "$DOMAIN" \
+ --expand
+
+# Check if certificate was generated successfully
+if [[ ! -f "/etc/letsencrypt/live/$DOMAIN/fullchain.pem" ]]; then
+ error "Failed to generate SSL certificate for $DOMAIN"
+fi
+
+log "SSL certificate generated successfully!"
+
+# Set proper permissions
+log "Setting proper permissions for SSL certificates..."
+sudo chmod 755 /etc/letsencrypt/live
+sudo chmod 755 /etc/letsencrypt/archive
+sudo chmod 644 /etc/letsencrypt/live/"$DOMAIN"/*.pem
+sudo chmod 600 /etc/letsencrypt/live/"$DOMAIN"/privkey.pem
+
+# Update nginx configuration to use Let's Encrypt certificates
+log "Updating nginx configuration..."
+if [[ -f "infra/nginx/ssl/letsencrypt.conf" ]]; then
+ # Replace domain placeholder in letsencrypt.conf
+ sed "s/{{DOMAIN}}/$DOMAIN/g" infra/nginx/ssl/letsencrypt.conf > /tmp/letsencrypt.conf
+ sudo cp /tmp/letsencrypt.conf /etc/letsencrypt/live/"$DOMAIN"/letsencrypt.conf
+ rm /tmp/letsencrypt.conf
+fi
+
+# Start nginx container
+log "Starting nginx container..."
+docker start "$NGINX_CONTAINER"
+
+# Wait for nginx to start
+log "Waiting for nginx to start..."
+sleep 10
+
+# Test SSL certificate
+log "Testing SSL certificate..."
+if curl -k -s "https://$DOMAIN" > /dev/null; then
+ log "SSL certificate is working correctly!"
+else
+ warn "SSL certificate test failed. Please check nginx configuration."
+fi
+
+# Set up automatic renewal
+log "Setting up automatic certificate renewal..."
+cat > /tmp/ssl-renewal.sh << EOF
+#!/bin/bash
+# SSL Certificate Renewal Script
+
+set -e
+
+DOMAIN="$DOMAIN"
+NGINX_CONTAINER="$NGINX_CONTAINER"
+CERTBOT_IMAGE="$CERTBOT_IMAGE"
+
+# Renew certificates (the standalone authenticator must bind port 80,
+# so nginx is stopped for the renewal window and restarted afterwards)
+docker stop "\$NGINX_CONTAINER"
+docker run --rm -p 80:80 -p 443:443 \\
+    -v /etc/letsencrypt:/etc/letsencrypt \\
+    -v /var/lib/letsencrypt:/var/lib/letsencrypt \\
+    "$CERTBOT_IMAGE" renew --quiet
+docker start "\$NGINX_CONTAINER"
+
+echo "\$(date): SSL certificates renewed successfully" >> /var/log/ssl-renewal.log
+EOF
+
+sudo mv /tmp/ssl-renewal.sh /usr/local/bin/ssl-renewal.sh
+sudo chmod +x /usr/local/bin/ssl-renewal.sh
+
+# Add cron job for automatic renewal (every Monday at 2 AM)
+log "Adding cron job for automatic renewal..."
+(crontab -l 2>/dev/null; echo "0 2 * * 1 /usr/local/bin/ssl-renewal.sh") | crontab -
+
+log "SSL setup completed successfully!"
+log "Certificate location: /etc/letsencrypt/live/$DOMAIN/"
+log "Automatic renewal is configured to run every Monday at 2 AM"
+log "You can test the renewal manually with: sudo /usr/local/bin/ssl-renewal.sh"
+
+# Display certificate information
+log "Certificate information:"
+openssl x509 -in "/etc/letsencrypt/live/$DOMAIN/fullchain.pem" -text -noout | grep -E "(Subject:|Not Before|Not After|DNS:)"