feat: integrate Uptime Kuma and Alertmanager into Docker setup

- Add Uptime Kuma service for status monitoring with health checks.
- Introduce Alertmanager service for alert management and notifications.
- Update docker-compose.yml to include new services and their configurations.
- Enhance Makefile with commands for managing Uptime Kuma and Alertmanager logs.
- Modify Ansible playbook to install necessary packages and configure SSL for new services.
- Update Nginx configuration to route traffic to Uptime Kuma and Alertmanager.
- Adjust Prometheus configuration to include alert rules and external URLs.
2025-09-16 21:50:56 +03:00
parent 5e10204137
commit 9ec3f02767
20 changed files with 2173 additions and 38 deletions
--- a/53
+++ b/53
@@ -9,6 +9,8 @@ help: ## Показать справку
 	@echo "📊 Мониторинг:"
 	@echo "  Prometheus: http://localhost:9090"
 	@echo "  Grafana: http://localhost:3000 (admin/admin)"
 	@echo "  Uptime Kuma: http://localhost:3001"
 	@echo "  Alertmanager: http://localhost:9093"
 	@echo "  Server Monitor: http://localhost:9091/health"
 	@echo "  Bot Health: http://localhost:8080/health"
 	@echo "  AnonBot Health: http://localhost:8081/health"
@@ -37,6 +39,12 @@ logs-bot: ## Показать логи Telegram бота
 logs-anonBot: ## Показать логи AnonBot
 	docker-compose logs -f anon-bot
 logs-uptime-kuma: ## Показать логи Uptime Kuma
 	docker-compose logs -f uptime-kuma
 logs-alertmanager: ## Показать логи Alertmanager
 	docker-compose logs -f alertmanager
 restart: ## Перезапустить все сервисы
 	docker-compose down
 	docker-compose build --no-cache
@@ -54,6 +62,12 @@ restart-bot: ## Перезапустить только Telegram бота
 restart-anonBot: ## Перезапустить только AnonBot
 	docker-compose restart anon-bot
 restart-uptime-kuma: ## Перезапустить только Uptime Kuma
 	docker-compose restart uptime-kuma
 restart-alertmanager: ## Перезапустить только Alertmanager
 	docker-compose restart alertmanager
 status: ## Показать статус контейнеров
 	docker-compose ps
@@ -63,6 +77,8 @@ health: ## Проверить здоровье сервисов
 	@curl -f http://localhost:8081/health || echo "❌ AnonBot health check failed"
 	@curl -f http://localhost:9090/-/healthy || echo "❌ Prometheus health check failed"
 	@curl -f http://localhost:3000/api/health || echo "❌ Grafana health check failed"
 	@curl -f http://localhost:3001 || echo "❌ Uptime Kuma health check failed"
 	@curl -f http://localhost:9093/-/healthy || echo "❌ Alertmanager health check failed"
 	@curl -f http://localhost:9091/health || echo "❌ Server monitor health check failed"
 deploy: ## Полный деплой на продакшен
@@ -120,6 +136,8 @@ start: build up ## Собрать и запустить все сервисы
 	@echo "🏗️  Production Infrastructure запущена!"
 	@echo "📊 Prometheus: http://localhost:9090"
 	@echo "📈 Grafana: http://localhost:3000 (admin/admin)"
 	@echo "📊 Uptime Kuma: http://localhost:3001"
 	@echo "🚨 Alertmanager: http://localhost:9093"
 	@echo "🤖 Bot Health: http://localhost:8080/health"
 	@echo "🔒 AnonBot Health: http://localhost:8081/health"
 	@echo "📡 Server Monitor: http://localhost:9091/health"
@@ -191,6 +209,7 @@ test-clean: ## Очистить все файлы тестирования и о
 	@find . -name "*.pyc" -delete 2>/dev/null || true
 	@find . -name "__pycache__" -type d -exec rm -rf {} + 2>/dev/null || true
 	@echo "✅ Файлы тестирования очищены"
 check-ports: ## Проверить занятые порты
 	@echo "🔍 Checking occupied ports..."
@@ -242,3 +261,37 @@ reload-prometheus: ## Перезагрузить конфигурацию Promet
 reload-grafana: ## Перезагрузить конфигурацию Grafana
 	@echo "🔄 Reloading Grafana configuration..."
 	@docker-compose restart grafana
 # Generate a self-signed certificate and key for SERVER_IP under
 # /etc/letsencrypt/live/$(SERVER_IP)/ (fullchain.pem + privkey.pem, valid 365 days).
 # Aborts with a message when SERVER_IP is empty.
 # /etc/letsencrypt is root-owned on a normal host, so the steps that write there go
 # through sudo, matching ssl-renew and ssl-status. Paths are quoted so an unexpected
 # character in SERVER_IP cannot split the argument.
 ssl-setup: ## Настроить SSL сертификаты (самоподписанный)
 	@echo "🔒 Setting up self-signed SSL certificates..."
 	@if [ -z "$(SERVER_IP)" ]; then echo "❌ Please set SERVER_IP variable in .env file"; exit 1; fi
 	@sudo mkdir -p "/etc/letsencrypt/live/$(SERVER_IP)"
 	@sudo openssl req -x509 -nodes -days 365 -newkey rsa:2048 \
 		-keyout "/etc/letsencrypt/live/$(SERVER_IP)/privkey.pem" \
 		-out "/etc/letsencrypt/live/$(SERVER_IP)/fullchain.pem" \
 		-subj "/CN=$(SERVER_IP)"
 	@echo "✅ Self-signed certificate created for $(SERVER_IP)"
 # Run /usr/local/bin/ssl-renewal.sh through sudo to renew the certificates.
 ssl-renew: ## Обновить SSL сертификаты
 	@echo "🔄 Renewing SSL certificates..."
 	@sudo /usr/local/bin/ssl-renewal.sh
 # List the certificates certbot knows about (`certbot certificates`), run through sudo.
 ssl-status: ## Проверить статус SSL сертификатов
 	@echo "🔍 Checking SSL certificate status..."
 	@sudo certbot certificates
 # Open the Uptime Kuma UI in a browser: try `open`, fall back to `xdg-open`,
 # and print the URL if neither launcher succeeds.
 uptime-kuma: ## Открыть Uptime Kuma в браузере
 	@echo "📊 Opening Uptime Kuma..."
 	@url=http://localhost:3001; \
 	open $$url || xdg-open $$url || echo "Please open manually: $$url"
 # Open the Alertmanager UI in a browser: try `open`, fall back to `xdg-open`,
 # and print the URL if neither launcher succeeds.
 alertmanager: ## Открыть Alertmanager в браузере
 	@echo "🚨 Opening Alertmanager..."
 	@url=http://localhost:9093; \
 	open $$url || xdg-open $$url || echo "Please open manually: $$url"
 # Print the URL of every monitoring UI, then try to open each of them in a browser.
 # For each URL `open` is tried first, then `xdg-open`; if both fail the URL is
 # printed so it can be opened by hand. Previously only Grafana was opened even
 # though the target announces all four services.
 monitoring-all: ## Открыть все мониторинг сервисы
 	@echo "📊 Opening all monitoring services..."
 	@echo "  - Grafana: http://localhost:3000"
 	@echo "  - Prometheus: http://localhost:9090"
 	@echo "  - Uptime Kuma: http://localhost:3001"
 	@echo "  - Alertmanager: http://localhost:9093"
 	@for url in http://localhost:3000 http://localhost:9090 http://localhost:3001 http://localhost:9093; do \
 		open "$$url" || xdg-open "$$url" || echo "Please open manually: $$url"; \
 	done
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -12,10 +12,12 @@ services:
      - '--web.console.templates=/etc/prometheus/consoles'
      - '--storage.tsdb.retention.time=${PROMETHEUS_RETENTION_DAYS:-30}d'
      - '--web.enable-lifecycle'
      - '--web.external-url=https://${SERVER_IP}/prometheus/'
    ports:
      - "9090:9090"
    volumes:
      - ./infra/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro
      - ./infra/prometheus/alert_rules.yml:/etc/prometheus/alert_rules.yml:ro
      - prometheus_data:/prometheus
    networks:
      - bots_network
@@ -35,9 +37,9 @@ services:
      - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_ADMIN_PASSWORD:-admin}
      - GF_USERS_ALLOW_SIGN_UP=false
      - GF_INSTALL_PLUGINS=grafana-clock-panel,grafana-simple-json-datasource
-      - GF_SERVER_ROOT_URL=https://${SERVER_IP:-localhost}/grafana/
+      - GF_SERVER_ROOT_URL=https://${SERVER_IP}/grafana/
      - GF_SERVER_SERVE_FROM_SUB_PATH=true
-      - GF_SERVER_DOMAIN=${SERVER_IP:-localhost}
+      - GF_SERVER_DOMAIN=${SERVER_IP}
    ports:
      - "3000:3000"
    volumes:
@@ -53,6 +55,51 @@ services:
      timeout: 10s
      retries: 3
  # Uptime Kuma Status Page
  # State is kept in the uptime_kuma_data volume mounted at /app/data.
  uptime-kuma:
    # Pinned to the 1.x major tag instead of `latest`, so recreating the
    # container cannot pull a new major version and change the on-disk data
    # format of the persistent volume without a deliberate upgrade.
    image: louislam/uptime-kuma:1
    container_name: bots_uptime_kuma
    restart: unless-stopped
    volumes:
      - uptime_kuma_data:/app/data
    ports:
      - "3001:3001"
    environment:
      - UPTIME_KUMA_PORT=3001
    networks:
      - bots_network
    healthcheck:
      # Probes the web UI from inside the container.
      # NOTE(review): confirm `wget` exists in this image; if not, this check
      # will always fail and the image's own health check should be used.
      test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:3001"]
      interval: 30s
      timeout: 10s
      retries: 3
      # Failures during the first 40s after start do not count towards `retries`.
      start_period: 40s
  # Alertmanager
  # Reads the config bind-mounted from ./infra/alertmanager/alertmanager.yml and
  # keeps its state in the alertmanager_data volume.
  alertmanager:
    image: prom/alertmanager:latest
    container_name: bots_alertmanager
    restart: unless-stopped
    networks:
      - bots_network
    depends_on:
      - prometheus
    volumes:
      - alertmanager_data:/alertmanager
      - ./infra/alertmanager/alertmanager.yml:/etc/alertmanager/alertmanager.yml:ro
    # NOTE(review): 9093 is published on the host; confirm it is firewalled or
    # otherwise protected if the UI/API must not be reachable directly.
    ports:
      - "9093:9093"
    command:
      - "--config.file=/etc/alertmanager/alertmanager.yml"
      - "--storage.path=/alertmanager"
      # Links are generated for the public /alertmanager/ URL, while the
      # container itself serves from "/" (route prefix).
      - "--web.external-url=https://${SERVER_IP}/alertmanager/"
      - "--web.route-prefix=/"
    healthcheck:
      test:
        - "CMD"
        - "wget"
        - "--no-verbose"
        - "--tries=1"
        - "--spider"
        - "http://localhost:9093/-/healthy"
      interval: 30s
      timeout: 10s
      retries: 3
  # Nginx Reverse Proxy
  nginx:
    image: nginx:alpine
@@ -61,16 +108,20 @@ services:
    ports:
      - "80:80"
      - "443:443"
    environment:
      # Value substituted into the nginx.conf template at container start.
      - SERVER_IP=${SERVER_IP}
      # nginx.conf is mounted below as /etc/nginx/templates/nginx.conf.template.
      # The image's envsubst step renders templates into /etc/nginx/conf.d by
      # default; that directory is mounted read-only in this service, so the
      # template would be skipped, and conf.d is not where a main nginx.conf
      # belongs anyway. Rendering into /etc/nginx produces /etc/nginx/nginx.conf.
      - NGINX_ENVSUBST_OUTPUT_DIR=/etc/nginx
    volumes:
-      - ./infra/nginx/nginx.conf:/etc/nginx/nginx.conf:ro
+      - ./infra/nginx/nginx.conf:/etc/nginx/templates/nginx.conf.template:ro
      - ./infra/nginx/conf.d:/etc/nginx/conf.d:ro
      - ./infra/nginx/ssl:/etc/nginx/ssl:ro
      - ./infra/nginx/.htpasswd:/etc/nginx/.htpasswd:ro
      - /etc/letsencrypt:/etc/letsencrypt:ro
    networks:
      - bots_network
    # Start every proxied backend before nginx.
    # alertmanager is included because this change also routes /alertmanager/
    # through nginx. NOTE(review): nginx.conf is not visible here; if it names
    # the backend by hostname, nginx needs that container to exist at startup.
    depends_on:
      - grafana
      - prometheus
      - uptime-kuma
      - alertmanager
    healthcheck:
      test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost/nginx-health"]
      interval: 30s
@@ -194,6 +245,10 @@ volumes:
    driver: local
  grafana_data:
    driver: local
  uptime_kuma_data:
    driver: local
  alertmanager_data:
    driver: local
 networks:
  bots_network:
--- a/infra/alertmanager/alertmanager-simple.yml
+++ b/infra/alertmanager/alertmanager-simple.yml
@@ -0,0 +1,17 @@
 # Simplified Alertmanager Configuration
 # Every alert is routed to a single webhook receiver; no e-mail receiver is defined.
 global:
  # SMTP defaults (unused by the receiver below).
  smtp_smarthost: "localhost:587"
  smtp_from: "alerts@localhost"
 route:
  # Alerts sharing the same alertname are batched into one notification.
  group_by:
    - alertname
  group_wait: 10s
  group_interval: 10s
  repeat_interval: 1h
  receiver: web.hook
 receivers:
  - name: web.hook
    webhook_configs:
      - url: "http://localhost:5001/"
        send_resolved: true
--- a/infra/alertmanager/alertmanager.yml
+++ b/infra/alertmanager/alertmanager.yml
@@ -0,0 +1,185 @@
 # Alertmanager Configuration
 # This file configures how alerts are grouped, routed to receivers and inhibited.
 # NOTE(review): {{DOMAIN}} and {{SMTP_PASSWORD}} are deployment placeholders.
 # docker-compose mounts this file into the container as-is, so confirm that
 # something replaces them before Alertmanager reads the file.
 global:
  # SMTP defaults inherited by every email_configs entry below.
  # NOTE(review): inside the Alertmanager container `localhost` is the container
  # itself - confirm an SMTP relay is actually reachable at this address.
  smtp_smarthost: 'localhost:587'
  smtp_from: 'alerts@{{DOMAIN}}'
  smtp_auth_username: 'alerts@{{DOMAIN}}'
  smtp_auth_password: '{{SMTP_PASSWORD}}'
  smtp_require_tls: true
  # Time after which an alert is declared resolved if it has not been updated
  resolve_timeout: 5m
 # Templates for alert formatting
 templates:
  - '/etc/alertmanager/templates/*.tmpl'
 # Route configuration - defines how alerts are routed
 #
 # Alerts that match no child route go to the default receiver 'web.hook'.
 # Child routes are tried top to bottom and the first match wins, because no
 # route sets `continue: true`.
 # NOTE(review): the severity routes come first, so an alert that carries
 # severity=critical|warning AND a service label never reaches the
 # service-specific routes further down - confirm that is intended.
 #
 # Routes use `matchers`; the older `match` key is deprecated.
 route:
  group_by: ['alertname', 'cluster', 'service']
  group_wait: 10s
  group_interval: 10s
  repeat_interval: 1h
  receiver: 'web.hook'
  routes:
    # Critical alerts - immediate notification
    - matchers:
        - 'severity="critical"'
      receiver: 'critical-alerts'
      group_wait: 5s
      repeat_interval: 5m
    # Warning alerts - grouped notification
    - matchers:
        - 'severity="warning"'
      receiver: 'warning-alerts'
      group_wait: 30s
      repeat_interval: 30m
    # Bot-specific alerts
    - matchers:
        - 'service="telegram-bot"'
      receiver: 'bot-alerts'
      group_wait: 10s
      repeat_interval: 15m
    - matchers:
        - 'service="anon-bot"'
      receiver: 'bot-alerts'
      group_wait: 10s
      repeat_interval: 15m
    # Infrastructure alerts
    - matchers:
        - 'service="prometheus"'
      receiver: 'infrastructure-alerts'
      group_wait: 30s
      repeat_interval: 1h
    - matchers:
        - 'service="grafana"'
      receiver: 'infrastructure-alerts'
      group_wait: 30s
      repeat_interval: 1h
    - matchers:
        - 'service="nginx"'
      receiver: 'infrastructure-alerts'
      group_wait: 30s
      repeat_interval: 1h
 # Inhibition rules - suppress certain alerts when others are firing
 # A target alert is muted while a source alert is firing and both carry the
 # same values for every label listed in `equal`.
 # Rules use `source_matchers` / `target_matchers`; the older `source_match` /
 # `target_match` keys are deprecated.
 inhibit_rules:
  # Suppress warning alerts when critical alerts are firing
  - source_matchers:
      - 'severity="critical"'
    target_matchers:
      - 'severity="warning"'
    equal: ['alertname', 'cluster', 'service']
  # Suppress individual instance alerts when the entire service is down
  - source_matchers:
      - 'alertname="ServiceDown"'
    target_matchers:
      - 'alertname="InstanceDown"'
    equal: ['service']
 # Receiver configurations
 #
 # E-mail receivers set the subject through `headers.Subject` and the plain-text
 # body through `text`. Alertmanager's email_config has no `subject` or `body`
 # keys, and a config containing unknown keys is rejected at load time.
 # NOTE(review): {{DOMAIN}} is a deployment placeholder, not a valid template
 # expression - confirm it is substituted before Alertmanager reads this file.
 # NOTE(review): the webhook URLs use localhost:5001, which inside the
 # Alertmanager container is the container itself - confirm a listener exists.
 receivers:
  # Default webhook receiver (for testing)
  - name: 'web.hook'
    webhook_configs:
      - url: 'http://localhost:5001/'
        send_resolved: true
  # Critical alerts - immediate notification via multiple channels
  - name: 'critical-alerts'
    email_configs:
      - to: 'admin@{{DOMAIN}}'
        headers:
          Subject: '🚨 CRITICAL ALERT: {{ .GroupLabels.alertname }}'
        text: |
          {{ range .Alerts }}
          Alert: {{ .Annotations.summary }}
          Description: {{ .Annotations.description }}
          Severity: {{ .Labels.severity }}
          Service: {{ .Labels.service }}
          Instance: {{ .Labels.instance }}
          Time: {{ .StartsAt }}
          {{ end }}
        # The summary table shows the start time of the first alert in the
        # group; `time` is not a group_by label, so .GroupLabels.time was empty.
        html: |
          <h2>🚨 Critical Alert</h2>
          <table>
            <tr><td><strong>Alert:</strong></td><td>{{ .GroupLabels.alertname }}</td></tr>
            <tr><td><strong>Service:</strong></td><td>{{ .GroupLabels.service }}</td></tr>
            <tr><td><strong>Time:</strong></td><td>{{ (index .Alerts 0).StartsAt }}</td></tr>
          </table>
          <h3>Alerts:</h3>
          <ul>
          {{ range .Alerts }}
            <li><strong>{{ .Annotations.summary }}</strong><br/>
                {{ .Annotations.description }}<br/>
                <small>Instance: {{ .Labels.instance }} | Time: {{ .StartsAt }}</small>
            </li>
          {{ end }}
          </ul>
    webhook_configs:
      - url: 'http://localhost:5001/critical'
        send_resolved: true
  # Warning alerts - less urgent notification
  - name: 'warning-alerts'
    email_configs:
      - to: 'admin@{{DOMAIN}}'
        headers:
          Subject: '⚠️ WARNING: {{ .GroupLabels.alertname }}'
        text: |
          {{ range .Alerts }}
          Alert: {{ .Annotations.summary }}
          Description: {{ .Annotations.description }}
          Severity: {{ .Labels.severity }}
          Service: {{ .Labels.service }}
          Instance: {{ .Labels.instance }}
          Time: {{ .StartsAt }}
          {{ end }}
    webhook_configs:
      - url: 'http://localhost:5001/warning'
        send_resolved: true
  # Bot-specific alerts
  - name: 'bot-alerts'
    email_configs:
      - to: 'bot-admin@{{DOMAIN}}'
        headers:
          Subject: '🤖 Bot Alert: {{ .GroupLabels.alertname }}'
        text: |
          Bot Alert: {{ .GroupLabels.alertname }}
          Service: {{ .GroupLabels.service }}
          {{ range .Alerts }}
          - {{ .Annotations.summary }}
            {{ .Annotations.description }}
            Instance: {{ .Labels.instance }}
            Time: {{ .StartsAt }}
          {{ end }}
    webhook_configs:
      - url: 'http://localhost:5001/bot'
        send_resolved: true
  # Infrastructure alerts
  - name: 'infrastructure-alerts'
    email_configs:
      - to: 'infra@{{DOMAIN}}'
        headers:
          Subject: '🏗️ Infrastructure Alert: {{ .GroupLabels.alertname }}'
        text: |
          Infrastructure Alert: {{ .GroupLabels.alertname }}
          Service: {{ .GroupLabels.service }}
          {{ range .Alerts }}
          - {{ .Annotations.summary }}
            {{ .Annotations.description }}
            Instance: {{ .Labels.instance }}
            Time: {{ .StartsAt }}
          {{ end }}
    webhook_configs:
      - url: 'http://localhost:5001/infrastructure'
        send_resolved: true
--- a/infra/ansible/playbook.yml
+++ b/infra/ansible/playbook.yml
@@ -57,6 +57,15 @@
          - nginx
          - openssl
          - apache2-utils
          - certbot
          - python3-certbot-nginx
        state: present
    - name: Установить Python библиотеки для Ansible
      pip:
        name:
          - passlib
          - bcrypt
        state: present
    - name: Установить часовой пояс Europe/Moscow
@@ -278,14 +287,40 @@
        - "{{ project_root }}/infra/nginx"
        - "{{ project_root }}/infra/nginx/ssl"
        - "{{ project_root }}/infra/nginx/conf.d"
        - "{{ project_root }}/infra/uptime-kuma"
        - "{{ project_root }}/infra/alertmanager"
        - "{{ project_root }}/infra/grafana/dashboards"
        - "{{ project_root }}/scripts"
-    - name: Сгенерировать самоподписанный SSL сертификат
+    - name: Сгенерировать самоподписанный SSL сертификат (fallback)
      command: >
        openssl req -x509 -newkey rsa:4096 -keyout {{ project_root }}/infra/nginx/ssl/key.pem
        -out {{ project_root }}/infra/nginx/ssl/cert.pem -days 365 -nodes
        -subj "/CN={{ ansible_host }}/O=Monitoring/C=RU"
      args:
        creates: "{{ project_root }}/infra/nginx/ssl/cert.pem"
      when: not use_letsencrypt | default(false)
    - name: Создать директории для Let's Encrypt
      file:
        path: "{{ item }}"
        state: directory
        owner: root
        group: root
        mode: '0755'
      loop:
        - /etc/letsencrypt
        - /etc/letsencrypt/live
        - /etc/letsencrypt/archive
        - /etc/letsencrypt/renewal
      when: use_letsencrypt | default(false)
    # Install root's weekly certificate-renewal cron entry (Mondays at 02:00),
    # only when Let's Encrypt is enabled.
    # The schedule must go in the module's time fields: `job` is written after
    # the five schedule columns verbatim, so a schedule embedded in `job` yields
    # "* * * * * 0 2 * * 1 ..." - an every-minute entry running the command "0".
    # The entry is keyed by `name`, so re-running replaces the malformed line.
    # NOTE(review): the tasks visible here install setup-ssl.sh only - confirm
    # /usr/local/bin/ssl-renewal.sh is deployed elsewhere.
    - name: Настроить cron для автоматического обновления SSL сертификатов
      cron:
        name: "SSL Certificate Renewal"
        minute: "0"
        hour: "2"
        weekday: "1"
        job: "/usr/local/bin/ssl-renewal.sh"
        user: root
      when: use_letsencrypt | default(false)
    - name: Установить права на SSL сертификаты
      file:
@@ -314,6 +349,7 @@
        group: root
        mode: '0644'
        backup: yes
        remote_src: yes
    - name: Скопировать конфигурации nginx для сервисов
      copy:
@@ -323,6 +359,7 @@
        group: root
        mode: '0644'
        backup: yes
        remote_src: yes
    - name: Скопировать SSL сертификаты
      copy:
@@ -332,6 +369,7 @@
        group: root
        mode: '0600'
        backup: yes
        remote_src: yes
    - name: Скопировать htpasswd файл
      copy:
@@ -341,6 +379,47 @@
        group: root
        mode: '0644'
        backup: yes
        remote_src: yes
    # NOTE(review): in the three tasks below `src` and `dest` are the same path
    # on the remote host (remote_src: yes), so no content is copied; they only
    # enforce owner, group and mode on files already present under project_root.
    # Confirm this is intended and that the files arrive there by another step.

    # Alertmanager configuration (bind-mounted into the alertmanager container).
    - name: Скопировать конфигурацию Alertmanager
      copy:
        src: "{{ project_root }}/infra/alertmanager/alertmanager.yml"
        dest: "{{ project_root }}/infra/alertmanager/alertmanager.yml"
        owner: "{{ deploy_user }}"
        group: "{{ deploy_user }}"
        mode: '0644'
        backup: yes
        remote_src: yes
    # Prometheus alert rules (bind-mounted into the prometheus container).
    - name: Скопировать правила алертов Prometheus
      copy:
        src: "{{ project_root }}/infra/prometheus/alert_rules.yml"
        dest: "{{ project_root }}/infra/prometheus/alert_rules.yml"
        owner: "{{ deploy_user }}"
        group: "{{ deploy_user }}"
        mode: '0644'
        backup: yes
        remote_src: yes
    # Grafana dashboards directory; the trailing slash on src selects the
    # directory's contents rather than the directory itself.
    - name: Скопировать дашборды Grafana
      copy:
        src: "{{ project_root }}/infra/grafana/dashboards/"
        dest: "{{ project_root }}/infra/grafana/dashboards/"
        owner: "{{ deploy_user }}"
        group: "{{ deploy_user }}"
        mode: '0644'
        backup: yes
        remote_src: yes
    - name: Скопировать скрипт настройки SSL
      copy:
        src: "{{ project_root }}/scripts/setup-ssl.sh"
        dest: /usr/local/bin/setup-ssl.sh
        owner: root
        group: root
        mode: '0755'
        backup: yes
        remote_src: yes
    - name: Проверить конфигурацию nginx
      command: nginx -t
@@ -811,6 +890,20 @@
        timeout: 30
        state: started
    - name: Проверить, что порт 3001 (Uptime Kuma) открыт
      wait_for:
        port: 3001
        host: "{{ ansible_host }}"
        timeout: 30
        state: started
    - name: Проверить, что порт 9093 (Alertmanager) открыт
      wait_for:
        port: 9093
        host: "{{ ansible_host }}"
        timeout: 30
        state: started
    - name: Проверить доступность Nginx
      uri:
        url: "http://{{ ansible_host }}/nginx-health"
@@ -849,6 +942,26 @@
      retries: 5
      delay: 10
    # Smoke-test Uptime Kuma through the nginx reverse proxy: GET /status over
    # HTTPS must return 200. Certificate validation is off because the
    # certificate may be self-signed.
    # `until` makes the retry loop explicit: up to 5 attempts, 10s apart. On
    # Ansible releases before 2.16, `retries` without `until` is ignored and the
    # task runs only once.
    - name: Проверить доступность Uptime Kuma через Nginx
      uri:
        url: "https://{{ ansible_host }}/status"
        method: GET
        status_code: 200
        validate_certs: no
      register: uptime_kuma_nginx_health
      until: uptime_kuma_nginx_health.status == 200
      retries: 5
      delay: 10
    # Smoke-test Alertmanager through the nginx reverse proxy: GET
    # /alertmanager/ over HTTPS must return 200. Certificate validation is off
    # because the certificate may be self-signed.
    # `until` makes the retry loop explicit: up to 5 attempts, 10s apart. On
    # Ansible releases before 2.16, `retries` without `until` is ignored and the
    # task runs only once.
    # NOTE(review): if nginx protects this location with basic auth, the probe
    # gets 401 and needs credentials - confirm against nginx.conf.
    - name: Проверить доступность Alertmanager через Nginx
      uri:
        url: "https://{{ ansible_host }}/alertmanager/"
        method: GET
        status_code: 200
        validate_certs: no
      register: alertmanager_nginx_health
      until: alertmanager_nginx_health.status == 200
      retries: 5
      delay: 10
    - name: Закрыть старый SSH порт 22 в UFW (финальный шаг)
      ufw:
@@ -858,7 +971,7 @@
    - name: Проверка запуска ботов завершена — всё работает 🟢
      debug:
-        msg: "Все сервисы запущены и слушают нужные порты. SSH настроен на порт 15722, Fail2ban активен, параметры безопасности ядра применены. Порт 22 закрыт для безопасности."
+        msg: "Все сервисы запущены и слушают нужные порты. SSH настроен на порт 15722, Fail2ban активен, параметры безопасности ядра применены. Порт 22 закрыт для безопасности. Добавлены: Uptime Kuma (статусная страница), Alertmanager (мониторинг), Let's Encrypt SSL, Grafana дашборды."
  # handlers для перезагрузки сервисов
  handlers:
--- a/infra/grafana/dashboards/bot-monitoring.json
+++ b/infra/grafana/dashboards/bot-monitoring.json
@@ -0,0 +1,529 @@
 {
  "annotations": {
    "list": [
      {
        "builtIn": 1,
        "datasource": "-- Grafana --",
        "enable": true,
        "hide": true,
        "iconColor": "rgba(0, 211, 255, 1)",
        "name": "Annotations & Alerts",
        "type": "dashboard"
      }
    ]
  },
  "editable": true,
  "gnetId": null,
  "graphTooltip": 0,
  "id": null,
  "links": [],
  "panels": [
    {
      "datasource": "Prometheus",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "custom": {
            "axisLabel": "",
            "axisPlacement": "auto",
            "barAlignment": 0,
            "drawStyle": "line",
            "fillOpacity": 10,
            "gradientMode": "none",
            "hideFrom": {
              "legend": false,
              "tooltip": false,
              "vis": false
            },
            "lineInterpolation": "linear",
            "lineWidth": 1,
            "pointSize": 5,
            "scaleDistribution": {
              "type": "linear"
            },
            "showPoints": "never",
            "spanNulls": false,
            "stacking": {
              "group": "A",
              "mode": "none"
            },
            "thresholdsStyle": {
              "mode": "off"
            }
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              },
              {
                "color": "red",
                "value": 80
              }
            ]
          },
          "unit": "reqps"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 8,
        "w": 12,
        "x": 0,
        "y": 0
      },
      "id": 1,
      "options": {
        "legend": {
          "calcs": [],
          "displayMode": "list",
          "placement": "bottom"
        },
        "tooltip": {
          "mode": "single"
        }
      },
      "targets": [
        {
          "expr": "rate(http_requests_total{job=~\"telegram-bot|anon-bot\"}[5m])",
          "interval": "",
          "legendFormat": "{{job}} - {{method}} {{status}}",
          "refId": "A"
        }
      ],
      "title": "Bot Request Rate",
      "type": "timeseries"
    },
    {
      "datasource": "Prometheus",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "custom": {
            "axisLabel": "",
            "axisPlacement": "auto",
            "barAlignment": 0,
            "drawStyle": "line",
            "fillOpacity": 10,
            "gradientMode": "none",
            "hideFrom": {
              "legend": false,
              "tooltip": false,
              "vis": false
            },
            "lineInterpolation": "linear",
            "lineWidth": 1,
            "pointSize": 5,
            "scaleDistribution": {
              "type": "linear"
            },
            "showPoints": "never",
            "spanNulls": false,
            "stacking": {
              "group": "A",
              "mode": "none"
            },
            "thresholdsStyle": {
              "mode": "off"
            }
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              },
              {
                "color": "red",
                "value": 80
              }
            ]
          },
          "unit": "s"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 8,
        "w": 12,
        "x": 12,
        "y": 0
      },
      "id": 2,
      "options": {
        "legend": {
          "calcs": [],
          "displayMode": "list",
          "placement": "bottom"
        },
        "tooltip": {
          "mode": "single"
        }
      },
      "targets": [
        {
          "expr": "histogram_quantile(0.95, rate(http_request_duration_seconds_bucket{job=~\"telegram-bot|anon-bot\"}[5m]))",
          "interval": "",
          "legendFormat": "{{job}} - 95th percentile",
          "refId": "A"
        },
        {
          "expr": "histogram_quantile(0.50, rate(http_request_duration_seconds_bucket{job=~\"telegram-bot|anon-bot\"}[5m]))",
          "interval": "",
          "legendFormat": "{{job}} - 50th percentile",
          "refId": "B"
        }
      ],
      "title": "Bot Response Time",
      "type": "timeseries"
    },
    {
      "datasource": "Prometheus",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "custom": {
            "axisLabel": "",
            "axisPlacement": "auto",
            "barAlignment": 0,
            "drawStyle": "line",
            "fillOpacity": 10,
            "gradientMode": "none",
            "hideFrom": {
              "legend": false,
              "tooltip": false,
              "vis": false
            },
            "lineInterpolation": "linear",
            "lineWidth": 1,
            "pointSize": 5,
            "scaleDistribution": {
              "type": "linear"
            },
            "showPoints": "never",
            "spanNulls": false,
            "stacking": {
              "group": "A",
              "mode": "none"
            },
            "thresholdsStyle": {
              "mode": "off"
            }
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              },
              {
                "color": "red",
                "value": 80
              }
            ]
          },
          "unit": "percent"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 8,
        "w": 12,
        "x": 0,
        "y": 8
      },
      "id": 3,
      "options": {
        "legend": {
          "calcs": [],
          "displayMode": "list",
          "placement": "bottom"
        },
        "tooltip": {
          "mode": "single"
        }
      },
      "targets": [
        {
          "expr": "rate(http_requests_total{job=~\"telegram-bot|anon-bot\",status=~\"5..\"}[5m]) / rate(http_requests_total{job=~\"telegram-bot|anon-bot\"}[5m]) * 100",
          "interval": "",
          "legendFormat": "{{job}} - Error Rate",
          "refId": "A"
        }
      ],
      "title": "Bot Error Rate",
      "type": "timeseries"
    },
    {
      "datasource": "Prometheus",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "custom": {
            "axisLabel": "",
            "axisPlacement": "auto",
            "barAlignment": 0,
            "drawStyle": "line",
            "fillOpacity": 10,
            "gradientMode": "none",
            "hideFrom": {
              "legend": false,
              "tooltip": false,
              "vis": false
            },
            "lineInterpolation": "linear",
            "lineWidth": 1,
            "pointSize": 5,
            "scaleDistribution": {
              "type": "linear"
            },
            "showPoints": "never",
            "spanNulls": false,
            "stacking": {
              "group": "A",
              "mode": "none"
            },
            "thresholdsStyle": {
              "mode": "off"
            }
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              },
              {
                "color": "red",
                "value": 80
              }
            ]
          },
          "unit": "bytes"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 8,
        "w": 12,
        "x": 12,
        "y": 8
      },
      "id": 4,
      "options": {
        "legend": {
          "calcs": [],
          "displayMode": "list",
          "placement": "bottom"
        },
        "tooltip": {
          "mode": "single"
        }
      },
      "targets": [
        {
          "expr": "process_resident_memory_bytes{job=~\"telegram-bot|anon-bot\"}",
          "interval": "",
          "legendFormat": "{{job}} - Memory Usage",
          "refId": "A"
        }
      ],
      "title": "Bot Memory Usage",
      "type": "timeseries"
    },
    {
      "datasource": "Prometheus",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "custom": {
            "axisLabel": "",
            "axisPlacement": "auto",
            "barAlignment": 0,
            "drawStyle": "line",
            "fillOpacity": 10,
            "gradientMode": "none",
            "hideFrom": {
              "legend": false,
              "tooltip": false,
              "vis": false
            },
            "lineInterpolation": "linear",
            "lineWidth": 1,
            "pointSize": 5,
            "scaleDistribution": {
              "type": "linear"
            },
            "showPoints": "never",
            "spanNulls": false,
            "stacking": {
              "group": "A",
              "mode": "none"
            },
            "thresholdsStyle": {
              "mode": "off"
            }
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              },
              {
                "color": "red",
                "value": 80
              }
            ]
          },
          "unit": "short"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 8,
        "w": 12,
        "x": 0,
        "y": 16
      },
      "id": 5,
      "options": {
        "legend": {
          "calcs": [],
          "displayMode": "list",
          "placement": "bottom"
        },
        "tooltip": {
          "mode": "single"
        }
      },
      "targets": [
        {
          "expr": "up{job=~\"telegram-bot|anon-bot\"}",
          "interval": "",
          "legendFormat": "{{job}} - Status",
          "refId": "A"
        }
      ],
      "title": "Bot Health Status",
      "type": "timeseries"
    },
    {
      "datasource": "Prometheus",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "custom": {
            "axisLabel": "",
            "axisPlacement": "auto",
            "barAlignment": 0,
            "drawStyle": "line",
            "fillOpacity": 10,
            "gradientMode": "none",
            "hideFrom": {
              "legend": false,
              "tooltip": false,
              "vis": false
            },
            "lineInterpolation": "linear",
            "lineWidth": 1,
            "pointSize": 5,
            "scaleDistribution": {
              "type": "linear"
            },
            "showPoints": "never",
            "spanNulls": false,
            "stacking": {
              "group": "A",
              "mode": "none"
            },
            "thresholdsStyle": {
              "mode": "off"
            }
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              },
              {
                "color": "red",
                "value": 80
              }
            ]
          },
          "unit": "short"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 8,
        "w": 12,
        "x": 12,
        "y": 16
      },
      "id": 6,
      "options": {
        "legend": {
          "calcs": [],
          "displayMode": "list",
          "placement": "bottom"
        },
        "tooltip": {
          "mode": "single"
        }
      },
      "targets": [
        {
          "expr": "rate(process_cpu_seconds_total{job=~\"telegram-bot|anon-bot\"}[5m]) * 100",
          "interval": "",
          "legendFormat": "{{job}} - CPU Usage",
          "refId": "A"
        }
      ],
      "title": "Bot CPU Usage",
      "type": "timeseries"
    }
  ],
  "schemaVersion": 27,
  "style": "dark",
  "tags": ["bots", "monitoring"],
  "templating": {
    "list": []
  },
  "time": {
    "from": "now-1h",
    "to": "now"
  },
  "timepicker": {},
  "timezone": "",
  "title": "Bot Monitoring Dashboard",
  "uid": "bot-monitoring",
  "version": 1
 }
--- a/infra/grafana/dashboards/infrastructure-monitoring.json
+++ b/infra/grafana/dashboards/infrastructure-monitoring.json
@@ -0,0 +1,523 @@
 {
  "annotations": {
    "list": [
      {
        "builtIn": 1,
        "datasource": "-- Grafana --",
        "enable": true,
        "hide": true,
        "iconColor": "rgba(0, 211, 255, 1)",
        "name": "Annotations & Alerts",
        "type": "dashboard"
      }
    ]
  },
  "editable": true,
  "gnetId": null,
  "graphTooltip": 0,
  "id": null,
  "links": [],
  "panels": [
    {
      "datasource": "Prometheus",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "custom": {
            "axisLabel": "",
            "axisPlacement": "auto",
            "barAlignment": 0,
            "drawStyle": "line",
            "fillOpacity": 10,
            "gradientMode": "none",
            "hideFrom": {
              "legend": false,
              "tooltip": false,
              "vis": false
            },
            "lineInterpolation": "linear",
            "lineWidth": 1,
            "pointSize": 5,
            "scaleDistribution": {
              "type": "linear"
            },
            "showPoints": "never",
            "spanNulls": false,
            "stacking": {
              "group": "A",
              "mode": "none"
            },
            "thresholdsStyle": {
              "mode": "off"
            }
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              },
              {
                "color": "red",
                "value": 80
              }
            ]
          },
          "unit": "percent"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 8,
        "w": 12,
        "x": 0,
        "y": 0
      },
      "id": 1,
      "options": {
        "legend": {
          "calcs": [],
          "displayMode": "list",
          "placement": "bottom"
        },
        "tooltip": {
          "mode": "single"
        }
      },
      "targets": [
        {
          "expr": "100 - (avg by(instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m])) * 100)",
          "interval": "",
          "legendFormat": "CPU Usage - {{instance}}",
          "refId": "A"
        }
      ],
      "title": "System CPU Usage",
      "type": "timeseries"
    },
    {
      "datasource": "Prometheus",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "custom": {
            "axisLabel": "",
            "axisPlacement": "auto",
            "barAlignment": 0,
            "drawStyle": "line",
            "fillOpacity": 10,
            "gradientMode": "none",
            "hideFrom": {
              "legend": false,
              "tooltip": false,
              "vis": false
            },
            "lineInterpolation": "linear",
            "lineWidth": 1,
            "pointSize": 5,
            "scaleDistribution": {
              "type": "linear"
            },
            "showPoints": "never",
            "spanNulls": false,
            "stacking": {
              "group": "A",
              "mode": "none"
            },
            "thresholdsStyle": {
              "mode": "off"
            }
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              },
              {
                "color": "red",
                "value": 80
              }
            ]
          },
          "unit": "percent"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 8,
        "w": 12,
        "x": 12,
        "y": 0
      },
      "id": 2,
      "options": {
        "legend": {
          "calcs": [],
          "displayMode": "list",
          "placement": "bottom"
        },
        "tooltip": {
          "mode": "single"
        }
      },
      "targets": [
        {
          "expr": "(1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) * 100",
          "interval": "",
          "legendFormat": "Memory Usage - {{instance}}",
          "refId": "A"
        }
      ],
      "title": "System Memory Usage",
      "type": "timeseries"
    },
    {
      "datasource": "Prometheus",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "custom": {
            "axisLabel": "",
            "axisPlacement": "auto",
            "barAlignment": 0,
            "drawStyle": "line",
            "fillOpacity": 10,
            "gradientMode": "none",
            "hideFrom": {
              "legend": false,
              "tooltip": false,
              "vis": false
            },
            "lineInterpolation": "linear",
            "lineWidth": 1,
            "pointSize": 5,
            "scaleDistribution": {
              "type": "linear"
            },
            "showPoints": "never",
            "spanNulls": false,
            "stacking": {
              "group": "A",
              "mode": "none"
            },
            "thresholdsStyle": {
              "mode": "off"
            }
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              },
              {
                "color": "red",
                "value": 80
              }
            ]
          },
          "unit": "percent"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 8,
        "w": 12,
        "x": 0,
        "y": 8
      },
      "id": 3,
      "options": {
        "legend": {
          "calcs": [],
          "displayMode": "list",
          "placement": "bottom"
        },
        "tooltip": {
          "mode": "single"
        }
      },
      "targets": [
        {
          "expr": "(1 - (node_filesystem_avail_bytes / node_filesystem_size_bytes)) * 100",
          "interval": "",
          "legendFormat": "Disk Usage - {{instance}} {{mountpoint}}",
          "refId": "A"
        }
      ],
      "title": "Disk Usage",
      "type": "timeseries"
    },
    {
      "datasource": "Prometheus",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "custom": {
            "axisLabel": "",
            "axisPlacement": "auto",
            "barAlignment": 0,
            "drawStyle": "line",
            "fillOpacity": 10,
            "gradientMode": "none",
            "hideFrom": {
              "legend": false,
              "tooltip": false,
              "vis": false
            },
            "lineInterpolation": "linear",
            "lineWidth": 1,
            "pointSize": 5,
            "scaleDistribution": {
              "type": "linear"
            },
            "showPoints": "never",
            "spanNulls": false,
            "stacking": {
              "group": "A",
              "mode": "none"
            },
            "thresholdsStyle": {
              "mode": "off"
            }
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              },
              {
                "color": "red",
                "value": 80
              }
            ]
          },
          "unit": "short"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 8,
        "w": 12,
        "x": 12,
        "y": 8
      },
      "id": 4,
      "options": {
        "legend": {
          "calcs": [],
          "displayMode": "list",
          "placement": "bottom"
        },
        "tooltip": {
          "mode": "single"
        }
      },
      "targets": [
        {
          "expr": "up{job=~\"prometheus|grafana|nginx|alertmanager|uptime-kuma\"}",
          "interval": "",
          "legendFormat": "{{job}} - Status",
          "refId": "A"
        }
      ],
      "title": "Service Health Status",
      "type": "timeseries"
    },
    {
      "datasource": "Prometheus",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "custom": {
            "axisLabel": "",
            "axisPlacement": "auto",
            "barAlignment": 0,
            "drawStyle": "line",
            "fillOpacity": 10,
            "gradientMode": "none",
            "hideFrom": {
              "legend": false,
              "tooltip": false,
              "vis": false
            },
            "lineInterpolation": "linear",
            "lineWidth": 1,
            "pointSize": 5,
            "scaleDistribution": {
              "type": "linear"
            },
            "showPoints": "never",
            "spanNulls": false,
            "stacking": {
              "group": "A",
              "mode": "none"
            },
            "thresholdsStyle": {
              "mode": "off"
            }
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              },
              {
                "color": "red",
                "value": 80
              }
            ]
          },
          "unit": "reqps"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 8,
        "w": 12,
        "x": 0,
        "y": 16
      },
      "id": 5,
      "options": {
        "legend": {
          "calcs": [],
          "displayMode": "list",
          "placement": "bottom"
        },
        "tooltip": {
          "mode": "single"
        }
      },
      "targets": [
        {
          "expr": "rate(nginx_http_requests_total[5m])",
          "interval": "",
          "legendFormat": "Nginx - {{status}}",
          "refId": "A"
        }
      ],
      "title": "Nginx Request Rate",
      "type": "timeseries"
    },
    {
      "datasource": "Prometheus",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "custom": {
            "axisLabel": "",
            "axisPlacement": "auto",
            "barAlignment": 0,
            "drawStyle": "line",
            "fillOpacity": 10,
            "gradientMode": "none",
            "hideFrom": {
              "legend": false,
              "tooltip": false,
              "vis": false
            },
            "lineInterpolation": "linear",
            "lineWidth": 1,
            "pointSize": 5,
            "scaleDistribution": {
              "type": "linear"
            },
            "showPoints": "never",
            "spanNulls": false,
            "stacking": {
              "group": "A",
              "mode": "none"
            },
            "thresholdsStyle": {
              "mode": "off"
            }
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              },
              {
                "color": "red",
                "value": 80
              }
            ]
          },
          "unit": "bytes"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 8,
        "w": 12,
        "x": 12,
        "y": 16
      },
      "id": 6,
      "options": {
        "legend": {
          "calcs": [],
          "displayMode": "list",
          "placement": "bottom"
        },
        "tooltip": {
          "mode": "single"
        }
      },
      "targets": [
        {
          "expr": "container_memory_usage_bytes{name=~\"bots_.*\"}",
          "interval": "",
          "legendFormat": "{{name}} - Memory",
          "refId": "A"
        }
      ],
      "title": "Container Memory Usage",
      "type": "timeseries"
    }
  ],
  "schemaVersion": 27,
  "style": "dark",
  "tags": ["infrastructure", "monitoring"],
  "templating": {
    "list": []
  },
  "time": {
    "from": "now-1h",
    "to": "now"
  },
  "timepicker": {},
  "timezone": "",
  "title": "Infrastructure Monitoring Dashboard",
  "uid": "infrastructure-monitoring",
  "version": 1
 }
--- a/infra/grafana/provisioning/dashboards/dashboards.yml
+++ b/infra/grafana/provisioning/dashboards/dashboards.yml
@@ -0,0 +1,16 @@
 # Grafana Dashboard Provisioning Configuration
 # This file configures automatic dashboard import
 apiVersion: 1
 providers:
  - name: 'default'
    orgId: 1
    folder: ''
    type: file
    disableDeletion: false
    updateIntervalSeconds: 10
    allowUiUpdates: true
    options:
      path: /etc/grafana/provisioning/dashboards
      foldersFromFilesStructure: true
--- a/infra/grafana/provisioning/datasources/prometheus.yml
+++ b/infra/grafana/provisioning/datasources/prometheus.yml
@@ -4,5 +4,13 @@ datasources:
  - name: Prometheus
    type: prometheus
    access: proxy
-    url: http://prometheus:9090
+    url: http://prometheus:9090/prometheus
    isDefault: true
    jsonData:
      httpMethod: POST
      manageAlerts: true
      prometheusType: Prometheus
      prometheusVersion: 2.40.0
      cacheLevel: 'High'
      disableRecordingRules: false
      incrementalQueryOverlapWindow: 10m
--- a/infra/nginx/conf.d/alertmanager.conf
+++ b/infra/nginx/conf.d/alertmanager.conf
@@ -0,0 +1,61 @@
 # Alertmanager Nginx Configuration
 # Proxies requests to Alertmanager
 # Alertmanager location
 location /alertmanager/ {
    # Rate limiting
    limit_req zone=api burst=10 nodelay;
    # Strip the /alertmanager/ prefix so Alertmanager receives root-relative paths
    rewrite ^/alertmanager/(.*)$ /$1 break;
    # Proxy to Alertmanager
    proxy_pass http://alertmanager_backend;
    proxy_set_header Host $host;
    proxy_set_header X-Real-IP $remote_addr;
    proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
    proxy_set_header X-Forwarded-Proto $scheme;
    # Timeouts
    proxy_connect_timeout 30s;
    proxy_send_timeout 30s;
    proxy_read_timeout 30s;
    # Buffer settings
    proxy_buffering on;
    proxy_buffer_size 4k;
    proxy_buffers 8 4k;
    # Security headers
    add_header X-Frame-Options "SAMEORIGIN" always;
    add_header X-Content-Type-Options "nosniff" always;
 }
 # Alertmanager API
 # Forwards /api/v1/* unchanged (proxy_pass has no URI part) to alertmanager_backend.
 # NOTE(review): uptime-kuma.conf declares `location /api/`; if both files are included
 # in the same server block, this longer prefix wins for /api/v1/ requests — confirm
 # that split is intended.
 # NOTE(review): recent Alertmanager releases appear to serve only /api/v2 — verify
 # that the deployed version still answers on /api/v1/.
 location /api/v1/ {
    # Rate limiting
    limit_req zone=api burst=20 nodelay;
    # Proxy to Alertmanager
    proxy_pass http://alertmanager_backend;
    proxy_set_header Host $host;
    proxy_set_header X-Real-IP $remote_addr;
    proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
    proxy_set_header X-Forwarded-Proto $scheme;
    # CORS headers
    # NOTE(review): a wildcard origin combined with PUT/DELETE lets any web page
    # call this API from a visitor's browser — confirm this exposure is acceptable.
    add_header Access-Control-Allow-Origin "*" always;
    add_header Access-Control-Allow-Methods "GET, POST, PUT, DELETE, OPTIONS" always;
    add_header Access-Control-Allow-Headers "DNT,User-Agent,X-Requested-With,If-Modified-Since,Cache-Control,Content-Type,Range,Authorization" always;
    # Handle preflight requests
    # The CORS headers are repeated inside the `if` because a level that defines its
    # own add_header directives does not inherit the ones from the enclosing location.
    if ($request_method = 'OPTIONS') {
        add_header Access-Control-Allow-Origin "*";
        add_header Access-Control-Allow-Methods "GET, POST, PUT, DELETE, OPTIONS";
        add_header Access-Control-Allow-Headers "DNT,User-Agent,X-Requested-With,If-Modified-Since,Cache-Control,Content-Type,Range,Authorization";
        add_header Access-Control-Max-Age 1728000;
        add_header Content-Type "text/plain; charset=utf-8";
        add_header Content-Length 0;
        return 204;
    }
 }
--- a/infra/nginx/conf.d/grafana.conf
+++ b/infra/nginx/conf.d/grafana.conf
@@ -1,9 +1,3 @@
 # Grafana reverse proxy configuration
 upstream grafana_backend {
    server grafana:3000;
    keepalive 32;
 }
 # Grafana proxy configuration
 location /grafana/ {
    proxy_pass http://grafana_backend/;
--- a/infra/nginx/conf.d/prometheus.conf
+++ b/infra/nginx/conf.d/prometheus.conf
@@ -1,12 +1,7 @@
 # Prometheus reverse proxy configuration
 upstream prometheus_backend {
    server prometheus:9090;
    keepalive 32;
 }
 # Prometheus proxy configuration
 location /prometheus/ {
    proxy_pass http://prometheus_backend/;
    proxy_redirect / /prometheus/;
    proxy_set_header Host $host;
    proxy_set_header X-Real-IP $remote_addr;
    proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
@@ -31,4 +26,4 @@ location /prometheus/-/healthy {
    proxy_pass http://prometheus_backend/-/healthy;
    proxy_set_header Host $host;
    access_log off;
-}
+}
--- a/infra/nginx/conf.d/status.conf
+++ b/infra/nginx/conf.d/status.conf
@@ -1,16 +1,35 @@
-# Status page configuration (for future uptime kuma integration)
+# Status page configuration (Uptime Kuma integration)
 # Rate limiting for status page
 location /status {
-    # Basic authentication for status page
+    # Rate limiting
-    auth_basic "Status Page Access";
+    limit_req zone=status burst=5 nodelay;
    auth_basic_user_file /etc/nginx/.htpasswd;
-    # Placeholder for future uptime kuma integration
+    # Proxy to Uptime Kuma
-    # For now, show nginx status
+    proxy_pass http://uptime_kuma_backend;
-    access_log off;
+    proxy_set_header Host $host;
-    return 200 '{"status": "ok", "nginx": "running", "timestamp": "$time_iso8601"}';
+    proxy_set_header X-Real-IP $remote_addr;
-    add_header Content-Type application/json;
+    proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
    proxy_set_header X-Forwarded-Proto $scheme;
    # WebSocket support
    proxy_http_version 1.1;
    proxy_set_header Upgrade $http_upgrade;
    proxy_set_header Connection "upgrade";
    # Timeouts
    proxy_connect_timeout 30s;
    proxy_send_timeout 30s;
    proxy_read_timeout 30s;
    # Buffer settings
    proxy_buffering on;
    proxy_buffer_size 4k;
    proxy_buffers 8 4k;
    # Security headers
    add_header X-Frame-Options "SAMEORIGIN" always;
    add_header X-Content-Type-Options "nosniff" always;
 }
 # Nginx status stub (for monitoring)
@@ -21,4 +40,4 @@ location /nginx_status {
    allow 172.16.0.0/12;  # Docker networks
    allow 192.168.0.0/16; # Private networks
    deny all;
-}
+}
--- a/infra/nginx/conf.d/uptime-kuma.conf
+++ b/infra/nginx/conf.d/uptime-kuma.conf
@@ -0,0 +1,69 @@
 # Uptime Kuma Nginx Configuration
 # Proxies requests to Uptime Kuma status page
 # Upstream "uptime_kuma_backend" (uptime-kuma:3001) is declared once in nginx.conf,
 # in the http context, alongside the other *_backend upstreams. It must not be
 # repeated here: `upstream` is only valid in the http context, while this file
 # holds `location` blocks, and a second declaration of the same name is rejected.
 # Status page location
 # Proxies /status to the Uptime Kuma upstream with WebSocket upgrade support.
 # NOTE(review): status.conf also declares `location /status`; nginx rejects duplicate
 # locations within one server block — confirm only one of the two files is included.
 location /status {
    # Rate limiting
    limit_req zone=status burst=5 nodelay;
    # Proxy to Uptime Kuma
    # proxy_pass has no URI part, so the request path is forwarded unchanged.
    proxy_pass http://uptime_kuma_backend;
    proxy_set_header Host $host;
    proxy_set_header X-Real-IP $remote_addr;
    proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
    proxy_set_header X-Forwarded-Proto $scheme;
    # WebSocket support
    # HTTP/1.1 plus the Upgrade/Connection headers are required for the upgrade handshake.
    proxy_http_version 1.1;
    proxy_set_header Upgrade $http_upgrade;
    proxy_set_header Connection "upgrade";
    # Timeouts
    proxy_connect_timeout 30s;
    proxy_send_timeout 30s;
    proxy_read_timeout 30s;
    # Buffer settings
    proxy_buffering on;
    proxy_buffer_size 4k;
    proxy_buffers 8 4k;
    # Security headers
    add_header X-Frame-Options "SAMEORIGIN" always;
    add_header X-Content-Type-Options "nosniff" always;
 }
 # API endpoints for Uptime Kuma
 location /api/ {
    # Rate limiting
    limit_req zone=api burst=10 nodelay;
    # Proxy to Uptime Kuma
    proxy_pass http://uptime_kuma_backend;
    proxy_set_header Host $host;
    proxy_set_header X-Real-IP $remote_addr;
    proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
    proxy_set_header X-Forwarded-Proto $scheme;
    # CORS headers
    add_header Access-Control-Allow-Origin "*" always;
    add_header Access-Control-Allow-Methods "GET, POST, PUT, DELETE, OPTIONS" always;
    add_header Access-Control-Allow-Headers "DNT,User-Agent,X-Requested-With,If-Modified-Since,Cache-Control,Content-Type,Range,Authorization" always;
    # Handle preflight requests
    if ($request_method = 'OPTIONS') {
        add_header Access-Control-Allow-Origin "*";
        add_header Access-Control-Allow-Methods "GET, POST, PUT, DELETE, OPTIONS";
        add_header Access-Control-Allow-Headers "DNT,User-Agent,X-Requested-With,If-Modified-Since,Cache-Control,Content-Type,Range,Authorization";
        add_header Access-Control-Max-Age 1728000;
        add_header Content-Type "text/plain; charset=utf-8";
        add_header Content-Length 0;
        return 204;
    }
 }
--- a/infra/nginx/nginx.conf
+++ b/infra/nginx/nginx.conf
@@ -63,6 +63,27 @@ http {
    ssl_session_cache shared:SSL:10m;
    ssl_session_timeout 10m;
    # Upstream configurations
    upstream grafana_backend {
        server grafana:3000;
        keepalive 32;
    }
    upstream prometheus_backend {
        server prometheus:9090;
        keepalive 32;
    }
    upstream uptime_kuma_backend {
        server uptime-kuma:3001;
        keepalive 32;
    }
    upstream alertmanager_backend {
        server alertmanager:9093;
        keepalive 32;
    }
    # Main server block
    server {
        listen 80;
@@ -74,17 +95,19 @@ http {
        listen 443 ssl http2;
        server_name _;
-        # SSL configuration
+        # SSL configuration (self-signed certificate)
-        ssl_certificate /etc/nginx/ssl/cert.pem;
+        ssl_certificate /etc/letsencrypt/live/{{SERVER_IP}}/fullchain.pem;
-        ssl_certificate_key /etc/nginx/ssl/key.pem;
+        ssl_certificate_key /etc/letsencrypt/live/{{SERVER_IP}}/privkey.pem;
        ssl_protocols TLSv1.2 TLSv1.3;
        ssl_ciphers ECDHE-RSA-AES128-GCM-SHA256:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-RSA-AES128-SHA256:ECDHE-RSA-AES256-SHA384;
        ssl_prefer_server_ciphers off;
        ssl_session_cache shared:SSL:10m;
        ssl_session_timeout 10m;
        # Security headers
        add_header X-Frame-Options "SAMEORIGIN" always;
        add_header X-Content-Type-Options "nosniff" always;
        # Rate limiting
        limit_req zone=api burst=20 nodelay;
        # Redirect root to Grafana
        location = / {
            return 301 /grafana/;
--- a/infra/nginx/ssl/letsencrypt.conf
+++ b/infra/nginx/ssl/letsencrypt.conf
@@ -0,0 +1,27 @@
 # Let's Encrypt SSL Configuration
 # This file contains the SSL configuration for Let's Encrypt certificates
 # SSL certificate paths (Let's Encrypt)
 ssl_certificate /etc/letsencrypt/live/{{DOMAIN}}/fullchain.pem;
 ssl_certificate_key /etc/letsencrypt/live/{{DOMAIN}}/privkey.pem;
 # SSL Security Configuration
 ssl_protocols TLSv1.2 TLSv1.3;
 # Each cipher suite is listed once, in preference order.
 ssl_ciphers ECDHE-RSA-AES128-GCM-SHA256:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-RSA-AES128-SHA256:ECDHE-RSA-AES256-SHA384:ECDHE-RSA-CHACHA20-POLY1305;
 ssl_prefer_server_ciphers off;
 ssl_session_cache shared:SSL:10m;
 ssl_session_timeout 10m;
 ssl_session_tickets off;
 # OCSP Stapling
 ssl_stapling on;
 ssl_stapling_verify on;
 ssl_trusted_certificate /etc/letsencrypt/live/{{DOMAIN}}/chain.pem;
 # Security Headers
 add_header Strict-Transport-Security "max-age=31536000; includeSubDomains" always;
 add_header X-Frame-Options "SAMEORIGIN" always;
 add_header X-Content-Type-Options "nosniff" always;
 add_header X-XSS-Protection "1; mode=block" always;
 add_header Referrer-Policy "strict-origin-when-cross-origin" always;
 add_header Content-Security-Policy "default-src 'self'; script-src 'self' 'unsafe-inline' 'unsafe-eval'; style-src 'self' 'unsafe-inline'; img-src 'self' data: https:; font-src 'self' data:; connect-src 'self' wss: https:;" always;
--- a/infra/prometheus/alert_rules.yml
+++ b/infra/prometheus/alert_rules.yml
@@ -0,0 +1,253 @@
 # Prometheus Alert Rules
 # This file defines alerting rules for monitoring the bot infrastructure
 groups:
  # Bot Health Monitoring
  - name: bot_health
    rules:
      # Telegram Bot Health
      - alert: TelegramBotDown
        expr: up{job="telegram-bot"} == 0
        for: 1m
        labels:
          severity: critical
          service: telegram-bot
        annotations:
          summary: "Telegram Bot is down"
          description: "Telegram Bot has been down for more than 1 minute"
          runbook_url: "https://docs.example.com/runbooks/telegram-bot-down"
      - alert: TelegramBotHighErrorRate
        expr: rate(http_requests_total{job="telegram-bot",status=~"5.."}[5m]) > 0.1
        for: 2m
        labels:
          severity: warning
          service: telegram-bot
        annotations:
          summary: "Telegram Bot high error rate"
          description: "Telegram Bot error rate is {{ $value }} errors per second"
      - alert: TelegramBotHighResponseTime
        expr: histogram_quantile(0.95, rate(http_request_duration_seconds_bucket{job="telegram-bot"}[5m])) > 2
        for: 5m
        labels:
          severity: warning
          service: telegram-bot
        annotations:
          summary: "Telegram Bot high response time"
          description: "95th percentile response time is {{ $value }} seconds"
      # AnonBot Health
      - alert: AnonBotDown
        expr: up{job="anon-bot"} == 0
        for: 1m
        labels:
          severity: critical
          service: anon-bot
        annotations:
          summary: "AnonBot is down"
          description: "AnonBot has been down for more than 1 minute"
          runbook_url: "https://docs.example.com/runbooks/anon-bot-down"
      - alert: AnonBotHighErrorRate
        expr: rate(http_requests_total{job="anon-bot",status=~"5.."}[5m]) > 0.1
        for: 2m
        labels:
          severity: warning
          service: anon-bot
        annotations:
          summary: "AnonBot high error rate"
          description: "AnonBot error rate is {{ $value }} errors per second"
      - alert: AnonBotHighResponseTime
        expr: histogram_quantile(0.95, rate(http_request_duration_seconds_bucket{job="anon-bot"}[5m])) > 2
        for: 5m
        labels:
          severity: warning
          service: anon-bot
        annotations:
          summary: "AnonBot high response time"
          description: "95th percentile response time is {{ $value }} seconds"
  # Infrastructure Health Monitoring
  - name: infrastructure_health
    rules:
      # Prometheus Health
      - alert: PrometheusDown
        expr: up{job="prometheus"} == 0
        for: 1m
        labels:
          severity: critical
          service: prometheus
        annotations:
          summary: "Prometheus is down"
          description: "Prometheus has been down for more than 1 minute"
      # Fires when the number of in-memory (head) series exceeds 80% of a series limit
      # for 5 minutes. The expression compares series counts, not bytes of memory.
      # NOTE(review): prometheus_tsdb_head_series_limit does not look like a metric
      # Prometheus exports about itself — confirm it exists; if it does not, this
      # expression returns no data and the alert can never fire.
      - alert: PrometheusHighMemoryUsage
        expr: (prometheus_tsdb_head_series / prometheus_tsdb_head_series_limit) > 0.8
        for: 5m
        labels:
          severity: warning
          service: prometheus
        annotations:
          summary: "Prometheus high memory usage"
          description: "Prometheus memory usage is {{ $value | humanizePercentage }} of limit"
      # Grafana Health
      - alert: GrafanaDown
        expr: up{job="grafana"} == 0
        for: 1m
        labels:
          severity: critical
          service: grafana
        annotations:
          summary: "Grafana is down"
          description: "Grafana has been down for more than 1 minute"
      # Nginx Health
      - alert: NginxDown
        expr: up{job="nginx"} == 0
        for: 1m
        labels:
          severity: critical
          service: nginx
        annotations:
          summary: "Nginx is down"
          description: "Nginx has been down for more than 1 minute"
      - alert: NginxHighErrorRate
        expr: rate(nginx_http_requests_total{status=~"5.."}[5m]) > 0.1
        for: 2m
        labels:
          severity: warning
          service: nginx
        annotations:
          summary: "Nginx high error rate"
          description: "Nginx error rate is {{ $value }} errors per second"
  # System Resource Monitoring
  - name: system_resources
    rules:
      # High CPU Usage
      - alert: HighCPUUsage
        expr: 100 - (avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) > 80
        for: 5m
        labels:
          severity: warning
          service: system
        annotations:
          summary: "High CPU usage"
          description: "CPU usage is {{ $value }}% on {{ $labels.instance }}"
      - alert: VeryHighCPUUsage
        expr: 100 - (avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) > 95
        for: 2m
        labels:
          severity: critical
          service: system
        annotations:
          summary: "Very high CPU usage"
          description: "CPU usage is {{ $value }}% on {{ $labels.instance }}"
      # High Memory Usage
      - alert: HighMemoryUsage
        expr: (1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) * 100 > 80
        for: 5m
        labels:
          severity: warning
          service: system
        annotations:
          summary: "High memory usage"
          description: "Memory usage is {{ $value }}% on {{ $labels.instance }}"
      - alert: VeryHighMemoryUsage
        expr: (1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) * 100 > 95
        for: 2m
        labels:
          severity: critical
          service: system
        annotations:
          summary: "Very high memory usage"
          description: "Memory usage is {{ $value }}% on {{ $labels.instance }}"
      # Disk Space
      - alert: LowDiskSpace
        expr: (1 - (node_filesystem_avail_bytes / node_filesystem_size_bytes)) * 100 > 80
        for: 5m
        labels:
          severity: warning
          service: system
        annotations:
          summary: "Low disk space"
          description: "Disk usage is {{ $value }}% on {{ $labels.instance }} ({{ $labels.mountpoint }})"
      - alert: VeryLowDiskSpace
        expr: (1 - (node_filesystem_avail_bytes / node_filesystem_size_bytes)) * 100 > 95
        for: 2m
        labels:
          severity: critical
          service: system
        annotations:
          summary: "Very low disk space"
          description: "Disk usage is {{ $value }}% on {{ $labels.instance }} ({{ $labels.mountpoint }})"
  # Docker Container Monitoring
  - name: docker_containers
    rules:
      # Container Restart
      # container_start_time_seconds is a gauge (a Unix timestamp), so count how often
      # its value changed in the window with changes(); rate() is meant for counters.
      # Fires immediately (for: 0m) when a start time changed in the last 10 minutes.
      - alert: ContainerRestarting
        expr: changes(container_start_time_seconds[10m]) > 0
        for: 0m
        labels:
          severity: warning
          service: docker
        annotations:
          summary: "Container restarting"
          description: "Container {{ $labels.name }} is restarting frequently"
      # Container High Memory Usage
      # Memory usage as a percentage of the container's configured limit.
      # The `> 0` filter drops containers without a limit (reported as 0); dividing by
      # zero would yield +Inf and make the alert fire permanently for them.
      - alert: ContainerHighMemoryUsage
        expr: (container_memory_usage_bytes / (container_spec_memory_limit_bytes > 0)) * 100 > 80
        for: 5m
        labels:
          severity: warning
          service: docker
        annotations:
          summary: "Container high memory usage"
          description: "Container {{ $labels.name }} memory usage is {{ $value }}%"
      # Container High CPU Usage
      # CPU usage as a percentage of the container's CPU allowance.
      # The allowance in cores is quota / period (both in microseconds); dividing by the
      # raw quota alone gives a value several orders of magnitude too small.
      # Both sides are aggregated by `name` so that extra labels on the usage series
      # (e.g. a per-CPU label) cannot prevent the two vectors from matching.
      # Containers without a quota (reported as <= 0) are excluded by the `> 0` filter.
      - alert: ContainerHighCPUUsage
        expr: >-
          sum by (name) (rate(container_cpu_usage_seconds_total{name!=""}[5m]))
          / sum by (name) ((container_spec_cpu_quota{name!=""} > 0) / container_spec_cpu_period{name!=""})
          * 100 > 80
        for: 5m
        labels:
          severity: warning
          service: docker
        annotations:
          summary: "Container high CPU usage"
          description: "Container {{ $labels.name }} CPU usage is {{ $value }}%"
  # Database Monitoring
  - name: database_health
    rules:
      # Database Connection Issues
      - alert: DatabaseConnectionFailed
        expr: increase(database_connection_errors_total[5m]) > 5
        for: 1m
        labels:
          severity: critical
          service: database
        annotations:
          summary: "Database connection failures"
          description: "{{ $value }} database connection failures in the last 5 minutes"
      # Database High Query Time
      - alert: DatabaseHighQueryTime
        expr: histogram_quantile(0.95, rate(database_query_duration_seconds_bucket[5m])) > 1
        for: 5m
        labels:
          severity: warning
          service: database
        annotations:
          summary: "Database high query time"
          description: "95th percentile database query time is {{ $value }} seconds"
--- a/infra/prometheus/prometheus.yml
+++ b/infra/prometheus/prometheus.yml
@@ -3,8 +3,7 @@ global:
  evaluation_interval: 15s
 rule_files:
-  # - "first_rules.yml"
+  - "alert_rules.yml"
  # - "second_rules.yml"
 scrape_configs:
  - job_name: 'prometheus'
@@ -46,4 +45,4 @@ alerting:
  alertmanagers:
    - static_configs:
        - targets:
-          # - alertmanager:9093
+          - alertmanager:9093
--- a/infra/uptime-kuma/docker-compose.yml
+++ b/infra/uptime-kuma/docker-compose.yml
@@ -0,0 +1,33 @@
 # Uptime Kuma Configuration
 # This is a separate docker-compose file for Uptime Kuma
 # It will be included in the main docker-compose.yml
 version: '3.8'
 services:
  uptime-kuma:
    # Pinned to the 1.x release line: `latest` can jump to a new major
    # version on the next pull and migrate the data volume unexpectedly.
    image: louislam/uptime-kuma:1
    container_name: bots_uptime_kuma
    restart: unless-stopped
    volumes:
      # Named volume holding the application data directory
      - uptime_kuma_data:/app/data
    ports:
      - "3001:3001"
    environment:
      - UPTIME_KUMA_PORT=3001
    networks:
      - bots_network
    healthcheck:
      # Use the probe shipped with the image (relative to its /app working
      # directory) so the check does not depend on wget being installed.
      test: ["CMD", "extra/healthcheck"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 40s
 volumes:
  uptime_kuma_data:
    driver: local
 networks:
  # Created outside this file; must exist before `up`
  bots_network:
    external: true
--- a/scripts/setup-ssl.sh
+++ b/scripts/setup-ssl.sh
@@ -0,0 +1,163 @@
 #!/bin/bash
 # SSL Setup Script for Let's Encrypt
 # Issues a Let's Encrypt certificate for $DOMAIN and wires up renewal.
 set -e
 # Settings: DOMAIN and EMAIL fall back to defaults when unset or empty
 : "${DOMAIN:=localhost}"
 : "${EMAIL:=admin@${DOMAIN}}"
 NGINX_CONTAINER="bots_nginx"
 CERTBOT_IMAGE="certbot/certbot:latest"
 # Escape sequences for coloured output (interpreted by `echo -e`)
 NC='\033[0m' # No Color
 YELLOW='\033[1;33m'
 GREEN='\033[0;32m'
 RED='\033[0;31m'
 # Informational logger: prints $1 in green behind a timestamp
 log() {
    local stamp
    stamp="$(date +'%Y-%m-%d %H:%M:%S')"
    echo -e "${GREEN}[${stamp}] $1${NC}"
 }
 # Print a timestamped warning ($1) in yellow.
 # Written to stderr so diagnostics stay separate from regular output when
 # stdout is redirected or piped.
 warn() {
    echo -e "${YELLOW}[$(date +'%Y-%m-%d %H:%M:%S')] WARNING: $1${NC}" >&2
 }
 # Print a timestamped error ($1) in red and terminate the script with
 # exit status 1. The message goes to stderr so it is still visible when
 # stdout is redirected or piped.
 error() {
    echo -e "${RED}[$(date +'%Y-%m-%d %H:%M:%S')] ERROR: $1${NC}" >&2
    exit 1
 }
 # Preflight checks: each one aborts the script before anything is changed.
 # Check if running as root
 if [[ $EUID -eq 0 ]]; then
    error "This script should not be run as root for security reasons"
 fi
 # Check if domain is localhost
 if [[ "$DOMAIN" == "localhost" ]]; then
    warn "Domain is set to localhost. Let's Encrypt certificates cannot be issued for localhost."
    warn "Please set the DOMAIN environment variable to your actual domain name."
    warn "Example: DOMAIN=example.com ./scripts/setup-ssl.sh"
    exit 1
 fi
 # Check if Docker is running
 if ! docker info > /dev/null 2>&1; then
    error "Docker is not running. Please start Docker and try again."
 fi
 # Check if nginx container is running
 # Only the name column is listed and matched as a whole fixed string, so a
 # container whose name, image or command merely contains "$NGINX_CONTAINER"
 # is not mistaken for it.
 if ! docker ps --format '{{.Names}}' | grep -Fxq "$NGINX_CONTAINER"; then
    error "Nginx container ($NGINX_CONTAINER) is not running. Please start it first with 'docker-compose up -d nginx'"
 fi
 log "Setting up SSL certificates for domain: $DOMAIN"
 log "Email for Let's Encrypt: $EMAIL"
 # Create necessary directories
 log "Creating Let's Encrypt directories..."
 sudo mkdir -p /etc/letsencrypt/live /etc/letsencrypt/archive /etc/letsencrypt/renewal
 sudo chmod 755 /etc/letsencrypt
 # Stop nginx temporarily for certificate generation
 # (certbot's standalone server needs to bind the published ports itself)
 log "Stopping nginx container for certificate generation..."
 docker stop "$NGINX_CONTAINER" || true
 # Generate certificate using certbot
 # The command runs as an `if` condition so that a certbot failure does not
 # trigger `set -e` and abort the script while nginx is still stopped.
 log "Generating SSL certificate using Let's Encrypt..."
 if ! docker run --rm \
    -v /etc/letsencrypt:/etc/letsencrypt \
    -v /var/lib/letsencrypt:/var/lib/letsencrypt \
    -p 80:80 \
    -p 443:443 \
    "$CERTBOT_IMAGE" certonly \
    --standalone \
    --non-interactive \
    --agree-tos \
    --email "$EMAIL" \
    --domains "$DOMAIN" \
    --expand; then
    # Bring the site back up before reporting the failure
    docker start "$NGINX_CONTAINER" || true
    error "Failed to generate SSL certificate for $DOMAIN"
 fi
 # Check if certificate was generated successfully
 # `sudo test` is used because the certificate tree is written by root inside
 # the container and may not be readable by the invoking user.
 if ! sudo test -f "/etc/letsencrypt/live/$DOMAIN/fullchain.pem"; then
    docker start "$NGINX_CONTAINER" || true
    error "Failed to generate SSL certificate for $DOMAIN"
 fi
 log "SSL certificate generated successfully!"
 # Set proper permissions
 log "Setting proper permissions for SSL certificates..."
 sudo chmod 755 /etc/letsencrypt/live
 sudo chmod 755 /etc/letsencrypt/archive
 # The public files are listed explicitly instead of using a *.pem glob:
 # the glob would be expanded by the unprivileged shell (failing when the
 # directory is not readable yet) and would make privkey.pem world-readable
 # until the following chmod ran.
 sudo chmod 644 \
    "/etc/letsencrypt/live/$DOMAIN/cert.pem" \
    "/etc/letsencrypt/live/$DOMAIN/chain.pem" \
    "/etc/letsencrypt/live/$DOMAIN/fullchain.pem"
 # The private key stays readable by its owner only
 sudo chmod 600 "/etc/letsencrypt/live/$DOMAIN/privkey.pem"
 # Update nginx configuration to use Let's Encrypt certificates
 log "Updating nginx configuration..."
 if [[ -f "infra/nginx/ssl/letsencrypt.conf" ]]; then
    # Replace domain placeholder in letsencrypt.conf
    # The rendered file goes to a private mktemp path rather than a fixed,
    # predictable name in the world-writable /tmp directory.
    RENDERED_CONF="$(mktemp)"
    sed "s/{{DOMAIN}}/$DOMAIN/g" infra/nginx/ssl/letsencrypt.conf > "$RENDERED_CONF"
    # install copies as root and sets the mode explicitly (mktemp files are 0600)
    sudo install -m 644 "$RENDERED_CONF" "/etc/letsencrypt/live/$DOMAIN/letsencrypt.conf"
    rm -f "$RENDERED_CONF"
 fi
 # Start nginx container
 log "Starting nginx container..."
 docker start "$NGINX_CONTAINER"
 # Wait for nginx to start
 log "Waiting for nginx to start..."
 sleep 10
 # Test SSL certificate
 # curl runs without -k so the request only succeeds when the served
 # certificate actually validates for $DOMAIN; --max-time keeps the script
 # from hanging when nothing answers. A failure is reported as a warning only.
 log "Testing SSL certificate..."
 if curl -s --max-time 15 "https://$DOMAIN" > /dev/null; then
    log "SSL certificate is working correctly!"
 else
    warn "SSL certificate test failed. Please check nginx configuration."
 fi
 # Set up automatic renewal
 # Generates /usr/local/bin/ssl-renewal.sh and schedules it in the invoking
 # user's crontab. The here-document is unquoted on purpose: plain $VARS are
 # filled in now, escaped \$VARS are evaluated when the generated script runs.
 log "Setting up automatic certificate renewal..."
 RENEWAL_TMP="$(mktemp)"
 cat > "$RENEWAL_TMP" << EOF
 #!/bin/bash
 # SSL Certificate Renewal Script
 set -e
 DOMAIN="$DOMAIN"
 NGINX_CONTAINER="$NGINX_CONTAINER"
 CERTBOT_IMAGE="$CERTBOT_IMAGE"
 # Skip the run (and the nginx downtime) while the certificate is still valid
 # for more than 30 days; an unreadable certificate falls through to renewal.
 if openssl x509 -checkend 2592000 -noout -in "/etc/letsencrypt/live/\$DOMAIN/cert.pem" > /dev/null 2>&1; then
    exit 0
 fi
 # The certificate was issued with the standalone authenticator, so renewal
 # must be able to bind port 80: stop nginx and always start it again on exit.
 docker stop "\$NGINX_CONTAINER" || true
 trap 'docker start "\$NGINX_CONTAINER"' EXIT
 # Renew certificates
 docker run --rm \\
    -v /etc/letsencrypt:/etc/letsencrypt \\
    -v /var/lib/letsencrypt:/var/lib/letsencrypt \\
    -p 80:80 \\
    "\$CERTBOT_IMAGE" renew --quiet
 echo "\$(date): SSL certificates renewed successfully" >> /var/log/ssl-renewal.log
 EOF
 # Install as root with the executable bit set, then drop the temporary copy
 sudo install -m 755 "$RENEWAL_TMP" /usr/local/bin/ssl-renewal.sh
 rm -f "$RENEWAL_TMP"
 # The cron job runs as the invoking (non-root) user, who cannot create files
 # in /var/log; pre-create the log file and hand it over.
 sudo touch /var/log/ssl-renewal.log
 sudo chown "$(id -un)" /var/log/ssl-renewal.log
 # Add cron job for automatic renewal (every Monday at 2 AM)
 # Any existing entry for the script is filtered out first so re-running the
 # setup does not accumulate duplicate jobs.
 log "Adding cron job for automatic renewal..."
 (crontab -l 2>/dev/null | grep -vF "/usr/local/bin/ssl-renewal.sh" || true; echo "0 2 * * 1 /usr/local/bin/ssl-renewal.sh") | crontab -
 # Closing summary for the operator, emitted one log line per message
 for summary_line in \
    "SSL setup completed successfully!" \
    "Certificate location: /etc/letsencrypt/live/$DOMAIN/" \
    "Automatic renewal is configured to run every Monday at 2 AM" \
    "You can test the renewal manually with: sudo /usr/local/bin/ssl-renewal.sh"
 do
    log "$summary_line"
 done
 # Show subject, validity window and DNS names of the issued certificate
 log "Certificate information:"
 openssl x509 -text -noout -in "/etc/letsencrypt/live/$DOMAIN/fullchain.pem" \
    | grep -E "(Subject:|Not Before|Not After|DNS:)"