feat: integrate Uptime Kuma and Alertmanager into Docker setup

- Add Uptime Kuma service for status monitoring with health checks.
- Introduce Alertmanager service for alert management and notifications.
- Update docker-compose.yml to include new services and their configurations.
- Enhance Makefile with commands for managing Uptime Kuma and Alertmanager logs.
- Modify Ansible playbook to install necessary packages and configure SSL for new services.
- Update Nginx configuration to route traffic to Uptime Kuma and Alertmanager.
- Adjust Prometheus configuration to include alert rules and external URLs.
2025-09-16 21:50:56 +03:00
parent 5e10204137
commit 9ec3f02767
20 changed files with 2173 additions and 38 deletions
--- a/infra/alertmanager/alertmanager-simple.yml
+++ b/infra/alertmanager/alertmanager-simple.yml
@@ -0,0 +1,17 @@
+# Minimal Alertmanager configuration: one catch-all route, one webhook receiver.
+global:
+  # Mail defaults; no receiver in this file sends e-mail.
+  smtp_from: 'alerts@localhost'
+  smtp_smarthost: 'localhost:587'
+
+route:
+  # Every alert is delivered to the single receiver defined below.
+  receiver: 'web.hook'
+  group_by:
+    - 'alertname'
+  group_wait: 10s
+  group_interval: 10s
+  repeat_interval: 1h
+
+receivers:
+  - name: 'web.hook'
+    webhook_configs:
+      - send_resolved: true
+        url: 'http://localhost:5001/'
--- a/infra/alertmanager/alertmanager.yml
+++ b/infra/alertmanager/alertmanager.yml
@@ -0,0 +1,185 @@
+# Alertmanager Configuration
+# This file configures how alerts are handled and routed
+
+global:
+  # How long an alert without an end time is kept before it counts as resolved.
+  resolve_timeout: 5m
+
+  # Outgoing mail server used by the email_configs entries in this file.
+  smtp_smarthost: 'localhost:587'
+  smtp_require_tls: true
+
+  # Sender identity and SMTP credentials.
+  # NOTE(review): {{DOMAIN}} and {{SMTP_PASSWORD}} look like deploy-time
+  # placeholders; nothing in this file substitutes them - confirm they are
+  # rendered before Alertmanager loads the configuration.
+  smtp_from: 'alerts@{{DOMAIN}}'
+  smtp_auth_username: 'alerts@{{DOMAIN}}'
+  smtp_auth_password: '{{SMTP_PASSWORD}}'
+
+# Notification template files, selected by a glob pattern.
+templates: ['/etc/alertmanager/templates/*.tmpl']
+
+# Route configuration - defines how alerts are routed.
+# Child routes are evaluated top to bottom and the first match wins (no route
+# sets `continue`), so an alert labelled severity=critical or severity=warning
+# never reaches the service-specific routes further down.
+# NOTE(review): confirm that severity taking precedence over service is intended.
+# `matchers` replaces the deprecated `match` key.
+route:
+  group_by: ['alertname', 'cluster', 'service']
+  group_wait: 10s
+  group_interval: 10s
+  repeat_interval: 1h
+  # Fallback receiver for alerts that match none of the child routes.
+  receiver: 'web.hook'
+  routes:
+    # Critical alerts - immediate notification
+    - matchers:
+        - 'severity = "critical"'
+      receiver: 'critical-alerts'
+      group_wait: 5s
+      repeat_interval: 5m
+
+    # Warning alerts - grouped notification
+    - matchers:
+        - 'severity = "warning"'
+      receiver: 'warning-alerts'
+      group_wait: 30s
+      repeat_interval: 30m
+
+    # Bot-specific alerts (one anchored regex instead of two identical routes)
+    - matchers:
+        - 'service =~ "telegram-bot|anon-bot"'
+      receiver: 'bot-alerts'
+      group_wait: 10s
+      repeat_interval: 15m
+
+    # Infrastructure alerts (one anchored regex instead of three identical routes)
+    - matchers:
+        - 'service =~ "prometheus|grafana|nginx"'
+      receiver: 'infrastructure-alerts'
+      group_wait: 30s
+      repeat_interval: 1h
+
+# Inhibition rules - suppress certain alerts when others are firing.
+# Written with source_matchers/target_matchers; the source_match/target_match
+# keys are deprecated in the Alertmanager configuration.
+inhibit_rules:
+  # Suppress warning alerts when critical alerts are firing
+  - source_matchers:
+      - 'severity = "critical"'
+    target_matchers:
+      - 'severity = "warning"'
+    # Applies only when source and target agree on all three labels.
+    equal: ['alertname', 'cluster', 'service']
+
+  # Suppress individual instance alerts when the entire service is down
+  - source_matchers:
+      - 'alertname = "ServiceDown"'
+    target_matchers:
+      - 'alertname = "InstanceDown"'
+    equal: ['service']
+
+# Receiver configurations
+receivers:
+  # Default receiver (for testing): posts every notification to a local webhook.
+  # NOTE(review): if Alertmanager runs in a container, localhost is that
+  # container - confirm something listens on port 5001 there.
+  - name: 'web.hook'
+    webhook_configs:
+      - send_resolved: true
+        url: 'http://localhost:5001/'
+
+  # Critical alerts - immediate notification via e-mail and webhook.
+  # email_configs has no `subject` or `body` key: the subject is set through
+  # `headers` and the plain-text part through `text`. Unknown keys make
+  # Alertmanager reject the whole configuration.
+  # The Time row of the HTML part uses the first alert's StartsAt, because
+  # `time` is not a group_by label and .GroupLabels.time always rendered empty.
+  # NOTE(review): {{DOMAIN}} must be substituted before this file is loaded.
+  - name: 'critical-alerts'
+    email_configs:
+      - to: 'admin@{{DOMAIN}}'
+        headers:
+          Subject: '🚨 CRITICAL ALERT: {{ .GroupLabels.alertname }}'
+        text: |
+          {{ range .Alerts }}
+          Alert: {{ .Annotations.summary }}
+          Description: {{ .Annotations.description }}
+          Severity: {{ .Labels.severity }}
+          Service: {{ .Labels.service }}
+          Instance: {{ .Labels.instance }}
+          Time: {{ .StartsAt }}
+          {{ end }}
+        html: |
+          <h2>🚨 Critical Alert</h2>
+          <table>
+            <tr><td><strong>Alert:</strong></td><td>{{ .GroupLabels.alertname }}</td></tr>
+            <tr><td><strong>Service:</strong></td><td>{{ .GroupLabels.service }}</td></tr>
+            <tr><td><strong>Time:</strong></td><td>{{ (index .Alerts 0).StartsAt }}</td></tr>
+          </table>
+          <h3>Alerts:</h3>
+          <ul>
+          {{ range .Alerts }}
+            <li><strong>{{ .Annotations.summary }}</strong><br/>
+                {{ .Annotations.description }}<br/>
+                <small>Instance: {{ .Labels.instance }} | Time: {{ .StartsAt }}</small>
+            </li>
+          {{ end }}
+          </ul>
+    webhook_configs:
+      - url: 'http://localhost:5001/critical'
+        send_resolved: true
+
+  # Warning alerts - less urgent notification via e-mail and webhook.
+  # The subject goes under `headers` and the plain-text part under `text`;
+  # email_configs has no `subject` or `body` key and Alertmanager rejects
+  # unknown keys when loading the configuration.
+  # NOTE(review): {{DOMAIN}} must be substituted before this file is loaded.
+  - name: 'warning-alerts'
+    email_configs:
+      - to: 'admin@{{DOMAIN}}'
+        headers:
+          Subject: '⚠️ WARNING: {{ .GroupLabels.alertname }}'
+        text: |
+          {{ range .Alerts }}
+          Alert: {{ .Annotations.summary }}
+          Description: {{ .Annotations.description }}
+          Severity: {{ .Labels.severity }}
+          Service: {{ .Labels.service }}
+          Instance: {{ .Labels.instance }}
+          Time: {{ .StartsAt }}
+          {{ end }}
+    webhook_configs:
+      - url: 'http://localhost:5001/warning'
+        send_resolved: true
+
+  # Bot-specific alerts - e-mail to the bot admin plus a webhook call.
+  # The subject goes under `headers` and the plain-text part under `text`;
+  # email_configs has no `subject` or `body` key and Alertmanager rejects
+  # unknown keys when loading the configuration.
+  # NOTE(review): {{DOMAIN}} must be substituted before this file is loaded.
+  - name: 'bot-alerts'
+    email_configs:
+      - to: 'bot-admin@{{DOMAIN}}'
+        headers:
+          Subject: '🤖 Bot Alert: {{ .GroupLabels.alertname }}'
+        text: |
+          Bot Alert: {{ .GroupLabels.alertname }}
+          Service: {{ .GroupLabels.service }}
+
+          {{ range .Alerts }}
+          - {{ .Annotations.summary }}
+            {{ .Annotations.description }}
+            Instance: {{ .Labels.instance }}
+            Time: {{ .StartsAt }}
+          {{ end }}
+    webhook_configs:
+      - url: 'http://localhost:5001/bot'
+        send_resolved: true
+
+  # Infrastructure alerts - e-mail to the infra mailbox plus a webhook call.
+  # The subject goes under `headers` and the plain-text part under `text`;
+  # email_configs has no `subject` or `body` key and Alertmanager rejects
+  # unknown keys when loading the configuration.
+  # NOTE(review): {{DOMAIN}} must be substituted before this file is loaded.
+  - name: 'infrastructure-alerts'
+    email_configs:
+      - to: 'infra@{{DOMAIN}}'
+        headers:
+          Subject: '🏗️ Infrastructure Alert: {{ .GroupLabels.alertname }}'
+        text: |
+          Infrastructure Alert: {{ .GroupLabels.alertname }}
+          Service: {{ .GroupLabels.service }}
+
+          {{ range .Alerts }}
+          - {{ .Annotations.summary }}
+            {{ .Annotations.description }}
+            Instance: {{ .Labels.instance }}
+            Time: {{ .StartsAt }}
+          {{ end }}
+    webhook_configs:
+      - url: 'http://localhost:5001/infrastructure'
+        send_resolved: true
--- a/infra/ansible/playbook.yml
+++ b/infra/ansible/playbook.yml
@@ -57,6 +57,15 @@
          - nginx
          - openssl
          - apache2-utils
+          - certbot
+          - python3-certbot-nginx
+        state: present
+
+    # Install the passlib and bcrypt Python packages on the target host via pip.
+    # NOTE(review): presumably required for password hashing (e.g. htpasswd
+    # handling) - confirm. On distributions that mark the system Python as
+    # externally managed, a system-wide pip install may be refused.
+    - name: Установить Python библиотеки для Ansible
+      pip:
+        name:
+          - passlib
+          - bcrypt
        state: present

    - name: Установить часовой пояс Europe/Moscow
@@ -278,14 +287,40 @@
        - "{{ project_root }}/infra/nginx"
        - "{{ project_root }}/infra/nginx/ssl"
        - "{{ project_root }}/infra/nginx/conf.d"
+        - "{{ project_root }}/infra/uptime-kuma"
+        - "{{ project_root }}/infra/alertmanager"
+        - "{{ project_root }}/infra/grafana/dashboards"
+        - "{{ project_root }}/scripts"

+    # Generate a self-signed certificate and key with openssl as the fallback
+    # when Let's Encrypt is not enabled. `creates` skips the command once
+    # cert.pem already exists.
-    - name: Сгенерировать самоподписанный SSL сертификат
+    - name: Сгенерировать самоподписанный SSL сертификат (fallback)
      command: >
        openssl req -x509 -newkey rsa:4096 -keyout {{ project_root }}/infra/nginx/ssl/key.pem
        -out {{ project_root }}/infra/nginx/ssl/cert.pem -days 365 -nodes
        -subj "/CN={{ ansible_host }}/O=Monitoring/C=RU"
      args:
        creates: "{{ project_root }}/infra/nginx/ssl/cert.pem"
+      # Filters bind tighter than `not`: not (use_letsencrypt | default(false)).
+      when: not use_letsencrypt | default(false)
+
+    # Pre-create the Let's Encrypt directory tree when use_letsencrypt is set.
+    # NOTE(review): certbot normally keeps live/ and archive/ more restrictive
+    # than 0755 - confirm this mode is intended for those two directories.
+    - name: Создать директории для Let's Encrypt
+      when: use_letsencrypt | default(false)
+      loop:
+        - /etc/letsencrypt
+        - /etc/letsencrypt/live
+        - /etc/letsencrypt/archive
+        - /etc/letsencrypt/renewal
+      file:
+        state: directory
+        path: "{{ item }}"
+        mode: '0755'
+        owner: root
+        group: root
+
+    # Weekly renewal: Mondays at 02:00. The schedule belongs in the cron
+    # module's own time fields; `job` is only the command. With the schedule
+    # embedded in `job`, every time field defaulted to "*" and cron ran the
+    # line each minute with "0" as the command.
+    # NOTE(review): only setup-ssl.sh is installed by the tasks visible in this
+    # playbook - confirm /usr/local/bin/ssl-renewal.sh is deployed elsewhere.
+    - name: Настроить cron для автоматического обновления SSL сертификатов
+      cron:
+        name: "SSL Certificate Renewal"
+        minute: "0"
+        hour: "2"
+        weekday: "1"
+        job: "/usr/local/bin/ssl-renewal.sh"
+        user: root
+      when: use_letsencrypt | default(false)

    - name: Установить права на SSL сертификаты
      file:
@@ -314,6 +349,7 @@
        group: root
        mode: '0644'
        backup: yes
+        remote_src: yes

    - name: Скопировать конфигурации nginx для сервисов
      copy:
@@ -323,6 +359,7 @@
        group: root
        mode: '0644'
        backup: yes
+        remote_src: yes

    - name: Скопировать SSL сертификаты
      copy:
@@ -332,6 +369,7 @@
        group: root
        mode: '0600'
        backup: yes
+        remote_src: yes

    - name: Скопировать htpasswd файл
      copy:
@@ -341,6 +379,47 @@
        group: root
        mode: '0644'
        backup: yes
+        remote_src: yes
+
+    # src and dest are the same path on the managed host (remote_src), so no
+    # new content is transferred; effectively only owner, group and mode are
+    # enforced on a file that must already exist.
+    # NOTE(review): confirm the file is delivered by an earlier step.
+    - name: Скопировать конфигурацию Alertmanager
+      copy:
+        remote_src: true
+        src: "{{ project_root }}/infra/alertmanager/alertmanager.yml"
+        dest: "{{ project_root }}/infra/alertmanager/alertmanager.yml"
+        backup: true
+        mode: '0644'
+        owner: "{{ deploy_user }}"
+        group: "{{ deploy_user }}"
+
+    # src and dest are the same path on the managed host (remote_src), so no
+    # new content is transferred; effectively only owner, group and mode are
+    # enforced on a file that must already exist.
+    # NOTE(review): confirm the file is delivered by an earlier step.
+    - name: Скопировать правила алертов Prometheus
+      copy:
+        remote_src: true
+        src: "{{ project_root }}/infra/prometheus/alert_rules.yml"
+        dest: "{{ project_root }}/infra/prometheus/alert_rules.yml"
+        backup: true
+        mode: '0644'
+        owner: "{{ deploy_user }}"
+        group: "{{ deploy_user }}"
+
+    # Recursive copy of the dashboards directory onto itself on the managed
+    # host (remote_src, identical src and dest).
+    # NOTE(review): confirm the dashboards are delivered by an earlier step, and
+    # that mode 0644 is meant for the files only (no directory_mode is set).
+    - name: Скопировать дашборды Grafana
+      copy:
+        remote_src: true
+        src: "{{ project_root }}/infra/grafana/dashboards/"
+        dest: "{{ project_root }}/infra/grafana/dashboards/"
+        backup: true
+        mode: '0644'
+        owner: "{{ deploy_user }}"
+        group: "{{ deploy_user }}"
+
+    # Install the SSL setup script from the project checkout on the managed
+    # host (remote_src) into /usr/local/bin as a root-owned executable.
+    - name: Скопировать скрипт настройки SSL
+      copy:
+        remote_src: true
+        src: "{{ project_root }}/scripts/setup-ssl.sh"
+        dest: /usr/local/bin/setup-ssl.sh
+        backup: true
+        mode: '0755'
+        owner: root
+        group: root

    - name: Проверить конфигурацию nginx
      command: nginx -t
@@ -811,6 +890,20 @@
        timeout: 30
        state: started

+    # Wait up to 30 s for a TCP listener on port 3001 at the inventory address.
+    - name: Проверить, что порт 3001 (Uptime Kuma) открыт
+      wait_for:
+        host: "{{ ansible_host }}"
+        port: 3001
+        state: started
+        timeout: 30
+
+    # Wait up to 30 s for a TCP listener on port 9093 at the inventory address.
+    - name: Проверить, что порт 9093 (Alertmanager) открыт
+      wait_for:
+        host: "{{ ansible_host }}"
+        port: 9093
+        state: started
+        timeout: 30
+
    - name: Проверить доступность Nginx
      uri:
        url: "http://{{ ansible_host }}/nginx-health"
@@ -849,6 +942,26 @@
      retries: 5
      delay: 10

+    # Probe Uptime Kuma through Nginx over HTTPS and expect HTTP 200.
+    # `until` makes the retries/delay pair effective: without it, older Ansible
+    # releases run the task only once.
+    # NOTE(review): certificate validation is disabled, presumably because the
+    # fallback certificate is self-signed - confirm.
+    - name: Проверить доступность Uptime Kuma через Nginx
+      uri:
+        url: "https://{{ ansible_host }}/status"
+        method: GET
+        status_code: 200
+        validate_certs: false
+      register: uptime_kuma_nginx_health
+      until: uptime_kuma_nginx_health is succeeded
+      retries: 5
+      delay: 10
+
+    # Probe Alertmanager through Nginx over HTTPS and expect HTTP 200.
+    # `until` makes the retries/delay pair effective: without it, older Ansible
+    # releases run the task only once.
+    # NOTE(review): certificate validation is disabled, presumably because the
+    # fallback certificate is self-signed - confirm.
+    - name: Проверить доступность Alertmanager через Nginx
+      uri:
+        url: "https://{{ ansible_host }}/alertmanager/"
+        method: GET
+        status_code: 200
+        validate_certs: false
+      register: alertmanager_nginx_health
+      until: alertmanager_nginx_health is succeeded
+      retries: 5
+      delay: 10
+

    - name: Закрыть старый SSH порт 22 в UFW (финальный шаг)
      ufw:
@@ -858,7 +971,7 @@

+    # Print the final summary message once the preceding checks have run.
+    # NOTE(review): the text mentions Let's Encrypt SSL unconditionally, while
+    # the Let's Encrypt tasks only run when use_letsencrypt is set - confirm.
    - name: Проверка запуска ботов завершена — всё работает 🟢
      debug:
-        msg: "Все сервисы запущены и слушают нужные порты. SSH настроен на порт 15722, Fail2ban активен, параметры безопасности ядра применены. Порт 22 закрыт для безопасности."
+        msg: "Все сервисы запущены и слушают нужные порты. SSH настроен на порт 15722, Fail2ban активен, параметры безопасности ядра применены. Порт 22 закрыт для безопасности. Добавлены: Uptime Kuma (статусная страница), Alertmanager (мониторинг), Let's Encrypt SSL, Grafana дашборды."

  # handlers для перезагрузки сервисов
  handlers:
--- a/infra/grafana/dashboards/bot-monitoring.json
+++ b/infra/grafana/dashboards/bot-monitoring.json
@@ -0,0 +1,529 @@
+{
+  "annotations": {
+    "list": [
+      {
+        "builtIn": 1,
+        "datasource": "-- Grafana --",
+        "enable": true,
+        "hide": true,
+        "iconColor": "rgba(0, 211, 255, 1)",
+        "name": "Annotations & Alerts",
+        "type": "dashboard"
+      }
+    ]
+  },
+  "editable": true,
+  "gnetId": null,
+  "graphTooltip": 0,
+  "id": null,
+  "links": [],
+  "panels": [
+    {
+      "datasource": "Prometheus",
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "drawStyle": "line",
+            "fillOpacity": 10,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "vis": false
+            },
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "never",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          },
+          "unit": "reqps"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 0
+      },
+      "id": 1,
+      "options": {
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom"
+        },
+        "tooltip": {
+          "mode": "single"
+        }
+      },
+      "targets": [
+        {
+          "expr": "rate(http_requests_total{job=~\"telegram-bot|anon-bot\"}[5m])",
+          "interval": "",
+          "legendFormat": "{{job}} - {{method}} {{status}}",
+          "refId": "A"
+        }
+      ],
+      "title": "Bot Request Rate",
+      "type": "timeseries"
+    },
+    {
+      "datasource": "Prometheus",
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "drawStyle": "line",
+            "fillOpacity": 10,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "vis": false
+            },
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "never",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          },
+          "unit": "s"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 12,
+        "y": 0
+      },
+      "id": 2,
+      "options": {
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom"
+        },
+        "tooltip": {
+          "mode": "single"
+        }
+      },
+      "targets": [
+        {
+          "expr": "histogram_quantile(0.95, sum by (le, job) (rate(http_request_duration_seconds_bucket{job=~\"telegram-bot|anon-bot\"}[5m])))",
+          "interval": "",
+          "legendFormat": "{{job}} - 95th percentile",
+          "refId": "A"
+        },
+        {
+          "expr": "histogram_quantile(0.50, sum by (le, job) (rate(http_request_duration_seconds_bucket{job=~\"telegram-bot|anon-bot\"}[5m])))",
+          "interval": "",
+          "legendFormat": "{{job}} - 50th percentile",
+          "refId": "B"
+        }
+      ],
+      "title": "Bot Response Time",
+      "type": "timeseries"
+    },
+    {
+      "datasource": "Prometheus",
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "drawStyle": "line",
+            "fillOpacity": 10,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "vis": false
+            },
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "never",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          },
+          "unit": "percent"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 8
+      },
+      "id": 3,
+      "options": {
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom"
+        },
+        "tooltip": {
+          "mode": "single"
+        }
+      },
+      "targets": [
+        {
+          "expr": "sum by (job) (rate(http_requests_total{job=~\"telegram-bot|anon-bot\",status=~\"5..\"}[5m])) / sum by (job) (rate(http_requests_total{job=~\"telegram-bot|anon-bot\"}[5m])) * 100",
+          "interval": "",
+          "legendFormat": "{{job}} - Error Rate",
+          "refId": "A"
+        }
+      ],
+      "title": "Bot Error Rate",
+      "type": "timeseries"
+    },
+    {
+      "datasource": "Prometheus",
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "drawStyle": "line",
+            "fillOpacity": 10,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "vis": false
+            },
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "never",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          },
+          "unit": "bytes"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 12,
+        "y": 8
+      },
+      "id": 4,
+      "options": {
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom"
+        },
+        "tooltip": {
+          "mode": "single"
+        }
+      },
+      "targets": [
+        {
+          "expr": "process_resident_memory_bytes{job=~\"telegram-bot|anon-bot\"}",
+          "interval": "",
+          "legendFormat": "{{job}} - Memory Usage",
+          "refId": "A"
+        }
+      ],
+      "title": "Bot Memory Usage",
+      "type": "timeseries"
+    },
+    {
+      "datasource": "Prometheus",
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "drawStyle": "line",
+            "fillOpacity": 10,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "vis": false
+            },
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "never",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          },
+          "unit": "short"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 16
+      },
+      "id": 5,
+      "options": {
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom"
+        },
+        "tooltip": {
+          "mode": "single"
+        }
+      },
+      "targets": [
+        {
+          "expr": "up{job=~\"telegram-bot|anon-bot\"}",
+          "interval": "",
+          "legendFormat": "{{job}} - Status",
+          "refId": "A"
+        }
+      ],
+      "title": "Bot Health Status",
+      "type": "timeseries"
+    },
+    {
+      "datasource": "Prometheus",
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "drawStyle": "line",
+            "fillOpacity": 10,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "vis": false
+            },
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "never",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          },
+          "unit": "percent"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 12,
+        "y": 16
+      },
+      "id": 6,
+      "options": {
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom"
+        },
+        "tooltip": {
+          "mode": "single"
+        }
+      },
+      "targets": [
+        {
+          "expr": "rate(process_cpu_seconds_total{job=~\"telegram-bot|anon-bot\"}[5m]) * 100",
+          "interval": "",
+          "legendFormat": "{{job}} - CPU Usage",
+          "refId": "A"
+        }
+      ],
+      "title": "Bot CPU Usage",
+      "type": "timeseries"
+    }
+  ],
+  "schemaVersion": 27,
+  "style": "dark",
+  "tags": ["bots", "monitoring"],
+  "templating": {
+    "list": []
+  },
+  "time": {
+    "from": "now-1h",
+    "to": "now"
+  },
+  "timepicker": {},
+  "timezone": "",
+  "title": "Bot Monitoring Dashboard",
+  "uid": "bot-monitoring",
+  "version": 1
+}
--- a/infra/grafana/dashboards/infrastructure-monitoring.json
+++ b/infra/grafana/dashboards/infrastructure-monitoring.json
@@ -0,0 +1,523 @@
+{
+  "annotations": {
+    "list": [
+      {
+        "builtIn": 1,
+        "datasource": "-- Grafana --",
+        "enable": true,
+        "hide": true,
+        "iconColor": "rgba(0, 211, 255, 1)",
+        "name": "Annotations & Alerts",
+        "type": "dashboard"
+      }
+    ]
+  },
+  "editable": true,
+  "gnetId": null,
+  "graphTooltip": 0,
+  "id": null,
+  "links": [],
+  "panels": [
+    {
+      "datasource": "Prometheus",
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "drawStyle": "line",
+            "fillOpacity": 10,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "vis": false
+            },
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "never",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          },
+          "unit": "percent"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 0
+      },
+      "id": 1,
+      "options": {
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom"
+        },
+        "tooltip": {
+          "mode": "single"
+        }
+      },
+      "targets": [
+        {
+          "expr": "100 - (avg by(instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m])) * 100)",
+          "interval": "",
+          "legendFormat": "CPU Usage - {{instance}}",
+          "refId": "A"
+        }
+      ],
+      "title": "System CPU Usage",
+      "type": "timeseries"
+    },
+    {
+      "datasource": "Prometheus",
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "drawStyle": "line",
+            "fillOpacity": 10,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "vis": false
+            },
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "never",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          },
+          "unit": "percent"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 12,
+        "y": 0
+      },
+      "id": 2,
+      "options": {
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom"
+        },
+        "tooltip": {
+          "mode": "single"
+        }
+      },
+      "targets": [
+        {
+          "expr": "(1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) * 100",
+          "interval": "",
+          "legendFormat": "Memory Usage - {{instance}}",
+          "refId": "A"
+        }
+      ],
+      "title": "System Memory Usage",
+      "type": "timeseries"
+    },
+    {
+      "datasource": "Prometheus",
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "drawStyle": "line",
+            "fillOpacity": 10,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "vis": false
+            },
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "never",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          },
+          "unit": "percent"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 8
+      },
+      "id": 3,
+      "options": {
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom"
+        },
+        "tooltip": {
+          "mode": "single"
+        }
+      },
+      "targets": [
+        {
+          "expr": "(1 - (node_filesystem_avail_bytes / node_filesystem_size_bytes)) * 100",
+          "interval": "",
+          "legendFormat": "Disk Usage - {{instance}} {{mountpoint}}",
+          "refId": "A"
+        }
+      ],
+      "title": "Disk Usage",
+      "type": "timeseries"
+    },
+    {
+      "datasource": "Prometheus",
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "drawStyle": "line",
+            "fillOpacity": 10,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "vis": false
+            },
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "never",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          },
+          "unit": "short"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 12,
+        "y": 8
+      },
+      "id": 4,
+      "options": {
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom"
+        },
+        "tooltip": {
+          "mode": "single"
+        }
+      },
+      "targets": [
+        {
+          "expr": "up{job=~\"prometheus|grafana|nginx|alertmanager|uptime-kuma\"}",
+          "interval": "",
+          "legendFormat": "{{job}} - Status",
+          "refId": "A"
+        }
+      ],
+      "title": "Service Health Status",
+      "type": "timeseries"
+    },
+    {
+      "datasource": "Prometheus",
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "drawStyle": "line",
+            "fillOpacity": 10,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "vis": false
+            },
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "never",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          },
+          "unit": "reqps"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 16
+      },
+      "id": 5,
+      "options": {
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom"
+        },
+        "tooltip": {
+          "mode": "single"
+        }
+      },
+      "targets": [
+        {
+          "expr": "rate(nginx_http_requests_total[5m])",
+          "interval": "",
+          "legendFormat": "Nginx - {{status}}",
+          "refId": "A"
+        }
+      ],
+      "title": "Nginx Request Rate",
+      "type": "timeseries"
+    },
+    {
+      "datasource": "Prometheus",
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "drawStyle": "line",
+            "fillOpacity": 10,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "vis": false
+            },
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "never",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          },
+          "unit": "bytes"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 12,
+        "y": 16
+      },
+      "id": 6,
+      "options": {
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom"
+        },
+        "tooltip": {
+          "mode": "single"
+        }
+      },
+      "targets": [
+        {
+          "expr": "container_memory_usage_bytes{name=~\"bots_.*\"}",
+          "interval": "",
+          "legendFormat": "{{name}} - Memory",
+          "refId": "A"
+        }
+      ],
+      "title": "Container Memory Usage",
+      "type": "timeseries"
+    }
+  ],
+  "schemaVersion": 27,
+  "style": "dark",
+  "tags": ["infrastructure", "monitoring"],
+  "templating": {
+    "list": []
+  },
+  "time": {
+    "from": "now-1h",
+    "to": "now"
+  },
+  "timepicker": {},
+  "timezone": "",
+  "title": "Infrastructure Monitoring Dashboard",
+  "uid": "infrastructure-monitoring",
+  "version": 1
+}
--- a/infra/grafana/provisioning/dashboards/dashboards.yml
+++ b/infra/grafana/provisioning/dashboards/dashboards.yml
@@ -0,0 +1,16 @@
+# Grafana dashboard provisioning
+# Declares a file-based provider from which Grafana imports dashboards.
+
+apiVersion: 1
+
+providers:
+  - name: default
+    type: file
+    orgId: 1
+    folder: ""
+    options:
+      path: /etc/grafana/provisioning/dashboards
+      foldersFromFilesStructure: true
+    updateIntervalSeconds: 10
+    allowUiUpdates: true
+    disableDeletion: false
--- a/infra/grafana/provisioning/datasources/prometheus.yml
+++ b/infra/grafana/provisioning/datasources/prometheus.yml
@@ -4,5 +4,13 @@ datasources:
  - name: Prometheus
    type: prometheus
    access: proxy
-    url: http://prometheus:9090
+    url: http://prometheus:9090/prometheus
    isDefault: true
+    jsonData:
+      httpMethod: POST
+      manageAlerts: true
+      prometheusType: Prometheus
+      prometheusVersion: 2.40.0
+      cacheLevel: 'High'
+      disableRecordingRules: false
+      incrementalQueryOverlapWindow: 10m
--- a/infra/nginx/conf.d/alertmanager.conf
+++ b/infra/nginx/conf.d/alertmanager.conf
@@ -0,0 +1,61 @@
+# Alertmanager Nginx Configuration
+# Proxies requests to Alertmanager
+
+# Alertmanager web UI, exposed under the /alertmanager/ path prefix
+location /alertmanager/ {
+    # Rate limiting: "api" zone, bursts of up to 10 requests served without delay
+    limit_req zone=api burst=10 nodelay;
+    
+    # Strip the /alertmanager/ prefix; "break" ends rewriting and the rewritten URI is what gets proxied
+    rewrite ^/alertmanager/(.*)$ /$1 break;
+    
+    # Proxy to the alertmanager_backend upstream, forwarding the original host, client address and scheme
+    proxy_pass http://alertmanager_backend;
+    proxy_set_header Host $host;
+    proxy_set_header X-Real-IP $remote_addr;
+    proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+    proxy_set_header X-Forwarded-Proto $scheme;
+    
+    # Timeouts (connect / send / read), 30s each
+    proxy_connect_timeout 30s;
+    proxy_send_timeout 30s;
+    proxy_read_timeout 30s;
+    
+    # Response buffering: one 4k buffer for the response header plus eight 4k buffers
+    proxy_buffering on;
+    proxy_buffer_size 4k;
+    proxy_buffers 8 4k;
+    
+    # Security headers, added regardless of response code ("always")
+    add_header X-Frame-Options "SAMEORIGIN" always;
+    add_header X-Content-Type-Options "nosniff" always;
+}
+
+# Alertmanager API at the root-level /api/v1/ path — NOTE(review): Alertmanager 0.27+ serves only /api/v2; confirm this prefix is still needed
+location /api/v1/ {
+    # Rate limiting: "api" zone, bursts of up to 20 requests served without delay
+    limit_req zone=api burst=20 nodelay;
+    
+    # Proxy to the alertmanager_backend upstream; the request URI is passed through unchanged
+    proxy_pass http://alertmanager_backend;
+    proxy_set_header Host $host;
+    proxy_set_header X-Real-IP $remote_addr;
+    proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+    proxy_set_header X-Forwarded-Proto $scheme;
+    
+    # CORS headers — NOTE(review): wildcard origin and no auth directive in this block; confirm the exposure is intended
+    add_header Access-Control-Allow-Origin "*" always;
+    add_header Access-Control-Allow-Methods "GET, POST, PUT, DELETE, OPTIONS" always;
+    add_header Access-Control-Allow-Headers "DNT,User-Agent,X-Requested-With,If-Modified-Since,Cache-Control,Content-Type,Range,Authorization" always;
+    
+    # Preflight: answer OPTIONS directly with an empty 204 carrying the CORS headers (never reaches the upstream)
+    if ($request_method = 'OPTIONS') {
+        add_header Access-Control-Allow-Origin "*";
+        add_header Access-Control-Allow-Methods "GET, POST, PUT, DELETE, OPTIONS";
+        add_header Access-Control-Allow-Headers "DNT,User-Agent,X-Requested-With,If-Modified-Since,Cache-Control,Content-Type,Range,Authorization";
+        add_header Access-Control-Max-Age 1728000;
+        add_header Content-Type "text/plain; charset=utf-8";
+        add_header Content-Length 0;
+        return 204;
+    }
+}
--- a/infra/nginx/conf.d/grafana.conf
+++ b/infra/nginx/conf.d/grafana.conf
@@ -1,9 +1,3 @@
-# Grafana reverse proxy configuration
-upstream grafana_backend {
-    server grafana:3000;
-    keepalive 32;
-}
-
 # Grafana proxy configuration
 location /grafana/ {
    proxy_pass http://grafana_backend/;
--- a/infra/nginx/conf.d/prometheus.conf
+++ b/infra/nginx/conf.d/prometheus.conf
@@ -1,12 +1,7 @@
-# Prometheus reverse proxy configuration
-upstream prometheus_backend {
-    server prometheus:9090;
-    keepalive 32;
-}
-
 # Prometheus proxy configuration
 location /prometheus/ {
    proxy_pass http://prometheus_backend/;
+    proxy_redirect / /prometheus/;
    proxy_set_header Host $host;
    proxy_set_header X-Real-IP $remote_addr;
    proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
@@ -31,4 +26,4 @@ location /prometheus/-/healthy {
    proxy_pass http://prometheus_backend/-/healthy;
    proxy_set_header Host $host;
    access_log off;
-}
+}
--- a/infra/nginx/conf.d/status.conf
+++ b/infra/nginx/conf.d/status.conf
@@ -1,16 +1,35 @@
-# Status page configuration (for future uptime kuma integration)
+# Status page configuration (Uptime Kuma integration)

 # Rate limiting for status page
 location /status {
-    # Basic authentication for status page
-    auth_basic "Status Page Access";
-    auth_basic_user_file /etc/nginx/.htpasswd;
+    # Rate limiting
+    limit_req zone=status burst=5 nodelay;
    
-    # Placeholder for future uptime kuma integration
-    # For now, show nginx status
-    access_log off;
-    return 200 '{"status": "ok", "nginx": "running", "timestamp": "$time_iso8601"}';
-    add_header Content-Type application/json;
+    # Proxy to Uptime Kuma
+    proxy_pass http://uptime_kuma_backend;
+    proxy_set_header Host $host;
+    proxy_set_header X-Real-IP $remote_addr;
+    proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+    proxy_set_header X-Forwarded-Proto $scheme;
+    
+    # WebSocket support
+    proxy_http_version 1.1;
+    proxy_set_header Upgrade $http_upgrade;
+    proxy_set_header Connection "upgrade";
+    
+    # Timeouts
+    proxy_connect_timeout 30s;
+    proxy_send_timeout 30s;
+    proxy_read_timeout 30s;
+    
+    # Buffer settings
+    proxy_buffering on;
+    proxy_buffer_size 4k;
+    proxy_buffers 8 4k;
+    
+    # Security headers
+    add_header X-Frame-Options "SAMEORIGIN" always;
+    add_header X-Content-Type-Options "nosniff" always;
 }

 # Nginx status stub (for monitoring)
@@ -21,4 +40,4 @@ location /nginx_status {
    allow 172.16.0.0/12;  # Docker networks
    allow 192.168.0.0/16; # Private networks
    deny all;
-}
+}
--- a/infra/nginx/conf.d/uptime-kuma.conf
+++ b/infra/nginx/conf.d/uptime-kuma.conf
@@ -0,0 +1,69 @@
+# Uptime Kuma Nginx Configuration
+# Proxies requests to Uptime Kuma status page
+
+# Upstream for Uptime Kuma
+# The uptime_kuma_backend upstream (uptime-kuma:3001, keepalive 32) is
+# declared once in nginx.conf, in the http context. It must not be repeated
+# here: nginx rejects a second upstream with the same name and only allows
+# the upstream directive at http level.
+
+# Status page location — NOTE(review): status.conf declares an identical "location /status"; nginx refuses duplicate locations in one server block, so keep only one if both files are included
+location /status {
+    # Rate limiting: "status" zone, bursts of up to 5 requests served without delay
+    limit_req zone=status burst=5 nodelay;
+    
+    # Proxy to the uptime_kuma_backend upstream; the request URI is passed through unchanged
+    proxy_pass http://uptime_kuma_backend;
+    proxy_set_header Host $host;
+    proxy_set_header X-Real-IP $remote_addr;
+    proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+    proxy_set_header X-Forwarded-Proto $scheme;
+    
+    # WebSocket support: HTTP/1.1 to the upstream and the client's Upgrade header forwarded
+    proxy_http_version 1.1;
+    proxy_set_header Upgrade $http_upgrade;
+    proxy_set_header Connection "upgrade";
+    
+    # Timeouts (connect / send / read), 30s each
+    proxy_connect_timeout 30s;
+    proxy_send_timeout 30s;
+    proxy_read_timeout 30s;
+    
+    # Response buffering: one 4k buffer for the response header plus eight 4k buffers
+    proxy_buffering on;
+    proxy_buffer_size 4k;
+    proxy_buffers 8 4k;
+    
+    # Security headers, added regardless of response code ("always")
+    add_header X-Frame-Options "SAMEORIGIN" always;
+    add_header X-Content-Type-Options "nosniff" always;
+}
+
+# API endpoints for Uptime Kuma: every /api/ request not claimed by a longer prefix location
+location /api/ {
+    # Rate limiting: "api" zone, bursts of up to 10 requests served without delay
+    limit_req zone=api burst=10 nodelay;
+    
+    # Proxy to the uptime_kuma_backend upstream; the request URI is passed through unchanged
+    proxy_pass http://uptime_kuma_backend;
+    proxy_set_header Host $host;
+    proxy_set_header X-Real-IP $remote_addr;
+    proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+    proxy_set_header X-Forwarded-Proto $scheme;
+    
+    # CORS headers — NOTE(review): wildcard origin; confirm cross-origin access to this API is intended
+    add_header Access-Control-Allow-Origin "*" always;
+    add_header Access-Control-Allow-Methods "GET, POST, PUT, DELETE, OPTIONS" always;
+    add_header Access-Control-Allow-Headers "DNT,User-Agent,X-Requested-With,If-Modified-Since,Cache-Control,Content-Type,Range,Authorization" always;
+    
+    # Preflight: answer OPTIONS directly with an empty 204 carrying the CORS headers (never reaches the upstream)
+    if ($request_method = 'OPTIONS') {
+        add_header Access-Control-Allow-Origin "*";
+        add_header Access-Control-Allow-Methods "GET, POST, PUT, DELETE, OPTIONS";
+        add_header Access-Control-Allow-Headers "DNT,User-Agent,X-Requested-With,If-Modified-Since,Cache-Control,Content-Type,Range,Authorization";
+        add_header Access-Control-Max-Age 1728000;
+        add_header Content-Type "text/plain; charset=utf-8";
+        add_header Content-Length 0;
+        return 204;
+    }
+}
--- a/infra/nginx/nginx.conf
+++ b/infra/nginx/nginx.conf
@@ -63,6 +63,27 @@ http {
    ssl_session_cache shared:SSL:10m;
    ssl_session_timeout 10m;

+    # Upstream configurations
+    upstream grafana_backend {
+        server grafana:3000;
+        keepalive 32;
+    }
+
+    upstream prometheus_backend {
+        server prometheus:9090;
+        keepalive 32;
+    }
+
+    upstream uptime_kuma_backend {
+        server uptime-kuma:3001;
+        keepalive 32;
+    }
+
+    upstream alertmanager_backend {
+        server alertmanager:9093;
+        keepalive 32;
+    }
+
    # Main server block
    server {
        listen 80;
@@ -74,17 +95,19 @@ http {
        listen 443 ssl http2;
        server_name _;

-        # SSL configuration
-        ssl_certificate /etc/nginx/ssl/cert.pem;
-        ssl_certificate_key /etc/nginx/ssl/key.pem;
+        # SSL configuration (self-signed certificate; paths use the {{SERVER_IP}} template placeholder)
+        ssl_certificate /etc/letsencrypt/live/{{SERVER_IP}}/fullchain.pem;
+        ssl_certificate_key /etc/letsencrypt/live/{{SERVER_IP}}/privkey.pem;
+        ssl_protocols TLSv1.2 TLSv1.3;
+        ssl_ciphers ECDHE-RSA-AES128-GCM-SHA256:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-RSA-AES128-SHA256:ECDHE-RSA-AES256-SHA384;
+        ssl_prefer_server_ciphers off;
+        ssl_session_cache shared:SSL:10m;
+        ssl_session_timeout 10m;

        # Security headers
        add_header X-Frame-Options "SAMEORIGIN" always;
        add_header X-Content-Type-Options "nosniff" always;

-        # Rate limiting
-        limit_req zone=api burst=20 nodelay;
-
        # Redirect root to Grafana
        location = / {
            return 301 /grafana/;
--- a/infra/nginx/ssl/letsencrypt.conf
+++ b/infra/nginx/ssl/letsencrypt.conf
@@ -0,0 +1,27 @@
+# Let's Encrypt SSL Configuration
+# This file contains the SSL configuration for Let's Encrypt certificates
+
+# SSL certificate paths (Let's Encrypt)
+ssl_certificate /etc/letsencrypt/live/{{DOMAIN}}/fullchain.pem;
+ssl_certificate_key /etc/letsencrypt/live/{{DOMAIN}}/privkey.pem;
+
+# SSL security configuration (cipher list deduplicated; ECDHE-ECDSA suites added so ECDSA certificates can negotiate TLS 1.2)
+ssl_protocols TLSv1.2 TLSv1.3;
+ssl_ciphers ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:ECDHE-RSA-AES128-SHA256:ECDHE-RSA-AES256-SHA384;
+ssl_prefer_server_ciphers off;
+ssl_session_cache shared:SSL:10m;
+ssl_session_timeout 10m;
+ssl_session_tickets off;
+
+# OCSP Stapling
+ssl_stapling on;
+ssl_stapling_verify on;
+ssl_trusted_certificate /etc/letsencrypt/live/{{DOMAIN}}/chain.pem;
+
+# Security Headers
+add_header Strict-Transport-Security "max-age=31536000; includeSubDomains" always;
+add_header X-Frame-Options "SAMEORIGIN" always;
+add_header X-Content-Type-Options "nosniff" always;
+add_header X-XSS-Protection "1; mode=block" always;
+add_header Referrer-Policy "strict-origin-when-cross-origin" always;
+add_header Content-Security-Policy "default-src 'self'; script-src 'self' 'unsafe-inline' 'unsafe-eval'; style-src 'self' 'unsafe-inline'; img-src 'self' data: https:; font-src 'self' data:; connect-src 'self' wss: https:;" always;
--- a/infra/prometheus/alert_rules.yml
+++ b/infra/prometheus/alert_rules.yml
@@ -0,0 +1,253 @@
+# Prometheus Alert Rules
+# This file defines alerting rules for monitoring the bot infrastructure
+
+groups:
+  # Bot Health Monitoring
+  - name: bot_health
+    rules:
+      # Telegram Bot Health
+      - alert: TelegramBotDown
+        expr: up{job="telegram-bot"} == 0
+        for: 1m
+        labels:
+          severity: critical
+          service: telegram-bot
+        annotations:
+          summary: "Telegram Bot is down"
+          description: "Telegram Bot has been down for more than 1 minute"
+          runbook_url: "https://docs.example.com/runbooks/telegram-bot-down"
+
+      - alert: TelegramBotHighErrorRate
+        expr: rate(http_requests_total{job="telegram-bot",status=~"5.."}[5m]) > 0.1
+        for: 2m
+        labels:
+          severity: warning
+          service: telegram-bot
+        annotations:
+          summary: "Telegram Bot high error rate"
+          description: "Telegram Bot error rate is {{ $value }} errors per second"
+
+      - alert: TelegramBotHighResponseTime
+        expr: histogram_quantile(0.95, rate(http_request_duration_seconds_bucket{job="telegram-bot"}[5m])) > 2
+        for: 5m
+        labels:
+          severity: warning
+          service: telegram-bot
+        annotations:
+          summary: "Telegram Bot high response time"
+          description: "95th percentile response time is {{ $value }} seconds"
+
+      # AnonBot Health
+      - alert: AnonBotDown
+        expr: up{job="anon-bot"} == 0
+        for: 1m
+        labels:
+          severity: critical
+          service: anon-bot
+        annotations:
+          summary: "AnonBot is down"
+          description: "AnonBot has been down for more than 1 minute"
+          runbook_url: "https://docs.example.com/runbooks/anon-bot-down"
+
+      - alert: AnonBotHighErrorRate
+        expr: rate(http_requests_total{job="anon-bot",status=~"5.."}[5m]) > 0.1
+        for: 2m
+        labels:
+          severity: warning
+          service: anon-bot
+        annotations:
+          summary: "AnonBot high error rate"
+          description: "AnonBot error rate is {{ $value }} errors per second"
+
+      - alert: AnonBotHighResponseTime
+        expr: histogram_quantile(0.95, rate(http_request_duration_seconds_bucket{job="anon-bot"}[5m])) > 2
+        for: 5m
+        labels:
+          severity: warning
+          service: anon-bot
+        annotations:
+          summary: "AnonBot high response time"
+          description: "95th percentile response time is {{ $value }} seconds"
+
+  # Infrastructure Health Monitoring
+  - name: infrastructure_health
+    rules:
+      # Prometheus Health
+      - alert: PrometheusDown
+        expr: up{job="prometheus"} == 0
+        for: 1m
+        labels:
+          severity: critical
+          service: prometheus
+        annotations:
+          summary: "Prometheus is down"
+          description: "Prometheus has been down for more than 1 minute"
+
+      - alert: PrometheusHighMemoryUsage
+        expr: (prometheus_tsdb_head_series / prometheus_tsdb_head_series_limit) > 0.8  # NOTE(review): prometheus_tsdb_head_series_limit does not look like a metric Prometheus exports; if it is absent this rule never fires — verify
+        for: 5m
+        labels:
+          severity: warning
+          service: prometheus
+        annotations:
+          summary: "Prometheus high memory usage"  # NOTE(review): the expression compares head series counts, not memory — confirm the intended signal
+          description: "Prometheus memory usage is {{ $value | humanizePercentage }} of limit"
+
+      # Grafana Health
+      - alert: GrafanaDown
+        expr: up{job="grafana"} == 0
+        for: 1m
+        labels:
+          severity: critical
+          service: grafana
+        annotations:
+          summary: "Grafana is down"
+          description: "Grafana has been down for more than 1 minute"
+
+      # Nginx Health
+      - alert: NginxDown
+        expr: up{job="nginx"} == 0
+        for: 1m
+        labels:
+          severity: critical
+          service: nginx
+        annotations:
+          summary: "Nginx is down"
+          description: "Nginx has been down for more than 1 minute"
+
+      - alert: NginxHighErrorRate
+        expr: rate(nginx_http_requests_total{status=~"5.."}[5m]) > 0.1
+        for: 2m
+        labels:
+          severity: warning
+          service: nginx
+        annotations:
+          summary: "Nginx high error rate"
+          description: "Nginx error rate is {{ $value }} errors per second"
+
+  # System Resource Monitoring
+  - name: system_resources
+    rules:
+      # High CPU Usage
+      - alert: HighCPUUsage
+        expr: 100 - (avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) > 80
+        for: 5m
+        labels:
+          severity: warning
+          service: system
+        annotations:
+          summary: "High CPU usage"
+          description: "CPU usage is {{ $value }}% on {{ $labels.instance }}"
+
+      - alert: VeryHighCPUUsage
+        expr: 100 - (avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) > 95
+        for: 2m
+        labels:
+          severity: critical
+          service: system
+        annotations:
+          summary: "Very high CPU usage"
+          description: "CPU usage is {{ $value }}% on {{ $labels.instance }}"
+
+      # High Memory Usage
+      - alert: HighMemoryUsage
+        expr: (1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) * 100 > 80
+        for: 5m
+        labels:
+          severity: warning
+          service: system
+        annotations:
+          summary: "High memory usage"
+          description: "Memory usage is {{ $value }}% on {{ $labels.instance }}"
+
+      - alert: VeryHighMemoryUsage
+        expr: (1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) * 100 > 95
+        for: 2m
+        labels:
+          severity: critical
+          service: system
+        annotations:
+          summary: "Very high memory usage"
+          description: "Memory usage is {{ $value }}% on {{ $labels.instance }}"
+
+      # Disk Space
+      - alert: LowDiskSpace
+        expr: (1 - (node_filesystem_avail_bytes / node_filesystem_size_bytes)) * 100 > 80
+        for: 5m
+        labels:
+          severity: warning
+          service: system
+        annotations:
+          summary: "Low disk space"
+          description: "Disk usage is {{ $value }}% on {{ $labels.instance }} ({{ $labels.mountpoint }})"
+
+      - alert: VeryLowDiskSpace
+        expr: (1 - (node_filesystem_avail_bytes / node_filesystem_size_bytes)) * 100 > 95
+        for: 2m
+        labels:
+          severity: critical
+          service: system
+        annotations:
+          summary: "Very low disk space"
+          description: "Disk usage is {{ $value }}% on {{ $labels.instance }} ({{ $labels.mountpoint }})"
+
+  # Docker container monitoring (cAdvisor-style container_* metrics)
+  - name: docker_containers
+    rules:
+      # Container restart: start time is a gauge, so count its value changes instead of applying rate()
+      - alert: ContainerRestarting
+        expr: changes(container_start_time_seconds[10m]) > 0
+        for: 0m
+        labels:
+          severity: warning
+          service: docker
+        annotations:
+          summary: "Container restarting"
+          description: "Container {{ $labels.name }} is restarting frequently"
+
+      # Container memory above 80% of its limit; a limit of 0 means "unlimited" and is filtered out to avoid +Inf ratios
+      - alert: ContainerHighMemoryUsage
+        expr: (container_memory_usage_bytes / (container_spec_memory_limit_bytes > 0)) * 100 > 80
+        for: 5m
+        labels:
+          severity: warning
+          service: docker
+        annotations:
+          summary: "Container high memory usage"
+          description: "Container {{ $labels.name }} memory usage is {{ $value }}%"
+
+      # Container CPU above 80% of its allowance: cores used divided by cores allowed (quota / period), matched per container name
+      - alert: ContainerHighCPUUsage
+        expr: sum by (instance, name) (rate(container_cpu_usage_seconds_total{name!=""}[5m])) / sum by (instance, name) (container_spec_cpu_quota{name!=""} / container_spec_cpu_period{name!=""}) * 100 > 80
+        for: 5m
+        labels:
+          severity: warning
+          service: docker
+        annotations:
+          summary: "Container high CPU usage"
+          description: "Container {{ $labels.name }} CPU usage is {{ $value }}%"
+
+  # Database monitoring — NOTE(review): both metrics below are presumably exported by the bot applications; confirm they exist
+  - name: database_health
+    rules:
+      # Connection failures: more than 5 new connection errors within a 5-minute window, sustained for 1m
+      - alert: DatabaseConnectionFailed
+        expr: increase(database_connection_errors_total[5m]) > 5
+        for: 1m
+        labels:
+          severity: critical
+          service: database
+        annotations:
+          summary: "Database connection failures"
+          description: "{{ $value }} database connection failures in the last 5 minutes"
+
+      # Slow queries: 95th-percentile query duration above 1 second over a 5-minute rate window, sustained for 5m
+      - alert: DatabaseHighQueryTime
+        expr: histogram_quantile(0.95, rate(database_query_duration_seconds_bucket[5m])) > 1
+        for: 5m
+        labels:
+          severity: warning
+          service: database
+        annotations:
+          summary: "Database high query time"
+          description: "95th percentile database query time is {{ $value }} seconds"
--- a/infra/prometheus/prometheus.yml
+++ b/infra/prometheus/prometheus.yml
@@ -3,8 +3,7 @@ global:
  evaluation_interval: 15s

 rule_files:
-  # - "first_rules.yml"
-  # - "second_rules.yml"
+  - "alert_rules.yml"

 scrape_configs:
  - job_name: 'prometheus'
@@ -46,4 +45,4 @@ alerting:
  alertmanagers:
    - static_configs:
        - targets:
-          # - alertmanager:9093
+          - alertmanager:9093
--- a/infra/uptime-kuma/docker-compose.yml
+++ b/infra/uptime-kuma/docker-compose.yml
@@ -0,0 +1,33 @@
+# Uptime Kuma — standalone Compose definition
+# Kept in its own file; per the original note it is meant to be
+# included from the main docker-compose.yml
+
+version: "3.8"
+
+networks:
+  bots_network:
+    external: true
+
+volumes:
+  uptime_kuma_data:
+    driver: local
+
+services:
+  uptime-kuma:
+    container_name: bots_uptime_kuma
+    image: louislam/uptime-kuma:latest
+    restart: unless-stopped
+    environment:
+      UPTIME_KUMA_PORT: "3001"
+    ports:
+      - '3001:3001'
+    networks:
+      - bots_network
+    volumes:
+      - uptime_kuma_data:/app/data
+    healthcheck:
+      test: ['CMD', 'wget', '--no-verbose', '--tries=1', '--spider', 'http://localhost:3001']
+      start_period: 40s
+      interval: 30s
+      timeout: 10s
+      retries: 3