diff --git a/CHANGES_SUMMARY.md b/CHANGES_SUMMARY.md
deleted file mode 100644
index 0519ecb..0000000
--- a/CHANGES_SUMMARY.md
+++ /dev/null
@@ -1 +0,0 @@
-
\ No newline at end of file
diff --git a/Dockerfile.bot b/Dockerfile.bot
index a4c9aba..ca2dc1d 100644
--- a/Dockerfile.bot
+++ b/Dockerfile.bot
@@ -52,10 +52,10 @@ USER deploy
# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
- CMD curl -f http://localhost:8000/health || exit 1
+ CMD curl -f http://localhost:8080/health || exit 1
# Expose metrics port
-EXPOSE 8000
+EXPOSE 8080
# Graceful shutdown
STOPSIGNAL SIGTERM
diff --git a/docker-compose.yml b/docker-compose.yml
deleted file mode 100644
index cb4580c..0000000
--- a/docker-compose.yml
+++ /dev/null
@@ -1,130 +0,0 @@
-version: '3.8'
-
-services:
- telegram-bot:
- build:
- context: .
- dockerfile: Dockerfile.bot
- container_name: telegram-bot
- restart: unless-stopped
- expose:
- - "8000"
- environment:
- - PYTHONPATH=/app
- - DOCKER_CONTAINER=true
- - LOG_LEVEL=${LOG_LEVEL:-INFO}
- - LOG_RETENTION_DAYS=${LOG_RETENTION_DAYS:-30}
- - METRICS_HOST=${METRICS_HOST:-0.0.0.0}
- - METRICS_PORT=${METRICS_PORT:-8000}
- # Telegram settings
- - TELEGRAM_BOT_TOKEN=${BOT_TOKEN}
- - TELEGRAM_LISTEN_BOT_TOKEN=${LISTEN_BOT_TOKEN}
- - TELEGRAM_TEST_BOT_TOKEN=${TEST_BOT_TOKEN}
- - TELEGRAM_PREVIEW_LINK=${PREVIEW_LINK:-false}
- - TELEGRAM_MAIN_PUBLIC=${MAIN_PUBLIC}
- - TELEGRAM_GROUP_FOR_POSTS=${GROUP_FOR_POSTS}
- - TELEGRAM_GROUP_FOR_MESSAGE=${GROUP_FOR_MESSAGE}
- - TELEGRAM_GROUP_FOR_LOGS=${GROUP_FOR_LOGS}
- - TELEGRAM_IMPORTANT_LOGS=${IMPORTANT_LOGS}
- - TELEGRAM_ARCHIVE=${ARCHIVE}
- - TELEGRAM_TEST_GROUP=${TEST_GROUP}
- # Bot settings
- - SETTINGS_LOGS=${LOGS:-false}
- - SETTINGS_TEST=${TEST:-false}
- # Database
- - DATABASE_PATH=${DATABASE_PATH:-database/tg-bot-database.db}
- volumes:
- - ./database:/app/database:rw
- - ./logs:/app/logs:rw
- - ./.env:/app/.env:ro
- networks:
- - bot-internal
- depends_on:
- - prometheus
- - grafana
- healthcheck:
- test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
- interval: 30s
- timeout: 10s
- retries: 3
- start_period: 40s
- deploy:
- resources:
- limits:
- memory: 512M
- cpus: '0.5'
- reservations:
- memory: 256M
- cpus: '0.25'
-
- prometheus:
- image: prom/prometheus:latest
- container_name: prometheus
- expose:
- - "9090"
- volumes:
- - ./prometheus.yml:/etc/prometheus/prometheus.yml:ro
- - prometheus_data:/prometheus
- command:
- - '--config.file=/etc/prometheus/prometheus.yml'
- - '--storage.tsdb.path=/prometheus'
- - '--web.console.libraries=/etc/prometheus/console_libraries'
- - '--web.console.templates=/etc/prometheus/consoles'
- - '--storage.tsdb.retention.time=200h'
- - '--web.enable-lifecycle'
- restart: unless-stopped
- networks:
- - bot-internal
- healthcheck:
- test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:9090/-/healthy"]
- interval: 30s
- timeout: 10s
- retries: 3
- deploy:
- resources:
- limits:
- memory: 256M
- cpus: '0.25'
-
- grafana:
- image: grafana/grafana:latest
- container_name: grafana
- ports:
- - "3000:3000" # Grafana доступна извне
- environment:
- - GF_SECURITY_ADMIN_USER=${GRAFANA_ADMIN_USER:-admin}
- - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_ADMIN_PASSWORD:-admin}
- - GF_USERS_ALLOW_SIGN_UP=false
- - GF_SERVER_ROOT_URL=http://localhost:3000
- volumes:
- - grafana_data:/var/lib/grafana
- - ./grafana/dashboards:/etc/grafana/provisioning/dashboards:ro
- - ./grafana/datasources:/etc/grafana/provisioning/datasources:ro
- restart: unless-stopped
- networks:
- - bot-internal
- depends_on:
- - prometheus
- healthcheck:
- test: ["CMD-SHELL", "curl -f http://localhost:3000/api/health || exit 1"]
- interval: 30s
- timeout: 10s
- retries: 3
- deploy:
- resources:
- limits:
- memory: 256M
- cpus: '0.25'
-
-volumes:
- prometheus_data:
- driver: local
- grafana_data:
- driver: local
-
-networks:
- bot-internal:
- driver: bridge
- ipam:
- config:
- - subnet: 172.20.0.0/16
diff --git a/env.example b/env.example
index 588a34f..bb48ef3 100644
--- a/env.example
+++ b/env.example
@@ -20,9 +20,9 @@ TEST=false
# Database
DATABASE_PATH=database/tg-bot-database.db
-# Monitoring
+# Monitoring (Centralized Prometheus)
METRICS_HOST=0.0.0.0
-METRICS_PORT=8000
+METRICS_PORT=8080
# Logging
LOG_LEVEL=INFO
diff --git a/grafana/dashboards/dashboards.yml b/grafana/dashboards/dashboards.yml
deleted file mode 100644
index 304cbc9..0000000
--- a/grafana/dashboards/dashboards.yml
+++ /dev/null
@@ -1,12 +0,0 @@
-apiVersion: 1
-
-providers:
- - name: 'Telegram Bot Dashboards'
- orgId: 1
- folder: ''
- type: file
- disableDeletion: false
- updateIntervalSeconds: 10
- allowUiUpdates: true
- options:
- path: /etc/grafana/provisioning/dashboards
diff --git a/grafana/dashboards/telegram-bot-dashboard.json b/grafana/dashboards/telegram-bot-dashboard.json
deleted file mode 100644
index f6f6e18..0000000
--- a/grafana/dashboards/telegram-bot-dashboard.json
+++ /dev/null
@@ -1,1012 +0,0 @@
-{
- "annotations": {
- "list": [
- {
- "builtIn": 1,
- "datasource": {
- "type": "grafana",
- "uid": "-- Grafana --"
- },
- "enable": true,
- "hide": true,
- "iconColor": "rgba(0, 211, 255, 1)",
- "name": "Annotations & Alerts",
- "type": "dashboard"
- }
- ]
- },
- "editable": true,
- "fiscalYearStartMonth": 0,
- "graphTooltip": 0,
- "id": null,
- "links": [],
- "liveNow": false,
- "panels": [
- {
- "datasource": {
- "type": "prometheus",
- "uid": "PBFA97CFB590B2093"
- },
- "fieldConfig": {
- "defaults": {
- "color": {
- "mode": "palette-classic"
- },
- "custom": {
- "axisLabel": "",
- "axisPlacement": "auto",
- "barAlignment": 0,
- "drawStyle": "line",
- "fillOpacity": 10,
- "gradientMode": "none",
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "vis": false
- },
- "lineInterpolation": "linear",
- "lineWidth": 1,
- "pointSize": 5,
- "scaleDistribution": {
- "type": "linear"
- },
- "showPoints": "never",
- "spanNulls": false,
- "stacking": {
- "group": "A",
- "mode": "none"
- },
- "thresholdsStyle": {
- "mode": "off"
- }
- },
- "mappings": [],
- "thresholds": {
- "mode": "absolute",
- "steps": [
- {
- "color": "green",
- "value": null
- },
- {
- "color": "red",
- "value": 80
- }
- ]
- },
- "unit": "short"
- },
- "overrides": []
- },
- "gridPos": {
- "h": 8,
- "w": 12,
- "x": 0,
- "y": 0
- },
- "id": 1,
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "list",
- "placement": "bottom"
- },
- "tooltip": {
- "mode": "single",
- "sort": "none"
- }
- },
- "targets": [
- {
- "datasource": {
- "type": "prometheus",
- "uid": "PBFA97CFB590B2093"
- },
- "expr": "sum(rate(bot_commands_total[5m]))",
- "refId": "A"
- }
- ],
- "title": "Commands per Second",
- "type": "timeseries"
- },
- {
- "datasource": {
- "type": "prometheus",
- "uid": "PBFA97CFB590B2093"
- },
- "fieldConfig": {
- "defaults": {
- "color": {
- "mode": "palette-classic"
- },
- "custom": {
- "axisLabel": "",
- "axisPlacement": "auto",
- "barAlignment": 0,
- "drawStyle": "line",
- "fillOpacity": 10,
- "gradientMode": "none",
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "vis": false
- },
- "lineInterpolation": "linear",
- "lineWidth": 1,
- "pointSize": 5,
- "scaleDistribution": {
- "type": "linear"
- },
- "showPoints": "never",
- "spanNulls": false,
- "stacking": {
- "group": "A",
- "mode": "none"
- },
- "thresholdsStyle": {
- "mode": "off"
- }
- },
- "mappings": [],
- "thresholds": {
- "mode": "absolute",
- "steps": [
- {
- "color": "green",
- "value": null
- },
- {
- "color": "red",
- "value": 80
- }
- ]
- },
- "unit": "s"
- },
- "overrides": []
- },
- "gridPos": {
- "h": 8,
- "w": 12,
- "x": 12,
- "y": 0
- },
- "id": 2,
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "list",
- "placement": "bottom"
- },
- "tooltip": {
- "mode": "single",
- "sort": "none"
- }
- },
- "targets": [
- {
- "datasource": {
- "type": "prometheus",
- "uid": "PBFA97CFB590B2093"
- },
- "expr": "histogram_quantile(0.95, rate(method_duration_seconds_bucket[5m]))",
- "refId": "A"
- },
- {
- "datasource": {
- "type": "prometheus",
- "uid": "PBFA97CFB590B2093"
- },
- "expr": "histogram_quantile(0.99, rate(method_duration_seconds_bucket[5m]))",
- "refId": "B"
- }
- ],
- "title": "Method Response Time (P95, P99)",
- "type": "timeseries"
- },
- {
- "datasource": {
- "type": "prometheus",
- "uid": "PBFA97CFB590B2093"
- },
- "fieldConfig": {
- "defaults": {
- "color": {
- "mode": "palette-classic"
- },
- "custom": {
- "axisLabel": "",
- "axisPlacement": "auto",
- "barAlignment": 0,
- "drawStyle": "line",
- "fillOpacity": 10,
- "gradientMode": "none",
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "vis": false
- },
- "lineInterpolation": "linear",
- "lineWidth": 1,
- "pointSize": 5,
- "scaleDistribution": {
- "type": "linear"
- },
- "showPoints": "never",
- "spanNulls": false,
- "stacking": {
- "group": "A",
- "mode": "none"
- },
- "thresholdsStyle": {
- "mode": "off"
- }
- },
- "mappings": [],
- "thresholds": {
- "mode": "absolute",
- "steps": [
- {
- "color": "green",
- "value": null
- },
- {
- "color": "red",
- "value": 80
- }
- ]
- },
- "unit": "short"
- },
- "overrides": []
- },
- "gridPos": {
- "h": 8,
- "w": 12,
- "x": 0,
- "y": 8
- },
- "id": 3,
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "list",
- "placement": "bottom"
- },
- "tooltip": {
- "mode": "single",
- "sort": "none"
- }
- },
- "targets": [
- {
- "datasource": {
- "type": "prometheus",
- "uid": "PBFA97CFB590B2093"
- },
- "expr": "sum(rate(errors_total[5m]))",
- "refId": "A"
- }
- ],
- "title": "Errors per Second",
- "type": "timeseries"
- },
- {
- "datasource": {
- "type": "prometheus",
- "uid": "PBFA97CFB590B2093"
- },
- "fieldConfig": {
- "defaults": {
- "color": {
- "mode": "palette-classic"
- },
- "custom": {
- "axisLabel": "",
- "axisPlacement": "auto",
- "barAlignment": 0,
- "drawStyle": "line",
- "fillOpacity": 10,
- "gradientMode": "none",
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "vis": false
- },
- "lineInterpolation": "linear",
- "lineWidth": 1,
- "pointSize": 5,
- "scaleDistribution": {
- "type": "linear"
- },
- "showPoints": "never",
- "spanNulls": false,
- "stacking": {
- "group": "A",
- "mode": "none"
- },
- "thresholdsStyle": {
- "mode": "off"
- }
- },
- "mappings": [],
- "thresholds": {
- "mode": "absolute",
- "steps": [
- {
- "color": "green",
- "value": null
- },
- {
- "color": "red",
- "value": 80
- }
- ]
- },
- "unit": "short"
- },
- "overrides": []
- },
- "gridPos": {
- "h": 8,
- "w": 12,
- "x": 12,
- "y": 8
- },
- "id": 4,
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "list",
- "placement": "bottom"
- },
- "tooltip": {
- "mode": "single",
- "sort": "none"
- }
- },
- "targets": [
- {
- "datasource": {
- "type": "prometheus",
- "uid": "PBFA97CFB590B2093"
- },
- "expr": "sum(active_users)",
- "refId": "A"
- }
- ],
- "title": "Active Users",
- "type": "timeseries"
- },
- {
- "datasource": {
- "type": "prometheus",
- "uid": "PBFA97CFB590B2093"
- },
- "fieldConfig": {
- "defaults": {
- "color": {
- "mode": "palette-classic"
- },
- "custom": {
- "axisLabel": "",
- "axisPlacement": "auto",
- "barAlignment": 0,
- "drawStyle": "line",
- "fillOpacity": 10,
- "gradientMode": "none",
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "vis": false
- },
- "lineInterpolation": "linear",
- "lineWidth": 1,
- "pointSize": 5,
- "scaleDistribution": {
- "type": "linear"
- },
- "showPoints": "never",
- "spanNulls": false,
- "stacking": {
- "group": "A",
- "mode": "none"
- },
- "thresholdsStyle": {
- "mode": "off"
- }
- },
- "mappings": [],
- "thresholds": {
- "mode": "absolute",
- "steps": [
- {
- "color": "green",
- "value": null
- },
- {
- "color": "red",
- "value": 80
- }
- ]
- },
- "unit": "s"
- },
- "overrides": []
- },
- "gridPos": {
- "h": 8,
- "w": 12,
- "x": 0,
- "y": 16
- },
- "id": 5,
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "list",
- "placement": "bottom"
- },
- "tooltip": {
- "mode": "single",
- "sort": "none"
- }
- },
- "targets": [
- {
- "datasource": {
- "type": "prometheus",
- "uid": "PBFA97CFB590B2093"
- },
- "expr": "histogram_quantile(0.95, rate(db_query_duration_seconds_bucket[5m]))",
- "refId": "A"
- }
- ],
- "title": "Database Query Time (P95)",
- "type": "timeseries"
- },
- {
- "datasource": {
- "type": "prometheus",
- "uid": "PBFA97CFB590B2093"
- },
- "fieldConfig": {
- "defaults": {
- "color": {
- "mode": "palette-classic"
- },
- "custom": {
- "axisLabel": "",
- "axisPlacement": "auto",
- "barAlignment": 0,
- "drawStyle": "line",
- "fillOpacity": 10,
- "gradientMode": "none",
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "vis": false
- },
- "lineInterpolation": "linear",
- "lineWidth": 1,
- "pointSize": 5,
- "scaleDistribution": {
- "type": "linear"
- },
- "showPoints": "never",
- "spanNulls": false,
- "stacking": {
- "group": "A",
- "mode": "none"
- },
- "thresholdsStyle": {
- "mode": "off"
- }
- },
- "mappings": [],
- "thresholds": {
- "mode": "absolute",
- "steps": [
- {
- "color": "green",
- "value": null
- },
- {
- "color": "red",
- "value": 80
- }
- ]
- },
- "unit": "short"
- },
- "overrides": []
- },
- "gridPos": {
- "h": 8,
- "w": 12,
- "x": 12,
- "y": 16
- },
- "id": 6,
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "list",
- "placement": "bottom"
- },
- "tooltip": {
- "mode": "single",
- "sort": "none"
- }
- },
- "targets": [
- {
- "datasource": {
- "type": "prometheus",
- "uid": "PBFA97CFB590B2093"
- },
- "expr": "sum(rate(messages_processed_total[5m]))",
- "refId": "A"
- }
- ],
- "title": "Messages Processed per Second",
- "type": "timeseries"
- },
- {
- "datasource": {
- "type": "prometheus",
- "uid": "PBFA97CFB590B2093"
- },
- "fieldConfig": {
- "defaults": {
- "color": {
- "mode": "palette-classic"
- },
- "custom": {
- "axisLabel": "",
- "axisPlacement": "auto",
- "barAlignment": 0,
- "drawStyle": "line",
- "fillOpacity": 10,
- "gradientMode": "none",
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "vis": false
- },
- "lineInterpolation": "linear",
- "lineWidth": 1,
- "pointSize": 5,
- "scaleDistribution": {
- "type": "linear"
- },
- "showPoints": "never",
- "spanNulls": false,
- "stacking": {
- "group": "A",
- "mode": "none"
- },
- "thresholdsStyle": {
- "mode": "off"
- }
- },
- "mappings": [],
- "thresholds": {
- "mode": "absolute",
- "steps": [
- {
- "color": "green",
- "value": null
- },
- {
- "color": "red",
- "value": 80
- }
- ]
- },
- "unit": "short"
- },
- "overrides": []
- },
- "gridPos": {
- "h": 8,
- "w": 12,
- "x": 0,
- "y": 24
- },
- "id": 7,
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "list",
- "placement": "bottom"
- },
- "tooltip": {
- "mode": "single",
- "sort": "none"
- }
- },
- "targets": [
- {
- "datasource": {
- "type": "prometheus",
- "uid": "PBFA97CFB590B2093"
- },
- "expr": "sum by(query_type) (rate(db_queries_total[5m]))",
- "refId": "A"
- }
- ],
- "title": "Database Queries by Type",
- "type": "timeseries"
- },
- {
- "datasource": {
- "type": "prometheus",
- "uid": "PBFA97CFB590B2093"
- },
- "fieldConfig": {
- "defaults": {
- "color": {
- "mode": "palette-classic"
- },
- "custom": {
- "axisLabel": "",
- "axisPlacement": "auto",
- "barAlignment": 0,
- "drawStyle": "line",
- "fillOpacity": 10,
- "gradientMode": "none",
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "vis": false
- },
- "lineInterpolation": "linear",
- "lineWidth": 1,
- "pointSize": 5,
- "scaleDistribution": {
- "type": "linear"
- },
- "showPoints": "never",
- "spanNulls": false,
- "stacking": {
- "group": "A",
- "mode": "none"
- },
- "thresholdsStyle": {
- "mode": "off"
- }
- },
- "mappings": [],
- "thresholds": {
- "mode": "absolute",
- "steps": [
- {
- "color": "green",
- "value": null
- },
- {
- "color": "red",
- "value": 80
- }
- ]
- },
- "unit": "short"
- },
- "overrides": []
- },
- "gridPos": {
- "h": 8,
- "w": 12,
- "x": 12,
- "y": 24
- },
- "id": 8,
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "list",
- "placement": "bottom"
- },
- "tooltip": {
- "mode": "single",
- "sort": "none"
- }
- },
- "targets": [
- {
- "datasource": {
- "type": "prometheus",
- "uid": "PBFA97CFB590B2093"
- },
- "expr": "rate(db_errors_total[5m])",
- "refId": "A"
- }
- ],
- "title": "Database Errors per Second",
- "type": "timeseries"
- },
- {
- "datasource": {
- "type": "prometheus",
- "uid": "PBFA97CFB590B2093"
- },
- "fieldConfig": {
- "defaults": {
- "color": {
- "mode": "palette-classic"
- },
- "custom": {
- "axisLabel": "",
- "axisPlacement": "auto",
- "barAlignment": 0,
- "drawStyle": "line",
- "fillOpacity": 10,
- "gradientMode": "none",
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "vis": false
- },
- "lineInterpolation": "linear",
- "lineWidth": 1,
- "pointSize": 5,
- "scaleDistribution": {
- "type": "linear"
- },
- "showPoints": "never",
- "spanNulls": false,
- "stacking": {
- "group": "A",
- "mode": "none"
- },
- "thresholdsStyle": {
- "mode": "off"
- }
- },
- "mappings": [],
- "thresholds": {
- "mode": "absolute",
- "steps": [
- {
- "color": "green",
- "value": null
- },
- {
- "color": "red",
- "value": 80
- }
- ]
- },
- "unit": "short"
- },
- "overrides": []
- },
- "gridPos": {
- "h": 8,
- "w": 12,
- "x": 0,
- "y": 32
- },
- "id": 9,
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "list",
- "placement": "bottom"
- },
- "tooltip": {
- "mode": "single",
- "sort": "none"
- }
- },
- "targets": [
- {
- "datasource": {
- "type": "prometheus",
- "uid": "PBFA97CFB590B2093"
- },
- "expr": "sum by(command) (rate(bot_commands_total[5m]))",
- "refId": "A"
- }
- ],
- "title": "Commands by Type",
- "type": "timeseries"
- },
- {
- "datasource": {
- "type": "prometheus",
- "uid": "PBFA97CFB590B2093"
- },
- "fieldConfig": {
- "defaults": {
- "color": {
- "mode": "palette-classic"
- },
- "custom": {
- "axisLabel": "",
- "axisPlacement": "auto",
- "barAlignment": 0,
- "drawStyle": "line",
- "fillOpacity": 10,
- "gradientMode": "none",
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "vis": false
- },
- "lineInterpolation": "linear",
- "lineWidth": 1,
- "pointSize": 5,
- "scaleDistribution": {
- "type": "linear"
- },
- "showPoints": "never",
- "spanNulls": false,
- "stacking": {
- "group": "A",
- "mode": "none"
- },
- "thresholdsStyle": {
- "mode": "off"
- }
- },
- "mappings": [],
- "thresholds": {
- "mode": "absolute",
- "steps": [
- {
- "color": "green",
- "value": null
- },
- {
- "color": "red",
- "value": 80
- }
- ]
- },
- "unit": "short"
- },
- "overrides": []
- },
- "gridPos": {
- "h": 8,
- "w": 12,
- "x": 12,
- "y": 32
- },
- "id": 10,
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "list",
- "placement": "bottom"
- },
- "tooltip": {
- "mode": "single",
- "sort": "none"
- }
- },
- "targets": [
- {
- "datasource": {
- "type": "prometheus",
- "uid": "PBFA97CFB590B2093"
- },
- "expr": "sum by(status) (rate(bot_commands_total[5m]))",
- "refId": "A"
- }
- ],
- "title": "Commands by Status",
- "type": "timeseries"
- },
- {
- "datasource": {
- "type": "prometheus",
- "uid": "PBFA97CFB590B2093"
- },
- "fieldConfig": {
- "defaults": {
- "color": {
- "mode": "palette-classic"
- },
- "custom": {
- "axisLabel": "",
- "axisPlacement": "auto",
- "barAlignment": 0,
- "drawStyle": "line",
- "fillOpacity": 10,
- "gradientMode": "none",
- "hideFrom": {
- "legend": false,
- "tooltip": false,
- "vis": false
- },
- "lineInterpolation": "linear",
- "lineWidth": 1,
- "pointSize": 5,
- "scaleDistribution": {
- "type": "linear"
- },
- "showPoints": "never",
- "spanNulls": false,
- "stacking": {
- "group": "A",
- "mode": "none"
- },
- "thresholdsStyle": {
- "mode": "off"
- }
- },
- "mappings": [],
- "thresholds": {
- "mode": "absolute",
- "steps": [
- {
- "color": "green",
- "value": null
- },
- {
- "color": "red",
- "value": 80
- }
- ]
- },
- "unit": "short"
- },
- "overrides": []
- },
- "gridPos": {
- "h": 8,
- "w": 24,
- "x": 0,
- "y": 40
- },
- "id": 11,
- "options": {
- "legend": {
- "calcs": [],
- "displayMode": "list",
- "placement": "bottom"
- },
- "tooltip": {
- "mode": "single",
- "sort": "none"
- }
- },
- "targets": [
- {
- "datasource": {
- "type": "prometheus",
- "uid": "PBFA97CFB590B2093"
- },
- "expr": "topk(5, sum by(command) (rate(bot_commands_total[5m])))",
- "refId": "A"
- }
- ],
- "title": "Top Commands",
- "type": "timeseries"
- }
- ],
- "refresh": "5s",
- "schemaVersion": 38,
- "style": "dark",
- "tags": [
- "telegram",
- "bot",
- "monitoring"
- ],
- "templating": {
- "list": []
- },
- "time": {
- "from": "now-1h",
- "to": "now"
- },
- "timepicker": {},
- "timezone": "",
- "title": "Telegram Bot Dashboard",
- "uid": "telegram-bot",
- "version": 1,
- "weekStart": ""
-}
diff --git a/grafana/datasources/prometheus.yml b/grafana/datasources/prometheus.yml
deleted file mode 100644
index 86fd346..0000000
--- a/grafana/datasources/prometheus.yml
+++ /dev/null
@@ -1,8 +0,0 @@
-apiVersion: 1
-
-datasources:
- - name: Prometheus
- type: prometheus
- access: proxy
- url: http://prometheus:9090
- isDefault: true
diff --git a/helper_bot/__init__.py b/helper_bot/__init__.py
index 3ed7b11..e69de29 100644
--- a/helper_bot/__init__.py
+++ b/helper_bot/__init__.py
@@ -1 +0,0 @@
-from . import server_monitor
diff --git a/helper_bot/main.py b/helper_bot/main.py
index da740dd..3b109ea 100644
--- a/helper_bot/main.py
+++ b/helper_bot/main.py
@@ -2,6 +2,8 @@ from aiogram import Bot, Dispatcher
from aiogram.client.default import DefaultBotProperties
from aiogram.fsm.storage.memory import MemoryStorage
from aiogram.fsm.strategy import FSMStrategy
+import asyncio
+import logging
from helper_bot.handlers.admin import admin_router
from helper_bot.handlers.callback import callback_router
@@ -10,6 +12,7 @@ from helper_bot.handlers.private import private_router
from helper_bot.middlewares.dependencies_middleware import DependenciesMiddleware
from helper_bot.middlewares.blacklist_middleware import BlacklistMiddleware
from helper_bot.middlewares.metrics_middleware import MetricsMiddleware, ErrorMetricsMiddleware
+from helper_bot.server_prometheus import start_metrics_server, stop_metrics_server
async def start_bot(bdf):
@@ -33,4 +36,21 @@ async def start_bot(bdf):
dp.include_routers(admin_router, private_router, callback_router, group_router)
await bot.delete_webhook(drop_pending_updates=True)
- await dp.start_polling(bot, skip_updates=True)
+
+ # Запускаем HTTP сервер для метрик параллельно с ботом
+ metrics_host = bdf.settings.get('Metrics', {}).get('host', '0.0.0.0')
+ metrics_port = bdf.settings.get('Metrics', {}).get('port', 8080)
+
+ try:
+ # Запускаем метрики сервер
+ await start_metrics_server(metrics_host, metrics_port)
+
+ # Запускаем бота
+ await dp.start_polling(bot, skip_updates=True)
+
+ except Exception as e:
+ logging.error(f"Error in bot startup: {e}")
+ raise
+ finally:
+ # Останавливаем метрики сервер при завершении
+ await stop_metrics_server()
diff --git a/helper_bot/server_monitor.py b/helper_bot/server_monitor.py
deleted file mode 100644
index d568b2c..0000000
--- a/helper_bot/server_monitor.py
+++ /dev/null
@@ -1,623 +0,0 @@
-import asyncio
-import os
-import psutil
-import time
-import platform
-from datetime import datetime, timedelta
-from typing import Dict, Optional, Tuple
-import logging
-
-logger = logging.getLogger(__name__)
-
-
-class ServerMonitor:
- def __init__(self, bot, group_for_logs: str, important_logs: str):
- self.bot = bot
- self.group_for_logs = group_for_logs
- self.important_logs = important_logs
-
- # Определяем ОС
- self.os_type = self._detect_os()
- logger.info(f"Обнаружена ОС: {self.os_type}")
-
- # Пороговые значения для алертов
- self.threshold = 80.0
- self.recovery_threshold = 75.0
-
- # Состояние алертов для предотвращения спама
- self.alert_states = {
- 'cpu': False,
- 'ram': False,
- 'disk': False
- }
-
- # PID файлы для отслеживания процессов
- self.pid_files = {
- 'voice_bot': 'voice_bot.pid',
- 'helper_bot': 'helper_bot.pid'
- }
-
- # Время последней отправки статуса
- self.last_status_time = None
-
- # Для расчета скорости диска
- self.last_disk_io = None
- self.last_disk_io_time = None
-
- # Время запуска бота для расчета uptime
- self.bot_start_time = time.time()
-
- def _detect_os(self) -> str:
- """Определение типа операционной системы"""
- system = platform.system().lower()
- if system == "darwin":
- return "macos"
- elif system == "linux":
- return "ubuntu"
- else:
- return "unknown"
-
- def _get_disk_path(self) -> str:
- """Получение пути к диску в зависимости от ОС"""
- if self.os_type == "macos":
- return "/"
- elif self.os_type == "ubuntu":
- return "/"
- else:
- return "/"
-
- def _get_disk_usage(self) -> Optional[object]:
- """Получение информации о диске с учетом ОС"""
- try:
- if self.os_type == "macos":
- # На macOS используем diskutil для получения реального использования диска
- return self._get_macos_disk_usage()
- else:
- disk_path = self._get_disk_path()
- return psutil.disk_usage(disk_path)
- except Exception as e:
- logger.error(f"Ошибка при получении информации о диске: {e}")
- return None
-
- def _get_macos_disk_usage(self) -> Optional[object]:
- """Получение информации о диске на macOS через diskutil"""
- try:
- import subprocess
- import re
-
- # Получаем информацию о диске через diskutil
- result = subprocess.run(['diskutil', 'info', '/'], capture_output=True, text=True)
- if result.returncode != 0:
- # Fallback к psutil
- return psutil.disk_usage('/')
-
- output = result.stdout
-
- # Извлекаем размеры из вывода diskutil
- total_match = re.search(r'Container Total Space:\s+(\d+\.\d+)\s+GB', output)
- free_match = re.search(r'Container Free Space:\s+(\d+\.\d+)\s+GB', output)
-
- if total_match and free_match:
- total_gb = float(total_match.group(1))
- free_gb = float(free_match.group(1))
- used_gb = total_gb - free_gb
-
- # Создаем объект, похожий на результат psutil.disk_usage
- class DiskUsage:
- def __init__(self, total, used, free):
- self.total = total * (1024**3) # Конвертируем в байты
- self.used = used * (1024**3)
- self.free = free * (1024**3)
-
- return DiskUsage(total_gb, used_gb, free_gb)
- else:
- # Fallback к psutil
- return psutil.disk_usage('/')
-
- except Exception as e:
- logger.error(f"Ошибка при получении информации о диске macOS: {e}")
- # Fallback к psutil
- return psutil.disk_usage('/')
-
- def _get_disk_io_counters(self):
- """Получение статистики диска с учетом ОС"""
- try:
- if self.os_type == "macos":
- # На macOS может быть несколько дисков, берем основной
- return psutil.disk_io_counters(perdisk=False)
- elif self.os_type == "ubuntu":
- # На Ubuntu обычно один диск
- return psutil.disk_io_counters(perdisk=False)
- else:
- return psutil.disk_io_counters()
- except Exception as e:
- logger.error(f"Ошибка при получении статистики диска: {e}")
- return None
-
- def _get_system_uptime(self) -> float:
- """Получение uptime системы с учетом ОС"""
- try:
- if self.os_type == "macos":
- # На macOS используем boot_time
- boot_time = psutil.boot_time()
- return time.time() - boot_time
- elif self.os_type == "ubuntu":
- # На Ubuntu также используем boot_time
- boot_time = psutil.boot_time()
- return time.time() - boot_time
- else:
- boot_time = psutil.boot_time()
- return time.time() - boot_time
- except Exception as e:
- logger.error(f"Ошибка при получении uptime системы: {e}")
- return 0.0
-
- def get_bot_uptime(self) -> str:
- """Получение uptime бота"""
- uptime_seconds = time.time() - self.bot_start_time
- return self._format_uptime(uptime_seconds)
-
- def get_system_info(self) -> Dict:
- """Получение информации о системе"""
- try:
- # CPU
- cpu_percent = psutil.cpu_percent(interval=1)
- load_avg = psutil.getloadavg()
- cpu_count = psutil.cpu_count()
-
- # Память
- memory = psutil.virtual_memory()
- swap = psutil.swap_memory()
-
- # Используем единый расчет для всех ОС: used / total для получения процента занятой памяти
- # Это обеспечивает консистентность между macOS и Ubuntu
- ram_percent = (memory.used / memory.total) * 100
-
- # Диск
- disk = self._get_disk_usage()
- disk_io = self._get_disk_io_counters()
-
- if disk is None:
- logger.error("Не удалось получить информацию о диске")
- return {}
-
- # Расчет скорости диска
- disk_read_speed, disk_write_speed = self._calculate_disk_speed(disk_io)
-
- # Система
- system_uptime = self._get_system_uptime()
-
- # Получаем имя хоста в зависимости от ОС
- if self.os_type == "macos":
- hostname = os.uname().nodename
- elif self.os_type == "ubuntu":
- hostname = os.uname().nodename
- else:
- hostname = "unknown"
-
- return {
- 'cpu_percent': cpu_percent,
- 'load_avg_1m': round(load_avg[0], 2),
- 'load_avg_5m': round(load_avg[1], 2),
- 'load_avg_15m': round(load_avg[2], 2),
- 'cpu_count': cpu_count,
- 'ram_used': round(memory.used / (1024**3), 2),
- 'ram_total': round(memory.total / (1024**3), 2),
- 'ram_percent': round(ram_percent, 1), # Исправленный процент занятой памяти
- 'swap_used': round(swap.used / (1024**3), 2),
- 'swap_total': round(swap.total / (1024**3), 2),
- 'swap_percent': swap.percent,
- 'disk_used': round(disk.used / (1024**3), 2),
- 'disk_total': round(disk.total / (1024**3), 2),
- 'disk_percent': round((disk.used / disk.total) * 100, 1),
- 'disk_free': round(disk.free / (1024**3), 2),
- 'disk_read_speed': disk_read_speed,
- 'disk_write_speed': disk_write_speed,
- 'disk_io_percent': self._calculate_disk_io_percent(),
- 'system_uptime': self._format_uptime(system_uptime),
- 'bot_uptime': self.get_bot_uptime(),
- 'server_hostname': hostname,
- 'current_time': datetime.now().strftime('%Y-%m-%d %H:%M:%S')
- }
- except Exception as e:
- logger.error(f"Ошибка при получении информации о системе: {e}")
- return {}
-
- def _get_disk_space_emoji(self, disk_percent: float) -> str:
- """Получение эмодзи для дискового пространства"""
- if disk_percent < 60:
- return "🟢"
- elif disk_percent < 90:
- return "⚠️"
- else:
- return "🚨"
-
- def _format_bytes(self, bytes_value: int) -> str:
- """Форматирование байтов в человекочитаемый вид"""
- if bytes_value == 0:
- return "0 B"
-
- size_names = ["B", "KB", "MB", "GB", "TB"]
- i = 0
- while bytes_value >= 1024 and i < len(size_names) - 1:
- bytes_value /= 1024.0
- i += 1
-
- return f"{bytes_value:.1f} {size_names[i]}"
-
- def _format_uptime(self, seconds: float) -> str:
- """Форматирование времени работы системы"""
- days = int(seconds // 86400)
- hours = int((seconds % 86400) // 3600)
- minutes = int((seconds % 3600) // 60)
-
- if days > 0:
- return f"{days}д {hours}ч {minutes}м"
- elif hours > 0:
- return f"{hours}ч {minutes}м"
- else:
- return f"{minutes}м"
-
- def check_process_status(self, process_name: str) -> Tuple[str, str]:
- """Проверка статуса процесса и возврат статуса с uptime"""
- try:
- # Сначала проверяем по PID файлу
- pid_file = self.pid_files.get(process_name)
- if pid_file and os.path.exists(pid_file):
- try:
- with open(pid_file, 'r') as f:
- content = f.read().strip()
- if content and content != '# Этот файл будет автоматически обновляться при запуске бота':
- pid = int(content)
- if psutil.pid_exists(pid):
- # Получаем uptime процесса
- try:
- proc = psutil.Process(pid)
- proc_uptime = time.time() - proc.create_time()
- uptime_str = self._format_uptime(proc_uptime)
- return "✅", f"Uptime {uptime_str}"
- except:
- return "✅", "Uptime неизвестно"
- except (ValueError, FileNotFoundError):
- pass
-
- # Проверяем по имени процесса более точно
- for proc in psutil.process_iter(['pid', 'name', 'cmdline']):
- try:
- proc_name = proc.info['name'].lower()
- cmdline = ' '.join(proc.info['cmdline']).lower() if proc.info['cmdline'] else ''
-
- # Более точная проверка для каждого бота
- if process_name == 'voice_bot':
- # Проверяем voice_bot
- if ('voice_bot' in proc_name or
- 'voice_bot' in cmdline or
- 'voice_bot_v2.py' in cmdline):
- # Получаем uptime процесса
- try:
- proc_uptime = time.time() - proc.create_time()
- uptime_str = self._format_uptime(proc_uptime)
- return "✅", f"Uptime {uptime_str}"
- except:
- return "✅", "Uptime неизвестно"
- elif process_name == 'helper_bot':
- # Проверяем helper_bot
- if ('helper_bot' in proc_name or
- 'helper_bot' in cmdline or
- 'run_helper.py' in cmdline or
- 'python' in proc_name and 'helper_bot' in cmdline):
- # Получаем uptime процесса
- try:
- proc_uptime = time.time() - proc.create_time()
- uptime_str = self._format_uptime(proc_uptime)
- return "✅", f"Uptime {uptime_str}"
- except:
- return "✅", "Uptime неизвестно"
- except (psutil.NoSuchProcess, psutil.AccessDenied):
- continue
-
- return "❌", "Выключен"
- except Exception as e:
- logger.error(f"Ошибка при проверке процесса {process_name}: {e}")
- return "❌", "Выключен"
-
- def should_send_status(self) -> bool:
- """Проверка, нужно ли отправить статус (каждые 30 минут в 00 и 30 минут часа)"""
- now = datetime.now()
-
- # Проверяем, что сейчас 00 или 30 минут часа
- if now.minute in [0, 30]:
- # Проверяем, не отправляли ли мы уже статус в эту минуту
- if (self.last_status_time is None or
- self.last_status_time.hour != now.hour or
- self.last_status_time.minute != now.minute):
- self.last_status_time = now
- return True
-
- return False
-
- def _calculate_disk_speed(self, current_disk_io) -> Tuple[str, str]:
- """Расчет скорости чтения/записи диска"""
- current_time = time.time()
-
- if self.last_disk_io is None or self.last_disk_io_time is None:
- self.last_disk_io = current_disk_io
- self.last_disk_io_time = current_time
- return "0 B/s", "0 B/s"
-
- time_diff = current_time - self.last_disk_io_time
- if time_diff < 1: # Минимальный интервал 1 секунда
- return "0 B/s", "0 B/s"
-
- read_diff = current_disk_io.read_bytes - self.last_disk_io.read_bytes
- write_diff = current_disk_io.write_bytes - self.last_disk_io.write_bytes
-
- read_speed = read_diff / time_diff
- write_speed = write_diff / time_diff
-
- # Обновляем предыдущие значения
- self.last_disk_io = current_disk_io
- self.last_disk_io_time = current_time
-
- return self._format_bytes(read_speed) + "/s", self._format_bytes(write_speed) + "/s"
-
- def _calculate_disk_io_percent(self) -> int:
- """Расчет процента загрузки диска на основе IOPS"""
- try:
- # Получаем статистику диска
- disk_io = self._get_disk_io_counters()
- if disk_io is None:
- return 0
-
- # Простая эвристика: считаем общее количество операций
- total_ops = disk_io.read_count + disk_io.write_count
-
- # Нормализуем к проценту (это приблизительная оценка)
- # На macOS обычно нормальная нагрузка до 1000-5000 операций в секунду
- if total_ops < 1000:
- return 10
- elif total_ops < 5000:
- return 30
- elif total_ops < 10000:
- return 50
- elif total_ops < 20000:
- return 70
- else:
- return 90
- except:
- return 0
-
- def should_send_startup_status(self) -> bool:
- """Проверка, нужно ли отправить статус при запуске"""
- return self.last_status_time is None
-
- async def send_startup_message(self):
- """Отправка сообщения о запуске бота"""
- try:
- message = f"""🚀 **Бот запущен!**
----------------------------------
-**Время запуска:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
-**Сервер:** `{psutil.os.uname().nodename}`
-**Система:** {psutil.os.uname().sysname} {psutil.os.uname().release}
-**ОС:** {self.os_type.upper()}
-
-✅ Мониторинг сервера активирован
-✅ Статус будет отправляться каждые 30 минут (в 00 и 30 минут часа)
-✅ Алерты будут отправляться при превышении пороговых значений
----------------------------------"""
-
- await self.bot.send_message(
- chat_id=self.important_logs,
- text=message,
- parse_mode='HTML'
- )
- logger.info("Сообщение о запуске бота отправлено")
-
- except Exception as e:
- logger.error(f"Ошибка при отправке сообщения о запуске: {e}")
-
- async def send_shutdown_message(self):
- """Отправка сообщения об отключении бота"""
- try:
- # Получаем финальную информацию о системе
- system_info = self.get_system_info()
- if not system_info:
- system_info = {
- 'current_time': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
- 'server_hostname': psutil.os.uname().nodename
- }
-
- message = f"""🛑 **Бот отключен!**
----------------------------------
-**Время отключения:** {system_info['current_time']}
-**Сервер:** `{system_info['server_hostname']}`
-
-❌ Мониторинг сервера остановлен
-❌ Статус больше не будет отправляться
-❌ Алерты отключены
-
-⚠️ **Внимание:** Проверьте состояние сервера!
----------------------------------"""
-
- await self.bot.send_message(
- chat_id=self.important_logs,
- text=message,
- parse_mode='HTML'
- )
- logger.info("Сообщение об отключении бота отправлено")
-
- except Exception as e:
- logger.error(f"Ошибка при отправке сообщения об отключении: {e}")
-
- def check_alerts(self, system_info: Dict) -> Tuple[bool, Optional[str]]:
- """Проверка необходимости отправки алертов"""
- alerts = []
-
- # Проверка CPU
- if system_info['cpu_percent'] > self.threshold and not self.alert_states['cpu']:
- self.alert_states['cpu'] = True
- alerts.append(('cpu', system_info['cpu_percent'], f"Нагрузка за 1 мин: {system_info['load_avg_1m']}"))
-
- # Проверка RAM
- if system_info['ram_percent'] > self.threshold and not self.alert_states['ram']:
- self.alert_states['ram'] = True
- alerts.append(('ram', system_info['ram_percent'], f"Используется: {system_info['ram_used']} GB из {system_info['ram_total']} GB"))
-
- # Проверка диска
- if system_info['disk_percent'] > self.threshold and not self.alert_states['disk']:
- self.alert_states['disk'] = True
- alerts.append(('disk', system_info['disk_percent'], f"Свободно: {system_info['disk_free']} GB на /"))
-
- # Проверка восстановления
- recoveries = []
- if system_info['cpu_percent'] < self.recovery_threshold and self.alert_states['cpu']:
- self.alert_states['cpu'] = False
- recoveries.append(('cpu', system_info['cpu_percent']))
-
- if system_info['ram_percent'] < self.recovery_threshold and self.alert_states['ram']:
- self.alert_states['ram'] = False
- recoveries.append(('ram', system_info['ram_percent']))
-
- if system_info['disk_percent'] < self.recovery_threshold and self.alert_states['disk']:
- self.alert_states['disk'] = False
- recoveries.append(('disk', system_info['disk_percent']))
-
- return alerts, recoveries
-
- async def send_status_message(self, system_info: Dict):
- """Отправка сообщения со статусом сервера"""
- try:
- voice_bot_status, voice_bot_uptime = self.check_process_status('voice_bot')
- helper_bot_status, helper_bot_uptime = self.check_process_status('helper_bot')
-
- # Получаем эмодзи для дискового пространства
- disk_emoji = self._get_disk_space_emoji(system_info['disk_percent'])
-
- message = f"""🖥 **Статус Сервера** | {system_info['current_time']}
----------------------------------
-**📊 Общая нагрузка:**
-CPU: {system_info['cpu_percent']}% | LA: {system_info['load_avg_1m']} / {system_info['cpu_count']} | IO Wait: {system_info['disk_percent']}%
-
-**💾 Память:**
-RAM: {system_info['ram_used']}/{system_info['ram_total']} GB ({system_info['ram_percent']}%)
-Swap: {system_info['swap_used']}/{system_info['swap_total']} GB ({system_info['swap_percent']}%)
-
-**🗂️ Дисковое пространство:**
-Диск (/): {system_info['disk_used']}/{system_info['disk_total']} GB ({system_info['disk_percent']}%) {disk_emoji}
-
-**💿 Диск I/O:**
-Read: {system_info['disk_read_speed']} | Write: {system_info['disk_write_speed']}
-Диск загружен: {system_info['disk_io_percent']}%
-
-**🤖 Процессы:**
-{voice_bot_status} voice-bot - {voice_bot_uptime}
-{helper_bot_status} helper-bot - {helper_bot_uptime}
----------------------------------
-⏰ Uptime сервера: {system_info['system_uptime']}"""
-
- await self.bot.send_message(
- chat_id=self.group_for_logs,
- text=message,
- parse_mode='HTML'
- )
- logger.info("Статус сервера отправлен")
-
- except Exception as e:
- logger.error(f"Ошибка при отправке статуса сервера: {e}")
-
- async def send_alert_message(self, metric_name: str, current_value: float, details: str):
- """Отправка сообщения об алерте"""
- try:
- message = f"""🚨 **ALERT: Высокая нагрузка на сервере!**
----------------------------------
-**Показатель:** {metric_name}
-**Текущее значение:** {current_value}% ⚠️
-**Пороговое значение:** 80%
-
-**Детали:**
-{details}
-
-**Сервер:** `{psutil.os.uname().nodename}`
-**Время:** `{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}`
----------------------------------"""
-
- await self.bot.send_message(
- chat_id=self.important_logs,
- text=message,
- parse_mode='HTML'
- )
- logger.warning(f"Алерт отправлен: {metric_name} - {current_value}%")
-
- except Exception as e:
- logger.error(f"Ошибка при отправке алерта: {e}")
-
- async def send_recovery_message(self, metric_name: str, current_value: float, peak_value: float):
- """Отправка сообщения о восстановлении"""
- try:
- message = f"""✅ **RECOVERY: Нагрузка нормализовалась**
----------------------------------
-**Показатель:** {metric_name}
-**Текущее значение:** {current_value}% ✔️
-**Было превышение:** До {peak_value}%
-
-**Сервер:** `{psutil.os.uname().nodename}`
-**Время:** `{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}`
----------------------------------"""
-
- await self.bot.send_message(
- chat_id=self.important_logs,
- text=message,
- parse_mode='HTML'
- )
- logger.info(f"Сообщение о восстановлении отправлено: {metric_name}")
-
- except Exception as e:
- logger.error(f"Ошибка при отправке сообщения о восстановлении: {e}")
-
- async def monitor_loop(self):
- """Основной цикл мониторинга"""
- logger.info(f"Модуль мониторинга сервера запущен на {self.os_type.upper()}")
-
- # Отправляем сообщение о запуске при первом запуске
- if self.should_send_startup_status():
- await self.send_startup_message()
-
- while True:
- try:
- system_info = self.get_system_info()
- if not system_info:
- await asyncio.sleep(60)
- continue
-
- # Проверка алертов
- alerts, recoveries = self.check_alerts(system_info)
-
- # Отправка алертов
- for metric_type, value, details in alerts:
- metric_names = {
- 'cpu': 'Использование CPU',
- 'ram': 'Использование оперативной памяти',
- 'disk': 'Заполнение диска (/)'
- }
- await self.send_alert_message(metric_names[metric_type], value, details)
-
- # Отправка сообщений о восстановлении
- for metric_type, value in recoveries:
- metric_names = {
- 'cpu': 'Использование CPU',
- 'ram': 'Использование оперативной памяти',
- 'disk': 'Заполнение диска (/)'
- }
- # Находим пиковое значение (используем 80% как пример)
- await self.send_recovery_message(metric_names[metric_type], value, 80.0)
-
- # Отправка статуса каждые 30 минут в 00 и 30 минут часа
- if self.should_send_status():
- await self.send_status_message(system_info)
-
- # Пауза между проверками (1 минута)
- await asyncio.sleep(60)
-
- except Exception as e:
- logger.error(f"Ошибка в цикле мониторинга: {e}")
- await asyncio.sleep(60)
diff --git a/helper_bot/server_prometheus.py b/helper_bot/server_prometheus.py
new file mode 100644
index 0000000..9aa140a
--- /dev/null
+++ b/helper_bot/server_prometheus.py
@@ -0,0 +1,126 @@
+
+"""
+HTTP server for metrics endpoint integration with centralized Prometheus monitoring.
+Provides /metrics endpoint and health check for the bot.
+"""
+
+import asyncio
+import logging
+from aiohttp import web
+from typing import Optional
+from .utils.metrics import metrics
+
+
+class MetricsServer:
+    """Small aiohttp application serving /metrics (Prometheus) and /health."""
+
+    def __init__(self, host: str = '0.0.0.0', port: int = 8080):
+        self.host = host
+        self.port = port
+        self.app = web.Application()
+        # runner/site are set only while the server is running (see start/stop).
+        self.runner: Optional[web.AppRunner] = None
+        self.site: Optional[web.TCPSite] = None
+        self.logger = logging.getLogger(__name__)
+
+        # Register the HTTP routes.
+        self.app.router.add_get('/metrics', self.metrics_handler)
+        self.app.router.add_get('/health', self.health_handler)
+
+    async def metrics_handler(self, request: web.Request) -> web.Response:
+        """Serve the output of metrics.get_metrics(); answer 500 on any failure."""
+        try:
+            # Compare with None: a falsy-but-present metrics object is still usable.
+            if metrics is None:
+                self.logger.error("Metrics object is not available")
+                return web.Response(
+                    text="Metrics not available",
+                    status=500
+                )
+
+            # Runs on every scrape, so log at DEBUG rather than INFO.
+            metrics_data = metrics.get_metrics()
+            self.logger.debug("Generated metrics: %d bytes", len(metrics_data))
+
+            return web.Response(
+                body=metrics_data,
+                content_type='text/plain; version=0.0.4'
+            )
+        except Exception as e:
+            # logger.exception logs at ERROR level and appends the traceback.
+            self.logger.exception(f"Error generating metrics: {e}")
+            return web.Response(
+                text=f"Error generating metrics: {e}",
+                status=500
+            )
+
+    async def health_handler(self, request: web.Request) -> web.Response:
+        """Handle /health: answer with the plain-text body "OK"."""
+        return web.Response(
+            text="OK",
+            content_type='text/plain'
+        )
+
+    async def start(self) -> None:
+        """Bind host:port and start serving; on failure clean up and re-raise."""
+        runner = web.AppRunner(self.app)
+        try:
+            await runner.setup()
+            site = web.TCPSite(runner, self.host, self.port)
+            await site.start()
+        except Exception as e:
+            self.logger.error(f"Failed to start metrics server: {e}")
+            # Release the half-initialised runner (e.g. the port is already in use).
+            await runner.cleanup()
+            raise
+
+        self.runner, self.site = runner, site
+        self.logger.info(f"Metrics server started on {self.host}:{self.port}")
+        self.logger.info("Available endpoints:")
+        self.logger.info("  - /metrics - Prometheus metrics")
+        self.logger.info("  - /health - Health check")
+
+    async def stop(self) -> None:
+        """Stop the server if it is running; errors are logged, not raised."""
+        # Detach the handles first so that a repeated stop() finds nothing to do.
+        site, self.site = self.site, None
+        runner, self.runner = self.runner, None
+        try:
+            if site:
+                await site.stop()
+                self.logger.info("Metrics server site stopped")
+
+            if runner:
+                await runner.cleanup()
+                self.logger.info("Metrics server runner cleaned up")
+        except Exception as e:
+            self.logger.error(f"Error stopping metrics server: {e}")
+
+    async def __aenter__(self):
+        """Async context manager entry: start the server and return self."""
+        await self.start()
+        return self
+
+    async def __aexit__(self, exc_type, exc_val, exc_tb):
+        """Async context manager exit: stop the server."""
+        await self.stop()
+
+
+# Global server instance for use in main.py
+metrics_server: Optional[MetricsServer] = None
+
+
+async def start_metrics_server(host: str = '0.0.0.0', port: int = 8080) -> MetricsServer:
+    """Create a MetricsServer, publish it as the module-level instance, start it, return it."""
+    global metrics_server
+    server = metrics_server = MetricsServer(host, port)
+    await server.start()
+    return server
+
+
+async def stop_metrics_server() -> None:
+    """Stop the module-level metrics server, if one is set, then clear the reference."""
+    global metrics_server
+    if metrics_server is not None:
+        await metrics_server.stop()
+        metrics_server = None
diff --git a/prometheus.yml b/prometheus.yml
deleted file mode 100644
index fd60240..0000000
--- a/prometheus.yml
+++ /dev/null
@@ -1,26 +0,0 @@
-global:
- scrape_interval: 15s
- evaluation_interval: 15s
-
-rule_files:
- # - "first_rules.yml"
- # - "second_rules.yml"
-
-scrape_configs:
- - job_name: 'telegram-bot'
- static_configs:
- - targets: ['telegram-bot:8000']
- metrics_path: '/metrics'
- scrape_interval: 10s
- scrape_timeout: 10s
- honor_labels: true
-
- - job_name: 'prometheus'
- static_configs:
- - targets: ['localhost:9090']
-
-alerting:
- alertmanagers:
- - static_configs:
- - targets:
- # - alertmanager:9093
diff --git a/run_helper.py b/run_helper.py
index 82a960a..0bb12ff 100644
--- a/run_helper.py
+++ b/run_helper.py
@@ -10,40 +10,26 @@ if CURRENT_DIR not in sys.path:
from helper_bot.main import start_bot
from helper_bot.utils.base_dependency_factory import get_global_instance
-from helper_bot.server_monitor import ServerMonitor
from helper_bot.utils.auto_unban_scheduler import get_auto_unban_scheduler
-async def start_monitoring(bdf, bot):
- """Запуск модуля мониторинга сервера"""
- monitor = ServerMonitor(
- bot=bot,
- group_for_logs=bdf.settings['Telegram']['group_for_logs'],
- important_logs=bdf.settings['Telegram']['important_logs']
- )
- return monitor
-
-
async def main():
"""Основная функция запуска"""
bdf = get_global_instance()
- # Создаем бота для мониторинга
+ # Создаем бота для автоматического разбана
from aiogram import Bot
from aiogram.client.default import DefaultBotProperties
- monitor_bot = Bot(
+ auto_unban_bot = Bot(
token=bdf.settings['Telegram']['bot_token'],
default=DefaultBotProperties(parse_mode='HTML'),
timeout=30.0
)
- # Создаем экземпляр монитора
- monitor = await start_monitoring(bdf, monitor_bot)
-
# Инициализируем планировщик автоматического разбана
auto_unban_scheduler = get_auto_unban_scheduler()
- auto_unban_scheduler.set_bot(monitor_bot)
+ auto_unban_scheduler.set_bot(auto_unban_bot)
auto_unban_scheduler.start_scheduler()
# Инициализируем метрики ПОСЛЕ импорта всех модулей
@@ -63,9 +49,8 @@ async def main():
signal.signal(signal.SIGINT, signal_handler)
signal.signal(signal.SIGTERM, signal_handler)
- # Запускаем бота, мониторинг и метрики
+ # Запускаем бота и метрики
bot_task = asyncio.create_task(start_bot(bdf))
- monitor_task = asyncio.create_task(monitor.monitor_loop())
metrics_task = asyncio.create_task(metrics_manager.start())
try:
@@ -76,13 +61,6 @@ async def main():
except KeyboardInterrupt:
print("Получен сигнал завершения...")
finally:
- print("Отправляем сообщение об отключении...")
- try:
- # Отправляем сообщение об отключении
- await monitor.send_shutdown_message()
- except Exception as e:
- print(f"Ошибка при отправке сообщения об отключении: {e}")
-
print("Останавливаем планировщик автоматического разбана...")
auto_unban_scheduler.stop_scheduler()
@@ -92,17 +70,16 @@ async def main():
print("Останавливаем задачи...")
# Отменяем задачи
bot_task.cancel()
- monitor_task.cancel()
metrics_task.cancel()
# Ждем завершения задач
try:
- await asyncio.gather(bot_task, monitor_task, metrics_task, return_exceptions=True)
+ await asyncio.gather(bot_task, metrics_task, return_exceptions=True)
except Exception as e:
print(f"Ошибка при остановке задач: {e}")
# Закрываем сессию бота
- await monitor_bot.session.close()
+ await auto_unban_bot.session.close()
print("Бот корректно остановлен")
diff --git a/tests/test_monitor.py b/tests/test_monitor.py
deleted file mode 100644
index 41f39b5..0000000
--- a/tests/test_monitor.py
+++ /dev/null
@@ -1,83 +0,0 @@
-#!/usr/bin/env python3
-"""
-Тестовый скрипт для проверки модуля мониторинга сервера
-"""
-import pytest
-import asyncio
-import sys
-import os
-
-# Добавляем путь к проекту
-sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
-
-from helper_bot.server_monitor import ServerMonitor
-
-
-class MockBot:
- """Мок объект бота для тестирования"""
-
- async def send_message(self, chat_id, text, parse_mode=None):
- print(f"\n{'='*60}")
- print(f"Отправка в чат: {chat_id}")
- print(f"Текст сообщения:")
- print(text)
- print(f"{'='*60}\n")
-
-
-@pytest.mark.asyncio
-async def test_monitor():
- """Тестирование модуля мониторинга"""
- print("🧪 Тестирование модуля мониторинга сервера")
- print("=" * 60)
-
- # Создаем мок бота
- mock_bot = MockBot()
-
- # Создаем монитор
- monitor = ServerMonitor(
- bot=mock_bot,
- group_for_logs="-123456789",
- important_logs="-987654321"
- )
-
- print("📊 Получение информации о системе...")
- system_info = monitor.get_system_info()
-
- if system_info:
- print("✅ Информация о системе получена успешно")
- print(f"CPU: {system_info['cpu_percent']}%")
- print(f"RAM: {system_info['ram_percent']}%")
- print(f"Disk: {system_info['disk_percent']}%")
- print(f"Uptime: {system_info['system_uptime']}")
-
- print("\n🤖 Проверка статуса процессов...")
- voice_status, voice_uptime = monitor.check_process_status('voice_bot')
- helper_status, helper_uptime = monitor.check_process_status('helper_bot')
- print(f"Voice Bot: {voice_status} - {voice_uptime}")
- print(f"Helper Bot: {helper_status} - {helper_uptime}")
-
- print("\n📝 Тестирование отправки статуса...")
- await monitor.send_status_message(system_info)
-
- print("\n🚨 Тестирование отправки алерта...")
- await monitor.send_alert_message(
- "Использование CPU",
- 85.5,
- "Нагрузка за 1 мин: 2.5"
- )
-
- print("\n✅ Тестирование отправки сообщения о восстановлении...")
- await monitor.send_recovery_message(
- "Использование CPU",
- 70.0,
- 85.5
- )
-
- else:
- print("❌ Не удалось получить информацию о системе")
-
- print("\n🎯 Тестирование завершено!")
-
-
-if __name__ == "__main__":
- asyncio.run(test_monitor())