From 40968dd075267cf5d82f17d0c9eadaca38f39cef Mon Sep 17 00:00:00 2001 From: Andrey Date: Fri, 5 Sep 2025 01:29:28 +0300 Subject: [PATCH] WIP: Development changes moved from master - Modified Grafana dashboards - Updated message sender and metrics collector - Added new rate limiting dashboard - Removed count_tests.py --- count_tests.py | 77 -- .../grafana-rate-limiting-dashboard.json | 1227 +++++++++++++++++ .../dashboards/server-dashboard.json | 157 ++- .../dashboards/telegram-bot-dashboards.json | 184 +++ infra/monitoring/message_sender.py | 18 +- tests/infra/test_metrics_collector.py | 10 +- tests/infra/test_prometheus_config.py | 4 +- 7 files changed, 1584 insertions(+), 93 deletions(-) delete mode 100644 count_tests.py create mode 100644 infra/grafana/provisioning/dashboards/grafana-rate-limiting-dashboard.json diff --git a/count_tests.py b/count_tests.py deleted file mode 100644 index 6932b7d..0000000 --- a/count_tests.py +++ /dev/null @@ -1,77 +0,0 @@ -#!/usr/bin/env python3 -""" -Скрипт для подсчета количества тестов в проекте -""" - -import subprocess -import sys -import os - -def count_tests_in_directory(directory): - """Подсчитывает количество тестов в указанной директории""" - try: - # Запускаем pytest --collect-only для подсчета тестов - result = subprocess.run( - [sys.executable, '-m', 'pytest', directory, '--collect-only', '-q'], - capture_output=True, - text=True, - cwd=os.getcwd() - ) - - if result.returncode == 0: - # Ищем строку с количеством собранных тестов - for line in result.stdout.split('\n'): - if 'collected' in line: - # Извлекаем число из строки вида "78 collected" - parts = line.strip().split() - for part in parts: - if part.isdigit(): - return int(part) - return 0 - except Exception as e: - print(f"Ошибка при подсчете тестов в {directory}: {e}", file=sys.stderr) - return 0 - -def count_bot_tests(): - """Подсчитывает количество тестов бота""" - try: - # Переходим в директорию бота и запускаем pytest - bot_dir = os.path.join(os.getcwd(), 'bots', 'telegram-helper-bot') - result = subprocess.run( - [sys.executable, '-m', 'pytest', 'tests/', '--collect-only', '-q'], - capture_output=True, - text=True, - cwd=bot_dir - ) - - if result.returncode == 0: - # Ищем строку с количеством собранных тестов - for line in result.stdout.split('\n'): - if 'collected' in line: - # Извлекаем число из строки вида "201 collected" - parts = line.strip().split() - for part in parts: - if part.isdigit(): - return int(part) - return 0 - except Exception as e: - print(f"Ошибка при подсчете тестов бота: {e}", file=sys.stderr) - return 0 - -def main(): - """Основная функция""" - # Подсчитываем тесты инфраструктуры - infra_tests = count_tests_in_directory('tests/infra/') - - # Подсчитываем тесты бота - bot_tests = count_bot_tests() - - total_tests = infra_tests + bot_tests - - # Выводим результат в формате для Makefile - print(f"{infra_tests}") - print(f"{bot_tests}") - print(f"{total_tests}") - -if __name__ == '__main__': - main() diff --git a/infra/grafana/provisioning/dashboards/grafana-rate-limiting-dashboard.json b/infra/grafana/provisioning/dashboards/grafana-rate-limiting-dashboard.json new file mode 100644 index 0000000..ec86d69 --- /dev/null +++ b/infra/grafana/provisioning/dashboards/grafana-rate-limiting-dashboard.json @@ -0,0 +1,1227 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": null, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "yellow", + "value": 0.9 + }, + { + "color": "green", + "value": 0.95 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 0, + "y": 0 + }, + "id": 1, + "options": { + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "pluginVersion": "8.0.0", + "targets": [ + { + "expr": "avg(rate_limit_success_rate)", + "refId": "A" + } + ], + "title": "Rate Limit Success Rate", + "type": "gauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 50 + }, + { + "color": "red", + "value": 100 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 6, + "y": 0 + }, + "id": 2, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "8.0.0", + "targets": [ + { + "expr": "rate_limit_active_chats", + "refId": "A" + } + ], + "title": "Active Chats", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 100 + }, + { + "color": "red", + "value": 200 + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 12, + "y": 0 + }, + "id": 3, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "8.0.0", + "targets": [ + { + "expr": "sum(rate_limit_requests_per_minute)", + "refId": "A" + } + ], + "title": "Total Requests/min", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 2 + }, + { + "color": "red", + "value": 5 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 18, + "y": 0 + }, + "id": 4, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "8.0.0", + "targets": [ + { + "expr": "avg(rate_limit_avg_wait_time_seconds)", + "refId": "A" + } + ], + "title": "Avg Wait Time", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "vis": false + }, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 8 + }, + "id": 5, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "expr": "rate_limit_success_rate", + "legendFormat": "Chat {{chat_id}}", + "refId": "A" + }, + { + "expr": "avg(rate_limit_success_rate)", + "legendFormat": "Average", + "refId": "B" + } + ], + "title": "Success Rate Trend", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "vis": false + }, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 8 + }, + "id": 6, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "expr": "rate_limit_requests_per_minute", + "legendFormat": "Chat {{chat_id}}", + "refId": "A" + }, + { + "expr": "sum(rate_limit_requests_per_minute)", + "legendFormat": "Total", + "refId": "B" + } + ], + "title": "Requests per Minute Trend", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "vis": false + }, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 16 + }, + "id": 7, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "expr": "rate_limit_avg_wait_time_seconds", + "legendFormat": "Chat {{chat_id}}", + "refId": "A" + }, + { + "expr": "avg(rate_limit_avg_wait_time_seconds)", + "legendFormat": "Average", + "refId": "B" + } + ], + "title": "Wait Time Trend", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "vis": false + }, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 16 + }, + "id": 8, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "expr": "rate_limit_total_requests", + "legendFormat": "Chat {{chat_id}}", + "refId": "A" + }, + { + "expr": "sum(rate_limit_total_requests)", + "legendFormat": "Total", + "refId": "B" + } + ], + "title": "Total Requests Trend", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "vis": false + } + }, + "mappings": [] + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 24 + }, + "id": 9, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom" + }, + "pieType": "pie", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "expr": "sum by(error_type) (rate_limit_total_errors)", + "legendFormat": "{{error_type}}", + "refId": "A" + } + ], + "title": "Error Rate by Type", + "type": "piechart" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "vis": false + }, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 24 + }, + "id": 10, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "expr": "sum by(error_type) (rate(rate_limit_errors_total[5m]))", + "legendFormat": "{{error_type}}", + "refId": "A" + } + ], + "title": "Error Trends", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "vis": false + }, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 32 + }, + "id": 11, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "expr": "histogram_quantile(0.95, rate(rate_limit_wait_duration_seconds_bucket[5m]))", + "legendFormat": "P95", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.99, rate(rate_limit_wait_duration_seconds_bucket[5m]))", + "legendFormat": "P99", + "refId": "B" + } + ], + "title": "Wait Time Percentiles", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "vis": false + }, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 32 + }, + "id": 12, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "expr": "topk(10, rate_limit_requests_per_minute)", + "legendFormat": "Chat {{chat_id}}", + "refId": "A" + } + ], + "title": "Request Rate by Chat", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 1 + }, + { + "color": "red", + "value": 5 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 0, + "y": 40 + }, + "id": 13, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "8.0.0", + "targets": [ + { + "expr": "count(rate_limit_success_rate < 0.9)", + "legendFormat": "Low Success Rate", + "refId": "A" + } + ], + "title": "Rate Limit Alerts", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 5 + }, + { + "color": "red", + "value": 10 + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 6, + "y": 40 + }, + "id": 14, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "8.0.0", + "targets": [ + { + "expr": "sum(rate(rate_limit_errors_total{error_type=\"RetryAfter\"}[5m]))", + "legendFormat": "RetryAfter/min", + "refId": "A" + } + ], + "title": "Error Rate Alerts", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 5 + }, + { + "color": "red", + "value": 10 + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 12, + "y": 40 + }, + "id": 15, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "8.0.0", + "targets": [ + { + "expr": "sum(rate(rate_limit_errors_total{error_type=\"TelegramAPIError\"}[5m]))", + "legendFormat": "API Errors/min", + "refId": "A" + } + ], + "title": "API Error Rate", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 1 + }, + { + "color": "red", + "value": 5 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 18, + "y": 40 + }, + "id": 16, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "8.0.0", + "targets": [ + { + "expr": "count(rate_limit_avg_wait_time_seconds > 5)", + "legendFormat": "High Wait Time", + "refId": "A" + } + ], + "title": "Wait Time Alerts", + "type": "stat" + } + ], + "refresh": "5s", + "schemaVersion": 38, + "style": "dark", + "tags": [ + "telegram", + "bot", + "rate-limiting", + "monitoring" + ], + "templating": { + "list": [] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Telegram Bot Rate Limiting Dashboard", + "uid": "telegram-bot-rate-limiting", + "version": 1, + "weekStart": "" +} diff --git a/infra/grafana/provisioning/dashboards/server-dashboard.json b/infra/grafana/provisioning/dashboards/server-dashboard.json index 62ce6e4..5ebc127 100644 --- a/infra/grafana/provisioning/dashboards/server-dashboard.json +++ b/infra/grafana/provisioning/dashboards/server-dashboard.json @@ -84,7 +84,7 @@ "unit": "percent" } }, - "gridPos": {"h": 8, "w": 6, "x": 12, "y": 0} + "gridPos": {"h": 8, "w": 6, "x": 18, "y": 16} }, { "id": 4, @@ -139,7 +139,7 @@ } } }, - "gridPos": {"h": 8, "w": 12, "x": 0, "y": 8} + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 16} }, { "id": 5, @@ -159,7 +159,7 @@ "unit": "s" } }, - "gridPos": {"h": 8, "w": 6, "x": 12, "y": 8} + "gridPos": {"h": 8, "w": 6, "x": 12, "y": 16} }, { "id": 6, @@ -186,7 +186,7 @@ "unit": "percent" } }, - "gridPos": {"h": 8, "w": 6, "x": 0, "y": 16} + "gridPos": {"h": 8, "w": 6, "x": 12, "y": 16} }, { "id": 7, @@ -213,7 +213,154 @@ "unit": "percent" } }, - "gridPos": {"h": 8, "w": 6, "x": 6, "y": 16} + "gridPos": {"h": 8, "w": 6, "x": 18, "y": 16} + }, + { + "id": 8, + "title": "CPU Usage Gauge", + "type": "gauge", + "targets": [ + { + "expr": "cpu_usage_percent", + "legendFormat": "CPU %" + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "thresholds": { + "steps": [ + {"color": "green", "value": null}, + {"color": "yellow", "value": 70}, + {"color": "red", "value": 90} + ] + }, + "unit": "percent", + "min": 0, + "max": 100 + } + }, + "options": { + "orientation": "auto", + "reduceOptions": { + "values": false, + "calcs": ["lastNotNull"], + "fields": "" + }, + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "gridPos": {"h": 8, "w": 6, "x": 12, "y": 0} + }, + { + "id": 9, + "title": "RAM Usage Gauge", + "type": "gauge", + "targets": [ + { + "expr": "ram_usage_percent", + "legendFormat": "RAM %" + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "thresholds": { + "steps": [ + {"color": "green", "value": null}, + {"color": "yellow", "value": 70}, + {"color": "red", "value": 90} + ] + }, + "unit": "percent", + "min": 0, + "max": 100 + } + }, + "options": { + "orientation": "auto", + "reduceOptions": { + "values": false, + "calcs": ["lastNotNull"], + "fields": "" + }, + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "gridPos": {"h": 8, "w": 6, "x": 18, "y": 0} + }, + { + "id": 10, + "title": "System Resources Overview", + "type": "timeseries", + "targets": [ + { + "expr": "cpu_usage_percent", + "legendFormat": "CPU %" + }, + { + "expr": "ram_usage_percent", + "legendFormat": "RAM %" + }, + { + "expr": "disk_usage_percent", + "legendFormat": "Disk %" + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "Usage %", + "axisPlacement": "left", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "vis": false + }, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "unit": "percent", + "min": 0, + "max": 100 + } + }, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 8} } ], "time": { diff --git a/infra/grafana/provisioning/dashboards/telegram-bot-dashboards.json b/infra/grafana/provisioning/dashboards/telegram-bot-dashboards.json index 311eae9..a239e4c 100644 --- a/infra/grafana/provisioning/dashboards/telegram-bot-dashboards.json +++ b/infra/grafana/provisioning/dashboards/telegram-bot-dashboards.json @@ -899,6 +899,190 @@ ], "title": "Database Query Time (P95)", "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "vis": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 40 + }, + "id": 11, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "expr": "topk(5, sum by(content_type) (rate(media_processing_total[5m])))", + "refId": "A" + } + ], + "title": "Top Media Types", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "vis": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 40 + }, + "id": 12, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "expr": "histogram_quantile(0.5, rate(file_download_size_bytes_bucket[5m])) by (content_type)", + "refId": "A", + "legendFormat": "{{content_type}} P50" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "expr": "histogram_quantile(0.95, rate(file_download_size_bytes_bucket[5m])) by (content_type)", + "refId": "B", + "legendFormat": "{{content_type}} P95" + } + ], + "title": "File Download Size Distribution", + "type": "timeseries" } ], "refresh": "5s", diff --git a/infra/monitoring/message_sender.py b/infra/monitoring/message_sender.py index a2368c1..27fa026 100644 --- a/infra/monitoring/message_sender.py +++ b/infra/monitoring/message_sender.py @@ -74,7 +74,6 @@ class MessageSender: if self.last_status_time is None: logger.info(f"should_send_status: last_status_time is None, отправляем статус") - self.last_status_time = now return True # Вычисляем разницу в минутах @@ -84,7 +83,6 @@ class MessageSender: # Проверяем, что прошло N минут с последней отправки if time_diff_minutes >= self.status_update_interval_minutes: logger.info(f"should_send_status: отправляем статус (прошло {time_diff_minutes:.1f} минут)") - self.last_status_time = now return True logger.info(f"should_send_status: статус не отправляем (прошло {time_diff_minutes:.1f} минут)") @@ -92,7 +90,12 @@ class MessageSender: def should_send_startup_status(self) -> bool: """Проверка, нужно ли отправить статус при запуске""" - return self.last_status_time is None + # Отправляем статус при запуске только если он еще не был отправлен + if self.last_status_time is None: + logger.info("should_send_startup_status: отправляем статус при запуске") + return True + logger.info("should_send_startup_status: статус уже был отправлен, пропускаем") + return False def _get_disk_space_emoji(self, disk_percent: float) -> str: """Получение эмодзи для дискового пространства""" @@ -257,7 +260,14 @@ Read: {system_info['disk_read_speed']} | Write: {system_info['disk_wri return False status_message = self.get_status_message(system_info) - return await self.send_telegram_message(self.group_for_logs, status_message) + success = await self.send_telegram_message(self.group_for_logs, status_message) + + # Обновляем время последней отправки только при успешной отправке + if success: + self.last_status_time = datetime.now() + logger.info("send_status_message: время последней отправки обновлено") + + return success except Exception as e: logger.error(f"Ошибка при отправке статуса: {e}") diff --git a/tests/infra/test_metrics_collector.py b/tests/infra/test_metrics_collector.py index acb2cc1..77d8236 100644 --- a/tests/infra/test_metrics_collector.py +++ b/tests/infra/test_metrics_collector.py @@ -252,21 +252,21 @@ class TestMetricsCollector: def test_check_process_status_pid_file(self, metrics_collector, tmp_path): """Тест проверки статуса процесса по PID файлу""" # Создаем временный PID файл - pid_file = tmp_path / "helper_bot.pid" + pid_file = tmp_path / "test_bot.pid" pid_file.write_text("12345") # Временно заменяем путь к PID файлу original_pid_files = metrics_collector.pid_files.copy() - metrics_collector.pid_files['helper_bot'] = str(pid_file) + metrics_collector.pid_files['test_bot'] = str(pid_file) - with patch('metrics_collector.psutil.pid_exists', return_value=True), \ - patch('metrics_collector.psutil.Process') as mock_process: + with patch('infra.monitoring.metrics_collector.psutil.pid_exists', return_value=True), \ + patch('infra.monitoring.metrics_collector.psutil.Process') as mock_process: mock_proc = Mock() mock_proc.create_time.return_value = time.time() - 3600 mock_process.return_value = mock_proc - status, uptime = metrics_collector.check_process_status('helper_bot') + status, uptime = metrics_collector.check_process_status('test_bot') assert status == "✅" assert "Uptime" in uptime diff --git a/tests/infra/test_prometheus_config.py b/tests/infra/test_prometheus_config.py index 26ea982..9d478a8 100644 --- a/tests/infra/test_prometheus_config.py +++ b/tests/infra/test_prometheus_config.py @@ -111,7 +111,7 @@ class TestPrometheusConfig: assert len(static_configs) > 0, "Should have at least one static config" targets = static_configs[0].get('targets', []) - assert 'host.docker.internal:9091' in targets, "Should scrape host.docker.internal:9091" + assert 'bots_server_monitor:9091' in targets, "Should scrape bots_server_monitor:9091" def test_telegram_bot_job(self, prometheus_config): """Тест job для telegram-helper-bot""" @@ -145,7 +145,7 @@ class TestPrometheusConfig: # Проверяем targets targets = static_configs[0].get('targets', []) - assert 'host.docker.internal:8080' in targets, "Should scrape host.docker.internal:8080" + assert 'bots_telegram_bot:8080' in targets, "Should scrape bots_telegram_bot:8080" # Проверяем labels labels = static_configs[0].get('labels', {})