385 lines
16 KiB
Python
385 lines
16 KiB
Python
"""
Service for working with Prometheus metrics.
"""
|
||
import functools
import inspect
import time
from typing import Optional, Callable

from loguru import logger
from prometheus_client import Counter, Histogram, Gauge, Info, generate_latest, CONTENT_TYPE_LATEST
|
||
|
||
|
||
|
||
|
||
class MetricsService:
    """Service that creates and updates the bot's Prometheus metrics.

    Every metric object is built once in ``_init_metrics`` and stored as an
    instance attribute. The remaining methods are thin helpers that update
    one or more of those metrics, plus two accessors used to expose the
    metrics over HTTP.
    """

    # Bucket boundaries (seconds) shared by the message, question, answer and
    # HTTP request duration histograms.
    _PROCESSING_BUCKETS = (0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0)

    def __init__(self) -> None:
        """Create the service and build all metric objects."""
        self._init_metrics()

    def _init_metrics(self) -> None:
        """Instantiate every metric and store it on the instance."""

        # Bot information
        self.bot_info = Info('anon_bot_info', 'Information about the AnonBot')
        self.bot_info.info({
            'version': '1.0.0',
            'service': 'anon-bot'
        })

        # Message counters
        self.messages_total = Counter(
            'anon_bot_messages_total',
            'Total number of messages processed',
            ['message_type', 'status']
        )

        # Question counters
        self.questions_total = Counter(
            'anon_bot_questions_total',
            'Total number of questions received',
            ['status']
        )

        # Answer counters
        self.answers_total = Counter(
            'anon_bot_answers_total',
            'Total number of answers sent',
            ['status']
        )

        # User counters
        self.users_total = Counter(
            'anon_bot_users_total',
            'Total number of users',
            ['action']
        )

        # Message processing time
        self.message_processing_time = Histogram(
            'anon_bot_message_processing_seconds',
            'Time spent processing messages',
            ['message_type'],
            buckets=self._PROCESSING_BUCKETS
        )

        # Question processing time
        self.question_processing_time = Histogram(
            'anon_bot_question_processing_seconds',
            'Time spent processing questions',
            buckets=self._PROCESSING_BUCKETS
        )

        # Answer processing time
        self.answer_processing_time = Histogram(
            'anon_bot_answer_processing_seconds',
            'Time spent processing answers',
            buckets=self._PROCESSING_BUCKETS
        )

        # Active users
        self.active_users = Gauge(
            'anon_bot_active_users',
            'Number of active users'
        )

        # Active questions
        self.active_questions = Gauge(
            'anon_bot_active_questions',
            'Number of active questions'
        )

        # Errors
        self.errors_total = Counter(
            'anon_bot_errors_total',
            'Total number of errors',
            ['error_type', 'component']
        )

        # HTTP requests to endpoints
        self.http_requests_total = Counter(
            'anon_bot_http_requests_total',
            'Total number of HTTP requests',
            ['method', 'endpoint', 'status_code']
        )

        # Database performance metrics
        self.db_queries_total = Counter(
            'anon_bot_db_queries_total',
            'Total number of database queries',
            ['operation', 'table', 'status']
        )

        self.db_query_duration = Histogram(
            'anon_bot_db_query_duration_seconds',
            'Database query duration',
            ['operation', 'table'],
            buckets=[0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0]
        )

        self.db_connections_active = Gauge(
            'anon_bot_db_connections_active',
            'Number of active database connections'
        )

        self.db_connections_total = Counter(
            'anon_bot_db_connections_total',
            'Total number of database connections',
            ['status']
        )

        # Connection pool metrics
        self.db_pool_size = Gauge(
            'anon_bot_db_pool_size',
            'Database connection pool size'
        )

        self.db_pool_created_connections = Gauge(
            'anon_bot_db_pool_created_connections',
            'Number of created connections in pool'
        )

        self.db_pool_available_connections = Gauge(
            'anon_bot_db_pool_available_connections',
            'Number of available connections in pool'
        )

        self.db_pool_utilization_percent = Gauge(
            'anon_bot_db_pool_utilization_percent',
            'Database connection pool utilization percentage'
        )

        # Pagination metrics
        self.pagination_requests_total = Counter(
            'anon_bot_pagination_requests_total',
            'Total number of pagination requests',
            ['entity_type', 'method']
        )

        self.pagination_duration = Histogram(
            'anon_bot_pagination_duration_seconds',
            'Pagination operation duration',
            ['entity_type', 'method'],
            buckets=[0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0]
        )

        self.pagination_errors_total = Counter(
            'anon_bot_pagination_errors_total',
            'Total number of pagination errors',
            ['entity_type', 'error_type']
        )

        # Batch operation metrics
        self.batch_operations_total = Counter(
            'anon_bot_batch_operations_total',
            'Total number of batch operations',
            ['operation', 'table', 'status']
        )

        self.batch_operation_duration = Histogram(
            'anon_bot_batch_operation_duration_seconds',
            'Batch operation duration',
            ['operation', 'table'],
            buckets=[0.01, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0]
        )

        self.batch_operation_size = Histogram(
            'anon_bot_batch_operation_size',
            'Batch operation size (number of items)',
            ['operation', 'table'],
            buckets=[1, 5, 10, 25, 50, 100, 250, 500, 1000]
        )

        # HTTP endpoint response time
        self.http_request_duration = Histogram(
            'anon_bot_http_request_duration_seconds',
            'HTTP request duration',
            ['method', 'endpoint'],
            buckets=self._PROCESSING_BUCKETS
        )

        logger.info("Prometheus metrics initialized")

    def increment_messages(self, message_type: str, status: str = "success") -> None:
        """Increment the message counter for ``message_type`` and ``status``."""
        self.messages_total.labels(message_type=message_type, status=status).inc()

    def increment_questions(self, status: str = "received") -> None:
        """Increment the question counter for ``status``."""
        self.questions_total.labels(status=status).inc()

    def increment_answers(self, status: str = "sent") -> None:
        """Increment the answer counter for ``status``."""
        self.answers_total.labels(status=status).inc()

    def increment_users(self, action: str) -> None:
        """Increment the user counter for ``action``."""
        self.users_total.labels(action=action).inc()

    def increment_errors(self, error_type: str, component: str) -> None:
        """Increment the error counter for ``error_type`` and ``component``."""
        self.errors_total.labels(error_type=error_type, component=component).inc()

    def increment_http_requests(self, method: str, endpoint: str, status_code: int) -> None:
        """Increment the HTTP request counter for the given request labels."""
        self.http_requests_total.labels(
            method=method,
            endpoint=endpoint,
            status_code=status_code
        ).inc()

    def set_active_users(self, count: int) -> None:
        """Set the active users gauge to ``count``."""
        self.active_users.set(count)

    def set_active_questions(self, count: int) -> None:
        """Set the active questions gauge to ``count``."""
        self.active_questions.set(count)

    def record_message_processing_time(self, message_type: str, duration: float) -> None:
        """Observe ``duration`` in the message processing histogram."""
        self.message_processing_time.labels(message_type=message_type).observe(duration)

    def record_question_processing_time(self, duration: float) -> None:
        """Observe ``duration`` in the question processing histogram."""
        self.question_processing_time.observe(duration)

    def record_answer_processing_time(self, duration: float) -> None:
        """Observe ``duration`` in the answer processing histogram."""
        self.answer_processing_time.observe(duration)

    def record_http_request_duration(self, method: str, endpoint: str, duration: float) -> None:
        """Observe ``duration`` in the HTTP request duration histogram."""
        self.http_request_duration.labels(method=method, endpoint=endpoint).observe(duration)

    # Database metric helpers
    def record_db_query(self, operation: str, table: str, status: str, duration: float) -> None:
        """Count one database query and observe its duration."""
        self.db_queries_total.labels(operation=operation, table=table, status=status).inc()
        self.db_query_duration.labels(operation=operation, table=table).observe(duration)

    def record_db_connection(self, status: str) -> None:
        """Record a database connection event (intended for real pool connections only).

        The event is always counted in ``db_connections_total``. The active
        connections gauge moves up on ``"opened"`` and down on ``"closed"``;
        any other status leaves the gauge untouched.
        """
        self.db_connections_total.labels(status=status).inc()
        if status == "opened":
            self.db_connections_active.inc()
        elif status == "closed":
            self.db_connections_active.dec()

    def update_db_connections_from_pool(self, active_count: int) -> None:
        """Set the active connections gauge from the pool's own count."""
        # Overwrite whatever inc()/dec() accumulated with the value the pool reports.
        self.db_connections_active.set(active_count)

    def update_db_pool_metrics(self, pool_stats: dict) -> None:
        """Update the connection pool gauges from ``pool_stats``.

        Reads the keys ``pool_size``, ``created_connections``,
        ``available_connections`` and ``utilization_percent``; a missing key
        sets the corresponding gauge to 0.
        """
        self.db_pool_size.set(pool_stats.get("pool_size", 0))
        self.db_pool_created_connections.set(pool_stats.get("created_connections", 0))
        self.db_pool_available_connections.set(pool_stats.get("available_connections", 0))
        self.db_pool_utilization_percent.set(pool_stats.get("utilization_percent", 0))

    def record_pagination_time(self, entity_type: str, duration: float, method: str = "cursor") -> None:
        """Count one pagination request and observe its duration.

        Note that this increments ``pagination_requests_total`` itself, just
        like ``increment_pagination_requests`` does.
        """
        self.pagination_requests_total.labels(entity_type=entity_type, method=method).inc()
        self.pagination_duration.labels(entity_type=entity_type, method=method).observe(duration)

    def increment_pagination_requests(self, entity_type: str, method: str = "cursor") -> None:
        """Increment the pagination request counter."""
        self.pagination_requests_total.labels(entity_type=entity_type, method=method).inc()

    def increment_pagination_errors(self, entity_type: str, error_type: str = "unknown") -> None:
        """Increment the pagination error counter."""
        self.pagination_errors_total.labels(entity_type=entity_type, error_type=error_type).inc()

    def record_batch_operation(self, operation: str, table: str, status: str, duration: float, size: int) -> None:
        """Count one batch operation and observe its duration and item count."""
        self.batch_operations_total.labels(operation=operation, table=table, status=status).inc()
        self.batch_operation_duration.labels(operation=operation, table=table).observe(duration)
        self.batch_operation_size.labels(operation=operation, table=table).observe(size)

    def get_metrics(self) -> bytes:
        """Return the current metrics as rendered by ``generate_latest()``.

        The payload is ``bytes`` (not ``str``), so HTTP handlers should send
        it as a raw response body.
        """
        return generate_latest()

    def get_content_type(self) -> str:
        """Return the Content-Type header value for the metrics payload."""
        return CONTENT_TYPE_LATEST
|
||
|
||
|
||
# Global instance of the metrics service
|
||
# Built at import time: constructing MetricsService creates every metric object immediately.
metrics_service = MetricsService()
|
||
|
||
|
||
def get_metrics_service() -> MetricsService:
    """Return the module-level ``metrics_service`` instance."""
    shared_instance = metrics_service
    return shared_instance
|
||
|
||
|
||
# Decorators for automatic metrics collection
|
||
def track_message_processing(message_type: str):
    """Build a decorator that records metrics for an async message handler.

    The decorated coroutine reports through the module-level
    ``metrics_service``:

    * on success, the message counter is incremented with status ``"success"``;
    * on ``Exception``, the message counter is incremented with status
      ``"error"``, the error counter is incremented with the exception class
      name and component ``"message_processing"``, and the exception is
      re-raised;
    * in every case the elapsed time is observed in the message processing
      histogram.

    Args:
        message_type: Label value attached to the message metrics.

    Returns:
        A decorator that wraps an async callable and preserves its metadata.
    """
    def decorator(func):
        # wraps() keeps the handler's __name__/__doc__ and exposes __wrapped__.
        @functools.wraps(func)
        async def wrapper(*args, **kwargs):
            # Drop ``dispatcher`` if it was passed: it is never forwarded to the handler.
            kwargs.pop('dispatcher', None)

            # perf_counter() is monotonic, so the measured duration cannot be
            # skewed by system clock adjustments.
            started = time.perf_counter()
            try:
                result = await func(*args, **kwargs)
            except Exception as exc:
                metrics_service.increment_messages(message_type, "error")
                metrics_service.increment_errors(type(exc).__name__, "message_processing")
                raise
            else:
                metrics_service.increment_messages(message_type, "success")
                return result
            finally:
                metrics_service.record_message_processing_time(
                    message_type, time.perf_counter() - started
                )
        return wrapper
    return decorator
|
||
|
||
|
||
def track_question_processing():
    """Build a decorator that records metrics for an async question handler.

    The decorated coroutine reports through the module-level
    ``metrics_service``:

    * on success, the question counter is incremented with status
      ``"processed"``;
    * on ``Exception``, the question counter is incremented with status
      ``"error"``, the error counter is incremented with the exception class
      name and component ``"question_processing"``, and the exception is
      re-raised;
    * in every case the elapsed time is observed in the question processing
      histogram.

    Returns:
        A decorator that wraps an async callable and preserves its metadata.
    """
    def decorator(func):
        # wraps() keeps the handler's __name__/__doc__ and exposes __wrapped__.
        @functools.wraps(func)
        async def wrapper(*args, **kwargs):
            # Drop ``dispatcher`` if it was passed: it is never forwarded to the handler.
            kwargs.pop('dispatcher', None)

            # perf_counter() is monotonic, so the measured duration cannot be
            # skewed by system clock adjustments.
            started = time.perf_counter()
            try:
                result = await func(*args, **kwargs)
            except Exception as exc:
                metrics_service.increment_questions("error")
                metrics_service.increment_errors(type(exc).__name__, "question_processing")
                raise
            else:
                metrics_service.increment_questions("processed")
                return result
            finally:
                metrics_service.record_question_processing_time(
                    time.perf_counter() - started
                )
        return wrapper
    return decorator
|
||
|
||
|
||
def track_answer_processing():
    """Build a decorator that records metrics for an async answer handler.

    The decorated coroutine reports through the module-level
    ``metrics_service``:

    * on success, the answer counter is incremented with status ``"sent"``;
    * on ``Exception``, the answer counter is incremented with status
      ``"error"``, the error counter is incremented with the exception class
      name and component ``"answer_processing"``, and the exception is
      re-raised;
    * in every case the elapsed time is observed in the answer processing
      histogram.

    Returns:
        A decorator that wraps an async callable and preserves its metadata.
    """
    def decorator(func):
        # wraps() keeps the handler's __name__/__doc__ and exposes __wrapped__.
        @functools.wraps(func)
        async def wrapper(*args, **kwargs):
            # Drop ``dispatcher`` if it was passed: it is never forwarded to the handler.
            kwargs.pop('dispatcher', None)

            # perf_counter() is monotonic, so the measured duration cannot be
            # skewed by system clock adjustments.
            started = time.perf_counter()
            try:
                result = await func(*args, **kwargs)
            except Exception as exc:
                metrics_service.increment_answers("error")
                metrics_service.increment_errors(type(exc).__name__, "answer_processing")
                raise
            else:
                metrics_service.increment_answers("sent")
                return result
            finally:
                metrics_service.record_answer_processing_time(
                    time.perf_counter() - started
                )
        return wrapper
    return decorator
|