Files
AnonBot/services/infrastructure/metrics.py

352 lines
14 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
Сервис для работы с Prometheus метриками
"""
import functools
import inspect
import time
from typing import Optional, Callable

from loguru import logger
from prometheus_client import Counter, Histogram, Gauge, Info, generate_latest, CONTENT_TYPE_LATEST
class MetricsService:
    """Service that owns the bot's Prometheus metrics and exposes helpers to update them.

    NOTE(review): prometheus_client collectors presumably register themselves
    in the default registry on construction, so creating a second instance in
    the same process would likely fail with a duplicate-metric error - confirm.
    """

    # Bucket boundaries (seconds) shared by the message/question/answer/HTTP
    # latency histograms.
    _LATENCY_BUCKETS = (0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0)

    def __init__(self):
        self._init_metrics()

    def _init_metrics(self):
        """Create every metric object and store it as an instance attribute."""
        # Bot information
        self.bot_info = Info('anon_bot_info', 'Information about the AnonBot')
        self.bot_info.info({
            'version': '1.0.0',
            'service': 'anon-bot'
        })

        # Message counters
        self.messages_total = Counter(
            'anon_bot_messages_total',
            'Total number of messages processed',
            ['message_type', 'status']
        )

        # Question counters
        self.questions_total = Counter(
            'anon_bot_questions_total',
            'Total number of questions received',
            ['status']
        )

        # Answer counters
        self.answers_total = Counter(
            'anon_bot_answers_total',
            'Total number of answers sent',
            ['status']
        )

        # User counters
        self.users_total = Counter(
            'anon_bot_users_total',
            'Total number of users',
            ['action']
        )

        # Message processing time
        self.message_processing_time = Histogram(
            'anon_bot_message_processing_seconds',
            'Time spent processing messages',
            ['message_type'],
            buckets=self._LATENCY_BUCKETS
        )

        # Question processing time
        self.question_processing_time = Histogram(
            'anon_bot_question_processing_seconds',
            'Time spent processing questions',
            buckets=self._LATENCY_BUCKETS
        )

        # Answer processing time
        self.answer_processing_time = Histogram(
            'anon_bot_answer_processing_seconds',
            'Time spent processing answers',
            buckets=self._LATENCY_BUCKETS
        )

        # Active users
        self.active_users = Gauge(
            'anon_bot_active_users',
            'Number of active users'
        )

        # Active questions
        self.active_questions = Gauge(
            'anon_bot_active_questions',
            'Number of active questions'
        )

        # Errors
        self.errors_total = Counter(
            'anon_bot_errors_total',
            'Total number of errors',
            ['error_type', 'component']
        )

        # HTTP requests to endpoints
        self.http_requests_total = Counter(
            'anon_bot_http_requests_total',
            'Total number of HTTP requests',
            ['method', 'endpoint', 'status_code']
        )

        # Database performance metrics
        self.db_queries_total = Counter(
            'anon_bot_db_queries_total',
            'Total number of database queries',
            ['operation', 'table', 'status']
        )
        self.db_query_duration = Histogram(
            'anon_bot_db_query_duration_seconds',
            'Database query duration',
            ['operation', 'table'],
            buckets=[0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0]
        )
        self.db_connections_active = Gauge(
            'anon_bot_db_connections_active',
            'Number of active database connections'
        )
        self.db_connections_total = Counter(
            'anon_bot_db_connections_total',
            'Total number of database connections',
            ['status']
        )

        # Pagination metrics
        self.pagination_requests_total = Counter(
            'anon_bot_pagination_requests_total',
            'Total number of pagination requests',
            ['entity_type', 'method']
        )
        self.pagination_duration = Histogram(
            'anon_bot_pagination_duration_seconds',
            'Pagination operation duration',
            ['entity_type', 'method'],
            buckets=[0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0]
        )
        self.pagination_errors_total = Counter(
            'anon_bot_pagination_errors_total',
            'Total number of pagination errors',
            ['entity_type', 'error_type']
        )

        # Batch operation metrics
        self.batch_operations_total = Counter(
            'anon_bot_batch_operations_total',
            'Total number of batch operations',
            ['operation', 'table', 'status']
        )
        self.batch_operation_duration = Histogram(
            'anon_bot_batch_operation_duration_seconds',
            'Batch operation duration',
            ['operation', 'table'],
            buckets=[0.01, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0]
        )
        self.batch_operation_size = Histogram(
            'anon_bot_batch_operation_size',
            'Batch operation size (number of items)',
            ['operation', 'table'],
            buckets=[1, 5, 10, 25, 50, 100, 250, 500, 1000]
        )

        # HTTP endpoint response time
        self.http_request_duration = Histogram(
            'anon_bot_http_request_duration_seconds',
            'HTTP request duration',
            ['method', 'endpoint'],
            buckets=self._LATENCY_BUCKETS
        )

        logger.info("Prometheus metrics initialized")

    def increment_messages(self, message_type: str, status: str = "success"):
        """Increment the message counter for the given type and status."""
        self.messages_total.labels(message_type=message_type, status=status).inc()

    def increment_questions(self, status: str = "received"):
        """Increment the question counter for the given status."""
        self.questions_total.labels(status=status).inc()

    def increment_answers(self, status: str = "sent"):
        """Increment the answer counter for the given status."""
        self.answers_total.labels(status=status).inc()

    def increment_users(self, action: str):
        """Increment the user counter for the given action."""
        self.users_total.labels(action=action).inc()

    def increment_errors(self, error_type: str, component: str):
        """Increment the error counter for the given error type and component."""
        self.errors_total.labels(error_type=error_type, component=component).inc()

    def increment_http_requests(self, method: str, endpoint: str, status_code: int):
        """Increment the HTTP request counter for method, endpoint and status code."""
        self.http_requests_total.labels(
            method=method,
            endpoint=endpoint,
            status_code=status_code
        ).inc()

    def set_active_users(self, count: int):
        """Set the active-users gauge to ``count``."""
        self.active_users.set(count)

    def set_active_questions(self, count: int):
        """Set the active-questions gauge to ``count``."""
        self.active_questions.set(count)

    def record_message_processing_time(self, message_type: str, duration: float):
        """Observe a message processing duration for the given message type."""
        self.message_processing_time.labels(message_type=message_type).observe(duration)

    def record_question_processing_time(self, duration: float):
        """Observe a question processing duration."""
        self.question_processing_time.observe(duration)

    def record_answer_processing_time(self, duration: float):
        """Observe an answer processing duration."""
        self.answer_processing_time.observe(duration)

    def record_http_request_duration(self, method: str, endpoint: str, duration: float):
        """Observe an HTTP request duration for the given method and endpoint."""
        self.http_request_duration.labels(method=method, endpoint=endpoint).observe(duration)

    # Database metric helpers
    def record_db_query(self, operation: str, table: str, status: str, duration: float):
        """Count one database query and observe its duration."""
        self.db_queries_total.labels(operation=operation, table=table, status=status).inc()
        self.db_query_duration.labels(operation=operation, table=table).observe(duration)

    def record_db_connection(self, status: str):
        """Count a database connection event and keep the active gauge in step.

        Every status is counted; ``"opened"`` additionally increments the
        active-connections gauge and ``"closed"`` decrements it. Any other
        status leaves the gauge untouched.
        """
        self.db_connections_total.labels(status=status).inc()
        if status == "opened":
            self.db_connections_active.inc()
        elif status == "closed":
            self.db_connections_active.dec()

    def record_pagination_time(self, entity_type: str, duration: float, method: str = "cursor"):
        """Observe a pagination duration.

        This also increments the pagination request counter, so callers that
        use this method should not call ``increment_pagination_requests`` for
        the same request as well.
        """
        self.pagination_requests_total.labels(entity_type=entity_type, method=method).inc()
        self.pagination_duration.labels(entity_type=entity_type, method=method).observe(duration)

    def increment_pagination_requests(self, entity_type: str, method: str = "cursor"):
        """Increment the pagination request counter."""
        self.pagination_requests_total.labels(entity_type=entity_type, method=method).inc()

    def increment_pagination_errors(self, entity_type: str, error_type: str = "unknown"):
        """Increment the pagination error counter."""
        self.pagination_errors_total.labels(entity_type=entity_type, error_type=error_type).inc()

    def record_batch_operation(self, operation: str, table: str, status: str, duration: float, size: int):
        """Count one batch operation and observe its duration and item count."""
        self.batch_operations_total.labels(operation=operation, table=table, status=status).inc()
        self.batch_operation_duration.labels(operation=operation, table=table).observe(duration)
        self.batch_operation_size.labels(operation=operation, table=table).observe(size)

    def get_metrics(self) -> bytes:
        """Return the current metrics as produced by ``generate_latest()``.

        The payload is ``bytes`` (not ``str``), ready to be written as an HTTP
        response body.
        """
        return generate_latest()

    def get_content_type(self) -> str:
        """Return the Content-Type value to send with the metrics payload."""
        return CONTENT_TYPE_LATEST
# Global metrics service instance
metrics_service = MetricsService()
def get_metrics_service() -> MetricsService:
    """Return the module-level ``metrics_service`` instance."""
    return metrics_service
# Декораторы для автоматического сбора метрик
def track_message_processing(message_type: str):
    """Build a decorator that records metrics for an async message handler.

    On success the message counter is incremented with status ``"success"``.
    If the handler raises ``Exception``, the message counter is incremented
    with status ``"error"``, the error counter is incremented with the
    exception class name and component ``"message_processing"``, and the
    exception is re-raised. The elapsed time is recorded in either case.

    Args:
        message_type: Label value used for the message counter and the
            processing-time histogram.

    Returns:
        A decorator that wraps an awaitable callable.
    """
    def decorator(func):
        # functools.wraps keeps the handler's name, docstring and __wrapped__
        # so logs and signature introspection still see the real handler.
        @functools.wraps(func)
        async def wrapper(*args, **kwargs):
            # Drop the dispatcher keyword if present; the handler does not need it.
            kwargs.pop('dispatcher', None)
            # perf_counter is monotonic, so wall-clock adjustments cannot
            # produce negative or skewed durations.
            started = time.perf_counter()
            try:
                result = await func(*args, **kwargs)
                metrics_service.increment_messages(message_type, "success")
                return result
            except Exception as exc:
                metrics_service.increment_messages(message_type, "error")
                metrics_service.increment_errors(type(exc).__name__, "message_processing")
                raise
            finally:
                elapsed = time.perf_counter() - started
                metrics_service.record_message_processing_time(message_type, elapsed)
        return wrapper
    return decorator
def track_question_processing():
    """Build a decorator that records metrics for an async question handler.

    On success the question counter is incremented with status
    ``"processed"``. If the handler raises ``Exception``, the question counter
    is incremented with status ``"error"``, the error counter is incremented
    with the exception class name and component ``"question_processing"``, and
    the exception is re-raised. The elapsed time is recorded in either case.

    Returns:
        A decorator that wraps an awaitable callable.
    """
    def decorator(func):
        # functools.wraps keeps the handler's name, docstring and __wrapped__
        # so logs and signature introspection still see the real handler.
        @functools.wraps(func)
        async def wrapper(*args, **kwargs):
            # Drop the dispatcher keyword if present; the handler does not need it.
            kwargs.pop('dispatcher', None)
            # perf_counter is monotonic, so wall-clock adjustments cannot
            # produce negative or skewed durations.
            started = time.perf_counter()
            try:
                result = await func(*args, **kwargs)
                metrics_service.increment_questions("processed")
                return result
            except Exception as exc:
                metrics_service.increment_questions("error")
                metrics_service.increment_errors(type(exc).__name__, "question_processing")
                raise
            finally:
                elapsed = time.perf_counter() - started
                metrics_service.record_question_processing_time(elapsed)
        return wrapper
    return decorator
def track_answer_processing():
    """Build a decorator that records metrics for an async answer handler.

    On success the answer counter is incremented with status ``"sent"``. If
    the handler raises ``Exception``, the answer counter is incremented with
    status ``"error"``, the error counter is incremented with the exception
    class name and component ``"answer_processing"``, and the exception is
    re-raised. The elapsed time is recorded in either case.

    Returns:
        A decorator that wraps an awaitable callable.
    """
    def decorator(func):
        # functools.wraps keeps the handler's name, docstring and __wrapped__
        # so logs and signature introspection still see the real handler.
        @functools.wraps(func)
        async def wrapper(*args, **kwargs):
            # Drop the dispatcher keyword if present; the handler does not need it.
            kwargs.pop('dispatcher', None)
            # perf_counter is monotonic, so wall-clock adjustments cannot
            # produce negative or skewed durations.
            started = time.perf_counter()
            try:
                result = await func(*args, **kwargs)
                metrics_service.increment_answers("sent")
                return result
            except Exception as exc:
                metrics_service.increment_answers("error")
                metrics_service.increment_errors(type(exc).__name__, "answer_processing")
                raise
            finally:
                elapsed = time.perf_counter() - started
                metrics_service.record_answer_processing_time(elapsed)
        return wrapper
    return decorator