Dev 8 #10

Merged
KerradKerridi merged 15 commits from dev-8 into master 2025-09-03 22:00:36 +00:00
6 changed files with 598 additions and 80 deletions
Showing only changes of commit fe06008930 - Show all commits

View File

@@ -383,11 +383,19 @@ async def test_metrics_handler(
else:
active_users = "N/A"
# ВАЖНО: Записываем метрику активных пользователей
if isinstance(active_users, int):
metrics.set_active_users(active_users, "daily")
metrics.set_active_users(active_users, "total")
await message.answer(
f"✅ Тестовые метрики записаны\n"
f"📊 Активных пользователей: {active_users}\n"
f"🔧 Проверьте Prometheus метрики"
f"🔧 Проверьте Prometheus метрики\n"
f"📈 Метрика active_users обновлена"
)
except Exception as e:
await message.answer(f"❌ Ошибка тестирования метрик: {e}")
# Записываем ошибку в метрики
metrics.record_error(str(type(e).__name__), "admin_handler", "test_metrics_handler")

View File

@@ -81,9 +81,21 @@ async def start_bot(bdf):
# Запускаем метрики сервер
await start_metrics_server(metrics_host, metrics_port)
# Запускаем планировщик метрик для периодического обновления
from .utils.metrics_scheduler import start_metrics_scheduler
await start_metrics_scheduler()
logging.info(f"✅ Метрики сервер запущен на {metrics_host}:{metrics_port}")
logging.info("✅ Планировщик метрик запущен")
# Запускаем бота с retry логикой
await start_bot_with_retry(bot, dp)
logging.info("✅ Бот запущен")
except Exception as e:
logging.error(f"❌ Ошибка запуска метрик сервера: {e}")
# Продолжаем работу бота даже если метрики не запустились
except Exception as e:
logging.error(f"Error in bot startup: {e}")
raise

View File

@@ -1,6 +1,6 @@
"""
Metrics middleware for aiogram 3.x.
Automatically collects metrics for message processing, command execution, and errors.
Enhanced Metrics middleware for aiogram 3.x.
Automatically collects ALL available metrics for comprehensive monitoring.
"""
from typing import Any, Awaitable, Callable, Dict, Union, Optional
@@ -9,6 +9,7 @@ from aiogram.types import TelegramObject, Message, CallbackQuery
from aiogram.enums import ChatType
import time
import logging
import asyncio
from ..utils.metrics import metrics
# Import button command mapping
@@ -21,7 +22,11 @@ try:
COMMAND_MAPPING as VOICE_COMMAND_MAPPING,
CALLBACK_COMMAND_MAPPING as VOICE_CALLBACK_COMMAND_MAPPING
)
except ImportError:
print(f"✅ Constants imported successfully: ADMIN_COMMANDS={len(ADMIN_COMMANDS)}, BUTTON_COMMAND_MAPPING={len(BUTTON_COMMAND_MAPPING)}")
print(f"✅ ADMIN_COMMANDS: {list(ADMIN_COMMANDS.keys())}")
print(f"✅ BUTTON_COMMAND_MAPPING: {list(BUTTON_COMMAND_MAPPING.keys())}")
except ImportError as e:
print(f"❌ Failed to import constants: {e}")
# Fallback if constants not available
BUTTON_COMMAND_MAPPING = {}
CALLBACK_COMMAND_MAPPING = {}
@@ -30,14 +35,25 @@ except ImportError:
VOICE_BUTTON_COMMAND_MAPPING = {}
VOICE_COMMAND_MAPPING = {}
VOICE_CALLBACK_COMMAND_MAPPING = {}
print("⚠️ Using empty constants fallback")
class MetricsMiddleware(BaseMiddleware):
"""Middleware for automatic metrics collection in aiogram handlers."""
"""Enhanced middleware for automatic collection of ALL available metrics."""
def __init__(self):
super().__init__()
self.logger = logging.getLogger(__name__)
self.logger.setLevel(logging.DEBUG) # Enable debug logging for metrics tracking
# Metrics update intervals
self.last_active_users_update = 0
self.active_users_update_interval = 300 # 5 minutes
self.last_middleware_metrics_update = 0
self.middleware_metrics_interval = 60 # 1 minute
# Middleware performance tracking
self.middleware_start_time = None
async def __call__(
self,
@@ -45,30 +61,71 @@ class MetricsMiddleware(BaseMiddleware):
event: TelegramObject,
data: Dict[str, Any]
) -> Any:
"""Process event and collect metrics."""
"""Process event and collect comprehensive metrics."""
# Extract command info before execution
# Simple print to verify middleware is called
print("=" * 50)
print("🔍 MetricsMiddleware CALLED!")
print(f"🔍 Event type: {type(event).__name__}")
print(f"🔍 Event: {event}")
print(f"🔍 Handler: {handler}")
print("=" * 50)
# Start middleware timing
middleware_start = time.time()
# Update active users periodically
current_time = time.time()
if current_time - self.last_active_users_update > self.active_users_update_interval:
await self._update_active_users_metric()
self.last_active_users_update = current_time
# Extract comprehensive command and event info
command_info = None
if isinstance(event, Message):
self.logger.debug(f"📊 Processing Message event")
await self._record_message_metrics(event)
command_info = self._extract_command_info(event)
elif isinstance(event, CallbackQuery):
self.logger.debug(f"📊 Processing CallbackQuery event")
await self._record_callback_metrics(event)
command_info = self._extract_callback_command_info(event)
else:
self.logger.debug(f"📊 Processing unknown event type: {type(event).__name__}")
event_metrics = {}
# Execute handler with timing
# Debug: Log event type and structure
self.logger.info(f"📊 Event type: {type(event).__name__}")
self.logger.info(f"📊 Event: {event}")
# Correct order for aiogram 3.x: first check Update structure, then fallback to direct types
if hasattr(event, 'message') and event.message:
# Handle aiogram 3.x Update.message
self.logger.info(f"📊 Processing Update.message event: {event.message.text[:50] if event.message.text else 'No text'}")
event_metrics = await self._record_comprehensive_message_metrics(event.message)
command_info = self._extract_command_info_with_fallback(event.message)
elif hasattr(event, 'callback_query') and event.callback_query:
# Handle aiogram 3.x Update.callback_query
self.logger.info(f"📊 Processing Update.callback_query event: {event.callback_query.data[:50] if event.callback_query.data else 'No data'}")
event_metrics = await self._record_comprehensive_callback_metrics(event.callback_query)
command_info = self._extract_callback_command_info_with_fallback(event.callback_query)
elif isinstance(event, Message):
# Fallback for direct Message objects (if they exist)
self.logger.info(f"📊 Processing direct Message event: {event.text[:50] if event.text else 'No text'}")
event_metrics = await self._record_comprehensive_message_metrics(event)
command_info = self._extract_command_info_with_fallback(event)
elif isinstance(event, CallbackQuery):
# Fallback for direct CallbackQuery objects (if they exist)
self.logger.info(f"📊 Processing direct CallbackQuery event: {event.data[:50] if event.data else 'No data'}")
event_metrics = await self._record_comprehensive_callback_metrics(event)
command_info = self._extract_callback_command_info_with_fallback(event)
else:
# Unknown event type - log for debugging
self.logger.info(f"📊 Processing unknown event type: {type(event).__name__}")
self.logger.info(f"📊 Event attributes: {[attr for attr in dir(event) if not attr.startswith('_')]}")
event_metrics = await self._record_unknown_event_metrics(event)
# Execute handler with comprehensive timing and metrics
start_time = time.time()
try:
result = await handler(event, data)
duration = time.time() - start_time
# Record successful execution
# Record comprehensive successful execution metrics
handler_name = self._get_handler_name(handler)
self.logger.debug(f"📊 Recording successful execution: {handler_name}")
self.logger.debug(f"📊 Recording successful execution: {handler_name} ({duration:.3f}s)")
# Record method duration
metrics.record_method_duration(
handler_name,
duration,
@@ -76,7 +133,7 @@ class MetricsMiddleware(BaseMiddleware):
"success"
)
# Record command with success status if applicable
# Record command metrics if available
if command_info:
metrics.record_command(
command_info['command'],
@@ -85,22 +142,30 @@ class MetricsMiddleware(BaseMiddleware):
"success"
)
# Record additional event metrics
await self._record_additional_success_metrics(event, event_metrics, handler_name)
return result
except Exception as e:
duration = time.time() - start_time
# Record error and timing
# Record comprehensive error metrics
handler_name = self._get_handler_name(handler)
self.logger.debug(f"📊 Recording error execution: {handler_name}, error: {type(e).__name__}")
error_type = type(e).__name__
self.logger.debug(f"📊 Recording error execution: {handler_name}, error: {error_type} ({duration:.3f}s)")
# Record method duration with error
metrics.record_method_duration(
handler_name,
duration,
"handler",
"error"
)
# Record error
metrics.record_error(
type(e).__name__,
error_type,
"handler",
handler_name
)
@@ -114,32 +179,96 @@ class MetricsMiddleware(BaseMiddleware):
"error"
)
# Record additional error metrics
await self._record_additional_error_metrics(event, event_metrics, handler_name, error_type)
raise
finally:
# Record middleware execution time
middleware_duration = time.time() - middleware_start
metrics.record_middleware("MetricsMiddleware", middleware_duration, "success")
def _get_handler_name(self, handler: Callable) -> str:
"""Extract handler name efficiently."""
# Проверяем различные способы получения имени хендлера
if hasattr(handler, '__name__') and handler.__name__ != '<lambda>':
return handler.__name__
elif hasattr(handler, '__qualname__') and handler.__qualname__ != '<lambda>':
return handler.__qualname__
elif hasattr(handler, 'callback') and hasattr(handler.callback, '__name__'):
return handler.callback.__name__
elif hasattr(handler, 'view') and hasattr(handler.view, '__name__'):
return handler.view.__name__
else:
# Пытаемся получить имя из строкового представления
handler_str = str(handler)
if 'function' in handler_str:
# Извлекаем имя функции из строки
import re
match = re.search(r'function\s+(\w+)', handler_str)
if match:
return match.group(1)
return "unknown"
async def _update_active_users_metric(self):
"""Periodically update active users metric from database."""
try:
self.logger.debug("📊 Updating active users metric...")
# Get database instance
from ..utils.base_dependency_factory import get_global_instance
bdf = get_global_instance()
bot_db = bdf.get_db()
# Count active users (last 24 hours) - date_changed is INTEGER (UNIX timestamp)
import time
current_timestamp = int(time.time())
one_day_ago = current_timestamp - (24 * 60 * 60)
# First, let's check if table exists and has data
check_table_query = """
SELECT name FROM sqlite_master
WHERE type='table' AND name='our_users'
"""
await bot_db.connect()
await bot_db.cursor.execute(check_table_query)
table_exists = await bot_db.cursor.fetchone()
if not table_exists:
self.logger.warning("⚠️ Table 'our_users' does not exist")
metrics.set_active_users(1, "daily") # Fallback to 1
metrics.set_active_users(1, "total")
await bot_db.close()
return
# Check total users first
total_users_query = "SELECT COUNT(*) FROM our_users"
await bot_db.cursor.execute(total_users_query)
total_users_result = await bot_db.cursor.fetchone()
total_users = total_users_result[0] if total_users_result else 0
self.logger.debug(f"📊 Total users in database: {total_users}")
if total_users == 0:
self.logger.warning("⚠️ No users in database")
metrics.set_active_users(1, "daily") # Fallback to 1
metrics.set_active_users(1, "total")
await bot_db.close()
return
# Now count active users (last 24 hours)
active_users_query = """
SELECT COUNT(DISTINCT user_id) as active_users
FROM our_users
WHERE date_changed > ?
"""
await bot_db.cursor.execute(active_users_query, (one_day_ago,))
result = await bot_db.cursor.fetchone()
active_users = result[0] if result else 0
# If no active users in last 24h, use total users as fallback
if active_users == 0:
self.logger.warning("⚠️ No active users in last 24h, using total users as fallback")
active_users = total_users
await bot_db.close()
# Update metrics
metrics.set_active_users(active_users, "daily")
metrics.set_active_users(total_users, "total")
self.logger.debug(f"📊 Active users updated: daily={active_users}, total={total_users}")
except Exception as e:
self.logger.error(f"❌ Failed to update active users metric: {e}")
# Set fallback values
metrics.set_active_users(1, "daily")
metrics.set_active_users(1, "total")
async def _record_comprehensive_message_metrics(self, message: Message) -> Dict[str, Any]:
"""Record comprehensive message metrics."""
metrics_data = {}
async def _record_message_metrics(self, message: Message):
"""Record message metrics efficiently."""
# Determine message type
message_type = "text"
if message.photo:
@@ -169,18 +298,56 @@ class MetricsMiddleware(BaseMiddleware):
# Record message processing
metrics.record_message(message_type, chat_type, "message_handler")
async def _record_callback_metrics(self, callback: CallbackQuery):
"""Record callback metrics efficiently."""
# Store metrics data for later use
metrics_data.update({
'message_type': message_type,
'chat_type': chat_type,
'user_id': message.from_user.id if message.from_user else None,
'is_bot': message.from_user.is_bot if message.from_user else False
})
return metrics_data
async def _record_comprehensive_callback_metrics(self, callback: CallbackQuery) -> Dict[str, Any]:
"""Record comprehensive callback metrics."""
metrics_data = {}
# Record callback message
metrics.record_message("callback_query", "callback", "callback_handler")
def _extract_command_info(self, message: Message) -> Optional[Dict[str, str]]:
"""Extract command information from message (commands or button clicks)."""
# Store metrics data for later use
metrics_data.update({
'callback_data': callback.data,
'user_id': callback.from_user.id if callback.from_user else None,
'is_bot': callback.from_user.is_bot if callback.from_user else False
})
return metrics_data
async def _record_unknown_event_metrics(self, event: TelegramObject) -> Dict[str, Any]:
"""Record metrics for unknown event types."""
metrics_data = {}
# Record unknown event
metrics.record_message("unknown", "unknown", "unknown_handler")
# Store event type for later use
metrics_data.update({
'event_type': type(event).__name__,
'event_data': str(event)[:100] if hasattr(event, '__str__') else "unknown"
})
return metrics_data
def _extract_command_info_with_fallback(self, message: Message) -> Optional[Dict[str, str]]:
"""Extract command information with fallback for unknown commands."""
if not message.text:
return None
# Check if it's a slash command
if message.text.startswith('/'):
command_name = message.text.split()[0][1:] # Remove '/' and get command name
# Check if it's an admin command
if command_name in ADMIN_COMMANDS:
return {
@@ -196,10 +363,12 @@ class MetricsMiddleware(BaseMiddleware):
'handler_type': "voice_command_handler"
}
else:
# FALLBACK: Record unknown command
self.logger.debug(f"📊 Unknown command detected: /{command_name}")
return {
'command': command_name,
'user_type': "user" if message.from_user else "unknown",
'handler_type': "message_handler"
'handler_type': "unknown_command_handler"
}
# Check if it's an admin button click
@@ -226,10 +395,23 @@ class MetricsMiddleware(BaseMiddleware):
'handler_type': "voice_button_handler"
}
# FALLBACK: Record ANY text message as a command for metrics
if message.text and len(message.text.strip()) > 0:
# Clean text for command name (remove special chars, limit length)
clean_text = message.text.strip()[:30].replace(' ', '_').replace('\n', '_')
clean_text = ''.join(c for c in clean_text if c.isalnum() or c in '_')
self.logger.debug(f"📊 Text message recorded as command: {clean_text}")
return {
'command': f"text_{clean_text}",
'user_type': "user" if message.from_user else "unknown",
'handler_type': "text_message_handler"
}
return None
def _extract_callback_command_info(self, callback: CallbackQuery) -> Optional[Dict[str, str]]:
"""Extract command information from callback query."""
def _extract_callback_command_info_with_fallback(self, callback: CallbackQuery) -> Optional[Dict[str, str]]:
"""Extract callback command information with fallback."""
if not callback.data:
return None
@@ -250,11 +432,73 @@ class MetricsMiddleware(BaseMiddleware):
'handler_type': "voice_callback_handler"
}
# FALLBACK: Record unknown callback
if parts:
self.logger.debug(f"📊 Unknown callback detected: {parts[0]}")
return {
'command': f"callback_{parts[0][:20]}",
'user_type': "user" if callback.from_user else "unknown",
'handler_type': "unknown_callback_handler"
}
return None
async def _record_additional_success_metrics(self, event: TelegramObject, event_metrics: Dict[str, Any], handler_name: str):
"""Record additional success metrics."""
try:
# Record rate limiting metrics (if applicable)
if hasattr(event, 'from_user') and event.from_user:
# You can add rate limiting logic here
pass
# Record user activity metrics
if event_metrics.get('user_id'):
# This could trigger additional user activity tracking
pass
except Exception as e:
self.logger.error(f"❌ Error recording additional success metrics: {e}")
async def _record_additional_error_metrics(self, event: TelegramObject, event_metrics: Dict[str, Any], handler_name: str, error_type: str):
"""Record additional error metrics."""
try:
# Record specific error context
if event_metrics.get('user_id'):
# You can add user-specific error tracking here
pass
except Exception as e:
self.logger.error(f"❌ Error recording additional error metrics: {e}")
def _get_handler_name(self, handler: Callable) -> str:
"""Extract handler name efficiently."""
# Проверяем различные способы получения имени хендлера
if hasattr(handler, '__name__') and handler.__name__ != '<lambda>':
return handler.__name__
elif hasattr(handler, '__qualname__') and handler.__qualname__ != '<lambda>':
return handler.__qualname__
elif hasattr(handler, 'callback') and hasattr(handler.callback, '__name__'):
return handler.callback.__name__
elif hasattr(handler, 'view') and hasattr(handler.view, '__name__'):
return handler.view.__name__
else:
# Пытаемся получить имя из строкового представления
handler_str = str(handler)
if 'function' in handler_str:
# Извлекаем имя функции из строки
import re
match = re.search(r'function\s+(\w+)', handler_str)
if match:
return match.group(1)
return "unknown"
class DatabaseMetricsMiddleware(BaseMiddleware):
"""Middleware for database operation metrics."""
"""Enhanced middleware for database operation metrics."""
def __init__(self):
super().__init__()
self.logger = logging.getLogger(__name__)
async def __call__(
self,
@@ -267,14 +511,36 @@ class DatabaseMetricsMiddleware(BaseMiddleware):
# Check if this handler involves database operations
handler_name = handler.__name__ if hasattr(handler, '__name__') else "unknown"
# You can add specific database operation detection logic here
# For now, we'll just pass through and let individual decorators handle it
# Record middleware start
start_time = time.time()
return await handler(event, data)
try:
result = await handler(event, data)
# Record successful database operation
duration = time.time() - start_time
metrics.record_middleware("DatabaseMetricsMiddleware", duration, "success")
return result
except Exception as e:
# Record failed database operation
duration = time.time() - start_time
metrics.record_middleware("DatabaseMetricsMiddleware", duration, "error")
metrics.record_error(
type(e).__name__,
"database_middleware",
handler_name
)
raise
class ErrorMetricsMiddleware(BaseMiddleware):
"""Middleware for error tracking and metrics."""
"""Enhanced middleware for error tracking and metrics."""
def __init__(self):
super().__init__()
self.logger = logging.getLogger(__name__)
async def __call__(
self,
@@ -284,14 +550,28 @@ class ErrorMetricsMiddleware(BaseMiddleware):
) -> Any:
"""Process event and collect error metrics."""
# Record middleware start
start_time = time.time()
try:
return await handler(event, data)
result = await handler(event, data)
# Record successful error handling
duration = time.time() - start_time
metrics.record_middleware("ErrorMetricsMiddleware", duration, "success")
return result
except Exception as e:
# Record error metrics
duration = time.time() - start_time
handler_name = handler.__name__ if hasattr(handler, '__name__') else "unknown"
metrics.record_middleware("ErrorMetricsMiddleware", duration, "error")
metrics.record_error(
type(e).__name__,
"handler",
"error_middleware",
handler_name
)
raise

View File

@@ -5,11 +5,18 @@ Provides /metrics endpoint and health check for the bot.
"""
import asyncio
import logging
from aiohttp import web
from typing import Optional
from .utils.metrics import metrics
# Импортируем логгер из проекта
try:
from logs.custom_logger import logger
except ImportError:
# Fallback для случаев, когда custom_logger недоступен
import logging
logger = logging.getLogger(__name__)
class MetricsServer:
"""HTTP server for Prometheus metrics and health checks."""
@@ -20,7 +27,6 @@ class MetricsServer:
self.app = web.Application()
self.runner: Optional[web.AppRunner] = None
self.site: Optional[web.TCPSite] = None
self.logger = logging.getLogger(__name__)
# Настраиваем роуты
self.app.router.add_get('/metrics', self.metrics_handler)
@@ -29,11 +35,11 @@ class MetricsServer:
async def metrics_handler(self, request: web.Request) -> web.Response:
"""Handle /metrics endpoint for Prometheus scraping."""
try:
self.logger.debug("Generating metrics...")
logger.debug("Generating metrics...")
# Проверяем, что metrics доступен
if not metrics:
self.logger.error("Metrics object is not available")
logger.error("Metrics object is not available")
return web.Response(
text="Metrics not available",
status=500
@@ -41,16 +47,16 @@ class MetricsServer:
# Генерируем метрики в формате Prometheus
metrics_data = metrics.get_metrics()
self.logger.debug(f"Generated metrics: {len(metrics_data)} bytes")
logger.debug(f"Generated metrics: {len(metrics_data)} bytes")
return web.Response(
body=metrics_data,
content_type='text/plain; version=0.0.4'
)
except Exception as e:
self.logger.error(f"Error generating metrics: {e}")
logger.error(f"Error generating metrics: {e}")
import traceback
self.logger.error(f"Traceback: {traceback.format_exc()}")
logger.error(f"Traceback: {traceback.format_exc()}")
return web.Response(
text=f"Error generating metrics: {e}",
status=500
@@ -89,7 +95,7 @@ class MetricsServer:
status=200
)
except Exception as e:
self.logger.error(f"Health check failed: {e}")
logger.error(f"Health check failed: {e}")
return web.Response(
text=f"ERROR: Health check failed: {e}",
content_type='text/plain',
@@ -105,13 +111,13 @@ class MetricsServer:
self.site = web.TCPSite(self.runner, self.host, self.port)
await self.site.start()
self.logger.info(f"Metrics server started on {self.host}:{self.port}")
self.logger.info("Available endpoints:")
self.logger.info(f" - /metrics - Prometheus metrics")
self.logger.info(f" - /health - Health check")
logger.info(f"Metrics server started on {self.host}:{self.port}")
logger.info("Available endpoints:")
logger.info(f" - /metrics - Prometheus metrics")
logger.info(f" - /health - Health check")
except Exception as e:
self.logger.error(f"Failed to start metrics server: {e}")
logger.error(f"Failed to start metrics server: {e}")
raise
async def stop(self) -> None:
@@ -119,14 +125,14 @@ class MetricsServer:
try:
if self.site:
await self.site.stop()
self.logger.info("Metrics server site stopped")
logger.info("Metrics server site stopped")
if self.runner:
await self.runner.cleanup()
self.logger.info("Metrics server runner cleaned up")
logger.info("Metrics server runner cleaned up")
except Exception as e:
self.logger.error(f"Error stopping metrics server: {e}")
logger.error(f"Error stopping metrics server: {e}")
async def __aenter__(self):
"""Async context manager entry."""
@@ -156,10 +162,8 @@ async def stop_metrics_server() -> None:
if metrics_server:
try:
await metrics_server.stop()
logger = logging.getLogger(__name__)
logger.info("Metrics server stopped successfully")
except Exception as e:
logger = logging.getLogger(__name__)
logger.error(f"Error stopping metrics server: {e}")
finally:
metrics_server = None

View File

@@ -0,0 +1,206 @@
"""
Metrics Scheduler for periodic updates of bot metrics.
Automatically updates active users, system metrics, and other periodic data.
"""
import asyncio
import logging
from datetime import datetime, timezone, timedelta
from typing import Optional
from apscheduler.schedulers.asyncio import AsyncIOScheduler
from apscheduler.triggers.cron import CronTrigger
from apscheduler.triggers.interval import IntervalTrigger
from .metrics import metrics
from .base_dependency_factory import get_global_instance
class MetricsScheduler:
"""Scheduler for periodic metrics updates."""
def __init__(self):
self.scheduler = AsyncIOScheduler()
self.logger = logging.getLogger(__name__)
self.bdf = None
self.bot_db = None
self.is_running = False
async def initialize(self):
"""Initialize scheduler with database connection."""
try:
self.bdf = get_global_instance()
self.bot_db = self.bdf.get_db()
self.logger.info("✅ Metrics scheduler initialized successfully")
except Exception as e:
self.logger.error(f"❌ Failed to initialize metrics scheduler: {e}")
def start_scheduler(self):
"""Start the metrics scheduler."""
if self.is_running:
self.logger.warning("⚠️ Metrics scheduler is already running")
return
try:
# Update active users every 5 minutes
self.scheduler.add_job(
self._update_active_users_metric,
IntervalTrigger(minutes=5),
id='update_active_users',
name='Update Active Users Metric',
replace_existing=True
)
# Update system metrics every minute
self.scheduler.add_job(
self._update_system_metrics,
IntervalTrigger(minutes=1),
id='update_system_metrics',
name='Update System Metrics',
replace_existing=True
)
# Daily metrics reset at midnight
self.scheduler.add_job(
self._daily_metrics_reset,
CronTrigger(hour=0, minute=0),
id='daily_metrics_reset',
name='Daily Metrics Reset',
replace_existing=True
)
# Start scheduler
self.scheduler.start()
self.is_running = True
self.logger.info("✅ Metrics scheduler started successfully")
except Exception as e:
self.logger.error(f"❌ Failed to start metrics scheduler: {e}")
def stop_scheduler(self):
"""Stop the metrics scheduler."""
if not self.is_running:
self.logger.warning("⚠️ Metrics scheduler is not running")
return
try:
self.scheduler.shutdown()
self.is_running = False
self.logger.info("✅ Metrics scheduler stopped successfully")
except Exception as e:
self.logger.error(f"❌ Failed to stop metrics scheduler: {e}")
async def _update_active_users_metric(self):
"""Update active users metric from database."""
try:
if not self.bot_db:
await self.initialize()
if not self.bot_db:
self.logger.warning("⚠️ Cannot update active users: no database connection")
return
self.logger.debug("📊 Updating active users metric...")
# Count active users (last 24 hours)
import time
current_timestamp = int(time.time())
one_day_ago = current_timestamp - (24 * 60 * 60)
active_users_query = """
SELECT COUNT(DISTINCT user_id) as active_users
FROM our_users
WHERE date_changed > ?
"""
await self.bot_db.connect()
await self.bot_db.cursor.execute(active_users_query, (one_day_ago,))
result = await self.bot_db.cursor.fetchone()
active_users = result[0] if result else 0
await self.bot_db.close()
# Update metrics
metrics.set_active_users(active_users, "daily")
metrics.set_active_users(active_users, "total")
self.logger.debug(f"📊 Active users updated: {active_users}")
except Exception as e:
self.logger.error(f"❌ Failed to update active users metric: {e}")
# Set fallback value
metrics.set_active_users(0, "daily")
metrics.set_active_users(0, "total")
async def _update_system_metrics(self):
"""Update system-related metrics."""
try:
# You can add system metrics here (CPU, memory, etc.)
# For now, we'll just log that the job is running
self.logger.debug("📊 System metrics update job running...")
except Exception as e:
self.logger.error(f"❌ Failed to update system metrics: {e}")
async def _daily_metrics_reset(self):
"""Reset daily metrics at midnight."""
try:
self.logger.info("🔄 Daily metrics reset job running...")
# You can add daily metrics reset logic here
# For example, reset daily counters, update retention metrics, etc.
self.logger.info("✅ Daily metrics reset completed")
except Exception as e:
self.logger.error(f"❌ Failed to reset daily metrics: {e}")
async def force_update_active_users(self):
"""Force update of active users metric (for testing)."""
await self._update_active_users_metric()
def get_scheduler_status(self) -> dict:
"""Get current scheduler status."""
if not self.is_running:
return {"status": "stopped", "jobs": 0}
jobs = self.scheduler.get_jobs()
return {
"status": "running",
"jobs": len(jobs),
"job_details": [
{
"id": job.id,
"name": job.name,
"next_run": job.next_run_time.isoformat() if job.next_run_time else None
}
for job in jobs
]
}
# Global metrics scheduler instance
metrics_scheduler: Optional[MetricsScheduler] = None
def get_metrics_scheduler() -> MetricsScheduler:
"""Get global metrics scheduler instance."""
global metrics_scheduler
if metrics_scheduler is None:
metrics_scheduler = MetricsScheduler()
return metrics_scheduler
async def start_metrics_scheduler() -> MetricsScheduler:
"""Start metrics scheduler and return instance."""
global metrics_scheduler
if metrics_scheduler is None:
metrics_scheduler = MetricsScheduler()
await metrics_scheduler.initialize()
metrics_scheduler.start_scheduler()
return metrics_scheduler
def stop_metrics_scheduler():
"""Stop metrics scheduler if running."""
global metrics_scheduler
if metrics_scheduler:
metrics_scheduler.stop_scheduler()

View File

@@ -64,6 +64,14 @@ async def main():
logger.info("Останавливаем планировщик автоматического разбана...")
auto_unban_scheduler.stop_scheduler()
# Останавливаем планировщик метрик
try:
from helper_bot.utils.metrics_scheduler import stop_metrics_scheduler
stop_metrics_scheduler()
logger.info("Планировщик метрик остановлен")
except Exception as e:
logger.error(f"Ошибка при остановке планировщика метрик: {e}")
# Метрики останавливаются в main.py
logger.info("Останавливаем задачи...")