Enhance metrics handling and logging in bot

- Integrated metrics scheduler start and stop functionality in `run_helper.py` for better resource management.
- Improved logging for metrics server operations in `server_prometheus.py`, ensuring clearer error reporting and status updates.
- Updated metrics middleware to collect comprehensive metrics for all event types, enhancing monitoring capabilities.
- Added active user metrics tracking in `admin_handlers.py` to provide insights on user engagement.
- Refactored command and callback handling in `metrics_middleware.py` to improve clarity and error handling.
This commit is contained in:
2025-09-03 16:16:14 +03:00
parent c8c7d50cbb
commit fe06008930
6 changed files with 598 additions and 80 deletions

View File

@@ -1,6 +1,6 @@
"""
Metrics middleware for aiogram 3.x.
Automatically collects metrics for message processing, command execution, and errors.
Enhanced Metrics middleware for aiogram 3.x.
Automatically collects ALL available metrics for comprehensive monitoring.
"""
from typing import Any, Awaitable, Callable, Dict, Union, Optional
@@ -9,6 +9,7 @@ from aiogram.types import TelegramObject, Message, CallbackQuery
from aiogram.enums import ChatType
import time
import logging
import asyncio
from ..utils.metrics import metrics
# Import button command mapping
@@ -21,7 +22,11 @@ try:
COMMAND_MAPPING as VOICE_COMMAND_MAPPING,
CALLBACK_COMMAND_MAPPING as VOICE_CALLBACK_COMMAND_MAPPING
)
except ImportError:
print(f"✅ Constants imported successfully: ADMIN_COMMANDS={len(ADMIN_COMMANDS)}, BUTTON_COMMAND_MAPPING={len(BUTTON_COMMAND_MAPPING)}")
print(f"✅ ADMIN_COMMANDS: {list(ADMIN_COMMANDS.keys())}")
print(f"✅ BUTTON_COMMAND_MAPPING: {list(BUTTON_COMMAND_MAPPING.keys())}")
except ImportError as e:
print(f"❌ Failed to import constants: {e}")
# Fallback if constants not available
BUTTON_COMMAND_MAPPING = {}
CALLBACK_COMMAND_MAPPING = {}
@@ -30,14 +35,25 @@ except ImportError:
VOICE_BUTTON_COMMAND_MAPPING = {}
VOICE_COMMAND_MAPPING = {}
VOICE_CALLBACK_COMMAND_MAPPING = {}
print("⚠️ Using empty constants fallback")
class MetricsMiddleware(BaseMiddleware):
"""Middleware for automatic metrics collection in aiogram handlers."""
"""Enhanced middleware for automatic collection of ALL available metrics."""
def __init__(self):
super().__init__()
self.logger = logging.getLogger(__name__)
self.logger.setLevel(logging.DEBUG) # Enable debug logging for metrics tracking
# Metrics update intervals
self.last_active_users_update = 0
self.active_users_update_interval = 300 # 5 minutes
self.last_middleware_metrics_update = 0
self.middleware_metrics_interval = 60 # 1 minute
# Middleware performance tracking
self.middleware_start_time = None
async def __call__(
self,
@@ -45,30 +61,71 @@ class MetricsMiddleware(BaseMiddleware):
event: TelegramObject,
data: Dict[str, Any]
) -> Any:
"""Process event and collect metrics."""
"""Process event and collect comprehensive metrics."""
# Extract command info before execution
# Simple print to verify middleware is called
print("=" * 50)
print("🔍 MetricsMiddleware CALLED!")
print(f"🔍 Event type: {type(event).__name__}")
print(f"🔍 Event: {event}")
print(f"🔍 Handler: {handler}")
print("=" * 50)
# Start middleware timing
middleware_start = time.time()
# Update active users periodically
current_time = time.time()
if current_time - self.last_active_users_update > self.active_users_update_interval:
await self._update_active_users_metric()
self.last_active_users_update = current_time
# Extract comprehensive command and event info
command_info = None
if isinstance(event, Message):
self.logger.debug(f"📊 Processing Message event")
await self._record_message_metrics(event)
command_info = self._extract_command_info(event)
elif isinstance(event, CallbackQuery):
self.logger.debug(f"📊 Processing CallbackQuery event")
await self._record_callback_metrics(event)
command_info = self._extract_callback_command_info(event)
else:
self.logger.debug(f"📊 Processing unknown event type: {type(event).__name__}")
event_metrics = {}
# Execute handler with timing
# Debug: Log event type and structure
self.logger.info(f"📊 Event type: {type(event).__name__}")
self.logger.info(f"📊 Event: {event}")
# Correct order for aiogram 3.x: first check Update structure, then fallback to direct types
if hasattr(event, 'message') and event.message:
# Handle aiogram 3.x Update.message
self.logger.info(f"📊 Processing Update.message event: {event.message.text[:50] if event.message.text else 'No text'}")
event_metrics = await self._record_comprehensive_message_metrics(event.message)
command_info = self._extract_command_info_with_fallback(event.message)
elif hasattr(event, 'callback_query') and event.callback_query:
# Handle aiogram 3.x Update.callback_query
self.logger.info(f"📊 Processing Update.callback_query event: {event.callback_query.data[:50] if event.callback_query.data else 'No data'}")
event_metrics = await self._record_comprehensive_callback_metrics(event.callback_query)
command_info = self._extract_callback_command_info_with_fallback(event.callback_query)
elif isinstance(event, Message):
# Fallback for direct Message objects (if they exist)
self.logger.info(f"📊 Processing direct Message event: {event.text[:50] if event.text else 'No text'}")
event_metrics = await self._record_comprehensive_message_metrics(event)
command_info = self._extract_command_info_with_fallback(event)
elif isinstance(event, CallbackQuery):
# Fallback for direct CallbackQuery objects (if they exist)
self.logger.info(f"📊 Processing direct CallbackQuery event: {event.data[:50] if event.data else 'No data'}")
event_metrics = await self._record_comprehensive_callback_metrics(event)
command_info = self._extract_callback_command_info_with_fallback(event)
else:
# Unknown event type - log for debugging
self.logger.info(f"📊 Processing unknown event type: {type(event).__name__}")
self.logger.info(f"📊 Event attributes: {[attr for attr in dir(event) if not attr.startswith('_')]}")
event_metrics = await self._record_unknown_event_metrics(event)
# Execute handler with comprehensive timing and metrics
start_time = time.time()
try:
result = await handler(event, data)
duration = time.time() - start_time
# Record successful execution
# Record comprehensive successful execution metrics
handler_name = self._get_handler_name(handler)
self.logger.debug(f"📊 Recording successful execution: {handler_name}")
self.logger.debug(f"📊 Recording successful execution: {handler_name} ({duration:.3f}s)")
# Record method duration
metrics.record_method_duration(
handler_name,
duration,
@@ -76,7 +133,7 @@ class MetricsMiddleware(BaseMiddleware):
"success"
)
# Record command with success status if applicable
# Record command metrics if available
if command_info:
metrics.record_command(
command_info['command'],
@@ -85,22 +142,30 @@ class MetricsMiddleware(BaseMiddleware):
"success"
)
# Record additional event metrics
await self._record_additional_success_metrics(event, event_metrics, handler_name)
return result
except Exception as e:
duration = time.time() - start_time
# Record error and timing
# Record comprehensive error metrics
handler_name = self._get_handler_name(handler)
self.logger.debug(f"📊 Recording error execution: {handler_name}, error: {type(e).__name__}")
error_type = type(e).__name__
self.logger.debug(f"📊 Recording error execution: {handler_name}, error: {error_type} ({duration:.3f}s)")
# Record method duration with error
metrics.record_method_duration(
handler_name,
duration,
"handler",
"error"
)
# Record error
metrics.record_error(
type(e).__name__,
error_type,
"handler",
handler_name
)
@@ -114,32 +179,96 @@ class MetricsMiddleware(BaseMiddleware):
"error"
)
# Record additional error metrics
await self._record_additional_error_metrics(event, event_metrics, handler_name, error_type)
raise
finally:
# Record middleware execution time
middleware_duration = time.time() - middleware_start
metrics.record_middleware("MetricsMiddleware", middleware_duration, "success")
def _get_handler_name(self, handler: Callable) -> str:
"""Extract handler name efficiently."""
# Проверяем различные способы получения имени хендлера
if hasattr(handler, '__name__') and handler.__name__ != '<lambda>':
return handler.__name__
elif hasattr(handler, '__qualname__') and handler.__qualname__ != '<lambda>':
return handler.__qualname__
elif hasattr(handler, 'callback') and hasattr(handler.callback, '__name__'):
return handler.callback.__name__
elif hasattr(handler, 'view') and hasattr(handler.view, '__name__'):
return handler.view.__name__
else:
# Пытаемся получить имя из строкового представления
handler_str = str(handler)
if 'function' in handler_str:
# Извлекаем имя функции из строки
import re
match = re.search(r'function\s+(\w+)', handler_str)
if match:
return match.group(1)
return "unknown"
async def _update_active_users_metric(self):
"""Periodically update active users metric from database."""
try:
self.logger.debug("📊 Updating active users metric...")
# Get database instance
from ..utils.base_dependency_factory import get_global_instance
bdf = get_global_instance()
bot_db = bdf.get_db()
# Count active users (last 24 hours) - date_changed is INTEGER (UNIX timestamp)
import time
current_timestamp = int(time.time())
one_day_ago = current_timestamp - (24 * 60 * 60)
# First, let's check if table exists and has data
check_table_query = """
SELECT name FROM sqlite_master
WHERE type='table' AND name='our_users'
"""
await bot_db.connect()
await bot_db.cursor.execute(check_table_query)
table_exists = await bot_db.cursor.fetchone()
if not table_exists:
self.logger.warning("⚠️ Table 'our_users' does not exist")
metrics.set_active_users(1, "daily") # Fallback to 1
metrics.set_active_users(1, "total")
await bot_db.close()
return
# Check total users first
total_users_query = "SELECT COUNT(*) FROM our_users"
await bot_db.cursor.execute(total_users_query)
total_users_result = await bot_db.cursor.fetchone()
total_users = total_users_result[0] if total_users_result else 0
self.logger.debug(f"📊 Total users in database: {total_users}")
if total_users == 0:
self.logger.warning("⚠️ No users in database")
metrics.set_active_users(1, "daily") # Fallback to 1
metrics.set_active_users(1, "total")
await bot_db.close()
return
# Now count active users (last 24 hours)
active_users_query = """
SELECT COUNT(DISTINCT user_id) as active_users
FROM our_users
WHERE date_changed > ?
"""
await bot_db.cursor.execute(active_users_query, (one_day_ago,))
result = await bot_db.cursor.fetchone()
active_users = result[0] if result else 0
# If no active users in last 24h, use total users as fallback
if active_users == 0:
self.logger.warning("⚠️ No active users in last 24h, using total users as fallback")
active_users = total_users
await bot_db.close()
# Update metrics
metrics.set_active_users(active_users, "daily")
metrics.set_active_users(total_users, "total")
self.logger.debug(f"📊 Active users updated: daily={active_users}, total={total_users}")
except Exception as e:
self.logger.error(f"❌ Failed to update active users metric: {e}")
# Set fallback values
metrics.set_active_users(1, "daily")
metrics.set_active_users(1, "total")
async def _record_message_metrics(self, message: Message):
"""Record message metrics efficiently."""
async def _record_comprehensive_message_metrics(self, message: Message) -> Dict[str, Any]:
"""Record comprehensive message metrics."""
metrics_data = {}
# Determine message type
message_type = "text"
if message.photo:
@@ -168,19 +297,57 @@ class MetricsMiddleware(BaseMiddleware):
# Record message processing
metrics.record_message(message_type, chat_type, "message_handler")
# Store metrics data for later use
metrics_data.update({
'message_type': message_type,
'chat_type': chat_type,
'user_id': message.from_user.id if message.from_user else None,
'is_bot': message.from_user.is_bot if message.from_user else False
})
return metrics_data
async def _record_callback_metrics(self, callback: CallbackQuery):
"""Record callback metrics efficiently."""
async def _record_comprehensive_callback_metrics(self, callback: CallbackQuery) -> Dict[str, Any]:
"""Record comprehensive callback metrics."""
metrics_data = {}
# Record callback message
metrics.record_message("callback_query", "callback", "callback_handler")
# Store metrics data for later use
metrics_data.update({
'callback_data': callback.data,
'user_id': callback.from_user.id if callback.from_user else None,
'is_bot': callback.from_user.is_bot if callback.from_user else False
})
return metrics_data
def _extract_command_info(self, message: Message) -> Optional[Dict[str, str]]:
"""Extract command information from message (commands or button clicks)."""
async def _record_unknown_event_metrics(self, event: TelegramObject) -> Dict[str, Any]:
"""Record metrics for unknown event types."""
metrics_data = {}
# Record unknown event
metrics.record_message("unknown", "unknown", "unknown_handler")
# Store event type for later use
metrics_data.update({
'event_type': type(event).__name__,
'event_data': str(event)[:100] if hasattr(event, '__str__') else "unknown"
})
return metrics_data
def _extract_command_info_with_fallback(self, message: Message) -> Optional[Dict[str, str]]:
"""Extract command information with fallback for unknown commands."""
if not message.text:
return None
# Check if it's a slash command
if message.text.startswith('/'):
command_name = message.text.split()[0][1:] # Remove '/' and get command name
# Check if it's an admin command
if command_name in ADMIN_COMMANDS:
return {
@@ -196,10 +363,12 @@ class MetricsMiddleware(BaseMiddleware):
'handler_type': "voice_command_handler"
}
else:
# FALLBACK: Record unknown command
self.logger.debug(f"📊 Unknown command detected: /{command_name}")
return {
'command': command_name,
'user_type': "user" if message.from_user else "unknown",
'handler_type': "message_handler"
'handler_type': "unknown_command_handler"
}
# Check if it's an admin button click
@@ -226,10 +395,23 @@ class MetricsMiddleware(BaseMiddleware):
'handler_type': "voice_button_handler"
}
# FALLBACK: Record ANY text message as a command for metrics
if message.text and len(message.text.strip()) > 0:
# Clean text for command name (remove special chars, limit length)
clean_text = message.text.strip()[:30].replace(' ', '_').replace('\n', '_')
clean_text = ''.join(c for c in clean_text if c.isalnum() or c in '_')
self.logger.debug(f"📊 Text message recorded as command: {clean_text}")
return {
'command': f"text_{clean_text}",
'user_type': "user" if message.from_user else "unknown",
'handler_type': "text_message_handler"
}
return None
def _extract_callback_command_info(self, callback: CallbackQuery) -> Optional[Dict[str, str]]:
"""Extract command information from callback query."""
def _extract_callback_command_info_with_fallback(self, callback: CallbackQuery) -> Optional[Dict[str, str]]:
"""Extract callback command information with fallback."""
if not callback.data:
return None
@@ -250,11 +432,73 @@ class MetricsMiddleware(BaseMiddleware):
'handler_type': "voice_callback_handler"
}
# FALLBACK: Record unknown callback
if parts:
self.logger.debug(f"📊 Unknown callback detected: {parts[0]}")
return {
'command': f"callback_{parts[0][:20]}",
'user_type': "user" if callback.from_user else "unknown",
'handler_type': "unknown_callback_handler"
}
return None
async def _record_additional_success_metrics(self, event: TelegramObject, event_metrics: Dict[str, Any], handler_name: str):
"""Record additional success metrics."""
try:
# Record rate limiting metrics (if applicable)
if hasattr(event, 'from_user') and event.from_user:
# You can add rate limiting logic here
pass
# Record user activity metrics
if event_metrics.get('user_id'):
# This could trigger additional user activity tracking
pass
except Exception as e:
self.logger.error(f"❌ Error recording additional success metrics: {e}")
async def _record_additional_error_metrics(self, event: TelegramObject, event_metrics: Dict[str, Any], handler_name: str, error_type: str):
"""Record additional error metrics."""
try:
# Record specific error context
if event_metrics.get('user_id'):
# You can add user-specific error tracking here
pass
except Exception as e:
self.logger.error(f"❌ Error recording additional error metrics: {e}")
def _get_handler_name(self, handler: Callable) -> str:
"""Extract handler name efficiently."""
# Проверяем различные способы получения имени хендлера
if hasattr(handler, '__name__') and handler.__name__ != '<lambda>':
return handler.__name__
elif hasattr(handler, '__qualname__') and handler.__qualname__ != '<lambda>':
return handler.__qualname__
elif hasattr(handler, 'callback') and hasattr(handler.callback, '__name__'):
return handler.callback.__name__
elif hasattr(handler, 'view') and hasattr(handler.view, '__name__'):
return handler.view.__name__
else:
# Пытаемся получить имя из строкового представления
handler_str = str(handler)
if 'function' in handler_str:
# Извлекаем имя функции из строки
import re
match = re.search(r'function\s+(\w+)', handler_str)
if match:
return match.group(1)
return "unknown"
class DatabaseMetricsMiddleware(BaseMiddleware):
"""Middleware for database operation metrics."""
"""Enhanced middleware for database operation metrics."""
def __init__(self):
super().__init__()
self.logger = logging.getLogger(__name__)
async def __call__(
self,
@@ -267,14 +511,36 @@ class DatabaseMetricsMiddleware(BaseMiddleware):
# Check if this handler involves database operations
handler_name = handler.__name__ if hasattr(handler, '__name__') else "unknown"
# You can add specific database operation detection logic here
# For now, we'll just pass through and let individual decorators handle it
# Record middleware start
start_time = time.time()
return await handler(event, data)
try:
result = await handler(event, data)
# Record successful database operation
duration = time.time() - start_time
metrics.record_middleware("DatabaseMetricsMiddleware", duration, "success")
return result
except Exception as e:
# Record failed database operation
duration = time.time() - start_time
metrics.record_middleware("DatabaseMetricsMiddleware", duration, "error")
metrics.record_error(
type(e).__name__,
"database_middleware",
handler_name
)
raise
class ErrorMetricsMiddleware(BaseMiddleware):
"""Middleware for error tracking and metrics."""
"""Enhanced middleware for error tracking and metrics."""
def __init__(self):
super().__init__()
self.logger = logging.getLogger(__name__)
async def __call__(
self,
@@ -284,14 +550,28 @@ class ErrorMetricsMiddleware(BaseMiddleware):
) -> Any:
"""Process event and collect error metrics."""
# Record middleware start
start_time = time.time()
try:
return await handler(event, data)
result = await handler(event, data)
# Record successful error handling
duration = time.time() - start_time
metrics.record_middleware("ErrorMetricsMiddleware", duration, "success")
return result
except Exception as e:
# Record error metrics
duration = time.time() - start_time
handler_name = handler.__name__ if hasattr(handler, '__name__') else "unknown"
metrics.record_middleware("ErrorMetricsMiddleware", duration, "error")
metrics.record_error(
type(e).__name__,
"handler",
"error_middleware",
handler_name
)
raise