- Added `ca-certificates` installation to Dockerfile for improved network security.
- Updated health check command in Dockerfile to include better timeout handling.
- Refactored `run_helper.py` to implement proper signal handling and logging during shutdown.
- Transitioned database operations to an asynchronous model in `async_db.py`, improving performance and responsiveness.
- Updated database schema to support new foreign key relationships and optimized indexing for better query performance.
- Enhanced various bot handlers to utilize async database methods, improving overall efficiency and user experience.
- Removed obsolete database and fix scripts to streamline the project structure.
125 lines
3.3 KiB
Bash
Executable File
125 lines
3.3 KiB
Bash
Executable File
#!/bin/bash
#
# Script for monitoring and auto-restarting the Telegram bot.
# It watches the Docker container named in BOT_CONTAINER, probes
# HEALTH_ENDPOINT, and restarts the container when it is not running or
# when MAX_FAILURES consecutive health probes have failed.
#
# Usage: ./monitor_bot.sh

# Abort on any unhandled command failure.
set -e

# Configuration
BOT_CONTAINER="telegram-helper-bot"             # Docker container to watch
HEALTH_ENDPOINT="http://localhost:8080/health"  # URL probed by the health check
CHECK_INTERVAL=60                               # seconds between monitoring passes
MAX_FAILURES=3                                  # failed probes in a row before a restart
LOG_FILE="logs/bot_monitor.log"                 # relative to the working directory

# Colors for output. These are stored as backslash-escaped text, so whatever
# prints them has to interpret the escapes for the colors to take effect.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color

# Logging function
#
# Prints a timestamped message to stdout and appends the same line to
# $LOG_FILE.
# Globals:   LOG_FILE (read)
# Arguments: $1 - message text; backslash escapes in it (such as '\033[0;31m'
#                 color codes) are interpreted
# Returns:   always 0, so that a logging problem cannot terminate the
#            monitor under `set -e`
log() {
  local stamp log_dir
  stamp=$(date '+%Y-%m-%d %H:%M:%S')

  # tee does not create missing parent directories, so make sure the log
  # directory exists before the first write.
  log_dir=$(dirname -- "$LOG_FILE")
  [ -d "$log_dir" ] || mkdir -p -- "$log_dir" 2>/dev/null || true

  # %b expands the backslash escapes of the color codes; a plain `echo`
  # would print them as literal text. If tee cannot write the file it still
  # copies the line to stdout and reports the problem on stderr.
  printf '%s - %b\n' "$stamp" "${1-}" | tee -a "$LOG_FILE" || true
}

# Check if container is running
#
# Globals:   BOT_CONTAINER (read)
# Returns:   0 if `docker ps` lists a container whose name is exactly
#            $BOT_CONTAINER, 1 otherwise
check_container_running() {
  # One name per line without the table header; -F treats the name as a
  # literal string (not a regex) and -x requires a whole-line match.
  if docker ps --format '{{.Names}}' | grep -Fxq -- "$BOT_CONTAINER"; then
    return 0
  fi
  return 1
}

# Check health endpoint
#
# Globals:   HEALTH_ENDPOINT (read)
# Returns:   0 if curl completes the request to $HEALTH_ENDPOINT successfully,
#            1 otherwise (whatever curl's own exit code was)
check_health() {
  # -f turns HTTP error responses into a failing exit status; the request may
  # take at most 5 s to connect and 10 s overall. All output is discarded.
  if curl -f --connect-timeout 5 --max-time 10 "$HEALTH_ENDPOINT" >/dev/null 2>&1; then
    return 0
  fi
  return 1
}

# Restart container
#
# Restarts $BOT_CONTAINER with `docker restart`, pauses while it starts up,
# and then probes the health endpoint until a probe succeeds or the attempts
# run out.
# Globals:   BOT_CONTAINER, YELLOW, GREEN, RED, NC (read)
#            RESTART_WARMUP_SECONDS  - optional; pause after the restart (default 30)
#            RESTART_HEALTH_ATTEMPTS - optional; number of health probes (default 10)
#            RESTART_HEALTH_INTERVAL - optional; seconds between probes (default 10)
# Returns:   0 as soon as a health probe succeeds; 1 if `docker restart`
#            fails or no probe succeeds
restart_container() {
  local warmup=${RESTART_WARMUP_SECONDS:-30}
  local max_attempts=${RESTART_HEALTH_ATTEMPTS:-10}
  local interval=${RESTART_HEALTH_INTERVAL:-10}
  local attempt=1

  log "${YELLOW}Restarting container ${BOT_CONTAINER}...${NC}"

  if ! docker restart "$BOT_CONTAINER" >/dev/null 2>&1; then
    log "${RED}Failed to restart container${NC}"
    return 1
  fi
  log "${GREEN}Container restarted successfully${NC}"

  # Wait for container to be ready
  log "Waiting for container to be ready..."
  sleep "$warmup"

  # Check if container is healthy
  while [ "$attempt" -le "$max_attempts" ]; do
    if check_health; then
      log "${GREEN}Container is healthy after restart${NC}"
      return 0
    fi
    # Sleeping after the final probe would only delay the failure report.
    if [ "$attempt" -lt "$max_attempts" ]; then
      sleep "$interval"
    fi
    attempt=$((attempt + 1))
  done

  log "${RED}Container failed to become healthy after restart${NC}"
  return 1
}

# Main monitoring loop
#
# Logs the active settings, then loops forever. On each pass:
#   - if the container is not running, it is restarted; a failed restart
#     adds to the failure count, a successful one clears it;
#   - otherwise the health endpoint is probed; a failed probe adds to the
#     failure count, and once the count reaches MAX_FAILURES the container is
#     restarted (the count is cleared only if that restart succeeds);
#   - a successful probe clears a non-zero failure count.
# The loop pauses CHECK_INTERVAL seconds between passes and never returns.
# Globals:   BOT_CONTAINER, HEALTH_ENDPOINT, CHECK_INTERVAL, MAX_FAILURES,
#            RED, GREEN, YELLOW, NC (read)
main() {
  log "${GREEN}Starting bot monitoring...${NC}"
  log "Container: $BOT_CONTAINER"
  log "Health endpoint: $HEALTH_ENDPOINT"
  log "Check interval: ${CHECK_INTERVAL}s"
  log "Max failures: $MAX_FAILURES"

  local failure_count=0

  while true; do
    # Check if container is running
    if ! check_container_running; then
      log "${RED}Container $BOT_CONTAINER is not running!${NC}"
      if restart_container; then
        failure_count=0
      else
        failure_count=$((failure_count + 1))
      fi
    # Check health endpoint
    elif check_health; then
      if [ "$failure_count" -gt 0 ]; then
        log "${GREEN}Container recovered, resetting failure count${NC}"
        failure_count=0
      fi
      log "${GREEN}Container is healthy${NC}"
    else
      failure_count=$((failure_count + 1))
      log "${YELLOW}Health check failed (${failure_count}/${MAX_FAILURES})${NC}"

      if [ "$failure_count" -ge "$MAX_FAILURES" ]; then
        log "${RED}Max failures reached, restarting container${NC}"
        if restart_container; then
          failure_count=0
        else
          # The count stays at or above the limit, so the restart is
          # attempted again on the next failed probe.
          log "${RED}Failed to restart container after max failures${NC}"
        fi
      fi
    fi

    sleep "$CHECK_INTERVAL"
  done
}

# Handle script interruption: on SIGINT or SIGTERM, write a final log entry
# and leave with exit status 0.
trap 'log "Monitoring stopped by user"; exit 0' INT TERM

# Run main function
main "$@"