#!/bin/bash
#
# Monitors the Telegram bot Docker container and restarts it automatically.
#
# Every CHECK_INTERVAL seconds the script checks that the container is listed
# by `docker ps` and that its HTTP health endpoint answers successfully.
# A container that is not running is restarted immediately; a running
# container is restarted after MAX_FAILURES consecutive failed health checks.
#
# Usage: ./monitor_bot.sh
#
# Optional environment overrides (defaults shown in the Configuration
# section below): BOT_CONTAINER, HEALTH_ENDPOINT, CHECK_INTERVAL,
# MAX_FAILURES, LOG_FILE.

set -euo pipefail

# Configuration
BOT_CONTAINER="${BOT_CONTAINER:-telegram-helper-bot}"
HEALTH_ENDPOINT="${HEALTH_ENDPOINT:-http://localhost:8080/health}"
CHECK_INTERVAL="${CHECK_INTERVAL:-60}" # seconds
MAX_FAILURES="${MAX_FAILURES:-3}"
LOG_FILE="${LOG_FILE:-logs/bot_monitor.log}"

# Colors for output. ANSI-C quoting ($'...') stores the real ESC byte, so the
# codes render as colors instead of being printed as the literal text "\033[".
readonly RED=$'\033[0;31m'
readonly GREEN=$'\033[0;32m'
readonly YELLOW=$'\033[1;33m'
readonly NC=$'\033[0m' # No Color

#######################################
# Print a timestamped message to stdout and append it to the log file.
# Globals:   LOG_FILE, RED, GREEN, YELLOW, NC (read)
# Arguments: $1 - message text; may contain the color variables above
# Outputs:   colored line on stdout; the same line without color codes is
#            appended to LOG_FILE
#######################################
log() {
  local message plain code
  message="$(date '+%Y-%m-%d %H:%M:%S') - ${1-}"
  printf '%s\n' "$message"

  # Keep the log file free of terminal escape sequences.
  plain=$message
  for code in "$RED" "$GREEN" "$YELLOW" "$NC"; do
    plain=${plain//"$code"/}
  done

  # File logging is best-effort: under `set -e` an unwritable log file must
  # not terminate the monitor itself (bash still reports the error on stderr).
  printf '%s\n' "$plain" >>"$LOG_FILE" || true
}

#######################################
# Check whether the bot container is currently running.
# Globals:   BOT_CONTAINER (read)
# Returns:   0 if `docker ps` lists a container with exactly that name,
#            non-zero otherwise (including when `docker ps` itself fails)
#######################################
check_container_running() {
  local running_names
  running_names=$(docker ps --format '{{.Names}}') || return 1
  # -F -x: compare the name literally against whole lines, not as a regex.
  grep -Fxq -- "$BOT_CONTAINER" <<<"$running_names"
}

#######################################
# Probe the HTTP health endpoint.
# Globals:   HEALTH_ENDPOINT (read)
# Returns:   0 if curl succeeds; non-zero on connection failure, timeout, or
#            an HTTP error status (curl -f)
#######################################
check_health() {
  curl -f --connect-timeout 5 --max-time 10 "$HEALTH_ENDPOINT" >/dev/null 2>&1
}

#######################################
# Restart the container and wait for it to report healthy.
# Waits 30 seconds after `docker restart`, then polls the health endpoint up
# to 10 times, 10 seconds apart.
# Globals:   BOT_CONTAINER (read)
# Returns:   0 if the container became healthy, 1 if the restart command
#            failed or the container never passed a health check
#######################################
restart_container() {
  local -r max_attempts=10
  local attempts=0

  log "${YELLOW}Restarting container ${BOT_CONTAINER}...${NC}"

  if ! docker restart "$BOT_CONTAINER" >/dev/null 2>&1; then
    log "${RED}Failed to restart container${NC}"
    return 1
  fi

  log "${GREEN}Container restarted successfully${NC}"

  # Wait for container to be ready
  log "Waiting for container to be ready..."
  sleep 30

  # Check if container is healthy
  while ((attempts < max_attempts)); do
    if check_health; then
      log "${GREEN}Container is healthy after restart${NC}"
      return 0
    fi
    attempts=$((attempts + 1))
    sleep 10
  done

  log "${RED}Container failed to become healthy after restart${NC}"
  return 1
}

#######################################
# Main monitoring loop; runs until the script is interrupted.
# Globals:   BOT_CONTAINER, HEALTH_ENDPOINT, CHECK_INTERVAL, MAX_FAILURES,
#            LOG_FILE (read)
#######################################
main() {
  local log_dir
  local failure_count=0

  # Make sure the log directory exists before the first log call.
  log_dir=$(dirname -- "$LOG_FILE")
  mkdir -p -- "$log_dir" \
    || printf 'Warning: cannot create log directory %s\n' "$log_dir" >&2

  log "${GREEN}Starting bot monitoring...${NC}"
  log "Container: $BOT_CONTAINER"
  log "Health endpoint: $HEALTH_ENDPOINT"
  log "Check interval: ${CHECK_INTERVAL}s"
  log "Max failures: $MAX_FAILURES"

  while true; do
    if ! check_container_running; then
      # Container is down: restart right away, without waiting for the
      # failure threshold.
      log "${RED}Container $BOT_CONTAINER is not running!${NC}"
      if restart_container; then
        failure_count=0
      else
        failure_count=$((failure_count + 1))
      fi
    elif check_health; then
      if ((failure_count > 0)); then
        log "${GREEN}Container recovered, resetting failure count${NC}"
        failure_count=0
      fi
      log "${GREEN}Container is healthy${NC}"
    else
      failure_count=$((failure_count + 1))
      log "${YELLOW}Health check failed (${failure_count}/${MAX_FAILURES})${NC}"

      if ((failure_count >= MAX_FAILURES)); then
        log "${RED}Max failures reached, restarting container${NC}"
        if restart_container; then
          failure_count=0
        else
          # The count stays at or above the threshold, so another restart is
          # attempted on the next cycle.
          log "${RED}Failed to restart container after max failures${NC}"
        fi
      fi
    fi

    sleep "$CHECK_INTERVAL"
  done
}

# Handle script interruption
trap 'log "Monitoring stopped by user"; exit 0' INT TERM

# Run main function
main "$@"