# Production deployment workflow.
#
# Jobs:
#   deploy        - validates the Telegram bot tokens, updates /home/prod on the
#                   server over SSH, rebuilds and restarts the docker-compose
#                   stack, runs HTTP health checks and records the result in
#                   /home/prod/.deploy_history.txt.
#   smoke-tests   - runs only after a successful deploy; checks the bots'
#                   health/metrics endpoints and sends a "ping" message through
#                   the Telegram Bot API.
#   auto-rollback - runs only when smoke-tests failed; resets the server checkout
#                   to the last commit recorded as "success" in the deploy
#                   history and rebuilds the stack.
#
# Triggered by a merged pull request into main or by manual dispatch.
name: Deploy to Production

on:
  pull_request:
    types: [closed]
    branches: [main]
  workflow_dispatch:

jobs:
  deploy:
    runs-on: ubuntu-latest
    name: Deploy to Production
    # Never cancel a running production deploy; queue the next one instead.
    concurrency:
      group: production-deploy
      cancel-in-progress: false
    # "closed" also fires for PRs closed without merging, so require merged == true.
    if: |
      (github.event_name == 'pull_request' && github.event.pull_request.merged == true) ||
      github.event_name == 'workflow_dispatch'
    environment:
      name: production
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: main

      # Prints only whether each secret/variable is non-empty, never its value.
      - name: Debug secrets availability
        run: |
          echo "🔍 Checking secrets availability..."
          echo "TELEGRAM_BOT_TOKEN: $([ -n '${{ secrets.TELEGRAM_BOT_TOKEN }}' ] && echo '✅ Set' || echo '❌ Not set')"
          echo "TELEGRAM_TEST_BOT_TOKEN: $([ -n '${{ secrets.TELEGRAM_TEST_BOT_TOKEN }}' ] && echo '✅ Set' || echo '⚠️ Not set (optional)')"
          echo "ANON_BOT_TOKEN: $([ -n '${{ secrets.ANON_BOT_TOKEN }}' ] && echo '✅ Set' || echo '⚠️ Not set (optional)')"
          echo "SSH_PRIVATE_KEY: $([ -n '${{ secrets.SSH_PRIVATE_KEY }}' ] && echo '✅ Set' || echo '❌ Not set')"
          echo "SERVER_HOST: $([ -n '${{ vars.SERVER_HOST || secrets.SERVER_HOST }}' ] && echo '✅ Set' || echo '❌ Not set')"
          echo "SERVER_USER: $([ -n '${{ vars.SERVER_USER || secrets.SERVER_USER }}' ] && echo '✅ Set' || echo '❌ Not set')"

      # Calls the Telegram getMe endpoint from the server for each configured token.
      # The helper-bot and AnonBot tokens are blocking; the test-bot token is not.
      - name: Validate Telegram Bot Tokens
        uses: appleboy/ssh-action@v1.0.0
        with:
          host: ${{ vars.SERVER_HOST || secrets.SERVER_HOST }}
          username: ${{ vars.SERVER_USER || secrets.SERVER_USER }}
          key: ${{ secrets.SSH_PRIVATE_KEY }}
          port: ${{ vars.SSH_PORT || secrets.SSH_PORT || 22 }}
          script: |
            set -e
            export TELEGRAM_BOT_TOKEN="${{ secrets.TELEGRAM_BOT_TOKEN }}"
            export TELEGRAM_TEST_BOT_TOKEN="${{ secrets.TELEGRAM_TEST_BOT_TOKEN }}"
            export ANON_BOT_TOKEN="${{ secrets.ANON_BOT_TOKEN }}"

            echo "🔍 Debug: Checking environment variables..."
            echo "TELEGRAM_BOT_TOKEN length: ${#TELEGRAM_BOT_TOKEN}"
            echo "TELEGRAM_TEST_BOT_TOKEN length: ${#TELEGRAM_TEST_BOT_TOKEN}"
            echo "ANON_BOT_TOKEN length: ${#ANON_BOT_TOKEN}"
            echo "TELEGRAM_BOT_TOKEN is empty: $([ -z "$TELEGRAM_BOT_TOKEN" ] && echo 'YES' || echo 'NO')"

            echo "🔍 Validating Telegram Bot tokens from GitHub Secrets..."

            # Validate one token against the Telegram API, retrying on failure.
            #   $1 - human-readable token name used in log messages
            #   $2 - the token value
            # Returns 0 when getMe answers "ok":true, 1 after max_retries failures.
            validate_token() {
              local token_name=$1
              local token=$2
              local max_retries=3
              local retry=1

              while [ $retry -le $max_retries ]; do
                echo "🔍 Checking $token_name (attempt $retry/$max_retries)..."
                # "|| echo" keeps set -e from aborting when curl itself fails.
                response=$(curl -s --max-time 10 "https://api.telegram.org/bot${token}/getMe" || echo "")

                if echo "$response" | grep -q '"ok":true'; then
                  # The pipeline ends in cut, which succeeds even on empty input,
                  # so the fallback has to be applied to the value, not the exit code.
                  bot_username=$(echo "$response" | grep -o '"username":"[^"]*"' | cut -d'"' -f4 || true)
                  bot_username="${bot_username:-unknown}"
                  echo "✅ $token_name is valid (bot: @$bot_username)"
                  return 0
                else
                  if [ $retry -lt $max_retries ]; then
                    echo "⏳ $token_name validation failed, retrying in 5 seconds..."
                    sleep 5
                  else
                    echo "❌ $token_name is invalid or unreachable"
                    echo "Response: $response"
                    return 1
                  fi
                fi
                retry=$((retry + 1))
              done
              return 1
            }

            # Telegram Helper Bot token: required.
            if [ -z "$TELEGRAM_BOT_TOKEN" ]; then
              echo "❌ TELEGRAM_BOT_TOKEN not found in GitHub Secrets"
              echo "💡 Make sure the secret is added to the 'production' environment or repository secrets"
              exit 1
            fi

            if ! validate_token "Telegram Helper Bot Token" "$TELEGRAM_BOT_TOKEN"; then
              exit 1
            fi

            # TELEGRAM_TEST_BOT_TOKEN: optional, a failed validation only warns.
            if [ -n "$TELEGRAM_TEST_BOT_TOKEN" ]; then
              if ! validate_token "Telegram Test Bot Token" "$TELEGRAM_TEST_BOT_TOKEN"; then
                echo "⚠️ Test bot token validation failed, but continuing..."
              fi
            else
              echo "ℹ️ TELEGRAM_TEST_BOT_TOKEN not set, skipping"
            fi

            # AnonBot token: may be absent, but must be valid when present.
            if [ -z "$ANON_BOT_TOKEN" ]; then
              echo "⚠️ ANON_BOT_TOKEN not found in GitHub Secrets, skipping validation"
            else
              if ! validate_token "AnonBot Token" "$ANON_BOT_TOKEN"; then
                exit 1
              fi
            fi

            echo "✅ All token validations passed!"

      # Updates the checkout in /home/prod to origin/main and rebuilds the stack.
      - name: Deploy to server
        uses: appleboy/ssh-action@v1.0.0
        with:
          host: ${{ vars.SERVER_HOST || secrets.SERVER_HOST }}
          username: ${{ vars.SERVER_USER || secrets.SERVER_USER }}
          key: ${{ secrets.SSH_PRIVATE_KEY }}
          port: ${{ vars.SSH_PORT || secrets.SSH_PORT || 22 }}
          script: |
            set -e
            export TELEGRAM_BOT_TOKEN="${{ secrets.TELEGRAM_BOT_TOKEN }}"
            export TELEGRAM_TEST_BOT_TOKEN="${{ secrets.TELEGRAM_TEST_BOT_TOKEN }}"
            export ANON_BOT_TOKEN="${{ secrets.ANON_BOT_TOKEN }}"

            echo "🔍 Debug: Checking environment variables in Deploy step..."
            echo "TELEGRAM_BOT_TOKEN is set: $([ -n "$TELEGRAM_BOT_TOKEN" ] && echo 'YES' || echo 'NO')"
            echo "TELEGRAM_TEST_BOT_TOKEN is set: $([ -n "$TELEGRAM_TEST_BOT_TOKEN" ] && echo 'YES' || echo 'NO')"
            echo "ANON_BOT_TOKEN is set: $([ -n "$ANON_BOT_TOKEN" ] && echo 'YES' || echo 'NO')"

            echo "🚀 Starting deployment to production..."

            # Append one entry to the deploy history under an exclusive flock and
            # keep only the last DEPLOY_HISTORY_SIZE (default 10) lines.
            #   $1 - the history line to append
            safe_write_history() {
              local entry="$1"
              local history_file="/home/prod/.deploy_history.txt"
              local lock_file="${history_file}.lock"
              local history_size="${DEPLOY_HISTORY_SIZE:-10}"

              (
                # Exclusive lock on fd 200 (the lock file opened below).
                flock -x 200 || { echo "❌ Failed to acquire lock on $history_file"; exit 1; }

                # Append the new entry.
                echo "$entry" >> "$history_file"

                # Trim via a temp file + mv so readers never see a partial file.
                tail -n "$history_size" "$history_file" > "${history_file}.tmp"
                mv "${history_file}.tmp" "$history_file"

                echo "✅ History updated safely"
              ) 200>"$lock_file"
            }

            # Switch to the project directory.
            cd /home/prod

            # Remember the commit that is live right now; this is the rollback target.
            CURRENT_COMMIT=$(git rev-parse HEAD)
            COMMIT_MESSAGE=$(git log -1 --pretty=format:"%s" || echo "Unknown")
            COMMIT_AUTHOR=$(git log -1 --pretty=format:"%an" || echo "Unknown")
            TIMESTAMP=$(date +"%Y-%m-%d %H:%M:%S")

            # Also keep it in /tmp for quick access (read by the auto-rollback job).
            echo "$CURRENT_COMMIT" > /tmp/last_deploy_commit.txt
            echo "📝 Current commit: $CURRENT_COMMIT"
            echo "📝 Commit message: $COMMIT_MESSAGE"
            echo "📝 Author: $COMMIT_AUTHOR"

            # Deploy history settings.
            DEPLOY_HISTORY="/home/prod/.deploy_history.txt"
            DEPLOY_HISTORY_SIZE="${DEPLOY_HISTORY_SIZE:-10}"

            # Record the start of the deploy; the "Update deploy history" step later
            # rewrites the trailing "deploying" status to success/failed.
            safe_write_history "${TIMESTAMP}|${CURRENT_COMMIT}|${COMMIT_MESSAGE}|${COMMIT_AUTHOR}|deploying"

            # Update the code from main.
            echo "📥 Pulling latest changes from main..."

            # Hand ownership of the bots directory to deploy:deploy (best effort).
            fix_bots_permissions() {
              local bots_dir="/home/prod/bots"

              if [ ! -d "$bots_dir" ]; then
                echo "⚠️ Bots directory not found, skipping permissions fix"
                return 0
              fi

              echo "🔧 Fixing permissions for bots directory..."
              sudo chown -R deploy:deploy "$bots_dir" || true
              echo "✅ Permissions fixed"
            }

            fix_bots_permissions

            # Report (but do not block on) anything that git reset --hard will discard.
            check_local_changes() {
              echo "🔍 Checking for local changes..."

              # Refresh origin/main so the comparison is against the latest remote state.
              git fetch origin main

              # Difference between the current HEAD and origin/main.
              if ! git diff --quiet HEAD origin/main 2>/dev/null; then
                echo "⚠️ Local changes detected! They will be overwritten by git reset --hard"
                echo "📋 Diff summary:"
                git diff --stat HEAD origin/main || true
              fi

              # Uncommitted (unstaged or staged) changes in the working tree.
              if ! git diff --quiet 2>/dev/null || ! git diff --cached --quiet 2>/dev/null; then
                echo "⚠️ Uncommitted changes detected! They will be lost."
                git status --short || true
              fi

              echo "✅ Proceeding with git reset --hard"
            }

            check_local_changes

            git fetch origin main
            git reset --hard origin/main

            # Fix ownership again after the checkout was rewritten.
            fix_bots_permissions

            # Report whether the reset actually moved HEAD.
            NEW_COMMIT=$(git rev-parse HEAD)
            if [ "$CURRENT_COMMIT" = "$NEW_COMMIT" ]; then
              echo "ℹ️ No new changes to deploy"
            else
              echo "✅ Code updated: $CURRENT_COMMIT → $NEW_COMMIT"
            fi

            # Abort the deploy when docker-compose.yml is missing or does not parse.
            validate_docker_compose() {
              local compose_file="docker-compose.yml"

              echo "🔍 Validating docker-compose configuration..."

              if [ ! -f "$compose_file" ]; then
                echo "❌ $compose_file not found!"
                exit 1
              fi

              if ! docker-compose config > /dev/null 2>&1; then
                echo "❌ Invalid docker-compose.yml syntax!"
                docker-compose config  # Run again unsilenced to show the errors
                exit 1
              fi

              echo "✅ docker-compose.yml is valid"
            }

            validate_docker_compose

            # Require at least min_free_gb free on /home/prod before building;
            # tries a docker prune once before giving up.
            check_disk_space() {
              local min_free_gb=5
              local available_space

              # The pipeline ends in sed and therefore never fails; default the
              # value instead so an unreadable df result counts as 0 GB.
              available_space=$(df -BG /home/prod 2>/dev/null | tail -1 | awk '{print $4}' | sed 's/G//')
              available_space="${available_space:-0}"

              echo "💾 Checking disk space..."
              echo "Available space: ${available_space}GB"

              if [ "$available_space" -lt "$min_free_gb" ]; then
                echo "⚠️ Insufficient disk space! Need at least ${min_free_gb}GB, but only ${available_space}GB available"
                echo "🧹 Attempting to clean up unused Docker resources..."
                docker system prune -f --volumes || true

                # Measure again after the cleanup.
                available_space=$(df -BG /home/prod 2>/dev/null | tail -1 | awk '{print $4}' | sed 's/G//')
                available_space="${available_space:-0}"
                echo "Available space after cleanup: ${available_space}GB"

                if [ "$available_space" -lt "$min_free_gb" ]; then
                  echo "❌ Still insufficient disk space after cleanup!"
                  exit 1
                fi
              fi

              echo "✅ Sufficient disk space available"
            }

            # Informational check of available memory and load average; never fails.
            check_resources() {
              echo "💻 Checking system resources..."

              # Available memory in MB (7th column of the "Mem:" row of free -m).
              available_mem=$(free -m 2>/dev/null | awk '/^Mem:/ {print $7}')
              available_mem="${available_mem:-0}"
              min_mem_mb=512

              if [ "$available_mem" -lt "$min_mem_mb" ] && [ "$available_mem" -gt 0 ]; then
                echo "⚠️ Low available memory: ${available_mem}MB (recommended: ${min_mem_mb}MB+)"
              else
                echo "✅ Available memory: ${available_mem}MB"
              fi

              # First value of the load average reported by uptime.
              load_avg=$(uptime 2>/dev/null | awk -F'load average:' '{print $2}' | awk '{print $1}' | sed 's/,//')
              load_avg="${load_avg:-0}"
              echo "📊 System load average: ${load_avg}"
            }

            check_disk_space
            check_resources

            # Rebuild every container, refreshing base images but reusing the layer cache.
            echo "🔨 Rebuilding all containers with --pull (updating base images, using cache)..."
            cd /home/prod

            # Stop all containers, allowing 30 seconds for a graceful shutdown.
            echo "🛑 Stopping containers gracefully..."
            docker-compose down -t 30 || true

            # Build with --pull; the tokens from GitHub Secrets are passed through
            # the environment of the docker-compose process.
            TELEGRAM_BOT_TOKEN="$TELEGRAM_BOT_TOKEN" \
            TELEGRAM_TEST_BOT_TOKEN="$TELEGRAM_TEST_BOT_TOKEN" \
            ANON_BOT_TOKEN="$ANON_BOT_TOKEN" \
            docker-compose build --pull

            # Start all containers with the same tokens.
            echo "🚀 Starting all containers..."
            TELEGRAM_BOT_TOKEN="$TELEGRAM_BOT_TOKEN" \
            TELEGRAM_TEST_BOT_TOKEN="$TELEGRAM_TEST_BOT_TOKEN" \
            ANON_BOT_TOKEN="$ANON_BOT_TOKEN" \
            docker-compose up -d

            echo "✅ Containers rebuilt and started"

      # Fails the job when any service endpoint or container is unhealthy.
      - name: Health check
        uses: appleboy/ssh-action@v1.0.0
        with:
          host: ${{ vars.SERVER_HOST || secrets.SERVER_HOST }}
          username: ${{ vars.SERVER_USER || secrets.SERVER_USER }}
          key: ${{ secrets.SSH_PRIVATE_KEY }}
          port: ${{ vars.SSH_PORT || secrets.SSH_PORT || 22 }}
          script: |
            set -e
            echo "🏥 Running health checks..."

            # Returns 0 when localhost answers a single ping, 1 otherwise.
            check_network_availability() {
              echo "🔍 Checking network availability..."

              if ! ping -c 1 -W 2 localhost > /dev/null 2>&1; then
                echo "❌ Localhost not reachable! Network issue detected."
                return 1
              fi

              echo "✅ Network is available"
              return 0
            }

            # Poll docker-compose ps until no container is in Exit/Restarting state
            # and at least one is Up. Always returns 0, even after the timeout.
            wait_for_containers_ready() {
              local max_wait=180  # 3 minutes at most
              local check_interval=5
              local elapsed=0

              echo "⏳ Waiting for containers to be ready..."

              while [ $elapsed -lt $max_wait ]; do
                # Any container in Exit or Restarting state means "not ready yet".
                if docker-compose ps 2>/dev/null | grep -q "Exit\|Restarting"; then
                  echo "⏳ Some containers not ready yet, waiting ${check_interval}s... (${elapsed}/${max_wait}s)"
                  sleep $check_interval
                  elapsed=$((elapsed + check_interval))
                else
                  # grep -c already prints 0 (and exits 1) when nothing matches, so
                  # only the exit status needs neutralising.
                  local running_count
                  running_count=$(docker-compose ps 2>/dev/null | grep -c "Up" || true)
                  running_count="${running_count:-0}"
                  if [ "$running_count" -gt 0 ]; then
                    echo "✅ All containers are ready! (waited ${elapsed}s, ${running_count} containers running)"
                    return 0
                  else
                    echo "⏳ Waiting for containers to start... (${elapsed}/${max_wait}s)"
                    sleep $check_interval
                    elapsed=$((elapsed + check_interval))
                  fi
                fi
              done

              echo "⚠️ Containers not fully ready after ${max_wait}s, but continuing with health checks..."
              return 0
            }

            # Show the container status.
            echo "📊 Checking container status..."
            cd /home/prod
            docker-compose ps

            # Network check before the health checks (a failure only warns).
            if ! check_network_availability; then
              echo "⚠️ Network check failed, but continuing with health checks..."
            fi

            # Wait for the containers to come up.
            wait_for_containers_ready

            # Probe one URL with growing delays between attempts.
            #   $1 - service name used in log messages
            #   $2 - URL to probe with curl -f
            # Returns 0 on the first successful response, 1 when all attempts fail.
            check_health() {
              local service=$1
              local url=$2
              local attempt=1
              local delays=(5 15 45)  # Delay before each retry: 5s, 15s, 45s
              local max_attempts=${#delays[@]}

              echo "🔍 Checking $service health..."

              while [ $attempt -le $max_attempts ]; do
                if curl -f -s --max-time 10 "$url" > /dev/null 2>&1; then
                  echo "✅ $service is healthy (attempt $attempt/$max_attempts)"
                  return 0
                else
                  if [ $attempt -lt $max_attempts ]; then
                    delay=${delays[$((attempt - 1))]}
                    echo "⏳ $service not ready yet (attempt $attempt/$max_attempts), waiting ${delay} seconds..."
                    sleep $delay
                  else
                    echo "❌ $service health check failed after $max_attempts attempts"
                    return 1
                  fi
                fi
                attempt=$((attempt + 1))
              done

              return 1
            }

            # Check every service; returns 1 when at least one of them failed.
            run_health_checks() {
              local failed=0
              # Entries are "display name|health URL|compose service". The delimiter
              # must not be ':' because the URLs themselves contain colons.
              local services=(
                "Prometheus|http://localhost:9090/-/healthy|prometheus"
                "Grafana|http://localhost:3000/api/health|grafana"
                "Telegram Bot|http://localhost:8080/health|telegram-bot"
                "AnonBot|http://localhost:8081/health|anon-bot"
              )

              for service_info in "${services[@]}"; do
                IFS='|' read -r service_name url container_name <<< "$service_info"
                echo "🔍 Checking $service_name..."

                if ! check_health "$service_name" "$url"; then
                  echo "⚠️ $service_name health check failed"
                  docker-compose logs --tail=30 "$container_name" || true
                  failed=1
                fi
              done

              return $failed
            }

            # Check all services.
            HEALTH_CHECK_FAILED=0
            if ! run_health_checks; then
              HEALTH_CHECK_FAILED=1
            fi

            # Show the final container status.
            echo "📊 Final container status:"
            docker-compose ps

            # Any container in Exit/Restarting state fails the step.
            FAILED_CONTAINERS=$(docker-compose ps | grep -c "Exit\|Restarting" || true)
            if [ "$FAILED_CONTAINERS" -gt 0 ]; then
              echo "❌ Some containers are not running properly"
              docker-compose ps
              HEALTH_CHECK_FAILED=1
            fi

            if [ $HEALTH_CHECK_FAILED -eq 1 ]; then
              echo "❌ Health checks failed!"
              exit 1
            else
              echo "✅ All health checks passed!"
            fi

      # Runs even when earlier steps failed so the "deploying" entry is finalised.
      - name: Update deploy history
        if: always()
        uses: appleboy/ssh-action@v1.0.0
        with:
          host: ${{ vars.SERVER_HOST || secrets.SERVER_HOST }}
          username: ${{ vars.SERVER_USER || secrets.SERVER_USER }}
          key: ${{ secrets.SSH_PRIVATE_KEY }}
          port: ${{ vars.SSH_PORT || secrets.SSH_PORT || 22 }}
          script: |
            # Rewrite the status of the last history line from "deploying" to $1.
            # Uses flock when available and falls back to an unlocked update when
            # flock is missing or the lock cannot be taken (e.g. on NFS).
            safe_update_history_status() {
              local new_status="$1"
              local history_file="/home/prod/.deploy_history.txt"
              local lock_file="${history_file}.lock"

              if command -v flock > /dev/null 2>&1; then
                (
                  # Exclusive lock on fd 200 (the lock file opened below).
                  if flock -x 200 2>/dev/null; then
                    if [ -f "$history_file" ]; then
                      # Only the last line is touched, and only if it ends in |deploying.
                      sed -i '$s/|deploying$/|'"$new_status"'/' "$history_file"
                      echo "✅ Deploy history updated with status: $new_status (with flock)"
                    else
                      echo "⚠️ History file not found, skipping update"
                    fi
                  else
                    echo "⚠️ Failed to acquire lock, using fallback method"
                    # Fallback: plain update without locking.
                    if [ -f "$history_file" ]; then
                      sed -i '$s/|deploying$/|'"$new_status"'/' "$history_file"
                      echo "✅ Deploy history updated (fallback method)"
                    fi
                  fi
                ) 200>"$lock_file"
              else
                # Fallback: flock is not installed.
                echo "⚠️ flock not available, using simple update"
                if [ -f "$history_file" ]; then
                  sed -i '$s/|deploying$/|'"$new_status"'/' "$history_file"
                  echo "✅ Deploy history updated (simple method)"
                fi
              fi
            }

            DEPLOY_HISTORY="/home/prod/.deploy_history.txt"

            if [ -f "$DEPLOY_HISTORY" ]; then
              # Map the job status to the final history status.
              if [ "${{ job.status }}" = "success" ]; then
                status="success"
              else
                status="failed"
              fi

              # Apply the status under the lock.
              safe_update_history_status "$status"
            fi

      - name: Send deployment notification
        if: always()
        uses: appleboy/telegram-action@v1.0.0
        with:
          to: ${{ secrets.TELEGRAM_CHAT_ID }}
          token: ${{ secrets.TELEGRAM_BOT_TOKEN }}
          message: |
            ${{ job.status == 'success' && '✅' || '❌' }} Deployment: ${{ job.status }}

            📦 Repository: prod
            🌿 Branch: main
            📝 Commit: ${{ github.event.pull_request.merge_commit_sha || github.sha }}
            👤 Author: ${{ github.event.pull_request.user.login || github.actor }}
            ${{ github.event.pull_request.number && format('🔀 PR: #{0}', github.event.pull_request.number) || '' }}

            ${{ job.status == 'success' && '✅ Deployment successful! All services are healthy.' || '❌ Deployment failed! Check logs for details.' }}

            🔗 View details: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
        # A failed notification must not change the job result.
        continue-on-error: true

  smoke-tests:
    runs-on: ubuntu-latest
    name: Smoke Tests
    needs: deploy
    if: |
      always() && needs.deploy.result == 'success'
    steps:
      - name: Run Smoke Tests
        uses: appleboy/ssh-action@v1.0.0
        with:
          host: ${{ vars.SERVER_HOST || secrets.SERVER_HOST }}
          username: ${{ vars.SERVER_USER || secrets.SERVER_USER }}
          key: ${{ secrets.SSH_PRIVATE_KEY }}
          port: ${{ vars.SSH_PORT || secrets.SSH_PORT || 22 }}
          script: |
            set -e
            export TELEGRAM_BOT_TOKEN="${{ secrets.TELEGRAM_BOT_TOKEN }}"
            export ANON_BOT_TOKEN="${{ secrets.ANON_BOT_TOKEN }}"

            echo "🧪 Running smoke tests..."

            # Chat that receives the "ping" messages; overridable from the server environment.
            SMOKE_TEST_CHAT_ID="${SMOKE_TEST_CHAT_ID:--898316252}"
            echo "📝 Using test chat ID: $SMOKE_TEST_CHAT_ID"

            # Health endpoints: both are blocking.
            echo "🔍 Checking health endpoints..."

            if ! curl -f -s --max-time 10 "http://localhost:8080/health" > /dev/null 2>&1; then
              echo "❌ Telegram Bot health endpoint failed"
              exit 1
            fi
            echo "✅ Telegram Bot health endpoint OK"

            if ! curl -f -s --max-time 10 "http://localhost:8081/health" > /dev/null 2>&1; then
              echo "❌ AnonBot health endpoint failed"
              exit 1
            fi
            echo "✅ AnonBot health endpoint OK"

            # Metrics endpoints: optional, a failure only warns.
            echo "🔍 Checking metrics endpoints..."
            curl -f -s --max-time 10 "http://localhost:8080/metrics" > /dev/null 2>&1 && echo "✅ Telegram Bot metrics OK" || echo "⚠️ Telegram Bot metrics not available"
            curl -f -s --max-time 10 "http://localhost:8081/metrics" > /dev/null 2>&1 && echo "✅ AnonBot metrics OK" || echo "⚠️ AnonBot metrics not available"

            # Smoke test of the Telegram Helper Bot (token comes from GitHub Secrets).
            echo "🔍 Testing Telegram Helper Bot..."

            if [ -z "$TELEGRAM_BOT_TOKEN" ]; then
              echo "❌ TELEGRAM_BOT_TOKEN not found in GitHub Secrets"
              exit 1
            fi

            # Send a "ping" message to the test chat.
            response=$(curl -s --max-time 30 -X POST "https://api.telegram.org/bot${TELEGRAM_BOT_TOKEN}/sendMessage" \
              -d "chat_id=${SMOKE_TEST_CHAT_ID}" \
              -d "text=ping" || echo "")

            if echo "$response" | grep -q '"ok":true'; then
              echo "✅ Telegram Helper Bot smoke test passed (message sent successfully)"
            else
              echo "❌ Telegram Helper Bot smoke test failed"
              echo "Response: $response"
              exit 1
            fi

            # Smoke test of AnonBot (token comes from GitHub Secrets); non-blocking.
            echo "🔍 Testing AnonBot..."

            if [ -n "$ANON_BOT_TOKEN" ]; then
              response=$(curl -s --max-time 30 -X POST "https://api.telegram.org/bot${ANON_BOT_TOKEN}/sendMessage" \
                -d "chat_id=${SMOKE_TEST_CHAT_ID}" \
                -d "text=ping" || echo "")

              if echo "$response" | grep -q '"ok":true'; then
                echo "✅ AnonBot smoke test passed (message sent successfully)"
              else
                echo "⚠️ AnonBot smoke test failed (non-critical)"
                echo "Response: $response"
              fi
            else
              echo "ℹ️ ANON_BOT_TOKEN not set, skipping smoke test"
            fi

            echo "✅ All smoke tests passed!"

      - name: Send smoke tests notification
        if: always()
        uses: appleboy/telegram-action@v1.0.0
        with:
          to: ${{ secrets.TELEGRAM_CHAT_ID }}
          token: ${{ secrets.TELEGRAM_BOT_TOKEN }}
          message: |
            ${{ job.status == 'success' && '✅' || '❌' }} Smoke Tests: ${{ job.status }}

            📦 Repository: prod
            🌿 Branch: main
            📝 Commit: ${{ github.event.pull_request.merge_commit_sha || github.sha }}

            ${{ job.status == 'success' && '✅ All smoke tests passed! Bots are working correctly.' || '❌ Smoke tests failed! Auto-rollback will be triggered.' }}

            🔗 View details: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
        # A failed notification must not change the job result.
        continue-on-error: true

  auto-rollback:
    runs-on: ubuntu-latest
    name: Auto Rollback
    concurrency:
      group: production-rollback
      cancel-in-progress: false
    needs: [deploy, smoke-tests]
    # always() is required so the job is evaluated although a needed job failed.
    if: |
      always() && needs.smoke-tests.result == 'failure'
    environment:
      name: production
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: main

      - name: Auto Rollback
        uses: appleboy/ssh-action@v1.0.0
        with:
          host: ${{ vars.SERVER_HOST || secrets.SERVER_HOST }}
          username: ${{ vars.SERVER_USER || secrets.SERVER_USER }}
          key: ${{ secrets.SSH_PRIVATE_KEY }}
          port: ${{ vars.SSH_PORT || secrets.SSH_PORT || 22 }}
          script: |
            set -e
            export TELEGRAM_BOT_TOKEN="${{ secrets.TELEGRAM_BOT_TOKEN }}"
            export TELEGRAM_TEST_BOT_TOKEN="${{ secrets.TELEGRAM_TEST_BOT_TOKEN }}"
            export ANON_BOT_TOKEN="${{ secrets.ANON_BOT_TOKEN }}"

            echo "🔄 Starting automatic rollback after smoke tests failure..."

            # Print the deploy history on stdout, under a shared flock when possible.
            # Falls back to an unlocked read when flock is missing or the lock cannot
            # be taken (e.g. on NFS). Diagnostics go to stderr because callers
            # capture stdout as the history content.
            safe_read_history() {
              local history_file="/home/prod/.deploy_history.txt"
              local lock_file="${history_file}.lock"

              if command -v flock > /dev/null 2>&1; then
                (
                  # Shared (read) lock on fd 200 (the lock file opened below).
                  if flock -s 200 2>/dev/null; then
                    if [ -f "$history_file" ]; then
                      cat "$history_file"
                    else
                      echo ""
                    fi
                  else
                    echo "⚠️ Failed to acquire lock, using fallback method" >&2
                    # Fallback: plain read without locking.
                    if [ -f "$history_file" ]; then
                      cat "$history_file"
                    else
                      echo ""
                    fi
                  fi
                ) 200>"$lock_file"
              else
                # Fallback: flock is not installed.
                if [ -f "$history_file" ]; then
                  cat "$history_file"
                else
                  echo ""
                fi
              fi
            }

            # Append one entry to the deploy history under an exclusive flock and
            # keep only the last DEPLOY_HISTORY_SIZE (default 10) lines.
            #   $1 - the history line to append
            safe_write_history() {
              local entry="$1"
              local history_file="/home/prod/.deploy_history.txt"
              local lock_file="${history_file}.lock"
              local history_size="${DEPLOY_HISTORY_SIZE:-10}"

              (
                # Exclusive lock on fd 200 (the lock file opened below).
                flock -x 200 || { echo "❌ Failed to acquire lock on $history_file"; exit 1; }

                # Append the new entry.
                echo "$entry" >> "$history_file"

                # Trim via a temp file + mv so readers never see a partial file.
                tail -n "$history_size" "$history_file" > "${history_file}.tmp"
                mv "${history_file}.tmp" "$history_file"

                echo "✅ History updated safely"
              ) 200>"$lock_file"
            }

            # Hand ownership of the bots directory to deploy:deploy (best effort).
            fix_bots_permissions() {
              local bots_dir="/home/prod/bots"

              if [ ! -d "$bots_dir" ]; then
                echo "⚠️ Bots directory not found, skipping permissions fix"
                return 0
              fi

              echo "🔧 Fixing permissions for bots directory..."
              sudo chown -R deploy:deploy "$bots_dir" || true
              echo "✅ Permissions fixed"
            }

            cd /home/prod

            DEPLOY_HISTORY="/home/prod/.deploy_history.txt"
            DEPLOY_HISTORY_SIZE="${DEPLOY_HISTORY_SIZE:-10}"

            # Take the commit (2nd field) of the last history line marked as success.
            HISTORY_CONTENT=$(safe_read_history)
            LAST_SUCCESSFUL_COMMIT=$(echo "$HISTORY_CONTENT" | grep "|success" | tail -1 | cut -d'|' -f2 || echo "")

            # Without a successful entry, fall back to the commit saved by the deploy step.
            if [ -z "$LAST_SUCCESSFUL_COMMIT" ]; then
              if [ -f "/tmp/last_deploy_commit.txt" ]; then
                LAST_SUCCESSFUL_COMMIT=$(cat /tmp/last_deploy_commit.txt)
                echo "📝 Using saved commit from /tmp/last_deploy_commit.txt: $LAST_SUCCESSFUL_COMMIT"
              else
                echo "❌ No previous successful deploy found in history and no saved commit!"
                exit 1
              fi
            else
              echo "📝 Found last successful deploy in history: $LAST_SUCCESSFUL_COMMIT"
            fi

            # Roll the code back.
            echo "🔄 Rolling back to commit: $LAST_SUCCESSFUL_COMMIT"

            # Fix ownership of the bots directory before touching the checkout.
            fix_bots_permissions

            # Report anything the reset is about to discard.
            echo "🔍 Checking for local changes before rollback..."
            git fetch origin main

            # Uncommitted (unstaged or staged) changes in the working tree.
            if ! git diff --quiet 2>/dev/null || ! git diff --cached --quiet 2>/dev/null; then
              echo "⚠️ Uncommitted changes detected! They will be lost during rollback."
              git status --short || true
            fi

            git fetch origin main
            git reset --hard "$LAST_SUCCESSFUL_COMMIT"

            # Fix ownership again after the checkout was rewritten.
            fix_bots_permissions

            echo "✅ Code rolled back to: $LAST_SUCCESSFUL_COMMIT"

            # Abort the rollback when docker-compose.yml is missing or does not parse.
            validate_docker_compose() {
              local compose_file="docker-compose.yml"

              echo "🔍 Validating docker-compose configuration..."

              if [ ! -f "$compose_file" ]; then
                echo "❌ $compose_file not found!"
                exit 1
              fi

              if ! docker-compose config > /dev/null 2>&1; then
                echo "❌ Invalid docker-compose.yml syntax!"
                docker-compose config  # Run again unsilenced to show the errors
                exit 1
              fi

              echo "✅ docker-compose.yml is valid"
            }

            validate_docker_compose

            # Require at least min_free_gb free on /home/prod before building;
            # tries a docker prune once before giving up.
            check_disk_space() {
              local min_free_gb=5
              local available_space

              # The pipeline ends in sed and therefore never fails; default the
              # value instead so an unreadable df result counts as 0 GB.
              available_space=$(df -BG /home/prod 2>/dev/null | tail -1 | awk '{print $4}' | sed 's/G//')
              available_space="${available_space:-0}"

              echo "💾 Checking disk space..."
              echo "Available space: ${available_space}GB"

              if [ "$available_space" -lt "$min_free_gb" ]; then
                echo "⚠️ Insufficient disk space! Need at least ${min_free_gb}GB, but only ${available_space}GB available"
                echo "🧹 Attempting to clean up unused Docker resources..."
                docker system prune -f --volumes || true

                # Measure again after the cleanup.
                available_space=$(df -BG /home/prod 2>/dev/null | tail -1 | awk '{print $4}' | sed 's/G//')
                available_space="${available_space:-0}"
                echo "Available space after cleanup: ${available_space}GB"

                if [ "$available_space" -lt "$min_free_gb" ]; then
                  echo "❌ Still insufficient disk space after cleanup!"
                  exit 1
                fi
              fi

              echo "✅ Sufficient disk space available"
            }

            # Informational check of available memory and load average; never fails.
            check_resources() {
              echo "💻 Checking system resources..."

              # Available memory in MB (7th column of the "Mem:" row of free -m).
              available_mem=$(free -m 2>/dev/null | awk '/^Mem:/ {print $7}')
              available_mem="${available_mem:-0}"
              min_mem_mb=512

              if [ "$available_mem" -lt "$min_mem_mb" ] && [ "$available_mem" -gt 0 ]; then
                echo "⚠️ Low available memory: ${available_mem}MB (recommended: ${min_mem_mb}MB+)"
              else
                echo "✅ Available memory: ${available_mem}MB"
              fi

              # First value of the load average reported by uptime.
              load_avg=$(uptime 2>/dev/null | awk -F'load average:' '{print $2}' | awk '{print $1}' | sed 's/,//')
              load_avg="${load_avg:-0}"
              echo "📊 System load average: ${load_avg}"
            }

            check_disk_space
            check_resources

            # Rebuild every container, refreshing base images but reusing the layer cache.
            echo "🔨 Rebuilding all containers with --pull (updating base images, using cache)..."

            # Stop all containers, allowing 30 seconds for a graceful shutdown.
            echo "🛑 Stopping containers gracefully..."
            docker-compose down -t 30 || true

            # Build with the tokens from Secrets.
            TELEGRAM_BOT_TOKEN="$TELEGRAM_BOT_TOKEN" \
            TELEGRAM_TEST_BOT_TOKEN="$TELEGRAM_TEST_BOT_TOKEN" \
            ANON_BOT_TOKEN="$ANON_BOT_TOKEN" \
            docker-compose build --pull

            # Start with the tokens from Secrets.
            echo "🚀 Starting all containers..."
            TELEGRAM_BOT_TOKEN="$TELEGRAM_BOT_TOKEN" \
            TELEGRAM_TEST_BOT_TOKEN="$TELEGRAM_TEST_BOT_TOKEN" \
            ANON_BOT_TOKEN="$ANON_BOT_TOKEN" \
            docker-compose up -d

            echo "✅ Containers rebuilt and started"

            # Returns 0 when localhost answers a single ping, 1 otherwise.
            check_network_availability() {
              echo "🔍 Checking network availability..."

              if ! ping -c 1 -W 2 localhost > /dev/null 2>&1; then
                echo "❌ Localhost not reachable! Network issue detected."
                return 1
              fi

              echo "✅ Network is available"
              return 0
            }

            # Poll docker-compose ps until no container is in Exit/Restarting state
            # and at least one is Up. Always returns 0, even after the timeout.
            wait_for_containers_ready() {
              local max_wait=180  # 3 minutes at most
              local check_interval=5
              local elapsed=0

              echo "⏳ Waiting for containers to be ready..."

              while [ $elapsed -lt $max_wait ]; do
                if docker-compose ps 2>/dev/null | grep -q "Exit\|Restarting"; then
                  echo "⏳ Some containers not ready yet, waiting ${check_interval}s... (${elapsed}/${max_wait}s)"
                  sleep $check_interval
                  elapsed=$((elapsed + check_interval))
                else
                  # grep -c already prints 0 (and exits 1) when nothing matches, so
                  # only the exit status needs neutralising.
                  local running_count
                  running_count=$(docker-compose ps 2>/dev/null | grep -c "Up" || true)
                  running_count="${running_count:-0}"
                  if [ "$running_count" -gt 0 ]; then
                    echo "✅ All containers are ready! (waited ${elapsed}s, ${running_count} containers running)"
                    return 0
                  else
                    echo "⏳ Waiting for containers to start... (${elapsed}/${max_wait}s)"
                    sleep $check_interval
                    elapsed=$((elapsed + check_interval))
                  fi
                fi
              done

              echo "⚠️ Containers not fully ready after ${max_wait}s, but continuing with health checks..."
              return 0
            }

            # Network check before the health checks (a failure only warns).
            if ! check_network_availability; then
              echo "⚠️ Network check failed, but continuing with health checks..."
            fi

            # Wait for the containers to come up.
            wait_for_containers_ready

            # Probe one URL with growing delays between attempts.
            #   $1 - service name used in log messages
            #   $2 - URL to probe with curl -f
            # Returns 0 on the first successful response, 1 when all attempts fail.
            check_health() {
              local service=$1
              local url=$2
              local attempt=1
              local delays=(5 15 45)
              local max_attempts=${#delays[@]}

              echo "🔍 Checking $service health..."

              while [ $attempt -le $max_attempts ]; do
                if curl -f -s --max-time 10 "$url" > /dev/null 2>&1; then
                  echo "✅ $service is healthy (attempt $attempt/$max_attempts)"
                  return 0
                else
                  if [ $attempt -lt $max_attempts ]; then
                    delay=${delays[$((attempt - 1))]}
                    echo "⏳ $service not ready yet (attempt $attempt/$max_attempts), waiting ${delay} seconds..."
                    sleep $delay
                  else
                    echo "❌ $service health check failed after $max_attempts attempts"
                    return 1
                  fi
                fi
                attempt=$((attempt + 1))
              done

              return 1
            }

            # Check every service; returns 1 when at least one of them failed.
            run_health_checks() {
              local failed=0
              # Entries are "display name|health URL|compose service". The delimiter
              # must not be ':' because the URLs themselves contain colons.
              local services=(
                "Prometheus|http://localhost:9090/-/healthy|prometheus"
                "Grafana|http://localhost:3000/api/health|grafana"
                "Telegram Bot|http://localhost:8080/health|telegram-bot"
                "AnonBot|http://localhost:8081/health|anon-bot"
              )

              for service_info in "${services[@]}"; do
                IFS='|' read -r service_name url container_name <<< "$service_info"
                echo "🔍 Checking $service_name..."

                if ! check_health "$service_name" "$url"; then
                  echo "⚠️ $service_name health check failed"
                  docker-compose logs --tail=30 "$container_name" || true
                  failed=1
                fi
              done

              return $failed
            }

            HEALTH_CHECK_FAILED=0
            if ! run_health_checks; then
              HEALTH_CHECK_FAILED=1
            fi

            # Show the final container status.
            echo "📊 Final container status:"
            docker-compose ps

            # Containers in Exit/Restarting state count as a failed health check.
            FAILED_CONTAINERS=$(docker-compose ps | grep -c "Exit\|Restarting" || true)
            if [ "$FAILED_CONTAINERS" -gt 0 ]; then
              echo "❌ Some containers are not running properly"
              docker-compose ps
              HEALTH_CHECK_FAILED=1
            fi

            # Unlike the deploy job, failed health checks do not fail the rollback.
            if [ $HEALTH_CHECK_FAILED -eq 1 ]; then
              echo "⚠️ Some health checks failed, but rollback completed"
            else
              echo "✅ All health checks passed after rollback!"
            fi

            # Record the rollback in the deploy history.
            TIMESTAMP=$(date +"%Y-%m-%d %H:%M:%S")
            safe_write_history "${TIMESTAMP}|${LAST_SUCCESSFUL_COMMIT}|Auto-rollback after smoke tests failure|github-actions|rolled_back"

            echo "✅ Rollback completed successfully"

      - name: Send rollback notification
        if: always()
        uses: appleboy/telegram-action@v1.0.0
        with:
          to: ${{ secrets.TELEGRAM_CHAT_ID }}
          token: ${{ secrets.TELEGRAM_BOT_TOKEN }}
          message: |
            🔄 Automatic Rollback: ${{ job.status }}

            📦 Repository: prod
            🌿 Branch: main
            📝 Rolled back to previous successful commit
            ${{ github.event.pull_request.number && format('🔀 PR: #{0}', github.event.pull_request.number) || '' }}

            ⚠️ Rollback was triggered automatically due to smoke tests failure.

            ${{ job.status == 'success' && '✅ Rollback completed successfully! Services restored to previous version.' || '❌ Rollback failed! Manual intervention required.' }}

            🔗 View details: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
        # A failed notification must not change the job result.
        continue-on-error: true