From 1dceab6479e4cb251449a12ed5a068ac4e143c7b Mon Sep 17 00:00:00 2001
From: Andrey
Date: Sun, 25 Jan 2026 18:33:58 +0300
Subject: [PATCH] =?UTF-8?q?chore:=20=D0=9E=D0=B1=D0=BD=D0=BE=D0=B2=D0=BB?=
 =?UTF-8?q?=D0=B5=D0=BD=D0=B8=D0=B5=20Docker=20Compose=20=D0=B8=20CI/CD=20?=
 =?UTF-8?q?=D0=BF=D0=B0=D0=B9=D0=BF=D0=BB=D0=B0=D0=B9=D0=BD=D0=B0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Docker Compose теперь использует GitHub Secrets для токенов ботов (приоритет над .env)
- Добавлена функция ручного отката с указанием коммита
- Реализованы проверки работоспособности с экспоненциальной задержкой
- Улучшены уведомления об откате
---
 .github/workflows/deploy.yml   | 1039 ++++++++++++++++++++++++++++++++
 .github/workflows/pipeline.yml |  386 +++++++++---
 docker-compose.yml             |   12 +-
 3 files changed, 1341 insertions(+), 96 deletions(-)
 create mode 100644 .github/workflows/deploy.yml

diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml
new file mode 100644
index 0000000..2d85957
--- /dev/null
+++ b/.github/workflows/deploy.yml
@@ -0,0 +1,1039 @@
+name: Deploy to Production
+
+on:
+  pull_request:
+    types: [closed]
+    branches: [ main ]
+
+jobs:
+  deploy:
+    runs-on: ubuntu-latest
+    name: Deploy to Production
+    concurrency:
+      group: production-deploy
+      cancel-in-progress: false
+    if: github.event.pull_request.merged == true
+    environment:
+      name: production
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+        with:
+          ref: main
+
+      # Fails the job early when a required bot token is missing or rejected by Telegram.
+      - name: Validate Telegram Bot Tokens
+        env:
+          TELEGRAM_BOT_TOKEN: ${{ secrets.TELEGRAM_BOT_TOKEN }}
+          TELEGRAM_TEST_BOT_TOKEN: ${{ secrets.TELEGRAM_TEST_BOT_TOKEN }}
+          ANON_BOT_TOKEN: ${{ secrets.ANON_BOT_TOKEN }}
+        uses: appleboy/ssh-action@v1.0.0
+        with:
+          host: ${{ vars.SERVER_HOST || secrets.SERVER_HOST }}
+          username: ${{ vars.SERVER_USER || secrets.SERVER_USER }}
+          key: ${{ secrets.SSH_PRIVATE_KEY }}
+          port: ${{ vars.SSH_PORT || secrets.SSH_PORT || 22 }}
+          # ssh-action only forwards the variables listed in `envs` to the remote shell;
+          # without this line the script below always sees empty tokens.
+          envs: TELEGRAM_BOT_TOKEN,TELEGRAM_TEST_BOT_TOKEN,ANON_BOT_TOKEN
+          script: |
+            set -e
+            echo "🔍 Validating Telegram Bot tokens from GitHub Secrets..."
+
+            # Check one bot token against the Telegram getMe endpoint, with retries.
+            #   $1 - human-readable token name used in log output
+            #   $2 - bot token value
+            # Returns 0 as soon as Telegram answers "ok":true, 1 after the last failed attempt.
+            validate_token() {
+              local token_name=$1
+              local token=$2
+              local max_retries=3
+              local retry=1
+              # Declared local so repeated calls do not leak state into the script.
+              local response bot_username
+
+              while [ $retry -le $max_retries ]; do
+                echo "🔍 Checking $token_name (attempt $retry/$max_retries)..."
+
+                # A curl failure yields an empty response instead of aborting under `set -e`.
+                response=$(curl -s --max-time 10 "https://api.telegram.org/bot${token}/getMe" || echo "")
+
+                if echo "$response" | grep -q '"ok":true'; then
+                  bot_username=$(echo "$response" | grep -o '"username":"[^"]*"' | cut -d'"' -f4 || echo "unknown")
+                  echo "✅ $token_name is valid (bot: @$bot_username)"
+                  return 0
+                else
+                  if [ $retry -lt $max_retries ]; then
+                    echo "⏳ $token_name validation failed, retrying in 5 seconds..."
+                    sleep 5
+                  else
+                    echo "❌ $token_name is invalid or unreachable"
+                    echo "Response: $response"
+                    return 1
+                  fi
+                fi
+
+                retry=$((retry + 1))
+              done
+
+              return 1
+            }
+
+            # The main Telegram Helper Bot token is mandatory.
+            if [ -z "$TELEGRAM_BOT_TOKEN" ]; then
+              echo "❌ TELEGRAM_BOT_TOKEN not found in GitHub Secrets"
+              exit 1
+            fi
+
+            if ! validate_token "Telegram Helper Bot Token" "$TELEGRAM_BOT_TOKEN"; then
+              exit 1
+            fi
+
+            # The test bot token is optional: a failed validation only warns.
+            if [ -n "$TELEGRAM_TEST_BOT_TOKEN" ]; then
+              if ! validate_token "Telegram Test Bot Token" "$TELEGRAM_TEST_BOT_TOKEN"; then
+                echo "⚠️ Test bot token validation failed, but continuing..."
+              fi
+            else
+              echo "ℹ️ TELEGRAM_TEST_BOT_TOKEN not set, skipping"
+            fi
+
+            # The AnonBot token may be absent, but when present it must be valid.
+            if [ -z "$ANON_BOT_TOKEN" ]; then
+              echo "⚠️ ANON_BOT_TOKEN not found in GitHub Secrets, skipping validation"
+            else
+              if ! validate_token "AnonBot Token" "$ANON_BOT_TOKEN"; then
+                exit 1
+              fi
+            fi
+
+            echo "✅ All token validations passed!"
+
+      # Pulls origin/main on the server, then rebuilds and restarts the compose stack.
+      - name: Deploy to server
+        env:
+          TELEGRAM_BOT_TOKEN: ${{ secrets.TELEGRAM_BOT_TOKEN }}
+          TELEGRAM_TEST_BOT_TOKEN: ${{ secrets.TELEGRAM_TEST_BOT_TOKEN }}
+          ANON_BOT_TOKEN: ${{ secrets.ANON_BOT_TOKEN }}
+        uses: appleboy/ssh-action@v1.0.0
+        with:
+          host: ${{ vars.SERVER_HOST || secrets.SERVER_HOST }}
+          username: ${{ vars.SERVER_USER || secrets.SERVER_USER }}
+          key: ${{ secrets.SSH_PRIVATE_KEY }}
+          port: ${{ vars.SSH_PORT || secrets.SSH_PORT || 22 }}
+          # ssh-action only forwards the variables listed in `envs`; without it the
+          # tokens handed to docker-compose below are empty on the remote host.
+          envs: TELEGRAM_BOT_TOKEN,TELEGRAM_TEST_BOT_TOKEN,ANON_BOT_TOKEN
+          script: |
+            set -e
+            echo "🚀 Starting deployment to production..."
+
+            # Append one entry to the deploy history under an exclusive flock and keep
+            # only the newest $DEPLOY_HISTORY_SIZE lines (default 10).
+            #   $1 - history line: timestamp|commit|message|author|status
+            safe_write_history() {
+              local entry="$1"
+              local history_file="/home/prod/.deploy_history.txt"
+              local lock_file="${history_file}.lock"
+              local history_size="${DEPLOY_HISTORY_SIZE:-10}"
+
+              (
+                # Exclusive lock on fd 200; the subshell releases it on exit.
+                flock -x 200 || { echo "❌ Failed to acquire lock on $history_file"; exit 1; }
+
+                # Append the new entry.
+                echo "$entry" >> "$history_file"
+
+                # Trim via a temp file + rename so readers never see a partial file.
+                tail -n "$history_size" "$history_file" > "${history_file}.tmp"
+                mv "${history_file}.tmp" "$history_file"
+
+                echo "✅ History updated safely"
+              ) 200>"$lock_file"
+            }
+
+            # Work from the project checkout.
+            cd /home/prod
+
+            # Remember the commit that is live right now; it is the rollback target.
+            CURRENT_COMMIT=$(git rev-parse HEAD)
+            COMMIT_MESSAGE=$(git log -1 --pretty=format:"%s" || echo "Unknown")
+            COMMIT_AUTHOR=$(git log -1 --pretty=format:"%an" || echo "Unknown")
+            TIMESTAMP=$(date +"%Y-%m-%d %H:%M:%S")
+
+            # Quick-access copy, used by the rollback job when the history has no success entry.
+            echo "$CURRENT_COMMIT" > /tmp/last_deploy_commit.txt
+            echo "📝 Current commit: $CURRENT_COMMIT"
+            echo "📝 Commit message: $COMMIT_MESSAGE"
+            echo "📝 Author: $COMMIT_AUTHOR"
+
+            # Number of history lines kept by safe_write_history.
+            DEPLOY_HISTORY_SIZE="${DEPLOY_HISTORY_SIZE:-10}"
+
+            # Record the start of this deploy; a later step rewrites the trailing status.
+            safe_write_history "${TIMESTAMP}|${CURRENT_COMMIT}|${COMMIT_MESSAGE}|${COMMIT_AUTHOR}|deploying"
+
+            # Update the code from main.
+            echo "📥 Pulling latest changes from main..."
+
+            # Hand ownership of the bots directory back to deploy:deploy (best effort).
+            fix_bots_permissions() {
+              local bots_dir="/home/prod/bots"
+
+              if [ ! -d "$bots_dir" ]; then
+                echo "⚠️ Bots directory not found, skipping permissions fix"
+                return 0
+              fi
+
+              echo "🔧 Fixing permissions for bots directory..."
+              sudo chown -R deploy:deploy "$bots_dir" || true
+              echo "✅ Permissions fixed"
+            }
+
+            fix_bots_permissions
+
+            # Log what `git reset --hard origin/main` is about to change or discard.
+            check_local_changes() {
+              echo "🔍 Checking for local changes..."
+
+              # Refresh origin/main so the comparison below is current.
+              git fetch origin main
+
+              # HEAD vs origin/main differs on every deploy that brings new commits,
+              # so this is informational rather than a "local changes" warning.
+              if ! git diff --quiet HEAD origin/main 2>/dev/null; then
+                echo "ℹ️ HEAD differs from origin/main; git reset --hard will move the checkout to origin/main"
+                echo "📋 Diff summary:"
+                git diff --stat HEAD origin/main || true
+              fi
+
+              # Uncommitted (unstaged or staged) edits really are lost by the reset.
+              if ! git diff --quiet 2>/dev/null || ! git diff --cached --quiet 2>/dev/null; then
+                echo "⚠️ Uncommitted changes detected! They will be lost."
+                git status --short || true
+              fi
+
+              echo "✅ Proceeding with git reset --hard"
+            }
+
+            check_local_changes
+
+            git fetch origin main
+            git reset --hard origin/main
+
+            # Re-apply ownership after the checkout was rewritten.
+            fix_bots_permissions
+
+            # Report whether the reset actually moved HEAD.
+            NEW_COMMIT=$(git rev-parse HEAD)
+            if [ "$CURRENT_COMMIT" = "$NEW_COMMIT" ]; then
+              echo "ℹ️ No new changes to deploy"
+            else
+              echo "✅ Code updated: $CURRENT_COMMIT → $NEW_COMMIT"
+            fi
+
+            # Abort the deploy when docker-compose.yml is missing or does not parse.
+            validate_docker_compose() {
+              local compose_file="docker-compose.yml"
+
+              echo "🔍 Validating docker-compose configuration..."
+
+              if [ ! -f "$compose_file" ]; then
+                echo "❌ $compose_file not found!"
+                exit 1
+              fi
+
+              if ! docker-compose config > /dev/null 2>&1; then
+                echo "❌ Invalid docker-compose.yml syntax!"
+                docker-compose config  # run again unredirected to show the errors
+                exit 1
+              fi
+
+              echo "✅ docker-compose.yml is valid"
+            }
+
+            validate_docker_compose
+
+            # Require min_free_gb of free space on /home/prod before building;
+            # tries a docker prune once before giving up.
+            check_disk_space() {
+              local min_free_gb=5
+              local available_space=$(df -BG /home/prod 2>/dev/null | tail -1 | awk '{print $4}' | sed 's/G//' || echo "0")
+
+              echo "💾 Checking disk space..."
+              echo "Available space: ${available_space}GB"
+
+              if [ "$available_space" -lt "$min_free_gb" ]; then
+                echo "⚠️ Insufficient disk space! Need at least ${min_free_gb}GB, but only ${available_space}GB available"
+                echo "🧹 Attempting to clean up unused Docker resources..."
+                # NOTE(review): --volumes also removes unused volumes - confirm no data
+                # volume can be unused at this point.
+                docker system prune -f --volumes || true
+
+                # Measure again after the cleanup.
+                available_space=$(df -BG /home/prod 2>/dev/null | tail -1 | awk '{print $4}' | sed 's/G//' || echo "0")
+                echo "Available space after cleanup: ${available_space}GB"
+
+                if [ "$available_space" -lt "$min_free_gb" ]; then
+                  echo "❌ Still insufficient disk space after cleanup!"
+                  exit 1
+                fi
+              fi
+
+              echo "✅ Sufficient disk space available"
+            }
+
+            # Log available memory and load average; informational only, never fails.
+            check_resources() {
+              echo "💻 Checking system resources..."
+
+              # "available" column of `free -m`, in MB.
+              available_mem=$(free -m 2>/dev/null | awk '/^Mem:/ {print $7}' || echo "0")
+              min_mem_mb=512
+
+              if [ "$available_mem" -lt "$min_mem_mb" ] && [ "$available_mem" -gt 0 ]; then
+                echo "⚠️ Low available memory: ${available_mem}MB (recommended: ${min_mem_mb}MB+)"
+              else
+                echo "✅ Available memory: ${available_mem}MB"
+              fi
+
+              # First value after "load average:" in the uptime output.
+              load_avg=$(uptime 2>/dev/null | awk -F'load average:' '{print $2}' | awk '{print $1}' | sed 's/,//' || echo "0")
+              echo "📊 System load average: ${load_avg}"
+            }
+
+            check_disk_space
+            check_resources
+
+            # Rebuild every container, refreshing base images but reusing cached layers.
+            echo "🔨 Rebuilding all containers with --pull (updating base images, using cache)..."
+            cd /home/prod
+
+            # Stop the stack, giving containers 30 seconds to shut down.
+            echo "🛑 Stopping containers gracefully..."
+            docker-compose down -t 30 || true
+
+            # Build with the tokens from GitHub Secrets set in the command environment.
+            TELEGRAM_BOT_TOKEN="$TELEGRAM_BOT_TOKEN" \
+            TELEGRAM_TEST_BOT_TOKEN="$TELEGRAM_TEST_BOT_TOKEN" \
+            ANON_BOT_TOKEN="$ANON_BOT_TOKEN" \
+            docker-compose build --pull
+
+            # Start the stack with the same tokens.
+            echo "🚀 Starting all containers..."
+            TELEGRAM_BOT_TOKEN="$TELEGRAM_BOT_TOKEN" \
+            TELEGRAM_TEST_BOT_TOKEN="$TELEGRAM_TEST_BOT_TOKEN" \
+            ANON_BOT_TOKEN="$ANON_BOT_TOKEN" \
+            docker-compose up -d
+
+            echo "✅ Containers rebuilt and started"
+
+      - name: Health check
+        uses: appleboy/ssh-action@v1.0.0
+        with:
+          host: ${{ vars.SERVER_HOST || secrets.SERVER_HOST }}
+          username: ${{ vars.SERVER_USER || secrets.SERVER_USER }}
+          key: ${{ secrets.SSH_PRIVATE_KEY }}
+          port: ${{ vars.SSH_PORT || secrets.SSH_PORT || 22 }}
+          script: |
+            set -e
+            echo "🏥 Running health checks..."
+
+            # Confirm the loopback interface answers; returns 1 when it does not.
+            check_network_availability() {
+              echo "🔍 Checking network availability..."
+
+              if ! ping -c 1 -W 2 localhost > /dev/null 2>&1; then
+                echo "❌ Localhost not reachable! Network issue detected."
+                return 1
+              fi
+
+              echo "✅ Network is available"
+              return 0
+            }
+
+            # Poll `docker-compose ps` until no container is in Exit/Restarting and at
+            # least one is Up, for at most max_wait seconds. Always returns 0; the
+            # health checks below decide success or failure.
+            wait_for_containers_ready() {
+              local max_wait=180  # 3 minutes at most
+              local check_interval=5
+              local elapsed=0
+              local running_count
+
+              echo "⏳ Waiting for containers to be ready..."
+
+              while [ $elapsed -lt $max_wait ]; do
+                # Any container in Exit or Restarting state means "not ready yet".
+                if docker-compose ps 2>/dev/null | grep -q "Exit\|Restarting"; then
+                  echo "⏳ Some containers not ready yet, waiting ${check_interval}s... (${elapsed}/${max_wait}s)"
+                  sleep $check_interval
+                  elapsed=$((elapsed + check_interval))
+                else
+                  # grep -c already prints 0 (and exits 1) on no match, so only the exit
+                  # status is neutralised; `|| echo "0"` would produce "0<newline>0".
+                  running_count=$(docker-compose ps 2>/dev/null | grep -c "Up" || true)
+                  if [ "$running_count" -gt 0 ]; then
+                    echo "✅ All containers are ready! (waited ${elapsed}s, ${running_count} containers running)"
+                    return 0
+                  else
+                    echo "⏳ Waiting for containers to start... (${elapsed}/${max_wait}s)"
+                    sleep $check_interval
+                    elapsed=$((elapsed + check_interval))
+                  fi
+                fi
+              done
+
+              echo "⚠️ Containers not fully ready after ${max_wait}s, but continuing with health checks..."
+              return 0
+            }
+
+            # Show the container status up front.
+            echo "📊 Checking container status..."
+            cd /home/prod
+            docker-compose ps
+
+            # A failed network check only warns; the health checks still run.
+            if ! check_network_availability; then
+              echo "⚠️ Network check failed, but continuing with health checks..."
+            fi
+
+            # Give the containers time to come up.
+            wait_for_containers_ready
+
+            # Probe one HTTP endpoint with growing delays between attempts.
+            #   $1 - service name for log output
+            #   $2 - URL expected to return a successful HTTP status
+            # Returns 0 on the first success, 1 after the last attempt fails.
+            check_health() {
+              local service=$1
+              local url=$2
+              local attempt=1
+              local delays=(5 15 45)  # one entry per attempt; the last is never slept
+              local max_attempts=${#delays[@]}
+              local delay
+
+              echo "🔍 Checking $service health..."
+
+              while [ $attempt -le $max_attempts ]; do
+                if curl -f -s --max-time 10 "$url" > /dev/null 2>&1; then
+                  echo "✅ $service is healthy (attempt $attempt/$max_attempts)"
+                  return 0
+                else
+                  if [ $attempt -lt $max_attempts ]; then
+                    delay=${delays[$((attempt - 1))]}
+                    echo "⏳ $service not ready yet (attempt $attempt/$max_attempts), waiting ${delay} seconds..."
+                    sleep $delay
+                  else
+                    echo "❌ $service health check failed after $max_attempts attempts"
+                    return 1
+                  fi
+                fi
+                attempt=$((attempt + 1))
+              done
+
+              return 1
+            }
+
+            # Run check_health for every service; on failure print the last 30 log
+            # lines of its container. Returns 1 when any service failed.
+            run_health_checks() {
+              local failed=0
+              local service_info service_name url container_name
+              # Fields are "name|url|compose service". The separator must not be ':'
+              # because the URLs contain colons and would be split into "http" + rest.
+              local services=(
+                "Prometheus|http://localhost:9090/-/healthy|prometheus"
+                "Grafana|http://localhost:3000/api/health|grafana"
+                "Telegram Bot|http://localhost:8080/health|telegram-bot"
+                "AnonBot|http://localhost:8081/health|anon-bot"
+              )
+
+              for service_info in "${services[@]}"; do
+                IFS='|' read -r service_name url container_name <<< "$service_info"
+                echo "🔍 Checking $service_name..."
+                if ! check_health "$service_name" "$url"; then
+                  echo "⚠️ $service_name health check failed"
+                  docker-compose logs --tail=30 "$container_name" || true
+                  failed=1
+                fi
+              done
+
+              return $failed
+            }
+
+            # Check all services.
+            HEALTH_CHECK_FAILED=0
+            if ! run_health_checks; then
+              HEALTH_CHECK_FAILED=1
+            fi
+
+            # Show the final container status.
+            echo "📊 Final container status:"
+            docker-compose ps
+
+            # Any container in Exit or Restarting state fails the step.
+            FAILED_CONTAINERS=$(docker-compose ps | grep -c "Exit\|Restarting" || true)
+            if [ "$FAILED_CONTAINERS" -gt 0 ]; then
+              echo "❌ Some containers are not running properly"
+              docker-compose ps
+              HEALTH_CHECK_FAILED=1
+            fi
+
+            if [ $HEALTH_CHECK_FAILED -eq 1 ]; then
+              echo "❌ Health checks failed!"
+              exit 1
+            else
+              echo "✅ All health checks passed!"
+            fi
+
+      # Runs even after a failure so the "deploying" history entry gets a final status.
+      - name: Update deploy history
+        if: always()
+        uses: appleboy/ssh-action@v1.0.0
+        with:
+          host: ${{ vars.SERVER_HOST || secrets.SERVER_HOST }}
+          username: ${{ vars.SERVER_USER || secrets.SERVER_USER }}
+          key: ${{ secrets.SSH_PRIVATE_KEY }}
+          port: ${{ vars.SSH_PORT || secrets.SSH_PORT || 22 }}
+          script: |
+            # Rewrite the trailing "|deploying" of the last history line to the final
+            # status. Uses an exclusive flock when available and falls back to an
+            # unlocked edit otherwise (e.g. filesystems without flock support).
+            #   $1 - final status written in place of "deploying"
+            safe_update_history_status() {
+              local new_status="$1"
+              local history_file="/home/prod/.deploy_history.txt"
+              local lock_file="${history_file}.lock"
+
+              if command -v flock > /dev/null 2>&1; then
+                (
+                  # Exclusive lock on fd 200.
+                  if flock -x 200 2>/dev/null; then
+                    if [ -f "$history_file" ]; then
+                      # Only the last line is touched, and only if it still ends in |deploying.
+                      sed -i '$s/|deploying$/|'"$new_status"'/' "$history_file"
+                      echo "✅ Deploy history updated with status: $new_status (with flock)"
+                    else
+                      echo "⚠️ History file not found, skipping update"
+                    fi
+                  else
+                    echo "⚠️ Failed to acquire lock, using fallback method"
+                    # Fallback: same edit without the lock.
+                    if [ -f "$history_file" ]; then
+                      sed -i '$s/|deploying$/|'"$new_status"'/' "$history_file"
+                      echo "✅ Deploy history updated (fallback method)"
+                    fi
+                  fi
+                ) 200>"$lock_file"
+              else
+                # Fallback: flock is not installed.
+                echo "⚠️ flock not available, using simple update"
+                if [ -f "$history_file" ]; then
+                  sed -i '$s/|deploying$/|'"$new_status"'/' "$history_file"
+                  echo "✅ Deploy history updated (simple method)"
+                fi
+              fi
+            }
+
+            DEPLOY_HISTORY="/home/prod/.deploy_history.txt"
+
+            if [ -f "$DEPLOY_HISTORY" ]; then
+              # job.status is expanded by GitHub before the script is sent to the server.
+              if [ "${{ job.status }}" = "success" ]; then
+                status="success"
+              else
+                status="failed"
+              fi
+
+              # Apply the final status.
+              safe_update_history_status "$status"
+            fi
+
+      - name: Send deployment notification
+        if: always()
+        uses: appleboy/telegram-action@v1.0.0
+        with:
+          to: ${{ secrets.TELEGRAM_CHAT_ID }}
+          token: ${{ secrets.TELEGRAM_BOT_TOKEN }}
+          message: |
+            ${{ job.status == 'success' && '✅' || '❌' }} Deployment: ${{ job.status }}
+
+            📦 Repository: prod
+            🌿 Branch: main
+            📝 Commit: ${{ github.event.pull_request.merge_commit_sha }}
+            👤 Author: ${{ github.event.pull_request.user.login }}
+            🔀 PR: #${{ github.event.pull_request.number }}
+
+            ${{ job.status == 'success' && '✅ Deployment successful! All services are healthy.' || '❌ Deployment failed! Check logs for details.' }}
+
+            🔗 View details: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+        continue-on-error: true
+
+  # Runs only after a successful deploy; a failure here triggers the auto-rollback job.
+  smoke-tests:
+    runs-on: ubuntu-latest
+    name: Smoke Tests
+    needs: deploy
+    if: |
+      always() &&
+      needs.deploy.result == 'success'
+
+    steps:
+      - name: Run Smoke Tests
+        env:
+          TELEGRAM_BOT_TOKEN: ${{ secrets.TELEGRAM_BOT_TOKEN }}
+          ANON_BOT_TOKEN: ${{ secrets.ANON_BOT_TOKEN }}
+        uses: appleboy/ssh-action@v1.0.0
+        with:
+          host: ${{ vars.SERVER_HOST || secrets.SERVER_HOST }}
+          username: ${{ vars.SERVER_USER || secrets.SERVER_USER }}
+          key: ${{ secrets.SSH_PRIVATE_KEY }}
+          port: ${{ vars.SSH_PORT || secrets.SSH_PORT || 22 }}
+          # ssh-action only forwards the variables listed in `envs`; without it
+          # TELEGRAM_BOT_TOKEN is empty remotely and the test below always exits 1.
+          envs: TELEGRAM_BOT_TOKEN,ANON_BOT_TOKEN
+          script: |
+            set -e
+            echo "🧪 Running smoke tests..."
+
+            # Chat that receives the "ping" messages; falls back to the built-in ID.
+            SMOKE_TEST_CHAT_ID="${SMOKE_TEST_CHAT_ID:--898316252}"
+            echo "📝 Using test chat ID: $SMOKE_TEST_CHAT_ID"
+
+            # Both bot health endpoints are mandatory.
+            echo "🔍 Checking health endpoints..."
+
+            if ! curl -f -s --max-time 10 "http://localhost:8080/health" > /dev/null 2>&1; then
+              echo "❌ Telegram Bot health endpoint failed"
+              exit 1
+            fi
+            echo "✅ Telegram Bot health endpoint OK"
+
+            if ! curl -f -s --max-time 10 "http://localhost:8081/health" > /dev/null 2>&1; then
+              echo "❌ AnonBot health endpoint failed"
+              exit 1
+            fi
+            echo "✅ AnonBot health endpoint OK"
+
+            # Metrics endpoints are optional: a failure only warns.
+            echo "🔍 Checking metrics endpoints..."
+            curl -f -s --max-time 10 "http://localhost:8080/metrics" > /dev/null 2>&1 && echo "✅ Telegram Bot metrics OK" || echo "⚠️ Telegram Bot metrics not available"
+            curl -f -s --max-time 10 "http://localhost:8081/metrics" > /dev/null 2>&1 && echo "✅ AnonBot metrics OK" || echo "⚠️ AnonBot metrics not available"
+
+            # Telegram Helper Bot smoke test (token comes from GitHub Secrets).
+            echo "🔍 Testing Telegram Helper Bot..."
+            if [ -z "$TELEGRAM_BOT_TOKEN" ]; then
+              echo "❌ TELEGRAM_BOT_TOKEN not found in GitHub Secrets"
+              exit 1
+            fi
+
+            # Send "ping" to the test chat; a curl failure yields an empty response.
+            response=$(curl -s --max-time 30 -X POST "https://api.telegram.org/bot${TELEGRAM_BOT_TOKEN}/sendMessage" \
+              -d "chat_id=${SMOKE_TEST_CHAT_ID}" \
+              -d "text=ping" || echo "")
+
+            if echo "$response" | grep -q '"ok":true'; then
+              echo "✅ Telegram Helper Bot smoke test passed (message sent successfully)"
+            else
+              echo "❌ Telegram Helper Bot smoke test failed"
+              echo "Response: $response"
+              exit 1
+            fi
+
+            # AnonBot smoke test: skipped without a token, non-critical on failure.
+            echo "🔍 Testing AnonBot..."
+            if [ -n "$ANON_BOT_TOKEN" ]; then
+              response=$(curl -s --max-time 30 -X POST "https://api.telegram.org/bot${ANON_BOT_TOKEN}/sendMessage" \
+                -d "chat_id=${SMOKE_TEST_CHAT_ID}" \
+                -d "text=ping" || echo "")
+
+              if echo "$response" | grep -q '"ok":true'; then
+                echo "✅ AnonBot smoke test passed (message sent successfully)"
+              else
+                echo "⚠️ AnonBot smoke test failed (non-critical)"
+                echo "Response: $response"
+              fi
+            else
+              echo "ℹ️ ANON_BOT_TOKEN not set, skipping smoke test"
+            fi
+
+            echo "✅ All smoke tests passed!"
+
+      - name: Send smoke tests notification
+        if: always()
+        uses: appleboy/telegram-action@v1.0.0
+        with:
+          to: ${{ secrets.TELEGRAM_CHAT_ID }}
+          token: ${{ secrets.TELEGRAM_BOT_TOKEN }}
+          message: |
+            ${{ job.status == 'success' && '✅' || '❌' }} Smoke Tests: ${{ job.status }}
+
+            📦 Repository: prod
+            🌿 Branch: main
+            📝 Commit: ${{ github.event.pull_request.merge_commit_sha }}
+
+            ${{ job.status == 'success' && '✅ All smoke tests passed! Bots are working correctly.' || '❌ Smoke tests failed! Auto-rollback will be triggered.' }}
+
+            🔗 View details: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+        continue-on-error: true
+
+  # Runs only when the smoke-tests job failed; restores the previous commit on the server.
+  auto-rollback:
+    runs-on: ubuntu-latest
+    name: Auto Rollback
+    concurrency:
+      group: production-rollback
+      cancel-in-progress: false
+    needs: [deploy, smoke-tests]
+    if: |
+      always() &&
+      needs.smoke-tests.result == 'failure'
+    environment:
+      name: production
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+        with:
+          ref: main
+
+      - name: Auto Rollback
+        env:
+          TELEGRAM_BOT_TOKEN: ${{ secrets.TELEGRAM_BOT_TOKEN }}
+          TELEGRAM_TEST_BOT_TOKEN: ${{ secrets.TELEGRAM_TEST_BOT_TOKEN }}
+          ANON_BOT_TOKEN: ${{ secrets.ANON_BOT_TOKEN }}
+        uses: appleboy/ssh-action@v1.0.0
+        with:
+          host: ${{ vars.SERVER_HOST || secrets.SERVER_HOST }}
+          username: ${{ vars.SERVER_USER || secrets.SERVER_USER }}
+          key: ${{ secrets.SSH_PRIVATE_KEY }}
+          port: ${{ vars.SSH_PORT || secrets.SSH_PORT || 22 }}
+          # ssh-action only forwards the variables listed in `envs`; without it the
+          # rollback would rebuild and start the containers with empty tokens.
+          envs: TELEGRAM_BOT_TOKEN,TELEGRAM_TEST_BOT_TOKEN,ANON_BOT_TOKEN
+          script: |
+            set -e
+            echo "🔄 Starting automatic rollback after smoke tests failure..."
+
+            # Print the deploy history to stdout, holding a shared flock while reading.
+            # Falls back to an unlocked read when flock is missing or the lock fails.
+            # Diagnostics go to stderr because callers capture stdout.
+            safe_read_history() {
+              local history_file="/home/prod/.deploy_history.txt"
+              local lock_file="${history_file}.lock"
+
+              if command -v flock > /dev/null 2>&1; then
+                (
+                  # Shared (read) lock on fd 200.
+                  if flock -s 200 2>/dev/null; then
+                    if [ -f "$history_file" ]; then
+                      cat "$history_file"
+                    else
+                      echo ""
+                    fi
+                  else
+                    echo "⚠️ Failed to acquire lock, using fallback method" >&2
+                    # Fallback: plain read without the lock.
+                    if [ -f "$history_file" ]; then
+                      cat "$history_file"
+                    else
+                      echo ""
+                    fi
+                  fi
+                ) 200>"$lock_file"
+              else
+                # Fallback: flock is not installed.
+                if [ -f "$history_file" ]; then
+                  cat "$history_file"
+                else
+                  echo ""
+                fi
+              fi
+            }
+
+            # Append one entry to the deploy history under an exclusive flock and keep
+            # only the newest $DEPLOY_HISTORY_SIZE lines (default 10).
+            #   $1 - history line: timestamp|commit|message|author|status
+            safe_write_history() {
+              local entry="$1"
+              local history_file="/home/prod/.deploy_history.txt"
+              local lock_file="${history_file}.lock"
+              local history_size="${DEPLOY_HISTORY_SIZE:-10}"
+
+              (
+                # Exclusive lock on fd 200; the subshell releases it on exit.
+                flock -x 200 || { echo "❌ Failed to acquire lock on $history_file"; exit 1; }
+
+                # Append the new entry.
+                echo "$entry" >> "$history_file"
+
+                # Trim via a temp file + rename so readers never see a partial file.
+                tail -n "$history_size" "$history_file" > "${history_file}.tmp"
+                mv "${history_file}.tmp" "$history_file"
+
+                echo "✅ History updated safely"
+              ) 200>"$lock_file"
+            }
+
+            # Hand ownership of the bots directory back to deploy:deploy (best effort).
+            fix_bots_permissions() {
+              local bots_dir="/home/prod/bots"
+
+              if [ ! -d "$bots_dir" ]; then
+                echo "⚠️ Bots directory not found, skipping permissions fix"
+                return 0
+              fi
+
+              echo "🔧 Fixing permissions for bots directory..."
+              sudo chown -R deploy:deploy "$bots_dir" || true
+              echo "✅ Permissions fixed"
+            }
+
+            cd /home/prod
+            # Number of history lines kept by safe_write_history.
+            DEPLOY_HISTORY_SIZE="${DEPLOY_HISTORY_SIZE:-10}"
+
+            # Newest history entry whose status field (the last one) is "success".
+            # The pattern is anchored so a commit message containing "|success" cannot match.
+            HISTORY_CONTENT=$(safe_read_history)
+            LAST_SUCCESSFUL_COMMIT=$(echo "$HISTORY_CONTENT" | grep '|success$' | tail -1 | cut -d'|' -f2 || echo "")
+
+            # Without a success entry, fall back to the commit saved by the deploy step.
+            if [ -z "$LAST_SUCCESSFUL_COMMIT" ]; then
+              if [ -f "/tmp/last_deploy_commit.txt" ]; then
+                LAST_SUCCESSFUL_COMMIT=$(cat /tmp/last_deploy_commit.txt)
+                echo "📝 Using saved commit from /tmp/last_deploy_commit.txt: $LAST_SUCCESSFUL_COMMIT"
+              else
+                echo "❌ No previous successful deploy found in history and no saved commit!"
+                exit 1
+              fi
+            else
+              echo "📝 Found last successful deploy in history: $LAST_SUCCESSFUL_COMMIT"
+            fi
+
+            # Roll the code back.
+            echo "🔄 Rolling back to commit: $LAST_SUCCESSFUL_COMMIT"
+
+            # Fix ownership before touching the checkout.
+            fix_bots_permissions
+
+            # One fetch is enough; nothing between here and the reset changes the remote state.
+            echo "🔍 Checking for local changes before rollback..."
+            git fetch origin main
+
+            # Uncommitted (unstaged or staged) edits are lost by the reset; log them first.
+            if ! git diff --quiet 2>/dev/null || ! git diff --cached --quiet 2>/dev/null; then
+              echo "⚠️ Uncommitted changes detected! They will be lost during rollback."
+              git status --short || true
+            fi
+
+            git reset --hard "$LAST_SUCCESSFUL_COMMIT"
+
+            # Re-apply ownership after the checkout was rewritten.
+            fix_bots_permissions
+
+            echo "✅ Code rolled back to: $LAST_SUCCESSFUL_COMMIT"
+
+            # Abort the rollback when docker-compose.yml is missing or does not parse.
+            validate_docker_compose() {
+              local compose_file="docker-compose.yml"
+
+              echo "🔍 Validating docker-compose configuration..."
+
+              if [ ! -f "$compose_file" ]; then
+                echo "❌ $compose_file not found!"
+                exit 1
+              fi
+
+              if ! docker-compose config > /dev/null 2>&1; then
+                echo "❌ Invalid docker-compose.yml syntax!"
+                docker-compose config  # run again unredirected to show the errors
+                exit 1
+              fi
+
+              echo "✅ docker-compose.yml is valid"
+            }
+
+            validate_docker_compose
+
+            # Require min_free_gb of free space on /home/prod before building;
+            # tries a docker prune once before giving up.
+            check_disk_space() {
+              local min_free_gb=5
+              local available_space=$(df -BG /home/prod 2>/dev/null | tail -1 | awk '{print $4}' | sed 's/G//' || echo "0")
+
+              echo "💾 Checking disk space..."
+              echo "Available space: ${available_space}GB"
+
+              if [ "$available_space" -lt "$min_free_gb" ]; then
+                echo "⚠️ Insufficient disk space! Need at least ${min_free_gb}GB, but only ${available_space}GB available"
+                echo "🧹 Attempting to clean up unused Docker resources..."
+                # NOTE(review): --volumes also removes unused volumes - confirm no data
+                # volume can be unused at this point.
+                docker system prune -f --volumes || true
+
+                # Measure again after the cleanup.
+                available_space=$(df -BG /home/prod 2>/dev/null | tail -1 | awk '{print $4}' | sed 's/G//' || echo "0")
+                echo "Available space after cleanup: ${available_space}GB"
+
+                if [ "$available_space" -lt "$min_free_gb" ]; then
+                  echo "❌ Still insufficient disk space after cleanup!"
+                  exit 1
+                fi
+              fi
+
+              echo "✅ Sufficient disk space available"
+            }
+
+            # Log available memory and load average; informational only, never fails.
+            check_resources() {
+              echo "💻 Checking system resources..."
+
+              # "available" column of `free -m`, in MB.
+              available_mem=$(free -m 2>/dev/null | awk '/^Mem:/ {print $7}' || echo "0")
+              min_mem_mb=512
+
+              if [ "$available_mem" -lt "$min_mem_mb" ] && [ "$available_mem" -gt 0 ]; then
+                echo "⚠️ Low available memory: ${available_mem}MB (recommended: ${min_mem_mb}MB+)"
+              else
+                echo "✅ Available memory: ${available_mem}MB"
+              fi
+
+              # First value after "load average:" in the uptime output.
+              load_avg=$(uptime 2>/dev/null | awk -F'load average:' '{print $2}' | awk '{print $1}' | sed 's/,//' || echo "0")
+              echo "📊 System load average: ${load_avg}"
+            }
+
+            check_disk_space
+            check_resources
+
+            # Rebuild every container, refreshing base images but reusing cached layers.
+            echo "🔨 Rebuilding all containers with --pull (updating base images, using cache)..."
+
+            # Stop the stack, giving containers 30 seconds to shut down.
+            echo "🛑 Stopping containers gracefully..."
+            docker-compose down -t 30 || true
+
+            # Build with the tokens from GitHub Secrets set in the command environment.
+            TELEGRAM_BOT_TOKEN="$TELEGRAM_BOT_TOKEN" \
+            TELEGRAM_TEST_BOT_TOKEN="$TELEGRAM_TEST_BOT_TOKEN" \
+            ANON_BOT_TOKEN="$ANON_BOT_TOKEN" \
+            docker-compose build --pull
+
+            # Start the stack with the same tokens.
+            echo "🚀 Starting all containers..."
+            TELEGRAM_BOT_TOKEN="$TELEGRAM_BOT_TOKEN" \
+            TELEGRAM_TEST_BOT_TOKEN="$TELEGRAM_TEST_BOT_TOKEN" \
+            ANON_BOT_TOKEN="$ANON_BOT_TOKEN" \
+            docker-compose up -d
+
+            echo "✅ Containers rebuilt and started"
+
+            # Confirm the loopback interface answers; returns 1 when it does not.
+            check_network_availability() {
+              echo "🔍 Checking network availability..."
+
+              if ! ping -c 1 -W 2 localhost > /dev/null 2>&1; then
+                echo "❌ Localhost not reachable! Network issue detected."
+                return 1
+              fi
+
+              echo "✅ Network is available"
+              return 0
+            }
+
+            # Poll `docker-compose ps` until no container is in Exit/Restarting and at
+            # least one is Up, for at most max_wait seconds. Always returns 0; the
+            # health checks below report the outcome.
+            wait_for_containers_ready() {
+              local max_wait=180  # 3 minutes at most
+              local check_interval=5
+              local elapsed=0
+              local running_count
+
+              echo "⏳ Waiting for containers to be ready..."
+
+              while [ $elapsed -lt $max_wait ]; do
+                if docker-compose ps 2>/dev/null | grep -q "Exit\|Restarting"; then
+                  echo "⏳ Some containers not ready yet, waiting ${check_interval}s... (${elapsed}/${max_wait}s)"
+                  sleep $check_interval
+                  elapsed=$((elapsed + check_interval))
+                else
+                  # grep -c already prints 0 (and exits 1) on no match, so only the exit
+                  # status is neutralised; `|| echo "0"` would produce "0<newline>0".
+                  running_count=$(docker-compose ps 2>/dev/null | grep -c "Up" || true)
+                  if [ "$running_count" -gt 0 ]; then
+                    echo "✅ All containers are ready! (waited ${elapsed}s, ${running_count} containers running)"
+                    return 0
+                  else
+                    echo "⏳ Waiting for containers to start... (${elapsed}/${max_wait}s)"
+                    sleep $check_interval
+                    elapsed=$((elapsed + check_interval))
+                  fi
+                fi
+              done
+
+              echo "⚠️ Containers not fully ready after ${max_wait}s, but continuing with health checks..."
+              return 0
+            }
+
+            # A failed network check only warns; the health checks still run.
+            if ! check_network_availability; then
+              echo "⚠️ Network check failed, but continuing with health checks..."
+            fi
+
+            # Give the containers time to come up.
+            wait_for_containers_ready
+
+            # Probe one HTTP endpoint with growing delays between attempts.
+            #   $1 - service name for log output
+            #   $2 - URL expected to return a successful HTTP status
+            # Returns 0 on the first success, 1 after the last attempt fails.
+            check_health() {
+              local service=$1
+              local url=$2
+              local attempt=1
+              local delays=(5 15 45)  # one entry per attempt; the last is never slept
+              local max_attempts=${#delays[@]}
+              local delay
+
+              echo "🔍 Checking $service health..."
+
+              while [ $attempt -le $max_attempts ]; do
+                if curl -f -s --max-time 10 "$url" > /dev/null 2>&1; then
+                  echo "✅ $service is healthy (attempt $attempt/$max_attempts)"
+                  return 0
+                else
+                  if [ $attempt -lt $max_attempts ]; then
+                    delay=${delays[$((attempt - 1))]}
+                    echo "⏳ $service not ready yet (attempt $attempt/$max_attempts), waiting ${delay} seconds..."
+                    sleep $delay
+                  else
+                    echo "❌ $service health check failed after $max_attempts attempts"
+                    return 1
+                  fi
+                fi
+                attempt=$((attempt + 1))
+              done
+
+              return 1
+            }
+
+            # Run check_health for every service; on failure print the last 30 log
+            # lines of its container. Returns 1 when any service failed.
+            run_health_checks() {
+              local failed=0
+              local service_info service_name url container_name
+              # Fields are "name|url|compose service". The separator must not be ':'
+              # because the URLs contain colons and would be split into "http" + rest.
+              local services=(
+                "Prometheus|http://localhost:9090/-/healthy|prometheus"
+                "Grafana|http://localhost:3000/api/health|grafana"
+                "Telegram Bot|http://localhost:8080/health|telegram-bot"
+                "AnonBot|http://localhost:8081/health|anon-bot"
+              )
+
+              for service_info in "${services[@]}"; do
+                IFS='|' read -r service_name url container_name <<< "$service_info"
+                echo "🔍 Checking $service_name..."
+                if ! check_health "$service_name" "$url"; then
+                  echo "⚠️ $service_name health check failed"
+                  docker-compose logs --tail=30 "$container_name" || true
+                  failed=1
+                fi
+              done
+
+              return $failed
+            }
+
+            HEALTH_CHECK_FAILED=0
+            if ! run_health_checks; then
+              HEALTH_CHECK_FAILED=1
+            fi
+
+            # Show the final container status.
+            echo "📊 Final container status:"
+            docker-compose ps
+
+            # Containers in Exit or Restarting state count as a failed check.
+            FAILED_CONTAINERS=$(docker-compose ps | grep -c "Exit\|Restarting" || true)
+            if [ "$FAILED_CONTAINERS" -gt 0 ]; then
+              echo "❌ Some containers are not running properly"
+              docker-compose ps
+              HEALTH_CHECK_FAILED=1
+            fi
+
+            # After a rollback, failed health checks are reported but do not fail the step.
+            if [ $HEALTH_CHECK_FAILED -eq 1 ]; then
+              echo "⚠️ Some health checks failed, but rollback completed"
+            else
+              echo "✅ All health checks passed after rollback!"
+            fi
+
+            # Record the rollback in the deploy history.
+            TIMESTAMP=$(date +"%Y-%m-%d %H:%M:%S")
+            safe_write_history "${TIMESTAMP}|${LAST_SUCCESSFUL_COMMIT}|Auto-rollback after smoke tests failure|github-actions|rolled_back"
+
+            echo "✅ Rollback completed successfully"
+
+      - name: Send rollback notification
+        if: always()
+        uses: appleboy/telegram-action@v1.0.0
+        with:
+          to: ${{ secrets.TELEGRAM_CHAT_ID }}
+          token: ${{ secrets.TELEGRAM_BOT_TOKEN }}
+          message: |
+            🔄 Automatic Rollback: ${{ job.status }}
+
+            📦 Repository: prod
+            🌿 Branch: main
+            📝 Rolled back to previous successful commit
+            🔀 PR: #${{ github.event.pull_request.number }}
+
+            ⚠️ Rollback was triggered automatically due to smoke tests failure.
+
+            ${{ job.status == 'success' && '✅ Rollback completed successfully! Services restored to previous version.' || '❌ Rollback failed! Manual intervention required.'
}} + + 🔗 View details: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} + continue-on-error: true diff --git a/.github/workflows/pipeline.yml b/.github/workflows/pipeline.yml index bd3e408..732bd8a 100644 --- a/.github/workflows/pipeline.yml +++ b/.github/workflows/pipeline.yml @@ -5,11 +5,23 @@ on: branches: [ main, 'develop', 'dev-*', 'feature/**' ] pull_request: branches: [ main, develop ] + workflow_dispatch: + inputs: + action: + description: 'Action to perform' + required: true + type: choice + options: + - rollback + rollback_to_commit: + description: 'Commit hash to rollback to (optional, uses last deploy if empty)' + required: false + type: string jobs: test: runs-on: ubuntu-latest - name: Test + name: Test & Code Quality steps: - name: Checkout code @@ -25,6 +37,28 @@ jobs: run: | python -m pip install --upgrade pip pip install -r tests/infra/requirements-test.txt + pip install flake8 black isort mypy + + - name: Code formatting check (Black) + run: | + echo "🔍 Checking code formatting with Black..." + black --check . || (echo "❌ Code formatting issues found. Run 'black .' to fix." && exit 1) + + - name: Import sorting check (isort) + run: | + echo "🔍 Checking import sorting with isort..." + isort --check-only . || (echo "❌ Import sorting issues found. Run 'isort .' to fix." && exit 1) + + - name: Linting (flake8) - Critical errors + run: | + echo "🔍 Running flake8 linter (critical errors only)..." + flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + + - name: Linting (flake8) - Warnings + run: | + echo "🔍 Running flake8 linter (warnings)..." + flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics || true + continue-on-error: true - name: Run infrastructure tests run: | @@ -34,6 +68,19 @@ jobs: run: | python -m pytest tests/infra/test_prometheus_config.py -v + - name: Check for merge conflicts + if: github.event_name == 'pull_request' + run: | + echo "🔍 Checking for merge conflicts..." + git fetch origin main + if ! git merge --no-commit --no-ff origin/main; then + echo "❌ Merge conflicts detected!" + git merge --abort || true + exit 1 + fi + git merge --abort || true + echo "✅ No merge conflicts detected" + - name: Upload test results if: always() uses: actions/upload-artifact@v4 @@ -51,31 +98,181 @@ jobs: to: ${{ secrets.TELEGRAM_CHAT_ID }} token: ${{ secrets.TELEGRAM_BOT_TOKEN }} message: | - 🧪 CI Tests ${{ job.status }} + ${{ job.status == 'success' && '✅' || '❌' }} CI Tests: ${{ job.status }} - Repository: prod - Branch: ${{ github.ref_name }} - Commit: ${{ github.sha }} - Author: ${{ github.actor }} + 📦 Repository: prod + 🌿 Branch: ${{ github.ref_name }} + 📝 Commit: ${{ github.sha }} + 👤 Author: ${{ github.actor }} - ${{ job.status == 'success' && '✅ All tests passed! Ready for deployment.' || '❌ Tests failed! Deployment blocked.' }} + ${{ job.status == 'success' && '✅ All tests passed! Pull request will be created.' || '❌ Tests failed! Deployment blocked.' 
}} - View details: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} + 🔗 View details: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} continue-on-error: true - deploy: + create-pr: runs-on: ubuntu-latest - name: Deploy + name: Create Pull Request needs: test - if: github.event_name == 'workflow_dispatch' # Только ручной запуск через кнопку + if: | + github.event_name == 'push' && + needs.test.result == 'success' && + github.ref_name != 'main' && + github.ref_name != 'develop' && + (startsWith(github.ref_name, 'dev-') || startsWith(github.ref_name, 'feature/')) + permissions: + contents: write + pull-requests: write + + steps: + - name: Checkout code + uses: actions/checkout@v4 + with: + fetch-depth: 0 + token: ${{ secrets.GITHUB_TOKEN }} + + - name: Check if PR already exists + id: check-pr + uses: actions/github-script@v6 + with: + script: | + const branchName = context.ref.replace('refs/heads/', ''); + const { data: prs } = await github.rest.pulls.list({ + owner: context.repo.owner, + repo: context.repo.repo, + head: `${context.repo.owner}:${branchName}`, + base: 'main', + state: 'open' + }); + + if (prs.length > 0) { + core.setOutput('exists', 'true'); + core.setOutput('number', prs[0].number); + core.setOutput('url', prs[0].html_url); + } else { + core.setOutput('exists', 'false'); + } + + - name: Update existing PR + if: steps.check-pr.outputs.exists == 'true' + uses: actions/github-script@v6 + with: + script: | + const prNumber = parseInt('${{ steps.check-pr.outputs.number }}'); + const branchName = context.ref.replace('refs/heads/', ''); + + await github.rest.pulls.update({ + owner: context.repo.owner, + repo: context.repo.repo, + pull_number: prNumber, + title: `Merge ${branchName} into main`, + body: `## Updated Changes\n\nPR updated with new commits after successful CI tests.\n\n- Latest commit: ${{ github.sha }}\n- Branch: \`${branchName}\`\n- Author: @${{ github.actor }}\n\n## Test 
Results\n\n✅ All tests passed successfully!\n\nPlease review the changes and merge when ready.` + }); + + console.log(`✅ PR #${prNumber} updated successfully`); + + - name: Create Pull Request + if: steps.check-pr.outputs.exists == 'false' + id: create-pr + uses: peter-evans/create-pull-request@v5 + with: + token: ${{ secrets.GITHUB_TOKEN }} + branch: ${{ github.ref_name }} + base: main + title: "Merge ${{ github.ref_name }} into main" + body: | + ## Changes + + This PR was automatically created after successful CI tests. + + - Branch: `${{ github.ref_name }}` + - Commit: `${{ github.sha }}` + - Author: @${{ github.actor }} + + ## Test Results + + ✅ All tests passed successfully! + + Please review the changes and merge when ready. + labels: | + automated + ready-for-review + draft: false + + - name: Get created PR number + if: steps.check-pr.outputs.exists == 'false' && steps.create-pr.outcome == 'success' + id: get-pr-number + uses: actions/github-script@v6 + with: + script: | + const branchName = context.ref.replace('refs/heads/', ''); + const { data: prs } = await github.rest.pulls.list({ + owner: context.repo.owner, + repo: context.repo.repo, + head: `${context.repo.owner}:${branchName}`, + base: 'main', + state: 'open' + }); + + if (prs.length > 0) { + core.setOutput('number', prs[0].number); + core.setOutput('url', prs[0].html_url); + } + + - name: Send PR notification - PR exists + if: steps.check-pr.outputs.exists == 'true' + uses: appleboy/telegram-action@v1.0.0 + with: + to: ${{ secrets.TELEGRAM_CHAT_ID }} + token: ${{ secrets.TELEGRAM_BOT_TOKEN }} + message: | + ℹ️ Pull Request Updated + + 📦 Repository: prod + 🌿 Branch: ${{ github.ref_name }} → main + 📝 Commit: ${{ github.sha }} + 👤 Author: ${{ github.actor }} + + ✅ All tests passed! PR #${{ steps.check-pr.outputs.number }} already exists and has been updated. 
+ + 🔗 View PR: ${{ steps.check-pr.outputs.url }} + continue-on-error: true + + - name: Send PR notification - PR created + if: steps.check-pr.outputs.exists == 'false' && steps.create-pr.outcome == 'success' + uses: appleboy/telegram-action@v1.0.0 + with: + to: ${{ secrets.TELEGRAM_CHAT_ID }} + token: ${{ secrets.TELEGRAM_BOT_TOKEN }} + message: | + 📝 Pull Request Created + + 📦 Repository: prod + 🌿 Branch: ${{ github.ref_name }} → main + 📝 Commit: ${{ github.sha }} + 👤 Author: ${{ github.actor }} + + ✅ All tests passed! Pull request #${{ steps.get-pr-number.outputs.number }} has been created and is ready for review. + + 🔗 View PR: ${{ steps.get-pr-number.outputs.url }} + continue-on-error: true + + rollback: + runs-on: ubuntu-latest + name: Manual Rollback + if: | + github.event_name == 'workflow_dispatch' && + github.event.inputs.action == 'rollback' environment: name: production steps: - name: Checkout code uses: actions/checkout@v4 + with: + ref: main - - name: Deploy to server + - name: Manual Rollback uses: appleboy/ssh-action@v1.0.0 with: host: ${{ vars.SERVER_HOST || secrets.SERVER_HOST }} @@ -84,126 +281,135 @@ jobs: port: ${{ vars.SSH_PORT || secrets.SSH_PORT || 22 }} script: | set -e - echo "🚀 Starting deployment..." + echo "🔄 Starting manual rollback..." - # Переходим в директорию проекта cd /home/prod + DEPLOY_HISTORY="/home/prod/.deploy_history.txt" + DEPLOY_HISTORY_SIZE="${DEPLOY_HISTORY_SIZE:-10}" - # Сохраняем текущий коммит для отката - CURRENT_COMMIT=$(git rev-parse HEAD) - echo "Current commit: $CURRENT_COMMIT" > /tmp/last_deploy_commit.txt - - # Обновляем код - echo "📥 Pulling latest changes..." 
- - # Исправляем права на файлы перед обновлением - sudo chown -R deploy:deploy /home/prod || true + # Determine the rollback target commit. SECURITY NOTE(review): the workflow_dispatch input is expanded into this script text by GitHub before the shell runs; consider passing it through an environment variable instead + if [ -n "${{ github.event.inputs.rollback_to_commit }}" ]; then + ROLLBACK_COMMIT="${{ github.event.inputs.rollback_to_commit }}" + echo "📝 Using specified commit: $ROLLBACK_COMMIT" + else + # Use the last successful deploy recorded in the history file + ROLLBACK_COMMIT=$(grep "|success" "$DEPLOY_HISTORY" 2>/dev/null | tail -1 | cut -d'|' -f2 || echo "") + + # Nothing in the history: fall back to the saved commit (the previous pipeline wrote this file as "Current commit: <sha>", so strip that prefix) + if [ -z "$ROLLBACK_COMMIT" ]; then + if [ -f "/tmp/last_deploy_commit.txt" ]; then + ROLLBACK_COMMIT=$(sed 's/^Current commit: //' /tmp/last_deploy_commit.txt) + echo "📝 Using saved commit from /tmp/last_deploy_commit.txt: $ROLLBACK_COMMIT" + else + echo "❌ No commit specified and no previous deploy found!" + exit 1 + fi + else + echo "📝 Using last successful deploy from history: $ROLLBACK_COMMIT" + fi + fi + # Make sure the target resolves to an existing commit object (^{commit}: a bare 40-hex string would otherwise pass --verify even if the object is missing) git fetch origin main - git reset --hard origin/main + if ! git rev-parse --verify "${ROLLBACK_COMMIT}^{commit}" > /dev/null 2>&1; then + echo "❌ Commit $ROLLBACK_COMMIT not found!" + exit 1 + fi - # Устанавливаем правильные права после обновления + # Roll the code back + echo "🔄 Rolling back to commit: $ROLLBACK_COMMIT" + + # Fix file ownership before resetting sudo chown -R deploy:deploy /home/prod || true - # Проверяем, что изменения есть - NEW_COMMIT=$(git rev-parse HEAD) - if [ "$CURRENT_COMMIT" = "$NEW_COMMIT" ]; then - echo "ℹ️ No new changes to deploy" - else - echo "✅ Code updated: $CURRENT_COMMIT → $NEW_COMMIT" - fi + git reset --hard "$ROLLBACK_COMMIT" - # Перезапускаем сервисы - echo "🔄 Restarting services..." 
- if command -v make &> /dev/null; then - make restart || docker-compose restart - else - cd /home/prod - docker-compose down - docker-compose up -d --build - fi + # Restore the correct ownership after the rollback + sudo chown -R deploy:deploy /home/prod || true - echo "✅ Deployment completed" - - - name: Health check - uses: appleboy/ssh-action@v1.0.0 - with: - host: ${{ vars.SERVER_HOST || secrets.SERVER_HOST }} - username: ${{ vars.SERVER_USER || secrets.SERVER_USER }} - key: ${{ secrets.SSH_PRIVATE_KEY }} - port: ${{ vars.SSH_PORT || secrets.SSH_PORT || 22 }} - script: | - echo "🏥 Running health checks..." + echo "✅ Code rolled back to: $ROLLBACK_COMMIT" - # Проверяем статус контейнеров сначала - echo "📊 Checking container status..." - cd /home/prod - docker-compose ps || docker ps --filter "name=bots_" + # Rebuild all containers, pulling updated base images while still using the build cache + echo "🔨 Rebuilding all containers with --pull (updating base images, using cache)..." + docker-compose down || true + docker-compose build --pull + docker-compose up -d - # Ждем запуска сервисов (увеличено время) - echo "⏳ Waiting for services to start (30 seconds)..." - sleep 30 + echo "✅ Containers rebuilt and started" - # Функция для проверки с повторными попытками + # Wait for the services to start + echo "⏳ Waiting for services to start (45 seconds)..." + sleep 45 + + # Health checks with increasing retry delays check_health() { local service=$1 local url=$2 - local max_attempts=5 local attempt=1 + local delays=(5 15 45) + local max_attempts=${#delays[@]} echo "🔍 Checking $service health..." while [ $attempt -le $max_attempts ]; do - if curl -f -s --max-time 5 "$url" > /dev/null 2>&1; then + if curl -f -s --max-time 10 "$url" > /dev/null 2>&1; then echo "✅ $service is healthy (attempt $attempt/$max_attempts)" return 0 else - echo "⏳ $service not ready yet (attempt $attempt/$max_attempts), waiting 10 seconds..." 
- sleep 10 - attempt=$((attempt + 1)) + if [ $attempt -lt $max_attempts ]; then + delay=${delays[$((attempt - 1))]} + echo "⏳ $service not ready yet (attempt $attempt/$max_attempts), waiting ${delay} seconds..." + sleep $delay + else + echo "❌ $service health check failed after $max_attempts attempts" + return 1 + fi fi + attempt=$((attempt + 1)) done - echo "❌ $service health check failed after $max_attempts attempts" return 1 } - # Проверяем Prometheus с повторными попытками - if ! check_health "Prometheus" "http://localhost:9090/-/healthy"; then - echo "⚠️ Prometheus health check failed, but continuing..." - echo "📊 Checking Prometheus logs:" - docker-compose logs --tail=20 prometheus || true + HEALTH_CHECK_FAILED=0 + + check_health "Prometheus" "http://localhost:9090/-/healthy" || HEALTH_CHECK_FAILED=1 + check_health "Grafana" "http://localhost:3000/api/health" || HEALTH_CHECK_FAILED=1 + check_health "Telegram Bot" "http://localhost:8080/health" || HEALTH_CHECK_FAILED=1 + check_health "AnonBot" "http://localhost:8081/health" || HEALTH_CHECK_FAILED=1 + + if [ $HEALTH_CHECK_FAILED -eq 1 ]; then + echo "⚠️ Some health checks failed, but rollback completed" + else + echo "✅ All health checks passed after rollback!" fi - # Проверяем Grafana с повторными попытками - if ! check_health "Grafana" "http://localhost:3000/api/health"; then - echo "⚠️ Grafana health check failed, but continuing..." 
- echo "📊 Checking Grafana logs:" - docker-compose logs --tail=20 grafana || true - fi + # Append this rollback to the deploy history + TIMESTAMP=$(date +"%Y-%m-%d %H:%M:%S") + COMMIT_MESSAGE=$(git log -1 --pretty=format:"%s" "$ROLLBACK_COMMIT" 2>/dev/null || echo "Manual rollback") + COMMIT_AUTHOR="${{ github.actor }}" + echo "${TIMESTAMP}|${ROLLBACK_COMMIT}|${COMMIT_MESSAGE}|${COMMIT_AUTHOR}|rolled_back" >> "$DEPLOY_HISTORY" - # Проверяем статус контейнеров - echo "📊 Container status:" - cd /home/prod - docker-compose ps || docker ps --filter "name=bots_" + # Keep only the last DEPLOY_HISTORY_SIZE entries + tail -n "$DEPLOY_HISTORY_SIZE" "$DEPLOY_HISTORY" > "${DEPLOY_HISTORY}.tmp" && mv "${DEPLOY_HISTORY}.tmp" "$DEPLOY_HISTORY" - echo "✅ Health checks completed" + echo "✅ Rollback completed successfully" - - name: Send deployment notification + - name: Send rollback notification if: always() uses: appleboy/telegram-action@v1.0.0 with: to: ${{ secrets.TELEGRAM_CHAT_ID }} token: ${{ secrets.TELEGRAM_BOT_TOKEN }} message: | - 🚀 Deployment ${{ job.status }} + 🔄 Manual Rollback: ${{ job.status }} - Repository: prod - Branch: ${{ github.ref_name }} - Commit: ${{ github.sha }} - Author: ${{ github.actor }} + 📦 Repository: prod + 🌿 Branch: main + 📝 Commit: ${{ github.event.inputs.rollback_to_commit || 'Previous successful deploy' }} + 👤 Author: ${{ github.actor }} - ${{ job.status == 'success' && '✅ Deployment successful!' || '❌ Deployment failed!' }} + ${{ job.status == 'success' && '✅ Rollback completed successfully! Services restored to specified version.' || '❌ Rollback failed! Check logs for details.' 
}} - View details: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} + 🔗 View details: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} continue-on-error: true diff --git a/docker-compose.yml b/docker-compose.yml index 73d5127..614ea0b 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -148,10 +148,10 @@ services: - LOG_RETENTION_DAYS=${LOG_RETENTION_DAYS:-30} - METRICS_HOST=${METRICS_HOST:-0.0.0.0} - METRICS_PORT=${METRICS_PORT:-8080} - # Telegram settings - - TELEGRAM_BOT_TOKEN=${BOT_TOKEN} - - TELEGRAM_LISTEN_BOT_TOKEN=${LISTEN_BOT_TOKEN} - - TELEGRAM_TEST_BOT_TOKEN=${TEST_BOT_TOKEN} + # Telegram settings (tokens from GitHub Secrets take priority over .env; the .env names are the fallback) + - TELEGRAM_BOT_TOKEN=${TELEGRAM_BOT_TOKEN:-${BOT_TOKEN}} + - TELEGRAM_LISTEN_BOT_TOKEN=${TELEGRAM_LISTEN_BOT_TOKEN:-${LISTEN_BOT_TOKEN}} + - TELEGRAM_TEST_BOT_TOKEN=${TELEGRAM_TEST_BOT_TOKEN:-${TEST_BOT_TOKEN}} - TELEGRAM_PREVIEW_LINK=${PREVIEW_LINK:-false} - TELEGRAM_MAIN_PUBLIC=${MAIN_PUBLIC} - TELEGRAM_GROUP_FOR_POSTS=${GROUP_FOR_POSTS} @@ -205,8 +205,8 @@ services: - PYTHONUNBUFFERED=1 - DOCKER_CONTAINER=true - LOG_LEVEL=${LOG_LEVEL:-INFO} - # AnonBot settings - - ANON_BOT_TOKEN=${BOT_TOKEN} + # AnonBot settings (the token from GitHub Secrets takes priority over .env; BOT_TOKEN is the fallback) + - ANON_BOT_TOKEN=${ANON_BOT_TOKEN:-${BOT_TOKEN}} - ANON_BOT_ADMINS=${ADMINS} - ANON_BOT_DATABASE_PATH=/app/database/anon_qna.db - ANON_BOT_DEBUG=${DEBUG:-false}