chore: enhance deployment workflow with improved health checks and manual trigger

- Updated the deployment job to run only when triggered manually via workflow_dispatch.
- Implemented a retry mechanism for health checks on Prometheus and Grafana to improve reliability.
- Increased wait time for services to start before health checks are performed.
- Modified health check messages for better clarity and added logging for failed checks.
2026-01-25 16:58:16 +03:00
parent fde1f14708
commit 0cdc40cd21
1 changed files with 45 additions and 15 deletions
--- a/.github/workflows/pipeline.yml
+++ b/.github/workflows/pipeline.yml
@@ -67,7 +67,7 @@ jobs:
    runs-on: ubuntu-latest
    name: Deploy
    needs: test
-    if: success() && github.ref == 'refs/heads/main'
+    if: github.event_name == 'workflow_dispatch'  # Только ручной запуск через кнопку
    environment:
      name: production
@@ -134,22 +134,52 @@ jobs:
          port: ${{ vars.SSH_PORT || secrets.SSH_PORT || 22 }}
          script: |
            echo "🏥 Running health checks..."
            sleep 15  # Даем время сервисам запуститься
-            # Проверяем Prometheus
+            # Проверяем статус контейнеров сначала
-            if curl -f http://localhost:9090/-/healthy > /dev/null 2>&1; then
+            echo "📊 Checking container status..."
-              echo "✅ Prometheus is healthy"
+            cd /home/prod
-            else
+            docker-compose ps || docker ps --filter "name=bots_"
-              echo "❌ Prometheus health check failed"
+            
-              exit 1
+            # Ждем запуска сервисов (увеличено время)
            echo "⏳ Waiting for services to start (30 seconds)..."
            sleep 30
            # Функция для проверки с повторными попытками
            # check_health SERVICE URL
            #   Probes URL with curl until a probe succeeds or the retry budget
            #   (5 attempts, 10 seconds apart) is used up.
            #   SERVICE - name used only in the log messages.
            #   URL     - endpoint passed to curl.
            #   Returns 0 on the first successful probe, 1 once every attempt failed.
            check_health() {
              # Quote the positional arguments so values with spaces or glob
              # characters survive in shells that word-split `local` assignments.
              local service="$1"
              local url="$2"
              local max_attempts=5
              local attempt=1
              echo "🔍 Checking $service health..."
              while [ "$attempt" -le "$max_attempts" ]; do
                # -f makes curl exit non-zero on HTTP error responses, -s silences
                # progress output, --max-time 5 caps each probe at five seconds.
                if curl -f -s --max-time 5 "$url" > /dev/null 2>&1; then
                  echo "✅ $service is healthy (attempt $attempt/$max_attempts)"
                  return 0
                fi
                # Wait only when another attempt will follow; sleeping after the
                # final failure would just delay the error report by 10 seconds.
                if [ "$attempt" -lt "$max_attempts" ]; then
                  echo "⏳ $service not ready yet (attempt $attempt/$max_attempts), waiting 10 seconds..."
                  sleep 10
                fi
                attempt=$((attempt + 1))
              done
              echo "❌ $service health check failed after $max_attempts attempts"
              return 1
            }
            # Проверяем Prometheus с повторными попытками
            if ! check_health "Prometheus" "http://localhost:9090/-/healthy"; then
              echo "⚠️  Prometheus health check failed, but continuing..."
              echo "📊 Checking Prometheus logs:"
              docker-compose logs --tail=20 prometheus || true
            fi
-            # Проверяем Grafana
+            # Проверяем Grafana с повторными попытками
-            if curl -f http://localhost:3000/api/health > /dev/null 2>&1; then
+            if ! check_health "Grafana" "http://localhost:3000/api/health"; then
-              echo "✅ Grafana is healthy"
+              echo "⚠️  Grafana health check failed, but continuing..."
-            else
+              echo "📊 Checking Grafana logs:"
-              echo "❌ Grafana health check failed"
+              docker-compose logs --tail=20 grafana || true
              exit 1
            fi
            # Проверяем статус контейнеров
@@ -157,7 +187,7 @@ jobs:
            cd /home/prod
            docker-compose ps || docker ps --filter "name=bots_"
-            echo "✅ All health checks passed"
+            echo "✅ Health checks completed"
      - name: Send deployment notification
        if: always()