chore: enhance deployment workflow with improved health checks and manual trigger
- Updated the deployment job to allow manual triggering via workflow_dispatch. - Implemented a retry mechanism for health checks on Prometheus and Grafana to improve reliability. - Increased wait time for services to start before health checks are performed. - Modified health check messages for better clarity and added logging for failed checks.
This commit is contained in:
60
.github/workflows/pipeline.yml
vendored
60
.github/workflows/pipeline.yml
vendored
@@ -67,7 +67,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
name: Deploy
|
||||
needs: test
|
||||
if: success() && github.ref == 'refs/heads/main'
|
||||
if: github.event_name == 'workflow_dispatch' # Только ручной запуск через кнопку
|
||||
environment:
|
||||
name: production
|
||||
|
||||
@@ -134,22 +134,52 @@ jobs:
|
||||
port: ${{ vars.SSH_PORT || secrets.SSH_PORT || 22 }}
|
||||
script: |
|
||||
echo "🏥 Running health checks..."
|
||||
sleep 15 # Даем время сервисам запуститься
|
||||
|
||||
# Проверяем Prometheus
|
||||
if curl -f http://localhost:9090/-/healthy > /dev/null 2>&1; then
|
||||
echo "✅ Prometheus is healthy"
|
||||
else
|
||||
echo "❌ Prometheus health check failed"
|
||||
exit 1
|
||||
# Проверяем статус контейнеров сначала
|
||||
echo "📊 Checking container status..."
|
||||
cd /home/prod
|
||||
docker-compose ps || docker ps --filter "name=bots_"
|
||||
|
||||
# Ждем запуска сервисов (увеличено время)
|
||||
echo "⏳ Waiting for services to start (30 seconds)..."
|
||||
sleep 30
|
||||
|
||||
# Функция для проверки с повторными попытками
|
||||
check_health() {
|
||||
local service=$1
|
||||
local url=$2
|
||||
local max_attempts=5
|
||||
local attempt=1
|
||||
|
||||
echo "🔍 Checking $service health..."
|
||||
|
||||
while [ $attempt -le $max_attempts ]; do
|
||||
if curl -f -s --max-time 5 "$url" > /dev/null 2>&1; then
|
||||
echo "✅ $service is healthy (attempt $attempt/$max_attempts)"
|
||||
return 0
|
||||
else
|
||||
echo "⏳ $service not ready yet (attempt $attempt/$max_attempts), waiting 10 seconds..."
|
||||
sleep 10
|
||||
attempt=$((attempt + 1))
|
||||
fi
|
||||
done
|
||||
|
||||
echo "❌ $service health check failed after $max_attempts attempts"
|
||||
return 1
|
||||
}
|
||||
|
||||
# Проверяем Prometheus с повторными попытками
|
||||
if ! check_health "Prometheus" "http://localhost:9090/-/healthy"; then
|
||||
echo "⚠️ Prometheus health check failed, but continuing..."
|
||||
echo "📊 Checking Prometheus logs:"
|
||||
docker-compose logs --tail=20 prometheus || true
|
||||
fi
|
||||
|
||||
# Проверяем Grafana
|
||||
if curl -f http://localhost:3000/api/health > /dev/null 2>&1; then
|
||||
echo "✅ Grafana is healthy"
|
||||
else
|
||||
echo "❌ Grafana health check failed"
|
||||
exit 1
|
||||
# Проверяем Grafana с повторными попытками
|
||||
if ! check_health "Grafana" "http://localhost:3000/api/health"; then
|
||||
echo "⚠️ Grafana health check failed, but continuing..."
|
||||
echo "📊 Checking Grafana logs:"
|
||||
docker-compose logs --tail=20 grafana || true
|
||||
fi
|
||||
|
||||
# Проверяем статус контейнеров
|
||||
@@ -157,7 +187,7 @@ jobs:
|
||||
cd /home/prod
|
||||
docker-compose ps || docker ps --filter "name=bots_"
|
||||
|
||||
echo "✅ All health checks passed"
|
||||
echo "✅ Health checks completed"
|
||||
|
||||
- name: Send deployment notification
|
||||
if: always()
|
||||
|
||||
Reference in New Issue
Block a user