chore: enhance deployment workflow with improved health checks and manual trigger

- Changed the deployment job to run only when triggered manually via workflow_dispatch (previously it ran after successful tests on the main branch).
- Implemented a retry mechanism for health checks on Prometheus and Grafana to improve reliability.
- Increased wait time for services to start before health checks are performed.
- Modified health check messages for better clarity; a failed check now logs a warning and the last 20 lines of the service's logs instead of exiting with an error.
This commit is contained in:
2026-01-25 16:58:16 +03:00
parent fde1f14708
commit 0cdc40cd21

View File

@@ -67,7 +67,7 @@ jobs:
runs-on: ubuntu-latest
name: Deploy
needs: test
if: success() && github.ref == 'refs/heads/main'
if: github.event_name == 'workflow_dispatch' # Только ручной запуск через кнопку
environment:
name: production
@@ -134,22 +134,52 @@ jobs:
port: ${{ vars.SSH_PORT || secrets.SSH_PORT || 22 }}
script: |
echo "🏥 Running health checks..."
sleep 15 # Даем время сервисам запуститься
# Проверяем Prometheus
if curl -f http://localhost:9090/-/healthy > /dev/null 2>&1; then
echo "✅ Prometheus is healthy"
# Проверяем статус контейнеров сначала
echo "📊 Checking container status..."
cd /home/prod
docker-compose ps || docker ps --filter "name=bots_"
# Ждем запуска сервисов (увеличено время)
echo "⏳ Waiting for services to start (30 seconds)..."
sleep 30
# Health check helper with retries.
#
# Polls a URL with curl until a request succeeds or the attempts run out.
#
# Arguments:
#   $1 - service name, used only in the log messages
#   $2 - URL to request
# Returns:
#   0 as soon as one curl request succeeds
#   1 after max_attempts (5) failed requests
check_health() {
local service=$1
local url=$2
local max_attempts=5
local attempt=1
echo "🔍 Checking $service health..."
while [ $attempt -le $max_attempts ]; do
# -f makes curl fail on HTTP error responses, -s silences progress output,
# --max-time 5 caps each request at 5 seconds; the response body is discarded.
if curl -f -s --max-time 5 "$url" > /dev/null 2>&1; then
echo "✅ $service is healthy (attempt $attempt/$max_attempts)"
return 0
else
# NOTE(review): the next two lines look like removed-side lines of the diff
# (the old hard-coded Prometheus failure branch) - confirm against the real
# workflow file. As rendered here, `exit 1` would end the script on the first
# failed attempt and the retry lines below it would never run.
echo "❌ Prometheus health check failed"
exit 1
# Retry path: report the failed attempt, wait 10 seconds, then try again.
echo "⏳ $service not ready yet (attempt $attempt/$max_attempts), waiting 10 seconds..."
sleep 10
attempt=$((attempt + 1))
fi
done
# All attempts failed; the caller decides whether this is fatal.
echo "❌ $service health check failed after $max_attempts attempts"
return 1
}
# Проверяем Prometheus с повторными попытками
if ! check_health "Prometheus" "http://localhost:9090/-/healthy"; then
echo "⚠️ Prometheus health check failed, but continuing..."
echo "📊 Checking Prometheus logs:"
docker-compose logs --tail=20 prometheus || true
fi
# Проверяем Grafana
if curl -f http://localhost:3000/api/health > /dev/null 2>&1; then
echo " Grafana is healthy"
else
echo "❌ Grafana health check failed"
exit 1
# Проверяем Grafana с повторными попытками
if ! check_health "Grafana" "http://localhost:3000/api/health"; then
echo "⚠️ Grafana health check failed, but continuing..."
echo "📊 Checking Grafana logs:"
docker-compose logs --tail=20 grafana || true
fi
# Проверяем статус контейнеров
@@ -157,7 +187,7 @@ jobs:
cd /home/prod
docker-compose ps || docker ps --filter "name=bots_"
echo "✅ All health checks passed"
echo "✅ Health checks completed"
- name: Send deployment notification
if: always()