1051 lines
47 KiB
YAML
1051 lines
47 KiB
YAML
---
# Production deployment workflow: runs when a PR into main is merged,
# deploys over SSH, smoke-tests the result, and auto-rolls-back on failure.
name: Deploy to Production

on:
  pull_request:
    types: [closed]
    branches: [main]
|
||
|
||
jobs:
  deploy:
    runs-on: ubuntu-latest
    name: Deploy to Production
    # Serialize production deploys; never cancel one that is in flight.
    concurrency:
      group: production-deploy
      cancel-in-progress: false
    # Only deploy for PRs that were actually merged (not just closed).
    if: github.event.pull_request.merged == true
    environment:
      name: production

    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: main

      # NOTE(review): interpolating secrets directly into shell single quotes
      # breaks if a secret contains a quote — acceptable here since this step
      # only reports whether each secret is non-empty.
      - name: Debug secrets availability
        run: |
          echo "🔍 Checking secrets availability..."
          echo "TELEGRAM_BOT_TOKEN: $([ -n '${{ secrets.TELEGRAM_BOT_TOKEN }}' ] && echo '✅ Set' || echo '❌ Not set')"
          echo "TELEGRAM_TEST_BOT_TOKEN: $([ -n '${{ secrets.TELEGRAM_TEST_BOT_TOKEN }}' ] && echo '✅ Set' || echo '⚠️ Not set (optional)')"
          echo "ANON_BOT_TOKEN: $([ -n '${{ secrets.ANON_BOT_TOKEN }}' ] && echo '✅ Set' || echo '⚠️ Not set (optional)')"
          echo "SSH_PRIVATE_KEY: $([ -n '${{ secrets.SSH_PRIVATE_KEY }}' ] && echo '✅ Set' || echo '❌ Not set')"
          echo "SERVER_HOST: $([ -n '${{ vars.SERVER_HOST || secrets.SERVER_HOST }}' ] && echo '✅ Set' || echo '❌ Not set')"
          echo "SERVER_USER: $([ -n '${{ vars.SERVER_USER || secrets.SERVER_USER }}' ] && echo '✅ Set' || echo '❌ Not set')"

      - name: Validate Telegram Bot Tokens
        env:
          TELEGRAM_BOT_TOKEN: ${{ secrets.TELEGRAM_BOT_TOKEN }}
          TELEGRAM_TEST_BOT_TOKEN: ${{ secrets.TELEGRAM_TEST_BOT_TOKEN }}
          ANON_BOT_TOKEN: ${{ secrets.ANON_BOT_TOKEN }}
        uses: appleboy/ssh-action@v1.0.0
        with:
          host: ${{ vars.SERVER_HOST || secrets.SERVER_HOST }}
          username: ${{ vars.SERVER_USER || secrets.SERVER_USER }}
          key: ${{ secrets.SSH_PRIVATE_KEY }}
          port: ${{ vars.SSH_PORT || secrets.SSH_PORT || 22 }}
          # FIX: ssh-action only forwards step env vars to the remote shell
          # when they are listed here; without this the script sees them empty.
          envs: TELEGRAM_BOT_TOKEN,TELEGRAM_TEST_BOT_TOKEN,ANON_BOT_TOKEN
          script: |
            set -e
            echo "🔍 Validating Telegram Bot tokens from GitHub Secrets..."

            # Validate a bot token against the Telegram getMe API, with retries.
            validate_token() {
              local token_name=$1
              local token=$2
              local max_retries=3
              local retry=1

              while [ $retry -le $max_retries ]; do
                echo "🔍 Checking $token_name (attempt $retry/$max_retries)..."

                response=$(curl -s --max-time 10 "https://api.telegram.org/bot${token}/getMe" || echo "")

                if echo "$response" | grep -q '"ok":true'; then
                  bot_username=$(echo "$response" | grep -o '"username":"[^"]*"' | cut -d'"' -f4 || echo "unknown")
                  echo "✅ $token_name is valid (bot: @$bot_username)"
                  return 0
                else
                  if [ $retry -lt $max_retries ]; then
                    echo "⏳ $token_name validation failed, retrying in 5 seconds..."
                    sleep 5
                  else
                    echo "❌ $token_name is invalid or unreachable"
                    echo "Response: $response"
                    return 1
                  fi
                fi

                retry=$((retry + 1))
              done

              return 1
            }

            # Helper bot token is mandatory.
            if [ -z "$TELEGRAM_BOT_TOKEN" ]; then
              echo "❌ TELEGRAM_BOT_TOKEN not found in GitHub Secrets"
              echo "💡 Make sure the secret is added to the 'production' environment or repository secrets"
              exit 1
            fi

            if ! validate_token "Telegram Helper Bot Token" "$TELEGRAM_BOT_TOKEN"; then
              exit 1
            fi

            # Test bot token is optional; failure is non-fatal.
            if [ -n "$TELEGRAM_TEST_BOT_TOKEN" ]; then
              if ! validate_token "Telegram Test Bot Token" "$TELEGRAM_TEST_BOT_TOKEN"; then
                echo "⚠️ Test bot token validation failed, but continuing..."
              fi
            else
              echo "ℹ️ TELEGRAM_TEST_BOT_TOKEN not set, skipping"
            fi

            # AnonBot token is optional, but if set it must be valid.
            if [ -z "$ANON_BOT_TOKEN" ]; then
              echo "⚠️ ANON_BOT_TOKEN not found in GitHub Secrets, skipping validation"
            else
              if ! validate_token "AnonBot Token" "$ANON_BOT_TOKEN"; then
                exit 1
              fi
            fi

            echo "✅ All token validations passed!"

      - name: Deploy to server
        env:
          TELEGRAM_BOT_TOKEN: ${{ secrets.TELEGRAM_BOT_TOKEN }}
          TELEGRAM_TEST_BOT_TOKEN: ${{ secrets.TELEGRAM_TEST_BOT_TOKEN }}
          ANON_BOT_TOKEN: ${{ secrets.ANON_BOT_TOKEN }}
        uses: appleboy/ssh-action@v1.0.0
        with:
          host: ${{ vars.SERVER_HOST || secrets.SERVER_HOST }}
          username: ${{ vars.SERVER_USER || secrets.SERVER_USER }}
          key: ${{ secrets.SSH_PRIVATE_KEY }}
          port: ${{ vars.SSH_PORT || secrets.SSH_PORT || 22 }}
          # FIX: forward the token env vars into the remote shell.
          envs: TELEGRAM_BOT_TOKEN,TELEGRAM_TEST_BOT_TOKEN,ANON_BOT_TOKEN
          script: |
            set -e
            echo "🚀 Starting deployment to production..."

            # Safely append an entry to the deploy history file using flock.
            safe_write_history() {
              local entry="$1"
              local history_file="/home/prod/.deploy_history.txt"
              local lock_file="${history_file}.lock"
              local history_size="${DEPLOY_HISTORY_SIZE:-10}"

              (
                # Take an exclusive write lock on fd 200.
                flock -x 200 || { echo "❌ Failed to acquire lock on $history_file"; exit 1; }

                # Append the new entry.
                echo "$entry" >> "$history_file"

                # Trim the file atomically to the last N entries.
                tail -n "$history_size" "$history_file" > "${history_file}.tmp"
                mv "${history_file}.tmp" "$history_file"

                echo "✅ History updated safely"
              ) 200>"$lock_file"
            }

            # Work from the project directory.
            cd /home/prod

            # Remember the current commit so a rollback target exists.
            CURRENT_COMMIT=$(git rev-parse HEAD)
            COMMIT_MESSAGE=$(git log -1 --pretty=format:"%s" || echo "Unknown")
            COMMIT_AUTHOR=$(git log -1 --pretty=format:"%an" || echo "Unknown")
            TIMESTAMP=$(date +"%Y-%m-%d %H:%M:%S")

            # Save for quick access by the rollback job.
            echo "$CURRENT_COMMIT" > /tmp/last_deploy_commit.txt
            echo "📝 Current commit: $CURRENT_COMMIT"
            echo "📝 Commit message: $COMMIT_MESSAGE"
            echo "📝 Author: $COMMIT_AUTHOR"

            # Record the deploy start in the history file (with locking).
            DEPLOY_HISTORY="/home/prod/.deploy_history.txt"
            DEPLOY_HISTORY_SIZE="${DEPLOY_HISTORY_SIZE:-10}"

            safe_write_history "${TIMESTAMP}|${CURRENT_COMMIT}|${COMMIT_MESSAGE}|${COMMIT_AUTHOR}|deploying"

            # Update the code from main.
            echo "📥 Pulling latest changes from main..."

            # Fix ownership of the bots directory before updating.
            fix_bots_permissions() {
              local bots_dir="/home/prod/bots"

              if [ ! -d "$bots_dir" ]; then
                echo "⚠️ Bots directory not found, skipping permissions fix"
                return 0
              fi

              echo "🔧 Fixing permissions for bots directory..."
              sudo chown -R deploy:deploy "$bots_dir" || true
              echo "✅ Permissions fixed"
            }

            fix_bots_permissions

            # Warn about local changes that the hard reset will discard.
            check_local_changes() {
              echo "🔍 Checking for local changes..."

              git fetch origin main

              # Differences between the working tree and origin/main.
              if ! git diff --quiet HEAD origin/main 2>/dev/null; then
                echo "⚠️ Local changes detected! They will be overwritten by git reset --hard"
                echo "📋 Diff summary:"
                git diff --stat HEAD origin/main || true
              fi

              # Uncommitted (staged or unstaged) changes.
              if ! git diff --quiet 2>/dev/null || ! git diff --cached --quiet 2>/dev/null; then
                echo "⚠️ Uncommitted changes detected! They will be lost."
                git status --short || true
              fi

              echo "✅ Proceeding with git reset --hard"
            }

            check_local_changes

            git fetch origin main
            git reset --hard origin/main

            # Fix ownership again after the reset.
            fix_bots_permissions

            # Report whether anything actually changed.
            NEW_COMMIT=$(git rev-parse HEAD)
            if [ "$CURRENT_COMMIT" = "$NEW_COMMIT" ]; then
              echo "ℹ️ No new changes to deploy"
            else
              echo "✅ Code updated: $CURRENT_COMMIT → $NEW_COMMIT"
            fi

            # Validate the docker-compose file before touching containers.
            validate_docker_compose() {
              local compose_file="docker-compose.yml"

              echo "🔍 Validating docker-compose configuration..."

              if [ ! -f "$compose_file" ]; then
                echo "❌ $compose_file not found!"
                exit 1
              fi

              if ! docker-compose config > /dev/null 2>&1; then
                echo "❌ Invalid docker-compose.yml syntax!"
                docker-compose config  # show the errors
                exit 1
              fi

              echo "✅ docker-compose.yml is valid"
            }

            validate_docker_compose

            # Ensure there is enough disk space before building images.
            check_disk_space() {
              local min_free_gb=5
              local available_space=$(df -BG /home/prod 2>/dev/null | tail -1 | awk '{print $4}' | sed 's/G//' || echo "0")

              echo "💾 Checking disk space..."
              echo "Available space: ${available_space}GB"

              if [ "$available_space" -lt "$min_free_gb" ]; then
                echo "⚠️ Insufficient disk space! Need at least ${min_free_gb}GB, but only ${available_space}GB available"
                echo "🧹 Attempting to clean up unused Docker resources..."
                docker system prune -f --volumes || true

                # Re-check after cleanup.
                available_space=$(df -BG /home/prod 2>/dev/null | tail -1 | awk '{print $4}' | sed 's/G//' || echo "0")
                echo "Available space after cleanup: ${available_space}GB"

                if [ "$available_space" -lt "$min_free_gb" ]; then
                  echo "❌ Still insufficient disk space after cleanup!"
                  exit 1
                fi
              fi

              echo "✅ Sufficient disk space available"
            }

            # Report memory and CPU load (informational only).
            check_resources() {
              echo "💻 Checking system resources..."

              # Available memory in MB.
              available_mem=$(free -m 2>/dev/null | awk '/^Mem:/ {print $7}' || echo "0")
              min_mem_mb=512

              if [ "$available_mem" -lt "$min_mem_mb" ] && [ "$available_mem" -gt 0 ]; then
                echo "⚠️ Low available memory: ${available_mem}MB (recommended: ${min_mem_mb}MB+)"
              else
                echo "✅ Available memory: ${available_mem}MB"
              fi

              # CPU load average (informational).
              load_avg=$(uptime 2>/dev/null | awk -F'load average:' '{print $2}' | awk '{print $1}' | sed 's/,//' || echo "0")
              echo "📊 System load average: ${load_avg}"
            }

            check_disk_space
            check_resources

            # Rebuild all containers, refreshing base images but keeping layer cache.
            echo "🔨 Rebuilding all containers with --pull (updating base images, using cache)..."
            cd /home/prod

            # Stop containers gracefully (30 second timeout).
            echo "🛑 Stopping containers gracefully..."
            docker-compose down -t 30 || true

            # Rebuild with --pull; tokens come from GitHub Secrets via env vars.
            TELEGRAM_BOT_TOKEN="$TELEGRAM_BOT_TOKEN" \
            TELEGRAM_TEST_BOT_TOKEN="$TELEGRAM_TEST_BOT_TOKEN" \
            ANON_BOT_TOKEN="$ANON_BOT_TOKEN" \
            docker-compose build --pull

            # Start everything with the tokens from Secrets.
            echo "🚀 Starting all containers..."
            TELEGRAM_BOT_TOKEN="$TELEGRAM_BOT_TOKEN" \
            TELEGRAM_TEST_BOT_TOKEN="$TELEGRAM_TEST_BOT_TOKEN" \
            ANON_BOT_TOKEN="$ANON_BOT_TOKEN" \
            docker-compose up -d

            echo "✅ Containers rebuilt and started"

      - name: Health check
        uses: appleboy/ssh-action@v1.0.0
        with:
          host: ${{ vars.SERVER_HOST || secrets.SERVER_HOST }}
          username: ${{ vars.SERVER_USER || secrets.SERVER_USER }}
          key: ${{ secrets.SSH_PRIVATE_KEY }}
          port: ${{ vars.SSH_PORT || secrets.SSH_PORT || 22 }}
          script: |
            set -e
            echo "🏥 Running health checks..."

            # Sanity-check basic network reachability.
            check_network_availability() {
              echo "🔍 Checking network availability..."

              if ! ping -c 1 -W 2 localhost > /dev/null 2>&1; then
                echo "❌ Localhost not reachable! Network issue detected."
                return 1
              fi

              echo "✅ Network is available"
              return 0
            }

            # Adaptively wait for containers to become ready.
            wait_for_containers_ready() {
              local max_wait=180  # at most 3 minutes
              local check_interval=5
              local elapsed=0

              echo "⏳ Waiting for containers to be ready..."

              while [ $elapsed -lt $max_wait ]; do
                # No container may be in an Exit or Restarting state.
                if docker-compose ps 2>/dev/null | grep -q "Exit\|Restarting"; then
                  echo "⏳ Some containers not ready yet, waiting ${check_interval}s... (${elapsed}/${max_wait}s)"
                  sleep $check_interval
                  elapsed=$((elapsed + check_interval))
                else
                  # Everything that exists is up; make sure something is running.
                  local running_count=$(docker-compose ps 2>/dev/null | grep -c "Up" || echo "0")
                  if [ "$running_count" -gt 0 ]; then
                    echo "✅ All containers are ready! (waited ${elapsed}s, ${running_count} containers running)"
                    return 0
                  else
                    echo "⏳ Waiting for containers to start... (${elapsed}/${max_wait}s)"
                    sleep $check_interval
                    elapsed=$((elapsed + check_interval))
                  fi
                fi
              done

              echo "⚠️ Containers not fully ready after ${max_wait}s, but continuing with health checks..."
              return 0
            }

            # Show current container status.
            echo "📊 Checking container status..."
            cd /home/prod
            docker-compose ps

            # Network check is advisory, not fatal.
            if ! check_network_availability; then
              echo "⚠️ Network check failed, but continuing with health checks..."
            fi

            # Wait for containers adaptively.
            wait_for_containers_ready

            # Probe a service URL with exponentially growing retry delays.
            check_health() {
              local service=$1
              local url=$2
              local attempt=1
              local delays=(5 15 45)  # exponential delays: 5s, 15s, 45s
              local max_attempts=${#delays[@]}

              echo "🔍 Checking $service health..."

              while [ $attempt -le $max_attempts ]; do
                if curl -f -s --max-time 10 "$url" > /dev/null 2>&1; then
                  echo "✅ $service is healthy (attempt $attempt/$max_attempts)"
                  return 0
                else
                  if [ $attempt -lt $max_attempts ]; then
                    delay=${delays[$((attempt - 1))]}
                    echo "⏳ $service not ready yet (attempt $attempt/$max_attempts), waiting ${delay} seconds..."
                    sleep $delay
                  else
                    echo "❌ $service health check failed after $max_attempts attempts"
                    return 1
                  fi
                fi
                attempt=$((attempt + 1))
              done

              return 1
            }

            # Run health checks for every service.
            # FIX: entries use '|' as the field separator — splitting on ':'
            # broke the URLs ("http://..." contains colons), so the parsed
            # url was always the literal string "http".
            run_health_checks() {
              local failed=0
              local services=(
                "Prometheus|http://localhost:9090/-/healthy|prometheus"
                "Grafana|http://localhost:3000/api/health|grafana"
                "Telegram Bot|http://localhost:8080/health|telegram-bot"
                "AnonBot|http://localhost:8081/health|anon-bot"
              )

              for service_info in "${services[@]}"; do
                IFS='|' read -r service_name url container_name <<< "$service_info"
                echo "🔍 Checking $service_name..."
                if ! check_health "$service_name" "$url"; then
                  echo "⚠️ $service_name health check failed"
                  docker-compose logs --tail=30 "$container_name" || true
                  failed=1
                fi
              done

              return $failed
            }

            # Check all services.
            HEALTH_CHECK_FAILED=0
            if ! run_health_checks; then
              HEALTH_CHECK_FAILED=1
            fi

            # Final container status.
            echo "📊 Final container status:"
            docker-compose ps

            # Any container stuck in Exit/Restarting counts as a failure.
            FAILED_CONTAINERS=$(docker-compose ps | grep -c "Exit\|Restarting" || true)
            if [ "$FAILED_CONTAINERS" -gt 0 ]; then
              echo "❌ Some containers are not running properly"
              docker-compose ps
              HEALTH_CHECK_FAILED=1
            fi

            if [ $HEALTH_CHECK_FAILED -eq 1 ]; then
              echo "❌ Health checks failed!"
              exit 1
            else
              echo "✅ All health checks passed!"
            fi

      - name: Update deploy history
        if: always()
        uses: appleboy/ssh-action@v1.0.0
        with:
          host: ${{ vars.SERVER_HOST || secrets.SERVER_HOST }}
          username: ${{ vars.SERVER_USER || secrets.SERVER_USER }}
          key: ${{ secrets.SSH_PRIVATE_KEY }}
          port: ${{ vars.SSH_PORT || secrets.SSH_PORT || 22 }}
          script: |
            # Safely rewrite the last history entry's status using flock,
            # with a fallback for filesystems that do not support flock (e.g. NFS).
            safe_update_history_status() {
              local new_status="$1"
              local history_file="/home/prod/.deploy_history.txt"
              local lock_file="${history_file}.lock"

              if command -v flock > /dev/null 2>&1; then
                (
                  # Take an exclusive write lock on fd 200.
                  if flock -x 200 2>/dev/null; then
                    if [ -f "$history_file" ]; then
                      # Replace the trailing "deploying" status on the last line.
                      sed -i '$s/|deploying$/|'"$new_status"'/' "$history_file"
                      echo "✅ Deploy history updated with status: $new_status (with flock)"
                    else
                      echo "⚠️ History file not found, skipping update"
                    fi
                  else
                    echo "⚠️ Failed to acquire lock, using fallback method"
                    # Fallback: plain update without locking.
                    if [ -f "$history_file" ]; then
                      sed -i '$s/|deploying$/|'"$new_status"'/' "$history_file"
                      echo "✅ Deploy history updated (fallback method)"
                    fi
                  fi
                ) 200>"$lock_file"
              else
                # Fallback when flock is unavailable.
                echo "⚠️ flock not available, using simple update"
                if [ -f "$history_file" ]; then
                  sed -i '$s/|deploying$/|'"$new_status"'/' "$history_file"
                  echo "✅ Deploy history updated (simple method)"
                fi
              fi
            }

            DEPLOY_HISTORY="/home/prod/.deploy_history.txt"

            if [ -f "$DEPLOY_HISTORY" ]; then
              # Promote the trailing "deploying" entry to success or failed.
              if [ "${{ job.status }}" = "success" ]; then
                status="success"
              else
                status="failed"
              fi

              # Update the status safely.
              safe_update_history_status "$status"
            fi

      - name: Send deployment notification
        if: always()
        uses: appleboy/telegram-action@v1.0.0
        with:
          to: ${{ secrets.TELEGRAM_CHAT_ID }}
          token: ${{ secrets.TELEGRAM_BOT_TOKEN }}
          message: |
            ${{ job.status == 'success' && '✅' || '❌' }} Deployment: ${{ job.status }}

            📦 Repository: prod
            🌿 Branch: main
            📝 Commit: ${{ github.event.pull_request.merge_commit_sha }}
            👤 Author: ${{ github.event.pull_request.user.login }}
            🔀 PR: #${{ github.event.pull_request.number }}

            ${{ job.status == 'success' && '✅ Deployment successful! All services are healthy.' || '❌ Deployment failed! Check logs for details.' }}

            🔗 View details: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
        continue-on-error: true
|
||
|
||
smoke-tests:
|
||
runs-on: ubuntu-latest
|
||
name: Smoke Tests
|
||
needs: deploy
|
||
if: |
|
||
always() &&
|
||
needs.deploy.result == 'success'
|
||
|
||
steps:
|
||
- name: Run Smoke Tests
|
||
env:
|
||
TELEGRAM_BOT_TOKEN: ${{ secrets.TELEGRAM_BOT_TOKEN }}
|
||
ANON_BOT_TOKEN: ${{ secrets.ANON_BOT_TOKEN }}
|
||
uses: appleboy/ssh-action@v1.0.0
|
||
with:
|
||
host: ${{ vars.SERVER_HOST || secrets.SERVER_HOST }}
|
||
username: ${{ vars.SERVER_USER || secrets.SERVER_USER }}
|
||
key: ${{ secrets.SSH_PRIVATE_KEY }}
|
||
port: ${{ vars.SSH_PORT || secrets.SSH_PORT || 22 }}
|
||
script: |
|
||
set -e
|
||
echo "🧪 Running smoke tests..."
|
||
|
||
SMOKE_TEST_CHAT_ID="${SMOKE_TEST_CHAT_ID:--898316252}"
|
||
echo "📝 Using test chat ID: $SMOKE_TEST_CHAT_ID"
|
||
|
||
# Проверка health endpoints
|
||
echo "🔍 Checking health endpoints..."
|
||
|
||
if ! curl -f -s --max-time 10 "http://localhost:8080/health" > /dev/null 2>&1; then
|
||
echo "❌ Telegram Bot health endpoint failed"
|
||
exit 1
|
||
fi
|
||
echo "✅ Telegram Bot health endpoint OK"
|
||
|
||
if ! curl -f -s --max-time 10 "http://localhost:8081/health" > /dev/null 2>&1; then
|
||
echo "❌ AnonBot health endpoint failed"
|
||
exit 1
|
||
fi
|
||
echo "✅ AnonBot health endpoint OK"
|
||
|
||
# Проверка метрик (опционально)
|
||
echo "🔍 Checking metrics endpoints..."
|
||
curl -f -s --max-time 10 "http://localhost:8080/metrics" > /dev/null 2>&1 && echo "✅ Telegram Bot metrics OK" || echo "⚠️ Telegram Bot metrics not available"
|
||
curl -f -s --max-time 10 "http://localhost:8081/metrics" > /dev/null 2>&1 && echo "✅ AnonBot metrics OK" || echo "⚠️ AnonBot metrics not available"
|
||
|
||
# Smoke-тест Telegram Helper Bot (используем токен из GitHub Secrets)
|
||
echo "🔍 Testing Telegram Helper Bot..."
|
||
if [ -z "$TELEGRAM_BOT_TOKEN" ]; then
|
||
echo "❌ TELEGRAM_BOT_TOKEN not found in GitHub Secrets"
|
||
exit 1
|
||
fi
|
||
|
||
# Отправляем сообщение "ping" в тестовый чат
|
||
response=$(curl -s --max-time 30 -X POST "https://api.telegram.org/bot${TELEGRAM_BOT_TOKEN}/sendMessage" \
|
||
-d "chat_id=${SMOKE_TEST_CHAT_ID}" \
|
||
-d "text=ping" || echo "")
|
||
|
||
if echo "$response" | grep -q '"ok":true'; then
|
||
echo "✅ Telegram Helper Bot smoke test passed (message sent successfully)"
|
||
else
|
||
echo "❌ Telegram Helper Bot smoke test failed"
|
||
echo "Response: $response"
|
||
exit 1
|
||
fi
|
||
|
||
# Smoke-тест AnonBot (используем токен из GitHub Secrets)
|
||
echo "🔍 Testing AnonBot..."
|
||
if [ -n "$ANON_BOT_TOKEN" ]; then
|
||
response=$(curl -s --max-time 30 -X POST "https://api.telegram.org/bot${ANON_BOT_TOKEN}/sendMessage" \
|
||
-d "chat_id=${SMOKE_TEST_CHAT_ID}" \
|
||
-d "text=ping" || echo "")
|
||
|
||
if echo "$response" | grep -q '"ok":true'; then
|
||
echo "✅ AnonBot smoke test passed (message sent successfully)"
|
||
else
|
||
echo "⚠️ AnonBot smoke test failed (non-critical)"
|
||
echo "Response: $response"
|
||
fi
|
||
else
|
||
echo "ℹ️ ANON_BOT_TOKEN not set, skipping smoke test"
|
||
fi
|
||
|
||
echo "✅ All smoke tests passed!"
|
||
|
||
- name: Send smoke tests notification
|
||
if: always()
|
||
uses: appleboy/telegram-action@v1.0.0
|
||
with:
|
||
to: ${{ secrets.TELEGRAM_CHAT_ID }}
|
||
token: ${{ secrets.TELEGRAM_BOT_TOKEN }}
|
||
message: |
|
||
${{ job.status == 'success' && '✅' || '❌' }} Smoke Tests: ${{ job.status }}
|
||
|
||
📦 Repository: prod
|
||
🌿 Branch: main
|
||
📝 Commit: ${{ github.event.pull_request.merge_commit_sha }}
|
||
|
||
${{ job.status == 'success' && '✅ All smoke tests passed! Bots are working correctly.' || '❌ Smoke tests failed! Auto-rollback will be triggered.' }}
|
||
|
||
🔗 View details: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||
continue-on-error: true
|
||
|
||
auto-rollback:
|
||
runs-on: ubuntu-latest
|
||
name: Auto Rollback
|
||
concurrency:
|
||
group: production-rollback
|
||
cancel-in-progress: false
|
||
needs: [deploy, smoke-tests]
|
||
if: |
|
||
always() &&
|
||
needs.smoke-tests.result == 'failure'
|
||
environment:
|
||
name: production
|
||
|
||
steps:
|
||
- name: Checkout code
|
||
uses: actions/checkout@v4
|
||
with:
|
||
ref: main
|
||
|
||
- name: Auto Rollback
|
||
env:
|
||
TELEGRAM_BOT_TOKEN: ${{ secrets.TELEGRAM_BOT_TOKEN }}
|
||
TELEGRAM_TEST_BOT_TOKEN: ${{ secrets.TELEGRAM_TEST_BOT_TOKEN }}
|
||
ANON_BOT_TOKEN: ${{ secrets.ANON_BOT_TOKEN }}
|
||
uses: appleboy/ssh-action@v1.0.0
|
||
with:
|
||
host: ${{ vars.SERVER_HOST || secrets.SERVER_HOST }}
|
||
username: ${{ vars.SERVER_USER || secrets.SERVER_USER }}
|
||
key: ${{ secrets.SSH_PRIVATE_KEY }}
|
||
port: ${{ vars.SSH_PORT || secrets.SSH_PORT || 22 }}
|
||
script: |
|
||
set -e
|
||
echo "🔄 Starting automatic rollback after smoke tests failure..."
|
||
|
||
# Функция для безопасного чтения истории деплоев с использованием flock
|
||
# С fallback для файловых систем, которые не поддерживают flock (например, NFS)
|
||
safe_read_history() {
|
||
local history_file="/home/prod/.deploy_history.txt"
|
||
local lock_file="${history_file}.lock"
|
||
|
||
if command -v flock > /dev/null 2>&1; then
|
||
(
|
||
# Блокируем файл на чтение (shared lock)
|
||
if flock -s 200 2>/dev/null; then
|
||
if [ -f "$history_file" ]; then
|
||
cat "$history_file"
|
||
else
|
||
echo ""
|
||
fi
|
||
else
|
||
echo "⚠️ Failed to acquire lock, using fallback method"
|
||
# Fallback: простое чтение без блокировки
|
||
if [ -f "$history_file" ]; then
|
||
cat "$history_file"
|
||
else
|
||
echo ""
|
||
fi
|
||
fi
|
||
) 200>"$lock_file"
|
||
else
|
||
# Fallback: если flock недоступен
|
||
if [ -f "$history_file" ]; then
|
||
cat "$history_file"
|
||
else
|
||
echo ""
|
||
fi
|
||
fi
|
||
}
|
||
|
||
# Функция для безопасной записи в историю деплоев с использованием flock
|
||
safe_write_history() {
|
||
local entry="$1"
|
||
local history_file="/home/prod/.deploy_history.txt"
|
||
local lock_file="${history_file}.lock"
|
||
local history_size="${DEPLOY_HISTORY_SIZE:-10}"
|
||
|
||
(
|
||
# Блокируем файл на запись (exclusive lock)
|
||
flock -x 200 || { echo "❌ Failed to acquire lock on $history_file"; exit 1; }
|
||
|
||
# Записываем новую запись
|
||
echo "$entry" >> "$history_file"
|
||
|
||
# Обрезаем файл атомарно
|
||
tail -n "$history_size" "$history_file" > "${history_file}.tmp"
|
||
mv "${history_file}.tmp" "$history_file"
|
||
|
||
echo "✅ History updated safely"
|
||
) 200>"$lock_file"
|
||
}
|
||
|
||
# Функция для безопасного изменения прав на bots директорию
|
||
fix_bots_permissions() {
|
||
local bots_dir="/home/prod/bots"
|
||
|
||
if [ ! -d "$bots_dir" ]; then
|
||
echo "⚠️ Bots directory not found, skipping permissions fix"
|
||
return 0
|
||
fi
|
||
|
||
echo "🔧 Fixing permissions for bots directory..."
|
||
sudo chown -R deploy:deploy "$bots_dir" || true
|
||
echo "✅ Permissions fixed"
|
||
}
|
||
|
||
cd /home/prod
|
||
DEPLOY_HISTORY="/home/prod/.deploy_history.txt"
|
||
DEPLOY_HISTORY_SIZE="${DEPLOY_HISTORY_SIZE:-10}"
|
||
|
||
# Находим последний успешный деплой из истории (безопасно)
|
||
HISTORY_CONTENT=$(safe_read_history)
|
||
LAST_SUCCESSFUL_COMMIT=$(echo "$HISTORY_CONTENT" | grep "|success" | tail -1 | cut -d'|' -f2 || echo "")
|
||
|
||
# Если нет успешного деплоя в истории, используем сохраненный коммит
|
||
if [ -z "$LAST_SUCCESSFUL_COMMIT" ]; then
|
||
if [ -f "/tmp/last_deploy_commit.txt" ]; then
|
||
LAST_SUCCESSFUL_COMMIT=$(cat /tmp/last_deploy_commit.txt)
|
||
echo "📝 Using saved commit from /tmp/last_deploy_commit.txt: $LAST_SUCCESSFUL_COMMIT"
|
||
else
|
||
echo "❌ No previous successful deploy found in history and no saved commit!"
|
||
exit 1
|
||
fi
|
||
else
|
||
echo "📝 Found last successful deploy in history: $LAST_SUCCESSFUL_COMMIT"
|
||
fi
|
||
|
||
# Откатываем код
|
||
echo "🔄 Rolling back to commit: $LAST_SUCCESSFUL_COMMIT"
|
||
|
||
# Исправляем права на файлы в bots директории
|
||
fix_bots_permissions
|
||
|
||
# Проверяем наличие локальных изменений перед reset
|
||
echo "🔍 Checking for local changes before rollback..."
|
||
git fetch origin main
|
||
|
||
# Проверяем uncommitted changes
|
||
if ! git diff --quiet 2>/dev/null || ! git diff --cached --quiet 2>/dev/null; then
|
||
echo "⚠️ Uncommitted changes detected! They will be lost during rollback."
|
||
git status --short || true
|
||
fi
|
||
|
||
git fetch origin main
|
||
git reset --hard "$LAST_SUCCESSFUL_COMMIT"
|
||
|
||
# Устанавливаем правильные права после отката
|
||
fix_bots_permissions
|
||
|
||
echo "✅ Code rolled back to: $LAST_SUCCESSFUL_COMMIT"
|
||
|
||
# Проверяем docker-compose файл
|
||
validate_docker_compose() {
|
||
local compose_file="docker-compose.yml"
|
||
|
||
echo "🔍 Validating docker-compose configuration..."
|
||
|
||
if [ ! -f "$compose_file" ]; then
|
||
echo "❌ $compose_file not found!"
|
||
exit 1
|
||
fi
|
||
|
||
if ! docker-compose config > /dev/null 2>&1; then
|
||
echo "❌ Invalid docker-compose.yml syntax!"
|
||
docker-compose config # Показываем ошибки
|
||
exit 1
|
||
fi
|
||
|
||
echo "✅ docker-compose.yml is valid"
|
||
}
|
||
|
||
validate_docker_compose
|
||
|
||
# Проверяем дисковое пространство перед сборкой
|
||
check_disk_space() {
|
||
local min_free_gb=5
|
||
local available_space=$(df -BG /home/prod 2>/dev/null | tail -1 | awk '{print $4}' | sed 's/G//' || echo "0")
|
||
|
||
echo "💾 Checking disk space..."
|
||
echo "Available space: ${available_space}GB"
|
||
|
||
if [ "$available_space" -lt "$min_free_gb" ]; then
|
||
echo "⚠️ Insufficient disk space! Need at least ${min_free_gb}GB, but only ${available_space}GB available"
|
||
echo "🧹 Attempting to clean up unused Docker resources..."
|
||
docker system prune -f --volumes || true
|
||
|
||
# Проверяем снова после очистки
|
||
available_space=$(df -BG /home/prod 2>/dev/null | tail -1 | awk '{print $4}' | sed 's/G//' || echo "0")
|
||
echo "Available space after cleanup: ${available_space}GB"
|
||
|
||
if [ "$available_space" -lt "$min_free_gb" ]; then
|
||
echo "❌ Still insufficient disk space after cleanup!"
|
||
exit 1
|
||
fi
|
||
fi
|
||
|
||
echo "✅ Sufficient disk space available"
|
||
}
|
||
|
||
# Проверяем доступность памяти и CPU (опционально)
|
||
check_resources() {
|
||
echo "💻 Checking system resources..."
|
||
|
||
# Проверяем доступную память (в MB)
|
||
available_mem=$(free -m 2>/dev/null | awk '/^Mem:/ {print $7}' || echo "0")
|
||
min_mem_mb=512
|
||
|
||
if [ "$available_mem" -lt "$min_mem_mb" ] && [ "$available_mem" -gt 0 ]; then
|
||
echo "⚠️ Low available memory: ${available_mem}MB (recommended: ${min_mem_mb}MB+)"
|
||
else
|
||
echo "✅ Available memory: ${available_mem}MB"
|
||
fi
|
||
|
||
# Проверяем загрузку CPU (опционально)
|
||
load_avg=$(uptime 2>/dev/null | awk -F'load average:' '{print $2}' | awk '{print $1}' | sed 's/,//' || echo "0")
|
||
echo "📊 System load average: ${load_avg}"
|
||
}
|
||
|
||
check_disk_space
|
||
check_resources
|
||
|
||
# Пересобираем все контейнеры с обновлением базовых образов и кешированием
|
||
echo "🔨 Rebuilding all containers with --pull (updating base images, using cache)..."
|
||
|
||
# Останавливаем все контейнеры с graceful shutdown (30 секунд на остановку)
|
||
echo "🛑 Stopping containers gracefully..."
|
||
docker-compose down -t 30 || true
|
||
|
||
# Пересобираем с токенами из Secrets
|
||
TELEGRAM_BOT_TOKEN="$TELEGRAM_BOT_TOKEN" \
|
||
TELEGRAM_TEST_BOT_TOKEN="$TELEGRAM_TEST_BOT_TOKEN" \
|
||
ANON_BOT_TOKEN="$ANON_BOT_TOKEN" \
|
||
docker-compose build --pull
|
||
|
||
# Запускаем с токенами из Secrets
|
||
echo "🚀 Starting all containers..."
|
||
TELEGRAM_BOT_TOKEN="$TELEGRAM_BOT_TOKEN" \
|
||
TELEGRAM_TEST_BOT_TOKEN="$TELEGRAM_TEST_BOT_TOKEN" \
|
||
ANON_BOT_TOKEN="$ANON_BOT_TOKEN" \
|
||
docker-compose up -d
|
||
|
||
echo "✅ Containers rebuilt and started"
|
||
|
||
# Проверяем доступность сети
|
||
check_network_availability() {
|
||
echo "🔍 Checking network availability..."
|
||
|
||
if ! ping -c 1 -W 2 localhost > /dev/null 2>&1; then
|
||
echo "❌ Localhost not reachable! Network issue detected."
|
||
return 1
|
||
fi
|
||
|
||
echo "✅ Network is available"
|
||
return 0
|
||
}
|
||
|
||
# Адаптивное ожидание готовности контейнеров
|
||
wait_for_containers_ready() {
|
||
local max_wait=180 # 3 минуты максимум
|
||
local check_interval=5
|
||
local elapsed=0
|
||
|
||
echo "⏳ Waiting for containers to be ready..."
|
||
|
||
while [ $elapsed -lt $max_wait ]; do
|
||
if docker-compose ps 2>/dev/null | grep -q "Exit\|Restarting"; then
|
||
echo "⏳ Some containers not ready yet, waiting ${check_interval}s... (${elapsed}/${max_wait}s)"
|
||
sleep $check_interval
|
||
elapsed=$((elapsed + check_interval))
|
||
else
|
||
local running_count=$(docker-compose ps 2>/dev/null | grep -c "Up" || echo "0")
|
||
if [ "$running_count" -gt 0 ]; then
|
||
echo "✅ All containers are ready! (waited ${elapsed}s, ${running_count} containers running)"
|
||
return 0
|
||
else
|
||
echo "⏳ Waiting for containers to start... (${elapsed}/${max_wait}s)"
|
||
sleep $check_interval
|
||
elapsed=$((elapsed + check_interval))
|
||
fi
|
||
fi
|
||
done
|
||
|
||
echo "⚠️ Containers not fully ready after ${max_wait}s, but continuing with health checks..."
|
||
return 0
|
||
}
|
||
|
||
# Проверяем сеть перед health checks
|
||
if ! check_network_availability; then
|
||
echo "⚠️ Network check failed, but continuing with health checks..."
|
||
fi
|
||
|
||
# Ждем готовности контейнеров адаптивно
|
||
wait_for_containers_ready
|
||
|
||
# Функция для проверки с экспоненциальным retry
|
||
check_health() {
|
||
local service=$1
|
||
local url=$2
|
||
local attempt=1
|
||
local delays=(5 15 45)
|
||
local max_attempts=${#delays[@]}
|
||
|
||
echo "🔍 Checking $service health..."
|
||
|
||
while [ $attempt -le $max_attempts ]; do
|
||
if curl -f -s --max-time 10 "$url" > /dev/null 2>&1; then
|
||
echo "✅ $service is healthy (attempt $attempt/$max_attempts)"
|
||
return 0
|
||
else
|
||
if [ $attempt -lt $max_attempts ]; then
|
||
delay=${delays[$((attempt - 1))]}
|
||
echo "⏳ $service not ready yet (attempt $attempt/$max_attempts), waiting ${delay} seconds..."
|
||
sleep $delay
|
||
else
|
||
echo "❌ $service health check failed after $max_attempts attempts"
|
||
return 1
|
||
fi
|
||
fi
|
||
attempt=$((attempt + 1))
|
||
done
|
||
|
||
return 1
|
||
}
|
||
|
||
# Общая функция для проверки всех сервисов
|
||
run_health_checks() {
|
||
local failed=0
|
||
local services=(
|
||
"Prometheus:http://localhost:9090/-/healthy:prometheus"
|
||
"Grafana:http://localhost:3000/api/health:grafana"
|
||
"Telegram Bot:http://localhost:8080/health:telegram-bot"
|
||
"AnonBot:http://localhost:8081/health:anon-bot"
|
||
)
|
||
|
||
for service_info in "${services[@]}"; do
|
||
IFS=':' read -r service_name url container_name <<< "$service_info"
|
||
echo "🔍 Checking $service_name..."
|
||
if ! check_health "$service_name" "$url"; then
|
||
echo "⚠️ $service_name health check failed"
|
||
docker-compose logs --tail=30 "$container_name" || true
|
||
failed=1
|
||
fi
|
||
done
|
||
|
||
return $failed
|
||
}
|
||
|
||
HEALTH_CHECK_FAILED=0
|
||
if ! run_health_checks; then
|
||
HEALTH_CHECK_FAILED=1
|
||
fi
|
||
|
||
# Проверяем статус всех контейнеров
|
||
echo "📊 Final container status:"
|
||
docker-compose ps
|
||
|
||
# Проверяем, что все контейнеры запущены
|
||
FAILED_CONTAINERS=$(docker-compose ps | grep -c "Exit\|Restarting" || true)
|
||
if [ "$FAILED_CONTAINERS" -gt 0 ]; then
|
||
echo "❌ Some containers are not running properly"
|
||
docker-compose ps
|
||
HEALTH_CHECK_FAILED=1
|
||
fi
|
||
|
||
if [ $HEALTH_CHECK_FAILED -eq 1 ]; then
|
||
echo "⚠️ Some health checks failed, but rollback completed"
|
||
else
|
||
echo "✅ All health checks passed after rollback!"
|
||
fi
|
||
|
||
# Обновляем историю безопасно
|
||
TIMESTAMP=$(date +"%Y-%m-%d %H:%M:%S")
|
||
COMMIT_MESSAGE=$(git log -1 --pretty=format:"%s" "$LAST_SUCCESSFUL_COMMIT" 2>/dev/null || echo "Auto-rollback")
|
||
safe_write_history "${TIMESTAMP}|${LAST_SUCCESSFUL_COMMIT}|Auto-rollback after smoke tests failure|github-actions|rolled_back"
|
||
|
||
echo "✅ Rollback completed successfully"
|
||
|
||
- name: Send rollback notification
|
||
if: always()
|
||
uses: appleboy/telegram-action@v1.0.0
|
||
with:
|
||
to: ${{ secrets.TELEGRAM_CHAT_ID }}
|
||
token: ${{ secrets.TELEGRAM_BOT_TOKEN }}
|
||
message: |
|
||
🔄 Automatic Rollback: ${{ job.status }}
|
||
|
||
📦 Repository: prod
|
||
🌿 Branch: main
|
||
📝 Rolled back to previous successful commit
|
||
🔀 PR: #${{ github.event.pull_request.number }}
|
||
|
||
⚠️ Rollback was triggered automatically due to smoke tests failure.
|
||
|
||
${{ job.status == 'success' && '✅ Rollback completed successfully! Services restored to previous version.' || '❌ Rollback failed! Manual intervention required.' }}
|
||
|
||
🔗 View details: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||
continue-on-error: true
|