Update architecture and backup documentation to include Healthchecks integration

Add Healthchecks service details to architecture and backup documentation, including its role as a Dead man's switch for backups. Update backup scripts to utilize systemd timers instead of cron for improved scheduling. Enhance network topology documentation to reflect Healthchecks integration in the VPS Miran setup. This update clarifies backup processes and enhances overall system reliability.
This commit is contained in:
2026-02-28 15:43:39 +03:00
parent 16c254510a
commit 53769e6832
61 changed files with 1697 additions and 39 deletions

View File

@@ -4,6 +4,8 @@
# Запускать на хосте Proxmox под root. Секреты из Vaultwarden (объект RESTIC), файл /root/.bw-master (chmod 600).
# Cron: 10 4 * * * (04:10, после основного restic в 04:00).
set -e
export PATH="/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:${PATH:-}"
export HOME="${HOME:-/root}"
BACKUP_PATH="/mnt/backup/photos"
# Время запуска (для логов и уведомлений)

View File

@@ -6,6 +6,9 @@
# Перед первым запуском: установить restic, bw (Bitwarden CLI), jq; bw config server https://vault.katykhin.ru; restic init.
# Cron: 0 4 * * * (04:00, после окна 01:0003:30; 05:00 зарезервировано под перезагрузку).
set -e
# При запуске из systemd PATH и HOME могут быть пустыми
export PATH="/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:${PATH:-}"
export HOME="${HOME:-/root}"
BACKUP_PATH="/mnt/backup"
# Время запуска (для логов и уведомлений)

20
scripts/healthcheck-ping.sh Executable file
View File

@@ -0,0 +1,20 @@
#!/bin/bash
# Ping Healthchecks после успешного окна бэкапов (Dead man's switch).
# Если ping не пришёл — Healthchecks шлёт алерт в Telegram.
# Конфиг: /root/.healthchecks.env (HEALTHCHECKS_URL, HEALTHCHECKS_HOMELAB_UUID)
CONFIG="${HEALTHCHECKS_CONFIG:-/root/.healthchecks.env}"
if [ -f "$CONFIG" ]; then
set -a
# shellcheck source=/dev/null
source "$CONFIG"
set +a
fi
HC_URL="${HEALTHCHECKS_URL:-https://healthchecks.katykhin.ru}"
HC_UUID="${HEALTHCHECKS_HOMELAB_UUID:-}"
[ -z "$HC_UUID" ] && exit 0
curl -fsS --retry 3 --max-time 10 "${HC_URL}/ping/${HC_UUID}" >/dev/null 2>&1 || true
exit 0

View File

@@ -0,0 +1,23 @@
# Healthchecks на VPS Миран
# Копировать: cp .env.example .env
SITE_ROOT=https://healthchecks.katykhin.ru/healthchecks
SECRET_KEY=CHANGE_ME_openssl_rand_hex_32
ALLOWED_HOSTS=healthchecks.katykhin.ru,185.147.80.190,localhost
DB=postgres
DB_HOST=db
DB_NAME=hc
DB_USER=postgres
DB_PASSWORD=CHANGE_ME_secure_password
# Свой бот (не @HealthchecksBot!) — создать через @BotFather, username бота
TELEGRAM_TOKEN=
TELEGRAM_BOT_NAME=YourBotUsername
REGISTRATION_OPEN=False
EMAIL_HOST=
EMAIL_HOST_USER=
EMAIL_HOST_PASSWORD=
DEFAULT_FROM_EMAIL=healthchecks@katykhin.ru

View File

@@ -0,0 +1,31 @@
# Healthchecks на VPS Миран
# Копировать: cp -r scripts/healthchecks-docker /home/prod/healthchecks
# cd /home/prod/healthchecks && cp .env.example .env && редактировать .env
volumes:
db-data:
services:
db:
image: postgres:16
volumes:
- db-data:/var/lib/postgresql/data
environment:
- POSTGRES_DB=${DB_NAME:-hc}
- POSTGRES_PASSWORD=${DB_PASSWORD}
restart: unless-stopped
web:
image: healthchecks/healthchecks:latest
env_file:
- .env
environment:
- DB_HOST=db
- DB_NAME=${DB_NAME:-hc}
- DB_USER=postgres
- DB_PASSWORD=${DB_PASSWORD}
ports:
- "127.0.0.1:8000:8000"
depends_on:
- db
restart: unless-stopped

View File

@@ -0,0 +1,25 @@
# Референс: server block для healthchecks.katykhin.ru (Let's Encrypt, Telegram webhook)
# Вставить в nginx.conf после HTTP redirect server block
server {
listen 443 ssl http2;
server_name healthchecks.katykhin.ru;
ssl_certificate /etc/letsencrypt/live/healthchecks.katykhin.ru/fullchain.pem;
ssl_certificate_key /etc/letsencrypt/live/healthchecks.katykhin.ru/privkey.pem;
ssl_protocols TLSv1.2 TLSv1.3;
location = / { return 302 /healthchecks/; }
location /static/ { proxy_pass http://127.0.0.1:8000/static/; proxy_set_header Host $host; proxy_set_header X-Real-IP $remote_addr; proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; proxy_set_header X-Forwarded-Proto $scheme; }
location /projects/ { proxy_pass http://127.0.0.1:8000/projects/; proxy_set_header Host $host; proxy_set_header X-Real-IP $remote_addr; proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; proxy_set_header X-Forwarded-Proto $scheme; }
location /accounts/ { proxy_pass http://127.0.0.1:8000/accounts/; proxy_set_header Host $host; proxy_set_header X-Real-IP $remote_addr; proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; proxy_set_header X-Forwarded-Proto $scheme; }
location /integrations/ { proxy_pass http://127.0.0.1:8000/integrations/; proxy_set_header Host $host; proxy_set_header X-Real-IP $remote_addr; proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; proxy_set_header X-Forwarded-Proto $scheme; }
location /ping/ { proxy_pass http://127.0.0.1:8000/ping/; proxy_set_header Host $host; proxy_set_header X-Real-IP $remote_addr; proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; proxy_set_header X-Forwarded-Proto $scheme; }
location /admin/ { proxy_pass http://127.0.0.1:8000/admin/; proxy_set_header Host $host; proxy_set_header X-Real-IP $remote_addr; proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; proxy_set_header X-Forwarded-Proto $scheme; }
location /badge/ { proxy_pass http://127.0.0.1:8000/badge/; proxy_set_header Host $host; proxy_set_header X-Real-IP $remote_addr; proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; proxy_set_header X-Forwarded-Proto $scheme; }
location /checks/ { proxy_pass http://127.0.0.1:8000/checks/; proxy_set_header Host $host; proxy_set_header X-Real-IP $remote_addr; proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; proxy_set_header X-Forwarded-Proto $scheme; }
location /docs/ { proxy_pass http://127.0.0.1:8000/docs/; proxy_set_header Host $host; proxy_set_header X-Real-IP $remote_addr; proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; proxy_set_header X-Forwarded-Proto $scheme; }
location /tv/ { proxy_pass http://127.0.0.1:8000/tv/; proxy_set_header Host $host; proxy_set_header X-Real-IP $remote_addr; proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; proxy_set_header X-Forwarded-Proto $scheme; }
location = /healthchecks/ { return 302 /healthchecks/accounts/login/; }
location = /healthchecks { return 302 /healthchecks/accounts/login/; }
location /healthchecks/ { proxy_pass http://127.0.0.1:8000/; proxy_set_header Host $host; proxy_set_header X-Real-IP $remote_addr; proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; proxy_set_header X-Forwarded-Proto $scheme; }
}

View File

@@ -0,0 +1,6 @@
# Конфиг для healthcheck-ping.sh (Proxmox)
# Копировать: cp healthchecks.env.example /root/.healthchecks.env
# UUID — из веб-интерфейса Healthchecks после создания check "homelab-backups"
HEALTHCHECKS_URL=https://healthchecks.katykhin.ru
HEALTHCHECKS_HOMELAB_UUID=

30
scripts/smartd-notify.sh Executable file
View File

@@ -0,0 +1,30 @@
#!/bin/bash
# Вызывается smartd при обнаружении проблемы (-M exec).
# Аргументы: $1 = device, $2 = type (1=health, 2=usage, 3=fail), $3 = message
# См. man smartd.conf
NOTIFY_SCRIPT="${NOTIFY_SCRIPT:-/root/scripts/notify-telegram.sh}"
DEVICE="${1:-unknown}"
TYPE="${2:-}"
MSG="${3:-}"
# Дополнительный вывод smartd может быть в stdin
EXTRA=$(cat 2>/dev/null || true)
case "$TYPE" in
1) SUMMARY="Health check failed" ;;
2) SUMMARY="Usage attribute warning" ;;
3) SUMMARY="Usage attribute failure" ;;
*) SUMMARY="SMART problem" ;;
esac
if [ -x "$NOTIFY_SCRIPT" ]; then
BODY="Диск $DEVICE: $SUMMARY"
[ -n "$MSG" ] && BODY="${BODY}
$MSG"
[ -n "$EXTRA" ] && BODY="${BODY}
$EXTRA"
"$NOTIFY_SCRIPT" "⚠️ SMART" "$BODY" || true
fi
exit 0

24
scripts/systemd/README.md Normal file
View File

@@ -0,0 +1,24 @@
# Systemd unit-файлы для бэкапов и мониторинга
Копировать на хост Proxmox в `/etc/systemd/system/`:
```bash
cp *.service *.timer /etc/systemd/system/
systemctl daemon-reload
```
Включить все таймеры:
```bash
for t in backup-*.timer notify-vzdump-success.timer verify-*.timer backup-watchdog-timers.timer backup-healthcheck-ping.timer; do
systemctl enable --now "$t" 2>/dev/null || true
done
```
Проверка:
```bash
systemctl list-timers --all | grep backup
```
Перед миграцией с cron — отключить задания в crontab (`crontab -e`).

View File

@@ -0,0 +1,14 @@
# Бэкап БД Nextcloud (CT 101)
[Unit]
Description=Backup Nextcloud PostgreSQL (CT 101)
After=network-online.target
[Service]
Type=oneshot
ExecStart=/bin/sh -c '/root/scripts/backup-ct101-pgdump.sh && echo $(date -Iseconds) > /var/run/backup-ct101-pgdump.ok'
StandardOutput=journal
StandardError=journal
[Install]
WantedBy=multi-user.target

View File

@@ -0,0 +1,9 @@
[Unit]
Description=Backup Nextcloud DB daily at 01:15
[Timer]
OnCalendar=*-*-* 01:15:00
Persistent=yes
[Install]
WantedBy=timers.target

View File

@@ -0,0 +1,14 @@
# Бэкап БД Gitea (CT 103)
[Unit]
Description=Backup Gitea PostgreSQL (CT 103)
After=network-online.target
[Service]
Type=oneshot
ExecStart=/bin/sh -c '/root/scripts/backup-ct103-gitea-pgdump.sh && echo $(date -Iseconds) > /var/run/backup-ct103-gitea-pgdump.ok'
StandardOutput=journal
StandardError=journal
[Install]
WantedBy=multi-user.target

View File

@@ -0,0 +1,9 @@
[Unit]
Description=Backup Gitea DB daily at 03:00
[Timer]
OnCalendar=*-*-* 03:00:00
Persistent=yes
[Install]
WantedBy=timers.target

View File

@@ -0,0 +1,14 @@
# Бэкап БД Paperless (CT 104)
[Unit]
Description=Backup Paperless PostgreSQL (CT 104)
After=network-online.target
[Service]
Type=oneshot
ExecStart=/bin/sh -c '/root/scripts/backup-ct104-pgdump.sh && echo $(date -Iseconds) > /var/run/backup-ct104-pgdump.ok'
StandardOutput=journal
StandardError=journal
[Install]
WantedBy=multi-user.target

View File

@@ -0,0 +1,9 @@
[Unit]
Description=Backup Paperless DB daily at 02:30
[Timer]
OnCalendar=*-*-* 02:30:00
Persistent=yes
[Install]
WantedBy=timers.target

View File

@@ -0,0 +1,14 @@
# Бэкап векторов RAG (CT 105)
[Unit]
Description=Backup RAG vectors (CT 105)
After=network-online.target
[Service]
Type=oneshot
ExecStart=/bin/sh -c '/root/scripts/backup-ct105-vectors.sh && echo $(date -Iseconds) > /var/run/backup-ct105-vectors.ok'
StandardOutput=journal
StandardError=journal
[Install]
WantedBy=multi-user.target

View File

@@ -0,0 +1,9 @@
[Unit]
Description=Backup RAG vectors daily at 03:30
[Timer]
OnCalendar=*-*-* 03:30:00
Persistent=yes
[Install]
WantedBy=timers.target

View File

@@ -0,0 +1,14 @@
# Бэкап /etc/pve и конфигов хоста
[Unit]
Description=Backup Proxmox host config (/etc/pve, interfaces, hosts)
After=network-online.target
[Service]
Type=oneshot
ExecStart=/bin/sh -c '/root/scripts/backup-etc-pve.sh && echo $(date -Iseconds) > /var/run/backup-etc-pve.ok'
StandardOutput=journal
StandardError=journal
[Install]
WantedBy=multi-user.target

View File

@@ -0,0 +1,9 @@
[Unit]
Description=Backup etc-pve daily at 02:15
[Timer]
OnCalendar=*-*-* 02:15:00
Persistent=yes
[Install]
WantedBy=timers.target

View File

@@ -0,0 +1,14 @@
# Ping Healthchecks после окна бэкапов (Dead man's switch)
[Unit]
Description=Ping Healthchecks (homelab backups)
After=network-online.target
[Service]
Type=oneshot
ExecStart=/root/scripts/healthcheck-ping.sh
StandardOutput=journal
StandardError=journal
[Install]
WantedBy=multi-user.target

View File

@@ -0,0 +1,9 @@
[Unit]
Description=Ping Healthchecks daily at 04:35 (after backup window)
[Timer]
OnCalendar=*-*-* 04:35:00
Persistent=yes
[Install]
WantedBy=timers.target

View File

@@ -0,0 +1,14 @@
# Бэкап библиотеки фото Immich (rsync с VM 200)
[Unit]
Description=Backup Immich photos (rsync from VM 200)
After=network-online.target
[Service]
Type=oneshot
ExecStart=/bin/sh -c '/root/scripts/backup-immich-photos.sh && echo $(date -Iseconds) > /var/run/backup-immich-photos.ok'
StandardOutput=journal
StandardError=journal
[Install]
WantedBy=multi-user.target

View File

@@ -0,0 +1,9 @@
[Unit]
Description=Backup Immich photos daily at 01:30
[Timer]
OnCalendar=*-*-* 01:30:00
Persistent=yes
[Install]
WantedBy=timers.target

View File

@@ -0,0 +1,16 @@
# Выгрузка /mnt/backup/photos в Yandex S3 через restic
[Unit]
Description=Backup photos to Yandex S3 (restic)
After=network-online.target
[Service]
Type=oneshot
Environment=HOME=/root
Environment=PATH=/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
ExecStart=/bin/sh -c '/root/scripts/backup-restic-yandex-photos.sh && echo $(date -Iseconds) > /var/run/backup-restic-yandex-photos.ok'
StandardOutput=journal
StandardError=journal
[Install]
WantedBy=multi-user.target

View File

@@ -0,0 +1,9 @@
[Unit]
Description=Restic backup photos to Yandex daily at 04:10
[Timer]
OnCalendar=*-*-* 04:10:00
Persistent=yes
[Install]
WantedBy=timers.target

View File

@@ -0,0 +1,16 @@
# Выгрузка /mnt/backup (без photos) в Yandex S3 через restic
[Unit]
Description=Backup to Yandex S3 (restic, main)
After=network-online.target
[Service]
Type=oneshot
Environment=HOME=/root
Environment=PATH=/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
ExecStart=/bin/sh -c '/root/scripts/backup-restic-yandex.sh && echo $(date -Iseconds) > /var/run/backup-restic-yandex.ok'
StandardOutput=journal
StandardError=journal
[Install]
WantedBy=multi-user.target

View File

@@ -0,0 +1,9 @@
[Unit]
Description=Restic backup to Yandex daily at 04:00
[Timer]
OnCalendar=*-*-* 04:00:00
Persistent=yes
[Install]
WantedBy=timers.target

View File

@@ -0,0 +1,14 @@
# Бэкап данных Vaultwarden (CT 103)
[Unit]
Description=Backup Vaultwarden data (CT 103)
After=network-online.target
[Service]
Type=oneshot
ExecStart=/bin/sh -c '/root/scripts/backup-vaultwarden-data.sh && echo $(date -Iseconds) > /var/run/backup-vaultwarden-data.ok'
StandardOutput=journal
StandardError=journal
[Install]
WantedBy=multi-user.target

View File

@@ -0,0 +1,9 @@
[Unit]
Description=Backup Vaultwarden daily at 02:45
[Timer]
OnCalendar=*-*-* 02:45:00
Persistent=yes
[Install]
WantedBy=timers.target

View File

@@ -0,0 +1,14 @@
# Бэкап БД Immich (VM 200)
[Unit]
Description=Backup Immich PostgreSQL (VM 200)
After=network-online.target
[Service]
Type=oneshot
ExecStart=/bin/sh -c '/root/scripts/backup-vm200-pgdump.sh && echo $(date -Iseconds) > /var/run/backup-vm200-pgdump.ok'
StandardOutput=journal
StandardError=journal
[Install]
WantedBy=multi-user.target

View File

@@ -0,0 +1,9 @@
[Unit]
Description=Backup Immich DB daily at 03:15
[Timer]
OnCalendar=*-*-* 03:15:00
Persistent=yes
[Install]
WantedBy=timers.target

View File

@@ -0,0 +1,17 @@
# Копировать на Proxmox: /etc/systemd/system/
# systemctl daemon-reload && systemctl enable --now backup-vps-miran.timer
# Удалить из cron: 0 1 * * *
[Unit]
Description=Backup VPS Miran (БД бота, voice_users, S3)
After=network-online.target
[Service]
Type=oneshot
# Запись .ok только при успехе (для watchdog)
ExecStart=/bin/sh -c '/root/scripts/backup-vps-miran.sh && echo $(date -Iseconds) > /var/run/backup-vps-miran.ok'
StandardOutput=journal
StandardError=journal
[Install]
WantedBy=multi-user.target

View File

@@ -0,0 +1,9 @@
[Unit]
Description=Backup VPS Miran daily at 01:00
[Timer]
OnCalendar=*-*-* 01:00:00
Persistent=yes
[Install]
WantedBy=timers.target

View File

@@ -0,0 +1,14 @@
# Бэкап конфигов MTProto + сайт (VPS Германия)
[Unit]
Description=Backup VPS MTProto (Germany)
After=network-online.target
[Service]
Type=oneshot
ExecStart=/bin/sh -c '/root/scripts/backup-vps-mtproto.sh && echo $(date -Iseconds) > /var/run/backup-vps-mtproto.ok'
StandardOutput=journal
StandardError=journal
[Install]
WantedBy=multi-user.target

View File

@@ -0,0 +1,9 @@
[Unit]
Description=Backup VPS MTProto daily at 01:45
[Timer]
OnCalendar=*-*-* 01:45:00
Persistent=yes
[Install]
WantedBy=timers.target

View File

@@ -0,0 +1,14 @@
# Watchdog: проверка failed timers и устаревших healthcheck-файлов
[Unit]
Description=Backup watchdog (failed timers, stale .ok files)
After=network-online.target
[Service]
Type=oneshot
ExecStart=/root/scripts/watchdog-timers.sh
StandardOutput=journal
StandardError=journal
[Install]
WantedBy=multi-user.target

View File

@@ -0,0 +1,9 @@
[Unit]
Description=Backup watchdog daily at 12:00
[Timer]
OnCalendar=*-*-* 12:00:00
Persistent=yes
[Install]
WantedBy=timers.target

View File

@@ -0,0 +1,15 @@
# Проверка локального vzdump за последние 2 ч, отправка сводки в Telegram
# Задание vzdump в Proxmox UI выполняется в 02:00
[Unit]
Description=Notify vzdump success (check dump dir, send Telegram)
After=network-online.target
[Service]
Type=oneshot
ExecStart=/root/scripts/notify-vzdump-success.sh
StandardOutput=journal
StandardError=journal
[Install]
WantedBy=multi-user.target

View File

@@ -0,0 +1,9 @@
[Unit]
Description=Notify vzdump success daily at 03:00
[Timer]
OnCalendar=*-*-* 03:00:00
Persistent=yes
[Install]
WantedBy=timers.target

View File

@@ -0,0 +1,14 @@
# Restic check --read-data (раз в 6 мес: 1 янв и 1 июля)
[Unit]
Description=Verify restic repository (full read-data, semiannual)
After=network-online.target
[Service]
Type=oneshot
ExecStart=/root/scripts/verify-restore-level1.sh full-check
StandardOutput=journal
StandardError=journal
[Install]
WantedBy=multi-user.target

View File

@@ -0,0 +1,10 @@
[Unit]
Description=Restic full check semiannual (Jan 1, Jul 1 at 10:00)
[Timer]
OnCalendar=*-01-01 10:00:00
OnCalendar=*-07-01 10:00:00
Persistent=yes
[Install]
WantedBy=timers.target

View File

@@ -0,0 +1,14 @@
# Restic check --read-data-subset=10% (ежемесячно, 1-е число)
[Unit]
Description=Verify restic repository (monthly read-data-subset)
After=network-online.target
[Service]
Type=oneshot
ExecStart=/root/scripts/verify-restore-level1.sh monthly-check
StandardOutput=journal
StandardError=journal
[Install]
WantedBy=multi-user.target

View File

@@ -0,0 +1,9 @@
[Unit]
Description=Restic check read-data-subset monthly (1st at 10:00)
[Timer]
OnCalendar=*-*-01 10:00:00
Persistent=yes
[Install]
WantedBy=timers.target

View File

@@ -0,0 +1,14 @@
# Тест restore дампа Nextcloud из restic (ежемесячно)
[Unit]
Description=Verify Nextcloud dump restore from restic (monthly)
After=network-online.target
[Service]
Type=oneshot
ExecStart=/root/scripts/verify-restore-level1.sh monthly-dump
StandardOutput=journal
StandardError=journal
[Install]
WantedBy=multi-user.target

View File

@@ -0,0 +1,9 @@
[Unit]
Description=Verify Nextcloud dump restore monthly (1st at 11:00)
[Timer]
OnCalendar=*-*-01 11:00:00
Persistent=yes
[Install]
WantedBy=timers.target

View File

@@ -0,0 +1,14 @@
# Restic check (еженедельно)
[Unit]
Description=Verify restic repository (weekly check)
After=network-online.target
[Service]
Type=oneshot
ExecStart=/root/scripts/verify-restore-level1.sh weekly
StandardOutput=journal
StandardError=journal
[Install]
WantedBy=multi-user.target

View File

@@ -0,0 +1,9 @@
[Unit]
Description=Restic check weekly (Sunday 03:00)
[Timer]
OnCalendar=Sun *-*-* 03:00:00
Persistent=yes
[Install]
WantedBy=timers.target

View File

@@ -0,0 +1,14 @@
# Автотест vzdump CT 107 (ежемесячно)
[Unit]
Description=Verify vzdump restore (CT 107, monthly)
After=network-online.target
[Service]
Type=oneshot
ExecStart=/root/scripts/verify-vzdump-level2.sh
StandardOutput=journal
StandardError=journal
[Install]
WantedBy=multi-user.target

View File

@@ -0,0 +1,9 @@
[Unit]
Description=Verify vzdump restore monthly (1st at 12:00)
[Timer]
OnCalendar=*-*-01 12:00:00
Persistent=yes
[Install]
WantedBy=timers.target

147
scripts/verify-restore-level1.sh Executable file
View File

@@ -0,0 +1,147 @@
#!/bin/bash
# Тест восстановления уровня 1: restic check и проверка дампа Nextcloud из restic.
# Запускать на хосте Proxmox под root.
# Режимы (аргумент): weekly | monthly-check | full-check | monthly-dump
# weekly — restic check (еженедельно)
# monthly-check — restic check --read-data-subset=10% (ежемесячно, 1-е число)
# full-check — restic check --read-data (раз в 612 мес, 1 янв и 1 июля)
# monthly-dump — restore дампа Nextcloud из restic, проверка целостности (ежемесячно)
# Секреты: из Vaultwarden (объект RESTIC), как в backup-restic-yandex.sh.
# Cron/Timer: отдельные таймеры для каждого режима.
set -e
export PATH="/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:${PATH:-}"
MODE="${1:-weekly}"
NOTIFY_SCRIPT="${NOTIFY_SCRIPT:-/root/scripts/notify-telegram.sh}"
RESTORE_TARGET="/tmp/restore-test"
RESTIC_PATH_NEXTCLOUD="/mnt/backup/databases/ct101-nextcloud"
MIN_DUMP_SIZE_MB=1
if [ "$(id -u)" -ne 0 ]; then
echo "Запускайте под root."
exit 1
fi
# Загрузка кредов restic из Vaultwarden (как в backup-restic-yandex.sh)
setup_restic_env() {
BW_MASTER_PASSWORD_FILE="${BW_MASTER_PASSWORD_FILE:-/root/.bw-master}"
if [ ! -f "$BW_MASTER_PASSWORD_FILE" ]; then
echo "Нет файла с мастер-паролем Vaultwarden: $BW_MASTER_PASSWORD_FILE"
return 1
fi
if ! command -v bw >/dev/null 2>&1 || ! command -v jq >/dev/null 2>&1; then
echo "Установите bw (Bitwarden CLI) и jq."
return 1
fi
export BW_SESSION
BW_SESSION=$(bw unlock --passwordfile "$BW_MASTER_PASSWORD_FILE" --raw 2>/dev/null) || return 1
RESTIC_ITEM=$(bw get item "RESTIC" 2>/dev/null) || return 1
export RESTIC_REPOSITORY
RESTIC_REPOSITORY=$(echo "$RESTIC_ITEM" | jq -r '.fields[] | select(.name=="RESTIC_REPOSITORY") | .value')
export AWS_ACCESS_KEY_ID
AWS_ACCESS_KEY_ID=$(echo "$RESTIC_ITEM" | jq -r '.fields[] | select(.name=="AWS_ACCESS_KEY_ID") | .value')
export AWS_SECRET_ACCESS_KEY
AWS_SECRET_ACCESS_KEY=$(echo "$RESTIC_ITEM" | jq -r '.fields[] | select(.name=="AWS_SECRET_ACCESS_KEY") | .value')
export AWS_DEFAULT_REGION
AWS_DEFAULT_REGION=$(echo "$RESTIC_ITEM" | jq -r '.fields[] | select(.name=="AWS_DEFAULT_REGION") | .value')
[ -z "$AWS_DEFAULT_REGION" ] && AWS_DEFAULT_REGION="ru-central1"
RESTIC_PASS=$(echo "$RESTIC_ITEM" | jq -r '.fields[] | select(.name=="RESTIC_BACKUP_KEY") | .value')
for var in RESTIC_REPOSITORY AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY; do
[ -z "${!var}" ] && return 1
done
RESTIC_PASSWORD_FILE=$(mktemp -u)
echo -n "$RESTIC_PASS" > "$RESTIC_PASSWORD_FILE"
chmod 600 "$RESTIC_PASSWORD_FILE"
trap 'rm -f "$RESTIC_PASSWORD_FILE"' EXIT INT TERM
export RESTIC_PASSWORD_FILE
return 0
}
notify_ok() {
[ -x "$NOTIFY_SCRIPT" ] && "$NOTIFY_SCRIPT" "$1" "$2" || true
}
notify_err() {
[ -x "$NOTIFY_SCRIPT" ] && "$NOTIFY_SCRIPT" "⚠️ $1" "$2" || true
}
case "$MODE" in
weekly)
echo "[verify-restore-level1] Режим: weekly (restic check)"
setup_restic_env || { notify_err "Restic check" "Не удалось загрузить креды restic."; exit 1; }
if restic check 2>&1; then
echo "[verify-restore-level1] restic check OK"
# При еженедельном успехе — не спамим (только при ошибке)
else
notify_err "Restic check" "Ошибка проверки репозитория restic."
exit 1
fi
;;
monthly-check)
echo "[verify-restore-level1] Режим: monthly-check (restic check --read-data-subset=10%)"
setup_restic_env || { notify_err "Restic check (read-data-subset)" "Не удалось загрузить креды restic."; exit 1; }
if restic check --read-data-subset=10% 2>&1; then
echo "[verify-restore-level1] restic check --read-data-subset=10% OK"
notify_ok "Тест restic (read-data-subset)" "OK, 10% данных проверено."
else
notify_err "Restic check (read-data-subset)" "Ошибка проверки 10% данных репозитория."
exit 1
fi
;;
full-check)
echo "[verify-restore-level1] Режим: full-check (restic check --read-data)"
setup_restic_env || { notify_err "Restic check (read-data)" "Не удалось загрузить креды restic."; exit 1; }
if restic check --read-data 2>&1; then
echo "[verify-restore-level1] restic check --read-data OK"
notify_ok "Тест restic (full read-data)" "OK, полная проверка данных завершена."
else
notify_err "Restic check (read-data)" "Ошибка полной проверки данных репозитория."
exit 1
fi
;;
monthly-dump)
echo "[verify-restore-level1] Режим: monthly-dump (restore и проверка дампа Nextcloud)"
setup_restic_env || { notify_err "Тест дампа Nextcloud" "Не удалось загрузить креды restic."; exit 1; }
rm -rf "$RESTORE_TARGET"
mkdir -p "$RESTORE_TARGET"
trap 'rm -f "${RESTIC_PASSWORD_FILE:-}" 2>/dev/null; rm -rf "$RESTORE_TARGET"' EXIT INT TERM
if ! restic restore latest --target "$RESTORE_TARGET" --path "$RESTIC_PATH_NEXTCLOUD" 2>&1; then
notify_err "Тест дампа Nextcloud" "Ошибка restic restore: не удалось восстановить $RESTIC_PATH_NEXTCLOUD"
exit 1
fi
# Путь после restore: RESTORE_TARGET/mnt/backup/databases/ct101-nextcloud/
RESTORED_DIR="$RESTORE_TARGET/mnt/backup/databases/ct101-nextcloud"
if [ ! -d "$RESTORED_DIR" ]; then
notify_err "Тест дампа Nextcloud" "Ошибка: каталог $RESTORED_DIR не найден после restore."
exit 1
fi
LATEST_SQL=$(ls -t "$RESTORED_DIR"/nextcloud-db-*.sql.gz 2>/dev/null | head -1)
if [ -z "$LATEST_SQL" ] || [ ! -f "$LATEST_SQL" ]; then
notify_err "Тест дампа Nextcloud" "Ошибка: не найден .sql.gz в $RESTORED_DIR"
exit 1
fi
SIZE_BYTES=$(stat -c%s "$LATEST_SQL" 2>/dev/null || echo 0)
SIZE_MB=$(( SIZE_BYTES / 1024 / 1024 ))
if [ "$SIZE_MB" -lt "$MIN_DUMP_SIZE_MB" ]; then
notify_err "Тест дампа Nextcloud" "Ошибка: размер дампа ${SIZE_MB} MB < ${MIN_DUMP_SIZE_MB} MB (файл: $LATEST_SQL)"
exit 1
fi
if ! gunzip -t "$LATEST_SQL" 2>/dev/null; then
notify_err "Тест дампа Nextcloud" "Ошибка: gunzip -t не прошёл для $LATEST_SQL"
exit 1
fi
if ! gunzip -c "$LATEST_SQL" 2>/dev/null | grep -q 'CREATE TABLE'; then
notify_err "Тест дампа Nextcloud" "Ошибка: в распакованном дампе нет CREATE TABLE (возможно не SQL дамп)"
exit 1
fi
echo "[verify-restore-level1] Дамп Nextcloud OK: $LATEST_SQL, размер ${SIZE_MB} MB"
notify_ok "Тест дампа Nextcloud" "OK, размер ${SIZE_MB} MB."
;;
*)
echo "Использование: $0 {weekly|monthly-check|full-check|monthly-dump}"
exit 1
;;
esac
exit 0

88
scripts/verify-vzdump-level2.sh Executable file
View File

@@ -0,0 +1,88 @@
#!/bin/bash
# Тест восстановления уровня 2: автотест vzdump CT 107.
# Восстанавливает последний vzdump-lxc-107 в временный CT 999, проверяет запуск, удаляет.
# Запускать на хосте Proxmox под root. Ежемесячно (systemd timer).
# При успехе/ошибке — уведомление в Telegram.
set -e
DUMP_DIR="/mnt/backup/proxmox/dump/dump"
TEST_VMID=999
TEST_IP="192.168.1.199/24"
TEST_GW="192.168.1.1"
STORAGE="local-lvm"
WAIT_START_SEC=60
NOTIFY_SCRIPT="${NOTIFY_SCRIPT:-/root/scripts/notify-telegram.sh}"
if [ "$(id -u)" -ne 0 ]; then
echo "Запускайте под root."
exit 1
fi
# Очистка при выходе (успех или ошибка)
cleanup() {
if pct status "$TEST_VMID" &>/dev/null; then
echo "[verify-vzdump] Останавливаем и удаляем CT $TEST_VMID..."
pct stop "$TEST_VMID" --skiplock 2>/dev/null || true
sleep 2
pct destroy "$TEST_VMID" --purge 1 --force 2>/dev/null || true
fi
}
trap cleanup EXIT INT TERM
notify_ok() {
[ -x "$NOTIFY_SCRIPT" ] && "$NOTIFY_SCRIPT" "✅ Тест vzdump CT 107" "$1" || true
}
notify_err() {
[ -x "$NOTIFY_SCRIPT" ] && "$NOTIFY_SCRIPT" "⚠️ Тест vzdump CT 107" "Ошибка: $1" || true
}
if [ ! -d "$DUMP_DIR" ]; then
notify_err "Каталог $DUMP_DIR не найден."
exit 1
fi
# Последний vzdump-lxc-107
ARCHIVE=$(ls -t "$DUMP_DIR"/vzdump-lxc-107-*.tar.zst 2>/dev/null | head -1)
if [ -z "$ARCHIVE" ] || [ ! -f "$ARCHIVE" ]; then
notify_err "Не найден vzdump-lxc-107-*.tar.zst в $DUMP_DIR"
exit 1
fi
echo "[verify-vzdump] Архив: $ARCHIVE"
# Убедиться, что CT 999 не существует (остаток от прошлого запуска)
if pct status "$TEST_VMID" &>/dev/null; then
pct destroy "$TEST_VMID" --purge 1 --force 2>/dev/null || true
sleep 2
fi
echo "[verify-vzdump] Создаём CT $TEST_VMID из архива..."
if ! pct create "$TEST_VMID" "$ARCHIVE" --restore 1 --storage "$STORAGE" 2>&1; then
notify_err "pct create не удался."
exit 1
fi
# Другой IP, чтобы не конфликтовать с оригиналом 107
echo "[verify-vzdump] Настраиваем сеть (IP $TEST_IP)..."
pct set "$TEST_VMID" --net0 "name=eth0,bridge=vmbr0,gw=$TEST_GW,ip=$TEST_IP,type=veth"
echo "[verify-vzdump] Запускаем CT $TEST_VMID..."
if ! pct start "$TEST_VMID" 2>&1; then
notify_err "pct start не удался."
exit 1
fi
echo "[verify-vzdump] Ожидание $WAIT_START_SEC сек..."
sleep "$WAIT_START_SEC"
STATUS=$(pct exec "$TEST_VMID" -- systemctl is-system-running 2>/dev/null || echo "unknown")
if [ "$STATUS" != "running" ]; then
notify_err "systemctl is-system-running вернул: $STATUS (ожидалось running)"
exit 1
fi
echo "[verify-vzdump] CT 999 запущен, system running. Тест пройден."
notify_ok "OK"
exit 0

58
scripts/watchdog-timers.sh Executable file
View File

@@ -0,0 +1,58 @@
#!/bin/bash
# Watchdog: проверка провалившихся systemd timers.
# Запускать раз в день (например 12:00). При наличии failed → notify в Telegram.
# Timer: backup-watchdog-timers.timer
NOTIFY_SCRIPT="${NOTIFY_SCRIPT:-/root/scripts/notify-telegram.sh}"
MAX_AGE_HOURS=24
BACKUP_OK_DIR="/var/run"
if [ "$(id -u)" -ne 0 ]; then
echo "Запускайте под root."
exit 1
fi
# 1. Проверка systemctl list-timers --failed
FAILED=$(systemctl list-timers --failed --no-legend --no-pager 2>/dev/null | grep -v '^$' || true)
if [ -n "$FAILED" ]; then
MSG="Провалившиеся таймеры:
$FAILED"
if [ -x "$NOTIFY_SCRIPT" ]; then
"$NOTIFY_SCRIPT" "⚠️ Systemd timers" "$MSG" || true
fi
echo "[watchdog] Найдены провалившиеся таймеры"
echo "$FAILED"
exit 1
fi
# 2. Проверка healthcheck-файлов (если файл старше 24 ч — алерт)
BACKUP_NAMES="vps-miran ct101-pgdump immich-photos vps-mtproto etc-pve ct104-pgdump vaultwarden-data ct103-gitea-pgdump vm200-pgdump ct105-vectors restic-yandex restic-yandex-photos"
STALE=""
for name in $BACKUP_NAMES; do
OK_FILE="$BACKUP_OK_DIR/backup-$name.ok"
if [ -f "$OK_FILE" ]; then
AGE_SEC=$(( $(date +%s) - $(stat -c %Y "$OK_FILE" 2>/dev/null || echo 0) ))
AGE_HOURS=$(( AGE_SEC / 3600 ))
if [ "$AGE_HOURS" -ge "$MAX_AGE_HOURS" ]; then
STALE="${STALE}backup-$name.ok (${AGE_HOURS}h)
"
fi
else
STALE="${STALE}backup-$name.ok (отсутствует)
"
fi
done
if [ -n "$STALE" ]; then
MSG="Файлы .ok старше ${MAX_AGE_HOURS} ч или отсутствуют (последний успешный бэкап):
$STALE"
if [ -x "$NOTIFY_SCRIPT" ]; then
"$NOTIFY_SCRIPT" "⚠️ Backup watchdog" "$MSG" || true
fi
echo "[watchdog] Устаревшие healthcheck-файлы"
echo "$STALE"
exit 1
fi
echo "[watchdog] OK: таймеры и healthcheck-файлы в порядке"
exit 0