Update architecture and backup documentation to include Healthchecks integration
Add Healthchecks service details to architecture and backup documentation, including its role as a Dead man's switch for backups. Update backup scripts to utilize systemd timers instead of cron for improved scheduling. Enhance network topology documentation to reflect Healthchecks integration in the VPS Miran setup. This update clarifies backup processes and enhances overall system reliability.
This commit is contained in:
@@ -4,6 +4,8 @@
|
||||
# Запускать на хосте Proxmox под root. Секреты из Vaultwarden (объект RESTIC), файл /root/.bw-master (chmod 600).
|
||||
# Cron: 10 4 * * * (04:10, после основного restic в 04:00).
|
||||
set -e
|
||||
export PATH="/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:${PATH:-}"
|
||||
export HOME="${HOME:-/root}"
|
||||
|
||||
BACKUP_PATH="/mnt/backup/photos"
|
||||
# Время запуска (для логов и уведомлений)
|
||||
|
||||
@@ -6,6 +6,9 @@
|
||||
# Перед первым запуском: установить restic, bw (Bitwarden CLI), jq; bw config server https://vault.katykhin.ru; restic init.
|
||||
# Cron: 0 4 * * * (04:00, после окна 01:00–03:30; 05:00 зарезервировано под перезагрузку).
|
||||
set -e
|
||||
# При запуске из systemd PATH и HOME могут быть пустыми
|
||||
export PATH="/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:${PATH:-}"
|
||||
export HOME="${HOME:-/root}"
|
||||
|
||||
BACKUP_PATH="/mnt/backup"
|
||||
# Время запуска (для логов и уведомлений)
|
||||
|
||||
20
scripts/healthcheck-ping.sh
Executable file
20
scripts/healthcheck-ping.sh
Executable file
@@ -0,0 +1,20 @@
|
||||
#!/bin/bash
|
||||
# Ping Healthchecks после успешного окна бэкапов (Dead man's switch).
|
||||
# Если ping не пришёл — Healthchecks шлёт алерт в Telegram.
|
||||
# Конфиг: /root/.healthchecks.env (HEALTHCHECKS_URL, HEALTHCHECKS_HOMELAB_UUID)
|
||||
|
||||
CONFIG="${HEALTHCHECKS_CONFIG:-/root/.healthchecks.env}"
|
||||
if [ -f "$CONFIG" ]; then
|
||||
set -a
|
||||
# shellcheck source=/dev/null
|
||||
source "$CONFIG"
|
||||
set +a
|
||||
fi
|
||||
|
||||
HC_URL="${HEALTHCHECKS_URL:-https://healthchecks.katykhin.ru}"
|
||||
HC_UUID="${HEALTHCHECKS_HOMELAB_UUID:-}"
|
||||
|
||||
[ -z "$HC_UUID" ] && exit 0
|
||||
|
||||
curl -fsS --retry 3 --max-time 10 "${HC_URL}/ping/${HC_UUID}" >/dev/null 2>&1 || true
|
||||
exit 0
|
||||
23
scripts/healthchecks-docker/.env.example
Normal file
23
scripts/healthchecks-docker/.env.example
Normal file
@@ -0,0 +1,23 @@
|
||||
# Healthchecks на VPS Миран
|
||||
# Копировать: cp .env.example .env
|
||||
|
||||
SITE_ROOT=https://healthchecks.katykhin.ru/healthchecks
|
||||
SECRET_KEY=CHANGE_ME_openssl_rand_hex_32
|
||||
ALLOWED_HOSTS=healthchecks.katykhin.ru,185.147.80.190,localhost
|
||||
|
||||
DB=postgres
|
||||
DB_HOST=db
|
||||
DB_NAME=hc
|
||||
DB_USER=postgres
|
||||
DB_PASSWORD=CHANGE_ME_secure_password
|
||||
|
||||
# Свой бот (не @HealthchecksBot!) — создать через @BotFather, username бота
|
||||
TELEGRAM_TOKEN=
|
||||
TELEGRAM_BOT_NAME=YourBotUsername
|
||||
|
||||
REGISTRATION_OPEN=False
|
||||
|
||||
EMAIL_HOST=
|
||||
EMAIL_HOST_USER=
|
||||
EMAIL_HOST_PASSWORD=
|
||||
DEFAULT_FROM_EMAIL=healthchecks@katykhin.ru
|
||||
31
scripts/healthchecks-docker/docker-compose.yml
Normal file
31
scripts/healthchecks-docker/docker-compose.yml
Normal file
@@ -0,0 +1,31 @@
|
||||
# Healthchecks на VPS Миран
|
||||
# Копировать: cp -r scripts/healthchecks-docker /home/prod/healthchecks
|
||||
# cd /home/prod/healthchecks && cp .env.example .env && редактировать .env
|
||||
|
||||
volumes:
|
||||
db-data:
|
||||
|
||||
services:
|
||||
db:
|
||||
image: postgres:16
|
||||
volumes:
|
||||
- db-data:/var/lib/postgresql/data
|
||||
environment:
|
||||
- POSTGRES_DB=${DB_NAME:-hc}
|
||||
- POSTGRES_PASSWORD=${DB_PASSWORD}
|
||||
restart: unless-stopped
|
||||
|
||||
web:
|
||||
image: healthchecks/healthchecks:latest
|
||||
env_file:
|
||||
- .env
|
||||
environment:
|
||||
- DB_HOST=db
|
||||
- DB_NAME=${DB_NAME:-hc}
|
||||
- DB_USER=postgres
|
||||
- DB_PASSWORD=${DB_PASSWORD}
|
||||
ports:
|
||||
- "127.0.0.1:8000:8000"
|
||||
depends_on:
|
||||
- db
|
||||
restart: unless-stopped
|
||||
25
scripts/healthchecks-nginx-server.conf
Normal file
25
scripts/healthchecks-nginx-server.conf
Normal file
@@ -0,0 +1,25 @@
|
||||
# Референс: server block для healthchecks.katykhin.ru (Let's Encrypt, Telegram webhook)
|
||||
# Вставить в nginx.conf после HTTP redirect server block
|
||||
server {
|
||||
listen 443 ssl http2;
|
||||
server_name healthchecks.katykhin.ru;
|
||||
|
||||
ssl_certificate /etc/letsencrypt/live/healthchecks.katykhin.ru/fullchain.pem;
|
||||
ssl_certificate_key /etc/letsencrypt/live/healthchecks.katykhin.ru/privkey.pem;
|
||||
ssl_protocols TLSv1.2 TLSv1.3;
|
||||
|
||||
location = / { return 302 /healthchecks/; }
|
||||
location /static/ { proxy_pass http://127.0.0.1:8000/static/; proxy_set_header Host $host; proxy_set_header X-Real-IP $remote_addr; proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; proxy_set_header X-Forwarded-Proto $scheme; }
|
||||
location /projects/ { proxy_pass http://127.0.0.1:8000/projects/; proxy_set_header Host $host; proxy_set_header X-Real-IP $remote_addr; proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; proxy_set_header X-Forwarded-Proto $scheme; }
|
||||
location /accounts/ { proxy_pass http://127.0.0.1:8000/accounts/; proxy_set_header Host $host; proxy_set_header X-Real-IP $remote_addr; proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; proxy_set_header X-Forwarded-Proto $scheme; }
|
||||
location /integrations/ { proxy_pass http://127.0.0.1:8000/integrations/; proxy_set_header Host $host; proxy_set_header X-Real-IP $remote_addr; proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; proxy_set_header X-Forwarded-Proto $scheme; }
|
||||
location /ping/ { proxy_pass http://127.0.0.1:8000/ping/; proxy_set_header Host $host; proxy_set_header X-Real-IP $remote_addr; proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; proxy_set_header X-Forwarded-Proto $scheme; }
|
||||
location /admin/ { proxy_pass http://127.0.0.1:8000/admin/; proxy_set_header Host $host; proxy_set_header X-Real-IP $remote_addr; proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; proxy_set_header X-Forwarded-Proto $scheme; }
|
||||
location /badge/ { proxy_pass http://127.0.0.1:8000/badge/; proxy_set_header Host $host; proxy_set_header X-Real-IP $remote_addr; proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; proxy_set_header X-Forwarded-Proto $scheme; }
|
||||
location /checks/ { proxy_pass http://127.0.0.1:8000/checks/; proxy_set_header Host $host; proxy_set_header X-Real-IP $remote_addr; proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; proxy_set_header X-Forwarded-Proto $scheme; }
|
||||
location /docs/ { proxy_pass http://127.0.0.1:8000/docs/; proxy_set_header Host $host; proxy_set_header X-Real-IP $remote_addr; proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; proxy_set_header X-Forwarded-Proto $scheme; }
|
||||
location /tv/ { proxy_pass http://127.0.0.1:8000/tv/; proxy_set_header Host $host; proxy_set_header X-Real-IP $remote_addr; proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; proxy_set_header X-Forwarded-Proto $scheme; }
|
||||
location = /healthchecks/ { return 302 /healthchecks/accounts/login/; }
|
||||
location = /healthchecks { return 302 /healthchecks/accounts/login/; }
|
||||
location /healthchecks/ { proxy_pass http://127.0.0.1:8000/; proxy_set_header Host $host; proxy_set_header X-Real-IP $remote_addr; proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; proxy_set_header X-Forwarded-Proto $scheme; }
|
||||
}
|
||||
6
scripts/healthchecks.env.example
Normal file
6
scripts/healthchecks.env.example
Normal file
@@ -0,0 +1,6 @@
|
||||
# Конфиг для healthcheck-ping.sh (Proxmox)
|
||||
# Копировать: cp healthchecks.env.example /root/.healthchecks.env
|
||||
# UUID — из веб-интерфейса Healthchecks после создания check "homelab-backups"
|
||||
|
||||
HEALTHCHECKS_URL=https://healthchecks.katykhin.ru
|
||||
HEALTHCHECKS_HOMELAB_UUID=
|
||||
30
scripts/smartd-notify.sh
Executable file
30
scripts/smartd-notify.sh
Executable file
@@ -0,0 +1,30 @@
|
||||
#!/bin/bash
|
||||
# Вызывается smartd при обнаружении проблемы (-M exec).
|
||||
# Аргументы: $1 = device, $2 = type (1=health, 2=usage, 3=fail), $3 = message
|
||||
# См. man smartd.conf
|
||||
|
||||
NOTIFY_SCRIPT="${NOTIFY_SCRIPT:-/root/scripts/notify-telegram.sh}"
|
||||
DEVICE="${1:-unknown}"
|
||||
TYPE="${2:-}"
|
||||
MSG="${3:-}"
|
||||
# Дополнительный вывод smartd может быть в stdin
|
||||
EXTRA=$(cat 2>/dev/null || true)
|
||||
|
||||
case "$TYPE" in
|
||||
1) SUMMARY="Health check failed" ;;
|
||||
2) SUMMARY="Usage attribute warning" ;;
|
||||
3) SUMMARY="Usage attribute failure" ;;
|
||||
*) SUMMARY="SMART problem" ;;
|
||||
esac
|
||||
|
||||
if [ -x "$NOTIFY_SCRIPT" ]; then
|
||||
BODY="Диск $DEVICE: $SUMMARY"
|
||||
[ -n "$MSG" ] && BODY="${BODY}
|
||||
$MSG"
|
||||
[ -n "$EXTRA" ] && BODY="${BODY}
|
||||
|
||||
$EXTRA"
|
||||
"$NOTIFY_SCRIPT" "⚠️ SMART" "$BODY" || true
|
||||
fi
|
||||
|
||||
exit 0
|
||||
24
scripts/systemd/README.md
Normal file
24
scripts/systemd/README.md
Normal file
@@ -0,0 +1,24 @@
|
||||
# Systemd unit-файлы для бэкапов и мониторинга
|
||||
|
||||
Копировать на хост Proxmox в `/etc/systemd/system/`:
|
||||
|
||||
```bash
|
||||
cp *.service *.timer /etc/systemd/system/
|
||||
systemctl daemon-reload
|
||||
```
|
||||
|
||||
Включить все таймеры:
|
||||
|
||||
```bash
|
||||
for t in backup-*.timer notify-vzdump-success.timer verify-*.timer backup-watchdog-timers.timer backup-healthcheck-ping.timer; do
|
||||
systemctl enable --now "$t" 2>/dev/null || true
|
||||
done
|
||||
```
|
||||
|
||||
Проверка:
|
||||
|
||||
```bash
|
||||
systemctl list-timers --all | grep backup
|
||||
```
|
||||
|
||||
Перед миграцией с cron — отключить задания в crontab (`crontab -e`).
|
||||
14
scripts/systemd/backup-ct101-pgdump.service
Normal file
14
scripts/systemd/backup-ct101-pgdump.service
Normal file
@@ -0,0 +1,14 @@
|
||||
# Бэкап БД Nextcloud (CT 101)
|
||||
|
||||
[Unit]
|
||||
Description=Backup Nextcloud PostgreSQL (CT 101)
|
||||
After=network-online.target
|
||||
|
||||
[Service]
|
||||
Type=oneshot
|
||||
ExecStart=/bin/sh -c '/root/scripts/backup-ct101-pgdump.sh && echo $(date -Iseconds) > /var/run/backup-ct101-pgdump.ok'
|
||||
StandardOutput=journal
|
||||
StandardError=journal
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
9
scripts/systemd/backup-ct101-pgdump.timer
Normal file
9
scripts/systemd/backup-ct101-pgdump.timer
Normal file
@@ -0,0 +1,9 @@
|
||||
[Unit]
|
||||
Description=Backup Nextcloud DB daily at 01:15
|
||||
|
||||
[Timer]
|
||||
OnCalendar=*-*-* 01:15:00
|
||||
Persistent=yes
|
||||
|
||||
[Install]
|
||||
WantedBy=timers.target
|
||||
14
scripts/systemd/backup-ct103-gitea-pgdump.service
Normal file
14
scripts/systemd/backup-ct103-gitea-pgdump.service
Normal file
@@ -0,0 +1,14 @@
|
||||
# Бэкап БД Gitea (CT 103)
|
||||
|
||||
[Unit]
|
||||
Description=Backup Gitea PostgreSQL (CT 103)
|
||||
After=network-online.target
|
||||
|
||||
[Service]
|
||||
Type=oneshot
|
||||
ExecStart=/bin/sh -c '/root/scripts/backup-ct103-gitea-pgdump.sh && echo $(date -Iseconds) > /var/run/backup-ct103-gitea-pgdump.ok'
|
||||
StandardOutput=journal
|
||||
StandardError=journal
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
9
scripts/systemd/backup-ct103-gitea-pgdump.timer
Normal file
9
scripts/systemd/backup-ct103-gitea-pgdump.timer
Normal file
@@ -0,0 +1,9 @@
|
||||
[Unit]
|
||||
Description=Backup Gitea DB daily at 03:00
|
||||
|
||||
[Timer]
|
||||
OnCalendar=*-*-* 03:00:00
|
||||
Persistent=yes
|
||||
|
||||
[Install]
|
||||
WantedBy=timers.target
|
||||
14
scripts/systemd/backup-ct104-pgdump.service
Normal file
14
scripts/systemd/backup-ct104-pgdump.service
Normal file
@@ -0,0 +1,14 @@
|
||||
# Бэкап БД Paperless (CT 104)
|
||||
|
||||
[Unit]
|
||||
Description=Backup Paperless PostgreSQL (CT 104)
|
||||
After=network-online.target
|
||||
|
||||
[Service]
|
||||
Type=oneshot
|
||||
ExecStart=/bin/sh -c '/root/scripts/backup-ct104-pgdump.sh && echo $(date -Iseconds) > /var/run/backup-ct104-pgdump.ok'
|
||||
StandardOutput=journal
|
||||
StandardError=journal
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
9
scripts/systemd/backup-ct104-pgdump.timer
Normal file
9
scripts/systemd/backup-ct104-pgdump.timer
Normal file
@@ -0,0 +1,9 @@
|
||||
[Unit]
|
||||
Description=Backup Paperless DB daily at 02:30
|
||||
|
||||
[Timer]
|
||||
OnCalendar=*-*-* 02:30:00
|
||||
Persistent=yes
|
||||
|
||||
[Install]
|
||||
WantedBy=timers.target
|
||||
14
scripts/systemd/backup-ct105-vectors.service
Normal file
14
scripts/systemd/backup-ct105-vectors.service
Normal file
@@ -0,0 +1,14 @@
|
||||
# Бэкап векторов RAG (CT 105)
|
||||
|
||||
[Unit]
|
||||
Description=Backup RAG vectors (CT 105)
|
||||
After=network-online.target
|
||||
|
||||
[Service]
|
||||
Type=oneshot
|
||||
ExecStart=/bin/sh -c '/root/scripts/backup-ct105-vectors.sh && echo $(date -Iseconds) > /var/run/backup-ct105-vectors.ok'
|
||||
StandardOutput=journal
|
||||
StandardError=journal
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
9
scripts/systemd/backup-ct105-vectors.timer
Normal file
9
scripts/systemd/backup-ct105-vectors.timer
Normal file
@@ -0,0 +1,9 @@
|
||||
[Unit]
|
||||
Description=Backup RAG vectors daily at 03:30
|
||||
|
||||
[Timer]
|
||||
OnCalendar=*-*-* 03:30:00
|
||||
Persistent=yes
|
||||
|
||||
[Install]
|
||||
WantedBy=timers.target
|
||||
14
scripts/systemd/backup-etc-pve.service
Normal file
14
scripts/systemd/backup-etc-pve.service
Normal file
@@ -0,0 +1,14 @@
|
||||
# Бэкап /etc/pve и конфигов хоста
|
||||
|
||||
[Unit]
|
||||
Description=Backup Proxmox host config (/etc/pve, interfaces, hosts)
|
||||
After=network-online.target
|
||||
|
||||
[Service]
|
||||
Type=oneshot
|
||||
ExecStart=/bin/sh -c '/root/scripts/backup-etc-pve.sh && echo $(date -Iseconds) > /var/run/backup-etc-pve.ok'
|
||||
StandardOutput=journal
|
||||
StandardError=journal
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
9
scripts/systemd/backup-etc-pve.timer
Normal file
9
scripts/systemd/backup-etc-pve.timer
Normal file
@@ -0,0 +1,9 @@
|
||||
[Unit]
|
||||
Description=Backup etc-pve daily at 02:15
|
||||
|
||||
[Timer]
|
||||
OnCalendar=*-*-* 02:15:00
|
||||
Persistent=yes
|
||||
|
||||
[Install]
|
||||
WantedBy=timers.target
|
||||
14
scripts/systemd/backup-healthcheck-ping.service
Normal file
14
scripts/systemd/backup-healthcheck-ping.service
Normal file
@@ -0,0 +1,14 @@
|
||||
# Ping Healthchecks после окна бэкапов (Dead man's switch)
|
||||
|
||||
[Unit]
|
||||
Description=Ping Healthchecks (homelab backups)
|
||||
After=network-online.target
|
||||
|
||||
[Service]
|
||||
Type=oneshot
|
||||
ExecStart=/root/scripts/healthcheck-ping.sh
|
||||
StandardOutput=journal
|
||||
StandardError=journal
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
9
scripts/systemd/backup-healthcheck-ping.timer
Normal file
9
scripts/systemd/backup-healthcheck-ping.timer
Normal file
@@ -0,0 +1,9 @@
|
||||
[Unit]
|
||||
Description=Ping Healthchecks daily at 04:35 (after backup window)
|
||||
|
||||
[Timer]
|
||||
OnCalendar=*-*-* 04:35:00
|
||||
Persistent=yes
|
||||
|
||||
[Install]
|
||||
WantedBy=timers.target
|
||||
14
scripts/systemd/backup-immich-photos.service
Normal file
14
scripts/systemd/backup-immich-photos.service
Normal file
@@ -0,0 +1,14 @@
|
||||
# Бэкап библиотеки фото Immich (rsync с VM 200)
|
||||
|
||||
[Unit]
|
||||
Description=Backup Immich photos (rsync from VM 200)
|
||||
After=network-online.target
|
||||
|
||||
[Service]
|
||||
Type=oneshot
|
||||
ExecStart=/bin/sh -c '/root/scripts/backup-immich-photos.sh && echo $(date -Iseconds) > /var/run/backup-immich-photos.ok'
|
||||
StandardOutput=journal
|
||||
StandardError=journal
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
9
scripts/systemd/backup-immich-photos.timer
Normal file
9
scripts/systemd/backup-immich-photos.timer
Normal file
@@ -0,0 +1,9 @@
|
||||
[Unit]
|
||||
Description=Backup Immich photos daily at 01:30
|
||||
|
||||
[Timer]
|
||||
OnCalendar=*-*-* 01:30:00
|
||||
Persistent=yes
|
||||
|
||||
[Install]
|
||||
WantedBy=timers.target
|
||||
16
scripts/systemd/backup-restic-yandex-photos.service
Normal file
16
scripts/systemd/backup-restic-yandex-photos.service
Normal file
@@ -0,0 +1,16 @@
|
||||
# Выгрузка /mnt/backup/photos в Yandex S3 через restic
|
||||
|
||||
[Unit]
|
||||
Description=Backup photos to Yandex S3 (restic)
|
||||
After=network-online.target
|
||||
|
||||
[Service]
|
||||
Type=oneshot
|
||||
Environment=HOME=/root
|
||||
Environment=PATH=/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
|
||||
ExecStart=/bin/sh -c '/root/scripts/backup-restic-yandex-photos.sh && echo $(date -Iseconds) > /var/run/backup-restic-yandex-photos.ok'
|
||||
StandardOutput=journal
|
||||
StandardError=journal
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
9
scripts/systemd/backup-restic-yandex-photos.timer
Normal file
9
scripts/systemd/backup-restic-yandex-photos.timer
Normal file
@@ -0,0 +1,9 @@
|
||||
[Unit]
|
||||
Description=Restic backup photos to Yandex daily at 04:10
|
||||
|
||||
[Timer]
|
||||
OnCalendar=*-*-* 04:10:00
|
||||
Persistent=yes
|
||||
|
||||
[Install]
|
||||
WantedBy=timers.target
|
||||
16
scripts/systemd/backup-restic-yandex.service
Normal file
16
scripts/systemd/backup-restic-yandex.service
Normal file
@@ -0,0 +1,16 @@
|
||||
# Выгрузка /mnt/backup (без photos) в Yandex S3 через restic
|
||||
|
||||
[Unit]
|
||||
Description=Backup to Yandex S3 (restic, main)
|
||||
After=network-online.target
|
||||
|
||||
[Service]
|
||||
Type=oneshot
|
||||
Environment=HOME=/root
|
||||
Environment=PATH=/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
|
||||
ExecStart=/bin/sh -c '/root/scripts/backup-restic-yandex.sh && echo $(date -Iseconds) > /var/run/backup-restic-yandex.ok'
|
||||
StandardOutput=journal
|
||||
StandardError=journal
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
9
scripts/systemd/backup-restic-yandex.timer
Normal file
9
scripts/systemd/backup-restic-yandex.timer
Normal file
@@ -0,0 +1,9 @@
|
||||
[Unit]
|
||||
Description=Restic backup to Yandex daily at 04:00
|
||||
|
||||
[Timer]
|
||||
OnCalendar=*-*-* 04:00:00
|
||||
Persistent=yes
|
||||
|
||||
[Install]
|
||||
WantedBy=timers.target
|
||||
14
scripts/systemd/backup-vaultwarden-data.service
Normal file
14
scripts/systemd/backup-vaultwarden-data.service
Normal file
@@ -0,0 +1,14 @@
|
||||
# Бэкап данных Vaultwarden (CT 103)
|
||||
|
||||
[Unit]
|
||||
Description=Backup Vaultwarden data (CT 103)
|
||||
After=network-online.target
|
||||
|
||||
[Service]
|
||||
Type=oneshot
|
||||
ExecStart=/bin/sh -c '/root/scripts/backup-vaultwarden-data.sh && echo $(date -Iseconds) > /var/run/backup-vaultwarden-data.ok'
|
||||
StandardOutput=journal
|
||||
StandardError=journal
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
9
scripts/systemd/backup-vaultwarden-data.timer
Normal file
9
scripts/systemd/backup-vaultwarden-data.timer
Normal file
@@ -0,0 +1,9 @@
|
||||
[Unit]
|
||||
Description=Backup Vaultwarden daily at 02:45
|
||||
|
||||
[Timer]
|
||||
OnCalendar=*-*-* 02:45:00
|
||||
Persistent=yes
|
||||
|
||||
[Install]
|
||||
WantedBy=timers.target
|
||||
14
scripts/systemd/backup-vm200-pgdump.service
Normal file
14
scripts/systemd/backup-vm200-pgdump.service
Normal file
@@ -0,0 +1,14 @@
|
||||
# Бэкап БД Immich (VM 200)
|
||||
|
||||
[Unit]
|
||||
Description=Backup Immich PostgreSQL (VM 200)
|
||||
After=network-online.target
|
||||
|
||||
[Service]
|
||||
Type=oneshot
|
||||
ExecStart=/bin/sh -c '/root/scripts/backup-vm200-pgdump.sh && echo $(date -Iseconds) > /var/run/backup-vm200-pgdump.ok'
|
||||
StandardOutput=journal
|
||||
StandardError=journal
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
9
scripts/systemd/backup-vm200-pgdump.timer
Normal file
9
scripts/systemd/backup-vm200-pgdump.timer
Normal file
@@ -0,0 +1,9 @@
|
||||
[Unit]
|
||||
Description=Backup Immich DB daily at 03:15
|
||||
|
||||
[Timer]
|
||||
OnCalendar=*-*-* 03:15:00
|
||||
Persistent=yes
|
||||
|
||||
[Install]
|
||||
WantedBy=timers.target
|
||||
17
scripts/systemd/backup-vps-miran.service
Normal file
17
scripts/systemd/backup-vps-miran.service
Normal file
@@ -0,0 +1,17 @@
|
||||
# Копировать на Proxmox: /etc/systemd/system/
|
||||
# systemctl daemon-reload && systemctl enable --now backup-vps-miran.timer
|
||||
# Удалить из cron: 0 1 * * *
|
||||
|
||||
[Unit]
|
||||
Description=Backup VPS Miran (БД бота, voice_users, S3)
|
||||
After=network-online.target
|
||||
|
||||
[Service]
|
||||
Type=oneshot
|
||||
# Запись .ok только при успехе (для watchdog)
|
||||
ExecStart=/bin/sh -c '/root/scripts/backup-vps-miran.sh && echo $(date -Iseconds) > /var/run/backup-vps-miran.ok'
|
||||
StandardOutput=journal
|
||||
StandardError=journal
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
9
scripts/systemd/backup-vps-miran.timer
Normal file
9
scripts/systemd/backup-vps-miran.timer
Normal file
@@ -0,0 +1,9 @@
|
||||
[Unit]
|
||||
Description=Backup VPS Miran daily at 01:00
|
||||
|
||||
[Timer]
|
||||
OnCalendar=*-*-* 01:00:00
|
||||
Persistent=yes
|
||||
|
||||
[Install]
|
||||
WantedBy=timers.target
|
||||
14
scripts/systemd/backup-vps-mtproto.service
Normal file
14
scripts/systemd/backup-vps-mtproto.service
Normal file
@@ -0,0 +1,14 @@
|
||||
# Бэкап конфигов MTProto + сайт (VPS Германия)
|
||||
|
||||
[Unit]
|
||||
Description=Backup VPS MTProto (Germany)
|
||||
After=network-online.target
|
||||
|
||||
[Service]
|
||||
Type=oneshot
|
||||
ExecStart=/bin/sh -c '/root/scripts/backup-vps-mtproto.sh && echo $(date -Iseconds) > /var/run/backup-vps-mtproto.ok'
|
||||
StandardOutput=journal
|
||||
StandardError=journal
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
9
scripts/systemd/backup-vps-mtproto.timer
Normal file
9
scripts/systemd/backup-vps-mtproto.timer
Normal file
@@ -0,0 +1,9 @@
|
||||
[Unit]
|
||||
Description=Backup VPS MTProto daily at 01:45
|
||||
|
||||
[Timer]
|
||||
OnCalendar=*-*-* 01:45:00
|
||||
Persistent=yes
|
||||
|
||||
[Install]
|
||||
WantedBy=timers.target
|
||||
14
scripts/systemd/backup-watchdog-timers.service
Normal file
14
scripts/systemd/backup-watchdog-timers.service
Normal file
@@ -0,0 +1,14 @@
|
||||
# Watchdog: проверка failed timers и устаревших healthcheck-файлов
|
||||
|
||||
[Unit]
|
||||
Description=Backup watchdog (failed timers, stale .ok files)
|
||||
After=network-online.target
|
||||
|
||||
[Service]
|
||||
Type=oneshot
|
||||
ExecStart=/root/scripts/watchdog-timers.sh
|
||||
StandardOutput=journal
|
||||
StandardError=journal
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
9
scripts/systemd/backup-watchdog-timers.timer
Normal file
9
scripts/systemd/backup-watchdog-timers.timer
Normal file
@@ -0,0 +1,9 @@
|
||||
[Unit]
|
||||
Description=Backup watchdog daily at 12:00
|
||||
|
||||
[Timer]
|
||||
OnCalendar=*-*-* 12:00:00
|
||||
Persistent=yes
|
||||
|
||||
[Install]
|
||||
WantedBy=timers.target
|
||||
15
scripts/systemd/notify-vzdump-success.service
Normal file
15
scripts/systemd/notify-vzdump-success.service
Normal file
@@ -0,0 +1,15 @@
|
||||
# Проверка локального vzdump за последние 2 ч, отправка сводки в Telegram
|
||||
# Задание vzdump в Proxmox UI выполняется в 02:00
|
||||
|
||||
[Unit]
|
||||
Description=Notify vzdump success (check dump dir, send Telegram)
|
||||
After=network-online.target
|
||||
|
||||
[Service]
|
||||
Type=oneshot
|
||||
ExecStart=/root/scripts/notify-vzdump-success.sh
|
||||
StandardOutput=journal
|
||||
StandardError=journal
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
9
scripts/systemd/notify-vzdump-success.timer
Normal file
9
scripts/systemd/notify-vzdump-success.timer
Normal file
@@ -0,0 +1,9 @@
|
||||
[Unit]
|
||||
Description=Notify vzdump success daily at 03:00
|
||||
|
||||
[Timer]
|
||||
OnCalendar=*-*-* 03:00:00
|
||||
Persistent=yes
|
||||
|
||||
[Install]
|
||||
WantedBy=timers.target
|
||||
14
scripts/systemd/verify-restore-level1-full-check.service
Normal file
14
scripts/systemd/verify-restore-level1-full-check.service
Normal file
@@ -0,0 +1,14 @@
|
||||
# Restic check --read-data (раз в 6 мес: 1 янв и 1 июля)
|
||||
|
||||
[Unit]
|
||||
Description=Verify restic repository (full read-data, semiannual)
|
||||
After=network-online.target
|
||||
|
||||
[Service]
|
||||
Type=oneshot
|
||||
ExecStart=/root/scripts/verify-restore-level1.sh full-check
|
||||
StandardOutput=journal
|
||||
StandardError=journal
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
10
scripts/systemd/verify-restore-level1-full-check.timer
Normal file
10
scripts/systemd/verify-restore-level1-full-check.timer
Normal file
@@ -0,0 +1,10 @@
|
||||
[Unit]
|
||||
Description=Restic full check semiannual (Jan 1, Jul 1 at 10:00)
|
||||
|
||||
[Timer]
|
||||
OnCalendar=*-01-01 10:00:00
|
||||
OnCalendar=*-07-01 10:00:00
|
||||
Persistent=yes
|
||||
|
||||
[Install]
|
||||
WantedBy=timers.target
|
||||
14
scripts/systemd/verify-restore-level1-monthly-check.service
Normal file
14
scripts/systemd/verify-restore-level1-monthly-check.service
Normal file
@@ -0,0 +1,14 @@
|
||||
# Restic check --read-data-subset=10% (ежемесячно, 1-е число)
|
||||
|
||||
[Unit]
|
||||
Description=Verify restic repository (monthly read-data-subset)
|
||||
After=network-online.target
|
||||
|
||||
[Service]
|
||||
Type=oneshot
|
||||
ExecStart=/root/scripts/verify-restore-level1.sh monthly-check
|
||||
StandardOutput=journal
|
||||
StandardError=journal
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
@@ -0,0 +1,9 @@
|
||||
[Unit]
|
||||
Description=Restic check read-data-subset monthly (1st at 10:00)
|
||||
|
||||
[Timer]
|
||||
OnCalendar=*-*-01 10:00:00
|
||||
Persistent=yes
|
||||
|
||||
[Install]
|
||||
WantedBy=timers.target
|
||||
14
scripts/systemd/verify-restore-level1-monthly-dump.service
Normal file
14
scripts/systemd/verify-restore-level1-monthly-dump.service
Normal file
@@ -0,0 +1,14 @@
|
||||
# Тест restore дампа Nextcloud из restic (ежемесячно)
|
||||
|
||||
[Unit]
|
||||
Description=Verify Nextcloud dump restore from restic (monthly)
|
||||
After=network-online.target
|
||||
|
||||
[Service]
|
||||
Type=oneshot
|
||||
ExecStart=/root/scripts/verify-restore-level1.sh monthly-dump
|
||||
StandardOutput=journal
|
||||
StandardError=journal
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
9
scripts/systemd/verify-restore-level1-monthly-dump.timer
Normal file
9
scripts/systemd/verify-restore-level1-monthly-dump.timer
Normal file
@@ -0,0 +1,9 @@
|
||||
[Unit]
|
||||
Description=Verify Nextcloud dump restore monthly (1st at 11:00)
|
||||
|
||||
[Timer]
|
||||
OnCalendar=*-*-01 11:00:00
|
||||
Persistent=yes
|
||||
|
||||
[Install]
|
||||
WantedBy=timers.target
|
||||
14
scripts/systemd/verify-restore-level1-weekly.service
Normal file
14
scripts/systemd/verify-restore-level1-weekly.service
Normal file
@@ -0,0 +1,14 @@
|
||||
# Restic check (еженедельно)
|
||||
|
||||
[Unit]
|
||||
Description=Verify restic repository (weekly check)
|
||||
After=network-online.target
|
||||
|
||||
[Service]
|
||||
Type=oneshot
|
||||
ExecStart=/root/scripts/verify-restore-level1.sh weekly
|
||||
StandardOutput=journal
|
||||
StandardError=journal
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
9
scripts/systemd/verify-restore-level1-weekly.timer
Normal file
9
scripts/systemd/verify-restore-level1-weekly.timer
Normal file
@@ -0,0 +1,9 @@
|
||||
[Unit]
|
||||
Description=Restic check weekly (Sunday 03:00)
|
||||
|
||||
[Timer]
|
||||
OnCalendar=Sun *-*-* 03:00:00
|
||||
Persistent=yes
|
||||
|
||||
[Install]
|
||||
WantedBy=timers.target
|
||||
14
scripts/systemd/verify-vzdump-level2.service
Normal file
14
scripts/systemd/verify-vzdump-level2.service
Normal file
@@ -0,0 +1,14 @@
|
||||
# Автотест vzdump CT 107 (ежемесячно)
|
||||
|
||||
[Unit]
|
||||
Description=Verify vzdump restore (CT 107, monthly)
|
||||
After=network-online.target
|
||||
|
||||
[Service]
|
||||
Type=oneshot
|
||||
ExecStart=/root/scripts/verify-vzdump-level2.sh
|
||||
StandardOutput=journal
|
||||
StandardError=journal
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
9
scripts/systemd/verify-vzdump-level2.timer
Normal file
9
scripts/systemd/verify-vzdump-level2.timer
Normal file
@@ -0,0 +1,9 @@
|
||||
[Unit]
|
||||
Description=Verify vzdump restore monthly (1st at 12:00)
|
||||
|
||||
[Timer]
|
||||
OnCalendar=*-*-01 12:00:00
|
||||
Persistent=yes
|
||||
|
||||
[Install]
|
||||
WantedBy=timers.target
|
||||
147
scripts/verify-restore-level1.sh
Executable file
147
scripts/verify-restore-level1.sh
Executable file
@@ -0,0 +1,147 @@
|
||||
#!/bin/bash
|
||||
# Тест восстановления уровня 1: restic check и проверка дампа Nextcloud из restic.
|
||||
# Запускать на хосте Proxmox под root.
|
||||
# Режимы (аргумент): weekly | monthly-check | full-check | monthly-dump
|
||||
# weekly — restic check (еженедельно)
|
||||
# monthly-check — restic check --read-data-subset=10% (ежемесячно, 1-е число)
|
||||
# full-check — restic check --read-data (раз в 6–12 мес, 1 янв и 1 июля)
|
||||
# monthly-dump — restore дампа Nextcloud из restic, проверка целостности (ежемесячно)
|
||||
# Секреты: из Vaultwarden (объект RESTIC), как в backup-restic-yandex.sh.
|
||||
# Cron/Timer: отдельные таймеры для каждого режима.
|
||||
set -e
|
||||
|
||||
export PATH="/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:${PATH:-}"
|
||||
|
||||
MODE="${1:-weekly}"
|
||||
NOTIFY_SCRIPT="${NOTIFY_SCRIPT:-/root/scripts/notify-telegram.sh}"
|
||||
RESTORE_TARGET="/tmp/restore-test"
|
||||
RESTIC_PATH_NEXTCLOUD="/mnt/backup/databases/ct101-nextcloud"
|
||||
MIN_DUMP_SIZE_MB=1
|
||||
|
||||
if [ "$(id -u)" -ne 0 ]; then
|
||||
echo "Запускайте под root."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Load restic credentials from Vaultwarden (same scheme as backup-restic-yandex.sh).
# Exports: BW_SESSION, RESTIC_REPOSITORY, AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY,
#          AWS_DEFAULT_REGION, RESTIC_PASSWORD_FILE.
# Returns 0 on success, 1 on any failure (missing secrets file or tools,
# bw/jq errors, empty required fields).
setup_restic_env() {
  BW_MASTER_PASSWORD_FILE="${BW_MASTER_PASSWORD_FILE:-/root/.bw-master}"
  if [ ! -f "$BW_MASTER_PASSWORD_FILE" ]; then
    echo "Нет файла с мастер-паролем Vaultwarden: $BW_MASTER_PASSWORD_FILE" >&2
    return 1
  fi
  if ! command -v bw >/dev/null 2>&1 || ! command -v jq >/dev/null 2>&1; then
    echo "Установите bw (Bitwarden CLI) и jq." >&2
    return 1
  fi
  export BW_SESSION
  BW_SESSION=$(bw unlock --passwordfile "$BW_MASTER_PASSWORD_FILE" --raw 2>/dev/null) || return 1
  RESTIC_ITEM=$(bw get item "RESTIC" 2>/dev/null) || return 1
  # Pull each custom field out of the Vaultwarden item by name.
  export RESTIC_REPOSITORY
  RESTIC_REPOSITORY=$(echo "$RESTIC_ITEM" | jq -r '.fields[] | select(.name=="RESTIC_REPOSITORY") | .value')
  export AWS_ACCESS_KEY_ID
  AWS_ACCESS_KEY_ID=$(echo "$RESTIC_ITEM" | jq -r '.fields[] | select(.name=="AWS_ACCESS_KEY_ID") | .value')
  export AWS_SECRET_ACCESS_KEY
  AWS_SECRET_ACCESS_KEY=$(echo "$RESTIC_ITEM" | jq -r '.fields[] | select(.name=="AWS_SECRET_ACCESS_KEY") | .value')
  export AWS_DEFAULT_REGION
  AWS_DEFAULT_REGION=$(echo "$RESTIC_ITEM" | jq -r '.fields[] | select(.name=="AWS_DEFAULT_REGION") | .value')
  # Default region if the item does not define one.
  [ -z "$AWS_DEFAULT_REGION" ] && AWS_DEFAULT_REGION="ru-central1"
  RESTIC_PASS=$(echo "$RESTIC_ITEM" | jq -r '.fields[] | select(.name=="RESTIC_BACKUP_KEY") | .value')
  # All three credentials are mandatory.
  for var in RESTIC_REPOSITORY AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY; do
    [ -z "${!var}" ] && return 1
  done
  # FIX: mktemp (without -u) creates the file atomically with mode 0600,
  # closing the predictable-name race of writing a secret to an unclaimed path.
  RESTIC_PASSWORD_FILE=$(mktemp) || return 1
  printf '%s' "$RESTIC_PASS" > "$RESTIC_PASSWORD_FILE"
  chmod 600 "$RESTIC_PASSWORD_FILE"
  # Remove the password file on any exit path.
  # NOTE(review): the monthly-dump branch later replaces this trap — confirm
  # it always re-includes removal of RESTIC_PASSWORD_FILE (it currently does).
  trap 'rm -f "$RESTIC_PASSWORD_FILE"' EXIT INT TERM
  export RESTIC_PASSWORD_FILE
  return 0
}
||||
# Best-effort success notification via the Telegram helper script.
# $1 - title, $2 - message body. Never returns non-zero.
notify_ok() {
  if [ -x "$NOTIFY_SCRIPT" ]; then
    "$NOTIFY_SCRIPT" "$1" "$2" || true
  fi
}
|
||||
# Best-effort failure notification via the Telegram helper script.
# Prepends a warning emoji to the title. $1 - title, $2 - message body.
# Never returns non-zero.
notify_err() {
  if [ -x "$NOTIFY_SCRIPT" ]; then
    "$NOTIFY_SCRIPT" "⚠️ $1" "$2" || true
  fi
}
||||
# Dispatch on MODE (set earlier in the script):
#   weekly        - restic check (metadata only); notifies only on failure
#   monthly-check - restic check --read-data-subset=10%
#   full-check    - restic check --read-data (full data verification)
#   monthly-dump  - restore the latest Nextcloud DB dump and validate it
case "$MODE" in
  weekly)
    echo "[verify-restore-level1] Режим: weekly (restic check)"
    setup_restic_env || { notify_err "Restic check" "Не удалось загрузить креды restic."; exit 1; }
    # 2>&1 folds restic's stderr into the normal log stream.
    if restic check 2>&1; then
      echo "[verify-restore-level1] restic check OK"
      # On weekly success stay silent (alert only on failure) to avoid spam.
    else
      notify_err "Restic check" "Ошибка проверки репозитория restic."
      exit 1
    fi
    ;;
  monthly-check)
    echo "[verify-restore-level1] Режим: monthly-check (restic check --read-data-subset=10%)"
    setup_restic_env || { notify_err "Restic check (read-data-subset)" "Не удалось загрузить креды restic."; exit 1; }
    # Read back and verify a random 10% of pack data.
    if restic check --read-data-subset=10% 2>&1; then
      echo "[verify-restore-level1] restic check --read-data-subset=10% OK"
      notify_ok "Тест restic (read-data-subset)" "OK, 10% данных проверено."
    else
      notify_err "Restic check (read-data-subset)" "Ошибка проверки 10% данных репозитория."
      exit 1
    fi
    ;;
  full-check)
    echo "[verify-restore-level1] Режим: full-check (restic check --read-data)"
    setup_restic_env || { notify_err "Restic check (read-data)" "Не удалось загрузить креды restic."; exit 1; }
    # Read back and verify ALL repository data (slow, bandwidth-heavy).
    if restic check --read-data 2>&1; then
      echo "[verify-restore-level1] restic check --read-data OK"
      notify_ok "Тест restic (full read-data)" "OK, полная проверка данных завершена."
    else
      notify_err "Restic check (read-data)" "Ошибка полной проверки данных репозитория."
      exit 1
    fi
    ;;
  monthly-dump)
    echo "[verify-restore-level1] Режим: monthly-dump (restore и проверка дампа Nextcloud)"
    setup_restic_env || { notify_err "Тест дампа Nextcloud" "Не удалось загрузить креды restic."; exit 1; }
    # Fresh scratch directory for the restore.
    rm -rf "$RESTORE_TARGET"
    mkdir -p "$RESTORE_TARGET"
    # Replaces the trap installed by setup_restic_env: also removes the
    # restore scratch directory on any exit path.
    trap 'rm -f "${RESTIC_PASSWORD_FILE:-}" 2>/dev/null; rm -rf "$RESTORE_TARGET"' EXIT INT TERM
    if ! restic restore latest --target "$RESTORE_TARGET" --path "$RESTIC_PATH_NEXTCLOUD" 2>&1; then
      notify_err "Тест дампа Nextcloud" "Ошибка restic restore: не удалось восстановить $RESTIC_PATH_NEXTCLOUD"
      exit 1
    fi
    # restic recreates the absolute path under the target:
    # RESTORE_TARGET/mnt/backup/databases/ct101-nextcloud/
    RESTORED_DIR="$RESTORE_TARGET/mnt/backup/databases/ct101-nextcloud"
    if [ ! -d "$RESTORED_DIR" ]; then
      notify_err "Тест дампа Nextcloud" "Ошибка: каталог $RESTORED_DIR не найден после restore."
      exit 1
    fi
    # Newest dump by mtime (filenames are machine-generated, no whitespace).
    LATEST_SQL=$(ls -t "$RESTORED_DIR"/nextcloud-db-*.sql.gz 2>/dev/null | head -1)
    if [ -z "$LATEST_SQL" ] || [ ! -f "$LATEST_SQL" ]; then
      notify_err "Тест дампа Nextcloud" "Ошибка: не найден .sql.gz в $RESTORED_DIR"
      exit 1
    fi
    # Sanity check 1: dump size above the configured minimum.
    SIZE_BYTES=$(stat -c%s "$LATEST_SQL" 2>/dev/null || echo 0)
    SIZE_MB=$(( SIZE_BYTES / 1024 / 1024 ))
    if [ "$SIZE_MB" -lt "$MIN_DUMP_SIZE_MB" ]; then
      notify_err "Тест дампа Nextcloud" "Ошибка: размер дампа ${SIZE_MB} MB < ${MIN_DUMP_SIZE_MB} MB (файл: $LATEST_SQL)"
      exit 1
    fi
    # Sanity check 2: gzip integrity.
    if ! gunzip -t "$LATEST_SQL" 2>/dev/null; then
      notify_err "Тест дампа Nextcloud" "Ошибка: gunzip -t не прошёл для $LATEST_SQL"
      exit 1
    fi
    # Sanity check 3: decompressed content actually looks like a SQL schema dump.
    if ! gunzip -c "$LATEST_SQL" 2>/dev/null | grep -q 'CREATE TABLE'; then
      notify_err "Тест дампа Nextcloud" "Ошибка: в распакованном дампе нет CREATE TABLE (возможно не SQL дамп)"
      exit 1
    fi
    echo "[verify-restore-level1] Дамп Nextcloud OK: $LATEST_SQL, размер ${SIZE_MB} MB"
    notify_ok "Тест дампа Nextcloud" "OK, размер ${SIZE_MB} MB."
    ;;
  *)
    echo "Использование: $0 {weekly|monthly-check|full-check|monthly-dump}"
    exit 1
    ;;
esac

exit 0
88
scripts/verify-vzdump-level2.sh
Executable file
88
scripts/verify-vzdump-level2.sh
Executable file
@@ -0,0 +1,88 @@
|
||||
#!/bin/bash
# Level-2 restore test: automated vzdump check for CT 107.
# Restores the latest vzdump-lxc-107 archive into a temporary CT 999,
# verifies it boots, then deletes it.
# Run on the Proxmox host as root. Monthly (systemd timer).
# A Telegram notification is sent on success or failure.
set -e

# Where vzdump archives land (note the doubled "dump" path segment).
DUMP_DIR="/mnt/backup/proxmox/dump/dump"
# Throwaway container ID used for the restore test.
TEST_VMID=999
# Separate IP so the clone never conflicts with the live CT 107.
TEST_IP="192.168.1.199/24"
TEST_GW="192.168.1.1"
STORAGE="local-lvm"
# Seconds to wait for the container to finish booting before checking.
WAIT_START_SEC=60
NOTIFY_SCRIPT="${NOTIFY_SCRIPT:-/root/scripts/notify-telegram.sh}"

if [ "$(id -u)" -ne 0 ]; then
  echo "Запускайте под root."
  exit 1
fi

# Cleanup on exit (success or failure): stop and destroy the test CT if present.
cleanup() {
  if pct status "$TEST_VMID" &>/dev/null; then
    echo "[verify-vzdump] Останавливаем и удаляем CT $TEST_VMID..."
    pct stop "$TEST_VMID" --skiplock 2>/dev/null || true
    sleep 2
    pct destroy "$TEST_VMID" --purge 1 --force 2>/dev/null || true
  fi
}
trap cleanup EXIT INT TERM

# Best-effort Telegram notifications ($1 - message body).
notify_ok() {
  [ -x "$NOTIFY_SCRIPT" ] && "$NOTIFY_SCRIPT" "✅ Тест vzdump CT 107" "$1" || true
}

notify_err() {
  [ -x "$NOTIFY_SCRIPT" ] && "$NOTIFY_SCRIPT" "⚠️ Тест vzdump CT 107" "Ошибка: $1" || true
}

if [ ! -d "$DUMP_DIR" ]; then
  notify_err "Каталог $DUMP_DIR не найден."
  exit 1
fi

# Pick the newest vzdump-lxc-107 archive by mtime.
ARCHIVE=$(ls -t "$DUMP_DIR"/vzdump-lxc-107-*.tar.zst 2>/dev/null | head -1)
if [ -z "$ARCHIVE" ] || [ ! -f "$ARCHIVE" ]; then
  notify_err "Не найден vzdump-lxc-107-*.tar.zst в $DUMP_DIR"
  exit 1
fi

echo "[verify-vzdump] Архив: $ARCHIVE"

# Make sure CT 999 does not already exist (leftover from a previous run).
if pct status "$TEST_VMID" &>/dev/null; then
  pct destroy "$TEST_VMID" --purge 1 --force 2>/dev/null || true
  sleep 2
fi

echo "[verify-vzdump] Создаём CT $TEST_VMID из архива..."
if ! pct create "$TEST_VMID" "$ARCHIVE" --restore 1 --storage "$STORAGE" 2>&1; then
  notify_err "pct create не удался."
  exit 1
fi

# Reconfigure networking so the clone does not clash with the original CT 107.
echo "[verify-vzdump] Настраиваем сеть (IP $TEST_IP)..."
pct set "$TEST_VMID" --net0 "name=eth0,bridge=vmbr0,gw=$TEST_GW,ip=$TEST_IP,type=veth"

echo "[verify-vzdump] Запускаем CT $TEST_VMID..."
if ! pct start "$TEST_VMID" 2>&1; then
  notify_err "pct start не удался."
  exit 1
fi

echo "[verify-vzdump] Ожидание $WAIT_START_SEC сек..."
sleep "$WAIT_START_SEC"

# The test passes only if systemd inside the CT reached "running".
# NOTE(review): "degraded" (any failed unit) counts as a failure here —
# confirm that is intended for the restored container.
STATUS=$(pct exec "$TEST_VMID" -- systemctl is-system-running 2>/dev/null || echo "unknown")
if [ "$STATUS" != "running" ]; then
  notify_err "systemctl is-system-running вернул: $STATUS (ожидалось running)"
  exit 1
fi

echo "[verify-vzdump] CT 999 запущен, system running. Тест пройден."
notify_ok "OK"

exit 0
58
scripts/watchdog-timers.sh
Executable file
58
scripts/watchdog-timers.sh
Executable file
@@ -0,0 +1,58 @@
|
||||
#!/bin/bash
# Watchdog: detect failed systemd timers and stale backup healthcheck files.
# Run once a day (e.g. 12:00). Any finding -> Telegram notification, exit 1.
# Timer: backup-watchdog-timers.timer

NOTIFY_SCRIPT="${NOTIFY_SCRIPT:-/root/scripts/notify-telegram.sh}"
# A .ok file older than this many hours means the backup has not succeeded recently.
MAX_AGE_HOURS=24
# Directory where backup scripts touch their backup-<name>.ok markers.
BACKUP_OK_DIR="/var/run"

if [ "$(id -u)" -ne 0 ]; then
  # FIX: diagnostics go to stderr, not stdout.
  echo "Запускайте под root." >&2
  exit 1
fi

# 1. Failed systemd timers.
# NOTE(review): '--failed' combined with 'list-timers' requires a systemd
# version where --state applies to list-timers; on older versions the command
# errors out and the error is silently swallowed here — verify on the host.
FAILED=$(systemctl list-timers --failed --no-legend --no-pager 2>/dev/null | grep -v '^$' || true)
if [ -n "$FAILED" ]; then
  MSG="Провалившиеся таймеры:
$FAILED"
  if [ -x "$NOTIFY_SCRIPT" ]; then
    "$NOTIFY_SCRIPT" "⚠️ Systemd timers" "$MSG" || true
  fi
  echo "[watchdog] Найдены провалившиеся таймеры"
  echo "$FAILED"
  exit 1
fi

# 2. Healthcheck files: alert if a .ok marker is missing or older than 24h.
BACKUP_NAMES="vps-miran ct101-pgdump immich-photos vps-mtproto etc-pve ct104-pgdump vaultwarden-data ct103-gitea-pgdump vm200-pgdump ct105-vectors restic-yandex restic-yandex-photos"
STALE=""
# FIX: read the clock once instead of forking `date` on every loop iteration;
# also keeps all ages relative to the same instant.
NOW=$(date +%s)
for name in $BACKUP_NAMES; do
  OK_FILE="$BACKUP_OK_DIR/backup-$name.ok"
  if [ -f "$OK_FILE" ]; then
    # stat fallback of 0 makes an unreadable file look ancient -> flagged stale.
    AGE_SEC=$(( NOW - $(stat -c %Y "$OK_FILE" 2>/dev/null || echo 0) ))
    AGE_HOURS=$(( AGE_SEC / 3600 ))
    if [ "$AGE_HOURS" -ge "$MAX_AGE_HOURS" ]; then
      STALE="${STALE}backup-$name.ok (${AGE_HOURS}h)
"
    fi
  else
    STALE="${STALE}backup-$name.ok (отсутствует)
"
  fi
done

if [ -n "$STALE" ]; then
  MSG="Файлы .ok старше ${MAX_AGE_HOURS} ч или отсутствуют (последний успешный бэкап):
$STALE"
  if [ -x "$NOTIFY_SCRIPT" ]; then
    "$NOTIFY_SCRIPT" "⚠️ Backup watchdog" "$MSG" || true
  fi
  echo "[watchdog] Устаревшие healthcheck-файлы"
  echo "$STALE"
  exit 1
fi

echo "[watchdog] OK: таймеры и healthcheck-файлы в порядке"
exit 0
Reference in New Issue
Block a user