Update architecture and backup documentation to include Healthchecks integration

Add Healthchecks service details to architecture and backup documentation, including its role as a Dead man's switch for backups. Update backup scripts to utilize systemd timers instead of cron for improved scheduling. Enhance network topology documentation to reflect Healthchecks integration in the VPS Miran setup. This update clarifies backup processes and enhances overall system reliability.
This commit is contained in:
2026-02-28 15:43:39 +03:00
parent 16c254510a
commit 53769e6832
61 changed files with 1697 additions and 39 deletions

24
scripts/systemd/README.md Normal file
View File

@@ -0,0 +1,24 @@
# Systemd unit-файлы для бэкапов и мониторинга
Копировать на хост Proxmox в `/etc/systemd/system/`:
```bash
cp *.service *.timer /etc/systemd/system/
systemctl daemon-reload
```
Включить все таймеры:
```bash
# Glob the installed unit files by absolute path so the loop works from any
# directory; backup-*.timer already covers the watchdog and healthcheck timers.
for t in /etc/systemd/system/backup-*.timer /etc/systemd/system/notify-vzdump-success.timer /etc/systemd/system/verify-*.timer; do
  [ -e "$t" ] || continue  # pattern matched nothing
  # Report failures instead of silencing them: a timer that is not enabled means a backup that never runs.
  systemctl enable --now "${t##*/}" || echo "failed to enable ${t##*/}" >&2
done
```
Проверка:
```bash
# Show every timer enabled above, not only the backup-* ones.
systemctl list-timers --all | grep -E 'backup|notify-vzdump|verify-'
```
Перед миграцией с cron — отключить задания в crontab (`crontab -e`).

View File

@@ -0,0 +1,14 @@
# Backup of the Nextcloud database (CT 101)
[Unit]
Description=Backup Nextcloud PostgreSQL (CT 101)
# After= only orders the unit; Wants= is what actually pulls in
# network-online.target so the job waits for the network when started early.
Wants=network-online.target
After=network-online.target
[Service]
Type=oneshot
# The .ok timestamp marker is written only when the backup script exits 0.
ExecStart=/bin/sh -c '/root/scripts/backup-ct101-pgdump.sh && date -Iseconds > /var/run/backup-ct101-pgdump.ok'
StandardOutput=journal
StandardError=journal
[Install]
WantedBy=multi-user.target

View File

@@ -0,0 +1,9 @@
# Daily trigger at 01:15 (local time). No Unit= is set, so the timer
# activates the service that shares its name.
[Unit]
Description=Backup Nextcloud DB daily at 01:15
[Timer]
OnCalendar=*-*-* 01:15:00
# A run missed while the timer was inactive (e.g. host powered off) is
# started as soon as the timer is activated again.
Persistent=yes
[Install]
WantedBy=timers.target

View File

@@ -0,0 +1,14 @@
# Backup of the Gitea database (CT 103)
[Unit]
Description=Backup Gitea PostgreSQL (CT 103)
# After= only orders the unit; Wants= is what actually pulls in
# network-online.target so the job waits for the network when started early.
Wants=network-online.target
After=network-online.target
[Service]
Type=oneshot
# The .ok timestamp marker is written only when the backup script exits 0.
ExecStart=/bin/sh -c '/root/scripts/backup-ct103-gitea-pgdump.sh && date -Iseconds > /var/run/backup-ct103-gitea-pgdump.ok'
StandardOutput=journal
StandardError=journal
[Install]
WantedBy=multi-user.target

View File

@@ -0,0 +1,9 @@
# Daily trigger at 03:00 (local time). No Unit= is set, so the timer
# activates the service that shares its name.
[Unit]
Description=Backup Gitea DB daily at 03:00
[Timer]
OnCalendar=*-*-* 03:00:00
# A run missed while the timer was inactive is started on next activation.
Persistent=yes
[Install]
WantedBy=timers.target

View File

@@ -0,0 +1,14 @@
# Backup of the Paperless database (CT 104)
[Unit]
Description=Backup Paperless PostgreSQL (CT 104)
# After= only orders the unit; Wants= is what actually pulls in
# network-online.target so the job waits for the network when started early.
Wants=network-online.target
After=network-online.target
[Service]
Type=oneshot
# The .ok timestamp marker is written only when the backup script exits 0.
ExecStart=/bin/sh -c '/root/scripts/backup-ct104-pgdump.sh && date -Iseconds > /var/run/backup-ct104-pgdump.ok'
StandardOutput=journal
StandardError=journal
[Install]
WantedBy=multi-user.target

View File

@@ -0,0 +1,9 @@
# Daily trigger at 02:30 (local time). No Unit= is set, so the timer
# activates the service that shares its name.
[Unit]
Description=Backup Paperless DB daily at 02:30
[Timer]
OnCalendar=*-*-* 02:30:00
# A run missed while the timer was inactive is started on next activation.
Persistent=yes
[Install]
WantedBy=timers.target

View File

@@ -0,0 +1,14 @@
# Backup of the RAG vectors (CT 105)
[Unit]
Description=Backup RAG vectors (CT 105)
# After= only orders the unit; Wants= is what actually pulls in
# network-online.target so the job waits for the network when started early.
Wants=network-online.target
After=network-online.target
[Service]
Type=oneshot
# The .ok timestamp marker is written only when the backup script exits 0.
ExecStart=/bin/sh -c '/root/scripts/backup-ct105-vectors.sh && date -Iseconds > /var/run/backup-ct105-vectors.ok'
StandardOutput=journal
StandardError=journal
[Install]
WantedBy=multi-user.target

View File

@@ -0,0 +1,9 @@
# Daily trigger at 03:30 (local time). No Unit= is set, so the timer
# activates the service that shares its name.
[Unit]
Description=Backup RAG vectors daily at 03:30
[Timer]
OnCalendar=*-*-* 03:30:00
# A run missed while the timer was inactive is started on next activation.
Persistent=yes
[Install]
WantedBy=timers.target

View File

@@ -0,0 +1,14 @@
# Backup of /etc/pve and the host configuration files
[Unit]
Description=Backup Proxmox host config (/etc/pve, interfaces, hosts)
# After= only orders the unit; Wants= is what actually pulls in
# network-online.target so the job waits for the network when started early.
Wants=network-online.target
After=network-online.target
[Service]
Type=oneshot
# The .ok timestamp marker is written only when the backup script exits 0.
ExecStart=/bin/sh -c '/root/scripts/backup-etc-pve.sh && date -Iseconds > /var/run/backup-etc-pve.ok'
StandardOutput=journal
StandardError=journal
[Install]
WantedBy=multi-user.target

View File

@@ -0,0 +1,9 @@
# Daily trigger at 02:15 (local time). No Unit= is set, so the timer
# activates the service that shares its name.
[Unit]
Description=Backup etc-pve daily at 02:15
[Timer]
OnCalendar=*-*-* 02:15:00
# A run missed while the timer was inactive is started on next activation.
Persistent=yes
[Install]
WantedBy=timers.target

View File

@@ -0,0 +1,14 @@
# Ping Healthchecks after the backup window (dead man's switch)
[Unit]
Description=Ping Healthchecks (homelab backups)
# After= only orders the unit; Wants= is what actually pulls in
# network-online.target so the ping waits for the network when started early.
Wants=network-online.target
After=network-online.target
[Service]
Type=oneshot
ExecStart=/root/scripts/healthcheck-ping.sh
StandardOutput=journal
StandardError=journal
[Install]
WantedBy=multi-user.target

View File

@@ -0,0 +1,9 @@
# Daily trigger at 04:35 (local time). No Unit= is set, so the timer
# activates the service that shares its name.
# NOTE(review): this fires at a fixed time regardless of whether the restic
# uploads started at 04:00/04:10 have finished; whether the ping reflects
# backup success depends on healthcheck-ping.sh — confirm.
[Unit]
Description=Ping Healthchecks daily at 04:35 (after backup window)
[Timer]
OnCalendar=*-*-* 04:35:00
# A run missed while the timer was inactive is started on next activation.
Persistent=yes
[Install]
WantedBy=timers.target

View File

@@ -0,0 +1,14 @@
# Backup of the Immich photo library (rsync from VM 200)
[Unit]
Description=Backup Immich photos (rsync from VM 200)
# After= only orders the unit; Wants= is what actually pulls in
# network-online.target so the job waits for the network when started early.
Wants=network-online.target
After=network-online.target
[Service]
Type=oneshot
# The .ok timestamp marker is written only when the backup script exits 0.
ExecStart=/bin/sh -c '/root/scripts/backup-immich-photos.sh && date -Iseconds > /var/run/backup-immich-photos.ok'
StandardOutput=journal
StandardError=journal
[Install]
WantedBy=multi-user.target

View File

@@ -0,0 +1,9 @@
# Daily trigger at 01:30 (local time). No Unit= is set, so the timer
# activates the service that shares its name.
[Unit]
Description=Backup Immich photos daily at 01:30
[Timer]
OnCalendar=*-*-* 01:30:00
# A run missed while the timer was inactive is started on next activation.
Persistent=yes
[Install]
WantedBy=timers.target

View File

@@ -0,0 +1,16 @@
# Upload of /mnt/backup/photos to Yandex S3 via restic
[Unit]
Description=Backup photos to Yandex S3 (restic)
# After= only orders the unit; Wants= is what actually pulls in
# network-online.target so the upload waits for the network when started early.
Wants=network-online.target
After=network-online.target
[Service]
Type=oneshot
# HOME and PATH are set explicitly for the script's environment.
Environment=HOME=/root
Environment=PATH=/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
# The .ok timestamp marker is written only when the backup script exits 0.
ExecStart=/bin/sh -c '/root/scripts/backup-restic-yandex-photos.sh && date -Iseconds > /var/run/backup-restic-yandex-photos.ok'
StandardOutput=journal
StandardError=journal
[Install]
WantedBy=multi-user.target

View File

@@ -0,0 +1,9 @@
# Daily trigger at 04:10 (local time). No Unit= is set, so the timer
# activates the service that shares its name.
[Unit]
Description=Restic backup photos to Yandex daily at 04:10
[Timer]
OnCalendar=*-*-* 04:10:00
# A run missed while the timer was inactive is started on next activation.
Persistent=yes
[Install]
WantedBy=timers.target

View File

@@ -0,0 +1,16 @@
# Upload of /mnt/backup (excluding photos) to Yandex S3 via restic
[Unit]
Description=Backup to Yandex S3 (restic, main)
# After= only orders the unit; Wants= is what actually pulls in
# network-online.target so the upload waits for the network when started early.
Wants=network-online.target
After=network-online.target
[Service]
Type=oneshot
# HOME and PATH are set explicitly for the script's environment.
Environment=HOME=/root
Environment=PATH=/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
# The .ok timestamp marker is written only when the backup script exits 0.
ExecStart=/bin/sh -c '/root/scripts/backup-restic-yandex.sh && date -Iseconds > /var/run/backup-restic-yandex.ok'
StandardOutput=journal
StandardError=journal
[Install]
WantedBy=multi-user.target

View File

@@ -0,0 +1,9 @@
# Daily trigger at 04:00 (local time). No Unit= is set, so the timer
# activates the service that shares its name.
[Unit]
Description=Restic backup to Yandex daily at 04:00
[Timer]
OnCalendar=*-*-* 04:00:00
# A run missed while the timer was inactive is started on next activation.
Persistent=yes
[Install]
WantedBy=timers.target

View File

@@ -0,0 +1,14 @@
# Backup of the Vaultwarden data (CT 103)
[Unit]
Description=Backup Vaultwarden data (CT 103)
# After= only orders the unit; Wants= is what actually pulls in
# network-online.target so the job waits for the network when started early.
Wants=network-online.target
After=network-online.target
[Service]
Type=oneshot
# The .ok timestamp marker is written only when the backup script exits 0.
ExecStart=/bin/sh -c '/root/scripts/backup-vaultwarden-data.sh && date -Iseconds > /var/run/backup-vaultwarden-data.ok'
StandardOutput=journal
StandardError=journal
[Install]
WantedBy=multi-user.target

View File

@@ -0,0 +1,9 @@
# Daily trigger at 02:45 (local time). No Unit= is set, so the timer
# activates the service that shares its name.
[Unit]
Description=Backup Vaultwarden daily at 02:45
[Timer]
OnCalendar=*-*-* 02:45:00
# A run missed while the timer was inactive is started on next activation.
Persistent=yes
[Install]
WantedBy=timers.target

View File

@@ -0,0 +1,14 @@
# Backup of the Immich database (VM 200)
[Unit]
Description=Backup Immich PostgreSQL (VM 200)
# After= only orders the unit; Wants= is what actually pulls in
# network-online.target so the job waits for the network when started early.
Wants=network-online.target
After=network-online.target
[Service]
Type=oneshot
# The .ok timestamp marker is written only when the backup script exits 0.
ExecStart=/bin/sh -c '/root/scripts/backup-vm200-pgdump.sh && date -Iseconds > /var/run/backup-vm200-pgdump.ok'
StandardOutput=journal
StandardError=journal
[Install]
WantedBy=multi-user.target

View File

@@ -0,0 +1,9 @@
# Daily trigger at 03:15 (local time). No Unit= is set, so the timer
# activates the service that shares its name.
[Unit]
Description=Backup Immich DB daily at 03:15
[Timer]
OnCalendar=*-*-* 03:15:00
# A run missed while the timer was inactive is started on next activation.
Persistent=yes
[Install]
WantedBy=timers.target

View File

@@ -0,0 +1,17 @@
# Copy to the Proxmox host: /etc/systemd/system/
# systemctl daemon-reload && systemctl enable --now backup-vps-miran.timer
# Remove the old cron entry: 0 1 * * *
[Unit]
Description=Backup VPS Miran (БД бота, voice_users, S3)
# After= only orders the unit; Wants= is what actually pulls in
# network-online.target so the job waits for the network when started early.
Wants=network-online.target
After=network-online.target
[Service]
Type=oneshot
# The .ok marker is written only on success (it is read by the watchdog).
ExecStart=/bin/sh -c '/root/scripts/backup-vps-miran.sh && date -Iseconds > /var/run/backup-vps-miran.ok'
StandardOutput=journal
StandardError=journal
[Install]
WantedBy=multi-user.target

View File

@@ -0,0 +1,9 @@
# Daily trigger at 01:00 (local time). No Unit= is set, so the timer
# activates the service that shares its name.
[Unit]
Description=Backup VPS Miran daily at 01:00
[Timer]
OnCalendar=*-*-* 01:00:00
# A run missed while the timer was inactive is started on next activation.
Persistent=yes
[Install]
WantedBy=timers.target

View File

@@ -0,0 +1,14 @@
# Backup of the MTProto configs and the website (VPS Germany)
[Unit]
Description=Backup VPS MTProto (Germany)
# After= only orders the unit; Wants= is what actually pulls in
# network-online.target so the job waits for the network when started early.
Wants=network-online.target
After=network-online.target
[Service]
Type=oneshot
# The .ok timestamp marker is written only when the backup script exits 0.
ExecStart=/bin/sh -c '/root/scripts/backup-vps-mtproto.sh && date -Iseconds > /var/run/backup-vps-mtproto.ok'
StandardOutput=journal
StandardError=journal
[Install]
WantedBy=multi-user.target

View File

@@ -0,0 +1,9 @@
# Daily trigger at 01:45 (local time). No Unit= is set, so the timer
# activates the service that shares its name.
[Unit]
Description=Backup VPS MTProto daily at 01:45
[Timer]
OnCalendar=*-*-* 01:45:00
# A run missed while the timer was inactive is started on next activation.
Persistent=yes
[Install]
WantedBy=timers.target

View File

@@ -0,0 +1,14 @@
# Watchdog: checks for failed timers and stale healthcheck (.ok) files
[Unit]
Description=Backup watchdog (failed timers, stale .ok files)
# After= only orders the unit; Wants= is what actually pulls in
# network-online.target so the job waits for the network when started early.
Wants=network-online.target
After=network-online.target
[Service]
Type=oneshot
ExecStart=/root/scripts/watchdog-timers.sh
StandardOutput=journal
StandardError=journal
[Install]
WantedBy=multi-user.target

View File

@@ -0,0 +1,9 @@
# Daily trigger at 12:00 (local time). No Unit= is set, so the timer
# activates the service that shares its name.
[Unit]
Description=Backup watchdog daily at 12:00
[Timer]
OnCalendar=*-*-* 12:00:00
# A run missed while the timer was inactive is started on next activation.
Persistent=yes
[Install]
WantedBy=timers.target

View File

@@ -0,0 +1,15 @@
# Checks the local vzdump output of the last 2 hours and sends a summary to Telegram.
# The vzdump job itself is configured in the Proxmox UI and runs at 02:00.
[Unit]
Description=Notify vzdump success (check dump dir, send Telegram)
# After= only orders the unit; Wants= is what actually pulls in
# network-online.target so the notification waits for the network when started early.
Wants=network-online.target
After=network-online.target
[Service]
Type=oneshot
ExecStart=/root/scripts/notify-vzdump-success.sh
StandardOutput=journal
StandardError=journal
[Install]
WantedBy=multi-user.target

View File

@@ -0,0 +1,9 @@
# Daily trigger at 03:00 (local time). No Unit= is set, so the timer
# activates the service that shares its name.
# NOTE(review): the service comment places the vzdump job at 02:00; if a dump
# takes longer than an hour this check may run before it finishes — confirm.
[Unit]
Description=Notify vzdump success daily at 03:00
[Timer]
OnCalendar=*-*-* 03:00:00
# A run missed while the timer was inactive is started on next activation.
Persistent=yes
[Install]
WantedBy=timers.target

View File

@@ -0,0 +1,14 @@
# Restic check --read-data (every 6 months: Jan 1 and Jul 1)
[Unit]
Description=Verify restic repository (full read-data, semiannual)
# After= only orders the unit; Wants= is what actually pulls in
# network-online.target so the check waits for the network when started early.
Wants=network-online.target
After=network-online.target
[Service]
Type=oneshot
# The script is invoked in its "full-check" mode.
ExecStart=/root/scripts/verify-restore-level1.sh full-check
StandardOutput=journal
StandardError=journal
[Install]
WantedBy=multi-user.target

View File

@@ -0,0 +1,10 @@
# Fires twice a year, on Jan 1 and Jul 1 at 10:00 (local time); multiple
# OnCalendar= lines are additive. No Unit= is set, so the timer activates the
# service that shares its name.
# NOTE(review): on both dates the monthly read-data-subset timer
# (OnCalendar=*-*-01 10:00:00) fires at the same moment — confirm that
# verify-restore-level1.sh serializes access to the restic repository.
[Unit]
Description=Restic full check semiannual (Jan 1, Jul 1 at 10:00)
[Timer]
OnCalendar=*-01-01 10:00:00
OnCalendar=*-07-01 10:00:00
# A run missed while the timer was inactive is started on next activation.
Persistent=yes
[Install]
WantedBy=timers.target

View File

@@ -0,0 +1,14 @@
# Restic check --read-data-subset=10% (monthly, on the 1st)
[Unit]
Description=Verify restic repository (monthly read-data-subset)
# After= only orders the unit; Wants= is what actually pulls in
# network-online.target so the check waits for the network when started early.
Wants=network-online.target
After=network-online.target
[Service]
Type=oneshot
# The script is invoked in its "monthly-check" mode.
ExecStart=/root/scripts/verify-restore-level1.sh monthly-check
StandardOutput=journal
StandardError=journal
[Install]
WantedBy=multi-user.target

View File

@@ -0,0 +1,9 @@
# Monthly trigger on the 1st at 10:00 (local time). No Unit= is set, so the
# timer activates the service that shares its name.
# NOTE(review): on Jan 1 and Jul 1 the semiannual full-check timer fires at the
# same 10:00 — confirm the two checks do not contend for the restic repository.
[Unit]
Description=Restic check read-data-subset monthly (1st at 10:00)
[Timer]
OnCalendar=*-*-01 10:00:00
# A run missed while the timer was inactive is started on next activation.
Persistent=yes
[Install]
WantedBy=timers.target

View File

@@ -0,0 +1,14 @@
# Restore test of the Nextcloud dump from restic (monthly)
[Unit]
Description=Verify Nextcloud dump restore from restic (monthly)
# After= only orders the unit; Wants= is what actually pulls in
# network-online.target so the test waits for the network when started early.
Wants=network-online.target
After=network-online.target
[Service]
Type=oneshot
# The script is invoked in its "monthly-dump" mode.
ExecStart=/root/scripts/verify-restore-level1.sh monthly-dump
StandardOutput=journal
StandardError=journal
[Install]
WantedBy=multi-user.target

View File

@@ -0,0 +1,9 @@
# Monthly trigger on the 1st at 11:00 (local time). No Unit= is set, so the
# timer activates the service that shares its name.
# NOTE(review): the restic check timers fire at 10:00 on the same day; confirm
# they finish within the hour or that the restore tolerates a busy repository.
[Unit]
Description=Verify Nextcloud dump restore monthly (1st at 11:00)
[Timer]
OnCalendar=*-*-01 11:00:00
# A run missed while the timer was inactive is started on next activation.
Persistent=yes
[Install]
WantedBy=timers.target

View File

@@ -0,0 +1,14 @@
# Restic check (weekly)
[Unit]
Description=Verify restic repository (weekly check)
# After= only orders the unit; Wants= is what actually pulls in
# network-online.target so the check waits for the network when started early.
Wants=network-online.target
After=network-online.target
[Service]
Type=oneshot
# The script is invoked in its "weekly" mode.
ExecStart=/root/scripts/verify-restore-level1.sh weekly
StandardOutput=journal
StandardError=journal
[Install]
WantedBy=multi-user.target

View File

@@ -0,0 +1,9 @@
# Weekly trigger on Sunday at 03:00 (local time). No Unit= is set, so the
# timer activates the service that shares its name.
# NOTE(review): the restic upload timers fire at 04:00 and 04:10; confirm the
# weekly check reliably finishes before then or that the upload scripts cope
# with a repository that is still being checked.
[Unit]
Description=Restic check weekly (Sunday 03:00)
[Timer]
OnCalendar=Sun *-*-* 03:00:00
# A run missed while the timer was inactive is started on next activation.
Persistent=yes
[Install]
WantedBy=timers.target

View File

@@ -0,0 +1,14 @@
# Automated vzdump restore test for CT 107 (monthly)
[Unit]
Description=Verify vzdump restore (CT 107, monthly)
# After= only orders the unit; Wants= is what actually pulls in
# network-online.target so the test waits for the network when started early.
Wants=network-online.target
After=network-online.target
[Service]
Type=oneshot
ExecStart=/root/scripts/verify-vzdump-level2.sh
StandardOutput=journal
StandardError=journal
[Install]
WantedBy=multi-user.target

View File

@@ -0,0 +1,9 @@
# Monthly trigger on the 1st at 12:00 (local time). No Unit= is set, so the
# timer activates the service that shares its name.
[Unit]
Description=Verify vzdump restore monthly (1st at 12:00)
[Timer]
OnCalendar=*-*-01 12:00:00
# A run missed while the timer was inactive is started on next activation.
Persistent=yes
[Install]
WantedBy=timers.target