Update container documentation to reflect disk space adjustments and Docker log management

Expand the root disk size from 35 GB to 50 GB and implement log size limits for Docker containers. Add details about the new monitoring dashboard for homelab services, including deployment instructions and access URL. Ensure clarity on log rotation policies and risks associated with disk space usage.
This commit is contained in:
2026-02-28 17:10:34 +03:00
parent 53769e6832
commit 604f0c705f
10 changed files with 683 additions and 10 deletions

View File

@@ -0,0 +1,28 @@
#!/bin/bash
# Add the "Homelab Dashboard" entry to Homepage (services.yaml on CT 103).
# Usage (from the Proxmox host):
#   pct exec 103 -- bash -s < /root/scripts/dashboard/add-to-homepage.sh
# The target file can be overridden through the SERVICES_YAML environment variable.
set -e
SERVICES_YAML="${SERVICES_YAML:-/opt/docker/homepage/config/services.yaml}"
if [ ! -f "$SERVICES_YAML" ]; then
  echo "ERROR: $SERVICES_YAML not found" >&2
  exit 1
fi
# Idempotency guard: do nothing if the entry is already present.
if grep -q "Homelab Dashboard" "$SERVICES_YAML" 2>/dev/null; then
  echo "Homelab Dashboard already in services.yaml"
  exit 0
fi
# Insert right after the Netdata block (anchor line: ping: http://192.168.1.150:19999)
sed -i '/ping: http:\/\/192.168.1.150:19999$/a\
- Homelab Dashboard:\
icon: mdi-chart-box\
href: http://192.168.1.150:19998\
description: Мониторинг хоста, контейнеров, сервисов\
target: _blank
' "$SERVICES_YAML"
# sed exits 0 even when the anchor line does not exist, so confirm that the
# entry was really written before reporting success.
if ! grep -q "Homelab Dashboard" "$SERVICES_YAML"; then
  echo "ERROR: Netdata anchor line not found in $SERVICES_YAML; nothing was added" >&2
  exit 1
fi
echo "Added Homelab Dashboard to services.yaml"

View File

@@ -0,0 +1,112 @@
#!/usr/bin/env python3
"""
Экспортер метрик для дашборда homelab: disk % и OOM по контейнерам/VM.
Запуск: python3 dashboard-exporter.py (выводит JSON в stdout)
"""
import json
import subprocess
import sys
from pathlib import Path
# Monitored guests, one tuple per guest: (vmid, type, name, cgroup_name).
# "type" is "lxc" or "qemu"; cgroup_name is the identifier exported for Netdata lookups.
CONTAINERS = [
(100, "lxc", "nginx", "cgroup_nginx"),
(101, "lxc", "nextcloud", "cgroup_nextcloud"),
(103, "lxc", "gitea", "cgroup_gitea"),
(104, "lxc", "paperless", "cgroup_paperless"),
(105, "lxc", "rag-service", "cgroup_rag-service"),
(107, "lxc", "misc", "cgroup_misc"),
(108, "lxc", "galene", "cgroup_galene"),
(109, "lxc", "local-vpn", "cgroup_local-vpn"),
(200, "qemu", "immich", "cgroup_qemu_immich"),
]
# cgroup directories whose memory.events files are read for OOM counters:
# LXC guests live under LXC_CGROUP/<vmid>; only VM 200 is handled for QEMU.
LXC_CGROUP = Path("/sys/fs/cgroup/lxc")
QEMU_CGROUP_200 = Path("/sys/fs/cgroup/qemu.slice/200.scope")
def get_disk_pct_lxc(vmid: int) -> float | None:
"""Disk % для LXC через pct exec df."""
try:
r = subprocess.run(
["pct", "exec", str(vmid), "--", "df", "-P", "/"],
capture_output=True,
text=True,
timeout=10,
)
if r.returncode != 0:
return None
lines = r.stdout.strip().split("\n")
if len(lines) < 2:
return None
# Формат: Filesystem 1K-blocks Used Available Use% Mounted
parts = lines[-1].split()
if len(parts) >= 5:
use_pct = parts[4].rstrip("%")
return float(use_pct)
except (subprocess.TimeoutExpired, ValueError):
pass
return None
def get_disk_pct_vm200() -> float | None:
"""Disk % для VM 200 через lvs (fallback, т.к. qm guest exec часто недоступен)."""
try:
r = subprocess.run(
["lvs", "-o", "data_percent", "--noheadings", "pve/vm-200-disk-0"],
capture_output=True,
text=True,
timeout=5,
)
if r.returncode != 0:
return None
val = r.stdout.strip()
if val:
return float(val)
except (subprocess.TimeoutExpired, ValueError):
pass
return None
def get_oom_count(vmid: int, vmtype: str) -> int | None:
"""OOM count из cgroup memory.events."""
if vmtype == "lxc":
path = LXC_CGROUP / str(vmid) / "memory.events"
elif vmtype == "qemu" and vmid == 200:
path = QEMU_CGROUP_200 / "memory.events"
else:
return None
if not path.exists():
return None
try:
text = path.read_text()
for line in text.splitlines():
if line.startswith("oom_kill "):
return int(line.split()[1])
except (OSError, ValueError):
pass
return None
def main() -> None:
    """Collect disk usage and OOM counters for every guest and print them as JSON.

    The output is a single JSON object on stdout with the keys ``containers``
    (one entry per item in CONTAINERS) and ``ok``.
    """
    rows = []
    for vmid, vmtype, name, cgroup_name in CONTAINERS:
        # Disk usage source depends on the guest type; unsupported types get None.
        if vmtype == "lxc":
            disk_pct = get_disk_pct_lxc(vmid)
        elif vmtype == "qemu" and vmid == 200:
            disk_pct = get_disk_pct_vm200()
        else:
            disk_pct = None
        rows.append({
            "vmid": vmid,
            "name": name,
            "cgroup_name": cgroup_name,
            "disk_pct": disk_pct,
            "oom_count": get_oom_count(vmid, vmtype),
        })
    print(json.dumps({"containers": rows, "ok": True}, ensure_ascii=False))


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,104 @@
#!/usr/bin/env python3
"""
HTTP-сервер дашборда homelab: статика, /api/containers, прокси к Netdata.
Порт: 19998 (по умолчанию).
"""
import json
import os
import subprocess
import sys
import urllib.request
from http.server import HTTPServer, BaseHTTPRequestHandler
from pathlib import Path
# Listening port; overridable through the DASHBOARD_PORT environment variable.
PORT = int(os.environ.get("DASHBOARD_PORT", "19998"))
# Base URL of the Netdata instance that /api/netdata requests are proxied to.
NETDATA_URL = os.environ.get("NETDATA_URL", "http://127.0.0.1:19999")
# Directory containing this script; index.html and the exporter are looked up here.
SCRIPT_DIR = Path(__file__).resolve().parent
EXPORTER = SCRIPT_DIR / "dashboard-exporter.py"
class DashboardHandler(BaseHTTPRequestHandler):
    """HTTP request handler of the homelab dashboard (GET only).

    Routes:
        ``/``                 - serves ``index.html`` from SCRIPT_DIR.
        ``/api/containers``   - runs the EXPORTER script and returns its JSON.
        ``/api/netdata...``   - proxies the query string to
                                ``NETDATA_URL/api/v1/data``.
    Anything else yields 404.
    """

    def log_message(self, format, *args):
        """Suppress the default per-request console logging."""
        pass

    def _send_body(self, status: int, content_type: str, body: bytes, headers=()):
        """Write one complete response: status, headers, Content-Length, body."""
        self.send_response(status)
        self.send_header("Content-Type", content_type)
        for name, value in headers:
            self.send_header(name, value)
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    def send_json(self, data: dict, status: int = 200):
        """Send ``data`` as a UTF-8 JSON response with a permissive CORS header."""
        body = json.dumps(data, ensure_ascii=False).encode("utf-8")
        self._send_body(
            status,
            "application/json; charset=utf-8",
            body,
            (("Access-Control-Allow-Origin", "*"),),
        )

    def send_html(self, html: bytes, status: int = 200):
        """Send pre-encoded HTML bytes with caching disabled."""
        self._send_body(
            status,
            "text/html; charset=utf-8",
            html,
            (("Cache-Control", "no-cache"),),
        )

    def do_GET(self):
        """Dispatch a GET request by path (query string and trailing slash ignored)."""
        path = self.path.split("?")[0].rstrip("/") or "/"
        if path == "/":
            self.serve_index()
        elif path == "/api/containers":
            self.serve_containers()
        elif path.startswith("/api/netdata"):
            self.proxy_netdata()
        else:
            self.send_error(404)

    def serve_index(self):
        """Serve index.html located next to this script, or 404 if it is missing."""
        html_file = SCRIPT_DIR / "index.html"
        if html_file.exists():
            self.send_html(html_file.read_bytes())
        else:
            self.send_error(404, "index.html not found")

    def serve_containers(self):
        """Run the exporter script and relay its JSON output.

        Responds with 504 on exporter timeout and 500 on any other failure
        (non-zero exit status, launch error, invalid JSON).
        """
        try:
            r = subprocess.run(
                [sys.executable, str(EXPORTER)],
                capture_output=True,
                text=True,
                timeout=30,
                cwd=str(SCRIPT_DIR),
            )
        except subprocess.TimeoutExpired:
            self.send_json({"ok": False, "error": "timeout"}, 504)
            return
        except Exception as e:
            self.send_json({"ok": False, "error": str(e)}, 500)
            return
        if r.returncode != 0:
            self.send_json({"ok": False, "error": r.stderr or "exporter failed"}, 500)
            return
        try:
            data = json.loads(r.stdout)
        except ValueError as e:  # json.JSONDecodeError is a ValueError
            self.send_json({"ok": False, "error": str(e)}, 500)
            return
        # The response is written outside the try blocks so that a failure
        # while writing can never trigger a second (error) response.
        self.send_json(data)

    def proxy_netdata(self):
        """Forward the request's query string to Netdata's /api/v1/data endpoint.

        Any failure while fetching from Netdata is reported as a 502 JSON error.
        """
        _, _, qs = self.path.partition("?")
        url = f"{NETDATA_URL}/api/v1/data?{qs}" if qs else f"{NETDATA_URL}/api/v1/data"
        try:
            req = urllib.request.Request(url)
            with urllib.request.urlopen(req, timeout=10) as resp:
                data = resp.read()
                content_type = resp.headers.get("Content-Type", "application/json")
        except Exception as e:
            self.send_json({"error": str(e)}, 502)
            return
        # Upstream fetch is finished; only now start writing to the client.
        self._send_body(200, content_type, data, (("Access-Control-Allow-Origin", "*"),))
def main():
    """Start the dashboard HTTP server on 0.0.0.0:PORT and serve until interrupted.

    A threading server is used so that one slow request (the exporter behind
    /api/containers may run for up to 30 seconds) does not block the other
    requests the page issues at the same time.
    """
    # Local import keeps this change self-contained; ThreadingHTTPServer is
    # part of the standard library (Python 3.7+).
    from http.server import ThreadingHTTPServer

    server = ThreadingHTTPServer(("0.0.0.0", PORT), DashboardHandler)
    print(f"Dashboard server on http://0.0.0.0:{PORT}", file=sys.stderr)
    try:
        server.serve_forever()
    except KeyboardInterrupt:
        pass
    finally:
        # Release the listening socket on shutdown.
        server.server_close()


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,35 @@
#!/bin/bash
# Deploy the homelab dashboard onto the Proxmox host.
# Usage: on the Proxmox host, or: ssh root@192.168.1.150 'bash -s' < scripts/dashboard/deploy-dashboard.sh
# Or from the repository: ./scripts/dashboard/deploy-dashboard.sh (copies from the current checkout)
# NOTE(review): when piped through 'bash -s', $0 is presumably not the script path,
# so SCRIPT_DIR/REPO_ROOT may not point at the repository - confirm REPO_ROOT is set in that mode.
set -e
# REPO_ROOT: repository root (contains scripts/dashboard/); overridable via the environment.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
REPO_ROOT="${REPO_ROOT:-$(cd "$SCRIPT_DIR/../.." && pwd)}"
DASHBOARD_SRC="${REPO_ROOT}/scripts/dashboard"
DEST="/root/scripts/dashboard"
SYSTEMD_DEST="/etc/systemd/system"
# Timestamped log helper.
log() { echo "[$(date -Iseconds)] $*"; }
log "Deploying homelab dashboard..."
mkdir -p "$DEST"
# Skip the copy when the script already runs from the destination directory.
if [ "$(realpath "$DASHBOARD_SRC")" != "$(realpath "$DEST")" ]; then
cp -v "${DASHBOARD_SRC}/dashboard-exporter.py" "$DEST/"
cp -v "${DASHBOARD_SRC}/dashboard-server.py" "$DEST/"
cp -v "${DASHBOARD_SRC}/index.html" "$DEST/"
fi
chmod +x "${DEST}/dashboard-exporter.py" "${DEST}/dashboard-server.py"
# Install the unit file only if the repository provides one.
if [ -f "${REPO_ROOT}/scripts/systemd/homelab-dashboard.service" ]; then
cp -v "${REPO_ROOT}/scripts/systemd/homelab-dashboard.service" "$SYSTEMD_DEST/"
fi
# Reload unit definitions, enable the service at boot and (re)start it.
systemctl daemon-reload
systemctl enable homelab-dashboard.service
systemctl restart homelab-dashboard.service
log "Dashboard deployed. URL: http://192.168.1.150:19998"
log "Status: $(systemctl is-active homelab-dashboard.service)"

View File

@@ -0,0 +1,210 @@
<!DOCTYPE html>
<html lang="ru">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Homelab Dashboard</title>
<style>
:root { --bg: #0d1117; --card: #161b22; --text: #e6edf3; --muted: #8b949e; --accent: #58a6ff; --ok: #3fb950; --warn: #d29922; --err: #f85149; }
* { box-sizing: border-box; }
body { font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; background: var(--bg); color: var(--text); margin: 0; padding: 1rem; line-height: 1.5; }
h1 { font-size: 1.25rem; margin: 0 0 1rem; }
h2 { font-size: 1rem; margin: 0 0 0.5rem; color: var(--muted); font-weight: 500; }
.card { background: var(--card); border-radius: 8px; padding: 1rem; margin-bottom: 1rem; }
.grid { display: grid; grid-template-columns: repeat(auto-fill, minmax(140px, 1fr)); gap: 0.75rem; }
.metric { text-align: center; }
.metric-value { font-size: 1.5rem; font-weight: 600; }
.metric-label { font-size: 0.75rem; color: var(--muted); }
table { width: 100%; border-collapse: collapse; }
th, td { padding: 0.5rem; text-align: left; border-bottom: 1px solid #30363d; }
th { color: var(--muted); font-weight: 500; font-size: 0.85rem; }
.pct-ok { color: var(--ok); }
.pct-warn { color: var(--warn); }
.pct-err { color: var(--err); }
.links { display: flex; flex-wrap: wrap; gap: 0.5rem; margin-top: 0.5rem; }
.links a { color: var(--accent); text-decoration: none; font-size: 0.9rem; }
.links a:hover { text-decoration: underline; }
.loading { color: var(--muted); }
.error { color: var(--err); }
.updated { font-size: 0.75rem; color: var(--muted); margin-top: 0.5rem; }
</style>
</head>
<body>
<h1>Homelab Dashboard</h1>
<div class="card">
<h2>Блок 1 — Хост</h2>
<div class="grid" id="host-metrics">
<div class="metric"><span class="metric-value loading"></span><span class="metric-label">CPU %</span></div>
<div class="metric"><span class="metric-value loading"></span><span class="metric-label">RAM</span></div>
<div class="metric"><span class="metric-value loading"></span><span class="metric-label">Load</span></div>
<div class="metric"><span class="metric-value loading"></span><span class="metric-label">iowait %</span></div>
<div class="metric"><span class="metric-value loading"></span><span class="metric-label">Disk /</span></div>
<div class="metric"><span class="metric-value loading"></span><span class="metric-label">Disk backup</span></div>
<div class="metric"><span class="metric-value loading"></span><span class="metric-label">Disk nextcloud-hdd</span></div>
<div class="metric"><span class="metric-value loading"></span><span class="metric-label">Disk tank</span></div>
</div>
</div>
<div class="card">
<h2>Блок 2 — Контейнеры</h2>
<table>
<thead>
<tr><th>Контейнер</th><th>CPU %</th><th>RAM %</th><th>Disk %</th><th>OOM</th></tr>
</thead>
<tbody id="containers-table"></tbody>
</table>
</div>
<div class="card">
<h2>Блок 3 — Критические сервисы</h2>
<div class="links">
<a href="http://192.168.1.150:19999/#menu_system_submenu_cpu;netdata" target="_blank">Netdata (CPU)</a>
<a href="http://192.168.1.150:19999/#menu_cgroup_nginx;netdata" target="_blank">nginx (CT 100)</a>
<a href="http://192.168.1.150:19999/#menu_cgroup_nextcloud;netdata" target="_blank">Nextcloud (CT 101)</a>
<a href="http://192.168.1.150:19999/#menu_cgroup_qemu_immich;netdata" target="_blank">Immich (VM 200)</a>
<a href="http://192.168.1.150:19999/#menu_cgroup_local-vpn;netdata" target="_blank">VPN (CT 109)</a>
</div>
</div>
<div class="updated" id="updated"></div>
<div id="status" class="updated" style="color:var(--muted)"></div>
<script>
// Base URL for the dashboard's own endpoints (/api/containers, /api/netdata).
const API = window.location.origin; // explicitly use the current page origin
// Map a percentage to a CSS class: >= 90 is an error, >= 75 a warning,
// anything else is ok. null/undefined gets no class at all.
function pctClass(v) {
  if (v == null) {
    return '';
  }
  return v >= 90 ? 'pct-err' : v >= 75 ? 'pct-warn' : 'pct-ok';
}
// Format a value for display: a dash for null/undefined, one decimal place
// for numbers, plain string conversion otherwise; the suffix is appended.
function fmt(v, suffix = '') {
  if (v == null) {
    return '—';
  }
  const text = typeof v === 'number' ? v.toFixed(1) : String(v);
  return text + suffix;
}
// Fetch the latest `points` samples of a Netdata chart through the
// dashboard's /api/netdata proxy. Rejects with "<chart>: <status>" when the
// response is not ok; otherwise resolves to the parsed JSON body.
async function fetchNetdata(chart, points = 1) {
  const query = `chart=${encodeURIComponent(chart)}&points=${points}&format=json`;
  const response = await fetch(`${API}/api/netdata?${query}`);
  if (!response.ok) {
    throw new Error(`${chart}: ${response.status}`);
  }
  return response.json();
}
// Load host-level charts from Netdata and render the eight metric tiles.
// A chart that fails to load does not abort the others; its tile shows a dash.
async function loadHost() {
  try {
    const chartNames = [
      'system.cpu',
      'system.ram',
      'system.load',
      'disk_space./',
      'disk_space./mnt/backup',
      'disk_space./mnt/nextcloud-hdd',
      'disk_space./tank',
    ];
    const settled = await Promise.allSettled(chartNames.map((name) => fetchNetdata(name)));
    const [cpu, ram, load, diskRoot, diskBackup, diskNextcloud, diskTank] =
      settled.map((s) => (s.status === 'fulfilled' ? s.value : null));

    // First data row of a chart looked up by dimension label; null when the
    // chart has no row or no labels.
    const dimValue = (chart, dim) => {
      const row = chart?.data?.[0];
      return row && chart?.labels ? row[chart.labels.indexOf(dim)] : null;
    };

    const cpuRow = cpu?.data?.[0];
    const cpuLabels = cpu?.labels || [];
    const cpuDims = ['user', 'system', 'nice', 'iowait', 'irq', 'softirq', 'steal', 'guest', 'guest_nice'];
    let cpuTotal = null;
    if (cpuRow) {
      // Total CPU is the sum of all busy dimensions; missing ones count as 0.
      cpuTotal = cpuLabels.indexOf('user') >= 0
        ? cpuDims.reduce((sum, dim) => sum + (cpuRow[cpuLabels.indexOf(dim)] || 0), 0)
        : 0;
    }
    const iowait = cpuRow && cpuLabels.indexOf('iowait') >= 0 ? cpuRow[cpuLabels.indexOf('iowait')] : null;
    const ramUsed = dimValue(ram, 'used');
    const load15 = dimValue(load, 'load15');

    // disk_space reports avail/used; percentage is used / (used + avail) * 100.
    const diskPct = (chart) => {
      if (!chart?.data?.[0] || !chart?.labels) return null;
      const usedIdx = chart.labels.indexOf('used');
      const availIdx = chart.labels.indexOf('avail');
      if (usedIdx < 0 || availIdx < 0) return null;
      const used = chart.data[0][usedIdx];
      const avail = chart.data[0][availIdx];
      const total = used + avail;
      return total > 0 ? (used / total * 100) : null;
    };
    const diskRootUsed = diskPct(diskRoot);
    const diskBackupUsed = diskPct(diskBackup);
    const diskNextcloudUsed = diskPct(diskNextcloud);
    const diskTankUsed = diskPct(diskTank);

    document.getElementById('host-metrics').innerHTML = `
<div class="metric"><span class="metric-value">${fmt(cpuTotal, '%')}</span><span class="metric-label">CPU %</span></div>
<div class="metric"><span class="metric-value">${fmt(ramUsed, ' MiB')}</span><span class="metric-label">RAM used</span></div>
<div class="metric"><span class="metric-value">${fmt(load15)}</span><span class="metric-label">Load 15</span></div>
<div class="metric"><span class="metric-value">${fmt(iowait, '%')}</span><span class="metric-label">iowait %</span></div>
<div class="metric"><span class="metric-value ${pctClass(diskRootUsed)}">${fmt(diskRootUsed, '%')}</span><span class="metric-label">Disk /</span></div>
<div class="metric"><span class="metric-value ${pctClass(diskBackupUsed)}">${fmt(diskBackupUsed, '%')}</span><span class="metric-label">Disk backup</span></div>
<div class="metric"><span class="metric-value ${pctClass(diskNextcloudUsed)}">${fmt(diskNextcloudUsed, '%')}</span><span class="metric-label">Disk nextcloud-hdd</span></div>
<div class="metric"><span class="metric-value ${pctClass(diskTankUsed)}">${fmt(diskTankUsed, '%')}</span><span class="metric-label">Disk tank</span></div>
`;
  } catch (e) {
    document.getElementById('host-metrics').innerHTML = `<div class="error">Ошибка: ${e.message}</div>`;
  }
}
// Netdata chart ids per cgroup name: "<cgroup>.cpu_limit" for CPU and
// "<cgroup>.mem_utilization" for memory.
const CGROUP_CHARTS = Object.fromEntries(
  [
    'cgroup_nginx',
    'cgroup_nextcloud',
    'cgroup_gitea',
    'cgroup_paperless',
    'cgroup_rag-service',
    'cgroup_misc',
    'cgroup_galene',
    'cgroup_local-vpn',
    'cgroup_qemu_immich',
  ].map((cgroup) => [cgroup, { cpu: `${cgroup}.cpu_limit`, mem: `${cgroup}.mem_utilization` }])
);
// Render the containers table: disk % and OOM count come from
// /api/containers, CPU % and RAM % from the per-cgroup Netdata charts.
// A Netdata chart that cannot be loaded only blanks its own cell (shown as a
// dash); the rest of the table is still rendered.
async function loadContainers() {
  // Value of dimension `dim` in the first data row, or null when the
  // response has no labels, no such dimension, or no value.
  const readDim = (d, dim) => {
    const idx = d?.labels?.indexOf(dim) ?? -1;
    const value = idx >= 0 ? d?.data?.[0]?.[idx] : null;
    return value != null ? value : null;
  };
  // Fetch one chart dimension; a failed fetch resolves to null instead of
  // rejecting, so one unavailable chart cannot fail the whole table.
  const fetchDim = (chart, dim) => fetchNetdata(chart).then((d) => readDim(d, dim), () => null);

  try {
    const containersRes = await fetch(`${API}/api/containers`);
    if (!containersRes.ok) throw new Error(`API ${containersRes.status}`);
    const containersData = await containersRes.json();
    if (!containersData.ok || !containersData.containers) {
      document.getElementById('containers-table').innerHTML = `<tr><td colspan="5" class="error">Ошибка загрузки</td></tr>`;
      return;
    }
    const containers = containersData.containers;
    const netdataRows = await Promise.all(containers.map((c) => {
      const charts = CGROUP_CHARTS[c.cgroup_name];
      if (!charts) return [null, null];
      return Promise.all([
        // NOTE(review): the 'used' value is multiplied by 100 as before;
        // confirm the chart does not already report a percentage.
        fetchDim(charts.cpu, 'used').then((v) => (v != null ? v * 100 : null)),
        fetchDim(charts.mem, 'utilization'),
      ]);
    }));
    const rows = containers.map((c, i) => {
      const [cpuPct, ramPct] = netdataRows[i] || [null, null];
      return `<tr>
<td>${c.name} (${c.vmid})</td>
<td class="${pctClass(cpuPct)}">${fmt(cpuPct, '%')}</td>
<td class="${pctClass(ramPct)}">${fmt(ramPct, '%')}</td>
<td class="${pctClass(c.disk_pct)}">${fmt(c.disk_pct, '%')}</td>
<td>${c.oom_count != null ? c.oom_count : '—'}</td>
</tr>`;
    });
    document.getElementById('containers-table').innerHTML = rows.join('');
  } catch (e) {
    document.getElementById('containers-table').innerHTML = `<tr><td colspan="5" class="error">Ошибка: ${e.message}. Проверьте доступ к ${API}</td></tr>`;
  }
}
// Reload both dashboard sections in parallel and update the status lines:
// a "loading" hint while running, a timestamp on success, an error message
// (in the error colour) on failure.
async function refresh() {
  const statusLine = document.getElementById('status');
  const setUpdated = (text) => {
    document.getElementById('updated').textContent = text;
  };
  statusLine.textContent = 'Загрузка...';
  try {
    const pending = [loadHost(), loadContainers()];
    await Promise.all(pending);
    setUpdated('Обновлено: ' + new Date().toLocaleString('ru'));
    statusLine.textContent = '';
  } catch (err) {
    setUpdated('');
    statusLine.textContent = 'Ошибка: ' + err.message;
    statusLine.style.color = 'var(--err)';
  }
}

// Initial load, then refresh every 30 seconds.
refresh();
setInterval(refresh, 30000);
</script>
</body>
</html>

View File

@@ -0,0 +1,19 @@
# Homelab monitoring dashboard (host, containers, services).
# Port 19998: static page + API + proxy to Netdata.
[Unit]
Description=Homelab Dashboard (monitoring)
# Start after the network is up and after Netdata (ordering only, not a hard dependency).
After=network-online.target netdata.service
Wants=network-online.target
[Service]
Type=simple
ExecStart=/usr/bin/python3 /root/scripts/dashboard/dashboard-server.py
WorkingDirectory=/root/scripts/dashboard
# Restart 5 seconds after an unclean exit.
Restart=on-failure
RestartSec=5
StandardOutput=journal
StandardError=journal
[Install]
WantedBy=multi-user.target