Files
homelab-docs/scripts/dashboard/dashboard-exporter.py
Andrey 604f0c705f Update container documentation to reflect disk space adjustments and Docker log management
Expand the root disk size from 35 GB to 50 GB and implement log size limits for Docker containers. Add details about the new monitoring dashboard for homelab services, including deployment instructions and access URL. Ensure clarity on log rotation policies and risks associated with disk space usage.
2026-02-28 17:10:34 +03:00

113 lines
3.4 KiB
Python

#!/usr/bin/env python3
"""
Экспортер метрик для дашборда homelab: disk % и OOM по контейнерам/VM.
Запуск: python3 dashboard-exporter.py (выводит JSON в stdout)
"""
import json
import subprocess
import sys
from pathlib import Path
# Monitored guests, one (vmid, type, name, cgroup_name) tuple per guest.
# `type` is "lxc" or "qemu"; `cgroup_name` is the name used for Netdata
# (per the original note) and is passed through unchanged to the JSON output.
CONTAINERS = [
    (100, "lxc", "nginx", "cgroup_nginx"),
    (101, "lxc", "nextcloud", "cgroup_nextcloud"),
    (103, "lxc", "gitea", "cgroup_gitea"),
    (104, "lxc", "paperless", "cgroup_paperless"),
    (105, "lxc", "rag-service", "cgroup_rag-service"),
    (107, "lxc", "misc", "cgroup_misc"),
    (108, "lxc", "galene", "cgroup_galene"),
    (109, "lxc", "local-vpn", "cgroup_local-vpn"),
    (200, "qemu", "immich", "cgroup_qemu_immich"),
]
# Parent directory of the per-container cgroup directories; get_oom_count
# joins it with the vmid to locate memory.events for LXC guests.
LXC_CGROUP = Path("/sys/fs/cgroup/lxc")
# cgroup directory whose memory.events is read for QEMU VM 200.
QEMU_CGROUP_200 = Path("/sys/fs/cgroup/qemu.slice/200.scope")
def get_disk_pct_lxc(vmid: int) -> float | None:
"""Disk % для LXC через pct exec df."""
try:
r = subprocess.run(
["pct", "exec", str(vmid), "--", "df", "-P", "/"],
capture_output=True,
text=True,
timeout=10,
)
if r.returncode != 0:
return None
lines = r.stdout.strip().split("\n")
if len(lines) < 2:
return None
# Формат: Filesystem 1K-blocks Used Available Use% Mounted
parts = lines[-1].split()
if len(parts) >= 5:
use_pct = parts[4].rstrip("%")
return float(use_pct)
except (subprocess.TimeoutExpired, ValueError):
pass
return None
def get_disk_pct_vm200() -> float | None:
"""Disk % для VM 200 через lvs (fallback, т.к. qm guest exec часто недоступен)."""
try:
r = subprocess.run(
["lvs", "-o", "data_percent", "--noheadings", "pve/vm-200-disk-0"],
capture_output=True,
text=True,
timeout=5,
)
if r.returncode != 0:
return None
val = r.stdout.strip()
if val:
return float(val)
except (subprocess.TimeoutExpired, ValueError):
pass
return None
def get_oom_count(vmid: int, vmtype: str) -> int | None:
"""OOM count из cgroup memory.events."""
if vmtype == "lxc":
path = LXC_CGROUP / str(vmid) / "memory.events"
elif vmtype == "qemu" and vmid == 200:
path = QEMU_CGROUP_200 / "memory.events"
else:
return None
if not path.exists():
return None
try:
text = path.read_text()
for line in text.splitlines():
if line.startswith("oom_kill "):
return int(line.split()[1])
except (OSError, ValueError):
pass
return None
def main() -> None:
    """Collect metrics for every configured guest and print them as JSON.

    For each entry in ``CONTAINERS`` the disk percentage is gathered first
    (via ``pct`` for LXC guests, via ``lvs`` for QEMU VM 200, otherwise
    ``None``), then the OOM counter. The result is written to stdout as a
    single JSON object with the keys ``containers`` and ``ok``.
    """
    entries = []
    for vmid, vmtype, name, cgroup_name in CONTAINERS:
        if vmtype == "lxc":
            usage = get_disk_pct_lxc(vmid)
        elif vmtype == "qemu" and vmid == 200:
            usage = get_disk_pct_vm200()
        else:
            usage = None
        entries.append(
            {
                "vmid": vmid,
                "name": name,
                "cgroup_name": cgroup_name,
                "disk_pct": usage,
                "oom_count": get_oom_count(vmid, vmtype),
            }
        )
    payload = {"containers": entries, "ok": True}
    print(json.dumps(payload, ensure_ascii=False))


# Run the exporter only when executed as a script, not when imported.
if __name__ == "__main__":
    main()