#!/usr/bin/env python3 """ Экспортер метрик для дашборда homelab: disk % и OOM по контейнерам/VM. Запуск: python3 dashboard-exporter.py (выводит JSON в stdout) """ import json import subprocess import sys from pathlib import Path # Маппинг: (vmid, type) -> (name, cgroup_name для Netdata) CONTAINERS = [ (100, "lxc", "nginx", "cgroup_nginx"), (101, "lxc", "nextcloud", "cgroup_nextcloud"), (103, "lxc", "gitea", "cgroup_gitea"), (104, "lxc", "paperless", "cgroup_paperless"), (105, "lxc", "rag-service", "cgroup_rag-service"), (107, "lxc", "misc", "cgroup_misc"), (108, "lxc", "galene", "cgroup_galene"), (109, "lxc", "local-vpn", "cgroup_local-vpn"), (200, "qemu", "immich", "cgroup_qemu_immich"), ] LXC_CGROUP = Path("/sys/fs/cgroup/lxc") QEMU_CGROUP_200 = Path("/sys/fs/cgroup/qemu.slice/200.scope") def get_disk_pct_lxc(vmid: int) -> float | None: """Disk % для LXC через pct exec df.""" try: r = subprocess.run( ["pct", "exec", str(vmid), "--", "df", "-P", "/"], capture_output=True, text=True, timeout=10, ) if r.returncode != 0: return None lines = r.stdout.strip().split("\n") if len(lines) < 2: return None # Формат: Filesystem 1K-blocks Used Available Use% Mounted parts = lines[-1].split() if len(parts) >= 5: use_pct = parts[4].rstrip("%") return float(use_pct) except (subprocess.TimeoutExpired, ValueError): pass return None def get_disk_pct_vm200() -> float | None: """Disk % для VM 200 через lvs (fallback, т.к. qm guest exec часто недоступен).""" try: r = subprocess.run( ["lvs", "-o", "data_percent", "--noheadings", "pve/vm-200-disk-0"], capture_output=True, text=True, timeout=5, ) if r.returncode != 0: return None val = r.stdout.strip() if val: return float(val) except (subprocess.TimeoutExpired, ValueError): pass return None def get_oom_count(vmid: int, vmtype: str) -> int | None: """OOM count из cgroup memory.events.""" if vmtype == "lxc": path = LXC_CGROUP / str(vmid) / "memory.events" elif vmtype == "qemu" and vmid == 200: path = QEMU_CGROUP_200 / "memory.events" else: return None if not path.exists(): return None try: text = path.read_text() for line in text.splitlines(): if line.startswith("oom_kill "): return int(line.split()[1]) except (OSError, ValueError): pass return None def main() -> None: result = {"containers": [], "ok": True} for vmid, vmtype, name, cgroup_name in CONTAINERS: disk_pct = None if vmtype == "lxc": disk_pct = get_disk_pct_lxc(vmid) elif vmtype == "qemu" and vmid == 200: disk_pct = get_disk_pct_vm200() oom = get_oom_count(vmid, vmtype) result["containers"].append({ "vmid": vmid, "name": name, "cgroup_name": cgroup_name, "disk_pct": disk_pct, "oom_count": oom, }) print(json.dumps(result, ensure_ascii=False)) if __name__ == "__main__": main()