Expand the root disk size from 35 GB to 50 GB and implement log size limits for Docker containers. Add details about the new monitoring dashboard for homelab services, including deployment instructions and access URL. Ensure clarity on log rotation policies and risks associated with disk space usage.
113 lines
3.4 KiB
Python
113 lines
3.4 KiB
Python
#!/usr/bin/env python3
|
|
"""
Metrics exporter for the homelab dashboard: disk usage (%) and OOM counts per container/VM.

Usage: python3 dashboard-exporter.py (prints JSON to stdout)
"""
|
|
|
|
import json
|
|
import subprocess
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
# Guests to report on. Each entry is a 4-tuple:
#   (vmid, guest type ("lxc" or "qemu"), display name, cgroup name for Netdata)
CONTAINERS: list[tuple[int, str, str, str]] = [
    (100, "lxc", "nginx", "cgroup_nginx"),
    (101, "lxc", "nextcloud", "cgroup_nextcloud"),
    (103, "lxc", "gitea", "cgroup_gitea"),
    (104, "lxc", "paperless", "cgroup_paperless"),
    (105, "lxc", "rag-service", "cgroup_rag-service"),
    (107, "lxc", "misc", "cgroup_misc"),
    (108, "lxc", "galene", "cgroup_galene"),
    (109, "lxc", "local-vpn", "cgroup_local-vpn"),
    (200, "qemu", "immich", "cgroup_qemu_immich"),
]

# cgroup directories that hold each guest's memory.events file.
# LXC guests live in a per-vmid subdirectory; only VM 200 is tracked for QEMU.
LXC_CGROUP = Path("/sys/fs/cgroup/lxc")
QEMU_CGROUP_200 = Path("/sys/fs/cgroup/qemu.slice/200.scope")
|
|
|
|
|
|
def get_disk_pct_lxc(vmid: int) -> float | None:
|
|
"""Disk % для LXC через pct exec df."""
|
|
try:
|
|
r = subprocess.run(
|
|
["pct", "exec", str(vmid), "--", "df", "-P", "/"],
|
|
capture_output=True,
|
|
text=True,
|
|
timeout=10,
|
|
)
|
|
if r.returncode != 0:
|
|
return None
|
|
lines = r.stdout.strip().split("\n")
|
|
if len(lines) < 2:
|
|
return None
|
|
# Формат: Filesystem 1K-blocks Used Available Use% Mounted
|
|
parts = lines[-1].split()
|
|
if len(parts) >= 5:
|
|
use_pct = parts[4].rstrip("%")
|
|
return float(use_pct)
|
|
except (subprocess.TimeoutExpired, ValueError):
|
|
pass
|
|
return None
|
|
|
|
|
|
def get_disk_pct_vm200() -> float | None:
|
|
"""Disk % для VM 200 через lvs (fallback, т.к. qm guest exec часто недоступен)."""
|
|
try:
|
|
r = subprocess.run(
|
|
["lvs", "-o", "data_percent", "--noheadings", "pve/vm-200-disk-0"],
|
|
capture_output=True,
|
|
text=True,
|
|
timeout=5,
|
|
)
|
|
if r.returncode != 0:
|
|
return None
|
|
val = r.stdout.strip()
|
|
if val:
|
|
return float(val)
|
|
except (subprocess.TimeoutExpired, ValueError):
|
|
pass
|
|
return None
|
|
|
|
|
|
def get_oom_count(vmid: int, vmtype: str) -> int | None:
|
|
"""OOM count из cgroup memory.events."""
|
|
if vmtype == "lxc":
|
|
path = LXC_CGROUP / str(vmid) / "memory.events"
|
|
elif vmtype == "qemu" and vmid == 200:
|
|
path = QEMU_CGROUP_200 / "memory.events"
|
|
else:
|
|
return None
|
|
if not path.exists():
|
|
return None
|
|
try:
|
|
text = path.read_text()
|
|
for line in text.splitlines():
|
|
if line.startswith("oom_kill "):
|
|
return int(line.split()[1])
|
|
except (OSError, ValueError):
|
|
pass
|
|
return None
|
|
|
|
|
|
def main() -> None:
    """Collect disk and OOM metrics for every guest and print them as JSON.

    Emits a single JSON object on stdout with a ``containers`` list (one
    entry per item in ``CONTAINERS``) and an ``ok`` flag that is always true.
    Metrics that could not be collected appear as ``null``.
    """
    entries = []
    for vmid, vmtype, name, cgroup_name in CONTAINERS:
        # Pick the disk probe that matches the guest type; other guests get None.
        if vmtype == "lxc":
            disk_pct = get_disk_pct_lxc(vmid)
        elif vmtype == "qemu" and vmid == 200:
            disk_pct = get_disk_pct_vm200()
        else:
            disk_pct = None

        oom = get_oom_count(vmid, vmtype)
        entry = {
            "vmid": vmid,
            "name": name,
            "cgroup_name": cgroup_name,
            "disk_pct": disk_pct,
            "oom_count": oom,
        }
        entries.append(entry)

    payload = {"containers": entries, "ok": True}
    print(json.dumps(payload, ensure_ascii=False))


if __name__ == "__main__":
    main()
|