Update container documentation to reflect disk space adjustments and Docker log management
Expand the root disk size from 35 GB to 50 GB and implement log size limits for Docker containers. Add details about the new monitoring dashboard for homelab services, including deployment instructions and access URL. Ensure clarity on log rotation policies and risks associated with disk space usage.
This commit is contained in:
112
scripts/dashboard/dashboard-exporter.py
Normal file
112
scripts/dashboard/dashboard-exporter.py
Normal file
@@ -0,0 +1,112 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Экспортер метрик для дашборда homelab: disk % и OOM по контейнерам/VM.
|
||||
Запуск: python3 dashboard-exporter.py (выводит JSON в stdout)
|
||||
"""
|
||||
|
||||
import json
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Monitored guests, one tuple per guest:
#   (vmid, type, name, cgroup_name)
# `type` is "lxc" or "qemu"; `cgroup_name` is the chart name used for this
# guest on the Netdata side.
CONTAINERS = [
    (100, "lxc", "nginx", "cgroup_nginx"),
    (101, "lxc", "nextcloud", "cgroup_nextcloud"),
    (103, "lxc", "gitea", "cgroup_gitea"),
    (104, "lxc", "paperless", "cgroup_paperless"),
    (105, "lxc", "rag-service", "cgroup_rag-service"),
    (107, "lxc", "misc", "cgroup_misc"),
    (108, "lxc", "galene", "cgroup_galene"),
    (109, "lxc", "local-vpn", "cgroup_local-vpn"),
    (200, "qemu", "immich", "cgroup_qemu_immich"),
]

# Parent cgroup directory of the LXC guests; each guest lives in <vmid>/.
LXC_CGROUP = Path("/sys/fs/cgroup/lxc")
# cgroup scope of the single monitored QEMU guest (VM 200).
QEMU_CGROUP_200 = Path("/sys/fs/cgroup/qemu.slice/200.scope")
|
||||
|
||||
|
||||
def get_disk_pct_lxc(vmid: int) -> float | None:
|
||||
"""Disk % для LXC через pct exec df."""
|
||||
try:
|
||||
r = subprocess.run(
|
||||
["pct", "exec", str(vmid), "--", "df", "-P", "/"],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=10,
|
||||
)
|
||||
if r.returncode != 0:
|
||||
return None
|
||||
lines = r.stdout.strip().split("\n")
|
||||
if len(lines) < 2:
|
||||
return None
|
||||
# Формат: Filesystem 1K-blocks Used Available Use% Mounted
|
||||
parts = lines[-1].split()
|
||||
if len(parts) >= 5:
|
||||
use_pct = parts[4].rstrip("%")
|
||||
return float(use_pct)
|
||||
except (subprocess.TimeoutExpired, ValueError):
|
||||
pass
|
||||
return None
|
||||
|
||||
|
||||
def get_disk_pct_vm200() -> float | None:
|
||||
"""Disk % для VM 200 через lvs (fallback, т.к. qm guest exec часто недоступен)."""
|
||||
try:
|
||||
r = subprocess.run(
|
||||
["lvs", "-o", "data_percent", "--noheadings", "pve/vm-200-disk-0"],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=5,
|
||||
)
|
||||
if r.returncode != 0:
|
||||
return None
|
||||
val = r.stdout.strip()
|
||||
if val:
|
||||
return float(val)
|
||||
except (subprocess.TimeoutExpired, ValueError):
|
||||
pass
|
||||
return None
|
||||
|
||||
|
||||
def get_oom_count(vmid: int, vmtype: str) -> int | None:
|
||||
"""OOM count из cgroup memory.events."""
|
||||
if vmtype == "lxc":
|
||||
path = LXC_CGROUP / str(vmid) / "memory.events"
|
||||
elif vmtype == "qemu" and vmid == 200:
|
||||
path = QEMU_CGROUP_200 / "memory.events"
|
||||
else:
|
||||
return None
|
||||
if not path.exists():
|
||||
return None
|
||||
try:
|
||||
text = path.read_text()
|
||||
for line in text.splitlines():
|
||||
if line.startswith("oom_kill "):
|
||||
return int(line.split()[1])
|
||||
except (OSError, ValueError):
|
||||
pass
|
||||
return None
|
||||
|
||||
|
||||
def main() -> None:
    """Collect metrics for every entry in CONTAINERS and print them as JSON.

    Writes one JSON object to stdout with these keys:

    * ``containers``: one entry per guest with ``vmid``, ``name``,
      ``cgroup_name``, ``disk_pct`` and ``oom_count``. A metric that could
      not be read is emitted as ``null``.
    * ``ok``: always ``True`` here; it is never changed after initialisation.
    """
    result = {"containers": [], "ok": True}
    for vmid, vmtype, name, cgroup_name in CONTAINERS:
        # Disk usage is read differently per guest type; guest types without
        # a reader keep disk_pct as None.
        disk_pct = None
        if vmtype == "lxc":
            disk_pct = get_disk_pct_lxc(vmid)
        elif vmtype == "qemu" and vmid == 200:
            disk_pct = get_disk_pct_vm200()

        oom = get_oom_count(vmid, vmtype)

        result["containers"].append({
            "vmid": vmid,
            "name": name,
            "cgroup_name": cgroup_name,
            "disk_pct": disk_pct,
            "oom_count": oom,
        })

    # ensure_ascii=False keeps any non-ASCII characters readable in the output.
    print(json.dumps(result, ensure_ascii=False))
|
||||
|
||||
|
||||
# Run the exporter only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user