#!/usr/bin/env python3
"""
Генерирует HTML-дашборд по access-логам Nginx Proxy Manager.
- Сводка по доменам (2 суток) + по IP (топ-5 по каждому домену + остальные) с геолокацией.
- Лента запросов с пагинацией и фильтром по домену (клиентский JS).
Геолокация: ip-api.com с кэшем (локальная сеть для 10.x, 192.168.x, 127.x).
"""
import base64
import ipaddress
import json
import re
import sys
import time
import urllib.request
from collections import defaultdict
from datetime import datetime, timedelta, timezone
from pathlib import Path
# Directory that main() scans for "proxy-host-*_access.log" files.
LOG_DIR = Path("/opt/docker/nginx-proxy/data/logs")
# JSON file in which the IP -> location cache is persisted between runs.
CACHE_FILE = Path("/opt/docker/log-dashboard/ip_cache.json")
FEED_MAX = 2_000 # max entries in the feed (embedded in the HTML, ~300 KB)
GEO_MAX_NEW_PER_RUN = 40 # ip-api.com limit 45/min
# Geolocation endpoint; the {ip} placeholder is filled in by fetch_geo().
API_URL = "http://ip-api.com/json/{ip}?fields=status,country,city,isp"
# Matches one access-log line. Capture groups, in order:
#   1. timestamp with numeric UTC offset, e.g. "10/Jan/2024:12:00:00 +0000"
#   2. first numeric field after the leading "-" (parse_line() uses it as the status)
#   3. request method
#   4. host / domain
#   5. quoted request path
#   6. client address from the "[Client ...]" section
# The offset accepts both "+" and "-" so that logs written in timezones west of UTC
# are not silently dropped.
LINE_RE = re.compile(
    r'\[(\d{2}/\w{3}/\d{4}:\d{2}:\d{2}:\d{2}\s+[+-]\d{4})\]\s+-\s+(\d+)\s+\d+\s+-\s+(\w+)\s+\w+\s+([^\s]+)\s+"([^"]*)"\s+\[Client\s+([^\]]+)\]'
)
def parse_date(s: str) -> datetime | None:
try:
return datetime.strptime(s.strip(), "%d/%b/%Y:%H:%M:%S %z")
except Exception:
return None
def parse_line(line: str) -> dict | None:
    """Turn one raw access-log line into a record dict.

    Returns ``None`` when the line does not match ``LINE_RE`` or when its
    timestamp cannot be parsed. Otherwise the dict holds the parsed time, the
    raw timestamp text, status, method, domain, the request path (``"/"`` if
    empty, cut to 80 characters) and the whitespace-stripped client address.
    """
    match = LINE_RE.search(line)
    if match is None:
        return None

    raw_time, status_code, http_method, host, request_path, client_addr = match.groups()

    parsed_time = parse_date(raw_time)
    if not parsed_time:
        return None

    record = {
        "time": parsed_time,
        "date_s": raw_time,
        "status": status_code,
        "method": http_method,
        "domain": host,
        "path": (request_path or "/")[:80],
        "client": client_addr.strip(),
    }
    return record
def is_private_ip(ip: str) -> bool:
    """Report whether *ip* should be treated as a local, non-geolocatable address.

    Both IPv4 and IPv6 are understood. Previously every address that was not a
    dotted quad (including public IPv6 clients) was reported as local.

    Args:
        ip: Client address text as found in the log; ``""`` and ``"-"`` mean
            "no address".

    Returns:
        ``True`` for missing or unparseable values and for private, loopback
        or link-local addresses (10/8, 172.16/12, 192.168/16, 127/8, ``::1``,
        ``fc00::/7``, ``fe80::/10`` and the other ranges the standard library
        classifies as private); ``False`` for everything else.
    """
    if not ip or ip == "-":
        return True
    try:
        addr = ipaddress.ip_address(ip.strip())
    except ValueError:
        # Not an IP literal at all: nothing to geolocate, treat as local.
        return True
    # Judge IPv4-mapped IPv6 (``::ffff:a.b.c.d``) by the embedded IPv4 address;
    # IPv4Address objects have no such attribute, hence getattr.
    mapped = getattr(addr, "ipv4_mapped", None)
    if mapped is not None:
        addr = mapped
    return addr.is_private or addr.is_loopback or addr.is_link_local
def load_geo_cache() -> dict:
    """Load the IP -> location cache from ``CACHE_FILE``.

    Returns:
        The cached mapping, or an empty dict when the file is missing,
        unreadable, not valid UTF-8/JSON, or does not hold a JSON object.
    """
    try:
        with open(CACHE_FILE, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError):
        # OSError: missing or unreadable file; ValueError: bad JSON or bad UTF-8.
        return {}
    # Valid JSON that is not an object (a list, a string, ...) would break the
    # later lookups and assignments keyed by IP, so discard it.
    return data if isinstance(data, dict) else {}
def save_geo_cache(cache: dict) -> None:
    """Persist the geo cache to ``CACHE_FILE`` on a best-effort basis.

    The data is written to a sibling ``*.tmp`` file which is then moved over
    the target, so a failed write does not clobber the existing cache.
    Failures never raise; a warning is printed to stderr instead.

    Args:
        cache: Mapping of IP -> location label to store as JSON.
    """
    tmp_path = CACHE_FILE.with_name(CACHE_FILE.name + ".tmp")
    try:
        CACHE_FILE.parent.mkdir(parents=True, exist_ok=True)
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(cache, f, ensure_ascii=False, indent=0)
        tmp_path.replace(CACHE_FILE)
    except (OSError, TypeError, ValueError) as exc:
        # Losing the cache only costs extra lookups on the next run, so report and carry on.
        print(f"warning: could not save geo cache to {CACHE_FILE}: {exc}", file=sys.stderr)
        try:
            tmp_path.unlink()
        except OSError:
            pass
def fetch_geo(ip: str) -> str:
    """Look up a human-readable location label for *ip*.

    Private addresses are answered locally without a network call. For other
    addresses the ``API_URL`` endpoint is queried (3 second timeout) and the
    label is built as ``"country, city (isp)"`` from whichever parts are
    present. Any failure, or a non-"success" reply, yields ``"—"``.
    """
    unknown = "—"
    if is_private_ip(ip):
        return "Локальная сеть"
    try:
        request = urllib.request.Request(
            API_URL.format(ip=ip),
            headers={"User-Agent": "NPM-Log-Dashboard/1"},
        )
        with urllib.request.urlopen(request, timeout=3) as response:
            body = response.read()
        info = json.loads(body.decode())
        if info.get("status") != "success":
            return unknown

        # Keep only the non-empty geographic parts, in country-then-city order.
        place_bits = []
        for key in ("country", "city"):
            value = info.get(key) or ""
            if value:
                place_bits.append(value)
        place = ", ".join(place_bits).strip() or unknown

        provider = (info.get("isp") or "").strip()
        if provider:
            if place != unknown:
                place = f"{place} ({provider})"
            else:
                place = provider
        return place or unknown
    except Exception:
        # Best effort: network, decoding and shape errors all degrade to "unknown".
        return unknown
def ensure_geo_for_ips(cache: dict, ips: list[str], max_new: int) -> None:
    """Fill *cache* in place with locations for IPs it does not know yet.

    Args:
        cache: Mapping of IP -> location label; new results are added to it.
        ips: Candidate addresses in priority order. Falsy entries, addresses
            already in *cache*, private addresses and repeated addresses are
            skipped, so a duplicate never costs a second lookup.
        max_new: Upper bound on the number of lookups made by this call;
            zero or a negative value means no lookups.

    Note:
        Whatever ``fetch_geo`` returns is stored, including its "—" result for
        failed lookups, so such addresses are not retried on later runs.
    """
    limit = max(max_new, 0)
    pending: list[str] = []
    seen: set[str] = set()
    for ip in ips:
        if len(pending) >= limit:
            break
        if not ip or ip in seen or ip in cache or is_private_ip(ip):
            continue
        seen.add(ip)
        pending.append(ip)

    for index, ip in enumerate(pending):
        if index:
            # Pause between requests only: ~44/min stays under the 45/min limit.
            time.sleep(1.35)
        cache[ip] = fetch_geo(ip)
def main():
# Entry point: parse the access logs, aggregate the last two days, refresh the geo cache,
# render the dashboard HTML and write it to the path given as argv[1] (or print it to stdout).
# NOTE(review): in this copy the multi-line string literals below (table rows, favicon,
# page template, the replace() placeholders) look like their HTML markup was stripped, and
# several of them are not valid Python as shown — restore them from the original file.
# NOTE(review): values taken from the logs (domain, client) are interpolated into the
# markup as-is here; confirm whether the original template escapes them.
out_path = sys.argv[1] if len(sys.argv) > 1 else None
# Current time as an aware UTC datetime; the naive utcnow() fallback runs only if that raises.
try:
now = datetime.now(timezone.utc)
except Exception:
now = datetime.utcnow()
two_days_ago = now - timedelta(days=2)
# Cutoff as a POSIX timestamp so that entries can be compared as plain floats.
try:
t_cut = two_days_ago.timestamp()
except Exception:
t_cut = (two_days_ago - datetime(1970, 1, 1)).total_seconds()
# Request counters: per domain, per client IP, and per (domain, client IP) pair.
counts_by_domain: dict[str, int] = defaultdict(int)
counts_by_ip: dict[str, int] = defaultdict(int)
counts_by_domain_ip: dict[str, dict[str, int]] = defaultdict(lambda: defaultdict(int))
all_entries: list[dict] = []
log_files = sorted(LOG_DIR.glob("proxy-host-*_access.log"))
for log_path in log_files:
# Unreadable log files are skipped; undecodable bytes are dropped (errors="ignore").
try:
with open(log_path, "r", encoding="utf-8", errors="ignore") as f:
lines = f.readlines()
except Exception:
continue
for line in lines:
entry = parse_line(line)
if not entry:
continue
domain = entry["domain"]
client = entry["client"]
# Entry time as a timestamp; falls back to treating it as UTC, then to 0.
try:
et = entry["time"].timestamp()
except Exception:
try:
et = entry["time"].replace(tzinfo=timezone.utc).timestamp()
except Exception:
et = 0
# Only entries at or after the cutoff are counted in the summaries.
# NOTE(review): indentation is lost in this copy, so it is unclear whether the
# all_entries.append() below sits inside this `if`; the feed re-filters by time anyway.
if et >= t_cut:
counts_by_domain[domain] += 1
counts_by_ip[client] += 1
counts_by_domain_ip[domain][client] += 1
all_entries.append(entry)
# Feed = all requests from the last 2 days (capped at FEED_MAX to bound the HTML size)
def entry_ts(e: dict) -> float:
try:
return e["time"].timestamp()
except Exception:
try:
return e["time"].replace(tzinfo=timezone.utc).timestamp()
except Exception:
return 0.0
feed_2d = [e for e in all_entries if entry_ts(e) >= t_cut]
# Newest first, then keep at most FEED_MAX entries.
feed_2d.sort(key=lambda x: x["time"], reverse=True)
total_2d = len(feed_2d)
feed = feed_2d[:FEED_MAX]
feed_capped = total_2d > FEED_MAX
# Compact per-entry records with short keys; "t" is the first 20 characters of the raw timestamp.
feed_serial = [
{"t": e["date_s"][:20], "d": e["domain"], "m": e["method"], "p": e["path"], "s": e["status"], "c": e["client"]}
for e in feed
]
# Domain order as in the summary (by descending request count)
domain_order = sorted(counts_by_domain.keys(), key=lambda d: -counts_by_domain[d])
# Geo cache: top-5 IPs per domain + some of the unique IPs from the feed
geo_cache = load_geo_cache()
ips_for_geo = []
for d in domain_order:
top5_for_d = [ip for ip, _ in sorted(counts_by_domain_ip[d].items(), key=lambda x: -x[1])[:5]]
ips_for_geo.extend(top5_for_d)
# dict.fromkeys() de-duplicates while keeping first-seen order.
ips_for_geo = list(dict.fromkeys(ips_for_geo))
# Unique client IPs among the first 500 feed entries.
feed_ips = list(dict.fromkeys(e["client"] for e in feed[:500]))
ensure_geo_for_ips(geo_cache, ips_for_geo + feed_ips, GEO_MAX_NEW_PER_RUN)
save_geo_cache(geo_cache)
# Label for an IP: fixed text for private addresses, cached value otherwise, "—" if unknown.
def geo_label(ip: str) -> str:
if is_private_ip(ip):
return "Локальная сеть"
return geo_cache.get(ip, "—")
# Per-domain summary (table)
summary_domain_rows = []
for domain in domain_order:
summary_domain_rows.append(f"
{domain}
{counts_by_domain[domain]}
")
summary_domain_table = "\n".join(summary_domain_rows)
# Per-IP summary: top 5 for each domain + the rest
summary_ip_parts = []
for domain in domain_order:
sorted_ips_d = sorted(counts_by_domain_ip[domain].items(), key=lambda x: -x[1])
top5_d = sorted_ips_d[:5]
# Combined request count of every IP outside the top five for this domain.
rest_d = sum(c for _, c in sorted_ips_d[5:])
rows = []
for ip, cnt in top5_d:
geo = geo_label(ip)
link_2ip = f'{ip}'
rows.append(f"
{link_2ip}
{cnt}
{geo}
")
if rest_d:
rows.append(f'
Остальные IP
{rest_d}
—
')
summary_ip_parts.append(
f'
{domain}
\n' + "\n".join(rows)
)
summary_ip_table = "\n".join(summary_ip_parts)
# Geo labels for every client IP in the feed (for display in the feed by the JS)
geo_map = {ip: geo_label(ip) for ip in set(e["client"] for e in feed)}
# NOTE(review): geo_map_json is not referenced again in the visible code — confirm it is still needed.
geo_map_json = json.dumps(geo_map, ensure_ascii=False)
domains_list = domain_order
feed_note = f" (показаны последние {FEED_MAX:,} из {total_2d:,})" if feed_capped else ""
feed_count = len(feed_serial)
# Generation time is taken in the machine's local timezone.
_gen_time = datetime.now().strftime("%d.%m.%Y %H:%M")
# Favicon: mini terminal with log lines (dark background, cyan lines)
# NOTE(review): the SVG source is empty in this copy.
favicon_svg = """"""
favicon_data_url = "data:image/svg+xml;base64," + base64.b64encode(favicon_svg.encode("utf-8")).decode("ascii")
html_start = f"""
Обращения к внешним URL (NPM)
Обращения к внешним URL (Nginx Proxy Manager)
Сводка за 2 суток; лента — последние {feed_count:,} запросов. Расчёт по крону раз в 15 мин. Сгенерировано: {_gen_time}
Запросы по доменам (2 суток)
Домен
Запросов
{summary_domain_table}
Запросы по IP (топ-5 по каждому домену + остальные)
IP
Запросов
Местоположение
{summary_ip_table}
Лента запросов (последние {feed_count:,})
Время
Домен
Метод
Путь
Статус
Client
Местоположение
—
"""
# JSON for the feed: escaped so that it cannot close the enclosing tag
payload = {"feed": feed_serial, "geo": geo_map, "domains": domains_list}
feed_json_raw = json.dumps(payload, ensure_ascii=False)
# "<" and ">" become \u003c / \u003e escapes, which JSON parsers read back as the same characters.
feed_json_safe = feed_json_raw.replace("<", "\\u003c").replace(">", "\\u003e")
# Write to the requested file (creating its directory) or print to stdout.
# NOTE(review): the replace() placeholder and replacement strings are empty in this copy;
# presumably the original injects feed_json_safe into the page here — confirm.
if out_path:
out_dir = Path(out_path).parent
out_dir.mkdir(parents=True, exist_ok=True)
html_full = html_start.replace('',
'')
with open(out_path, "w", encoding="utf-8") as f:
f.write(html_full)
else:
html_full = html_start.replace('',
'')
print(html_full)
# Run the generator only when the file is executed as a script, not when it is imported.
if __name__ == "__main__":
main()