#!/usr/bin/env python3
"""
Generates an HTML dashboard from Nginx Proxy Manager access logs.

- Per-domain summary (2 days) + per-IP summary (top-5 per domain + the rest)
  with geolocation.
- Request feed with pagination and a per-domain filter (client-side JS).

Geolocation: ip-api.com with a local cache ("Локальная сеть" label for
10.x, 172.16-31.x, 192.168.x, 127.x addresses).
"""
import base64
import json
import re
import sys
import time
import urllib.request
from collections import defaultdict
from datetime import datetime, timedelta, timezone
from pathlib import Path

# NPM writes one access log per proxy host under this directory.
LOG_DIR = Path("/opt/docker/nginx-proxy/data/logs")
# Persistent ip -> "Country, City (ISP)" cache, survives between cron runs.
CACHE_FILE = Path("/opt/docker/log-dashboard/ip_cache.json")
FEED_MAX = 2_000  # max feed entries (embedded into the HTML, ~300 KB)
GEO_MAX_NEW_PER_RUN = 40  # ip-api.com free tier limit is 45 requests/min
API_URL = "http://ip-api.com/json/{ip}?fields=status,country,city,isp"

# NPM proxy-host access-log line. Captured groups:
#   1 timestamp "dd/Mon/yyyy:HH:MM:SS +zzzz", 2 status code, 3 HTTP method,
#   4 host/domain, 5 quoted request path, 6 client IP.
# The two uncaptured tokens are the upstream status and the scheme.
LINE_RE = re.compile(
    r'\[(\d{2}/\w{3}/\d{4}:\d{2}:\d{2}:\d{2}\s+\+\d{4})\]\s+-\s+(\d+)\s+\d+\s+-\s+(\w+)\s+\w+\s+([^\s]+)\s+"([^"]*)"\s+\[Client\s+([^\]]+)\]'
)


def parse_date(s: str) -> datetime | None:
    """Parse an NPM log timestamp into an aware datetime; None if malformed."""
    try:
        return datetime.strptime(s.strip(), "%d/%b/%Y:%H:%M:%S %z")
    except Exception:
        return None


def parse_line(line: str) -> dict | None:
    """Parse one access-log line into a dict, or None if it does not match.

    The path is truncated to 80 chars to bound the size of the embedded feed.
    """
    m = LINE_RE.search(line)
    if not m:
        return None
    date_s, status, method, domain, path, client = m.groups()
    dt = parse_date(date_s)
    if not dt:
        return None
    return {
        "time": dt,
        "date_s": date_s,
        "status": status,
        "method": method,
        "domain": domain,
        "path": (path or "/")[:80],  # keep feed entries small
        "client": client.strip(),
    }


def is_private_ip(ip: str) -> bool:
    """Return True for RFC1918/loopback addresses and anything unparsable.

    Unparsable values (empty, "-", non-IPv4) are treated as private so we
    never send junk to the geolocation API.
    """
    if not ip or ip == "-":
        return True
    parts = ip.split(".")
    if len(parts) != 4:
        return True
    try:
        a, b, c, d = (int(x) for x in parts)
        if a == 10:
            return True
        if a == 172 and 16 <= b <= 31:
            return True
        if a == 192 and b == 168:
            return True
        if a == 127:
            return True
    except ValueError:
        return True
    return False


def load_geo_cache() -> dict:
    """Load the ip -> location cache from disk; empty dict on any failure."""
    if CACHE_FILE.exists():
        try:
            with open(CACHE_FILE, "r", encoding="utf-8") as f:
                return json.load(f)
        except Exception:
            pass
    return {}


def save_geo_cache(cache: dict) -> None:
    """Persist the geo cache; best-effort, failures are silently ignored."""
    try:
        CACHE_FILE.parent.mkdir(parents=True, exist_ok=True)
        with open(CACHE_FILE, "w", encoding="utf-8") as f:
            json.dump(cache, f, ensure_ascii=False, indent=0)
    except Exception:
        pass


def fetch_geo(ip: str) -> str:
    """Resolve one IP via ip-api.com into "Country, City (ISP)".

    Returns "Локальная сеть" for private IPs and "—" on any API failure,
    so the result is always a displayable string.
    """
    if is_private_ip(ip):
        return "Локальная сеть"
    try:
        req = urllib.request.Request(API_URL.format(ip=ip), headers={"User-Agent": "NPM-Log-Dashboard/1"})
        with urllib.request.urlopen(req, timeout=3) as r:
            data = json.loads(r.read().decode())
        if data.get("status") != "success":
            return "—"
        parts = [data.get("country") or "", data.get("city") or ""]
        loc = ", ".join(p for p in parts if p).strip() or "—"
        isp = (data.get("isp") or "").strip()
        if isp:
            # If only the ISP is known, show it alone instead of "— (ISP)".
            loc = f"{loc} ({isp})" if loc != "—" else isp
        return loc or "—"
    except Exception:
        return "—"


def ensure_geo_for_ips(cache: dict, ips: list[str], max_new: int) -> None:
    """Fetch geolocation for at most max_new uncached public IPs (in place).

    Sleeps ~1.35 s between requests (~44/min) to stay under the ip-api.com
    45 requests/minute limit.
    """
    to_fetch = [ip for ip in ips if ip and not is_private_ip(ip) and ip not in cache]
    to_fetch = to_fetch[:max_new]
    for ip in to_fetch:
        cache[ip] = fetch_geo(ip)
        time.sleep(1.35)  # ~44/min to stay under 45


def main():
    """Read all NPM access logs and emit the dashboard HTML.

    Writes to sys.argv[1] when given, otherwise prints to stdout.
    """
    out_path = sys.argv[1] if len(sys.argv) > 1 else None
    # NOTE(review): datetime.now(timezone.utc) should not raise; the fallback
    # to naive utcnow() looks like defensive leftover — confirm it is needed.
    try:
        now = datetime.now(timezone.utc)
    except Exception:
        now = datetime.utcnow()
    two_days_ago = now - timedelta(days=2)
    # Epoch-seconds cutoff; the subtraction fallback covers a naive datetime.
    try:
        t_cut = two_days_ago.timestamp()
    except Exception:
        t_cut = (two_days_ago - datetime(1970, 1, 1)).total_seconds()

    counts_by_domain: dict[str, int] = defaultdict(int)
    # NOTE(review): counts_by_ip is accumulated but never read afterwards —
    # possibly orphaned by an earlier template change; confirm before removal.
    counts_by_ip: dict[str, int] = defaultdict(int)
    counts_by_domain_ip: dict[str, dict[str, int]] = defaultdict(lambda: defaultdict(int))
    all_entries: list[dict] = []

    log_files = sorted(LOG_DIR.glob("proxy-host-*_access.log"))
    for log_path in log_files:
        try:
            with open(log_path, "r", encoding="utf-8", errors="ignore") as f:
                lines = f.readlines()
        except Exception:
            continue  # unreadable log file: skip, best-effort dashboard
        for line in lines:
            entry = parse_line(line)
            if not entry:
                continue
            domain = entry["domain"]
            client = entry["client"]
            # Entry timestamp as epoch seconds; retry as UTC for naive
            # datetimes, and fall back to 0 (always older than the cutoff).
            try:
                et = entry["time"].timestamp()
            except Exception:
                try:
                    et = entry["time"].replace(tzinfo=timezone.utc).timestamp()
                except Exception:
                    et = 0
            if et >= t_cut:
                counts_by_domain[domain] += 1
                counts_by_ip[client] += 1
                counts_by_domain_ip[domain][client] += 1
                all_entries.append(entry)

    # Feed = all requests from the last 2 days (capped at FEED_MAX to bound
    # the size of the generated HTML).
    def entry_ts(e: dict) -> float:
        # Same epoch-seconds extraction as above, as a reusable sort/filter key.
        try:
            return e["time"].timestamp()
        except Exception:
            try:
                return e["time"].replace(tzinfo=timezone.utc).timestamp()
            except Exception:
                return 0.0

    feed_2d = [e for e in all_entries if entry_ts(e) >= t_cut]
    feed_2d.sort(key=lambda x: x["time"], reverse=True)  # newest first
    total_2d = len(feed_2d)
    feed = feed_2d[:FEED_MAX]
    feed_capped = total_2d > FEED_MAX
    # Compact per-entry dicts with one-letter keys to shrink the embedded JSON.
    feed_serial = [
        {"t": e["date_s"][:20], "d": e["domain"], "m": e["method"], "p": e["path"], "s": e["status"], "c": e["client"]}
        for e in feed
    ]

    # Domain order matches the summary: descending by request count.
    domain_order = sorted(counts_by_domain.keys(), key=lambda d: -counts_by_domain[d])

    # Geo cache warm-up: top-5 IPs of each domain plus (up to) the first 500
    # feed entries' unique client IPs, limited to GEO_MAX_NEW_PER_RUN new
    # lookups per run.
    geo_cache = load_geo_cache()
    ips_for_geo = []
    for d in domain_order:
        top5_for_d = [ip for ip, _ in sorted(counts_by_domain_ip[d].items(), key=lambda x: -x[1])[:5]]
        ips_for_geo.extend(top5_for_d)
    ips_for_geo = list(dict.fromkeys(ips_for_geo))  # dedupe, keep order
    feed_ips = list(dict.fromkeys(e["client"] for e in feed[:500]))
    ensure_geo_for_ips(geo_cache, ips_for_geo + feed_ips, GEO_MAX_NEW_PER_RUN)
    save_geo_cache(geo_cache)

    def geo_label(ip: str) -> str:
        # Display label for an IP; "—" when the cache has no entry yet.
        if is_private_ip(ip):
            return "Локальная сеть"
        return geo_cache.get(ip, "—")

    # Per-domain summary rows.
    # NOTE(review): the f-strings below look like they once contained HTML
    # table markup that was lost when this file was mangled — as written they
    # concatenate bare values; restore the original <tr><td> markup.
    summary_domain_rows = []
    for domain in domain_order:
        summary_domain_rows.append(f"{domain}{counts_by_domain[domain]}")
    summary_domain_table = "\n".join(summary_domain_rows)

    # Per-IP summary: top-5 IPs for each domain + an aggregated "rest" row.
    summary_ip_parts = []
    for domain in domain_order:
        sorted_ips_d = sorted(counts_by_domain_ip[domain].items(), key=lambda x: -x[1])
        top5_d = sorted_ips_d[:5]
        rest_d = sum(c for _, c in sorted_ips_d[5:])
        rows = []
        for ip, cnt in top5_d:
            geo = geo_label(ip)
            # NOTE(review): presumably once an <a href="...2ip..."> whois link;
            # markup lost in mangling.
            link_2ip = f'{ip}'
            rows.append(f"{link_2ip}{cnt}{geo}")
        if rest_d:
            rows.append(f'Остальные IP{rest_d}—')
        summary_ip_parts.append(
            f'{domain}\n' + "\n".join(rows)
        )
    summary_ip_table = "\n".join(summary_ip_parts)

    # ip -> label map for every client IP in the feed (rendered by the JS).
    geo_map = {ip: geo_label(ip) for ip in set(e["client"] for e in feed)}
    # NOTE(review): geo_map_json is unused below (payload embeds geo_map
    # directly) — likely orphaned by the template mangling.
    geo_map_json = json.dumps(geo_map, ensure_ascii=False)
    domains_list = domain_order
    # NOTE(review): feed_note is never interpolated into the surviving
    # template — presumably it belonged to the stripped HTML.
    feed_note = f" (показаны последние {FEED_MAX:,} из {total_2d:,})" if feed_capped else ""
    feed_count = len(feed_serial)
    _gen_time = datetime.now().strftime("%d.%m.%Y %H:%M")

    # Favicon: mini terminal with log lines (dark background, cyan rows).
    # NOTE(review): the SVG body was evidently lost in mangling — this is now
    # a single space, so favicon_data_url encodes an empty image.
    favicon_svg = """ """
    favicon_data_url = "data:image/svg+xml;base64," + base64.b64encode(favicon_svg.encode("utf-8")).decode("ascii")

    # NOTE(review): the dashboard template below clearly lost all of its HTML
    # tags (only the text content and f-string placeholders survived);
    # restore from the original file before deploying.
    html_start = f""" Обращения к внешним URL (NPM)

Обращения к внешним URL (Nginx Proxy Manager)

Сводка за 2 суток; лента — последние {feed_count:,} запросов. Расчёт по крону раз в 15 мин. Сгенерировано: {_gen_time}

Запросы по доменам (2 суток)

{summary_domain_table}
ДоменЗапросов

Запросы по IP (топ-5 по каждому домену + остальные)

{summary_ip_table}
IPЗапросовМестоположение

Лента запросов (последние {feed_count:,})

ВремяДоменМетодПутьСтатусClientМестоположение
"""

    # Feed JSON: escape angle brackets so the payload cannot close its
    # surrounding <script> tag when embedded in the HTML.
    payload = {"feed": feed_serial, "geo": geo_map, "domains": domains_list}
    feed_json_raw = json.dumps(payload, ensure_ascii=False)
    # NOTE(review): feed_json_safe is computed but never embedded below — the
    # .replace('', '') calls look like they lost their placeholder/replacement
    # arguments (probably inserting the script tag with feed_json_safe).
    feed_json_safe = feed_json_raw.replace("<", "\\u003c").replace(">", "\\u003e")

    if out_path:
        out_dir = Path(out_path).parent
        out_dir.mkdir(parents=True, exist_ok=True)
        html_full = html_start.replace('', '')  # identity as written; see NOTE above
        with open(out_path, "w", encoding="utf-8") as f:
            f.write(html_full)
    else:
        html_full = html_start.replace('', '')  # identity as written; see NOTE above
        print(html_full)


if __name__ == "__main__":
    main()