import hashlib import sqlite3 from datetime import datetime from pathlib import Path from typing import Tuple from backend.config import STATEMENTS_DIR from backend.db.database import get_connection from backend.parsers.bank_s import BankSParser from backend.parsers.bank_t import BankTParser from backend.parsers.bank_y import BankYParser from backend.parsers.base import BaseBankParser, ParsedTransaction BANK_REGISTRY: list[tuple[str, str, type[BaseBankParser]]] = [ ("T", "Т-банк", BankTParser), ("S", "Сбербанк", BankSParser), ("Y", "Яндекс Банк", BankYParser), ] def _dedup_key(account_id: int, operation_date: str, amount: float, description: str) -> str: raw = f"{account_id}|{operation_date}|{amount}|{description}" return hashlib.sha256(raw.encode("utf-8")).hexdigest() def _ensure_bank(conn, code: str, name: str) -> int: """Создать банк по коду и имени. Возвращает bank_id.""" conn.execute( "INSERT OR IGNORE INTO banks (code, name, is_salary) VALUES (?, ?, 0)", (code, name), ) conn.commit() row = conn.execute("SELECT id FROM banks WHERE code = ?", (code,)).fetchone() return row["id"] def _parser_for_filename(filename: str) -> tuple[str, str, BaseBankParser] | None: """По имени файла возвращает (code, name, parser) или None.""" for code, name, parser_cls in BANK_REGISTRY: p = parser_cls() if p.can_parse(filename): return (code, name, p) return None def _get_or_create_account(conn, bank_id: int, card_tail: str) -> int: cur = conn.execute( "SELECT id FROM accounts WHERE bank_id = ? AND external_id = ?", (bank_id, card_tail), ) row = cur.fetchone() if row: return row["id"] conn.execute( "INSERT INTO accounts (bank_id, external_id, name) VALUES (?, ?, ?)", (bank_id, card_tail, f"Карта ***{card_tail}"), ) conn.commit() return conn.execute("SELECT last_insert_rowid()").fetchone()[0] def import_file(file_path: str, source_filename: str) -> Tuple[int, int, int]: """ Парсит PDF выписку (Т- или С-банк), дедуплицирует и сохраняет в БД. Возвращает (добавлено, пропущено_дубликатов, распознано_всего). """ match = _parser_for_filename(source_filename) if not match: raise ValueError("Поддерживаются выписки Т-банка (Т-MM-YY.pdf), С-банка (С-MM-YY.pdf) и Я-банка (Я-MM-YY.pdf)") code, name, parser = match transactions = parser.parse(file_path) parsed_count = len(transactions) conn = get_connection() try: bank_id = _ensure_bank(conn, code, name) added, skipped = 0, 0 imported_at = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S") for t in transactions: account_id = _get_or_create_account(conn, bank_id, t.card_tail) key = _dedup_key(account_id, t.operation_date, t.amount, t.description) try: conn.execute( """INSERT INTO transactions (account_id, operation_date, debit_date, amount, currency, amount_card_currency, description, source_file, imported_at, dedup_key) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""", ( account_id, t.operation_date, t.debit_date, t.amount, "RUB", t.amount_card_currency, t.description, source_filename, imported_at, key, ), ) added += 1 except sqlite3.IntegrityError: skipped += 1 except Exception: raise conn.commit() return added, skipped, parsed_count finally: conn.close() def import_from_statements_dir() -> Tuple[int, int, int]: """Импортировать все файлы Т-*.pdf и С-*.pdf из папки Выписки банков. Возвращает (добавлено, дубликатов, распознано).""" path = Path(STATEMENTS_DIR) if not path.exists(): return 0, 0, 0 total_added, total_skipped, total_parsed = 0, 0, 0 for pattern in ("Т-*.pdf", "С-*.pdf", "Я-*.pdf"): for f in sorted(path.glob(pattern)): added, skipped, parsed = import_file(str(f), f.name) total_added += added total_skipped += skipped total_parsed += parsed return total_added, total_skipped, total_parsed