125 lines
4.8 KiB
Python
125 lines
4.8 KiB
Python
import hashlib
|
||
import sqlite3
|
||
from datetime import datetime
|
||
from pathlib import Path
|
||
from typing import Tuple
|
||
|
||
from backend.config import STATEMENTS_DIR
|
||
from backend.db.database import get_connection
|
||
from backend.parsers.bank_s import BankSParser
|
||
from backend.parsers.bank_t import BankTParser
|
||
from backend.parsers.bank_y import BankYParser
|
||
from backend.parsers.base import BaseBankParser, ParsedTransaction
|
||
|
||
BANK_REGISTRY: list[tuple[str, str, type[BaseBankParser]]] = [
|
||
("T", "Т-банк", BankTParser),
|
||
("S", "Сбербанк", BankSParser),
|
||
("Y", "Яндекс Банк", BankYParser),
|
||
]
|
||
|
||
|
||
def _dedup_key(account_id: int, operation_date: str, amount: float, description: str) -> str:
|
||
raw = f"{account_id}|{operation_date}|{amount}|{description}"
|
||
return hashlib.sha256(raw.encode("utf-8")).hexdigest()
|
||
|
||
|
||
def _ensure_bank(conn, code: str, name: str) -> int:
|
||
"""Создать банк по коду и имени. Возвращает bank_id."""
|
||
conn.execute(
|
||
"INSERT OR IGNORE INTO banks (code, name, is_salary) VALUES (?, ?, 0)",
|
||
(code, name),
|
||
)
|
||
conn.commit()
|
||
row = conn.execute("SELECT id FROM banks WHERE code = ?", (code,)).fetchone()
|
||
return row["id"]
|
||
|
||
|
||
def _parser_for_filename(filename: str) -> tuple[str, str, BaseBankParser] | None:
|
||
"""По имени файла возвращает (code, name, parser) или None."""
|
||
for code, name, parser_cls in BANK_REGISTRY:
|
||
p = parser_cls()
|
||
if p.can_parse(filename):
|
||
return (code, name, p)
|
||
return None
|
||
|
||
|
||
def _get_or_create_account(conn, bank_id: int, card_tail: str) -> int:
|
||
cur = conn.execute(
|
||
"SELECT id FROM accounts WHERE bank_id = ? AND external_id = ?",
|
||
(bank_id, card_tail),
|
||
)
|
||
row = cur.fetchone()
|
||
if row:
|
||
return row["id"]
|
||
conn.execute(
|
||
"INSERT INTO accounts (bank_id, external_id, name) VALUES (?, ?, ?)",
|
||
(bank_id, card_tail, f"Карта ***{card_tail}"),
|
||
)
|
||
conn.commit()
|
||
return conn.execute("SELECT last_insert_rowid()").fetchone()[0]
|
||
|
||
|
||
def import_file(file_path: str, source_filename: str) -> Tuple[int, int, int]:
|
||
"""
|
||
Парсит PDF выписку (Т- или С-банк), дедуплицирует и сохраняет в БД.
|
||
Возвращает (добавлено, пропущено_дубликатов, распознано_всего).
|
||
"""
|
||
match = _parser_for_filename(source_filename)
|
||
if not match:
|
||
raise ValueError("Поддерживаются выписки Т-банка (Т-MM-YY.pdf), С-банка (С-MM-YY.pdf) и Я-банка (Я-MM-YY.pdf)")
|
||
|
||
code, name, parser = match
|
||
transactions = parser.parse(file_path)
|
||
parsed_count = len(transactions)
|
||
conn = get_connection()
|
||
try:
|
||
bank_id = _ensure_bank(conn, code, name)
|
||
added, skipped = 0, 0
|
||
imported_at = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S")
|
||
|
||
for t in transactions:
|
||
account_id = _get_or_create_account(conn, bank_id, t.card_tail)
|
||
key = _dedup_key(account_id, t.operation_date, t.amount, t.description)
|
||
try:
|
||
conn.execute(
|
||
"""INSERT INTO transactions
|
||
(account_id, operation_date, debit_date, amount, currency, amount_card_currency, description, source_file, imported_at, dedup_key)
|
||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
|
||
(
|
||
account_id,
|
||
t.operation_date,
|
||
t.debit_date,
|
||
t.amount,
|
||
"RUB",
|
||
t.amount_card_currency,
|
||
t.description,
|
||
source_filename,
|
||
imported_at,
|
||
key,
|
||
),
|
||
)
|
||
added += 1
|
||
except sqlite3.IntegrityError:
|
||
skipped += 1
|
||
except Exception:
|
||
raise
|
||
conn.commit()
|
||
return added, skipped, parsed_count
|
||
finally:
|
||
conn.close()
|
||
|
||
|
||
def import_from_statements_dir() -> Tuple[int, int, int]:
|
||
"""Импортировать все файлы Т-*.pdf и С-*.pdf из папки Выписки банков. Возвращает (добавлено, дубликатов, распознано)."""
|
||
path = Path(STATEMENTS_DIR)
|
||
if not path.exists():
|
||
return 0, 0, 0
|
||
total_added, total_skipped, total_parsed = 0, 0, 0
|
||
for pattern in ("Т-*.pdf", "С-*.pdf", "Я-*.pdf"):
|
||
for f in sorted(path.glob(pattern)):
|
||
added, skipped, parsed = import_file(str(f), f.name)
|
||
total_added += added
|
||
total_skipped += skipped
|
||
total_parsed += parsed
|
||
return total_added, total_skipped, total_parsed
|