Files
income_calculator/backend/services/import_service.py
2026-02-23 16:49:24 +03:00

125 lines
4.8 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import hashlib
import sqlite3
from datetime import datetime
from pathlib import Path
from typing import Tuple
from backend.config import STATEMENTS_DIR
from backend.db.database import get_connection
from backend.parsers.bank_s import BankSParser
from backend.parsers.bank_t import BankTParser
from backend.parsers.bank_y import BankYParser
from backend.parsers.base import BaseBankParser, ParsedTransaction
BANK_REGISTRY: list[tuple[str, str, type[BaseBankParser]]] = [
("T", "Т-банк", BankTParser),
("S", "Сбербанк", BankSParser),
("Y", "Яндекс Банк", BankYParser),
]
def _dedup_key(account_id: int, operation_date: str, amount: float, description: str) -> str:
raw = f"{account_id}|{operation_date}|{amount}|{description}"
return hashlib.sha256(raw.encode("utf-8")).hexdigest()
def _ensure_bank(conn, code: str, name: str) -> int:
"""Создать банк по коду и имени. Возвращает bank_id."""
conn.execute(
"INSERT OR IGNORE INTO banks (code, name, is_salary) VALUES (?, ?, 0)",
(code, name),
)
conn.commit()
row = conn.execute("SELECT id FROM banks WHERE code = ?", (code,)).fetchone()
return row["id"]
def _parser_for_filename(filename: str) -> tuple[str, str, BaseBankParser] | None:
"""По имени файла возвращает (code, name, parser) или None."""
for code, name, parser_cls in BANK_REGISTRY:
p = parser_cls()
if p.can_parse(filename):
return (code, name, p)
return None
def _get_or_create_account(conn, bank_id: int, card_tail: str) -> int:
cur = conn.execute(
"SELECT id FROM accounts WHERE bank_id = ? AND external_id = ?",
(bank_id, card_tail),
)
row = cur.fetchone()
if row:
return row["id"]
conn.execute(
"INSERT INTO accounts (bank_id, external_id, name) VALUES (?, ?, ?)",
(bank_id, card_tail, f"Карта ***{card_tail}"),
)
conn.commit()
return conn.execute("SELECT last_insert_rowid()").fetchone()[0]
def import_file(file_path: str, source_filename: str) -> Tuple[int, int, int]:
"""
Парсит PDF выписку (Т- или С-банк), дедуплицирует и сохраняет в БД.
Возвращает (добавлено, пропущеноубликатов, распознаносего).
"""
match = _parser_for_filename(source_filename)
if not match:
raise ValueError("Поддерживаются выписки Т-банка (Т-MM-YY.pdf), С-банка (С-MM-YY.pdf) и Я-банка (Я-MM-YY.pdf)")
code, name, parser = match
transactions = parser.parse(file_path)
parsed_count = len(transactions)
conn = get_connection()
try:
bank_id = _ensure_bank(conn, code, name)
added, skipped = 0, 0
imported_at = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S")
for t in transactions:
account_id = _get_or_create_account(conn, bank_id, t.card_tail)
key = _dedup_key(account_id, t.operation_date, t.amount, t.description)
try:
conn.execute(
"""INSERT INTO transactions
(account_id, operation_date, debit_date, amount, currency, amount_card_currency, description, source_file, imported_at, dedup_key)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
(
account_id,
t.operation_date,
t.debit_date,
t.amount,
"RUB",
t.amount_card_currency,
t.description,
source_filename,
imported_at,
key,
),
)
added += 1
except sqlite3.IntegrityError:
skipped += 1
except Exception:
raise
conn.commit()
return added, skipped, parsed_count
finally:
conn.close()
def import_from_statements_dir() -> Tuple[int, int, int]:
"""Импортировать все файлы Т-*.pdf и С-*.pdf из папки Выписки банков. Возвращает (добавлено, дубликатов, распознано)."""
path = Path(STATEMENTS_DIR)
if not path.exists():
return 0, 0, 0
total_added, total_skipped, total_parsed = 0, 0, 0
for pattern in ("Т-*.pdf", "С-*.pdf", "Я-*.pdf"):
for f in sorted(path.glob(pattern)):
added, skipped, parsed = import_file(str(f), f.name)
total_added += added
total_skipped += skipped
total_parsed += parsed
return total_added, total_skipped, total_parsed