Files
income_calculator/backend/parsers/bank_t.py
2026-02-23 16:49:24 +03:00

87 lines
3.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import re
from datetime import datetime
from pathlib import Path
from typing import List
import pdfplumber
from backend.parsers.base import BaseBankParser, ParsedTransaction
def _normalize_amount(s: str) -> float:
return float(s.replace("\u00a0", " ").replace(" ", "").replace(",", "."))
def _parse_date(d: str) -> str:
"""DD.MM.YYYY -> YYYY-MM-DD"""
try:
dt = datetime.strptime(d.strip(), "%d.%m.%Y")
return dt.strftime("%Y-%m-%d")
except ValueError:
return d.strip()
def _parse_datetime(d: str, time_str: str) -> str:
"""Date DD.MM.YYYY + time HH:MM -> ISO"""
try:
part = d.strip() + " " + (time_str or "00:00").strip()
dt = datetime.strptime(part, "%d.%m.%Y %H:%M")
return dt.strftime("%Y-%m-%dT%H:%M:%S")
except ValueError:
return _parse_date(d) + "T00:00:00"
class BankTParser(BaseBankParser):
    """Parser for T-Bank statements. Files are named Т-MM-YY.pdf."""

    # First line of an operation:
    #   date  date  amount ₽  amount ₽  description  4 digits
    # (an amount may carry a leading + or -).
    ROW_RE = re.compile(
        r"^(\d{2}\.\d{2}\.\d{4})\s+(\d{2}\.\d{2}\.\d{4})\s+([-+]?[\d\s,]+\.\d{2})\s*₽\s+([-+]?[\d\s,]+\.\d{2})\s*₽\s+(.+?)\s+(\d{4})\s*$",
        re.UNICODE,
    )
    # Second line (times): HH:MM HH:MM followed by the rest of the text.
    TIME_RE = re.compile(r"^(\d{1,2}:\d{2})\s+(\d{1,2}:\d{2})\s*(.*)$")

    def can_parse(self, filename: str) -> bool:
        """Return True for a file whose base name is "Т-*.pdf".

        The prefix check is case-sensitive; the extension check is not.
        NOTE: the prefix letter is the Cyrillic capital "Т" (U+0422), not
        the Latin "T".
        """
        base = Path(filename).name
        if not base.startswith("Т-"):
            return False
        return base.lower().endswith(".pdf")

    def parse(self, file_path: str) -> List[ParsedTransaction]:
        """Extract every transaction row from the PDF at ``file_path``.

        The text of all pages is split into lines. Each line matching
        ``ROW_RE`` starts a transaction; if the line right after it
        matches ``TIME_RE``, its two times are attached to the operation
        and debit dates and any trailing text is appended to the
        description. Lines matching neither pattern are ignored.

        Returns:
            The transactions in the order they appear in the document.
        """
        text_lines: List[str] = []
        with pdfplumber.open(file_path) as pdf:
            for page in pdf.pages:
                page_text = page.extract_text()
                if page_text:
                    text_lines.extend(page_text.split("\n"))

        transactions: List[ParsedTransaction] = []
        total = len(text_lines)
        idx = 0
        while idx < total:
            row = self.ROW_RE.match(text_lines[idx].strip())
            idx += 1  # idx now points at the line after the candidate row
            if row is None:
                continue

            date_op, date_debit, amt_op, amt_card, desc, card_tail = row.groups()

            # Times default to midnight when no time line follows the row.
            op_time = debit_time = "00:00"
            follow_up = None
            if idx < total:
                follow_up = self.TIME_RE.match(text_lines[idx].strip())
            if follow_up is not None:
                op_time, debit_time, tail_text = follow_up.groups()
                if tail_text:
                    desc = (desc + " " + tail_text).strip()
                idx += 1  # the time line is consumed together with its row

            amount = _normalize_amount(amt_op)
            amount_card = _normalize_amount(amt_card) if amt_card else None
            transactions.append(
                ParsedTransaction(
                    operation_date=_parse_datetime(date_op, op_time),
                    debit_date=_parse_datetime(date_debit, debit_time),
                    amount=amount,
                    amount_card_currency=amount_card,
                    description=(desc or "").strip(),
                    card_tail=card_tail,
                )
            )
        return transactions