import re
from datetime import datetime
from pathlib import Path
from typing import List

import pdfplumber

from backend.parsers.base import BaseBankParser, ParsedTransaction


def _normalize_amount(s: str) -> float:
    """Convert a textual money amount to ``float``.

    All whitespace (including non-breaking and thin spaces, which the row
    regex accepts via ``\\s``) is removed. When both ``,`` and ``.`` occur,
    the rightmost one is taken as the decimal separator and the other is
    dropped as a grouping separator; a lone ``,`` is treated as the decimal
    separator.

    Raises:
        ValueError: if the cleaned text is still not a valid number.
    """
    # str.split() with no argument splits on every Unicode whitespace char.
    compact = "".join(s.split())
    last_comma = compact.rfind(",")
    last_dot = compact.rfind(".")
    if last_comma != -1 and last_dot != -1:
        if last_dot > last_comma:
            # "1,234.56": commas are grouping separators.
            compact = compact.replace(",", "")
        else:
            # "1.234,56": dots are grouping separators.
            compact = compact.replace(".", "")
    return float(compact.replace(",", "."))


def _parse_date(d: str) -> str:
    """Convert ``DD.MM.YYYY`` to ``YYYY-MM-DD``.

    If the text does not match the expected format, it is returned stripped
    but otherwise unchanged (best-effort, no exception).
    """
    try:
        dt = datetime.strptime(d.strip(), "%d.%m.%Y")
        return dt.strftime("%Y-%m-%d")
    except ValueError:
        return d.strip()


def _parse_datetime(d: str, time_str: str) -> str:
    """Combine a ``DD.MM.YYYY`` date and an ``HH:MM`` time into ISO format.

    An empty or ``None`` time is treated as ``00:00``. If the combined text
    cannot be parsed, the result falls back to the date (as converted by
    ``_parse_date``) at midnight.
    """
    try:
        part = d.strip() + " " + (time_str or "00:00").strip()
        dt = datetime.strptime(part, "%d.%m.%Y %H:%M")
        return dt.strftime("%Y-%m-%dT%H:%M:%S")
    except ValueError:
        return _parse_date(d) + "T00:00:00"


class BankTParser(BaseBankParser):
    """Parser for T-Bank statements. Files are named Т-MM-YY.pdf."""

    # First line of an operation:
    #   date date amount ₽ amount ₽ description 4-digits
    # (an amount may carry a leading + or - sign).
    ROW_RE = re.compile(
        r"^(\d{2}\.\d{2}\.\d{4})\s+(\d{2}\.\d{2}\.\d{4})\s+([-+]?[\d\s,]+\.\d{2})\s*₽\s+([-+]?[\d\s,]+\.\d{2})\s*₽\s+(.+?)\s+(\d{4})\s*$",
        re.UNICODE,
    )
    # Second line (times): HH:MM HH:MM, followed by the remaining text.
    TIME_RE = re.compile(r"^(\d{1,2}:\d{2})\s+(\d{1,2}:\d{2})\s*(.*)$")

    def can_parse(self, filename: str) -> bool:
        """Return True when the file name starts with ``Т-`` and ends in ``.pdf``.

        Only the final path component is examined; the extension check is
        case-insensitive, the prefix check is not.
        """
        # NOTE(review): the prefix looks like the Cyrillic letter "Т", not
        # the Latin "T" — confirm against real file names.
        name = Path(filename).name
        return name.startswith("Т-") and name.lower().endswith(".pdf")

    def parse(self, file_path: str) -> List[ParsedTransaction]:
        """Extract transactions from the statement PDF at *file_path*.

        The text of every page is split into lines. Each line matching
        ``ROW_RE`` starts a transaction; if the line right after it matches
        ``TIME_RE``, that line supplies the operation/debit times and any
        trailing text is appended to the description. Lines matching neither
        pattern are ignored.

        Returns:
            The transactions in the order they appear in the document.

        Raises:
            ValueError: if a matched amount cannot be converted to a number.
        """
        lines: List[str] = []
        with pdfplumber.open(file_path) as pdf:
            for page in pdf.pages:
                text = page.extract_text()
                if text:
                    lines.extend(text.split("\n"))

        result: List[ParsedTransaction] = []
        i = 0
        while i < len(lines):
            m = self.ROW_RE.match(lines[i].strip())
            if m:
                date_op, date_debit, amt_op, amt_card, desc, card_tail = m.groups()
                # Default to midnight when no time line follows the row.
                op_time, debit_time = "00:00", "00:00"
                if i + 1 < len(lines):
                    tm = self.TIME_RE.match(lines[i + 1].strip())
                    if tm:
                        op_time, debit_time, rest = tm.groups()
                        if rest:
                            desc = (desc + " " + rest).strip()
                        # The time line belongs to this row; skip past it.
                        i += 1
                amount = _normalize_amount(amt_op)
                amount_card = _normalize_amount(amt_card) if amt_card else None
                result.append(
                    ParsedTransaction(
                        operation_date=_parse_datetime(date_op, op_time),
                        debit_date=_parse_datetime(date_debit, debit_time),
                        amount=amount,
                        amount_card_currency=amount_card,
                        description=(desc or "").strip(),
                        card_tail=card_tail,
                    )
                )
            i += 1
        return result