Files
2026-02-23 16:49:24 +03:00

88 lines
3.5 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import re
from datetime import datetime
from pathlib import Path
from typing import List
import pdfplumber
from backend.parsers.base import BaseBankParser, ParsedTransaction
def _normalize_amount(s: str) -> float:
"""Пробелы убрать, запятая — десятичный разделитель."""
return float(s.replace("\u00a0", " ").replace(" ", "").replace(",", "."))
def _parse_datetime_s(date_str: str, time_str: str) -> str:
"""DD.MM.YYYY + HH:MM -> ISO."""
try:
part = date_str.strip() + " " + (time_str or "00:00").strip()
dt = datetime.strptime(part, "%d.%m.%Y %H:%M")
return dt.strftime("%Y-%m-%dT%H:%M:%S")
except ValueError:
return date_str.strip()[:10].replace(".", "-") + "T00:00:00"
class BankSParser(BaseBankParser):
    """Parser for Sberbank statements. Files are named С-MM-YY.pdf.

    In the extracted PDF text a transaction occupies up to three consecutive
    lines:

    1. date, time, 6-digit authorization code, category, amount, balance;
    2. optionally: date, free-text description, ending in "Операция по";
    3. optionally: "карте ****NNNN" carrying the last four card digits.
    """

    # First line: date, time, authorization code, category, amount, balance.
    # An amount prefixed with "+" is income; anything else is an expense.
    # NOTE(review): the category is matched lazily and the amount accepts
    # digits and whitespace, so a category that ends in digits ("Cafe 24")
    # has those digits absorbed into the amount. Not addressed here.
    ROW_RE = re.compile(
        r"^(\d{2}\.\d{2}\.\d{4})\s+(\d{1,2}:\d{2})\s+(\d{6})\s+(.+?)\s+([+-]?[\d\s]+,\d{2})\s+([\d\s]+,\d{2})\s*$",
        re.UNICODE,
    )
    # Second line: date, description, then "Операция по" (wraps to the next line).
    DESC_LINE1_RE = re.compile(r"^(\d{2}\.\d{2}\.\d{4})\s+(.+?)\s+Операция по\s*$", re.UNICODE)
    # Third line: "карте ****0566".
    CARD_LINE_RE = re.compile(r"^карте\s+\*\*\*\*(\d{4})\s*$", re.UNICODE)

    def can_parse(self, filename: str) -> bool:
        """Return True if the file name starts with "С-" and ends in ".pdf".

        Only the final path component is inspected. The extension check is
        case-insensitive; the prefix check is not.
        """
        # NOTE(review): the prefix letter appears to be Cyrillic "С"
        # (U+0421), not Latin "C" - confirm against real file names.
        name = Path(filename).name
        return name.startswith("С-") and name.lower().endswith(".pdf")

    def parse(self, file_path: str) -> List[ParsedTransaction]:
        """Extract every transaction from the PDF statement at ``file_path``.

        The text of all pages is concatenated into one list of lines, so a
        transaction whose continuation lines fall on the next page is still
        assembled.

        Args:
            file_path: Path to the PDF, passed to ``pdfplumber.open``.

        Returns:
            Transactions in the order they appear in the document.
        """
        lines: List[str] = []
        with pdfplumber.open(file_path) as pdf:
            for page in pdf.pages:
                text = page.extract_text()
                # Pages without extractable text are skipped.
                if text:
                    lines.extend(text.split("\n"))
        return self._parse_lines(lines)

    def _parse_lines(self, lines: List[str]) -> List[ParsedTransaction]:
        """Turn already-extracted statement text lines into transactions."""
        result: List[ParsedTransaction] = []
        total = len(lines)
        i = 0
        while i < total:
            m = self.ROW_RE.match(lines[i].strip())
            i += 1  # From here on, i points at the next unconsumed line.
            if not m:
                continue

            date_op, time_op, _code, category, amount_str, _balance = m.groups()

            # Only an explicit "+" marks income. Take the magnitude first:
            # the regex also admits a leading "-", and negating an already
            # negative value would turn that expense into income.
            amount = abs(_normalize_amount(amount_str))
            if not amount_str.lstrip().startswith("+"):
                amount = -amount

            # Optional description line directly after the row.
            desc_extra = ""
            if i < total:
                d1 = self.DESC_LINE1_RE.match(lines[i].strip())
                if d1:
                    desc_extra = " " + d1.group(2).strip()
                    i += 1

            # Optional card line; it is looked for whether or not a
            # description line was present. "0000" stands for "card unknown".
            card_tail = "0000"
            if i < total:
                c2 = self.CARD_LINE_RE.match(lines[i].strip())
                if c2:
                    card_tail = c2.group(1)
                    i += 1

            result.append(
                ParsedTransaction(
                    operation_date=_parse_datetime_s(date_op, time_op),
                    debit_date=None,
                    amount=amount,
                    amount_card_currency=None,
                    description=(category + desc_extra).strip(),
                    card_tail=card_tail,
                )
            )
        return result