88 lines
3.5 KiB
Python
88 lines
3.5 KiB
Python
import re
|
||
from datetime import datetime
|
||
from pathlib import Path
|
||
from typing import List
|
||
|
||
import pdfplumber
|
||
|
||
from backend.parsers.base import BaseBankParser, ParsedTransaction
|
||
|
||
|
||
def _normalize_amount(s: str) -> float:
|
||
"""Пробелы убрать, запятая — десятичный разделитель."""
|
||
return float(s.replace("\u00a0", " ").replace(" ", "").replace(",", "."))
|
||
|
||
|
||
def _parse_datetime_s(date_str: str, time_str: str) -> str:
|
||
"""DD.MM.YYYY + HH:MM -> ISO."""
|
||
try:
|
||
part = date_str.strip() + " " + (time_str or "00:00").strip()
|
||
dt = datetime.strptime(part, "%d.%m.%Y %H:%M")
|
||
return dt.strftime("%Y-%m-%dT%H:%M:%S")
|
||
except ValueError:
|
||
return date_str.strip()[:10].replace(".", "-") + "T00:00:00"
|
||
|
||
|
||
class BankSParser(BaseBankParser):
    """Parser for Sberbank statements stored as ``С-MM-YY.pdf`` files.

    A transaction in the extracted text occupies up to three consecutive
    lines: the main row (``ROW_RE``), an optional description line
    (``DESC_LINE1_RE``) and an optional card line (``CARD_LINE_RE``).
    """

    # First line: date, time, 6-digit authorisation code, category, amount,
    # balance. An amount prefixed with "+" is income; anything else is an expense.
    ROW_RE = re.compile(
        r"^(\d{2}\.\d{2}\.\d{4})\s+(\d{1,2}:\d{2})\s+(\d{6})\s+(.+?)\s+([+-]?[\d\s]+,\d{2})\s+([\d\s]+,\d{2})\s*$",
        re.UNICODE,
    )
    # Second line: date, free-text description, then the wrapped phrase
    # "Операция по" that continues on the next line.
    DESC_LINE1_RE = re.compile(r"^(\d{2}\.\d{2}\.\d{4})\s+(.+?)\s+Операция по\s*$", re.UNICODE)
    # Third line: "карте ****0566" - the last four digits of the card.
    CARD_LINE_RE = re.compile(r"^карте\s+\*\*\*\*(\d{4})\s*$", re.UNICODE)

    def can_parse(self, filename: str) -> bool:
        """Return True for file names that start with ``С-`` and end with ``.pdf``.

        Only the final path component is inspected. The extension check is
        case-insensitive; the prefix check is not.
        """
        name = Path(filename).name
        return name.startswith("С-") and name.lower().endswith(".pdf")

    def parse(self, file_path: str) -> List[ParsedTransaction]:
        """Extract all transactions from the PDF statement at *file_path*.

        Text is pulled from every page, split into lines and scanned for rows
        matching ``ROW_RE``. Lines that do not start a transaction are skipped.

        Returns:
            One ``ParsedTransaction`` per matched row, in document order.
            Income is positive and expenses are negative. ``card_tail`` is
            ``"0000"`` when no card line follows the row.
        """
        lines: List[str] = []
        with pdfplumber.open(file_path) as pdf:
            for page in pdf.pages:
                text = page.extract_text()
                if text:
                    lines.extend(text.split("\n"))

        result: List[ParsedTransaction] = []
        total = len(lines)
        i = 0
        while i < total:
            m = self.ROW_RE.match(lines[i].strip())
            if m:
                date_op, time_op, _code, category, amount_str, _balance = m.groups()

                # ROW_RE allows an explicit "-" as well as "+". Work from the
                # magnitude so that "-100,00" is not negated twice and
                # recorded as income.
                magnitude = abs(_normalize_amount(amount_str))
                is_income = amount_str.strip().startswith("+")
                amount = magnitude if is_income else -magnitude

                desc_extra = ""
                card_tail = ""

                # Optional description line right after the main row.
                if i + 1 < total:
                    d1 = self.DESC_LINE1_RE.match(lines[i + 1].strip())
                    if d1:
                        desc_extra = " " + d1.group(2).strip()
                        i += 1

                # Optional card line; checked whether or not a description
                # line was consumed above.
                if i + 1 < total:
                    c2 = self.CARD_LINE_RE.match(lines[i + 1].strip())
                    if c2:
                        card_tail = c2.group(1)
                        i += 1

                if not card_tail:
                    # Placeholder used when the statement shows no card number.
                    card_tail = "0000"

                result.append(
                    ParsedTransaction(
                        operation_date=_parse_datetime_s(date_op, time_op),
                        debit_date=None,
                        amount=amount,
                        amount_card_currency=None,
                        description=(category + desc_extra).strip(),
                        card_tail=card_tail,
                    )
                )
            i += 1
        return result
|