Initial income_calculator project

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
2026-02-23 16:49:24 +03:00
commit 31dc287c3d
44 changed files with 1935 additions and 0 deletions

84
backend/parsers/bank_y.py Normal file
View File

@@ -0,0 +1,84 @@
import re
from datetime import datetime
from pathlib import Path
from typing import List
import pdfplumber
from backend.parsers.base import BaseBankParser, ParsedTransaction
# Я-банк использует EN DASH (U+2013) для минуса
MINUS_CHARS = "\u2013-"
def _normalize_amount(s: str) -> float:
s = s.replace("\u00a0", " ").replace(" ", "").replace(",", ".")
for c in MINUS_CHARS:
s = s.replace(c, "-")
if s.startswith(""):
s = "-" + s[1:]
return float(s)
def _parse_datetime_y(date_str: str, time_str: str = "") -> str:
try:
part = date_str.strip() + " " + (time_str or "00:00").strip()
dt = datetime.strptime(part, "%d.%m.%Y %H:%M")
return dt.strftime("%Y-%m-%dT%H:%M:%S")
except ValueError:
return date_str.strip().replace(".", "-")[:10] + "T00:00:00"
class BankYParser(BaseBankParser):
    """Parser for Yandex Bank statements (files named ``Я-MM-YY.pdf``)."""

    # Row layout: description ... DD.MM.YYYY DD.MM.YYYY [*XXXX] amount ₽ amount ₽
    # (the minus may be U+2013; the card tail is optional).
    ROW_RE = re.compile(
        r"^(.+?)\s+(\d{2}\.\d{2}\.\d{4})\s+(\d{2}\.\d{2}\.\d{4})\s+(?:\*(\d{4})\s+)?([+\u2013\-]?[\d\s,]+)\s*₽\s+([+\u2013\-]?[\d\s,]+)\s*₽\s*$",
        re.UNICODE,
    )
    # The line after a row may carry the time: "в 18:13" or "клиента в 21:35".
    TIME_RE = re.compile(r"^(?:.*\s+)?в\s+(\d{1,2}:\d{2})\s*$", re.UNICODE)
    PIGGY_MARKER = "Перевод между счетами одного клиента"

    def can_parse(self, filename: str) -> bool:
        """Return True when the file name starts with ``Я-`` and ends in ``.pdf``."""
        base_name = Path(filename).name
        if not base_name.startswith("Я-"):
            return False
        return base_name.lower().endswith(".pdf")

    def parse(self, file_path: str) -> List[ParsedTransaction]:
        """Extract transactions from the PDF statement at ``file_path``.

        Text is pulled from every page, split into lines, and each line that
        matches ``ROW_RE`` becomes one ``ParsedTransaction``. If the line
        right after a row matches ``TIME_RE``, its time is used for the
        operation date, its text is appended to the description, and the line
        is consumed.
        """
        with pdfplumber.open(file_path) as pdf:
            page_texts = [page.extract_text() for page in pdf.pages]

        # Pages without extractable text are skipped; lines are stripped once.
        rows = [
            raw.strip()
            for text in page_texts
            if text
            for raw in text.split("\n")
        ]

        transactions: List[ParsedTransaction] = []
        total = len(rows)
        pos = 0
        while pos < total:
            row_match = self.ROW_RE.match(rows[pos])
            pos += 1  # from here on ``pos`` points at the line after the row
            if row_match is None:
                continue

            (
                description,
                operation_day,
                processing_day,
                tail,
                first_amount,
                second_amount,
            ) = row_match.groups()
            if not tail:
                tail = "0000"  # placeholder when the row has no card suffix
            amount = _normalize_amount(first_amount)

            clock = ""
            if pos < total:
                time_match = self.TIME_RE.match(rows[pos])
                if time_match is not None:
                    clock = time_match.group(1)
                    description = f"{description} {rows[pos]}".strip()
                    pos += 1  # the time line belongs to this transaction

            if second_amount:
                card_amount = _normalize_amount(second_amount)
            else:
                card_amount = None

            transactions.append(
                ParsedTransaction(
                    operation_date=_parse_datetime_y(operation_day, clock),
                    debit_date=_parse_datetime_y(processing_day),
                    amount=amount,
                    amount_card_currency=card_amount,
                    description=description.strip(),
                    card_tail=tail,
                )
            )
        return transactions