Initial income_calculator project
Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
1
backend/parsers/__init__.py
Normal file
1
backend/parsers/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
# Parsers package
|
||||
87
backend/parsers/bank_s.py
Normal file
87
backend/parsers/bank_s.py
Normal file
@@ -0,0 +1,87 @@
|
||||
import re
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import List
|
||||
|
||||
import pdfplumber
|
||||
|
||||
from backend.parsers.base import BaseBankParser, ParsedTransaction
|
||||
|
||||
|
||||
def _normalize_amount(s: str) -> float:
    """Convert a statement amount such as ``"+1 234,56"`` to a float.

    All whitespace is removed and the decimal comma is replaced by a dot.
    A leading ``+`` or ``-`` is kept and interpreted by ``float``.

    Raises:
        ValueError: if the cleaned-up text is not a valid number.
    """
    # str.split() without arguments splits on every Unicode whitespace
    # character, so thousands separators such as U+00A0 or U+202F are dropped
    # too -- the same set of characters the row regex accepts via \s.
    compact = "".join(s.split())
    return float(compact.replace(",", "."))
|
||||
|
||||
|
||||
def _parse_datetime_s(date_str: str, time_str: str) -> str:
    """Combine ``DD.MM.YYYY`` and ``HH:MM`` into ``YYYY-MM-DDTHH:MM:SS``.

    An empty or missing time is treated as ``00:00``. If the date/time pair
    does not parse, the date alone is tried so that a bad time still yields
    a valid ISO timestamp at midnight. Only when the date itself is
    unparseable is the raw text returned, with dots replaced by dashes and
    ``T00:00:00`` appended.
    """
    date_part = date_str.strip()
    time_part = (time_str or "00:00").strip()
    try:
        moment = datetime.strptime(date_part + " " + time_part, "%d.%m.%Y %H:%M")
        return moment.strftime("%Y-%m-%dT%H:%M:%S")
    except ValueError:
        pass
    try:
        # The time was the problem: keep the correct date, drop the time.
        day = datetime.strptime(date_part[:10], "%d.%m.%Y")
        return day.strftime("%Y-%m-%d") + "T00:00:00"
    except ValueError:
        # Last resort: pass the text through rather than raising.
        return date_part[:10].replace(".", "-") + "T00:00:00"
|
||||
|
||||
|
||||
class BankSParser(BaseBankParser):
    """Parser for Sberbank PDF statements, files named ``С-MM-YY.pdf``.

    The leading ``С`` of the file name is the Cyrillic letter. One operation
    spans up to three consecutive text lines, matched by ``ROW_RE``,
    ``DESC_LINE1_RE`` and ``CARD_LINE_RE``.
    """

    # Line 1: date, time, authorisation code, category, amount, balance.
    # An amount written with "+" is income; without it, an expense.
    ROW_RE = re.compile(
        r"^(\d{2}\.\d{2}\.\d{4})\s+(\d{1,2}:\d{2})\s+(\d{6})\s+(.+?)\s+([+-]?[\d\s]+,\d{2})\s+([\d\s]+,\d{2})\s*$",
        re.UNICODE,
    )
    # Line 2: date, description, then "Операция по" (the phrase wraps onto
    # the following line).
    DESC_LINE1_RE = re.compile(r"^(\d{2}\.\d{2}\.\d{4})\s+(.+?)\s+Операция по\s*$", re.UNICODE)
    # Line 3: "карте ****0566" -- the last four digits of the card.
    CARD_LINE_RE = re.compile(r"^карте\s+\*\*\*\*(\d{4})\s*$", re.UNICODE)

    def can_parse(self, filename: str) -> bool:
        """Return True when the base name starts with ``С-`` and ends in ``.pdf``.

        The extension check is case-insensitive; the prefix check is not.
        """
        name = Path(filename).name
        return name.startswith("С-") and name.lower().endswith(".pdf")

    def parse(self, file_path: str) -> List[ParsedTransaction]:
        """Read every page of the PDF and return the operations found in it."""
        lines: List[str] = []
        with pdfplumber.open(file_path) as pdf:
            for page in pdf.pages:
                text = page.extract_text()
                if text:  # pages without extractable text are skipped
                    lines.extend(text.split("\n"))
        return self._parse_lines(lines)

    def _parse_lines(self, lines: List[str]) -> List[ParsedTransaction]:
        """Turn the statement's text lines into transactions.

        Lines that do not match ``ROW_RE`` are ignored. After a matching row,
        an optional description line and an optional card line are consumed.
        """
        result: List[ParsedTransaction] = []
        i = 0
        while i < len(lines):
            m = self.ROW_RE.match(lines[i].strip())
            if m:
                date_op, time_op, _code, category, amount_str, _balance = m.groups()
                # Only "+" marks income. Take the magnitude first so that an
                # explicit "-" is not negated a second time into a positive.
                magnitude = abs(_normalize_amount(amount_str))
                amount = magnitude if amount_str.strip().startswith("+") else -magnitude

                desc_extra = ""
                card_tail = ""
                if i + 1 < len(lines):
                    d1 = self.DESC_LINE1_RE.match(lines[i + 1].strip())
                    if d1:
                        desc_extra = " " + d1.group(2).strip()
                        i += 1
                # NOTE(review): the card line is looked for whether or not a
                # description line was found -- confirm this matches the
                # intended nesting of the original code.
                if i + 1 < len(lines):
                    c2 = self.CARD_LINE_RE.match(lines[i + 1].strip())
                    if c2:
                        card_tail = c2.group(1)
                        i += 1
                if not card_tail:
                    card_tail = "0000"  # placeholder when no card line follows

                description = (category + desc_extra).strip()
                result.append(
                    ParsedTransaction(
                        operation_date=_parse_datetime_s(date_op, time_op),
                        debit_date=None,
                        amount=amount,
                        amount_card_currency=None,
                        description=description,
                        card_tail=card_tail,
                    )
                )
            i += 1
        return result
|
||||
86
backend/parsers/bank_t.py
Normal file
86
backend/parsers/bank_t.py
Normal file
@@ -0,0 +1,86 @@
|
||||
import re
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import List
|
||||
|
||||
import pdfplumber
|
||||
|
||||
from backend.parsers.base import BaseBankParser, ParsedTransaction
|
||||
|
||||
|
||||
def _normalize_amount(s: str) -> float:
    """Convert a statement amount such as ``"-1 234.56"`` to a float.

    All whitespace is removed. Whichever of ``,`` and ``.`` appears last is
    taken as the decimal separator and the other one is dropped as a
    thousands separator, so ``"1,234.56"`` and ``"12,50"`` both parse.

    Raises:
        ValueError: if the cleaned-up text is not a valid number.
    """
    # str.split() without arguments splits on every Unicode whitespace
    # character (space, U+00A0, U+202F, ...).
    compact = "".join(s.split())
    if compact.rfind(",") > compact.rfind("."):
        # Decimal comma: any dots before it are grouping marks.
        compact = compact.replace(".", "").replace(",", ".")
    else:
        # Decimal dot (or no separator at all): commas are grouping marks.
        compact = compact.replace(",", "")
    return float(compact)
|
||||
|
||||
|
||||
def _parse_date(d: str) -> str:
    """Convert ``DD.MM.YYYY`` to ``YYYY-MM-DD``.

    Text that is not a valid date in that format is returned unchanged,
    apart from surrounding whitespace being removed.
    """
    cleaned = d.strip()
    try:
        return datetime.strptime(cleaned, "%d.%m.%Y").strftime("%Y-%m-%d")
    except ValueError:
        return cleaned
|
||||
|
||||
|
||||
def _parse_datetime(d: str, time_str: str) -> str:
    """Combine a ``DD.MM.YYYY`` date and an ``HH:MM`` time into an ISO string.

    An empty or missing time counts as ``00:00``. When the pair cannot be
    parsed, the result is ``_parse_date(d)`` followed by ``T00:00:00``.
    """
    clock = time_str if time_str else "00:00"
    stamp = f"{d.strip()} {clock.strip()}"
    try:
        return datetime.strptime(stamp, "%d.%m.%Y %H:%M").strftime("%Y-%m-%dT%H:%M:%S")
    except ValueError:
        return f"{_parse_date(d)}T00:00:00"
|
||||
|
||||
|
||||
class BankTParser(BaseBankParser):
    """Parser for T-Bank PDF statements, files named ``Т-MM-YY.pdf``.

    The leading ``Т`` of the file name is the Cyrillic letter.
    """

    # First line of an operation: date, date, amount ₽, amount ₽,
    # description, four digits (an amount may carry "+" or "-").
    ROW_RE = re.compile(
        r"^(\d{2}\.\d{2}\.\d{4})\s+(\d{2}\.\d{2}\.\d{4})\s+([-+]?[\d\s,]+\.\d{2})\s*₽\s+([-+]?[\d\s,]+\.\d{2})\s*₽\s+(.+?)\s+(\d{4})\s*$",
        re.UNICODE,
    )
    # Second line (times): HH:MM HH:MM followed by the rest of the text.
    TIME_RE = re.compile(r"^(\d{1,2}:\d{2})\s+(\d{1,2}:\d{2})\s*(.*)$")

    def can_parse(self, filename: str) -> bool:
        """Return True when the base name starts with ``Т-`` and ends in ``.pdf``."""
        base = Path(filename).name
        if not base.startswith("Т-"):
            return False
        return base.lower().endswith(".pdf")

    def parse(self, file_path: str) -> List[ParsedTransaction]:
        """Read every page of the PDF and return the operations found in it.

        The first ``₽`` amount of a row is stored as ``amount`` and the second
        as ``amount_card_currency``. If the line after a row matches
        ``TIME_RE``, its two times complete the two dates and any trailing
        text is appended to the description.
        """
        with pdfplumber.open(file_path) as pdf:
            page_texts = [page.extract_text() for page in pdf.pages]
        # Pages with no extractable text contribute nothing.
        lines: List[str] = [row for text in page_texts if text for row in text.split("\n")]

        transactions: List[ParsedTransaction] = []
        total = len(lines)
        pos = 0
        while pos < total:
            row = self.ROW_RE.match(lines[pos].strip())
            pos += 1  # from here on, pos is the line following the candidate row
            if row is None:
                continue

            date_op, date_debit, amt_op, amt_card, desc, card_tail = row.groups()
            op_time = debit_time = "00:00"
            follow = self.TIME_RE.match(lines[pos].strip()) if pos < total else None
            if follow is not None:
                op_time, debit_time, rest = follow.groups()
                if rest:
                    desc = (desc + " " + rest).strip()
                pos += 1  # the time line belongs to this operation

            amount = _normalize_amount(amt_op)
            amount_card = _normalize_amount(amt_card) if amt_card else None
            transactions.append(
                ParsedTransaction(
                    operation_date=_parse_datetime(date_op, op_time),
                    debit_date=_parse_datetime(date_debit, debit_time),
                    amount=amount,
                    amount_card_currency=amount_card,
                    description=(desc or "").strip(),
                    card_tail=card_tail,
                )
            )
        return transactions
|
||||
84
backend/parsers/bank_y.py
Normal file
84
backend/parsers/bank_y.py
Normal file
@@ -0,0 +1,84 @@
|
||||
import re
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import List
|
||||
|
||||
import pdfplumber
|
||||
|
||||
from backend.parsers.base import BaseBankParser, ParsedTransaction
|
||||
|
||||
# Yandex Bank prints the minus sign as an EN DASH (U+2013).
MINUS_CHARS = "\u2013-"


def _normalize_amount(s: str) -> float:
    """Convert a statement amount such as ``"–1 234,56"`` to a float.

    All whitespace is removed, the decimal comma becomes a dot, and every
    character listed in ``MINUS_CHARS`` is mapped to an ASCII ``-``. A leading
    MINUS SIGN (U+2212) is accepted as well.

    Raises:
        ValueError: if the cleaned-up text is not a valid number.
    """
    # str.split() without arguments splits on every Unicode whitespace
    # character, so separators such as U+00A0 or U+202F are dropped too.
    compact = "".join(s.split()).replace(",", ".")
    for dash in MINUS_CHARS:
        compact = compact.replace(dash, "-")
    if compact.startswith("−"):
        compact = "-" + compact[1:]
    return float(compact)
|
||||
|
||||
|
||||
def _parse_datetime_y(date_str: str, time_str: str = "") -> str:
    """Combine ``DD.MM.YYYY`` and an optional ``HH:MM`` into an ISO string.

    An empty or missing time is treated as ``00:00``. If the date/time pair
    does not parse, the date alone is tried so that a bad time still yields
    a valid ``YYYY-MM-DDT00:00:00``. Only when the date itself is unparseable
    is the raw text returned, with dots replaced by dashes and ``T00:00:00``
    appended.
    """
    date_part = date_str.strip()
    time_part = (time_str or "00:00").strip()
    try:
        moment = datetime.strptime(date_part + " " + time_part, "%d.%m.%Y %H:%M")
        return moment.strftime("%Y-%m-%dT%H:%M:%S")
    except ValueError:
        pass
    try:
        # The time was the problem: keep the correct date, drop the time.
        day = datetime.strptime(date_part[:10], "%d.%m.%Y")
        return day.strftime("%Y-%m-%d") + "T00:00:00"
    except ValueError:
        # Last resort: pass the text through rather than raising.
        return date_part.replace(".", "-")[:10] + "T00:00:00"
|
||||
|
||||
|
||||
class BankYParser(BaseBankParser):
    """Parser for Yandex Bank PDF statements, files named ``Я-MM-YY.pdf``."""

    # Row: description ... DD.MM.YYYY DD.MM.YYYY [*XXXX] amount ₽ amount ₽
    # (the minus may be an EN DASH, U+2013; the card part is optional).
    ROW_RE = re.compile(
        r"^(.+?)\s+(\d{2}\.\d{2}\.\d{4})\s+(\d{2}\.\d{2}\.\d{4})\s+(?:\*(\d{4})\s+)?([+\u2013\-]?[\d\s,]+)\s*₽\s+([+\u2013\-]?[\d\s,]+)\s*₽\s*$",
        re.UNICODE,
    )
    # The following line may carry the time: "в 18:13" or "клиента в 21:35".
    TIME_RE = re.compile(r"^(?:.*\s+)?в\s+(\d{1,2}:\d{2})\s*$", re.UNICODE)

    PIGGY_MARKER = "Перевод между счетами одного клиента"

    def can_parse(self, filename: str) -> bool:
        """Return True when the base name starts with ``Я-`` and ends in ``.pdf``."""
        base = Path(filename).name
        if not base.startswith("Я-"):
            return False
        return base.lower().endswith(".pdf")

    def parse(self, file_path: str) -> List[ParsedTransaction]:
        """Read every page of the PDF and return the operations found in it.

        The first ``₽`` amount of a row is stored as ``amount`` and the second
        as ``amount_card_currency``. If the line after a row matches
        ``TIME_RE``, its time completes the operation date and the whole line
        is appended to the description. Rows without a ``*XXXX`` card part get
        the placeholder ``"0000"``.
        """
        with pdfplumber.open(file_path) as pdf:
            page_texts = [page.extract_text() for page in pdf.pages]
        # Pages with no extractable text contribute nothing.
        lines: List[str] = [row for text in page_texts if text for row in text.split("\n")]

        transactions: List[ParsedTransaction] = []
        total = len(lines)
        pos = 0
        while pos < total:
            row = self.ROW_RE.match(lines[pos].strip())
            pos += 1  # from here on, pos is the line following the candidate row
            if row is None:
                continue

            desc, date_op, date_proc, card_tail, amt1, amt2 = row.groups()
            if not card_tail:
                card_tail = "0000"
            amount = _normalize_amount(amt1)

            time_str = ""
            if pos < total:
                trailer = lines[pos].strip()
                timed = self.TIME_RE.match(trailer)
                if timed:
                    time_str = timed.group(1)
                    desc = (desc + " " + trailer).strip()
                    pos += 1  # the time line belongs to this operation

            transactions.append(
                ParsedTransaction(
                    operation_date=_parse_datetime_y(date_op, time_str),
                    debit_date=_parse_datetime_y(date_proc),
                    amount=amount,
                    amount_card_currency=_normalize_amount(amt2) if amt2 else None,
                    description=desc.strip(),
                    card_tail=card_tail,
                )
            )
        return transactions
|
||||
23
backend/parsers/base.py
Normal file
23
backend/parsers/base.py
Normal file
@@ -0,0 +1,23 @@
|
||||
from abc import ABC, abstractmethod
|
||||
from dataclasses import dataclass
|
||||
from typing import List
|
||||
|
||||
|
||||
@dataclass
|
||||
class ParsedTransaction:
|
||||
operation_date: str # ISO date or datetime
|
||||
debit_date: str | None
|
||||
amount: float # signed: negative = expense
|
||||
amount_card_currency: float | None
|
||||
description: str
|
||||
card_tail: str # last 4 digits for account matching
|
||||
|
||||
|
||||
class BaseBankParser(ABC):
|
||||
@abstractmethod
|
||||
def can_parse(self, filename: str) -> bool:
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def parse(self, file_path: str) -> List[ParsedTransaction]:
|
||||
pass
|
||||
Reference in New Issue
Block a user