Files
telegram-helper-bot/helper_bot/handlers/private/services.py
Andrey feee7f010c refactor: обновление системы ML-скоринга и переход на RAG API
- Обновлен Dockerfile для использования Alpine вместо Slim, улучшая размер образа.
- Удален устаревший RAGService и добавлен RagApiClient для работы с внешним RAG API.
- Обновлены переменные окружения в env.example для настройки нового RAG API.
- Обновлен ScoringManager для интеграции с RagApiClient.
- Упрощена структура проекта, удалены ненужные файлы и зависимости, связанные с векторным хранилищем.
- Обновлены обработчики и функции для работы с новым API, включая получение статистики и обработку ошибок.
2026-01-26 22:03:15 +03:00

538 lines
23 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Service classes for private handlers"""
# Standard library imports
import asyncio
import html
import random
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path
from typing import Any, Callable, Dict, Protocol, Union
# Third-party imports
from aiogram import types
from aiogram.types import FSInputFile
from database.models import TelegramPost, User
from helper_bot.keyboards import get_reply_keyboard_for_post
# Local imports - utilities
from helper_bot.utils.helper_func import (
add_in_db_media, check_username_and_full_name, determine_anonymity,
get_first_name, get_text_message, prepare_media_group_from_middlewares,
send_audio_message, send_media_group_message_to_private_chat,
send_photo_message, send_text_message, send_video_message,
send_video_note_message, send_voice_message)
# Local imports - metrics
from helper_bot.utils.metrics import (db_query_time, track_errors,
track_file_operations,
track_media_processing, track_time)
from logs.custom_logger import logger
class DatabaseProtocol(Protocol):
"""Protocol for database operations"""
async def user_exists(self, user_id: int) -> bool: ...
async def add_user(self, user: User) -> None: ...
async def update_user_info(self, user_id: int, username: str = None, full_name: str = None) -> None: ...
async def update_user_date(self, user_id: int) -> None: ...
async def add_post(self, post: TelegramPost) -> None: ...
async def update_stickers_info(self, user_id: int) -> None: ...
async def add_message(self, message_text: str, user_id: int, message_id: int, date: int = None) -> None: ...
async def update_helper_message(self, message_id: int, helper_message_id: int) -> None: ...
@dataclass
class BotSettings:
"""Bot configuration settings"""
group_for_posts: str
group_for_message: str
main_public: str
group_for_logs: str
important_logs: str
preview_link: str
logs: str
test: str
class UserService:
"""Service for user-related operations"""
def __init__(self, db: DatabaseProtocol, settings: BotSettings) -> None:
self.db = db
self.settings = settings
@track_time("update_user_activity", "user_service")
@track_errors("user_service", "update_user_activity")
@db_query_time("update_user_activity", "users", "update")
async def update_user_activity(self, user_id: int) -> None:
"""Update user's last activity timestamp with metrics tracking"""
await self.db.update_user_date(user_id)
@track_time("ensure_user_exists", "user_service")
@track_errors("user_service", "ensure_user_exists")
@db_query_time("ensure_user_exists", "users", "insert")
async def ensure_user_exists(self, message: types.Message) -> None:
"""Ensure user exists in database, create if needed with metrics tracking"""
user_id = message.from_user.id
full_name = message.from_user.full_name
username = message.from_user.username or "private_username"
first_name = get_first_name(message)
is_bot = message.from_user.is_bot
language_code = message.from_user.language_code
# Create User object with current timestamp
current_timestamp = int(datetime.now().timestamp())
user = User(
user_id=user_id,
first_name=first_name,
full_name=full_name,
username=username,
is_bot=is_bot,
language_code=language_code,
emoji="",
has_stickers=False,
date_added=current_timestamp,
date_changed=current_timestamp,
voice_bot_welcome_received=False
)
# Пытаемся создать пользователя (если уже существует - игнорируем)
# Это устраняет race condition и упрощает логику
await self.db.add_user(user)
# Проверяем, нужно ли обновить информацию о существующем пользователе
is_need_update = await check_username_and_full_name(user_id, username, full_name, self.db)
if is_need_update:
await self.db.update_user_info(user_id, username, full_name)
safe_full_name = html.escape(full_name) if full_name else "Неизвестный пользователь"
safe_username = html.escape(username) if username else "Без никнейма"
await message.answer(
f"Давно не виделись! Вижу что ты изменился;) Теперь буду звать тебя: {safe_full_name} и ник @{safe_username}")
await message.bot.send_message(
chat_id=self.settings.group_for_logs,
text=f'Для пользователя: {user_id} обновлены данные в БД.\nНовое имя: {safe_full_name}\nНовый ник:{safe_username}')
await self.db.update_user_date(user_id)
async def log_user_message(self, message: types.Message) -> None:
"""Forward user message to logs group with metrics tracking"""
await message.forward(chat_id=self.settings.group_for_logs)
def get_safe_user_info(self, message: types.Message) -> tuple[str, str]:
"""Get safely escaped user information for logging"""
full_name = message.from_user.full_name or "Неизвестный пользователь"
username = message.from_user.username or "Без никнейма"
return html.escape(full_name), html.escape(username)
class PostService:
"""Service for post-related operations"""
def __init__(self, db: DatabaseProtocol, settings: BotSettings, s3_storage=None, scoring_manager=None) -> None:
self.db = db
self.settings = settings
self.s3_storage = s3_storage
self.scoring_manager = scoring_manager
async def _save_media_background(self, sent_message: types.Message, bot_db: Any, s3_storage) -> None:
"""Сохраняет медиа в фоне, чтобы не блокировать ответ пользователю"""
try:
success = await add_in_db_media(sent_message, bot_db, s3_storage)
if not success:
logger.warning(f"_save_media_background: Не удалось сохранить медиа для поста {sent_message.message_id}")
except Exception as e:
logger.error(f"_save_media_background: Ошибка при сохранении медиа для поста {sent_message.message_id}: {e}")
async def _get_scores(self, text: str) -> tuple:
"""
Получает скоры для текста поста.
Returns:
Tuple (deepseek_score, rag_score, rag_confidence, rag_score_pos_only, ml_scores_json)
"""
if not self.scoring_manager or not text or not text.strip():
return None, None, None, None, None
try:
scores = await self.scoring_manager.score_post(text)
# Формируем JSON для сохранения в БД
import json
ml_scores_json = json.dumps(scores.to_json_dict()) if scores.has_any_score() else None
# Получаем данные от RAG
rag_confidence = scores.rag.confidence if scores.rag else None
rag_score_pos_only = scores.rag.metadata.get("rag_score_pos_only") if scores.rag else None
return scores.deepseek_score, scores.rag_score, rag_confidence, rag_score_pos_only, ml_scores_json
except Exception as e:
logger.error(f"PostService: Ошибка получения скоров: {e}")
return None, None, None, None, None
async def _save_scores_background(self, message_id: int, ml_scores_json: str) -> None:
"""Сохраняет скоры в БД в фоне."""
if ml_scores_json:
try:
await self.db.update_ml_scores(message_id, ml_scores_json)
except Exception as e:
logger.error(f"PostService: Ошибка сохранения скоров для {message_id}: {e}")
@track_time("handle_text_post", "post_service")
@track_errors("post_service", "handle_text_post")
@db_query_time("handle_text_post", "posts", "insert")
async def handle_text_post(self, message: types.Message, first_name: str) -> None:
"""Handle text post submission"""
raw_text = message.text or ""
# Получаем скоры для текста
deepseek_score, rag_score, rag_confidence, rag_score_pos_only, ml_scores_json = await self._get_scores(raw_text)
# Формируем текст с учетом скоров
post_text = get_text_message(
message.text.lower(),
first_name,
message.from_user.username,
deepseek_score=deepseek_score,
rag_score=rag_score,
rag_confidence=rag_confidence,
rag_score_pos_only=rag_score_pos_only,
)
markup = get_reply_keyboard_for_post()
sent_message = await send_text_message(self.settings.group_for_posts, message, post_text, markup)
# Определяем анонимность
is_anonymous = determine_anonymity(raw_text)
post = TelegramPost(
message_id=sent_message.message_id,
text=raw_text,
author_id=message.from_user.id,
created_at=int(datetime.now().timestamp()),
is_anonymous=is_anonymous
)
await self.db.add_post(post)
# Сохраняем скоры в фоне
if ml_scores_json:
asyncio.create_task(self._save_scores_background(sent_message.message_id, ml_scores_json))
@track_time("handle_photo_post", "post_service")
@track_errors("post_service", "handle_photo_post")
@db_query_time("handle_photo_post", "posts", "insert")
async def handle_photo_post(self, message: types.Message, first_name: str) -> None:
"""Handle photo post submission"""
raw_caption = message.caption or ""
# Получаем скоры для текста
deepseek_score, rag_score, rag_confidence, rag_score_pos_only, ml_scores_json = await self._get_scores(raw_caption)
post_caption = ""
if message.caption:
post_caption = get_text_message(
message.caption.lower(),
first_name,
message.from_user.username,
deepseek_score=deepseek_score,
rag_score=rag_score,
rag_confidence=rag_confidence,
rag_score_pos_only=rag_score_pos_only,
)
markup = get_reply_keyboard_for_post()
sent_message = await send_photo_message(
self.settings.group_for_posts, message, message.photo[-1].file_id, post_caption, markup
)
# Определяем анонимность
is_anonymous = determine_anonymity(raw_caption)
post = TelegramPost(
message_id=sent_message.message_id,
text=raw_caption,
author_id=message.from_user.id,
created_at=int(datetime.now().timestamp()),
is_anonymous=is_anonymous
)
await self.db.add_post(post)
# Сохраняем медиа и скоры в фоне
asyncio.create_task(self._save_media_background(sent_message, self.db, self.s3_storage))
if ml_scores_json:
asyncio.create_task(self._save_scores_background(sent_message.message_id, ml_scores_json))
@track_time("handle_video_post", "post_service")
@track_errors("post_service", "handle_video_post")
@db_query_time("handle_video_post", "posts", "insert")
async def handle_video_post(self, message: types.Message, first_name: str) -> None:
"""Handle video post submission"""
raw_caption = message.caption or ""
# Получаем скоры для текста
deepseek_score, rag_score, rag_confidence, rag_score_pos_only, ml_scores_json = await self._get_scores(raw_caption)
post_caption = ""
if message.caption:
post_caption = get_text_message(
message.caption.lower(),
first_name,
message.from_user.username,
deepseek_score=deepseek_score,
rag_score=rag_score,
rag_confidence=rag_confidence,
rag_score_pos_only=rag_score_pos_only,
)
markup = get_reply_keyboard_for_post()
sent_message = await send_video_message(
self.settings.group_for_posts, message, message.video.file_id, post_caption, markup
)
# Определяем анонимность
is_anonymous = determine_anonymity(raw_caption)
post = TelegramPost(
message_id=sent_message.message_id,
text=raw_caption,
author_id=message.from_user.id,
created_at=int(datetime.now().timestamp()),
is_anonymous=is_anonymous
)
await self.db.add_post(post)
# Сохраняем медиа и скоры в фоне
asyncio.create_task(self._save_media_background(sent_message, self.db, self.s3_storage))
if ml_scores_json:
asyncio.create_task(self._save_scores_background(sent_message.message_id, ml_scores_json))
@track_time("handle_video_note_post", "post_service")
@track_errors("post_service", "handle_video_note_post")
@db_query_time("handle_video_note_post", "posts", "insert")
async def handle_video_note_post(self, message: types.Message) -> None:
"""Handle video note post submission"""
markup = get_reply_keyboard_for_post()
sent_message = await send_video_note_message(
self.settings.group_for_posts, message, message.video_note.file_id, markup
)
# Сохраняем пустую строку, так как video_note не имеет caption
raw_caption = ""
is_anonymous = determine_anonymity(raw_caption)
post = TelegramPost(
message_id=sent_message.message_id,
text=raw_caption,
author_id=message.from_user.id,
created_at=int(datetime.now().timestamp()),
is_anonymous=is_anonymous
)
await self.db.add_post(post)
# Сохраняем медиа в фоне, чтобы не блокировать ответ пользователю
asyncio.create_task(self._save_media_background(sent_message, self.db, self.s3_storage))
@track_time("handle_audio_post", "post_service")
@track_errors("post_service", "handle_audio_post")
@db_query_time("handle_audio_post", "posts", "insert")
async def handle_audio_post(self, message: types.Message, first_name: str) -> None:
"""Handle audio post submission"""
raw_caption = message.caption or ""
# Получаем скоры для текста
deepseek_score, rag_score, rag_confidence, rag_score_pos_only, ml_scores_json = await self._get_scores(raw_caption)
post_caption = ""
if message.caption:
post_caption = get_text_message(
message.caption.lower(),
first_name,
message.from_user.username,
deepseek_score=deepseek_score,
rag_score=rag_score,
rag_confidence=rag_confidence,
rag_score_pos_only=rag_score_pos_only,
)
markup = get_reply_keyboard_for_post()
sent_message = await send_audio_message(
self.settings.group_for_posts, message, message.audio.file_id, post_caption, markup
)
# Определяем анонимность
is_anonymous = determine_anonymity(raw_caption)
post = TelegramPost(
message_id=sent_message.message_id,
text=raw_caption,
author_id=message.from_user.id,
created_at=int(datetime.now().timestamp()),
is_anonymous=is_anonymous
)
await self.db.add_post(post)
# Сохраняем медиа и скоры в фоне
asyncio.create_task(self._save_media_background(sent_message, self.db, self.s3_storage))
if ml_scores_json:
asyncio.create_task(self._save_scores_background(sent_message.message_id, ml_scores_json))
@track_time("handle_voice_post", "post_service")
@track_errors("post_service", "handle_voice_post")
@db_query_time("handle_voice_post", "posts", "insert")
async def handle_voice_post(self, message: types.Message) -> None:
"""Handle voice post submission"""
markup = get_reply_keyboard_for_post()
sent_message = await send_voice_message(
self.settings.group_for_posts, message, message.voice.file_id, markup
)
# Сохраняем пустую строку, так как voice не имеет caption
raw_caption = ""
is_anonymous = determine_anonymity(raw_caption)
post = TelegramPost(
message_id=sent_message.message_id,
text=raw_caption,
author_id=message.from_user.id,
created_at=int(datetime.now().timestamp()),
is_anonymous=is_anonymous
)
await self.db.add_post(post)
# Сохраняем медиа в фоне, чтобы не блокировать ответ пользователю
asyncio.create_task(self._save_media_background(sent_message, self.db, self.s3_storage))
@track_time("handle_media_group_post", "post_service")
@track_errors("post_service", "handle_media_group_post")
@db_query_time("handle_media_group_post", "posts", "insert")
@track_media_processing("media_group")
async def handle_media_group_post(self, message: types.Message, album: list, first_name: str) -> None:
"""Handle media group post submission"""
post_caption = " "
raw_caption = ""
ml_scores_json = None
if album and album[0].caption:
raw_caption = album[0].caption or ""
# Получаем скоры для текста
deepseek_score, rag_score, rag_confidence, rag_score_pos_only, ml_scores_json = await self._get_scores(raw_caption)
post_caption = get_text_message(
album[0].caption.lower(),
first_name,
message.from_user.username,
deepseek_score=deepseek_score,
rag_score=rag_score,
rag_confidence=rag_confidence,
rag_score_pos_only=rag_score_pos_only,
)
is_anonymous = determine_anonymity(raw_caption)
media_group = await prepare_media_group_from_middlewares(album, post_caption)
media_group_message_ids = await send_media_group_message_to_private_chat(
self.settings.group_for_posts, message, media_group, self.db, None, self.s3_storage
)
main_post_id = media_group_message_ids[-1]
main_post = TelegramPost(
message_id=main_post_id,
text=raw_caption,
author_id=message.from_user.id,
created_at=int(datetime.now().timestamp()),
is_anonymous=is_anonymous
)
await self.db.add_post(main_post)
# Сохраняем скоры в фоне
if ml_scores_json:
asyncio.create_task(self._save_scores_background(main_post_id, ml_scores_json))
for msg_id in media_group_message_ids:
await self.db.add_message_link(main_post_id, msg_id)
await asyncio.sleep(0.2)
markup = get_reply_keyboard_for_post()
helper_message = await send_text_message(
self.settings.group_for_posts,
message,
"^",
markup
)
helper_message_id = helper_message.message_id
helper_post = TelegramPost(
message_id=helper_message_id,
text="^",
author_id=message.from_user.id,
helper_text_message_id=main_post_id,
created_at=int(datetime.now().timestamp())
)
await self.db.add_post(helper_post)
await self.db.update_helper_message(
message_id=main_post_id,
helper_message_id=helper_message_id
)
@track_time("process_post", "post_service")
@track_errors("post_service", "process_post")
@track_media_processing("media_group")
async def process_post(self, message: types.Message, album: Union[list, None] = None) -> None:
"""Process post based on content type"""
first_name = get_first_name(message)
if message.media_group_id is not None:
await self.handle_media_group_post(message, album, first_name)
return
content_handlers: Dict[str, Callable] = {
'text': lambda: self.handle_text_post(message, first_name),
'photo': lambda: self.handle_photo_post(message, first_name),
'video': lambda: self.handle_video_post(message, first_name),
'video_note': lambda: self.handle_video_note_post(message),
'audio': lambda: self.handle_audio_post(message, first_name),
'voice': lambda: self.handle_voice_post(message)
}
handler = content_handlers.get(message.content_type)
if handler:
await handler()
else:
from .constants import ERROR_MESSAGES
await message.bot.send_message(
message.chat.id, ERROR_MESSAGES["UNSUPPORTED_CONTENT"]
)
class StickerService:
"""Service for sticker-related operations"""
def __init__(self, settings: BotSettings) -> None:
self.settings = settings
@track_time("send_random_hello_sticker", "sticker_service")
@track_errors("sticker_service", "send_random_hello_sticker")
@track_file_operations("sticker")
async def send_random_hello_sticker(self, message: types.Message) -> None:
"""Send random hello sticker with metrics tracking"""
name_stick_hello = list(Path('Stick').rglob('Hello_*'))
if not name_stick_hello:
return
random_stick_hello = random.choice(name_stick_hello)
random_stick_hello = FSInputFile(path=random_stick_hello)
await message.answer_sticker(random_stick_hello)
await asyncio.sleep(0.3)
@track_time("send_random_goodbye_sticker", "sticker_service")
@track_errors("sticker_service", "send_random_goodbye_sticker")
@track_file_operations("sticker")
async def send_random_goodbye_sticker(self, message: types.Message) -> None:
"""Send random goodbye sticker with metrics tracking"""
name_stick_bye = list(Path('Stick').rglob('Universal_*'))
if not name_stick_bye:
return
random_stick_bye = random.choice(name_stick_bye)
random_stick_bye = FSInputFile(path=random_stick_bye)
await message.answer_sticker(random_stick_bye)