feat: интеграция ML-скоринга с использованием RAG и DeepSeek
- Обновлен Dockerfile для установки необходимых зависимостей.
- Добавлены новые переменные окружения для настройки ML-скоринга в env.example.
- Реализованы методы для получения и обновления ML-скоров в AsyncBotDB и PostRepository.
- Обновлены обработчики публикации постов для интеграции ML-скоринга.
- Добавлен новый обработчик для получения статистики ML-скоринга в админ-панели.
- Обновлены функции для форматирования сообщений с учетом ML-скоров.
This commit is contained in:
@@ -128,10 +128,11 @@ class UserService:
|
||||
class PostService:
|
||||
"""Service for post-related operations"""
|
||||
|
||||
def __init__(self, db: DatabaseProtocol, settings: BotSettings, s3_storage=None, scoring_manager=None) -> None:
    """Store the collaborators used by the post handlers.

    Args:
        db: Database access object; the handlers call ``add_post`` and
            ``update_ml_scores`` on it.
        settings: Bot settings; the handlers read ``group_for_posts`` from it.
        s3_storage: Optional storage object handed to the background media
            saver.
        scoring_manager: Optional ML scoring backend. When it is ``None``,
            ``_get_scores`` returns no scores.
    """
    self.db = db
    self.settings = settings
    self.s3_storage = s3_storage
    self.scoring_manager = scoring_manager
|
||||
|
||||
async def _save_media_background(self, sent_message: types.Message, bot_db: Any, s3_storage) -> None:
|
||||
"""Сохраняет медиа в фоне, чтобы не блокировать ответ пользователю"""
|
||||
@@ -142,18 +143,65 @@ class PostService:
|
||||
except Exception as e:
|
||||
logger.error(f"_save_media_background: Ошибка при сохранении медиа для поста {sent_message.message_id}: {e}")
|
||||
|
||||
async def _get_scores(self, text: str) -> tuple:
    """Score the post text with the configured scoring manager.

    Args:
        text: Raw post text or caption.

    Returns:
        A 5-tuple ``(deepseek_score, rag_score, rag_confidence,
        rag_score_pos_only, ml_scores_json)``. Every element is ``None``
        when no scoring manager is configured, when the text is empty or
        whitespace-only, or when scoring raises.
    """
    import json

    no_scores = (None, None, None, None, None)
    if not self.scoring_manager or not text or not text.strip():
        return no_scores

    try:
        scores = await self.scoring_manager.score_post(text)

        # JSON payload that the callers later store in the database.
        ml_scores_json = json.dumps(scores.to_json_dict()) if scores.has_any_score() else None

        rag = scores.rag
        rag_confidence = rag.confidence if rag else None
        # A RAG result without metadata must not discard the other scores,
        # so fall back to an empty mapping instead of failing on ``None.get``.
        rag_metadata = (rag.metadata or {}) if rag else {}
        rag_score_pos_only = rag_metadata.get("score_pos_only")

        return scores.deepseek_score, scores.rag_score, rag_confidence, rag_score_pos_only, ml_scores_json
    except Exception as e:
        # Any scoring failure is logged and reported to the caller as "no scores".
        logger.error(f"PostService: Ошибка получения скоров: {e}")
        return no_scores
|
||||
|
||||
async def _save_scores_background(self, message_id: int, ml_scores_json: str) -> None:
    """Persist the serialized ML scores of a post; meant to run as a background task.

    Nothing is written when ``ml_scores_json`` is empty. A failing database
    update is logged and not re-raised.
    """
    if not ml_scores_json:
        return

    try:
        await self.db.update_ml_scores(message_id, ml_scores_json)
    except Exception as exc:
        logger.error(f"PostService: Ошибка сохранения скоров для {message_id}: {exc}")
|
||||
|
||||
@track_time("handle_text_post", "post_service")
|
||||
@track_errors("post_service", "handle_text_post")
|
||||
@db_query_time("handle_text_post", "posts", "insert")
|
||||
async def handle_text_post(self, message: types.Message, first_name: str) -> None:
|
||||
"""Handle text post submission"""
|
||||
post_text = get_text_message(message.text.lower(), first_name, message.from_user.username)
|
||||
raw_text = message.text or ""
|
||||
|
||||
# Получаем скоры для текста
|
||||
deepseek_score, rag_score, rag_confidence, rag_score_pos_only, ml_scores_json = await self._get_scores(raw_text)
|
||||
|
||||
# Формируем текст с учетом скоров
|
||||
post_text = get_text_message(
|
||||
message.text.lower(),
|
||||
first_name,
|
||||
message.from_user.username,
|
||||
deepseek_score=deepseek_score,
|
||||
rag_score=rag_score,
|
||||
rag_confidence=rag_confidence,
|
||||
rag_score_pos_only=rag_score_pos_only,
|
||||
)
|
||||
markup = get_reply_keyboard_for_post()
|
||||
|
||||
sent_message = await send_text_message(self.settings.group_for_posts, message, post_text, markup)
|
||||
|
||||
# Сохраняем сырой текст и определяем анонимность
|
||||
raw_text = message.text or ""
|
||||
# Определяем анонимность
|
||||
is_anonymous = determine_anonymity(raw_text)
|
||||
|
||||
post = TelegramPost(
|
||||
@@ -164,23 +212,39 @@ class PostService:
|
||||
is_anonymous=is_anonymous
|
||||
)
|
||||
await self.db.add_post(post)
|
||||
|
||||
# Сохраняем скоры в фоне
|
||||
if ml_scores_json:
|
||||
asyncio.create_task(self._save_scores_background(sent_message.message_id, ml_scores_json))
|
||||
|
||||
@track_time("handle_photo_post", "post_service")
|
||||
@track_errors("post_service", "handle_photo_post")
|
||||
@db_query_time("handle_photo_post", "posts", "insert")
|
||||
async def handle_photo_post(self, message: types.Message, first_name: str) -> None:
|
||||
"""Handle photo post submission"""
|
||||
raw_caption = message.caption or ""
|
||||
|
||||
# Получаем скоры для текста
|
||||
deepseek_score, rag_score, rag_confidence, rag_score_pos_only, ml_scores_json = await self._get_scores(raw_caption)
|
||||
|
||||
post_caption = ""
|
||||
if message.caption:
|
||||
post_caption = get_text_message(message.caption.lower(), first_name, message.from_user.username)
|
||||
post_caption = get_text_message(
|
||||
message.caption.lower(),
|
||||
first_name,
|
||||
message.from_user.username,
|
||||
deepseek_score=deepseek_score,
|
||||
rag_score=rag_score,
|
||||
rag_confidence=rag_confidence,
|
||||
rag_score_pos_only=rag_score_pos_only,
|
||||
)
|
||||
|
||||
markup = get_reply_keyboard_for_post()
|
||||
sent_message = await send_photo_message(
|
||||
self.settings.group_for_posts, message, message.photo[-1].file_id, post_caption, markup
|
||||
)
|
||||
|
||||
# Сохраняем сырой caption и определяем анонимность
|
||||
raw_caption = message.caption or ""
|
||||
# Определяем анонимность
|
||||
is_anonymous = determine_anonymity(raw_caption)
|
||||
|
||||
post = TelegramPost(
|
||||
@@ -191,25 +255,40 @@ class PostService:
|
||||
is_anonymous=is_anonymous
|
||||
)
|
||||
await self.db.add_post(post)
|
||||
# Сохраняем медиа в фоне, чтобы не блокировать ответ пользователю
|
||||
|
||||
# Сохраняем медиа и скоры в фоне
|
||||
asyncio.create_task(self._save_media_background(sent_message, self.db, self.s3_storage))
|
||||
if ml_scores_json:
|
||||
asyncio.create_task(self._save_scores_background(sent_message.message_id, ml_scores_json))
|
||||
|
||||
@track_time("handle_video_post", "post_service")
|
||||
@track_errors("post_service", "handle_video_post")
|
||||
@db_query_time("handle_video_post", "posts", "insert")
|
||||
async def handle_video_post(self, message: types.Message, first_name: str) -> None:
|
||||
"""Handle video post submission"""
|
||||
raw_caption = message.caption or ""
|
||||
|
||||
# Получаем скоры для текста
|
||||
deepseek_score, rag_score, rag_confidence, rag_score_pos_only, ml_scores_json = await self._get_scores(raw_caption)
|
||||
|
||||
post_caption = ""
|
||||
if message.caption:
|
||||
post_caption = get_text_message(message.caption.lower(), first_name, message.from_user.username)
|
||||
post_caption = get_text_message(
|
||||
message.caption.lower(),
|
||||
first_name,
|
||||
message.from_user.username,
|
||||
deepseek_score=deepseek_score,
|
||||
rag_score=rag_score,
|
||||
rag_confidence=rag_confidence,
|
||||
rag_score_pos_only=rag_score_pos_only,
|
||||
)
|
||||
|
||||
markup = get_reply_keyboard_for_post()
|
||||
sent_message = await send_video_message(
|
||||
self.settings.group_for_posts, message, message.video.file_id, post_caption, markup
|
||||
)
|
||||
|
||||
# Сохраняем сырой caption и определяем анонимность
|
||||
raw_caption = message.caption or ""
|
||||
# Определяем анонимность
|
||||
is_anonymous = determine_anonymity(raw_caption)
|
||||
|
||||
post = TelegramPost(
|
||||
@@ -220,8 +299,11 @@ class PostService:
|
||||
is_anonymous=is_anonymous
|
||||
)
|
||||
await self.db.add_post(post)
|
||||
# Сохраняем медиа в фоне, чтобы не блокировать ответ пользователю
|
||||
|
||||
# Сохраняем медиа и скоры в фоне
|
||||
asyncio.create_task(self._save_media_background(sent_message, self.db, self.s3_storage))
|
||||
if ml_scores_json:
|
||||
asyncio.create_task(self._save_scores_background(sent_message.message_id, ml_scores_json))
|
||||
|
||||
@track_time("handle_video_note_post", "post_service")
|
||||
@track_errors("post_service", "handle_video_note_post")
|
||||
@@ -253,17 +335,29 @@ class PostService:
|
||||
@db_query_time("handle_audio_post", "posts", "insert")
|
||||
async def handle_audio_post(self, message: types.Message, first_name: str) -> None:
|
||||
"""Handle audio post submission"""
|
||||
raw_caption = message.caption or ""
|
||||
|
||||
# Получаем скоры для текста
|
||||
deepseek_score, rag_score, rag_confidence, rag_score_pos_only, ml_scores_json = await self._get_scores(raw_caption)
|
||||
|
||||
post_caption = ""
|
||||
if message.caption:
|
||||
post_caption = get_text_message(message.caption.lower(), first_name, message.from_user.username)
|
||||
post_caption = get_text_message(
|
||||
message.caption.lower(),
|
||||
first_name,
|
||||
message.from_user.username,
|
||||
deepseek_score=deepseek_score,
|
||||
rag_score=rag_score,
|
||||
rag_confidence=rag_confidence,
|
||||
rag_score_pos_only=rag_score_pos_only,
|
||||
)
|
||||
|
||||
markup = get_reply_keyboard_for_post()
|
||||
sent_message = await send_audio_message(
|
||||
self.settings.group_for_posts, message, message.audio.file_id, post_caption, markup
|
||||
)
|
||||
|
||||
# Сохраняем сырой caption и определяем анонимность
|
||||
raw_caption = message.caption or ""
|
||||
# Определяем анонимность
|
||||
is_anonymous = determine_anonymity(raw_caption)
|
||||
|
||||
post = TelegramPost(
|
||||
@@ -274,8 +368,11 @@ class PostService:
|
||||
is_anonymous=is_anonymous
|
||||
)
|
||||
await self.db.add_post(post)
|
||||
# Сохраняем медиа в фоне, чтобы не блокировать ответ пользователю
|
||||
|
||||
# Сохраняем медиа и скоры в фоне
|
||||
asyncio.create_task(self._save_media_background(sent_message, self.db, self.s3_storage))
|
||||
if ml_scores_json:
|
||||
asyncio.create_task(self._save_scores_background(sent_message.message_id, ml_scores_json))
|
||||
|
||||
@track_time("handle_voice_post", "post_service")
|
||||
@track_errors("post_service", "handle_voice_post")
|
||||
@@ -310,10 +407,23 @@ class PostService:
|
||||
"""Handle media group post submission"""
|
||||
post_caption = " "
|
||||
raw_caption = ""
|
||||
ml_scores_json = None
|
||||
|
||||
if album and album[0].caption:
|
||||
raw_caption = album[0].caption or ""
|
||||
post_caption = get_text_message(album[0].caption.lower(), first_name, message.from_user.username)
|
||||
|
||||
# Получаем скоры для текста
|
||||
deepseek_score, rag_score, rag_confidence, rag_score_pos_only, ml_scores_json = await self._get_scores(raw_caption)
|
||||
|
||||
post_caption = get_text_message(
|
||||
album[0].caption.lower(),
|
||||
first_name,
|
||||
message.from_user.username,
|
||||
deepseek_score=deepseek_score,
|
||||
rag_score=rag_score,
|
||||
rag_confidence=rag_confidence,
|
||||
rag_score_pos_only=rag_score_pos_only,
|
||||
)
|
||||
|
||||
is_anonymous = determine_anonymity(raw_caption)
|
||||
media_group = await prepare_media_group_from_middlewares(album, post_caption)
|
||||
@@ -333,6 +443,10 @@ class PostService:
|
||||
)
|
||||
await self.db.add_post(main_post)
|
||||
|
||||
# Сохраняем скоры в фоне
|
||||
if ml_scores_json:
|
||||
asyncio.create_task(self._save_scores_background(main_post_id, ml_scores_json))
|
||||
|
||||
for msg_id in media_group_message_ids:
|
||||
await self.db.add_message_link(main_post_id, msg_id)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user