feat: интеграция ML-скоринга с использованием RAG и DeepSeek

- Обновлен Dockerfile для установки необходимых зависимостей.
- Добавлены новые переменные окружения для настройки ML-скоринга в env.example.
- Реализованы методы для получения и обновления ML-скоров в AsyncBotDB и PostRepository.
- Обновлены обработчики публикации постов для интеграции ML-скоринга.
- Добавлен новый обработчик для получения статистики ML-скоринга в админ-панели.
- Обновлены функции для форматирования сообщений с учетом ML-скоров.
This commit is contained in:
2026-01-26 18:40:38 +03:00
parent e2b1353408
commit 7f6f0f028c
25 changed files with 2833 additions and 52 deletions

View File

@@ -0,0 +1,390 @@
"""
Тесты для сервисов ML-скоринга постов.
"""
import json
import pytest
from unittest.mock import AsyncMock, MagicMock, patch
# Импорты для тестирования базовых классов
from helper_bot.services.scoring.base import ScoringResult, CombinedScore
from helper_bot.services.scoring.exceptions import (
ScoringError,
InsufficientExamplesError,
TextTooShortError,
)
class TestScoringResult:
    """Tests for ``ScoringResult``: construction, score validation, dict conversion."""

    def test_create_valid_score(self):
        """An accepted score is stored and the fields read back unchanged."""
        created = ScoringResult(score=0.75, source="rag", model="test-model")

        assert created.score == 0.75
        assert created.source == "rag"
        assert created.model == "test-model"

    def test_score_validation_lower_bound(self):
        """A negative score (-0.1) must be rejected with ``ValueError``."""
        below_range = {"score": -0.1, "source": "test", "model": "test"}
        with pytest.raises(ValueError):
            ScoringResult(**below_range)

    def test_score_validation_upper_bound(self):
        """A score above one (1.1) must be rejected with ``ValueError``."""
        above_range = {"score": 1.1, "source": "test", "model": "test"}
        with pytest.raises(ValueError):
            ScoringResult(**above_range)

    def test_to_dict(self):
        """``to_dict`` exposes score, model, confidence and the timestamp as ``ts``."""
        serialized = ScoringResult(
            score=0.7534,
            source="rag",
            model="test-model",
            confidence=0.85,
            timestamp=1234567890,
        ).to_dict()

        # The original note says the score is rounded to 4 decimal places;
        # this input already has exactly 4, so rounding is not exercised here.
        assert serialized["score"] == 0.7534
        assert serialized["model"] == "test-model"
        assert serialized["ts"] == 1234567890
        assert serialized["confidence"] == 0.85

    def test_from_dict(self):
        """``from_dict`` takes the source separately and maps ``ts`` to ``timestamp``."""
        payload = {
            "score": 0.75,
            "model": "test-model",
            "ts": 1234567890,
            "confidence": 0.9,
        }

        restored = ScoringResult.from_dict("rag", payload)

        assert restored.score == 0.75
        assert restored.source == "rag"
        assert restored.model == "test-model"
        assert restored.timestamp == 1234567890
        assert restored.confidence == 0.9
class TestCombinedScore:
    """Tests for ``CombinedScore``, the container holding RAG and DeepSeek results."""

    def test_empty_combined_score(self):
        """A default-constructed score holds no results and reports no score."""
        empty = CombinedScore()

        # Both the raw results and their score shortcuts must be unset.
        for attribute in ("deepseek", "rag", "deepseek_score", "rag_score"):
            assert getattr(empty, attribute) is None
        assert not empty.has_any_score()

    def test_combined_score_with_rag(self):
        """With only a RAG result, only ``rag_score`` is populated."""
        combined = CombinedScore(
            rag=ScoringResult(score=0.8, source="rag", model="rubert"),
        )

        assert combined.rag_score == 0.8
        assert combined.deepseek_score is None
        assert combined.has_any_score()

    def test_combined_score_with_both(self):
        """With both results supplied, both score shortcuts are populated."""
        from_rag = ScoringResult(score=0.8, source="rag", model="rubert")
        from_deepseek = ScoringResult(
            score=0.7,
            source="deepseek",
            model="deepseek-chat",
        )

        combined = CombinedScore(rag=from_rag, deepseek=from_deepseek)

        assert combined.rag_score == 0.8
        assert combined.deepseek_score == 0.7
        assert combined.has_any_score()

    def test_to_json_dict(self):
        """``to_json_dict`` nests each result under its source key and is JSON-serializable."""
        combined = CombinedScore(
            rag=ScoringResult(score=0.8, source="rag", model="rubert", timestamp=123),
            deepseek=ScoringResult(
                score=0.7,
                source="deepseek",
                model="deepseek-chat",
                timestamp=456,
            ),
        )

        as_dict = combined.to_json_dict()

        assert "rag" in as_dict
        assert "deepseek" in as_dict
        assert as_dict["rag"]["score"] == 0.8
        assert as_dict["deepseek"]["score"] == 0.7
        # json.dumps raises on non-serializable content; the result must be non-empty.
        assert json.dumps(as_dict)
class TestVectorStore:
    """Tests for ``VectorStore``; the whole class is skipped when numpy is missing."""

    @staticmethod
    def _vectors(count, seed=0):
        """Return ``count`` reproducible float32 vectors of length 768.

        A seeded generator is used instead of the global ``np.random`` state
        so that a failing run can be reproduced exactly.
        """
        import numpy as np

        rng = np.random.default_rng(seed)
        return [rng.standard_normal(768).astype(np.float32) for _ in range(count)]

    @pytest.fixture
    def vector_store(self):
        """Provide a fresh ``VectorStore(vector_dim=768, max_examples=100)``.

        Only a missing numpy skips the tests. The project import is left
        outside the skip guard on purpose: if ``helper_bot`` itself fails to
        import, that is a real error and must not be reported as a skip.
        """
        pytest.importorskip("numpy", reason="numpy не установлен")
        from helper_bot.services.scoring.vector_store import VectorStore

        return VectorStore(vector_dim=768, max_examples=100)

    def test_add_positive_example(self, vector_store):
        """Adding a new positive example returns True and increments the count."""
        (vector,) = self._vectors(1)

        added = vector_store.add_positive(vector, "hash1")

        assert added is True
        assert vector_store.positive_count == 1

    def test_add_duplicate_example(self, vector_store):
        """Re-adding the same vector under the same hash returns False and is not counted."""
        (vector,) = self._vectors(1)
        vector_store.add_positive(vector, "hash1")

        added_again = vector_store.add_positive(vector, "hash1")  # duplicate

        assert added_again is False
        assert vector_store.positive_count == 1

    def test_max_examples_limit(self, vector_store):
        """Adding 150 examples to a store created with max_examples=100 leaves 100."""
        for index, vector in enumerate(self._vectors(150)):
            vector_store.add_positive(vector, f"hash_{index}")

        assert vector_store.positive_count == 100  # max_examples

    def test_calculate_similarity_no_examples(self, vector_store):
        """Scoring against an empty store raises ``InsufficientExamplesError``."""
        (vector,) = self._vectors(1)

        with pytest.raises(InsufficientExamplesError):
            vector_store.calculate_similarity_score(vector)

    def test_calculate_similarity_with_examples(self, vector_store):
        """With 10 positive and 10 negative examples, score and confidence lie in [0, 1]."""
        # 10 positives + 10 negatives + 1 probe, all drawn from one seeded stream.
        vectors = self._vectors(21)
        positives, negatives, probe = vectors[:10], vectors[10:20], vectors[20]

        for index, vector in enumerate(positives):
            vector_store.add_positive(vector, f"pos_{index}")
        for index, vector in enumerate(negatives):
            vector_store.add_negative(vector, f"neg_{index}")

        score, confidence = vector_store.calculate_similarity_score(probe)

        assert 0.0 <= score <= 1.0
        assert 0.0 <= confidence <= 1.0

    def test_compute_text_hash(self, vector_store):
        """Equal texts hash equally; different texts hash differently.

        The ``vector_store`` fixture is requested only so this test is skipped
        together with the rest of the class when numpy is unavailable.
        """
        from helper_bot.services.scoring.vector_store import VectorStore

        first = VectorStore.compute_text_hash("Привет мир")
        same_text = VectorStore.compute_text_hash("Привет мир")
        other_text = VectorStore.compute_text_hash("Другой текст")

        assert first == same_text
        assert first != other_text
class TestDeepSeekService:
    """Tests for ``DeepSeekService``: enablement, response parsing, text cleaning."""

    @pytest.fixture
    def deepseek_service(self):
        """Provide an enabled service configured with a dummy API key."""
        from helper_bot.services.scoring.deepseek_service import DeepSeekService

        settings = {"api_key": "test_key", "enabled": True, "timeout": 5}
        return DeepSeekService(**settings)

    def test_service_disabled_without_key(self):
        """Without an API key the service reports itself disabled even if enabled=True."""
        from helper_bot.services.scoring.deepseek_service import DeepSeekService

        keyless = DeepSeekService(api_key=None, enabled=True)

        assert keyless.is_enabled is False

    def test_parse_score_response_valid(self, deepseek_service):
        """Plain numeric replies are parsed into the matching float."""
        expectations = (
            ("0.75", 0.75),
            ("0.5", 0.5),
            ("1.0", 1.0),
            ("0", 0.0),
        )
        for reply, expected in expectations:
            assert deepseek_service._parse_score_response(reply) == expected

    def test_parse_score_response_with_quotes(self, deepseek_service):
        """Replies wrapped in double or single quotes are still parsed."""
        parse = deepseek_service._parse_score_response

        assert parse('"0.75"') == 0.75
        assert parse("'0.8'") == 0.8

    def test_parse_score_response_with_text(self, deepseek_service):
        """A number embedded in surrounding text is extracted."""
        # The service is expected to locate the number inside the text.
        parsed = deepseek_service._parse_score_response("Score: 0.75")

        assert parsed == 0.75

    def test_clean_text(self, deepseek_service):
        """Cleaning trims the padded input and maps "^" and "" to an empty string."""
        expectations = (
            ("  hello   world  ", "hello world"),
            ("^", ""),
            ("", ""),
        )
        for raw, cleaned in expectations:
            assert deepseek_service._clean_text(raw) == cleaned

    @pytest.mark.asyncio
    async def test_calculate_score_disabled(self):
        """Scoring through a disabled service raises ``ScoringError``."""
        from helper_bot.services.scoring.deepseek_service import DeepSeekService

        disabled = DeepSeekService(api_key=None, enabled=False)

        with pytest.raises(ScoringError):
            await disabled.calculate_score("Test text")

    @pytest.mark.asyncio
    async def test_calculate_score_short_text(self, deepseek_service):
        """Scoring a two-character text raises ``TextTooShortError``."""
        with pytest.raises(TextTooShortError):
            await deepseek_service.calculate_score("ab")
class TestScoringManager:
    """Tests for ``ScoringManager`` using mocked RAG and DeepSeek services."""

    @staticmethod
    def _build_manager(**services):
        """Import ``ScoringManager`` lazily and construct it with the given services."""
        from helper_bot.services.scoring.scoring_manager import ScoringManager

        return ScoringManager(**services)

    @pytest.fixture
    def mock_rag_service(self):
        """Provide an enabled RAG mock whose ``calculate_score`` yields 0.8."""
        canned = ScoringResult(score=0.8, source="rag", model="rubert")
        service = AsyncMock()
        service.is_enabled = True
        service.calculate_score = AsyncMock(return_value=canned)
        return service

    @pytest.fixture
    def mock_deepseek_service(self):
        """Provide an enabled DeepSeek mock whose ``calculate_score`` yields 0.7."""
        canned = ScoringResult(score=0.7, source="deepseek", model="deepseek-chat")
        service = AsyncMock()
        service.is_enabled = True
        service.calculate_score = AsyncMock(return_value=canned)
        return service

    @pytest.mark.asyncio
    async def test_score_post_both_services(self, mock_rag_service, mock_deepseek_service):
        """With both services configured, both scores appear in the result."""
        manager = self._build_manager(
            rag_service=mock_rag_service,
            deepseek_service=mock_deepseek_service,
        )

        outcome = await manager.score_post("Тестовый пост")

        assert outcome.rag_score == 0.8
        assert outcome.deepseek_score == 0.7
        assert outcome.has_any_score()

    @pytest.mark.asyncio
    async def test_score_post_rag_only(self, mock_rag_service):
        """With DeepSeek set to None, only the RAG score is produced."""
        manager = self._build_manager(
            rag_service=mock_rag_service,
            deepseek_service=None,
        )

        outcome = await manager.score_post("Тестовый пост")

        assert outcome.rag_score == 0.8
        assert outcome.deepseek_score is None

    @pytest.mark.asyncio
    async def test_score_post_empty_text(self, mock_rag_service):
        """Empty text yields no score and the RAG service is never called."""
        manager = self._build_manager(rag_service=mock_rag_service)

        outcome = await manager.score_post("")

        assert not outcome.has_any_score()
        mock_rag_service.calculate_score.assert_not_called()

    @pytest.mark.asyncio
    async def test_score_post_service_error(self, mock_rag_service, mock_deepseek_service):
        """A failing RAG service is recorded in ``errors`` without losing the DeepSeek score."""
        # Make the RAG service raise when asked for a score.
        failing_call = AsyncMock(side_effect=Exception("Test error"))
        mock_rag_service.calculate_score = failing_call
        manager = self._build_manager(
            rag_service=mock_rag_service,
            deepseek_service=mock_deepseek_service,
        )

        outcome = await manager.score_post("Тестовый пост")

        # DeepSeek must still deliver its result.
        assert outcome.deepseek_score == 0.7
        # RAG must have no score and be listed among the errors.
        assert outcome.rag_score is None
        assert "rag" in outcome.errors

    @pytest.mark.asyncio
    async def test_on_post_published(self, mock_rag_service, mock_deepseek_service):
        """A published post is passed to both services as a positive example."""
        manager = self._build_manager(
            rag_service=mock_rag_service,
            deepseek_service=mock_deepseek_service,
        )
        post_text = "Опубликованный пост"

        await manager.on_post_published(post_text)

        mock_rag_service.add_positive_example.assert_called_once_with(post_text)
        mock_deepseek_service.add_positive_example.assert_called_once_with(post_text)

    @pytest.mark.asyncio
    async def test_on_post_declined(self, mock_rag_service, mock_deepseek_service):
        """A declined post is passed to both services as a negative example."""
        manager = self._build_manager(
            rag_service=mock_rag_service,
            deepseek_service=mock_deepseek_service,
        )
        post_text = "Отклоненный пост"

        await manager.on_post_declined(post_text)

        mock_rag_service.add_negative_example.assert_called_once_with(post_text)
        mock_deepseek_service.add_negative_example.assert_called_once_with(post_text)