213 lines
6.2 KiB
Python
213 lines
6.2 KiB
Python
"""Tests for the submitted collection of VectorStore."""
|
||
|
||
import numpy as np
|
||
import pytest
|
||
|
||
from app.storage.vector_store import VectorStore
|
||
|
||
|
||
@pytest.fixture
def vector_store(tmp_path):
    """Build a VectorStore whose submitted_path lives under pytest's tmp_path.

    The store is created with vector_dim=4, max_examples=10, max_submitted=5
    and no storage_path for the main collection.
    """
    submitted_file = tmp_path / "submitted.npz"
    store = VectorStore(
        vector_dim=4,
        max_examples=10,
        max_submitted=5,
        storage_path=None,
        submitted_path=str(submitted_file),
    )
    return store
|
||
|
||
|
||
@pytest.fixture
def sample_vector():
    """Return a normalised 4-component float32 vector along the first axis."""
    raw = np.array([1.0, 0.0, 0.0, 0.0], dtype=np.float32)
    length = np.linalg.norm(raw)
    return raw / length
|
||
|
||
|
||
def test_add_submitted(vector_store, sample_vector):
    """Adding one submitted post returns True and brings the count to 1."""
    post = {
        "vector": sample_vector,
        "text_hash": "abc123",
        "created_at": 1000,
        "post_id": 42,
        "text": "Test post",
        "rag_score": 0.85,
    }

    was_added = vector_store.add_submitted(**post)

    assert was_added is True
    assert vector_store.submitted_count == 1
|
||
|
||
|
||
def test_add_submitted_duplicate(vector_store, sample_vector):
    """A second post that reuses an existing text hash is not added."""
    first_post = {
        "vector": sample_vector,
        "text_hash": "same_hash",
        "created_at": 1000,
        "text": "First",
    }
    # Same hash, different timestamp and text: only the hash is shared.
    second_post = {
        "vector": sample_vector,
        "text_hash": "same_hash",
        "created_at": 2000,
        "text": "Second",
    }

    vector_store.add_submitted(**first_post)
    second_added = vector_store.add_submitted(**second_post)

    assert second_added is False
    assert vector_store.submitted_count == 1
|
||
|
||
|
||
def test_add_submitted_fifo(vector_store, sample_vector):
    """Exceeding max_submitted evicts the oldest posts first (FIFO).

    Seven posts are added to a store limited to five submitted entries, so
    posts 0 and 1 must be gone and posts 2..6 must all still be present.
    """
    for i in range(7):
        # i + 1 keeps the first component non-zero, avoiding a zero vector
        # (and therefore a division by zero during normalisation).
        v = np.array([float(i + 1), 0.0, 0.0, 0.0], dtype=np.float32)
        v = v / np.linalg.norm(v)
        vector_store.add_submitted(
            vector=v,
            text_hash=f"hash_{i}",
            created_at=1000 + i,
            post_id=i,
            text=f"Post {i}",
        )

    assert vector_store.submitted_count == 5  # max_submitted

    post_ids = vector_store._submitted_post_ids
    # The two oldest posts were evicted ...
    for evicted_id in (0, 1):
        assert evicted_id not in post_ids
    # ... and every one of the five newest posts survived, not just post 2.
    for kept_id in range(2, 7):
        assert kept_id in post_ids
|
||
|
||
|
||
def test_find_similar_submitted_empty(vector_store, sample_vector):
    """Searching a store with no submitted posts yields an empty list."""
    query = {"vector": sample_vector, "threshold": 0.5, "hours": 24}

    matches = vector_store.find_similar_submitted(**query)

    assert matches == []
|
||
|
||
|
||
def test_find_similar_submitted(vector_store, sample_vector):
    """Similarity search returns only the post that clears the threshold.

    Two posts from one hour ago are stored: one nearly parallel to the query
    vector and one orthogonal to it. With threshold=0.9 and hours=24 only the
    nearly parallel post is expected back.
    """
    import time

    def _unit(components):
        # Build a float32 vector and scale it to unit length.
        arr = np.array(components, dtype=np.float32)
        return arr / np.linalg.norm(arr)

    one_hour_ago = int(time.time()) - 3600  # well inside the 24 h window

    close_vec = _unit([0.99, 0.01, 0.0, 0.0])  # almost the query direction
    far_vec = _unit([0.0, 1.0, 0.0, 0.0])  # orthogonal to the query

    vector_store.add_submitted(
        vector=close_vec,
        text_hash="similar",
        created_at=one_hour_ago,
        post_id=1,
        text="Similar post",
        rag_score=0.9,
    )
    vector_store.add_submitted(
        vector=far_vec,
        text_hash="different",
        created_at=one_hour_ago,
        post_id=2,
        text="Different post",
        rag_score=0.5,
    )

    matches = vector_store.find_similar_submitted(
        vector=sample_vector,
        threshold=0.9,
        hours=24,
    )

    assert len(matches) == 1
    top = matches[0]
    assert top["post_id"] == 1
    assert top["text"] == "Similar post"
    assert top["similarity"] >= 0.9
|
||
|
||
|
||
def test_find_similar_submitted_time_filter(vector_store, sample_vector):
    """The hours window drops posts older than the requested range."""
    import time

    hour = 3600
    current = int(time.time())

    # (text_hash, created_at, post_id, text): one post 48 h old, one 1 h old.
    posts = (
        ("old", current - 48 * hour, 1, "Old post"),
        ("recent", current - hour, 2, "Recent post"),
    )
    for text_hash, created_at, post_id, text in posts:
        vector_store.add_submitted(
            vector=sample_vector,
            text_hash=text_hash,
            created_at=created_at,
            post_id=post_id,
            text=text,
        )

    matches = vector_store.find_similar_submitted(
        vector=sample_vector,
        threshold=0.5,
        hours=24,
    )

    assert len(matches) == 1
    assert matches[0]["post_id"] == 2
|
||
|
||
|
||
def test_submitted_persistence(vector_store, sample_vector, tmp_path):
    """Submitted posts saved to disk are loaded by a freshly built store.

    A post is added and saved, then a second VectorStore is created with the
    same submitted_path and is expected to expose the same post id, text and
    rag score.
    """
    vector_store.add_submitted(
        vector=sample_vector,
        text_hash="persist",
        created_at=12345,
        post_id=999,
        text="Persisted post",
        rag_score=0.77,
    )
    vector_store.save_submitted_to_disk()

    # A new store pointed at the same file should pick the data up.
    store2 = VectorStore(
        vector_dim=4,
        max_submitted=5,
        storage_path=None,
        submitted_path=str(tmp_path / "submitted.npz"),
    )

    assert store2.submitted_count == 1
    assert store2._submitted_post_ids[0] == 999
    assert store2._submitted_texts[0] == "Persisted post"
    # Compare approximately: the on-disk dtype is not visible from this test,
    # and a round-trip through reduced-precision storage (e.g. float32) would
    # make an exact == 0.77 comparison fail spuriously.
    assert store2._submitted_rag_scores[0] == pytest.approx(0.77)
|
||
|
||
|
||
def test_get_stats_includes_submitted(vector_store, sample_vector):
    """get_stats reports both submitted_count and max_submitted."""
    post = {
        "vector": sample_vector,
        "text_hash": "stat",
        "created_at": 1000,
        "text": "For stats",
    }
    vector_store.add_submitted(**post)

    report = vector_store.get_stats()

    # Check key presence before the value so a missing key fails clearly.
    assert "submitted_count" in report
    assert report["submitted_count"] == 1
    assert "max_submitted" in report
    assert report["max_submitted"] == 5
|
||
|
||
|
||
def test_clear_submitted(vector_store, sample_vector):
    """clear() leaves the submitted collection with zero entries."""
    post = {
        "vector": sample_vector,
        "text_hash": "clear",
        "created_at": 1000,
        "text": "To clear",
    }
    vector_store.add_submitted(**post)

    vector_store.clear()

    assert vector_store.submitted_count == 0
|