feat: add submitted collection, /similar and /submitted endpoints (Stage 4)
Made-with: Cursor
This commit is contained in:
212
tests/test_vector_store_submitted.py
Normal file
212
tests/test_vector_store_submitted.py
Normal file
@@ -0,0 +1,212 @@
|
||||
"""
|
||||
Тесты для submitted-коллекции VectorStore.
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from app.storage.vector_store import VectorStore
|
||||
|
||||
|
||||
@pytest.fixture
def vector_store(tmp_path):
    """Build a small VectorStore whose submitted collection file lives in tmp_path.

    The store uses 4-dimensional vectors, at most 10 examples and at most
    5 submitted posts; no storage path is given for the main collection.
    """
    submitted_file = tmp_path / "submitted.npz"
    settings = {
        "vector_dim": 4,
        "max_examples": 10,
        "max_submitted": 5,
        "storage_path": None,
        "submitted_path": str(submitted_file),
    }
    return VectorStore(**settings)
|
||||
|
||||
|
||||
@pytest.fixture
def sample_vector():
    """Return a normalized 4-component float32 vector pointing along the first axis."""
    raw = np.array([1.0, 0.0, 0.0, 0.0], dtype=np.float32)
    length = np.linalg.norm(raw)
    return raw / length
|
||||
|
||||
|
||||
def test_add_submitted(vector_store, sample_vector):
    """Adding one submitted post reports success and raises the count to 1."""
    post = {
        "vector": sample_vector,
        "text_hash": "abc123",
        "created_at": 1000,
        "post_id": 42,
        "text": "Test post",
        "rag_score": 0.85,
    }

    was_added = vector_store.add_submitted(**post)

    assert was_added is True
    assert vector_store.submitted_count == 1
|
||||
|
||||
|
||||
def test_add_submitted_duplicate(vector_store, sample_vector):
    """A post whose text hash is already stored is not added a second time."""
    # Both calls share the vector and, crucially, the same hash.
    shared = {"vector": sample_vector, "text_hash": "same_hash"}

    vector_store.add_submitted(created_at=1000, text="First", **shared)
    second_added = vector_store.add_submitted(
        created_at=2000, text="Second", **shared
    )

    assert second_added is False
    assert vector_store.submitted_count == 1
|
||||
|
||||
|
||||
def test_add_submitted_fifo(vector_store, sample_vector):
    """Exceeding max_submitted evicts the oldest posts first (FIFO).

    Seven posts (ids 0..6) are added to a store limited to five submitted
    entries, so ids 0 and 1 must be gone and every one of ids 2..6 must
    still be present.
    """
    for i in range(7):
        # i + 1 keeps the first component non-zero, avoiding a zero vector
        # that could not be normalized.
        v = np.array([float(i + 1), 0.0, 0.0, 0.0], dtype=np.float32)
        v = v / np.linalg.norm(v)
        vector_store.add_submitted(
            vector=v,
            text_hash=f"hash_{i}",
            created_at=1000 + i,
            post_id=i,
            text=f"Post {i}",
        )

    assert vector_store.submitted_count == 5  # max_submitted

    post_ids = vector_store._submitted_post_ids
    # The two oldest posts must have been evicted...
    for evicted in (0, 1):
        assert evicted not in post_ids
    # ...and all five newest posts must have survived, not just the first one.
    for kept in range(2, 7):
        assert kept in post_ids
|
||||
|
||||
|
||||
def test_find_similar_submitted_empty(vector_store, sample_vector):
    """Searching an empty submitted collection yields an empty list."""
    query = {"vector": sample_vector, "threshold": 0.5, "hours": 24}

    matches = vector_store.find_similar_submitted(**query)

    assert matches == []
|
||||
|
||||
|
||||
def test_find_similar_submitted(vector_store, sample_vector):
    """Of two recent posts, only the one close to the query passes the threshold."""
    import time

    def unit(components):
        # Build a float32 vector and scale it to unit length.
        raw = np.array(components, dtype=np.float32)
        return raw / np.linalg.norm(raw)

    now = int(time.time())
    one_hour_ago = now - 3600

    posts = [
        # Vector similar to the query.
        {
            "vector": unit([0.99, 0.01, 0.0, 0.0]),
            "text_hash": "similar",
            "post_id": 1,
            "text": "Similar post",
            "rag_score": 0.9,
        },
        # Vector dissimilar to the query.
        {
            "vector": unit([0.0, 1.0, 0.0, 0.0]),
            "text_hash": "different",
            "post_id": 2,
            "text": "Different post",
            "rag_score": 0.5,
        },
    ]
    for post in posts:
        vector_store.add_submitted(created_at=one_hour_ago, **post)

    found = vector_store.find_similar_submitted(
        vector=sample_vector, threshold=0.9, hours=24
    )

    assert len(found) == 1
    assert found[0]["post_id"] == 1
    assert found[0]["text"] == "Similar post"
    assert found[0]["similarity"] >= 0.9
|
||||
|
||||
|
||||
def test_find_similar_submitted_time_filter(vector_store, sample_vector):
    """The hours window drops posts older than the requested period."""
    import time

    now = int(time.time())
    hour = 3600

    # (text_hash, age in seconds, post_id, text) — same vector, different ages.
    entries = (
        ("old", 48 * hour, 1, "Old post"),  # 48 hours ago
        ("recent", hour, 2, "Recent post"),  # 1 hour ago
    )
    for text_hash, age, post_id, text in entries:
        vector_store.add_submitted(
            vector=sample_vector,
            text_hash=text_hash,
            created_at=now - age,
            post_id=post_id,
            text=text,
        )

    found = vector_store.find_similar_submitted(
        vector=sample_vector, threshold=0.5, hours=24
    )

    assert len(found) == 1
    assert found[0]["post_id"] == 2
|
||||
|
||||
|
||||
def test_submitted_persistence(vector_store, sample_vector, tmp_path):
    """A saved submitted collection is restored by a new store on the same path.

    One post is added and written to disk; a second VectorStore constructed
    with the same ``submitted_path`` must then expose that post's id, text
    and RAG score.
    """
    vector_store.add_submitted(
        vector=sample_vector,
        text_hash="persist",
        created_at=12345,
        post_id=999,
        text="Persisted post",
        rag_score=0.77,
    )
    vector_store.save_submitted_to_disk()

    # A fresh store pointed at the same file loads the saved data.
    store2 = VectorStore(
        vector_dim=4,
        max_submitted=5,
        storage_path=None,
        submitted_path=str(tmp_path / "submitted.npz"),
    )

    assert store2.submitted_count == 1
    assert store2._submitted_post_ids[0] == 999
    assert store2._submitted_texts[0] == "Persisted post"
    # Compare approximately: the score goes through an on-disk round-trip and
    # may come back in a narrower float type, where exact equality with the
    # Python literal 0.77 is not guaranteed.
    assert store2._submitted_rag_scores[0] == pytest.approx(0.77)
|
||||
|
||||
|
||||
def test_get_stats_includes_submitted(vector_store, sample_vector):
    """get_stats reports both submitted_count and max_submitted."""
    vector_store.add_submitted(
        vector=sample_vector, text_hash="stat", created_at=1000, text="For stats"
    )

    stats = vector_store.get_stats()

    expected_entries = (("submitted_count", 1), ("max_submitted", 5))
    for key, expected in expected_entries:
        assert key in stats
        assert stats[key] == expected
|
||||
|
||||
|
||||
def test_clear_submitted(vector_store, sample_vector):
    """clear() empties the submitted collection."""
    post = {
        "vector": sample_vector,
        "text_hash": "clear",
        "created_at": 1000,
        "text": "To clear",
    }
    vector_store.add_submitted(**post)

    vector_store.clear()

    assert vector_store.submitted_count == 0
|
||||
Reference in New Issue
Block a user