"""Tests for the submitted collection of VectorStore."""

import time

import numpy as np
import pytest

from app.storage.vector_store import VectorStore


@pytest.fixture
def vector_store(tmp_path):
    """Return a VectorStore whose submitted collection lives under tmp_path.

    The main storage path is disabled (``storage_path=None``); only the
    submitted collection is given a file, so tests never touch real data.
    """
    return VectorStore(
        vector_dim=4,
        max_examples=10,
        max_submitted=5,
        storage_path=None,
        submitted_path=str(tmp_path / "submitted.npz"),
    )


@pytest.fixture
def sample_vector():
    """Return a unit-length 4-dimensional float32 vector along the first axis."""
    v = np.array([1.0, 0.0, 0.0, 0.0], dtype=np.float32)
    return v / np.linalg.norm(v)


def test_add_submitted(vector_store, sample_vector):
    """Adding a submitted post reports success and increments the count."""
    added = vector_store.add_submitted(
        vector=sample_vector,
        text_hash="abc123",
        created_at=1000,
        post_id=42,
        text="Test post",
        rag_score=0.85,
    )

    assert added is True
    assert vector_store.submitted_count == 1


def test_add_submitted_duplicate(vector_store, sample_vector):
    """A second post with the same text hash is rejected."""
    vector_store.add_submitted(
        vector=sample_vector,
        text_hash="same_hash",
        created_at=1000,
        text="First",
    )

    added = vector_store.add_submitted(
        vector=sample_vector,
        text_hash="same_hash",
        created_at=2000,
        text="Second",
    )

    assert added is False
    assert vector_store.submitted_count == 1


def test_add_submitted_fifo(vector_store, sample_vector):
    """Exceeding max_submitted evicts the oldest entries first (FIFO)."""
    for i in range(7):
        # i + 1 keeps the first component non-zero, avoiding a zero vector
        # (which could not be normalised).
        v = np.array([float(i + 1), 0.0, 0.0, 0.0], dtype=np.float32)
        v = v / np.linalg.norm(v)
        vector_store.add_submitted(
            vector=v,
            text_hash=f"hash_{i}",
            created_at=1000 + i,
            post_id=i,
            text=f"Post {i}",
        )

    assert vector_store.submitted_count == 5  # max_submitted

    # Posts 2, 3, 4, 5, 6 must remain; posts 0 and 1 must have been evicted.
    post_ids = vector_store._submitted_post_ids
    assert 0 not in post_ids
    assert 1 not in post_ids
    assert 2 in post_ids
    # Pin the full surviving set, not just its oldest member, so that an
    # eviction of the wrong entry (e.g. the newest one) is also caught.
    assert sorted(post_ids) == [2, 3, 4, 5, 6]


def test_find_similar_submitted_empty(vector_store, sample_vector):
    """Searching an empty submitted collection returns an empty list."""
    result = vector_store.find_similar_submitted(
        vector=sample_vector,
        threshold=0.5,
        hours=24,
    )

    assert result == []


def test_find_similar_submitted(vector_store, sample_vector):
    """Only posts at or above the similarity threshold are returned."""
    now = int(time.time())

    # Nearly collinear with sample_vector.
    similar_v = np.array([0.99, 0.01, 0.0, 0.0], dtype=np.float32)
    similar_v = similar_v / np.linalg.norm(similar_v)

    # Orthogonal to sample_vector.
    different_v = np.array([0.0, 1.0, 0.0, 0.0], dtype=np.float32)
    different_v = different_v / np.linalg.norm(different_v)

    vector_store.add_submitted(
        vector=similar_v,
        text_hash="similar",
        created_at=now - 3600,  # one hour ago
        post_id=1,
        text="Similar post",
        rag_score=0.9,
    )
    vector_store.add_submitted(
        vector=different_v,
        text_hash="different",
        created_at=now - 3600,
        post_id=2,
        text="Different post",
        rag_score=0.5,
    )

    result = vector_store.find_similar_submitted(
        vector=sample_vector,
        threshold=0.9,
        hours=24,
    )

    assert len(result) == 1
    assert result[0]["post_id"] == 1
    assert result[0]["text"] == "Similar post"
    assert result[0]["similarity"] >= 0.9


def test_find_similar_submitted_time_filter(vector_store, sample_vector):
    """The hours filter excludes posts older than the requested window."""
    now = int(time.time())

    vector_store.add_submitted(
        vector=sample_vector,
        text_hash="old",
        created_at=now - 48 * 3600,  # 48 hours ago, outside the 24h window
        post_id=1,
        text="Old post",
    )
    vector_store.add_submitted(
        vector=sample_vector,
        text_hash="recent",
        created_at=now - 3600,  # one hour ago, inside the window
        post_id=2,
        text="Recent post",
    )

    result = vector_store.find_similar_submitted(
        vector=sample_vector,
        threshold=0.5,
        hours=24,
    )

    assert len(result) == 1
    assert result[0]["post_id"] == 2


def test_submitted_persistence(vector_store, sample_vector, tmp_path):
    """A submitted post saved to disk is loaded by a fresh VectorStore."""
    vector_store.add_submitted(
        vector=sample_vector,
        text_hash="persist",
        created_at=12345,
        post_id=999,
        text="Persisted post",
        rag_score=0.77,
    )
    vector_store.save_submitted_to_disk()

    # A new store pointed at the same file picks the data up on construction.
    store2 = VectorStore(
        vector_dim=4,
        max_submitted=5,
        storage_path=None,
        submitted_path=str(tmp_path / "submitted.npz"),
    )

    assert store2.submitted_count == 1
    assert store2._submitted_post_ids[0] == 999
    assert store2._submitted_texts[0] == "Persisted post"
    # Compare approximately: the score round-trips through an .npz file, and
    # the stored precision is not visible from this test (a float32 array
    # would not compare exactly equal to the Python float 0.77).
    assert store2._submitted_rag_scores[0] == pytest.approx(0.77)


def test_get_stats_includes_submitted(vector_store, sample_vector):
    """get_stats() reports submitted_count and max_submitted."""
    vector_store.add_submitted(
        vector=sample_vector,
        text_hash="stat",
        created_at=1000,
        text="For stats",
    )

    stats = vector_store.get_stats()

    assert "submitted_count" in stats
    assert stats["submitted_count"] == 1
    assert "max_submitted" in stats
    assert stats["max_submitted"] == 5


def test_clear_submitted(vector_store, sample_vector):
    """clear() empties the submitted collection."""
    vector_store.add_submitted(
        vector=sample_vector,
        text_hash="clear",
        created_at=1000,
        text="To clear",
    )

    vector_store.clear()

    assert vector_store.submitted_count == 0