Source code for ractogateway.rag.stores.base

"""Abstract base class for vector stores."""

from __future__ import annotations

from abc import ABC, abstractmethod
from typing import Any

from ractogateway.rag._models.document import Chunk
from ractogateway.rag._models.retrieval import RetrievalResult

# Maximum number of offending chunk IDs quoted in the "missing embeddings"
# error raised by ``BaseVectorStore._require_embeddings``; when more IDs are
# missing, the list is truncated and " (and more)" is appended to the message.
_EMBEDDING_PREVIEW_COUNT = 5


class BaseVectorStore(ABC):
    """Common contract for anything that stores and searches embedding vectors.

    Every concrete store exposes the same five operations — :meth:`add`,
    :meth:`search`, :meth:`delete`, :meth:`clear`, and :meth:`count` — while
    the actual storage backend is chosen by the subclass.
    """

    @abstractmethod
    def add(self, chunks: list[Chunk]) -> None:
        """Index *chunks*, each of which must already carry an embedding.

        Parameters
        ----------
        chunks:
            The chunks to store.  ``embedding`` must not be ``None`` on any
            of them.

        Raises
        ------
        ValueError
            When at least one chunk has ``embedding=None``.
        """

    @abstractmethod
    def search(
        self,
        embedding: list[float],
        top_k: int = 5,
        filters: dict[str, Any] | None = None,
    ) -> list[RetrievalResult]:
        """Return the *top_k* chunks most similar to *embedding*.

        Parameters
        ----------
        embedding:
            The query vector.
        top_k:
            How many results to return.
        filters:
            Optional metadata filters; the format is store-specific.

        Returns
        -------
        list[RetrievalResult]
            Results in ranked order, where rank 1 is the most similar.
        """

    @abstractmethod
    def delete(self, chunk_ids: list[str]) -> None:
        """Drop the chunks identified by *chunk_ids* from the store."""

    @abstractmethod
    def clear(self) -> None:
        """Drop every chunk from the store."""

    @abstractmethod
    def count(self) -> int:
        """Report how many chunks are currently indexed."""

    # ------------------------------------------------------------------
    # Validation shared by concrete stores
    # ------------------------------------------------------------------

    @staticmethod
    def _require_embeddings(chunks: list[Chunk]) -> None:
        """Ensure every chunk in *chunks* has an embedding.

        Parameters
        ----------
        chunks:
            Chunks to check.

        Raises
        ------
        ValueError
            If any chunk has ``embedding`` set to ``None``.  The message
            lists up to ``_EMBEDDING_PREVIEW_COUNT`` of the offending
            ``chunk_id`` values and notes when there are more.
        """
        unembedded_ids = [
            chunk.chunk_id for chunk in chunks if chunk.embedding is None
        ]
        if not unembedded_ids:
            return

        # Only quote a short preview so the error stays readable for big batches.
        preview = unembedded_ids[:_EMBEDDING_PREVIEW_COUNT]
        if len(unembedded_ids) > _EMBEDDING_PREVIEW_COUNT:
            suffix = " (and more)"
        else:
            suffix = ""

        message = (
            "Chunks must have embeddings before adding to a vector store. "
            f"Missing embeddings on chunk_ids: {preview}"
        )
        raise ValueError(message + suffix)