Source code for ractogateway.rag.stores.in_memory_store

"""In-memory vector store — pure Python, zero extra dependencies.

Uses brute-force cosine similarity over a list of stored vectors.
Suitable for development, testing, and small corpora (< 10k chunks).
"""

from __future__ import annotations

import math
from typing import Any

from ractogateway.rag._models.document import Chunk
from ractogateway.rag._models.retrieval import RetrievalResult
from ractogateway.rag.stores.base import BaseVectorStore


def _cosine_similarity(a: list[float], b: list[float]) -> float:
    dot = sum(x * y for x, y in zip(a, b, strict=False))
    norm_a = math.sqrt(sum(x * x for x in a))
    norm_b = math.sqrt(sum(x * x for x in b))
    if norm_a == 0.0 or norm_b == 0.0:
        return 0.0
    return dot / (norm_a * norm_b)


class InMemoryVectorStore(BaseVectorStore):
    """Pure-Python brute-force vector store — no extra dependencies.

    This store keeps all chunks and their embeddings in memory.  It is not
    suitable for production-scale corpora but requires no installation.

    Parameters
    ----------
    similarity:
        Similarity function to use.  Currently only ``"cosine"`` is supported.

    Raises
    ------
    ValueError
        If *similarity* is anything other than ``"cosine"``.
    """

    def __init__(self, similarity: str = "cosine") -> None:
        if similarity != "cosine":
            raise ValueError(
                f"Unsupported similarity: {similarity!r}. Only 'cosine' is supported."
            )
        # Flat list scanned linearly on every search.
        self._chunks: list[Chunk] = []

    def add(self, chunks: list[Chunk]) -> None:
        """Append *chunks* to the store.

        The chunks are first passed to ``self._require_embeddings`` (not
        defined in this class; presumably it rejects chunks without an
        embedding — confirm against the base class).  No de-duplication by
        ``chunk_id`` is performed.
        """
        self._require_embeddings(chunks)
        self._chunks.extend(chunks)

    @staticmethod
    def _matches_filters(chunk: Chunk, filters: dict[str, Any]) -> bool:
        """Return whether *chunk* satisfies every key/value pair in *filters*.

        A pair matches when the value equals either ``chunk.metadata.extra[key]``
        or the attribute ``chunk.metadata.<key>``.
        """
        metadata = chunk.metadata
        return all(
            metadata.extra.get(key) == expected
            or getattr(metadata, key, None) == expected
            for key, expected in filters.items()
        )

    def search(
        self,
        embedding: list[float],
        top_k: int = 5,
        filters: dict[str, Any] | None = None,
    ) -> list[RetrievalResult]:
        """Return the *top_k* stored chunks most similar to *embedding*.

        Parameters
        ----------
        embedding:
            Query vector compared against every candidate chunk's embedding.
        top_k:
            Maximum number of results.  A value of zero or less yields an
            empty list.
        filters:
            Optional exact-match constraints.  Each key is looked up in
            ``chunk.metadata.extra`` and as an attribute of ``chunk.metadata``;
            a chunk is kept only if every key matches one of the two.

        Returns
        -------
        list[RetrievalResult]
            Results ordered by descending score, with ``rank`` starting at 1.
            Scores are cosine similarities clamped to be non-negative.
        """
        # A negative ``top_k`` would otherwise be interpreted by the slice
        # below as "drop the last |top_k| results", which is never intended.
        if top_k <= 0:
            return []

        candidates = self._chunks
        if filters:
            candidates = [
                chunk for chunk in candidates if self._matches_filters(chunk, filters)
            ]

        scored: list[tuple[Chunk, float]] = []
        for chunk in candidates:
            chunk_embedding = chunk.embedding
            if chunk_embedding is None:
                continue
            raw_score = _cosine_similarity(embedding, chunk_embedding)
            # Clamp cosine similarity from [-1, 1] to a non-negative relevance score.
            scored.append((chunk, max(raw_score, 0.0)))

        # The sort is stable, so equally-scored chunks keep insertion order.
        scored.sort(key=lambda pair: pair[1], reverse=True)
        return [
            RetrievalResult(chunk=chunk, score=score, rank=rank)
            for rank, (chunk, score) in enumerate(scored[:top_k], start=1)
        ]

    def delete(self, chunk_ids: list[str]) -> None:
        """Remove every stored chunk whose ``chunk_id`` is in *chunk_ids*.

        Ids that are not present in the store are ignored.
        """
        id_set = set(chunk_ids)
        self._chunks = [c for c in self._chunks if c.chunk_id not in id_set]

    def clear(self) -> None:
        """Remove all chunks from the store."""
        self._chunks = []

    def count(self) -> int:
        """Return the number of chunks currently stored."""
        return len(self._chunks)