"""Shared data models for caching subsystem."""
from __future__ import annotations
from typing import Any
from pydantic import BaseModel, Field
from ractogateway.adapters.base import LLMResponse
class CacheConfig(BaseModel):
    """Settings that bound the size and entry lifetime of a cache.

    Parameters
    ----------
    max_size:
        Upper bound on the number of entries held.  Once the cache is full
        the least-recently-used entry is evicted (LRU policy).  ``0`` means
        unlimited; negative values are rejected.
    ttl_seconds:
        Time-to-live in seconds.  Entries older than this are treated as
        misses and evicted lazily.  ``None`` disables TTL; any other value
        must be strictly positive.
    """

    # ``ge=0`` keeps 0 valid because 0 is the "unlimited" sentinel.
    max_size: int = Field(
        default=1024,
        ge=0,
    )
    # ``gt=0`` rejects a zero TTL; use ``None`` to switch expiry off.
    ttl_seconds: float | None = Field(
        default=None,
        gt=0,
    )
class CacheEntry(BaseModel):
    """One cached LLM response plus its bookkeeping fields.

    Attributes
    ----------
    response:
        The cached ``LLMResponse``.
    created_at:
        Monotonic timestamp of insertion (``time.monotonic()``).
    hit_count:
        Non-negative counter, ``0`` by default.
    """

    # Let pydantic accept field types it has no built-in schema for
    # (presumably required for ``LLMResponse`` -- confirm against its definition).
    model_config = {"arbitrary_types_allowed": True}

    response: LLMResponse
    created_at: float = Field(
        description="Monotonic timestamp of insertion (time.monotonic()).",
    )
    hit_count: int = Field(
        default=0,
        ge=0,
    )
class CacheStats(BaseModel):
    """Point-in-time view of a cache's performance counters.

    Attributes
    ----------
    hits:
        Requests served from cache.
    misses:
        Requests that bypassed the cache.
    size:
        Current number of stored entries.
    """

    hits: int = Field(
        default=0,
        ge=0,
        description="Requests served from cache.",
    )
    misses: int = Field(
        default=0,
        ge=0,
        description="Requests that bypassed the cache.",
    )
    size: int = Field(
        default=0,
        ge=0,
        description="Current number of stored entries.",
    )

    @property
    def total(self) -> int:
        """Number of requests seen by the cache: hits plus misses."""
        request_count = self.hits + self.misses
        return request_count

    @property
    def hit_rate(self) -> float:
        """Share of requests that were cache hits (0.0-1.0).

        Returns ``0.0`` when no request has been recorded, so the division
        is never attempted with a zero denominator.
        """
        request_count = self.total
        if not request_count:
            return 0.0
        return self.hits / request_count

    def __repr__(self) -> str:  # pragma: no cover
        """Return a compact summary with the hit rate shown as a percentage."""
        summary = (
            f"CacheStats(hits={self.hits}, misses={self.misses}, "
            f"size={self.size}, hit_rate={self.hit_rate:.1%})"
        )
        return summary
class SemanticCacheConfig(BaseModel):
    """Settings for the semantic similarity cache.

    Parameters
    ----------
    threshold:
        Minimum cosine similarity (0.0-1.0) required to declare a cache
        hit.  Defaults to ``0.95`` (very strict -- avoids false positives).
    max_size:
        Maximum entries before LRU eviction.  ``0`` means unlimited;
        negative values are rejected.
    ttl_seconds:
        Optional TTL in seconds; ``None`` disables expiry.  Any other value
        must be strictly positive.
    """

    # Bounded to the closed interval [0.0, 1.0].
    threshold: float = Field(
        default=0.95,
        ge=0.0,
        le=1.0,
    )
    # ``ge=0`` keeps 0 valid because 0 is the "unlimited" sentinel.
    max_size: int = Field(
        default=512,
        ge=0,
    )
    ttl_seconds: float | None = Field(
        default=None,
        gt=0,
    )
class SemanticCacheEntry(BaseModel):
    """A semantic-cache record that pairs a query embedding with its response.

    Attributes
    ----------
    vector:
        Embedding of the original query.
    response:
        The cached ``LLMResponse``.
    created_at:
        Timestamp recorded for the entry (a float; required, no default).
    hit_count:
        Non-negative counter, ``0`` by default.
    """

    # Let pydantic accept field types it has no built-in schema for
    # (presumably required for ``LLMResponse`` -- confirm against its definition).
    model_config = {"arbitrary_types_allowed": True}

    vector: list[float] = Field(
        description="Embedding of the original query.",
    )
    response: LLMResponse
    created_at: float
    hit_count: int = Field(
        default=0,
        ge=0,
    )

    def model_post_init(self, __context: Any) -> None:
        """Re-store ``vector`` as a plain ``list`` after the model is built.

        Parameters
        ----------
        __context:
            Supplied by pydantic's post-init hook; not used here.
        """
        # A plain list keeps serialisation and comparison straightforward.
        plain_vector = list(self.vector)
        self.vector = plain_vector