# Source code for ractogateway.cache.exact_cache

"""Exact-match key-value cache with LRU eviction and optional TTL.

Uses ``collections.OrderedDict`` for O(1) get / put / evict — a standard
least-recently-used (LRU) cache pattern.  No external dependencies.

Thread-safety is provided by a ``threading.Lock`` so the cache is safe to
share across threads without any external synchronisation.
"""

from __future__ import annotations

import hashlib
import threading
import time
from collections import OrderedDict

from ractogateway.adapters.base import LLMResponse
from ractogateway.cache._models import CacheConfig, CacheEntry, CacheStats


def _make_key(
    user_message: str,
    system_prompt: str,
    model: str,
    temperature: float,
    max_tokens: int,
) -> str:
    """Build a deterministic SHA-256 cache key from request parameters.

    Hashing avoids key-size bloat while remaining collision-resistant for
    practical workloads.  The digest is hex-encoded (64 chars).
    """
    raw = "\x00".join([user_message, system_prompt, model, str(temperature), str(max_tokens)])
    return hashlib.sha256(raw.encode()).hexdigest()


class ExactMatchCache:
    """Ultra-low-latency key-value cache for identical LLM requests.

    Entries are keyed by a hash of the request parameters and kept in an
    ``OrderedDict`` whose order is the LRU order (oldest first).  Every
    public method takes the instance lock before touching shared state.

    Parameters
    ----------
    max_size:
        LRU capacity.  ``0`` = unlimited (no eviction).
    ttl_seconds:
        Entries older than *ttl_seconds* are treated as misses and
        transparently evicted.  ``None`` disables expiry.

    Example::

        from ractogateway.cache import ExactMatchCache

        cache = ExactMatchCache(max_size=512, ttl_seconds=3600)

        # Wire into a kit:
        kit = OpenAIDeveloperKit(model="gpt-4o", exact_cache=cache)
    """

    def __init__(
        self,
        max_size: int = 1024,
        ttl_seconds: float | None = None,
    ) -> None:
        self._config = CacheConfig(max_size=max_size, ttl_seconds=ttl_seconds)
        # OrderedDict: insertion order = LRU order; move_to_end(key) on hit.
        self._store: OrderedDict[str, CacheEntry] = OrderedDict()
        self._lock = threading.Lock()
        self._hits = 0
        self._misses = 0

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def get(
        self,
        user_message: str,
        system_prompt: str,
        model: str,
        temperature: float,
        max_tokens: int,
    ) -> LLMResponse | None:
        """Return a cached response or ``None`` on a miss.

        An entry older than the configured TTL is deleted and counted as a
        miss.  A hit moves the entry to the most-recently-used end and
        increments both the entry's ``hit_count`` and the cache hit counter.

        O(1) — dictionary lookup + optional move-to-end.
        """
        key = _make_key(user_message, system_prompt, model, temperature, max_tokens)
        with self._lock:
            entry = self._store.get(key)
            if entry is None:
                self._misses += 1
                return None

            # TTL check (monotonic clock, so wall-clock jumps do not matter).
            ttl = self._config.ttl_seconds
            if ttl is not None and time.monotonic() - entry.created_at > ttl:
                del self._store[key]
                self._misses += 1
                return None

            # Cache hit — promote to most-recently-used position.
            self._store.move_to_end(key)
            entry.hit_count += 1
            self._hits += 1
            return entry.response

    def put(
        self,
        user_message: str,
        system_prompt: str,
        model: str,
        temperature: float,
        max_tokens: int,
        response: LLMResponse,
    ) -> None:
        """Store a response.  Evicts LRU entries when over capacity.

        If the key is already present, its response and creation time are
        refreshed in place and the entry is promoted to the MRU end.

        O(1) amortised — dictionary insert + optional popitem(last=False).
        """
        key = _make_key(user_message, system_prompt, model, temperature, max_tokens)
        with self._lock:
            existing = self._store.get(key)
            if existing is not None:
                # Update in place; promote to MRU end.
                existing.response = response
                existing.created_at = time.monotonic()
                self._store.move_to_end(key)
                return

            # A new key is appended at the MRU end by the assignment itself.
            self._store[key] = CacheEntry(response=response, created_at=time.monotonic())

            # Evict least-recently-used entries while over capacity.
            cap = self._config.max_size
            if cap > 0:
                while len(self._store) > cap:
                    self._store.popitem(last=False)  # O(1) — pop from LRU end

    def invalidate(
        self,
        user_message: str,
        system_prompt: str,
        model: str,
        temperature: float,
        max_tokens: int,
    ) -> bool:
        """Remove a specific entry.  Returns ``True`` if it was present."""
        key = _make_key(user_message, system_prompt, model, temperature, max_tokens)
        with self._lock:
            if key in self._store:
                del self._store[key]
                return True
            return False

    def clear(self) -> None:
        """Evict all cached entries and reset counters."""
        with self._lock:
            self._store.clear()
            self._hits = 0
            self._misses = 0

    @property
    def stats(self) -> CacheStats:
        """Return a snapshot of hit/miss/size counters."""
        with self._lock:
            return CacheStats(hits=self._hits, misses=self._misses, size=len(self._store))

    def __len__(self) -> int:
        """Return the number of stored entries."""
        with self._lock:
            return len(self._store)

    def __repr__(self) -> str:  # pragma: no cover
        s = self.stats
        ttl = self._config.ttl_seconds
        # Only append the unit when a TTL is set (avoids rendering "Nones").
        ttl_text = "None" if ttl is None else f"{ttl}s"
        return (
            f"ExactMatchCache(max_size={self._config.max_size}, "
            f"ttl={ttl_text}, "
            f"size={s.size}, hit_rate={s.hit_rate:.1%})"
        )