# Source code for ractogateway._models.chat

"""Strongly-typed input models for chat completion calls."""

from __future__ import annotations

from enum import Enum
from typing import Any

from pydantic import BaseModel, Field

from ractogateway.prompts.engine import RactoFile, RactoPrompt
from ractogateway.tools.registry import ToolRegistry



# [docs]
class MessageRole(str, Enum):
    """Enumerates who authored a given conversation message.

    Because the enum also derives from ``str``, each member compares equal to
    its plain string value (e.g. ``MessageRole.USER == "user"``).
    """

    # Instruction-level message that frames the conversation.
    SYSTEM = "system"
    # Message written by the end user.
    USER = "user"
    # Message produced by the model.
    ASSISTANT = "assistant"




# [docs]
class Message(BaseModel):
    """One turn of a conversation: who spoke and what was said.

    Instances populate ``ChatConfig.history`` so that the model receives
    earlier turns as context in a multi-turn conversation.
    """

    # Author of this turn (system, user, or assistant).
    role: MessageRole
    # Text content of the turn.
    content: str




# [docs]
class ChatConfig(BaseModel):
    """Validated input for every ``chat`` / ``achat`` / ``stream`` / ``astream`` call.

    Pass a single ``ChatConfig`` to any developer-kit method.  Only
    ``user_message`` is required; every other field has a default, so you
    only need to supply what you actually need.

    Numeric fields carry range constraints (for example ``temperature`` must
    lie in [0.0, 2.0]).  Out-of-range values are rejected with a pydantic
    ``ValidationError`` when the config is constructed; they are never
    silently adjusted.

    Minimal example::

        config = ChatConfig(user_message="Explain Python generators.")
        response = kit.chat(config)

    Vision / multimodal example::

        from ractogateway.prompts.engine import RactoFile

        config = ChatConfig(
            user_message="Describe this chart.",
            attachments=[RactoFile.from_path("sales_q4.png")],
        )

    Structured JSON output example::

        class Sentiment(BaseModel):
            label: str
            score: float

        config = ChatConfig(
            user_message="I love this library!",
            response_model=Sentiment,
        )
    """

    # The only required field (``...`` marks it as having no default).
    user_message: str = Field(
        ...,
        min_length=1,
        description=(
            "The end-user's text query or instruction sent to the model. "
            "When attachments are also provided this text is always included "
            "as the final 'text' part of the user turn so the model receives "
            "both the files and your question in one message. "
            "Minimum length: 1 character."
        ),
    )
    prompt: RactoPrompt | None = Field(
        default=None,
        description=(
            "Structured RACTO prompt that compiles to the system instruction. "
            "When ``None`` the kit's ``default_prompt`` is used instead; "
            "at least one of the two must be set or ``chat()`` will raise. "
            "The prompt defines the model's role, aim, constraints, tone, and "
            "expected output format — including JSON schemas for structured output."
        ),
    )
    # ``ge`` / ``le`` make pydantic *reject* out-of-range values; nothing is
    # clamped, so the description states rejection rather than clamping.
    temperature: float = Field(
        default=0.0,
        ge=0.0,
        le=2.0,
        description=(
            "Sampling temperature controlling output randomness. "
            "0.0 = fully deterministic / greedy decoding (best for structured "
            "JSON, code generation, and factual Q&A). "
            "0.3-0.7 = balanced creativity (good for summaries, analysis). "
            "0.8-1.2 = creative writing, brainstorming. "
            "Above 1.2 = highly random; use with caution. "
            "Values outside [0.0, 2.0] are rejected with a validation error."
        ),
    )
    max_tokens: int = Field(
        default=4096,
        gt=0,
        description=(
            "Maximum number of tokens the model may generate in its response. "
            "Does not count the prompt / input tokens. "
            "Increase for long-form documents or multi-step reasoning chains; "
            "decrease to enforce concise replies or reduce cost. "
            "Must be greater than 0."
        ),
    )
    tools: ToolRegistry | None = Field(
        default=None,
        description=(
            "Optional ``ToolRegistry`` containing Python functions the model "
            "may call (function / tool calling). "
            "Create one with ``ToolRegistry([fn1, fn2])`` where each function "
            "is decorated with ``@tool``. "
            "When ``None`` tool calling is disabled. "
            "Combine with ``auto_execute_tools=True`` to let the kit run the "
            "functions automatically and continue the loop."
        ),
    )
    # NOTE(review): the "requires ``tools``" rule in the description is not
    # enforced by any validator in the visible class body; presumably the
    # kits check it at call time — confirm.
    auto_execute_tools: bool = Field(
        default=False,
        description=(
            "When ``True``, ``chat()`` / ``achat()`` automatically execute every "
            "tool call requested by the model, feed the results back, and repeat "
            "until the model produces a final text response or ``max_tool_turns`` "
            "is reached. "
            "When ``False`` (default) tool-call results are returned in "
            "``LLMResponse.tool_calls`` for you to handle manually. "
            "Requires ``tools`` to be set."
        ),
    )
    max_tool_turns: int = Field(
        default=3,
        ge=1,
        le=10,
        description=(
            "Maximum number of automatic tool-execution rounds before the kit "
            "returns the last response as-is. "
            "Only meaningful when ``auto_execute_tools=True``. "
            "Range: 1-10."
        ),
    )
    response_model: type[BaseModel] | None = Field(
        default=None,
        description=(
            "Optional Pydantic model class for structured JSON output. "
            "When set the kit instructs the model to respond with a JSON object "
            "that matches the model's schema, then validates and parses it. "
            "The validated dict is available as ``LLMResponse.parsed``. "
            "Combine with ``max_validation_retries`` to auto-correct bad JSON. "
            "Example: ``response_model=MySentimentModel``."
        ),
    )
    max_validation_retries: int = Field(
        default=2,
        ge=0,
        le=5,
        description=(
            "Number of automatic retry attempts when ``response_model`` "
            "validation fails. "
            "On each retry the exact Pydantic validation errors together with "
            "the invalid JSON are fed back to the model as a correction prompt "
            "so it can fix its output. "
            "Set to 0 to disable retries and raise "
            "``ResponseModelValidationError`` immediately on the first failure. "
            "Range: 0-5."
        ),
    )
    # ``default_factory`` gives each config its own list instead of sharing
    # one mutable default across instances.
    history: list[Message] = Field(
        default_factory=list,
        description=(
            "Prior conversation turns for multi-turn / stateful chat. "
            "Each entry is a ``Message(role=MessageRole.USER/ASSISTANT, "
            "content='...')`` object. "
            "Turns are spliced between the system message and the current user "
            "message before the API call so the model sees the full context. "
            "Defaults to an empty list (single-turn / stateless mode)."
        ),
    )
    # Unlike ``history``, "no attachments" is represented by ``None`` rather
    # than an empty list.
    attachments: list[RactoFile] | None = Field(
        default=None,
        description=(
            "Optional list of file attachments for multimodal / VLM calls. "
            "Create files with ``RactoFile.from_path('image.jpg')`` (auto-detects "
            "MIME type) or ``RactoFile.from_bytes(raw_bytes, 'image/png')``. "
            "\n\n"
            "Attachment processing differs per provider and file type:\n\n"
            "**OpenAI / HuggingFace** (``image_url`` content blocks):\n"
            "  - Images (JPEG, PNG, GIF, WebP) → inline ``data:`` URI "
            "``image_url`` block; processed by GPT-4o, GPT-4-vision, "
            "LLaVA-HF, Qwen-VL, etc.\n"
            "  - Text files (CSV, TXT, HTML, Markdown) → ``text`` block; "
            "the raw UTF-8 content is embedded and the model reads it.\n"
            "  - PDFs → ``text`` block (raw bytes decoded); for reliable PDF "
            "understanding prefer the Anthropic kit which has native PDF support.\n"
            "  - XLSX / DOCX / other binary → ``image_url`` data-URI fallback; "
            "the model will not meaningfully parse binary spreadsheets — "
            "pre-convert to CSV or text before attaching.\n\n"
            "**Anthropic** (native content blocks):\n"
            "  - Images → ``image`` block with base-64 source; "
            "Claude 3.x and later process these natively.\n"
            "  - PDFs → ``document`` block with base-64 source; "
            "Claude has first-class PDF understanding (text + layout).\n"
            "  - Text (CSV, TXT, Markdown) → ``text`` block (UTF-8).\n"
            "  - XLSX / other binary → ``text`` block showing "
            "``[File: name (mime_type) — base64 encoded]`` followed by the "
            "base-64 payload; the model sees the raw encoding, not parsed data. "
            "Pre-convert spreadsheets to CSV.\n\n"
            "**Google Gemini** (``inline_data`` parts):\n"
            "  - Text (CSV, TXT) → ``{\"text\": decoded_content}`` part.\n"
            "  - All other files (images, PDFs, XLSX, audio, video) → "
            "``{\"inline_data\": {\"mime_type\": ..., \"data\": base64}}`` part; "
            "Gemini 2.x supports images, PDFs, audio (MP3/WAV), video (MP4), "
            "and spreadsheets (XLSX) natively.\n\n"
            "**Ollama** (``images`` list on the message):\n"
            "  - Images → placed in the top-level ``images`` list "
            "(base-64 strings) understood by llava, llava-llama3, minicpm-v, "
            "bakllava, and other Ollama vision models.\n"
            "  - Text files (CSV, TXT) → prepended as UTF-8 text in the "
            "``content`` field before your ``user_message``.\n"
            "  - PDFs / XLSX / other binary → silently ignored by the Ollama "
            "adapter (Ollama has no native document support). Pre-extract text.\n\n"
            "**Recommendation by file type**:\n"
            "  - Images → all 5 kits work; Ollama requires a vision model.\n"
            "  - PDFs → use Anthropic (native) or Gemini (inline_data).\n"
            "  - CSV / plain-text → all 5 kits embed as readable text.\n"
            "  - XLSX / DOCX → convert to CSV / Markdown first for best results.\n"
            "  - Audio / Video → Gemini only (inline_data with correct MIME)."
        ),
    )
    chain_of_thought: bool = Field(
        default=False,
        description=(
            "When ``True``, instructs the model to reason step by step before "
            "delivering its final answer. "
            "A chain-of-thought constraint is appended to the compiled system "
            "prompt, asking the model to state each reasoning step clearly and "
            "then conclude with its answer. "
            "Compatible with all five provider kits (OpenAI, Anthropic, Google, "
            "Ollama, HuggingFace). "
            "Combine with ``temperature > 0`` for richer reasoning traces."
        ),
    )
    native_thinking: bool = Field(
        default=False,
        description=(
            "Enable native model-level extended thinking / reasoning output. "
            "The model's actual reasoning process is returned alongside the answer. "
            "\n\n"
            "**Anthropic** (``claude-3-7-sonnet-20250219`` and later): "
            "Adds a ``thinking`` content block before the answer; temperature is "
            "automatically forced to 1 as required by the API. "
            "Thinking text streams in ``StreamChunk.delta.thinking`` / "
            "``StreamChunk.accumulated_thinking``; non-streaming responses expose "
            "it in ``LLMResponse.thinking``. "
            "\n\n"
            "**Google** (``gemini-2.5-pro``, ``gemini-2.0-flash-thinking-exp``): "
            "Thought parts are separated from the answer and returned in the same "
            "``thinking`` fields. "
            "\n\n"
            "**OpenAI** (``o1``, ``o3``, ``o3-mini``): Reasoning happens internally; "
            "no reasoning text is exposed but ``LLMResponse.usage[reasoning_tokens]`` "
            "shows how many tokens were consumed. "
            "\n\n"
            "Use ``thinking_budget`` to control reasoning token spend."
        ),
    )
    # NOTE(review): the default budget (10000) is larger than the default
    # ``max_tokens`` (4096). Anthropic's extended-thinking API expects the
    # thinking budget to be smaller than ``max_tokens``; presumably the
    # Anthropic kit reconciles the two — confirm against the adapter.
    thinking_budget: int = Field(
        default=10000,
        gt=0,
        description=(
            "Maximum tokens the model may spend on internal thinking / reasoning. "
            "Only meaningful when ``native_thinking=True``. "
            "Higher budgets allow deeper reasoning but increase cost and latency. "
            "Anthropic: 1024-100000 (recommended >= 1024). "
            "Google: set to a positive integer; the model respects it as a soft cap. "
            "Must be greater than 0."
        ),
    )
    extra: dict[str, Any] = Field(
        default_factory=dict,
        description=(
            "Provider-specific keyword arguments forwarded verbatim to the "
            "underlying API call. "
            "Common examples: "
            "``top_p`` (nucleus sampling probability), "
            "``stop`` (list of stop sequences), "
            "``seed`` (for reproducible sampling on OpenAI), "
            "``frequency_penalty`` / ``presence_penalty`` (OpenAI), "
            "``repetition_penalty`` (HuggingFace), "
            "``top_k`` (Ollama / HuggingFace). "
            "Unknown keys are silently forwarded so provider-specific features "
            "are always accessible without needing a new field."
        ),
    )

    # Allows field types pydantic cannot build a schema for. Presumably needed
    # because some of ToolRegistry / RactoPrompt / RactoFile are plain classes
    # rather than pydantic models — confirm.
    model_config = {"arbitrary_types_allowed": True}