# Source code for ractogateway.ollama_developer_kit.server

"""OllamaServerManager — start and stop an Ollama server from Python.

Useful when you want to control the Ollama process lifecycle (custom port,
programmatic startup/shutdown) directly from your application code.

Usage::

    from ractogateway import ollama_developer_kit as local

    with local.OllamaServerManager(port=11500) as srv:
        kit = local.Chat(model="llama3.2", base_url=srv.base_url)
        response = kit.chat(local.ChatConfig(user_message="Hello!"))
        print(response.content)

Or manually::

    srv = OllamaServerManager(port=11500)
    srv.start()
    ...
    srv.stop()
"""

from __future__ import annotations

import atexit
import os
import subprocess
import time
from typing import Any



# [docs] -- documentation-link marker left over from the rendered page; not code.
class OllamaServerManager:
    """Manage the lifecycle of an Ollama server subprocess.

    The server is started with the ``OLLAMA_HOST`` environment variable set to
    ``{host}:{port}``, which makes Ollama listen on the requested address.

    Parameters
    ----------
    host:
        Bind address.  Defaults to ``"127.0.0.1"`` (localhost only).  A bare
        IPv6 literal such as ``"::1"`` is wrapped in square brackets when the
        address and URL are built.
    port:
        TCP port for the Ollama REST API.  Defaults to ``11434`` (the standard
        Ollama port).  Change this to run multiple Ollama instances or avoid
        conflicts with an already-running server.
    startup_timeout:
        Seconds to wait for the server to become ready after starting the
        subprocess.  Raises :class:`TimeoutError` if the server doesn't
        respond within this window.
    ollama_bin:
        Path to the ``ollama`` executable.  Defaults to ``"ollama"`` (looked
        up via PATH).

    Attributes
    ----------
    base_url : str
        The full ``http://{host}:{port}`` URL of the managed server.  Use this
        to construct a :class:`~ractogateway.ollama_developer_kit.kit.OllamaDeveloperKit`::

            kit = local.Chat(model="llama3.2", base_url=srv.base_url)

    Examples
    --------
    **Context manager** (recommended — guarantees cleanup)::

        with OllamaServerManager(port=11500) as srv:
            kit = local.Chat(model="llama3.2", base_url=srv.base_url)
            print(kit.chat(local.ChatConfig(user_message="Hi")).content)

    **Manual start / stop**::

        srv = OllamaServerManager(port=11500)
        srv.start()
        try:
            ...
        finally:
            srv.stop()
    """

    def __init__(
        self,
        *,
        host: str = "127.0.0.1",
        port: int = 11434,
        startup_timeout: float = 30.0,
        ollama_bin: str = "ollama",
    ) -> None:
        self.host = host
        self.port = port
        self.startup_timeout = startup_timeout
        self.ollama_bin = ollama_bin
        # Handle of the managed subprocess; ``None`` until start() succeeds in
        # spawning it and again after stop().
        self._proc: subprocess.Popen[bytes] | None = None

    # ------------------------------------------------------------------
    # Properties
    # ------------------------------------------------------------------

    @property
    def _address(self) -> str:
        """Return ``{host}:{port}``, bracketing bare IPv6 literals."""
        host = self.host
        # A colon inside the host means an IPv6 literal; without brackets the
        # host/port separator would be ambiguous in both the URL and
        # OLLAMA_HOST.
        if ":" in host and not host.startswith("["):
            host = f"[{host}]"
        return f"{host}:{self.port}"

    @property
    def base_url(self) -> str:
        """Return ``http://{host}:{port}``."""
        return f"http://{self._address}"

    @property
    def is_running(self) -> bool:
        """``True`` when the subprocess is alive."""
        return self._proc is not None and self._proc.poll() is None

    # ------------------------------------------------------------------
    # Lifecycle
    # ------------------------------------------------------------------

    def start(self) -> OllamaServerManager:
        """Start the Ollama server subprocess.

        Returns *self* so that the call can be chained::

            srv = OllamaServerManager(port=11500).start()

        If the server fails to become ready (or startup is interrupted), the
        subprocess is stopped before the exception propagates, so a failed
        ``start()`` never leaves a stray process behind.

        Raises
        ------
        RuntimeError
            If the server is already running, or the subprocess exits before
            becoming ready.
        FileNotFoundError
            If the ``ollama`` binary cannot be found.
        TimeoutError
            If the server does not become ready within *startup_timeout* seconds.
        """
        if self.is_running:
            raise RuntimeError(
                f"OllamaServerManager is already running on {self.base_url}."
            )

        self._proc = subprocess.Popen(
            [self.ollama_bin, "serve"],
            env=self._subprocess_env(),
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
        # Unregister first so repeated start() calls (e.g. after the process
        # died on its own) never stack up duplicate exit hooks.
        atexit.unregister(self._atexit_cleanup)
        atexit.register(self._atexit_cleanup)
        try:
            self._wait_until_ready()
        except BaseException:
            # Includes KeyboardInterrupt: when start() fails inside
            # __enter__, __exit__ is never called, so clean up here.
            self.stop()
            raise
        return self

    def stop(self) -> None:
        """Stop the Ollama server subprocess gracefully.

        Sends SIGTERM first; if the process doesn't exit within 5 seconds,
        SIGKILL is used.  Silently does nothing if the server is not running.
        Also removes the exit hook installed by :meth:`start`.
        """
        self._terminate_process()
        # A no-op when nothing is registered.
        atexit.unregister(self._atexit_cleanup)

    def pull(self, model: str) -> None:
        """Pull *model* from the Ollama library into the running server.

        Equivalent to running ``ollama pull <model>`` in a shell, but
        scoped to the server managed by this instance via ``OLLAMA_HOST``.

        Parameters
        ----------
        model:
            Model name, e.g. ``"llama3.2"``, ``"nomic-embed-text"``.

        Raises
        ------
        RuntimeError
            If the server is not running.
        subprocess.CalledProcessError
            If ``ollama pull`` exits with a non-zero status.
        """
        if not self.is_running:
            raise RuntimeError(
                "OllamaServerManager is not running.  Call .start() first."
            )
        subprocess.run(
            [self.ollama_bin, "pull", model],
            env=self._subprocess_env(),
            check=True,
        )

    def list_models(self) -> list[str]:
        """Return the names of locally-available models on this server.

        Uses a lightweight HTTP request to the ``/api/tags`` endpoint instead
        of a subprocess so it works even when ``ollama`` CLI is not on PATH.

        Returns
        -------
        list[str]
            Model names (e.g. ``["llama3.2:latest", "nomic-embed-text:latest"]``).
            Empty when the response has no ``"models"`` entry or it is null.

        Raises
        ------
        urllib.error.URLError
            If the server cannot be reached within 5 seconds.
        """
        import json
        import urllib.request

        url = f"{self.base_url}/api/tags"
        with urllib.request.urlopen(url, timeout=5) as resp:  # noqa: S310
            data: Any = json.loads(resp.read())
        # ``or []`` also covers an explicit ``"models": null`` payload.
        return [m["name"] for m in data.get("models") or []]

    # ------------------------------------------------------------------
    # Context-manager protocol
    # ------------------------------------------------------------------

    def __enter__(self) -> OllamaServerManager:
        self.start()
        return self

    def __exit__(self, *_: object) -> None:
        self.stop()

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _subprocess_env(self) -> dict[str, str]:
        """Return the current environment with ``OLLAMA_HOST`` set for this server."""
        return {**os.environ, "OLLAMA_HOST": self._address}

    def _terminate_process(self) -> None:
        """Terminate the subprocess (if any) and forget its handle."""
        if self._proc is None:
            return
        if self._proc.poll() is None:
            self._proc.terminate()
            try:
                self._proc.wait(timeout=5)
            except subprocess.TimeoutExpired:
                # Graceful shutdown took too long; force it and reap.
                self._proc.kill()
                self._proc.wait()
        self._proc = None

    def _wait_until_ready(self) -> None:
        """Poll ``/api/tags`` until the server responds or the timeout expires.

        Raises
        ------
        RuntimeError
            If the subprocess exits before the endpoint answers.
        TimeoutError
            If no successful response arrives within *startup_timeout* seconds.

        Cleanup of the subprocess on failure is handled by :meth:`start`.
        """
        import urllib.error
        import urllib.request

        deadline = time.monotonic() + self.startup_timeout
        url = f"{self.base_url}/api/tags"
        last_exc: Exception | None = None

        while time.monotonic() < deadline:
            # Check whether the subprocess died unexpectedly
            if self._proc is not None and self._proc.poll() is not None:
                raise RuntimeError(
                    f"Ollama subprocess exited with code {self._proc.returncode} "
                    "before becoming ready.  Is the 'ollama' binary installed?"
                )
            try:
                with urllib.request.urlopen(url, timeout=1):  # noqa: S310
                    return  # server is ready
            except Exception as exc:
                # Deliberately broad: any failure here just means "not ready
                # yet"; the last one is reported if the deadline passes.
                last_exc = exc
                time.sleep(0.25)

        raise TimeoutError(
            f"Ollama server at {self.base_url} did not become ready within "
            f"{self.startup_timeout}s.  Last error: {last_exc}"
        )

    def _atexit_cleanup(self) -> None:
        """Registered with :func:`atexit` to ensure the subprocess is stopped."""
        # Does not go through stop(): unregistering a hook from inside an
        # exit hook is documented as undefined behaviour.
        self._terminate_process()

    def __repr__(self) -> str:
        status = "running" if self.is_running else "stopped"
        return f"OllamaServerManager(base_url={self.base_url!r}, status={status!r})"