Source code for ractogateway.ollama_developer_kit.server

"""OllamaServerManager — start and stop an Ollama server from Python.

Useful when you want to control the Ollama process lifecycle (custom port,
programmatic startup/shutdown) directly from your application code.

Usage::

    from ractogateway import ollama_developer_kit as local

    with local.OllamaServerManager(port=11500) as srv:
        kit = local.Chat(model="llama3.2", base_url=srv.base_url)
        response = kit.chat(local.ChatConfig(user_message="Hello!"))
        print(response.content)

Or manually::

    srv = OllamaServerManager(port=11500)
    srv.start()
    ...
    srv.stop()
"""

from __future__ import annotations

import atexit
import os
import subprocess
import time
from typing import Any


class OllamaServerManager:
    """Manage the lifecycle of an Ollama server subprocess.

    The server is started with the ``OLLAMA_HOST`` environment variable set
    to ``{host}:{port}``, which makes Ollama listen on the requested address.
    IPv6 literals (for example ``"::1"``) are wrapped in square brackets both
    in ``OLLAMA_HOST`` and in :attr:`base_url`, because a bare IPv6 address
    followed by ``:{port}`` is ambiguous.

    Parameters
    ----------
    host:
        Bind address.  Defaults to ``"127.0.0.1"`` (localhost only).
    port:
        TCP port for the Ollama REST API.  Defaults to ``11434`` (the
        standard Ollama port).  Change this to run multiple Ollama instances
        or avoid conflicts with an already-running server.
    startup_timeout:
        Seconds to wait for the server to become ready after starting the
        subprocess.  Raises :class:`TimeoutError` if the server doesn't
        respond within this window.
    ollama_bin:
        Path to the ``ollama`` executable.  Defaults to ``"ollama"``
        (looked up via PATH).

    Attributes
    ----------
    base_url : str
        The full ``http://{host}:{port}`` URL of the managed server.  Use
        this to construct a
        :class:`~ractogateway.ollama_developer_kit.kit.OllamaDeveloperKit`::

            kit = local.Chat(model="llama3.2", base_url=srv.base_url)

    Examples
    --------
    **Context manager** (recommended — guarantees cleanup)::

        with OllamaServerManager(port=11500) as srv:
            kit = local.Chat(model="llama3.2", base_url=srv.base_url)
            print(kit.chat(local.ChatConfig(user_message="Hi")).content)

    **Manual start / stop**::

        srv = OllamaServerManager(port=11500)
        srv.start()
        try:
            ...
        finally:
            srv.stop()
    """

    def __init__(
        self,
        *,
        host: str = "127.0.0.1",
        port: int = 11434,
        startup_timeout: float = 30.0,
        ollama_bin: str = "ollama",
    ) -> None:
        self.host = host
        self.port = port
        self.startup_timeout = startup_timeout
        self.ollama_bin = ollama_bin
        # Handle of the spawned ``ollama serve`` process; ``None`` while stopped.
        self._proc: subprocess.Popen[bytes] | None = None

    # ------------------------------------------------------------------
    # Properties
    # ------------------------------------------------------------------

    @property
    def base_url(self) -> str:
        """Return ``http://{host}:{port}`` (IPv6 hosts are bracketed)."""
        return f"http://{self._bind_address()}"

    @property
    def is_running(self) -> bool:
        """``True`` when the subprocess is alive."""
        return self._proc is not None and self._proc.poll() is None

    # ------------------------------------------------------------------
    # Lifecycle
    # ------------------------------------------------------------------

    def start(self) -> OllamaServerManager:
        """Start the Ollama server subprocess.

        Returns *self* so that the call can be chained::

            srv = OllamaServerManager(port=11500).start()

        If the server fails to become ready for any reason (timeout, early
        exit, or an interrupt while waiting), the subprocess is stopped and
        the manager is left in the "stopped" state before the exception
        propagates.

        Raises
        ------
        RuntimeError
            If the server is already running, or if the subprocess exits
            before it becomes ready.
        FileNotFoundError
            If the ``ollama`` binary cannot be found.
        TimeoutError
            If the server does not become ready within *startup_timeout*
            seconds.
        """
        if self.is_running:
            raise RuntimeError(
                f"OllamaServerManager is already running on {self.base_url}."
            )

        env = {**os.environ, "OLLAMA_HOST": self._bind_address()}
        self._proc = subprocess.Popen(
            [self.ollama_bin, "serve"],
            env=env,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
        # Safety net for callers that never call stop(); stop() unregisters
        # it again so repeated start/stop cycles do not pile up callbacks.
        atexit.register(self._atexit_cleanup)

        try:
            self._wait_until_ready()
        except BaseException:
            # Covers timeouts, early subprocess exit and Ctrl-C alike: never
            # leave a half-started server (or a stale handle) behind.
            self.stop()
            raise
        return self

    def stop(self) -> None:
        """Stop the Ollama server subprocess gracefully.

        Calls :meth:`subprocess.Popen.terminate` first (SIGTERM on POSIX); if
        the process doesn't exit within 5 seconds,
        :meth:`subprocess.Popen.kill` is used.  Silently does nothing if the
        server is not running.
        """
        if self._proc is None:
            return

        if self._proc.poll() is None:
            self._proc.terminate()
            try:
                self._proc.wait(timeout=5)
            except subprocess.TimeoutExpired:
                self._proc.kill()
                self._proc.wait()

        self._proc = None
        # Drop the exit hook registered by start(); a no-op when absent.
        atexit.unregister(self._atexit_cleanup)

    def pull(self, model: str) -> None:
        """Pull *model* from the Ollama library into the running server.

        Equivalent to running ``ollama pull <model>`` in a shell, but scoped
        to the server managed by this instance via ``OLLAMA_HOST``.

        Parameters
        ----------
        model:
            Model name, e.g. ``"llama3.2"``, ``"nomic-embed-text"``.

        Raises
        ------
        RuntimeError
            If the server is not running.
        subprocess.CalledProcessError
            If ``ollama pull`` exits with a non-zero status.
        """
        if not self.is_running:
            raise RuntimeError(
                "OllamaServerManager is not running. Call .start() first."
            )

        env = {**os.environ, "OLLAMA_HOST": self._bind_address()}
        subprocess.run(
            [self.ollama_bin, "pull", model],
            env=env,
            check=True,
        )

    def list_models(self) -> list[str]:
        """Return the names of locally-available models on this server.

        Uses a lightweight HTTP request to the ``/api/tags`` endpoint instead
        of a subprocess so it works even when ``ollama`` CLI is not on PATH.

        Returns
        -------
        list[str]
            Model names (e.g. ``["llama3.2:latest", "nomic-embed-text:latest"]``).
            Empty when the response has no ``"models"`` entry or it is null.
        """
        import json
        import urllib.request

        url = f"{self.base_url}/api/tags"
        with urllib.request.urlopen(url, timeout=5) as resp:  # noqa: S310
            data: Any = json.loads(resp.read())
        # ``or []`` also covers an explicit ``"models": null`` payload.
        return [m["name"] for m in data.get("models") or []]

    # ------------------------------------------------------------------
    # Context-manager protocol
    # ------------------------------------------------------------------

    def __enter__(self) -> OllamaServerManager:
        self.start()
        return self

    def __exit__(self, *_: object) -> None:
        self.stop()

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _bind_address(self) -> str:
        """Return ``host:port``, bracketing bare IPv6 literals."""
        host = self.host
        if ":" in host and not host.startswith("["):
            host = f"[{host}]"
        return f"{host}:{self.port}"

    def _wait_until_ready(self) -> None:
        """Poll ``/api/tags`` until the server responds or the timeout expires.

        Raises
        ------
        RuntimeError
            If the subprocess exits before the endpoint answers.
        TimeoutError
            If no successful response arrives within *startup_timeout*
            seconds.  Cleanup of the subprocess is left to :meth:`start`.
        """
        import urllib.request

        deadline = time.monotonic() + self.startup_timeout
        url = f"{self.base_url}/api/tags"
        last_exc: Exception | None = None

        while time.monotonic() < deadline:
            # Check whether the subprocess died unexpectedly
            if self._proc is not None and self._proc.poll() is not None:
                raise RuntimeError(
                    f"Ollama subprocess exited with code {self._proc.returncode} "
                    "before becoming ready. Is the 'ollama' binary installed?"
                )
            try:
                # NOTE: success only proves that *something* answers on
                # base_url; it does not prove the reply came from our child.
                with urllib.request.urlopen(url, timeout=1):  # noqa: S310
                    return  # server is ready
            except Exception as exc:  # noqa: BLE001 - any failure means "not ready yet"
                last_exc = exc

            # Back off briefly, but never sleep past the deadline.
            remaining = deadline - time.monotonic()
            if remaining > 0:
                time.sleep(min(0.25, remaining))

        raise TimeoutError(
            f"Ollama server at {self.base_url} did not become ready within "
            f"{self.startup_timeout}s. Last error: {last_exc}"
        )

    def _atexit_cleanup(self) -> None:
        """Registered with :func:`atexit` to ensure the subprocess is stopped."""
        self.stop()

    def __repr__(self) -> str:
        status = "running" if self.is_running else "stopped"
        return f"OllamaServerManager(base_url={self.base_url!r}, status={status!r})"