// learning_ai_common_plat/packages/ollama-client/src/client.ts

import type {
  OllamaClientOptions,
  OllamaModel,
  OllamaRunningModel,
  OllamaShowResponse,
  OllamaPullProgress,
  OllamaVersionResponse,
} from './types.js';
import { parseNdjsonStream } from './ndjson.js';

/**
 * Ollama API client for model management operations.
 *
 * Provides typed methods for the model-management endpoints:
 * tags, ps, show, pull (buffered or streamed), load, unload, delete, version.
 *
 * All requests are sent to `baseUrl` + path using the global `fetch`.
 */
export class OllamaClient {
  /** Server base URL with any trailing slashes removed. */
  readonly baseUrl: string;
  /** Timeout (ms) applied to JSON requests; defaults to 30 seconds. */
  private readonly timeoutMs: number;

  /**
   * @param options - `baseUrl` of the Ollama server and an optional
   *   `timeoutMs` for JSON requests (default 30 000 ms).
   */
  constructor(options: OllamaClientOptions) {
    // Strip trailing slashes so `${baseUrl}${path}` never contains "//api/...".
    this.baseUrl = options.baseUrl.replace(/\/+$/, '');
    this.timeoutMs = options.timeoutMs ?? 30_000;
  }

  /**
   * Reject with a descriptive error when the response status is not 2xx.
   *
   * @param res - Response to inspect.
   * @param label - Text placed after "Ollama " in the error message
   *   (the request path, or a short operation name such as "pull").
   * @throws Error containing the status code and up to 200 characters of the body.
   */
  private async assertOk(res: Response, label: string): Promise<void> {
    if (res.ok) return;
    // Reading the error body is best-effort; a failed read must not mask the status.
    const text = await res.text().catch(() => '');
    throw new Error(`Ollama ${label} failed (${res.status}): ${text.slice(0, 200)}`);
  }

  /**
   * Send a JSON request and decode the JSON reply.
   *
   * The request is aborted after `timeoutMs` unless the caller supplies its
   * own `init.signal`, in which case that signal is used instead.
   *
   * @param path - Path appended to `baseUrl` (e.g. `/api/tags`).
   * @param init - Extra `fetch` options; `headers` are merged over the JSON
   *   content-type default.
   * @returns The parsed body, or `undefined` (typed as `T`) when the server
   *   replied with an empty body.
   * @throws Error on a non-2xx status; SyntaxError on a malformed JSON body.
   */
  private async fetchJson<T>(path: string, init?: RequestInit): Promise<T> {
    const controller = new AbortController();
    const timeout = setTimeout(() => controller.abort(), this.timeoutMs);

    try {
      const res = await fetch(`${this.baseUrl}${path}`, {
        ...init,
        signal: init?.signal ?? controller.signal,
        headers: { 'Content-Type': 'application/json', ...init?.headers },
      });
      await this.assertOk(res, path);

      // Some endpoints (e.g. DELETE /api/delete, per the Ollama API docs) may
      // answer a success with no body at all. `res.json()` would throw on
      // that, so read the text first and only parse when there is something
      // to parse.
      const raw = await res.text();
      if (raw.trim() === '') return undefined as T;
      return JSON.parse(raw) as T;
    } finally {
      clearTimeout(timeout);
    }
  }

  /** List all locally available models (GET /api/tags). */
  async tags(): Promise<OllamaModel[]> {
    const data = await this.fetchJson<{ models: OllamaModel[] }>('/api/tags');
    // `data` can be absent when the server sends an empty body.
    return data?.models ?? [];
  }

  /** List currently running/loaded models (GET /api/ps). */
  async ps(): Promise<OllamaRunningModel[]> {
    const data = await this.fetchJson<{ models: OllamaRunningModel[] }>('/api/ps');
    // `data` can be absent when the server sends an empty body.
    return data?.models ?? [];
  }

  /** Show model details (POST /api/show). */
  async show(model: string): Promise<OllamaShowResponse> {
    return this.fetchJson<OllamaShowResponse>('/api/show', {
      method: 'POST',
      body: JSON.stringify({ name: model }),
    });
  }

  /**
   * Pull a model from the Ollama registry (POST /api/pull).
   *
   * When `stream` is `false` (default), waits for the full download to
   * complete and resolves with the final status object.
   * When `stream` is `true`, resolves with an async generator of progress
   * chunks decoded from the NDJSON response body.
   *
   * @throws Error on a non-2xx status, or when a streaming response has no body.
   */
  async pull(model: string, stream?: false): Promise<{ status: string }>;
  async pull(model: string, stream: true): Promise<AsyncGenerator<OllamaPullProgress>>;
  async pull(
    model: string,
    stream: boolean = false
  ): Promise<{ status: string } | AsyncGenerator<OllamaPullProgress>> {
    const url = `${this.baseUrl}/api/pull`;
    const headers = { 'Content-Type': 'application/json' };

    if (!stream) {
      // Model pulls can download GBs — use at least a 10 minute timeout
      // instead of the (much shorter) default JSON timeout.
      const pullTimeoutMs = Math.max(this.timeoutMs, 600_000);
      const res = await fetch(url, {
        method: 'POST',
        headers,
        body: JSON.stringify({ name: model, stream: false }),
        signal: AbortSignal.timeout(pullTimeoutMs),
      });
      await this.assertOk(res, 'pull');
      return (await res.json()) as { status: string };
    }

    // Streaming pull — no timeout; the consumer controls the lifetime by
    // iterating (or abandoning) the returned generator.
    const res = await fetch(url, {
      method: 'POST',
      headers,
      body: JSON.stringify({ name: model, stream: true }),
    });
    await this.assertOk(res, 'pull');
    if (!res.body) throw new Error('No response body from Ollama pull');

    return parseNdjsonStream<OllamaPullProgress>(res.body);
  }

  /**
   * Load a model into memory (POST /api/generate with empty prompt + keep_alive).
   *
   * @param model - Model name
   * @param keepAlive - How long to keep the model loaded (default: '10m')
   */
  async load(model: string, keepAlive: string = '10m'): Promise<void> {
    await this.fetchJson('/api/generate', {
      method: 'POST',
      body: JSON.stringify({ model, prompt: '', keep_alive: keepAlive }),
    });
  }

  /**
   * Unload a model from memory (POST /api/generate with keep_alive: '0').
   *
   * @param model - Model name
   */
  async unload(model: string): Promise<void> {
    await this.fetchJson('/api/generate', {
      method: 'POST',
      body: JSON.stringify({ model, prompt: '', keep_alive: '0' }),
    });
  }

  /**
   * Delete a model (DELETE /api/delete).
   *
   * Resolves on any 2xx reply, including one with an empty body.
   *
   * @param model - Model name
   * @throws Error on a non-2xx status (e.g. 404 when the model does not exist).
   */
  async delete(model: string): Promise<void> {
    await this.fetchJson('/api/delete', {
      method: 'DELETE',
      body: JSON.stringify({ name: model }),
    });
  }

  /** Get Ollama server version (GET /api/version). */
  async version(): Promise<string> {
    const data = await this.fetchJson<OllamaVersionResponse>('/api/version');
    return data.version;
  }
}