feat(ollama-client): shared Ollama API client package

2026-03-29 12:43:01 -07:00 · 2026-03-29 12:43:01 -07:00 · 31cf7c0c6f
commit 31cf7c0c6f
parent bfa55998a2
21 changed files with 1472 additions and 0 deletions
--- a/packages/ollama-client/package.json
+++ b/packages/ollama-client/package.json
@ -0,0 +1,29 @@
+{
+  "name": "@bytelyst/ollama-client",
+  "version": "0.1.0",
+  "description": "Shared Ollama API client — streaming chat, embeddings, model management, health checks",
+  "type": "module",
+  "exports": {
+    ".": {
+      "import": "./dist/index.js",
+      "types": "./dist/index.d.ts"
+    }
+  },
+  "main": "./dist/index.js",
+  "types": "./dist/index.d.ts",
+  "files": [
+    "dist"
+  ],
+  "scripts": {
+    "build": "tsc",
+    "test": "vitest run --pool forks",
+    "typecheck": "tsc --noEmit"
+  },
+  "devDependencies": {
+    "vitest": "^3.0.0",
+    "typescript": "^5.7.0"
+  },
+  "publishConfig": {
+    "registry": "http://localhost:3300/api/packages/bytelyst/npm/"
+  }
+}
--- a/packages/ollama-client/src/client-parsers.test.ts
+++ b/packages/ollama-client/src/client-parsers.test.ts
@ -0,0 +1,94 @@
+import { describe, it, expect, vi } from 'vitest';
+import { consumeSSEStream, consumeNdjsonStream } from './client-parsers.js';
+
+function createResponse(body: string): Response {
+  const encoder = new TextEncoder();
+  const stream = new ReadableStream<Uint8Array>({
+    start(controller) {
+      controller.enqueue(encoder.encode(body));
+      controller.close();
+    },
+  });
+  return { ok: true, body: stream } as unknown as Response;
+}
+
+describe('consumeSSEStream', () => {
+  it('parses data: lines and calls onData', async () => {
+    const response = createResponse('data: {"msg":"hello"}\ndata: {"msg":"world"}\n\n');
+    const onData = vi.fn();
+    const onDone = vi.fn();
+
+    await consumeSSEStream(response, onData, onDone);
+
+    expect(onData).toHaveBeenCalledTimes(2);
+    expect(onData).toHaveBeenCalledWith({ msg: 'hello' });
+    expect(onData).toHaveBeenCalledWith({ msg: 'world' });
+    expect(onDone).toHaveBeenCalled();
+  });
+
+  it('stops on [DONE] marker', async () => {
+    const response = createResponse('data: {"msg":"hello"}\ndata: [DONE]\ndata: {"msg":"after"}\n');
+    const onData = vi.fn();
+    const onDone = vi.fn();
+
+    await consumeSSEStream(response, onData, onDone);
+
+    expect(onData).toHaveBeenCalledTimes(1);
+    expect(onDone).toHaveBeenCalled();
+  });
+
+  it('calls onDone when response has no body', async () => {
+    const response = { ok: true, body: null } as unknown as Response;
+    const onDone = vi.fn();
+
+    await consumeSSEStream(response, vi.fn(), onDone);
+    expect(onDone).toHaveBeenCalled();
+  });
+
+  it('skips malformed SSE data', async () => {
+    const response = createResponse('data: not-json\ndata: {"valid":true}\n');
+    const onData = vi.fn();
+    const onDone = vi.fn();
+
+    await consumeSSEStream(response, onData, onDone);
+    expect(onData).toHaveBeenCalledTimes(1);
+    expect(onData).toHaveBeenCalledWith({ valid: true });
+  });
+});
+
+describe('consumeNdjsonStream', () => {
+  it('parses NDJSON lines and calls onChunk', async () => {
+    const response = createResponse('{"a":1}\n{"a":2}\n');
+    const onChunk = vi.fn();
+
+    await consumeNdjsonStream(response, onChunk);
+
+    expect(onChunk).toHaveBeenCalledTimes(2);
+    expect(onChunk).toHaveBeenCalledWith({ a: 1 });
+    expect(onChunk).toHaveBeenCalledWith({ a: 2 });
+  });
+
+  it('handles no body', async () => {
+    const response = { ok: true, body: null } as unknown as Response;
+    const onChunk = vi.fn();
+
+    await consumeNdjsonStream(response, onChunk);
+    expect(onChunk).not.toHaveBeenCalled();
+  });
+
+  it('processes remaining buffer', async () => {
+    const response = createResponse('{"a":1}\n{"a":2}');
+    const onChunk = vi.fn();
+
+    await consumeNdjsonStream(response, onChunk);
+    expect(onChunk).toHaveBeenCalledTimes(2);
+  });
+
+  it('skips malformed lines', async () => {
+    const response = createResponse('{"a":1}\nbad\n{"a":2}\n');
+    const onChunk = vi.fn();
+
+    await consumeNdjsonStream(response, onChunk);
+    expect(onChunk).toHaveBeenCalledTimes(2);
+  });
+});
--- a/packages/ollama-client/src/client-parsers.ts
+++ b/packages/ollama-client/src/client-parsers.ts
@ -0,0 +1,115 @@
+/**
+ * Browser-side stream consumers for Next.js API route clients.
+ *
+ * These functions consume a `Response` from a fetch call that returns
+ * streaming data (SSE or NDJSON) and invoke callbacks for each chunk.
+ */
+
+/**
+ * Consume a Server-Sent Events (SSE) stream from a Response.
+ *
+ * Parses `data:` lines and invokes `onData` for each parsed JSON object.
+ * Stops when the stream ends or `[DONE]` is received.
+ *
+ * @param response - Fetch Response with SSE body
+ * @param onData - Callback for each parsed data event
+ * @param onDone - Callback when stream completes
+ */
+export async function consumeSSEStream(
+  response: Response,
+  onData: (data: Record<string, unknown>) => void,
+  onDone: () => void
+): Promise<void> {
+  if (!response.body) {
+    onDone();
+    return;
+  }
+
+  const reader = response.body.getReader();
+  const decoder = new TextDecoder();
+  let buffer = '';
+
+  try {
+    while (true) {
+      const { done, value } = await reader.read();
+      if (done) break;
+
+      buffer += decoder.decode(value, { stream: true });
+      const lines = buffer.split('\n');
+      buffer = lines.pop() ?? '';
+
+      for (const line of lines) {
+        const trimmed = line.trim();
+        if (!trimmed) continue;
+
+        if (trimmed.startsWith('data:')) {
+          const payload = trimmed.slice(5).trim();
+          if (payload === '[DONE]') {
+            onDone();
+            return;
+          }
+          try {
+            onData(JSON.parse(payload) as Record<string, unknown>);
+          } catch {
+            // skip malformed SSE data
+          }
+        }
+      }
+    }
+  } finally {
+    reader.releaseLock();
+  }
+
+  onDone();
+}
+
+/**
+ * Consume an NDJSON (newline-delimited JSON) stream from a Response.
+ *
+ * Parses each line as JSON and invokes `onChunk` for each parsed object.
+ *
+ * @param response - Fetch Response with NDJSON body
+ * @param onChunk - Callback for each parsed NDJSON line
+ */
+export async function consumeNdjsonStream<T>(
+  response: Response,
+  onChunk: (chunk: T) => void
+): Promise<void> {
+  if (!response.body) return;
+
+  const reader = response.body.getReader();
+  const decoder = new TextDecoder();
+  let buffer = '';
+
+  try {
+    while (true) {
+      const { done, value } = await reader.read();
+      if (done) break;
+
+      buffer += decoder.decode(value, { stream: true });
+      const lines = buffer.split('\n');
+      buffer = lines.pop() ?? '';
+
+      for (const line of lines) {
+        const trimmed = line.trim();
+        if (!trimmed) continue;
+        try {
+          onChunk(JSON.parse(trimmed) as T);
+        } catch {
+          // skip malformed lines
+        }
+      }
+    }
+
+    // Process remaining buffer
+    if (buffer.trim()) {
+      try {
+        onChunk(JSON.parse(buffer.trim()) as T);
+      } catch {
+        // skip
+      }
+    }
+  } finally {
+    reader.releaseLock();
+  }
+}
--- a/packages/ollama-client/src/client.test.ts
+++ b/packages/ollama-client/src/client.test.ts
@ -0,0 +1,157 @@
+import { describe, it, expect, vi, beforeEach } from 'vitest';
+import { OllamaClient } from './client.js';
+
+const BASE_URL = 'http://localhost:11434';
+
+function mockFetch(response: unknown, options?: { ok?: boolean; status?: number }) {
+  return vi.fn().mockResolvedValue({
+    ok: options?.ok ?? true,
+    status: options?.status ?? 200,
+    json: () => Promise.resolve(response),
+    text: () => Promise.resolve(JSON.stringify(response)),
+  });
+}
+
+describe('OllamaClient', () => {
+  let client: OllamaClient;
+
+  beforeEach(() => {
+    client = new OllamaClient({ baseUrl: BASE_URL });
+    vi.restoreAllMocks();
+  });
+
+  describe('constructor', () => {
+    it('strips trailing slashes from baseUrl', () => {
+      const c = new OllamaClient({ baseUrl: 'http://localhost:11434///' });
+      expect(c.baseUrl).toBe('http://localhost:11434');
+    });
+  });
+
+  describe('tags', () => {
+    it('returns list of models', async () => {
+      const models = [{ name: 'llama3', size: 1000, digest: 'abc', modified_at: '2024-01-01' }];
+      globalThis.fetch = mockFetch({ models });
+
+      const result = await client.tags();
+      expect(result).toEqual(models);
+      expect(globalThis.fetch).toHaveBeenCalledWith(
+        `${BASE_URL}/api/tags`,
+        expect.objectContaining({
+          headers: expect.objectContaining({ 'Content-Type': 'application/json' }),
+        })
+      );
+    });
+
+    it('returns empty array when models is null', async () => {
+      globalThis.fetch = mockFetch({ models: null });
+      const result = await client.tags();
+      expect(result).toEqual([]);
+    });
+  });
+
+  describe('ps', () => {
+    it('returns running models', async () => {
+      const models = [
+        { name: 'llama3', size: 1000, digest: 'abc', expires_at: '2024-01-01', size_vram: 500 },
+      ];
+      globalThis.fetch = mockFetch({ models });
+
+      const result = await client.ps();
+      expect(result).toEqual(models);
+    });
+  });
+
+  describe('show', () => {
+    it('sends POST with model name', async () => {
+      const showData = { modelfile: '', parameters: '', template: '', details: {} };
+      globalThis.fetch = mockFetch(showData);
+
+      const result = await client.show('llama3');
+      expect(result).toEqual(showData);
+      expect(globalThis.fetch).toHaveBeenCalledWith(
+        `${BASE_URL}/api/show`,
+        expect.objectContaining({
+          method: 'POST',
+          body: JSON.stringify({ name: 'llama3' }),
+        })
+      );
+    });
+  });
+
+  describe('pull (non-streaming)', () => {
+    it('pulls a model', async () => {
+      globalThis.fetch = mockFetch({ status: 'success' });
+
+      const result = await client.pull('llama3');
+      expect(result).toEqual({ status: 'success' });
+    });
+
+    it('throws on failure', async () => {
+      globalThis.fetch = mockFetch('Pull failed', { ok: false, status: 500 });
+      await expect(client.pull('bad-model')).rejects.toThrow('Ollama pull failed (500)');
+    });
+  });
+
+  describe('load', () => {
+    it('sends generate with keep_alive', async () => {
+      globalThis.fetch = mockFetch({});
+
+      await client.load('llama3', '15m');
+      expect(globalThis.fetch).toHaveBeenCalledWith(
+        `${BASE_URL}/api/generate`,
+        expect.objectContaining({
+          method: 'POST',
+          body: JSON.stringify({ model: 'llama3', prompt: '', keep_alive: '15m' }),
+        })
+      );
+    });
+  });
+
+  describe('unload', () => {
+    it('sends generate with keep_alive: 0', async () => {
+      globalThis.fetch = mockFetch({});
+
+      await client.unload('llama3');
+      expect(globalThis.fetch).toHaveBeenCalledWith(
+        `${BASE_URL}/api/generate`,
+        expect.objectContaining({
+          method: 'POST',
+          body: JSON.stringify({ model: 'llama3', prompt: '', keep_alive: '0' }),
+        })
+      );
+    });
+  });
+
+  describe('delete', () => {
+    it('sends DELETE request', async () => {
+      globalThis.fetch = vi.fn().mockResolvedValue({ ok: true, status: 200 });
+
+      await client.delete('llama3');
+      expect(globalThis.fetch).toHaveBeenCalledWith(
+        `${BASE_URL}/api/delete`,
+        expect.objectContaining({
+          method: 'DELETE',
+          body: JSON.stringify({ name: 'llama3' }),
+        })
+      );
+    });
+
+    it('throws on failure', async () => {
+      globalThis.fetch = vi.fn().mockResolvedValue({
+        ok: false,
+        status: 404,
+        text: () => Promise.resolve('model not found'),
+      });
+      await expect(client.delete('nope')).rejects.toThrow('Ollama delete failed (404)');
+    });
+  });
+
+  describe('version', () => {
+    it('returns version string', async () => {
+      globalThis.fetch = mockFetch({ version: '0.5.4' });
+
+      const result = await client.version();
+      expect(result).toBe('0.5.4');
+    });
+  });
+});
--- a/packages/ollama-client/src/client.ts
+++ b/packages/ollama-client/src/client.ts
@ -0,0 +1,150 @@
+import type {
+  OllamaClientOptions,
+  OllamaModel,
+  OllamaRunningModel,
+  OllamaShowResponse,
+  OllamaPullProgress,
+  OllamaVersionResponse,
+} from './types.js';
+
+/**
+ * Ollama API client for model management operations.
+ *
+ * Provides typed methods for all non-streaming Ollama endpoints:
+ * tags, ps, show, pull, load, unload, delete, version.
+ */
+export class OllamaClient {
+  readonly baseUrl: string;
+  private readonly timeoutMs: number;
+
+  constructor(options: OllamaClientOptions) {
+    this.baseUrl = options.baseUrl.replace(/\/+$/, '');
+    this.timeoutMs = options.timeoutMs ?? 30_000;
+  }
+
+  private async fetchJson<T>(path: string, init?: RequestInit): Promise<T> {
+    const controller = new AbortController();
+    const timeout = setTimeout(() => controller.abort(), this.timeoutMs);
+
+    try {
+      const res = await fetch(`${this.baseUrl}${path}`, {
+        ...init,
+        signal: init?.signal ?? controller.signal,
+        headers: { 'Content-Type': 'application/json', ...init?.headers },
+      });
+      if (!res.ok) {
+        const text = await res.text().catch(() => '');
+        throw new Error(`Ollama ${path} failed (${res.status}): ${text.slice(0, 200)}`);
+      }
+      return (await res.json()) as T;
+    } finally {
+      clearTimeout(timeout);
+    }
+  }
+
+  /** List all locally available models (GET /api/tags). */
+  async tags(): Promise<OllamaModel[]> {
+    const data = await this.fetchJson<{ models: OllamaModel[] }>('/api/tags');
+    return data.models ?? [];
+  }
+
+  /** List currently running/loaded models (GET /api/ps). */
+  async ps(): Promise<OllamaRunningModel[]> {
+    const data = await this.fetchJson<{ models: OllamaRunningModel[] }>('/api/ps');
+    return data.models ?? [];
+  }
+
+  /** Show model details (POST /api/show). */
+  async show(model: string): Promise<OllamaShowResponse> {
+    return this.fetchJson<OllamaShowResponse>('/api/show', {
+      method: 'POST',
+      body: JSON.stringify({ name: model }),
+    });
+  }
+
+  /**
+   * Pull a model from the Ollama registry (POST /api/pull).
+   *
+   * When `stream: false`, waits for the full download to complete.
+   * When `stream: true`, returns an async generator of progress chunks.
+   */
+  async pull(model: string, stream?: false): Promise<{ status: string }>;
+  async pull(model: string, stream: true): Promise<AsyncGenerator<OllamaPullProgress>>;
+  async pull(
+    model: string,
+    stream: boolean = false
+  ): Promise<{ status: string } | AsyncGenerator<OllamaPullProgress>> {
+    if (!stream) {
+      const res = await fetch(`${this.baseUrl}/api/pull`, {
+        method: 'POST',
+        headers: { 'Content-Type': 'application/json' },
+        body: JSON.stringify({ name: model, stream: false }),
+        signal: AbortSignal.timeout(this.timeoutMs),
+      });
+      if (!res.ok) {
+        const text = await res.text().catch(() => '');
+        throw new Error(`Ollama pull failed (${res.status}): ${text.slice(0, 200)}`);
+      }
+      return (await res.json()) as { status: string };
+    }
+
+    // Streaming pull — return async generator
+    const baseUrl = this.baseUrl;
+    const res = await fetch(`${baseUrl}/api/pull`, {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify({ name: model, stream: true }),
+    });
+    if (!res.ok) {
+      const text = await res.text().catch(() => '');
+      throw new Error(`Ollama pull failed (${res.status}): ${text.slice(0, 200)}`);
+    }
+    if (!res.body) throw new Error('No response body from Ollama pull');
+
+    const { parseNdjsonStream } = await import('./ndjson.js');
+    return parseNdjsonStream<OllamaPullProgress>(res.body);
+  }
+
+  /**
+   * Load a model into memory (POST /api/generate with empty prompt + keep_alive).
+   *
+   * @param model - Model name
+   * @param keepAlive - How long to keep the model loaded (default: '10m')
+   */
+  async load(model: string, keepAlive: string = '10m'): Promise<void> {
+    await this.fetchJson('/api/generate', {
+      method: 'POST',
+      body: JSON.stringify({ model, prompt: '', keep_alive: keepAlive }),
+    });
+  }
+
+  /**
+   * Unload a model from memory (POST /api/generate with keep_alive: '0').
+   */
+  async unload(model: string): Promise<void> {
+    await this.fetchJson('/api/generate', {
+      method: 'POST',
+      body: JSON.stringify({ model, prompt: '', keep_alive: '0' }),
+    });
+  }
+
+  /** Delete a model (DELETE /api/delete). */
+  async delete(model: string): Promise<void> {
+    const res = await fetch(`${this.baseUrl}/api/delete`, {
+      method: 'DELETE',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify({ name: model }),
+      signal: AbortSignal.timeout(this.timeoutMs),
+    });
+    if (!res.ok) {
+      const text = await res.text().catch(() => '');
+      throw new Error(`Ollama delete failed (${res.status}): ${text.slice(0, 200)}`);
+    }
+  }
+
+  /** Get Ollama server version (GET /api/version). */
+  async version(): Promise<string> {
+    const data = await this.fetchJson<OllamaVersionResponse>('/api/version');
+    return data.version;
+  }
+}
--- a/packages/ollama-client/src/config.test.ts
+++ b/packages/ollama-client/src/config.test.ts
@ -0,0 +1,43 @@
+import { describe, it, expect } from 'vitest';
+import { resolveOllamaUrl } from './config.js';
+
+describe('resolveOllamaUrl', () => {
+  it('returns default localhost when no env vars set', () => {
+    expect(resolveOllamaUrl({})).toBe('http://localhost:11434');
+  });
+
+  it('uses OLLAMA_URL when set', () => {
+    expect(resolveOllamaUrl({ OLLAMA_URL: 'http://myhost:11434' })).toBe('http://myhost:11434');
+  });
+
+  it('uses OLLAMA_HOST when set', () => {
+    expect(resolveOllamaUrl({ OLLAMA_HOST: 'http://otherhost:11434' })).toBe(
+      'http://otherhost:11434'
+    );
+  });
+
+  it('prefers OLLAMA_URL over OLLAMA_HOST', () => {
+    expect(
+      resolveOllamaUrl({
+        OLLAMA_URL: 'http://primary:11434',
+        OLLAMA_HOST: 'http://secondary:11434',
+      })
+    ).toBe('http://primary:11434');
+  });
+
+  it('normalizes URL without scheme', () => {
+    expect(resolveOllamaUrl({ OLLAMA_URL: 'myhost:11434' })).toBe('http://myhost:11434');
+  });
+
+  it('strips trailing slashes', () => {
+    expect(resolveOllamaUrl({ OLLAMA_URL: 'http://myhost:11434///' })).toBe('http://myhost:11434');
+  });
+
+  it('trims whitespace', () => {
+    expect(resolveOllamaUrl({ OLLAMA_URL: '  http://myhost:11434  ' })).toBe('http://myhost:11434');
+  });
+
+  it('preserves https scheme', () => {
+    expect(resolveOllamaUrl({ OLLAMA_URL: 'https://secure:11434' })).toBe('https://secure:11434');
+  });
+});
--- a/packages/ollama-client/src/config.ts
+++ b/packages/ollama-client/src/config.ts
@ -0,0 +1,47 @@
+import { execSync } from 'child_process';
+import fs from 'fs';
+
+function normalizeUrl(input: string): string {
+  const trimmed = input.trim().replace(/\/+$/, '');
+  if (trimmed.startsWith('http://') || trimmed.startsWith('https://')) {
+    return trimmed;
+  }
+  return `http://${trimmed}`;
+}
+
+function detectWslGatewayOllamaUrl(): string | null {
+  try {
+    if (process.platform !== 'linux') return null;
+    const version = fs.readFileSync('/proc/version', 'utf-8').toLowerCase();
+    if (!version.includes('microsoft')) return null;
+
+    const gw = execSync("ip route show default | awk '{print $3}' | head -1", {
+      encoding: 'utf-8',
+      stdio: ['ignore', 'pipe', 'ignore'],
+    }).trim();
+    if (!gw) return null;
+    return `http://${gw}:11434`;
+  } catch {
+    return null;
+  }
+}
+
+/**
+ * Resolve the Ollama base URL from environment variables with WSL2 fallback.
+ *
+ * Priority:
+ * 1. `OLLAMA_URL` or `OLLAMA_HOST` env var (explicit config)
+ * 2. WSL2 gateway IP (Windows-hosted Ollama detected via /proc/version)
+ * 3. `http://localhost:11434` (default)
+ *
+ * @param env - Environment variables object (defaults to `process.env`)
+ */
+export function resolveOllamaUrl(env: Record<string, string | undefined> = process.env): string {
+  const explicit = env.OLLAMA_URL || env.OLLAMA_HOST;
+  if (explicit) return normalizeUrl(explicit);
+
+  const inferred = detectWslGatewayOllamaUrl();
+  if (inferred) return inferred;
+
+  return 'http://localhost:11434';
+}
--- a/packages/ollama-client/src/embed.test.ts
+++ b/packages/ollama-client/src/embed.test.ts
@ -0,0 +1,66 @@
+import { describe, it, expect, vi, beforeEach } from 'vitest';
+import { getEmbedding, getEmbeddingVector } from './embed.js';
+
+const BASE_URL = 'http://localhost:11434';
+
+describe('getEmbedding', () => {
+  beforeEach(() => {
+    vi.restoreAllMocks();
+  });
+
+  it('returns embedding response', async () => {
+    const response = { model: 'nomic-embed-text', embeddings: [[0.1, 0.2, 0.3]] };
+    globalThis.fetch = vi.fn().mockResolvedValue({
+      ok: true,
+      json: () => Promise.resolve(response),
+    });
+
+    const result = await getEmbedding(BASE_URL, { model: 'nomic-embed-text', input: 'hello' });
+    expect(result).toEqual(response);
+    expect(globalThis.fetch).toHaveBeenCalledWith(
+      `${BASE_URL}/api/embed`,
+      expect.objectContaining({
+        method: 'POST',
+        body: JSON.stringify({ model: 'nomic-embed-text', input: 'hello' }),
+      })
+    );
+  });
+
+  it('throws on error response', async () => {
+    globalThis.fetch = vi.fn().mockResolvedValue({
+      ok: false,
+      status: 500,
+      text: () => Promise.resolve('internal error'),
+    });
+
+    await expect(
+      getEmbedding(BASE_URL, { model: 'nomic-embed-text', input: 'hello' })
+    ).rejects.toThrow('Ollama embed failed (500)');
+  });
+});
+
+describe('getEmbeddingVector', () => {
+  beforeEach(() => {
+    vi.restoreAllMocks();
+  });
+
+  it('returns first embedding vector', async () => {
+    globalThis.fetch = vi.fn().mockResolvedValue({
+      ok: true,
+      json: () => Promise.resolve({ model: 'nomic-embed-text', embeddings: [[0.1, 0.2]] }),
+    });
+
+    const result = await getEmbeddingVector(BASE_URL, 'hello');
+    expect(result).toEqual([0.1, 0.2]);
+  });
+
+  it('returns empty array when no embeddings', async () => {
+    globalThis.fetch = vi.fn().mockResolvedValue({
+      ok: true,
+      json: () => Promise.resolve({ model: 'nomic-embed-text', embeddings: [] }),
+    });
+
+    const result = await getEmbeddingVector(BASE_URL, 'hello');
+    expect(result).toEqual([]);
+  });
+});
--- a/packages/ollama-client/src/embed.ts
+++ b/packages/ollama-client/src/embed.ts
@ -0,0 +1,47 @@
+import type { OllamaEmbedOptions, OllamaEmbeddingResponse } from './types.js';
+
+/**
+ * Get embeddings for text using an Ollama embedding model.
+ *
+ * @param baseUrl - Ollama server base URL
+ * @param options - Embedding options (model, input text)
+ * @returns Array of embedding vectors (one per input string)
+ */
+export async function getEmbedding(
+  baseUrl: string,
+  options: OllamaEmbedOptions
+): Promise<OllamaEmbeddingResponse> {
+  const res = await fetch(`${baseUrl}/api/embed`, {
+    method: 'POST',
+    headers: { 'Content-Type': 'application/json' },
+    body: JSON.stringify({
+      model: options.model,
+      input: options.input,
+      ...(options.options && { options: options.options }),
+    }),
+  });
+
+  if (!res.ok) {
+    const text = await res.text().catch(() => '');
+    throw new Error(`Ollama embed failed (${res.status}): ${text.slice(0, 200)}`);
+  }
+
+  return (await res.json()) as OllamaEmbeddingResponse;
+}
+
+/**
+ * Convenience: get a single embedding vector for a text string.
+ *
+ * @param baseUrl - Ollama server base URL
+ * @param text - Text to embed
+ * @param model - Embedding model name (default: 'nomic-embed-text')
+ * @returns Single embedding vector
+ */
+export async function getEmbeddingVector(
+  baseUrl: string,
+  text: string,
+  model: string = 'nomic-embed-text'
+): Promise<number[]> {
+  const response = await getEmbedding(baseUrl, { model, input: text });
+  return response.embeddings?.[0] ?? [];
+}
--- a/packages/ollama-client/src/format.test.ts
+++ b/packages/ollama-client/src/format.test.ts
@ -0,0 +1,73 @@
+import { describe, it, expect } from 'vitest';
+import { formatBytes, estimateTokens, getModelContextWindow, formatUptime } from './format.js';
+
+describe('formatBytes', () => {
+  it('formats 0 bytes', () => {
+    expect(formatBytes(0)).toBe('0 B');
+  });
+
+  it('formats bytes', () => {
+    expect(formatBytes(512)).toBe('512 B');
+  });
+
+  it('formats kilobytes', () => {
+    expect(formatBytes(1536)).toBe('1.5 KB');
+  });
+
+  it('formats megabytes', () => {
+    expect(formatBytes(5242880)).toBe('5 MB');
+  });
+
+  it('formats gigabytes', () => {
+    expect(formatBytes(4294967296)).toBe('4 GB');
+  });
+});
+
+describe('estimateTokens', () => {
+  it('returns 0 for empty string', () => {
+    expect(estimateTokens('')).toBe(0);
+  });
+
+  it('returns 0 for whitespace-only string', () => {
+    expect(estimateTokens('   ')).toBe(0);
+  });
+
+  it('estimates tokens for a sentence', () => {
+    const result = estimateTokens('Hello world this is a test');
+    expect(result).toBeGreaterThan(0);
+    // 6 words × 1.3 = 7.8, ceil = 8
+    expect(result).toBe(8);
+  });
+});
+
+describe('getModelContextWindow', () => {
+  it('returns 128k for models with 128k in name', () => {
+    expect(getModelContextWindow('llama3-128k')).toBe(128_000);
+  });
+
+  it('returns 32k for models with 32k in name', () => {
+    expect(getModelContextWindow('gpt4-32k')).toBe(32_000);
+  });
+
+  it('returns 4096 for models without size marker', () => {
+    expect(getModelContextWindow('llama3:latest')).toBe(4_096);
+  });
+});
+
+describe('formatUptime', () => {
+  it('formats minutes only', () => {
+    expect(formatUptime(120)).toBe('2m');
+  });
+
+  it('formats hours and minutes', () => {
+    expect(formatUptime(3661)).toBe('1h 1m');
+  });
+
+  it('formats days, hours, and minutes', () => {
+    expect(formatUptime(90061)).toBe('1d 1h 1m');
+  });
+
+  it('formats zero', () => {
+    expect(formatUptime(0)).toBe('0m');
+  });
+});
--- a/packages/ollama-client/src/format.ts
+++ b/packages/ollama-client/src/format.ts
@ -0,0 +1,56 @@
+/**
+ * Format a byte count into a human-readable string.
+ *
+ * @example formatBytes(1536) // '1.5 KB'
+ * @example formatBytes(0)    // '0 B'
+ */
+export function formatBytes(bytes: number): string {
+  if (bytes === 0) return '0 B';
+  const k = 1024;
+  const sizes = ['B', 'KB', 'MB', 'GB', 'TB'];
+  const i = Math.floor(Math.log(bytes) / Math.log(k));
+  return `${parseFloat((bytes / Math.pow(k, i)).toFixed(1))} ${sizes[i]}`;
+}
+
+/**
+ * Approximate token count for a text string.
+ *
+ * Uses a word-count × 1.3 heuristic (typical for English text with LLM tokenizers).
+ */
+export function estimateTokens(text: string): number {
+  const trimmed = text.trim();
+  if (!trimmed) return 0;
+  return Math.ceil(trimmed.split(/\s+/).length * 1.3);
+}
+
+/**
+ * Best-effort model context window lookup based on model name.
+ *
+ * Checks for common context window markers in the model name string.
+ * Falls back to 4096 if no marker is found.
+ */
+export function getModelContextWindow(modelName: string): number {
+  const n = modelName.toLowerCase();
+  if (n.includes('128k')) return 128_000;
+  if (n.includes('64k')) return 64_000;
+  if (n.includes('32k')) return 32_000;
+  if (n.includes('16k')) return 16_000;
+  if (n.includes('8k')) return 8_000;
+  return 4_096;
+}
+
+/**
+ * Format a duration in seconds to a human-readable uptime string.
+ *
+ * @example formatUptime(90061) // '1d 1h 1m'
+ * @example formatUptime(3661)  // '1h 1m'
+ * @example formatUptime(120)   // '2m'
+ */
+export function formatUptime(seconds: number): string {
+  const d = Math.floor(seconds / 86400);
+  const h = Math.floor((seconds % 86400) / 3600);
+  const m = Math.floor((seconds % 3600) / 60);
+  if (d > 0) return `${d}d ${h}h ${m}m`;
+  if (h > 0) return `${h}h ${m}m`;
+  return `${m}m`;
+}
--- a/packages/ollama-client/src/health.test.ts
+++ b/packages/ollama-client/src/health.test.ts
@ -0,0 +1,53 @@
+import { describe, it, expect, vi, beforeEach } from 'vitest';
+import { checkHealth, checkHealthDetailed } from './health.js';
+
+const BASE_URL = 'http://localhost:11434';
+
+describe('checkHealth', () => {
+  beforeEach(() => {
+    vi.restoreAllMocks();
+  });
+
+  it('returns true when server is healthy', async () => {
+    globalThis.fetch = vi.fn().mockResolvedValue({ ok: true });
+    expect(await checkHealth(BASE_URL)).toBe(true);
+  });
+
+  it('returns false when server returns non-ok', async () => {
+    globalThis.fetch = vi.fn().mockResolvedValue({ ok: false });
+    expect(await checkHealth(BASE_URL)).toBe(false);
+  });
+
+  it('returns false when fetch throws', async () => {
+    globalThis.fetch = vi.fn().mockRejectedValue(new Error('ECONNREFUSED'));
+    expect(await checkHealth(BASE_URL)).toBe(false);
+  });
+});
+
+describe('checkHealthDetailed', () => {
+  beforeEach(() => {
+    vi.restoreAllMocks();
+  });
+
+  it('returns online + version when server is healthy', async () => {
+    globalThis.fetch = vi.fn().mockImplementation((url: string) => {
+      if (url.includes('/api/tags')) return Promise.resolve({ ok: true });
+      if (url.includes('/api/version'))
+        return Promise.resolve({
+          ok: true,
+          json: () => Promise.resolve({ version: '0.5.4' }),
+        });
+      return Promise.reject(new Error('unexpected'));
+    });
+
+    const result = await checkHealthDetailed(BASE_URL);
+    expect(result).toEqual({ online: true, version: '0.5.4', url: BASE_URL });
+  });
+
+  it('returns offline when server is unreachable', async () => {
+    globalThis.fetch = vi.fn().mockRejectedValue(new Error('ECONNREFUSED'));
+
+    const result = await checkHealthDetailed(BASE_URL);
+    expect(result).toEqual({ online: false, version: null, url: BASE_URL });
+  });
+});
--- a/packages/ollama-client/src/health.ts
+++ b/packages/ollama-client/src/health.ts
@ -0,0 +1,44 @@
+/**
+ * Check if the Ollama server is reachable.
+ *
+ * @param baseUrl - Ollama server base URL
+ * @param timeoutMs - Timeout in milliseconds (default: 3000)
+ * @returns `true` if the server responds to GET /api/tags within the timeout
+ */
+export async function checkHealth(baseUrl: string, timeoutMs: number = 3000): Promise<boolean> {
+  try {
+    const res = await fetch(`${baseUrl}/api/tags`, {
+      signal: AbortSignal.timeout(timeoutMs),
+    });
+    return res.ok;
+  } catch {
+    return false;
+  }
+}
+
+/**
+ * Check health and return structured result with version info.
+ */
+export async function checkHealthDetailed(
+  baseUrl: string,
+  timeoutMs: number = 3000
+): Promise<{ online: boolean; version: string | null; url: string }> {
+  try {
+    const [tagsRes, versionRes] = await Promise.allSettled([
+      fetch(`${baseUrl}/api/tags`, { signal: AbortSignal.timeout(timeoutMs) }),
+      fetch(`${baseUrl}/api/version`, { signal: AbortSignal.timeout(timeoutMs) }),
+    ]);
+
+    const online = tagsRes.status === 'fulfilled' && tagsRes.value.ok;
+    let version: string | null = null;
+
+    if (versionRes.status === 'fulfilled' && versionRes.value.ok) {
+      const data = (await versionRes.value.json()) as { version?: string };
+      version = data.version ?? null;
+    }
+
+    return { online, version, url: baseUrl };
+  } catch {
+    return { online: false, version: null, url: baseUrl };
+  }
+}
--- a/packages/ollama-client/src/index.ts
+++ b/packages/ollama-client/src/index.ts
@ -0,0 +1,41 @@
+// Types
+export type {
+  OllamaModel,
+  OllamaModelDetails,
+  OllamaRunningModel,
+  OllamaChatMessage,
+  OllamaStreamChunk,
+  OllamaGenerateChunk,
+  OllamaEmbeddingResponse,
+  OllamaShowResponse,
+  OllamaPullProgress,
+  OllamaVersionResponse,
+  OllamaClientOptions,
+  OllamaChatOptions,
+  OllamaGenerateOptions,
+  OllamaEmbedOptions,
+} from './types.js';
+
+// Config
+export { resolveOllamaUrl } from './config.js';
+
+// Client
+export { OllamaClient } from './client.js';
+
+// Streaming
+export { streamChat, streamGenerate } from './stream.js';
+
+// Embeddings
+export { getEmbedding, getEmbeddingVector } from './embed.js';
+
+// Health
+export { checkHealth, checkHealthDetailed } from './health.js';
+
+// NDJSON parser
+export { parseNdjsonStream } from './ndjson.js';
+
+// Client-side stream consumers
+export { consumeSSEStream, consumeNdjsonStream } from './client-parsers.js';
+
+// Format utilities
+export { formatBytes, estimateTokens, getModelContextWindow, formatUptime } from './format.js';
--- a/packages/ollama-client/src/ndjson.test.ts
+++ b/packages/ollama-client/src/ndjson.test.ts
@ -0,0 +1,73 @@
+import { describe, it, expect } from 'vitest';
+import { parseNdjsonStream } from './ndjson.js';
+
+function createReadableStream(chunks: string[]): ReadableStream<Uint8Array> {
+  const encoder = new TextEncoder();
+  let index = 0;
+  return new ReadableStream({
+    pull(controller) {
+      if (index < chunks.length) {
+        controller.enqueue(encoder.encode(chunks[index]));
+        index++;
+      } else {
+        controller.close();
+      }
+    },
+  });
+}
+
+describe('parseNdjsonStream', () => {
+  it('parses complete NDJSON lines', async () => {
+    const stream = createReadableStream(['{"a":1}\n{"a":2}\n{"a":3}\n']);
+    const results: unknown[] = [];
+    for await (const chunk of parseNdjsonStream(stream)) {
+      results.push(chunk);
+    }
+    expect(results).toEqual([{ a: 1 }, { a: 2 }, { a: 3 }]);
+  });
+
+  it('handles partial lines split across chunks', async () => {
+    const stream = createReadableStream(['{"a":', '1}\n{"a":2}\n']);
+    const results: unknown[] = [];
+    for await (const chunk of parseNdjsonStream(stream)) {
+      results.push(chunk);
+    }
+    expect(results).toEqual([{ a: 1 }, { a: 2 }]);
+  });
+
+  it('skips empty lines', async () => {
+    const stream = createReadableStream(['{"a":1}\n\n\n{"a":2}\n']);
+    const results: unknown[] = [];
+    for await (const chunk of parseNdjsonStream(stream)) {
+      results.push(chunk);
+    }
+    expect(results).toEqual([{ a: 1 }, { a: 2 }]);
+  });
+
+  it('skips malformed JSON lines', async () => {
+    const stream = createReadableStream(['{"a":1}\nnot json\n{"a":2}\n']);
+    const results: unknown[] = [];
+    for await (const chunk of parseNdjsonStream(stream)) {
+      results.push(chunk);
+    }
+    expect(results).toEqual([{ a: 1 }, { a: 2 }]);
+  });
+
+  it('processes remaining buffer after stream ends', async () => {
+    const stream = createReadableStream(['{"a":1}\n{"a":2}']);
+    const results: unknown[] = [];
+    for await (const chunk of parseNdjsonStream(stream)) {
+      results.push(chunk);
+    }
+    expect(results).toEqual([{ a: 1 }, { a: 2 }]);
+  });
+
+  it('handles empty stream', async () => {
+    const stream = createReadableStream([]);
+    const results: unknown[] = [];
+    for await (const chunk of parseNdjsonStream(stream)) {
+      results.push(chunk);
+    }
+    expect(results).toEqual([]);
+  });
+});
--- a/packages/ollama-client/src/ndjson.ts
+++ b/packages/ollama-client/src/ndjson.ts
@ -0,0 +1,45 @@
+/**
+ * Parse an NDJSON (newline-delimited JSON) stream into an async generator.
+ *
+ * Works with both Node.js `ReadableStream` and browser `ReadableStream<Uint8Array>`.
+ * Handles partial lines across chunk boundaries gracefully.
+ */
+export async function* parseNdjsonStream<T = unknown>(
+  body: ReadableStream<Uint8Array>
+): AsyncGenerator<T> {
+  const reader = body.getReader();
+  const decoder = new TextDecoder();
+  let buffer = '';
+
+  try {
+    while (true) {
+      const { done, value } = await reader.read();
+      if (done) break;
+
+      buffer += decoder.decode(value, { stream: true });
+      const lines = buffer.split('\n');
+      buffer = lines.pop() ?? '';
+
+      for (const line of lines) {
+        const trimmed = line.trim();
+        if (!trimmed) continue;
+        try {
+          yield JSON.parse(trimmed) as T;
+        } catch {
+          // skip malformed lines
+        }
+      }
+    }
+
+    // Process remaining buffer after stream ends
+    if (buffer.trim()) {
+      try {
+        yield JSON.parse(buffer.trim()) as T;
+      } catch {
+        // skip
+      }
+    }
+  } finally {
+    reader.releaseLock();
+  }
+}
--- a/packages/ollama-client/src/stream.test.ts
+++ b/packages/ollama-client/src/stream.test.ts
@ -0,0 +1,99 @@
+import { describe, it, expect, vi, beforeEach } from 'vitest';
+import { streamChat, streamGenerate } from './stream.js';
+
+const BASE_URL = 'http://localhost:11434';
+
+function createNdjsonResponse(chunks: object[]): Response {
+  const encoder = new TextEncoder();
+  const ndjson = chunks.map(c => JSON.stringify(c)).join('\n') + '\n';
+  const stream = new ReadableStream<Uint8Array>({
+    start(controller) {
+      controller.enqueue(encoder.encode(ndjson));
+      controller.close();
+    },
+  });
+  return { ok: true, status: 200, body: stream } as unknown as Response;
+}
+
+describe('streamChat', () => {
+  beforeEach(() => {
+    vi.restoreAllMocks();
+  });
+
+  it('yields chat stream chunks', async () => {
+    const chunks = [
+      { model: 'llama3', message: { role: 'assistant', content: 'Hello' }, done: false },
+      { model: 'llama3', message: { role: 'assistant', content: ' world' }, done: false },
+      { model: 'llama3', message: { role: 'assistant', content: '' }, done: true, eval_count: 10 },
+    ];
+    globalThis.fetch = vi.fn().mockResolvedValue(createNdjsonResponse(chunks));
+
+    const results = [];
+    for await (const chunk of streamChat(BASE_URL, {
+      model: 'llama3',
+      messages: [{ role: 'user', content: 'Hi' }],
+    })) {
+      results.push(chunk);
+    }
+
+    expect(results).toHaveLength(3);
+    expect(results[0].message.content).toBe('Hello');
+    expect(results[2].done).toBe(true);
+    expect(results[2].eval_count).toBe(10);
+  });
+
+  it('throws on non-ok response', async () => {
+    globalThis.fetch = vi.fn().mockResolvedValue({
+      ok: false,
+      status: 500,
+      text: () => Promise.resolve('internal error'),
+    });
+
+    const gen = streamChat(BASE_URL, {
+      model: 'llama3',
+      messages: [{ role: 'user', content: 'Hi' }],
+    });
+    await expect(gen.next()).rejects.toThrow('Ollama chat failed (500)');
+  });
+
+  it('throws when response has no body', async () => {
+    globalThis.fetch = vi.fn().mockResolvedValue({
+      ok: true,
+      status: 200,
+      body: null,
+    });
+
+    const gen = streamChat(BASE_URL, {
+      model: 'llama3',
+      messages: [{ role: 'user', content: 'Hi' }],
+    });
+    await expect(gen.next()).rejects.toThrow('No response body');
+  });
+});
+
+describe('streamGenerate', () => {
+  beforeEach(() => {
+    vi.restoreAllMocks();
+  });
+
+  it('yields generate stream chunks', async () => {
+    const chunks = [
+      { model: 'llama3', response: 'Hello', done: false },
+      { model: 'llama3', response: ' world', done: false },
+      { model: 'llama3', response: '', done: true, eval_count: 5 },
+    ];
+    globalThis.fetch = vi.fn().mockResolvedValue(createNdjsonResponse(chunks));
+
+    const results = [];
+    for await (const chunk of streamGenerate(BASE_URL, {
+      model: 'llama3',
+      prompt: 'Say hello',
+    })) {
+      results.push(chunk);
+    }
+
+    expect(results).toHaveLength(3);
+    expect(results[0].response).toBe('Hello');
+    expect(results[2].done).toBe(true);
+  });
+});
--- a/packages/ollama-client/src/stream.ts
+++ b/packages/ollama-client/src/stream.ts
@ -0,0 +1,89 @@
+import type {
+  OllamaChatOptions,
+  OllamaGenerateOptions,
+  OllamaStreamChunk,
+  OllamaGenerateChunk,
+} from './types.js';
+import { parseNdjsonStream } from './ndjson.js';
+
+/**
+ * Stream a chat completion from Ollama.
+ *
+ * Yields NDJSON chunks from `POST /api/chat` as an async generator.
+ *
+ * @param baseUrl - Ollama server base URL
+ * @param options - Chat options (model, messages, signal, etc.)
+ */
+export async function* streamChat(
+  baseUrl: string,
+  options: OllamaChatOptions
+): AsyncGenerator<OllamaStreamChunk> {
+  const { model, messages, signal, ...rest } = options;
+
+  const res = await fetch(`${baseUrl}/api/chat`, {
+    method: 'POST',
+    headers: { 'Content-Type': 'application/json' },
+    body: JSON.stringify({
+      model,
+      messages,
+      stream: true,
+      ...('options' in rest && rest.options ? { options: rest.options } : {}),
+      ...('format' in rest && rest.format ? { format: rest.format } : {}),
+      ...('keep_alive' in rest && rest.keep_alive ? { keep_alive: rest.keep_alive } : {}),
+    }),
+    signal,
+  });
+
+  if (!res.ok) {
+    const text = await res.text().catch(() => '');
+    throw new Error(`Ollama chat failed (${res.status}): ${text.slice(0, 200)}`);
+  }
+
+  if (!res.body) {
+    throw new Error('No response body from Ollama');
+  }
+
+  yield* parseNdjsonStream<OllamaStreamChunk>(res.body);
+}
+
+/**
+ * Stream a text generation from Ollama.
+ *
+ * Yields NDJSON chunks from `POST /api/generate` as an async generator.
+ *
+ * @param baseUrl - Ollama server base URL
+ * @param options - Generate options (model, prompt, signal, etc.)
+ */
+export async function* streamGenerate(
+  baseUrl: string,
+  options: OllamaGenerateOptions
+): AsyncGenerator<OllamaGenerateChunk> {
+  const { model, prompt, signal, ...rest } = options;
+
+  const res = await fetch(`${baseUrl}/api/generate`, {
+    method: 'POST',
+    headers: { 'Content-Type': 'application/json' },
+    body: JSON.stringify({
+      model,
+      prompt,
+      stream: true,
+      ...('system' in rest && rest.system ? { system: rest.system } : {}),
+      ...('options' in rest && rest.options ? { options: rest.options } : {}),
+      ...('format' in rest && rest.format ? { format: rest.format } : {}),
+      ...('keep_alive' in rest && rest.keep_alive ? { keep_alive: rest.keep_alive } : {}),
+      ...('context' in rest && rest.context ? { context: rest.context } : {}),
+    }),
+    signal,
+  });
+
+  if (!res.ok) {
+    const text = await res.text().catch(() => '');
+    throw new Error(`Ollama generate failed (${res.status}): ${text.slice(0, 200)}`);
+  }
+
+  if (!res.body) {
+    throw new Error('No response body from Ollama');
+  }
+
+  yield* parseNdjsonStream<OllamaGenerateChunk>(res.body);
+}
--- a/packages/ollama-client/src/types.ts
+++ b/packages/ollama-client/src/types.ts
@ -0,0 +1,133 @@
+// --- Model types ---
+
+export interface OllamaModelDetails {
+  family?: string;
+  families?: string[];
+  format?: string;
+  parameter_size?: string;
+  quantization_level?: string;
+}
+
+export interface OllamaModel {
+  name: string;
+  model?: string;
+  size: number;
+  digest: string;
+  modified_at: string;
+  details?: OllamaModelDetails;
+}
+
+export interface OllamaRunningModel {
+  name: string;
+  model?: string;
+  size: number;
+  digest: string;
+  expires_at: string;
+  size_vram: number;
+  details?: OllamaModelDetails;
+}
+
+// --- Chat types ---
+
+export interface OllamaChatMessage {
+  role: 'user' | 'assistant' | 'system';
+  content: string;
+  images?: string[];
+}
+
+export interface OllamaStreamChunk {
+  model: string;
+  message: { role: string; content: string };
+  done: boolean;
+  total_duration?: number;
+  eval_count?: number;
+  eval_duration?: number;
+  prompt_eval_count?: number;
+  prompt_eval_duration?: number;
+  load_duration?: number;
+}
+
+// --- Generate types ---
+
+export interface OllamaGenerateChunk {
+  model: string;
+  response: string;
+  done: boolean;
+  total_duration?: number;
+  eval_count?: number;
+  eval_duration?: number;
+  prompt_eval_count?: number;
+  prompt_eval_duration?: number;
+  load_duration?: number;
+  context?: number[];
+}
+
+// --- Embedding types ---
+
+export interface OllamaEmbeddingResponse {
+  model: string;
+  embeddings: number[][];
+  total_duration?: number;
+  load_duration?: number;
+  prompt_eval_count?: number;
+}
+
+// --- Show types ---
+
+export interface OllamaShowResponse {
+  modelfile: string;
+  parameters: string;
+  template: string;
+  details: OllamaModelDetails;
+  model_info?: Record<string, unknown>;
+}
+
+// --- Pull progress types ---
+
+export interface OllamaPullProgress {
+  status: string;
+  digest?: string;
+  total?: number;
+  completed?: number;
+}
+
+// --- Version types ---
+
+export interface OllamaVersionResponse {
+  version: string;
+}
+
+// --- Client options ---
+
+export interface OllamaClientOptions {
+  /** Base URL of the Ollama server (e.g. http://localhost:11434) */
+  baseUrl: string;
+  /** Default request timeout in milliseconds (default: 30000) */
+  timeoutMs?: number;
+}
+
+export interface OllamaChatOptions {
+  model: string;
+  messages: OllamaChatMessage[];
+  signal?: AbortSignal;
+  options?: Record<string, unknown>;
+  format?: 'json' | string;
+  keep_alive?: string;
+}
+
+export interface OllamaGenerateOptions {
+  model: string;
+  prompt: string;
+  signal?: AbortSignal;
+  system?: string;
+  options?: Record<string, unknown>;
+  format?: 'json' | string;
+  keep_alive?: string;
+  context?: number[];
+}
+
+export interface OllamaEmbedOptions {
+  model: string;
+  input: string | string[];
+  options?: Record<string, unknown>;
+}
--- a/packages/ollama-client/tsconfig.json
+++ b/packages/ollama-client/tsconfig.json
@ -0,0 +1,9 @@
+{
+  "extends": "../../tsconfig.base.json",
+  "compilerOptions": {
+    "outDir": "dist",
+    "rootDir": "src"
+  },
+  "include": ["src"],
+  "exclude": ["src/**/*.test.ts"]
+}
--- a/packages/ollama-client/vitest.config.ts
+++ b/packages/ollama-client/vitest.config.ts
@ -0,0 +1,9 @@
+import { defineConfig } from 'vitest/config';
+
+export default defineConfig({
+  test: {
+    globals: true,
+    passWithNoTests: true,
+    pool: 'forks',
+  },
+});