diff --git a/packages/ollama-client/package.json b/packages/ollama-client/package.json new file mode 100644 index 00000000..4da24825 --- /dev/null +++ b/packages/ollama-client/package.json @@ -0,0 +1,29 @@ +{ + "name": "@bytelyst/ollama-client", + "version": "0.1.0", + "description": "Shared Ollama API client — streaming chat, embeddings, model management, health checks", + "type": "module", + "exports": { + ".": { + "import": "./dist/index.js", + "types": "./dist/index.d.ts" + } + }, + "main": "./dist/index.js", + "types": "./dist/index.d.ts", + "files": [ + "dist" + ], + "scripts": { + "build": "tsc", + "test": "vitest run --pool forks", + "typecheck": "tsc --noEmit" + }, + "devDependencies": { + "vitest": "^3.0.0", + "typescript": "^5.7.0" + }, + "publishConfig": { + "registry": "http://localhost:3300/api/packages/bytelyst/npm/" + } +} diff --git a/packages/ollama-client/src/client-parsers.test.ts b/packages/ollama-client/src/client-parsers.test.ts new file mode 100644 index 00000000..bd651948 --- /dev/null +++ b/packages/ollama-client/src/client-parsers.test.ts @@ -0,0 +1,94 @@ +import { describe, it, expect, vi } from 'vitest'; +import { consumeSSEStream, consumeNdjsonStream } from './client-parsers.js'; + +function createResponse(body: string): Response { + const encoder = new TextEncoder(); + const stream = new ReadableStream({ + start(controller) { + controller.enqueue(encoder.encode(body)); + controller.close(); + }, + }); + return { ok: true, body: stream } as unknown as Response; +} + +describe('consumeSSEStream', () => { + it('parses data: lines and calls onData', async () => { + const response = createResponse('data: {"msg":"hello"}\ndata: {"msg":"world"}\n\n'); + const onData = vi.fn(); + const onDone = vi.fn(); + + await consumeSSEStream(response, onData, onDone); + + expect(onData).toHaveBeenCalledTimes(2); + expect(onData).toHaveBeenCalledWith({ msg: 'hello' }); + expect(onData).toHaveBeenCalledWith({ msg: 'world' }); + expect(onDone).toHaveBeenCalled(); + }); + + it('stops on [DONE] marker', async () => { + const response = createResponse('data: {"msg":"hello"}\ndata: [DONE]\ndata: {"msg":"after"}\n'); + const onData = vi.fn(); + const onDone = vi.fn(); + + await consumeSSEStream(response, onData, onDone); + + expect(onData).toHaveBeenCalledTimes(1); + expect(onDone).toHaveBeenCalled(); + }); + + it('calls onDone when response has no body', async () => { + const response = { ok: true, body: null } as unknown as Response; + const onDone = vi.fn(); + + await consumeSSEStream(response, vi.fn(), onDone); + expect(onDone).toHaveBeenCalled(); + }); + + it('skips malformed SSE data', async () => { + const response = createResponse('data: not-json\ndata: {"valid":true}\n'); + const onData = vi.fn(); + const onDone = vi.fn(); + + await consumeSSEStream(response, onData, onDone); + expect(onData).toHaveBeenCalledTimes(1); + expect(onData).toHaveBeenCalledWith({ valid: true }); + }); +}); + +describe('consumeNdjsonStream', () => { + it('parses NDJSON lines and calls onChunk', async () => { + const response = createResponse('{"a":1}\n{"a":2}\n'); + const onChunk = vi.fn(); + + await consumeNdjsonStream(response, onChunk); + + expect(onChunk).toHaveBeenCalledTimes(2); + expect(onChunk).toHaveBeenCalledWith({ a: 1 }); + expect(onChunk).toHaveBeenCalledWith({ a: 2 }); + }); + + it('handles no body', async () => { + const response = { ok: true, body: null } as unknown as Response; + const onChunk = vi.fn(); + + await consumeNdjsonStream(response, onChunk); + expect(onChunk).not.toHaveBeenCalled(); + }); + + it('processes remaining buffer', async () => { + const response = createResponse('{"a":1}\n{"a":2}'); + const onChunk = vi.fn(); + + await consumeNdjsonStream(response, onChunk); + expect(onChunk).toHaveBeenCalledTimes(2); + }); + + it('skips malformed lines', async () => { + const response = createResponse('{"a":1}\nbad\n{"a":2}\n'); + const onChunk = vi.fn(); + + await consumeNdjsonStream(response, onChunk); + expect(onChunk).toHaveBeenCalledTimes(2); + }); +}); diff --git a/packages/ollama-client/src/client-parsers.ts b/packages/ollama-client/src/client-parsers.ts new file mode 100644 index 00000000..a86c356b --- /dev/null +++ b/packages/ollama-client/src/client-parsers.ts @@ -0,0 +1,115 @@ +/** + * Browser-side stream consumers for Next.js API route clients. + * + * These functions consume a `Response` from a fetch call that returns + * streaming data (SSE or NDJSON) and invoke callbacks for each chunk. + */ + +/** + * Consume a Server-Sent Events (SSE) stream from a Response. + * + * Parses `data:` lines and invokes `onData` for each parsed JSON object. + * Stops when the stream ends or `[DONE]` is received. + * + * @param response - Fetch Response with SSE body + * @param onData - Callback for each parsed data event + * @param onDone - Callback when stream completes + */ +export async function consumeSSEStream( + response: Response, + onData: (data: Record) => void, + onDone: () => void +): Promise { + if (!response.body) { + onDone(); + return; + } + + const reader = response.body.getReader(); + const decoder = new TextDecoder(); + let buffer = ''; + + try { + while (true) { + const { done, value } = await reader.read(); + if (done) break; + + buffer += decoder.decode(value, { stream: true }); + const lines = buffer.split('\n'); + buffer = lines.pop() ?? ''; + + for (const line of lines) { + const trimmed = line.trim(); + if (!trimmed) continue; + + if (trimmed.startsWith('data:')) { + const payload = trimmed.slice(5).trim(); + if (payload === '[DONE]') { + onDone(); + return; + } + try { + onData(JSON.parse(payload) as Record); + } catch { + // skip malformed SSE data + } + } + } + } + } finally { + reader.releaseLock(); + } + + onDone(); +} + +/** + * Consume an NDJSON (newline-delimited JSON) stream from a Response. + * + * Parses each line as JSON and invokes `onChunk` for each parsed object. + * + * @param response - Fetch Response with NDJSON body + * @param onChunk - Callback for each parsed NDJSON line + */ +export async function consumeNdjsonStream( + response: Response, + onChunk: (chunk: T) => void +): Promise { + if (!response.body) return; + + const reader = response.body.getReader(); + const decoder = new TextDecoder(); + let buffer = ''; + + try { + while (true) { + const { done, value } = await reader.read(); + if (done) break; + + buffer += decoder.decode(value, { stream: true }); + const lines = buffer.split('\n'); + buffer = lines.pop() ?? ''; + + for (const line of lines) { + const trimmed = line.trim(); + if (!trimmed) continue; + try { + onChunk(JSON.parse(trimmed) as T); + } catch { + // skip malformed lines + } + } + } + + // Process remaining buffer + if (buffer.trim()) { + try { + onChunk(JSON.parse(buffer.trim()) as T); + } catch { + // skip + } + } + } finally { + reader.releaseLock(); + } +} diff --git a/packages/ollama-client/src/client.test.ts b/packages/ollama-client/src/client.test.ts new file mode 100644 index 00000000..f7530ac5 --- /dev/null +++ b/packages/ollama-client/src/client.test.ts @@ -0,0 +1,157 @@ +import { describe, it, expect, vi, beforeEach } from 'vitest'; +import { OllamaClient } from './client.js'; + +const BASE_URL = 'http://localhost:11434'; + +function mockFetch(response: unknown, options?: { ok?: boolean; status?: number }) { + return vi.fn().mockResolvedValue({ + ok: options?.ok ?? true, + status: options?.status ?? 200, + json: () => Promise.resolve(response), + text: () => Promise.resolve(JSON.stringify(response)), + }); +} + +describe('OllamaClient', () => { + let client: OllamaClient; + + beforeEach(() => { + client = new OllamaClient({ baseUrl: BASE_URL }); + vi.restoreAllMocks(); + }); + + describe('constructor', () => { + it('strips trailing slashes from baseUrl', () => { + const c = new OllamaClient({ baseUrl: 'http://localhost:11434///' }); + expect(c.baseUrl).toBe('http://localhost:11434'); + }); + }); + + describe('tags', () => { + it('returns list of models', async () => { + const models = [{ name: 'llama3', size: 1000, digest: 'abc', modified_at: '2024-01-01' }]; + globalThis.fetch = mockFetch({ models }); + + const result = await client.tags(); + expect(result).toEqual(models); + expect(globalThis.fetch).toHaveBeenCalledWith( + `${BASE_URL}/api/tags`, + expect.objectContaining({ + headers: expect.objectContaining({ 'Content-Type': 'application/json' }), + }) + ); + }); + + it('returns empty array when models is null', async () => { + globalThis.fetch = mockFetch({ models: null }); + const result = await client.tags(); + expect(result).toEqual([]); + }); + }); + + describe('ps', () => { + it('returns running models', async () => { + const models = [ + { name: 'llama3', size: 1000, digest: 'abc', expires_at: '2024-01-01', size_vram: 500 }, + ]; + globalThis.fetch = mockFetch({ models }); + + const result = await client.ps(); + expect(result).toEqual(models); + }); + }); + + describe('show', () => { + it('sends POST with model name', async () => { + const showData = { modelfile: '', parameters: '', template: '', details: {} }; + globalThis.fetch = mockFetch(showData); + + const result = await client.show('llama3'); + expect(result).toEqual(showData); + expect(globalThis.fetch).toHaveBeenCalledWith( + `${BASE_URL}/api/show`, + expect.objectContaining({ + method: 'POST', + body: JSON.stringify({ name: 'llama3' }), + }) + ); + }); + }); + + describe('pull (non-streaming)', () => { + it('pulls a model', async () => { + globalThis.fetch = mockFetch({ status: 'success' }); + + const result = await client.pull('llama3'); + expect(result).toEqual({ status: 'success' }); + }); + + it('throws on failure', async () => { + globalThis.fetch = mockFetch('Pull failed', { ok: false, status: 500 }); + await expect(client.pull('bad-model')).rejects.toThrow('Ollama pull failed (500)'); + }); + }); + + describe('load', () => { + it('sends generate with keep_alive', async () => { + globalThis.fetch = mockFetch({}); + + await client.load('llama3', '15m'); + expect(globalThis.fetch).toHaveBeenCalledWith( + `${BASE_URL}/api/generate`, + expect.objectContaining({ + method: 'POST', + body: JSON.stringify({ model: 'llama3', prompt: '', keep_alive: '15m' }), + }) + ); + }); + }); + + describe('unload', () => { + it('sends generate with keep_alive: 0', async () => { + globalThis.fetch = mockFetch({}); + + await client.unload('llama3'); + expect(globalThis.fetch).toHaveBeenCalledWith( + `${BASE_URL}/api/generate`, + expect.objectContaining({ + method: 'POST', + body: JSON.stringify({ model: 'llama3', prompt: '', keep_alive: '0' }), + }) + ); + }); + }); + + describe('delete', () => { + it('sends DELETE request', async () => { + globalThis.fetch = vi.fn().mockResolvedValue({ ok: true, status: 200 }); + + await client.delete('llama3'); + expect(globalThis.fetch).toHaveBeenCalledWith( + `${BASE_URL}/api/delete`, + expect.objectContaining({ + method: 'DELETE', + body: JSON.stringify({ name: 'llama3' }), + }) + ); + }); + + it('throws on failure', async () => { + globalThis.fetch = vi.fn().mockResolvedValue({ + ok: false, + status: 404, + text: () => Promise.resolve('model not found'), + }); + await expect(client.delete('nope')).rejects.toThrow('Ollama delete failed (404)'); + }); + }); + + describe('version', () => { + it('returns version string', async () => { + globalThis.fetch = mockFetch({ version: '0.5.4' }); + + const result = await client.version(); + expect(result).toBe('0.5.4'); + }); + }); +}); diff --git a/packages/ollama-client/src/client.ts b/packages/ollama-client/src/client.ts new file mode 100644 index 00000000..8f76d3bb --- /dev/null +++ b/packages/ollama-client/src/client.ts @@ -0,0 +1,150 @@ +import type { + OllamaClientOptions, + OllamaModel, + OllamaRunningModel, + OllamaShowResponse, + OllamaPullProgress, + OllamaVersionResponse, +} from './types.js'; + +/** + * Ollama API client for model management operations. + * + * Provides typed methods for all non-streaming Ollama endpoints: + * tags, ps, show, pull, load, unload, delete, version. + */ +export class OllamaClient { + readonly baseUrl: string; + private readonly timeoutMs: number; + + constructor(options: OllamaClientOptions) { + this.baseUrl = options.baseUrl.replace(/\/+$/, ''); + this.timeoutMs = options.timeoutMs ?? 30_000; + } + + private async fetchJson(path: string, init?: RequestInit): Promise { + const controller = new AbortController(); + const timeout = setTimeout(() => controller.abort(), this.timeoutMs); + + try { + const res = await fetch(`${this.baseUrl}${path}`, { + ...init, + signal: init?.signal ?? controller.signal, + headers: { 'Content-Type': 'application/json', ...init?.headers }, + }); + if (!res.ok) { + const text = await res.text().catch(() => ''); + throw new Error(`Ollama ${path} failed (${res.status}): ${text.slice(0, 200)}`); + } + return (await res.json()) as T; + } finally { + clearTimeout(timeout); + } + } + + /** List all locally available models (GET /api/tags). */ + async tags(): Promise { + const data = await this.fetchJson<{ models: OllamaModel[] }>('/api/tags'); + return data.models ?? []; + } + + /** List currently running/loaded models (GET /api/ps). */ + async ps(): Promise { + const data = await this.fetchJson<{ models: OllamaRunningModel[] }>('/api/ps'); + return data.models ?? []; + } + + /** Show model details (POST /api/show). */ + async show(model: string): Promise { + return this.fetchJson('/api/show', { + method: 'POST', + body: JSON.stringify({ name: model }), + }); + } + + /** + * Pull a model from the Ollama registry (POST /api/pull). + * + * When `stream: false`, waits for the full download to complete. + * When `stream: true`, returns an async generator of progress chunks. + */ + async pull(model: string, stream?: false): Promise<{ status: string }>; + async pull(model: string, stream: true): Promise>; + async pull( + model: string, + stream: boolean = false + ): Promise<{ status: string } | AsyncGenerator> { + if (!stream) { + const res = await fetch(`${this.baseUrl}/api/pull`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ name: model, stream: false }), + signal: AbortSignal.timeout(this.timeoutMs), + }); + if (!res.ok) { + const text = await res.text().catch(() => ''); + throw new Error(`Ollama pull failed (${res.status}): ${text.slice(0, 200)}`); + } + return (await res.json()) as { status: string }; + } + + // Streaming pull — return async generator + const baseUrl = this.baseUrl; + const res = await fetch(`${baseUrl}/api/pull`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ name: model, stream: true }), + }); + if (!res.ok) { + const text = await res.text().catch(() => ''); + throw new Error(`Ollama pull failed (${res.status}): ${text.slice(0, 200)}`); + } + if (!res.body) throw new Error('No response body from Ollama pull'); + + const { parseNdjsonStream } = await import('./ndjson.js'); + return parseNdjsonStream(res.body); + } + + /** + * Load a model into memory (POST /api/generate with empty prompt + keep_alive). + * + * @param model - Model name + * @param keepAlive - How long to keep the model loaded (default: '10m') + */ + async load(model: string, keepAlive: string = '10m'): Promise { + await this.fetchJson('/api/generate', { + method: 'POST', + body: JSON.stringify({ model, prompt: '', keep_alive: keepAlive }), + }); + } + + /** + * Unload a model from memory (POST /api/generate with keep_alive: '0'). + */ + async unload(model: string): Promise { + await this.fetchJson('/api/generate', { + method: 'POST', + body: JSON.stringify({ model, prompt: '', keep_alive: '0' }), + }); + } + + /** Delete a model (DELETE /api/delete). */ + async delete(model: string): Promise { + const res = await fetch(`${this.baseUrl}/api/delete`, { + method: 'DELETE', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ name: model }), + signal: AbortSignal.timeout(this.timeoutMs), + }); + if (!res.ok) { + const text = await res.text().catch(() => ''); + throw new Error(`Ollama delete failed (${res.status}): ${text.slice(0, 200)}`); + } + } + + /** Get Ollama server version (GET /api/version). */ + async version(): Promise { + const data = await this.fetchJson('/api/version'); + return data.version; + } +} diff --git a/packages/ollama-client/src/config.test.ts b/packages/ollama-client/src/config.test.ts new file mode 100644 index 00000000..fe2fb4ad --- /dev/null +++ b/packages/ollama-client/src/config.test.ts @@ -0,0 +1,43 @@ +import { describe, it, expect } from 'vitest'; +import { resolveOllamaUrl } from './config.js'; + +describe('resolveOllamaUrl', () => { + it('returns default localhost when no env vars set', () => { + expect(resolveOllamaUrl({})).toBe('http://localhost:11434'); + }); + + it('uses OLLAMA_URL when set', () => { + expect(resolveOllamaUrl({ OLLAMA_URL: 'http://myhost:11434' })).toBe('http://myhost:11434'); + }); + + it('uses OLLAMA_HOST when set', () => { + expect(resolveOllamaUrl({ OLLAMA_HOST: 'http://otherhost:11434' })).toBe( + 'http://otherhost:11434' + ); + }); + + it('prefers OLLAMA_URL over OLLAMA_HOST', () => { + expect( + resolveOllamaUrl({ + OLLAMA_URL: 'http://primary:11434', + OLLAMA_HOST: 'http://secondary:11434', + }) + ).toBe('http://primary:11434'); + }); + + it('normalizes URL without scheme', () => { + expect(resolveOllamaUrl({ OLLAMA_URL: 'myhost:11434' })).toBe('http://myhost:11434'); + }); + + it('strips trailing slashes', () => { + expect(resolveOllamaUrl({ OLLAMA_URL: 'http://myhost:11434///' })).toBe('http://myhost:11434'); + }); + + it('trims whitespace', () => { + expect(resolveOllamaUrl({ OLLAMA_URL: ' http://myhost:11434 ' })).toBe('http://myhost:11434'); + }); + + it('preserves https scheme', () => { + expect(resolveOllamaUrl({ OLLAMA_URL: 'https://secure:11434' })).toBe('https://secure:11434'); + }); +}); diff --git a/packages/ollama-client/src/config.ts b/packages/ollama-client/src/config.ts new file mode 100644 index 00000000..a34db870 --- /dev/null +++ b/packages/ollama-client/src/config.ts @@ -0,0 +1,47 @@ +import { execSync } from 'child_process'; +import fs from 'fs'; + +function normalizeUrl(input: string): string { + const trimmed = input.trim().replace(/\/+$/, ''); + if (trimmed.startsWith('http://') || trimmed.startsWith('https://')) { + return trimmed; + } + return `http://${trimmed}`; +} + +function detectWslGatewayOllamaUrl(): string | null { + try { + if (process.platform !== 'linux') return null; + const version = fs.readFileSync('/proc/version', 'utf-8').toLowerCase(); + if (!version.includes('microsoft')) return null; + + const gw = execSync("ip route show default | awk '{print $3}' | head -1", { + encoding: 'utf-8', + stdio: ['ignore', 'pipe', 'ignore'], + }).trim(); + if (!gw) return null; + return `http://${gw}:11434`; + } catch { + return null; + } +} + +/** + * Resolve the Ollama base URL from environment variables with WSL2 fallback. + * + * Priority: + * 1. `OLLAMA_URL` or `OLLAMA_HOST` env var (explicit config) + * 2. WSL2 gateway IP (Windows-hosted Ollama detected via /proc/version) + * 3. `http://localhost:11434` (default) + * + * @param env - Environment variables object (defaults to `process.env`) + */ +export function resolveOllamaUrl(env: Record = process.env): string { + const explicit = env.OLLAMA_URL || env.OLLAMA_HOST; + if (explicit) return normalizeUrl(explicit); + + const inferred = detectWslGatewayOllamaUrl(); + if (inferred) return inferred; + + return 'http://localhost:11434'; +} diff --git a/packages/ollama-client/src/embed.test.ts b/packages/ollama-client/src/embed.test.ts new file mode 100644 index 00000000..48ffe943 --- /dev/null +++ b/packages/ollama-client/src/embed.test.ts @@ -0,0 +1,66 @@ +import { describe, it, expect, vi, beforeEach } from 'vitest'; +import { getEmbedding, getEmbeddingVector } from './embed.js'; + +const BASE_URL = 'http://localhost:11434'; + +describe('getEmbedding', () => { + beforeEach(() => { + vi.restoreAllMocks(); + }); + + it('returns embedding response', async () => { + const response = { model: 'nomic-embed-text', embeddings: [[0.1, 0.2, 0.3]] }; + globalThis.fetch = vi.fn().mockResolvedValue({ + ok: true, + json: () => Promise.resolve(response), + }); + + const result = await getEmbedding(BASE_URL, { model: 'nomic-embed-text', input: 'hello' }); + expect(result).toEqual(response); + expect(globalThis.fetch).toHaveBeenCalledWith( + `${BASE_URL}/api/embed`, + expect.objectContaining({ + method: 'POST', + body: JSON.stringify({ model: 'nomic-embed-text', input: 'hello' }), + }) + ); + }); + + it('throws on error response', async () => { + globalThis.fetch = vi.fn().mockResolvedValue({ + ok: false, + status: 500, + text: () => Promise.resolve('internal error'), + }); + + await expect( + getEmbedding(BASE_URL, { model: 'nomic-embed-text', input: 'hello' }) + ).rejects.toThrow('Ollama embed failed (500)'); + }); +}); + +describe('getEmbeddingVector', () => { + beforeEach(() => { + vi.restoreAllMocks(); + }); + + it('returns first embedding vector', async () => { + globalThis.fetch = vi.fn().mockResolvedValue({ + ok: true, + json: () => Promise.resolve({ model: 'nomic-embed-text', embeddings: [[0.1, 0.2]] }), + }); + + const result = await getEmbeddingVector(BASE_URL, 'hello'); + expect(result).toEqual([0.1, 0.2]); + }); + + it('returns empty array when no embeddings', async () => { + globalThis.fetch = vi.fn().mockResolvedValue({ + ok: true, + json: () => Promise.resolve({ model: 'nomic-embed-text', embeddings: [] }), + }); + + const result = await getEmbeddingVector(BASE_URL, 'hello'); + expect(result).toEqual([]); + }); +}); diff --git a/packages/ollama-client/src/embed.ts b/packages/ollama-client/src/embed.ts new file mode 100644 index 00000000..7578c124 --- /dev/null +++ b/packages/ollama-client/src/embed.ts @@ -0,0 +1,47 @@ +import type { OllamaEmbedOptions, OllamaEmbeddingResponse } from './types.js'; + +/** + * Get embeddings for text using an Ollama embedding model. + * + * @param baseUrl - Ollama server base URL + * @param options - Embedding options (model, input text) + * @returns Array of embedding vectors (one per input string) + */ +export async function getEmbedding( + baseUrl: string, + options: OllamaEmbedOptions +): Promise { + const res = await fetch(`${baseUrl}/api/embed`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + model: options.model, + input: options.input, + ...(options.options && { options: options.options }), + }), + }); + + if (!res.ok) { + const text = await res.text().catch(() => ''); + throw new Error(`Ollama embed failed (${res.status}): ${text.slice(0, 200)}`); + } + + return (await res.json()) as OllamaEmbeddingResponse; +} + +/** + * Convenience: get a single embedding vector for a text string. + * + * @param baseUrl - Ollama server base URL + * @param text - Text to embed + * @param model - Embedding model name (default: 'nomic-embed-text') + * @returns Single embedding vector + */ +export async function getEmbeddingVector( + baseUrl: string, + text: string, + model: string = 'nomic-embed-text' +): Promise { + const response = await getEmbedding(baseUrl, { model, input: text }); + return response.embeddings?.[0] ?? []; +} diff --git a/packages/ollama-client/src/format.test.ts b/packages/ollama-client/src/format.test.ts new file mode 100644 index 00000000..0ea32f3f --- /dev/null +++ b/packages/ollama-client/src/format.test.ts @@ -0,0 +1,73 @@ +import { describe, it, expect } from 'vitest'; +import { formatBytes, estimateTokens, getModelContextWindow, formatUptime } from './format.js'; + +describe('formatBytes', () => { + it('formats 0 bytes', () => { + expect(formatBytes(0)).toBe('0 B'); + }); + + it('formats bytes', () => { + expect(formatBytes(512)).toBe('512 B'); + }); + + it('formats kilobytes', () => { + expect(formatBytes(1536)).toBe('1.5 KB'); + }); + + it('formats megabytes', () => { + expect(formatBytes(5242880)).toBe('5 MB'); + }); + + it('formats gigabytes', () => { + expect(formatBytes(4294967296)).toBe('4 GB'); + }); +}); + +describe('estimateTokens', () => { + it('returns 0 for empty string', () => { + expect(estimateTokens('')).toBe(0); + }); + + it('returns 0 for whitespace-only string', () => { + expect(estimateTokens(' ')).toBe(0); + }); + + it('estimates tokens for a sentence', () => { + const result = estimateTokens('Hello world this is a test'); + expect(result).toBeGreaterThan(0); + // 6 words × 1.3 = 7.8, ceil = 8 + expect(result).toBe(8); + }); +}); + +describe('getModelContextWindow', () => { + it('returns 128k for models with 128k in name', () => { + expect(getModelContextWindow('llama3-128k')).toBe(128_000); + }); + + it('returns 32k for models with 32k in name', () => { + expect(getModelContextWindow('gpt4-32k')).toBe(32_000); + }); + + it('returns 4096 for models without size marker', () => { + expect(getModelContextWindow('llama3:latest')).toBe(4_096); + }); +}); + +describe('formatUptime', () => { + it('formats minutes only', () => { + expect(formatUptime(120)).toBe('2m'); + }); + + it('formats hours and minutes', () => { + expect(formatUptime(3661)).toBe('1h 1m'); + }); + + it('formats days, hours, and minutes', () => { + expect(formatUptime(90061)).toBe('1d 1h 1m'); + }); + + it('formats zero', () => { + expect(formatUptime(0)).toBe('0m'); + }); +}); diff --git a/packages/ollama-client/src/format.ts b/packages/ollama-client/src/format.ts new file mode 100644 index 00000000..ef336634 --- /dev/null +++ b/packages/ollama-client/src/format.ts @@ -0,0 +1,56 @@ +/** + * Format a byte count into a human-readable string. + * + * @example formatBytes(1536) // '1.5 KB' + * @example formatBytes(0) // '0 B' + */ +export function formatBytes(bytes: number): string { + if (bytes === 0) return '0 B'; + const k = 1024; + const sizes = ['B', 'KB', 'MB', 'GB', 'TB']; + const i = Math.floor(Math.log(bytes) / Math.log(k)); + return `${parseFloat((bytes / Math.pow(k, i)).toFixed(1))} ${sizes[i]}`; +} + +/** + * Approximate token count for a text string. + * + * Uses a word-count × 1.3 heuristic (typical for English text with LLM tokenizers). + */ +export function estimateTokens(text: string): number { + const trimmed = text.trim(); + if (!trimmed) return 0; + return Math.ceil(trimmed.split(/\s+/).length * 1.3); +} + +/** + * Best-effort model context window lookup based on model name. + * + * Checks for common context window markers in the model name string. + * Falls back to 4096 if no marker is found. + */ +export function getModelContextWindow(modelName: string): number { + const n = modelName.toLowerCase(); + if (n.includes('128k')) return 128_000; + if (n.includes('64k')) return 64_000; + if (n.includes('32k')) return 32_000; + if (n.includes('16k')) return 16_000; + if (n.includes('8k')) return 8_000; + return 4_096; +} + +/** + * Format a duration in seconds to a human-readable uptime string. + * + * @example formatUptime(90061) // '1d 1h 1m' + * @example formatUptime(3661) // '1h 1m' + * @example formatUptime(120) // '2m' + */ +export function formatUptime(seconds: number): string { + const d = Math.floor(seconds / 86400); + const h = Math.floor((seconds % 86400) / 3600); + const m = Math.floor((seconds % 3600) / 60); + if (d > 0) return `${d}d ${h}h ${m}m`; + if (h > 0) return `${h}h ${m}m`; + return `${m}m`; +} diff --git a/packages/ollama-client/src/health.test.ts b/packages/ollama-client/src/health.test.ts new file mode 100644 index 00000000..c1b71610 --- /dev/null +++ b/packages/ollama-client/src/health.test.ts @@ -0,0 +1,53 @@ +import { describe, it, expect, vi, beforeEach } from 'vitest'; +import { checkHealth, checkHealthDetailed } from './health.js'; + +const BASE_URL = 'http://localhost:11434'; + +describe('checkHealth', () => { + beforeEach(() => { + vi.restoreAllMocks(); + }); + + it('returns true when server is healthy', async () => { + globalThis.fetch = vi.fn().mockResolvedValue({ ok: true }); + expect(await checkHealth(BASE_URL)).toBe(true); + }); + + it('returns false when server returns non-ok', async () => { + globalThis.fetch = vi.fn().mockResolvedValue({ ok: false }); + expect(await checkHealth(BASE_URL)).toBe(false); + }); + + it('returns false when fetch throws', async () => { + globalThis.fetch = vi.fn().mockRejectedValue(new Error('ECONNREFUSED')); + expect(await checkHealth(BASE_URL)).toBe(false); + }); +}); + +describe('checkHealthDetailed', () => { + beforeEach(() => { + vi.restoreAllMocks(); + }); + + it('returns online + version when server is healthy', async () => { + globalThis.fetch = vi.fn().mockImplementation((url: string) => { + if (url.includes('/api/tags')) return Promise.resolve({ ok: true }); + if (url.includes('/api/version')) + return Promise.resolve({ + ok: true, + json: () => Promise.resolve({ version: '0.5.4' }), + }); + return Promise.reject(new Error('unexpected')); + }); + + const result = await checkHealthDetailed(BASE_URL); + expect(result).toEqual({ online: true, version: '0.5.4', url: BASE_URL }); + }); + + it('returns offline when server is unreachable', async () => { + globalThis.fetch = vi.fn().mockRejectedValue(new Error('ECONNREFUSED')); + + const result = await checkHealthDetailed(BASE_URL); + expect(result).toEqual({ online: false, version: null, url: BASE_URL }); + }); +}); diff --git a/packages/ollama-client/src/health.ts b/packages/ollama-client/src/health.ts new file mode 100644 index 00000000..da3410b2 --- /dev/null +++ b/packages/ollama-client/src/health.ts @@ -0,0 +1,44 @@ +/** + * Check if the Ollama server is reachable. + * + * @param baseUrl - Ollama server base URL + * @param timeoutMs - Timeout in milliseconds (default: 3000) + * @returns `true` if the server responds to GET /api/tags within the timeout + */ +export async function checkHealth(baseUrl: string, timeoutMs: number = 3000): Promise { + try { + const res = await fetch(`${baseUrl}/api/tags`, { + signal: AbortSignal.timeout(timeoutMs), + }); + return res.ok; + } catch { + return false; + } +} + +/** + * Check health and return structured result with version info. + */ +export async function checkHealthDetailed( + baseUrl: string, + timeoutMs: number = 3000 +): Promise<{ online: boolean; version: string | null; url: string }> { + try { + const [tagsRes, versionRes] = await Promise.allSettled([ + fetch(`${baseUrl}/api/tags`, { signal: AbortSignal.timeout(timeoutMs) }), + fetch(`${baseUrl}/api/version`, { signal: AbortSignal.timeout(timeoutMs) }), + ]); + + const online = tagsRes.status === 'fulfilled' && tagsRes.value.ok; + let version: string | null = null; + + if (versionRes.status === 'fulfilled' && versionRes.value.ok) { + const data = (await versionRes.value.json()) as { version?: string }; + version = data.version ?? null; + } + + return { online, version, url: baseUrl }; + } catch { + return { online: false, version: null, url: baseUrl }; + } +} diff --git a/packages/ollama-client/src/index.ts b/packages/ollama-client/src/index.ts new file mode 100644 index 00000000..1393e4d3 --- /dev/null +++ b/packages/ollama-client/src/index.ts @@ -0,0 +1,41 @@ +// Types +export type { + OllamaModel, + OllamaModelDetails, + OllamaRunningModel, + OllamaChatMessage, + OllamaStreamChunk, + OllamaGenerateChunk, + OllamaEmbeddingResponse, + OllamaShowResponse, + OllamaPullProgress, + OllamaVersionResponse, + OllamaClientOptions, + OllamaChatOptions, + OllamaGenerateOptions, + OllamaEmbedOptions, +} from './types.js'; + +// Config +export { resolveOllamaUrl } from './config.js'; + +// Client +export { OllamaClient } from './client.js'; + +// Streaming +export { streamChat, streamGenerate } from './stream.js'; + +// Embeddings +export { getEmbedding, getEmbeddingVector } from './embed.js'; + +// Health +export { checkHealth, checkHealthDetailed } from './health.js'; + +// NDJSON parser +export { parseNdjsonStream } from './ndjson.js'; + +// Client-side stream consumers +export { consumeSSEStream, consumeNdjsonStream } from './client-parsers.js'; + +// Format utilities +export { formatBytes, estimateTokens, getModelContextWindow, formatUptime } from './format.js'; diff --git a/packages/ollama-client/src/ndjson.test.ts b/packages/ollama-client/src/ndjson.test.ts new file mode 100644 index 00000000..41be1d46 --- /dev/null +++ b/packages/ollama-client/src/ndjson.test.ts @@ -0,0 +1,73 @@ +import { describe, it, expect } from 'vitest'; +import { parseNdjsonStream } from './ndjson.js'; + +function createReadableStream(chunks: string[]): ReadableStream { + const encoder = new TextEncoder(); + let index = 0; + return new ReadableStream({ + pull(controller) { + if (index < chunks.length) { + controller.enqueue(encoder.encode(chunks[index])); + index++; + } else { + controller.close(); + } + }, + }); +} + +describe('parseNdjsonStream', () => { + it('parses complete NDJSON lines', async () => { + const stream = createReadableStream(['{"a":1}\n{"a":2}\n{"a":3}\n']); + const results: unknown[] = []; + for await (const chunk of parseNdjsonStream(stream)) { + results.push(chunk); + } + expect(results).toEqual([{ a: 1 }, { a: 2 }, { a: 3 }]); + }); + + it('handles partial lines split across chunks', async () => { + const stream = createReadableStream(['{"a":', '1}\n{"a":2}\n']); + const results: unknown[] = []; + for await (const chunk of parseNdjsonStream(stream)) { + results.push(chunk); + } + expect(results).toEqual([{ a: 1 }, { a: 2 }]); + }); + + it('skips empty lines', async () => { + const stream = createReadableStream(['{"a":1}\n\n\n{"a":2}\n']); + const results: unknown[] = []; + for await (const chunk of parseNdjsonStream(stream)) { + results.push(chunk); + } + expect(results).toEqual([{ a: 1 }, { a: 2 }]); + }); + + it('skips malformed JSON lines', async () => { + const stream = createReadableStream(['{"a":1}\nnot json\n{"a":2}\n']); + const results: unknown[] = []; + for await (const chunk of parseNdjsonStream(stream)) { + results.push(chunk); + } + expect(results).toEqual([{ a: 1 }, { a: 2 }]); + }); + + it('processes remaining buffer after stream ends', async () => { + const stream = createReadableStream(['{"a":1}\n{"a":2}']); + const results: unknown[] = []; + for await (const chunk of parseNdjsonStream(stream)) { + results.push(chunk); + } + expect(results).toEqual([{ a: 1 }, { a: 2 }]); + }); + + it('handles empty stream', async () => { + const stream = createReadableStream([]); + const results: unknown[] = []; + for await (const chunk of parseNdjsonStream(stream)) { + results.push(chunk); + } + expect(results).toEqual([]); + }); +}); diff --git a/packages/ollama-client/src/ndjson.ts b/packages/ollama-client/src/ndjson.ts new file mode 100644 index 00000000..76426491 --- /dev/null +++ b/packages/ollama-client/src/ndjson.ts @@ -0,0 +1,45 @@ +/** + * Parse an NDJSON (newline-delimited JSON) stream into an async generator. + * + * Works with both Node.js `ReadableStream` and browser `ReadableStream`. + * Handles partial lines across chunk boundaries gracefully. + */ +export async function* parseNdjsonStream( + body: ReadableStream +): AsyncGenerator { + const reader = body.getReader(); + const decoder = new TextDecoder(); + let buffer = ''; + + try { + while (true) { + const { done, value } = await reader.read(); + if (done) break; + + buffer += decoder.decode(value, { stream: true }); + const lines = buffer.split('\n'); + buffer = lines.pop() ?? ''; + + for (const line of lines) { + const trimmed = line.trim(); + if (!trimmed) continue; + try { + yield JSON.parse(trimmed) as T; + } catch { + // skip malformed lines + } + } + } + + // Process remaining buffer after stream ends + if (buffer.trim()) { + try { + yield JSON.parse(buffer.trim()) as T; + } catch { + // skip + } + } + } finally { + reader.releaseLock(); + } +} diff --git a/packages/ollama-client/src/stream.test.ts b/packages/ollama-client/src/stream.test.ts new file mode 100644 index 00000000..905d9f1d --- /dev/null +++ b/packages/ollama-client/src/stream.test.ts @@ -0,0 +1,99 @@ +import { describe, it, expect, vi, beforeEach } from 'vitest'; +import { streamChat, streamGenerate } from './stream.js'; + +const BASE_URL = 'http://localhost:11434'; + +function createNdjsonResponse(chunks: object[]): Response { + const encoder = new TextEncoder(); + const ndjson = chunks.map(c => JSON.stringify(c)).join('\n') + '\n'; + const stream = new ReadableStream({ + start(controller) { + controller.enqueue(encoder.encode(ndjson)); + controller.close(); + }, + }); + return { ok: true, status: 200, body: stream } as unknown as Response; +} + +describe('streamChat', () => { + beforeEach(() => { + vi.restoreAllMocks(); + }); + + it('yields chat stream chunks', async () => { + const chunks = [ + { model: 'llama3', message: { role: 'assistant', content: 'Hello' }, done: false }, + { model: 'llama3', message: { role: 'assistant', content: ' world' }, done: false }, + { model: 'llama3', message: { role: 'assistant', content: '' }, done: true, eval_count: 10 }, + ]; + globalThis.fetch = vi.fn().mockResolvedValue(createNdjsonResponse(chunks)); + + const results = []; + for await (const chunk of streamChat(BASE_URL, { + model: 'llama3', + messages: [{ role: 'user', content: 'Hi' }], + })) { + results.push(chunk); + } + + expect(results).toHaveLength(3); + expect(results[0].message.content).toBe('Hello'); + expect(results[2].done).toBe(true); + expect(results[2].eval_count).toBe(10); + }); + + it('throws on non-ok response', async () => { + globalThis.fetch = vi.fn().mockResolvedValue({ + ok: false, + status: 500, + text: () => Promise.resolve('internal error'), + }); + + const gen = streamChat(BASE_URL, { + model: 'llama3', + messages: [{ role: 'user', content: 'Hi' }], + }); + await expect(gen.next()).rejects.toThrow('Ollama chat failed (500)'); + }); + + it('throws when response has no body', async () => { + globalThis.fetch = vi.fn().mockResolvedValue({ + ok: true, + status: 200, + body: null, + }); + + const gen = streamChat(BASE_URL, { + model: 'llama3', + messages: [{ role: 'user', content: 'Hi' }], + }); + await expect(gen.next()).rejects.toThrow('No response body'); + }); +}); + +describe('streamGenerate', () => { + beforeEach(() => { + vi.restoreAllMocks(); + }); + + it('yields generate stream chunks', async () => { + const chunks = [ + { model: 'llama3', response: 'Hello', done: false }, + { model: 'llama3', response: ' world', done: false }, + { model: 'llama3', response: '', done: true, eval_count: 5 }, + ]; + globalThis.fetch = vi.fn().mockResolvedValue(createNdjsonResponse(chunks)); + + const results = []; + for await (const chunk of streamGenerate(BASE_URL, { + model: 'llama3', + prompt: 'Say hello', + })) { + results.push(chunk); + } + + expect(results).toHaveLength(3); + expect(results[0].response).toBe('Hello'); + expect(results[2].done).toBe(true); + }); +}); diff --git a/packages/ollama-client/src/stream.ts b/packages/ollama-client/src/stream.ts new file mode 100644 index 00000000..dcf34b06 --- /dev/null +++ b/packages/ollama-client/src/stream.ts @@ -0,0 +1,89 @@ +import type { + OllamaChatOptions, + OllamaGenerateOptions, + OllamaStreamChunk, + OllamaGenerateChunk, +} from './types.js'; +import { parseNdjsonStream } from './ndjson.js'; + +/** + * Stream a chat completion from Ollama. + * + * Yields NDJSON chunks from `POST /api/chat` as an async generator. + * + * @param baseUrl - Ollama server base URL + * @param options - Chat options (model, messages, signal, etc.) + */ +export async function* streamChat( + baseUrl: string, + options: OllamaChatOptions +): AsyncGenerator { + const { model, messages, signal, ...rest } = options; + + const res = await fetch(`${baseUrl}/api/chat`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + model, + messages, + stream: true, + ...('options' in rest && rest.options ? { options: rest.options } : {}), + ...('format' in rest && rest.format ? { format: rest.format } : {}), + ...('keep_alive' in rest && rest.keep_alive ? { keep_alive: rest.keep_alive } : {}), + }), + signal, + }); + + if (!res.ok) { + const text = await res.text().catch(() => ''); + throw new Error(`Ollama chat failed (${res.status}): ${text.slice(0, 200)}`); + } + + if (!res.body) { + throw new Error('No response body from Ollama'); + } + + yield* parseNdjsonStream(res.body); +} + +/** + * Stream a text generation from Ollama. + * + * Yields NDJSON chunks from `POST /api/generate` as an async generator. + * + * @param baseUrl - Ollama server base URL + * @param options - Generate options (model, prompt, signal, etc.) + */ +export async function* streamGenerate( + baseUrl: string, + options: OllamaGenerateOptions +): AsyncGenerator { + const { model, prompt, signal, ...rest } = options; + + const res = await fetch(`${baseUrl}/api/generate`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + model, + prompt, + stream: true, + ...('system' in rest && rest.system ? { system: rest.system } : {}), + ...('options' in rest && rest.options ? { options: rest.options } : {}), + ...('format' in rest && rest.format ? { format: rest.format } : {}), + ...('keep_alive' in rest && rest.keep_alive ? { keep_alive: rest.keep_alive } : {}), + ...('context' in rest && rest.context ? { context: rest.context } : {}), + }), + signal, + }); + + if (!res.ok) { + const text = await res.text().catch(() => ''); + throw new Error(`Ollama generate failed (${res.status}): ${text.slice(0, 200)}`); + } + + if (!res.body) { + throw new Error('No response body from Ollama'); + } + + yield* parseNdjsonStream(res.body); +} diff --git a/packages/ollama-client/src/types.ts b/packages/ollama-client/src/types.ts new file mode 100644 index 00000000..410d89d4 --- /dev/null +++ b/packages/ollama-client/src/types.ts @@ -0,0 +1,133 @@ +// --- Model types --- + +export interface OllamaModelDetails { + family?: string; + families?: string[]; + format?: string; + parameter_size?: string; + quantization_level?: string; +} + +export interface OllamaModel { + name: string; + model?: string; + size: number; + digest: string; + modified_at: string; + details?: OllamaModelDetails; +} + +export interface OllamaRunningModel { + name: string; + model?: string; + size: number; + digest: string; + expires_at: string; + size_vram: number; + details?: OllamaModelDetails; +} + +// --- Chat types --- + +export interface OllamaChatMessage { + role: 'user' | 'assistant' | 'system'; + content: string; + images?: string[]; +} + +export interface OllamaStreamChunk { + model: string; + message: { role: string; content: string }; + done: boolean; + total_duration?: number; + eval_count?: number; + eval_duration?: number; + prompt_eval_count?: number; + prompt_eval_duration?: number; + load_duration?: number; +} + +// --- Generate types --- + +export interface OllamaGenerateChunk { + model: string; + response: string; + done: boolean; + total_duration?: number; + eval_count?: number; + eval_duration?: number; + prompt_eval_count?: number; + prompt_eval_duration?: number; + load_duration?: number; + context?: number[]; +} + +// --- Embedding types --- + +export interface OllamaEmbeddingResponse { + model: string; + embeddings: number[][]; + total_duration?: number; + load_duration?: number; + prompt_eval_count?: number; +} + +// --- Show types --- + +export interface OllamaShowResponse { + modelfile: string; + parameters: string; + template: string; + details: OllamaModelDetails; + model_info?: Record; +} + +// --- Pull progress types --- + +export interface OllamaPullProgress { + status: string; + digest?: string; + total?: number; + completed?: number; +} + +// --- Version types --- + +export interface OllamaVersionResponse { + version: string; +} + +// --- Client options --- + +export interface OllamaClientOptions { + /** Base URL of the Ollama server (e.g. http://localhost:11434) */ + baseUrl: string; + /** Default request timeout in milliseconds (default: 30000) */ + timeoutMs?: number; +} + +export interface OllamaChatOptions { + model: string; + messages: OllamaChatMessage[]; + signal?: AbortSignal; + options?: Record; + format?: 'json' | string; + keep_alive?: string; +} + +export interface OllamaGenerateOptions { + model: string; + prompt: string; + signal?: AbortSignal; + system?: string; + options?: Record; + format?: 'json' | string; + keep_alive?: string; + context?: number[]; +} + +export interface OllamaEmbedOptions { + model: string; + input: string | string[]; + options?: Record; +} diff --git a/packages/ollama-client/tsconfig.json b/packages/ollama-client/tsconfig.json new file mode 100644 index 00000000..5edad813 --- /dev/null +++ b/packages/ollama-client/tsconfig.json @@ -0,0 +1,9 @@ +{ + "extends": "../../tsconfig.base.json", + "compilerOptions": { + "outDir": "dist", + "rootDir": "src" + }, + "include": ["src"], + "exclude": ["src/**/*.test.ts"] +} diff --git a/packages/ollama-client/vitest.config.ts b/packages/ollama-client/vitest.config.ts new file mode 100644 index 00000000..811c18ac --- /dev/null +++ b/packages/ollama-client/vitest.config.ts @@ -0,0 +1,9 @@ +import { defineConfig } from 'vitest/config'; + +export default defineConfig({ + test: { + globals: true, + passWithNoTests: true, + pool: 'forks', + }, +});