feat(ollama-client): shared Ollama API client package

This commit is contained in:
saravanakumardb1 2026-03-29 12:43:01 -07:00
parent bfa55998a2
commit 31cf7c0c6f
21 changed files with 1472 additions and 0 deletions

View File

@ -0,0 +1,29 @@
{
"name": "@bytelyst/ollama-client",
"version": "0.1.0",
"description": "Shared Ollama API client — streaming chat, embeddings, model management, health checks",
"type": "module",
"exports": {
".": {
"import": "./dist/index.js",
"types": "./dist/index.d.ts"
}
},
"main": "./dist/index.js",
"types": "./dist/index.d.ts",
"files": [
"dist"
],
"scripts": {
"build": "tsc",
"test": "vitest run --pool forks",
"typecheck": "tsc --noEmit"
},
"devDependencies": {
"vitest": "^3.0.0",
"typescript": "^5.7.0"
},
"publishConfig": {
"registry": "http://localhost:3300/api/packages/bytelyst/npm/"
}
}

View File

@ -0,0 +1,94 @@
import { describe, it, expect, vi } from 'vitest';
import { consumeSSEStream, consumeNdjsonStream } from './client-parsers.js';
function createResponse(body: string): Response {
const encoder = new TextEncoder();
const stream = new ReadableStream<Uint8Array>({
start(controller) {
controller.enqueue(encoder.encode(body));
controller.close();
},
});
return { ok: true, body: stream } as unknown as Response;
}
describe('consumeSSEStream', () => {
it('parses data: lines and calls onData', async () => {
const response = createResponse('data: {"msg":"hello"}\ndata: {"msg":"world"}\n\n');
const onData = vi.fn();
const onDone = vi.fn();
await consumeSSEStream(response, onData, onDone);
expect(onData).toHaveBeenCalledTimes(2);
expect(onData).toHaveBeenCalledWith({ msg: 'hello' });
expect(onData).toHaveBeenCalledWith({ msg: 'world' });
expect(onDone).toHaveBeenCalled();
});
it('stops on [DONE] marker', async () => {
const response = createResponse('data: {"msg":"hello"}\ndata: [DONE]\ndata: {"msg":"after"}\n');
const onData = vi.fn();
const onDone = vi.fn();
await consumeSSEStream(response, onData, onDone);
expect(onData).toHaveBeenCalledTimes(1);
expect(onDone).toHaveBeenCalled();
});
it('calls onDone when response has no body', async () => {
const response = { ok: true, body: null } as unknown as Response;
const onDone = vi.fn();
await consumeSSEStream(response, vi.fn(), onDone);
expect(onDone).toHaveBeenCalled();
});
it('skips malformed SSE data', async () => {
const response = createResponse('data: not-json\ndata: {"valid":true}\n');
const onData = vi.fn();
const onDone = vi.fn();
await consumeSSEStream(response, onData, onDone);
expect(onData).toHaveBeenCalledTimes(1);
expect(onData).toHaveBeenCalledWith({ valid: true });
});
});
describe('consumeNdjsonStream', () => {
it('parses NDJSON lines and calls onChunk', async () => {
const response = createResponse('{"a":1}\n{"a":2}\n');
const onChunk = vi.fn();
await consumeNdjsonStream(response, onChunk);
expect(onChunk).toHaveBeenCalledTimes(2);
expect(onChunk).toHaveBeenCalledWith({ a: 1 });
expect(onChunk).toHaveBeenCalledWith({ a: 2 });
});
it('handles no body', async () => {
const response = { ok: true, body: null } as unknown as Response;
const onChunk = vi.fn();
await consumeNdjsonStream(response, onChunk);
expect(onChunk).not.toHaveBeenCalled();
});
it('processes remaining buffer', async () => {
const response = createResponse('{"a":1}\n{"a":2}');
const onChunk = vi.fn();
await consumeNdjsonStream(response, onChunk);
expect(onChunk).toHaveBeenCalledTimes(2);
});
it('skips malformed lines', async () => {
const response = createResponse('{"a":1}\nbad\n{"a":2}\n');
const onChunk = vi.fn();
await consumeNdjsonStream(response, onChunk);
expect(onChunk).toHaveBeenCalledTimes(2);
});
});

View File

@ -0,0 +1,115 @@
/**
* Browser-side stream consumers for Next.js API route clients.
*
* These functions consume a `Response` from a fetch call that returns
* streaming data (SSE or NDJSON) and invoke callbacks for each chunk.
*/
/**
* Consume a Server-Sent Events (SSE) stream from a Response.
*
* Parses `data:` lines and invokes `onData` for each parsed JSON object.
* Stops when the stream ends or `[DONE]` is received.
*
* @param response - Fetch Response with SSE body
* @param onData - Callback for each parsed data event
* @param onDone - Callback when stream completes
*/
export async function consumeSSEStream(
response: Response,
onData: (data: Record<string, unknown>) => void,
onDone: () => void
): Promise<void> {
if (!response.body) {
onDone();
return;
}
const reader = response.body.getReader();
const decoder = new TextDecoder();
let buffer = '';
try {
while (true) {
const { done, value } = await reader.read();
if (done) break;
buffer += decoder.decode(value, { stream: true });
const lines = buffer.split('\n');
buffer = lines.pop() ?? '';
for (const line of lines) {
const trimmed = line.trim();
if (!trimmed) continue;
if (trimmed.startsWith('data:')) {
const payload = trimmed.slice(5).trim();
if (payload === '[DONE]') {
onDone();
return;
}
try {
onData(JSON.parse(payload) as Record<string, unknown>);
} catch {
// skip malformed SSE data
}
}
}
}
} finally {
reader.releaseLock();
}
onDone();
}
/**
* Consume an NDJSON (newline-delimited JSON) stream from a Response.
*
* Parses each line as JSON and invokes `onChunk` for each parsed object.
*
* @param response - Fetch Response with NDJSON body
* @param onChunk - Callback for each parsed NDJSON line
*/
export async function consumeNdjsonStream<T>(
response: Response,
onChunk: (chunk: T) => void
): Promise<void> {
if (!response.body) return;
const reader = response.body.getReader();
const decoder = new TextDecoder();
let buffer = '';
try {
while (true) {
const { done, value } = await reader.read();
if (done) break;
buffer += decoder.decode(value, { stream: true });
const lines = buffer.split('\n');
buffer = lines.pop() ?? '';
for (const line of lines) {
const trimmed = line.trim();
if (!trimmed) continue;
try {
onChunk(JSON.parse(trimmed) as T);
} catch {
// skip malformed lines
}
}
}
// Process remaining buffer
if (buffer.trim()) {
try {
onChunk(JSON.parse(buffer.trim()) as T);
} catch {
// skip
}
}
} finally {
reader.releaseLock();
}
}

View File

@ -0,0 +1,157 @@
import { describe, it, expect, vi, beforeEach } from 'vitest';
import { OllamaClient } from './client.js';
const BASE_URL = 'http://localhost:11434';
function mockFetch(response: unknown, options?: { ok?: boolean; status?: number }) {
return vi.fn().mockResolvedValue({
ok: options?.ok ?? true,
status: options?.status ?? 200,
json: () => Promise.resolve(response),
text: () => Promise.resolve(JSON.stringify(response)),
});
}
describe('OllamaClient', () => {
let client: OllamaClient;
beforeEach(() => {
client = new OllamaClient({ baseUrl: BASE_URL });
vi.restoreAllMocks();
});
describe('constructor', () => {
it('strips trailing slashes from baseUrl', () => {
const c = new OllamaClient({ baseUrl: 'http://localhost:11434///' });
expect(c.baseUrl).toBe('http://localhost:11434');
});
});
describe('tags', () => {
it('returns list of models', async () => {
const models = [{ name: 'llama3', size: 1000, digest: 'abc', modified_at: '2024-01-01' }];
globalThis.fetch = mockFetch({ models });
const result = await client.tags();
expect(result).toEqual(models);
expect(globalThis.fetch).toHaveBeenCalledWith(
`${BASE_URL}/api/tags`,
expect.objectContaining({
headers: expect.objectContaining({ 'Content-Type': 'application/json' }),
})
);
});
it('returns empty array when models is null', async () => {
globalThis.fetch = mockFetch({ models: null });
const result = await client.tags();
expect(result).toEqual([]);
});
});
describe('ps', () => {
it('returns running models', async () => {
const models = [
{ name: 'llama3', size: 1000, digest: 'abc', expires_at: '2024-01-01', size_vram: 500 },
];
globalThis.fetch = mockFetch({ models });
const result = await client.ps();
expect(result).toEqual(models);
});
});
describe('show', () => {
it('sends POST with model name', async () => {
const showData = { modelfile: '', parameters: '', template: '', details: {} };
globalThis.fetch = mockFetch(showData);
const result = await client.show('llama3');
expect(result).toEqual(showData);
expect(globalThis.fetch).toHaveBeenCalledWith(
`${BASE_URL}/api/show`,
expect.objectContaining({
method: 'POST',
body: JSON.stringify({ name: 'llama3' }),
})
);
});
});
describe('pull (non-streaming)', () => {
it('pulls a model', async () => {
globalThis.fetch = mockFetch({ status: 'success' });
const result = await client.pull('llama3');
expect(result).toEqual({ status: 'success' });
});
it('throws on failure', async () => {
globalThis.fetch = mockFetch('Pull failed', { ok: false, status: 500 });
await expect(client.pull('bad-model')).rejects.toThrow('Ollama pull failed (500)');
});
});
describe('load', () => {
it('sends generate with keep_alive', async () => {
globalThis.fetch = mockFetch({});
await client.load('llama3', '15m');
expect(globalThis.fetch).toHaveBeenCalledWith(
`${BASE_URL}/api/generate`,
expect.objectContaining({
method: 'POST',
body: JSON.stringify({ model: 'llama3', prompt: '', keep_alive: '15m' }),
})
);
});
});
describe('unload', () => {
it('sends generate with keep_alive: 0', async () => {
globalThis.fetch = mockFetch({});
await client.unload('llama3');
expect(globalThis.fetch).toHaveBeenCalledWith(
`${BASE_URL}/api/generate`,
expect.objectContaining({
method: 'POST',
body: JSON.stringify({ model: 'llama3', prompt: '', keep_alive: '0' }),
})
);
});
});
describe('delete', () => {
it('sends DELETE request', async () => {
globalThis.fetch = vi.fn().mockResolvedValue({ ok: true, status: 200 });
await client.delete('llama3');
expect(globalThis.fetch).toHaveBeenCalledWith(
`${BASE_URL}/api/delete`,
expect.objectContaining({
method: 'DELETE',
body: JSON.stringify({ name: 'llama3' }),
})
);
});
it('throws on failure', async () => {
globalThis.fetch = vi.fn().mockResolvedValue({
ok: false,
status: 404,
text: () => Promise.resolve('model not found'),
});
await expect(client.delete('nope')).rejects.toThrow('Ollama delete failed (404)');
});
});
describe('version', () => {
it('returns version string', async () => {
globalThis.fetch = mockFetch({ version: '0.5.4' });
const result = await client.version();
expect(result).toBe('0.5.4');
});
});
});

View File

@ -0,0 +1,150 @@
import type {
OllamaClientOptions,
OllamaModel,
OllamaRunningModel,
OllamaShowResponse,
OllamaPullProgress,
OllamaVersionResponse,
} from './types.js';
/**
* Ollama API client for model management operations.
*
* Provides typed methods for all non-streaming Ollama endpoints:
* tags, ps, show, pull, load, unload, delete, version.
*/
export class OllamaClient {
readonly baseUrl: string;
private readonly timeoutMs: number;
constructor(options: OllamaClientOptions) {
this.baseUrl = options.baseUrl.replace(/\/+$/, '');
this.timeoutMs = options.timeoutMs ?? 30_000;
}
private async fetchJson<T>(path: string, init?: RequestInit): Promise<T> {
const controller = new AbortController();
const timeout = setTimeout(() => controller.abort(), this.timeoutMs);
try {
const res = await fetch(`${this.baseUrl}${path}`, {
...init,
signal: init?.signal ?? controller.signal,
headers: { 'Content-Type': 'application/json', ...init?.headers },
});
if (!res.ok) {
const text = await res.text().catch(() => '');
throw new Error(`Ollama ${path} failed (${res.status}): ${text.slice(0, 200)}`);
}
return (await res.json()) as T;
} finally {
clearTimeout(timeout);
}
}
/** List all locally available models (GET /api/tags). */
async tags(): Promise<OllamaModel[]> {
const data = await this.fetchJson<{ models: OllamaModel[] }>('/api/tags');
return data.models ?? [];
}
/** List currently running/loaded models (GET /api/ps). */
async ps(): Promise<OllamaRunningModel[]> {
const data = await this.fetchJson<{ models: OllamaRunningModel[] }>('/api/ps');
return data.models ?? [];
}
/** Show model details (POST /api/show). */
async show(model: string): Promise<OllamaShowResponse> {
return this.fetchJson<OllamaShowResponse>('/api/show', {
method: 'POST',
body: JSON.stringify({ name: model }),
});
}
/**
* Pull a model from the Ollama registry (POST /api/pull).
*
* When `stream: false`, waits for the full download to complete.
* When `stream: true`, returns an async generator of progress chunks.
*/
async pull(model: string, stream?: false): Promise<{ status: string }>;
async pull(model: string, stream: true): Promise<AsyncGenerator<OllamaPullProgress>>;
async pull(
model: string,
stream: boolean = false
): Promise<{ status: string } | AsyncGenerator<OllamaPullProgress>> {
if (!stream) {
const res = await fetch(`${this.baseUrl}/api/pull`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ name: model, stream: false }),
signal: AbortSignal.timeout(this.timeoutMs),
});
if (!res.ok) {
const text = await res.text().catch(() => '');
throw new Error(`Ollama pull failed (${res.status}): ${text.slice(0, 200)}`);
}
return (await res.json()) as { status: string };
}
// Streaming pull — return async generator
const baseUrl = this.baseUrl;
const res = await fetch(`${baseUrl}/api/pull`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ name: model, stream: true }),
});
if (!res.ok) {
const text = await res.text().catch(() => '');
throw new Error(`Ollama pull failed (${res.status}): ${text.slice(0, 200)}`);
}
if (!res.body) throw new Error('No response body from Ollama pull');
const { parseNdjsonStream } = await import('./ndjson.js');
return parseNdjsonStream<OllamaPullProgress>(res.body);
}
/**
* Load a model into memory (POST /api/generate with empty prompt + keep_alive).
*
* @param model - Model name
* @param keepAlive - How long to keep the model loaded (default: '10m')
*/
async load(model: string, keepAlive: string = '10m'): Promise<void> {
await this.fetchJson('/api/generate', {
method: 'POST',
body: JSON.stringify({ model, prompt: '', keep_alive: keepAlive }),
});
}
/**
* Unload a model from memory (POST /api/generate with keep_alive: '0').
*/
async unload(model: string): Promise<void> {
await this.fetchJson('/api/generate', {
method: 'POST',
body: JSON.stringify({ model, prompt: '', keep_alive: '0' }),
});
}
/** Delete a model (DELETE /api/delete). */
async delete(model: string): Promise<void> {
const res = await fetch(`${this.baseUrl}/api/delete`, {
method: 'DELETE',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ name: model }),
signal: AbortSignal.timeout(this.timeoutMs),
});
if (!res.ok) {
const text = await res.text().catch(() => '');
throw new Error(`Ollama delete failed (${res.status}): ${text.slice(0, 200)}`);
}
}
/** Get Ollama server version (GET /api/version). */
async version(): Promise<string> {
const data = await this.fetchJson<OllamaVersionResponse>('/api/version');
return data.version;
}
}

View File

@ -0,0 +1,43 @@
import { describe, it, expect } from 'vitest';
import { resolveOllamaUrl } from './config.js';
describe('resolveOllamaUrl', () => {
it('returns default localhost when no env vars set', () => {
expect(resolveOllamaUrl({})).toBe('http://localhost:11434');
});
it('uses OLLAMA_URL when set', () => {
expect(resolveOllamaUrl({ OLLAMA_URL: 'http://myhost:11434' })).toBe('http://myhost:11434');
});
it('uses OLLAMA_HOST when set', () => {
expect(resolveOllamaUrl({ OLLAMA_HOST: 'http://otherhost:11434' })).toBe(
'http://otherhost:11434'
);
});
it('prefers OLLAMA_URL over OLLAMA_HOST', () => {
expect(
resolveOllamaUrl({
OLLAMA_URL: 'http://primary:11434',
OLLAMA_HOST: 'http://secondary:11434',
})
).toBe('http://primary:11434');
});
it('normalizes URL without scheme', () => {
expect(resolveOllamaUrl({ OLLAMA_URL: 'myhost:11434' })).toBe('http://myhost:11434');
});
it('strips trailing slashes', () => {
expect(resolveOllamaUrl({ OLLAMA_URL: 'http://myhost:11434///' })).toBe('http://myhost:11434');
});
it('trims whitespace', () => {
expect(resolveOllamaUrl({ OLLAMA_URL: ' http://myhost:11434 ' })).toBe('http://myhost:11434');
});
it('preserves https scheme', () => {
expect(resolveOllamaUrl({ OLLAMA_URL: 'https://secure:11434' })).toBe('https://secure:11434');
});
});

View File

@ -0,0 +1,47 @@
import { execSync } from 'child_process';
import fs from 'fs';
function normalizeUrl(input: string): string {
const trimmed = input.trim().replace(/\/+$/, '');
if (trimmed.startsWith('http://') || trimmed.startsWith('https://')) {
return trimmed;
}
return `http://${trimmed}`;
}
function detectWslGatewayOllamaUrl(): string | null {
try {
if (process.platform !== 'linux') return null;
const version = fs.readFileSync('/proc/version', 'utf-8').toLowerCase();
if (!version.includes('microsoft')) return null;
const gw = execSync("ip route show default | awk '{print $3}' | head -1", {
encoding: 'utf-8',
stdio: ['ignore', 'pipe', 'ignore'],
}).trim();
if (!gw) return null;
return `http://${gw}:11434`;
} catch {
return null;
}
}
/**
* Resolve the Ollama base URL from environment variables with WSL2 fallback.
*
* Priority:
* 1. `OLLAMA_URL` or `OLLAMA_HOST` env var (explicit config)
* 2. WSL2 gateway IP (Windows-hosted Ollama detected via /proc/version)
* 3. `http://localhost:11434` (default)
*
* @param env - Environment variables object (defaults to `process.env`)
*/
export function resolveOllamaUrl(env: Record<string, string | undefined> = process.env): string {
const explicit = env.OLLAMA_URL || env.OLLAMA_HOST;
if (explicit) return normalizeUrl(explicit);
const inferred = detectWslGatewayOllamaUrl();
if (inferred) return inferred;
return 'http://localhost:11434';
}

View File

@ -0,0 +1,66 @@
import { describe, it, expect, vi, beforeEach } from 'vitest';
import { getEmbedding, getEmbeddingVector } from './embed.js';
const BASE_URL = 'http://localhost:11434';
describe('getEmbedding', () => {
beforeEach(() => {
vi.restoreAllMocks();
});
it('returns embedding response', async () => {
const response = { model: 'nomic-embed-text', embeddings: [[0.1, 0.2, 0.3]] };
globalThis.fetch = vi.fn().mockResolvedValue({
ok: true,
json: () => Promise.resolve(response),
});
const result = await getEmbedding(BASE_URL, { model: 'nomic-embed-text', input: 'hello' });
expect(result).toEqual(response);
expect(globalThis.fetch).toHaveBeenCalledWith(
`${BASE_URL}/api/embed`,
expect.objectContaining({
method: 'POST',
body: JSON.stringify({ model: 'nomic-embed-text', input: 'hello' }),
})
);
});
it('throws on error response', async () => {
globalThis.fetch = vi.fn().mockResolvedValue({
ok: false,
status: 500,
text: () => Promise.resolve('internal error'),
});
await expect(
getEmbedding(BASE_URL, { model: 'nomic-embed-text', input: 'hello' })
).rejects.toThrow('Ollama embed failed (500)');
});
});
describe('getEmbeddingVector', () => {
beforeEach(() => {
vi.restoreAllMocks();
});
it('returns first embedding vector', async () => {
globalThis.fetch = vi.fn().mockResolvedValue({
ok: true,
json: () => Promise.resolve({ model: 'nomic-embed-text', embeddings: [[0.1, 0.2]] }),
});
const result = await getEmbeddingVector(BASE_URL, 'hello');
expect(result).toEqual([0.1, 0.2]);
});
it('returns empty array when no embeddings', async () => {
globalThis.fetch = vi.fn().mockResolvedValue({
ok: true,
json: () => Promise.resolve({ model: 'nomic-embed-text', embeddings: [] }),
});
const result = await getEmbeddingVector(BASE_URL, 'hello');
expect(result).toEqual([]);
});
});

View File

@ -0,0 +1,47 @@
import type { OllamaEmbedOptions, OllamaEmbeddingResponse } from './types.js';
/**
* Get embeddings for text using an Ollama embedding model.
*
* @param baseUrl - Ollama server base URL
* @param options - Embedding options (model, input text)
* @returns Array of embedding vectors (one per input string)
*/
export async function getEmbedding(
baseUrl: string,
options: OllamaEmbedOptions
): Promise<OllamaEmbeddingResponse> {
const res = await fetch(`${baseUrl}/api/embed`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
model: options.model,
input: options.input,
...(options.options && { options: options.options }),
}),
});
if (!res.ok) {
const text = await res.text().catch(() => '');
throw new Error(`Ollama embed failed (${res.status}): ${text.slice(0, 200)}`);
}
return (await res.json()) as OllamaEmbeddingResponse;
}
/**
* Convenience: get a single embedding vector for a text string.
*
* @param baseUrl - Ollama server base URL
* @param text - Text to embed
* @param model - Embedding model name (default: 'nomic-embed-text')
* @returns Single embedding vector
*/
export async function getEmbeddingVector(
baseUrl: string,
text: string,
model: string = 'nomic-embed-text'
): Promise<number[]> {
const response = await getEmbedding(baseUrl, { model, input: text });
return response.embeddings?.[0] ?? [];
}

View File

@ -0,0 +1,73 @@
import { describe, it, expect } from 'vitest';
import { formatBytes, estimateTokens, getModelContextWindow, formatUptime } from './format.js';
describe('formatBytes', () => {
it('formats 0 bytes', () => {
expect(formatBytes(0)).toBe('0 B');
});
it('formats bytes', () => {
expect(formatBytes(512)).toBe('512 B');
});
it('formats kilobytes', () => {
expect(formatBytes(1536)).toBe('1.5 KB');
});
it('formats megabytes', () => {
expect(formatBytes(5242880)).toBe('5 MB');
});
it('formats gigabytes', () => {
expect(formatBytes(4294967296)).toBe('4 GB');
});
});
describe('estimateTokens', () => {
it('returns 0 for empty string', () => {
expect(estimateTokens('')).toBe(0);
});
it('returns 0 for whitespace-only string', () => {
expect(estimateTokens(' ')).toBe(0);
});
it('estimates tokens for a sentence', () => {
const result = estimateTokens('Hello world this is a test');
expect(result).toBeGreaterThan(0);
// 6 words × 1.3 = 7.8, ceil = 8
expect(result).toBe(8);
});
});
describe('getModelContextWindow', () => {
it('returns 128k for models with 128k in name', () => {
expect(getModelContextWindow('llama3-128k')).toBe(128_000);
});
it('returns 32k for models with 32k in name', () => {
expect(getModelContextWindow('gpt4-32k')).toBe(32_000);
});
it('returns 4096 for models without size marker', () => {
expect(getModelContextWindow('llama3:latest')).toBe(4_096);
});
});
describe('formatUptime', () => {
it('formats minutes only', () => {
expect(formatUptime(120)).toBe('2m');
});
it('formats hours and minutes', () => {
expect(formatUptime(3661)).toBe('1h 1m');
});
it('formats days, hours, and minutes', () => {
expect(formatUptime(90061)).toBe('1d 1h 1m');
});
it('formats zero', () => {
expect(formatUptime(0)).toBe('0m');
});
});

View File

@ -0,0 +1,56 @@
/**
* Format a byte count into a human-readable string.
*
* @example formatBytes(1536) // '1.5 KB'
* @example formatBytes(0) // '0 B'
*/
export function formatBytes(bytes: number): string {
if (bytes === 0) return '0 B';
const k = 1024;
const sizes = ['B', 'KB', 'MB', 'GB', 'TB'];
const i = Math.floor(Math.log(bytes) / Math.log(k));
return `${parseFloat((bytes / Math.pow(k, i)).toFixed(1))} ${sizes[i]}`;
}
/**
* Approximate token count for a text string.
*
* Uses a word-count × 1.3 heuristic (typical for English text with LLM tokenizers).
*/
export function estimateTokens(text: string): number {
const trimmed = text.trim();
if (!trimmed) return 0;
return Math.ceil(trimmed.split(/\s+/).length * 1.3);
}
/**
* Best-effort model context window lookup based on model name.
*
* Checks for common context window markers in the model name string.
* Falls back to 4096 if no marker is found.
*/
export function getModelContextWindow(modelName: string): number {
const n = modelName.toLowerCase();
if (n.includes('128k')) return 128_000;
if (n.includes('64k')) return 64_000;
if (n.includes('32k')) return 32_000;
if (n.includes('16k')) return 16_000;
if (n.includes('8k')) return 8_000;
return 4_096;
}
/**
* Format a duration in seconds to a human-readable uptime string.
*
* @example formatUptime(90061) // '1d 1h 1m'
* @example formatUptime(3661) // '1h 1m'
* @example formatUptime(120) // '2m'
*/
export function formatUptime(seconds: number): string {
const d = Math.floor(seconds / 86400);
const h = Math.floor((seconds % 86400) / 3600);
const m = Math.floor((seconds % 3600) / 60);
if (d > 0) return `${d}d ${h}h ${m}m`;
if (h > 0) return `${h}h ${m}m`;
return `${m}m`;
}

View File

@ -0,0 +1,53 @@
import { describe, it, expect, vi, beforeEach } from 'vitest';
import { checkHealth, checkHealthDetailed } from './health.js';
const BASE_URL = 'http://localhost:11434';
describe('checkHealth', () => {
beforeEach(() => {
vi.restoreAllMocks();
});
it('returns true when server is healthy', async () => {
globalThis.fetch = vi.fn().mockResolvedValue({ ok: true });
expect(await checkHealth(BASE_URL)).toBe(true);
});
it('returns false when server returns non-ok', async () => {
globalThis.fetch = vi.fn().mockResolvedValue({ ok: false });
expect(await checkHealth(BASE_URL)).toBe(false);
});
it('returns false when fetch throws', async () => {
globalThis.fetch = vi.fn().mockRejectedValue(new Error('ECONNREFUSED'));
expect(await checkHealth(BASE_URL)).toBe(false);
});
});
describe('checkHealthDetailed', () => {
beforeEach(() => {
vi.restoreAllMocks();
});
it('returns online + version when server is healthy', async () => {
globalThis.fetch = vi.fn().mockImplementation((url: string) => {
if (url.includes('/api/tags')) return Promise.resolve({ ok: true });
if (url.includes('/api/version'))
return Promise.resolve({
ok: true,
json: () => Promise.resolve({ version: '0.5.4' }),
});
return Promise.reject(new Error('unexpected'));
});
const result = await checkHealthDetailed(BASE_URL);
expect(result).toEqual({ online: true, version: '0.5.4', url: BASE_URL });
});
it('returns offline when server is unreachable', async () => {
globalThis.fetch = vi.fn().mockRejectedValue(new Error('ECONNREFUSED'));
const result = await checkHealthDetailed(BASE_URL);
expect(result).toEqual({ online: false, version: null, url: BASE_URL });
});
});

View File

@ -0,0 +1,44 @@
/**
* Check if the Ollama server is reachable.
*
* @param baseUrl - Ollama server base URL
* @param timeoutMs - Timeout in milliseconds (default: 3000)
* @returns `true` if the server responds to GET /api/tags within the timeout
*/
export async function checkHealth(baseUrl: string, timeoutMs: number = 3000): Promise<boolean> {
try {
const res = await fetch(`${baseUrl}/api/tags`, {
signal: AbortSignal.timeout(timeoutMs),
});
return res.ok;
} catch {
return false;
}
}
/**
* Check health and return structured result with version info.
*/
export async function checkHealthDetailed(
baseUrl: string,
timeoutMs: number = 3000
): Promise<{ online: boolean; version: string | null; url: string }> {
try {
const [tagsRes, versionRes] = await Promise.allSettled([
fetch(`${baseUrl}/api/tags`, { signal: AbortSignal.timeout(timeoutMs) }),
fetch(`${baseUrl}/api/version`, { signal: AbortSignal.timeout(timeoutMs) }),
]);
const online = tagsRes.status === 'fulfilled' && tagsRes.value.ok;
let version: string | null = null;
if (versionRes.status === 'fulfilled' && versionRes.value.ok) {
const data = (await versionRes.value.json()) as { version?: string };
version = data.version ?? null;
}
return { online, version, url: baseUrl };
} catch {
return { online: false, version: null, url: baseUrl };
}
}

View File

@ -0,0 +1,41 @@
// Types
export type {
OllamaModel,
OllamaModelDetails,
OllamaRunningModel,
OllamaChatMessage,
OllamaStreamChunk,
OllamaGenerateChunk,
OllamaEmbeddingResponse,
OllamaShowResponse,
OllamaPullProgress,
OllamaVersionResponse,
OllamaClientOptions,
OllamaChatOptions,
OllamaGenerateOptions,
OllamaEmbedOptions,
} from './types.js';
// Config
export { resolveOllamaUrl } from './config.js';
// Client
export { OllamaClient } from './client.js';
// Streaming
export { streamChat, streamGenerate } from './stream.js';
// Embeddings
export { getEmbedding, getEmbeddingVector } from './embed.js';
// Health
export { checkHealth, checkHealthDetailed } from './health.js';
// NDJSON parser
export { parseNdjsonStream } from './ndjson.js';
// Client-side stream consumers
export { consumeSSEStream, consumeNdjsonStream } from './client-parsers.js';
// Format utilities
export { formatBytes, estimateTokens, getModelContextWindow, formatUptime } from './format.js';

View File

@ -0,0 +1,73 @@
import { describe, it, expect } from 'vitest';
import { parseNdjsonStream } from './ndjson.js';
function createReadableStream(chunks: string[]): ReadableStream<Uint8Array> {
const encoder = new TextEncoder();
let index = 0;
return new ReadableStream({
pull(controller) {
if (index < chunks.length) {
controller.enqueue(encoder.encode(chunks[index]));
index++;
} else {
controller.close();
}
},
});
}
describe('parseNdjsonStream', () => {
it('parses complete NDJSON lines', async () => {
const stream = createReadableStream(['{"a":1}\n{"a":2}\n{"a":3}\n']);
const results: unknown[] = [];
for await (const chunk of parseNdjsonStream(stream)) {
results.push(chunk);
}
expect(results).toEqual([{ a: 1 }, { a: 2 }, { a: 3 }]);
});
it('handles partial lines split across chunks', async () => {
const stream = createReadableStream(['{"a":', '1}\n{"a":2}\n']);
const results: unknown[] = [];
for await (const chunk of parseNdjsonStream(stream)) {
results.push(chunk);
}
expect(results).toEqual([{ a: 1 }, { a: 2 }]);
});
it('skips empty lines', async () => {
const stream = createReadableStream(['{"a":1}\n\n\n{"a":2}\n']);
const results: unknown[] = [];
for await (const chunk of parseNdjsonStream(stream)) {
results.push(chunk);
}
expect(results).toEqual([{ a: 1 }, { a: 2 }]);
});
it('skips malformed JSON lines', async () => {
const stream = createReadableStream(['{"a":1}\nnot json\n{"a":2}\n']);
const results: unknown[] = [];
for await (const chunk of parseNdjsonStream(stream)) {
results.push(chunk);
}
expect(results).toEqual([{ a: 1 }, { a: 2 }]);
});
it('processes remaining buffer after stream ends', async () => {
const stream = createReadableStream(['{"a":1}\n{"a":2}']);
const results: unknown[] = [];
for await (const chunk of parseNdjsonStream(stream)) {
results.push(chunk);
}
expect(results).toEqual([{ a: 1 }, { a: 2 }]);
});
it('handles empty stream', async () => {
const stream = createReadableStream([]);
const results: unknown[] = [];
for await (const chunk of parseNdjsonStream(stream)) {
results.push(chunk);
}
expect(results).toEqual([]);
});
});

View File

@ -0,0 +1,45 @@
/**
* Parse an NDJSON (newline-delimited JSON) stream into an async generator.
*
* Works with both Node.js `ReadableStream` and browser `ReadableStream<Uint8Array>`.
* Handles partial lines across chunk boundaries gracefully.
*/
export async function* parseNdjsonStream<T = unknown>(
body: ReadableStream<Uint8Array>
): AsyncGenerator<T> {
const reader = body.getReader();
const decoder = new TextDecoder();
let buffer = '';
try {
while (true) {
const { done, value } = await reader.read();
if (done) break;
buffer += decoder.decode(value, { stream: true });
const lines = buffer.split('\n');
buffer = lines.pop() ?? '';
for (const line of lines) {
const trimmed = line.trim();
if (!trimmed) continue;
try {
yield JSON.parse(trimmed) as T;
} catch {
// skip malformed lines
}
}
}
// Process remaining buffer after stream ends
if (buffer.trim()) {
try {
yield JSON.parse(buffer.trim()) as T;
} catch {
// skip
}
}
} finally {
reader.releaseLock();
}
}

View File

@ -0,0 +1,99 @@
import { describe, it, expect, vi, beforeEach } from 'vitest';
import { streamChat, streamGenerate } from './stream.js';
const BASE_URL = 'http://localhost:11434';
function createNdjsonResponse(chunks: object[]): Response {
const encoder = new TextEncoder();
const ndjson = chunks.map(c => JSON.stringify(c)).join('\n') + '\n';
const stream = new ReadableStream<Uint8Array>({
start(controller) {
controller.enqueue(encoder.encode(ndjson));
controller.close();
},
});
return { ok: true, status: 200, body: stream } as unknown as Response;
}
describe('streamChat', () => {
beforeEach(() => {
vi.restoreAllMocks();
});
it('yields chat stream chunks', async () => {
const chunks = [
{ model: 'llama3', message: { role: 'assistant', content: 'Hello' }, done: false },
{ model: 'llama3', message: { role: 'assistant', content: ' world' }, done: false },
{ model: 'llama3', message: { role: 'assistant', content: '' }, done: true, eval_count: 10 },
];
globalThis.fetch = vi.fn().mockResolvedValue(createNdjsonResponse(chunks));
const results = [];
for await (const chunk of streamChat(BASE_URL, {
model: 'llama3',
messages: [{ role: 'user', content: 'Hi' }],
})) {
results.push(chunk);
}
expect(results).toHaveLength(3);
expect(results[0].message.content).toBe('Hello');
expect(results[2].done).toBe(true);
expect(results[2].eval_count).toBe(10);
});
it('throws on non-ok response', async () => {
globalThis.fetch = vi.fn().mockResolvedValue({
ok: false,
status: 500,
text: () => Promise.resolve('internal error'),
});
const gen = streamChat(BASE_URL, {
model: 'llama3',
messages: [{ role: 'user', content: 'Hi' }],
});
await expect(gen.next()).rejects.toThrow('Ollama chat failed (500)');
});
it('throws when response has no body', async () => {
globalThis.fetch = vi.fn().mockResolvedValue({
ok: true,
status: 200,
body: null,
});
const gen = streamChat(BASE_URL, {
model: 'llama3',
messages: [{ role: 'user', content: 'Hi' }],
});
await expect(gen.next()).rejects.toThrow('No response body');
});
});
describe('streamGenerate', () => {
beforeEach(() => {
vi.restoreAllMocks();
});
it('yields generate stream chunks', async () => {
const chunks = [
{ model: 'llama3', response: 'Hello', done: false },
{ model: 'llama3', response: ' world', done: false },
{ model: 'llama3', response: '', done: true, eval_count: 5 },
];
globalThis.fetch = vi.fn().mockResolvedValue(createNdjsonResponse(chunks));
const results = [];
for await (const chunk of streamGenerate(BASE_URL, {
model: 'llama3',
prompt: 'Say hello',
})) {
results.push(chunk);
}
expect(results).toHaveLength(3);
expect(results[0].response).toBe('Hello');
expect(results[2].done).toBe(true);
});
});

View File

@ -0,0 +1,89 @@
import type {
OllamaChatOptions,
OllamaGenerateOptions,
OllamaStreamChunk,
OllamaGenerateChunk,
} from './types.js';
import { parseNdjsonStream } from './ndjson.js';
/**
* Stream a chat completion from Ollama.
*
* Yields NDJSON chunks from `POST /api/chat` as an async generator.
*
* @param baseUrl - Ollama server base URL
* @param options - Chat options (model, messages, signal, etc.)
*/
export async function* streamChat(
baseUrl: string,
options: OllamaChatOptions
): AsyncGenerator<OllamaStreamChunk> {
const { model, messages, signal, ...rest } = options;
const res = await fetch(`${baseUrl}/api/chat`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
model,
messages,
stream: true,
...('options' in rest && rest.options ? { options: rest.options } : {}),
...('format' in rest && rest.format ? { format: rest.format } : {}),
...('keep_alive' in rest && rest.keep_alive ? { keep_alive: rest.keep_alive } : {}),
}),
signal,
});
if (!res.ok) {
const text = await res.text().catch(() => '');
throw new Error(`Ollama chat failed (${res.status}): ${text.slice(0, 200)}`);
}
if (!res.body) {
throw new Error('No response body from Ollama');
}
yield* parseNdjsonStream<OllamaStreamChunk>(res.body);
}
/**
* Stream a text generation from Ollama.
*
* Yields NDJSON chunks from `POST /api/generate` as an async generator.
*
* @param baseUrl - Ollama server base URL
* @param options - Generate options (model, prompt, signal, etc.)
*/
export async function* streamGenerate(
baseUrl: string,
options: OllamaGenerateOptions
): AsyncGenerator<OllamaGenerateChunk> {
const { model, prompt, signal, ...rest } = options;
const res = await fetch(`${baseUrl}/api/generate`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
model,
prompt,
stream: true,
...('system' in rest && rest.system ? { system: rest.system } : {}),
...('options' in rest && rest.options ? { options: rest.options } : {}),
...('format' in rest && rest.format ? { format: rest.format } : {}),
...('keep_alive' in rest && rest.keep_alive ? { keep_alive: rest.keep_alive } : {}),
...('context' in rest && rest.context ? { context: rest.context } : {}),
}),
signal,
});
if (!res.ok) {
const text = await res.text().catch(() => '');
throw new Error(`Ollama generate failed (${res.status}): ${text.slice(0, 200)}`);
}
if (!res.body) {
throw new Error('No response body from Ollama');
}
yield* parseNdjsonStream<OllamaGenerateChunk>(res.body);
}

View File

@ -0,0 +1,133 @@
// --- Model types ---
export interface OllamaModelDetails {
family?: string;
families?: string[];
format?: string;
parameter_size?: string;
quantization_level?: string;
}
export interface OllamaModel {
name: string;
model?: string;
size: number;
digest: string;
modified_at: string;
details?: OllamaModelDetails;
}
export interface OllamaRunningModel {
name: string;
model?: string;
size: number;
digest: string;
expires_at: string;
size_vram: number;
details?: OllamaModelDetails;
}
// --- Chat types ---
export interface OllamaChatMessage {
role: 'user' | 'assistant' | 'system';
content: string;
images?: string[];
}
export interface OllamaStreamChunk {
model: string;
message: { role: string; content: string };
done: boolean;
total_duration?: number;
eval_count?: number;
eval_duration?: number;
prompt_eval_count?: number;
prompt_eval_duration?: number;
load_duration?: number;
}
// --- Generate types ---
export interface OllamaGenerateChunk {
model: string;
response: string;
done: boolean;
total_duration?: number;
eval_count?: number;
eval_duration?: number;
prompt_eval_count?: number;
prompt_eval_duration?: number;
load_duration?: number;
context?: number[];
}
// --- Embedding types ---
export interface OllamaEmbeddingResponse {
model: string;
embeddings: number[][];
total_duration?: number;
load_duration?: number;
prompt_eval_count?: number;
}
// --- Show types ---
export interface OllamaShowResponse {
modelfile: string;
parameters: string;
template: string;
details: OllamaModelDetails;
model_info?: Record<string, unknown>;
}
// --- Pull progress types ---
export interface OllamaPullProgress {
status: string;
digest?: string;
total?: number;
completed?: number;
}
// --- Version types ---
export interface OllamaVersionResponse {
version: string;
}
// --- Client options ---
export interface OllamaClientOptions {
/** Base URL of the Ollama server (e.g. http://localhost:11434) */
baseUrl: string;
/** Default request timeout in milliseconds (default: 30000) */
timeoutMs?: number;
}
export interface OllamaChatOptions {
model: string;
messages: OllamaChatMessage[];
signal?: AbortSignal;
options?: Record<string, unknown>;
format?: 'json' | string;
keep_alive?: string;
}
export interface OllamaGenerateOptions {
model: string;
prompt: string;
signal?: AbortSignal;
system?: string;
options?: Record<string, unknown>;
format?: 'json' | string;
keep_alive?: string;
context?: number[];
}
export interface OllamaEmbedOptions {
model: string;
input: string | string[];
options?: Record<string, unknown>;
}

View File

@ -0,0 +1,9 @@
{
"extends": "../../tsconfig.base.json",
"compilerOptions": {
"outDir": "dist",
"rootDir": "src"
},
"include": ["src"],
"exclude": ["src/**/*.test.ts"]
}

View File

@ -0,0 +1,9 @@
import { defineConfig } from 'vitest/config';
export default defineConfig({
test: {
globals: true,
passWithNoTests: true,
pool: 'forks',
},
});