feat(local-llm): Sprint 2 — streaming pull progress, token metrics, fixes (B2/F1,F6,B7,B8)

New features:
- B2/F1: Streaming model pull with real-time progress bar. New
  /api/ollama/pull/route.ts pipes NDJSON from Ollama stream:true.
  UI shows status, completed/total bytes, and percentage during download.
- F6: Token/s metrics after prompt generation. Parses eval_count and
  eval_duration from the final NDJSON chunk. Displays tok/s, total
  tokens, and duration in the prompt modal footer.

Bug fixes:
- B7: Parse vm_stat page size from output instead of hardcoding 16384.
  Reads 'page size of N bytes' from the vm_stat output for portability,
  falling back to 16384 when that text is not found.
- B8: Whisper model discovery now scans multiple directories, in order:
  each entry of the colon-separated WHISPER_MODELS_DIR env var,
  ~/whisper-models, /opt/homebrew/share/whisper-cpp/models, and
  ~/.cache/whisper. Returns the first directory that contains .bin files.
This commit is contained in:
saravanakumardb1 2026-02-19 15:16:33 -08:00
parent 9a807f64cf
commit 2d9475bd15
4 changed files with 164 additions and 26 deletions

View File

@ -0,0 +1,44 @@
import { NextRequest } from 'next/server';
/** Base URL of the Ollama server; override with the OLLAMA_URL environment variable. */
const OLLAMA_URL = process.env.OLLAMA_URL || 'http://localhost:11434';

/** Builds a JSON error response of the shape `{ error: string }` with the given HTTP status. */
function jsonError(message: string, status: number): Response {
  return new Response(JSON.stringify({ error: message }), {
    status,
    headers: { 'Content-Type': 'application/json' },
  });
}

/**
 * Proxies a streaming model pull to Ollama.
 *
 * Expects a JSON body of the form `{ "model": "<name>" }`. On success the
 * NDJSON progress stream returned by Ollama's `/api/pull` is piped to the
 * client unchanged.
 *
 * @param request Incoming request carrying the JSON body.
 * @returns A streaming `application/x-ndjson` response on success, or a JSON
 *   `{ error }` response: 400 when the body is not valid JSON or has no usable
 *   model name, 500 when Ollama is unreachable or answers with an error.
 */
export async function POST(request: NextRequest) {
  // A body that cannot be parsed is the caller's mistake, so report it as 400
  // rather than letting it surface as a generic 500.
  let body: unknown;
  try {
    body = await request.json();
  } catch {
    return jsonError('Invalid JSON body', 400);
  }

  // Valid JSON may still be `null`, a number, an array, etc.; only read
  // `model` from an actual object.
  const model =
    typeof body === 'object' && body !== null ? (body as { model?: unknown }).model : undefined;
  if (typeof model !== 'string' || !model.trim()) {
    return jsonError('Missing model name', 400);
  }

  try {
    const response = await fetch(`${OLLAMA_URL}/api/pull`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ name: model.trim(), stream: true }),
      // Tie the upstream request to the client request so an aborted client
      // does not leave this proxy's connection to Ollama open.
      signal: request.signal,
    });
    if (!response.ok || !response.body) {
      return jsonError(`Ollama pull error: ${response.status}`, 500);
    }
    // Pipe the Ollama pull stream directly to the client. Transfer-Encoding is
    // deliberately not set: it is a hop-by-hop header that the runtime manages
    // for streamed bodies, and it is not valid on HTTP/2 connections.
    return new Response(response.body, {
      headers: {
        'Content-Type': 'application/x-ndjson',
        'Cache-Control': 'no-cache',
      },
    });
  } catch (err) {
    return jsonError(String(err), 500);
  }
}

View File

@ -100,7 +100,8 @@ async function getAccurateMemory(): Promise<{
const totalMem = os.totalmem();
try {
const { stdout } = await execAsync('vm_stat', { timeout: 2000 });
const pageSize = 16384; // macOS Apple Silicon default
const pageSizeMatch = stdout.match(/page size of (\d+) bytes/);
const pageSize = pageSizeMatch ? parseInt(pageSizeMatch[1]) : 16384;
const parse = (label: string): number => {
const match = stdout.match(new RegExp(`${label}:\\s+(\\d+)`));
return match ? parseInt(match[1]) * pageSize : 0;

View File

@ -20,23 +20,37 @@ async function getWhisperBinaries(): Promise<string[]> {
}
}
async function getWhisperModels(): Promise<Array<{ name: string; size: number; path: string }>> {
const modelsDir = join(homedir(), 'whisper-models');
try {
const files = await readdir(modelsDir);
const models = await Promise.all(
files
.filter(f => f.endsWith('.bin'))
.map(async f => {
const filePath = join(modelsDir, f);
const WHISPER_MODEL_DIRS = (process.env.WHISPER_MODELS_DIR || '')
.split(':')
.filter(Boolean)
.concat([
join(homedir(), 'whisper-models'),
'/opt/homebrew/share/whisper-cpp/models',
join(homedir(), '.cache', 'whisper'),
]);
async function getWhisperModels(): Promise<{
models: Array<{ name: string; size: number; path: string }>;
modelsDir: string;
}> {
for (const dir of WHISPER_MODEL_DIRS) {
try {
const files = await readdir(dir);
const binFiles = files.filter(f => f.endsWith('.bin'));
if (binFiles.length === 0) continue;
const models = await Promise.all(
binFiles.map(async f => {
const filePath = join(dir, f);
const s = await stat(filePath);
return { name: f.replace('ggml-', '').replace('.bin', ''), size: s.size, path: filePath };
})
);
return models;
} catch {
return [];
);
return { models, modelsDir: dir };
} catch {
// dir doesn't exist, try next
}
}
return { models: [], modelsDir: WHISPER_MODEL_DIRS[0] };
}
async function getWhisperVersion(): Promise<string> {
@ -49,7 +63,7 @@ async function getWhisperVersion(): Promise<string> {
}
export async function GET() {
const [binaries, models, version] = await Promise.all([
const [binaries, whisperResult, version] = await Promise.all([
getWhisperBinaries(),
getWhisperModels(),
getWhisperVersion(),
@ -59,7 +73,7 @@ export async function GET() {
installed: binaries.length > 0,
version,
binaries,
models,
modelsDir: join(homedir(), 'whisper-models'),
models: whisperResult.models,
modelsDir: whisperResult.modelsDir,
});
}

View File

@ -150,7 +150,17 @@ export default function Dashboard() {
const [toasts, setToasts] = useState<Toast[]>([]);
const [pullInput, setPullInput] = useState('');
const [pullLoading, setPullLoading] = useState(false);
const [pullProgress, setPullProgress] = useState<{
status: string;
completed: number;
total: number;
} | null>(null);
const [copied, setCopied] = useState(false);
const [streamMetrics, setStreamMetrics] = useState<{
tokensPerSec: number;
totalTokens: number;
durationMs: number;
} | null>(null);
const [deleteConfirm, setDeleteConfirm] = useState<string | null>(null);
const responseRef = useRef<HTMLDivElement>(null);
const abortRef = useRef<AbortController | null>(null);
@ -228,26 +238,60 @@ export default function Dashboard() {
const handlePull = async () => {
if (!pullInput.trim()) return;
const modelName = pullInput.trim();
setPullLoading(true);
addToast(`Pulling ${pullInput}... this may take a while`, 'info');
setPullProgress({ status: 'starting', completed: 0, total: 0 });
try {
const res = await fetch('/api/ollama', {
const res = await fetch('/api/ollama/pull', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ action: 'pull', model: pullInput.trim() }),
body: JSON.stringify({ model: modelName }),
});
const data = await res.json();
if (data.success) {
addToast(`Successfully pulled ${pullInput}`, 'success');
setPullInput('');
} else {
addToast(data.error || 'Pull failed', 'error');
if (!res.ok || !res.body) {
addToast(`Pull failed: ${res.status}`, 'error');
setPullLoading(false);
setPullProgress(null);
return;
}
const reader = res.body.getReader();
const decoder = new TextDecoder();
let buffer = '';
while (true) {
const { done, value } = await reader.read();
if (done) break;
buffer += decoder.decode(value, { stream: true });
const lines = buffer.split('\n');
buffer = lines.pop() || '';
for (const line of lines) {
if (!line.trim()) continue;
try {
const chunk = JSON.parse(line);
if (chunk.total && chunk.completed) {
setPullProgress({
status: chunk.status || 'downloading',
completed: chunk.completed,
total: chunk.total,
});
} else if (chunk.status) {
setPullProgress(prev => ({
status: chunk.status,
completed: prev?.completed || 0,
total: prev?.total || 0,
}));
}
} catch {
/* skip */
}
}
}
addToast(`Successfully pulled ${modelName}`, 'success');
setPullInput('');
await fetchAll();
} catch (err) {
addToast(`Pull failed: ${err}`, 'error');
}
setPullLoading(false);
setPullProgress(null);
};
// Streaming prompt
@ -255,6 +299,7 @@ export default function Dashboard() {
if (!promptModel || !promptText.trim()) return;
setPromptLoading(true);
setPromptResponse('');
setStreamMetrics(null);
const controller = new AbortController();
abortRef.current = controller;
try {
@ -288,6 +333,11 @@ export default function Dashboard() {
setPromptResponse(fullResponse);
responseRef.current?.scrollTo(0, responseRef.current.scrollHeight);
}
if (chunk.done && chunk.eval_count && chunk.eval_duration) {
const durationMs = chunk.eval_duration / 1e6;
const tokensPerSec = durationMs > 0 ? (chunk.eval_count / durationMs) * 1000 : 0;
setStreamMetrics({ tokensPerSec, totalTokens: chunk.eval_count, durationMs });
}
} catch {
/* skip malformed lines */
}
@ -522,6 +572,29 @@ export default function Dashboard() {
</div>
)}
{/* Pull Progress Bar */}
{pullProgress && (
<div className="mb-4 p-3 rounded-lg" style={{ background: 'var(--surface-muted)' }}>
<div className="flex items-center justify-between mb-1.5">
<span className="text-xs font-medium" style={{ color: 'var(--text-secondary)' }}>
{pullProgress.status}
</span>
{pullProgress.total > 0 && (
<span className="text-xs font-mono" style={{ color: 'var(--text-tertiary)' }}>
{formatBytes(pullProgress.completed)} / {formatBytes(pullProgress.total)}
{' · '}
{Math.round((pullProgress.completed / pullProgress.total) * 100)}%
</span>
)}
</div>
<ProgressBar
value={pullProgress.completed}
max={pullProgress.total || 1}
color="var(--accent-primary)"
/>
</div>
)}
{ollama?.status !== 'online' ? (
<div
className="flex items-center gap-3 p-4 rounded-lg"
@ -1065,6 +1138,12 @@ export default function Dashboard() {
Streaming...
</span>
)}
{streamMetrics && !promptLoading && (
<span className="text-[11px] font-mono" style={{ color: 'var(--text-tertiary)' }}>
{streamMetrics.tokensPerSec.toFixed(1)} tok/s &middot; {streamMetrics.totalTokens}{' '}
tokens &middot; {(streamMetrics.durationMs / 1000).toFixed(1)}s
</span>
)}
</div>
{promptResponse && (
<div