feat(local-llm): Sprint 2 — streaming pull progress, token metrics, fixes (B2/F1,F6,B7,B8)

New features: - B2/F1: Streaming model pull with real-time progress bar. New /api/ollama/pull/route.ts pipes NDJSON from Ollama stream:true. UI shows status, completed/total bytes, and percentage during download. - F6: Token/s metrics after prompt generation. Parses eval_count and eval_duration from the final NDJSON chunk. Displays tok/s, total tokens, and duration in the prompt modal footer. Bug fixes: - B7: Parse vm_stat page size from output instead of hardcoding 16384. Reads 'page size of N bytes' from the first line for portability. - B8: Whisper model discovery now scans multiple directories: WHISPER_MODELS_DIR env var, ~/whisper-models, /opt/homebrew/share/whisper-cpp/models/, ~/.cache/whisper/. Returns the first dir with .bin files found.
2026-02-19 15:16:33 -08:00 · 2026-02-19 15:16:33 -08:00 · 2d9475bd15
commit 2d9475bd15
parent 9a807f64cf
4 changed files with 164 additions and 26 deletions
--- a/__LOCAL_LLMs/dashboard/src/app/api/ollama/pull/route.ts
+++ b/__LOCAL_LLMs/dashboard/src/app/api/ollama/pull/route.ts
@ -0,0 +1,44 @@
+import { NextRequest } from 'next/server';
+
+const OLLAMA_URL = process.env.OLLAMA_URL || 'http://localhost:11434';
+
+/**
+ * POST /api/ollama/pull
+ *
+ * Starts a streaming model pull on the Ollama server at `OLLAMA_URL` and
+ * relays its NDJSON progress stream to the caller unchanged.
+ *
+ * Request body: `{ "model": string }`.
+ *
+ * Responses:
+ *  - 200 `application/x-ndjson`: Ollama's pull stream, passed through as-is.
+ *  - 400 JSON `{ error }`: the body is not valid JSON or has no usable `model`.
+ *  - 500 JSON `{ error }`: Ollama was unreachable or rejected the pull.
+ */
+export async function POST(request: NextRequest) {
+  // A malformed body is the caller's mistake, so report it as 400 instead of
+  // letting it fall through to the generic 500 handler further down.
+  let payload: unknown;
+  try {
+    payload = await request.json();
+  } catch {
+    return jsonError('Invalid JSON body', 400);
+  }
+
+  // Valid JSON can still be `null`, a number, an array, ... so narrow first.
+  const model =
+    typeof payload === 'object' && payload !== null
+      ? (payload as { model?: unknown }).model
+      : undefined;
+  if (typeof model !== 'string' || !model.trim()) {
+    return jsonError('Missing model name', 400);
+  }
+
+  try {
+    const response = await fetch(`${OLLAMA_URL}/api/pull`, {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify({ name: model.trim(), stream: true }),
+    });
+
+    if (!response.ok || !response.body) {
+      // Reading the body releases the upstream connection and lets us pass
+      // along whatever explanation Ollama gave for the failure.
+      const detail = (await response.text().catch(() => '')).trim();
+      const summary = `Ollama pull error: ${response.status}`;
+      return jsonError(detail ? `${summary} ${detail}` : summary, 500);
+    }
+
+    // Pipe the Ollama pull stream directly to the client.
+    // No explicit Transfer-Encoding header: it is a hop-by-hop header that the
+    // HTTP layer sets itself for streamed bodies, and it is invalid on HTTP/2.
+    return new Response(response.body, {
+      headers: {
+        'Content-Type': 'application/x-ndjson',
+        'Cache-Control': 'no-cache',
+      },
+    });
+  } catch (err) {
+    // Typically a network failure reaching Ollama.
+    return jsonError(String(err), 500);
+  }
+}
+
+/** Builds a JSON `{ error }` response with the given HTTP status. */
+function jsonError(message: string, status: number): Response {
+  return new Response(JSON.stringify({ error: message }), {
+    status,
+    headers: { 'Content-Type': 'application/json' },
+  });
+}
--- a/__LOCAL_LLMs/dashboard/src/app/api/system/route.ts
+++ b/__LOCAL_LLMs/dashboard/src/app/api/system/route.ts
@ -100,7 +100,8 @@ async function getAccurateMemory(): Promise<{
  const totalMem = os.totalmem();
  try {
    const { stdout } = await execAsync('vm_stat', { timeout: 2000 });
-    const pageSize = 16384; // macOS Apple Silicon default
+    const pageSizeMatch = stdout.match(/page size of (\d+) bytes/);
+    const pageSize = pageSizeMatch ? parseInt(pageSizeMatch[1]) : 16384;
    const parse = (label: string): number => {
      const match = stdout.match(new RegExp(`${label}:\\s+(\\d+)`));
      return match ? parseInt(match[1]) * pageSize : 0;
--- a/__LOCAL_LLMs/dashboard/src/app/api/whisper/route.ts
+++ b/__LOCAL_LLMs/dashboard/src/app/api/whisper/route.ts
@ -20,23 +20,37 @@ async function getWhisperBinaries(): Promise<string[]> {
  }
 }

-async function getWhisperModels(): Promise<Array<{ name: string; size: number; path: string }>> {
-  const modelsDir = join(homedir(), 'whisper-models');
-  try {
-    const files = await readdir(modelsDir);
-    const models = await Promise.all(
-      files
-        .filter(f => f.endsWith('.bin'))
-        .map(async f => {
-          const filePath = join(modelsDir, f);
+const WHISPER_MODEL_DIRS = (process.env.WHISPER_MODELS_DIR || '') // candidate model directories, in search order
+  .split(':') // WHISPER_MODELS_DIR may list several directories separated by ':'
+  .filter(Boolean) // drop empty entries (unset variable, stray separators)
+  .concat([ // built-in fallbacks, tried after any directories taken from the env var
+    join(homedir(), 'whisper-models'),
+    '/opt/homebrew/share/whisper-cpp/models',
+    join(homedir(), '.cache', 'whisper'),
+  ]);
+
+async function getWhisperModels(): Promise<{ // resolves to the models of the first candidate directory that has any
+  models: Array<{ name: string; size: number; path: string }>; // name: file name with the first 'ggml-' and '.bin' removed; size: from stat()
+  modelsDir: string; // directory the models were read from
+}> {
+  for (const dir of WHISPER_MODEL_DIRS) {
+    try {
+      const files = await readdir(dir);
+      const binFiles = files.filter(f => f.endsWith('.bin'));
+      if (binFiles.length === 0) continue; // nothing usable here: try the next candidate
+      const models = await Promise.all(
+        binFiles.map(async f => {
+          const filePath = join(dir, f);
          const s = await stat(filePath);
          return { name: f.replace('ggml-', '').replace('.bin', ''), size: s.size, path: filePath };
        })
-    );
-    return models;
-  } catch {
-    return [];
+      );
+      return { models, modelsDir: dir }; // the first directory containing .bin files wins
+    } catch {
+      // readdir or stat failed (e.g. the directory does not exist): try the next candidate
+    }
  }
+  return { models: [], modelsDir: WHISPER_MODEL_DIRS[0] }; // no models anywhere: report the first candidate directory
 }

 async function getWhisperVersion(): Promise<string> {
@ -49,7 +63,7 @@ async function getWhisperVersion(): Promise<string> {
 }

 export async function GET() {
-  const [binaries, models, version] = await Promise.all([
+  const [binaries, whisperResult, version] = await Promise.all([
    getWhisperBinaries(),
    getWhisperModels(),
    getWhisperVersion(),
@ -59,7 +73,7 @@ export async function GET() {
    installed: binaries.length > 0,
    version,
    binaries,
-    models,
-    modelsDir: join(homedir(), 'whisper-models'),
+    models: whisperResult.models,
+    modelsDir: whisperResult.modelsDir,
  });
 }
--- a/__LOCAL_LLMs/dashboard/src/app/page.tsx
+++ b/__LOCAL_LLMs/dashboard/src/app/page.tsx
@ -150,7 +150,17 @@ export default function Dashboard() {
  const [toasts, setToasts] = useState<Toast[]>([]);
  const [pullInput, setPullInput] = useState('');
  const [pullLoading, setPullLoading] = useState(false);
+  const [pullProgress, setPullProgress] = useState<{
+    status: string;
+    completed: number;
+    total: number;
+  } | null>(null);
  const [copied, setCopied] = useState(false);
+  const [streamMetrics, setStreamMetrics] = useState<{
+    tokensPerSec: number;
+    totalTokens: number;
+    durationMs: number;
+  } | null>(null);
  const [deleteConfirm, setDeleteConfirm] = useState<string | null>(null);
  const responseRef = useRef<HTMLDivElement>(null);
  const abortRef = useRef<AbortController | null>(null);
@ -228,26 +238,60 @@ export default function Dashboard() {

  const handlePull = async () => {
+    // Pulls the model named in the pull input via POST /api/ollama/pull and
+    // mirrors the NDJSON progress stream into `pullProgress` while it runs.
+    // Always ends with exactly one toast (success or failure) and clears the
+    // loading/progress state.
    if (!pullInput.trim()) return;
+    const modelName = pullInput.trim();
    setPullLoading(true);
-    addToast(`Pulling ${pullInput}... this may take a while`, 'info');
+    setPullProgress({ status: 'starting', completed: 0, total: 0 });
    try {
-      const res = await fetch('/api/ollama', {
+      const res = await fetch('/api/ollama/pull', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
-        body: JSON.stringify({ action: 'pull', model: pullInput.trim() }),
+        body: JSON.stringify({ model: modelName }),
      });
-      const data = await res.json();
-      if (data.success) {
-        addToast(`Successfully pulled ${pullInput}`, 'success');
-        setPullInput('');
-      } else {
-        addToast(data.error || 'Pull failed', 'error');
+      if (!res.ok || !res.body) {
+        addToast(`Pull failed: ${res.status}`, 'error');
+        setPullLoading(false);
+        setPullProgress(null);
+        return;
      }
+      const reader = res.body.getReader();
+      const decoder = new TextDecoder();
+      let buffer = '';
+      // Ollama reports pull failures as an `{"error": ...}` line inside an
+      // otherwise successful (HTTP 200) stream, so remember it for the toast.
+      let pullError: string | null = null;
+      let streamDone = false;
+      while (!streamDone) {
+        const { done, value } = await reader.read();
+        streamDone = done;
+        buffer += done ? decoder.decode() : decoder.decode(value, { stream: true });
+        const lines = buffer.split('\n');
+        // Mid-stream the last piece may be a partial line, so hold it back
+        // until more data arrives. At end of stream nothing more is coming,
+        // so a final line without a trailing newline is processed as well.
+        buffer = done ? '' : (lines.pop() ?? '');
+        for (const line of lines) {
+          if (!line.trim()) continue;
+          let parsed: unknown;
+          try {
+            parsed = JSON.parse(line);
+          } catch {
+            continue; // skip malformed lines
+          }
+          if (typeof parsed !== 'object' || parsed === null) continue;
+          const chunk = parsed as {
+            status?: string;
+            completed?: number;
+            total?: number;
+            error?: unknown;
+          };
+          if (chunk.error) {
+            pullError = String(chunk.error);
+            continue;
+          }
+          if (chunk.total && chunk.completed) {
+            setPullProgress({
+              status: chunk.status || 'downloading',
+              completed: chunk.completed,
+              total: chunk.total,
+            });
+          } else if (chunk.status) {
+            // Status-only line (no byte counts): keep the last known progress.
+            const status = chunk.status;
+            setPullProgress(prev => ({
+              status,
+              completed: prev?.completed || 0,
+              total: prev?.total || 0,
+            }));
+          }
+        }
+      }
+      if (pullError) {
+        addToast(`Pull failed: ${pullError}`, 'error');
+      } else {
+        addToast(`Successfully pulled ${modelName}`, 'success');
+        setPullInput('');
+      }
      await fetchAll();
    } catch (err) {
      addToast(`Pull failed: ${err}`, 'error');
    }
    setPullLoading(false);
+    setPullProgress(null);
  };

  // Streaming prompt
@ -255,6 +299,7 @@ export default function Dashboard() {
    if (!promptModel || !promptText.trim()) return;
    setPromptLoading(true);
    setPromptResponse('');
+    setStreamMetrics(null);
    const controller = new AbortController();
    abortRef.current = controller;
    try {
@ -288,6 +333,11 @@ export default function Dashboard() {
              setPromptResponse(fullResponse);
              responseRef.current?.scrollTo(0, responseRef.current.scrollHeight);
            }
+            if (chunk.done && chunk.eval_count && chunk.eval_duration) {
+              const durationMs = chunk.eval_duration / 1e6;
+              const tokensPerSec = durationMs > 0 ? (chunk.eval_count / durationMs) * 1000 : 0;
+              setStreamMetrics({ tokensPerSec, totalTokens: chunk.eval_count, durationMs });
+            }
          } catch {
            /* skip malformed lines */
          }
@ -522,6 +572,29 @@ export default function Dashboard() {
            </div>
          )}

+          {/* Pull Progress Bar */}
+          {pullProgress && (
+            <div className="mb-4 p-3 rounded-lg" style={{ background: 'var(--surface-muted)' }}>
+              <div className="flex items-center justify-between mb-1.5">
+                <span className="text-xs font-medium" style={{ color: 'var(--text-secondary)' }}>
+                  {pullProgress.status}
+                </span>
+                {pullProgress.total > 0 && (
+                  <span className="text-xs font-mono" style={{ color: 'var(--text-tertiary)' }}>
+                    {formatBytes(pullProgress.completed)} / {formatBytes(pullProgress.total)}
+                    {' · '}
+                    {Math.round((pullProgress.completed / pullProgress.total) * 100)}%
+                  </span>
+                )}
+              </div>
+              <ProgressBar
+                value={pullProgress.completed}
+                max={pullProgress.total || 1}
+                color="var(--accent-primary)"
+              />
+            </div>
+          )}
+
          {ollama?.status !== 'online' ? (
            <div
              className="flex items-center gap-3 p-4 rounded-lg"
@ -1065,6 +1138,12 @@ export default function Dashboard() {
                  Streaming...
                </span>
              )}
+              {streamMetrics && !promptLoading && (
+                <span className="text-[11px] font-mono" style={{ color: 'var(--text-tertiary)' }}>
+                  {streamMetrics.tokensPerSec.toFixed(1)} tok/s &middot; {streamMetrics.totalTokens}{' '}
+                  tokens &middot; {(streamMetrics.durationMs / 1000).toFixed(1)}s
+                </span>
+              )}
            </div>
            {promptResponse && (
              <div