diff --git a/__LOCAL_LLMs/dashboard/src/app/api/ollama/pull/route.ts b/__LOCAL_LLMs/dashboard/src/app/api/ollama/pull/route.ts new file mode 100644 index 00000000..43b3b063 --- /dev/null +++ b/__LOCAL_LLMs/dashboard/src/app/api/ollama/pull/route.ts @@ -0,0 +1,44 @@ +import { NextRequest } from 'next/server'; + +const OLLAMA_URL = process.env.OLLAMA_URL || 'http://localhost:11434'; + +export async function POST(request: NextRequest) { + try { + const body = await request.json(); + const { model } = body; + + if (!model || typeof model !== 'string') { + return new Response(JSON.stringify({ error: 'Missing model name' }), { + status: 400, + headers: { 'Content-Type': 'application/json' }, + }); + } + + const response = await fetch(`${OLLAMA_URL}/api/pull`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ name: model, stream: true }), + }); + + if (!response.ok || !response.body) { + return new Response(JSON.stringify({ error: `Ollama pull error: ${response.status}` }), { + status: 500, + headers: { 'Content-Type': 'application/json' }, + }); + } + + // Pipe the Ollama pull stream directly to the client + return new Response(response.body, { + headers: { + 'Content-Type': 'application/x-ndjson', + 'Transfer-Encoding': 'chunked', + 'Cache-Control': 'no-cache', + }, + }); + } catch (err) { + return new Response(JSON.stringify({ error: String(err) }), { + status: 500, + headers: { 'Content-Type': 'application/json' }, + }); + } +} diff --git a/__LOCAL_LLMs/dashboard/src/app/api/system/route.ts b/__LOCAL_LLMs/dashboard/src/app/api/system/route.ts index 82e74590..ab279f25 100644 --- a/__LOCAL_LLMs/dashboard/src/app/api/system/route.ts +++ b/__LOCAL_LLMs/dashboard/src/app/api/system/route.ts @@ -100,7 +100,8 @@ async function getAccurateMemory(): Promise<{ const totalMem = os.totalmem(); try { const { stdout } = await execAsync('vm_stat', { timeout: 2000 }); - const pageSize = 16384; // macOS Apple Silicon default + const 
pageSizeMatch = stdout.match(/page size of (\d+) bytes/); + const pageSize = pageSizeMatch ? parseInt(pageSizeMatch[1]) : 16384; const parse = (label: string): number => { const match = stdout.match(new RegExp(`${label}:\\s+(\\d+)`)); return match ? parseInt(match[1]) * pageSize : 0; diff --git a/__LOCAL_LLMs/dashboard/src/app/api/whisper/route.ts b/__LOCAL_LLMs/dashboard/src/app/api/whisper/route.ts index 59193a28..6fae23f9 100644 --- a/__LOCAL_LLMs/dashboard/src/app/api/whisper/route.ts +++ b/__LOCAL_LLMs/dashboard/src/app/api/whisper/route.ts @@ -20,23 +20,37 @@ async function getWhisperBinaries(): Promise { } } -async function getWhisperModels(): Promise> { - const modelsDir = join(homedir(), 'whisper-models'); - try { - const files = await readdir(modelsDir); - const models = await Promise.all( - files - .filter(f => f.endsWith('.bin')) - .map(async f => { - const filePath = join(modelsDir, f); +const WHISPER_MODEL_DIRS = (process.env.WHISPER_MODELS_DIR || '') + .split(':') + .filter(Boolean) + .concat([ + join(homedir(), 'whisper-models'), + '/opt/homebrew/share/whisper-cpp/models', + join(homedir(), '.cache', 'whisper'), + ]); + +async function getWhisperModels(): Promise<{ + models: Array<{ name: string; size: number; path: string }>; + modelsDir: string; +}> { + for (const dir of WHISPER_MODEL_DIRS) { + try { + const files = await readdir(dir); + const binFiles = files.filter(f => f.endsWith('.bin')); + if (binFiles.length === 0) continue; + const models = await Promise.all( + binFiles.map(async f => { + const filePath = join(dir, f); const s = await stat(filePath); return { name: f.replace('ggml-', '').replace('.bin', ''), size: s.size, path: filePath }; }) - ); - return models; - } catch { - return []; + ); + return { models, modelsDir: dir }; + } catch { + // dir doesn't exist, try next + } } + return { models: [], modelsDir: WHISPER_MODEL_DIRS[0] }; } async function getWhisperVersion(): Promise { @@ -49,7 +63,7 @@ async function getWhisperVersion(): 
Promise { } export async function GET() { - const [binaries, models, version] = await Promise.all([ + const [binaries, whisperResult, version] = await Promise.all([ getWhisperBinaries(), getWhisperModels(), getWhisperVersion(), @@ -59,7 +73,7 @@ export async function GET() { installed: binaries.length > 0, version, binaries, - models, - modelsDir: join(homedir(), 'whisper-models'), + models: whisperResult.models, + modelsDir: whisperResult.modelsDir, }); } diff --git a/__LOCAL_LLMs/dashboard/src/app/page.tsx b/__LOCAL_LLMs/dashboard/src/app/page.tsx index 80dc12ae..678710ba 100644 --- a/__LOCAL_LLMs/dashboard/src/app/page.tsx +++ b/__LOCAL_LLMs/dashboard/src/app/page.tsx @@ -150,7 +150,17 @@ export default function Dashboard() { const [toasts, setToasts] = useState([]); const [pullInput, setPullInput] = useState(''); const [pullLoading, setPullLoading] = useState(false); + const [pullProgress, setPullProgress] = useState<{ + status: string; + completed: number; + total: number; + } | null>(null); const [copied, setCopied] = useState(false); + const [streamMetrics, setStreamMetrics] = useState<{ + tokensPerSec: number; + totalTokens: number; + durationMs: number; + } | null>(null); const [deleteConfirm, setDeleteConfirm] = useState(null); const responseRef = useRef(null); const abortRef = useRef(null); @@ -228,26 +238,60 @@ export default function Dashboard() { const handlePull = async () => { if (!pullInput.trim()) return; + const modelName = pullInput.trim(); setPullLoading(true); - addToast(`Pulling ${pullInput}... 
this may take a while`, 'info'); + setPullProgress({ status: 'starting', completed: 0, total: 0 }); try { - const res = await fetch('/api/ollama', { + const res = await fetch('/api/ollama/pull', { method: 'POST', headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ action: 'pull', model: pullInput.trim() }), + body: JSON.stringify({ model: modelName }), }); - const data = await res.json(); - if (data.success) { - addToast(`Successfully pulled ${pullInput}`, 'success'); - setPullInput(''); - } else { - addToast(data.error || 'Pull failed', 'error'); + if (!res.ok || !res.body) { + addToast(`Pull failed: ${res.status}`, 'error'); + setPullLoading(false); + setPullProgress(null); + return; } + const reader = res.body.getReader(); + const decoder = new TextDecoder(); + let buffer = ''; + while (true) { + const { done, value } = await reader.read(); + if (done) break; + buffer += decoder.decode(value, { stream: true }); + const lines = buffer.split('\n'); + buffer = lines.pop() || ''; + for (const line of lines) { + if (!line.trim()) continue; + try { + const chunk = JSON.parse(line); + if (chunk.total && chunk.completed) { + setPullProgress({ + status: chunk.status || 'downloading', + completed: chunk.completed, + total: chunk.total, + }); + } else if (chunk.status) { + setPullProgress(prev => ({ + status: chunk.status, + completed: prev?.completed || 0, + total: prev?.total || 0, + })); + } + } catch { + /* skip */ + } + } + } + addToast(`Successfully pulled ${modelName}`, 'success'); + setPullInput(''); await fetchAll(); } catch (err) { addToast(`Pull failed: ${err}`, 'error'); } setPullLoading(false); + setPullProgress(null); }; // Streaming prompt @@ -255,6 +299,7 @@ export default function Dashboard() { if (!promptModel || !promptText.trim()) return; setPromptLoading(true); setPromptResponse(''); + setStreamMetrics(null); const controller = new AbortController(); abortRef.current = controller; try { @@ -288,6 +333,11 @@ export default function 
Dashboard() { setPromptResponse(fullResponse); responseRef.current?.scrollTo(0, responseRef.current.scrollHeight); } + if (chunk.done && chunk.eval_count && chunk.eval_duration) { + const durationMs = chunk.eval_duration / 1e6; + const tokensPerSec = durationMs > 0 ? (chunk.eval_count / durationMs) * 1000 : 0; + setStreamMetrics({ tokensPerSec, totalTokens: chunk.eval_count, durationMs }); + } } catch { /* skip malformed lines */ } @@ -522,6 +572,29 @@ export default function Dashboard() { )} + {/* Pull Progress Bar */} + {pullProgress && ( +
+
+ + {pullProgress.status} + + {pullProgress.total > 0 && ( + + {formatBytes(pullProgress.completed)} / {formatBytes(pullProgress.total)} + {' · '} + {Math.round((pullProgress.completed / pullProgress.total) * 100)}% + + )} +
+ +
+ )} + {ollama?.status !== 'online' ? (
)} + {streamMetrics && !promptLoading && ( + + {streamMetrics.tokensPerSec.toFixed(1)} tok/s · {streamMetrics.totalTokens}{' '} + tokens · {(streamMetrics.durationMs / 1000).toFixed(1)}s + + )}
{promptResponse && (