From 588d21c70e0362bec16a26566fc5e416289bafe0 Mon Sep 17 00:00:00 2001 From: saravanakumardb1 Date: Thu, 19 Feb 2026 23:20:30 -0800 Subject: [PATCH] =?UTF-8?q?feat(local-llm):=20Phase=204=20=E2=80=94=20runt?= =?UTF-8?q?ime=20metrics=20+=20UX=20polish=20(N11-N14)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit N11: Persist tok/s per model to localStorage (llm-model-benchmarks), display on model card as faded accent text N12: Live countdown to auto-unload — 1s interval, color-coded (green >5m, yellow 1-5m, red <1m 'Unloading soon') N13: Session stats per model (prompts + tokens) in expanded details N14: Co-load suggestions strip below models list showing which unloaded models fit in remaining free memory --- __LOCAL_LLMs/dashboard/src/app/page.tsx | 154 ++++++++++++++++++++++++ 1 file changed, 154 insertions(+) diff --git a/__LOCAL_LLMs/dashboard/src/app/page.tsx b/__LOCAL_LLMs/dashboard/src/app/page.tsx index 2f7ef77c..2b20938e 100644 --- a/__LOCAL_LLMs/dashboard/src/app/page.tsx +++ b/__LOCAL_LLMs/dashboard/src/app/page.tsx @@ -107,6 +107,13 @@ export default function Dashboard() { const [modelSort, setModelSort] = useState<'name' | 'size' | 'params' | 'running' | 'modified'>( 'name' ); + const [modelBenchmarks, setModelBenchmarks] = useState< + Record + >({}); + const [sessionStats, setSessionStats] = useState< + Record + >({}); + const [countdownTick, setCountdownTick] = useState(0); const responseRef = useRef(null); const abortRef = useRef(null); const compareAbortRef = useRef(null); @@ -164,6 +171,12 @@ export default function Dashboard() { setAutoLoadModel(localStorage.getItem('llm-auto-load-model')); const savedSort = localStorage.getItem('llm-model-sort'); if (savedSort) setModelSort(savedSort as typeof modelSort); + try { + const savedBench = localStorage.getItem('llm-model-benchmarks'); + if (savedBench) setModelBenchmarks(JSON.parse(savedBench)); + } catch { + /* ignore */ + } }, []); useEffect(() => { @@ -190,6 +203,13 @@ export default function Dashboard() { // eslint-disable-next-line react-hooks/exhaustive-deps }, [promptModel, chatMode]); + // N12: Countdown tick — update every second for live unload countdown + useEffect(() => { + if (!ollama || ollama.running.length === 0) return; + const interval = setInterval(() => setCountdownTick(t => t + 1), 1000); + return () => clearInterval(interval); + }, [ollama?.running.length]); + // F16: Auto-load preferred model when Ollama is online but nothing loaded useEffect(() => { if (!autoLoadModel || !ollama || ollama.status !== 'online') return; @@ -516,6 +536,32 @@ export default function Dashboard() { const durationMs = chunk.eval_duration / 1e6; const tokensPerSec = durationMs > 0 ? (chunk.eval_count / durationMs) * 1000 : 0; setStreamMetrics({ tokensPerSec, totalTokens: chunk.eval_count, durationMs }); + // N11: Persist benchmark + if (promptModel) { + const updated = { + ...modelBenchmarks, + [promptModel]: { + tokPerSec: tokensPerSec, + totalTokens: chunk.eval_count, + timestamp: Date.now(), + }, + }; + setModelBenchmarks(updated); + localStorage.setItem('llm-model-benchmarks', JSON.stringify(updated)); + } + // N13: Update session stats + if (promptModel) { + setSessionStats(prev => { + const cur = prev[promptModel] || { prompts: 0, tokens: 0 }; + return { + ...prev, + [promptModel]: { + prompts: cur.prompts + 1, + tokens: cur.tokens + chunk.eval_count, + }, + }; + }); + } } } catch { /* skip malformed lines */ @@ -603,6 +649,32 @@ export default function Dashboard() { const durationMs = chunk.eval_duration / 1e6; const tokensPerSec = durationMs > 0 ? (chunk.eval_count / durationMs) * 1000 : 0; setStreamMetrics({ tokensPerSec, totalTokens: chunk.eval_count, durationMs }); + // N11: Persist benchmark + if (promptModel) { + const updated = { + ...modelBenchmarks, + [promptModel]: { + tokPerSec: tokensPerSec, + totalTokens: chunk.eval_count, + timestamp: Date.now(), + }, + }; + setModelBenchmarks(updated); + localStorage.setItem('llm-model-benchmarks', JSON.stringify(updated)); + } + // N13: Update session stats + if (promptModel) { + setSessionStats(prev => { + const cur = prev[promptModel] || { prompts: 0, tokens: 0 }; + return { + ...prev, + [promptModel]: { + prompts: cur.prompts + 1, + tokens: cur.tokens + chunk.eval_count, + }, + }; + }); + } } } catch { /* skip */ @@ -1107,7 +1179,41 @@ export default function Dashboard() { ) : null; })()} + {modelBenchmarks[model.name] && ( + + ~{modelBenchmarks[model.name].tokPerSec.toFixed(1)} tok/s + + )} + {running && + (() => { + const rm = ollama?.running.find(r => r.name === model.name); + if (!rm?.expires_at) return null; + const remaining = new Date(rm.expires_at).getTime() - Date.now(); + void countdownTick; + if (remaining <= 0) return null; + const mins = Math.floor(remaining / 60000); + const secs = Math.floor((remaining % 60000) / 1000); + const isUrgent = remaining < 60000; + const isWarning = remaining < 300000; + return ( +
+ {isUrgent ? 'Unloading soon' : `Unloads in ${mins}m ${secs}s`} +
+ ); + })()}
@@ -1228,6 +1334,13 @@ export default function Dashboard() {

Digest: {model.digest?.substring(0, 16)}...

Modified: {new Date(model.modified_at).toLocaleString()}

{model.details?.family &&

Family: {model.details.family}

} + {sessionStats[model.name] && ( +

+ Session: {sessionStats[model.name].prompts} prompt + {sessionStats[model.name].prompts !== 1 ? 's' : ''} ·{' '} + {sessionStats[model.name].tokens.toLocaleString()} tokens +

+ )} {/* Model Tags (F14) */}
{['coding', 'chat', 'fast', 'vision', 'reasoning'].map(tag => ( @@ -1339,6 +1452,47 @@ export default function Dashboard() { No models installed. Run "ollama pull <model>" to get started.

)} + {/* N14: Co-load suggestions */} + {system && + ollama.running.length > 0 && + (() => { + const usedVram = ollama.running.reduce((sum, r) => sum + r.size_vram, 0); + const freeForModels = + system.memory.free + system.memory.cached * 0.5 - usedVram * 0.1; + const suggestions = ollama.models + .filter(m => !isRunning(m.name)) + .map(m => ({ + name: m.name, + est: estimateRam(m.size, m.details?.quantization_level), + })) + .filter(m => m.est < freeForModels) + .sort((a, b) => b.est - a.est) + .slice(0, 3); + if (suggestions.length === 0) return null; + return ( +
+ + Can also load:{' '} + + {suggestions.map((s, i) => ( + + {i > 0 && ', '} + {s.name.split(':')[0]} (~{formatBytes(s.est)}) + + ))} +
+ ); + })()}
)}