diff --git a/__LOCAL_LLMs/dashboard/src/app/page.tsx b/__LOCAL_LLMs/dashboard/src/app/page.tsx index 2f7ef77c..2b20938e 100644 --- a/__LOCAL_LLMs/dashboard/src/app/page.tsx +++ b/__LOCAL_LLMs/dashboard/src/app/page.tsx @@ -107,6 +107,13 @@ export default function Dashboard() { const [modelSort, setModelSort] = useState<'name' | 'size' | 'params' | 'running' | 'modified'>( 'name' ); + const [modelBenchmarks, setModelBenchmarks] = useState< + Record + >({}); + const [sessionStats, setSessionStats] = useState< + Record + >({}); + const [countdownTick, setCountdownTick] = useState(0); const responseRef = useRef(null); const abortRef = useRef(null); const compareAbortRef = useRef(null); @@ -164,6 +171,12 @@ export default function Dashboard() { setAutoLoadModel(localStorage.getItem('llm-auto-load-model')); const savedSort = localStorage.getItem('llm-model-sort'); if (savedSort) setModelSort(savedSort as typeof modelSort); + try { + const savedBench = localStorage.getItem('llm-model-benchmarks'); + if (savedBench) setModelBenchmarks(JSON.parse(savedBench)); + } catch { + /* ignore */ + } }, []); useEffect(() => { @@ -190,6 +203,13 @@ export default function Dashboard() { // eslint-disable-next-line react-hooks/exhaustive-deps }, [promptModel, chatMode]); + // N12: Countdown tick — update every second for live unload countdown + useEffect(() => { + if (!ollama || ollama.running.length === 0) return; + const interval = setInterval(() => setCountdownTick(t => t + 1), 1000); + return () => clearInterval(interval); + }, [ollama?.running.length]); + // F16: Auto-load preferred model when Ollama is online but nothing loaded useEffect(() => { if (!autoLoadModel || !ollama || ollama.status !== 'online') return; @@ -516,6 +536,32 @@ export default function Dashboard() { const durationMs = chunk.eval_duration / 1e6; const tokensPerSec = durationMs > 0 ? (chunk.eval_count / durationMs) * 1000 : 0; setStreamMetrics({ tokensPerSec, totalTokens: chunk.eval_count, durationMs }); + // N11: Persist benchmark + if (promptModel) { + const updated = { + ...modelBenchmarks, + [promptModel]: { + tokPerSec: tokensPerSec, + totalTokens: chunk.eval_count, + timestamp: Date.now(), + }, + }; + setModelBenchmarks(updated); + localStorage.setItem('llm-model-benchmarks', JSON.stringify(updated)); + } + // N13: Update session stats + if (promptModel) { + setSessionStats(prev => { + const cur = prev[promptModel] || { prompts: 0, tokens: 0 }; + return { + ...prev, + [promptModel]: { + prompts: cur.prompts + 1, + tokens: cur.tokens + chunk.eval_count, + }, + }; + }); + } } } catch { /* skip malformed lines */ @@ -603,6 +649,32 @@ export default function Dashboard() { const durationMs = chunk.eval_duration / 1e6; const tokensPerSec = durationMs > 0 ? (chunk.eval_count / durationMs) * 1000 : 0; setStreamMetrics({ tokensPerSec, totalTokens: chunk.eval_count, durationMs }); + // N11: Persist benchmark + if (promptModel) { + const updated = { + ...modelBenchmarks, + [promptModel]: { + tokPerSec: tokensPerSec, + totalTokens: chunk.eval_count, + timestamp: Date.now(), + }, + }; + setModelBenchmarks(updated); + localStorage.setItem('llm-model-benchmarks', JSON.stringify(updated)); + } + // N13: Update session stats + if (promptModel) { + setSessionStats(prev => { + const cur = prev[promptModel] || { prompts: 0, tokens: 0 }; + return { + ...prev, + [promptModel]: { + prompts: cur.prompts + 1, + tokens: cur.tokens + chunk.eval_count, + }, + }; + }); + } } } catch { /* skip */ @@ -1107,7 +1179,41 @@ export default function Dashboard() { ) : null; })()} + {modelBenchmarks[model.name] && ( + + ~{modelBenchmarks[model.name].tokPerSec.toFixed(1)} tok/s + + )} + {running && + (() => { + const rm = ollama?.running.find(r => r.name === model.name); + if (!rm?.expires_at) return null; + const remaining = new Date(rm.expires_at).getTime() - Date.now(); + void countdownTick; + if (remaining <= 0) return null; + const mins = Math.floor(remaining / 60000); + const secs = Math.floor((remaining % 60000) / 1000); + const isUrgent = remaining < 60000; + const isWarning = remaining < 300000; + return ( +
+ {isUrgent ? 'Unloading soon' : `Unloads in ${mins}m ${secs}s`} +
+ ); + })()}
@@ -1228,6 +1334,13 @@ export default function Dashboard() {

Digest: {model.digest?.substring(0, 16)}...

Modified: {new Date(model.modified_at).toLocaleString()}

{model.details?.family &&

Family: {model.details.family}

} + {sessionStats[model.name] && ( +

+ Session: {sessionStats[model.name].prompts} prompt + {sessionStats[model.name].prompts !== 1 ? 's' : ''} ·{' '} + {sessionStats[model.name].tokens.toLocaleString()} tokens +

+ )} {/* Model Tags (F14) */}
{['coding', 'chat', 'fast', 'vision', 'reasoning'].map(tag => ( @@ -1339,6 +1452,47 @@ export default function Dashboard() { No models installed. Run "ollama pull <model>" to get started.

)} + {/* N14: Co-load suggestions */} + {system && + ollama.running.length > 0 && + (() => { + const usedVram = ollama.running.reduce((sum, r) => sum + r.size_vram, 0); + const freeForModels = + system.memory.free + system.memory.cached * 0.5 - usedVram * 0.1; + const suggestions = ollama.models + .filter(m => !isRunning(m.name)) + .map(m => ({ + name: m.name, + est: estimateRam(m.size, m.details?.quantization_level), + })) + .filter(m => m.est < freeForModels) + .sort((a, b) => b.est - a.est) + .slice(0, 3); + if (suggestions.length === 0) return null; + return ( +
+ + Can also load:{' '} + + {suggestions.map((s, i) => ( + + {i > 0 && ', '} + {s.name.split(':')[0]} (~{formatBytes(s.est)}) + + ))} +
+ ); + })()}
)}