feat(local-llm): Phase 4 — runtime metrics + UX polish (N11-N14)

N11: Persist tok/s per model to localStorage (llm-model-benchmarks),
     display on model card as faded accent text
N12: Live countdown to auto-unload — 1s interval, color-coded
     (muted >5m, yellow 1-5m, red <1m 'Unloading soon')
N13: Session stats per model (prompts + tokens) in expanded details
N14: Co-load suggestions strip below models list showing which
     unloaded models fit in remaining free memory
This commit is contained in:
saravanakumardb1 2026-02-19 23:20:30 -08:00
parent 6f6baf99c8
commit 588d21c70e

View File

@ -107,6 +107,13 @@ export default function Dashboard() {
const [modelSort, setModelSort] = useState<'name' | 'size' | 'params' | 'running' | 'modified'>( const [modelSort, setModelSort] = useState<'name' | 'size' | 'params' | 'running' | 'modified'>(
'name' 'name'
); );
// N11: Most recent throughput measurement per model, keyed by model name.
// Loaded from and written back to the 'llm-model-benchmarks' localStorage entry.
// `timestamp` is the Date.now() value (ms) taken when the measurement was recorded.
const [modelBenchmarks, setModelBenchmarks] = useState<
Record<string, { tokPerSec: number; totalTokens: number; timestamp: number }>
>({});
// N13: Per-model running totals: `prompts` counts completed responses and `tokens`
// accumulates their eval_count.
// NOTE(review): no persistence is visible in this diff, so these presumably reset on reload — confirm.
const [sessionStats, setSessionStats] = useState<
Record<string, { prompts: number; tokens: number }>
>({});
// N12: Counter incremented once per second while at least one model is loaded.
// The value itself is never displayed; updating it re-renders the unload countdown.
const [countdownTick, setCountdownTick] = useState(0);
const responseRef = useRef<HTMLDivElement>(null); const responseRef = useRef<HTMLDivElement>(null);
const abortRef = useRef<AbortController | null>(null); const abortRef = useRef<AbortController | null>(null);
const compareAbortRef = useRef<AbortController | null>(null); const compareAbortRef = useRef<AbortController | null>(null);
@ -164,6 +171,12 @@ export default function Dashboard() {
setAutoLoadModel(localStorage.getItem('llm-auto-load-model')); setAutoLoadModel(localStorage.getItem('llm-auto-load-model'));
const savedSort = localStorage.getItem('llm-model-sort'); const savedSort = localStorage.getItem('llm-model-sort');
if (savedSort) setModelSort(savedSort as typeof modelSort); if (savedSort) setModelSort(savedSort as typeof modelSort);
// N11: Restore persisted benchmarks. The stored value is untrusted (it may be hand-edited,
// written by an older build, or simply `null`), so only well-formed entries reach state;
// the model card later indexes this object and calls tokPerSec.toFixed() on each entry.
try {
  const savedBench = localStorage.getItem('llm-model-benchmarks');
  if (savedBench) {
    const parsed: unknown = JSON.parse(savedBench);
    if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
      const valid: Record<
        string,
        { tokPerSec: number; totalTokens: number; timestamp: number }
      > = {};
      for (const [name, entry] of Object.entries(parsed as Record<string, unknown>)) {
        if (entry === null || typeof entry !== 'object') continue;
        const { tokPerSec, totalTokens, timestamp } = entry as Record<string, unknown>;
        if (
          typeof tokPerSec === 'number' &&
          typeof totalTokens === 'number' &&
          typeof timestamp === 'number'
        ) {
          valid[name] = { tokPerSec, totalTokens, timestamp };
        }
      }
      setModelBenchmarks(valid);
    }
  }
} catch {
  /* ignore: storage unavailable or the saved value is not valid JSON */
}
}, []); }, []);
useEffect(() => { useEffect(() => {
@ -190,6 +203,13 @@ export default function Dashboard() {
// eslint-disable-next-line react-hooks/exhaustive-deps // eslint-disable-next-line react-hooks/exhaustive-deps
}, [promptModel, chatMode]); }, [promptModel, chatMode]);
// N12: While at least one model is loaded, bump the tick once per second so the
// unload countdown re-renders; the timer is torn down when the loaded count changes.
useEffect(() => {
  const loadedCount = ollama?.running.length ?? 0;
  if (loadedCount === 0) return;
  const timerId = setInterval(() => {
    setCountdownTick(prev => prev + 1);
  }, 1000);
  return () => {
    clearInterval(timerId);
  };
}, [ollama?.running.length]);
// F16: Auto-load preferred model when Ollama is online but nothing loaded // F16: Auto-load preferred model when Ollama is online but nothing loaded
useEffect(() => { useEffect(() => {
if (!autoLoadModel || !ollama || ollama.status !== 'online') return; if (!autoLoadModel || !ollama || ollama.status !== 'online') return;
@ -516,6 +536,32 @@ export default function Dashboard() {
const durationMs = chunk.eval_duration / 1e6; const durationMs = chunk.eval_duration / 1e6;
const tokensPerSec = durationMs > 0 ? (chunk.eval_count / durationMs) * 1000 : 0; const tokensPerSec = durationMs > 0 ? (chunk.eval_count / durationMs) * 1000 : 0;
setStreamMetrics({ tokensPerSec, totalTokens: chunk.eval_count, durationMs }); setStreamMetrics({ tokensPerSec, totalTokens: chunk.eval_count, durationMs });
// N11 + N13: Record the finished response's metrics for the selected model.
if (promptModel) {
  const benchmark = {
    tokPerSec: tokensPerSec,
    totalTokens: chunk.eval_count,
    timestamp: Date.now(),
  };
  // N11: Use the updater form so the entry is merged into the latest state rather than
  // into the modelBenchmarks snapshot this handler closed over, which may be stale.
  setModelBenchmarks(prev => ({ ...prev, [promptModel]: benchmark }));
  // Persist best-effort in its own try/catch: a storage failure (quota, disabled storage)
  // must not fall through to the enclosing catch and skip the session-stats update below.
  try {
    const raw = localStorage.getItem('llm-model-benchmarks');
    let persisted: Record<string, unknown> = {};
    if (raw) {
      try {
        const parsed: unknown = JSON.parse(raw);
        if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
          persisted = parsed as Record<string, unknown>;
        }
      } catch {
        /* corrupt saved value: overwrite it below */
      }
    }
    localStorage.setItem(
      'llm-model-benchmarks',
      JSON.stringify({ ...persisted, [promptModel]: benchmark })
    );
  } catch {
    /* storage unavailable or full: the in-memory benchmark is still updated */
  }
  // N13: Update session stats (one more prompt, plus this response's token count).
  setSessionStats(prev => {
    const cur = prev[promptModel] ?? { prompts: 0, tokens: 0 };
    return {
      ...prev,
      [promptModel]: {
        prompts: cur.prompts + 1,
        tokens: cur.tokens + chunk.eval_count,
      },
    };
  });
}
} }
} catch { } catch {
/* skip malformed lines */ /* skip malformed lines */
@ -603,6 +649,32 @@ export default function Dashboard() {
const durationMs = chunk.eval_duration / 1e6; const durationMs = chunk.eval_duration / 1e6;
const tokensPerSec = durationMs > 0 ? (chunk.eval_count / durationMs) * 1000 : 0; const tokensPerSec = durationMs > 0 ? (chunk.eval_count / durationMs) * 1000 : 0;
setStreamMetrics({ tokensPerSec, totalTokens: chunk.eval_count, durationMs }); setStreamMetrics({ tokensPerSec, totalTokens: chunk.eval_count, durationMs });
// N11 + N13: Record the finished response's metrics for the selected model.
if (promptModel) {
  const benchmark = {
    tokPerSec: tokensPerSec,
    totalTokens: chunk.eval_count,
    timestamp: Date.now(),
  };
  // N11: Use the updater form so the entry is merged into the latest state rather than
  // into the modelBenchmarks snapshot this handler closed over, which may be stale.
  setModelBenchmarks(prev => ({ ...prev, [promptModel]: benchmark }));
  // Persist best-effort in its own try/catch: a storage failure (quota, disabled storage)
  // must not fall through to the enclosing catch and skip the session-stats update below.
  try {
    const raw = localStorage.getItem('llm-model-benchmarks');
    let persisted: Record<string, unknown> = {};
    if (raw) {
      try {
        const parsed: unknown = JSON.parse(raw);
        if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
          persisted = parsed as Record<string, unknown>;
        }
      } catch {
        /* corrupt saved value: overwrite it below */
      }
    }
    localStorage.setItem(
      'llm-model-benchmarks',
      JSON.stringify({ ...persisted, [promptModel]: benchmark })
    );
  } catch {
    /* storage unavailable or full: the in-memory benchmark is still updated */
  }
  // N13: Update session stats (one more prompt, plus this response's token count).
  setSessionStats(prev => {
    const cur = prev[promptModel] ?? { prompts: 0, tokens: 0 };
    return {
      ...prev,
      [promptModel]: {
        prompts: cur.prompts + 1,
        tokens: cur.tokens + chunk.eval_count,
      },
    };
  });
}
} }
} catch { } catch {
/* skip */ /* skip */
@ -1107,7 +1179,41 @@ export default function Dashboard() {
</span> </span>
) : null; ) : null;
})()} })()}
{(() => {
  // N11: Show the last recorded throughput for this model, if one exists.
  const bench = modelBenchmarks[model.name];
  if (!bench) return null;
  const measuredAt = new Date(bench.timestamp).toLocaleString();
  return (
    <span style={{ color: 'var(--accent-secondary)' }} title={`Last benchmarked: ${measuredAt}`}>
      ~{bench.tokPerSec.toFixed(1)} tok/s
    </span>
  );
})()}
</div> </div>
{running &&
  (() => {
    // N12: Live countdown until this loaded model's expires_at time.
    const rm = ollama?.running.find(r => r.name === model.name);
    if (!rm?.expires_at) return null;
    const remaining = new Date(rm.expires_at).getTime() - Date.now();
    // No runtime effect: the re-render comes from the tick state changing.
    // NOTE(review): presumably kept so the tick state counts as "used" — confirm.
    void countdownTick;
    // An unparseable expires_at yields NaN, and `NaN <= 0` is false, so check finiteness
    // explicitly; otherwise the label would read "Unloads in NaNm NaNs".
    if (!Number.isFinite(remaining) || remaining <= 0) return null;
    const mins = Math.floor(remaining / 60000);
    const secs = Math.floor((remaining % 60000) / 1000);
    const isUrgent = remaining < 60000; // under 1 minute
    const isWarning = remaining < 300000; // under 5 minutes
    return (
      <div
        className="text-[10px] mt-0.5"
        style={{
          color: isUrgent
            ? 'var(--danger)'
            : isWarning
              ? 'var(--warning)'
              : 'var(--text-tertiary)',
        }}
      >
        {isUrgent ? 'Unloading soon' : `Unloads in ${mins}m ${secs}s`}
      </div>
    );
  })()}
</div> </div>
</div> </div>
<div className="flex items-center gap-2 ml-3"> <div className="flex items-center gap-2 ml-3">
@ -1228,6 +1334,13 @@ export default function Dashboard() {
<p>Digest: {model.digest?.substring(0, 16)}...</p> <p>Digest: {model.digest?.substring(0, 16)}...</p>
<p>Modified: {new Date(model.modified_at).toLocaleString()}</p> <p>Modified: {new Date(model.modified_at).toLocaleString()}</p>
{model.details?.family && <p>Family: {model.details.family}</p>} {model.details?.family && <p>Family: {model.details.family}</p>}
{(() => {
  // N13: Session totals for this model; nothing is rendered until it has been used.
  const stats = sessionStats[model.name];
  if (!stats) return null;
  const pluralSuffix = stats.prompts === 1 ? '' : 's';
  return (
    <p style={{ color: 'var(--accent-secondary)' }}>
      Session: {stats.prompts} prompt
      {pluralSuffix} ·{' '}
      {stats.tokens.toLocaleString()} tokens
    </p>
  );
})()}
{/* Model Tags (F14) */} {/* Model Tags (F14) */}
<div className="flex flex-wrap items-center gap-1.5 mt-2 font-sans"> <div className="flex flex-wrap items-center gap-1.5 mt-2 font-sans">
{['coding', 'chat', 'fast', 'vision', 'reasoning'].map(tag => ( {['coding', 'chat', 'fast', 'vision', 'reasoning'].map(tag => (
@ -1339,6 +1452,47 @@ export default function Dashboard() {
No models installed. Run &quot;ollama pull &lt;model&gt;&quot; to get started. No models installed. Run &quot;ollama pull &lt;model&gt;&quot; to get started.
</p> </p>
)} )}
{/* N14: Co-load suggestions — up to three unloaded models whose estimated RAM fits the remaining budget */}
{system &&
  ollama.running.length > 0 &&
  (() => {
    // Total VRAM reported by the currently loaded models.
    let vramInUse = 0;
    for (const loaded of ollama.running) {
      vramInUse += loaded.size_vram;
    }
    // Heuristic budget: free memory, plus half of cached, minus 10% of the VRAM in use.
    const memoryBudget = system.memory.free + system.memory.cached * 0.5 - vramInUse * 0.1;
    const idleModels = ollama.models.filter(candidate => !isRunning(candidate.name));
    const estimates = idleModels.map(candidate => ({
      name: candidate.name,
      est: estimateRam(candidate.size, candidate.details?.quantization_level),
    }));
    // Keep only what fits, largest estimate first, capped at three entries.
    const suggestions = estimates
      .filter(candidate => candidate.est < memoryBudget)
      .sort((left, right) => right.est - left.est)
      .slice(0, 3);
    if (suggestions.length === 0) return null;
    return (
      <div
        className="mt-3 p-3 rounded-lg"
        style={{
          background: 'var(--surface-muted)',
          border: '1px solid var(--border-subtle)',
        }}
      >
        <span className="text-[11px]" style={{ color: 'var(--text-tertiary)' }}>
          Can also load:{' '}
        </span>
        {suggestions.map((entry, position) => (
          <span
            key={entry.name}
            className="text-[11px] font-mono"
            style={{ color: 'var(--accent-secondary)' }}
          >
            {position > 0 && ', '}
            {entry.name.split(':')[0]} (~{formatBytes(entry.est)})
          </span>
        ))}
      </div>
    );
  })()}
</div> </div>
)} )}
</div> </div>