feat(local-llm): Phase 4 — runtime metrics + UX polish (N11-N14)
N11: Persist tok/s per model to localStorage (llm-model-benchmarks),
display on model card as faded accent text
N12: Live countdown to auto-unload — 1s interval, color-coded
(tertiary text >5m, warning 1-5m, danger <1m 'Unloading soon')
N13: Session stats per model (prompts + tokens) in expanded details
N14: Co-load suggestions strip below models list showing up to three
unloaded models (largest first) whose estimated RAM fits in the remaining free memory
This commit is contained in:
parent
6f6baf99c8
commit
588d21c70e
@ -107,6 +107,13 @@ export default function Dashboard() {
|
||||
const [modelSort, setModelSort] = useState<'name' | 'size' | 'params' | 'running' | 'modified'>(
|
||||
'name'
|
||||
);
|
||||
const [modelBenchmarks, setModelBenchmarks] = useState<
|
||||
Record<string, { tokPerSec: number; totalTokens: number; timestamp: number }>
|
||||
>({});
|
||||
const [sessionStats, setSessionStats] = useState<
|
||||
Record<string, { prompts: number; tokens: number }>
|
||||
>({});
|
||||
const [countdownTick, setCountdownTick] = useState(0);
|
||||
const responseRef = useRef<HTMLDivElement>(null);
|
||||
const abortRef = useRef<AbortController | null>(null);
|
||||
const compareAbortRef = useRef<AbortController | null>(null);
|
||||
@ -164,6 +171,12 @@ export default function Dashboard() {
|
||||
setAutoLoadModel(localStorage.getItem('llm-auto-load-model'));
|
||||
const savedSort = localStorage.getItem('llm-model-sort');
|
||||
if (savedSort) setModelSort(savedSort as typeof modelSort);
|
||||
try {
|
||||
const savedBench = localStorage.getItem('llm-model-benchmarks');
|
||||
if (savedBench) setModelBenchmarks(JSON.parse(savedBench));
|
||||
} catch {
|
||||
/* ignore */
|
||||
}
|
||||
}, []);
|
||||
|
||||
useEffect(() => {
|
||||
@ -190,6 +203,13 @@ export default function Dashboard() {
|
||||
// eslint-disable-next-line react-hooks/exhaustive-deps
|
||||
}, [promptModel, chatMode]);
|
||||
|
||||
// N12: Countdown tick — update every second for live unload countdown
|
||||
useEffect(() => {
|
||||
if (!ollama || ollama.running.length === 0) return;
|
||||
const interval = setInterval(() => setCountdownTick(t => t + 1), 1000);
|
||||
return () => clearInterval(interval);
|
||||
}, [ollama?.running.length]);
|
||||
|
||||
// F16: Auto-load preferred model when Ollama is online but nothing loaded
|
||||
useEffect(() => {
|
||||
if (!autoLoadModel || !ollama || ollama.status !== 'online') return;
|
||||
@ -516,6 +536,32 @@ export default function Dashboard() {
|
||||
const durationMs = chunk.eval_duration / 1e6;
|
||||
const tokensPerSec = durationMs > 0 ? (chunk.eval_count / durationMs) * 1000 : 0;
|
||||
setStreamMetrics({ tokensPerSec, totalTokens: chunk.eval_count, durationMs });
|
||||
// N11: Persist benchmark
|
||||
if (promptModel) {
|
||||
const updated = {
|
||||
...modelBenchmarks,
|
||||
[promptModel]: {
|
||||
tokPerSec: tokensPerSec,
|
||||
totalTokens: chunk.eval_count,
|
||||
timestamp: Date.now(),
|
||||
},
|
||||
};
|
||||
setModelBenchmarks(updated);
|
||||
localStorage.setItem('llm-model-benchmarks', JSON.stringify(updated));
|
||||
}
|
||||
// N13: Update session stats
|
||||
if (promptModel) {
|
||||
setSessionStats(prev => {
|
||||
const cur = prev[promptModel] || { prompts: 0, tokens: 0 };
|
||||
return {
|
||||
...prev,
|
||||
[promptModel]: {
|
||||
prompts: cur.prompts + 1,
|
||||
tokens: cur.tokens + chunk.eval_count,
|
||||
},
|
||||
};
|
||||
});
|
||||
}
|
||||
}
|
||||
} catch {
|
||||
/* skip malformed lines */
|
||||
@ -603,6 +649,32 @@ export default function Dashboard() {
|
||||
const durationMs = chunk.eval_duration / 1e6;
|
||||
const tokensPerSec = durationMs > 0 ? (chunk.eval_count / durationMs) * 1000 : 0;
|
||||
setStreamMetrics({ tokensPerSec, totalTokens: chunk.eval_count, durationMs });
|
||||
// N11: Persist benchmark
|
||||
if (promptModel) {
|
||||
const updated = {
|
||||
...modelBenchmarks,
|
||||
[promptModel]: {
|
||||
tokPerSec: tokensPerSec,
|
||||
totalTokens: chunk.eval_count,
|
||||
timestamp: Date.now(),
|
||||
},
|
||||
};
|
||||
setModelBenchmarks(updated);
|
||||
localStorage.setItem('llm-model-benchmarks', JSON.stringify(updated));
|
||||
}
|
||||
// N13: Update session stats
|
||||
if (promptModel) {
|
||||
setSessionStats(prev => {
|
||||
const cur = prev[promptModel] || { prompts: 0, tokens: 0 };
|
||||
return {
|
||||
...prev,
|
||||
[promptModel]: {
|
||||
prompts: cur.prompts + 1,
|
||||
tokens: cur.tokens + chunk.eval_count,
|
||||
},
|
||||
};
|
||||
});
|
||||
}
|
||||
}
|
||||
} catch {
|
||||
/* skip */
|
||||
@ -1107,7 +1179,41 @@ export default function Dashboard() {
|
||||
</span>
|
||||
) : null;
|
||||
})()}
|
||||
{modelBenchmarks[model.name] && (
|
||||
<span
|
||||
style={{ color: 'var(--accent-secondary)' }}
|
||||
title={`Last benchmarked: ${new Date(modelBenchmarks[model.name].timestamp).toLocaleString()}`}
|
||||
>
|
||||
~{modelBenchmarks[model.name].tokPerSec.toFixed(1)} tok/s
|
||||
</span>
|
||||
)}
|
||||
</div>
|
||||
{running &&
|
||||
(() => {
|
||||
const rm = ollama?.running.find(r => r.name === model.name);
|
||||
if (!rm?.expires_at) return null;
|
||||
const remaining = new Date(rm.expires_at).getTime() - Date.now();
|
||||
void countdownTick;
|
||||
if (remaining <= 0) return null;
|
||||
const mins = Math.floor(remaining / 60000);
|
||||
const secs = Math.floor((remaining % 60000) / 1000);
|
||||
const isUrgent = remaining < 60000;
|
||||
const isWarning = remaining < 300000;
|
||||
return (
|
||||
<div
|
||||
className="text-[10px] mt-0.5"
|
||||
style={{
|
||||
color: isUrgent
|
||||
? 'var(--danger)'
|
||||
: isWarning
|
||||
? 'var(--warning)'
|
||||
: 'var(--text-tertiary)',
|
||||
}}
|
||||
>
|
||||
{isUrgent ? 'Unloading soon' : `Unloads in ${mins}m ${secs}s`}
|
||||
</div>
|
||||
);
|
||||
})()}
|
||||
</div>
|
||||
</div>
|
||||
<div className="flex items-center gap-2 ml-3">
|
||||
@ -1228,6 +1334,13 @@ export default function Dashboard() {
|
||||
<p>Digest: {model.digest?.substring(0, 16)}...</p>
|
||||
<p>Modified: {new Date(model.modified_at).toLocaleString()}</p>
|
||||
{model.details?.family && <p>Family: {model.details.family}</p>}
|
||||
{sessionStats[model.name] && (
|
||||
<p style={{ color: 'var(--accent-secondary)' }}>
|
||||
Session: {sessionStats[model.name].prompts} prompt
|
||||
{sessionStats[model.name].prompts !== 1 ? 's' : ''} ·{' '}
|
||||
{sessionStats[model.name].tokens.toLocaleString()} tokens
|
||||
</p>
|
||||
)}
|
||||
{/* Model Tags (F14) */}
|
||||
<div className="flex flex-wrap items-center gap-1.5 mt-2 font-sans">
|
||||
{['coding', 'chat', 'fast', 'vision', 'reasoning'].map(tag => (
|
||||
@ -1339,6 +1452,47 @@ export default function Dashboard() {
|
||||
No models installed. Run "ollama pull <model>" to get started.
|
||||
</p>
|
||||
)}
|
||||
{/* N14: Co-load suggestions */}
|
||||
{system &&
|
||||
ollama.running.length > 0 &&
|
||||
(() => {
|
||||
const usedVram = ollama.running.reduce((sum, r) => sum + r.size_vram, 0);
|
||||
const freeForModels =
|
||||
system.memory.free + system.memory.cached * 0.5 - usedVram * 0.1;
|
||||
const suggestions = ollama.models
|
||||
.filter(m => !isRunning(m.name))
|
||||
.map(m => ({
|
||||
name: m.name,
|
||||
est: estimateRam(m.size, m.details?.quantization_level),
|
||||
}))
|
||||
.filter(m => m.est < freeForModels)
|
||||
.sort((a, b) => b.est - a.est)
|
||||
.slice(0, 3);
|
||||
if (suggestions.length === 0) return null;
|
||||
return (
|
||||
<div
|
||||
className="mt-3 p-3 rounded-lg"
|
||||
style={{
|
||||
background: 'var(--surface-muted)',
|
||||
border: '1px solid var(--border-subtle)',
|
||||
}}
|
||||
>
|
||||
<span className="text-[11px]" style={{ color: 'var(--text-tertiary)' }}>
|
||||
Can also load:{' '}
|
||||
</span>
|
||||
{suggestions.map((s, i) => (
|
||||
<span
|
||||
key={s.name}
|
||||
className="text-[11px] font-mono"
|
||||
style={{ color: 'var(--accent-secondary)' }}
|
||||
>
|
||||
{i > 0 && ', '}
|
||||
{s.name.split(':')[0]} (~{formatBytes(s.est)})
|
||||
</span>
|
||||
))}
|
||||
</div>
|
||||
);
|
||||
})()}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
Loading…
Reference in New Issue
Block a user