feat(local-llm): Phase 1 — pre-load intelligence + bug fixes (N1-N3, BN1-BN2, BN5)

N1: Estimated RAM per model with quant-aware multipliers (Q4=1.2x, Q5=1.25x, Q8=1.1x, F16=1.05x)
N2: Will-it-fit indicator (green/yellow/red dot) next to Load button
N3: Aggregate loaded model VRAM in panel header badge
BN1: Compare buttons now filter to running models only
BN2: AbortController on compare stream, cancel on modal close
BN5: Delete confirmation shows model name + disk reclaim size
This commit is contained in:
saravanakumardb1 2026-02-19 23:09:49 -08:00
parent ae231d5aac
commit 040013e495
2 changed files with 120 additions and 38 deletions

View File

@ -6,6 +6,32 @@ export function formatBytes(bytes: number): string {
return `${parseFloat((bytes / Math.pow(k, i)).toFixed(1))} ${sizes[i]}`; return `${parseFloat((bytes / Math.pow(k, i)).toFixed(1))} ${sizes[i]}`;
} }
// N1: Estimate RAM usage from disk size + quantization level
// Apple Silicon uses unified memory — GPU and CPU share the same pool
/**
 * Approximates the memory a model occupies once loaded by scaling its on-disk
 * size with a multiplier chosen from the quantization label.
 *
 * Matching is a case-insensitive substring test, evaluated in table order, so
 * the first row whose tag appears in the label wins. A missing, empty, or
 * unrecognised label falls back to the default multiplier.
 *
 * @param diskSize - Size of the model on disk (same unit as the return value).
 * @param quant - Optional quantization label, e.g. `Q4_K_M` or `F16`.
 * @returns `diskSize` times the selected multiplier, rounded to the nearest integer.
 */
export function estimateRam(diskSize: number, quant?: string): number {
  const fallbackFactor = 1.2;
  // Row order is significant: it mirrors the precedence Q4 > Q5 > Q8 > F16/F32.
  const factorTable: ReadonlyArray<readonly [readonly string[], number]> = [
    [['Q4'], 1.2],
    [['Q5'], 1.25],
    [['Q8'], 1.1],
    [['F16', 'F32'], 1.05],
  ];

  const label = (quant || '').toUpperCase();
  const match = factorTable.find(([tags]) => tags.some(tag => label.includes(tag)));
  const factor = match ? match[1] : fallbackFactor;

  return Math.round(diskSize * factor);
}
// N2: Check if model fits in available memory
/** Result of the will-it-fit check: comfortable, borderline, or not enough memory. */
export type FitStatus = 'fits' | 'tight' | 'no';

/**
 * Classifies whether a model of the given estimated size fits in memory.
 *
 * The usable pool is `freeMemory` plus half of `cachedMemory` (presumably
 * because only part of the cache can be reclaimed without cost — confirm
 * against the system-stats source). The estimate is compared to that pool:
 *
 * - ratio below 0.7        → `'fits'`
 * - ratio from 0.7 to 1.0  → `'tight'`
 * - ratio above 1.0        → `'no'`
 *
 * @param estimatedRam - Estimated memory the model needs once loaded.
 * @param freeMemory - Currently free memory, in the same unit as `estimatedRam`.
 * @param cachedMemory - Cached memory, in the same unit; counted at 50%.
 * @returns The fit classification; `'no'` when the usable pool is not a positive number.
 */
export function checkMemoryFit(
  estimatedRam: number,
  freeMemory: number,
  cachedMemory: number
): FitStatus {
  const available = freeMemory + cachedMemory * 0.5;

  // Guard against a zero, negative, or NaN pool. Without it a negative pool
  // produces a negative ratio that would be misreported as 'fits'.
  if (!(available > 0)) return 'no';

  const ratio = estimatedRam / available;
  if (ratio < 0.7) return 'fits';
  if (ratio <= 1.0) return 'tight';
  return 'no';
}
export function formatUptime(seconds: number): string { export function formatUptime(seconds: number): string {
const d = Math.floor(seconds / 86400); const d = Math.floor(seconds / 86400);
const h = Math.floor((seconds % 86400) / 3600); const h = Math.floor((seconds % 86400) / 3600);

View File

@ -44,7 +44,7 @@ import type {
PullProgress, PullProgress,
StreamMetrics, StreamMetrics,
} from './lib/types'; } from './lib/types';
import { formatBytes, formatUptime } from './lib/format'; import { formatBytes, formatUptime, estimateRam, checkMemoryFit } from './lib/format';
import { StatusDot } from './components/StatusDot'; import { StatusDot } from './components/StatusDot';
import { ProgressBar } from './components/ProgressBar'; import { ProgressBar } from './components/ProgressBar';
import { Sparkline } from './components/Sparkline'; import { Sparkline } from './components/Sparkline';
@ -96,6 +96,7 @@ export default function Dashboard() {
const [compareResponse, setCompareResponse] = useState(''); const [compareResponse, setCompareResponse] = useState('');
const responseRef = useRef<HTMLDivElement>(null); const responseRef = useRef<HTMLDivElement>(null);
const abortRef = useRef<AbortController | null>(null); const abortRef = useRef<AbortController | null>(null);
const compareAbortRef = useRef<AbortController | null>(null);
const fetchingRef = useRef(false); const fetchingRef = useRef(false);
const addToast = useCallback((message: string, type: Toast['type'] = 'info') => { const addToast = useCallback((message: string, type: Toast['type'] = 'info') => {
@ -232,14 +233,18 @@ export default function Dashboard() {
}; };
// Model comparison (F5) — send same prompt to second model // Model comparison (F5) — send same prompt to second model
// BN2: Added AbortController so compare stream cancels on modal close
const handleCompare = async (prompt: string, model2: string) => { const handleCompare = async (prompt: string, model2: string) => {
setCompareModel(model2); setCompareModel(model2);
setCompareResponse(''); setCompareResponse('');
const controller = new AbortController();
compareAbortRef.current = controller;
try { try {
const res = await fetch('/api/ollama/stream', { const res = await fetch('/api/ollama/stream', {
method: 'POST', method: 'POST',
headers: { 'Content-Type': 'application/json' }, headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ model: model2, prompt }), body: JSON.stringify({ model: model2, prompt }),
signal: controller.signal,
}); });
if (!res.ok || !res.body) { if (!res.ok || !res.body) {
setCompareResponse('Error'); setCompareResponse('Error');
@ -270,8 +275,11 @@ export default function Dashboard() {
} }
if (!full) setCompareResponse('(empty response)'); if (!full) setCompareResponse('(empty response)');
} catch (err) { } catch (err) {
setCompareResponse(`Error: ${err}`); if (!controller.signal.aborted) {
setCompareResponse(`Error: ${err}`);
}
} }
compareAbortRef.current = null;
}; };
// Auto-load model helpers (F16) // Auto-load model helpers (F16)
@ -784,6 +792,11 @@ export default function Dashboard() {
style={{ background: 'var(--surface-muted)', color: 'var(--success)' }} style={{ background: 'var(--surface-muted)', color: 'var(--success)' }}
> >
{ollama.runningCount} active {ollama.runningCount} active
{ollama.running.length > 0 &&
(() => {
const totalVram = ollama.running.reduce((sum, r) => sum + r.size_vram, 0);
return ` · ${formatBytes(totalVram)} VRAM`;
})()}
</span> </span>
)} )}
</div> </div>
@ -898,6 +911,10 @@ export default function Dashboard() {
.map(model => { .map(model => {
const running = isRunning(model.name); const running = isRunning(model.name);
const expanded = expandedModel === model.name; const expanded = expandedModel === model.name;
const estRam = estimateRam(model.size, model.details?.quantization_level);
const fitStatus = system
? checkMemoryFit(estRam, system.memory.free, system.memory.cached)
: null;
return ( return (
<div <div
key={model.name} key={model.name}
@ -941,7 +958,7 @@ export default function Dashboard() {
)} )}
</div> </div>
<div <div
className="flex items-center gap-3 text-xs mt-0.5" className="flex items-center gap-3 text-xs mt-0.5 flex-wrap"
style={{ color: 'var(--text-tertiary)' }} style={{ color: 'var(--text-tertiary)' }}
> >
<span>{formatBytes(model.size)}</span> <span>{formatBytes(model.size)}</span>
@ -951,6 +968,9 @@ export default function Dashboard() {
{model.details?.quantization_level && ( {model.details?.quantization_level && (
<span>{model.details.quantization_level}</span> <span>{model.details.quantization_level}</span>
)} )}
<span title="Estimated RAM when loaded (Apple Silicon unified memory)">
~{formatBytes(estRam)} RAM
</span>
</div> </div>
</div> </div>
</div> </div>
@ -985,22 +1005,44 @@ export default function Dashboard() {
</button> </button>
</> </>
) : ( ) : (
<button <div className="flex items-center gap-2">
onClick={() => handleModelAction('load', model.name)} {fitStatus && !running && (
disabled={actionLoading === `load-${model.name}`} <span
className="flex items-center gap-1.5 px-3 py-1.5 rounded-md text-xs font-medium transition-colors" className="w-2 h-2 rounded-full shrink-0"
style={{ title={
background: 'rgba(52, 211, 153, 0.1)', fitStatus === 'fits'
color: 'var(--success)', ? 'Fits comfortably in available memory'
}} : fitStatus === 'tight'
> ? 'Tight — may cause swap pressure'
{actionLoading === `load-${model.name}` ? ( : "Won't fit — will swap heavily"
<RefreshCw className="w-3 h-3 animate-spin" /> }
) : ( style={{
<Play className="w-3 h-3" /> background:
fitStatus === 'fits'
? 'var(--success)'
: fitStatus === 'tight'
? 'var(--warning)'
: 'var(--danger)',
}}
/>
)} )}
Load <button
</button> onClick={() => handleModelAction('load', model.name)}
disabled={actionLoading === `load-${model.name}`}
className="flex items-center gap-1.5 px-3 py-1.5 rounded-md text-xs font-medium transition-colors"
style={{
background: 'rgba(52, 211, 153, 0.1)',
color: 'var(--success)',
}}
>
{actionLoading === `load-${model.name}` ? (
<RefreshCw className="w-3 h-3 animate-spin" />
) : (
<Play className="w-3 h-3" />
)}
Load
</button>
</div>
)} )}
<button <button
onClick={() => { onClick={() => {
@ -1121,7 +1163,7 @@ export default function Dashboard() {
{deleteConfirm === model.name ? ( {deleteConfirm === model.name ? (
<div className="flex items-center gap-2"> <div className="flex items-center gap-2">
<span className="text-xs" style={{ color: 'var(--danger)' }}> <span className="text-xs" style={{ color: 'var(--danger)' }}>
Delete this model? Delete {model.name}? Reclaim {formatBytes(model.size)}
</span> </span>
<button <button
onClick={() => handleModelAction('delete', model.name)} onClick={() => handleModelAction('delete', model.name)}
@ -1523,9 +1565,13 @@ export default function Dashboard() {
style={{ background: 'rgba(0,0,0,0.6)' }} style={{ background: 'rgba(0,0,0,0.6)' }}
onClick={e => { onClick={e => {
if (e.target === e.currentTarget && !promptLoading) { if (e.target === e.currentTarget && !promptLoading) {
abortRef.current?.abort();
compareAbortRef.current?.abort();
setPromptModel(null); setPromptModel(null);
setPromptResponse(''); setPromptResponse('');
setPromptText(''); setPromptText('');
setCompareModel(null);
setCompareResponse('');
} }
}} }}
> >
@ -1575,9 +1621,13 @@ export default function Dashboard() {
<button <button
onClick={() => { onClick={() => {
if (!promptLoading) { if (!promptLoading) {
abortRef.current?.abort();
compareAbortRef.current?.abort();
setPromptModel(null); setPromptModel(null);
setPromptResponse(''); setPromptResponse('');
setPromptText(''); setPromptText('');
setCompareModel(null);
setCompareResponse('');
} }
}} }}
className="p-2 rounded-lg transition-colors hover:bg-[var(--surface-card)]" className="p-2 rounded-lg transition-colors hover:bg-[var(--surface-card)]"
@ -1800,28 +1850,34 @@ export default function Dashboard() {
</div> </div>
)} )}
</div> </div>
{/* Compare with another model (F5) */} {/* Compare with another model (F5) — BN1: only show loaded models */}
{!promptLoading && !compareModel && ollama && ollama.models.length > 1 && ( {!promptLoading && !compareModel && ollama && ollama.running.length > 0 && (
<div className="flex items-center gap-2"> <div className="flex items-center gap-2 flex-wrap">
<span className="text-[11px]" style={{ color: 'var(--text-tertiary)' }}> <span className="text-[11px]" style={{ color: 'var(--text-tertiary)' }}>
Compare with: Compare with:
</span> </span>
{ollama.models {ollama.running.filter(r => r.name !== promptModel).slice(0, 3).length === 0 ? (
.filter(m => m.name !== promptModel) <span className="text-[11px]" style={{ color: 'var(--text-tertiary)' }}>
.slice(0, 3) Load another model to compare
.map(m => ( </span>
<button ) : (
key={m.name} ollama.running
onClick={() => handleCompare(promptText, m.name)} .filter(r => r.name !== promptModel)
className="text-[11px] px-2 py-1 rounded font-mono transition-colors" .slice(0, 3)
style={{ .map(r => (
background: 'var(--surface-muted)', <button
color: 'var(--accent-secondary)', key={r.name}
}} onClick={() => handleCompare(promptText, r.name)}
> className="text-[11px] px-2 py-1 rounded font-mono transition-colors"
{m.name} style={{
</button> background: 'var(--surface-muted)',
))} color: 'var(--accent-secondary)',
}}
>
{r.name}
</button>
))
)}
</div> </div>
)} )}
</div> </div>