feat(local-llm): Phase 1 — pre-load intelligence + bug fixes (N1-N3, BN1-BN2, BN5)

N1: Estimated RAM per model with quant-aware multipliers (Q4=1.2x, Q5=1.25x, Q8=1.1x, F16/F32=1.05x; unknown quantization defaults to 1.2x)
N2: Will-it-fit indicator (green/yellow/red dot) next to Load button
N3: Aggregate loaded model VRAM in panel header badge
BN1: Compare buttons now filter to running models only
BN2: AbortController on compare stream, cancel on modal close
BN5: Delete confirmation shows model name + disk reclaim size
This commit is contained in:
saravanakumardb1 2026-02-19 23:09:49 -08:00
parent ae231d5aac
commit 040013e495
2 changed files with 120 additions and 38 deletions

View File

@ -6,6 +6,32 @@ export function formatBytes(bytes: number): string {
return `${parseFloat((bytes / Math.pow(k, i)).toFixed(1))} ${sizes[i]}`;
}
/**
 * N1: Estimate the RAM a model occupies once loaded, derived from its on-disk
 * size and its quantization label.
 *
 * Apple Silicon uses unified memory — GPU and CPU share the same pool — so a
 * single figure is reported.
 *
 * The quantization label is matched case-insensitively by substring, and the
 * first matching rule (in the order listed below) wins. Labels that match no
 * rule, or a missing label, use the 1.2 default multiplier.
 *
 * @param diskSize - Model size on disk, in the same unit as the result.
 * @param quant - Optional quantization label (e.g. a string containing "Q4").
 * @returns `diskSize` scaled by the selected multiplier, rounded to the
 *   nearest integer.
 */
export function estimateRam(diskSize: number, quant?: string): number {
  // Ordered rule table: [substrings to look for, multiplier applied on match].
  const rules: ReadonlyArray<readonly [readonly string[], number]> = [
    [['Q4'], 1.2],
    [['Q5'], 1.25],
    [['Q8'], 1.1],
    [['F16', 'F32'], 1.05],
  ];
  const fallbackFactor = 1.2;

  const label = (quant || '').toUpperCase();
  const hit = rules.find(([tags]) => tags.some(tag => label.includes(tag)));
  const factor = hit ? hit[1] : fallbackFactor;

  return Math.round(diskSize * factor);
}
/**
 * N2: Outcome of the will-it-fit check.
 * - `'fits'`  — estimated RAM is below 70% of available memory.
 * - `'tight'` — estimated RAM is between 70% and 100% (inclusive) of available memory.
 * - `'no'`    — estimated RAM exceeds available memory, or no memory is available.
 */
export type FitStatus = 'fits' | 'tight' | 'no';

/**
 * N2: Check whether a model of the given estimated size fits in available memory.
 *
 * Available memory is computed as `freeMemory + cachedMemory * 0.5`, i.e. only
 * half of the cached memory is counted towards what the model can use.
 *
 * @param estimatedRam - Estimated RAM the model needs once loaded.
 * @param freeMemory - Currently free memory, in the same unit as `estimatedRam`.
 * @param cachedMemory - Currently cached memory, in the same unit.
 * @returns The fit classification; `'no'` whenever the computed available
 *   memory is zero, negative, or not a number.
 */
export function checkMemoryFit(
  estimatedRam: number,
  freeMemory: number,
  cachedMemory: number
): FitStatus {
  const available = freeMemory + cachedMemory * 0.5;

  // Guard the division: a non-positive (or NaN) denominator would otherwise
  // produce a negative ratio that is misreported as 'fits', or rely on
  // Infinity/NaN comparison quirks. Nothing can fit in no memory.
  if (!(available > 0)) return 'no';

  const ratio = estimatedRam / available;
  if (ratio < 0.7) return 'fits';
  if (ratio <= 1.0) return 'tight';
  return 'no';
}
export function formatUptime(seconds: number): string {
const d = Math.floor(seconds / 86400);
const h = Math.floor((seconds % 86400) / 3600);

View File

@ -44,7 +44,7 @@ import type {
PullProgress,
StreamMetrics,
} from './lib/types';
import { formatBytes, formatUptime } from './lib/format';
import { formatBytes, formatUptime, estimateRam, checkMemoryFit } from './lib/format';
import { StatusDot } from './components/StatusDot';
import { ProgressBar } from './components/ProgressBar';
import { Sparkline } from './components/Sparkline';
@ -96,6 +96,7 @@ export default function Dashboard() {
const [compareResponse, setCompareResponse] = useState('');
const responseRef = useRef<HTMLDivElement>(null);
const abortRef = useRef<AbortController | null>(null);
const compareAbortRef = useRef<AbortController | null>(null);
const fetchingRef = useRef(false);
const addToast = useCallback((message: string, type: Toast['type'] = 'info') => {
@ -232,14 +233,18 @@ export default function Dashboard() {
};
// Model comparison (F5) — send same prompt to second model
// BN2: Added AbortController so compare stream cancels on modal close
const handleCompare = async (prompt: string, model2: string) => {
setCompareModel(model2);
setCompareResponse('');
const controller = new AbortController();
compareAbortRef.current = controller;
try {
const res = await fetch('/api/ollama/stream', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ model: model2, prompt }),
signal: controller.signal,
});
if (!res.ok || !res.body) {
setCompareResponse('Error');
@ -270,8 +275,11 @@ export default function Dashboard() {
}
if (!full) setCompareResponse('(empty response)');
} catch (err) {
setCompareResponse(`Error: ${err}`);
if (!controller.signal.aborted) {
setCompareResponse(`Error: ${err}`);
}
}
compareAbortRef.current = null;
};
// Auto-load model helpers (F16)
@ -784,6 +792,11 @@ export default function Dashboard() {
style={{ background: 'var(--surface-muted)', color: 'var(--success)' }}
>
{ollama.runningCount} active
{ollama.running.length > 0 &&
(() => {
const totalVram = ollama.running.reduce((sum, r) => sum + r.size_vram, 0);
return ` · ${formatBytes(totalVram)} VRAM`;
})()}
</span>
)}
</div>
@ -898,6 +911,10 @@ export default function Dashboard() {
.map(model => {
const running = isRunning(model.name);
const expanded = expandedModel === model.name;
const estRam = estimateRam(model.size, model.details?.quantization_level);
const fitStatus = system
? checkMemoryFit(estRam, system.memory.free, system.memory.cached)
: null;
return (
<div
key={model.name}
@ -941,7 +958,7 @@ export default function Dashboard() {
)}
</div>
<div
className="flex items-center gap-3 text-xs mt-0.5"
className="flex items-center gap-3 text-xs mt-0.5 flex-wrap"
style={{ color: 'var(--text-tertiary)' }}
>
<span>{formatBytes(model.size)}</span>
@ -951,6 +968,9 @@ export default function Dashboard() {
{model.details?.quantization_level && (
<span>{model.details.quantization_level}</span>
)}
<span title="Estimated RAM when loaded (Apple Silicon unified memory)">
~{formatBytes(estRam)} RAM
</span>
</div>
</div>
</div>
@ -985,22 +1005,44 @@ export default function Dashboard() {
</button>
</>
) : (
<button
onClick={() => handleModelAction('load', model.name)}
disabled={actionLoading === `load-${model.name}`}
className="flex items-center gap-1.5 px-3 py-1.5 rounded-md text-xs font-medium transition-colors"
style={{
background: 'rgba(52, 211, 153, 0.1)',
color: 'var(--success)',
}}
>
{actionLoading === `load-${model.name}` ? (
<RefreshCw className="w-3 h-3 animate-spin" />
) : (
<Play className="w-3 h-3" />
<div className="flex items-center gap-2">
{fitStatus && !running && (
<span
className="w-2 h-2 rounded-full shrink-0"
title={
fitStatus === 'fits'
? 'Fits comfortably in available memory'
: fitStatus === 'tight'
? 'Tight — may cause swap pressure'
: "Won't fit — will swap heavily"
}
style={{
background:
fitStatus === 'fits'
? 'var(--success)'
: fitStatus === 'tight'
? 'var(--warning)'
: 'var(--danger)',
}}
/>
)}
Load
</button>
<button
onClick={() => handleModelAction('load', model.name)}
disabled={actionLoading === `load-${model.name}`}
className="flex items-center gap-1.5 px-3 py-1.5 rounded-md text-xs font-medium transition-colors"
style={{
background: 'rgba(52, 211, 153, 0.1)',
color: 'var(--success)',
}}
>
{actionLoading === `load-${model.name}` ? (
<RefreshCw className="w-3 h-3 animate-spin" />
) : (
<Play className="w-3 h-3" />
)}
Load
</button>
</div>
)}
<button
onClick={() => {
@ -1121,7 +1163,7 @@ export default function Dashboard() {
{deleteConfirm === model.name ? (
<div className="flex items-center gap-2">
<span className="text-xs" style={{ color: 'var(--danger)' }}>
Delete this model?
Delete {model.name}? Reclaim {formatBytes(model.size)}
</span>
<button
onClick={() => handleModelAction('delete', model.name)}
@ -1523,9 +1565,13 @@ export default function Dashboard() {
style={{ background: 'rgba(0,0,0,0.6)' }}
onClick={e => {
if (e.target === e.currentTarget && !promptLoading) {
abortRef.current?.abort();
compareAbortRef.current?.abort();
setPromptModel(null);
setPromptResponse('');
setPromptText('');
setCompareModel(null);
setCompareResponse('');
}
}}
>
@ -1575,9 +1621,13 @@ export default function Dashboard() {
<button
onClick={() => {
if (!promptLoading) {
abortRef.current?.abort();
compareAbortRef.current?.abort();
setPromptModel(null);
setPromptResponse('');
setPromptText('');
setCompareModel(null);
setCompareResponse('');
}
}}
className="p-2 rounded-lg transition-colors hover:bg-[var(--surface-card)]"
@ -1800,28 +1850,34 @@ export default function Dashboard() {
</div>
)}
</div>
{/* Compare with another model (F5) */}
{!promptLoading && !compareModel && ollama && ollama.models.length > 1 && (
<div className="flex items-center gap-2">
{/* Compare with another model (F5) — BN1: only show loaded models */}
{!promptLoading && !compareModel && ollama && ollama.running.length > 0 && (
<div className="flex items-center gap-2 flex-wrap">
<span className="text-[11px]" style={{ color: 'var(--text-tertiary)' }}>
Compare with:
</span>
{ollama.models
.filter(m => m.name !== promptModel)
.slice(0, 3)
.map(m => (
<button
key={m.name}
onClick={() => handleCompare(promptText, m.name)}
className="text-[11px] px-2 py-1 rounded font-mono transition-colors"
style={{
background: 'var(--surface-muted)',
color: 'var(--accent-secondary)',
}}
>
{m.name}
</button>
))}
{ollama.running.filter(r => r.name !== promptModel).slice(0, 3).length === 0 ? (
<span className="text-[11px]" style={{ color: 'var(--text-tertiary)' }}>
Load another model to compare
</span>
) : (
ollama.running
.filter(r => r.name !== promptModel)
.slice(0, 3)
.map(r => (
<button
key={r.name}
onClick={() => handleCompare(promptText, r.name)}
className="text-[11px] px-2 py-1 rounded font-mono transition-colors"
style={{
background: 'var(--surface-muted)',
color: 'var(--accent-secondary)',
}}
>
{r.name}
</button>
))
)}
</div>
)}
</div>