diff --git a/__LOCAL_LLMs/dashboard/src/app/lib/format.ts b/__LOCAL_LLMs/dashboard/src/app/lib/format.ts index 7ba29664..b86ca543 100644 --- a/__LOCAL_LLMs/dashboard/src/app/lib/format.ts +++ b/__LOCAL_LLMs/dashboard/src/app/lib/format.ts @@ -6,6 +6,32 @@ export function formatBytes(bytes: number): string { return `${parseFloat((bytes / Math.pow(k, i)).toFixed(1))} ${sizes[i]}`; } +// N1: Estimate RAM usage from disk size + quantization level +// Apple Silicon uses unified memory — GPU and CPU share the same pool +export function estimateRam(diskSize: number, quant?: string): number { + const q = (quant || '').toUpperCase(); + let multiplier = 1.2; // default + if (q.includes('Q4')) multiplier = 1.2; + else if (q.includes('Q5')) multiplier = 1.25; + else if (q.includes('Q8')) multiplier = 1.1; + else if (q.includes('F16') || q.includes('F32')) multiplier = 1.05; + return Math.round(diskSize * multiplier); +} + +// N2: Check if model fits in available memory +export type FitStatus = 'fits' | 'tight' | 'no'; +export function checkMemoryFit( + estimatedRam: number, + freeMemory: number, + cachedMemory: number +): FitStatus { + const available = freeMemory + cachedMemory * 0.5; + const ratio = estimatedRam / available; + if (ratio < 0.7) return 'fits'; + if (ratio <= 1.0) return 'tight'; + return 'no'; +} + export function formatUptime(seconds: number): string { const d = Math.floor(seconds / 86400); const h = Math.floor((seconds % 86400) / 3600); diff --git a/__LOCAL_LLMs/dashboard/src/app/page.tsx b/__LOCAL_LLMs/dashboard/src/app/page.tsx index 1c2aa328..8fdb5847 100644 --- a/__LOCAL_LLMs/dashboard/src/app/page.tsx +++ b/__LOCAL_LLMs/dashboard/src/app/page.tsx @@ -44,7 +44,7 @@ import type { PullProgress, StreamMetrics, } from './lib/types'; -import { formatBytes, formatUptime } from './lib/format'; +import { formatBytes, formatUptime, estimateRam, checkMemoryFit } from './lib/format'; import { StatusDot } from './components/StatusDot'; import { ProgressBar } from './components/ProgressBar'; import { Sparkline } from './components/Sparkline'; @@ -96,6 +96,7 @@ export default function Dashboard() { const [compareResponse, setCompareResponse] = useState(''); const responseRef = useRef(null); const abortRef = useRef(null); + const compareAbortRef = useRef(null); const fetchingRef = useRef(false); const addToast = useCallback((message: string, type: Toast['type'] = 'info') => { @@ -232,14 +233,18 @@ export default function Dashboard() { }; // Model comparison (F5) — send same prompt to second model + // BN2: Added AbortController so compare stream cancels on modal close const handleCompare = async (prompt: string, model2: string) => { setCompareModel(model2); setCompareResponse(''); + const controller = new AbortController(); + compareAbortRef.current = controller; try { const res = await fetch('/api/ollama/stream', { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ model: model2, prompt }), + signal: controller.signal, }); if (!res.ok || !res.body) { setCompareResponse('Error'); @@ -270,8 +275,11 @@ export default function Dashboard() { } if (!full) setCompareResponse('(empty response)'); } catch (err) { - setCompareResponse(`Error: ${err}`); + if (!controller.signal.aborted) { + setCompareResponse(`Error: ${err}`); + } } + compareAbortRef.current = null; }; // Auto-load model helpers (F16) @@ -784,6 +792,11 @@ export default function Dashboard() { style={{ background: 'var(--surface-muted)', color: 'var(--success)' }} > {ollama.runningCount} active + {ollama.running.length > 0 && + (() => { + const totalVram = ollama.running.reduce((sum, r) => sum + r.size_vram, 0); + return ` · ${formatBytes(totalVram)} VRAM`; + })()} )} @@ -898,6 +911,10 @@ export default function Dashboard() { .map(model => { const running = isRunning(model.name); const expanded = expandedModel === model.name; + const estRam = estimateRam(model.size, model.details?.quantization_level); + const fitStatus = system + ? checkMemoryFit(estRam, system.memory.free, system.memory.cached) + : null; return (
{formatBytes(model.size)} @@ -951,6 +968,9 @@ export default function Dashboard() { {model.details?.quantization_level && ( {model.details.quantization_level} )} + + ~{formatBytes(estRam)} RAM +
@@ -985,22 +1005,44 @@ export default function Dashboard() { ) : ( - + + )} - ))} + {ollama.running.filter(r => r.name !== promptModel).slice(0, 3).length === 0 ? ( + + Load another model to compare + + ) : ( + ollama.running + .filter(r => r.name !== promptModel) + .slice(0, 3) + .map(r => ( + + )) + )} )}