feat(local-llm): Phase 1 — pre-load intelligence + bug fixes (N1-N3, BN1-BN2, BN5)
N1: Estimated RAM per model with quant-aware multipliers (Q4=1.2x, Q5=1.25x, Q8=1.1x, F16=1.05x)
N2: Will-it-fit indicator (green/yellow/red dot) next to Load button
N3: Aggregate loaded model VRAM in panel header badge
BN1: Compare buttons now filter to running models only
BN2: AbortController on compare stream, cancel on modal close
BN5: Delete confirmation shows model name + disk reclaim size
This commit is contained in:
parent
ae231d5aac
commit
040013e495
@ -6,6 +6,32 @@ export function formatBytes(bytes: number): string {
|
||||
return `${parseFloat((bytes / Math.pow(k, i)).toFixed(1))} ${sizes[i]}`;
|
||||
}
|
||||
|
||||
/**
 * N1: Estimate RAM usage of a loaded model from its disk size and
 * quantization level.
 *
 * Apple Silicon uses unified memory — GPU and CPU share the same pool — so a
 * single figure is returned.
 *
 * @param diskSize - Size of the model on disk (same unit as the result).
 * @param quant - Optional quantization label (e.g. "Q4_K_M", "F16"). Matched
 *   case-insensitively by substring; unknown or missing labels use the
 *   default multiplier.
 * @returns `diskSize` scaled by the quant-specific multiplier and rounded to
 *   an integer, or 0 when `diskSize` is not a positive finite number.
 */
export function estimateRam(diskSize: number, quant?: string): number {
  // Guard against missing/corrupt sizes so callers never format NaN or a
  // negative memory figure.
  if (!Number.isFinite(diskSize) || diskSize <= 0) return 0;

  const label = (quant ?? '').toUpperCase();

  // Ordered: the first matching entry wins, mirroring the precedence
  // Q4 → Q5 → Q8 → F16/F32.
  const multipliers: ReadonlyArray<readonly [readonly string[], number]> = [
    [['Q4'], 1.2],
    [['Q5'], 1.25],
    [['Q8'], 1.1],
    [['F16', 'F32'], 1.05],
  ];

  const match = multipliers.find(([needles]) =>
    needles.some(needle => label.includes(needle))
  );
  const multiplier = match ? match[1] : 1.2; // default when quant is unknown

  return Math.round(diskSize * multiplier);
}
|
||||
|
||||
// N2: Check if model fits in available memory
export type FitStatus = 'fits' | 'tight' | 'no';

/**
 * Classify whether a model with the given estimated RAM footprint fits in
 * the memory currently available.
 *
 * Available memory is computed as `freeMemory + cachedMemory * 0.5`, i.e.
 * only half of the cached memory is counted as usable.
 *
 * @param estimatedRam - Estimated RAM the model needs when loaded.
 * @param freeMemory - Free memory, in the same unit as `estimatedRam`.
 * @param cachedMemory - Cached memory, in the same unit; non-finite or
 *   negative values are treated as 0 so a missing reading does not poison
 *   the result.
 * @returns `'fits'` when the model needs less than 70% of available memory,
 *   `'tight'` when it needs between 70% and 100% (inclusive), and `'no'`
 *   otherwise — including when no memory is available or `estimatedRam` is
 *   not a finite number.
 */
export function checkMemoryFit(
  estimatedRam: number,
  freeMemory: number,
  cachedMemory: number
): FitStatus {
  // Runtime readings may be absent (NaN/undefined) or bogus (negative);
  // clamp them so the ratio below is always meaningful.
  const clamp = (value: number): number =>
    Number.isFinite(value) && value > 0 ? value : 0;

  if (!Number.isFinite(estimatedRam)) return 'no';

  const available = clamp(freeMemory) + clamp(cachedMemory) * 0.5;
  // Avoid division by zero; nothing fits when nothing is available.
  if (available <= 0) return 'no';

  const ratio = estimatedRam / available;
  if (ratio < 0.7) return 'fits';
  if (ratio <= 1.0) return 'tight';
  return 'no';
}
|
||||
|
||||
export function formatUptime(seconds: number): string {
|
||||
const d = Math.floor(seconds / 86400);
|
||||
const h = Math.floor((seconds % 86400) / 3600);
|
||||
|
||||
@ -44,7 +44,7 @@ import type {
|
||||
PullProgress,
|
||||
StreamMetrics,
|
||||
} from './lib/types';
|
||||
import { formatBytes, formatUptime } from './lib/format';
|
||||
import { formatBytes, formatUptime, estimateRam, checkMemoryFit } from './lib/format';
|
||||
import { StatusDot } from './components/StatusDot';
|
||||
import { ProgressBar } from './components/ProgressBar';
|
||||
import { Sparkline } from './components/Sparkline';
|
||||
@ -96,6 +96,7 @@ export default function Dashboard() {
|
||||
const [compareResponse, setCompareResponse] = useState('');
|
||||
const responseRef = useRef<HTMLDivElement>(null);
|
||||
const abortRef = useRef<AbortController | null>(null);
|
||||
const compareAbortRef = useRef<AbortController | null>(null);
|
||||
const fetchingRef = useRef(false);
|
||||
|
||||
const addToast = useCallback((message: string, type: Toast['type'] = 'info') => {
|
||||
@ -232,14 +233,18 @@ export default function Dashboard() {
|
||||
};
|
||||
|
||||
// Model comparison (F5) — send same prompt to second model
|
||||
// BN2: Added AbortController so compare stream cancels on modal close
|
||||
const handleCompare = async (prompt: string, model2: string) => {
|
||||
setCompareModel(model2);
|
||||
setCompareResponse('');
|
||||
const controller = new AbortController();
|
||||
compareAbortRef.current = controller;
|
||||
try {
|
||||
const res = await fetch('/api/ollama/stream', {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({ model: model2, prompt }),
|
||||
signal: controller.signal,
|
||||
});
|
||||
if (!res.ok || !res.body) {
|
||||
setCompareResponse('Error');
|
||||
@ -270,8 +275,11 @@ export default function Dashboard() {
|
||||
}
|
||||
if (!full) setCompareResponse('(empty response)');
|
||||
} catch (err) {
|
||||
setCompareResponse(`Error: ${err}`);
|
||||
if (!controller.signal.aborted) {
|
||||
setCompareResponse(`Error: ${err}`);
|
||||
}
|
||||
}
|
||||
compareAbortRef.current = null;
|
||||
};
|
||||
|
||||
// Auto-load model helpers (F16)
|
||||
@ -784,6 +792,11 @@ export default function Dashboard() {
|
||||
style={{ background: 'var(--surface-muted)', color: 'var(--success)' }}
|
||||
>
|
||||
{ollama.runningCount} active
|
||||
{ollama.running.length > 0 &&
|
||||
(() => {
|
||||
const totalVram = ollama.running.reduce((sum, r) => sum + r.size_vram, 0);
|
||||
return ` · ${formatBytes(totalVram)} VRAM`;
|
||||
})()}
|
||||
</span>
|
||||
)}
|
||||
</div>
|
||||
@ -898,6 +911,10 @@ export default function Dashboard() {
|
||||
.map(model => {
|
||||
const running = isRunning(model.name);
|
||||
const expanded = expandedModel === model.name;
|
||||
const estRam = estimateRam(model.size, model.details?.quantization_level);
|
||||
const fitStatus = system
|
||||
? checkMemoryFit(estRam, system.memory.free, system.memory.cached)
|
||||
: null;
|
||||
return (
|
||||
<div
|
||||
key={model.name}
|
||||
@ -941,7 +958,7 @@ export default function Dashboard() {
|
||||
)}
|
||||
</div>
|
||||
<div
|
||||
className="flex items-center gap-3 text-xs mt-0.5"
|
||||
className="flex items-center gap-3 text-xs mt-0.5 flex-wrap"
|
||||
style={{ color: 'var(--text-tertiary)' }}
|
||||
>
|
||||
<span>{formatBytes(model.size)}</span>
|
||||
@ -951,6 +968,9 @@ export default function Dashboard() {
|
||||
{model.details?.quantization_level && (
|
||||
<span>{model.details.quantization_level}</span>
|
||||
)}
|
||||
<span title="Estimated RAM when loaded (Apple Silicon unified memory)">
|
||||
~{formatBytes(estRam)} RAM
|
||||
</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
@ -985,22 +1005,44 @@ export default function Dashboard() {
|
||||
</button>
|
||||
</>
|
||||
) : (
|
||||
<button
|
||||
onClick={() => handleModelAction('load', model.name)}
|
||||
disabled={actionLoading === `load-${model.name}`}
|
||||
className="flex items-center gap-1.5 px-3 py-1.5 rounded-md text-xs font-medium transition-colors"
|
||||
style={{
|
||||
background: 'rgba(52, 211, 153, 0.1)',
|
||||
color: 'var(--success)',
|
||||
}}
|
||||
>
|
||||
{actionLoading === `load-${model.name}` ? (
|
||||
<RefreshCw className="w-3 h-3 animate-spin" />
|
||||
) : (
|
||||
<Play className="w-3 h-3" />
|
||||
<div className="flex items-center gap-2">
|
||||
{fitStatus && !running && (
|
||||
<span
|
||||
className="w-2 h-2 rounded-full shrink-0"
|
||||
title={
|
||||
fitStatus === 'fits'
|
||||
? 'Fits comfortably in available memory'
|
||||
: fitStatus === 'tight'
|
||||
? 'Tight — may cause swap pressure'
|
||||
: "Won't fit — will swap heavily"
|
||||
}
|
||||
style={{
|
||||
background:
|
||||
fitStatus === 'fits'
|
||||
? 'var(--success)'
|
||||
: fitStatus === 'tight'
|
||||
? 'var(--warning)'
|
||||
: 'var(--danger)',
|
||||
}}
|
||||
/>
|
||||
)}
|
||||
Load
|
||||
</button>
|
||||
<button
|
||||
onClick={() => handleModelAction('load', model.name)}
|
||||
disabled={actionLoading === `load-${model.name}`}
|
||||
className="flex items-center gap-1.5 px-3 py-1.5 rounded-md text-xs font-medium transition-colors"
|
||||
style={{
|
||||
background: 'rgba(52, 211, 153, 0.1)',
|
||||
color: 'var(--success)',
|
||||
}}
|
||||
>
|
||||
{actionLoading === `load-${model.name}` ? (
|
||||
<RefreshCw className="w-3 h-3 animate-spin" />
|
||||
) : (
|
||||
<Play className="w-3 h-3" />
|
||||
)}
|
||||
Load
|
||||
</button>
|
||||
</div>
|
||||
)}
|
||||
<button
|
||||
onClick={() => {
|
||||
@ -1121,7 +1163,7 @@ export default function Dashboard() {
|
||||
{deleteConfirm === model.name ? (
|
||||
<div className="flex items-center gap-2">
|
||||
<span className="text-xs" style={{ color: 'var(--danger)' }}>
|
||||
Delete this model?
|
||||
Delete {model.name}? Reclaim {formatBytes(model.size)}
|
||||
</span>
|
||||
<button
|
||||
onClick={() => handleModelAction('delete', model.name)}
|
||||
@ -1523,9 +1565,13 @@ export default function Dashboard() {
|
||||
style={{ background: 'rgba(0,0,0,0.6)' }}
|
||||
onClick={e => {
|
||||
if (e.target === e.currentTarget && !promptLoading) {
|
||||
abortRef.current?.abort();
|
||||
compareAbortRef.current?.abort();
|
||||
setPromptModel(null);
|
||||
setPromptResponse('');
|
||||
setPromptText('');
|
||||
setCompareModel(null);
|
||||
setCompareResponse('');
|
||||
}
|
||||
}}
|
||||
>
|
||||
@ -1575,9 +1621,13 @@ export default function Dashboard() {
|
||||
<button
|
||||
onClick={() => {
|
||||
if (!promptLoading) {
|
||||
abortRef.current?.abort();
|
||||
compareAbortRef.current?.abort();
|
||||
setPromptModel(null);
|
||||
setPromptResponse('');
|
||||
setPromptText('');
|
||||
setCompareModel(null);
|
||||
setCompareResponse('');
|
||||
}
|
||||
}}
|
||||
className="p-2 rounded-lg transition-colors hover:bg-[var(--surface-card)]"
|
||||
@ -1800,28 +1850,34 @@ export default function Dashboard() {
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
{/* Compare with another model (F5) */}
|
||||
{!promptLoading && !compareModel && ollama && ollama.models.length > 1 && (
|
||||
<div className="flex items-center gap-2">
|
||||
{/* Compare with another model (F5) — BN1: only show loaded models */}
|
||||
{!promptLoading && !compareModel && ollama && ollama.running.length > 0 && (
|
||||
<div className="flex items-center gap-2 flex-wrap">
|
||||
<span className="text-[11px]" style={{ color: 'var(--text-tertiary)' }}>
|
||||
Compare with:
|
||||
</span>
|
||||
{ollama.models
|
||||
.filter(m => m.name !== promptModel)
|
||||
.slice(0, 3)
|
||||
.map(m => (
|
||||
<button
|
||||
key={m.name}
|
||||
onClick={() => handleCompare(promptText, m.name)}
|
||||
className="text-[11px] px-2 py-1 rounded font-mono transition-colors"
|
||||
style={{
|
||||
background: 'var(--surface-muted)',
|
||||
color: 'var(--accent-secondary)',
|
||||
}}
|
||||
>
|
||||
{m.name}
|
||||
</button>
|
||||
))}
|
||||
{ollama.running.filter(r => r.name !== promptModel).slice(0, 3).length === 0 ? (
|
||||
<span className="text-[11px]" style={{ color: 'var(--text-tertiary)' }}>
|
||||
Load another model to compare
|
||||
</span>
|
||||
) : (
|
||||
ollama.running
|
||||
.filter(r => r.name !== promptModel)
|
||||
.slice(0, 3)
|
||||
.map(r => (
|
||||
<button
|
||||
key={r.name}
|
||||
onClick={() => handleCompare(promptText, r.name)}
|
||||
className="text-[11px] px-2 py-1 rounded font-mono transition-colors"
|
||||
style={{
|
||||
background: 'var(--surface-muted)',
|
||||
color: 'var(--accent-secondary)',
|
||||
}}
|
||||
>
|
||||
{r.name}
|
||||
</button>
|
||||
))
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
Loading…
Reference in New Issue
Block a user