feat(local-llm): Phase 1 — pre-load intelligence + bug fixes (N1-N3, BN1-BN2, BN5)
N1: Estimated RAM per model with quant-aware multipliers (Q4=1.2x, Q5=1.25x, Q8=1.1x, F16=1.05x)
N2: Will-it-fit indicator (green/yellow/red dot) next to Load button
N3: Aggregate loaded model VRAM in panel header badge
BN1: Compare buttons now filter to running models only
BN2: AbortController on compare stream, cancel on modal close
BN5: Delete confirmation shows model name + disk reclaim size
This commit is contained in:
parent
ae231d5aac
commit
040013e495
@ -6,6 +6,32 @@ export function formatBytes(bytes: number): string {
|
|||||||
return `${parseFloat((bytes / Math.pow(k, i)).toFixed(1))} ${sizes[i]}`;
|
return `${parseFloat((bytes / Math.pow(k, i)).toFixed(1))} ${sizes[i]}`;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// N1: Estimate RAM usage from disk size + quantization level
|
||||||
|
// Apple Silicon uses unified memory — GPU and CPU share the same pool
|
||||||
|
/**
 * Estimate how much memory a model needs once loaded, derived from its
 * on-disk size and quantization label.
 *
 * The disk size is scaled by a multiplier chosen from the first matching
 * rule (case-insensitive substring match on the label):
 * Q4 → 1.2, Q5 → 1.25, Q8 → 1.1, F16/F32 → 1.05.
 * A missing or unrecognized label falls back to 1.2.
 *
 * @param diskSize - Model size on disk; the result is in the same unit.
 * @param quant - Optional quantization label, e.g. `Q4_K_M`.
 * @returns The estimate rounded to the nearest integer, or 0 when
 *   `diskSize` is not a finite positive number.
 */
export function estimateRam(diskSize: number, quant?: string): number {
  // Guard against NaN / negative / infinite sizes so callers never
  // receive a NaN or negative estimate to format.
  if (!Number.isFinite(diskSize) || diskSize <= 0) return 0;

  const DEFAULT_MULTIPLIER = 1.2;
  // Ordered: the first rule whose tag appears in the label wins.
  const rules: ReadonlyArray<{ tags: readonly string[]; multiplier: number }> = [
    { tags: ['Q4'], multiplier: 1.2 },
    { tags: ['Q5'], multiplier: 1.25 },
    { tags: ['Q8'], multiplier: 1.1 },
    { tags: ['F16', 'F32'], multiplier: 1.05 },
  ];

  const label = (quant ?? '').toUpperCase();
  const matched = rules.find(rule => rule.tags.some(tag => label.includes(tag)));
  const multiplier = matched ? matched.multiplier : DEFAULT_MULTIPLIER;

  return Math.round(diskSize * multiplier);
}
|
||||||
|
|
||||||
|
// N2: Check if model fits in available memory
|
||||||
|
/** Outcome of a will-it-fit check: comfortable, borderline, or not fitting. */
export type FitStatus = 'fits' | 'tight' | 'no';

/**
 * Classify whether a model of the given estimated size fits in memory.
 *
 * Available memory is taken as all of `freeMemory` plus half of
 * `cachedMemory`. The estimate is compared against it as a ratio:
 * below 0.7 → `'fits'`, up to and including 1.0 → `'tight'`,
 * above 1.0 → `'no'`.
 *
 * @param estimatedRam - Estimated memory the model needs.
 * @param freeMemory - Currently free memory, same unit as `estimatedRam`.
 * @param cachedMemory - Cached memory, same unit; counted at 50%.
 * @returns The fit classification; `'no'` when no memory is available
 *   or the inputs are not comparable numbers.
 */
export function checkMemoryFit(
  estimatedRam: number,
  freeMemory: number,
  cachedMemory: number
): FitStatus {
  const available = freeMemory + cachedMemory * 0.5;

  // Negated comparison so NaN is rejected too. Without this guard a
  // negative `available` yields a negative ratio that would read as 'fits'.
  if (!(available > 0)) return 'no';

  const ratio = estimatedRam / available;
  if (ratio < 0.7) return 'fits';
  if (ratio <= 1.0) return 'tight';
  return 'no';
}
|
||||||
|
|
||||||
export function formatUptime(seconds: number): string {
|
export function formatUptime(seconds: number): string {
|
||||||
const d = Math.floor(seconds / 86400);
|
const d = Math.floor(seconds / 86400);
|
||||||
const h = Math.floor((seconds % 86400) / 3600);
|
const h = Math.floor((seconds % 86400) / 3600);
|
||||||
|
|||||||
@ -44,7 +44,7 @@ import type {
|
|||||||
PullProgress,
|
PullProgress,
|
||||||
StreamMetrics,
|
StreamMetrics,
|
||||||
} from './lib/types';
|
} from './lib/types';
|
||||||
import { formatBytes, formatUptime } from './lib/format';
|
import { formatBytes, formatUptime, estimateRam, checkMemoryFit } from './lib/format';
|
||||||
import { StatusDot } from './components/StatusDot';
|
import { StatusDot } from './components/StatusDot';
|
||||||
import { ProgressBar } from './components/ProgressBar';
|
import { ProgressBar } from './components/ProgressBar';
|
||||||
import { Sparkline } from './components/Sparkline';
|
import { Sparkline } from './components/Sparkline';
|
||||||
@ -96,6 +96,7 @@ export default function Dashboard() {
|
|||||||
const [compareResponse, setCompareResponse] = useState('');
|
const [compareResponse, setCompareResponse] = useState('');
|
||||||
const responseRef = useRef<HTMLDivElement>(null);
|
const responseRef = useRef<HTMLDivElement>(null);
|
||||||
const abortRef = useRef<AbortController | null>(null);
|
const abortRef = useRef<AbortController | null>(null);
|
||||||
|
const compareAbortRef = useRef<AbortController | null>(null);
|
||||||
const fetchingRef = useRef(false);
|
const fetchingRef = useRef(false);
|
||||||
|
|
||||||
const addToast = useCallback((message: string, type: Toast['type'] = 'info') => {
|
const addToast = useCallback((message: string, type: Toast['type'] = 'info') => {
|
||||||
@ -232,14 +233,18 @@ export default function Dashboard() {
|
|||||||
};
|
};
|
||||||
|
|
||||||
// Model comparison (F5) — send same prompt to second model
|
// Model comparison (F5) — send same prompt to second model
|
||||||
|
// BN2: Added AbortController so compare stream cancels on modal close
|
||||||
const handleCompare = async (prompt: string, model2: string) => {
|
const handleCompare = async (prompt: string, model2: string) => {
|
||||||
setCompareModel(model2);
|
setCompareModel(model2);
|
||||||
setCompareResponse('');
|
setCompareResponse('');
|
||||||
|
const controller = new AbortController();
|
||||||
|
compareAbortRef.current = controller;
|
||||||
try {
|
try {
|
||||||
const res = await fetch('/api/ollama/stream', {
|
const res = await fetch('/api/ollama/stream', {
|
||||||
method: 'POST',
|
method: 'POST',
|
||||||
headers: { 'Content-Type': 'application/json' },
|
headers: { 'Content-Type': 'application/json' },
|
||||||
body: JSON.stringify({ model: model2, prompt }),
|
body: JSON.stringify({ model: model2, prompt }),
|
||||||
|
signal: controller.signal,
|
||||||
});
|
});
|
||||||
if (!res.ok || !res.body) {
|
if (!res.ok || !res.body) {
|
||||||
setCompareResponse('Error');
|
setCompareResponse('Error');
|
||||||
@ -270,8 +275,11 @@ export default function Dashboard() {
|
|||||||
}
|
}
|
||||||
if (!full) setCompareResponse('(empty response)');
|
if (!full) setCompareResponse('(empty response)');
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
setCompareResponse(`Error: ${err}`);
|
if (!controller.signal.aborted) {
|
||||||
|
setCompareResponse(`Error: ${err}`);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
compareAbortRef.current = null;
|
||||||
};
|
};
|
||||||
|
|
||||||
// Auto-load model helpers (F16)
|
// Auto-load model helpers (F16)
|
||||||
@ -784,6 +792,11 @@ export default function Dashboard() {
|
|||||||
style={{ background: 'var(--surface-muted)', color: 'var(--success)' }}
|
style={{ background: 'var(--surface-muted)', color: 'var(--success)' }}
|
||||||
>
|
>
|
||||||
{ollama.runningCount} active
|
{ollama.runningCount} active
|
||||||
|
{ollama.running.length > 0 &&
|
||||||
|
(() => {
|
||||||
|
const totalVram = ollama.running.reduce((sum, r) => sum + r.size_vram, 0);
|
||||||
|
return ` · ${formatBytes(totalVram)} VRAM`;
|
||||||
|
})()}
|
||||||
</span>
|
</span>
|
||||||
)}
|
)}
|
||||||
</div>
|
</div>
|
||||||
@ -898,6 +911,10 @@ export default function Dashboard() {
|
|||||||
.map(model => {
|
.map(model => {
|
||||||
const running = isRunning(model.name);
|
const running = isRunning(model.name);
|
||||||
const expanded = expandedModel === model.name;
|
const expanded = expandedModel === model.name;
|
||||||
|
const estRam = estimateRam(model.size, model.details?.quantization_level);
|
||||||
|
const fitStatus = system
|
||||||
|
? checkMemoryFit(estRam, system.memory.free, system.memory.cached)
|
||||||
|
: null;
|
||||||
return (
|
return (
|
||||||
<div
|
<div
|
||||||
key={model.name}
|
key={model.name}
|
||||||
@ -941,7 +958,7 @@ export default function Dashboard() {
|
|||||||
)}
|
)}
|
||||||
</div>
|
</div>
|
||||||
<div
|
<div
|
||||||
className="flex items-center gap-3 text-xs mt-0.5"
|
className="flex items-center gap-3 text-xs mt-0.5 flex-wrap"
|
||||||
style={{ color: 'var(--text-tertiary)' }}
|
style={{ color: 'var(--text-tertiary)' }}
|
||||||
>
|
>
|
||||||
<span>{formatBytes(model.size)}</span>
|
<span>{formatBytes(model.size)}</span>
|
||||||
@ -951,6 +968,9 @@ export default function Dashboard() {
|
|||||||
{model.details?.quantization_level && (
|
{model.details?.quantization_level && (
|
||||||
<span>{model.details.quantization_level}</span>
|
<span>{model.details.quantization_level}</span>
|
||||||
)}
|
)}
|
||||||
|
<span title="Estimated RAM when loaded (Apple Silicon unified memory)">
|
||||||
|
~{formatBytes(estRam)} RAM
|
||||||
|
</span>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
@ -985,22 +1005,44 @@ export default function Dashboard() {
|
|||||||
</button>
|
</button>
|
||||||
</>
|
</>
|
||||||
) : (
|
) : (
|
||||||
<button
|
<div className="flex items-center gap-2">
|
||||||
onClick={() => handleModelAction('load', model.name)}
|
{fitStatus && !running && (
|
||||||
disabled={actionLoading === `load-${model.name}`}
|
<span
|
||||||
className="flex items-center gap-1.5 px-3 py-1.5 rounded-md text-xs font-medium transition-colors"
|
className="w-2 h-2 rounded-full shrink-0"
|
||||||
style={{
|
title={
|
||||||
background: 'rgba(52, 211, 153, 0.1)',
|
fitStatus === 'fits'
|
||||||
color: 'var(--success)',
|
? 'Fits comfortably in available memory'
|
||||||
}}
|
: fitStatus === 'tight'
|
||||||
>
|
? 'Tight — may cause swap pressure'
|
||||||
{actionLoading === `load-${model.name}` ? (
|
: "Won't fit — will swap heavily"
|
||||||
<RefreshCw className="w-3 h-3 animate-spin" />
|
}
|
||||||
) : (
|
style={{
|
||||||
<Play className="w-3 h-3" />
|
background:
|
||||||
|
fitStatus === 'fits'
|
||||||
|
? 'var(--success)'
|
||||||
|
: fitStatus === 'tight'
|
||||||
|
? 'var(--warning)'
|
||||||
|
: 'var(--danger)',
|
||||||
|
}}
|
||||||
|
/>
|
||||||
)}
|
)}
|
||||||
Load
|
<button
|
||||||
</button>
|
onClick={() => handleModelAction('load', model.name)}
|
||||||
|
disabled={actionLoading === `load-${model.name}`}
|
||||||
|
className="flex items-center gap-1.5 px-3 py-1.5 rounded-md text-xs font-medium transition-colors"
|
||||||
|
style={{
|
||||||
|
background: 'rgba(52, 211, 153, 0.1)',
|
||||||
|
color: 'var(--success)',
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
{actionLoading === `load-${model.name}` ? (
|
||||||
|
<RefreshCw className="w-3 h-3 animate-spin" />
|
||||||
|
) : (
|
||||||
|
<Play className="w-3 h-3" />
|
||||||
|
)}
|
||||||
|
Load
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
)}
|
)}
|
||||||
<button
|
<button
|
||||||
onClick={() => {
|
onClick={() => {
|
||||||
@ -1121,7 +1163,7 @@ export default function Dashboard() {
|
|||||||
{deleteConfirm === model.name ? (
|
{deleteConfirm === model.name ? (
|
||||||
<div className="flex items-center gap-2">
|
<div className="flex items-center gap-2">
|
||||||
<span className="text-xs" style={{ color: 'var(--danger)' }}>
|
<span className="text-xs" style={{ color: 'var(--danger)' }}>
|
||||||
Delete this model?
|
Delete {model.name}? Reclaim {formatBytes(model.size)}
|
||||||
</span>
|
</span>
|
||||||
<button
|
<button
|
||||||
onClick={() => handleModelAction('delete', model.name)}
|
onClick={() => handleModelAction('delete', model.name)}
|
||||||
@ -1523,9 +1565,13 @@ export default function Dashboard() {
|
|||||||
style={{ background: 'rgba(0,0,0,0.6)' }}
|
style={{ background: 'rgba(0,0,0,0.6)' }}
|
||||||
onClick={e => {
|
onClick={e => {
|
||||||
if (e.target === e.currentTarget && !promptLoading) {
|
if (e.target === e.currentTarget && !promptLoading) {
|
||||||
|
abortRef.current?.abort();
|
||||||
|
compareAbortRef.current?.abort();
|
||||||
setPromptModel(null);
|
setPromptModel(null);
|
||||||
setPromptResponse('');
|
setPromptResponse('');
|
||||||
setPromptText('');
|
setPromptText('');
|
||||||
|
setCompareModel(null);
|
||||||
|
setCompareResponse('');
|
||||||
}
|
}
|
||||||
}}
|
}}
|
||||||
>
|
>
|
||||||
@ -1575,9 +1621,13 @@ export default function Dashboard() {
|
|||||||
<button
|
<button
|
||||||
onClick={() => {
|
onClick={() => {
|
||||||
if (!promptLoading) {
|
if (!promptLoading) {
|
||||||
|
abortRef.current?.abort();
|
||||||
|
compareAbortRef.current?.abort();
|
||||||
setPromptModel(null);
|
setPromptModel(null);
|
||||||
setPromptResponse('');
|
setPromptResponse('');
|
||||||
setPromptText('');
|
setPromptText('');
|
||||||
|
setCompareModel(null);
|
||||||
|
setCompareResponse('');
|
||||||
}
|
}
|
||||||
}}
|
}}
|
||||||
className="p-2 rounded-lg transition-colors hover:bg-[var(--surface-card)]"
|
className="p-2 rounded-lg transition-colors hover:bg-[var(--surface-card)]"
|
||||||
@ -1800,28 +1850,34 @@ export default function Dashboard() {
|
|||||||
</div>
|
</div>
|
||||||
)}
|
)}
|
||||||
</div>
|
</div>
|
||||||
{/* Compare with another model (F5) */}
|
{/* Compare with another model (F5) — BN1: only show loaded models */}
|
||||||
{!promptLoading && !compareModel && ollama && ollama.models.length > 1 && (
|
{!promptLoading && !compareModel && ollama && ollama.running.length > 0 && (
|
||||||
<div className="flex items-center gap-2">
|
<div className="flex items-center gap-2 flex-wrap">
|
||||||
<span className="text-[11px]" style={{ color: 'var(--text-tertiary)' }}>
|
<span className="text-[11px]" style={{ color: 'var(--text-tertiary)' }}>
|
||||||
Compare with:
|
Compare with:
|
||||||
</span>
|
</span>
|
||||||
{ollama.models
|
{ollama.running.filter(r => r.name !== promptModel).slice(0, 3).length === 0 ? (
|
||||||
.filter(m => m.name !== promptModel)
|
<span className="text-[11px]" style={{ color: 'var(--text-tertiary)' }}>
|
||||||
.slice(0, 3)
|
Load another model to compare
|
||||||
.map(m => (
|
</span>
|
||||||
<button
|
) : (
|
||||||
key={m.name}
|
ollama.running
|
||||||
onClick={() => handleCompare(promptText, m.name)}
|
.filter(r => r.name !== promptModel)
|
||||||
className="text-[11px] px-2 py-1 rounded font-mono transition-colors"
|
.slice(0, 3)
|
||||||
style={{
|
.map(r => (
|
||||||
background: 'var(--surface-muted)',
|
<button
|
||||||
color: 'var(--accent-secondary)',
|
key={r.name}
|
||||||
}}
|
onClick={() => handleCompare(promptText, r.name)}
|
||||||
>
|
className="text-[11px] px-2 py-1 rounded font-mono transition-colors"
|
||||||
{m.name}
|
style={{
|
||||||
</button>
|
background: 'var(--surface-muted)',
|
||||||
))}
|
color: 'var(--accent-secondary)',
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
{r.name}
|
||||||
|
</button>
|
||||||
|
))
|
||||||
|
)}
|
||||||
</div>
|
</div>
|
||||||
)}
|
)}
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user