feat(local-llm): Sprint 2 — streaming pull progress, token metrics, fixes (B2/F1,F6,B7,B8)
New features: - B2/F1: Streaming model pull with real-time progress bar. New /api/ollama/pull/route.ts pipes NDJSON from Ollama stream:true. UI shows status, completed/total bytes, and percentage during download. - F6: Token/s metrics after prompt generation. Parses eval_count and eval_duration from the final NDJSON chunk. Displays tok/s, total tokens, and duration in the prompt modal footer. Bug fixes: - B7: Parse vm_stat page size from output instead of hardcoding 16384. Reads 'page size of N bytes' from the first line for portability. - B8: Whisper model discovery now scans multiple directories: WHISPER_MODELS_DIR env var, ~/whisper-models, /opt/homebrew/share/whisper-cpp/models/, ~/.cache/whisper/. Returns the first dir with .bin files found.
This commit is contained in:
parent
9a807f64cf
commit
2d9475bd15
44
__LOCAL_LLMs/dashboard/src/app/api/ollama/pull/route.ts
Normal file
44
__LOCAL_LLMs/dashboard/src/app/api/ollama/pull/route.ts
Normal file
@ -0,0 +1,44 @@
|
|||||||
|
import { NextRequest } from 'next/server';
|
||||||
|
|
||||||
|
const OLLAMA_URL = process.env.OLLAMA_URL || 'http://localhost:11434';
|
||||||
|
|
||||||
|
/**
 * POST handler that proxies a model pull to Ollama and streams progress back.
 *
 * Expects a JSON body of the form `{ "model": "<name>" }`. On success the
 * upstream NDJSON progress stream from `${OLLAMA_URL}/api/pull` is piped to
 * the client unchanged, one JSON object per line.
 *
 * @param request Incoming request whose JSON body carries the model name.
 * @returns A streaming `application/x-ndjson` response on success; otherwise a
 *   JSON `{ error }` response with status 400 (unparseable body, or missing,
 *   non-string, or blank model name) or 500 (Ollama unreachable or returned a
 *   non-OK status).
 */
export async function POST(request: NextRequest) {
  // Single place to build the JSON error envelope used by every failure path.
  const jsonError = (message: string, status: number): Response =>
    new Response(JSON.stringify({ error: message }), {
      status,
      headers: { 'Content-Type': 'application/json' },
    });

  // Parse the body separately so that a malformed payload is reported as a
  // client error (400) instead of falling into the generic 500 handler.
  let body: unknown;
  try {
    body = await request.json();
  } catch {
    return jsonError('Invalid JSON body', 400);
  }

  // `body` may legally be `null`, a number, an array, etc.; only read `model`
  // from a real object so destructuring can never throw.
  const model =
    typeof body === 'object' && body !== null ? (body as { model?: unknown }).model : undefined;

  if (typeof model !== 'string' || !model.trim()) {
    return jsonError('Missing model name', 400);
  }

  try {
    const response = await fetch(`${OLLAMA_URL}/api/pull`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ name: model.trim(), stream: true }),
    });

    if (!response.ok || !response.body) {
      return jsonError(`Ollama pull error: ${response.status}`, 500);
    }

    // Pipe the Ollama pull stream directly to the client.
    // No explicit Transfer-Encoding header: it is a hop-by-hop header that the
    // HTTP layer manages itself for streamed bodies, and it is not allowed on
    // HTTP/2 connections.
    return new Response(response.body, {
      headers: {
        'Content-Type': 'application/x-ndjson',
        'Cache-Control': 'no-cache',
      },
    });
  } catch (err) {
    // Network-level failure talking to Ollama (e.g. connection refused).
    return jsonError(String(err), 500);
  }
}
|
||||||
@ -100,7 +100,8 @@ async function getAccurateMemory(): Promise<{
|
|||||||
const totalMem = os.totalmem();
|
const totalMem = os.totalmem();
|
||||||
try {
|
try {
|
||||||
const { stdout } = await execAsync('vm_stat', { timeout: 2000 });
|
const { stdout } = await execAsync('vm_stat', { timeout: 2000 });
|
||||||
const pageSize = 16384; // macOS Apple Silicon default
|
const pageSizeMatch = stdout.match(/page size of (\d+) bytes/);
|
||||||
|
const pageSize = pageSizeMatch ? parseInt(pageSizeMatch[1]) : 16384;
|
||||||
const parse = (label: string): number => {
|
const parse = (label: string): number => {
|
||||||
const match = stdout.match(new RegExp(`${label}:\\s+(\\d+)`));
|
const match = stdout.match(new RegExp(`${label}:\\s+(\\d+)`));
|
||||||
return match ? parseInt(match[1]) * pageSize : 0;
|
return match ? parseInt(match[1]) * pageSize : 0;
|
||||||
|
|||||||
@ -20,23 +20,37 @@ async function getWhisperBinaries(): Promise<string[]> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async function getWhisperModels(): Promise<Array<{ name: string; size: number; path: string }>> {
|
const WHISPER_MODEL_DIRS = (process.env.WHISPER_MODELS_DIR || '')
|
||||||
const modelsDir = join(homedir(), 'whisper-models');
|
.split(':')
|
||||||
try {
|
.filter(Boolean)
|
||||||
const files = await readdir(modelsDir);
|
.concat([
|
||||||
const models = await Promise.all(
|
join(homedir(), 'whisper-models'),
|
||||||
files
|
'/opt/homebrew/share/whisper-cpp/models',
|
||||||
.filter(f => f.endsWith('.bin'))
|
join(homedir(), '.cache', 'whisper'),
|
||||||
.map(async f => {
|
]);
|
||||||
const filePath = join(modelsDir, f);
|
|
||||||
|
async function getWhisperModels(): Promise<{
|
||||||
|
models: Array<{ name: string; size: number; path: string }>;
|
||||||
|
modelsDir: string;
|
||||||
|
}> {
|
||||||
|
for (const dir of WHISPER_MODEL_DIRS) {
|
||||||
|
try {
|
||||||
|
const files = await readdir(dir);
|
||||||
|
const binFiles = files.filter(f => f.endsWith('.bin'));
|
||||||
|
if (binFiles.length === 0) continue;
|
||||||
|
const models = await Promise.all(
|
||||||
|
binFiles.map(async f => {
|
||||||
|
const filePath = join(dir, f);
|
||||||
const s = await stat(filePath);
|
const s = await stat(filePath);
|
||||||
return { name: f.replace('ggml-', '').replace('.bin', ''), size: s.size, path: filePath };
|
return { name: f.replace('ggml-', '').replace('.bin', ''), size: s.size, path: filePath };
|
||||||
})
|
})
|
||||||
);
|
);
|
||||||
return models;
|
return { models, modelsDir: dir };
|
||||||
} catch {
|
} catch {
|
||||||
return [];
|
// dir doesn't exist, try next
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
return { models: [], modelsDir: WHISPER_MODEL_DIRS[0] };
|
||||||
}
|
}
|
||||||
|
|
||||||
async function getWhisperVersion(): Promise<string> {
|
async function getWhisperVersion(): Promise<string> {
|
||||||
@ -49,7 +63,7 @@ async function getWhisperVersion(): Promise<string> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
export async function GET() {
|
export async function GET() {
|
||||||
const [binaries, models, version] = await Promise.all([
|
const [binaries, whisperResult, version] = await Promise.all([
|
||||||
getWhisperBinaries(),
|
getWhisperBinaries(),
|
||||||
getWhisperModels(),
|
getWhisperModels(),
|
||||||
getWhisperVersion(),
|
getWhisperVersion(),
|
||||||
@ -59,7 +73,7 @@ export async function GET() {
|
|||||||
installed: binaries.length > 0,
|
installed: binaries.length > 0,
|
||||||
version,
|
version,
|
||||||
binaries,
|
binaries,
|
||||||
models,
|
models: whisperResult.models,
|
||||||
modelsDir: join(homedir(), 'whisper-models'),
|
modelsDir: whisperResult.modelsDir,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|||||||
@ -150,7 +150,17 @@ export default function Dashboard() {
|
|||||||
const [toasts, setToasts] = useState<Toast[]>([]);
|
const [toasts, setToasts] = useState<Toast[]>([]);
|
||||||
const [pullInput, setPullInput] = useState('');
|
const [pullInput, setPullInput] = useState('');
|
||||||
const [pullLoading, setPullLoading] = useState(false);
|
const [pullLoading, setPullLoading] = useState(false);
|
||||||
|
const [pullProgress, setPullProgress] = useState<{
|
||||||
|
status: string;
|
||||||
|
completed: number;
|
||||||
|
total: number;
|
||||||
|
} | null>(null);
|
||||||
const [copied, setCopied] = useState(false);
|
const [copied, setCopied] = useState(false);
|
||||||
|
const [streamMetrics, setStreamMetrics] = useState<{
|
||||||
|
tokensPerSec: number;
|
||||||
|
totalTokens: number;
|
||||||
|
durationMs: number;
|
||||||
|
} | null>(null);
|
||||||
const [deleteConfirm, setDeleteConfirm] = useState<string | null>(null);
|
const [deleteConfirm, setDeleteConfirm] = useState<string | null>(null);
|
||||||
const responseRef = useRef<HTMLDivElement>(null);
|
const responseRef = useRef<HTMLDivElement>(null);
|
||||||
const abortRef = useRef<AbortController | null>(null);
|
const abortRef = useRef<AbortController | null>(null);
|
||||||
@ -228,26 +238,60 @@ export default function Dashboard() {
|
|||||||
|
|
||||||
const handlePull = async () => {
|
const handlePull = async () => {
|
||||||
if (!pullInput.trim()) return;
|
if (!pullInput.trim()) return;
|
||||||
|
const modelName = pullInput.trim();
|
||||||
setPullLoading(true);
|
setPullLoading(true);
|
||||||
addToast(`Pulling ${pullInput}... this may take a while`, 'info');
|
setPullProgress({ status: 'starting', completed: 0, total: 0 });
|
||||||
try {
|
try {
|
||||||
const res = await fetch('/api/ollama', {
|
const res = await fetch('/api/ollama/pull', {
|
||||||
method: 'POST',
|
method: 'POST',
|
||||||
headers: { 'Content-Type': 'application/json' },
|
headers: { 'Content-Type': 'application/json' },
|
||||||
body: JSON.stringify({ action: 'pull', model: pullInput.trim() }),
|
body: JSON.stringify({ model: modelName }),
|
||||||
});
|
});
|
||||||
const data = await res.json();
|
if (!res.ok || !res.body) {
|
||||||
if (data.success) {
|
addToast(`Pull failed: ${res.status}`, 'error');
|
||||||
addToast(`Successfully pulled ${pullInput}`, 'success');
|
setPullLoading(false);
|
||||||
setPullInput('');
|
setPullProgress(null);
|
||||||
} else {
|
return;
|
||||||
addToast(data.error || 'Pull failed', 'error');
|
|
||||||
}
|
}
|
||||||
|
const reader = res.body.getReader();
|
||||||
|
const decoder = new TextDecoder();
|
||||||
|
let buffer = '';
|
||||||
|
while (true) {
|
||||||
|
const { done, value } = await reader.read();
|
||||||
|
if (done) break;
|
||||||
|
buffer += decoder.decode(value, { stream: true });
|
||||||
|
const lines = buffer.split('\n');
|
||||||
|
buffer = lines.pop() || '';
|
||||||
|
for (const line of lines) {
|
||||||
|
if (!line.trim()) continue;
|
||||||
|
try {
|
||||||
|
const chunk = JSON.parse(line);
|
||||||
|
if (chunk.total && chunk.completed) {
|
||||||
|
setPullProgress({
|
||||||
|
status: chunk.status || 'downloading',
|
||||||
|
completed: chunk.completed,
|
||||||
|
total: chunk.total,
|
||||||
|
});
|
||||||
|
} else if (chunk.status) {
|
||||||
|
setPullProgress(prev => ({
|
||||||
|
status: chunk.status,
|
||||||
|
completed: prev?.completed || 0,
|
||||||
|
total: prev?.total || 0,
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
} catch {
|
||||||
|
/* skip */
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
addToast(`Successfully pulled ${modelName}`, 'success');
|
||||||
|
setPullInput('');
|
||||||
await fetchAll();
|
await fetchAll();
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
addToast(`Pull failed: ${err}`, 'error');
|
addToast(`Pull failed: ${err}`, 'error');
|
||||||
}
|
}
|
||||||
setPullLoading(false);
|
setPullLoading(false);
|
||||||
|
setPullProgress(null);
|
||||||
};
|
};
|
||||||
|
|
||||||
// Streaming prompt
|
// Streaming prompt
|
||||||
@ -255,6 +299,7 @@ export default function Dashboard() {
|
|||||||
if (!promptModel || !promptText.trim()) return;
|
if (!promptModel || !promptText.trim()) return;
|
||||||
setPromptLoading(true);
|
setPromptLoading(true);
|
||||||
setPromptResponse('');
|
setPromptResponse('');
|
||||||
|
setStreamMetrics(null);
|
||||||
const controller = new AbortController();
|
const controller = new AbortController();
|
||||||
abortRef.current = controller;
|
abortRef.current = controller;
|
||||||
try {
|
try {
|
||||||
@ -288,6 +333,11 @@ export default function Dashboard() {
|
|||||||
setPromptResponse(fullResponse);
|
setPromptResponse(fullResponse);
|
||||||
responseRef.current?.scrollTo(0, responseRef.current.scrollHeight);
|
responseRef.current?.scrollTo(0, responseRef.current.scrollHeight);
|
||||||
}
|
}
|
||||||
|
if (chunk.done && chunk.eval_count && chunk.eval_duration) {
|
||||||
|
const durationMs = chunk.eval_duration / 1e6;
|
||||||
|
const tokensPerSec = durationMs > 0 ? (chunk.eval_count / durationMs) * 1000 : 0;
|
||||||
|
setStreamMetrics({ tokensPerSec, totalTokens: chunk.eval_count, durationMs });
|
||||||
|
}
|
||||||
} catch {
|
} catch {
|
||||||
/* skip malformed lines */
|
/* skip malformed lines */
|
||||||
}
|
}
|
||||||
@ -522,6 +572,29 @@ export default function Dashboard() {
|
|||||||
</div>
|
</div>
|
||||||
)}
|
)}
|
||||||
|
|
||||||
|
{/* Pull Progress Bar */}
|
||||||
|
{pullProgress && (
|
||||||
|
<div className="mb-4 p-3 rounded-lg" style={{ background: 'var(--surface-muted)' }}>
|
||||||
|
<div className="flex items-center justify-between mb-1.5">
|
||||||
|
<span className="text-xs font-medium" style={{ color: 'var(--text-secondary)' }}>
|
||||||
|
{pullProgress.status}
|
||||||
|
</span>
|
||||||
|
{pullProgress.total > 0 && (
|
||||||
|
<span className="text-xs font-mono" style={{ color: 'var(--text-tertiary)' }}>
|
||||||
|
{formatBytes(pullProgress.completed)} / {formatBytes(pullProgress.total)}
|
||||||
|
{' · '}
|
||||||
|
{Math.round((pullProgress.completed / pullProgress.total) * 100)}%
|
||||||
|
</span>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
<ProgressBar
|
||||||
|
value={pullProgress.completed}
|
||||||
|
max={pullProgress.total || 1}
|
||||||
|
color="var(--accent-primary)"
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
|
||||||
{ollama?.status !== 'online' ? (
|
{ollama?.status !== 'online' ? (
|
||||||
<div
|
<div
|
||||||
className="flex items-center gap-3 p-4 rounded-lg"
|
className="flex items-center gap-3 p-4 rounded-lg"
|
||||||
@ -1065,6 +1138,12 @@ export default function Dashboard() {
|
|||||||
Streaming...
|
Streaming...
|
||||||
</span>
|
</span>
|
||||||
)}
|
)}
|
||||||
|
{streamMetrics && !promptLoading && (
|
||||||
|
<span className="text-[11px] font-mono" style={{ color: 'var(--text-tertiary)' }}>
|
||||||
|
{streamMetrics.tokensPerSec.toFixed(1)} tok/s · {streamMetrics.totalTokens}{' '}
|
||||||
|
tokens · {(streamMetrics.durationMs / 1000).toFixed(1)}s
|
||||||
|
</span>
|
||||||
|
)}
|
||||||
</div>
|
</div>
|
||||||
{promptResponse && (
|
{promptResponse && (
|
||||||
<div
|
<div
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user