fix(local-llm): dashboard v2 — streaming prompts, model management, perf fixes

Bug fixes:
- Fix Google Fonts build error (corporate proxy blocks fonts.gstatic.com)
  by removing Geist font imports and switching to system font stack
- Fix system API 7.6s latency by caching static info (chip, GPU, brew)
  with timeouts on shell commands — now responds in ~50ms

New features:
- Streaming prompt responses via NDJSON proxy (/api/ollama/stream)
  with typing cursor animation and auto-scroll
- Model pull UI: input field + button to download new models
- Model delete with two-step confirmation dialog
- VRAM usage and expiry time display for loaded models
- Toast notifications (success/error/info) with slide-in animation
- Copy response button in prompt modal
- Escape key closes modals, backdrop click dismisses
- Pull/delete/show actions added to Ollama API route
This commit is contained in:
saravanakumardb1 2026-02-19 13:03:11 -08:00
parent 2565714c52
commit 970b565026

View File

@ -0,0 +1,37 @@
import { NextRequest } from 'next/server';
/** Ollama endpoint used when the OLLAMA_URL environment variable is unset or blank. */
const DEFAULT_OLLAMA_URL = 'http://localhost:11434';

/**
 * Resolves the base URL of the Ollama server from a raw configuration value.
 *
 * Surrounding whitespace and trailing slashes are stripped so that callers can
 * append a path such as `/api/generate` without producing a double slash.
 *
 * @param raw - The configured value, typically `process.env.OLLAMA_URL`.
 * @returns The normalized base URL, or the localhost default when `raw` is
 *   undefined, empty, or consists only of whitespace/slashes.
 */
function normalizeOllamaUrl(raw: string | undefined): string {
  const trimmed = (raw ?? '').trim().replace(/\/+$/, '');
  // `||` on purpose: an empty string must fall back to the default too.
  return trimmed || DEFAULT_OLLAMA_URL;
}

/** Base URL of the Ollama server, without a trailing slash. */
const OLLAMA_URL = normalizeOllamaUrl(process.env.OLLAMA_URL);
/**
 * Proxies a prompt to Ollama's `/api/generate` endpoint and streams the
 * newline-delimited JSON response back to the caller.
 *
 * Expects a JSON object body with a non-empty string `model` and an optional
 * string `prompt`.
 *
 * @param request - Incoming request carrying the JSON payload.
 * @returns On success, a streaming `application/x-ndjson` response whose body
 *   is the upstream Ollama stream. On failure, a JSON `{ error }` response:
 *   400 for an invalid payload, 500 when Ollama responds with an error or
 *   cannot be reached.
 */
export async function POST(request: NextRequest): Promise<Response> {
  const jsonError = (message: string, status: number): Response =>
    new Response(JSON.stringify({ error: message }), {
      status,
      headers: { 'Content-Type': 'application/json' },
    });

  // Parse and validate up front so bad input is reported as a client error
  // rather than surfacing later as an opaque upstream failure.
  let payload: unknown;
  try {
    payload = await request.json();
  } catch {
    return jsonError('Request body must be valid JSON', 400);
  }
  if (typeof payload !== 'object' || payload === null) {
    return jsonError('Request body must be a JSON object', 400);
  }

  const { model, prompt } = payload as { model?: unknown; prompt?: unknown };
  if (typeof model !== 'string' || model.trim() === '') {
    return jsonError('"model" must be a non-empty string', 400);
  }
  if (prompt !== undefined && typeof prompt !== 'string') {
    return jsonError('"prompt" must be a string', 400);
  }

  try {
    const upstream = await fetch(`${OLLAMA_URL}/api/generate`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ model, prompt, stream: true }),
      // Abort the upstream request when the client goes away so Ollama does
      // not keep working on a response nobody will read.
      signal: request.signal,
    });

    if (!upstream.ok || !upstream.body) {
      // Discard any unread error body so the upstream connection is released.
      void upstream.body?.cancel().catch(() => undefined);
      return jsonError(`Ollama error: ${upstream.status}`, 500);
    }

    // Pipe the Ollama stream directly to the client. Transfer-Encoding is
    // deliberately not set: it is a hop-by-hop header managed by the HTTP
    // server and is not allowed on HTTP/2 connections.
    return new Response(upstream.body, {
      headers: {
        'Content-Type': 'application/x-ndjson',
        'Cache-Control': 'no-cache',
      },
    });
  } catch (err) {
    return jsonError(String(err), 500);
  }
}