perf(local-llm): Sprint 5 — request dedup + cache TTLs (P1, P2, P3)
Performance fixes:
- P1: Add a fetchingRef guard to fetchAll() — prevents duplicate requests from rapid Refresh button clicks or overlapping interval ticks.
- P2: Add a 5-minute TTL to staticCache (chip, GPU, brew packages) — previously cached indefinitely per server process; now refreshes after brew upgrades without requiring a restart.
- P3: Add a 60-second TTL cache for Ollama models disk usage (du command) — previously traversed ~/.ollama/models on every 15s refresh cycle; now reuses the cached value for 60s.
This commit is contained in:
parent
9892fe7145
commit
b1fda3a1a5
@ -6,12 +6,17 @@ import os from 'os';
|
||||
const execAsync = promisify(exec);
|
||||
const execFileAsync = promisify(execFile);
|
||||
|
||||
// Module-level caches for slow commands. Each entry records when it was
// filled (`ts`, from Date.now()) so readers can discard it once it is older
// than the matching TTL.

/** Most recent chip / GPU / Homebrew package snapshot; null until first filled. */
let staticCache: {
  chip: string;
  gpu: string;
  brewPackages: Array<{ name: string; version: string }>;
  ts: number;
} | null = null;
/** Maximum age of `staticCache`, in milliseconds, before it is rebuilt. */
const STATIC_TTL = 5 * 60 * 1000; // 5 minutes

/** Most recent Ollama models disk-usage reading; null until first filled. */
let ollamaDiskCache: { value: number; ts: number } | null = null;
/** Maximum age of `ollamaDiskCache`, in milliseconds, before it is re-measured. */
const OLLAMA_DISK_TTL = 60 * 1000; // 60 seconds
|
||||
|
||||
async function getChipInfo(): Promise<string> {
|
||||
try {
|
||||
@ -80,16 +85,24 @@ async function getBrewPackages(): Promise<Array<{ name: string; version: string
|
||||
}
|
||||
|
||||
/**
 * Returns the chip, GPU and Homebrew package snapshot, reusing the cached
 * copy while it is younger than STATIC_TTL.
 *
 * When the cache is empty or expired, the three lookups run in parallel and
 * the result is stored together with the time it was obtained.
 *
 * @returns The cached or freshly gathered snapshot, including its `ts` stamp.
 */
async function getStaticInfo() {
  // The TTL check must be the only early return: an unconditional
  // `if (staticCache) return staticCache;` would pin the first snapshot forever.
  if (staticCache && Date.now() - staticCache.ts < STATIC_TTL) return staticCache;

  const [chip, gpu, brewPackages] = await Promise.all([
    getChipInfo(),
    getGpuInfo(),
    getBrewPackages(),
  ]);

  // Stamp after the lookups finish so the TTL counts from when the data was read.
  staticCache = { chip, gpu, brewPackages, ts: Date.now() };
  return staticCache;
}
|
||||
|
||||
/**
 * Returns the Ollama models disk usage, served from `ollamaDiskCache` while
 * that entry is younger than OLLAMA_DISK_TTL.
 *
 * Otherwise delegates to getOllamaModelsDiskUsage(), stores the new reading
 * with the current time, and returns it.
 *
 * @returns The cached or freshly measured usage value.
 */
async function getCachedOllamaDiskUsage(): Promise<number> {
  const cached = ollamaDiskCache;
  if (cached && Date.now() - cached.ts < OLLAMA_DISK_TTL) {
    return cached.value;
  }

  // Cache miss or stale entry: measure again and remember when we did.
  const fresh = await getOllamaModelsDiskUsage();
  ollamaDiskCache = { value: fresh, ts: Date.now() };
  return fresh;
}
|
||||
|
||||
// macOS vm_stat gives accurate memory breakdown (os.freemem() excludes reclaimable cache)
|
||||
async function getAccurateMemory(): Promise<{
|
||||
total: number;
|
||||
@ -140,7 +153,7 @@ export async function GET() {
|
||||
const [staticInfo, disk, ollamaDisk, memory] = await Promise.all([
|
||||
getStaticInfo(),
|
||||
getDiskSpace(),
|
||||
getOllamaModelsDiskUsage(),
|
||||
getCachedOllamaDiskUsage(),
|
||||
getAccurateMemory(),
|
||||
]);
|
||||
|
||||
|
||||
@ -68,6 +68,7 @@ export default function Dashboard() {
|
||||
const [modelfileData, setModelfileData] = useState<Record<string, string>>({});
|
||||
const responseRef = useRef<HTMLDivElement>(null);
|
||||
const abortRef = useRef<AbortController | null>(null);
|
||||
const fetchingRef = useRef(false);
|
||||
|
||||
const addToast = useCallback((message: string, type: Toast['type'] = 'info') => {
|
||||
const id = Date.now() + Math.random();
|
||||
@ -76,6 +77,8 @@ export default function Dashboard() {
|
||||
}, []);
|
||||
|
||||
const fetchAll = useCallback(async () => {
|
||||
if (fetchingRef.current) return;
|
||||
fetchingRef.current = true;
|
||||
setLoading(true);
|
||||
const [oRes, wRes, sRes] = await Promise.allSettled([
|
||||
fetch('/api/ollama').then(r => r.json()),
|
||||
@ -87,6 +90,7 @@ export default function Dashboard() {
|
||||
if (sRes.status === 'fulfilled') setSystem(sRes.value);
|
||||
setLastRefresh(new Date());
|
||||
setLoading(false);
|
||||
fetchingRef.current = false;
|
||||
}, []);
|
||||
|
||||
useEffect(() => {
|
||||
|
||||
Loading…
Reference in New Issue
Block a user