perf(local-llm): Sprint 5 — request dedup + cache TTLs (P1, P2, P3)

Performance fixes:
- P1: Add fetchingRef guard to fetchAll() — prevents duplicate requests
  from rapid Refresh button clicks or overlapping interval ticks
- P2: Add 5-minute TTL to staticCache (chip, GPU, brew packages) —
  previously cached for the lifetime of the server process; now re-read
  once the cached entry is older than 5 minutes, so changes such as a
  brew upgrade show up without requiring a restart
- P3: Add 60-second TTL cache for Ollama models disk usage (du command)
  — previously traversed ~/.ollama/models on every 15s refresh cycle,
  now reuses cached value for 60s
This commit is contained in:
saravanakumardb1 2026-02-19 15:28:07 -08:00
parent 9892fe7145
commit b1fda3a1a5
2 changed files with 21 additions and 4 deletions

View File

@ -6,12 +6,17 @@ import os from 'os';
const execAsync = promisify(exec);
const execFileAsync = promisify(execFile);
// Module-level caches for slow shell commands. Each entry records the
// Date.now() timestamp (`ts`) at which it was stored so readers can expire it
// against the matching TTL constant below.

// Last chip / GPU / brew package snapshot; null until the first successful read.
let staticCache: {
chip: string;
gpu: string;
brewPackages: Array<{ name: string; version: string }>;
ts: number;
} | null = null;
// Maximum age, in milliseconds, for which staticCache is served as-is.
const STATIC_TTL = 5 * 60 * 1000; // 5 minutes
// Last Ollama models disk-usage reading; null until the first measurement.
let ollamaDiskCache: { value: number; ts: number } | null = null;
// Maximum age, in milliseconds, for which ollamaDiskCache is served as-is.
const OLLAMA_DISK_TTL = 60 * 1000; // 60 seconds
async function getChipInfo(): Promise<string> {
try {
@ -80,16 +85,24 @@ async function getBrewPackages(): Promise<Array<{ name: string; version: string
}
/**
 * Returns the chip, GPU and brew package information, reusing the
 * module-level `staticCache` while that entry is younger than `STATIC_TTL`.
 *
 * When the cache is empty or expired, the three lookups run in parallel and
 * the result is stored together with the time it was captured.
 *
 * @returns The cached or freshly collected snapshot, including its `ts`.
 */
async function getStaticInfo() {
  // Only a still-fresh entry may short-circuit; an unconditional
  // `if (staticCache) return staticCache` would make the TTL ineffective.
  if (staticCache && Date.now() - staticCache.ts < STATIC_TTL) return staticCache;

  const [chip, gpu, brewPackages] = await Promise.all([
    getChipInfo(),
    getGpuInfo(),
    getBrewPackages(),
  ]);

  // Stamp the entry after the lookups finish so its age reflects when the
  // data became available, not when the refresh started.
  staticCache = { chip, gpu, brewPackages, ts: Date.now() };
  return staticCache;
}
/**
 * Returns the Ollama models disk usage, reusing the value held in
 * `ollamaDiskCache` while it is younger than `OLLAMA_DISK_TTL`.
 *
 * On a miss (no entry yet, or the entry has expired) it awaits
 * `getOllamaModelsDiskUsage()`, stores the result with the current
 * timestamp, and returns it.
 *
 * @returns The cached or freshly measured disk usage value.
 */
async function getCachedOllamaDiskUsage(): Promise<number> {
  const entry = ollamaDiskCache;
  if (entry !== null && Date.now() - entry.ts < OLLAMA_DISK_TTL) {
    // Cache hit: the stored reading is still within its TTL.
    return entry.value;
  }

  // Cache miss: take a new measurement and remember when it was taken.
  const measured = await getOllamaModelsDiskUsage();
  ollamaDiskCache = { value: measured, ts: Date.now() };
  return measured;
}
// macOS vm_stat gives accurate memory breakdown (os.freemem() excludes reclaimable cache)
async function getAccurateMemory(): Promise<{
total: number;
@ -140,7 +153,7 @@ export async function GET() {
const [staticInfo, disk, ollamaDisk, memory] = await Promise.all([
getStaticInfo(),
getDiskSpace(),
getOllamaModelsDiskUsage(),
getCachedOllamaDiskUsage(),
getAccurateMemory(),
]);

View File

@ -68,6 +68,7 @@ export default function Dashboard() {
const [modelfileData, setModelfileData] = useState<Record<string, string>>({});
const responseRef = useRef<HTMLDivElement>(null);
const abortRef = useRef<AbortController | null>(null);
const fetchingRef = useRef(false);
const addToast = useCallback((message: string, type: Toast['type'] = 'info') => {
const id = Date.now() + Math.random();
@ -76,6 +77,8 @@ export default function Dashboard() {
}, []);
const fetchAll = useCallback(async () => {
if (fetchingRef.current) return;
fetchingRef.current = true;
setLoading(true);
const [oRes, wRes, sRes] = await Promise.allSettled([
fetch('/api/ollama').then(r => r.json()),
@ -87,6 +90,7 @@ export default function Dashboard() {
if (sRes.status === 'fulfilled') setSystem(sRes.value);
setLastRefresh(new Date());
setLoading(false);
fetchingRef.current = false;
}, []);
useEffect(() => {