perf(local-llm): Sprint 5 — request dedup + cache TTLs (P1, P2, P3)

Performance fixes:
- P1: Add fetchingRef guard to fetchAll() — prevents duplicate requests
  from rapid Refresh button clicks or overlapping interval ticks
- P2: Add 5-minute TTL to staticCache (chip, GPU, brew packages) —
  previously cached for the lifetime of the server process; entries now
  expire after 5 minutes, so brew upgrades show up without a restart
- P3: Add 60-second TTL cache for Ollama models disk usage (du command)
  — previously traversed ~/.ollama/models on every 15s refresh cycle,
  now reuses cached value for 60s
This commit is contained in:
saravanakumardb1 2026-02-19 15:28:07 -08:00
parent 9892fe7145
commit b1fda3a1a5
2 changed files with 21 additions and 4 deletions

View File

@ -6,12 +6,17 @@ import os from 'os';
const execAsync = promisify(exec); const execAsync = promisify(exec);
const execFileAsync = promisify(execFile); const execFileAsync = promisify(execFile);
// Cache slow commands (chip, gpu, brew don't change during a session) // Cache slow commands with TTL
let staticCache: { let staticCache: {
chip: string; chip: string;
gpu: string; gpu: string;
brewPackages: Array<{ name: string; version: string }>; brewPackages: Array<{ name: string; version: string }>;
ts: number;
} | null = null; } | null = null;
const STATIC_TTL = 5 * 60 * 1000; // 5 minutes
let ollamaDiskCache: { value: number; ts: number } | null = null;
const OLLAMA_DISK_TTL = 60 * 1000; // 60 seconds
async function getChipInfo(): Promise<string> { async function getChipInfo(): Promise<string> {
try { try {
@ -80,16 +85,24 @@ async function getBrewPackages(): Promise<Array<{ name: string; version: string
} }
async function getStaticInfo() { async function getStaticInfo() {
if (staticCache) return staticCache; if (staticCache && Date.now() - staticCache.ts < STATIC_TTL) return staticCache;
const [chip, gpu, brewPackages] = await Promise.all([ const [chip, gpu, brewPackages] = await Promise.all([
getChipInfo(), getChipInfo(),
getGpuInfo(), getGpuInfo(),
getBrewPackages(), getBrewPackages(),
]); ]);
staticCache = { chip, gpu, brewPackages }; staticCache = { chip, gpu, brewPackages, ts: Date.now() };
return staticCache; return staticCache;
} }
/**
 * Returns the Ollama models disk usage, served from the module-level
 * `ollamaDiskCache` while that entry is younger than `OLLAMA_DISK_TTL`.
 *
 * When no entry exists or the entry has aged out, the value is re-measured
 * via `getOllamaModelsDiskUsage()`, stored together with the current
 * timestamp, and returned.
 *
 * @returns The cached or freshly measured disk usage value.
 */
async function getCachedOllamaDiskUsage(): Promise<number> {
  const cached = ollamaDiskCache;
  if (cached !== null) {
    const ageMs = Date.now() - cached.ts;
    if (ageMs < OLLAMA_DISK_TTL) {
      // Still fresh: skip the expensive measurement.
      return cached.value;
    }
  }

  const measured = await getOllamaModelsDiskUsage();
  // Stamp the entry after the measurement completes so the TTL window
  // starts from when the value became available.
  ollamaDiskCache = { value: measured, ts: Date.now() };
  return measured;
}
// macOS vm_stat gives accurate memory breakdown (os.freemem() excludes reclaimable cache) // macOS vm_stat gives accurate memory breakdown (os.freemem() excludes reclaimable cache)
async function getAccurateMemory(): Promise<{ async function getAccurateMemory(): Promise<{
total: number; total: number;
@ -140,7 +153,7 @@ export async function GET() {
const [staticInfo, disk, ollamaDisk, memory] = await Promise.all([ const [staticInfo, disk, ollamaDisk, memory] = await Promise.all([
getStaticInfo(), getStaticInfo(),
getDiskSpace(), getDiskSpace(),
getOllamaModelsDiskUsage(), getCachedOllamaDiskUsage(),
getAccurateMemory(), getAccurateMemory(),
]); ]);

View File

@ -68,6 +68,7 @@ export default function Dashboard() {
const [modelfileData, setModelfileData] = useState<Record<string, string>>({}); const [modelfileData, setModelfileData] = useState<Record<string, string>>({});
const responseRef = useRef<HTMLDivElement>(null); const responseRef = useRef<HTMLDivElement>(null);
const abortRef = useRef<AbortController | null>(null); const abortRef = useRef<AbortController | null>(null);
const fetchingRef = useRef(false);
const addToast = useCallback((message: string, type: Toast['type'] = 'info') => { const addToast = useCallback((message: string, type: Toast['type'] = 'info') => {
const id = Date.now() + Math.random(); const id = Date.now() + Math.random();
@ -76,6 +77,8 @@ export default function Dashboard() {
}, []); }, []);
const fetchAll = useCallback(async () => { const fetchAll = useCallback(async () => {
if (fetchingRef.current) return;
fetchingRef.current = true;
setLoading(true); setLoading(true);
const [oRes, wRes, sRes] = await Promise.allSettled([ const [oRes, wRes, sRes] = await Promise.allSettled([
fetch('/api/ollama').then(r => r.json()), fetch('/api/ollama').then(r => r.json()),
@ -87,6 +90,7 @@ export default function Dashboard() {
if (sRes.status === 'fulfilled') setSystem(sRes.value); if (sRes.status === 'fulfilled') setSystem(sRes.value);
setLastRefresh(new Date()); setLastRefresh(new Date());
setLoading(false); setLoading(false);
fetchingRef.current = false;
}, []); }, []);
useEffect(() => { useEffect(() => {