perf(local-llm): Sprint 5 — request dedup + cache TTLs (P1, P2, P3)
Performance fixes:
- P1: Add a fetchingRef guard to fetchAll() — prevents duplicate requests caused by rapid Refresh button clicks or overlapping interval ticks.
- P2: Add a 5-minute TTL to staticCache (chip, GPU, brew packages) — previously the values were cached indefinitely per server process; they now refresh after brew upgrades without requiring a restart.
- P3: Add a 60-second TTL cache for the Ollama models disk usage (du command) — previously ~/.ollama/models was traversed on every 15s refresh cycle; the cached value is now reused for 60s.
This commit is contained in:
parent
9892fe7145
commit
b1fda3a1a5
@ -6,12 +6,17 @@ import os from 'os';
|
|||||||
const execAsync = promisify(exec);
|
const execAsync = promisify(exec);
|
||||||
const execFileAsync = promisify(execFile);
|
const execFileAsync = promisify(execFile);
|
||||||
|
|
||||||
// Cache slow commands (chip, gpu, brew don't change during a session)
|
// Cache slow commands with TTL
|
||||||
let staticCache: {
|
let staticCache: {
|
||||||
chip: string;
|
chip: string;
|
||||||
gpu: string;
|
gpu: string;
|
||||||
brewPackages: Array<{ name: string; version: string }>;
|
brewPackages: Array<{ name: string; version: string }>;
|
||||||
|
ts: number;
|
||||||
} | null = null;
|
} | null = null;
|
||||||
|
const STATIC_TTL = 5 * 60 * 1000; // 5 minutes
|
||||||
|
|
||||||
|
let ollamaDiskCache: { value: number; ts: number } | null = null;
|
||||||
|
const OLLAMA_DISK_TTL = 60 * 1000; // 60 seconds
|
||||||
|
|
||||||
async function getChipInfo(): Promise<string> {
|
async function getChipInfo(): Promise<string> {
|
||||||
try {
|
try {
|
||||||
@ -80,16 +85,24 @@ async function getBrewPackages(): Promise<Array<{ name: string; version: string
|
|||||||
}
|
}
|
||||||
|
|
||||||
async function getStaticInfo() {
|
async function getStaticInfo() {
|
||||||
if (staticCache) return staticCache;
|
if (staticCache && Date.now() - staticCache.ts < STATIC_TTL) return staticCache;
|
||||||
const [chip, gpu, brewPackages] = await Promise.all([
|
const [chip, gpu, brewPackages] = await Promise.all([
|
||||||
getChipInfo(),
|
getChipInfo(),
|
||||||
getGpuInfo(),
|
getGpuInfo(),
|
||||||
getBrewPackages(),
|
getBrewPackages(),
|
||||||
]);
|
]);
|
||||||
staticCache = { chip, gpu, brewPackages };
|
staticCache = { chip, gpu, brewPackages, ts: Date.now() };
|
||||||
return staticCache;
|
return staticCache;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async function getCachedOllamaDiskUsage(): Promise<number> {
|
||||||
|
if (ollamaDiskCache && Date.now() - ollamaDiskCache.ts < OLLAMA_DISK_TTL)
|
||||||
|
return ollamaDiskCache.value;
|
||||||
|
const value = await getOllamaModelsDiskUsage();
|
||||||
|
ollamaDiskCache = { value, ts: Date.now() };
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
|
||||||
// macOS vm_stat gives accurate memory breakdown (os.freemem() excludes reclaimable cache)
|
// macOS vm_stat gives accurate memory breakdown (os.freemem() excludes reclaimable cache)
|
||||||
async function getAccurateMemory(): Promise<{
|
async function getAccurateMemory(): Promise<{
|
||||||
total: number;
|
total: number;
|
||||||
@ -140,7 +153,7 @@ export async function GET() {
|
|||||||
const [staticInfo, disk, ollamaDisk, memory] = await Promise.all([
|
const [staticInfo, disk, ollamaDisk, memory] = await Promise.all([
|
||||||
getStaticInfo(),
|
getStaticInfo(),
|
||||||
getDiskSpace(),
|
getDiskSpace(),
|
||||||
getOllamaModelsDiskUsage(),
|
getCachedOllamaDiskUsage(),
|
||||||
getAccurateMemory(),
|
getAccurateMemory(),
|
||||||
]);
|
]);
|
||||||
|
|
||||||
|
|||||||
@ -68,6 +68,7 @@ export default function Dashboard() {
|
|||||||
const [modelfileData, setModelfileData] = useState<Record<string, string>>({});
|
const [modelfileData, setModelfileData] = useState<Record<string, string>>({});
|
||||||
const responseRef = useRef<HTMLDivElement>(null);
|
const responseRef = useRef<HTMLDivElement>(null);
|
||||||
const abortRef = useRef<AbortController | null>(null);
|
const abortRef = useRef<AbortController | null>(null);
|
||||||
|
const fetchingRef = useRef(false);
|
||||||
|
|
||||||
const addToast = useCallback((message: string, type: Toast['type'] = 'info') => {
|
const addToast = useCallback((message: string, type: Toast['type'] = 'info') => {
|
||||||
const id = Date.now() + Math.random();
|
const id = Date.now() + Math.random();
|
||||||
@ -76,6 +77,8 @@ export default function Dashboard() {
|
|||||||
}, []);
|
}, []);
|
||||||
|
|
||||||
const fetchAll = useCallback(async () => {
|
const fetchAll = useCallback(async () => {
|
||||||
|
if (fetchingRef.current) return;
|
||||||
|
fetchingRef.current = true;
|
||||||
setLoading(true);
|
setLoading(true);
|
||||||
const [oRes, wRes, sRes] = await Promise.allSettled([
|
const [oRes, wRes, sRes] = await Promise.allSettled([
|
||||||
fetch('/api/ollama').then(r => r.json()),
|
fetch('/api/ollama').then(r => r.json()),
|
||||||
@ -87,6 +90,7 @@ export default function Dashboard() {
|
|||||||
if (sRes.status === 'fulfilled') setSystem(sRes.value);
|
if (sRes.status === 'fulfilled') setSystem(sRes.value);
|
||||||
setLastRefresh(new Date());
|
setLastRefresh(new Date());
|
||||||
setLoading(false);
|
setLoading(false);
|
||||||
|
fetchingRef.current = false;
|
||||||
}, []);
|
}, []);
|
||||||
|
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user