feat(local-llm): Phase 4 — runtime metrics + UX polish (N11-N14)

N11: Persist tok/s per model to localStorage (llm-model-benchmarks) and display it on the model card as faded accent text.
N12: Live countdown to auto-unload — 1s interval, color-coded (tertiary text color >5m, warning color 1-5m, danger color <1m, shown as 'Unloading soon').
N13: Session stats per model (prompts + tokens) in the expanded details.
N14: Co-load suggestions strip below the models list, showing up to three unloaded models that fit in the remaining free memory.
2026-02-19 23:20:30 -08:00 · 2026-02-19 23:20:30 -08:00 · 588d21c70e
commit 588d21c70e
parent 6f6baf99c8
1 changed files with 154 additions and 0 deletions
--- a/__LOCAL_LLMs/dashboard/src/app/page.tsx
+++ b/__LOCAL_LLMs/dashboard/src/app/page.tsx
@ -107,6 +107,13 @@ export default function Dashboard() {
  const [modelSort, setModelSort] = useState<'name' | 'size' | 'params' | 'running' | 'modified'>(
    'name'
  );
+  const [modelBenchmarks, setModelBenchmarks] = useState<
+    Record<string, { tokPerSec: number; totalTokens: number; timestamp: number }>
+  >({});
+  const [sessionStats, setSessionStats] = useState<
+    Record<string, { prompts: number; tokens: number }>
+  >({});
+  const [countdownTick, setCountdownTick] = useState(0);
  const responseRef = useRef<HTMLDivElement>(null);
  const abortRef = useRef<AbortController | null>(null);
  const compareAbortRef = useRef<AbortController | null>(null);
@ -164,6 +171,12 @@ export default function Dashboard() {
    setAutoLoadModel(localStorage.getItem('llm-auto-load-model'));
    const savedSort = localStorage.getItem('llm-model-sort');
    if (savedSort) setModelSort(savedSort as typeof modelSort);
+    try {
+      const savedBench = localStorage.getItem('llm-model-benchmarks');
+      if (savedBench) setModelBenchmarks(JSON.parse(savedBench));
+    } catch {
+      /* ignore */
+    }
  }, []);

  useEffect(() => {
@ -190,6 +203,13 @@ export default function Dashboard() {
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [promptModel, chatMode]);

+  // N12: Countdown tick — update every second for live unload countdown
+  useEffect(() => {
+    if (!ollama || ollama.running.length === 0) return;
+    const interval = setInterval(() => setCountdownTick(t => t + 1), 1000);
+    return () => clearInterval(interval);
+  }, [ollama?.running.length]);
+
  // F16: Auto-load preferred model when Ollama is online but nothing loaded
  useEffect(() => {
    if (!autoLoadModel || !ollama || ollama.status !== 'online') return;
@ -516,6 +536,32 @@ export default function Dashboard() {
              const durationMs = chunk.eval_duration / 1e6;
              const tokensPerSec = durationMs > 0 ? (chunk.eval_count / durationMs) * 1000 : 0;
              setStreamMetrics({ tokensPerSec, totalTokens: chunk.eval_count, durationMs });
+              // N11: Persist benchmark
+              if (promptModel) {
+                const updated = {
+                  ...modelBenchmarks,
+                  [promptModel]: {
+                    tokPerSec: tokensPerSec,
+                    totalTokens: chunk.eval_count,
+                    timestamp: Date.now(),
+                  },
+                };
+                setModelBenchmarks(updated);
+                localStorage.setItem('llm-model-benchmarks', JSON.stringify(updated));
+              }
+              // N13: Update session stats
+              if (promptModel) {
+                setSessionStats(prev => {
+                  const cur = prev[promptModel] || { prompts: 0, tokens: 0 };
+                  return {
+                    ...prev,
+                    [promptModel]: {
+                      prompts: cur.prompts + 1,
+                      tokens: cur.tokens + chunk.eval_count,
+                    },
+                  };
+                });
+              }
            }
          } catch {
            /* skip malformed lines */
@ -603,6 +649,32 @@ export default function Dashboard() {
              const durationMs = chunk.eval_duration / 1e6;
              const tokensPerSec = durationMs > 0 ? (chunk.eval_count / durationMs) * 1000 : 0;
              setStreamMetrics({ tokensPerSec, totalTokens: chunk.eval_count, durationMs });
+              // N11: Persist benchmark
+              if (promptModel) {
+                const updated = {
+                  ...modelBenchmarks,
+                  [promptModel]: {
+                    tokPerSec: tokensPerSec,
+                    totalTokens: chunk.eval_count,
+                    timestamp: Date.now(),
+                  },
+                };
+                setModelBenchmarks(updated);
+                localStorage.setItem('llm-model-benchmarks', JSON.stringify(updated));
+              }
+              // N13: Update session stats
+              if (promptModel) {
+                setSessionStats(prev => {
+                  const cur = prev[promptModel] || { prompts: 0, tokens: 0 };
+                  return {
+                    ...prev,
+                    [promptModel]: {
+                      prompts: cur.prompts + 1,
+                      tokens: cur.tokens + chunk.eval_count,
+                    },
+                  };
+                });
+              }
            }
          } catch {
            /* skip */
@ -1107,7 +1179,41 @@ export default function Dashboard() {
                                  </span>
                                ) : null;
                              })()}
+                              {modelBenchmarks[model.name] && (
+                                <span
+                                  style={{ color: 'var(--accent-secondary)' }}
+                                  title={`Last benchmarked: ${new Date(modelBenchmarks[model.name].timestamp).toLocaleString()}`}
+                                >
+                                  ~{modelBenchmarks[model.name].tokPerSec.toFixed(1)} tok/s
+                                </span>
+                              )}
                            </div>
+                            {running &&
+                              (() => {
+                                const rm = ollama?.running.find(r => r.name === model.name);
+                                if (!rm?.expires_at) return null;
+                                const remaining = new Date(rm.expires_at).getTime() - Date.now();
+                                void countdownTick;
+                                if (remaining <= 0) return null;
+                                const mins = Math.floor(remaining / 60000);
+                                const secs = Math.floor((remaining % 60000) / 1000);
+                                const isUrgent = remaining < 60000;
+                                const isWarning = remaining < 300000;
+                                return (
+                                  <div
+                                    className="text-[10px] mt-0.5"
+                                    style={{
+                                      color: isUrgent
+                                        ? 'var(--danger)'
+                                        : isWarning
+                                          ? 'var(--warning)'
+                                          : 'var(--text-tertiary)',
+                                    }}
+                                  >
+                                    {isUrgent ? 'Unloading soon' : `Unloads in ${mins}m ${secs}s`}
+                                  </div>
+                                );
+                              })()}
                          </div>
                        </div>
                        <div className="flex items-center gap-2 ml-3">
@ -1228,6 +1334,13 @@ export default function Dashboard() {
                          <p>Digest: {model.digest?.substring(0, 16)}...</p>
                          <p>Modified: {new Date(model.modified_at).toLocaleString()}</p>
                          {model.details?.family && <p>Family: {model.details.family}</p>}
+                          {sessionStats[model.name] && (
+                            <p style={{ color: 'var(--accent-secondary)' }}>
+                              Session: {sessionStats[model.name].prompts} prompt
+                              {sessionStats[model.name].prompts !== 1 ? 's' : ''} ·{' '}
+                              {sessionStats[model.name].tokens.toLocaleString()} tokens
+                            </p>
+                          )}
                          {/* Model Tags (F14) */}
                          <div className="flex flex-wrap items-center gap-1.5 mt-2 font-sans">
                            {['coding', 'chat', 'fast', 'vision', 'reasoning'].map(tag => (
@ -1339,6 +1452,47 @@ export default function Dashboard() {
                  No models installed. Run &quot;ollama pull &lt;model&gt;&quot; to get started.
                </p>
              )}
+              {/* N14: Co-load suggestions */}
+              {system &&
+                ollama.running.length > 0 &&
+                (() => {
+                  const usedVram = ollama.running.reduce((sum, r) => sum + r.size_vram, 0);
+                  const freeForModels =
+                    system.memory.free + system.memory.cached * 0.5 - usedVram * 0.1;
+                  const suggestions = ollama.models
+                    .filter(m => !isRunning(m.name))
+                    .map(m => ({
+                      name: m.name,
+                      est: estimateRam(m.size, m.details?.quantization_level),
+                    }))
+                    .filter(m => m.est < freeForModels)
+                    .sort((a, b) => b.est - a.est)
+                    .slice(0, 3);
+                  if (suggestions.length === 0) return null;
+                  return (
+                    <div
+                      className="mt-3 p-3 rounded-lg"
+                      style={{
+                        background: 'var(--surface-muted)',
+                        border: '1px solid var(--border-subtle)',
+                      }}
+                    >
+                      <span className="text-[11px]" style={{ color: 'var(--text-tertiary)' }}>
+                        Can also load:{' '}
+                      </span>
+                      {suggestions.map((s, i) => (
+                        <span
+                          key={s.name}
+                          className="text-[11px] font-mono"
+                          style={{ color: 'var(--accent-secondary)' }}
+                        >
+                          {i > 0 && ', '}
+                          {s.name.split(':')[0]} (~{formatBytes(s.est)})
+                        </span>
+                      ))}
+                    </div>
+                  );
+                })()}
            </div>
          )}
        </div>