From 588d21c70e0362bec16a26566fc5e416289bafe0 Mon Sep 17 00:00:00 2001
From: saravanakumardb1 <saravanakumardb1@users.noreply.github.com>
Date: Thu, 19 Feb 2026 23:20:30 -0800
Subject: [PATCH] =?UTF-8?q?feat(local-llm):=20Phase=204=20=E2=80=94=20runt?=
 =?UTF-8?q?ime=20metrics=20+=20UX=20polish=20(N11-N14)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

N11: Persist tok/s per model to localStorage (llm-model-benchmarks),
     display on model card as faded accent text
N12: Live countdown to auto-unload — 1s interval, color-coded
     (muted text >5m, warning 1-5m, danger <1m 'Unloading soon')
N13: Session stats per model (prompts + tokens) in expanded details
N14: Co-load suggestions strip below models list showing which
     unloaded models fit in remaining free memory
---
 __LOCAL_LLMs/dashboard/src/app/page.tsx | 154 ++++++++++++++++++++++++
 1 file changed, 154 insertions(+)
diff --git a/__LOCAL_LLMs/dashboard/src/app/page.tsx b/__LOCAL_LLMs/dashboard/src/app/page.tsx
index 2f7ef77c..2b20938e 100644
--- a/__LOCAL_LLMs/dashboard/src/app/page.tsx
+++ b/__LOCAL_LLMs/dashboard/src/app/page.tsx
@@ -107,6 +107,13 @@ export default function Dashboard() {
   const [modelSort, setModelSort] = useState<'name' | 'size' | 'params' | 'running' | 'modified'>(
     'name'
   );
+  const [modelBenchmarks, setModelBenchmarks] = useState<
+    Record<string, { tokPerSec: number; totalTokens: number; timestamp: number }>
+  >({}); // N11: latest benchmark sample keyed by model name, mirrored to localStorage
+  const [sessionStats, setSessionStats] = useState<
+    Record<string, { prompts: number; tokens: number }>
+  >({}); // N13: per-model prompt and token totals accumulated in component state
+  const [countdownTick, setCountdownTick] = useState(0); // N12: bumped by a 1s interval
   const responseRef = useRef<HTMLDivElement>(null);
   const abortRef = useRef<AbortController | null>(null);
   const compareAbortRef = useRef<AbortController | null>(null);
@@ -164,6 +171,12 @@ export default function Dashboard() {
     setAutoLoadModel(localStorage.getItem('llm-auto-load-model'));
     const savedSort = localStorage.getItem('llm-model-sort');
     if (savedSort) setModelSort(savedSort as typeof modelSort);
+    try {
+      const saved = JSON.parse(localStorage.getItem('llm-model-benchmarks') ?? 'null');
+      if (saved && typeof saved === 'object' && !Array.isArray(saved)) setModelBenchmarks(saved);
+    } catch {
+      /* unreadable storage or malformed JSON: start empty; non-object JSON is skipped above */
+    }
   }, []);
 
   useEffect(() => {
@@ -190,6 +203,13 @@ export default function Dashboard() {
     // eslint-disable-next-line react-hooks/exhaustive-deps
   }, [promptModel, chatMode]);
 
+  // N12: While at least one model is loaded, bump countdownTick once per second
+  useEffect(() => {
+    if (!ollama?.running.length) return;
+    const timerId = setInterval(() => setCountdownTick(tick => tick + 1), 1000);
+    return () => clearInterval(timerId);
+  }, [ollama?.running.length]);
+
   // F16: Auto-load preferred model when Ollama is online but nothing loaded
   useEffect(() => {
     if (!autoLoadModel || !ollama || ollama.status !== 'online') return;
@@ -516,6 +536,32 @@ export default function Dashboard() {
               const durationMs = chunk.eval_duration / 1e6;
               const tokensPerSec = durationMs > 0 ? (chunk.eval_count / durationMs) * 1000 : 0;
               setStreamMetrics({ tokensPerSec, totalTokens: chunk.eval_count, durationMs });
+              // N11 + N13: record per-model metrics for this response.
+              if (promptModel) {
+                const evalCount = chunk.eval_count;
+                const timestamp = Date.now();
+                // Functional update: the `modelBenchmarks` closed over by this stream handler may be
+                // stale. The storage write is idempotent, so a re-run of the updater is harmless.
+                setModelBenchmarks(prev => {
+                  const next = {
+                    ...prev,
+                    [promptModel]: { tokPerSec: tokensPerSec, totalTokens: evalCount, timestamp },
+                  };
+                  try {
+                    localStorage.setItem('llm-model-benchmarks', JSON.stringify(next));
+                  } catch {
+                    /* storage full or unavailable: keep the in-memory benchmark only */
+                  }
+                  return next;
+                });
+                setSessionStats(prev => {
+                  const cur = prev[promptModel] || { prompts: 0, tokens: 0 };
+                  return {
+                    ...prev,
+                    [promptModel]: { prompts: cur.prompts + 1, tokens: cur.tokens + evalCount },
+                  };
+                });
+              }
             }
           } catch {
             /* skip malformed lines */
@@ -603,6 +649,32 @@ export default function Dashboard() {
               const durationMs = chunk.eval_duration / 1e6;
               const tokensPerSec = durationMs > 0 ? (chunk.eval_count / durationMs) * 1000 : 0;
               setStreamMetrics({ tokensPerSec, totalTokens: chunk.eval_count, durationMs });
+              // N11 + N13: record per-model metrics for this response.
+              if (promptModel) {
+                const evalCount = chunk.eval_count;
+                const timestamp = Date.now();
+                // Functional update: the `modelBenchmarks` closed over by this stream handler may be
+                // stale. The storage write is idempotent, so a re-run of the updater is harmless.
+                setModelBenchmarks(prev => {
+                  const next = {
+                    ...prev,
+                    [promptModel]: { tokPerSec: tokensPerSec, totalTokens: evalCount, timestamp },
+                  };
+                  try {
+                    localStorage.setItem('llm-model-benchmarks', JSON.stringify(next));
+                  } catch {
+                    /* storage full or unavailable: keep the in-memory benchmark only */
+                  }
+                  return next;
+                });
+                setSessionStats(prev => {
+                  const cur = prev[promptModel] || { prompts: 0, tokens: 0 };
+                  return {
+                    ...prev,
+                    [promptModel]: { prompts: cur.prompts + 1, tokens: cur.tokens + evalCount },
+                  };
+                });
+              }
             }
           } catch {
             /* skip */
@@ -1107,7 +1179,41 @@ export default function Dashboard() {
                                   </span>
                                 ) : null;
                               })()}
+                              {modelBenchmarks[model.name] && ( // N11: latest stored tok/s for this model; tooltip shows when it was recorded
+                                <span
+                                  style={{ color: 'var(--accent-secondary)' }}
+                                  title={`Last benchmarked: ${new Date(modelBenchmarks[model.name].timestamp).toLocaleString()}`}
+                                >
+                                  ~{modelBenchmarks[model.name].tokPerSec.toFixed(1)} tok/s
+                                </span>
+                              )}
                             </div>
+                            {running &&
+                              (() => {
+                                // N12: live auto-unload countdown for this loaded model.
+                                // countdownTick is only referenced here; its once-per-second
+                                // state change is what re-renders this label.
+                                void countdownTick;
+                                // Find the running entry that carries this model's expiry time.
+                                const entry = ollama?.running.find(r => r.name === model.name);
+                                if (!entry?.expires_at) return null;
+                                const msLeft = new Date(entry.expires_at).getTime() - Date.now();
+                                // Nothing is shown once the expiry time has passed.
+                                if (msLeft <= 0) return null;
+                                const totalSecs = Math.floor(msLeft / 1000);
+                                const mins = Math.floor(totalSecs / 60);
+                                const secs = totalSecs % 60;
+                                // Severity: under 1 minute is danger, under 5 minutes is warning.
+                                const isUrgent = msLeft < 60000;
+                                let color = 'var(--text-tertiary)';
+                                if (msLeft < 300000) color = 'var(--warning)';
+                                if (isUrgent) color = 'var(--danger)';
+                                return (
+                                  <div className="text-[10px] mt-0.5" style={{ color }}>
+                                    {isUrgent ? 'Unloading soon' : `Unloads in ${mins}m ${secs}s`}
+                                  </div>
+                                );
+                              })()}
                           </div>
                         </div>
                         <div className="flex items-center gap-2 ml-3">
@@ -1228,6 +1334,13 @@ export default function Dashboard() {
                           <p>Digest: {model.digest?.substring(0, 16)}...</p>
                           <p>Modified: {new Date(model.modified_at).toLocaleString()}</p>
                           {model.details?.family && <p>Family: {model.details.family}</p>}
+                          {sessionStats[model.name] && ( // N13: shown once this model has session stats recorded
+                            <p style={{ color: 'var(--accent-secondary)' }}>
+                              Session: {sessionStats[model.name].prompts} prompt
+                              {sessionStats[model.name].prompts !== 1 ? 's' : ''} ·{' '}
+                              {sessionStats[model.name].tokens.toLocaleString()} tokens
+                            </p>
+                          )}
                           {/* Model Tags (F14) */}
                           <div className="flex flex-wrap items-center gap-1.5 mt-2 font-sans">
                             {['coding', 'chat', 'fast', 'vision', 'reasoning'].map(tag => (
@@ -1339,6 +1452,47 @@ export default function Dashboard() {
                   No models installed. Run &quot;ollama pull &lt;model&gt;&quot; to get started.
                 </p>
               )}
+              {/* N14: Co-load suggestions */}
+              {system &&
+                ollama.running.length > 0 &&
+                (() => {
+                  // Total size_vram reported by the currently running models.
+                  let usedVram = 0;
+                  for (const r of ollama.running) usedVram += r.size_vram;
+                  // Heuristic budget: free memory + half of cached, minus 10% of that VRAM total.
+                  const budget = system.memory.free + system.memory.cached * 0.5 - usedVram * 0.1;
+                  // Keep non-running models whose estimated RAM is below the budget.
+                  const fits: { name: string; est: number }[] = [];
+                  for (const m of ollama.models) {
+                    if (isRunning(m.name)) continue;
+                    const est = estimateRam(m.size, m.details?.quantization_level);
+                    if (est < budget) fits.push({ name: m.name, est });
+                  }
+                  if (fits.length === 0) return null;
+                  // Largest estimates first, capped at three suggestions.
+                  const top = fits.sort((a, b) => b.est - a.est).slice(0, 3);
+                  const boxStyle = {
+                    background: 'var(--surface-muted)',
+                    border: '1px solid var(--border-subtle)',
+                  };
+                  return (
+                    <div className="mt-3 p-3 rounded-lg" style={boxStyle}>
+                      <span className="text-[11px]" style={{ color: 'var(--text-tertiary)' }}>
+                        Can also load:{' '}
+                      </span>
+                      {top.map(({ name, est }, idx) => (
+                        <span
+                          key={name}
+                          className="text-[11px] font-mono"
+                          style={{ color: 'var(--accent-secondary)' }}
+                        >
+                          {idx > 0 && ', '}
+                          {name.split(':')[0]} (~{formatBytes(est)})
+                        </span>
+                      ))}
+                    </div>
+                  );
+                })()}
             </div>
           )}
         </div>